Skip to content

Commit ab0d2ea

Browse files
committed
New step: Log file scraper
1 parent afd41fd commit ab0d2ea

File tree

5 files changed

+41
-0
lines changed

5 files changed

+41
-0
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"creationTimeStamp":"2024-06-14T18:32:14.921Z","modifiedTimeStamp":"2024-11-08T12:17:53.917Z","createdBy":"Ethan.Kavanaugh@sas.com","modifiedBy":"Remco.Gooijer@sas.com","name":"Log file scraper.step","displayName":"Log file scraper.step","localDisplayName":"Log file scraper.step","properties":{},"links":[{"method":"GET","rel":"self","href":"/dataFlows/steps/e2302a5f-e35c-4cc9-ac58-ed6e369c4a1d","uri":"/dataFlows/steps/e2302a5f-e35c-4cc9-ac58-ed6e369c4a1d","type":"application/vnd.sas.data.flow.step"},{"method":"GET","rel":"alternate","href":"/dataFlows/steps/e2302a5f-e35c-4cc9-ac58-ed6e369c4a1d","uri":"/dataFlows/steps/e2302a5f-e35c-4cc9-ac58-ed6e369c4a1d","type":"application/vnd.sas.data.flow.step.summary"},{"method":"GET","rel":"up","href":"/dataFlows/steps","uri":"/dataFlows/steps","type":"application/vnd.sas.collection","itemType":"application/vnd.sas.data.flow.step.summary"},{"method":"PUT","rel":"update","href":"/dataFlows/steps/e2302a5f-e35c-4cc9-ac58-ed6e369c4a1d","uri":"/dataFlows/steps/e2302a5f-e35c-4cc9-ac58-ed6e369c4a1d","type":"application/vnd.sas.data.flow.step","responseType":"application/vnd.sas.data.flow.step"},{"method":"DELETE","rel":"delete","href":"/dataFlows/steps/e2302a5f-e35c-4cc9-ac58-ed6e369c4a1d","uri":"/dataFlows/steps/e2302a5f-e35c-4cc9-ac58-ed6e369c4a1d"},{"method":"POST","rel":"copy","href":"/dataFlows/steps/e2302a5f-e35c-4cc9-ac58-ed6e369c4a1d/copy","uri":"/dataFlows/steps/e2302a5f-e35c-4cc9-ac58-ed6e369c4a1d/copy","responseType":"application/vnd.sas.data.flow.step"},{"method":"GET","rel":"transferExport","href":"/dataFlows/steps/e2302a5f-e35c-4cc9-ac58-ed6e369c4a1d","uri":"/dataFlows/steps/e2302a5f-e35c-4cc9-ac58-ed6e369c4a1d","responseType":"application/vnd.sas.transfer.object"},{"method":"PUT","rel":"transferImportUpdate","href":"/dataFlows/steps/e2302a5f-e35c-4cc9-ac58-ed6e369c4a1d","uri":"/dataFlows/steps/e2302a5f-e35c-4cc9-ac58-ed6e369c4a1d","type":"application/vnd.sas.transfer.object","responseType":"application/vnd.sas.summary
"}],"metadataVersion":0.0,"version":2,"type":"code","flowMetadata":{"inputPorts":[],"outputPorts":[{"name":"_output1","displayName":"_output1","localDisplayName":"_output1","minEntries":1,"maxEntries":1,"defaultEntries":0,"type":"table","supportsView":false,"requiresStructure":false}]},"ui":"{\n\t\"showPageContentOnly\": true,\n\t\"pages\": [\n\t\t{\n\t\t\t\"id\": \"page1\",\n\t\t\t\"type\": \"page\",\n\t\t\t\"label\": \"Properties\",\n\t\t\t\"children\": [\n\t\t\t\t{\n\t\t\t\t\t\"id\": \"logfile_dir\",\n\t\t\t\t\t\"type\": \"path\",\n\t\t\t\t\t\"label\": \"Select the SAS Compute file directory where the log files can be found:\",\n\t\t\t\t\t\"pathtype\": \"folder\",\n\t\t\t\t\t\"placeholder\": \"\",\n\t\t\t\t\t\"required\": true,\n\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"id\": \"today_min_days\",\n\t\t\t\t\t\"type\": \"numstepper\",\n\t\t\t\t\t\"label\": \"'Today' minus number of days:\",\n\t\t\t\t\t\"required\": true,\n\t\t\t\t\t\"integer\": true,\n\t\t\t\t\t\"min\": 0,\n\t\t\t\t\t\"max\": 365,\n\t\t\t\t\t\"stepsize\": 1\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"id\": \"_output1\",\n\t\t\t\t\t\"type\": \"outputtable\",\n\t\t\t\t\t\"label\": \"Output table:\",\n\t\t\t\t\t\"required\": true,\n\t\t\t\t\t\"placeholder\": \"\",\n\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t}\n\t\t\t]\n\t\t},\n\t\t{\n\t\t\t\"id\": \"page2\",\n\t\t\t\"type\": \"page\",\n\t\t\t\"label\": \"About\",\n\t\t\t\"children\": [\n\t\t\t\t{\n\t\t\t\t\t\"id\": \"text2\",\n\t\t\t\t\t\"type\": \"text\",\n\t\t\t\t\t\"text\": \"Log file scraper custom step\\n=============================\\n\\nThe \\\"Log file scraper\\\" custom step makes it possible to extract ERRORS and/or WARNINGS from one or more SAS log files and makes them available in a table. 
Finding the ERRORS and/or WARNINGS is done using a regular expression.\",\n\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"id\": \"section1\",\n\t\t\t\t\t\"type\": \"section\",\n\t\t\t\t\t\"label\": \"Changelog\",\n\t\t\t\t\t\"open\": false,\n\t\t\t\t\t\"visible\": \"\",\n\t\t\t\t\t\"children\": [\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"text1\",\n\t\t\t\t\t\t\t\"type\": \"text\",\n\t\t\t\t\t\t\t\"text\": \"* Version 1.1 (08NOV2024)\\n - Published extrnally\\n\\n* Version 1,0 (11DEC2023)\\n - Published internally\\n - Initial version\\n\\n\",\n\t\t\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t\t\t}\n\t\t\t\t\t]\n\t\t\t\t}\n\t\t\t]\n\t\t}\n\t],\n\t\"syntaxversion\": \"1.3.0\",\n\t\"values\": {\n\t\t\"logfile_dir\": \"\",\n\t\t\"today_min_days\": 0,\n\t\t\"_output1\": {\n\t\t\t\"library\": \"\",\n\t\t\t\"table\": \"\"\n\t\t}\n\t}\n}","templates":{"SAS":"%macro directory_contents(directory, pattern); \n \n\t%let regexp =%sysfunc(tranwrd(&pattern ,.,\\.)); \n\t%let regexp2=%sysfunc(tranwrd(&regexp,*,.*)); \n\t%let regexp3=%sysfunc(tranwrd(&regexp2,?,.)) ; \n\t%let pattern =/&regexp3/; \n \n\tdata log_files(keep=filename file_dt file_tm); \n \n\t\tlength \n\t\t\tfref $8 \n\t\t\tfilename $256; \n \n\t\tattrib file_dt length=8 format=date9.;\n\t\tattrib file_tm length=8 format=time.; \n \n\t\t/*\n\t\t\tAdd the '/' character if needed.\n\t\t*/\n\t\tif ksubstr(kreverse(\"&directory\"), 1, 1) ^= '/' then do; \n\t\t\tdirectory = strip(\"&directory\" || '/');\t \n\t\tend; \n\t\telse do; \n\t\t\tdirectory = strip(\"&directory\"); \n\t\tend; \n \n\t\t/*\n\t\t\ttry to open the specified directory.\n\t\t*/\n\t\trc = filename(fref, \"'&directory'\"); \n\t\tif rc = 0 then do; \n\t\t\tdid = dopen(fref); \n\t\t\trc = filename(fref); \n\t\tend; \n\t\telse do; \n\t\t\tlength msg $200.; \n\t\t\tmsg = sysmsg(); \n\t\t\tput msg=; \n\t\t\tdid = .; \n\t\tend; \n \n\t\tif did <= 0 then \n\t\t\tput 'error: unable to open directory.'; \n \n\t\tdnum = dnum(did); \n\t\tregexp = 
prxparse(\"&pattern\"); \n \t\tdo i = 1 to dnum; \n\t\t\tfilename = dread(did, i); \n\t\t\trc = prxmatch(regexp, filename); \n\t\t\tif (rc > 0) then do;\n\t\t\t\trc = filename('fid', strip(directory) || strip(filename));\n\t\t\t\tfid = fopen('fid'); \n \t\t\t\tfilename = strip(directory) || strip(filename); \n /*\n\t\t\t\t\tgenerate the file_dt and file_tm based on the 'last modified' file property.\n\t\t\t\t*/\n\t\t\t\tfile_dt = datepart(input(finfo(fid,'last modified'), datetime.));\n file_tm = timepart(input(finfo(fid,'last modified'), datetime.));\n rc = fclose(fid);\n\t\t\t\t/*\n\t\t\t\t\tapply the date range filter.\n\t\t\t\t\tWhen in range, write to the output table.\n\t\t\t\t*/\n\t\t\t\tif file_dt >= date() - &today_min_days then do;\n\t\t\t\t\toutput;\n\t\t\t\tend;\n\t\t\tend; \n\t\tend; \n\t\trc = dclose(did); \n\trun; \n%mend directory_contents; \n\n%macro searchlog; \n\t/* \n\t\tcreate and initialize the 'all_messages' table. \n\t\tthis is done in case there are no files. \n\t*/ \n\tdata &_output1; \n\t\tattrib logfile length=$256; \n\t\tattrib file_dt length=8. format=date9.; \n\t\tattrib file_tm length=8. format=time.; \n\t\tattrib path length=$256; \n\t\tattrib text length=$512; \n\t\tattrib linenum length=8.; \n\t\tstop; \n run; \n \n\t%do i = 1 %to &totobs; \n\t\t%let rc = %sysfunc(filename(filrf, &&inlog&i)); \n\t\t%if %sysfunc(fileref(&filrf)) = 0 %then %do; \n\t\t\t%let fid = %sysfunc(fopen(&filrf)); \n\t\t\t%if &fid ne 0 %then %do; \n\t\t\t\tdata log_messages(drop=regexp_search); \n\t\t\t\t\tattrib file_dt length=8. format=date9.; \n\t\t\t\t\tattrib file_tm length=8. format=time.; \n\t\t\t\t\tformat \n\t\t\t\t\t\tlogfile $256. \n\t\t\t\t\t\tpath $256. \n\t\t\t\t\t\ttext $512. ; \n\t\t\t\t\tinfile &filrf length = len; \n\t\t\t\t\tinput text $varying500. 
len; \n\t\t\t\t\tlinenum = _n_; \n\t\t\t\t\t \n\t\t\t\t\tregexp_search = prxparse(\"/^ERROR(.*):|^WARNING(.*)/\");\n\t\t\t\t\tif prxmatch(regexp_search, text) > 0 then do; \n\t\t\t\t\t\tpath = \"&log_dir\"; \n\t\t\t\t\t\tlogfile = kreverse(kscan(kreverse(\"&&inlog&i\"), 1, '/')); \n\t\t\t\t\t\tfile_dt = input(\"&&filedt&i\", date9.); \n\t\t\t\t\t\tfile_tm = input(\"&&filetm&i\", time.); \n\t\t\t\t\t\toutput; \n\t\t\t\t\tend; \n\t\t\t\trun; \n\t\t\t%end; \n\t\t\t%else %do; \n\t\t\t\t%put WARNING: file in use: &&inlog&i; \n\t\t\t\tdata log_messages; \n\t\t\t\t\tattrib file_dt length=8. format=date9.; \n\t\t\t\t\tattrib file_tm length=8. format=time.; \n\t\t\t\t\tformat \n\t\t\t\t\t\tlogfile $256. \n\t\t\t\t\t\tpath $256. \n\t\t\t\t\t\ttext $512. ; \n\t\t\t\t\tlinenum = 1; \n\t\t\t\t\tpath = \"&log_dir\"; \n\t\t\t\t\tlogfile = kreverse(kscan(kreverse(\"&&inlog&i\"), 1, '/')); \n\t\t\t\t\tfile_dt = input(\"&&filedt&i\", date9.); \n\t\t\t\t\tfile_tm = input(\"&&filetim&i\", time.); \n\t\t\t\t\ttext = \"checklog message: logfile could not be opened because the job was not finished at the time of checking the logs\"; \n\t\t\t\t\toutput; \n\t\t\t\trun; \n\t\t\t%end; \n\t\t\t%let cfid=%sysfunc(fclose(&fid)); \n\t\t\t%let rc=%sysfunc(filename(&filrf)); \n\t\t\tproc append data=log_messages base=&_output1 force;run; \n\t\t%end; \n \t\t%else %do; \n\t\t\t%let message = %sysfunc(sysmsg());\n\t\t\t%put &message; \n\t\t\t%let filrf = ; \n\t\t%end;\n\t%end; \n%mend searchlog; \n\n%macro check_batch_log; \n \n\t/*\n\t\tremove the directory type, sascontent or sasserver, from the log_dir macro variable\n\t*/\n\tdata _null_;\n\t\tcall symput (\"log_dir\", scan(\"&logfile_dir\", 2, ':'));\n\t\tcall symput (\"dir_type\", scan(\"&logfile_dir\", 1, ':'));\n\trun;\n\n\t/*\n\t\tThis custom step only supports physical files.\n\t*/\n\t%if &dir_type eq sasserver %then %do;\n \n\t\t/*\n\t\t\tsearch for log files in the log directory\n\t\t*/\n\t\t%directory_contents \n\t\t\t( 
\n\t\t\t\t&log_dir, \n\t\t\t\t*.log \n\t\t\t); \n \n\t\t/* \n\t\t\tput the log files, and related information, in memory, ready to be used.\t \n\t\t*/ \n\t\t%let totobs = 0;\n\t\tdata _null_; \n\t\t\tset \n\t\t\t\twork.log_files end = final; \n\t \t\tlength \n\t\t\t\tmacvar $12.; \n\t\t\tmacvar = 'inlog'||kcompress(put(_n_,3.)); \n\t\t\tfiledt = 'filedt'||kcompress(put(_n_,3.)); \n\t\t\tfiletm = 'filetm'||kcompress(put(_n_,3.)); \n \n\t\t\tcall symput(macvar,filename); \n\t\t\tcall symput(filedt,kcompress(put(file_dt, date9.))); \n\t\t\tcall symput(filetm,kcompress(put(file_tm, time.))); \n \n\t\t\tif final then call symput('totobs',kcompress(put(_n_,3.))); \n\t\trun;\n \n\t\t/* \n\t\t\tsearch for errors and warnings in the log files. \n\t\t*/\n\t\t%put NOTE: &totobs log files encountered for the given date range.;\n\t\t%searchlog;\n\t%end;\n\t%else %do;\n\t\t%put ERROR: Please select a location on the SAS Server. SAS Content folders are not supported by this custom step;\n\t\t%abort exit;\n\t%end;\n%mend check_batch_log; \n%CHECK_BATCH_LOG; \n\n/*\n\tRemove all macro variables from memory.\n*/\n%symdel log_dir /NOWARN;\n%symdel dir_type /NOWARN;\n%symdel totobs /NOWARN;\n\n/*\n\tRemove all macros from memory.\n*/\n%SYSMACDELETE directory_contents / NOWARN;\n%SYSMACDELETE searchlog / NOWARN;\n%SYSMACDELETE check_batch_log / NOWARN;"}}

LOG file scraping/README.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Log file scraper
2+
3+
### Description
4+
5+
The "**Log file scraper**" custom step makes it possible to extract ERRORS and WARNINGS from one or more SAS log files and makes them available in a table. Finding the ERRORS and/or WARNINGS is done using a regular expression.
6+
7+
## Typical usage
8+
9+
In a situation where your process generates physical log files, you can extract the generated ERRORS and WARNINGS and make them available for further processing.
10+
11+
## User Interface
12+
13+
![Properties](img/UI_Properties.PNG)
14+
15+
Here you need to select the following:
16+
- **Select the SAS Compute file directory where the log files can be found**: This needs to be the directory where the log files are written and/or downloaded to. Note that the custom step doesn't process subfolders and that the directory can't be a SAS Viya content directory.
17+
- **'Today' minus number of days**: With this value you can specify a date range for the log files you want to include in the scraping process. In this example, this custom step processes the log files for the last 7 days. The default value is 0, meaning it will only scrape log files generated today.
18+
19+
## Usage
20+
Download the step, upload it into your environment, drag it onto the canvas of your flow, specify the directory where your log files are located, specify the date range, and run the custom step.
21+
22+
An example of its usage is:
23+
![Usage](img/UI_Usage.png)
24+
25+
Running the custom step results in a table containing the WARNINGS and/or ERRORS found in the log files for the given date range.
26+
27+
![Output](img/Output_01.PNG)
28+
29+
The table contains the following columns:
30+
- **logfile**: The name of the log file that contains the ERROR(s) and/or WARNING(s).
31+
- **file_dt**: The date the log file was created or downloaded (taken from the file's "last modified" property).
32+
- **file_tm**: The time the log file was created or downloaded (taken from the file's "last modified" property).
33+
- **path**: The location of the log file.
34+
- **text**: The actual warning or error message.
35+
- **linenum**: The line number the warning or error is found at.
36+
37+
## Change log
38+
Version 1.1 (08 Nov 2024): Published externally.
39+
40+
Version 1.0 (11 Dec 2023): Initial version.
31.6 KB
Loading
16.7 KB
Loading

LOG file scraping/img/UI_Usage.png

18.8 KB
Loading

0 commit comments

Comments
 (0)