SAS宏:分析Log文件,找出最费时间的步骤
我经常遇到的问题是:想从成百上千个DATA步和过程步中找出花费时间最长的步骤(以便对程序进行优化),但SAS输出的Log文件可读性不强,无法方便地从里面找到我要的信息。
我写的这个宏把Log文件里每一个步骤的名称、所在Log文件的行号、所花的实际时间和CPU时间提取出来,方便分析。也可以对日志进行分析,找出错误信息和警告信息,这个我们留到以后再说。
Log文件大致是下面這個(gè)樣子:
NOTE: Deleting WORK.SS_TEMP (memtype=DATA).
NOTE: PROCEDURE DELETE used (Total process time):
real time 0.00 seconds
cpu time 0.01 seconds
NOTE: The data set WORK.SS_TEMP has 1 observations and 1 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
NOTE: There were 1 observations read from the data set WORK.SS_TEMP.
NOTE: The data set WORK.SS_TEMP has 1 observations and 2 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
NOTE: PROCEDURE SQL used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
此宏的作用是读取SAS系统生成的Log文件,并将得到的步骤名称和对应的运行时间保存至结果表格中。其中,InFilePath指定要读取的Log文件,必须写出从根目录到文件名的完整路径;TargetTable是结果表格。最终得到的是原始Log文件中包含的步骤名称和对应的运行时间(实际时间和CPU时间),其中N是步骤在原始Log文件中的行号。
%macro GetTimeTableForSASLog(InFilePath,TargetTable);
/**********************************************************************/
/* Purpose:                                                           */
/*   Read a SAS log file and build a table with one row per DATA step */
/*   or PROCEDURE timing note found in it.                            */
/*                                                                    */
/* Parameters:                                                        */
/*   InFilePath  - full path of the log file; the suffix must be      */
/*                 txt or log (case-insensitive).                     */
/*   TargetTable - name of the output data set (must not be blank).   */
/*                                                                    */
/* Output columns of TargetTable:                                     */
/*   N         - line number of the step header inside the log file   */
/*   Name      - text between "NOTE: " and " used", for example       */
/*               "PROCEDURE SQL" or "DATA statement"                  */
/*   Real_Time - elapsed time as a SAS time value (seconds)           */
/*   CPU_Time  - CPU time as a SAS time value (seconds)               */
/*                                                                    */
/* On a validation failure an ERROR line is written to the log and    */
/* the macro exits without creating TargetTable.                      */
/**********************************************************************/
%local ext;

/* Validate InFilePath: it must have a suffix, and that suffix must   */
/* be TXT or LOG. %superq/%qscan keep commas and other special        */
/* characters in the path from breaking the macro expressions.        */
%let ext=;
%if %index(%superq(InFilePath),%str(.)) GT 0 %then
    %let ext=%qupcase(%qscan(%superq(InFilePath),-1,%str(.)));
%if "&ext" NE "TXT" AND "&ext" NE "LOG" %then
%do;
    %put ERROR: The suffix of InFilePath should be txt or log, please check it again.;
    %goto exit;
%end;

/* Validate TargetTable: it must not be blank. */
%if %length(%superq(TargetTable)) EQ 0 %then
%do;
    %put ERROR: The TargetTable should not be blank, please check it again.;
    %goto exit;
%end;

/* Fail early with a clear message when the log file is missing. */
%if NOT %sysfunc(fileexist(%superq(InFilePath))) %then
%do;
    %put ERROR: The file specified by InFilePath does not exist, please check it again.;
    %goto exit;
%end;

/* Read the log line by line in a single DATA step.                   */
/* INFILE is used instead of PROC IMPORT so that every physical line  */
/* is read whole (no delimiter splitting, no guessed column widths)   */
/* and _N_ is the true line number in the log file.                   */
data &TargetTable(keep=N Name Real_Time CPU_Time);
    infile "&InFilePath" truncover lrecl=32767 end=_eof;

    /* Declared first so the output column order is N, Name, Real_Time, CPU_Time. */
    length N 8 Name $100 Real_Time CPU_Time 8;
    length _line _upper $400 _value $100;
    retain N Name Real_Time CPU_Time;
    /* _pending = 1 while a step header has been seen and not yet written out. */
    retain _pending 0;
    format Real_Time CPU_Time TIME10.4;

    input;
    /* Turn tabs and carriage returns into blanks, then drop the leading indent. */
    _line  = left(translate(_infile_, '  ', '090D'x));
    _upper = upcase(_line);

    if _upper =: 'NOTE:' then
    do;
        /* Any new NOTE closes the step that was being collected. */
        if _pending then output;
        _pending = 0;

        /* Only "NOTE: DATA ... used" and "NOTE: PROCEDURE ... used" start a step. */
        /* Other notes leave _pending at 0, so their timing lines are ignored.     */
        _usedpos = find(_upper, ' USED');
        if (_upper =: 'NOTE: DATA' or _upper =: 'NOTE: PROCEDURE') and _usedpos > 7 then
        do;
            N         = _n_;
            Name      = substr(_line, 7, _usedpos - 7);
            Real_Time = .;
            CPU_Time  = .;
            _pending  = 1;
        end;
    end;
    else if _pending and (_upper =: 'REAL TIME' or _upper =: 'CPU TIME') then
    do;
        /* The time value is the first blank-delimited token starting at a digit. */
        _pos = prxmatch('/\d/', _line);
        if _pos > 0 then
        do;
            _value = scan(substr(_line, _pos), 1, ' ');
            /* Number of colons tells the layout: s.ss, m:ss.ss or h:mm:ss.ss */
            select (countc(_value, ':'));
                when (0) _secs = input(_value, ?? best32.);
                when (1) _secs = hms(0,
                                     input(scan(_value, 1, ':'), ?? best32.),
                                     input(scan(_value, 2, ':'), ?? best32.));
                when (2) _secs = hms(input(scan(_value, 1, ':'), ?? best32.),
                                     input(scan(_value, 2, ':'), ?? best32.),
                                     input(scan(_value, 3, ':'), ?? best32.));
                otherwise _secs = .;
            end;
            if _upper =: 'REAL TIME' then Real_Time = _secs;
            else CPU_Time = _secs;
        end;
    end;

    /* Write out the last step when the log ends right after its timing lines. */
    if _eof and _pending then output;
run;

%exit:
%mend GetTimeTableForSASLog;