SAS编程-宏:检查缺失或多余的文件

日常项目中,程序或Outputs一般会提前确定好,通过Excel Tracker或者文件管理系统进行管理。各个公司一般也有相关工具进行检查,以确保Tracker中的程序或Outputs都在输出文件夹中。

不过,本公司工具不会检查文件夹中多余的程序或Outputs。于是,写了个SAS宏程序用于实现这个功能。这个宏程序也可用于检查其它所需文件,例如xml

宏程序汇总在文章末尾,若参考引用,需要结合各自项目路径结构,以及Tracker信息,来更新程序。

宏程序的分为3个部分:

  1. 获取系统或Tracker中的文件名称
  2. 获取文件夹中已有的文件名称
  3. 通过比较以上两类名称,获取缺失或多余的文件

第一部分,可能通过公司的管理工具获取,也可以简单导入EXCEL Tracker。

options validvarname = v7;
proc import datafile = "/xxx/xxx/xxx.xlsx"
   out = tfls dbms = xlsx replace;
run;

第二部分,可以通过Dopen系列函数获取文件名称,具体可以参考文章SAS编程:如何获取某路径下文件的名称?,完整内容可以整合成一个宏程序。

%macro get_file(dirpath =, outdt =, type = sas );

%if "&dirpath." ne "" %then %do;

%local dirpath_tmp slash;

%let slash = %substr(%sysfunc(compress(&dirpath., : _ , a d)), 1, 1);

*Remove trailing slash;
%if "%substr(&dirpath., %length(&dirpath.),1)"= "&slash." %then %let dirpath_tmp=%substr(&dirpath.,1, %length(&dirpath.) -1); 
%else %let dirpath_tmp = &dirpath.;

** Dopen--Get filepath;
data &outdt.;
  fileres = filename("dirpath", "&dirpath_tmp");
  dirid = dopen("dirpath");
  num = dnum(dirid);

  length direct filename filepath $200;

  if dirid >0 and num >0 then do;
    do i=1 to num;
      direct = "&dirpath_tmp.";
      filename = dread(dirid, i);
      filepath = catx("&slash.", direct, filename);

      %if %index(%upcase(&type), SAS) %then %do;
        if strip(scan(filename, 2, "."))="sas" then output;
      %end;

      %if %index(%upcase(&type), RTF) %then %do;
        if strip(scan(filename, 2, "."))="rtf" then output;
      %end;

      %if %index(%upcase(&type), XLSX) %then %do;
        if strip(scan(filename, 2, "."))= "xlsx" then output;
      %end;

      end;
  end;

  keep filename filepath;

  proc sort;
    by filename;
  run;
%end;
%mend get_file;

%get_file(
  dirpath = &path_ana./tables
  ,outdt = source_pgm_tables
  ,type = sas 
);

第三部分,可以通过SQL简单的筛选呈现缺失或多余的文件。

%let out_type = tables;
%let type = T;

proc sql noprint;
  **in tracker not in folder;
  create table source_in_tracker_&out_type. as
    select output_number, base_output_name, program_name, programmer, tester
    from tfls
    where output_type = "&type." and program_name not in
    (
      select filename from source_pgm_&out_type.
    )
  ;

  **in folder not in tracker;
   create table source_in_folder_&out_type. as
      select *
      from source_pgm_&out_type.
      where filename not in
      (
        select program_name from tfls
      )
  ;
quit;

以上三部分,可以整合成一个完整的宏程序,用于检查同一个分析文件夹下的TFLs程序或Output缺失或多余的情况。

以我当前公司的数据结构为例,汇总程序如下。若读者想要引用,需结合项目文件结构以及Tracker信息进行更新。

***1. Get file names in tracker (Company internal tool);
%global path_ana pi_sas;

%let path_ana = /xxx/xxx;
%let pi_sas = pi_xxx_xxx.sas;

%include "&path_ana./docs/pi/&pi_sas.";

%pi_amg757_bla_iss_90d_2023(
  lib = work
  ,txtpath = &path_ana./docs/pi/
  ,outds = all
);


***2. Macro to get  file names in folder;
%macro get_file(dirpath =, outdt =, type = sas );

%if "&dirpath." ne "" %then %do;

%local dirpath_tmp slash;

%let slash = %substr(%sysfunc(compress(&dirpath., : _ , a d)), 1, 1);

*Remove trailing slash;
%if "%substr(&dirpath., %length(&dirpath.),1)"= "&slash." %then %let dirpath_tmp=%substr(&dirpath.,1, %length(&dirpath.) -1); 
%else %let dirpath_tmp = &dirpath.;

** Dopen--Get filepath;
data &outdt.;
  fileres = filename("dirpath", "&dirpath_tmp");
  dirid = dopen("dirpath");
  num = dnum(dirid);

  length direct filename filepath $200;

  if dirid >0 and num >0 then do;
    do i=1 to num;
      direct = "&dirpath_tmp.";
      filename = dread(dirid, i);
      filepath = catx("&slash.", direct, filename);

      %if %index(%upcase(&type), SAS) %then %do;
        if strip(scan(filename, 2, "."))="sas" then output;
      %end;

      %if %index(%upcase(&type), RTF) %then %do;
        if strip(scan(filename, 2, "."))="rtf" then output;
      %end;

      %if %index(%upcase(&type), XLSX) %then %do;
        if strip(scan(filename, 2, "."))= "xlsx" then output;
      %end;

      %if %index(%upcase(&type), XML) %then %do;
        if strip(scan(filename, 2, "."))= "xml" then output;
      %end;

      end;
  end;

  keep filename filepath;

  proc sort;
    by filename;
  run;
%end;
%mend get_file;


***3. Macro to check folder file;
%macro check_folder_file(out_type = tables);

%local type;
%if %upcase(&out_type) = TABLES %then %let type = T;
%else %if %upcase(&out_type) = LISTINGS %then %let type = L;
%else %if %upcase(&out_type) = FIGURES %then %let type = F;

**3.1 Source folder;
%get_file(
  dirpath = &path_ana./&out_type.
  ,outdt = source_pgm_&out_type.
);

proc sql noprint;
  **in tracker not in folder;
  create table source_in_tracker_&out_type. as
    select output_number, base_output_name, program_name, programmer, tester
    from tfls
    where output_type = "&type." and program_name not in
    (
      select filename from source_pgm_&out_type.
    )
  ;

  **in folder not in tracker;
   create table source_in_folder_&out_type. as
      select *
      from source_pgm_&out_type.
      where filename not in
      (
        select program_name from tfls
      )
  ;
quit;


**3.2 QC folder;
%get_file(
  dirpath = &path_ana./&out_type./validation
  ,outdt = qc_pgm_&out_type.
);

proc sql noprint;
  **in tracker not in folder;
  create table qc_in_tracker_&out_type. as
    select output_number, testing_program_name, tester
    from tfls
    where output_type = "&type." and testing_program_name not in
    (
      select filename from qc_pgm_&out_type.
    )
  ;

  **in folder not in tracker;
   create table qc_in_folder_&out_type. as
      select *
      from qc_pgm_&out_type.
      where filename not in
      (
        select testing_program_name from tfls
      )
  ;
quit;


**3.3 Output folder;
%get_file(
  dirpath = &path_ana./&out_type./output
  ,outdt = output_&out_type.
  ,type = rtf-xlsx
);

proc sql noprint;

  **in tracker not in folder;
  create table output_in_tracker_&out_type. as
    select *
    from tfls
    where output_type = "&type." and output_name not in
    (
      select filename from output_&out_type.
    )
  ;

  **in folder not in tracker;
   create table output_in_folder_&out_type. as
      select *
      from output_&out_type.
      where filename not in
      (
        select output_name from tfls
        where output_type = "&type."
      )
  ;

**3.4 xml template folder (internal use);
%get_file(
  dirpath = &path_ana./docs/templates
  ,outdt = tflgen_template
  ,type = xml
);

proc sql noprint;

  **in tracker not in folder;
  create table tflgen_template_in_tracker as
    select *
    from tfls
    where tflgen_allow_code_generation = "Yes" and strip(tflgen_template_name)||".xml" not in
    (
      select filename from tflgen_template
    )
  ;

  **in folder not in tracker;
   create table tflgen_template_in_folder  as
      select *
      from tflgen_template
      where filename not in
      (
        select strip(tflgen_template_name)||".xml" from tfls
        where tflgen_allow_code_generation = "Yes" 
      )
  ;

quit;


%mend check_folder_file;

%check_folder_file(out_type = tables);

%check_folder_file(out_type = listings);

%check_folder_file(out_type = figures);

感谢阅读, 欢迎关注:SAS茶谈!
若有疑问,欢迎评论交流!

梳理不易,转载请注明出处 (by Jihai / SAS茶谈)

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容