Base file: K:\TIDE Projects\FDA_Sentinel\07. Projects and Task Orders\01. Modular Programs\QRP\Distributed\QRP_3.3.3\inputfiles\macros\ms_covariate_adjustment.sas

Compared file: K:\Sentinel\requests\mpl2\cder_mpl2p_wp001\packages\cder_mpl2p_wp001_nsdp_v01\WithRadar\inputfiles\macros\ms_covariate_adjustment.sas

Generated by CSDiff on 1/24/2017 12:31 PM  

 
****************************************************************************************************
*                                           PROGRAM OVERVIEW
****************************************************************************************************
*
* PROGRAM: ms_covariate_adjustment.sas  
*
* Created (mm/dd/yyyy): 06/11/2014
* Last modified: 10/17/2016
* Version: 2.7
*
*--------------------------------------------------------------------------------------------------
* PURPOSE:
*  MS PROMPT Module 2: Analytic code for PROMPT Module 2. Models defined in the comparison input
*  file with CLASS NOCLASS variables. HDPS defined using HDPS variables. Creates output dataset 
*  to be used by MSOC for aggregating data. PS is run in a separate program.
*  
*--------------------------------------------------------------------------------------------------
* CONTACT INFO: 
*  Mini-Sentinel Coordinating Center
*  info@mini-sentinel.org
*
*--------------------------------------------------------------------------------------------------
*  CHANGE LOG: 
*
*   Version   Date       Initials      Comment (reference external documentation when available)
*   -------   --------   --------   ---------------------------------------------------------------
*   1.1       09/10/14   MSOC(JRP)  Added proc options to gather Java-related environment data.
*                                   Added defensive coding to strip SAS dataset extension from COMPARISON.
*                                   Added VARIABLE parameter to allow users to run EITHER 1:1 OR 
*                                      variable (up to 100) matching rather than both.
*
*   1.2       12/01/14   SL(EM)     Changed variable matching ratio from 100 to 10.
*                                   Included YEAR and TIME to logit model.
*                                   Included  &covarlist to class statement of the HDPS=N logit.
*
*   1.3       02/19/2015 SL(EM)     Changed stratamatch such that matching is always executed
*                                   Added bypass code in stratamatch for USE_HDPS=N.
*                                   Modified initHDPS and local proc logistic to keep all convergence 
*                                      details and store them into msoc.&RUNID._estimates_&comp._&look.
*                                   Output varinfo to msoc.&RUNID._VARINFO_&comp._&LOOK.
*                                   Only retain PS estimate used to match on all output 
*                                      datasets
*
*   2.0       03/06/15    SL(EM)    Major changes to include the creation of aggregated datasets (riskset, 
*                                      Riskdiff and survival)in addition to the matched patient level data which
*                                      is now optional
*                                   Subgroup analysis re-matching is now handled at the distributed level
*                                      and defined using the ANALYTICSUBGROUPS input file (and macro parameter).  
*                                      Use the INDLEVEL=Y macro parameter to output the patient level data to MSOC
*                                      in addition to the aggregated files.
*
*   2.1       08/26/15    SL(EM)    Restriction to M and F removed
*                                   Corrected matchtables macro call
*
*   2.2       11/09/15    SL(EM)    Removed TEMPLATETABLE macro variable because Excel dependency is removed
*                                   from matchtable
*
*   2.3       12/23/15    SL(EM)    Made MatchedinFull optional.
*
*   2.4       01/15/16    SL(EM)    Remove Java. 
*                                   Resurrect keep statement.
*                                   Move HDPS variable from TYPE2FILE to COMPARISON input file.
*                                   Remove MODEL variable from COMPARISON file.
*
*   2.5       08/10/16   AP(SOC)    Added 2 new HDPS dimension files (DX10 and PX10) for ICD10 (QCI-186)
*                                   Added back in requester selected HDPS parameters (QCI-192)
*
*   2.6       08/15/16    DM        Added Surveillance related code
*
*   2.7       10/17/16    DM        Fixed issues rlated so surveillance
*
****************************************************************************************************;


/**********************************************************************************************************
ADJUSTMENT QUERY - CREATE HDPS COVARIATES, MATCH
**********************************************************************************************************/

/* The following option is required to prevent the following warn in the log:
   The quoted string currently being processed has become more than 262 characters long.  
   You might have unbalanced quotation marks
*/
options noquotelenmax;

%MACRO MS_COVARIATEADJUSTMENT(INDATA=,
                              PERIODSTART=,
                              PERIODEND=,
                              COVARIATECONDITION=,
                              COVARIATES_CONSIDERED=,
                              COVARIATES_SELECTED=,
                              RANKING=,
                              HEALTH_SERVICE_INTENSITY=,
                              ZERO_CELL_CORR=,
                              HDVARSEL_INPUT=,                              
                              COMPARE_INPUT=,
                              TYPE2FILE=,
                              ANALYTICSUBGROUPS=,
                              INDLEVEL=Y,
                              PERCENTILES=10,
                              UNCONDITIONAL=Y,
                              DIAGNOSTICS= 
                              ) ;

%PUT =====> MACRO CALLED: MS_COVARIATEADJUSTMENT v2.7;

/*********************************************************************
  SET UP 
**********************************************************************/
proc options option=jreoptions;
run;

%LET INDLEVEL=%LOWCASE(&INDLEVEL.);
%LET COMPARE_INPUT=%TRIM(%SYSFUNC(TranWrd(%LOWCASE(&COMPARE_INPUT.), .sas7bdat, %STR())));
%LET ANALYTICSUBGROUPS=%TRIM(%SYSFUNC(TranWrd(%LOWCASE(&ANALYTICSUBGROUPS.), .sas7bdat, %STR())));
%LET UNCONDITIONAL=%LOWCASE(&UNCONDITIONAL.);
%LET match_Error=;

%if %sysfunc(exist(HDPSSettings)) %then %do;
    data DPlocal.HDPSSettings;
    set HDPSSettings;
    run;
%end;

/*********************************************************************
  LOOP THROUGH EACH PERIOD
**********************************************************************/
%do look = &PERIODSTART. %to &PERIODEND. ;  ** Loop through looks ;

  %if %sysfunc(exist(&INDATA._&LOOK.)) = 0 %then %do;   
    %put WARNING: No users in the look &LOOK.;
    %goto nextlook;
  %end; 

  %put ======> Look = &look.;
  %global prior;
  %let prior = %eval(&look-1); 

  %let DPLOCALPOINTER=DPLocal;
  %if "&SURVEILLANCEMODE."="f" or "&SURVEILLANCEMODE."="p" %then %let DPLocalPointer=DPLPrior;
  %put NOTE: DPLOCAL for prior run is set to &DPLocalPointer.;

/*********************************************************************
  CREATE MACRO VARIABLES FOR EACH COMPARISON.  LOOP THROUGH EACH
  COMPARISON.
**********************************************************************/

  proc sql ;

    select count(*) into :compcnt
    from infolder.&COMPARE_INPUT. ;

  quit ;


  %do comp = 1 %to &compcnt.;  ** Loop through comparisons ;
  %put ======> comp = &comp.;

/*********************************************************************
    START LOG
**********************************************************************/

    proc printto log="&MSOC.&RUNID._adjusted_&comp._&look..log" new;
    run;

/********************************************************
   GET OVERALL ADJUSTMENT START TIME
 *******************************************************/

    DATA _NULL_;
    temp=DATETIME();
    call symput('START',temp);
    call symput('STARTDATE',put(datepart(temp),date9.));
    call symput('STARTTIME',put(timepart(temp),time4.));
    RUN;

/*********************************************************************
  KEEP DRUGS OF INTEREST - DEFINE EXPOSURE.  KEEP ONLY FIRST EXPOSURE
  PER PATIENT.  DEFINE MACRO VARS. IF PATIENT HAS  SAME INDEX DATE 
  FOR BOTH EXPOSURES THEN EXCLUDE THE PATIENT ENTIRELY.
**********************************************************************/

    %let use_hdps=N;

    data _null_ ;
      set infolder.&COMPARE_INPUT. ;
      if (comp_order eq %eval(&COMP.)) then do ;
        call symputx('GRP1', comp) ;
        call symputx('GRP2', control) ;              

        if upcase(Ratio)="F" then RATIO2=1;
        else if upcase(Ratio)="V" then RATIO2=10;
 
        call symputx('RATIO', put(RATIO2,best.)) ;
        call symputx('CALIPER', put(CALIPER,best.)) ;
        if RATIO2=10 then RATIO2=100;  *Trick: 100 is required by hdps toolbox - however it is really 10;

        score_type_3= 0;
        score_type_4= 0;
        score_type_5= 0;

        if UPCASE(HDPS)="N" then do;
            call symputx('matchid', "PREDEFINEDPSMATCHID"||strip(put(RATIO2,best.)||"_1"));
            call symputx('matchds', "PREDEFINEDMATCHCAL"||strip(put(RATIO2,best.)||"_1"));
            call symputx('psvar',   "PREDEFINEDPS");
            call symputx('use_hdps',"N");
            score_type_3= 1;
        end;
        if UPCASE(HDPS)="Y" and (CLASS="" AND NOCLASS="") then do;
            call symputx('matchid', "HDPSONLYMATCHID"||strip(put(RATIO2,best.)||"_1"));
            call symputx('matchds', "HDPSONLYMATCHCAL"||strip(put(RATIO2,best.)||"_1"));
            call symputx('psvar',   "HDPSONLY");
            call symputx('use_hdps',"Y");
            score_type_5= 1;
        end;
        if UPCASE(HDPS)="Y" and not (CLASS="" AND NOCLASS="") then do;
            call symputx('matchid', "HDPSPREDEFINEDMATCHID"||strip(put(RATIO2,best.)||"_1"));
            call symputx('matchds', "HDPSPREDEFINEDMATCHCAL"||strip(put(RATIO2,best.)||"_1"));
            call symputx('psvar',   "HDPSPREDEFINED");
            call symputx('use_hdps',"Y");
            score_type_4= 1;
        end;

       *without e due to scoping with hdps;
       call symputx('score_typ_3', put(score_type_3,best.)) ; 
       call symputx('score_typ_4', put(score_type_4,best.)) ;
       call symputx('score_typ_5', put(score_type_5,best.)) ;

       call symputx('ClassVars', Class) ;
       call symputx('NoClassVars', NoClass) ;

       call symputx('HDPSWinFrom', put(HDPSWinFrom,best.)) ;
       call symputx('HDPSWinTo', put(HDPSWinTo,best.)) ;

      end ;
    run ;

    %put  &CALIPER. &RATIO. &matchid. &matchds. &score_typ_3. &score_typ_4. &score_typ_5. &psvar.;
    %put  &classVars. &NoclassVars. &HDPSWinFrom. &HDPSWinTo. &use_hdps.;

    data temp3 ;
      set &INDATA._&LOOK. ;
    
      if (UPCASE(Group) eq %UPCASE("&GRP1.")) then do ;
        exposure = 1 ;
        output ;
      end ;
      else if (UPCASE(Group) eq %UPCASE("&GRP2.")) then do ;
        exposure = 0 ;
        output ;
      end ;
      rename group = studyclass ;
    run ;
   
    proc sort data = temp3 ;
    by patid IndexDt ;
    run ;

    data temp3;
      set temp3 ;
      by PatID IndexDt ;
      if first.PatID ;   
      if first.IndexDt AND last.IndexDt ;
    run ;

/****************************************************************
  GET ITT AND HDPS MACRO PARAMETERS.  MAKE SURE THEY ARE NOT
  DIFFERENT FOR EXPOSURE AND CONTROL GROUPS.
*****************************************************************/

    /* ITT  */
    data itt_days ;
      set infolder.&TYPE2FILE. ;
      where (UPCASE(Group) in (%UPCASE("&GRP1.") %UPCASE("&GRP2."))) ;
      itt = ^missing(ittdays) ;
    run ;
    
    proc summary missing nway data = itt_days ;
    class itt ;
    output out = itt_summ (drop = _:) ;
    run ;
    
    proc sql noprint;

      select itt into :itt
      from itt_summ ;

      select count(*) into :itt_obs
      from itt_summ ;

    quit ;
    
    /* If multiple ITT values in comparison groups, abort */
    %if (&itt_obs. gt 1) %then %do;
      %put ERROR: One group has missing ITT days and the other group does not ;
      %goto exit ;
    %end;


/*********************************************************************
  ASSIGN NEEDED MACRO PARMS
**********************************************************************/
    %put =====>MATCHID = &matchid.;
    %put =====>MATCHDS = &matchds.;

    * Set the list of dichotomous and continous variables;
    %if &ITT = 0 %then %let a_type = ASTREATED;
    %if &ITT = 1 %then %let a_type = ITT;


/****************************************************************
  Restrict PSM cohort to women without a pregnancy start [-183,0]
  from index date and with delivery date [-255,0] from index date

  Also censor follow-up time at start of a pregnancy episode

  Exclusion and censoring information output to 
  &runid._preg_attrition.sas7bdat
*****************************************************************/

/*1. Join pregnancy cohort with temp3 to determine which members of temp3 had a pregnancy*/
 
    proc sql noprint;
        create table pregnancylist as
        select ads.* ,
            preg.EpisodeStartDt ,
            preg.EpisodeEndDt ,
            case when %MS_PeriodsOverlap(period1=IndexDt-225 IndexDt, period2=EpisodeStartDt EpisodeEndDt) then 1 
                else .
                end as exclude_pregnancy
        from temp3 as ads,
        dplocal.&pregrunid._alldeliveries as preg
        where ads.patid = preg.patid;
    quit;

    /*Original PSM cohort*/
    proc sql noprint;
        select count(*) into: exp_count
        from temp3
        where (UPCASE(studyclass) eq %UPCASE("&GRP1."));

        select count(*) into: comp_count
        from temp3
        where (UPCASE(studyclass) eq %UPCASE("&GRP2."));
    quit;

    /*Remove pregnancies from PSM cohort*/
    proc sql noprint;
        create table _temp_removed as
        select *
        from temp3
        where patid not in 
            (   select distinct patid 
                from pregnancylist 
                where exclude_pregnancy = 1 )
        order by patid;
    quit;

    /*Pregnancy Exclusion PSM cohort*/
    proc sql noprint;
        select count(*) into: exp_preg_excl
        from _temp_removed
        where (UPCASE(studyclass) eq %UPCASE("&GRP1."));

        select count(*) into: comp_preg_excl
        from _temp_removed
        where (UPCASE(studyclass) eq %UPCASE("&GRP2."));
    quit;

    /*Censor followup at start of pregnancy episode*/
    proc sql noprint;
        create table _temp_censored as
        select ads.patid ,
            min(preg.EpisodeStartDt) as PregCensorDate format=date9. /*in case of multiple pregnancies post index*/
        from _temp_removed as ads,
        pregnancylist as preg
        where ads.patid = preg.patid and preg.EpisodeStartDt > ads.IndexDt
        group by ads.patid
        order by patid;
    quit;

    /*temp3*/
    data temp3;
        merge _temp_removed (in=a)
            _temp_censored (in=b);

        by patid;
        if a;

        if PregCensorDate ne . then do;
            NewFollowUp = PregCensorDate - Indexdt - 1;  /*Not eligible on day of pregnancy*/

            If NewFollowup <= FOLLOWUPTIME_&a_type then do;
                FOLLOWUPTIME_&a_type = NewFollowup;
                censor_pregnancy = 1;
                /*Recode event status*/
                if PregCensorDate <= EventDt then do;
                    EventDt = .;
                end;
            end;
        end;
    run;

    /*Pregnancy Censoring PSM cohort*/
    proc sql noprint;
        select count(*) into: exp_censor
        from temp3
        where (UPCASE(studyclass) eq %UPCASE("&GRP1.")) and censor_pregnancy =1;

        select count(*) into: comp_censor
        from temp3
        where (UPCASE(studyclass) eq %UPCASE("&GRP2.")) and censor_pregnancy =1;
    quit;

    /*&runid._preg_attrition.sas7bdat*/
        data _PregAttrition;
          format Group $50. Original_Cohort Pregnancy_Exclusion Censoring comma10.;
            comp = input("&comp.",best.);
            Group="&GRP1.";
            Original_Cohort=input("&exp_count.",best.);
            Pregnancy_Exclusion = input("&exp_preg_excl.",best.);
            Censoring = input("&exp_censor.",best.);
            output;

            comp = input("&comp.",best.);
            Group="&GRP2.";
            Original_Cohort=input("&comp_count.",best.);
            Pregnancy_Exclusion = input("&comp_preg_excl.",best.);
            Censoring = input("&comp_censor.",best.);
            output;
        run;
 
    *Store to MSOC;
    %IF %EVAL(&comp.=1) %THEN %DO;
        data MSOC.&RUNID._preg_attrition;
        retain comp group Original_Cohort Pregnancy_Exclusion Censoring;
        set _PregAttrition;
        run;
    %END;
    %ELSE %DO;
        proc append base=MSOC.&RUNID._preg_attrition 
                    data=_PregAttrition force;
        run;
    %END;


/****************************************************************
MAKE SURE THERE ARE ENOUGH DRUGS AND PATIENTS
*****************************************************************/
    %global numNewExpgroups numNewPatients;
    %let numNewExpgroups = 0;
    %let numNewPatients = 0;
  
    PROC SQL;
  
      select count(distinct studyclass) into :numNewExpgroups
      from temp3
      where Time eq &look. ;

      select count(distinct patid) into :numNewPatients
      from temp3
      where Time eq &look. ;
  
    QUIT;
    
    /* If no new users identified, abort program */
    %if &numNewPatients = 0 %then %do;
      %put WARNING: No new users within calendar time window and age boundaries of this scenario have been identified;
      %GOTO continue ;
    %end;

/****************************************************************
MACRO LIST PREDEFINED COVARIATE CONDITIONS 
*****************************************************************/

    %macro confounderlist();
    
      %if %sysfunc(exist(infolder.&COVARIATECONDITION)) %then %do;
    
        %global numc nocovarcondition keepcovars ;
    
        %let nocovarcondition = 0;
    
        proc sql ;
    
          create table count AS
          select DISTINCT UPCASE(studyname) as distinct_cov
          from infolder.&COVARIATECONDITION
          order by distinct_cov asc;
     
          select COUNT(DISTINCT distinct_cov)
          into :numc
          from count;  

          select max(keep)
          into :has_keep
          from infolder.&COVARIATECONDITION ;

       %if (&has_keep. ne 0) %then %do ;
    
        proc sql ;           
            
          create table covar_labels AS
          select UPCASE(name) as varname, UPCASE(label) as label
          from dictionary.columns
          where libname eq 'WORK' and memname eq 'TEMP3' and UPCASE(name) like 'COVAR%'
          order by label ;

          create table keep_covars AS
          select DISTINCT UPCASE(studyname) as label
          from infolder.&COVARIATECONDITION
          where (keep eq 1)
          order by label asc;
                        
          select count(*)
          into :numkeep
          from keep_covars ; 

          create table keep_covar_vars AS
          select varname
          from keep_covars a inner join covar_labels b on (a.label = b.label) 
          order by varname asc ;

          select varname
          into :COVARK1-:COVARK%trim(&numkeep.)
          from keep_covar_vars
          order by varname asc ;

        quit ;

        %end ;
    
          %else %let numkeep = 0 ;
    
        quit ;

        %let keepcovars = ;
        %do i = 1 %to &numkeep. ;
          %let keepcovars = &keepcovars &&covark&i. ;
        %end ;
    
      %end ;
    
      %else %do ;
    
        %global nocovarcondition keepcovars;
            
        /** IF NO COVARIATE CONDITIONS OR NO COVARIATE RX OR NO COVARIATE PROCEDURES DEFINED THEN DO **/
        %let keepcovars = ;
        %let nocovarcondition = 1;
    
      %end;

    %mend confounderlist ;
    %confounderlist();

/****************************************************************
CREATE CLAIMS FILES FOR HDPS FOR ONLY PATS IN CURRENT COMPARISON
*****************************************************************/
    %macro get_codes ;
    
    %let dimensionlist = ;

    %let FILECNT = %SYSFUNC(COUNTW(&HDVARSEL_INPUT.));
    %do i = 1 %to &FILECNT. ; 
        %global CODELIST&i. ;
        %let CODELIST&i. = %scan(&HDVARSEL_INPUT., &i.) ;

        /*Depending on query dates, some dimension files could be empty, 
        need to rebuild HDVARSEL_INPUT with only dimension files that have records*/

        %ISDATA(dataset=DPLocal.&&CODELIST&i..);    
        %IF %EVAL(&NOBS.>0) %THEN %DO;
            %let dimensionlist = &dimensionlist &&CODELIST&i..;
        %end;
    %end;

    %let HDVARSEL_INPUT = &dimensionlist;
    %put &HDVARSEL_INPUT.;

      %let FILECNT = %SYSFUNC(COUNTW(&HDVARSEL_INPUT.));
      %do i = 1 %to &FILECNT. ; 
      
        %global CODELIST&i. ;
        %let CODELIST&i. = %scan(&HDVARSEL_INPUT., &i.) ;
    
        proc sort data = temp3 ;
        by patid studyclass;
        run ;

        proc sort data = DPLocal.&&CODELIST&i.. out = &&CODELIST&i.. ;
        by patid group;
        run ;
    
        data &&CODELIST&i.. ;
          merge temp3 (in = a keep = patid studyclass indexdt)
                &&CODELIST&i.. (in = b rename = (group = studyclass)) ;
          by patid studyclass ;
          if a AND b and %MS_PeriodsOverlap(period1=IndexDt+coalesce("&HDPSWinfrom.",-99999) IndexDt+coalesce("&HDPSWinTo.",99999),                                             
                                            period2=ADate);
        run ;

      %end ;
      
      ** Define macro vars to missing if codelists do not exist ** ;
      %if (&FILECNT. < 7) %then %do j = %eval(&FILECNT+1) %to 7 ;
        %global CODELIST&j. ;
        %let CODELIST&j. = ;
      %end ;
    
    %mend get_codes;
    
    %if %eval("&psvar." ne "PREDEFINEDPS") %then %do;
      %get_codes;
    %end ;

/****************************************************************
DEFINE DEFAULTS AND CALL HDPS MACROS
*****************************************************************/

    /** IF <2 EXPOSURE GROUPS WITH NEW USERS IDENTIFIED THEN SKIP HDPS and MATCHING  **/
    %IF &numNewExpgroups ge 2 %THEN %DO;

      /** GET START TIME FOR CREATING HDPS COVARIATES **/
      /** GENERATE THIS TIME EVEN IF HDPS IS NOT RAN  **/
      DATA _NULL_;
        temp=DATETIME();
        call symput('STHD',temp);
        call symput('STDATEHD',put(datepart(temp),date9.));
        call symput('STTIMEHD',put(timepart(temp),time4.));
      RUN;
    
      %global hdpsvariables;
      %let hdpsvariables=;
      %let hd_VarsSelected=;

      %if %eval("&psvar." ne "PREDEFINEDPS") %then %do;

          %let hdpsvariables=D0:;

          %if &covariates_considered =%str(  ) %then %do;
            %let default_n = 100; 
          %end;
          %else %let default_n = &covariates_considered;

          %if &covariates_selected=%str( ) %then %do;
            %let newusernum = 200;
          %end;
          %else %let newusernum = &covariates_selected;
          /** SET DEFAULT NUMBER OF HDPS COVARIATES TO BE THE SAMPLE SIZE (N)
                OF NEW USERS OF A STUDY DRUG (WHICHEVER IS THE SMALLEST) **/
            proc sql;
                select count(*)
                into :size1-:size2
                from temp3
                group by exposure;
            quit;
            %put &size1 &size2;

          %let min=%sysfunc(min(&size1.,&size2.));
          %if &newusernum.>&min. %then %let default_k=&min.; 
          %else %let default_k=&newusernum.;

          %if &ranking=%str(  ) %then %do;
            %let default_rank = EXP_ASSOC; 
          %end;
          %else %let default_rank = &ranking;

          %if &zero_cell_corr=%str(  ) %then %do;
            %let default_outcome_zero = 1; 
          %end;
          %else %let default_outcome_zero = &zero_cell_corr;

        *to use just one PS model call;
        %macro NoJavaHDPS;
          %ms_HDVariableSelection(
                                                var_patient_id = patid,
                                                var_exposure = exposure,
                                                var_outcome = event_&A_type.,
                                                top_n   = &default_n.,
                                                k   = &default_k.,
                                                ranking_method = "&default_rank.",
                                                outcome_zero_cell_corr = &default_outcome_zero.,
                                                outcome_type = 'DICHOTOMOUS',
                          input_cohort = temp3,
                          input_dim1= &CODELIST1 %IF (&CODELIST1. NE %str()) %THEN %DO ; CODE %END ;,
                          input_dim2= &CODELIST2 %IF (&CODELIST2. NE %str()) %THEN %DO ; CODE %END ;,
                          input_dim3= &CODELIST3 %IF (&CODELIST3. NE %str()) %THEN %DO ; CODE %END ;,
                          input_dim4= &CODELIST4 %IF (&CODELIST4. NE %str()) %THEN %DO ; CODE %END ;,
                          input_dim5= &CODELIST5 %IF (&CODELIST5. NE %str()) %THEN %DO ; CODE %END ;,
                          input_dim6= &CODELIST6 %IF (&CODELIST6. NE %str()) %THEN %DO ; CODE %END ;,
                          input_dim7= &CODELIST7 %IF (&CODELIST7. NE %str()) %THEN %DO ; CODE %END ;
                                             );


          *Merge selected variables back to HD vars;
          proc sql noprint undo_policy=none;;
          create table temp3 as
          select t3.*,
                 hdps.*
          from temp3 as t3 left join
               OUTPUT_FULL_COHORT as hdps
               on t3.patid =  hdps.patient_id;
          quit;

          *copy selected varaible name into a macro variable for the logit;
          proc contents data=OUTPUT_FULL_COHORT(drop=patient_id) out=SelectedVars noprint;
          run;

          proc sql noprint;
            select NAME
            into :hd_VarsSelected separated by ' ' 
            from SelectedVars;
          quit;
          run;
        %mend;
        %NoJavaHDPS;
    
      %end ;  *USE_HDPS;

      *Run PS Model;
*todo: YEAR - OUTPUT VARIABLE NAME PARAMETER (OUTVAR);
      %ms_runmodel(infile = temp3,
                   CategoricalVars = &classvars. /*SEX YEAR  TIME &covarlist.*/, 
                   ContinuousVars = &noclassvars./* AGE comorbidscore numIP numED numIS numAV numOA numRX numGENERIC*/,
                   HDPSVars=&hd_VarsSelected., 
                   var_exposure = exposure,
                   var_patient_id = PatId, 
                   outfile=DATASET_WITH_PS
                   );
/*        %let hd_VarsSelected=; */

      *Merge selected variables back to HD vars;
      proc sql noprint undo_policy=none;;
      create table DATASET_WITH_PS as
      select t3.*,
             ps.ps
      from temp3 as t3 left join
           DATASET_WITH_PS as ps
           on t3.patid =  ps.patid;
      quit;

      data converge2;
        set converge;
        if &score_typ_3. then model_number = 3; 
        if &score_typ_4. then model_number = 4; 
        if &score_typ_5. then model_number = 5; 
        t = model_number-2; 
        if status = 0 then score_model_converged = 1;
        if status = 1 then score_model_converged = 0;
        keep t model_number score_model_converged;
      run;
      
      data fit2;
        set fit;
        where label2 = "c";
        length c_stat 8.;
        c_stat = input(cvalue2,best.);
        t = 1;
        keep t c_stat;
      run;
      
      data estimates;
        merge converge2 fit2;
        by t;
      run;    

      proc datasets library=work nowarn nolist;
      delete converge_all;
      quit;

      data converge_all;
      set converge;
        if &score_typ_3. then model_number = 3; 
        if &score_typ_4. then model_number = 4; 
        if &score_typ_5. then model_number = 5; 
      run;


      /** TIMING TO CREATE HDPS COVARIATES **/
      DATA _NULL_;
        temp=DATETIME();
        seconds=temp-&STHD.;
        hours=int(seconds/3600);
        minutes=int((seconds-hours*3600)/60);
        seconds2=int((seconds-hours*3600-minutes*60));
        call symput('STOP',temp);
        call symput('hours',put(hours,4.0));
        call symput('minutes',put(minutes,2.0));
        call symput('seconds',put(seconds2,2.0));
      RUN;
      %global HDPStime;
      %let HDPStime = &hours. h &minutes. m &seconds. s ;

/*********************************************************************
 OUTPUT FULL DATASETS TO DPLOCAL
 *********************************************************************/
      %put comp = &comp.;
      %put look = &comp.;
      %put psvar = &psvar.;   

      * Copy dataset with HDPS variables for later use;
      data dplocal.&RUNID._DATASET_WITH_PS_&comp._&look.;
      set DATASET_WITH_PS;
      run;

      *Calculate percentile for members in the current look period;
      proc rank data=DATASET_WITH_PS
          out=DATASET_WITH_PS_LOOK
          groups=&percentiles.;
      var PS;
      ranks percentile;
      where time = &look.;
      run;

      DATA dplocal.&RUNID._scores_&comp._&look.(rename=percentile1=percentile);
      SET DATASET_WITH_PS_LOOK(RENAME = (PS = &psvar.));
      by patid;
      LABEL TIME = "INTERIM ANALYSIS NUMBER";
            percentile1=strip(put(percentile+1,best.));  *need character variable for risk set;
            drop percentile;
      RUN;

      *Get percentiles already calculated in prior looks;
      %if %sysfunc(exist(dplocal.&RUNID._scores_&comp._&PRIOR)) %then %do;
      
          proc sort data=dplocal.&RUNID._scores_&comp._&PRIOR
                    out=PS_PRIOR(keep=patid indexdt time percentile);
          by patid indexdt time;
          where time < &look.;
          run;
            
          proc sort data=DATASET_WITH_PS
                    out=DATASET_WITH_PS_PRIOR(RENAME = (PS = &psvar.));
          by patid indexdt time;
          where time < &look.;
          run;

          data DATASET_WITH_PS;
          merge DATASET_WITH_PS_PRIOR
                PS_PRIOR;
          by patid indexdt time;
          run;                
    
          DATA dplocal.&RUNID._scores_&comp._&look.;
          set  DATASET_WITH_PS
               dplocal.&RUNID._scores_&comp._&look.;
          run;

          proc sort data=dplocal.&RUNID._scores_&comp._&look.;
          by PatId StudyClass;
          run;
      %end;
      
      DATA dplocal.&RUNID._estimates_&comp._&look.;
        MERGE ESTIMATES converge_all;
        by MODEL_NUMBER;
        length PSTYPE $32.;
        IF MODEL_NUMBER = 3 THEN PSTYPE = "PREDEFINED";
        IF MODEL_NUMBER = 4 THEN PSTYPE = "HDPS and PREDEFINED";
        IF MODEL_NUMBER = 5 THEN PSTYPE = "HDPS ONLY"; 
        IF PSTYPE ne (" ");
        KEEP PSTYPE C_STAT SCORE_MODEL_CONVERGED
             NullModel Reason Status;
      RUN;

      DATA msoc.&RUNID._estimates_&comp._&look.;
      SET dplocal.&RUNID._estimates_&comp._&look.;
      RUN;
 
      PROC SQL;
        select score_model_converged into :c1
        from  dplocal.&RUNID._estimates_&comp._&look.
        where score_model_converged>=0;  *this will select the one that was executed;
      QUIT;

      %if %eval("&psvar." ne "PREDEFINEDPS") %then %do;
        DATA dplocal.&RUNID._VARINFO_&comp._&LOOK
                msoc.&RUNID._VARINFO_&comp._&LOOK;
          SET Output_all_vars/*VARIABLE_INFO_ALL_VARS*/;
        RUN;
      %END ; 

/*********************************************************************
 RUN FULL AND SUBGROUP MATCHING 
 *********************************************************************/

*FOR DEBUG OPTION;
%CreateCopyDir(tmp_libref=CovA&Comp.&look.);
%CopyAllFiles(prefix=,suffix=,tmp_libref=CovA&Comp.&look.);

      /** CLEAN UP **/
      PROC DATASETS LIBRARY = WORK nolist kill; QUIT;
      RUN;

      *Initialize macro variables for full analysis case;
        %global numcat;
      %let numcat=0;   *Number of subcategories to analyse for Full analysis;
      %let COVARNUM=0; *0 is for Overall analysis;


      /** SELECT PATIENTS IDENTIFIED IN THIS MONITORING PERIOD **/
      DATA TEMP&comp._&look;
      
        SET dplocal.&RUNID._scores_&Comp._&look.;        
        WHERE TIME = &look;
        DUM0=1;
        MatchedinFull=1;
        format cat $20.;
        cat="Overall";
      RUN;

      /**Find out if subgroup analyses are required for this comp order**/
      %let NumSub=0;

%let MatchedinFullOnlyVarExist=0;

      %ISDATA(dataset=infolder.&ANALYTICSUBGROUPS.);
      %IF %EVAL(&NOBS.>=1) %THEN %DO;

        *Check if the MatchedinFullOnly variable exists in the comparison file;
        data _null_;
        dset=open("infolder.&ANALYTICSUBGROUPS.");
        call symput ('MatchedinFullOnlyVarExist',put(varnum(dset,'MatchedinFullOnly'),best.));
        run;
        %put &MatchedinFullOnlyVarExist.;

        *How many subgroup analyses for this comp?;
        data _SubComp;
        set infolder.&ANALYTICSUBGROUPS.;
        where comp_order eq &COMP.;       
        run;
        %ISDATA(dataset=_SubComp);
        %let NumSub=&NObs.;
        %put Number of Subgroups for comp &comp.: &NumSub.;
      %END;

      %do Sub=0 %to &NumSub.;  *Note: 0 is for full analysis;

%let MatchedinFull=N;

        /** GET START TIME FOR MATCHING **/
        DATA _NULL_;
        temp=DATETIME();
        call symput('STMATCH',temp);
        call symput('STDATEM',put(datepart(temp),date9.));
        call symput('STTIMEM',put(timepart(temp),time4.));
        RUN;

        %IF %EVAL(&Sub.>=1) %THEN %DO;  *processing a subgroup analysis;

          *what are the covarnum and categorization to be applied;
          data _null_;
          set _SubComp;
          if _N_ = &Sub.;
          call symputx("COVARNUM",COVARNUM);
          call symputx("CATEGORIZATION",CATEGORIZATION);
          NumCat=countw(CATEGORIZATION," ");
          call symputx("NumCat",NumCat);

          *to select whether subgroup rematching restricts only to those matched in the full analysis;
          if &MatchedinFullOnlyVarExist.< 1 then  MatchedinFullOnly="n";
          if &MatchedinFullOnlyVarExist.>=1 then call symputx("MatchedinFull",upcase(MatchedinFullOnly));

          run;
          %put &COVARNUM. &CATEGORIZATION. &NumCat.;

          *Populate macro variables required for subgroup analysis;
          %IF (&COVARNUM. eq %eval(1000)) %THEN %DO; * Sex subgroup;
            *Sex;
            data _null_;
              call symputx("var","Sex");
              call symputx("CATEGORIZATION","M F A U");
              call symputx("NumCat","4");
            run;
              data TEMP&comp._&look;  
                set TEMP&comp._&look;
                cat=Sex;
              run;
            %put &COVARNUM. &CATEGORIZATION. &NumCat.;
          %END; *COVARNUM = 1000;
          %IF (&COVARNUM. eq %eval(1001)) %THEN %DO;  * Age subgroups;

            *Extract birth_date;
            proc sql noprint;
            create table temp4(drop=cat) as
            select b.*,
                   d.birth_date
            from TEMP&comp._&look as b,
                 indata.&demtable. as d
            where b.PatId = d.PatId
                  order by b.patid;
            quit;

            *assigning AgeGroup categories;
            %ms_agestrat(infile=temp4,
                         outfile=TEMP&comp._&look,
                         startdt=birth_date,
                         enddt=indexdt,
                         timestrat=&CATEGORIZATION.);

            proc sql;
               alter table TEMP&comp._&look drop birth_date;
            quit;

            proc datasets library=work nowarn nolist;
            MODIFY TEMP&comp._&look;  
            RENAME AgeGroup=Cat;
            DELETE temp4;
            quit; 

              data _null_;
              call symputx("var","age_cat");
              run;
          %END; *COVARNUM = 1001;
          %IF (&COVARNUM. gt %eval(1001)) %THEN %DO;
            %let var=;
            data _null_;
            if &COVARNUM. = 1002 then call symputx("var","Year");
            if &COVARNUM. = 1003 then call symputx("var","Time");
            if &COVARNUM. = 1004 then call symputx("var","Comorbidscore");
            if &COVARNUM. = 1005 then call symputx("var","NumIP");
            if &COVARNUM. = 1006 then call symputx("var","NumIS");
            if &COVARNUM. = 1007 then call symputx("var","NumED");
            if &COVARNUM. = 1008 then call symputx("var","NumAV");
            if &COVARNUM. = 1009 then call symputx("var","NumOA");
            if &COVARNUM. = 1010 then call symputx("var","NumRx");
            if &COVARNUM. = 1011 then call symputx("var","NumGeneric");
            run;
            %put &var.;

            %ms_CreateNumCat(infile=TEMP&comp._&look,
                             outfile=TEMP&comp._&look,
                             var=&var.,
                             catString=&CATEGORIZATION.);

          %END; *COVARNUM > 1001;   
          %IF (&COVARNUM. lt %eval(1000)) %THEN %DO; * Other covariates;    
            %let CovarVar=COVAR&COVARNUM.;  
            %put &CovarVar.;
              data _null_;                  
              call symputx("var","&CovarVar.");
              call symputx("CATEGORIZATION","0 1");
              call symputx("NumCat","2");
            run;

            data TEMP&comp._&look;  
            set TEMP&comp._&look(drop=cat); 
              format cat $20.;  
              cat=strip(put(&CovarVar.,best.));
              run;
            %put &COVARNUM. &CATEGORIZATION. &NumCat.;
          %END; *COVARNUM < 1000;

  /***************************************************************************/
  /** MATCH WITHIN MONITORING PERIOD                                        **/
  /** KEEP PRIOR MATCHES                                                    **/
  /***************************************************************************/

*FOR DEBUG OPTION;
%CopyFile(FileName=TEMP&comp._&look,suffix=,tmp_libref=CovA&comp.&look.);

          *assigning generic names to subgroups;
          data  TEMP&comp._&look;
          set  TEMP&comp._&look(drop=dum:);
          dum0=0;
          array DUM{*} DUM1-DUM&NumCat.;
          do SubComp=1 to &NumCat.;
              if scan("&CATEGORIZATION.",SubComp,' ')=Cat then dum(SubComp)=1; else dum(SubComp)=0; 
          end;
          SubComp=SubComp-1;
          run;

          * Special note: for subgroup analyses, we only use the patients that were matched in the overall 
            analysis (not the entire population that includes folks that couldnt be matched).;
          %ISDATA(dataset=MatchedInFullAnalysis);
          %IF %EVAL(&NOBS.=0) %THEN %DO;
            proc sort nodupkey data=dplocal.&RUNID._matched_&Comp._&look. 
                                out=MatchedInFullAnalysis(keep=Patid);
            by PatId;
            where missing(&matchid.)=0;
            run;
          %end;

*FOR DEBUG OPTION;
%CopyFile(FileName=TEMP&comp._&look,suffix=,tmp_libref=CovA&comp.&look.);

          data  TEMP&comp._&look;
          merge  TEMP&comp._&look(in=a)
                 MatchedInFullAnalysis(in=b);
          by PatId;

          *the variable MatchedinFull was inintialized to 1 in TEMP&comp._&look;
          if upcase("&MatchedInFull.")="Y" then do;
                    if a and b then MatchedinFull=1; 
                    else MatchedinFull=0; 
          end;
          run;

        %end;*sub>=1;


        %MACRO STRATAMATCH(PSTYPE = , converged= );
  
          %let v = &ratio.;
          %put =====>v = &v.;

          /*JIRA QRP-7*/
          proc datasets library=work nowarn nolist;
          delete MATCHCALIPER&v._1;
          quit;    

          %if "&PSTYPE."="PREDEFINEDPS" | %UPCASE("&USE_HDPS.") eq "Y" %then %do; 

            %ms_NearestNeighborMatch(InFile=TEMP&comp._&look._&cat.,
                                     OutFile=MATCHCALIPER&v._1,
                                     MatchVars=Patid,
                                     PSVar=&PSTYPE,
                                     MatchRatio=&v,
                                     WithReplacement=N,
                                     Caliper=&caliper);

            *depile;

            data MATCHCALIPER&v._1_new(rename=match_distance2=match_distance);
            set MATCHCALIPER&v._1;
            by Patid;

            if first.patid then do;
              MatchNumber=0;
              match_distance2=.;
              group_indicator=0; *needed? - only here to match Java;
              ps=TRT_PS;
              output;
            end;

            patid=ctrl_Patid;
            ps=CTRL_PS;
            match_distance2=match_distance;
            group_indicator=1; *needed? - only here to match Java;
            output;

            keep Patid MatchNumber set_num ps group_indicator match_distance2;
            run;

            proc sql noprint  undo_policy=none;
            create table MATCHCALIPER&v._1 as
            select base.*,
                   mtch.MatchNumber,
                   mtch.set_num,
                   mtch.ps, 
                   mtch.group_indicator, 
                   mtch.match_distance
            from TEMP&comp._&look._&cat. as base
            inner join  MATCHCALIPER&v._1_new as mtch
            on base.Patid = mtch.Patid
            order by Patid;
            run;

          %end;

          /** IF AT LEAST ONE MATCH FOUND THEN DO **/
          %if %sysfunc(exist(MATCHCALIPER&v._1)) %then %do;
            DATA &PSTYPE.MATCHCAL&v._1;
              SET MATCHCALIPER&v._1;
              length &PSTYPE.MATCHID&v._1 $50.;
              &PSTYPE.MATCHID&v._1  = COMPRESS(strip(put(TIME,best.))||strip(put(SET_NUM,best.)));
              LABEL &PSTYPE.MATCHID&v._1= "&PSTYPE NEAREST NEIGHBOR CALIPER &caliper ratio &v.";
              KEEP patid &PSTYPE.MATCHID&v._1;
            RUN;    
          %end;

          /** IF NO MATCHES FOUND THEN DO **/
          %else %do;
            DATA &PSTYPE.MATCHCAL&v._1;
              SET TEMP&comp._&look._&cat. (KEEP =patid);
              length &PSTYPE.MATCHID&v._1 $50.;
              &PSTYPE.MATCHID&v._1  = " ";
              LABEL &PSTYPE.MATCHID&v._1= "&PSTYPE NEAREST NEIGHBOR CALIPER &caliper ratio &v.";
              KEEP patid &PSTYPE.MATCHID&v._1;
            RUN; 
          %end;

        %MEND STRATAMATCH ; 

        *For full analsysis, only process cat=0 (here %eval(1-(&sub.=0))) with NumCat=0
         Otherwise, start at 1 and then loop until the &NumCat.>=1;
        %do Cat=%eval(1-(&sub.=0)) %to &NumCat.; 

          *select subgroup data from those that have been matched in full analysis;
          data  TEMP&comp._&look._&cat.;
          set TEMP&comp._&look;
          if Dum&cat.=1 and MatchedinFull=1; 
          run;

          %ISDATA(dataset=TEMP&comp._&look._&cat.);
          %IF %EVAL(&NOBS.>0) %THEN %DO;

            %STRATAMATCH(PSTYPE = &psvar., converged = &c1);

            %put &match_Error.;
            *Create dummy formated output file when match failed;
            %if %eval(&match_Error.>=1) %then %do;
               %put WARNING:  No matches were found for Sub=&sub. and Cat=&Cat.;
               %macro CreateMatchError;
               data &matchds._0;
               set TEMP&comp._&look._&cat.(keep=PatId);
                %if %eval(&ratio. = 1) %then %do;
                  format &psvar.matchid1_1 $50.; 
                  &psvar.matchid1_1='';
                %end;
                %if %eval(&ratio. = 10) %then %do;
                  format &psvar.matchid100_1 $50.; 
                  &psvar.matchid100_1='';/*100 intenteded*/
                %end;
               run;
               %mend CreateMatchError;
               %CreateMatchError;
            %end;
            %else %do;
              %if %eval(&ratio. = 1) %then %do; 
                PROC SORT DATA = &psvar.MATCHCAL1_1 out=&matchds._0; 
                by patid; 
                RUN;
              %end;
              %if %eval(&ratio. = 10) %then %do;
                PROC SORT DATA = &psvar.MATCHCAL10_1 out=&matchds._0 (rename = (&psvar.matchid10_1 = &psvar.matchid100_1)); 
                by patid; 
                RUN;
              %end;
            %END;
          %END; 
          %ELSE %DO;  *no obs in TEMP&comp._&look._&cat.;
            *STRATAMATCH macro from hdps toolbox does not work with empty dataset (because of it uses txt import/export method)
             Instead, we will create a empty formatted file;
             %macro CreateEmpty;
             data &matchds._0;
             set TEMP&comp._&look(obs=0 keep=PatId);
              %if %eval(&ratio. = 1) %then %do;
                format &psvar.matchid1_1 $50.; 
                &psvar.matchid1_1='';
              %end;
              %if %eval(&ratio. = 10) %then %do;
                format &psvar.matchid100_1 $50.; 
                &psvar.matchid100_1='';/*100 intenteded*/
              %end;
             run;
             %mend CreateEmpty;
             %CreateEmpty;
          %end;

*FOR DEBUG OPTION;
%CopyFile(FileName=&matchds._0,suffix=,tmp_libref=CovA&comp.&look.);

          *Square with category;
          data &matchds._0(rename=cat2=cat);
          merge &matchds._0(in=a)
                TEMP&comp._&look./*_&cat.*/(where=(Dum&cat.=1) keep=PatId Cat Dum&cat.);
          by PatId;
          *if a;
          format cat2 $20. covarnum 4.;
          cat2=cat;
          covarnum=&COVARNUM.;
          drop cat Dum&cat.;
          run;

          %ISDATA(dataset=&matchds._0);
          %IF %EVAL(&NOBS.=0) %THEN %DO;*Empty - one empty line added to show it ran, but empty;
            data cat;
            format cat $20. covarnum 4.;
            Cat = scan("&CATEGORIZATION.", &cat., ' ') ;
            covarnum=&COVARNUM.;
            run;

            data &matchds._0;
            set &matchds._0
                cat;
            run;
          %END; 

          *NOTE: the "_" prefix is for the accumulating file because match_NearestNeighborMatch 
                will overwrite &psvar.matchid1_1 in the 1:1 case;
          %if %eval(&cat.>=2) %then %do;  
            data _&matchds.;
            set _&matchds. 
                &matchds._0;
            by PatId;
            run; 
          %end;
          %else %do;
             data _&matchds.;
             set &matchds._0;
             run; 
          %end;

*FOR DEBUG OPTION;
%CopyFile(FileName=&matchds._0,suffix=,tmp_libref=CovA&comp.&look.);
%CopyFile(FileName=_&matchds.,suffix=,tmp_libref=CovA&comp.&look.);
%CopyFile(FileName=TEMP&comp._&look._&cat.,suffix=,tmp_libref=CovA&comp.&look.);
%CopyFile(FileName=cat,suffix=,tmp_libref=CovA&comp.&look.);

          proc datasets library=work nolist nowarn;
          DELETE &matchds.: TEMP&comp._&look._&cat. cat Predefinedpsmatchcal10_1; 
          quit;

        %end; * cat;


        %if &sub.=0 %then %do;
          *Squaring and adding variables in scores;
          DATA dplocal.&RUNID._matched_&comp._&LOOK.;
            MERGE _&matchds. (rename=Cat=StratumName)
                  dplocal.&RUNID._scores_&Comp._&look.(where=(time=&look.));*All vars including PS;
            by patid;
            format AllPts $50.;
            AllPts="1";*required for creating risk sets for analyses unadjusted for Matchid;
          RUN; 
        %end;
        %else %do;
          *adding variables from scores;
          DATA  _&matchds.0;
            MERGE _&matchds.(in=a)
                  dplocal.&RUNID._scores_&Comp._&look.(in=b where=(time=&look.));
            by patid;
            if a/* and b*/;
            format AllPts $50.;
            AllPts="1";
          RUN; 

          *If no match was possible among the patients, reset to empty line;
          %ISDATA(dataset=_&matchds.0);
          %IF %EVAL(&NOBS.=0) %THEN %DO;
            data _&matchds.0;
            set _&matchds.;
            run;
          %END;

          *need to set instead of append to avoid missing variable warns;  
           data dplocal.&RUNID._matched_&comp._&LOOK.;
           set  dplocal.&RUNID._matched_&comp._&LOOK.
                _&matchds.0 (rename=Cat=StratumName);*only incremental patients are added here;
           by Patid;
           run;
        %end;

        proc datasets nowarn nolist;
        delete _&matchds.:;
        quit;


        * if a prior look exists, update vars from scores for those matched in previous look(s) from the same run
          and append newly matched patients (this look=time);
         
        %if %sysfunc(exist(&DPLocalPointer..&RUNID._matched_&comp._&PRIOR)) %then %do;

          %if &sub.=0 %then %let where=COVARNUM in(0,.); 
          %else %let where=COVARNUM=&COVARNUM.;

          *Update event and Followuptime for those previously matched; 
          *Percentiles calculated using reestimated PS (regression is run on all patients);
          proc sort data=&DPLocalPointer..&RUNID._matched_&comp._&PRIOR. out=_Tempo1 (drop= &hdpsvariables.);
          by PatId IndexDt StudyClass;
          run;

          proc sort data=&INDATA._&look. out=_Tempo2(rename=Group=StudyClass);
          by PatId IndexDt Group;
          run;

          proc sort data=dplocal.&RUNID._DATASET_WITH_PS_&comp._&look. out=_Tempo3;
          by PatId IndexDt StudyClass;
          run;

          DATA PrevMatched;
          MERGE _Tempo1(in=a where=(time<&look. and &where.)) /*Only this covarnum*/
                _Tempo2(keep=StudyClass PatId IndexDt EventDt LastLook: Followuptime_: Event_:) /*all patients*/
                _Tempo3(keep=StudyClass PatId IndexDt &hdpsvariables.); /*all patients*/
          by PatId IndexDt StudyClass;
          if a and missing(PatId)=0;
          format AllPts $50.;
          AllPts="1";*required for creating risk sets for analyses unadjusted for Matchid;
          run;

          %ISDATA(dataset=PrevMatched);
          %IF %EVAL(&NOBS.>0) %THEN %DO;
            *Adding row for patients identified in prior look(s) to patient identified in this look;
            *Note that Matchid and PS are carried forward from the past;
            *using a data step method instead of proc append to avoid numerous warnings from 
             the arbitrary HDPS variables names across looks;
            data dplocal.&RUNID._matched_&comp._&LOOK.;
            set dplocal.&RUNID._matched_&comp._&LOOK.
                PrevMatched;
            by Patid;
            run;
          %END;
  
          proc sort nodupkey data=dplocal.&RUNID._matched_&comp._&LOOK.;
          by _ALL_;
          run;

          proc datasets library=work nolist nowarn;
          delete PrevMatched;
          quit;
         
        %end;  *&DPLocalPointer..&RUNID._matched_&comp._&PRIOR exist;


        /** TIMING TO RUN MATCHING **/
        DATA _NULL_;
          temp=DATETIME();
          seconds=temp-&STMATCH.;
          hours=int(seconds/3600);
          minutes=int((seconds-hours*3600)/60);
          seconds2=int((seconds-hours*3600-minutes*60));
          call symput('STOP',temp);
          call symput('hours',put(hours,4.0));
          call symput('minutes',put(minutes,2.0));
          call symput('seconds',put(seconds2,2.0));
        RUN;
        
        %global matchingtime;
        %let matchingtime = &hours. h &minutes. m &seconds. s ;
        %put &matchingtime.;

    /***************************************************************************/
    /** CREATE RISK SETS                                                      **/
    /***************************************************************************/

        %if %eval(&sub.=0) %then %do;  * full analysis;

          proc datasets library=msoc nolist nowarn;
          delete &RUNID._RISKSETDATA_&COMP._&LOOK.
                 &RUNID._RISKDIFFDATA_&COMP._&LOOK.
                 &RUNID._SURVIVALDATA_&COMP._&LOOK.; 
          quit;

          %let covarnum=0;
          %let Stratvar=Overall;
          %let CurrentCat = Overall;

          *At least one event;
          proc sql;
          select max(EVENT_&a_type) into :HadEvent
          from dplocal.&RUNID._matched_&Comp._&LOOK.
          where covarnum=0;
          quit;

            %let var = AllPts;
          %let varmiss = AllPts;
          %put Creating RiskSet for: &var.;  
          %ms_CreateRiskSet(file=DPlocal.&RUNID._matched_&comp._&LOOK,
                           varmiss=&varmiss.,
                            var=&var.,
                            covarnum=&COVARNUM.);
          %let var = &matchid.;
          %let varmiss = &matchid.;
          %put Creating RiskSet for: &var.;
          %ms_CreateRiskSet(file=DPlocal.&RUNID._matched_&comp._&LOOK,
                           varmiss=&varmiss.,
                            var=&var.,
                            covarnum=&COVARNUM.);

          %let Stratvar=Percentiles;
          %let CurrentCat = Percentiles;
                    %let var = percentile;
          %let varmiss = percentile;
          %put Creating RiskSet for: &var.;
          %ms_CreateRiskSet(file=DPlocal.&RUNID._matched_&comp._&LOOK,
                            varmiss=&var.,
                            var=&var.,
                            covarnum=&COVARNUM.);

         /**Unconditional Analysis**/
      %if "&unconditional"="y" %then %do;
          %let Stratvar=Overall;
              %let CurrentCat = Overall Unconditional;
          %let var =AllPts;
          %let varmiss = &matchid.;
          %put Creating RiskSet for: &var.;
          %ms_CreateRiskSet(file=DPlocal.&RUNID._matched_&comp._&LOOK,
                            varmiss=&varmiss.,
                            var=&var.,
                            covarnum=&COVARNUM.);
      %end;

          %if %eval(&HadEvent. eq 0) %then %do;
            %put "WARNING: no patients in the exposure or comparator cohort have an event";
          %end;
        %end;
        %else %do;* Sub gt 1;
/*
          proc datasets library=msoc nolist nowarn;
          delete &RUNID._RISKSETDATA_&COVARNUM._&COMP._&LOOK.; 
          quit;
*/
          %let Stratvar=&var.;

          %do cat=1 %to &numcat.;  *categories within this subgroup;

            %let CurrentCat = %scan(&CATEGORIZATION., &cat., ' ') ;
            %put &CurrentCat.;
            %put &Stratvar.;

            data ForRisk;
            set DPlocal.&RUNID._matched_&comp._&LOOK;
            where covarnum=&covarnum. and scan("&CATEGORIZATION.",&cat.,' ')=StratumName;
            run;

*FOR DEBUG OPTION;
%CopyFile(FileName=ForRisk,suffix=,tmp_libref=CovA&comp.&look.);

            %let var = AllPts;
            %let varmiss = AllPts;
            %put Creating RiskSet for: &var.;
            %ms_CreateRiskSet(file=ForRisk,
                              varmiss=&varmiss.,
                              var=&var.,
                              covarnum=&COVARNUM.);
            %let var = &matchid.;
            %let varmiss = &matchid.;
            %put Creating RiskSet for: &var.;
            %ms_CreateRiskSet(file=ForRisk,
                              var=&var.,
                              varmiss=&varmiss.,
                              covarnum=&COVARNUM.);

          /**Unconditional Analysis**/
      %if "&unconditional"="y" %then %do;
              %let CurrentCat = &CurrentCat. Unconditional;
          %let var =AllPts;
          %let varmiss = &matchid.;
          %put Creating RiskSet for: &var.;
          %ms_CreateRiskSet(file=ForRisk,
                            varmiss=&varmiss.,
                            var=&var.,
                            covarnum=&COVARNUM.);
      %end;

            proc sql;
            select max(EVENT_&a_type) into :HadEvent
            from ForRisk;
            quit;

            %if %eval(&HadEvent. eq 0) %then %do;
                %put WARNING: no patients in the exposure or comparator cohort have an event for covarnum &covarnum. and stratum &var. = &CurrentCat.;
            %end;*HadEvent;

          %end;*cat;

          proc datasets library=work nowarn nolist;
          DELETE ForRisk;
          quit;

        %end;* Subgroup analysis;


      %end; ** Loop through sub;

      proc datasets library=dplocal nolist nowarn;
      delete &RUNID._DATASET_WITH_PS_&comp._&look.;
      quit;

    %END; * if &numNewExpgroups ge 2;


    %IF &numNewExpGroups<2 %THEN %DO;
    
      %if &look = 1 or %sysfunc(exist(&DPLocalPointer..&RUNID._matched_&comp._&PRIOR)) ne 1 %then %do;
    
        DATA dplocal.&RUNID._scores_&comp._&look.;
          SET TEMP3;
          by patid;
          LABEL TIME = "INTERIM ANALYSIS NUMBER";
        RUN;
    
        DATA dplocal.&RUNID._matched_&comp._&look.;          
          SET dplocal.&RUNID._scores_&comp._&look.;
          length &matchid. $50.;
            
          &matchid.="";
          &psvar. = .;

          format cat $20.  percentile $12.;
          cat="Overall";
          percentile="";
          StratumName="Overall";
          COVARNUM=0;
          _LEVEL_=.;
        RUN;
    
      %end;
      
      %if %sysfunc(exist(&DPLocalPointer..&RUNID._matched_&comp._&PRIOR)) %then %do;

        DATA dplocal.&RUNID._scores_&comp._&look.;
          SET TEMP3;
          by patid;
          LABEL TIME = "INTERIM ANALYSIS NUMBER";
        RUN;
    
        DATA TEMP_matched;
          set dplocal.&RUNID._scores_&comp._&look.;
          where TIME = &look;
          length &matchid. $50.;
          &matchid.="";
        RUN;
    
        DATA temp_allmatched;
          SET temp_matched 
              &DPLocalPointer..&RUNID._matched_&comp._&PRIOR;
        RUN;
        
        PROC SORT DATA = temp_allmatched; 
        by patid; 
        RUN;
        
        DATA dplocal.&RUNID._matched_&comp._&LOOK;
        MERGE temp_allmatched 
              dplocal.&RUNID._scores_&comp._&look.;
        by patid;
            format cat $20. percentile $12.;
            cat="Overall";
            percentile="";
            COVARNUM=0;
            _LEVEL_=.;
        RUN;
    
      %end;  
    
    %END; *&numexposuregroups<2;

    %IF %LOWCASE("&INDLEVEL")="y" %then %do;
        /* Output dataset for MSOC without patient identifiers or covariates (other than subgroup indicators) */
        DATA msoc.&RUNID._matched_&comp._&look.;
          RETAIN studyclass age_cat sex race EVENT_&a_type FOLLOWUPTIME_&a_type &matchid;
          SET dplocal.&RUNID._matched_&comp._&LOOK;
          KEEP exposure studyclass age_cat sex race EVENT_&a_type FOLLOWUPTIME_&a_type /*eventdt*/
               &psvar. /*AGE*/AGE COMORBIDSCORE TIME YEAR LastLookFollowed
               numIP numED numIS numAV numOA numRX numGeneric numClass &matchid &keepcovars. 
               stratumname covarnum percentile;

               age=floor(age); /*only return age as an integer*/
        RUN;
    %END;

    /** GET RUN TIME **/
    DATA _NULL_;
      temp=DATETIME();
      seconds=temp-&start.;
      hours=int(seconds/3600);
      minutes=int((seconds-hours*3600)/60);
      seconds2=int((seconds-hours*3600-minutes*60));
      call symput('STOP',temp);
      call symput('hours',put(hours,4.0));
      call symput('minutes',put(minutes,2.0));
      call symput('seconds',put(seconds2,2.0));
    RUN;

    %put TOTAL RUN TIME was &hours. h &minutes. m &seconds. s;

    %global covariatetime;
    %let covariatetime =  &hours. h &minutes. m &seconds. s ;

    DATA TEMP;
      length ADJUSTMENTRUNTIME $150.;
      length HDPSRUNTIME $150.;
      length MATCHRUNTIME $150.;
      length MONITORINGPERIOD $2.;      
      ADJUSTMENTRUNTIME = "&covariatetime ";
      %IF &numNewExpGroups<2 %THEN %DO;
          HDPSRUNTIME = "N/A";
          MATCHRUNTIME = "N/A";
      %END;   
      %ELSE %DO;
          HDPSRUNTIME = "&HDPStime ";
          MATCHRUNTIME = "&matchingtime ";
      %END;
      MONITORINGPERIOD = put(&LOOK, 2.);
      COMPARISON = PUT(&comp., 2.) ;             
    RUN;


    %if %sysfunc(exist(MSOC.&RUNID._TIMING)) %then %do;

      DATA MSOC.&RUNID._TIMING;
        RETAIN MONITORINGPERIOD COMPARISON ADJUSTMENTRUNTIME HDPSRUNTIME MATCHRUNTIME;
        MERGE MSOC.&RUNID._TIMING TEMP;
        BY MONITORINGPERIOD COMPARISON ;
      RUN;

    %end ;

    %else %do ;

      DATA MSOC.&RUNID._TIMING;
        RETAIN MONITORINGPERIOD COMPARISON ADJUSTMENTRUNTIME HDPSRUNTIME MATCHRUNTIME;
        set TEMP;
        BY MONITORINGPERIOD COMPARISON ;
      RUN;

    %end ;


    *Signature file;
    data signature;
    DPID="&DPID.";
    SITEID="&SITEID.";
    MSReqID="&MSREQID.";
    MSProjID="&MSPROJID.";  
    MSWPType="&MSWPTYPE.";
    MSWPID="&MSWPID.";
    MSDPID="&MSDPID.";
    MSVerID="&MSVERID.";
    RunID="&RUNID.";
      COMP_ORDER="&comp.";
      PERIODID="&look.";
    MPNum="QRP";
    MPVer="&Ver.";
    format StartTime StopTime datetime21.2; 
    StartTime=input("&START.",best.); 
    StopTime=input("&STOP.",best.);   
    format Seconds $20.;
      Seconds=put(int(&stop.-&START.),best.)||" s";
    ExecutionTime="&hours. h &minutes. m &seconds. s";    
    Model=strip("&psvar.");
      Caliper="&CALIPER.";
      Ratio="&RATIO.";
    COMPARE_INPUT="&COMPARE_INPUT.";
    ANALYTICSUBGROUPS="&ANALYTICSUBGROUPS.";
    INDLEVEL="&INDLEVEL.";
    COVARIATES_CONSIDERED="&COVARIATES_CONSIDERED.";
    COVARIATES_SELECTED="&COVARIATES_SELECTED.";
    RANKING="&RANKING.";
    HEALTH_SERVICE_INTENSITY="&HEALTH_SERVICE_INTENSITY.";
    ZERO_CELL_CORR="&ZERO_CELL_CORR.";
    output;
    run;

    proc transpose data=signature out=msoc.&RUNID._signature_ps_&comp._&look.(rename=_NAME_=Var rename=COL1=VALUE);
       var _ALL_;
    run;


/***********************************
 CLEAN UP AND CREATE OUTPUT TABLES
***********************************/
*FOR DEBUG OPTION;
%CopyAllFiles(prefix=,suffix=,tmp_libref=CovA&Comp.&look.);

    ** Clean up work directory before next comp ** ;
    PROC DATASETS LIBRARY = WORK nolist kill; QUIT; RUN; 
    RUN;

/*    proc printto log=log;*/
/*    RUN;*/
    
    %let pstype=;
    %if "&psvar." eq "PREDEFINEDPS" %then %do;
        %let pstype=PREDEFINED;
    %end;
    %if "&psvar." eq "HDPSPREDEFINED" %then %do;
        %let pstype=HDPS and PREDEFINED;
    %end;
    %if "&psvar." eq "HDPSONLY" %then %do;
        %let pstype=HDPS ONLY;
    %end;

    %matchtables(comp = &comp., look = &look., psvar=&psvar., pstype=&pstype., matchvar=&matchid., caliper=&caliper.);  

  %continue:

  %end ; ** Loop through comparisons ;

%nextlook:
%end ; ** Loop through looks ;

%exit:

%if %sysfunc(exist(DPLocal.HDPSSettings)) %then %do;
    proc datasets library=DPLocal nowarn nolist;
    delete HDPSSettings;
    quit;
%end;

%put NOTE: ********END OF MACRO: MS_COVARIATEADJUSTMENT v2.7 ********;

%MEND MS_COVARIATEADJUSTMENT;