/*----------------------------------------------------------------*
 | Purpose:   List (approximately) the N highest and N lowest      |
 |            values of a variable, identified by an ID variable.  |
 |                                                                 |
 | Arguments: DSN   - Data set name (one- or two-level)            |
 |            VAR   - Variable to test                             |
 |            N     - Number of highest and lowest values          |
 |                    (positive integer)                           |
 |            IDVAR - ID variable                                  |
 |                                                                 |
 | Method:    The observation count of DSN is divided by N to get  |
 |            a number of rank groups; PROC RANK assigns each      |
 |            observation to a group, and only the lowest group    |
 |            (0) and highest group (groups - 1) are printed.      |
 |            Group sizes are only roughly N, hence "approximate". |
 |                                                                 |
 | Side       Creates format RNK. and a scratch data set WORK.NEW  |
 | effects:   (deleted on normal completion); sets TITLE1.         |
 |                                                                 |
 | Example:   %HI_LOW_N(CLEAN.PATIENTS,SBP,10,PATNO)               |
 |                                                                 |
 | Note:      This header is a block comment on purpose: macro     |
 |            triggers inside an asterisk-style comment statement  |
 |            are still processed, so the example call above       |
 |            would otherwise be executed when the file is         |
 |            resubmitted.                                         |
 *----------------------------------------------------------------*/
%MACRO HI_LOW_N(DSN,VAR,N,IDVAR);

   /* Keep all working macro variables out of the caller's scope */
   %LOCAL DSID N_OBS RETURN GRP TOP;

   /* Validate N before it is used as a divisor */
   %IF %LENGTH(&N) = 0 %THEN %DO;
      %PUT ERROR: (HI_LOW_N) Argument N is required.;
      %RETURN;
   %END;
   %IF %SYSFUNC(NOTDIGIT(&N)) > 0 %THEN %DO;
      %PUT ERROR: (HI_LOW_N) N must be a positive integer, got &N;
      %RETURN;
   %END;
   %IF &N < 1 %THEN %DO;
      %PUT ERROR: (HI_LOW_N) N must be a positive integer, got &N;
      %RETURN;
   %END;

   /* Find the number of observations in the data set.            */
   /* OPEN returns 0 on failure, so stop before calling ATTRN.    */
   %LET DSID = %SYSFUNC(OPEN(&DSN));
   %IF &DSID = 0 %THEN %DO;
      %PUT ERROR: (HI_LOW_N) Unable to open data set &DSN;
      %RETURN;
   %END;
   /* NLOBS = logical observations (excludes rows marked deleted) */
   %LET N_OBS  = %SYSFUNC(ATTRN(&DSID,NLOBS));
   %LET RETURN = %SYSFUNC(CLOSE(&DSID));

   /* Compute number of rank groups from N and N_OBS */
   %LET GRP = %SYSEVALF(&N_OBS / &N,FLOOR);

   /* At least two groups are needed so that "Low" (group 0) and  */
   /* "High" (group GRP-1) are distinct; this also catches an     */
   /* unavailable observation count (reported as -1).             */
   %IF &GRP < 2 %THEN %DO;
      %PUT ERROR: (HI_LOW_N) &DSN has &N_OBS observations - at least twice N (N=&N) are required.;
      %RETURN;
   %END;

   /* Rank group numbers run from 0 to GRP-1 */
   %LET TOP = %EVAL(&GRP - 1);

   PROC FORMAT;
      VALUE RNK 0    = 'Low'
                &TOP = 'High';
   RUN;

   /* Assign each observation to one of GRP rank groups */
   PROC RANK DATA=&DSN OUT=NEW GROUPS=&GRP;
      VAR &VAR;
      RANKS RANGE;
   RUN;

   /* Sort, keeping only the bottom and top groups */
   PROC SORT DATA=NEW (WHERE=(RANGE IN (0,&TOP)));
      BY &VAR;
   RUN;

   /* Produce the report */
   PROC PRINT DATA=NEW;
      TITLE "Approximate Highest And Lowest &N Values for %UPCASE(&VAR)";
      ID &IDVAR;
      VAR RANGE &VAR;
      FORMAT RANGE RNK.;
   RUN;

   /* Remove the scratch data set */
   PROC DATASETS LIBRARY=WORK NOLIST;
      DELETE NEW;
   RUN;
   QUIT;

%MEND HI_LOW_N;