/*-------------------------------------------------------------------*
 | Macro:    SD_TRIM                                                  |
 | Purpose:  To identify outliers based on n standard deviations     |
 |           from the mean, where the mean and standard deviation    |
 |           are computed from the middle 50% of the data.           |
 |                                                                   |
 | Arguments: DSN   - Data set name                                  |
 |            VAR   - Numeric variable to be checked                 |
 |            IDVAR - ID variable name                               |
 |            N_SD  - The number of standard deviation units you     |
 |                    would specify if the data values were not      |
 |                    trimmed.                                       |
 |                                                                   |
 | Output:   One line per outlier (IDVAR= and VAR= values) written   |
 |           to the procedure output destination (FILE PRINT).       |
 |           TITLE1 and TITLE2 are set and remain in effect.         |
 |                                                                   |
 | Notes:    - Missing values of VAR (ordinary and special) are      |
 |             never reported as outliers.                           |
 |           - If the trimmed mean or standard deviation cannot be   |
 |             computed, nothing is listed and a warning is written  |
 |             to the log.                                           |
 |           - Scratch data sets WORK._SD_TRIM_RANKED and            |
 |             WORK._SD_TRIM_STATS are created and then deleted.     |
 |           - This header is a block comment on purpose: macro      |
 |             triggers inside an asterisk-style comment statement   |
 |             are still seen by the macro processor, so the example |
 |             call below would otherwise be executed.               |
 |                                                                   |
 | EXAMPLE:  %SD_TRIM(CLEAN.PATIENTS,HR,PATNO,2)                     |
 *-------------------------------------------------------------------*/
%MACRO SD_TRIM(DSN,VAR,IDVAR,N_SD);

   TITLE1 "Outliers for Variable &VAR Data Set &DSN";
   TITLE2 "Based on &N_SD Standard Deviations Estimated from Trimmed (50%)Data";

   /* Split the non-missing values of VAR into quartile groups 0-3. */
   PROC RANK DATA=&DSN OUT=_SD_TRIM_RANKED GROUPS=4;
      VAR &VAR;
      RANKS _TRIM_RANK;
   RUN;

   /* Mean and standard deviation of the middle 50 percent          */
   /* (quartile groups 1 and 2).                                    */
   PROC MEANS DATA=_SD_TRIM_RANKED NOPRINT;
      WHERE _TRIM_RANK IN (1,2);
      VAR &VAR;
      OUTPUT OUT=_SD_TRIM_STATS(DROP=_TYPE_ _FREQ_)
             MEAN=_TRIM_MEAN STD=_TRIM_SD;
   RUN;

   DATA _NULL_;
      FILE PRINT;
      SET &DSN;

      /* Read the one-row statistics data set once; variables read  */
      /* by SET are retained for the remaining observations.        */
      IF _N_ = 1 THEN SET _SD_TRIM_STATS;

      /* With missing statistics the limits would be missing and    */
      /* every non-missing value would compare greater than the     */
      /* upper limit, so stop instead of flagging every row.        */
      IF MISSING(_TRIM_MEAN) OR MISSING(_TRIM_SD) THEN DO;
         PUTLOG "WARNING: SD_TRIM could not compute a trimmed mean and "
                "standard deviation for &VAR in &DSN.. No outliers listed.";
         STOP;
      END;

      /* The factor 2.65 rescales the trimmed standard deviation:   */
      /* for normally distributed data the SD of the middle 50      */
      /* percent is roughly 1/2.65 of the SD of the full sample.    */
      IF NOT MISSING(&VAR) AND
         (&VAR LT _TRIM_MEAN - (&N_SD)*_TRIM_SD*2.65 OR
          &VAR GT _TRIM_MEAN + (&N_SD)*_TRIM_SD*2.65)
      THEN PUT &IDVAR= &VAR=;
   RUN;

   /* Remove the scratch data sets. */
   PROC DATASETS LIBRARY=WORK NOLIST;
      DELETE _SD_TRIM_STATS _SD_TRIM_RANKED;
   RUN;
   QUIT;

%MEND SD_TRIM;