Hope someone can send help for a desperate student :-) I have a set of procedure codes, each with a different number of surgeries (here: procedures) and their respective durations. I would like to get some descriptive statistics on the durations. For that, I would like my loop to detect and remove outliers using the IQR rule before the statistics are computed. This is the code without outlier detection and removal:
# Descriptive statistics of procedure durations, one row per procedure code.
#
# Inputs (must already exist when this script runs):
#   a_g_procedures1                     data frame with the columns
#                                       ProcedureCode, TimeIn_1, TimeIn_2
#                                       and TimeOut
#   list_of_procedurecodes              the procedure codes to summarise
#   number_of_different_procedurecodes  how many of those codes to process
#
# Output:
#   ag_output  data frame with the columns Procedure_codes, Number,
#              Min_Times, Max_Times, Average_Times, Median_Times, SD_Times
#              (all durations in minutes)
#
# A procedure's duration runs from the earlier of TimeIn_1 / TimeIn_2 to
# TimeOut. Timestamps are parsed with the format "%d/%m/%Y %H:%M"; a row
# whose timestamps cannot be parsed gets an NA duration, which is excluded
# from every statistic (it is still counted in Number).

time_format <- "%d/%m/%Y %H:%M"

# variables for output - preallocated instead of grown inside the loop
code_index <- seq_len(number_of_different_procedurecodes)
Procedure_codes <- unlist(list_of_procedurecodes[code_index])
# one code per processed element, otherwise the result columns cannot line up
stopifnot(length(Procedure_codes) == length(code_index))
Number <- integer(length(code_index))
Min_Times <- rep(NA_real_, length(code_index))
Max_Times <- rep(NA_real_, length(code_index))
Average_Times <- rep(NA_real_, length(code_index))
Median_Times <- rep(NA_real_, length(code_index))
SD_Times <- rep(NA_real_, length(code_index))

# loop over all procedure codes
for (Counter0 in code_index) {
  Procedure_name <- Procedure_codes[[Counter0]]
  print(Procedure_name)

  # Subset data for the specific procedure; %in% never yields NA, so rows
  # with a missing ProcedureCode are simply not selected.
  a_g_procedures2 <- a_g_procedures1[
    a_g_procedures1$ProcedureCode %in% Procedure_name, , drop = FALSE
  ]
  number_of_procedures <- nrow(a_g_procedures2)
  Number[Counter0] <- number_of_procedures

  # Parse all timestamps of this procedure at once (no per-row loop needed).
  time_in_1 <- as.POSIXct(a_g_procedures2[["TimeIn_1"]], format = time_format)
  time_in_2 <- as.POSIXct(a_g_procedures2[["TimeIn_2"]], format = time_format)
  time_out <- as.POSIXct(a_g_procedures2[["TimeOut"]], format = time_format)

  # The procedure starts at the earlier of the two entry times. pmin() is
  # element-wise and returns NA where either entry time is missing, instead
  # of aborting the run the way `if (NA > ...)` does.
  start_time <- pmin(time_in_1, time_in_2)
  Durations <- as.numeric(difftime(time_out, start_time, units = "mins"))

  # (Outlier filtering of `Durations`, if wanted, belongs here.)

  # Statistics on the usable durations only. mean() has no `digits`
  # argument; round the finished table with round() if fewer decimals are
  # wanted. Codes without any usable duration keep NA in every statistic.
  valid <- Durations[!is.na(Durations)]
  if (length(valid) > 0) {
    Min_Times[Counter0] <- min(valid)
    Max_Times[Counter0] <- max(valid)
    Average_Times[Counter0] <- mean(valid)
    Median_Times[Counter0] <- median(valid)
    SD_Times[Counter0] <- sd(valid) # NA when only one duration is available
  }
}

ag_output <- data.frame(
  Procedure_codes, Number, Min_Times, Max_Times,
  Average_Times, Median_Times, SD_Times
)
This is what I would have liked to add to the loop over each specific procedure:
# Remove outliers from `Durations` with Tukey's 1.5 * IQR rule.
#
# Run this once per procedure code, after `Durations` holds ALL durations of
# that procedure as plain numbers and before the statistics are computed.
# Running it inside the per-row loop would only ever see one value.
#
# NA durations are ignored when computing the quartiles and are left in
# place, so they never count as outliers.
Q <- quantile(Durations, probs = c(0.25, 0.75), na.rm = TRUE)
iqr <- IQR(Durations, na.rm = TRUE)
up <- Q[2] + 1.5 * iqr   # upper fence: third quartile + 1.5 * IQR
low <- Q[1] - 1.5 * iqr  # lower fence: first quartile - 1.5 * IQR

# TRUE for every value outside [low, up]; the !is.na() guard keeps the mask
# free of NA so it is safe to use for subsetting.
remove <- !is.na(Durations) & (Durations < low | Durations > up)

# Keep only the non-outliers. The result must be assigned back, otherwise
# nothing changes.
Durations <- Durations[!remove]
Does somebody have an idea how I could do this?
Thank you very much in advance!