-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathEWS_calc.R
More file actions
64 lines (56 loc) · 2.77 KB
/
EWS_calc.R
File metadata and controls
64 lines (56 loc) · 2.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#' Compute dynamic complexity and cumulative complexity peaks for one user
#'
#' Drops sparsely observed columns, imputes the remaining missing values with
#' CART via `mice`, rescales every variable with `elascer()`, and then runs
#' `dc_win()` and `dc_ccp()` on the result.
#'
#' @param df Data frame for a single user. Must contain a `user_id` column and
#'   a `timestamp` column. After `user_id` is removed, the first column is
#'   skipped and every other column is treated as a variable (presumably the
#'   first column is `timestamp` -- confirm against callers).
#' @param plot_pre If `TRUE`, also draw the missing-value, imputation,
#'   complexity resonance and cumulative complexity peak plots.
#' @param win Window width passed to `dc_win()` and the plotting helpers.
#'   Defaults to 28, the value that used to be hard-coded here.
#' @return A data frame with the columns `user_id`, `timestamp`,
#'   `dynamic_complexity_sum` (row sums of the `dc_win()` output) and
#'   `complexity_peaks` (the `sig.peaks` element returned by `dc_ccp()`).
EWS_calc <- function(df, plot_pre = FALSE, win = 28) {
  # Accept TRUE as well as values that compare equal to TRUE (e.g. 1), as the
  # previous `plot_pre == TRUE` test did.
  show_plots <- isTRUE(as.logical(plot_pre))

  # The user id is read from the first row only.
  user <- df$user_id[1]
  # Remove the user_id column.
  df <- subset(df, select = -c(user_id))
  cat("Calculating Early Warning Signals and Cumulative Complexity Peaks for user == ", user, "\n")

  # Keep only columns in which fewer than a third of the rows are NA.
  # drop = FALSE keeps a data frame even when a single column survives.
  keep <- colSums(is.na(df)) < nrow(df) / 3
  df <- df[, keep, drop = FALSE]

  # Every column except the first is a variable to impute and analyse.
  vars <- colnames(df)[-1]
  if (length(vars) == 0L) {
    stop("No variable columns left after removing columns with too many NAs.",
         call. = FALSE)
  }

  # NOTE(review): the imputeTS::plotNA.* helpers appear to be deprecated in
  # newer imputeTS releases in favour of ggplot_na_*(); confirm against the
  # installed version.
  if (show_plots) {
    # Plot the time series with missing values highlighted, three per row.
    n_col <- 3
    op <- par(mfrow = c(ceiling(length(vars) / n_col), n_col),
              mar = c(2, 2, 2, 2))
    # Restore the caller's graphics settings even if a plot call fails.
    on.exit(par(op), add = TRUE)
    # Index by column name so each panel shows the variable named in its title.
    for (v in vars) {
      imputeTS::plotNA.distribution(x = as.numeric(df[[v]]),
                                    main = v,
                                    xlab = "", ylab = "")
    }
    # Back to the previous layout before the later plots are drawn.
    par(op)
  }

  # Impute missing values with Classification And Regression Trees.
  # RF and CART return (identical) discrete numbers.
  imp.cart <- mice::mice(df[vars], method = "cart",
                         remove.constant = TRUE, remove.collinear = TRUE,
                         printFlag = FALSE)
  # With no further arguments, complete() returns one completed data set.
  df_imp <- mice::complete(imp.cart)

  if (show_plots) {
    # Plot the time series with imputed values, where NAs used to be, in red.
    for (v in vars) {
      imputeTS::plotNA.imputations(x.withNA = as.numeric(df[[v]]),
                                   x.withImputations = as.numeric(df_imp[[v]]),
                                   main = paste(v, "CART"),
                                   xlab = "", ylab = "")
    }
  }

  # Put each column between 0 and 1 using the elastic scaler.
  # elascer, dc_win, dc_ccp and plotDC_* are not defined in this function;
  # they must be available in the calling environment.
  elasc_df <- data.frame(apply(df_imp, 2, elascer))

  # Dynamic complexity of the variables with the imputed data. The default
  # window of 28 was chosen due to the slow changing nature of MS.
  dc <- dc_win(elasc_df, win = win, scale_min = 0, scale_max = 1,
               doPlot = FALSE, colOrder = NA)
  datesIMP <- df$timestamp
  ccp.caseIMP <- dc_ccp(df_win = dc, alpha_item = 0.001, alpha_time = 0.001)

  if (show_plots) {
    # Plot the Complexity Resonance Diagram.
    plotDC_res(df_win = dc, win = win, colOrder = NA,
               useTimeVector = datesIMP, timeStamp = "99-01-31",
               title = paste("Complexity Resonance Diagram (CART) user == ", user))
    # Plot the Cumulative Complexity Peak Plot.
    plotDC_ccp(df_ccp = ccp.caseIMP, win = win, colOrder = NA,
               useTimeVector = datesIMP, timeStamp = "99-01-31",
               title = paste("Cumulative Complexity Peak Plot (CART) user == ", user))
  }

  data.frame("user_id" = user,
             "timestamp" = datesIMP,
             "dynamic_complexity_sum" = rowSums(dc),
             "complexity_peaks" = ccp.caseIMP$sig.peaks)
}