These are some useful R functions for wrangling data. The functions include: aggregate, apply, bash engine, c (concatenate), colnames, ddply (for row counts), drop_na, filter, flags, grepl, group_by, gsub, ifelse, is.na, join, left_join, paste, rbind, read_excel, save, select, separate, separate_rows, sort, spread, substr, summarise, t.test, unique, write.csv, ...
# Mean of `varToFunctionalize` within each level of `varToGroup`.
# `by` has to be supplied by name (`by = ...`) and must be a list; a one-column
# data frame such as df["varToGroup"] qualifies.
# NOTE: the result overwrites `df` and has the columns `varToGroup` and `x`.
df <- aggregate(df$varToFunctionalize, by = df["varToGroup"], FUN = mean)
# Midrange of each of the first three columns (MARGIN = 2 is by column, 1 is by row)
apply(df[1:3], MARGIN = 2, FUN = function(v) (min(v) + max(v)) / 2)
# Midrange: the midpoint between the smallest and the largest value of `x`.
midrange <- function(x) {
  lo <- min(x)
  hi <- max(x)
  (lo + hi) / 2
}
# Column-wise midrange again, this time passing the named helper `midrange`
apply(df[1:3], MARGIN = 2, FUN = midrange)
# summary() of each of the first three columns; this forms a matrix
apply(df[1:3], MARGIN = 2, FUN = summary)
# Table of linear-regression coefficients (rounded to 3 decimals): column 1 of
# `df` regressed on each of the remaining columns in turn.
# - `df[[1]]` extracts the response as a vector; `df[1]` is a one-column data
#   frame, which lm() cannot use as a response.
# - `df[-1]` keeps the response from being regressed on itself.
# - No `data = df`: with it, a column of `df` that happens to be called `x`
#   would shadow the predictor passed in by sapply().
sapply(df[-1], function(x) round(coef(lm(df[[1]] ~ x)), 3))
# summary() of the first three columns, computed separately for each value of
# CategoricalVariable
by(data = df[1:3], INDICES = df$CategoricalVariable, FUN = summary)
# bash engine: in R Markdown, run shell commands by giving the chunk the option engine='bash'
# Rename the column called "varName" to "newVarName"; all other names are untouched
colnames(df)[which(names(df) == "varName")] <- "newVarName"
# Number of rows for every combination of groupingVar and varToCount.
# The grouping columns are given by name: `.()` is only visible after
# library(plyr) (a bare `plyr::ddply` call does not attach it), and writing
# `df$...` inside it produces result columns literally named "df$groupingVar".
# The count is returned in a column named V1.
rowCountForVar <- plyr::ddply(df, c("groupingVar", "varToCount"), nrow)
# Keep only the rows of df whose "Var" column is not NA
dfRmNa <- df %>% drop_na("Var")
# Read ae.csv, convert it to a data.table, then keep only the columns that
# contain at least one non-NA value.
AE <- read_csv("ae.csv")
DT_AE <- as.data.table(AE)
# vapply() guarantees one logical per column; which() turns the mask into
# column positions. `with = FALSE` makes data.table treat `j` as a column
# selector. FALSE is spelled out because `F` is an ordinary, reassignable variable.
CONDENSED_AE <- DT_AE[
  ,
  which(vapply(DT_AE, function(x) !all(is.na(x)), logical(1))),
  with = FALSE
]
# Keep the rows where the column `Variable Name` equals "Value".
# Non-syntactic column names are quoted with plain backticks (a backslash in
# front of a backtick is not valid R), and the column is referenced directly
# instead of through `df$`, so filter() evaluates it within the data it is given.
df <- filter(df, `Variable Name` == "Value")
# Drop every row whose idVar matches ID1, ID2 or ID3
df <- df %>% filter(!grepl('ID1|ID2|ID3', idVar))
# Restrict `d` to the gene of interest, then compare IC50 between the groups
# defined by Mutated with t.test()'s formula interface.
# Columns are referenced by bare name (filter) or via `data =` (t.test) rather
# than through `d$` / `d.filt$`, so each call looks the columns up in the data
# it was actually handed.
# NOTE(review): the formula interface expects Mutated to have exactly two groups.
d.filt <- filter(d, Gene == "GoI")
t.test(IC50 ~ Mutated, data = d.filt)
# Collapse "high" and "very high" into "Responding Patient"; every other value
# of `Var to Consolidate` is carried over unchanged
df[["Consolidated Vars"]] <- ifelse(
  (df$`Var to Consolidate` == "high") | (df$`Var to Consolidate` == "very high"),
  "Responding Patient",
  df$`Var to Consolidate`
)
# Remove the rows whose idVar contains any of the listed IDs
df <- filter(.data = df, !grepl(pattern = 'ID1|ID2|ID3', x = idVar))
# Smallest resultVar within each groupVar, ignoring NA values.
# summarise_each() and funs() are deprecated in dplyr, and the original funs()
# call had no `.` placeholder, so it never varied by column. A plain
# summarise() with a named output states the intent directly.
# Result: one row per groupVar with the columns groupVar and resultVar.
# NOTE: a group whose resultVar is entirely NA yields Inf (with a warning).
df <- df %>%
  group_by(groupVar) %>%
  summarise(resultVar = min(resultVar, na.rm = TRUE))
# Strip every comma from `Some Variable`
df[["Some Variable"]] <- gsub(pattern = ",", replacement = "", x = df$`Some Variable`)
# Flag whether `variable` is present: TRUE when it is not NA, FALSE when it is NA
df[["newVariable"]] <- !is.na(df$variable)
# Left join altDf onto df, matching rows on Var1 and Var2 (same names in both tables)
df <- df %>% left_join(altDf, by = c("Var1", "Var2"))
# Build a space-separated label from the year, month and `The Day` columns
df$newVar <- paste("Year", df$year, "Month", df$month, "Day", df$`The Day`, sep = " ")
# Stack the rows of df2 underneath those of df1
dfBound <- rbind(df1, df2, deparse.level = 1)
# Read an Excel workbook into df
df <- read_excel(path = "/Path/to/file.xlsx")
# Store the object named df in the file df.RData
save(list = "df", file = "df.RData")
# Keep columns 1 through 42 plus column 45, selected by position
dfSvelte <- dfSvelte %>% select(1:42, 45)
# Keep only the three named columns, in this order
df <- df[, c("Col 1", "Col 2", "Col 3"), drop = FALSE]
# Split `Var Name` at each underscore into three new columns
df <- separate(
  df,
  col = `Var Name`,
  into = c("Token 1", "Token 2", "Token 3"),
  sep = "_"
)
# Expand the four ;-delimited columns so that each delimited value gets its own row
dfSplit <- df %>%
  separate_rows(delimVar1, delimVar2, delimVar3, delimVar4, sep = ";", convert = FALSE)
# Shell command (not R): sort file.txt using only its first field as the key
# (-k1,1) and redirect the sorted output to file.sorted.txt
sort -k1,1 /Users/jlong/file.txt > /Users/jlong/file.sorted.txt
# Long to wide: one new column per value of `Row Name Var`, filled from
# `Result To Spread`; convert = TRUE asks spread() to re-guess the new columns' types
dfWide <- spread(
  dfLong,
  key = `Row Name Var`,
  value = `Result To Spread`,
  convert = TRUE
)
# Keep characters index1 through index2 of `The Var`, coercing it to character first
df[["The Var"]] <- substr(as.character(df$`The Var`), start = index1, stop = index2)
# Mean of `result` per groupVar; note the summary column is itself named `df`
anotherDf <- summarise(group_by(df, groupVar), df = mean(result))
# Frequency table of the values in the first column of df
df[1] %>% table()
# t.test example: keep the rows of `d` for the gene of interest, then test IC50
# between the groups defined by Mutated.
# Bare column names (filter) and `data =` (t.test) replace `d$` / `d.filt$`, so
# each call resolves the columns inside the data it receives.
# NOTE(review): the formula interface expects Mutated to have exactly two groups.
d.filt <- filter(d, Gene == "GoI")
t.test(IC50 ~ Mutated, data = d.filt)
# Drop duplicated entries from df
df <- unique(x = df)
# Upper-case a string with stringr
stringr::str_to_upper(string = "anonymous function")
# Write df to Filename.csv without row names; NA values are written as empty strings
write.csv(x = df, file = "Filename.csv", na = "", row.names = FALSE)