rm(list = ls(all.names = TRUE)) #will clear all objects includes hidden objects.
gc() #free up memrory and report the memory usage.
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 421808 22.6 878077 46.9 654177 35.0
## Vcells 815118 6.3 8388608 64.0 1770539 13.6
The following libraries and default settings were used during the analysis:
options(scipen = 999)
# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# BiocManager::install("survcomp")
library(tidyverse)
library(broom)
library(eNetXplorer)
library(glmnet)
library(olsrr)
library(caret)
library(pander)
library(tidymodels)
library("cowplot")
##parallel map
#library(parallel)
library("furrr")
future::plan(multiprocess(workers = 16))
theme_set(theme_bw() + theme(panel.grid = element_blank()))
We first loaded all of the relevant data files (not shown here as they refer to local directories):
MRFINDINGS01 <-read.csv(paste0(dataFold, "ABCD_MRFINDINGS01_DATA_TABLE.csv")) %>%
filter(EVENTNAME =="baseline_year_1_arm_1")
MRIQCRP102 <-read.csv(paste0(dataFold, "MRIQCRP102_DATA_TABLE.csv")) %>%
filter(EVENTNAME =="baseline_year_1_arm_1")
MRIQCRP202 <-read.csv(paste0(dataFold, "MRIQCRP202_DATA_TABLE.csv")) %>%
filter(EVENTNAME =="baseline_year_1_arm_1")
MRIQCRP302 <-read.csv(paste0(dataFold, "MRIQCRP302_DATA_TABLE.csv")) %>%
filter(EVENTNAME =="baseline_year_1_arm_1")
FREESQC01 <-read.csv(paste0(dataFold, "FREESQC01_DATA_TABLE.csv")) %>%
filter(EVENTNAME =="baseline_year_1_arm_1")
DMRIQC01 <-read.csv(paste0(dataFold, "DMRIQC01_DATA_TABLE.csv")) %>%
filter(EVENTNAME =="baseline_year_1_arm_1")
NBackBeh <-read.csv(paste0(dataFold, "ABCD_MRINBACK02_DATA_TABLE.csv")) %>%
filter(EVENTNAME =="baseline_year_1_arm_1")
NBackAparc <-read.csv(paste0(dataFold, "NBACK_BWROI02_DATA_TABLE.csv")) %>%
filter(EVENTNAME =="baseline_year_1_arm_1")
NbackAsegDest <-read.csv(paste0(manipuFold, "NbackDestAsegReadableGgseg3d.csv"))
# NbackAsegDestR1 <-read.csv(paste0(manipuFold, "NbackDestAsegReadableGgseg3dRunOne.csv"))
# NbackAsegDestR2 <-read.csv(paste0(manipuFold, "NbackDestAsegReadableGgseg3dRunTwo.csv"))
MRIinfo <-tbl_df(read.csv(paste0(dataFold, "ABCD_MRI01_DATA_TABLE.csv"))) %>%
filter(EVENTNAME =="baseline_year_1_arm_1")
Siteinfo <-tbl_df(read.csv(paste0(dataFold, "ABCD_LT01_DATA_TABLE.csv"))) %>%
filter(EVENTNAME =="baseline_year_1_arm_1")
NIH_TB <-tbl_df(read.csv(paste0(dataFold,"ABCD_TBSS01_DATA_TABLE.csv"))) %>%
filter(EVENTNAME =="baseline_year_1_arm_1")
LittleMan <-tbl_df(read.csv(paste0(dataFold,"LMTP201_DATA_TABLE.csv"))) %>%
filter(EVENTNAME =="baseline_year_1_arm_1")
Pearson <-tbl_df(read.csv(paste0(dataFold,"ABCD_PS01_DATA_TABLE.csv"))) %>%
filter(EVENTNAME =="baseline_year_1_arm_1")
short_names <- tbl_df(read.csv(paste0(anotherFold,"ShortNames_all.csv") ))
MRIQcAll <- plyr::join_all(list(MRFINDINGS01,MRIQCRP102,
MRIQCRP202,MRIQCRP302,FREESQC01,DMRIQC01,
NBackBeh,NBackAparc,NbackAsegDest,MRIinfo,Siteinfo,NIH_TB,LittleMan,Pearson),
by='SUBJECTKEY', type='full')
MRIQcAll <- MRIQcAll[,!duplicated(colnames(MRIQcAll))]
Next, we included only the participants that passed the following QC:
MRIQcAll$NoIncidental <- ifelse((MRIQcAll$MRIF_SCORE== 3 |
MRIQcAll$MRIF_SCORE== 4 |
MRIQcAll$MRIF_HYDROCEPHALUS == "yes"|
MRIQcAll$MRIF_HERNIATION == "yes"), 0, 1)
count(MRIQcAll,NoIncidental)
## NoIncidental n
## 1 0 451
## 2 1 11359
## 3 NA 65
MRIQcAll %>% count(IQC_T1_OK_SER)
## IQC_T1_OK_SER n
## 1 0 63
## 2 1 10553
## 3 2 870
## 4 3 97
## 5 NA 292
MRIQcAll %>% count(FSQC_QC)
## FSQC_QC n
## 1 0 462
## 2 1 11076
## 3 NA 337
MRIQcAll$T1FreeSurferQCOk <- ifelse((MRIQcAll$IQC_T1_OK_SER > 0 &
MRIQcAll$FSQC_QC == 1), 1, 0)
count(MRIQcAll,T1FreeSurferQCOk)
## T1FreeSurferQCOk n
## 1 0 524
## 2 1 11004
## 3 NA 347
MRIQcAll %>% count(IQC_NBACK_OK_SER>0)
## IQC_NBACK_OK_SER > 0 n
## 1 FALSE 143
## 2 TRUE 10045
## 3 NA 1687
MRIQcAll %>% count(TFMRI_NBACK_BEH_PERFORMFLAG==1)
## TFMRI_NBACK_BEH_PERFORMFLAG == 1 n
## 1 FALSE 1464
## 2 TRUE 8004
## 3 NA 2407
MRIQcAll %>% count(TFMRI_NBACK_ALL_BETA_DOF>200)
## TFMRI_NBACK_ALL_BETA_DOF > 200 n
## 1 FALSE 33
## 2 TRUE 8821
## 3 NA 3021
MRIQcAll$NbackBehDofOk <- ifelse((MRIQcAll$IQC_NBACK_OK_SER>0 &
MRIQcAll$TFMRI_NBACK_BEH_PERFORMFLAG ==1 &
MRIQcAll$TFMRI_NBACK_ALL_BETA_DOF>200), 1, 0)
count(MRIQcAll,NbackBehDofOk)
## NbackBehDofOk n
## 1 0 1602
## 2 1 7439
## 3 NA 2834
MRIQcAll$AllNbackQc <- ifelse((MRIQcAll$NoIncidental == 1 &
MRIQcAll$T1FreeSurferQCOk == 1 &
MRIQcAll$NbackBehDofOk == 1), 1, 0)
count(MRIQcAll,AllNbackQc)
## AllNbackQc n
## 1 0 2399
## 2 1 6947
## 3 NA 2529
Nback.QCed <- MRIQcAll %>% filter(AllNbackQc == 1)
There was an issue was reported with the Philips scanners and it was recommended that the data from these scanners should be dropped. We did so:
Nback.QCed %>% count(MRI_INFO_MANUFACTURER)
## MRI_INFO_MANUFACTURER n
## 1 GE MEDICAL SYSTEMS 1353
## 2 Philips Medical Systems 868
## 3 SIEMENS 4726
#remove phil and add beh during fMRI
Nback.QCedNoPhil <- Nback.QCed %>%
filter(MRI_INFO_MANUFACTURER != 'Philips Medical Systems')
#check how many variables in X2backVS0back and the list of ROIs
Nback.2backVS0back <- Nback.QCedNoPhil%>% select(.,starts_with("X2backvs0back"))
#colnames(Nback.2backVS0back)
Here are a list of responsevariable names which are used in the later analysis. Both short names ang long names are used in plotting.
Resp_Var <- c('TFMRI_NB_ALL_BEH_C2B_RATE',
"NIHTBX_PICVOCAB_UNCORRECTED",
"NIHTBX_FLANKER_UNCORRECTED",
"NIHTBX_LIST_UNCORRECTED",
"NIHTBX_CARDSORT_UNCORRECTED",
"NIHTBX_PATTERN_UNCORRECTED",
"NIHTBX_PICTURE_UNCORRECTED",
"NIHTBX_READING_UNCORRECTED",
"LMT_SCR_PERC_CORRECT",
"PEA_RAVLT_LD_TRIAL_VII_TC",
"PEA_WISCV_TRS")
resp_var_plotting_long <- c("2-back working memory",
"Picture vocabulary test",
"Flanker test",
"List sorting working memory",
"Dimentional change card sort test",
"Pattern comparison processing speed test",
"Picture sequence memory test",
"Oral reading recognition test",
"Little man task correct percentage",
"RAVLT long delay trial VII total correct",
"WISC_V matrix reasoning total raw score"
)
resp_var_plotting_short <- c("2-back Work Mem","Pic Vocab","Flanker","List Work Mem","Cog Flex","Pattern Speed","Seq Memory","Reading Recog","Little Man","Audi Verbal","Matrix Reason")
resp_var_plotting <- tibble("response" = all_of(Resp_Var), "longer_name"=resp_var_plotting_long,"short_name"=resp_var_plotting_short)
subj_info <- c('SUBJECTKEY', 'MRI_INFO_DEVICESERIALNUMBER', 'SITE_ID_L')
data_all_average <- Nback.QCedNoPhil %>%
select(SUBJECTKEY, all_of(subj_info), all_of(Resp_Var),
starts_with('X2backvs0back')) %>%
rename_at(vars(-all_of(subj_info),-all_of(Resp_Var)),
~ str_replace(., 'X2backvs0back_ROI_', 'roi_'))
### checking whether the short names and ROI names in the data are the same
name_check <- which(short_names$roi != str_remove(names(select(data_all_average,starts_with("roi_"))),"roi_"))
print(name_check)
## integer(0)
new_shorter_names <- short_names
new_shorter_names$roiShort[97] <- "R Subcentral"
new_shorter_names$roiShort <- str_squish(string = new_shorter_names$roiShort)
#new_shorter_names$roiShort <- str_replace_all(string = new_shorter_names$roiShort, pattern = "\t", replacement = "")
### change ROI names to shorter ones (does not compat with formulas so not used)
#data_all_average <- data_all_average %>% rename_at(vars(starts_with("roi_")),~ new_shorter_names$roiShort)
We also performed listwise deletion and dropped all participants for whom either the behavioral performance or the activation across some brain area was greater than 3 * IQR (interquartile range).
data_all_listwise <- data_all_average %>%
drop_na()
IQR_remove <- function(data_split){
data_split%>%
mutate_at(vars(- all_of(subj_info)), ~ ifelse(
.x > quantile(.x, na.rm = TRUE)[4] + 3 * IQR(.x, na.rm = TRUE) |
.x < quantile(.x, na.rm = TRUE)[2] - 3 * IQR(.x, na.rm = TRUE),
NA, .x)) %>%
drop_na() %>%
mutate_if(is.double, ~ (.x - mean(.x)) / sd(.x))
}
n_all <- nrow(data_all_average)
n_listwise <- nrow(data_all_listwise)
n_diff_all_listwise <- nrow(data_all_average) - nrow(data_all_listwise)
The outliers are removed with respect to training and testing data set. So no count of columns is displayed here.
Next we checked the number of participants across sites and scanners: Note that site 22 and site08 have fewer than 100 participants whcn IQR rules were applied.
nrow(data_all_listwise)
## [1] 5668
#26 scanners but 8 have fewer than 100 participants (as low as 3)
data_all_listwise %>% count(MRI_INFO_DEVICESERIALNUMBER) %>%
summarize(`Number of scanners`=n()) %>%
pander(split.cell = 80, split.table = Inf, justify = 'left')
Number of scanners |
---|
26 |
data_all_listwise %>%
count(MRI_INFO_DEVICESERIALNUMBER) %>%
arrange(n) %>%
rename(`Scanner ID` = MRI_INFO_DEVICESERIALNUMBER, `Number of participants` = n) %>%
pander(split.cell = 80, split.table = Inf, justify = 'left')
Scanner ID | Number of participants |
---|---|
HASHe76e6d72 | 6 |
HASH48f7cbc3 | 20 |
HASH31ce566d | 32 |
HASHe3ce02d3 | 40 |
HASH7f91147d | 59 |
HASH69f406fa | 77 |
HASHfeb7e81a | 84 |
HASHc9398971 | 129 |
HASH5b2fcf80 | 142 |
HASH4b0b8b05 | 169 |
HASH7911780b | 174 |
HASHa3e45734 | 191 |
HASH65b39280 | 193 |
HASH03db707f | 211 |
HASH311170b9 | 221 |
HASH4d1ed7b1 | 231 |
HASHc3bf3d9c | 243 |
HASHd422be27 | 273 |
HASHd7cb4c6d | 294 |
HASHb640a1b8 | 305 |
HASHe4f6957a | 342 |
HASH11ad4ed5 | 350 |
HASH5b0cf1bb | 359 |
HASH1314a204 | 370 |
HASH96a0c182 | 371 |
HASH3935c89e | 782 |
#19 sites, but 2 sites are fewer than 100 -> use sites?
data_all_listwise %>%
count(SITE_ID_L) %>%
summarize(`Number of sites`=n()) %>%
pander(split.cell = 80, split.table = Inf, justify = 'left')
Number of sites |
---|
19 |
data_all_listwise %>%
count(SITE_ID_L) %>%
arrange(n) %>%
rename(`Site ID` = SITE_ID_L, `Number of participants` = n) %>%
pander(split.cell = 80, split.table = Inf, justify = 'left')
Site ID | Number of participants |
---|---|
site22 | 20 |
site08 | 143 |
site15 | 174 |
site18 | 191 |
site07 | 193 |
site11 | 211 |
site05 | 221 |
site09 | 230 |
site04 | 253 |
site21 | 311 |
site13 | 320 |
site10 | 334 |
site03 | 359 |
site02 | 370 |
site06 | 371 |
site12 | 374 |
site20 | 402 |
site14 | 409 |
site16 | 782 |
# Remove site 22 and 8
data_all_listwise <- data_all_listwise %>%
filter(SITE_ID_L != 'site22' & SITE_ID_L != 'site08')
Most are normally distributed.
#Look at distribution of NIHTBX_LIST_UNCORRECTED (i.e., working memory from Nback)
resp_names <-data_all_listwise %>% select(all_of(Resp_Var))%>%
names()%>%
set_names()
density_plot_grid <- resp_names %>%
map(~ggplot(data_all_listwise,aes(x=.data[[.]]))
+stat_function(fun=dnorm,
color="skyblue", size = 1.5,
args=list(mean=mean(data_all_listwise$.),
sd=sd(data_all_listwise$.))) +
geom_density() +
labs(x = NULL, y =NULL,title = resp_var_plotting$short_name[[which(resp_var_plotting$response==.)]])
)
title_density_plot <- ggdraw() +
draw_label(
"Density plots of all the Cognitive Performance Variables",
fontface = 'bold',
x = 0,
hjust = 0
) +
theme(
# add margin on the left of the drawing canvas,
# so title is aligned with left edge of first plot
plot.margin = margin(0, 0, 0, 7)
)
plot_grid(title_density_plot,plot_grid(plotlist = density_plot_grid),nrow = 2 , rel_heights = c(0.1, 1))
To create the training and test data, we randomly sampled 75% of participants and assigned them to the training set. The rest of the participants were assigned to hold-out test data.
set.seed(123456)
data_vfold =vfold_cv(data = data_all_listwise, v=4,repeats = 1)
Split the data for the cross validation grouped by site. Use one site as holdout and the other sites for training
unique_site <- length(unique(data_all_listwise$SITE_ID_L))
site_vfold <- group_vfold_cv(data = data_all_listwise, group = "SITE_ID_L", v= unique_site)
## the following two lines take the split results and transform it into a data frame
## so that all the results can be checked.
#model_data_1 <- site_vfold$splits[[1]]%>% analysis()
#holdout__data_1 <- site_vfold$splits[[1]] %>% assessment()
## splits will be the `rsplit` object with the 90/10 partition
holdout_results <- function(.x,splits, ...) {
# Fit the model to the 75%
mod <- lm(..., data = analysis(splits)%>% IQR_remove())
# Save the 25%
holdout <- assessment(splits)%>% IQR_remove()
# `augment` will save the predictions with the holdout data set
res <- broom::augment(mod, newdata = holdout)
preds <- predict(mod, newdata = holdout)
performance <- cor(preds, holdout[[.x]])
slope <- mod %>% broom::tidy() %>%
filter(term != '(Intercept)') %>%
rename(roi = term)
slope_corr <- slope %>% bind_cols(performance = performance)
slope_corr
}
resp_result <- function(.x){
x=.x
formulas <- paste0(x ,' ~ ', colnames(select(data_all_listwise,-all_of(Resp_Var),-all_of(subj_info))))
results_test_simple <-map(formulas, ~ holdout_results(.x=x,splits = data_vfold$splits[[1]],...)) %>% bind_rows() %>%
mutate(FDR = p.adjust(p.value, method = 'fdr'),
bonferroni= p.adjust(p.value, method = 'bonferroni'))
return(results_test_simple)
}
simple_all_IQR <- map(.x=resp_names,~resp_result(.x))
longer_all <-resp_names %>% map(.,~pivot_longer(simple_all_IQR[[.]],cols = c("bonferroni","FDR", "p.value"),names_to="p_val_type",values_to="p_vals"))
##To plot all the density of all the areas vurses significant areas after two corrections
##I set all the uncorrected p value to 0 to pass the screening in the plotting function
longer_all <-resp_names %>%map(.,~mutate(longer_all[[.]],plt_val = ifelse(p_val_type=="p.value",0,p_vals)))
longer_all <-resp_names %>%map(.,~mutate(longer_all[[.]],plt_type = ifelse(p_val_type=="p.value","All",ifelse(p_val_type=="FDR","FDR","Bonferroni"))))
sig_all <- longer_all %>%map(.,~filter(.,plt_val < 0.05)%>%mutate(.,plt_type= as.factor(plt_type))%>%
mutate(.,plt_type = factor(.$plt_type,levels =c ("All","FDR","Bonferroni"))))
simple_plot_list <- resp_names %>%map(.,~ggplot(sig_all[[.]],aes(performance,color=plt_type, fill=plt_type)) +
geom_histogram(binwidth = 0.01, position = 'identity',alpha=0.5) +
scale_x_continuous(limits = c(-0.1, 0.25)) +
scale_y_continuous(limits = c(0, 30)) +
labs(x = NULL, fill = NULL, y = NULL,
title = resp_var_plotting$short_name[[which(resp_var_plotting$response==.)]])+
#scale_color_manual(values = c("darkgoldenrod1","brown2","cyan3"))+
#scale_fill_manual(values = c("darkgoldenrod1","brown2","cyan3"))+
guides(color=FALSE)
)
title_simple_plot <- ggdraw() +
draw_label(
"Out-of-sample Mass-Univariate Predictive Ability (Pearson's r)",
fontface = 'bold',
x = 0,
hjust = 0
) +
theme(
# add margin on the left of the drawing canvas,
# so title is aligned with left edge of first plot
plot.margin = margin(0, 0, 0, 7)
)
ggpubr::ggarrange(title_simple_plot,ggpubr::ggarrange(plotlist=simple_plot_list, common.legend = TRUE),nrow = 2 , heights = c(0.1, 1))
n_signif_fdr <- resp_names %>% map(.,~filter(simple_all_IQR[[.]],FDR < 0.05))
n_signif_fdr <- resp_names %>% map(.,~count(n_signif_fdr[[.]]))%>%do.call(rbind,.)
n_signif_bonf <- resp_names %>% map(.,~filter(simple_all_IQR[[.]], bonferroni< 0.05))
n_signif_bonf <- resp_names %>% map(.,~count(n_signif_bonf[[.]]))%>%do.call(rbind,.)
n_sig_all <- data.frame("var_name"=resp_var_plotting$short_name,"FDR_total"=n_signif_fdr$n,"BONF_total"=n_signif_bonf$n)%>% as_tibble()%>%print()
## # A tibble: 11 x 3
## var_name FDR_total BONF_total
## <chr> <int> <int>
## 1 2-back Work Mem 97 69
## 2 Pic Vocab 99 55
## 3 Flanker 59 25
## 4 List Work Mem 86 54
## 5 Cog Flex 63 40
## 6 Pattern Speed 43 14
## 7 Seq Memory 57 28
## 8 Reading Recog 85 46
## 9 Little Man 75 41
## 10 Audi Verbal 52 31
## 11 Matrix Reason 79 51
For the n-back behavioral performance, With FDR adjustment, there were 97 significant ROIs out of the 167 total. With Bonferroni adjustment, there were 69 significant ROIs.
Plotting all significant areas for mass univariate with 2SE
sig_all_wider_FDR <- resp_names %>% map(.,~filter(simple_all_IQR[[.]],FDR < 0.05))
sig_all_wider_FDR_rename <- resp_names %>% map(.,~mutate(sig_all_wider_FDR[[.]],roi = str_remove(roi, 'roi_')))
sig_all_wider_FDR_rename <- sig_all_wider_FDR_rename %>% map(.,~left_join( .,new_shorter_names,by="roi"))
roi_num_simple_FDR <- sig_all_wider_FDR_rename %>% map(.,~dim(.)[1])
max_roi_simple_FDR <- max(as.numeric(roi_num_simple_FDR))
sig_all_wider_FDR_rename <- resp_names %>% map(.,~mutate(sig_all_wider_FDR_rename[[.]],direction = ifelse(sig_all_wider_FDR_rename[[.]]$estimate >= median(sig_all_wider_FDR_rename[[.]]$estimate)|roi_num_simple_FDR[[.]] <= floor(max_roi_simple_FDR/2),"big","small")))
resp_names %>% map(~ggplot(sig_all_wider_FDR_rename[[.]],aes(x = fct_reorder(roiShort, estimate), y = estimate,
ymin = estimate - 2 * std.error, ymax = estimate + 2 * std.error)) +
geom_hline(yintercept = 0, linetype = 'dashed', col = 'grey60') +
geom_pointrange(fatten = 1.5, col = 'grey60') +
coord_flip() +
guides(colour = guide_legend(override.aes = list(size = 2.5)))+
labs(x = 'Brain Regoins that passed FDR', y=NULL,
title =paste0("Mass-Univariate Parameter Estimates (Training Set)\n",resp_var_plotting$longer_name[[which(resp_var_plotting$response==.)]]) ) +
facet_wrap(~direction, scales = "free_y" ) +
theme_bw() +
#theme(axis.text.y = element_text(angle = 15)) +
theme(legend.title=element_blank()) +
theme(legend.position = "top") +
theme(
axis.title.x = element_text(size = 20),
axis.text.x = element_text(size = 10),
axis.title.y = element_text(size = 10),
axis.text.y = element_text(size = 10),
legend.text = element_text(size = 10)) +
theme(
strip.background = element_blank(),
strip.text.x = element_blank()
)
)
## $TFMRI_NB_ALL_BEH_C2B_RATE
##
## $NIHTBX_PICVOCAB_UNCORRECTED
##
## $NIHTBX_FLANKER_UNCORRECTED
##
## $NIHTBX_LIST_UNCORRECTED
##
## $NIHTBX_CARDSORT_UNCORRECTED
##
## $NIHTBX_PATTERN_UNCORRECTED
##
## $NIHTBX_PICTURE_UNCORRECTED
##
## $NIHTBX_READING_UNCORRECTED
##
## $LMT_SCR_PERC_CORRECT
##
## $PEA_RAVLT_LD_TRIAL_VII_TC
##
## $PEA_WISCV_TRS
## print the summary statistics for rois passed FDR correction
## maximum
max_FDR <-resp_names %>% map(., ~sig_all_wider_FDR_rename[[.]][which(sig_all_wider_FDR_rename[[.]]$performance==max(sig_all_wider_FDR_rename[[.]]$performance)),]%>% select(.,-roi,-direction)) %>% do.call(rbind,.)%>%select("estimate", "performance","FDR","bonferroni","roiShort")%>%print()
## # A tibble: 11 x 5
## estimate performance FDR bonferroni roiShort
## <dbl> <dbl> <dbl> <dbl> <chr>
## 1 0.260 0.245 2.43e-47 4.85e-47 R Precuneus
## 2 0.165 0.159 2.61e-18 1.30e-17 R Middle Frontal G
## 3 0.109 0.142 7.97e- 8 3.19e- 7 R Sulcus Intermedius Primus
## 4 0.170 0.155 4.62e-20 1.85e-19 L Intra/Transv Parietal S
## 5 -0.0664 0.157 1.04e- 3 4.04e- 2 L Planum Polare of STG
## 6 -0.0497 0.137 3.30e- 2 1.00e+ 0 R Subcentral
## 7 0.0924 0.170 2.63e- 6 3.68e- 5 R Precuneus
## 8 0.176 0.156 6.89e-21 1.38e-20 L Intra/Transv Parietal S
## 9 0.122 0.144 2.89e-10 3.18e- 9 L Middle Frontal G
## 10 -0.148 0.0953 2.23e- 2 9.94e- 1 L Planum Polare of STG
## 11 -0.253 0.156 2.73e- 4 1.26e- 2 L Postcentral G
##minimum
min_FDR <-resp_names %>% map(., ~sig_all_wider_FDR_rename[[.]][which(sig_all_wider_FDR_rename[[.]]$performance==min(sig_all_wider_FDR_rename[[.]]$performance)),]%>% select(.,-roi,-direction)) %>% do.call(rbind,.)%>%select("estimate", "performance","FDR","bonferroni","roiShort")%>%print()
## # A tibble: 11 x 5
## estimate performance FDR bonferroni roiShort
## <dbl> <dbl> <dbl> <dbl> <chr>
## 1 0.0414 -0.0581 0.0418 1 L Inf Occipital
## 2 0.0423 -0.0292 0.0361 1 L Supramarginal G
## 3 -0.0453 -0.0378 0.0417 1 R Inf Temporal G
## 4 0.0425 -0.0461 0.0397 1 R Med Orbital S
## 5 0.0508 -0.0479 0.0148 0.845 R Subparietal S
## 6 0.0529 -0.0125 0.0210 0.609 L Opercular of IFG
## 7 -0.0451 -0.00509 0.0375 1 R Suborbital S
## 8 0.0482 -0.0470 0.0185 1 L Inf Occipital
## 9 -0.0436 -0.0180 0.0411 1 L Transv Temporal S
## 10 -0.137 -0.0297 0.0439 1 R Accumben
## 11 0.159 -0.0379 0.0275 1 R Supramarginal G
##median
median_FDR <-resp_names %>% map(., ~sig_all_wider_FDR_rename[[.]][which(sig_all_wider_FDR_rename[[.]]$performance==median(sig_all_wider_FDR_rename[[.]]$performance)),]%>% select(.,-roi,-direction)) %>% do.call(rbind,.)%>%select("estimate", "performance","FDR","bonferroni","roiShort")%>%print()
## # A tibble: 9 x 5
## estimate performance FDR bonferroni roiShort
## <dbl> <dbl> <dbl> <dbl> <chr>
## 1 -0.0710 0.104 0.000279 0.0184 R Subcallosal G
## 2 -0.0679 0.0781 0.000696 0.0376 R Amygdala
## 3 0.0593 0.0625 0.00590 0.177 R Superior Parietal G
## 4 -0.0507 0.0673 0.0164 0.998 R Med Occi-temp/Lingual S
## 5 -0.0519 0.0547 0.0238 0.761 L Ant Transv Temporal G
## 6 -0.0584 0.0748 0.00634 0.228 R Subcentral
## 7 0.0637 0.0708 0.00144 0.0721 Brain Stem
## 8 0.0665 0.0663 0.00120 0.0492 L Ventral DC
## 9 0.238 0.0700 0.000595 0.0292 Brain Stem
## print the mean parameter estimation and performance that passed FDR corredtion
FDR_mean_est <- resp_names %>% map(.,~mean(sig_all_wider_FDR_rename[[.]]$estimate))%>%do.call(rbind,.)
FDR_mean_perform <- resp_names %>% map(.,~mean(sig_all_wider_FDR_rename[[.]]$performance))%>%do.call(rbind,.)
FDR_sd_est <- resp_names %>% map(.,~sd(sig_all_wider_FDR_rename[[.]]$estimate))%>%do.call(rbind,.)
FDR_sd_perform <- resp_names %>% map(.,~sd(sig_all_wider_FDR_rename[[.]]$performance))%>%do.call(rbind,.)
FDR_print <- data.frame("var_name_FDR"=resp_var_plotting$short_name,"Mean_estimation"=FDR_mean_est,"SD_Estimation"=FDR_sd_est,"Mean_Performance"=FDR_mean_perform,"SD_performance"=FDR_sd_perform)%>% as_tibble()%>%print()
## # A tibble: 11 x 5
## var_name_FDR Mean_estimation SD_Estimation Mean_Performance SD_performance
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2-back Work Mem 0.0495 0.112 0.0966 0.0692
## 2 Pic Vocab 0.0154 0.0896 0.0729 0.0400
## 3 Flanker 0.0342 0.0630 0.0629 0.0370
## 4 List Work Mem 0.0335 0.0845 0.0784 0.0392
## 5 Cog Flex 0.0202 0.0804 0.0608 0.0496
## 6 Pattern Speed 0.0237 0.0609 0.0572 0.0348
## 7 Seq Memory 0.0545 0.0503 0.0714 0.0409
## 8 Reading Recog 0.0362 0.0874 0.0671 0.0422
## 9 Little Man 0.0238 0.0807 0.0664 0.0322
## 10 Audi Verbal 0.163 0.154 0.0349 0.0316
## 11 Matrix Reason 0.189 0.294 0.0707 0.0452
sig_all_wider_bonf <- resp_names %>% map(.,~filter(simple_all_IQR[[.]],bonferroni < 0.05))
sig_all_wider_bonf_rename <- resp_names %>% map(.,~mutate(sig_all_wider_bonf[[.]],roi = str_remove(roi, 'roi_')))
sig_all_wider_bonf_rename <- sig_all_wider_bonf_rename %>% map(.,~left_join( .,new_shorter_names,by="roi"))
roi_num_simple_bonf <- sig_all_wider_bonf_rename %>% map(.,~dim(.)[1])
max_roi_simple_bonf <- max(as.numeric(roi_num_simple_bonf))
sig_all_wider_bonf_rename <- resp_names %>% map(.,~mutate(sig_all_wider_bonf_rename[[.]],direction = ifelse(sig_all_wider_bonf_rename[[.]]$estimate >= median(sig_all_wider_bonf_rename[[.]]$estimate)|roi_num_simple_bonf[[.]] <= floor(max_roi_simple_bonf/2),"big","small")))
resp_names %>% map(~ggplot(sig_all_wider_bonf_rename[[.]],aes(x = fct_reorder(roiShort, estimate), y = estimate,
ymin = estimate - 2 * std.error, ymax = estimate + 2 * std.error)) +
geom_hline(yintercept = 0, linetype = 'dashed', col = 'grey60') +
geom_pointrange(fatten = 1.5, col = 'grey60') +
coord_flip() +
guides(colour = guide_legend(override.aes = list(size = 2.5)))+
labs(x = 'Brain Regions that passed Bonferroni', y=NULL,
title = paste0("Mass-Univariate Parameter Estimates (Training Set)\n",resp_var_plotting$longer_name[[which(resp_var_plotting$response==.)]]) ) +
facet_wrap(~direction, scales = "free_y" ) +
theme_bw() +
#theme(axis.text.y = element_text(angle = 15)) +
theme(legend.title=element_blank()) +
theme(legend.position = "top") +
theme(
axis.title.x = element_text(size = 20),
axis.text.x = element_text(size = 10),
axis.title.y = element_text(size = 10),
axis.text.y = element_text(size = 10),
legend.text = element_text(size = 10)) +
theme(
strip.background = element_blank(),
strip.text.x = element_blank()
)
)
## $TFMRI_NB_ALL_BEH_C2B_RATE
##
## $NIHTBX_PICVOCAB_UNCORRECTED
##
## $NIHTBX_FLANKER_UNCORRECTED
##
## $NIHTBX_LIST_UNCORRECTED
##
## $NIHTBX_CARDSORT_UNCORRECTED
##
## $NIHTBX_PATTERN_UNCORRECTED
##
## $NIHTBX_PICTURE_UNCORRECTED
##
## $NIHTBX_READING_UNCORRECTED
##
## $LMT_SCR_PERC_CORRECT
##
## $PEA_RAVLT_LD_TRIAL_VII_TC
##
## $PEA_WISCV_TRS
print the summary statistics for rois passed Bonf correction:
max_bonf <-resp_names %>% map(., ~sig_all_wider_bonf_rename[[.]][which(sig_all_wider_bonf_rename[[.]]$performance==max(sig_all_wider_bonf_rename[[.]]$performance)),]%>% select(.,-roi,-direction)) %>% do.call(rbind,.)%>%select("estimate", "performance","FDR","bonferroni","roiShort")%>%print()
## # A tibble: 11 x 5
## estimate performance FDR bonferroni roiShort
## <dbl> <dbl> <dbl> <dbl> <chr>
## 1 0.260 0.245 2.43e-47 4.85e-47 R Precuneus
## 2 0.165 0.159 2.61e-18 1.30e-17 R Middle Frontal G
## 3 0.109 0.142 7.97e- 8 3.19e- 7 R Sulcus Intermedius Primus
## 4 0.170 0.155 4.62e-20 1.85e-19 L Intra/Transv Parietal S
## 5 -0.0664 0.157 1.04e- 3 4.04e- 2 L Planum Polare of STG
## 6 0.0861 0.0996 5.15e- 5 3.09e- 4 R Intra/Transv Parietal S
## 7 0.0924 0.170 2.63e- 6 3.68e- 5 R Precuneus
## 8 0.176 0.156 6.89e-21 1.38e-20 L Intra/Transv Parietal S
## 9 0.122 0.144 2.89e-10 3.18e- 9 L Middle Frontal G
## 10 0.360 0.0924 4.70e- 9 4.70e- 9 R Sup Frontal S
## 11 -0.253 0.156 2.73e- 4 1.26e- 2 L Postcentral G
min_bonf <-resp_names %>% map(., ~sig_all_wider_bonf_rename[[.]][which(sig_all_wider_bonf_rename[[.]]$performance==min(sig_all_wider_bonf_rename[[.]]$performance)),]%>% select(.,-roi,-direction)) %>% do.call(rbind,.)%>%select("estimate", "performance","FDR","bonferroni","roiShort")%>%print()
## # A tibble: 11 x 5
## estimate performance FDR bonferroni roiShort
## <dbl> <dbl> <dbl> <dbl> <chr>
## 1 0.0747 -0.0000541 0.0000929 0.00566 L Short Insular G
## 2 0.0801 0.0171 0.0000343 0.00144 R Superior Parietal G
## 3 0.0727 0.0259 0.000441 0.00970 L Mid Frontal S
## 4 0.0886 0.0328 0.00000586 0.000182 L Cerebellum
## 5 0.0670 -0.0220 0.00104 0.0418 L Vert Ramus of Ant Lat S
## 6 0.0847 0.00383 0.0000820 0.000574 L Sulcus Intermedius Primus
## 7 0.0675 0.0314 0.00132 0.0355 R Opercular of IFG
## 8 0.0743 -0.00409 0.000183 0.00712 L Lingual G
## 9 -0.0676 0.0118 0.00100 0.0392 L Ant Transv Temporal G
## 10 0.206 0.00331 0.000927 0.0250 L Lingual G
## 11 0.274 0.00567 0.0000941 0.00377 R Mid Temporal G
median_bonf <-resp_names %>% map(., ~sig_all_wider_bonf_rename[[.]][which(sig_all_wider_bonf_rename[[.]]$performance==median(sig_all_wider_bonf_rename[[.]]$performance)),]%>% select(.,-roi,-direction)) %>% do.call(rbind,.)%>%select("estimate", "performance","FDR","bonferroni","roiShort")%>%print()
## # A tibble: 6 x 5
## estimate performance FDR bonferroni roiShort
## <dbl> <dbl> <dbl> <dbl> <chr>
## 1 0.163 0.122 6.30e-19 1.13e-17 R Ant Circular S of Insula
## 2 0.122 0.0951 1.76e-10 3.16e- 9 L Sup Percentral S
## 3 0.0945 0.0865 3.77e- 6 3.77e- 5 R Middle Frontal G
## 4 -0.0861 0.0852 1.93e- 5 5.60e- 4 R Long/Central Insular
## 5 0.272 0.0491 8.27e- 6 9.07e- 5 R Sup Percentral S
## 6 0.432 0.0866 6.72e-11 1.21e- 9 L Mid Frontal S
bonf_mean_est <- resp_names %>% map(.,~mean(sig_all_wider_bonf_rename[[.]]$estimate))%>%do.call(rbind,.)
bonf_mean_perform <- resp_names %>% map(.,~mean(sig_all_wider_bonf_rename[[.]]$performance))%>%do.call(rbind,.)
bonf_sd_est <- resp_names %>% map(.,~sd(sig_all_wider_bonf_rename[[.]]$estimate))%>%do.call(rbind,.)
bonf_sd_perform <- resp_names %>% map(.,~sd(sig_all_wider_bonf_rename[[.]]$performance))%>%do.call(rbind,.)
bonf_print <- data.frame("var_name_bonf"=resp_var_plotting$short_name,"Mean_estimation"=bonf_mean_est,"SD_Estimation"=bonf_sd_est,"Mean_Performance"=bonf_mean_perform,"SD_performance"=bonf_sd_perform)%>% as_tibble()%>%print()
## # A tibble: 11 x 5
## var_name_bonf Mean_estimation SD_Estimation Mean_Performance SD_performance
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2-back Work Mem 0.0681 0.124 0.118 0.0604
## 2 Pic Vocab 0.0320 0.108 0.0885 0.0334
## 3 Flanker 0.0842 0.0344 0.0894 0.0309
## 4 List Work Mem 0.0598 0.0897 0.0943 0.0339
## 5 Cog Flex 0.0409 0.0864 0.0686 0.0441
## 6 Pattern Speed 0.0829 0.00884 0.0579 0.0252
## 7 Seq Memory 0.0843 0.0331 0.0950 0.0358
## 8 Reading Recog 0.0687 0.0969 0.0857 0.0356
## 9 Little Man 0.0492 0.0911 0.0826 0.0246
## 10 Audi Verbal 0.213 0.147 0.0471 0.0234
## 11 Matrix Reason 0.297 0.282 0.0825 0.0410
slope_plot_grid <-resp_names %>% map(.,~ggplot(simple_all_IQR[[.]],aes(estimate, performance)) +
geom_point(col = 'grey60') +
labs(x = NULL, y = NULL,
title =resp_var_plotting$short_name[[which(resp_var_plotting$response==.)]] )
)
title_slope_plot <- ggdraw() +
theme(
# add margin on the left of the drawing canvas,
# so title is aligned with left edge of first plot
plot.margin = margin(0, 0, 0, 7)
)
slope_plot_figure<- plot_grid(title_slope_plot,plot_grid(plotlist = slope_plot_grid, nrow=4, ncol=3),nrow = 2 , rel_heights = c(0.1, 1))
ggpubr::annotate_figure(slope_plot_figure,left= ggpubr::text_grob("Mass-Univariate Predictive Ability (Pearson's r)",size=15,rot=90),
bottom = ggpubr::text_grob("Mass-Univariate Training Coefficients",size=15))
Set up folds & summary functions for cross-validation across sites
all_sample <- site_vfold$id
all_site <- all_sample %>% map (.,~unique(assessment(site_vfold$splits[[which(site_vfold$id==.)]])$SITE_ID_L)) %>% do.call(rbind,.)
all_site <- as.vector(all_site)
all_site_number <- str_remove_all(all_site,"site")
site_result <- function(site_var,resp_var){
data_split <- site_vfold$splits[[which(site_vfold$id==site_var)]]
formula_site <- paste0(resp_var ,' ~ ', colnames(select(data_all_listwise,-all_of(Resp_Var),-all_of(subj_info))))
results_test_simple <-map(formula_site,~holdout_results(.x=resp_var,splits = data_split,...)) %>% bind_rows()%>%
mutate(site=unique(assessment(data_split)$SITE_ID_L))
return(results_test_simple)
}
leave_one_all_IQR <- vector("list", length = length(resp_names))
names(leave_one_all_IQR)<- resp_names
#this command runs 10*17*167 simple regressions
#pretty time-consuming. So not recommended to run
for(i in 1:length(resp_names)){
leave_one_all_IQR[[resp_names[i]]]<- all_sample %>% map(.,~site_result(.,resp_var=resp_names[i]))%>%bind_rows() %>%
mutate(FDR = p.adjust(p.value, method = 'fdr'),
bonferroni= p.adjust(p.value, method = 'bonferroni'))
}
simple_leave_one_all_IQR <- resp_names %>% map(., ~mutate(simple_leave_one_all_IQR[[.]],site=str_remove(simple_leave_one_all_IQR[[.]]$site,"site")))
simple_site_plot_grid <- resp_names %>% map(.,~ggplot(simple_leave_one_all_IQR[[.]],aes(x = site, y = performance)) +
geom_jitter(width = 0.1, height = 0, size = 0.5, col = 'grey40') +
geom_boxplot(alpha = 0.5, fill = 'grey80', col = 'grey40', outlier.colour = NA) +
scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
labs(x = NULL, y = NULL,
title =resp_var_plotting$short_name[[which(resp_var_plotting$response==.)]] )
)
title_simple_site <- ggdraw() +
draw_label(
"Leave-One-Site-Out Mass-Univariate Predictive Ability",
fontface = 'bold',
x = 0,
hjust = 0
) +
theme(
# add margin on the left of the drawing canvas,
# so title is aligned with left edge of first plot
plot.margin = margin(0, 0, 0, 7)
)
simple_site_plot_figure<- plot_grid(title_simple_site,plot_grid(plotlist = simple_site_plot_grid),nrow = 2 , rel_heights = c(0.1, 1))
ggpubr::annotate_figure(simple_site_plot_figure,left= ggpubr::text_grob("Predictive Ability (Pearson's r)",size=15,rot=90),
bottom = ggpubr::text_grob("Sites",size=15))
## select the important areas for cross method plot
simple_leave_one_all_FDR_IQR <- simple_leave_one_all_IQR %>% map(.,~filter(.,FDR<=0.05)%>% group_by(.,site)%>%nest(.,data=-site))
simple_leave_one_all_bonf_IQR <- simple_leave_one_all_IQR %>% map(.,~filter(.,bonferroni<=0.05)%>% group_by(.,site)%>%nest(.,data=-site))
cross_site_median = function(resp_var,data_input,method){
median_extract <- all_site_number %>% map(.,~median(data_input[[resp_var]]$data[[which(data_input[[resp_var]]$site==.)]]$performance))
combined_results <- tibble(performance = as.numeric(median_extract),site=simple_leave_one_all_FDR_IQR$TFMRI_NB_ALL_BEH_C2B_RATE$site,
method=method,response=resp_var)
combined_results$site <- paste0("site", combined_results$site)
return(combined_results)
}
simple_leave_one_all_FDR_median_IQR<- resp_names %>% map (.,~cross_site_median(.,data_input=simple_leave_one_all_FDR_IQR,method = "FDR"))%>% bind_rows()%>%
left_join(.,resp_var_plotting,by="response")
simple_leave_one_all_bonf_median_IQR<- resp_names %>% map (.,~cross_site_median(.,data_input=simple_leave_one_all_bonf_IQR,method = "Bonferroni"))%>% bind_rows()%>%left_join(.,resp_var_plotting,by="response")
rois_fdr <- simple_all_IQR %>%map(.,~filter(., FDR < 0.05) )
rois_fdr <- rois_fdr %>% map(.,~pluck(.,'roi'))
rois_bonf <- simple_all_IQR %>%map(.,~filter(., bonferroni < 0.05) )
rois_bonf <- rois_bonf %>% map(.,~pluck(.,'roi'))
nest_all <- resp_names %>%map(., ~group_by(simple_leave_one_all_IQR[[.]],roi))
nest_all <- resp_names %>%map(., ~nest(simple_leave_one_all_IQR[[.]],data=-roi))
roi_names <- nest_all[[resp_names[1]]]$roi
nest_all_list <- nest_all %>% map(., ~as.list(.))
nest_all_list <- nest_all_list%>% map (., ~.[["data"]])
for(i in 1:length(resp_names)){
names(nest_all_list[[resp_names[i]]]) <- roi_names
}
nest_list_fdr <- resp_names %>% map(., ~nest_all_list[[.]][rois_fdr[[.]]])
nest_list_fdr <- resp_names %>% map(.,~bind_rows(nest_list_fdr[[.]],.id = "roi"))
nest_list_fdr <- nest_list_fdr %>% map(.,~mutate(., roi=str_remove(roi, "roi_"))%>%
left_join( .,new_shorter_names,by="roi"))
roi_num_nest_list <- resp_names %>% map(.,~length(unique(nest_list_fdr[[.]]$roi)))
max_roi_nest_FDR <- max(as.numeric(roi_num_nest_list))
nest_list_fdr <- resp_names %>% map(.,~mutate(nest_list_fdr[[.]],direction = ifelse(nest_list_fdr[[.]]$estimate >= median(nest_list_fdr[[.]]$estimate)|roi_num_nest_list[[.]] <= floor(max_roi_nest_FDR/2),"big","small")))
resp_names %>% map(.,~ggplot(nest_list_fdr[[.]],aes(x = roiShort, y = performance)) +
geom_jitter(width = 0.1, height = 0, size = 0.5) +
geom_boxplot(alpha = 0.5, fill = 'grey80', col = 'grey40', outlier.colour = NA) +
labs(x = "Regions significant after FDR correction", y = "Predictive Ability across sites (Pearson's r)",
title =paste0("Out-Of-Site Mass-Univariate Predictive Ability\n" , resp_var_plotting$longer_name[[which(resp_var_plotting$response==.)]] )) +
coord_flip()+
facet_wrap(~direction, scales = "free_y" ) +
guides(colour = guide_legend(override.aes = list(size = 2.5)))+theme(
strip.background = element_blank(),
strip.text.x = element_blank())+
theme(axis.title.y = element_text(size = 16),
axis.title.x = element_text(size = 16),
plot.title = element_text(size=16))
)
## $TFMRI_NB_ALL_BEH_C2B_RATE
##
## $NIHTBX_PICVOCAB_UNCORRECTED
##
## $NIHTBX_FLANKER_UNCORRECTED
##
## $NIHTBX_LIST_UNCORRECTED
##
## $NIHTBX_CARDSORT_UNCORRECTED
##
## $NIHTBX_PATTERN_UNCORRECTED
##
## $NIHTBX_PICTURE_UNCORRECTED
##
## $NIHTBX_READING_UNCORRECTED
##
## $LMT_SCR_PERC_CORRECT
##
## $PEA_RAVLT_LD_TRIAL_VII_TC
##
## $PEA_WISCV_TRS
nest_list_bonf <- resp_names %>% map(., ~nest_all_list[[.]][rois_bonf[[.]]])
nest_list_bonf <- resp_names %>% map(.,~bind_rows(nest_list_bonf[[.]],.id = "roi"))
nest_list_bonf <- nest_list_bonf %>% map(.,~mutate(., roi=str_remove(roi, "roi_"))%>%
left_join( .,new_shorter_names,by="roi"))
roi_num_nest_list_bonf <- resp_names %>% map(.,~length(unique(nest_list_bonf[[.]]$roi)))
max_roi_nest_bonf <- max(as.numeric(roi_num_nest_list_bonf))
nest_list_bonf <- resp_names %>% map(.,~mutate(nest_list_bonf[[.]],direction = ifelse(nest_list_bonf[[.]]$estimate >= median(nest_list_bonf[[.]]$estimate)|roi_num_nest_list_bonf[[.]] <= floor(max_roi_nest_bonf/2),"big","small")))
resp_names %>% map(.,~ggplot(nest_list_bonf[[.]],aes(x = roiShort, y = performance)) +
geom_jitter(width = 0.1, height = 0, size = 0.5) +
geom_boxplot(alpha = 0.5, fill = 'grey80', col = 'grey40', outlier.colour = NA) +
labs(x = "ROI significant after Bonferroni correction", y = "Predictive Ability across sites (Pearson's r)",
title = paste0("Out-Of-Site Mass-Univariate Predictive Ability\n" , resp_var_plotting$longer_name[[which(resp_var_plotting$response==.)]] )) +
coord_flip()+
facet_wrap(~direction, scales = "free_y" ) +
guides(colour = guide_legend(override.aes = list(size = 2.5)))+theme(
strip.background = element_blank(),
strip.text.x = element_blank())+
theme(axis.title.y = element_text(size = 16),
axis.title.x = element_text(size = 16),
plot.title = element_text(size=16))
)
## $TFMRI_NB_ALL_BEH_C2B_RATE
##
## $NIHTBX_PICVOCAB_UNCORRECTED
##
## $NIHTBX_FLANKER_UNCORRECTED
##
## $NIHTBX_LIST_UNCORRECTED
##
## $NIHTBX_CARDSORT_UNCORRECTED
##
## $NIHTBX_PATTERN_UNCORRECTED
##
## $NIHTBX_PICTURE_UNCORRECTED
##
## $NIHTBX_READING_UNCORRECTED
##
## $LMT_SCR_PERC_CORRECT
##
## $PEA_RAVLT_LD_TRIAL_VII_TC
##
## $PEA_WISCV_TRS
ols_var_all_train_IQR <- resp_names %>% map(.,~select(analysis(data_vfold$splits[[1]])%>% IQR_remove(),.,starts_with("roi")))
ols_var_all_test_IQR <- resp_names %>% map(.,~select(assessment(data_vfold$splits[[1]])%>% IQR_remove(),.,starts_with("roi")))
fit_ols_all <- map(.x = resp_names, ~lm(paste0(.x, "~ ."), data = ols_var_all_train_IQR[[.x]]))
rsq_fit <- fit_ols_all %>% map(.,~summary(.)$adj.r.squared)%>% do.call(rbind,.)
tibble(response=names(fit_ols_all),rsquared = rsq_fit[,1])%>% left_join(.,resp_var_plotting ,by="response")%>%
select("longer_name","rsquared")%>%
pander(split.cell = 80, split.table = Inf, justify = 'left')
longer_name | rsquared |
---|---|
2-back working memory | 0.2741 |
Picture vocabulary test | 0.1371 |
Flanker test | 0.05319 |
List sorting working memory | 0.106 |
Dimentional change card sort test | 0.07561 |
Pattern comparison processing speed test | 0.03644 |
Picture sequence memory test | 0.04465 |
Oral reading recognition test | 0.1033 |
Little man task correct percentage | 0.0701 |
RAVLT long delay trial VII total correct | 0.05506 |
WISC_V matrix reasoning total raw score | 0.1076 |
preds_ols_all <- resp_names %>% map(.,~predict(fit_ols_all[[.]], newdata = ols_var_all_test_IQR[[.x]]))
predvsreal_ols_all <- resp_names %>% map(., ~ tibble(predicted = preds_ols_all[[.]], observed=select(assessment(data_vfold$splits[[1]])%>% IQR_remove(),.)))
cor_ols_all <- resp_names %>% map(.,~cor(preds_ols_all[[.]], ols_var_all_test_IQR[[.]][[.]]))
rmse_ols_all <- resp_names %>% map(.,~sqrt(mean((preds_ols_all[[.]] - ols_var_all_test_IQR[[.]][[.]]) ^ 2)))
ols_pred_all <- resp_names %>% map(., ~ggplot(predvsreal_ols_all[[.]], aes(x = scale(predicted) , y = scale(observed[[.]]))) +
geom_jitter(height = 0.1, width = 0.1, size = 1, col = 'grey60') +
geom_smooth(method = 'lm', se = FALSE, col = 'black') +
labs(x = NULL,
y = NULL,
title = paste (resp_var_plotting$short_name[[which(resp_var_plotting$response==.)]],'\nr = ',round(cor_ols_all[[.]], 3)))
)
title_ols_pred_plot <- ggdraw() +
draw_label(
"Out-of-Sample OLS Predictive Ability",
fontface = 'bold',
x = 0,
hjust = 0
) +
theme(
# add margin on the left of the drawing canvas,
# so title is aligned with left edge of first plot
plot.margin = margin(0, 0, 0, 7)
)
ols_pred_all_figure<- plot_grid(title_ols_pred_plot,plot_grid(plotlist = ols_pred_all),nrow = 2 , rel_heights = c(0.1, 1))
ggpubr::annotate_figure(ols_pred_all_figure,left= ggpubr::text_grob("Observed Cognitive Performance (Z)",size=15,rot=90),
bottom = ggpubr::text_grob("Predicted Cognitive Performance (Z)",size=15))
ggplot(predvsreal_ols_all[[resp_names[1]]], aes(x = scale(predicted) , y = scale(observed) )) +
geom_jitter(height = 0.1, width = 0.1, size = 1, col = 'grey60') +
geom_smooth(method = 'lm', se = FALSE, col = 'black') +
labs(x =paste0("Predicted ", resp_var_plotting$short_name[[which(resp_var_plotting$response==resp_names[1])]] ," (Z)") ,
y = paste0("Observed\n", resp_var_plotting$short_name[[which(resp_var_plotting$response==resp_names[1])]] ," (Z)"),
title = paste ("Out-of-Sample OLS\nPredictive Ability ",'r = ',round(cor_ols_all[[resp_names[1]]], 2)))+
theme(axis.text=element_text(size=28),
axis.title=element_text(size=28),
plot.title = element_text(size=32))
tidy_fit_ols_all <-fit_ols_all %>% map(., ~broom::tidy(.))
tidy_fit_ols_all <- tidy_fit_ols_all %>% map(.,~filter(.,term != '(Intercept)' & p.value < 0.05 )%>%
mutate(.,roi = str_remove(term, 'roi_'))%>%
left_join( .,new_shorter_names,by="roi")%>%
mutate(.,direction = ifelse(estimate >= median(estimate), "big","small")))
resp_names %>% map(~ggplot(tidy_fit_ols_all[[.]],aes(fct_reorder(roiShort, estimate), estimate,
ymin = estimate - 2 * std.error,
ymax = estimate + 2 * std.error)) +
geom_hline(yintercept = 0, linetype = 'dashed', col = 'grey60') +
geom_pointrange(fatten = 1.5, col = 'grey60') +
coord_flip() +
labs(x = 'Explanatory variables (Brain Regions)', y = 'Coefficients (± 2 std. errors)',
title = paste0(resp_var_plotting$longer_name[[which(resp_var_plotting$response==.)]],
'\nOLS Coeffcients (p < .05)')) +
facet_wrap(~ direction, scales = 'free_y') +
theme(
axis.title.x = element_text(size = 15),
axis.text.x = element_text(size = 12),
axis.title.y = element_text(size = 15),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 10),
plot.title = element_text(size=16)) +
theme(
strip.background = element_blank(),
strip.text.x = element_blank()
))
## $TFMRI_NB_ALL_BEH_C2B_RATE
##
## $NIHTBX_PICVOCAB_UNCORRECTED
##
## $NIHTBX_FLANKER_UNCORRECTED
##
## $NIHTBX_LIST_UNCORRECTED
##
## $NIHTBX_CARDSORT_UNCORRECTED
##
## $NIHTBX_PATTERN_UNCORRECTED
##
## $NIHTBX_PICTURE_UNCORRECTED
##
## $NIHTBX_READING_UNCORRECTED
##
## $LMT_SCR_PERC_CORRECT
##
## $PEA_RAVLT_LD_TRIAL_VII_TC
##
## $PEA_WISCV_TRS
n_sig_ols <- tidy_fit_ols_all %>% map(.,~filter(., p.value< 0.05) )
n_sig_ols %>%map(.,~count(.))%>%do.call(rbind,.)%>% print()
## # A tibble: 11 x 1
## n
## * <int>
## 1 22
## 2 15
## 3 11
## 4 7
## 5 14
## 6 16
## 7 9
## 8 14
## 9 8
## 10 16
## 11 19
ols_site <- function(resp_var,data_split = data_vfold$splits[[1]]){
#recipe train and test data are from ols as the variables are the same
data_train <- analysis(data_split)%>%IQR_remove()
data_test <- assessment(data_split)%>%IQR_remove()
ols_data_train <- select(data_train, resp_var, starts_with("roi"))
ols_data_test <- select(data_test,resp_var,starts_with("roi"))
norm_recipe <- recipe(as.formula(paste0(resp_var, "~ .")) , data = ols_data_train) %>%
step_dummy(all_nominal()) %>%
step_center(all_predictors()) %>%
step_scale(all_predictors()) %>%
# estimate the means and standard deviations
prep(training = ols_data_train, retain = TRUE)
## model
ols_mod <- linear_reg(penalty = tune() ,mixture =tune()) %>%
set_engine("lm")
fit_ols <- ols_mod %>% fit(as.formula(paste0(resp_var, "~ .")),data= ols_data_train)
pred_fit <-predict.model_fit(fit_ols,new_data = ols_data_test)
corval <- cor(pred_fit$.pred, ols_data_test[[resp_var]])
site_output <- tibble(performance = corval, site =unique(data_test$SITE_ID_L))
site_output
}
ols_site_all <- function(resp_var){
site_ols_all <- all_sample %>% future_map(.,~ols_site(all_of(resp_var), data_split=site_vfold$splits[[which(site_vfold$id==.)]]))
site_ols_all<- bind_rows(site_ols_all)
site_ols_all <- site_ols_all %>% mutate(response=resp_var,method="OLS")
return(site_ols_all)
}
ols_cross_site <- vector("list", length = length(resp_names))
names(ols_cross_site)<- resp_names
for(i in 1:length(resp_names)){
ols_cross_site[[i]] <- ols_site_all(resp_names[i])
}
ols_cross_site_list <- ols_cross_site
ols_cross_site <- bind_rows(ols_cross_site)
ols_cross_site <- left_join (ols_cross_site,resp_var_plotting, by="response")
ols_site <- ols_cross_site %>%
ggplot(aes(x = reorder(short_name,performance), y = performance)) +
geom_jitter(width = 0.1, height = 0, size = 0.5) +
geom_boxplot(alpha = 0.5, fill = 'grey80', col = 'grey40', outlier.colour = NA) +
labs(x=NULL,y=NULL,title="OLS") +
coord_flip()
ols_site
Elastic net is based on tidymodels. Once tuned eNetXplorer is used for stats inference
enet_tuning <- function(resp_var,data_split = data_vfold$splits[[1]]){
#recipe train and test data are from ols as the variables are the same
data_train <- analysis(data_split)%>% IQR_remove()
data_test <- assessment(data_split)%>% IQR_remove()
ols_data_train <- select(data_train, resp_var, starts_with("roi"))
ols_data_test <- select(data_test,resp_var,starts_with("roi"))
norm_recipe <- recipe(as.formula(paste0(resp_var, "~ .")) , data = ols_data_train) %>%
step_dummy(all_nominal()) %>%
step_center(all_predictors()) %>%
step_scale(all_predictors()) %>%
# estimate the means and standard deviations
prep(training = ols_data_train, retain = TRUE)
## model
glmn_mod <- linear_reg(penalty = tune() ,mixture =tune()) %>%
set_engine("glmnet")
set.seed(123456)
all_rs <- rsample::vfold_cv(data=ols_data_train,v = 10,repeats = 1)#10fold split within the original 75% training data
glmn_wfl <- workflow()%>%
add_recipe(norm_recipe)%>%
add_model(glmn_mod)
##penalty is lambda, mixture is alpha
##log transformed to get lambda between 0.025 to 10
glmn_set <- dials::parameters(penalty(range = c(-5,1), trans = log10_trans()),mixture())
## 100 levels of lambda and 10 levels of alpha
glmn_grid <- grid_regular(glmn_set, levels = c(400,100))
ctrl <- control_grid(save_pred = TRUE, verbose = TRUE)
glmn_tune <- tune_grid(glmn_wfl,
resamples = all_rs,
grid = glmn_grid,
metrics = metric_set(rsq_trad),##use traditional r square because there is no correlation
control = ctrl)
##select the best tuned lambda and alpha
best_glmn <- select_best(glmn_tune, metric = "rsq_trad")
## use the best tunes parameters to fit the test data
glmn_wfl_final <-
glmn_wfl %>%
finalize_workflow(best_glmn) %>%
fit(data = ols_data_train)
pred_glmn <- predict(glmn_wfl_final, new_data =ols_data_test)
##compute the correlation between predicted results and the test data
performance_glmn <- cor(pred_glmn$.pred,ols_data_test[[resp_var]])
best_glmn <- best_glmn %>% mutate(performance=performance_glmn)
return(best_glmn)
}
#this line tuning all the parameters in all 11 response variables
# time consuming so the default of this section is set to eval= FALSE
glmn_tune_all_IQR <- resp_names%>% future_map(.,~enet_tuning(.))
enet_test <- select(assessment(data_vfold$splits[[1]])%>%IQR_remove() ,starts_with("roi"))
enet_train <- select(analysis(data_vfold$splits[[1]])%>%IQR_remove(),starts_with("roi"))
enet_resp_train <- resp_names %>% map(., ~select(analysis(data_vfold$splits[[1]])%>%IQR_remove(),starts_with(.)))
enet_resp_test <- resp_names %>% map(., ~select(assessment(data_vfold$splits[[1]])%>%IQR_remove(),starts_with(.)))
### NOT RUN by default, takes long time
fit_enet_all_IQR <-resp_names %>% future_map(.,~eNetXplorer(x = as.matrix(enet_train) , y = as.vector(enet_resp_train[[.]][[.]]), alpha = glmn_tuning_all_IQR[[.]]$mixture, n_fold = 10,nlambda.ext = 1000, nlambda = 1000, scaled = TRUE, seed = 123456))
lambdas_all <- vector("list", length = length(resp_names))
names(lambdas_all)<- resp_names
lambdas_all_best <- vector("list", length = length(resp_names))
names(lambdas_all_best)<- resp_names
summary_enet_all <- vector("list", length = length(resp_names))
names(summary_enet_all)<- resp_names
for(i in 1:length(resp_names)){
lambdas_all[[resp_names[i]]] <- fit_enet_all_IQR[[resp_names[i]]][["lambda_values"]]
lambdas_all_best[[resp_names[i]]] <- fit_enet_all_IQR[[resp_names[i]]][["best_lambda"]]
summary_enet_all[[resp_names[i]]]<- as_tibble(summary(fit_enet_all_IQR[[resp_names[i]]])[[2]]) %>% slice(1)
}
summary_enet_all %>% bind_rows() %>%
mutate(respones = resp_var_plotting$short_name)%>%
rename(.,Alpha = alpha, `Best-tune lambda` = lambda.max,
`Accuracy (Pearson r)` = QF.est,
`P-value` = model.vs.null.pval) %>%
pander(split.cell = 80, split.table = Inf, justify = 'left')
Alpha | Best-tune lambda | Accuracy (Pearson r) | P-value | respones |
---|---|---|---|---|
0.1111 | 0.06412 | 0.4987 | 0.0003998 | 2-back Work Mem |
1 | 0.0117 | 0.3424 | 0.0003998 | Pic Vocab |
0.0101 | 1.041 | 0.2017 | 0.0003998 | Flanker |
0.0202 | 0.4938 | 0.2993 | 0.0003998 | List Work Mem |
0 | 0.9964 | 0.2347 | 0.0003998 | Cog Flex |
0.0101 | 0.9176 | 0.1523 | 0.0003998 | Pattern Speed |
0 | 2.214 | 0.1877 | 0.0003998 | Seq Memory |
0.798 | 0.02045 | 0.2922 | 0.0003998 | Reading Recog |
0 | 1.495 | 0.2477 | 0.0003998 | Little Man |
0.0101 | 1.612 | 0.1812 | 0.0003998 | Audi Verbal |
0.0101 | 1.442 | 0.2906 | 0.0003998 | Matrix Reason |
alpha_vals <- glmn_tuning_all_IQR %>% map(.,~paste0("a",.[["mixture"]]))
enet_lambda_grid <-resp_names%>% map(.,~qplot(fit_enet_all_IQR[[.]][["lambda_values"]][[alpha_vals[[.]]]], fit_enet_all_IQR[[.]][["lambda_QF_est"]][[alpha_vals[[.]]]], geom = 'line') +
# scale_y_continuous(breaks = seq(0, 0.6, by = 0.1)) +
scale_x_log10() +
geom_vline(xintercept = lambdas_all_best[[.]], col = 'red', linetype = 'dashed') +
labs(x = NULL, y = NULL, title =resp_var_plotting$short_name[[which(resp_var_plotting$response==.)]] )
#+theme(axis.title.y=element_blank(),
# axis.text.y=element_blank(),
# axis.ticks.y=element_blank())
)
title_enet_lambda <- ggdraw() +
draw_label(
"Elastic Net Lambda (Penalty) Parameter Tuning",
fontface = 'bold',
x = 0,
hjust = 0
) +
theme(
# add margin on the left of the drawing canvas,
# so title is aligned with left edge of first plot
plot.margin = margin(0.1, 0.1, 0.1, 7)
)
enet_lambda_all_figure<- plot_grid(title_enet_lambda,plot_grid(plotlist = enet_lambda_grid,nrow=4,ncol=3),nrow = 2 , rel_heights = c(0.1, 1))
ggpubr::annotate_figure(enet_lambda_all_figure,left= ggpubr::text_grob("Cross Validated Predictive Ability\n(Pearson's r)",size=15,rot=90),
bottom = ggpubr::text_grob("Lambda",size=15))
fit_enet_single_all <- resp_names %>% map(.,~glmnet(x = as.matrix(select(ols_var_all_train_IQR[[.]], starts_with("roi"))) , y = ols_var_all_train_IQR[[.]][[.]], alpha = glmn_tuning_all_IQR[[.]]$mixture, lambda = glmn_tuning_all_IQR[[.]]$penalty))
preds_enet_all <- resp_names%>% map(.,~predict(fit_enet_single_all[[.]] , newx = as.matrix(select(ols_var_all_test_IQR[[.]], starts_with("roi")))))
predvsreal_enet_all <- resp_names %>% map (.,~tibble(predicted = preds_enet_all[[.]], observed = as.numeric(ols_var_all_test_IQR[[.]][[.]])))
cor_enet_all <- resp_names %>% map(.,~cor(preds_enet_all[[.]], as.numeric(ols_var_all_test_IQR[[.]][[.]])))
rmse_enet_all <-resp_names %>% map(.,~sqrt(mean((preds_enet_all[[.]] - as.numeric(ols_var_all_test_IQR[[.]][[.]])) ^ 2)))
enet_single_plot <- resp_names %>% map(.,~ggplot(predvsreal_enet_all[[.]], aes(x = scale(predicted) , y = scale(observed) )) +
geom_jitter(height = 0.1, width = 0.1, size = 1, col = 'grey60') +
geom_smooth(method = 'lm', se = FALSE, col = 'black') +
labs(x = NULL,
y = NULL,
title = paste (resp_var_plotting$short_name[[which(resp_var_plotting$response==.)]],'\nr = ',round(cor_enet_all[[.]], 3))))
title_enet_pred_plot <- ggdraw() +
draw_label(
"Out-of-Sample Elastic Net Predictive Ability",
fontface = 'bold',
x = 0,
hjust = 0
) +
theme(
# add margin on the left of the drawing canvas,
# so title is aligned with left edge of first plot
plot.margin = margin(0, 0, 0, 7)
)
enet_pred_all_figure<- plot_grid(title_enet_pred_plot,plot_grid(plotlist = enet_single_plot),nrow = 2 , rel_heights = c(0.1, 1))
ggpubr::annotate_figure(enet_pred_all_figure,left= ggpubr::text_grob("Observed Cognitive Performance (Z)",size=15,rot=90),
bottom = ggpubr::text_grob("Predicted Cognitive Performance (Z)",size=15))
ggplot(predvsreal_enet_all[[resp_names[1]]], aes(x = scale(predicted) , y = scale(observed) )) +
geom_jitter(height = 0.1, width = 0.1, size = 1, col = 'grey60') +
geom_smooth(method = 'lm', se = FALSE, col = 'black') +
labs(x =paste0("Predicted ", resp_var_plotting$short_name[[which(resp_var_plotting$response==resp_names[1])]] ," (Z)") ,
y = paste0("Observed\n", resp_var_plotting$short_name[[which(resp_var_plotting$response==resp_names[1])]] ," (Z)"),
title = paste ("Out-of-Sample Elastic Net\nPredictive Ability ",'r = ',round(cor_enet_all[[resp_names[1]]], 2)))+
theme(axis.text=element_text(size=28),
axis.title=element_text(size=28),
plot.title = element_text(size=32))
extract_tibble <- function(elastic_mod, alpha_index) {
variable <- elastic_mod$feature_coef_wmean[, alpha_index] %>% names()
wmean <- elastic_mod$feature_coef_wmean[, alpha_index]
wsd <- elastic_mod$feature_coef_wsd[, alpha_index]
null_wmean <- elastic_mod$null_feature_coef_wmean[, alpha_index]
null_wsd <- elastic_mod$null_feature_coef_wsd[, alpha_index]
pvalue <- elastic_mod$feature_coef_model_vs_null_pval[, alpha_index]
tib <- tibble(variable, wmean, wsd, null_wmean, null_wsd, pvalue)
tib <- tib %>%
gather(key = 'placeholder', value = 'value', wmean, wsd, null_wmean, null_wsd) %>%
mutate(type = ifelse(str_detect(placeholder, 'null'), 'null', 'target'),
placeholder = (str_remove(placeholder, 'null_'))) %>%
mutate(type = factor(type, labels = c('Null', 'Target'))) %>%
spread(placeholder, value)
tib
}
elastic_mod <- fit_enet_all_IQR[[resp_names[1]]]
alpha_index <- paste0("a",glmn_tuning_all_IQR[[resp_names[1]]]$mixture)
coefs_enet_all <- resp_names %>% map(.,~extract_tibble(fit_enet_all_IQR[[.]], alpha_index = paste0("a",glmn_tuning_all_IQR[[.]]$mixture)))
coefs_enet_all <- coefs_enet_all %>% map(.,~filter(.,pvalue < 0.05) %>%
mutate(.,type = ifelse(type == 'Null', 'Null permuted models', 'Target models'),
roi = str_remove(variable, 'roi_'))%>%left_join( .,new_shorter_names,by="roi"))
roi_num_enet <- coefs_enet_all %>% map(.,~dim(.)[1])
max_roi_enet <- max(as.numeric(roi_num_enet))
coefs_enet_test <- coefs_enet_all[[resp_names[1]]] %>% group_by(type)
coefs_enet_test <- coefs_enet_test%>% nest(-type)
coefs_enet_test[[2]][[1]] <- coefs_enet_test[[2]][[1]] %>% mutate(direction1 = ifelse(coefs_enet_test[[2]][[1]]$wmean >= median(coefs_enet_test[[2]][[1]]$wmean)|roi_num_enet[[resp_names[1]]] <= floor(max_roi_enet/2),"big","small"))
coefs_enet_test[[2]][[2]] <- coefs_enet_test[[2]][[2]] %>% mutate(direction1=coefs_enet_test[[2]][[1]]$direction1)
coefs_enet_test <- coefs_enet_test %>% unnest()
coefs_enet_all <- coefs_enet_all%>%map(.,~group_by(.,type))
coefs_enet_all <- coefs_enet_all%>%map(.,~nest(.,-type))
for(i in 1:length(resp_names)){
coefs_enet_all[[resp_names[i]]][["data"]][[2]]<-coefs_enet_all[[resp_names[i]]][["data"]][[2]] %>% mutate(direction = ifelse(coefs_enet_all[[resp_names[i]]][["data"]][[2]]$wmean >= median(coefs_enet_all[[resp_names[i]]][["data"]][[2]]$wmean)|roi_num_enet[[resp_names[i]]] <= floor(max_roi_enet/2),"big","small"))
coefs_enet_all[[resp_names[i]]][["data"]][[1]] <- coefs_enet_all[[resp_names[i]]][["data"]][[1]] %>% mutate(direction=coefs_enet_all[[resp_names[i]]][["data"]][[2]]$direction)
}
coefs_enet_all <- coefs_enet_all %>%map(.,~unnest(.))
resp_names %>% map(.,~ggplot(coefs_enet_all[[.]], aes(x = fct_reorder(roiShort, wmean),
y = wmean,
ymax = wmean + 2 * wsd, ymin = wmean - 2 * wsd,
col = type)) +
geom_pointrange(fatten = 0.5, key_glyph = 'point') +
scale_y_continuous(labels = numform::ff_num(zero = 0, digits = 2)) +
scale_color_grey(start = 0.7, end = 0.5) +
coord_flip() +
guides(colour = guide_legend(override.aes = list(size = 2.5)))+
labs(x = 'Explanatory Variables (Brain Regions)', y = 'Averaged Coefficient Across Models (±2 Std. dev)', col = 'Model type',
title = paste0(resp_var_plotting$longer_name[[which(resp_var_plotting$response==.)]],"\nElastic Net Coefficients (p < .05)")) +
facet_wrap(~ direction, scales = 'free_y') +
scale_color_manual(values = c("#56B4E9", "black"),labels = c("Permuted Null", "Target")) +
theme_bw() +
#theme(axis.text.y = element_text(angle = 15)) +
theme(legend.title=element_blank()) +
theme(legend.position = "top") +
theme(
axis.title.x = element_text(size = 15),
axis.text.x = element_text(size = 12),
axis.title.y = element_text(size = 15),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 15),
plot.title = element_text(size=15)) +
theme(
strip.background = element_blank(),
strip.text.x = element_blank()
)
+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
)
## $TFMRI_NB_ALL_BEH_C2B_RATE
##
## $NIHTBX_PICVOCAB_UNCORRECTED
##
## $NIHTBX_FLANKER_UNCORRECTED
##
## $NIHTBX_LIST_UNCORRECTED
##
## $NIHTBX_CARDSORT_UNCORRECTED
##
## $NIHTBX_PATTERN_UNCORRECTED
##
## $NIHTBX_PICTURE_UNCORRECTED
##
## $NIHTBX_READING_UNCORRECTED
##
## $LMT_SCR_PERC_CORRECT
##
## $PEA_RAVLT_LD_TRIAL_VII_TC
##
## $PEA_WISCV_TRS
n_signif_enet_all <- coefs_enet_all %>% map(.,~filter(.,type == 'Target models')%>%count())%>% bind_rows()
n_sig_enet_all <- data.frame("var_name"=resp_var_plotting$short_name,"ENET_total"=n_signif_enet_all$n)%>% as_tibble()%>%print()
## # A tibble: 11 x 2
## var_name ENET_total
## <chr> <int>
## 1 2-back Work Mem 29
## 2 Pic Vocab 18
## 3 Flanker 28
## 4 List Work Mem 33
## 5 Cog Flex 28
## 6 Pattern Speed 24
## 7 Seq Memory 30
## 8 Reading Recog 19
## 9 Little Man 37
## 10 Audi Verbal 21
## 11 Matrix Reason 30
enet_site_all <- function(resp_var){
site_tuning_all <- all_sample %>% future_map(.,~enet_tuning(resp_var, data_split=site_vfold$splits[[which(site_vfold$id==.)]]))
names(site_tuning_all)<- all_sample %>% map (.,~unique(assessment(site_vfold$splits[[which(site_vfold$id==.)]])$SITE_ID_L))
site_tuning_all<- site_tuning_all %>% bind_rows()
site_tuning_all <- site_tuning_all %>% mutate(site=all_site, response=resp_var,method="Elastic Net")
return(site_tuning_all)
}
enet_site_tuning_all_IQR <- resp_names %>% future_map(.,~enet_site_all(resp_var=.))
enet_site_tune_all_plot <- enet_site_tune_all_IQR %>% bind_rows()
enet_site_tune_all_plot <- left_join(enet_site_tune_all_plot,resp_var_plotting,by="response")
joint_enet_tune_all <- enet_site_tune_all_plot %>% select(performance, site, method, longer_name, short_name, response)
all_method_performance <- bind_rows(joint_enet_tune_all,ols_cross_site,simple_leave_one_all_FDR_median_IQR,simple_leave_one_all_bonf_median_IQR)%>%
mutate(method= as.factor(method))%>%
mutate(method = factor(method,levels =c ("FDR","Bonferroni","OLS","Elastic Net")))
all_method_performance%>%
ggplot(aes(x = reorder(short_name,performance), y = performance, fill=method,color=method)) +
geom_jitter(width = 0.1, height = 0, size = 0.5) +
geom_boxplot(alpha = 0.5, outlier.colour = NA, position = "identity") +
labs(x = NULL, y=NULL,title="Out-of-site Predictive Ability (Pearson's r)") +
coord_flip()+theme(axis.text=element_text(size=15),
axis.title=element_text(size=15,face="bold"),
legend.position = "bottom",
legend.title=element_blank(),
legend.text = element_text( size=15, face="bold"),
plot.title = element_text(size=17))
simple_site_FDR <- simple_leave_one_all_FDR_IQR%>%map(.,~unnest(.)%>% mutate(method="FDR"))
simple_site_bonf <- simple_leave_one_all_bonf_IQR %>% map(.,~unnest(.)%>%mutate(method="Bonferroni"))
ols_site <- ols_cross_site_list %>% map(.,~dplyr::select(., method, site,performance)%>%mutate(site = str_remove_all(site,"site")))
enet_site <- enet_site_tune_all_IQR %>% map(.,~select(.,method,site,performance)%>%mutate(site = str_remove_all(site,"site")))
cross_site_multi <- resp_names %>% map(.,~bind_rows(ols_site[[.]],enet_site[[.]]))
resp_names %>%map(.,~ggplot(cross_site_multi[[.]], aes(site, performance, col = method)) +
geom_point(key_glyph = "point", size = 3) +
geom_jitter(data = simple_site_FDR[[.]], height = 0, width = 0.2, size = 0.5,
key_glyph = "point") +
geom_boxplot(data = simple_site_FDR[[.]], alpha = 0.75, outlier.colour = NA,
key_glyph = "point")+
geom_jitter(data = simple_site_bonf[[.]], height = 0, width = 0.2, size = 0.5,
key_glyph = "point") +
geom_boxplot(data = simple_site_bonf[[.]], alpha = 0.75, outlier.colour = NA,
key_glyph = "point") +
# scale_x_discrete(guide = guide_axis(n.dodge = )) +
scale_color_manual(breaks=c("Elastic Net","OLS", "Bonferroni","FDR"),
values = c( "#C77CFF","#00BFC4" ,"#7CAE00","#F8766D" )) +
guides(colour = guide_legend(override.aes = list(size = 2.5)))+
labs(x = 'Site', y=NULL, col = 'Model',
title = paste0("Out-of-site Predictive Ability (Pearson's r)\n",resp_var_plotting$longer_name[[which(resp_var_plotting$response==.)]]))+
theme(axis.text.x=element_text(size=16, angle = 45),
axis.text.y=element_text(size=16),
axis.title.y=element_text(size=18),
axis.title.x=element_text(size=18),
plot.title=element_text(size=20),
legend.position = "right",
legend.title=element_blank(),
legend.text = element_text( size=18)))
## $TFMRI_NB_ALL_BEH_C2B_RATE
##
## $NIHTBX_PICVOCAB_UNCORRECTED
##
## $NIHTBX_FLANKER_UNCORRECTED
##
## $NIHTBX_LIST_UNCORRECTED
##
## $NIHTBX_CARDSORT_UNCORRECTED
##
## $NIHTBX_PATTERN_UNCORRECTED
##
## $NIHTBX_PICTURE_UNCORRECTED
##
## $NIHTBX_READING_UNCORRECTED
##
## $LMT_SCR_PERC_CORRECT
##
## $PEA_RAVLT_LD_TRIAL_VII_TC
##
## $PEA_WISCV_TRS
we used the training data and OLS to compute the VIF
vifs_ols_all <- resp_names %>% future_map(.,~ols_vif_tol(fit_ols_all[[.]]))
vifs_ols_all <- vifs_ols_all %>% map(.,~mutate(., term=Variables))
vifs_ols_all[[resp_names[1]]] %>% ggplot(aes(x = VIF)) +
geom_histogram(fill = 'grey80', binwidth = .5) +
scale_x_continuous(breaks = seq(0, 13, by = 3)) +
labs(x = "Explanatory Variables (Regions)",
y = 'Count', title="Variance Inflation Factor of\nOLS Explanatory Variables")+
theme_light() +
theme(text = element_text(size = 30))
summary(vifs_ols_all[[1]]$VIF)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.582 2.636 4.189 4.647 6.160 11.229
realModelTidy <- tidy(fit_ols_all[[resp_names[1]]])
realModelVif <- tibble::enframe(car::vif(fit_ols_all[[resp_names[1]]])) %>%
rename(term = name, vif = value)
realModelTidyVif <- realModelTidy %>% left_join(realModelVif, by ="term")
coefs_enet <- extract_tibble(elastic_mod = fit_enet_all_IQR[[resp_names[1]]],alpha_index = paste0("a",glmn_tuning_all_IQR[[1]]$mixture))%>%
mutate(type = ifelse(type == 'Null', 'Null permuted models', 'Target models'))
coefs_enetTrimmed <- coefs_enet %>% filter(type != "Null permuted models") %>%
rename(term = variable, eNetPval = pvalue)
realModelTidyVifEnet <- realModelTidyVif %>% left_join(coefs_enetTrimmed, by ="term")
realModelTidyVifEnetNonTrimmed <- coefs_enet %>% rename(term = variable, eNetPval = pvalue) %>%
left_join(realModelTidyVif, by ="term")
coefs_enetNull <- coefs_enet %>% filter(type == "Null permuted models") %>%
rename(nullWmean = wmean, nullWsd = wsd, term = variable) %>% select(term, nullWmean, nullWsd)
coefs_enetTarget <- coefs_enet %>% filter(type != "Null permuted models") %>%
rename(targetWmean = wmean, targetWsd = wsd, term = variable,eNetPval = pvalue) %>% select(term, eNetPval, targetWmean, targetWsd)
realModelTidyVifEnetNull <- realModelTidyVif %>% left_join(coefs_enetTarget,by ="term") %>% left_join(coefs_enetNull, by ="term")
ggplot(realModelTidyVifEnetNull , aes(x = vif, y = std.error, color= p.value<=.05, )) +
geom_point(size = 5, alpha = 0.4) +
scale_colour_discrete(name="p value",
breaks=c("FALSE", "TRUE"),
labels=c("> .05", "< .05")) +
# geom_linerange() +
theme_light() +
theme(text = element_text(size = 30)) +
theme(legend.position="top") +
xlab("Variance Inflation Factor") + ylab("OLS\nStandard Error") +
ylim(0, .05)
ggplot(realModelTidyVifEnetNull , aes(x = vif, y = estimate, color= p.value<=.05, ymin = estimate - (2 * std.error), ymax = estimate + (2 * std.error))) +
geom_point(size = 5, alpha = 0.4) +
scale_colour_discrete(name="p value",
breaks=c("FALSE", "TRUE"),
labels=c("> .05", "< .05")) +
geom_linerange() +
theme_light() +
theme(text = element_text(size = 30)) +
guides(color = FALSE) +
# theme(legend.position="top") +
xlab("Variance Inflation Factor") + ylab("OLS\nCoefficients ±2SE") +
geom_hline(yintercept = 0)
realModelTidyVifEnetNull %>%
ggplot(aes(x = vif, y = nullWsd, color= p.value<=.05, )) +
geom_point(size = 5, alpha = 0.4) +
scale_colour_discrete(name="p value",
breaks=c("FALSE", "TRUE"),
labels=c("> .05", "< .05")) +
# geom_linerange() +
theme_light() +
theme(text = element_text(size = 30)) +
theme(legend.position="top") +
xlab("Variance Inflation Factor") + ylab("Elastic Net\nSD of Permuted Null") +
ylim(0, .021)
ggplot(realModelTidyVifEnetNull, aes(x = vif, y = nullWmean, color= eNetPval<=.05,
ymin = nullWmean - (2 * nullWsd), ymax = nullWmean + (2 * nullWsd))) +
geom_point(size = 5, alpha = 0.4) +
scale_colour_discrete(name="p value",
breaks=c("FALSE", "TRUE"),
labels=c("> .05", "< .05")) +
geom_linerange() +
theme_light() +
theme(text = element_text(size = 30)) +
guides(color = FALSE) +
#theme(legend.position="top") +
xlab("Variance Inflation Factor") + ylab("Elastic Net\nCoefficients of\n Permuted Null ± 2SD")
ggplot(realModelTidyVifEnetNonTrimmed, aes(x = vif, y = wmean,
color= eNetPval<=.05,
ymin = wmean - (2 * wsd), ymax = wmean + (2 * wsd),
shape = type)) +
geom_point(size = 5, alpha = 0.4) +
scale_colour_discrete(name="p value",
breaks=c("FALSE", "TRUE"),
labels=c("> .05", "< .05")) +
scale_shape_discrete(name="",
breaks=c("Null permuted models", "Target models"),
labels=c("Null", "Target")) +
geom_linerange() +
theme_light() +
theme(text = element_text(size = 30)) +
guides(color = FALSE) +
theme(legend.justification=c(1,0), legend.position=c(1,0)) +
#theme(legend.position="top") +
xlab("Variance Inflation Factor") + ylab("Elastic Net\nCoefficients ± 2SD")
signif_rois_fdr_all <- simple_all_IQR %>% map(.,~mutate(.,signif = ifelse(FDR < 0.05, 1, 0),model = 'FDR',roi = str_remove(roi, 'roi_')) %>%
select(.,model, roi, signif)%>%
left_join(.,new_shorter_names, by= "roi"))
signif_rois_bonf_all <- simple_all_IQR %>% map(.,~mutate(.,signif = ifelse(bonferroni < 0.05, 1, 0),model = 'Bonferroni',roi = str_remove(roi, 'roi_')) %>%select(.,model, roi, signif)%>%
left_join(.,new_shorter_names, by= "roi"))
signif_rois_ols_all <-fit_ols_all %>% map(.,~broom::tidy(.) %>%filter(.,term != '(Intercept)') %>%
mutate(.,signif = ifelse(p.value < 0.05, 1, 0),model = 'OLS',roi = str_remove(term, 'roi_')) %>%
select(.,model, roi, signif)%>%
left_join(.,new_shorter_names, by= "roi"))
signif_rois_enet_all <- resp_names %>% map(.,~extract_tibble(fit_enet_all_IQR[[.]], alpha_index = paste0("a",glmn_tuning_all_IQR[[.]]$mixture))%>%
filter(.,type != 'Null') %>%
mutate(.,roi = str_remove(variable, '^roi_'),signif = ifelse(pvalue < 0.05, 1, 0),model = 'Elastic net') %>%
select(.,model, roi, signif)%>%
left_join(.,new_shorter_names, by= "roi"))
signif_rois_all <- resp_names %>% map(.,~bind_rows(signif_rois_fdr_all[[.]], signif_rois_bonf_all[[.]], signif_rois_ols_all[[.]], signif_rois_enet_all[[.]]) %>%mutate(.,model = factor(model, levels = c('FDR', 'Bonferroni','OLS', 'Elastic net'))))
signif_rois_long_all <- signif_rois_all %>% map(.,~pivot_wider(.,names_from = model, values_from = signif) %>%
filter(.,`FDR` == 1 | `Bonferroni` == 1 | `OLS` == 1 | `Elastic net` == 1) %>%
mutate(.,n_mods = 1 * `OLS` + 2 * `Elastic net` + 4 * `Bonferroni` + 6 * `FDR`) %>%
pivot_longer(.,names_to = 'model', values_to = 'signif', cols = `FDR`:`Elastic net`) %>%
mutate(.,signif = ifelse(signif == 0, 'Not significant', 'Significant'))%>%
mutate(.,model = factor(model, levels = c('Elastic net','OLS' ,'Bonferroni', 'FDR'))) %>%
mutate(.,facetRow4 = ifelse(row_number() <= (nrow(.)/4)+2, 'Left',
ifelse(row_number() <= ((nrow(.)/4)*2), 'LeftMid',
ifelse(row_number() <= ((nrow(.)/4)*3)+2, 'RightMid', 'Right')))) %>%
mutate(.,facetRow3 = ifelse(row_number() <= nrow(.)/3, 'Left',
ifelse(row_number() <= (nrow(.)/3)*2, 'Mid','Right'))) %>%
mutate(.,facetRow2 = ifelse(row_number() <= nrow(.)/2, 'Left', 'Right'))
)
resp_names%>% map(~ggplot(signif_rois_long_all[[.]],aes(x = fct_rev(model), y = fct_reorder(roiShort, n_mods), fill = signif)) +
geom_tile(col = 'grey60') +
# scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
scale_fill_manual(values = c('grey80', "#56B4E9")) +
labs(x = '', y = 'Brain Regions', fill = '',
title = resp_var_plotting$longer_name[[which(resp_var_plotting$response==.)]]) +
facet_wrap(~ facetRow4, scales = 'free_y', ncol= 4) +
# theme(aspect.ratio = 6) +
theme(plot.title = element_text(size = 20)) +
theme(legend.position="top") +
theme(legend.text = element_text(size = 15)) +
theme(axis.text.x = element_text(angle = 90)) +
theme(axis.title.y = element_text(size =15)) +
theme(axis.text.x = element_text(size = 10)) +
theme(axis.text.y = element_text(size = 10)) +
theme(
strip.background = element_blank(),
strip.text.x = element_blank()))
## $TFMRI_NB_ALL_BEH_C2B_RATE
##
## $NIHTBX_PICVOCAB_UNCORRECTED
##
## $NIHTBX_FLANKER_UNCORRECTED
##
## $NIHTBX_LIST_UNCORRECTED
##
## $NIHTBX_CARDSORT_UNCORRECTED
##
## $NIHTBX_PATTERN_UNCORRECTED
##
## $NIHTBX_PICTURE_UNCORRECTED
##
## $NIHTBX_READING_UNCORRECTED
##
## $LMT_SCR_PERC_CORRECT
##
## $PEA_RAVLT_LD_TRIAL_VII_TC
##
## $PEA_WISCV_TRS
library(ggseg)
library(ggsegExtra)
#library(ggseg3d)
library(ggsegDesterieux)
library(RColorBrewer)
library(ggpubr)
library(png)
library("cowplot")
Resp_Var <- c('TFMRI_NB_ALL_BEH_C2B_RATE',
"NIHTBX_PICVOCAB_UNCORRECTED",
"NIHTBX_FLANKER_UNCORRECTED",
"NIHTBX_LIST_UNCORRECTED",
"NIHTBX_CARDSORT_UNCORRECTED",
"NIHTBX_PATTERN_UNCORRECTED",
"NIHTBX_PICTURE_UNCORRECTED",
"NIHTBX_READING_UNCORRECTED",
"LMT_SCR_PERC_CORRECT",
"PEA_RAVLT_LD_TRIAL_VII_TC",
"PEA_WISCV_TRS")
resp_var_plotting_long <- c("2-back task: percent accuracy",
"Picture vocabulary test",
"Flanker test",
"List sorting working memory",
"Dimentional change card sort test",
"Pattern comparison processing speed test",
"Picture sequence memory test",
"Oral reading recognition test",
"Little man task correct percentage",
"RAVLT long delay trial VII total correct",
"WISC_V matrix reasoning total raw score"
)
resp_var_plotting_short <- c("2-back Work Mem","Pic Vocab","Flanker","List Work Mem","Cog Flex","Pattern Speed","Seq Memory","Reading Recog","Little Man","Audi Verbal","Matrix Reason")
resp_var_plotting <- tibble("response" =Resp_Var, "longer_name"=resp_var_plotting_long,"short_name"=resp_var_plotting_short)
resp_names <-data_all_listwise %>% select(all_of(Resp_Var))%>%
names()%>%
set_names()
##data manipulation for plotting
coefs_simple_all <- simple_all %>% map(.,~mutate(.,label = str_replace_all(roi, '\\.', '-')) %>%
mutate(.,label = str_replace_all(label, 'roi_', '')) %>%
mutate(.,label = str_replace_all(label, 'Brain-Stem', 'brain-stem')) %>%
mutate(.,label = str_replace_all(label, 'Right-Cerebellum-Cortex
', 'right-cerebellum-cortex')) %>%
mutate(.,estimateFDR = ifelse(FDR <= .05, estimate, NA)) %>%
mutate(.,estimateBon = ifelse(bonferroni <= .05, estimate, NA)))
coefs_simple_ggsegDes_all <- vector("list", length = length(resp_names))
names(coefs_simple_ggsegDes_all)<- resp_names
for(i in 1:length(resp_names)){
#desterieux is our subcortical atlas
coefs_simple_ggsegDes_all[[resp_names[i]]] <- desterieux %>%
select(label) %>%
na.omit() %>%
left_join(select(coefs_simple_all[[resp_names[i]]], label, estimate, estimateFDR, estimateBon,performance), by = "label")
}
#original aseg has both axial and sagital views. Need to filter out sagital view
aseg <- aseg %>% filter(side=="axial")
coefs_simple_ggsegAseg_all <- vector("list", length = length(resp_names))
names(coefs_simple_ggsegAseg_all)<- resp_names
for(i in 1:length(resp_names)){
#aseg is our subcortical atlas
coefs_simple_ggsegAseg_all[[resp_names[i]]] <- aseg %>%
select(label) %>%
na.omit() %>%
left_join(select(coefs_simple_all[[resp_names[i]]], label, estimate, estimateFDR, estimateBon,performance), by = "label")
}
estimationLimit_all <-resp_names%>% map(.,~c(range(c(coefs_simple_ggsegDes_all[[.]]$estimate,coefs_simple_ggsegAseg_all[[.]]$estimate), na.rm = TRUE)))
performanceLimit_all <-resp_names%>% map(.,~c(range(c(coefs_simple_ggsegDes_all[[.]]$performance,coefs_simple_ggsegAseg_all[[.]]$performance), na.rm = TRUE)))
estimateUncorrectedCort_all <-resp_names %>% map(.,~ggseg(.data = coefs_simple_ggsegDes_all[[.]] ,
atlas = 'desterieux',
mapping = aes(fill = estimate),
# position = "stacked",
colour="black"
) +
theme_void() +
scale_fill_gradient2(limits = estimationLimit_all[[.]], midpoint = 0, low = "blue", mid = "white",
high = "red", space = "Lab", na.value="transparent" ) +
theme(legend.position = "none")+
labs(title =resp_var_plotting$longer_name[[which(resp_var_plotting$response==.)]]))
ggpubr::ggarrange(plotlist=estimateUncorrectedCort_all,nrow=6)
## $`1`
##
## $`2`
##
## attr(,"class")
## [1] "list" "ggarrange"
estimateUncorrectedSub_all <- resp_names %>% map(.,~ggseg(.data = coefs_simple_ggsegAseg_all[[.]] ,
atlas = 'aseg',
mapping = aes(fill = estimate),
view = "axial",
# position = "stacked",
colour="black"
) +
theme_void() +
scale_fill_gradient2(limits = estimationLimit_all[[.]], midpoint = 0, low = "blue", mid = "white",
high = "red", space = "Lab", na.value="transparent" ) +
theme(legend.position = "none")
#+ labs(title =resp_var_plotting$short_name[[which(resp_var_plotting$response==.)]])
)
ggpubr::ggarrange(plotlist=estimateUncorrectedSub_all,nrow=3,ncol=4)
estimateFDRCort_all <- resp_names %>% map(.,~ggseg(.data = coefs_simple_ggsegDes_all[[.]] ,
atlas = 'desterieux',
mapping = aes(fill = estimateFDR),
# position = "stacked",
colour="black"
) +
theme_void() +
scale_fill_gradient2(limits = estimationLimit_all[[.]], midpoint = 0, low = "blue", mid = "white",
high = "red", space = "Lab", na.value="transparent" ) +
theme(legend.position = "none") +
labs(title =paste0("Mass-Univariate FDR-Corrected Coefficients ")))
ggarrange(plotlist=estimateFDRCort_all,nrow=6)
## $`1`
##
## $`2`
##
## attr(,"class")
## [1] "list" "ggarrange"
estimateFDRSub_all <- resp_names %>% map(.,~ggseg(.data = coefs_simple_ggsegAseg_all[[.]] ,
atlas = 'aseg',
mapping = aes(fill = estimateFDR),
view = "axial",
# position = "stacked",
colour="black"
) +
theme_void() +
scale_fill_gradient2(limits = estimationLimit_all[[.]], midpoint = 0, low = "blue", mid = "white",
high = "red", space = "Lab", na.value="transparent" ) +
guides(fill = guide_colourbar(barwidth = 0.5, barheight = 3, title = NULL))
# +labs(title =resp_var_plotting$short_name[[which(resp_var_plotting$response==.)]])
)
ggpubr::ggarrange(plotlist=estimateFDRSub_all,nrow=3,ncol=4,common.legend = TRUE,legend = "right")
estimateBonCort_all <- resp_names %>% map(.,~ggseg(.data = coefs_simple_ggsegDes_all[[.]] ,
atlas = 'desterieux',
mapping = aes(fill = estimateBon),
# position = "stacked",
colour="black"
) +
theme_void() +
scale_fill_gradient2(limits = estimationLimit_all[[.]], midpoint = 0, low = "blue", mid = "white",
high = "red", space = "Lab", na.value="transparent" ) +
theme(legend.position = "none") +
ggtitle(paste0("Mass-Univariate Bonferroni-Corrected Coefficients ")))
ggarrange(plotlist=estimateBonCort_all,nrow=6)
## $`1`
##
## $`2`
##
## attr(,"class")
## [1] "list" "ggarrange"
estimateBonSub_all <- resp_names %>% map(.,~ggseg(.data = coefs_simple_ggsegAseg_all[[.]] ,
atlas = 'aseg',
mapping = aes(fill = estimateBon),
view = "axial",
# position = "stacked",
colour="black"
) +
theme_void() +
scale_fill_gradient2(limits = estimationLimit_all[[.]], midpoint = 0, low = "blue", mid = "white",
high = "red", space = "Lab", na.value="transparent" ) +
guides(fill = guide_colourbar(barwidth = 0.5, barheight = 3, title = NULL))
#+labs(title =resp_var_plotting$short_name[[which(resp_var_plotting$response==.)]])
)
ggpubr::ggarrange(plotlist=estimateBonSub_all,nrow=3,ncol=4,common.legend = TRUE,legend = "right")
predictionCort_all <-resp_names %>% map(.,~ggseg(.data = coefs_simple_ggsegDes_all[[.]] ,
atlas = 'desterieux',
mapping = aes(fill = performance),
# position = "stacked",
colour="black"
) +
theme_void() +
scale_fill_gradient2(limits = performanceLimit_all[[.]],
midpoint = 0, low = "blue", mid = "white",
high = "red", space = "Lab", na.value="transparent" ) +
theme(legend.position = "none") +
ggtitle(paste0("Mass-Univariate Out-Of-Sample Prediction (Pearson's r) ")))
ggarrange(plotlist=predictionCort_all,nrow=6)
## $`1`
##
## $`2`
##
## attr(,"class")
## [1] "list" "ggarrange"
predictionSub_all <-resp_names%>% map (.,~ggseg(.data = coefs_simple_ggsegAseg_all[[.]] ,
atlas = 'aseg',
mapping = aes(fill = performance),
view = "axial",
# position = "stacked",
colour="black"
) +
theme_void() +
scale_fill_gradient2(limits = performanceLimit_all[[.]], midpoint = 0, low = "blue", mid = "white",
high = "red", space = "Lab", na.value="transparent") +
guides(fill = guide_colourbar(barwidth = 0.5, barheight = 3, title = NULL))
# theme(legend.position = "none"))
)
ggpubr::ggarrange(plotlist=predictionSub_all,nrow=3,ncol=4,common.legend = TRUE,legend = "right")
tidy_fit_ols_all <-fit_ols_all %>% map(., ~broom::tidy(.)%>%
filter(.,term != '(Intercept)')%>%
mutate(.,label = str_replace_all(term, '\\.', '-')) %>%
mutate(.,label = str_replace_all(label, 'roi_', '')) %>%
mutate(.,label = str_replace_all(label, 'Brain-Stem', 'brain-stem')) %>%
mutate(.,label = str_replace_all(label, 'Right-Cerebellum-Cortex', 'right-cerebellum-cortex'))%>%
mutate(.,estimate_sig = ifelse(p.value <= .05, estimate, NA))
)
coefs_ols_ggsegDes_all <- vector("list", length = length(resp_names))
names(coefs_ols_ggsegDes_all)<- resp_names
for(i in 1:length(resp_names)){
#desterieux is our subcortical atlas
coefs_ols_ggsegDes_all[[resp_names[i]]] <- desterieux %>%
select(label) %>%
na.omit() %>%
left_join(select(tidy_fit_ols_all[[resp_names[i]]], label, estimate, estimate_sig), by = "label")
}
#original aseg has both axial and sagital views. Need to filter out sagital view
aseg <- aseg %>% filter(side=="axial")
coefs_ols_ggsegAseg_all <- vector("list", length = length(resp_names))
names(coefs_ols_ggsegAseg_all)<- resp_names
for(i in 1:length(resp_names)){
#aseg is our subcortical atlas
coefs_ols_ggsegAseg_all[[resp_names[i]]] <- aseg %>%
select(label) %>%
na.omit() %>%
left_join(select(tidy_fit_ols_all[[resp_names[i]]], label, estimate, estimate_sig), by = "label")
}
estimationLimit_ols_all <-resp_names%>% map(.,~c(range(c(coefs_ols_ggsegDes_all[[.]]$estimate,coefs_ols_ggsegAseg_all[[.]]$estimate), na.rm = TRUE)))
estimateCort_ols_all <-resp_names %>% map(.,~ggseg(.data = coefs_ols_ggsegDes_all[[.]] ,
atlas = 'desterieux',
mapping = aes(fill = estimate_sig),
# position = "stacked",
colour="black"
) +
theme_void() +
scale_fill_gradient2(limits = estimationLimit_ols_all[[.]], midpoint = 0, low = "blue", mid = "white",
high = "red", space = "Lab", na.value="transparent" ) +
theme(legend.position = "none")+
labs(title =paste0("OLS Coefficients ")) )
ggpubr::ggarrange(plotlist=estimateCort_ols_all,nrow=6)
## $`1`
##
## $`2`
##
## attr(,"class")
## [1] "list" "ggarrange"
estimateSub_ols_all <- resp_names %>% map(.,~ggseg(.data = coefs_ols_ggsegAseg_all[[.]] ,
atlas = 'aseg',
mapping = aes(fill = estimate_sig),
view = "axial",
# position = "stacked",
colour="black"
) +
theme_void() +
scale_fill_gradient2(limits = estimationLimit_ols_all[[.]], midpoint = 0, low = "blue", mid = "white",
high = "red", space = "Lab", na.value="transparent" ) +
guides(fill = guide_colourbar(barwidth = 0.5, barheight = 3, title = NULL))
#+labs(title =resp_var_plotting$short_name[[which(resp_var_plotting$response==.)]])
)
ggpubr::ggarrange(plotlist=estimateSub_ols_all,nrow=3,ncol=4,common.legend = TRUE,legend = "right")
tidy_fit_enet_all <- vector("list", length = length(resp_names))
names(tidy_fit_enet_all)<- resp_names
for(i in 1: length(resp_names)){
elastic_mod=fit_ridge_all[[resp_names[i]]]
alpha_index = paste0("a",glmn_tuning_all[[resp_names[i]]]$mixture)
variable <- elastic_mod$feature_coef_wmean[, alpha_index] %>% names()
wmean <- as.numeric(elastic_mod$feature_coef_wmean[, alpha_index])
pvalue <- as.numeric(elastic_mod$feature_coef_model_vs_null_pval[, alpha_index])
tib <- tibble::tibble(variable, wmean, pvalue)
tidy_fit_enet_all[[resp_names[i]]]<- tib}
tidy_fit_enet_all <-tidy_fit_enet_all %>% map(., ~mutate(.,label = str_replace_all(variable, '\\.', '-')) %>%
mutate(.,label = str_replace_all(label, 'roi_', '')) %>%
mutate(.,label = str_replace_all(label, 'Brain-Stem', 'brain-stem')) %>%
mutate(.,label = str_replace_all(label, 'Right-Cerebellum-Cortex', 'right-cerebellum-cortex'))%>%
mutate(.,estimate_sig = ifelse(pvalue <= .05, wmean, NA)))
coefs_enet_ggsegDes_all <- vector("list", length = length(resp_names))
names(coefs_enet_ggsegDes_all)<- resp_names
for(i in 1:length(resp_names)){
#desterieux is our subcortical atlas
coefs_enet_ggsegDes_all[[resp_names[i]]] <- desterieux %>%
select(label) %>%
na.omit() %>%
left_join(select(tidy_fit_enet_all[[resp_names[i]]], label, wmean, estimate_sig), by = "label")
}
#original aseg has both axial and sagital views. Need to filter out sagital view
aseg <- aseg %>% filter(side=="axial")
coefs_enet_ggsegAseg_all <- vector("list", length = length(resp_names))
names(coefs_enet_ggsegAseg_all)<- resp_names
for(i in 1:length(resp_names)){
#aseg is our subcortical atlas
coefs_enet_ggsegAseg_all[[resp_names[i]]] <- aseg %>%
select(label) %>%
na.omit() %>%
left_join(select(tidy_fit_enet_all[[resp_names[i]]], label, wmean, estimate_sig), by = "label")
}
estimationLimit_enet_all <-resp_names%>% map(.,~c(range(c(coefs_enet_ggsegDes_all[[.]]$wmean,coefs_enet_ggsegAseg_all[[.]]$wmean), na.rm = TRUE)))
estimateCort_enet_all <-resp_names %>% map(.,~ggseg(.data = coefs_enet_ggsegDes_all[[.]] ,
atlas = 'desterieux',
mapping = aes(fill = estimate_sig),
# position = "stacked",
colour="black"
) +
theme_void() +
scale_fill_gradient2(limits = estimationLimit_enet_all[[.]], midpoint = 0, low = "blue", mid = "white",
high = "red", space = "Lab", na.value="transparent" ) +
theme(legend.position = "none")+
labs(title =paste0("Elastic Net Coefficients ")) )
ggpubr::ggarrange(plotlist=estimateCort_enet_all,nrow=6)
## $`1`
##
## $`2`
##
## attr(,"class")
## [1] "list" "ggarrange"
estimateSub_enet_all <- resp_names %>% map(.,~ggseg(.data = coefs_enet_ggsegAseg_all[[.]] ,
atlas = 'aseg',
mapping = aes(fill = estimate_sig),
view = "axial",
# position = "stacked",
colour="black"
) +
theme_void() +
scale_fill_gradient2(limits = estimationLimit_enet_all[[.]], midpoint = 0, low = "blue", mid = "white",
high = "red", space = "Lab", na.value="transparent" ) +
guides(fill = guide_colourbar(barwidth = 0.5, barheight = 3, title = NULL))
#+labs(title =resp_var_plotting$short_name[[which(resp_var_plotting$response==.)]])
)
ggpubr::ggarrange(plotlist=estimateSub_enet_all,nrow=3,ncol=4,common.legend = TRUE,legend = "right")
## $TFMRI_NB_ALL_BEH_C2B_RATE
## TableGrob (6 x 2) "arrange": 11 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (4-4,1-1) arrange gtable[layout]
## 6 6 (4-4,2-2) arrange gtable[layout]
## 7 7 (5-5,1-1) arrange gtable[layout]
## 8 8 (5-5,2-2) arrange gtable[layout]
## 9 9 (6-6,1-1) arrange gtable[layout]
## 10 10 (6-6,2-2) arrange gtable[layout]
## 11 11 (1-1,1-2) arrange text[GRID.text.32362]
##
## $NIHTBX_PICVOCAB_UNCORRECTED
## TableGrob (6 x 2) "arrange": 11 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (4-4,1-1) arrange gtable[layout]
## 6 6 (4-4,2-2) arrange gtable[layout]
## 7 7 (5-5,1-1) arrange gtable[layout]
## 8 8 (5-5,2-2) arrange gtable[layout]
## 9 9 (6-6,1-1) arrange gtable[layout]
## 10 10 (6-6,2-2) arrange gtable[layout]
## 11 11 (1-1,1-2) arrange text[GRID.text.32578]
##
## $NIHTBX_FLANKER_UNCORRECTED
## TableGrob (6 x 2) "arrange": 11 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (4-4,1-1) arrange gtable[layout]
## 6 6 (4-4,2-2) arrange gtable[layout]
## 7 7 (5-5,1-1) arrange gtable[layout]
## 8 8 (5-5,2-2) arrange gtable[layout]
## 9 9 (6-6,1-1) arrange gtable[layout]
## 10 10 (6-6,2-2) arrange gtable[layout]
## 11 11 (1-1,1-2) arrange text[GRID.text.32794]
##
## $NIHTBX_LIST_UNCORRECTED
## TableGrob (6 x 2) "arrange": 11 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (4-4,1-1) arrange gtable[layout]
## 6 6 (4-4,2-2) arrange gtable[layout]
## 7 7 (5-5,1-1) arrange gtable[layout]
## 8 8 (5-5,2-2) arrange gtable[layout]
## 9 9 (6-6,1-1) arrange gtable[layout]
## 10 10 (6-6,2-2) arrange gtable[layout]
## 11 11 (1-1,1-2) arrange text[GRID.text.33010]
##
## $NIHTBX_CARDSORT_UNCORRECTED
## TableGrob (6 x 2) "arrange": 11 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (4-4,1-1) arrange gtable[layout]
## 6 6 (4-4,2-2) arrange gtable[layout]
## 7 7 (5-5,1-1) arrange gtable[layout]
## 8 8 (5-5,2-2) arrange gtable[layout]
## 9 9 (6-6,1-1) arrange gtable[layout]
## 10 10 (6-6,2-2) arrange gtable[layout]
## 11 11 (1-1,1-2) arrange text[GRID.text.33226]
##
## $NIHTBX_PATTERN_UNCORRECTED
## TableGrob (6 x 2) "arrange": 11 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (4-4,1-1) arrange gtable[layout]
## 6 6 (4-4,2-2) arrange gtable[layout]
## 7 7 (5-5,1-1) arrange gtable[layout]
## 8 8 (5-5,2-2) arrange gtable[layout]
## 9 9 (6-6,1-1) arrange gtable[layout]
## 10 10 (6-6,2-2) arrange gtable[layout]
## 11 11 (1-1,1-2) arrange text[GRID.text.33442]
##
## $NIHTBX_PICTURE_UNCORRECTED
## TableGrob (6 x 2) "arrange": 11 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (4-4,1-1) arrange gtable[layout]
## 6 6 (4-4,2-2) arrange gtable[layout]
## 7 7 (5-5,1-1) arrange gtable[layout]
## 8 8 (5-5,2-2) arrange gtable[layout]
## 9 9 (6-6,1-1) arrange gtable[layout]
## 10 10 (6-6,2-2) arrange gtable[layout]
## 11 11 (1-1,1-2) arrange text[GRID.text.33658]
##
## $NIHTBX_READING_UNCORRECTED
## TableGrob (6 x 2) "arrange": 11 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (4-4,1-1) arrange gtable[layout]
## 6 6 (4-4,2-2) arrange gtable[layout]
## 7 7 (5-5,1-1) arrange gtable[layout]
## 8 8 (5-5,2-2) arrange gtable[layout]
## 9 9 (6-6,1-1) arrange gtable[layout]
## 10 10 (6-6,2-2) arrange gtable[layout]
## 11 11 (1-1,1-2) arrange text[GRID.text.33874]
##
## $LMT_SCR_PERC_CORRECT
## TableGrob (6 x 2) "arrange": 11 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (4-4,1-1) arrange gtable[layout]
## 6 6 (4-4,2-2) arrange gtable[layout]
## 7 7 (5-5,1-1) arrange gtable[layout]
## 8 8 (5-5,2-2) arrange gtable[layout]
## 9 9 (6-6,1-1) arrange gtable[layout]
## 10 10 (6-6,2-2) arrange gtable[layout]
## 11 11 (1-1,1-2) arrange text[GRID.text.34090]
##
## $PEA_RAVLT_LD_TRIAL_VII_TC
## TableGrob (6 x 2) "arrange": 11 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (4-4,1-1) arrange gtable[layout]
## 6 6 (4-4,2-2) arrange gtable[layout]
## 7 7 (5-5,1-1) arrange gtable[layout]
## 8 8 (5-5,2-2) arrange gtable[layout]
## 9 9 (6-6,1-1) arrange gtable[layout]
## 10 10 (6-6,2-2) arrange gtable[layout]
## 11 11 (1-1,1-2) arrange text[GRID.text.34306]
##
## $PEA_WISCV_TRS
## TableGrob (6 x 2) "arrange": 11 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (4-4,1-1) arrange gtable[layout]
## 6 6 (4-4,2-2) arrange gtable[layout]
## 7 7 (5-5,1-1) arrange gtable[layout]
## 8 8 (5-5,2-2) arrange gtable[layout]
## 9 9 (6-6,1-1) arrange gtable[layout]
## 10 10 (6-6,2-2) arrange gtable[layout]
## 11 11 (1-1,1-2) arrange text[GRID.text.34522]
pander::pander(sessionInfo())
R version 4.0.3 (2020-10-10)
Platform: x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_NZ.UTF-8, LC_NUMERIC=C, LC_TIME=en_NZ.UTF-8, LC_COLLATE=en_NZ.UTF-8, LC_MONETARY=en_NZ.UTF-8, LC_MESSAGES=en_NZ.UTF-8, LC_PAPER=en_NZ.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_NZ.UTF-8 and LC_IDENTIFICATION=C
attached base packages: grid, stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: gridExtra(v.2.3), png(v.0.1-7), ggpubr(v.0.4.0), RColorBrewer(v.1.1-2), ggsegDesterieux(v.1.0), ggsegExtra(v.1.5.2), ggseg(v.1.5.5), furrr(v.0.2.0), future(v.1.19.1), cowplot(v.1.1.0), yardstick(v.0.0.7), workflows(v.0.2.1), tune(v.0.1.1), rsample(v.0.0.8), recipes(v.0.1.14), parsnip(v.0.1.3), modeldata(v.0.0.2), infer(v.0.5.3), dials(v.0.0.9), scales(v.1.1.1), tidymodels(v.0.1.1), pander(v.0.6.3), caret(v.6.0-86), lattice(v.0.20-41), olsrr(v.0.5.3), glmnet(v.4.0-2), Matrix(v.1.2-18), eNetXplorer(v.1.1.2), broom(v.0.7.1), forcats(v.0.5.0), stringr(v.1.4.0), dplyr(v.1.0.2), purrr(v.0.3.4), readr(v.1.4.0), tidyr(v.1.1.2), tibble(v.3.0.4), ggplot2(v.3.3.2) and tidyverse(v.1.3.0)
loaded via a namespace (and not attached): readxl(v.1.3.1), backports(v.1.1.10), plyr(v.1.8.6), splines(v.4.0.3), listenv(v.0.8.0), digest(v.0.6.26), SuppDists(v.1.1-9.5), foreach(v.1.5.1), htmltools(v.0.5.0), fansi(v.0.4.1), magrittr(v.1.5), openxlsx(v.4.2.2), globals(v.0.13.1), modelr(v.0.1.8), gower(v.0.2.2), prettyunits(v.1.1.1), colorspace(v.1.4-1), blob(v.1.2.1), rvest(v.0.3.6), haven(v.2.3.1), xfun(v.0.18), crayon(v.1.3.4), jsonlite(v.1.7.1), survival(v.3.2-7), iterators(v.1.0.13), glue(v.1.4.2), gtable(v.0.3.0), ipred(v.0.9-9), car(v.3.0-10), shape(v.1.4.5), abind(v.1.4-5), mvtnorm(v.1.1-1), DBI(v.1.1.0), rstatix(v.0.6.0), Rcpp(v.1.0.5), progress(v.1.2.2), GPfit(v.1.0-8), foreign(v.0.8-79), stats4(v.4.0.3), lava(v.1.6.8), prodlim(v.2019.11.13), httr(v.1.4.2), gplots(v.3.1.0), calibrate(v.1.7.7), ellipsis(v.0.3.1), farver(v.2.0.3), pkgconfig(v.2.0.3), nnet(v.7.3-14), dbplyr(v.1.4.4), utf8(v.1.1.4), labeling(v.0.3), tidyselect(v.1.1.0), rlang(v.0.4.8), DiceDesign(v.1.8-1), reshape2(v.1.4.4), munsell(v.0.5.0), cellranger(v.1.1.0), tools(v.4.0.3), cli(v.2.1.0), generics(v.0.0.2), evaluate(v.0.14), yaml(v.2.2.1), goftest(v.1.2-2), bootstrap(v.2019.6), ModelMetrics(v.1.2.2.2), knitr(v.1.30), fs(v.1.5.0), timereg(v.1.9.8), zip(v.2.1.1), caTools(v.1.18.0), pec(v.2019.11.03), nlme(v.3.1-149), xml2(v.1.3.2), compiler(v.4.0.3), rstudioapi(v.0.11), curl(v.4.3), ggsignif(v.0.6.0), reprex(v.0.3.0), lhs(v.1.1.1), stringi(v.1.5.3), survcomp(v.1.38.0), survivalROC(v.1.0.3), vctrs(v.0.3.4), pillar(v.1.4.6), lifecycle(v.0.2.0), data.table(v.1.13.2), bitops(v.1.0-6), R6(v.2.4.1), KernSmooth(v.2.23-17), rio(v.0.5.16), timeROC(v.0.4), codetools(v.0.2-16), MASS(v.7.3-53), gtools(v.3.8.2), assertthat(v.0.2.1), withr(v.2.3.0), nortest(v.1.0-4), mgcv(v.1.8-33), expm(v.0.999-5), parallel(v.4.0.3), hms(v.0.5.3), numform(v.0.6.4), rpart(v.4.1-15), timeDate(v.3043.102), class(v.7.3-17), rmarkdown(v.2.4), carData(v.3.0-4), pROC(v.1.16.2), numDeriv(v.2016.8-1.1), lubridate(v.1.7.9) and rmeta(v.3.0)