1 Socio-Demographic & Psychological Data Preparation

1.1 Reset workspace and load libraries

This analysis uses ABCD Release 3

# Remove every object from the global environment so the analysis starts from
# a clean workspace, then run the garbage collector.
rm(list=ls())
gc()

library(tidyverse)
library(qgraph)
library(pander)
library(summarytools)
library(sjPlot)
library(sjmisc)
library(sjlabelled)
library(tidymodels)
library(knitr)

1.2 Setting up paths

1.3 Family relationship

# Read the ACSPSW03 table with readr; the demographic, family-ID and weighting
# columns used by this analysis are picked from it further down.
ACS <- paste0(dataFold, "ACSPSW03_DATA_TABLE.csv") %>%
  read_csv()
## Rows: 23113 Columns: 32
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (8): SUBJECTKEY, SRC_SUBJECT_ID, INTERVIEW_DATE, SEX, EVENTNAME, GENETI...
## dbl (16): ACSPSW03_ID, DATASET_ID, INTERVIEW_AGE, RACE_ETHNICITY, REL_FAMILY...
## lgl  (8): GENETIC_PAIRED_SUBJECTID_4, GENETIC_PI_HAT_2, GENETIC_PI_HAT_3, GE...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#knitr::kable(glimpse(ACS))

#race_ethnicity
#1 = White; 2 = Black; 3 = Hispanic; 4 = Asian; 5 = Other

# guardian-report relationship
# Relationship of the participant in his or her family
# 0 = single; 1 = sibling; 2 = twin; 3 = triplet
# ACS %>% count(REL_RELATIONSHIP)

# Keep the demographic, family and weighting columns and convert the
# categorical ones to factors.
# RACE_ETHNICITY codes: 1 = White, 2 = Black, 3 = Hispanic, 4 = Asian, 5 = Other.
# No `.default` is supplied on purpose: a code outside 1-5 keeps its own level
# and stays visible in summaries instead of being silently relabelled "White".
# Missing values remain NA.
ACSselected <- ACS %>%
  select(
    SUBJECTKEY, EVENTNAME, SEX, INTERVIEW_AGE, RACE_ETHNICITY,
    REL_FAMILY_ID, ACS_RAKED_PROPENSITY_SCORE
  ) %>%
  mutate(
    RACE_ETHNICITY = recode_factor(
      as.factor(RACE_ETHNICITY),
      `1` = "White", `2` = "Black", `3` = "Hispanic", `4` = "Asian", `5` = "Other"
    ),
    SEX = as.factor(SEX),
    REL_FAMILY_ID = as.factor(REL_FAMILY_ID)
  )

# Summarise the selected ACS columns for the baseline visit only.
ACSselected %>%
 filter(EVENTNAME =="baseline_year_1_arm_1") %>%
 skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 7
_______________________
Column type frequency:
character 2
factor 3
numeric 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 11878 0
EVENTNAME 0 1 21 21 0 1 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
SEX 0 1 FALSE 2 M: 6196, F: 5682
RACE_ETHNICITY 2 1 FALSE 5 Whi: 6182, His: 2411, Bla: 1784, Oth: 1247
REL_FAMILY_ID 0 1 FALSE 9856 373: 5, 749: 4, 11: 3, 400: 3

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
INTERVIEW_AGE 0 1 118.98 7.50 107.00 112.00 119.00 126.00 133.00 ▇▆▆▆▆
ACS_RAKED_PROPENSITY_SCORE 0 1 691.33 350.96 161.36 449.42 619.31 821.72 1778.92 ▅▇▂▂▁

1.4 site information

# Load site and scanner information as tibbles
MRIinfo <- paste0(dataFold, "ABCD_MRI01_DATA_TABLE.csv") %>%
  read.csv() %>%
  tibble::as_tibble()
Siteinfo <- paste0(dataFold, "ABCD_LT01_DATA_TABLE.csv") %>%
  read.csv() %>%
  tibble::as_tibble()

# Row counts per visit and sex in the MRI table
MRIinfo %>%
  count(EVENTNAME, SEX)
## # A tibble: 4 × 3
##   EVENTNAME                SEX       n
##   <chr>                    <chr> <int>
## 1 2_year_follow_up_y_arm_1 F      2617
## 2 2_year_follow_up_y_arm_1 M      3076
## 3 baseline_year_1_arm_1    F      5631
## 4 baseline_year_1_arm_1    M      6161
# Attach the site table to every MRI row (matched on subject and visit) and
# keep only the identifiers, the site label and the scanner serial number.
MriandSite <- MRIinfo %>%
  left_join(Siteinfo, by = c("SUBJECTKEY", "EVENTNAME")) %>%
  select(SUBJECTKEY, EVENTNAME, SITE_ID_L, MRI_INFO_DEVICESERIALNUMBER)
#glimpse(MriandSite)

# Baseline row count per site; site22 (34 subjects) is a candidate for removal
MriandSite %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  count(SITE_ID_L)
## # A tibble: 22 × 2
##    SITE_ID_L     n
##    <chr>     <int>
##  1 site01      402
##  2 site02      554
##  3 site03      631
##  4 site04      747
##  5 site05      376
##  6 site06      584
##  7 site07      339
##  8 site08      350
##  9 site09      431
## 10 site10      735
## # … with 12 more rows
# Summarise the site/scanner columns for the baseline visit only.
MriandSite %>%
 filter(EVENTNAME =="baseline_year_1_arm_1") %>% 
 skimr::skim()
Data summary
Name Piped data
Number of rows 11792
Number of columns 4
_______________________
Column type frequency:
character 4
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 11787 0
EVENTNAME 0 1 21 21 0 1 0
SITE_ID_L 0 1 6 6 0 22 0
MRI_INFO_DEVICESERIALNUMBER 0 1 0 12 201 30 0

1.5 Cognition

1.5.1 Load neuro cognitive measures

# Each cognition table is read with base read.csv() and converted to a tibble.
NIH_TB <- paste0(dataFold, "ABCD_TBSS01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()
CashChoice <- paste0(dataFold, "CCT01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()
LittleMan <- paste0(dataFold, "LMTP201_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()
Pearson <- paste0(dataFold, "ABCD_PS01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# The next four tasks are only used in the follow-up data.
# ABCD Youth Delay Discounting Scores
DelayDis <- paste0(dataFold, "ABCD_YDDSS01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()
# ABCD Emotional Stroop Task
EmoStroop <- paste0(dataFold, "ABCD_YEST01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()
# ABCD Game of Dice Task (abcd_gdss01)
GameOfDice <- paste0(dataFold, "ABCD_GDSS01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()
# ABCD Social Influence Task (abcd_siss01)
SocialInfluence <- paste0(dataFold, "ABCD_SISS01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# Vision screening: visionProb is 1 when SNELLEN_VA_Y is 0 or 1, or when
# VIS_FLG is 2, and 0 otherwise. Rows where the condition evaluates to NA
# (missing inputs) get NA rather than 0.
vision_idx <- paste0(dataFold, "ABCD_SVS01_DATA_TABLE.CSV") %>%
  read.csv() %>%
  as_tibble() %>%
  mutate(
    visionProb = ifelse(
      SNELLEN_VA_Y == 0 | SNELLEN_VA_Y == 1 | VIS_FLG == 2,
      1,
      0
    )
  )

#vision_idx %>% select(SNELLEN_VA_Y, VIS_FLG, visionProb) %>%  arrange(SNELLEN_VA_Y)

1.5.2 summary of NIH toolbox cognition

# Baseline summary of every NIH Toolbox column whose name ends in
# "_UNCORRECTED".
NIH_TB %>% 
  filter(EVENTNAME =="baseline_year_1_arm_1") %>% 
  select(ends_with("_UNCORRECTED")) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 10
_______________________
Column type frequency:
numeric 10
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
NIHTBX_PICVOCAB_UNCORRECTED 149 0.99 84.46 8.12 29 79 84 90 119 ▁▁▇▇▁
NIHTBX_FLANKER_UNCORRECTED 155 0.99 94.00 9.14 51 89 95 100 116 ▁▁▃▇▂
NIHTBX_LIST_UNCORRECTED 198 0.98 96.65 12.09 36 90 97 105 136 ▁▁▅▇▁
NIHTBX_CARDSORT_UNCORRECTED 154 0.99 92.52 9.51 50 88 93 99 120 ▁▁▆▇▁
NIHTBX_PATTERN_UNCORRECTED 173 0.99 88.06 14.58 30 80 88 99 140 ▁▃▇▅▁
NIHTBX_PICTURE_UNCORRECTED 161 0.99 102.81 12.07 76 94 102 111 136 ▃▇▇▅▁
NIHTBX_READING_UNCORRECTED 163 0.99 90.86 6.91 59 87 91 95 119 ▁▁▇▂▁
NIHTBX_FLUIDCOMP_UNCORRECTED 237 0.98 91.55 10.66 44 85 92 99 131 ▁▂▇▅▁
NIHTBX_CRYST_UNCORRECTED 181 0.98 86.36 7.07 51 82 86 91 115 ▁▁▇▃▁
NIHTBX_TOTALCOMP_UNCORRECTED 241 0.98 86.22 9.14 44 81 87 92 117 ▁▂▇▇▁

1.5.3 distribution of other cognitive tasks

ABCD Cash Choice Task - Impulsivity, delayed gratification; this single-item task asked the child “Let’s pretend a kind person wanted to give you some money. Would you rather have $75 in three days or $115 in 3 months?”. The child indicates one of these two options or a third “can’t decide” option. See Wulfert, E., Block, J.A., Santa Ana, E., Rodriguez, M.L., & Colsman, M., 2002; Anokhin, A.P., Golosheykin, S., Grant, J.D. & Heath, A.C., 2011. Administered in Baseline assessment only.

ABCD Little Man Task - Visuospatial processing flexibility, attention; participants view pictures of a figure (little man) presented in different orientations and holding a suitcase and must use mental rotation skills to assess which hand (left or right) is holding the suitcase. Accuracy and latency scores are provided for each trial. For details, see Acker, W., & Acker, W., 1982; Nixon, S. J., Prather, R. A., & Lewis, B., 2014.

ABCD Pearson Scores - Rey Auditory Verbal Learning Test – Verbal learning and memory; the task is administered according to standard instructions using a 15-item word list; there are five learning trials (Trials I-V), a distractor trial (List B), measures of immediate recall (Trial VI) and 30-minute delayed recall (Trial VII); for all trials, the total correct is recorded together with the number of perseverations and intrusions. Details can be found in Strauss, E., Sherman, E.M.S., & Spreen, O., 2006; Lezak, M.D., Howieson, D.B., Bigler, E.D., & Tranel, D., 2012. Matrix Reasoning Task – Measures fluid intelligence, visuospatial reasoning; the task is from the Wechsler Intelligence Scale for Children-V and administered using Pearson Clinical Assessment's Q-interactive platform. Total raw scores, scaled scores (ranging from 0-19; mean = 10, SD = 3) and scores for each item are available. See Wechsler, D., 2014; Daniel, M.H., Wahlstrom, D. & Zhang, O., 2014. Administered in Baseline assessment only.

# Baseline summary of the single Cash Choice Task response column.
CashChoice %>% 
  filter(EVENTNAME =="baseline_year_1_arm_1") %>% 
  select(CASH_CHOICE_TASK) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 1
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
CASH_CHOICE_TASK 20 1 1.62 0.52 1 1 2 2 3 ▆▁▇▁▁
# Baseline summary of the Little Man Task table; the first seven columns are
# dropped by position before summarising.
LittleMan %>% 
  filter(EVENTNAME =="baseline_year_1_arm_1") %>% 
  select(-1:-7) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 52
_______________________
Column type frequency:
character 2
logical 2
numeric 48
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
EVENTNAME 0 1 21 21 0 1 0
STUDY_COHORT_NAME 0 1 21 21 0 1 0

Variable type: logical

skim_variable n_missing complete_rate mean count
LMT_RUN 11878 0 NaN :
LMT_SCR_EFFICIENCY 11878 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
LMT_SCR_PERC_CORRECT 337 0.97 0.59 0.17 0.00 0.47 0.56 0.72 1.00 ▁▂▇▅▂
LMT_SCR_PERC_WRONG 337 0.97 0.31 0.17 0.00 0.16 0.31 0.44 0.84 ▅▆▇▂▁
LMT_SCR_NUM_CORRECT 337 0.97 18.84 5.45 0.00 15.00 18.00 23.00 32.00 ▁▂▇▅▂
LMT_SCR_NUM_WRONG 337 0.97 9.81 5.30 0.00 5.00 10.00 14.00 27.00 ▅▆▇▂▁
LMT_SCR_NUM_TIMED_OUT 337 0.97 3.34 2.72 0.00 1.00 3.00 5.00 32.00 ▇▁▁▁▁
LMT_SCR_AVG_RT 342 0.97 2658.38 474.80 1063.88 2355.68 2700.25 2989.48 4450.19 ▁▃▇▂▁
LMT_SCR_RT_CORRECT 343 0.97 2661.99 470.65 1091.88 2357.26 2697.14 2982.15 4695.50 ▁▅▇▂▁
LMT_SCR_RT_WRONG 653 0.95 2688.16 625.58 824.00 2233.53 2681.00 3100.91 4990.00 ▁▆▇▂▁
LMT_SCR_CORRECT_RT_STIMTYPE1 1270 0.89 2727.44 739.10 789.00 2200.00 2671.00 3192.00 17963.00 ▇▁▁▁▁
LMT_SCR_CORRECT_RT_STIMTYPE2 1235 0.90 2796.90 768.13 550.00 2243.50 2765.00 3309.00 5001.00 ▁▅▇▅▁
LMT_SCR_CORRECT_RT_STIMTYPE3 3050 0.74 2874.75 661.98 526.00 2444.00 2845.00 3287.00 5006.00 ▁▂▇▃▁
LMT_SCR_CORRECT_RT_STIMTYPE4 2900 0.76 2722.26 679.02 591.00 2248.00 2674.00 3147.00 6100.00 ▁▇▆▁▁
LMT_SCR_CORRECT_RT_STIMTYPE5 3674 0.69 3050.74 760.48 347.00 2534.00 3052.50 3572.00 4999.00 ▁▂▇▇▂
LMT_SCR_CORRECT_RT_STIMTYPE6 3590 0.70 2957.41 779.92 313.00 2429.00 2951.00 3480.25 5006.00 ▁▃▇▆▂
LMT_SCR_CORRECT_RT_STIMTYPE7 717 0.94 2353.55 581.18 798.00 1944.00 2270.00 2672.00 4982.00 ▁▇▅▁▁
LMT_SCR_CORRECT_RT_STIMTYPE8 733 0.94 2576.56 633.26 649.00 2138.00 2513.00 2962.00 5000.00 ▁▇▇▂▁
LMT_SCR_CORRECT_NUM_STIMTYPE1 338 0.97 2.46 1.27 0.00 1.00 3.00 4.00 4.00 ▂▅▆▇▇
LMT_SCR_CORRECT_NUM_STIMTYPE2 338 0.97 2.46 1.27 0.00 1.00 3.00 4.00 4.00 ▂▅▆▇▇
LMT_SCR_CORRECT_NUM_STIMTYPE3 338 0.97 2.17 1.51 0.00 1.00 2.00 4.00 4.00 ▇▃▅▇▇
LMT_SCR_CORRECT_NUM_STIMTYPE4 338 0.97 2.24 1.51 0.00 1.00 3.00 4.00 4.00 ▆▃▅▇▇
LMT_SCR_CORRECT_NUM_STIMTYPE5 338 0.97 1.59 1.33 0.00 0.00 2.00 3.00 4.00 ▇▆▆▅▃
LMT_SCR_CORRECT_NUM_STIMTYPE6 338 0.97 1.56 1.30 0.00 0.00 1.00 3.00 4.00 ▇▆▆▅▂
LMT_SCR_CORRECT_NUM_STIMTYPE7 338 0.97 3.23 1.09 0.00 3.00 4.00 4.00 4.00 ▁▁▂▃▇
LMT_SCR_CORRECT_NUM_STIMTYPE8 338 0.97 3.13 1.11 0.00 3.00 4.00 4.00 4.00 ▁▁▂▃▇
LMT_SCR_WRONG_RT_STIMTYPE1 5529 0.53 2766.92 843.40 43.00 2149.00 2678.00 3311.00 5003.00 ▁▃▇▅▂
LMT_SCR_WRONG_RT_STIMTYPE2 5260 0.56 2974.29 824.25 108.00 2384.25 2924.00 3540.00 5006.00 ▁▂▇▆▂
LMT_SCR_WRONG_RT_STIMTYPE3 4761 0.60 2445.67 799.05 510.00 1851.00 2317.00 2892.00 5000.00 ▁▇▆▂▁
LMT_SCR_WRONG_RT_STIMTYPE4 4799 0.60 2431.67 789.55 314.00 1844.00 2301.00 2896.50 5002.00 ▁▇▇▃▁
LMT_SCR_WRONG_RT_STIMTYPE5 2787 0.77 2699.10 843.32 523.00 2048.00 2616.00 3272.50 5003.00 ▁▇▇▅▂
LMT_SCR_WRONG_RT_STIMTYPE6 2671 0.78 2626.09 794.87 681.00 2019.00 2524.00 3132.00 5003.00 ▁▇▇▃▁
LMT_SCR_WRONG_RT_STIMTYPE7 8283 0.30 2548.21 830.29 82.00 1966.00 2470.00 3056.50 5003.00 ▁▅▇▃▁
LMT_SCR_WRONG_RT_STIMTYPE8 7965 0.33 2727.80 865.44 175.00 2107.00 2675.00 3306.00 5005.00 ▁▅▇▅▂
LMT_SCR_WRONG_NUM_STIMTYPE1 338 0.97 0.97 1.08 0.00 0.00 1.00 2.00 4.00 ▇▅▃▂▁
LMT_SCR_WRONG_NUM_STIMTYPE2 338 0.97 1.05 1.13 0.00 0.00 1.00 2.00 4.00 ▇▅▃▂▁
LMT_SCR_WRONG_NUM_STIMTYPE3 338 0.97 1.46 1.50 0.00 0.00 1.00 3.00 4.00 ▇▅▂▂▃
LMT_SCR_WRONG_NUM_STIMTYPE4 338 0.97 1.45 1.50 0.00 0.00 1.00 3.00 4.00 ▇▅▂▂▃
LMT_SCR_WRONG_NUM_STIMTYPE5 338 0.97 1.89 1.40 0.00 1.00 2.00 3.00 4.00 ▇▇▇▆▆
LMT_SCR_WRONG_NUM_STIMTYPE6 338 0.97 1.89 1.37 0.00 1.00 2.00 3.00 4.00 ▇▇▇▆▆
LMT_SCR_WRONG_NUM_STIMTYPE7 338 0.97 0.54 0.95 0.00 0.00 0.00 1.00 4.00 ▇▂▁▁▁
LMT_SCR_WRONG_NUM_STIMTYPE8 338 0.97 0.56 0.93 0.00 0.00 0.00 1.00 4.00 ▇▂▁▁▁
LMT_SCR_TOUT_NUM_STIMTYPE1 338 0.97 0.56 0.73 0.00 0.00 0.00 1.00 4.00 ▇▅▁▁▁
LMT_SCR_TOUT_NUM_STIMTYPE2 338 0.97 0.49 0.72 0.00 0.00 0.00 1.00 4.00 ▇▃▁▁▁
LMT_SCR_TOUT_NUM_STIMTYPE3 338 0.97 0.37 0.63 0.00 0.00 0.00 1.00 4.00 ▇▃▁▁▁
LMT_SCR_TOUT_NUM_STIMTYPE4 338 0.97 0.31 0.58 0.00 0.00 0.00 1.00 4.00 ▇▂▁▁▁
LMT_SCR_TOUT_NUM_STIMTYPE5 338 0.97 0.52 0.74 0.00 0.00 0.00 1.00 4.00 ▇▃▁▁▁
LMT_SCR_TOUT_NUM_STIMTYPE6 338 0.97 0.54 0.73 0.00 0.00 0.00 1.00 4.00 ▇▅▁▁▁
LMT_SCR_TOUT_NUM_STIMTYPE7 338 0.97 0.23 0.50 0.00 0.00 0.00 0.00 4.00 ▇▂▁▁▁
LMT_SCR_TOUT_NUM_STIMTYPE8 338 0.97 0.31 0.58 0.00 0.00 0.00 1.00 4.00 ▇▂▁▁▁
# Baseline summary of the Pearson scores table; the first eleven columns are
# dropped by position before summarising.
Pearson %>% 
  filter(EVENTNAME =="baseline_year_1_arm_1") %>% 
  select(-1:-11) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 61
_______________________
Column type frequency:
character 1
numeric 60
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
STUDY_COHORT_NAME 0 1 21 21 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PEA_RAVLT_SD_TRIAL_I_TC 234 0.98 5.05 1.79 0 4 5 6 14 ▁▇▅▁▁
PEA_RAVLT_SD_TRIAL_I_TR 143 0.99 0.36 0.91 0 0 0 0 15 ▇▁▁▁▁
PEA_RAVLT_SD_TRIAL_I_TI 143 0.99 0.57 1.02 0 0 0 1 22 ▇▁▁▁▁
PEA_RAVLT_SD_TRIAL_II_TC 199 0.98 7.85 2.27 0 6 8 9 15 ▁▅▇▃▁
PEA_RAVLT_SD_TRIAL_II_TR 145 0.99 0.69 1.32 0 0 0 1 31 ▇▁▁▁▁
PEA_RAVLT_SD_TRIAL_II_TI 145 0.99 0.31 0.77 0 0 0 0 28 ▇▁▁▁▁
PEA_RAVLT_SD_TRIAL_III_TC 193 0.98 9.50 2.65 0 8 10 11 15 ▁▂▇▇▂
PEA_RAVLT_SD_TRIAL_III_TR 145 0.99 1.16 1.90 0 0 1 2 32 ▇▁▁▁▁
PEA_RAVLT_SD_TRIAL_III_TI 145 0.99 0.26 0.68 0 0 0 0 14 ▇▁▁▁▁
PEA_RAVLT_SD_TRIAL_IV_TC 192 0.98 10.54 2.68 0 9 11 13 15 ▁▁▅▇▅
PEA_RAVLT_SD_TRIAL_IV_TR 147 0.99 1.41 2.26 0 0 1 2 41 ▇▁▁▁▁
PEA_RAVLT_SD_TRIAL_IV_TI 147 0.99 0.24 0.63 0 0 0 0 9 ▇▁▁▁▁
PEA_RAVLT_SD_TRIAL_V_TC 188 0.98 11.20 2.62 0 10 12 13 15 ▁▁▃▇▇
PEA_RAVLT_SD_TRIAL_V_TR 147 0.99 1.55 2.63 0 0 1 2 96 ▇▁▁▁▁
PEA_RAVLT_SD_TRIAL_V_TI 147 0.99 0.23 0.66 0 0 0 0 21 ▇▁▁▁▁
PEA_RAVLT_SD_LISTB_TC 210 0.98 4.85 1.69 0 4 5 6 13 ▁▇▃▁▁
PEA_RAVLT_SD_LISTB_TR 147 0.99 0.24 0.68 0 0 0 0 13 ▇▁▁▁▁
PEA_RAVLT_SD_LISTB_TI 147 0.99 0.46 1.02 0 0 0 1 16 ▇▁▁▁▁
PEA_RAVLT_SD_TRIAL_VI_TC 210 0.98 9.66 3.05 0 8 10 12 15 ▁▂▆▇▃
PEA_RAVLT_SD_TRIAL_VI_TR 147 0.99 1.01 1.89 0 0 0 1 35 ▇▁▁▁▁
PEA_RAVLT_SD_TRIAL_VI_TI 147 0.99 0.40 0.92 0 0 0 1 27 ▇▁▁▁▁
PEA_RAVLT_LD_TRIAL_VII_TC 264 0.98 9.18 3.20 0 7 9 12 15 ▁▃▇▇▃
PEA_RAVLT_LD_TRIAL_VII_TR 171 0.99 0.83 1.79 0 0 0 1 45 ▇▁▁▁▁
PEA_RAVLT_LD_TRIAL_VII_TI 171 0.99 0.49 0.97 0 0 0 1 17 ▇▁▁▁▁
PEA_WISCV_TRS 243 0.98 17.91 3.84 0 15 18 20 32 ▁▁▇▅▁
PEA_WISCV_TSS 249 0.98 9.86 2.99 1 8 10 12 19 ▁▆▇▅▁
PEA_WISCV_ITEM_A_RS 219 0.98 0.98 0.12 0 1 1 1 1 ▁▁▁▁▇
PEA_WISCV_ITEM_B_RS 209 0.98 0.98 0.14 0 1 1 1 1 ▁▁▁▁▇
PEA_WISCV_ITEM_1_RS 11625 0.02 0.92 0.26 0 1 1 1 1 ▁▁▁▁▇
PEA_WISCV_ITEM_2_RS 11585 0.02 0.95 0.21 0 1 1 1 1 ▁▁▁▁▇
PEA_WISCV_ITEM_3_RS 11380 0.04 0.90 0.30 0 1 1 1 1 ▁▁▁▁▇
PEA_WISCV_ITEM_4_RS 10670 0.10 0.94 0.24 0 1 1 1 1 ▁▁▁▁▇
PEA_WISCV_ITEM_5_RS 203 0.98 0.98 0.15 0 1 1 1 1 ▁▁▁▁▇
PEA_WISCV_ITEM_6_RS 219 0.98 0.93 0.25 0 1 1 1 1 ▁▁▁▁▇
PEA_WISCV_ITEM_7_RS 221 0.98 0.96 0.19 0 1 1 1 1 ▁▁▁▁▇
PEA_WISCV_ITEM_8_RS 226 0.98 0.97 0.16 0 1 1 1 1 ▁▁▁▁▇
PEA_WISCV_ITEM_9_RS 234 0.98 0.95 0.22 0 1 1 1 1 ▁▁▁▁▇
PEA_WISCV_ITEM_10_RS 250 0.98 0.97 0.17 0 1 1 1 1 ▁▁▁▁▇
PEA_WISCV_ITEM_11_RS 267 0.98 0.94 0.23 0 1 1 1 1 ▁▁▁▁▇
PEA_WISCV_ITEM_12_RS 306 0.97 0.93 0.25 0 1 1 1 1 ▁▁▁▁▇
PEA_WISCV_ITEM_13_RS 323 0.97 0.92 0.28 0 1 1 1 1 ▁▁▁▁▇
PEA_WISCV_ITEM_14_RS 367 0.97 0.80 0.40 0 1 1 1 1 ▂▁▁▁▇
PEA_WISCV_ITEM_15_RS 470 0.96 0.74 0.44 0 0 1 1 1 ▃▁▁▁▇
PEA_WISCV_ITEM_16_RS 758 0.94 0.70 0.46 0 0 1 1 1 ▃▁▁▁▇
PEA_WISCV_ITEM_17_RS 1186 0.90 0.58 0.49 0 0 1 1 1 ▆▁▁▁▇
PEA_WISCV_ITEM_18_RS 1626 0.86 0.56 0.50 0 0 1 1 1 ▆▁▁▁▇
PEA_WISCV_ITEM_19_RS 2147 0.82 0.56 0.50 0 0 1 1 1 ▆▁▁▁▇
PEA_WISCV_ITEM_20_RS 2833 0.76 0.53 0.50 0 0 1 1 1 ▇▁▁▁▇
PEA_WISCV_ITEM_21_RS 3462 0.71 0.47 0.50 0 0 0 1 1 ▇▁▁▁▇
PEA_WISCV_ITEM_22_RS 4189 0.65 0.32 0.46 0 0 0 1 1 ▇▁▁▁▃
PEA_WISCV_ITEM_23_RS 5086 0.57 0.43 0.50 0 0 0 1 1 ▇▁▁▁▆
PEA_WISCV_ITEM_24_RS 6025 0.49 0.31 0.46 0 0 0 1 1 ▇▁▁▁▃
PEA_WISCV_ITEM_25_RS 7052 0.41 0.22 0.41 0 0 0 0 1 ▇▁▁▁▂
PEA_WISCV_ITEM_26_RS 7813 0.34 0.30 0.46 0 0 0 1 1 ▇▁▁▁▃
PEA_WISCV_ITEM_27_RS 8983 0.24 0.31 0.46 0 0 0 1 1 ▇▁▁▁▃
PEA_WISCV_ITEM_28_RS 9694 0.18 0.22 0.41 0 0 0 0 1 ▇▁▁▁▂
PEA_WISCV_ITEM_29_RS 10058 0.15 0.28 0.45 0 0 0 1 1 ▇▁▁▁▃
PEA_WISCV_ITEM_30_RS 10532 0.11 0.10 0.30 0 0 0 0 1 ▇▁▁▁▁
PEA_WISCV_ITEM_31_RS 10969 0.08 0.15 0.36 0 0 0 0 1 ▇▁▁▁▂
PEA_WISCV_ITEM_32_RS 11206 0.06 0.06 0.24 0 0 0 0 1 ▇▁▁▁▁

1.5.4 sum cognition

# Full-join the cognition tables and the vision flag on subject and visit,
# then keep the identifiers, the selected task scores and visionProb.
sumCog <- list(NIH_TB, CashChoice, LittleMan, Pearson, vision_idx) %>%
  plyr::join_all(by = c("SUBJECTKEY", "EVENTNAME"), type = "full") %>%
  select(
    SUBJECTKEY, EVENTNAME,
    NIHTBX_FLANKER_UNCORRECTED,
    NIHTBX_CARDSORT_UNCORRECTED,
    NIHTBX_PATTERN_UNCORRECTED,
    NIHTBX_PICVOCAB_UNCORRECTED,
    NIHTBX_READING_UNCORRECTED,
    NIHTBX_PICTURE_UNCORRECTED,
    PEA_RAVLT_LD_TRIAL_VII_TC,
    NIHTBX_LIST_UNCORRECTED,
    LMT_SCR_PERC_CORRECT,
    PEA_WISCV_TRS,
    NIHTBX_FLUIDCOMP_UNCORRECTED,
    NIHTBX_CRYST_UNCORRECTED,
    NIHTBX_TOTALCOMP_UNCORRECTED,
    visionProb
  )
  
# Baseline summary of the cognition scores; the two identifier columns
# (the first two columns of sumCog) are left out.
sumCog %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-SUBJECTKEY, -EVENTNAME) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 14
_______________________
Column type frequency:
numeric 14
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
NIHTBX_FLANKER_UNCORRECTED 155 0.99 94.00 9.14 51 89.00 95.00 100.00 116 ▁▁▃▇▂
NIHTBX_CARDSORT_UNCORRECTED 154 0.99 92.52 9.51 50 88.00 93.00 99.00 120 ▁▁▆▇▁
NIHTBX_PATTERN_UNCORRECTED 173 0.99 88.06 14.58 30 80.00 88.00 99.00 140 ▁▃▇▅▁
NIHTBX_PICVOCAB_UNCORRECTED 149 0.99 84.46 8.12 29 79.00 84.00 90.00 119 ▁▁▇▇▁
NIHTBX_READING_UNCORRECTED 163 0.99 90.86 6.91 59 87.00 91.00 95.00 119 ▁▁▇▂▁
NIHTBX_PICTURE_UNCORRECTED 161 0.99 102.81 12.07 76 94.00 102.00 111.00 136 ▃▇▇▅▁
PEA_RAVLT_LD_TRIAL_VII_TC 264 0.98 9.18 3.20 0 7.00 9.00 12.00 15 ▁▃▇▇▃
NIHTBX_LIST_UNCORRECTED 198 0.98 96.65 12.09 36 90.00 97.00 105.00 136 ▁▁▅▇▁
LMT_SCR_PERC_CORRECT 337 0.97 0.59 0.17 0 0.47 0.56 0.72 1 ▁▂▇▅▂
PEA_WISCV_TRS 243 0.98 17.91 3.84 0 15.00 18.00 20.00 32 ▁▁▇▅▁
NIHTBX_FLUIDCOMP_UNCORRECTED 237 0.98 91.55 10.66 44 85.00 92.00 99.00 131 ▁▂▇▅▁
NIHTBX_CRYST_UNCORRECTED 181 0.98 86.36 7.07 51 82.00 86.00 91.00 115 ▁▁▇▃▁
NIHTBX_TOTALCOMP_UNCORRECTED 241 0.98 86.22 9.14 44 81.00 87.00 92.00 117 ▁▂▇▇▁
visionProb 11085 0.07 0.04 0.19 0 0.00 0.00 0.00 1 ▇▁▁▁▁
# Two-year follow-up summary of the cognition scores; the two identifier
# columns (the first two columns of sumCog) are left out.
sumCog %>%
  filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  select(-SUBJECTKEY, -EVENTNAME) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 6571
Number of columns 14
_______________________
Column type frequency:
numeric 14
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
NIHTBX_FLANKER_UNCORRECTED 166 0.97 100.26 7.49 54.00 96.00 101.00 105.00 117 ▁▁▂▇▅
NIHTBX_CARDSORT_UNCORRECTED 6550 0.00 94.71 8.53 76.00 90.00 93.00 100.00 109 ▁▃▇▅▃
NIHTBX_PATTERN_UNCORRECTED 200 0.97 103.53 15.12 35.00 94.00 103.00 113.00 153 ▁▂▇▇▁
NIHTBX_PICVOCAB_UNCORRECTED 280 0.96 89.18 8.33 60.00 84.00 89.00 95.00 123 ▁▅▇▂▁
NIHTBX_READING_UNCORRECTED 293 0.96 94.87 6.59 67.00 91.00 95.00 98.00 180 ▂▇▁▁▁
NIHTBX_PICTURE_UNCORRECTED 183 0.97 109.48 11.98 76.00 101.00 109.00 118.00 133 ▁▃▇▇▃
PEA_RAVLT_LD_TRIAL_VII_TC 213 0.97 9.28 2.86 0.00 7.00 9.00 11.00 15 ▁▃▇▇▃
NIHTBX_LIST_UNCORRECTED 6544 0.00 96.00 17.87 36.00 90.00 97.00 105.00 128 ▁▁▃▇▂
LMT_SCR_PERC_CORRECT 77 0.99 0.74 0.18 0.03 0.59 0.75 0.91 1 ▁▁▅▅▇
PEA_WISCV_TRS 6571 0.00 NaN NA NA NA NA NA NA
NIHTBX_FLUIDCOMP_UNCORRECTED 6547 0.00 94.58 13.66 75.00 84.75 91.50 99.00 127 ▇▇▃▃▂
NIHTBX_CRYST_UNCORRECTED 192 0.97 91.03 7.09 66.00 86.00 91.00 95.00 125 ▁▆▇▁▁
NIHTBX_TOTALCOMP_UNCORRECTED 6547 0.00 89.08 10.28 77.00 82.00 86.50 91.50 114 ▇▅▂▂▂
visionProb 6176 0.06 0.06 0.23 0.00 0.00 0.00 0.00 1 ▇▁▁▁▁

1.6 mental health

1.6.1 Parent’s report child’s mental health CBCL

CBCL definitions from Michelini et al. Translational Psychiatry (2019)

# Item-level CBCL table plus a lookup table with one row per CBCL column
# (CBCL_COL) and 0/1 flag columns: LowFreq and Com1..Com9.
CBCL <- paste0(dataFold, "ABCD_CBCL01_DATA_TABLE.csv") %>%
  read_csv()
LowFreqComp <- paste0(utilFold, "CBCLLowFreqCompositMichelini.csv") %>%
  read_csv() %>%
  mutate(CBCL_COL = as.factor(CBCL_COL))

# For each flag column, collect the names of the CBCL columns whose flag is 1.
# Inside with() the flag name refers to the lookup-table column; which()
# ignores NA flags, so rows with a missing flag are not selected.
LowFreq <- with(LowFreqComp, levels(droplevels(CBCL_COL[which(LowFreq == 1)])))
Com1_Attack <- with(LowFreqComp, levels(droplevels(CBCL_COL[which(Com1_Attack == 1)])))
Com2_Destroy <- with(LowFreqComp, levels(droplevels(CBCL_COL[which(Com2_Destroy == 1)])))
Com3_Disobeys <- with(LowFreqComp, levels(droplevels(CBCL_COL[which(Com3_Disobeys == 1)])))
Com4_Steals <- with(LowFreqComp, levels(droplevels(CBCL_COL[which(Com4_Steals == 1)])))
Com5_Peer <- with(LowFreqComp, levels(droplevels(CBCL_COL[which(Com5_Peer == 1)])))
Com6_Distracted <- with(LowFreqComp, levels(droplevels(CBCL_COL[which(Com6_Distracted == 1)])))
Com7_Hallucinations <- with(LowFreqComp, levels(droplevels(CBCL_COL[which(Com7_Hallucinations == 1)])))
Com8_SexPlay <- with(LowFreqComp, levels(droplevels(CBCL_COL[which(Com8_SexPlay == 1)])))
Com9_Weight <- with(LowFreqComp, levels(droplevels(CBCL_COL[which(Com9_Weight == 1)])))

# Drop the low-frequency items, then add one column per composite: the row
# mean of its member items, rounded to the nearest integer.
# .01 is added before rounding so that a mean of exactly .5 becomes 1 rather
# than 0. Inside each expression the composite name still refers to the
# character vector of item names, because the column does not exist yet.
CBCLLowFreqDroppedCompAdded <- CBCL %>%
  select(-one_of(LowFreq)) %>%
  mutate(
    Com1_Attack = round(rowMeans(select(., one_of(Com1_Attack))) + .01),
    Com2_Destroy = round(rowMeans(select(., one_of(Com2_Destroy))) + .01),
    Com3_Disobeys = round(rowMeans(select(., one_of(Com3_Disobeys))) + .01),
    Com4_Steals = round(rowMeans(select(., one_of(Com4_Steals))) + .01),
    Com5_Peer = round(rowMeans(select(., one_of(Com5_Peer))) + .01),
    Com6_Distracted = round(rowMeans(select(., one_of(Com6_Distracted))) + .01),
    Com7_Hallucinations = round(rowMeans(select(., one_of(Com7_Hallucinations))) + .01),
    Com8_SexPlay = round(rowMeans(select(., one_of(Com8_SexPlay))) + .01),
    Com9_Weight = round(rowMeans(select(., one_of(Com9_Weight))) + .01)
  )

# Names of every raw item that belongs to one of the nine composites
comNames <- c(
  Com1_Attack, Com2_Destroy, Com3_Disobeys,
  Com4_Steals, Com5_Peer, Com6_Distracted,
  Com7_Hallucinations, Com8_SexPlay, Com9_Weight
)

# Remove those raw items from the table that already holds the composites
CBCLLowFreqCompDroppedCompAdded <- CBCLLowFreqDroppedCompAdded %>%
  select(-one_of(comNames))

# Drop TIMEPT, then keep only rows with no missing value in any remaining column
CBCLLowFreqCompDroppedCompAddedNoNa <- CBCLLowFreqCompDroppedCompAdded %>%
  select(-TIMEPT) %>%
  drop_na()

# Names of the item and composite columns that are left
CBCLNamesAll <- CBCLLowFreqCompDroppedCompAddedNoNa %>%
  select(matches("CBCL_Q|Com")) %>%
  names()

# Left-join those names to the lookup table to get a definition row per column
CBCLLowFreqCompDroppedCompAddedNoNaDef <-
  list(data.frame(CBCL_COL = CBCLNamesAll), LowFreqComp) %>%
  plyr::join_all(by = "CBCL_COL", type = "left")

# Lookup table assigning each CBCL column (CBCL_COL) a prevFactor label:
# "drop" or one of "F1".."F5".
prevFac <- paste0(utilFold, "CBCLLowFreqCompDroppedCompAddedNoNaDefMicheliniFac.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  mutate(CBCL_COL = as.factor(CBCL_COL))

# Column names per label; which() ignores rows whose label is NA
dropFCol <- with(prevFac, levels(droplevels(CBCL_COL[which(prevFactor == "drop")])))
F1Externalizing <- with(prevFac, levels(droplevels(CBCL_COL[which(prevFactor == "F1")])))
F2Internalizing <- with(prevFac, levels(droplevels(CBCL_COL[which(prevFactor == "F2")])))
F3NeuroDevelopmental <- with(prevFac, levels(droplevels(CBCL_COL[which(prevFactor == "F3")])))
F4Somatoform <- with(prevFac, levels(droplevels(CBCL_COL[which(prevFactor == "F4")])))
F5Detachment <- with(prevFac, levels(droplevels(CBCL_COL[which(prevFactor == "F5")])))

# Remove the columns labelled "drop" and keep baseline rows only
CBCLMicheliniPruned <- CBCLLowFreqCompDroppedCompAddedNoNa %>%
  select(-one_of(dropFCol)) %>%
  filter(EVENTNAME == "baseline_year_1_arm_1")

CBCLNames <- CBCLMicheliniPruned %>%
  select(matches("CBCL_Q|Com")) %>%
  names()

# Same table with each factor's columns renamed to a short prefix plus a
# running index (Ext1, Ext2, ...; Int1, ...; NDe1, ...; Som1, ...; Det1, ...).
CBCLMicheliniPruned.NewNames <- CBCLMicheliniPruned %>%
  rename_at(
    vars(all_of(F1Externalizing)),
    ~ paste0("Ext", seq_along(F1Externalizing))
  ) %>%
  rename_at(
    vars(all_of(F2Internalizing)),
    ~ paste0("Int", seq_along(F2Internalizing))
  ) %>%
  rename_at(
    vars(all_of(F3NeuroDevelopmental)),
    ~ paste0("NDe", seq_along(F3NeuroDevelopmental))
  ) %>%
  rename_at(
    vars(all_of(F4Somatoform)),
    ~ paste0("Som", seq_along(F4Somatoform))
  ) %>%
  rename_at(
    vars(all_of(F5Detachment)),
    ~ paste0("Det", seq_along(F5Detachment))
  )

# Keep only the renamed columns. matches() ignores case by default, so "Int"
# also hits the INTERVIEW_* columns; those are excluded explicitly.
CBCLfaDat.NewNames <- CBCLMicheliniPruned.NewNames %>%
  select(matches("Ext|Int|NDe|Som|Det"), -matches("INTERVIEW"))

1.6.2 Load precomputed CBCL

Open question: which precomputed scales to use — social, thought and attention problems, plus internalizing (int) and externalizing (ext)?

# Read the ABCD_CBCLS01 table (CBCL_SCR_* score columns) with readr
CBCLPrecomputed <- paste0(dataFold, "ABCD_CBCLS01_DATA_TABLE.csv") %>%
  read_csv()
## Warning: One or more parsing issues, see `problems()` for details
#glimpse(CBCLPrecomputed)

# Keep the identifiers plus every column whose name ends in "_R"
CBCLPrecomputedSelected <- CBCLPrecomputed %>%
  select(SUBJECTKEY, EVENTNAME, ends_with("_R"))

# Summary across all visits
CBCLPrecomputedSelected %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 29684
Number of columns 22
_______________________
Column type frequency:
character 2
numeric 20
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 11878 0
EVENTNAME 0 1 21 24 0 3 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
CBCL_SCR_SYN_ANXDEP_R 71 1 2.48 3.05 0 0 1 4 26 ▇▁▁▁▁
CBCL_SCR_SYN_WITHDEP_R 71 1 1.10 1.78 0 0 0 2 16 ▇▁▁▁▁
CBCL_SCR_SYN_SOMATIC_R 71 1 1.46 1.94 0 0 1 2 18 ▇▁▁▁▁
CBCL_SCR_SYN_SOCIAL_R 71 1 1.51 2.20 0 0 1 2 19 ▇▁▁▁▁
CBCL_SCR_SYN_THOUGHT_R 71 1 1.58 2.18 0 0 1 2 22 ▇▁▁▁▁
CBCL_SCR_SYN_ATTENTION_R 71 1 2.87 3.43 0 0 2 5 20 ▇▂▁▁▁
CBCL_SCR_SYN_RULEBREAK_R 71 1 1.13 1.84 0 0 0 2 23 ▇▁▁▁▁
CBCL_SCR_SYN_AGGRESSIVE_R 71 1 3.10 4.21 0 0 2 4 36 ▇▁▁▁▁
CBCL_SCR_SYN_INTERNAL_R 71 1 5.05 5.56 0 1 3 7 51 ▇▁▁▁▁
CBCL_SCR_SYN_EXTERNAL_R 71 1 4.23 5.71 0 0 2 6 49 ▇▁▁▁▁
CBCL_SCR_SYN_TOTPROB_R 71 1 17.55 17.62 0 5 12 24 161 ▇▁▁▁▁
CBCL_SCR_DSM5_DEPRESS_R 71 1 1.36 2.13 0 0 1 2 19 ▇▁▁▁▁
CBCL_SCR_DSM5_ANXDISORD_R 71 1 2.01 2.42 0 0 1 3 18 ▇▂▁▁▁
CBCL_SCR_DSM5_SOMATICPR_R 71 1 1.07 1.50 0 0 0 2 13 ▇▁▁▁▁
CBCL_SCR_DSM5_ADHD_R 71 1 2.49 2.90 0 0 1 4 14 ▇▃▁▁▁
CBCL_SCR_DSM5_OPPOSIT_R 71 1 1.70 2.00 0 0 1 3 10 ▇▂▁▁▁
CBCL_SCR_DSM5_CONDUCT_R 71 1 1.21 2.29 0 0 0 2 25 ▇▁▁▁▁
CBCL_SCR_07_SCT_R 71 1 0.51 0.99 0 0 0 1 8 ▇▁▁▁▁
CBCL_SCR_07_OCD_R 71 1 1.33 1.81 0 0 1 2 16 ▇▁▁▁▁
CBCL_SCR_07_STRESS_R 71 1 2.87 3.34 0 0 2 4 28 ▇▂▁▁▁

1.6.3 Parent’s report parent’s mental health

Parent Adult Self Report Raw Scores Aseba

Adaptive Functioning and Strengths Scales: Education; Friends; Spouse/Partner; Family; Job; Personal Strengths

Syndrome Scales: Anxious/Depressed; Withdrawn; Somatic Complaints; Thought Problems; Attention Problems; Aggressive Behavior; Rule-breaking Behavior; and Intrusive

DSM-5-oriented Scales: Depressive Problems; Anxiety Problems; Somatic Problems; Avoidant Personality Problems; Attention Deficit/Hyperactivity Problems (Inattention and Hyperactivity/Impulsivity subscales); and Antisocial Personality Problems

Substance Use Scales: Tobacco; Alcohol; Drugs

Internalizing (INT), Attention Problems (ATT), Externalizing (EXT), and Total Problems (TOT) scales.

# Read the ABCD_ASRS01 table (ASR_SCR_* score columns) with readr
ASRPrecomputed <- paste0(dataFold, "ABCD_ASRS01_DATA_TABLE.csv") %>%
  read_csv()
#glimpse(ASRPrecomputed)

# Keep the identifiers plus every column whose name ends in "_T"
ASRPrecomputedSelected <- ASRPrecomputed %>%
  select(SUBJECTKEY, EVENTNAME, ends_with("_T"))

# Summary across all visits
ASRPrecomputedSelected %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 18449
Number of columns 22
_______________________
Column type frequency:
character 2
numeric 20
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 11878 0
EVENTNAME 0 1 21 24 0 2 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
ASR_SCR_PERSTR_T 163 0.99 47.49 9.44 20 42 49 55 60 ▁▂▃▇▇
ASR_SCR_ANXDEP_T 163 0.99 53.49 5.74 50 50 51 55 98 ▇▁▁▁▁
ASR_SCR_WITHDRAWN_T 163 0.99 52.82 5.03 50 50 51 53 100 ▇▁▁▁▁
ASR_SCR_SOMATIC_T 163 0.99 54.81 6.18 50 50 52 57 98 ▇▂▁▁▁
ASR_SCR_THOUGHT_T 163 0.99 52.82 4.97 50 50 51 54 95 ▇▁▁▁▁
ASR_SCR_ATTENTION_T 163 0.99 53.94 5.89 50 50 51 57 94 ▇▂▁▁▁
ASR_SCR_AGGRESSIVE_T 163 0.99 53.32 5.02 50 50 51 54 89 ▇▂▁▁▁
ASR_SCR_RULEBREAK_T 163 0.99 52.53 4.68 50 50 50 52 87 ▇▁▁▁▁
ASR_SCR_INTRUSIVE_T 163 0.99 51.65 3.40 50 50 50 51 76 ▇▁▁▁▁
ASR_SCR_INTERNAL_T 163 0.99 48.23 10.53 30 40 48 55 95 ▆▇▃▁▁
ASR_SCR_EXTERNAL_T 163 0.99 45.91 9.57 30 38 46 52 90 ▇▇▃▁▁
ASR_SCR_TOTPROB_T 163 0.99 43.02 10.21 25 36 43 50 89 ▆▇▃▁▁
ASR_SCR_DEPRESS_T 163 0.99 54.07 6.02 50 50 51 57 100 ▇▁▁▁▁
ASR_SCR_ANXDISORD_T 163 0.99 53.46 5.36 50 50 51 54 80 ▇▁▁▁▁
ASR_SCR_SOMATICPR_T 163 0.99 54.82 6.51 50 50 51 58 100 ▇▂▁▁▁
ASR_SCR_AVOIDANT_T 163 0.99 53.18 5.35 50 50 51 54 90 ▇▁▁▁▁
ASR_SCR_ADHD_T 163 0.99 53.24 5.53 50 50 51 53 98 ▇▁▁▁▁
ASR_SCR_ANTISOCIAL_T 163 0.99 53.03 4.63 50 50 51 55 84 ▇▁▁▁▁
ASR_SCR_INATTENTION_T 163 0.99 54.35 6.50 50 50 51 57 90 ▇▂▁▁▁
ASR_SCR_HYPERACTIVE_T 163 0.99 51.98 4.25 50 50 50 51 80 ▇▁▁▁▁

1.6.4 other mental health surveys from parents

Only the Parent General Behavior Inventory – Mania (PGBI) is available at baseline.

# ABCD Sum Scores Mental Health Parent: read the CSV and store it as a tibble.
mentalHealthParent <- paste0(dataFold, "ABCD_MHP02_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# Summarise the baseline rows only, after dropping the first eight columns and
# STUDY_COHORT_NAME so that only the score columns are summarised.
mentalHealthParent %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-1:-8) %>%
  select(-STUDY_COHORT_NAME) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 76
_______________________
Column type frequency:
logical 8
numeric 68
________________________
Group variables None

Variable type: logical

skim_variable n_missing complete_rate mean count
PLE_P_SS_TOTAL_GOOD_NT 11878 0 NaN :
PLE_P_SS_TOTAL_BAD_NT 11878 0 NaN :
PLE_P_SS_AFFECTED_GOOD_SUM_NT 11878 0 NaN :
PLE_P_SS_AFFECTED_GOOD_MEAN_NT 11878 0 NaN :
PLE_P_SS_AFFECTED_BAD_SUM_NT 11878 0 NaN :
PLE_P_SS_AFFECTED_BAD_MEAN_NT 11878 0 NaN :
PLE_P_SS_AFFECTED_MEAN_NT 11878 0 NaN :
PLE_P_SS_AFFECT_SUM_NT 11878 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PGBI_P_SS_SCORE 8 1 1.30 2.77 0 0 0 1 28 ▇▁▁▁▁
PGBI_P_SS_SCORE_NM 0 1 0.01 0.22 0 0 0 0 10 ▇▁▁▁▁
PGBI_P_SS_SCORE_NT 0 1 10.00 0.00 10 10 10 10 10 ▁▁▇▁▁
PLE_P_SS_TOTAL_NUMBER 11878 0 NaN NA NA NA NA NA NA
PLE_P_SS_TOTAL_NUMBER_NM 11878 0 NaN NA NA NA NA NA NA
PLE_P_SS_TOTAL_NUMBER_NT 11878 0 NaN NA NA NA NA NA NA
PLE_P_SS_TOTAL_GOOD 11878 0 NaN NA NA NA NA NA NA
PLE_P_SS_TOTAL_BAD 11878 0 NaN NA NA NA NA NA NA
PLE_P_SS_AFFECTED_GOOD_SUM 11878 0 NaN NA NA NA NA NA NA
PLE_P_SS_AFFECTED_GOOD_MEAN 11878 0 NaN NA NA NA NA NA NA
PLE_P_SS_AFFECTED_BAD_SUM 11878 0 NaN NA NA NA NA NA NA
PLE_P_SS_AFFECTED_BAD_MEAN 11878 0 NaN NA NA NA NA NA NA
PLE_P_SS_AFFECTED_MEAN 11878 0 NaN NA NA NA NA NA NA
PLE_P_SS_AFFECT_SUM 11878 0 NaN NA NA NA NA NA NA
SSRS_P_SS_SUM 11878 0 NaN NA NA NA NA NA NA
SSRS_P_SS_SUM_NM 11878 0 NaN NA NA NA NA NA NA
SSRS_SS_SUM_NT 11878 0 NaN NA NA NA NA NA NA
GISH_P_SS_M_SUM 11878 0 NaN NA NA NA NA NA NA
GISH_P_SS_M_SUM_NM 11878 0 NaN NA NA NA NA NA NA
GISH_P_SS_M_SUM_NT 11878 0 NaN NA NA NA NA NA NA
GISH_P_SS_F_SUM 11878 0 NaN NA NA NA NA NA NA
GISH_P_SS_F_SUM_NM 11878 0 NaN NA NA NA NA NA NA
GISH_P_SS_F_SUM_NT 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_AGGRESSION_NM 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_AGGRESSION_NT 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_ATTENTION 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_ATTENTION_NM 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_ATTENTION_NT 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_DEPRESSIVE_MOOD 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_DEPRESSIVE_MOOD_NM 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_DEPRESSIVE_MOOD_NT 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_EFFORT_CONT_SS 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_EFFORT_CONT_SS_NM 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_EFFORT_CONT_SS_NT 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_FEAR 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_FEAR_NM 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_FEAR_NT 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_FRUSTRATION 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_FRUSTRATION_NM 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_FRUSTRATION_NT 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_INHIBITORY 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_INHIBITORY_NM 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_INHIBITORY_NT 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_ACTIVATION 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_NEG_AFFECT_SS 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_NEG_AFFECT_SS_NM 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_NEG_AFFECT_SS_NT 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_SHYNESS 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_SHYNESS_NM 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_SHYNESS_NT 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_SURGENCY 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_SURGENCY_NM 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_SURGENCY_NT 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_SURGENCY_SS 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_ACTIVATION_NM 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_SURGENCY_SS_NM 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_SURGENCY_SS_NT 11878 0 NaN NA NA NA NA NA NA
GISH_P_SS_F_SUM2 11878 0 NaN NA NA NA NA NA NA
GISH_P_SS_F_SUM2_NM 11878 0 NaN NA NA NA NA NA NA
GISH_P_SS_F_SUM2_NT 11878 0 NaN NA NA NA NA NA NA
GISH_P_SS_M_SUM2 11878 0 NaN NA NA NA NA NA NA
GISH_P_SS_M_SUM2_NM 11878 0 NaN NA NA NA NA NA NA
GISH_P_SS_M_SUM2_NT 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_ACTIVATION_NT 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_AFFILIATION 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_AFFILIATION_NM 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_AFFILIATION_NT 11878 0 NaN NA NA NA NA NA NA
EATQ_P_SS_AGGRESSION 11878 0 NaN NA NA NA NA NA NA
# Histogram of PGBI_P_SS_SCORE over every row of the table (no event filter).
mentalHealthParent %>%
  ggplot(mapping = aes(x = PGBI_P_SS_SCORE)) +
  geom_histogram(colour = "black", fill = "white") +
  labs(x = "Parent General Behavior-Mania") +
  theme_classic(base_size = 30)
## Warning: Removed 73 rows containing non-finite values (stat_bin).

# Tabulate rows by whether the mania score is above zero (NA = score missing).
count(mentalHealthParent, PGBI_P_SS_SCORE > 0)
## # A tibble: 3 × 2
##   `PGBI_P_SS_SCORE > 0`     n
##   <lgl>                 <int>
## 1 FALSE                 19013
## 2 TRUE                  10598
## 3 NA                       73
# Keep only the identifiers and the mania sum score, renamed to mania_parent.
maniaParent <- mentalHealthParent %>%
  select(SUBJECTKEY, EVENTNAME, mania_parent = PGBI_P_SS_SCORE)

1.7 Personality

1.7.1 Load BIS/BAS

# Load the BIS/BAS item table and average groups of items into subscale scores.
# All events are kept here (no EVENTNAME filter); restrict to baseline where
# the scores are used.
# na.rm = FALSE: a subscale average is NA whenever any contributing item is NA.
BISBAS <- as_tibble(read.csv(paste0(dataFold, "ABCD_BISBAS01_DATA_TABLE.csv"))) %>%
  mutate(
    BISAvg = rowMeans(
      cbind(BISBAS2_Y, BISBAS3_Y, BISBAS4_Y, BISBAS6_Y),
      na.rm = FALSE
    ),
    BASRRAvg = rowMeans(
      cbind(BISBAS8_Y, BISBAS10_Y, BISBAS11_Y, BISBAS12_Y),
      na.rm = FALSE
    ),
    BASDriveAvg = rowMeans(
      cbind(BISBAS13_Y, BISBAS14_Y, BISBAS15_Y, BISBAS16_Y),
      na.rm = FALSE
    ),
    BASFunAvg = rowMeans(
      cbind(BISBAS17_Y, BISBAS18_Y, BISBAS19_Y, BISBAS20_Y),
      na.rm = FALSE
    ),
    # Overall BAS is the mean of the three BAS subscale averages defined above.
    BASAllAvg = rowMeans(
      cbind(BASRRAvg, BASDriveAvg, BASFunAvg),
      na.rm = FALSE
    )
  ) %>%
  select(SUBJECTKEY, EVENTNAME, BISAvg, BASRRAvg, BASDriveAvg, BASFunAvg, BASAllAvg)

1.7.2 histogram of BIS/BAS

# Histogram of the averaged BIS score at baseline.
# `bins` must be given to geom_histogram(); passed to ggplot() it is ignored
# and the default of 30 bins is used instead.
BISBAS %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  ggplot(aes(x = BISAvg)) +
  geom_histogram(color = "black", fill = "white", bins = 100) +
  xlab("averaged BIS") +
  theme_classic(base_size = 30)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 22 rows containing non-finite values (stat_bin).

# Histogram (100 bins) of the overall averaged BAS score at baseline.
ggplot(
  data = filter(BISBAS, EVENTNAME == "baseline_year_1_arm_1"),
  mapping = aes(x = BASAllAvg)
) +
  geom_histogram(bins = 100, colour = "black", fill = "white") +
  labs(x = "averaged BAS") +
  theme_classic(base_size = 30)
## Warning: Removed 23 rows containing non-finite values (stat_bin).

### other mental health surveys from youth: it looks like only the ABCD Prodromal Psychosis Scale (PPS) and UPPS (besides BIS/BAS) are available at baseline.
However, 4595 PPS observations are missing, so we will not use PPS and will focus only on UPPS.

# ABCD Sum Scores Mental Health Youth: read the CSV and store it as a tibble.
mentalHealthYouth <- paste0(dataFold, "ABCD_MHY02_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# Summarise the baseline rows only, after dropping the first eight columns and
# STUDY_COHORT_NAME so that only the score columns are summarised.
mentalHealthYouth %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-1:-8) %>%
  select(-STUDY_COHORT_NAME) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 97
_______________________
Column type frequency:
logical 8
numeric 89
________________________
Group variables None

Variable type: logical

skim_variable n_missing complete_rate mean count
PLE_Y_SS_TOTAL_GOOD_NT 11878 0 NaN :
PLE_Y_SS_TOTAL_BAD_NT 11878 0 NaN :
PLE_Y_SS_AFFECT_SUM_NT 11878 0 NaN :
PLE_Y_SS_AFFECTED_MEAN_NT 11878 0 NaN :
PLE_Y_SS_AFFECTED_GOOD_SUM_NT 11878 0 NaN :
PLE_Y_SS_AFFECTED_GOOD_MEAN_NT 11878 0 NaN :
PLE_Y_SS_AFFECTED_BAD_SUM_NT 11878 0 NaN :
PLE_Y_SS_AFFECTED_BAD_MEAN_NT 11878 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PLE_Y_SS_TOTAL_NUMBER 11878 0.00 NaN NA NA NA NA NA NA
PLE_Y_SS_TOTAL_NUMBER_NM 11878 0.00 NaN NA NA NA NA NA NA
PLE_Y_SS_TOTAL_NUMBER_NT 11878 0.00 NaN NA NA NA NA NA NA
PLE_Y_SS_TOTAL_GOOD 11878 0.00 NaN NA NA NA NA NA NA
PLE_Y_SS_TOTAL_BAD 11878 0.00 NaN NA NA NA NA NA NA
PLE_Y_SS_AFFECT_SUM 11878 0.00 NaN NA NA NA NA NA NA
PLE_Y_SS_AFFECTED_GOOD_SUM 11878 0.00 NaN NA NA NA NA NA NA
PLE_Y_SS_AFFECTED_GOOD_MEAN 11878 0.00 NaN NA NA NA NA NA NA
PLE_Y_SS_AFFECTED_BAD_SUM 11878 0.00 NaN NA NA NA NA NA NA
PLE_Y_SS_AFFECTED_BAD_MEAN 11878 0.00 NaN NA NA NA NA NA NA
PPS_Y_SS_NUMBER 11 1.00 2.63 3.56 0 0 1 4 21 ▇▂▁▁▁
PPS_Y_SS_NUMBER_NM 0 1.00 0.02 0.68 0 0 0 0 21 ▇▁▁▁▁
PPS_Y_SS_NUMBER_NT 0 1.00 21.00 0.00 21 21 21 21 21 ▁▁▇▁▁
PPS_Y_SS_BOTHER_SUM 4595 0.61 2.14 2.62 0 0 1 3 19 ▇▂▁▁▁
PPS_Y_SS_BOTHER_SUM_NM 0 1.00 18.37 3.56 0 17 20 21 21 ▁▁▁▂▇
PPS_Y_SS_BOTHER_SUM_NT 0 1.00 21.00 0.00 21 21 21 21 21 ▁▁▇▁▁
PPS_Y_SS_BOTHER_N_1 4595 0.61 2.15 2.07 0 1 2 3 17 ▇▂▁▁▁
PPS_Y_SS_BOTHER_N_1_NM 0 1.00 18.37 3.56 0 17 20 21 21 ▁▁▁▂▇
PPS_Y_SS_BOTHER_N_1_NT 0 1.00 21.00 0.00 21 21 21 21 21 ▁▁▇▁▁
PPS_Y_SS_SEVERITY_SCORE 10 1.00 6.32 10.61 0 0 2 8 104 ▇▁▁▁▁
PPS_Y_SS_SEVERITY_SCORE_NM 0 1.00 38.05 5.67 3 36 40 42 42 ▁▁▁▁▇
PPS_Y_SS_SEVERITY_SCORE_NT 0 1.00 42.00 0.00 42 42 42 42 42 ▁▁▇▁▁
PPS_SS_MEAN_SEVERITY 4596 0.61 2.15 1.10 1 1 2 3 6 ▇▃▂▁▁
UPPS_Y_SS_NEGATIVE_URGENCY 23 1.00 8.49 2.65 4 7 8 10 16 ▆▇▇▂▁
UPPS_Y_SS_NEGATIVE_URGENCY_NM 0 1.00 0.01 0.17 0 0 0 0 4 ▇▁▁▁▁
UPPS_Y_SS_NEGATIVE_URGENCY_NT 0 1.00 4.00 0.00 4 4 4 4 4 ▁▁▇▁▁
UPPS_Y_SS_LACK_OF_PLANNING 23 1.00 7.74 2.38 4 6 8 9 16 ▆▇▅▁▁
UPPS_Y_SS_LACK_OF_PLANNING_NM 0 1.00 0.01 0.18 0 0 0 0 4 ▇▁▁▁▁
UPPS_Y_SS_LACK_OF_PLANNING_NT 0 1.00 4.00 0.00 4 4 4 4 4 ▁▁▇▁▁
UPPS_Y_SS_SENSATION_SEEKING 23 1.00 9.77 2.68 4 8 10 12 16 ▂▅▇▃▂
UPPS_Y_SS_SENSATION_SEEKING_NM 0 1.00 0.01 0.18 0 0 0 0 4 ▇▁▁▁▁
UPPS_Y_SS_SENSATION_SEEKING_NT 0 1.00 4.00 0.00 4 4 4 4 4 ▁▁▇▁▁
UPPS_Y_SS_POSITIVE_URGENCY 23 1.00 7.99 2.96 4 6 8 10 16 ▇▆▆▂▁
UPPS_Y_SS_POSITIVE_URGENCY_NM 0 1.00 0.01 0.18 0 0 0 0 4 ▇▁▁▁▁
UPPS_Y_SS_POSITIVE_URGENCY_NT 0 1.00 4.00 0.00 4 4 4 4 4 ▁▁▇▁▁
UPPS_Y_SS_LACK_OF_PERSEVERANCE 23 1.00 7.04 2.25 4 5 7 8 16 ▇▆▃▁▁
UPPS_Y_SS_LACK_OF_PERS_NM 0 1.00 0.01 0.18 0 0 0 0 4 ▇▁▁▁▁
UPPS_Y_SS_LACK_OF_PERS_NT 0 1.00 4.00 0.00 4 4 4 4 4 ▁▁▇▁▁
BIS_Y_SS_BIS_SUM 22 1.00 9.51 3.75 0 7 9 12 21 ▂▇▇▃▁
BIS_Y_SS_BIS_SUM_NM 22 1.00 0.00 0.00 0 0 0 0 0 ▁▁▇▁▁
BIS_Y_SS_BIS_SUM_NT 22 1.00 7.00 0.00 7 7 7 7 7 ▁▁▇▁▁
BIS_Y_SS_BAS_RR 23 1.00 11.00 2.92 0 9 11 13 15 ▁▂▅▇▇
BIS_Y_SS_BAS_RR_NM 23 1.00 0.00 0.00 0 0 0 0 0 ▁▁▇▁▁
BIS_Y_SS_BAS_RR_NT 23 1.00 5.00 0.00 5 5 5 5 5 ▁▁▇▁▁
BIS_Y_SS_BAS_DRIVE 23 1.00 4.14 3.06 0 2 4 6 12 ▇▆▅▂▂
BIS_Y_SS_BAS_DRIVE_NM 23 1.00 0.00 0.00 0 0 0 0 0 ▁▁▇▁▁
BIS_Y_SS_BAS_DRIVE_NT 23 1.00 4.00 0.00 4 4 4 4 4 ▁▁▇▁▁
BIS_Y_SS_BAS_FS 23 1.00 5.71 2.64 0 4 6 7 12 ▂▅▇▃▂
BIS_Y_SS_BAS_FS_NM 23 1.00 0.00 0.00 0 0 0 0 0 ▁▁▇▁▁
BIS_Y_SS_BAS_FS_NT 23 1.00 4.00 0.00 4 4 4 4 4 ▁▁▇▁▁
BIS_Y_SS_BISM_SUM 22 1.00 5.53 2.84 0 3 5 8 12 ▃▅▇▃▂
BIS_Y_SS_BISM_SUM_NM 22 1.00 0.00 0.00 0 0 0 0 0 ▁▁▇▁▁
BIS_Y_SS_BISM_SUM_NT 22 1.00 4.00 0.00 4 4 4 4 4 ▁▁▇▁▁
BIS_Y_SS_BASM_RR 23 1.00 8.82 2.39 0 7 9 11 12 ▁▁▅▆▇
BIS_Y_SS_BASM_RR_NM 23 1.00 0.00 0.00 0 0 0 0 0 ▁▁▇▁▁
BIS_Y_SS_BASM_RR_NT 23 1.00 4.00 0.00 4 4 4 4 4 ▁▁▇▁▁
BIS_Y_SS_BASM_DRIVE 23 1.00 4.14 3.06 0 2 4 6 12 ▇▆▅▂▂
BIS_Y_SS_BASM_DRIVE_NM 0 1.00 0.01 0.18 0 0 0 0 4 ▇▁▁▁▁
BIS_Y_SS_BASM_DRIVE_NT 0 1.00 4.00 0.00 4 4 4 4 4 ▁▁▇▁▁
DELQ_Y_SS_SUM 11878 0.00 NaN NA NA NA NA NA NA
DELQ_Y_SS_SUM_NM 11878 0.00 NaN NA NA NA NA NA NA
DELQ_Y_SS_SUM_NT 11878 0.00 NaN NA NA NA NA NA NA
SUP_Y_SS_SUM 11878 0.00 NaN NA NA NA NA NA NA
SUP_Y_SS_SUM_NM 11878 0.00 NaN NA NA NA NA NA NA
SUP_Y_SS_SUM_NT 11878 0.00 NaN NA NA NA NA NA NA
GISH_Y_SS_M_SUM 11878 0.00 NaN NA NA NA NA NA NA
GISH_Y_SS_M_SUM_NM 11878 0.00 NaN NA NA NA NA NA NA
GISH_Y_SS_M_SUM_NT 11878 0.00 NaN NA NA NA NA NA NA
GISH_Y_SS_F_SUM 11878 0.00 NaN NA NA NA NA NA NA
GISH_Y_SS_F_SUM_NM 11878 0.00 NaN NA NA NA NA NA NA
GISH_Y_SS_F_SUM_NT 11878 0.00 NaN NA NA NA NA NA NA
PEQ_SS_RELATIONAL_AGGS_NM 11878 0.00 NaN NA NA NA NA NA NA
PEQ_SS_RELATIONAL_AGGS_NT 11878 0.00 NaN NA NA NA NA NA NA
PEQ_SS_RELATIONAL_VICTIM 11878 0.00 NaN NA NA NA NA NA NA
PEQ_SS_RELATIONAL_VICTIM_NM 11878 0.00 NaN NA NA NA NA NA NA
PEQ_SS_RELATIONAL_VICTIM_NT 11878 0.00 NaN NA NA NA NA NA NA
PEQ_SS_REPUTATION_AGGS 11878 0.00 NaN NA NA NA NA NA NA
PEQ_SS_REPUTATION_AGGS_NM 11878 0.00 NaN NA NA NA NA NA NA
PEQ_SS_REPUTATION_AGGS_NT 11878 0.00 NaN NA NA NA NA NA NA
PEQ_SS_REPUTATION_VICTIM 11878 0.00 NaN NA NA NA NA NA NA
PEQ_SS_REPUTATION_VICTIM_NM 11878 0.00 NaN NA NA NA NA NA NA
PEQ_SS_REPUTATION_VICTIM_NT 11878 0.00 NaN NA NA NA NA NA NA
PEQ_SS_OVERT_AGGRESSION 11878 0.00 NaN NA NA NA NA NA NA
PEQ_SS_OVERT_AGGRESSION_NM 11878 0.00 NaN NA NA NA NA NA NA
PEQ_SS_OVERT_AGGRESSION_NT 11878 0.00 NaN NA NA NA NA NA NA
PEQ_SS_OVERT_VICTIM 11878 0.00 NaN NA NA NA NA NA NA
PEQ_SS_OVERT_VICTIM_NM 11878 0.00 NaN NA NA NA NA NA NA
PEQ_SS_OVERT_VICTIM_NT 11878 0.00 NaN NA NA NA NA NA NA
PEQ_SS_RELATIONAL_AGGS 11878 0.00 NaN NA NA NA NA NA NA
# Keep the identifiers and the five UPPS sum-score columns (all events).
UPPS <- select(
  mentalHealthYouth,
  SUBJECTKEY,
  EVENTNAME,
  UPPS_Y_SS_NEGATIVE_URGENCY,
  UPPS_Y_SS_LACK_OF_PLANNING,
  UPPS_Y_SS_SENSATION_SEEKING,
  UPPS_Y_SS_POSITIVE_URGENCY,
  UPPS_Y_SS_LACK_OF_PERSEVERANCE
)

1.8 Physical

Basically, only the sleep-related measures are usable.

not very relevant: ABCD Sum Scores Traumatic Brain Injury abcd_tbi01 ABCD Longitudinal Summary Scores Traumatic Brain Injury abcd_lsstbi01 ABCD Sum Scores Parent Sports and Activities Involvement abcd__spacss01 ABCD Longitudinal Summary Scores Sports Activity abcd_lsssa01 ABCD Sum Scores Parent Medical History abcd_medhxss01 ABCD Longitudinal Summary Scores Medical History abcd_lssmh01 ABCD Sum Scores Developmental History abcd_devhxss01

this is mainly about puberty: ABCD Sum Scores Physical Health Youth abcd_ssphy01

sleep scores: ABCD Parent Sleep Disturbance Scale for Children abcd_sds01 Diet only at one year follow up: ABCD Child Nutrition Assessment abcd_cna01 sum sleep score + diet ABCD Sum Scores Physical Health Parent abcd_ssphp01

#ABCD Parent Sleep Disturbance Scale for Children
#SLEEPDISTURB1_P
#How many hours of sleep does your child get on most nights? ¿Cuántas horas duerme su niño(a) la mayoría de las noches?
#1 = 9-11 hours/ 9 a 11 horas; 2 = 8-9 hours /8 a 9 horas; 3 = 7-8 hours /7 a 8 horas; 4 = 5-7 hours /5 a 7 horas; 5 = Less than 5 hours/ Menos de 5 horas// Consider each question pertaining to the PAST 6 MONTHS of the child's life
#SLEEPDISTURB2_P
#How long after going to bed does your child usually fall asleep? Después de acostarse ¿generalmente cuánto tiempo tarda su niño(a) en quedarse dormido(a)?
#1 = Less than 15 minutes /Menos de 15 minutos; 2 = 15-30 minutes /15 a 30 minutos; 3 = 30-45 minutes /30 a 45 minutos; 4 = 45-60 minutes /45 a 60 minutos; 5 = More than 60 minutes /Más de 60 minutos//Consider each question pertaining to the PAST 6 MONTHS of the child's life
# Read the Parent Sleep Disturbance Scale table and drop rows that are
# duplicates once the two ID columns (ABCD_SDS01_ID, DATASET_ID) are ignored.
# NOTE(review): passing a data-frame expression to distinct() may leave an
# extra nested column named after that expression in the result - confirm.
sleepDis <- paste0(dataFold, "ABCD_SDS01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  # for some reason there is a duplicate based on these two variables
  distinct(select(., -ABCD_SDS01_ID, -DATASET_ID), .keep_all = TRUE)

# sleepDis %>% filter(EVENTNAME =="baseline_year_1_arm_1") %>% 
#   distinct(select(.,-ABCD_SDS01_ID, -DATASET_ID),.keep_all = TRUE) %>% 
#   arrange(SUBJECTKEY)

# Summarise the two raw sleep items at baseline.
sleepDis %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(
    SLEEPDISTURB1_P,
    SLEEPDISTURB2_P
  ) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SLEEPDISTURB1_P 5 1 1.72 0.81 1 1 2 2 5 ▇▆▂▁▁
SLEEPDISTURB2_P 5 1 1.93 0.98 1 1 2 2 5 ▇▇▂▁▁
# Read the ABCD_SSPHP01 summary-score table and keep baseline rows only.
PhysicalSum <- paste0(dataFold, "ABCD_SSPHP01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  filter(EVENTNAME == "baseline_year_1_arm_1")

# Summarise the score columns (first eight columns and STUDY_COHORT_NAME
# are dropped before summarising).
PhysicalSum %>%
  select(-1:-8) %>%
  select(-STUDY_COHORT_NAME) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 36
_______________________
Column type frequency:
numeric 36
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SDS_P_SS_DIMS 5 1.00 11.75 3.75 7 9 11 13 35 ▇▃▁▁▁
SDS_P_SS_DIMS_NM 0 1.00 0.00 0.14 0 0 0 0 7 ▇▁▁▁▁
SDS_P_SS_DIMS_NT 0 1.00 7.00 0.00 7 7 7 7 7 ▁▁▇▁▁
SDS_P_SS_SBD 5 1.00 3.77 1.26 3 3 3 4 15 ▇▁▁▁▁
SDS_P_SS_SBD_NM 0 1.00 0.00 0.06 0 0 0 0 3 ▇▁▁▁▁
SDS_P_SS_SBD_NT 0 1.00 3.00 0.00 3 3 3 3 3 ▁▁▇▁▁
SDS_P_SS_DA 5 1.00 3.44 0.92 3 3 3 4 15 ▇▁▁▁▁
SDS_P_SS_DA_NM 0 1.00 0.00 0.06 0 0 0 0 3 ▇▁▁▁▁
SDS_P_SS_DA_NT 0 1.00 3.00 0.00 3 3 3 3 3 ▁▁▇▁▁
SDS_P_SS_SWTD 32 1.00 8.18 2.63 6 6 7 9 30 ▇▁▁▁▁
SDS_P_SS_SWTD_NM 0 1.00 0.00 0.13 0 0 0 0 6 ▇▁▁▁▁
SDS_P_SS_SWTD_NT 0 1.00 6.00 0.00 6 6 6 6 6 ▁▁▇▁▁
SDS_P_SS_DOES 6 1.00 6.95 2.44 5 5 6 8 25 ▇▁▁▁▁
SDS_P_SS_DOES_NM 0 1.00 0.00 0.10 0 0 0 0 5 ▇▁▁▁▁
SDS_P_SS_DOES_NT 0 1.00 5.00 0.00 5 5 5 5 5 ▁▁▇▁▁
SDS_P_SS_SHY 5 1.00 2.44 1.18 2 2 2 2 10 ▇▁▁▁▁
SDS_P_SS_SHY_NM 0 1.00 0.00 0.04 0 0 0 0 2 ▇▁▁▁▁
SDS_P_SS_SHY_NT 0 1.00 2.00 0.00 2 2 2 2 2 ▁▁▇▁▁
SDS_P_SS_TOTAL 33 1.00 36.53 8.24 26 31 35 40 126 ▇▁▁▁▁
SDS_P_SS_TOTAL_NM 0 1.00 0.01 0.54 0 0 0 0 26 ▇▁▁▁▁
SDS_P_SS_TOTAL_NT 0 1.00 26.00 0.00 26 26 26 26 26 ▁▁▇▁▁
PDS_P_SS_MALE_CATEGORY 5924 0.50 1.37 0.61 1 1 1 2 5 ▇▃▁▁▁
PDS_P_SS_MALE_CAT_NM 5714 0.52 0.04 0.26 0 0 0 0 3 ▇▁▁▁▁
PDS_P_SS_MALE_CAT_NT 5714 0.52 3.00 0.00 3 3 3 3 3 ▁▁▇▁▁
PDS_P_SS_FEMALE_CATEGORY 6384 0.46 2.18 0.91 1 1 2 3 5 ▆▅▇▁▁
PDS_P_SS_FEMALE_CAT_NM 6172 0.48 0.05 0.27 0 0 0 0 3 ▇▁▁▁▁
PDS_P_SS_FEMALE_CAT_NT 6172 0.48 3.00 0.00 3 3 3 3 3 ▁▁▇▁▁
CNA_P_SS_SUM 11878 0.00 NaN NA NA NA NA NA NA
CNA_P_SS_SUM_NM 1166 0.90 14.00 0.00 14 14 14 14 14 ▁▁▇▁▁
CNA_P_SS_SUM_NT 1166 0.90 14.00 0.00 14 14 14 14 14 ▁▁▇▁▁
PDS_P_SS_FEMALE_CAT_2_NM 6192 0.48 0.05 0.29 0 0 0 0 3 ▇▁▁▁▁
PDS_P_SS_FEMALE_CAT_2_NT 6192 0.48 3.00 0.00 3 3 3 3 3 ▁▁▇▁▁
PDS_P_SS_FEMALE_CATEGORY_2 6414 0.46 2.18 0.91 1 1 2 3 5 ▆▅▇▁▁
PDS_P_SS_MALE_CATEGORY_2 5934 0.50 1.36 0.61 1 1 1 2 5 ▇▃▁▁▁
PDS_P_SS_MALE_CAT_2_NM 5676 0.52 0.06 0.32 0 0 0 0 3 ▇▁▁▁▁
PDS_P_SS_MALE_CAT_2_NT 5676 0.52 3.00 0.00 3 3 3 3 3 ▁▁▇▁▁
# sds_p_ss_dims
# Disorders of Initiating and Maintaining Sleep (DIMS) SUM:  sleepdisturb1_p +  sleepdisturb2_p + sleepdisturb3_p + sleepdisturb4_p + sleepdisturb5_p + sleepdisturb10_p + sleepdisturb11_p;  Validation: All items must be answered
# 
# sds_p_ss_sbd
# Sleep Breathing disorders (SBD):  SUM sleepdisturb13_p +  sleepdisturb14_p + sleepdisturb15_p; Validation: All items must be answered
# 
# sds_p_ss_da
# Disorder of Arousal (DA) SUM: sleepdisturb17_p +  sleepdisturb20_p + sleepdisturb21_p;  Validation: All items must be answered
# 
# sds_p_ss_swtd
# Sleep-Wake transition Disorders (SWTD) SUM: sleepdisturb6_p + sleepdisturb7_p + sleepdisturb8_p + sleepdisturb12_p +  sleepdisturb18_p + sleepdisturb19_p; Validation: All items must be answered
# 
# sds_p_ss_does
# Disorders of Excessive Somnolence (DOES) SUM:  sleepdisturb22_p + sleepdisturb23_p +  sleepdisturb24_p +  sleepdisturb25_p + sleepdisturb26_p; Validation: All items must be answered
# 
# sds_p_ss_shy
# Sleep Hyperhydrosis (SHY) SUM: sleepdisturb9_p + sleepdisturb16_p; Validation: All items must be answered
# 
# sds_p_ss_total
# Total Score (Sum of 6 Factors): sds_p_ss_dims + sds_p_ss_sbd + sds_p_ss_da + sds_p_ss_swtd + sds_p_ss_does + sds_p_ss_shy; Validation: All items must be answered

# Combine the two raw sleep items with the sleep summary scores, matching rows
# on participant and event, and give every kept column a readable name.
# PhysicalSum holds baseline rows only, so the summary scores are NA for any
# other event present in sleepDis.
sleepSum <- sleepDis %>%
  full_join(PhysicalSum, by = c("SUBJECTKEY", "EVENTNAME")) %>%
  select(
    SUBJECTKEY,
    EVENTNAME,
    sleep_hours = SLEEPDISTURB1_P,
    sleep_disturb = SLEEPDISTURB2_P,
    sleep_initiate_maintain = SDS_P_SS_DIMS,
    sleep_breath = SDS_P_SS_SBD,
    sleep_arousal = SDS_P_SS_DA,
    sleep_transition = SDS_P_SS_SWTD,
    sleep_somnolence = SDS_P_SS_DOES,
    sleep_hyperhydrosis = SDS_P_SS_SHY,
    sleep_total = SDS_P_SS_TOTAL
  )

# Summarise the renamed sleep variables at baseline.
sleepSum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-SUBJECTKEY, -EVENTNAME) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 9
_______________________
Column type frequency:
numeric 9
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
sleep_hours 5 1 1.72 0.81 1 1 2 2 5 ▇▆▂▁▁
sleep_disturb 5 1 1.93 0.98 1 1 2 2 5 ▇▇▂▁▁
sleep_initiate_maintain 5 1 11.75 3.75 7 9 11 13 35 ▇▃▁▁▁
sleep_breath 5 1 3.77 1.26 3 3 3 4 15 ▇▁▁▁▁
sleep_arousal 5 1 3.44 0.92 3 3 3 4 15 ▇▁▁▁▁
sleep_transition 32 1 8.18 2.63 6 6 7 9 30 ▇▁▁▁▁
sleep_somnolence 6 1 6.95 2.44 5 5 6 8 25 ▇▁▁▁▁
sleep_hyperhydrosis 5 1 2.44 1.18 2 2 2 2 10 ▇▁▁▁▁
sleep_total 33 1 36.53 8.24 26 31 35 40 126 ▇▁▁▁▁

1.9 behavioral

1.9.1 screen time

ABCD Youth Screen Time Survey abcd_stq01

Youth Screen Time Survey This measure includes customized questions about the overall amount of time that the youth spends using visual media, on a typical weekday and weekend day. Media activities assessed include: (1) Watching TV shows or movies; (2) Watching videos (such as YouTube); (3) Playing video games on a computer, console, phone or other device; (4) Texting on a cell phone, tablet, or computer; (5) Visiting social networking sites like Facebook, Twitter, Instagram; (6) Video chat. Seven response options were: none, < 30 minutes, 30 minutes, 1 hour, 2 hours, 3 hours, and 4+ hours.

# Read the Youth Screen Time Survey table as a tibble. All events are kept;
# the baseline filter below is intentionally left disabled.
youthScreen <- paste0(dataFold, "ABCD_STQ01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()
# filter(EVENTNAME =="baseline_year_1_arm_1")

#On a typical weekend/weekday, how many hours do you
#0 = None; .25 = < 30 minutes; 0.5 = 30 minutes; 1 = 1 hour; 2 = 2 hours; 3 = 3 hours; 4 = 4+ hours //Example: 1½ hours would be coded as 1 hour, rather than 2 hours.  
#How often do you play mature-rated video games (e.g., Call of Duty, Grand Theft Auto, Assassin's Creed, etc.)?
#How often do you watch R-rated movies?

# Add total screen time per row: one sum over the columns ending in "WKDY_Y"
# and one over the columns ending in "WKND_Y". rowSums() keeps its default
# na.rm = FALSE, so a row with any missing item gets an NA total.
# SCREEN13_Y / SCREEN14_Y are renamed to the mature games / movies variables.
youthScreenAdded <- youthScreen %>%
  mutate(
    wkdySum_Screen = rowSums(dplyr::select(., ends_with("WKDY_Y"))),
    wkndSum_Screen = rowSums(dplyr::select(., ends_with("WKND_Y")))
  ) %>%
  rename(
    matureGames_Screen = SCREEN13_Y,
    matureMovies_Screen = SCREEN14_Y
  )

# Keep the identifiers and the four derived "_Screen" variables.
youthScreenSum <- select(
  youthScreenAdded,
  SUBJECTKEY, EVENTNAME, ends_with("_Screen")
)

# Summarise the screen variables at baseline.
youthScreenSum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-SUBJECTKEY, -EVENTNAME) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 4
_______________________
Column type frequency:
numeric 4
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
matureGames_Screen 20 1 0.57 0.87 0 0.00 0.0 1.00 3 ▇▃▁▁▁
matureMovies_Screen 21 1 0.38 0.64 0 0.00 0.0 1.00 3 ▇▃▁▁▁
wkdySum_Screen 38 1 3.46 3.10 0 1.25 2.5 4.75 24 ▇▂▁▁▁
wkndSum_Screen 43 1 4.62 3.63 0 2.00 3.5 6.25 24 ▇▃▁▁▁

1.9.2 maternal substance use

# ABCD Developmental History Questionnaire.
# This table names its event column VISIT; rename it to EVENTNAME so it lines
# up with the other tables. All events are kept (no baseline filter here).
DevHis <- paste0(dataFold, "DHX01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  rename(EVENTNAME = VISIT)

#glimpse(DevHis)

# Summarise every column whose name starts with "DEVHX_8" at baseline.
DevHis %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(starts_with("DEVHX_8")) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 83
_______________________
Column type frequency:
character 3
logical 10
numeric 70
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
DEVHX_8_RXNORM_MED1 0 1 0 94 10580 541 0
DEVHX_8_OTHER1_NAME_OTH 0 1 0 36 11856 19 0
DEVHX_8_OTHER3_NAME_OTH 0 1 0 30 11874 5 0

Variable type: logical

skim_variable n_missing complete_rate mean count
DEVHX_8_PRESCRIPT_YES 11878 0 NaN :
DEVHX_8_OTHER2_NAME_OTH 11878 0 NaN :
DEVHX_8_OTHER4_NAME_OTH 11878 0 NaN :
DEVHX_8_OTHER4_TIMES 11878 0 NaN :
DEVHX_8_OTHER4_AMT 11878 0 NaN :
DEVHX_8_OTHER4_UNIT 11878 0 NaN :
DEVHX_8_OTHER5_NAME_OTH 11878 0 NaN :
DEVHX_8_OTHER5_TIMES 11878 0 NaN :
DEVHX_8_OTHER5_AMT 11878 0 NaN :
DEVHX_8_OTHER5_UNIT 11878 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
DEVHX_8_PRESCRIPT_MED 5 1.00 73.99 261.41 0 0.00 0.0 0.00 999 ▇▁▁▁▁
DEVHX_8_MED1_PRN 11247 0.05 36.69 187.31 0 0.00 0.0 1.00 999 ▇▁▁▁▁
DEVHX_8_MED1_TIMES 10808 0.09 1.26 0.75 0 1.00 1.0 1.00 10 ▇▁▁▁▁
DEVHX_8_MED1_TIMES_DK 11720 0.01 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_MED1_HOW_MUCH 10971 0.08 82.50 191.78 0 1.00 10.0 88.00 2000 ▇▁▁▁▁
DEVHX_8_MED1_HOW_MUCH_DK 11454 0.04 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_MED1_UNIT 11046 0.07 2.69 0.79 1 2.75 3.0 3.00 10 ▃▇▁▁▁
DEVHX_8_MED1_FU 11101 0.07 0.25 0.43 0 0.00 0.0 1.00 1 ▇▁▁▁▃
DEVHX_8_RXNORM_MED2 11693 0.02 621559.43 539911.02 281 196496.00 372861.0 1151133.00 1806414 ▇▁▂▃▁
DEVHX_8_MED2_PRN 11711 0.01 36.19 186.43 0 0.00 0.0 1.00 999 ▇▁▁▁▁
DEVHX_8_MED2_TIMES 11732 0.01 1.40 0.77 0 1.00 1.0 2.00 6 ▇▂▁▁▁
DEVHX_8_MED2_TIMES_DK 11846 0.00 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_MED2_HOW_MUCH 11784 0.01 87.57 247.66 0 1.00 2.5 50.00 2000 ▇▁▁▁▁
DEVHX_8_MED2_HOW_MUCH_DK 11790 0.01 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_MED2_UNIT 11793 0.01 2.74 1.09 1 2.00 3.0 3.00 10 ▃▇▁▁▁
DEVHX_8_MED2_FU 11700 0.01 0.26 0.44 0 0.00 0.0 1.00 1 ▇▁▁▁▃
DEVHX_8_RXNORM_MED3 11833 0.00 537198.29 483611.33 1191 82728.00 324026.0 1151133.00 1187033 ▇▁▁▂▅
DEVHX_8_MED3_PRN 11848 0.00 0.27 0.45 0 0.00 0.0 0.75 1 ▇▁▁▁▃
DEVHX_8_MED3_TIMES 11821 0.00 1.30 0.80 0 1.00 1.0 1.00 4 ▁▇▂▁▁
DEVHX_8_MED3_TIMES_DK 11868 0.00 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_MED3_HOW_MUCH 11854 0.00 189.33 478.06 0 1.00 1.5 42.50 2000 ▇▁▁▁▁
DEVHX_8_MED3_HOW_MUCH_DK 11855 0.00 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_MED3_UNIT 11858 0.00 2.55 0.76 1 2.00 3.0 3.00 3 ▂▁▂▁▇
DEVHX_8_TOBACCO 5 1.00 23.11 149.72 0 0.00 0.0 0.00 999 ▇▁▁▁▁
DEVHX_8_CIGS_PER_DAY 10454 0.12 8.35 6.44 0 4.00 6.0 10.00 80 ▇▁▁▁▁
DEVHX_8_CIGS_PER_DAY_DK 11691 0.02 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_ALCOHOL 5 1.00 57.29 231.76 0 0.00 0.0 1.00 999 ▇▁▁▁▁
DEVHX_8_ALCHOHOL_MAX 9336 0.21 2.40 1.50 0 1.00 2.0 3.00 20 ▇▁▁▁▁
DEVHX_8_ALCHOHOL_MAX_DK 11538 0.03 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_ALCHOHOL_AVG 9371 0.21 3.97 4.38 0 1.00 3.0 5.00 52 ▇▁▁▁▁
DEVHX_8_ALCHOHOL_AVG_DK 11503 0.03 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_ALCOHOL_EFFECTS 9514 0.20 2.08 1.24 0 1.00 2.0 2.00 20 ▇▁▁▁▁
DEVHX_8_ALCOHOL_EFFECTS_DK 11360 0.04 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_MARIJUANA 5 1.00 28.24 165.42 0 0.00 0.0 0.00 999 ▇▁▁▁▁
DEVHX_8_MARIJUANA_AMT 11389 0.04 2.09 2.49 0 1.00 2.0 3.00 40 ▇▁▁▁▁
DEVHX_8_MARIJUANA_AMT_DK 11687 0.02 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_COC_CRACK 5 1.00 23.74 152.13 0 0.00 0.0 0.00 999 ▇▁▁▁▁
DEVHX_8_COC_CRACK_AMT 11845 0.00 3.00 3.71 0 1.00 2.0 3.00 20 ▇▁▁▁▁
DEVHX_8_COC_CRACK_AMT_DK 11817 0.01 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_HER_MORPH 5 1.00 24.40 154.22 0 0.00 0.0 0.00 999 ▇▁▁▁▁
DEVHX_8_HER_MORPH_AMT 11875 0.00 2.67 1.15 2 2.00 2.0 3.00 4 ▇▁▁▁▃
DEVHX_8_HER_MORPH_AMT_DK 11857 0.00 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_OXYCONT 5 1.00 25.08 156.28 0 0.00 0.0 0.00 999 ▇▁▁▁▁
DEVHX_8_OXYCONT_AMT 11863 0.00 2.13 1.19 0 1.00 2.0 3.00 4 ▂▇▇▇▃
DEVHX_8_OXYCONT_AMT_DK 11851 0.00 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_OTHER_DRUGS 5 1.00 32.07 176.07 0 0.00 0.0 0.00 999 ▇▁▁▁▁
DEVHX_8_OTHER1_NAME_2 11781 0.01 46.53 198.62 0 1.00 3.0 12.00 999 ▇▁▁▁▁
DEVHX_8_OTHER1_TIMES 11831 0.00 1.94 1.98 0 1.00 1.0 2.00 10 ▇▂▁▁▁
DEVHX_8_OTHER1_TIMES_DK 11826 0.00 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_OTHER1_AMT 11847 0.00 92.94 286.57 0 1.00 2.0 16.50 1500 ▇▁▁▁▁
DEVHX_8_OTHER1_AMT_DK 11810 0.01 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_OTHER1_UNIT 11848 0.00 3.20 2.11 1 1.25 3.0 3.00 7 ▆▇▁▁▃
DEVHX_8_OTHER2_NAME_2 11799 0.01 13.77 112.32 0 0.00 0.0 0.00 999 ▇▁▁▁▁
DEVHX_8_OTHER2_TIMES 11873 0.00 1.40 0.89 1 1.00 1.0 1.00 3 ▇▁▁▁▂
DEVHX_8_OTHER2_TIMES_DK 11871 0.00 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_OTHER2_AMT 11876 0.00 6.00 2.83 4 5.00 6.0 7.00 8 ▇▁▁▁▇
DEVHX_8_OTHER2_AMT_DK 11868 0.00 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_OTHER2_UNIT 11876 0.00 5.00 2.83 3 4.00 5.0 6.00 7 ▇▁▁▁▇
DEVHX_8_OTHER3_NAME_2 11870 0.00 3.00 5.26 0 0.00 0.0 3.50 12 ▇▁▁▁▂
DEVHX_8_OTHER3_TIMES 11876 0.00 2.00 1.41 1 1.50 2.0 2.50 3 ▇▁▁▁▇
DEVHX_8_OTHER3_TIMES_DK 11877 0.00 999.00 NA 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_OTHER3_AMT 11877 0.00 10.00 NA 10 10.00 10.0 10.00 10 ▁▁▇▁▁
DEVHX_8_OTHER3_AMT_DK 11876 0.00 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_OTHER3_UNIT 11877 0.00 3.00 NA 3 3.00 3.0 3.00 3 ▁▁▇▁▁
DEVHX_8_OTHER4_NAME_2 11875 0.00 0.33 0.58 0 0.00 0.0 0.50 1 ▇▁▁▁▃
DEVHX_8_OTHER4_TIMES_DK 11877 0.00 999.00 NA 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_OTHER4_AMT_DK 11877 0.00 999.00 NA 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_OTHER5_NAME_2 11877 0.00 1.00 NA 1 1.00 1.0 1.00 1 ▁▁▇▁▁
DEVHX_8_OTHER5_TIMES_DK 11877 0.00 999.00 NA 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_OTHER5_AMT_DK 11877 0.00 999.00 NA 999 999.00 999.0 999.00 999 ▁▁▇▁▁
#devhx_8_tobacco
#Before knowing of pregnancy. Tobacco? How many times per day?/ ¿Cuántas veces al día?

#devhx_9_tobacco
#Knowing of pregnancy. Tobacco? How many times per day?/ ¿Cuántas veces al día?

#devhx_8_alcohol
#Before knowing of pregnancy. Alcohol? /¿Alcohol?

#devhx_9_alcohol
#Knowing of pregnancy. Alcohol? /¿Alcohol?

#devhx_8_marijuana
#Before knowing of pregnancy. Marijuana? /¿Marihuana?

#devhx_9_marijuana
#Knowing of pregnancy. Marijuana? /¿Marihuana?

# change name and replace 999 with na

# Maternal substance use before (DEVHX_8_*) and after (DEVHX_9_*) knowing of
# the pregnancy, for tobacco, alcohol and marijuana.
# The 999 code (presumably "don't know" - confirm against the data dictionary)
# is turned into NA while the columns are still numeric, and only then are they
# converted to factors. Calling na_if() with a numeric 999 on a factor relies
# on implicit coercion and errors in dplyr >= 1.1, which requires `y` to be
# castable to the type of `x`.
momSubstanceUse <- DevHis %>%
  mutate(
    tobacco_before_preg = DEVHX_8_TOBACCO,
    tobacco_after_preg = DEVHX_9_TOBACCO,
    alcohol_before_preg = DEVHX_8_ALCOHOL,
    alcohol_after_preg = DEVHX_9_ALCOHOL,
    marijuana_before_preg = DEVHX_8_MARIJUANA,
    marijuana_after_preg = DEVHX_9_MARIJUANA
  ) %>%
  mutate(across(ends_with("_preg"), ~ as.factor(na_if(.x, 999)))) %>%
  select(SUBJECTKEY, EVENTNAME, ends_with("_preg")) %>%
  # Kept for parity with the earlier version; drops any unused factor levels.
  droplevels()

# Summarise the six substance-use factors at baseline.
momSubstanceUse %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-SUBJECTKEY, -EVENTNAME) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 6
_______________________
Column type frequency:
factor 6
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
tobacco_before_preg 278 0.98 FALSE 2 0: 9989, 1: 1611
tobacco_after_preg 265 0.98 FALSE 2 0: 10993, 1: 620
alcohol_before_preg 683 0.94 FALSE 2 0: 8313, 1: 2882
alcohol_after_preg 293 0.98 FALSE 2 0: 11270, 1: 315
marijuana_before_preg 340 0.97 FALSE 2 0: 10851, 1: 687
marijuana_after_preg 277 0.98 FALSE 2 0: 11356, 1: 245

1.9.3 developmental adversity

# Summarise, at baseline, the birth-weight columns and the DEVHX_10/12/13/14
# item families that the adversity scores are built from.
DevHis %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(
    starts_with(
      c("BIRTH_WEIGHT", "DEVHX_10", "DEVHX_12", "DEVHX_13", "DEVHX_14")
    )
  ) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 27
_______________________
Column type frequency:
logical 2
numeric 25
________________________
Group variables None

Variable type: logical

skim_variable n_missing complete_rate mean count
BIRTH_WEIGHT_LBS 11878 0 NaN :
BIRTH_WEIGHT_OZ 11878 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
DEVHX_10 9 1.00 39.30 191.84 -1 1 1 1 999 ▇▁▁▁▁
DEVHX_10A3_P 4 1.00 32.53 176.93 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10B3_P 4 1.00 27.22 162.51 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10C3_P 4 1.00 32.47 176.94 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10D3_P 4 1.00 27.94 164.70 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10E3_P 4 1.00 34.16 181.55 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10F3_P 4 1.00 26.25 159.80 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10G3_P 4 1.00 31.51 174.48 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10H3_P 4 1.00 39.36 194.18 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10I3_P 4 1.00 30.86 172.66 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10J3_P 4 1.00 34.00 180.88 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10K3_P 4 1.00 30.07 170.60 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10L3_P 4 1.00 26.69 161.04 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10M3_P 4 1.00 29.02 167.55 0 0 0 0 999 ▇▁▁▁▁
DEVHX_12A_P 5 1.00 11.97 107.82 0 0 0 0 999 ▇▁▁▁▁
DEVHX_12_P 9670 0.19 17.10 109.30 1 3 4 6 999 ▇▁▁▁▁
DEVHX_13_3_P 5 1.00 12.58 109.69 0 0 0 1 999 ▇▁▁▁▁
DEVHX_14A3_P 5 1.00 26.70 161.04 0 0 0 0 999 ▇▁▁▁▁
DEVHX_14B3_P 5 1.00 28.55 166.38 0 0 0 0 999 ▇▁▁▁▁
DEVHX_14C3_P 5 1.00 25.04 156.01 0 0 0 0 999 ▇▁▁▁▁
DEVHX_14D3_P 5 1.00 18.43 134.43 0 0 0 0 999 ▇▁▁▁▁
DEVHX_14E3_P 5 1.00 24.22 153.15 0 0 0 0 999 ▇▁▁▁▁
DEVHX_14F3_P 5 1.00 25.08 156.01 0 0 0 0 999 ▇▁▁▁▁
DEVHX_14G3_P 5 1.00 17.42 130.76 0 0 0 0 999 ▇▁▁▁▁
DEVHX_14H3_P 6 1.00 34.36 181.99 0 0 0 0 999 ▇▁▁▁▁
#devhx_12a_p
#Was the child born prematurely? / ¿Nació el niño o la niña antes de tiempo?
# Developmental-adversity scores, one row per SUBJECTKEY x EVENTNAME.
#   deveplopment_prematurity             - DEVHX_12A_P as a factor
#   deveplopment_birth_complications     - row sum of all DEVHX_14* columns
#   deveplopment_pregnancy_complications - row sum of DEVHX_10A3_P..DEVHX_10L3_P
# Both sums are NA whenever any contributing item is NA (rowSums default).
# NOTE(review): the positional range stops at L, so DEVHX_10M3_P is not
# counted - confirm that this exclusion is intended.
adversitySum <- DevHis %>%
  # 999 is recoded to NA in every DEVHX_ column before scoring.
  mutate(across(starts_with("DEVHX_"), ~ na_if(.x, 999))) %>%
  mutate(
    deveplopment_prematurity = as.factor(DEVHX_12A_P),
    deveplopment_birth_complications =
      rowSums(select(., starts_with("DEVHX_14"))),
    # A birth-weight conversion (BIRTH_WEIGHT_LBS * 0.453592) is left out:
    # BIRTH_WEIGHT_LBS is entirely NA in the baseline summary.
    deveplopment_pregnancy_complications =
      rowSums(select(., DEVHX_10A3_P:DEVHX_10L3_P))
  ) %>%
  select(SUBJECTKEY, EVENTNAME, starts_with("deveplopment_"))

# Baseline distribution of the derived adversity scores (identifiers dropped).
adversitySum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 3
_______________________
Column type frequency:
factor 1
numeric 2
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
deveplopment_prematurity 145 0.99 FALSE 2 0: 9525, 1: 2208

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
deveplopment_birth_complications 760 0.94 0.37 0.75 0 0 0 1 8 ▇▁▁▁▁
deveplopment_pregnancy_complications 744 0.94 0.61 1.02 0 0 0 1 12 ▇▁▁▁▁

1.9.4 brain trauma

most events are quite rare.

# Read the ABCD_OTBI01 table from dataFold as a tibble.
brainTruma <- paste0(dataFold, "ABCD_OTBI01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# Baseline summary of every column except the first eight, which are dropped
# by position (presumably administrative fields - verify against the file).
brainTruma %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-(1:8)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 40
_______________________
Column type frequency:
character 1
logical 3
numeric 36
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
STUDY_COHORT_NAME 0 1 21 21 0 1 0

Variable type: logical

skim_variable n_missing complete_rate mean count
TBI_8I 11878 0 NaN :
TBI_8K 11878 0 NaN :
TBI_8L 11878 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
TBI_SELECT_LANGUAGE___1 0 1.00 0.05 0.23 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_1 4 1.00 0.12 0.33 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_1B 10436 0.12 0.07 0.28 0.0 0.00 0.0 0.00 3 ▇▁▁▁▁
TBI_1C 10438 0.12 0.17 0.37 0.0 0.00 0.0 0.00 1 ▇▁▁▁▂
TBI_1D 10437 0.12 4.98 2.63 0.0 3.00 5.0 7.00 11 ▇▇▇▇▃
TBI_2 4 1.00 0.02 0.12 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_2B 11690 0.02 0.04 0.19 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_2C 11690 0.02 0.12 0.33 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_2D 11690 0.02 6.61 2.18 0.0 5.00 7.0 8.00 10 ▁▂▅▇▃
TBI_3 4 1.00 0.12 0.33 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_3B 10398 0.12 0.04 0.21 0.0 0.00 0.0 0.00 3 ▇▁▁▁▁
TBI_3C 10399 0.12 0.16 0.37 0.0 0.00 0.0 0.00 1 ▇▁▁▁▂
TBI_3D 10400 0.12 5.86 2.70 0.0 4.00 6.0 8.00 11 ▅▅▆▇▅
TBI_4 4 1.00 0.00 0.07 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_4B 11819 0.00 0.02 0.13 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_4C 11819 0.00 0.17 0.38 0.0 0.00 0.0 0.00 1 ▇▁▁▁▂
TBI_4D 11819 0.00 6.53 2.44 0.0 5.00 7.0 8.00 10 ▂▂▅▇▅
TBI_5 4 1.00 0.00 0.03 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_5B 11868 0.00 0.00 0.00 0.0 0.00 0.0 0.00 0 ▁▁▇▁▁
TBI_5C 11868 0.00 0.00 0.00 0.0 0.00 0.0 0.00 0 ▁▁▇▁▁
TBI_5D 11868 0.00 5.10 2.91 1.5 2.25 5.5 7.75 9 ▇▁▃▂▆
TBI_6O 4 1.00 0.00 0.03 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_6P 11869 0.00 1.67 1.12 1.0 1.00 1.0 2.00 4 ▇▁▁▁▁
TBI_6Q 11869 0.00 1.78 1.20 0.0 1.00 2.0 2.00 4 ▂▇▇▂▂
TBI_6R 11869 0.00 0.00 0.00 0.0 0.00 0.0 0.00 0 ▁▁▇▁▁
TBI_6S 11869 0.00 4.56 3.71 1.0 1.00 3.0 8.00 10 ▇▂▂▂▃
TBI_7A 4 1.00 0.01 0.09 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_7C1 11771 0.01 0.07 0.37 0.0 0.00 0.0 0.00 3 ▇▁▁▁▁
TBL_7C2 11775 0.01 0.08 0.27 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_7E 11771 0.01 4.50 3.63 0.0 1.00 5.0 8.00 10 ▇▁▂▅▃
TBI_7F 11771 0.01 5.62 4.11 0.0 0.50 8.0 9.00 10 ▆▂▁▂▇
TBI_7G 11771 0.01 0.02 0.14 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_7I 11876 0.00 1.00 0.00 1.0 1.00 1.0 1.00 1 ▁▁▇▁▁
TBI_7K 11876 0.00 4.00 5.66 0.0 2.00 4.0 6.00 8 ▇▁▁▁▇
TBI_7L 11876 0.00 4.00 5.66 0.0 2.00 4.0 6.00 8 ▇▁▁▁▇
TBI_8G 11876 0.00 0.00 0.00 0.0 0.00 0.0 0.00 0 ▁▁▇▁▁

1.10 culture

1.10.1 bilingual

https://www.nature.com/articles/s41562-019-0609-3 https://github.com/anthonystevendick/bilingual_abcd/blob/master/bilingual_analysis.r accult_q1_y How well do you speak English?
1 = Poor; 2 = Fair; 3 = Good; 4 = Excellent accult_q2_y Besides English, do you speak or understand another language or dialect? If child asks about languages learned in school, the RA should state: That’s OK, as long as it is a language or dialect that you speak or understand.
1 = Poor Mal; 2 = Fair Regular; 3 = Good Bien; 4 = Excellent Excelente; 777 = Refused Niego contestar; 999 = Don’t Know No se
accult_q4_y What language do you speak with most of your friends?
1 = (Other language) all the time; 2 = (Other language) most of the time; 3 = (Other language) and English equally; 4 = English most of the time; 5 = English all the time
accult_q5_y What language do you speak with most of your family? 1 = (Other language) all the time; 2 = (Other language) most of the time; 3 = (Other language) and English equally; 4 = English most of the time; 5 = English all the time

# Read the YACC01 table from dataFold as a tibble. The baseline-only filter
# below stays commented out, so rows from every EVENTNAME are kept.
bilingual <- paste0(dataFold, "YACC01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()
# %>% filter(EVENTNAME == "baseline_year_1_arm_1")

#bilingual_status
# #recode the accult_q2_y variable into a binary "Bilingual Status", 0 = not bilingual; 1 = bilingual
# 
# bilingual_status <- biLingual$ACCULT_Q2_Y
# sum(is.na(bilingual_status))

#bilingual_degree
# #dimension a 'bilingual degree' variable, where 1 = participant said they were bilingual, and they speak the other language with friends all the time, most of the time,
# #or equally, OR they speak the other language with family all the time, most of the time, or equally.
# 
# bilingual_degree <- ifelse(bilingual_status == 0, 0, ifelse(bilingual_status == 1 & (as.numeric(accult_q4_y) <= 3 | as.numeric(accult_q5_y) <= 3), 1, NA))
# count(bilingual_degree) #check the data
# sum(is.na(bilingual_degree))
#### here I change it such that 0 = non-bilingual, 1 = bilingual who use (Other language) >= English with friends or family (accult_q4_y <= 3 or accult_q5_y <= 3), 2 = bilingual who use (Other language) < English with both friends and family

#bilingual_use
# 
# #dimension a continuous 'bilingual use' variable, and reverse-score so that if participants speak the other language with friends all the time, most of the time...,
# #they will receive high scores on this measure (range 0-8, with 8 indicating a high-degree of other language use)
# 
# bilingual_use<-10-(as.numeric(abcd_subset$accult_q4_y)+as.numeric(abcd_subset$accult_q5_y))
# sum(is.na(bilingual_use))
####  here I change it such that 0 = non-bilingual; bilinguals get 11 - (accult_q4_y + accult_q5_y), i.e. 1-9, with 9 indicating the highest degree of other-language use


# Derive three bilingualism measures from the youth acculturation items.
#   bilingual_status - ACCULT_Q2_Y as a factor (0/1 in the baseline data).
#   bilingual_degree - 0 = status 0;
#                      1 = status 1 and the other language is used at least as
#                          much as English (code <= 3) with friends (Q4) or
#                          family (Q5);
#                      2 = status 1 and English is used more (code > 3) with
#                          both friends and family;
#                      NA otherwise (including when Q4/Q5 are too incomplete
#                      to decide).
#   bilingual_use    - 0 for status 0; otherwise 11 - (Q4 + Q5), so higher
#                      values mean more use of the other language (1-9).
# Both sentinel codes, 777 (refused) and 999 (don't know), are treated as
# missing. Previously only 777 on ACCULT_Q2_Y was handled, so a 999 would have
# become its own factor level, and a sentinel on Q4/Q5 would have produced a
# negative bilingual_use. NOTE(review): Q4/Q5 are assumed to share the same
# 777/999 codes - confirm against the data dictionary.
bilingualAdded <- bilingual %>%
  mutate(
    bilingual_status = factor(
      ifelse(ACCULT_Q2_Y %in% c(777, 999), NA, ACCULT_Q2_Y)
    ),
    # Temporary cleaned copies of Q4/Q5; removed again at the end of this call.
    friendsLanguage = as.numeric(
      ifelse(ACCULT_Q4_Y %in% c(777, 999), NA, ACCULT_Q4_Y)
    ),
    familyLanguage = as.numeric(
      ifelse(ACCULT_Q5_Y %in% c(777, 999), NA, ACCULT_Q5_Y)
    ),
    # Nested ifelse() is kept on purpose: an NA condition yields NA instead of
    # falling through to the next branch.
    bilingual_degree = factor(
      ifelse(
        bilingual_status == 0, 0,
        ifelse(
          bilingual_status == 1 &
            (friendsLanguage <= 3 | familyLanguage <= 3), 1,
          ifelse(
            bilingual_status == 1 &
              (friendsLanguage > 3 | familyLanguage > 3), 2,
            NA
          )
        )
      )
    ),
    bilingual_use = ifelse(
      bilingual_status == 0, 0,
      11 - (friendsLanguage + familyLanguage)
    ),
    friendsLanguage = NULL,
    familyLanguage = NULL
  )

# Keep the identifiers plus the derived bilingual_* columns.
bilingualSum <- select(
  bilingualAdded,
  SUBJECTKEY, EVENTNAME, starts_with("bilingual_")
)

# Baseline distribution of the derived bilingual measures.
bilingualSum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 10920
Number of columns 3
_______________________
Column type frequency:
factor 2
numeric 1
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
bilingual_status 74 0.99 FALSE 2 0: 6756, 1: 4090
bilingual_degree 74 0.99 FALSE 3 0: 6756, 2: 2544, 1: 1546

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
bilingual_use 74 0.99 1.02 1.69 0 0 0 1 9 ▇▂▁▁▁

1.11 social demographics

Variables in this section: race/ethnicity (from ACS); sex (from ACS); family income; family type; household size; parents’ work status (demo_prnt_empl_v2; a bit too much to include); parents’ education; sum of economic insecurities (sumEcon_insecurities).

1.11.1 ABCD Parent Demographics Survey

# Read the PDEM02 parent demographics table from dataFold as a tibble.
demograp <- paste0(dataFold, "PDEM02_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# Baseline summary of every column except the first eight, which are dropped
# by position.
demograp %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-(1:8)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 124
_______________________
Column type frequency:
character 2
logical 2
numeric 120
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
EVENTNAME 0 1 21 21 0 1 0
STUDY_COHORT_NAME 0 1 21 21 0 1 0

Variable type: logical

skim_variable n_missing complete_rate mean count
DEMO_RACE_NOTES_V2 11878 0 NaN :
DEMO_RELIG2_V2 11878 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
DEMO_PRIM 0 1.00 1.23 0.67 1 1.00 1.0 1.00 5 ▇▁▁▁▁
DEMO_BRTHDAT_V2 11 1.00 9.48 0.51 8 9.00 9.0 10.00 11 ▁▇▁▇▁
DEMO_ED_V2 2 1.00 4.21 0.79 0 4.00 4.0 5.00 12 ▁▇▅▁▁
DEMO_ADOPT_AGEX_V2 11603 0.02 2.17 2.43 0 0.00 1.0 3.00 10 ▇▂▁▁▁
DEMO_ADOPT_AGEX_V2_BL_DK 11874 0.00 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEMO_SEX_V2 0 1.00 1.48 0.50 1 1.00 1.0 2.00 3 ▇▁▇▁▁
DEMO_GENDER_ID_V2 2 1.00 2.20 26.22 1 1.00 1.0 2.00 999 ▇▁▁▁▁
DEMO_RACE_A_P___10 0 1.00 0.74 0.44 0 0.00 1.0 1.00 1 ▃▁▁▁▇
DEMO_RACE_A_P___11 0 1.00 0.21 0.41 0 0.00 0.0 0.00 1 ▇▁▁▁▂
DEMO_RACE_A_P___12 0 1.00 0.03 0.18 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___13 0 1.00 0.00 0.02 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___14 0 1.00 0.00 0.04 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___15 0 1.00 0.00 0.01 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___16 0 1.00 0.00 0.03 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___17 0 1.00 0.00 0.06 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___18 0 1.00 0.01 0.10 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___19 0 1.00 0.02 0.13 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___20 0 1.00 0.01 0.12 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___21 0 1.00 0.01 0.08 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___22 0 1.00 0.01 0.09 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___23 0 1.00 0.01 0.07 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___24 0 1.00 0.01 0.09 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___25 0 1.00 0.07 0.25 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___77 0 1.00 0.00 0.07 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___99 0 1.00 0.01 0.09 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_ETHN_V2 10 1.00 12.93 101.49 1 2.00 2.0 2.00 999 ▇▁▁▁▁
DEMO_ETHN2_V2 9479 0.20 34.04 124.33 10 13.00 14.0 20.00 999 ▇▁▁▁▁
DEMO_ORIGIN_V2 2 1.00 187.28 31.07 1 189.00 189.0 189.00 999 ▇▁▁▁▁
DEMO_YEARS_US_V2 8016 0.33 9.47 0.94 0 9.00 10.0 10.00 19 ▁▁▇▁▁
DEMO_YEARS_US_V2_DK 11859 0.00 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEMO_RELIG_V2 4 1.00 52.61 197.16 1 4.00 11.0 17.00 999 ▇▁▁▁▁
DEMO_PRNT_AGE_V2 91 0.99 39.96 6.84 23 35.00 40.0 44.00 80 ▃▇▂▁▁
DEMO_PRNT_AGE_V2_BL_REFUSE 11791 0.01 863.76 108.95 777 777.00 777.0 999.00 999 ▇▁▁▁▅
DEMO_PRNT_GENDER_ID_V2 2 1.00 2.24 16.91 1 2.00 2.0 2.00 999 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___10 0 1.00 0.73 0.44 0 0.00 1.0 1.00 1 ▃▁▁▁▇
DEMO_PRNT_RACE_A_V2___11 0 1.00 0.17 0.38 0 0.00 0.0 0.00 1 ▇▁▁▁▂
DEMO_PRNT_RACE_A_V2___12 0 1.00 0.03 0.16 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___13 0 1.00 0.00 0.01 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___14 0 1.00 0.00 0.04 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___15 0 1.00 0.00 0.01 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___16 0 1.00 0.00 0.02 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___17 0 1.00 0.00 0.05 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___18 0 1.00 0.01 0.08 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___19 0 1.00 0.01 0.11 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___20 0 1.00 0.01 0.10 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___21 0 1.00 0.01 0.07 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___22 0 1.00 0.01 0.07 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___23 0 1.00 0.00 0.05 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___24 0 1.00 0.01 0.07 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___25 0 1.00 0.06 0.24 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___77 0 1.00 0.01 0.08 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___99 0 1.00 0.01 0.11 0 0.00 0.0 0.00 1 ▇▁▁▁▁
NAAS_ID 11472 0.03 100.79 297.31 1 2.00 2.0 4.00 999 ▇▁▁▁▁
NAAS_MOM_ID 11472 0.03 155.13 358.70 1 2.00 4.0 5.00 999 ▇▁▁▁▂
NAAS_ID_DAD 11472 0.03 268.13 440.53 1 2.00 5.0 999.00 999 ▇▁▁▁▃
NAAS_BIRTHPLACE 11472 0.03 249.36 429.07 1 5.00 5.0 5.00 999 ▇▁▁▁▂
NAAS_RAISED 11472 0.03 227.40 415.23 1 5.00 5.0 5.00 999 ▇▁▁▁▂
NAAS_COMM_CONTACT 11472 0.03 209.88 403.54 1 4.00 5.0 5.00 999 ▇▁▁▁▂
NAAS_PRIDE 11472 0.03 115.17 316.32 1 1.00 2.0 3.00 999 ▇▁▁▁▁
NAAS_SELF_RATING 11472 0.03 59.91 230.42 1 3.00 4.0 4.00 999 ▇▁▁▁▁
NAAS_TRADITIONS 11472 0.03 53.31 215.53 1 4.00 5.0 5.00 999 ▇▁▁▁▁
DEMO_PRNT_ETHN_V2 2 1.00 7.28 71.20 1 2.00 2.0 2.00 999 ▇▁▁▁▁
DEMO_PRNT_ETHN2_V2 9879 0.17 21.38 62.54 10 13.00 14.0 20.00 999 ▇▁▁▁▁
DEMO_PRNT_16 2 1.00 0.32 0.47 0 0.00 0.0 1.00 1 ▇▁▁▁▃
DEMO_PRNT_16A 3817 0.68 0.99 0.11 0 1.00 1.0 1.00 1 ▁▁▁▁▇
DEMO_PRNT_ORIGIN_V2 7965 0.33 142.07 62.37 1 111.00 185.0 189.00 999 ▇▁▁▁▁
DEMO_BIOFATHER_V2 7965 0.33 149.84 102.20 1 111.00 186.0 189.00 999 ▇▁▁▁▁
DEMO_BIOMOTHER_V2 7965 0.33 149.56 80.95 1 111.00 189.0 189.00 999 ▇▁▁▁▁
DEMO_MATGRANDM_V2 7965 0.33 137.89 103.11 1 82.00 125.0 189.00 999 ▇▁▁▁▁
DEMO_MATGRANDF_V2 7965 0.33 141.08 116.07 1 82.00 125.0 189.00 999 ▇▁▁▁▁
DEMO_PATGRANDM_V2 7965 0.33 149.74 144.75 1 81.00 125.0 189.00 999 ▇▁▁▁▁
DEMO_PATGRANDF_V2 7965 0.33 154.67 158.47 1 82.00 125.0 189.00 999 ▇▁▁▁▁
DEMO_PRNT_YEARS_US_V2 1948 0.84 35.90 10.77 1 31.00 38.0 43.00 100 ▂▇▅▁▁
DEMO_PRNT_YEARS_US_V2_DK 11747 0.01 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEMO_PRNT_MARITAL_V2 2 1.00 8.23 68.69 1 1.00 1.0 3.00 777 ▇▁▁▁▁
DEMO_PRNT_ED_V2 0 1.00 17.68 28.88 1 15.00 18.0 19.00 777 ▇▁▁▁▁
DEMO_PRNT_EMPL_V2 2 1.00 5.97 52.17 1 1.00 1.0 5.00 777 ▇▁▁▁▁
DEMO_PRNT_EMPL_TIME 3693 0.69 1.27 0.44 1 1.00 1.0 2.00 2 ▇▁▁▁▃
DEMO_PRNT_INCOME_V2 2 1.00 84.94 253.45 1 3.00 6.0 8.00 999 ▇▁▁▁▁
DEMO_PRNT_PRTNR_V2 2 1.00 9.23 78.55 1 1.00 1.0 1.00 777 ▇▁▁▁▁
DEMO_PRNT_PRTNR_BIO 2398 0.80 2.53 32.49 1 1.00 1.0 1.00 999 ▇▁▁▁▁
DEMO_PRNT_PRTNR_ADOPT 10096 0.15 0.20 0.40 0 0.00 0.0 0.00 1 ▇▁▁▁▂
DEMO_PRTNR_ED_V2 2401 0.80 22.91 78.49 0 15.00 18.0 18.00 999 ▇▁▁▁▁
DEMO_PRTNR_EMPL_V2 2454 0.79 11.16 90.79 1 1.00 1.0 1.00 999 ▇▁▁▁▁
DEMO_PRTNR_EMPL_TIME 3665 0.69 1.06 0.23 1 1.00 1.0 1.00 2 ▇▁▁▁▁
DEMO_PRTNR_INCOME_V2 2397 0.80 95.58 269.81 1 6.00 7.0 9.00 999 ▇▁▁▁▁
DEMO_COMB_INCOME_V2 2 1.00 82.50 248.26 1 6.00 8.0 9.00 999 ▇▁▁▁▁
DEMO_FAM_EXP1_V2 2 1.00 4.92 61.14 0 0.00 0.0 0.00 777 ▇▁▁▁▁
DEMO_FAM_EXP2_V2 2 1.00 2.94 47.21 0 0.00 0.0 0.00 777 ▇▁▁▁▁
DEMO_FAM_EXP3_V2 2 1.00 3.97 54.63 0 0.00 0.0 0.00 777 ▇▁▁▁▁
DEMO_FAM_EXP4_V2 3 1.00 1.98 39.01 0 0.00 0.0 0.00 777 ▇▁▁▁▁
DEMO_FAM_EXP5_V2 3 1.00 2.74 45.58 0 0.00 0.0 0.00 777 ▇▁▁▁▁
DEMO_FAM_EXP6_V2 2 1.00 2.54 43.88 0 0.00 0.0 0.00 777 ▇▁▁▁▁
DEMO_FAM_EXP7_V2 2 1.00 3.18 48.78 0 0.00 0.0 0.00 777 ▇▁▁▁▁
DEMO_ROSTER_V2 279 0.98 4.72 1.77 0 4.00 4.0 5.00 77 ▇▁▁▁▁
DEMO_ROSTER_V2_REFUSE 11600 0.02 850.47 104.65 777 777.00 777.0 999.00 999 ▇▁▁▁▃
FAM_ROSTER_2C_V2 575 0.95 2.39 2.82 1 1.00 1.0 3.00 15 ▇▁▁▁▁
FAM_ROSTER_3C_V2 1045 0.91 3.54 1.96 1 3.00 3.0 3.00 15 ▇▁▁▁▁
FAM_ROSTER_4C_V2 2493 0.79 3.50 1.87 1 3.00 3.0 3.00 15 ▇▁▁▁▁
FAM_ROSTER_5C_V2 6272 0.47 4.02 2.69 1 3.00 3.0 3.00 15 ▇▁▁▁▁
FAM_ROSTER_6C_V2 9110 0.23 4.56 3.22 1 3.00 3.0 4.00 15 ▇▁▁▁▁
FAM_ROSTER_7C_V2 10718 0.10 5.10 3.64 1 3.00 3.0 6.00 15 ▇▂▁▁▁
FAM_ROSTER_8C_V2 11378 0.04 5.23 3.67 1 3.00 3.0 7.00 15 ▇▂▂▁▁
FAM_ROSTER_9C_V2 11647 0.02 5.44 3.73 2 3.00 4.0 7.00 15 ▇▁▁▁▁
FAM_ROSTER_10C_V2 11750 0.01 5.00 3.48 1 3.00 3.5 4.25 14 ▇▅▁▁▂
FAM_ROSTER_11C_V2 11827 0.00 4.98 3.08 3 3.00 4.0 5.00 14 ▇▁▁▁▁
FAM_ROSTER_12C_V2 11847 0.00 5.03 3.19 3 3.00 4.0 5.00 14 ▇▁▁▁▁
FAM_ROSTER_13C_V2 11864 0.00 5.50 3.65 3 3.25 4.0 5.50 14 ▇▁▁▁▂
FAM_ROSTER_14C_V2 11867 0.00 6.18 3.87 3 4.00 4.0 7.00 14 ▇▁▁▁▂
FAM_ROSTER_15C_V2 11870 0.00 8.12 5.28 1 4.00 7.0 13.25 15 ▂▇▂▁▇
DEMO_CHILD_TIME_V2 3 1.00 5.41 63.95 0 0.00 0.0 0.00 777 ▇▁▁▁▁
DEMO_CHILD_TIME2_V2 10632 0.10 52.67 38.70 0 24.00 48.0 84.00 168 ▇▆▆▁▁
DEMO_CHILD_TIME2_V2_DK 11797 0.01 960.63 84.46 777 999.00 999.0 999.00 999 ▂▁▁▁▇
DEMO_CHILD_TIME3_V2 10576 0.11 2.29 1.99 1 1.00 1.0 4.00 8 ▇▁▃▁▁
DEMO_YRS_1 3 1.00 20.84 129.14 0 0.00 1.0 3.00 999 ▇▁▁▁▁
DEMO_YRS_2 5 1.00 35.91 173.28 1 2.00 3.0 4.00 999 ▇▁▁▁▁
DEMO_YRS_2A_2 5430 0.54 85.46 278.99 0 0.00 0.0 1.00 999 ▇▁▁▁▁
DEMO_YRS_2B_2 5432 0.54 128.37 333.80 0 0.00 0.0 1.00 999 ▇▁▁▁▁
DEMO_YRS_2_NO_DISPLAY___1 0 1.00 0.46 0.50 0 0.00 0.0 1.00 1 ▇▁▁▁▇
DEMO_RACE_A_P___0 0 1.00 0.00 0.01 0 0.00 0.0 0.00 1 ▇▁▁▁▁
#demo_prnt_marital_v2 
#marital 
#Are you now married, widowed, divorced, separated, never married or living with a partner? ¿Usted actualmente está casado(a), viudo(a), divorciado(a), separado(a), nunca se ha casado(a) o vive con una pareja?
#1 = Married Casado(a) ; 2 = Widowed Viudo(a) ; 3 = Divorced Divorciado(a) ; 4 = Separated Separado(a) ; 5 = Never married Nunca me he casado ; 6 = Living with partner Vivo con una pareja ; 777 = Refused to answer Prefiero no responder

#demo_prnt_ed_v2
#What is the highest grade or level of school you have completed or the highest degree you have received? ¿Cuál es su máximo nivel de estudios completados o el máximo título que ha recibido?
#0 = Never attended/Kindergarten only Nunca asistí/Kinder solamente ; 1 = 1st grade 1.er grado ; 2 = 2nd grade 2.º grado ; 3 = 3rd grade 3.er grado ; 4 = 4th grade 4.º grado ; 5 = 5th grade 5.º grado ; 6 = 6th grade 6.º grado ; 7 = 7th grade 7.º grado ; 8 = 8th grade 8.º grado ; 9 = 9th grade 9.º grado ; 10 = 10th grade 10.º grado ; 11 = 11th grade 11.º grado ; 12 = 12th grade; 13 = High school graduate Preparatoria terminada ; 14 = GED or equivalent Diploma General de Equivalencia (GED) o equivalente ; 15 = Some college; 16 = Associate degree: Occupational; 17 = Associate degree: Academic Program Título de asociado: programa académico ; 18 = Bachelor's degree (ex. BA; 19 = Master's degree (ex. MA; 20 = Professional School degree (ex. MD; 21 = Doctoral degree (ex. PhD; 777 = Refused to answer Prefiero no responder // The following questions are about your partner. Your "partner" refers to any significant figure in your life that helps you in raising your child or has helped you for more than 2 years. This person should be involved 40% or more of the daily activities your child does. For example, your partner could be your spouse. However, your partner could also be your boyfriend/girlfriend or relative.

#demo_comb_income_v2
#What is your TOTAL COMBINED FAMILY INCOME for the past 12 months? This should include income (before taxes and deductions) from all sources, wages, rent from properties, social security, disability and/or veteran's benefits, unemployment benefits, workman's compensation, help from relative (include child payments and alimony), and so on. ¿Cuál de estas categorías es la que mejor describe su INGRESO FAMILIAR TOTAL COMBINADO de los últimos 12 meses? Este debe incluir los ingresos (antes de impuestos y deducciones) provenientes de todas las fuentes, salarios, renta de propiedades, seguro social, pagos por incapacidad o subsidios para veteranos, subsidios por desempleo, compensación por accidentes de trabajo, ayuda de familiares (incluya pensiones alimenticias para hijos y cónyuges divorciados), etc.
#1= Less than $5,000; 2=$5,000 through $11,999; 3=$12,000 through $15,999; 4=$16,000 through $24,999; 5=$25,000 through $34,999; 6=$35,000 through $49,999; 7=$50,000 through $74,999; 8= $75,000 through $99,999; 9=$100,000 through $199,999; 10=$200,000 and greater. 999 = Don't know No lo sé ; 777 = Refuse to answer No deseo responder  | If Separated/Divorced, please average the two household incomes. Si es Separado(a) / Divorciado(a), por favor promedie los dos ingresos familiares

#demo_roster_v2
#How many people are living at your address? INCLUDE everyone who is living or staying at your address for more than 2 months. ¿Cuántas personas están viviendo o quedándose en su domicilio? INCLUYA a todas las personas que lleven viviendo o quedándose en su domicilio durante más de 2 meses.

#demo_fam_exp1_v2
#demo_fam_exp2_v2
#demo_fam_exp3_v2
#demo_fam_exp4_v2
#demo_fam_exp5_v2
#demo_fam_exp6_v2
#demo_fam_exp7_v2
# In the past 12 months, has there been a time when you and your immediate family experienced any of the following: Needed food but couldn't afford to buy it or couldn't afford to go out to get it? ¿Necesitaban comida pero no les alcanzaba el dinero para comprarla o para salir a comprarla?
# 0 = No No; 1 = Yes Sí; 777 = Refuse to answer Niego contestar

# Parent-reported socio-demographic summary, one row per SUBJECTKEY x EVENTNAME.
#   marital               - DEMO_PRNT_MARITAL_V2 recoded to labelled levels
#   education1stPar       - DEMO_PRNT_ED_V2 (responding parent)
#   education2ndPar       - DEMO_PRTNR_ED_V2 (partner)
#   educationAvg          - row mean of the two education codes, ignoring NA
#   combinedIncome        - DEMO_COMB_INCOME_V2 income bracket
#   householdSize         - DEMO_ROSTER_V2, values above 20 set to NA
#   econ_insecurities_sum - row sum of the DEMO_FAM_* items; NA if any is NA
# 777 (refused) and 999 (don't know) are treated as missing throughout.
demograpSum <- demograp %>%
  select(
    SUBJECTKEY, EVENTNAME, DEMO_PRNT_MARITAL_V2, DEMO_PRNT_ED_V2,
    DEMO_PRTNR_ED_V2, DEMO_COMB_INCOME_V2, DEMO_ROSTER_V2,
    starts_with("DEMO_FAM_")
  ) %>%
  mutate(marital = recode_factor(
    as.factor(DEMO_PRNT_MARITAL_V2),
    `1` = "married", `2` = "widowed", `3` = "divorced",
    `4` = "separated", `5` = "neverMarried", `6` = "livingWithPartner",
    `777` = NA_character_, `999` = NA_character_,
    # Any undocumented code becomes NA instead of being silently counted as
    # "married" (the previous default).
    .default = NA_character_
  )) %>%
  mutate(
    education1stPar =
      ifelse(DEMO_PRNT_ED_V2 %in% c(777, 999), NA, DEMO_PRNT_ED_V2),
    education2ndPar =
      ifelse(DEMO_PRTNR_ED_V2 %in% c(777, 999), NA, DEMO_PRTNR_ED_V2),
    educationAvg =
      rowMeans(cbind(education1stPar, education2ndPar), na.rm = TRUE),
    # rowMeans() returns NaN when both codes are missing; store a plain NA.
    educationAvg = ifelse(is.nan(educationAvg), NA, educationAvg),
    combinedIncome =
      ifelse(DEMO_COMB_INCOME_V2 %in% c(777, 999), NA, DEMO_COMB_INCOME_V2),
    # trim people who live with > 20 ppl (2 people in total)
    householdSize = ifelse(
      DEMO_ROSTER_V2 %in% c(777, 999) | DEMO_ROSTER_V2 > 20,
      NA, DEMO_ROSTER_V2
    )
  ) %>%
  # Blank out both sentinel codes on the economic-insecurity items in one pass.
  mutate(across(
    starts_with("DEMO_FAM_"),
    ~ ifelse(.x %in% c(777, 999), NA, .x)
  )) %>%
  mutate(
    econ_insecurities_sum =
      rowSums(select(., starts_with("DEMO_FAM_")), na.rm = FALSE)
  ) %>%
  select(-starts_with("DEMO_"))

# Baseline distribution of the derived socio-demographic variables.
demograpSum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 9
_______________________
Column type frequency:
character 2
factor 1
numeric 6
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 11878 0
EVENTNAME 0 1 21 21 0 1 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
marital 96 0.99 FALSE 6 mar: 7991, nev: 1460, div: 1082, liv: 688

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
education1stPar 17 1.00 16.59 2.77 1 15 18 19.0 21 ▁▁▂▅▇
education2ndPar 2467 0.79 16.37 3.06 0 15 18 18.0 21 ▁▁▁▅▇
educationAvg 14 1.00 16.38 2.70 3 15 17 18.5 21 ▁▁▂▇▇
combinedIncome 1018 0.91 7.22 2.42 1 6 8 9.0 10 ▂▂▃▆▇
householdSize 281 0.98 4.70 1.55 0 4 4 5.0 19 ▂▇▁▁▁
econ_insecurities_sum 135 0.99 0.47 1.10 0 0 0 0.0 7 ▇▁▁▁▁

1.11.2 more Social Demographics from Residential History Derived Scores

“RESHIST_ADDR1_ADI_WSUM” Residential history derived - Area Deprivation Index: scaled weighted sum based on Kind et al., Annals of Internal Medicine, 2014 1 “RESHIST_ADDR1_GRNDTOT” the grand total Uniform Crime Reports, “RESHIST_ADDR1_LEADRISK” the estimated lead risk in census tract of primary residential address

# Read the residential-history derived scores (ABCD_RHDS01) as a tibble.
RHDS <- paste0(dataFold, "ABCD_RHDS01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# RHDS %>% filter(EVENTNAME =="baseline_year_1_arm_1") %>% select(RESHIST_ADDR1_GRNDTOT) %>%
#   #select(-1:-8) %>%
#   summarytools::dfSummary(
#                         style = 'grid', graph.magnif = 0.75,
#                         valid.col = FALSE, tmp.img.dir = "/tmp")

# Uniform Crime Reports (RESHIST_ADDR1_GRNDTOT) seem to have some high values,
# so a fourth-root transformation, x^(1/4), will be applied. The derived
# column is nevertheless named "quartic_uniform_crime_reports".
# Histogram of the raw totals (all visits in RHDS):
hist(RHDS$RESHIST_ADDR1_GRNDTOT, breaks = 100)

# Histogram of the same totals after the fourth-root transformation:
hist((RHDS$RESHIST_ADDR1_GRNDTOT)^(1/4), breaks = 100)

# Neighbourhood-level indicators for the first residential address:
#   area_deprivation_index        - RESHIST_ADDR1_ADI_WSUM
#   lead_risk                     - RESHIST_ADDR1_LEADRISK
#   quartic_uniform_crime_reports - fourth root of RESHIST_ADDR1_GRNDTOT
# The raw crime total is dropped once the transformed column exists.
ResidHistDer <- RHDS %>%
  select(
    SUBJECTKEY, EVENTNAME,
    area_deprivation_index = RESHIST_ADDR1_ADI_WSUM,
    RESHIST_ADDR1_GRNDTOT,
    lead_risk = RESHIST_ADDR1_LEADRISK
  ) %>%
  mutate(quartic_uniform_crime_reports = (RESHIST_ADDR1_GRNDTOT)^(1/4)) %>%
  select(-RESHIST_ADDR1_GRNDTOT)

# Baseline distribution of the three indicators.
ResidHistDer %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 5
_______________________
Column type frequency:
character 2
numeric 3
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 11878 0
EVENTNAME 0 1 21 21 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
area_deprivation_index 697 0.94 92.73 24.82 0 86.64 98.48 108.14 125.75 ▁▁▂▇▇
lead_risk 701 0.94 5.10 3.11 0 2.00 5.00 8.00 10.00 ▇▆▅▅▆
quartic_uniform_crime_reports 697 0.94 12.09 5.78 0 9.41 12.28 15.20 24.29 ▂▃▇▅▁

1.12 Proximal Environment

From Zhang et al. Translational Psychiatry (2020) https://doi.org/10.1038/s41398-020-0761-6: The “Safety from Crime” items from the PhenX Toolkit were used to assess neighborhood safety and crime reports. Additionally, children reported their school risk and protective factors via a 12-item Inventory for School Risk and Protective Factors of the PhenX toolkit.

1.12.1 ABCD Neighborhood Safety/Crime Survey Modified from PhenX (NSC)

from parents and children

# Parent-reported neighbourhood safety: row sum of every column whose name
# starts with "NEIGHBORHOOD" in ABCD_PNSC01. A row with any missing item gets
# NA (na.rm = FALSE).
NeighboSafety_parent <- paste0(dataFold, "ABCD_PNSC01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  mutate(
    neighbo_safety_parent_sum =
      rowSums(select(., starts_with("NEIGHBORHOOD")), na.rm = FALSE)
  ) %>%
  select(SUBJECTKEY, EVENTNAME, neighbo_safety_parent_sum)

# I feel safe walking in my neighborhood, day or night. Me siento seguro(a) caminando por mi vecindario, de día o de noche.
# Violence is not a problem in my neighborhood./ La violencia no es un problema en mi vecindario.
# My neighborhood is safe from crime. Mi vecindario está a salvo de la delincuencia.
#1 = Strongly Disagree /Muy en desacuerdo; 2 = Disagree /En desacuerdo; 3 = Neutral (neither agree nor disagree)/ Neutral (ni de acuerdo ni en desacuerdo); 4 = Agree /De acuerdo; 5 = Strongly Agree/ Muy de acuerdo//The following questions are about your neighborhood. Your neighborhood is the area within about a 20-minute walk (or about a mile) from your home. For each of the statements please indicate whether you strongly agree, agree, neither agree nor disagree, disagree, or strongly disagree

# Youth-reported neighbourhood safety from ABCD_NSC01. The single column
# NEIGHBORHOOD_CRIME_Y is carried over under the name neighbo_safety_child_sum;
# no summing takes place here.
NeighboSafety_children <- paste0(dataFold, "ABCD_NSC01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  select(
    SUBJECTKEY, EVENTNAME,
    neighbo_safety_child_sum = NEIGHBORHOOD_CRIME_Y
  )

#My neighborhood is safe from crime.

# Full join of the parent and youth safety scores on subject and visit, so a
# row present in only one of the two tables is still kept.
NeighboSafety <- plyr::join_all(
  list(NeighboSafety_parent, NeighboSafety_children),
  by = c("SUBJECTKEY", "EVENTNAME"),
  type = "full"
)

# Baseline distribution of both safety scores.
NeighboSafety %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 4
_______________________
Column type frequency:
character 2
numeric 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 11878 0
EVENTNAME 0 1 21 21 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
neighbo_safety_parent_sum 47 1 11.67 2.93 3 10 12 14 15 ▁▁▃▆▇
neighbo_safety_child_sum 24 1 4.03 1.09 1 3 4 5 5 ▁▁▃▆▇

1.12.2 School Risk and Protective Factors Survey

# Read the SRPF01 school risk/protective factors table. Its visit column is
# called VISIT, so it is renamed to EVENTNAME to match the other tables.
SchRisk <- paste0(dataFold, "SRPF01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  rename(EVENTNAME = VISIT)

# Three school sub-scores, each a plain row sum (NA if any item is NA):
#   sumSchool_environment   - SCHOOL_2_Y to SCHOOL_7_Y
#   sumSchool_involvement   - SCHOOL_8_Y, SCHOOL_9_Y, SCHOOL_10_Y, SCHOOL_12_Y
#   sumSchool_disengagement - SCHOOL_15_Y, SCHOOL_17_Y
school_risk_sum <- SchRisk %>%
  mutate(
    sumSchool_environment = rowSums(dplyr::select(., all_of(c(
      "SCHOOL_2_Y", "SCHOOL_3_Y", "SCHOOL_4_Y",
      "SCHOOL_5_Y", "SCHOOL_6_Y", "SCHOOL_7_Y"
    )))),
    sumSchool_involvement = rowSums(dplyr::select(., all_of(c(
      "SCHOOL_8_Y", "SCHOOL_9_Y", "SCHOOL_10_Y", "SCHOOL_12_Y"
    )))),
    sumSchool_disengagement = rowSums(dplyr::select(., all_of(c(
      "SCHOOL_15_Y", "SCHOOL_17_Y"
    ))))
  ) %>%
  select(SUBJECTKEY, EVENTNAME, starts_with("sumSchool"))

# Baseline distribution of the three school sub-scores.
school_risk_sum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 5
_______________________
Column type frequency:
character 2
numeric 3
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 11878 0
EVENTNAME 0 1 21 21 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
sumSchool_environment 27 1 19.93 2.83 6 18 20 22 24 ▁▁▂▇▇
sumSchool_involvement 26 1 13.06 2.37 4 12 13 15 16 ▁▁▃▅▇
sumSchool_disengagement 25 1 3.74 1.46 2 3 4 5 8 ▇▃▃▁▁

1.13 Social Interaction

From Zhang et al. Translational Psychiatry (2020) https://doi.org/10.1038/s41398-020-0761-6: The child-reported parental monitoring and acceptance, as well as the child- and parent-reported prosocial tendency and family conflicts were included to measure social interactions. Parent monitoring was assessed by a 5-item summary score of the Parental Monitoring Scale [24]. Parent acceptance was evaluated by the Acceptance Scale, a subscale of the Child Report of Behavior Inventory (CRPBI) [25]. Prosocial behavior (e.g., being nice, helping, caring) was assessed using the Prosocial Behavior Scale, a subscale from the “Strengths and Difficulties Questionnaire” (SDQ) [26]. Both parents and youth reported on the youth’s prosocial behavior (e.g., being considerate of other people’s feelings, often offering to help others). In order to assess the family conflicts, the ABCD protocol utilized a 9-item Family Conflict subscale of the Moos Family Environment Scale (FES) for the baseline protocol [27].

1.13.1 ABCD Parental Monitoring Survey

# Read the PMQ01 parental monitoring survey from dataFold as a tibble.
ParMonSur <- paste0(dataFold, "PMQ01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# How often do your parents/guardians know where you are?
# How often do your parents know who you are with when you are not at school and away from home?
# If you are at home when your parents or guardians are not, how often do you know how to get in touch with them?
# How often do you talk to your mom/dad or guardian about your plans for the coming day, such as your plans about what will happen at school or what you are going to do with friends?
# In an average week, how many times do you and your parents/guardians, eat dinner together?
#1 = Never; 2 = Almost Never; 3 = Sometimes; 4 = Often; 5 = Always or Almost Always

ParentMonitoring <- ParMonSur %>% 
  mutate(parent_monitor_mean = rowMeans(select(., starts_with("PARENT_MONITOR_")))) %>%
  select(SUBJECTKEY,EVENTNAME, parent_monitor_mean)

ParentMonitoring %>%  filter(EVENTNAME =="baseline_year_1_arm_1") %>%
      skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 3
_______________________
Column type frequency:
character 2
numeric 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 11878 0
EVENTNAME 0 1 21 21 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
parent_monitor_mean 23 1 4.38 0.52 1 4.2 4.4 4.8 5 ▁▁▁▃▇

1.13.2 ABCD Family Conflict

Two instruments are used here: (1) ABCD Parents Reported Parent Family Environment Scale-Family Conflict Subscale Modified from PhenX (FES), version 2 - short name: fes02; and (2) ABCD Youth Reported Parent Family Environment Scale-Family Conflict Subscale Modified from PhenX (FES), version 2 - short name: fes01.

# Load the parent-reported family conflict table (fes02) and drop duplicated rows.
# Inside the pipe `.` is the freshly read tibble, so distinct() is keyed on every column
# except FES02_ID and DATASET_ID; .keep_all = TRUE keeps all columns of the first row
# found for each distinct key.
FamilyConflict_Parents <-as_tibble(read.csv(paste0(dataFold,"FES02_DATA_TABLE.csv"))) %>%
    distinct(select(.,-FES02_ID, -DATASET_ID),.keep_all = TRUE) # for some reason there is a row that is duplicated on everything but these two variables

# We fight a lot in our family. /Peleamos mucho en nuestra familia.
# Family members rarely become openly angry. /Los miembros de la familia raramente se enojan abiertamente.
# Family members sometimes get so angry they throw things./ Los miembros de la familia algunas veces se enojan tanto que avientan cosas.
# Family members hardly ever lose their tempers. /Los miembros de la familia dificilmente pierden su temperamento.
# Family members often criticize each other. /Los miembros de la familia con frecuencia se critican unos a otros.
# Family members sometimes hit each other./ Los miembros de la familia algunas veces se golpean unos a otros.
# If there is a disagreement in our family, we try hard to smooth things over and keep the peace. /Si hay un desacuerdo en nuestra familia, hacemos todo lo posible por resolverlo y conservar la paz.
# Family members often try to one-up or outdo each other./ Los miembros de la familia con frecuencia tratan de superar a los demás.
# In our family, we believe you don't ever get anywhere by raising your voice. /En nuestra familia, creemos que no se llega a nada levantando la voz.
# 1 = True /Verdadera; 0 = False/ Falsa or 0 = True /Verdadera; 1 = False/ Falsa  

# Parent-reported family conflict score: the row-wise sum over the contiguous run of
# columns from FAM_ENVIRO1_P through FAM_ENVIRO9R_P.
# na.rm = FALSE means a row with any missing item gets NA rather than a partial sum.
# (Spelled out as FALSE because `F` is an ordinary variable that can be reassigned.)
FamilyConflict_parents_sum <- FamilyConflict_Parents %>%
  mutate(fam_conflict_parent = rowSums(select(., FAM_ENVIRO1_P:FAM_ENVIRO9R_P), na.rm = FALSE)) %>%
  select(SUBJECTKEY, EVENTNAME, fam_conflict_parent)

# FamilyConflict_parents_sum  %>%  filter(EVENTNAME =="baseline_year_1_arm_1") %>%
#    summarytools::dfSummary(
#                          style = 'grid', graph.magnif = 0.75, 
#                          valid.col = FALSE, tmp.img.dir = "/tmp")

# Load the youth-reported family conflict table (fes01) as a tibble.
FamilyConflict_Children <- as_tibble(read.csv(paste0(dataFold, "ABCD_FES01_DATA_TABLE.csv")))

# Survey items:
# We fight a lot in our family.
# Family members rarely become openly angry.
# Family members sometimes get so angry they throw things.
# Family members hardly ever lose their tempers.
# Family members often criticize each other.
# Family members sometimes hit each other.
# If there's a disagreement in our family, we try hard to smooth things over and keep the peace.
# Family members often try to one-up or outdo each other.
# In our family, we believe you don't ever get anywhere by raising your voice.

# Youth-reported family conflict score: the row-wise sum over the contiguous run of
# columns from FES_YOUTH_Q1 through FES_YOUTH_Q9.
# na.rm = FALSE means a row with any missing item gets NA rather than a partial sum.
# (Spelled out as FALSE because `F` is an ordinary variable that can be reassigned.)
FamilyConflict_children_sum <- FamilyConflict_Children %>%
  mutate(fam_conflict_children = rowSums(select(., FES_YOUTH_Q1:FES_YOUTH_Q9), na.rm = FALSE)) %>%
  select(SUBJECTKEY, EVENTNAME, fam_conflict_children)

# FamilyConflict_children_sum %>%  filter(EVENTNAME =="baseline_year_1_arm_1") %>%
#    summarytools::dfSummary(
#                          style = 'grid', graph.magnif = 0.75, 
#                          valid.col = FALSE, tmp.img.dir = "/tmp")

# Full join of the parent and youth conflict scores on participant and visit, so a
# row present in only one of the two tables is still kept.
FamilyConflict_sum <- plyr::join(
  FamilyConflict_parents_sum,
  FamilyConflict_children_sum,
  by = c("SUBJECTKEY", "EVENTNAME"),
  type = "full"
)

# Descriptive summary of the baseline visit only.
FamilyConflict_sum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 4
_______________________
Column type frequency:
character 2
numeric 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 11878 0
EVENTNAME 0 1 21 21 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
fam_conflict_parent 12 1 2.54 1.96 0 1 2 4 9 ▇▇▅▂▁
fam_conflict_children 27 1 2.05 1.95 0 0 2 3 9 ▇▅▂▁▁
# FamilyConflict_Parents %>%  filter(EVENTNAME =="baseline_year_1_arm_1") %>% arrange(SUBJECTKEY)
# FamilyConflict_Children %>%  filter(EVENTNAME =="baseline_year_1_arm_1") %>% arrange(SUBJECTKEY)
# FamilyConflict_sum  %>%  filter(EVENTNAME =="baseline_year_1_arm_1") %>% arrange(SUBJECTKEY) 

1.13.3 Prosocial Tendency

Two instruments are used here: the Parent Prosocial Behavior Survey and the Youth Prosocial Behavior Survey.

# Parent Prosocial Behavior Survey
ParPS <- paste0(dataFold, "PSB01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# Youth Prosocial Behavior Survey
YouthPS <- paste0(dataFold, "ABCD_PSB01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# Full join of the two reports on participant and visit, then one mean per reporter
# over its three items. rowMeans() runs with its default na.rm = FALSE, so a row with
# any missing item scores NA for that reporter.
prosocial_sum <- plyr::join(
  ParPS,
  YouthPS,
  by = c("SUBJECTKEY", "EVENTNAME"),
  type = "full"
) %>%
  mutate(
    prosocial_parent_mean = rowMeans(
      dplyr::select(., "PROSOCIAL_Q1_P", "PROSOCIAL_Q2_P", "PROSOCIAL_Q3_P")
    ),
    prosocial_youth_mean = rowMeans(
      dplyr::select(., "PROSOCIAL_Q1_Y", "PROSOCIAL_Q2_Y", "PROSOCIAL_Q3_Y")
    )
  ) %>%
  select(SUBJECTKEY, EVENTNAME, prosocial_parent_mean, prosocial_youth_mean)

# Descriptive summary of the baseline visit only.
prosocial_sum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 4
_______________________
Column type frequency:
character 2
numeric 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 11878 0
EVENTNAME 0 1 21 21 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
prosocial_parent_mean 62 0.99 1.75 0.40 0 1.67 2.00 2 2 ▁▁▁▁▇
prosocial_youth_mean 33 1.00 1.68 0.37 0 1.33 1.67 2 2 ▁▁▁▂▇

1.14 Parent Sports and Activities

1.14.1 ABCD Sum Scores Parent Sports and Activities Involvement

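Method 1: for each activity, multiply hours per session x days per week x 4 weeks x months per year x years, then divide by 24 to express the total in days. This method leads to a large number of zeros, which might be because "Don't know" (999) is recoded to 0.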

# Load the parent-reported Sports and Activities Involvement summary-score table.
sport_act <- as_tibble(read.csv(paste0(dataFold, "ABCD_SPACSS01_DATA_TABLE.csv")))

# Output-name stem -> activity code used in the SAI_SS_<CODE>_*_P column names.
# The order is the order in which the <stem>_days columns are appended.
# Listening to music and reading are not included since they are on a different scale.
sport_act_codes <- c(
  dance = "DANCE", base = "BASE", basket = "BASKET", climb = "CLIMB",
  fball = "FBALL", fhock = "FHOCK", gym = "GYM", ihock = "IHOCK",
  polo = "POLO", iskate = "ISKATE", m_arts = "M_ARTS", lax = "LAX",
  rugby = "RUGBY", skate = "SKATE", sboard = "SBOARD", soc = "SOC",
  surf = "SURF", wpolo = "WPOLO", tennis = "TENNIS", run = "RUN",
  mma = "MMA", vball = "VBALL", yoga = "YOGA", music = "MUSIC",
  art = "ART", drama = "DRAMA", craft = "CRAFTS", chess = "CHESS",
  collect = "COLLECT"
)

# Append one <stem>_days column per entry of `codes` to `data` and return it.
# Each column is hours x days-per-week x 4 weeks x months x years / 24, i.e. the
# accumulated involvement expressed in days. Any NA factor makes the result NA.
#   data:  data frame holding the SAI_SS_<CODE>_{TSPENT,PERWK,NMONTH,NYR}_P columns
#   codes: named character vector, names = output stems, values = activity codes
add_sport_act_days <- function(data, codes) {
  for (stem in names(codes)) {
    prefix <- paste0("SAI_SS_", codes[[stem]])
    needed <- paste0(prefix, c("_TSPENT_P", "_PERWK_P", "_NMONTH_P", "_NYR_P"))
    # Fail loudly on a missing input column instead of producing an empty product.
    stopifnot(all(needed %in% names(data)))
    data[[paste0(stem, "_days")]] <-
      data[[paste0(prefix, "_TSPENT_P")]] * data[[paste0(prefix, "_PERWK_P")]] * 4 *
      data[[paste0(prefix, "_NMONTH_P")]] * data[[paste0(prefix, "_NYR_P")]] / 24
  }
  data
}

sport_act_multiplied_sum <- sport_act %>%
  # change 999 to 0. "Don't know" seems to imply that the child doesn't do that activity
  mutate_at(vars(starts_with("SAI_SS_")), ~ replace(., which(. == 999), 0)) %>%
  # *_PERWK_P raw coding: 0 = 0; 1 = 1; 2 = 2; 3 = 3; 4 = 4; 5 = 5; 6 = 6; 7 = 7;
  # 8 = Once every 2 weeks; 9 = One day every month; 10 = Less than one day per month;
  # 999 = Don't know | When ballet/dance was not endorsed, values for the follow-up
  # questions are missing. Here, missing values for the "... how many...?" follow-up
  # questions have been replaced with "0".
  # Recode to days per week: 0 = nothing, .125 = Less than one day per month,
  # .25 = One day every month, .5 = Once every 2 weeks, 1 = 1 day per week and so on.
  # (A purrr-style lambda replaces the deprecated funs() wrapper; the result is the same.)
  mutate_at(
    vars(ends_with("_PERWK_P")),
    ~ case_when(
      . == 10 ~ .125,
      . == 9 ~ .25,
      . == 8 ~ .5,
      TRUE ~ as.numeric(.)
    )
  ) %>%
  # *_TSPENT_P raw coding: 0 = 0; 1 = less than 30 minutes; 2 = 30; 3 = 45; 4 = 60 (1 hr);
  # 5 = 90 (1.5 hrs); 6 = 120 (2 hrs); 7 = 150 (2.5 hrs); 8 = 180 (3 hrs);
  # 9 = greater than 3 hours; 999 = Don't know | same note on missing follow-up
  # questions as above.
  # Recode to hours. Assumes less than 30 minutes to be .25 hour (15 mins) and
  # > 3 hrs to be 4 hours.
  mutate_at(
    vars(ends_with("_TSPENT_P")),
    ~ case_when(
      . == 1 ~ .25,
      . == 2 ~ .5,
      . == 3 ~ .75,
      . == 4 ~ 1,
      . == 5 ~ 1.5,
      . == 6 ~ 2,
      . == 7 ~ 2.5,
      . == 8 ~ 3,
      . == 9 ~ 4,
      TRUE ~ as.numeric(.)
    )
  ) %>%
  # hours x days x 4 weeks x months x years / 24 to get days, one column per activity
  add_sport_act_days(sport_act_codes) %>%
  # summary based on kerlic's child dev paper
  mutate(
    phys_ind_days_sum = sboard_days + climb_days + gym_days + iskate_days +
      m_arts_days + skate_days + dance_days + surf_days + tennis_days +
      run_days + mma_days + yoga_days,
    phys_team_days_sum = base_days + basket_days + fhock_days + fball_days +
      ihock_days + polo_days + lax_days + rugby_days + soc_days + wpolo_days +
      vball_days,
    art_days_sum = collect_days + music_days + art_days + drama_days +
      craft_days + chess_days,
    sport_act_all_days_sum = phys_ind_days_sum + phys_team_days_sum + art_days_sum,
    phys_ind_daypweek_sum = SAI_SS_SBOARD_PERWK_P + SAI_SS_CLIMB_PERWK_P +
      SAI_SS_GYM_PERWK_P + SAI_SS_ISKATE_PERWK_P + SAI_SS_M_ARTS_PERWK_P +
      SAI_SS_SKATE_PERWK_P + SAI_SS_DANCE_PERWK_P + SAI_SS_SURF_PERWK_P +
      SAI_SS_TENNIS_PERWK_P + SAI_SS_RUN_PERWK_P + SAI_SS_MMA_PERWK_P +
      SAI_SS_YOGA_PERWK_P,
    phys_team_daypweek_sum = SAI_SS_BASE_PERWK_P + SAI_SS_BASKET_PERWK_P +
      SAI_SS_FHOCK_PERWK_P + SAI_SS_FBALL_PERWK_P + SAI_SS_IHOCK_PERWK_P +
      SAI_SS_POLO_PERWK_P + SAI_SS_LAX_PERWK_P + SAI_SS_RUGBY_PERWK_P +
      SAI_SS_SOC_PERWK_P + SAI_SS_WPOLO_PERWK_P + SAI_SS_VBALL_PERWK_P,
    art_daypweek_sum = SAI_SS_COLLECT_PERWK_P + SAI_SS_MUSIC_PERWK_P +
      SAI_SS_ART_PERWK_P + SAI_SS_DRAMA_PERWK_P + SAI_SS_CRAFTS_PERWK_P +
      SAI_SS_CHESS_PERWK_P,
    sport_act_all_daypweek_sum = phys_ind_daypweek_sum + phys_team_daypweek_sum +
      art_daypweek_sum
  )

# Descriptive summary of the baseline visit only.
sport_act_multiplied_sum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 168
_______________________
Column type frequency:
character 6
numeric 162
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 11878 0
SRC_SUBJECT_ID 0 1 16 16 0 11878 0
INTERVIEW_DATE 0 1 9 9 0 756 0
SEX 0 1 1 1 0 2 0
EVENTNAME 0 1 21 21 0 1 0
STUDY_COHORT_NAME 0 1 21 21 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
ABCD_SPACSS01_ID 0 1.00 22337.50 3429.03 16399 19368.25 22337.50 25306.75 28276.00 ▇▇▇▇▇
DATASET_ID 0 1.00 34516.00 0.00 34516 34516.00 34516.00 34516.00 34516.00 ▁▁▇▁▁
INTERVIEW_AGE 0 1.00 118.98 7.50 107 112.00 119.00 126.00 133.00 ▇▆▆▆▆
SAI_SS_DANCE_NYR_P 24 1.00 0.64 1.44 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_DANCE_NMONTH_P 26 1.00 1.93 3.68 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_DANCE_PERWK_P 20 1.00 0.42 0.89 0 0.00 0.00 0.25 7.00 ▇▁▁▁▁
SAI_SS_DANCE_TSPENT_P 30 1.00 0.26 0.51 0 0.00 0.00 0.00 4.00 ▇▂▁▁▁
SAI_SS_BASE_NYR_P 17 1.00 0.74 1.50 0 0.00 0.00 1.00 10.00 ▇▁▁▁▁
SAI_SS_BASE_NMONTH_P 23 1.00 1.14 2.16 0 0.00 0.00 2.00 12.00 ▇▂▁▁▁
SAI_SS_BASE_PERWK_P 23 1.00 0.71 1.26 0 0.00 0.00 2.00 7.00 ▇▁▂▁▁
SAI_SS_BASE_TSPENT_P 27 1.00 0.37 0.66 0 0.00 0.00 1.00 4.00 ▇▂▁▁▁
SAI_SS_BASKET_NYR_P 21 1.00 0.59 1.23 0 0.00 0.00 1.00 10.00 ▇▁▁▁▁
SAI_SS_BASKET_NMONTH_P 25 1.00 1.07 2.14 0 0.00 0.00 1.00 12.00 ▇▂▁▁▁
SAI_SS_BASKET_PERWK_P 26 1.00 0.62 1.17 0 0.00 0.00 1.00 7.00 ▇▂▁▁▁
SAI_SS_BASKET_TSPENT_P 50 1.00 0.29 0.54 0 0.00 0.00 0.50 4.00 ▇▂▁▁▁
SAI_SS_CLIMB_NYR_P 15 1.00 0.08 0.53 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_CLIMB_NMONTH_P 14 1.00 0.16 1.09 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_CLIMB_PERWK_P 19 1.00 0.06 0.40 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_CLIMB_TSPENT_P 19 1.00 0.04 0.24 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_FHOCK_NYR_P 10 1.00 0.01 0.18 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_FHOCK_NMONTH_P 11 1.00 0.02 0.34 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_FHOCK_PERWK_P 10 1.00 0.01 0.18 0 0.00 0.00 0.00 5.00 ▇▁▁▁▁
SAI_SS_FHOCK_TSPENT_P 10 1.00 0.01 0.10 0 0.00 0.00 0.00 3.00 ▇▁▁▁▁
SAI_SS_FBALL_NYR_P 18 1.00 0.25 0.86 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_FBALL_NMONTH_P 21 1.00 0.44 1.42 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_FBALL_PERWK_P 20 1.00 0.35 1.10 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_FBALL_TSPENT_P 24 1.00 0.17 0.51 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_GYM_NYR_P 15 1.00 0.49 1.16 0 0.00 0.00 0.00 9.00 ▇▁▁▁▁
SAI_SS_GYM_NMONTH_P 19 1.00 1.71 3.59 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_GYM_PERWK_P 21 1.00 0.33 0.77 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_GYM_TSPENT_P 31 1.00 0.26 0.56 0 0.00 0.00 0.00 4.00 ▇▂▁▁▁
SAI_SS_IHOCK_NYR_P 10 1.00 0.08 0.56 0 0.00 0.00 0.00 9.00 ▇▁▁▁▁
SAI_SS_IHOCK_NMONTH_P 13 1.00 0.14 1.00 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_IHOCK_PERWK_P 10 1.00 0.07 0.48 0 0.00 0.00 0.00 6.00 ▇▁▁▁▁
SAI_SS_IHOCK_TSPENT_P 11 1.00 0.03 0.20 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_POLO_NYR_P 11 1.00 0.08 0.52 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_POLO_NMONTH_P 14 1.00 0.22 1.38 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_POLO_PERWK_P 14 1.00 0.05 0.36 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_POLO_TSPENT_P 17 1.00 0.04 0.25 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_ISKATE_NYR_P 11 1.00 0.13 0.64 0 0.00 0.00 0.00 9.00 ▇▁▁▁▁
SAI_SS_ISKATE_NMONTH_P 16 1.00 0.27 1.38 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_ISKATE_PERWK_P 18 1.00 0.08 0.40 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_ISKATE_TSPENT_P 19 1.00 0.06 0.26 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_M_ARTS_NYR_P 16 1.00 0.36 0.97 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_M_ARTS_NMONTH_P 23 1.00 1.49 3.62 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_M_ARTS_PERWK_P 19 1.00 0.36 0.91 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_M_ARTS_TSPENT_P 25 1.00 0.17 0.40 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_LAX_NYR_P 10 1.00 0.06 0.41 0 0.00 0.00 0.00 8.00 ▇▁▁▁▁
SAI_SS_LAX_NMONTH_P 12 1.00 0.11 0.76 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_LAX_PERWK_P 15 1.00 0.07 0.42 0 0.00 0.00 0.00 5.00 ▇▁▁▁▁
SAI_SS_LAX_TSPENT_P 13 1.00 0.04 0.23 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_RUGBY_NYR_P 10 1.00 0.01 0.14 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_RUGBY_NMONTH_P 10 1.00 0.01 0.28 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_RUGBY_PERWK_P 10 1.00 0.01 0.15 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_RUGBY_TSPENT_P 10 1.00 0.00 0.09 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_SKATE_NYR_P 13 1.00 0.09 0.56 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_SKATE_NMONTH_P 17 1.00 0.22 1.33 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_SKATE_PERWK_P 14 1.00 0.10 0.62 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_SKATE_TSPENT_P 18 1.00 0.03 0.20 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_SBOARD_NYR_P 10 1.00 0.33 1.25 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_SBOARD_NMONTH_P 20 1.00 0.27 0.99 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_SBOARD_PERWK_P 12 1.00 0.10 0.42 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_SBOARD_TSPENT_P 16 1.00 0.25 0.90 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_SOC_NYR_P 31 1.00 1.21 1.85 0 0.00 0.00 2.00 10.00 ▇▁▁▁▁
SAI_SS_SOC_NMONTH_P 44 1.00 2.04 3.05 0 0.00 0.00 4.00 12.00 ▇▂▁▁▁
SAI_SS_SOC_PERWK_P 38 1.00 0.95 1.27 0 0.00 0.00 2.00 7.00 ▇▂▂▁▁
SAI_SS_SOC_TSPENT_P 58 1.00 0.47 0.62 0 0.00 0.00 1.00 4.00 ▇▅▁▁▁
SAI_SS_SURF_NYR_P 10 1.00 0.01 0.21 0 0.00 0.00 0.00 9.00 ▇▁▁▁▁
SAI_SS_SURF_NMONTH_P 12 1.00 0.02 0.31 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_SURF_PERWK_P 10 1.00 0.01 0.18 0 0.00 0.00 0.00 5.00 ▇▁▁▁▁
SAI_SS_SURF_TSPENT_P 11 1.00 0.01 0.15 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_WPOLO_NYR_P 22 1.00 1.16 2.14 0 0.00 0.00 2.00 10.00 ▇▁▁▁▁
SAI_SS_WPOLO_NMONTH_P 42 1.00 1.76 3.24 0 0.00 0.00 3.00 12.00 ▇▁▁▁▁
SAI_SS_WPOLO_PERWK_P 35 1.00 0.78 1.43 0 0.00 0.00 1.00 7.00 ▇▁▁▁▁
SAI_SS_WPOLO_TSPENT_P 47 1.00 0.31 0.56 0 0.00 0.00 0.50 4.00 ▇▂▁▁▁
SAI_SS_TENNIS_NYR_P 15 1.00 0.14 0.63 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_TENNIS_NMONTH_P 17 1.00 0.31 1.46 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_TENNIS_PERWK_P 19 1.00 0.12 0.54 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_TENNIS_TSPENT_P 21 1.00 0.07 0.29 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_RUN_NYR_P 18 1.00 0.14 0.62 0 0.00 0.00 0.00 9.00 ▇▁▁▁▁
SAI_SS_RUN_NMONTH_P 20 1.00 0.29 1.27 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_RUN_PERWK_P 24 1.00 0.16 0.68 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_RUN_TSPENT_P 20 1.00 0.08 0.31 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_MMA_NYR_P 16 1.00 0.06 0.45 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_MMA_NMONTH_P 20 1.00 0.17 1.16 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_MMA_PERWK_P 16 1.00 0.08 0.49 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_MMA_TSPENT_P 20 1.00 0.04 0.24 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_VBALL_NYR_P 13 1.00 0.04 0.27 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_VBALL_NMONTH_P 15 1.00 0.09 0.66 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_VBALL_PERWK_P 17 1.00 0.05 0.36 0 0.00 0.00 0.00 6.00 ▇▁▁▁▁
SAI_SS_VBALL_TSPENT_P 14 1.00 0.03 0.20 0 0.00 0.00 0.00 3.00 ▇▁▁▁▁
SAI_SS_YOGA_NYR_P 13 1.00 0.04 0.37 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_YOGA_NMONTH_P 13 1.00 0.12 0.96 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_YOGA_PERWK_P 14 1.00 0.03 0.30 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_YOGA_TSPENT_P 16 1.00 0.02 0.12 0 0.00 0.00 0.00 3.00 ▇▁▁▁▁
SAI_SS_MUSIC_NYR_P 25 1.00 0.90 1.48 0 0.00 0.00 1.00 10.00 ▇▁▁▁▁
SAI_SS_MUSIC_NMONTH_P 39 1.00 3.38 4.65 0 0.00 0.00 8.00 12.00 ▇▁▁▁▂
SAI_SS_MUSIC_PERWK_P 32 1.00 0.84 1.44 0 0.00 0.00 1.00 7.00 ▇▁▁▁▁
SAI_SS_MUSIC_TSPENT_P 42 1.00 0.29 0.42 0 0.00 0.00 0.50 4.00 ▇▁▁▁▁
SAI_SS_ART_NYR_P 27 1.00 0.78 1.94 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_ART_NMONTH_P 36 1.00 1.60 3.67 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_ART_PERWK_P 41 1.00 0.54 1.39 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_ART_TSPENT_P 38 1.00 0.18 0.45 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_DRAMA_NYR_P 14 1.00 0.23 0.83 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_DRAMA_NMONTH_P 29 1.00 0.53 1.83 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_DRAMA_PERWK_P 27 1.00 0.24 0.83 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_DRAMA_TSPENT_P 24 1.00 0.16 0.54 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_CRAFTS_NYR_P 21 1.00 0.28 1.12 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_CRAFTS_NMONTH_P 28 1.00 0.62 2.40 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_CRAFTS_PERWK_P 27 1.00 0.21 0.85 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_CRAFTS_TSPENT_P 27 1.00 0.08 0.30 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_CHESS_NYR_P 22 1.00 0.27 0.96 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_CHESS_NMONTH_P 28 1.00 0.70 2.36 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_CHESS_PERWK_P 34 1.00 0.17 0.66 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_CHESS_TSPENT_P 33 1.00 0.10 0.32 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_COLLECT_NYR_P 17 1.00 0.21 0.93 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_COLLECT_NMONTH_P 28 1.00 0.53 2.32 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_COLLECT_PERWK_P 24 1.00 0.17 0.86 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_COLLECT_TSPENT_P 23 1.00 0.04 0.20 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_LMUSIC_YEARS_P 1220 0.90 3.84 3.22 0 1.00 3.00 6.00 10.00 ▇▃▂▂▃
SAI_SS_LMUSIC_HOURS_P 1697 0.86 4.64 7.59 0 1.00 3.00 6.00 168.00 ▇▁▁▁▁
SAI_SS_READ_YEARS_P 591 0.95 2.72 2.39 0 0.00 3.00 4.00 10.00 ▇▅▃▁▁
SAI_SS_READ_HOURS_P 831 0.93 4.62 9.22 0 0.00 3.00 6.00 168.00 ▇▁▁▁▁
SPORTS_ACTIVITY_SS_LMUSIC_P 2069 0.83 169.76 369.54 1 3.00 6.00 9.00 999.00 ▇▁▁▁▂
SPORTS_ACTIVITY_SS_READ_P 3310 0.72 91.01 278.58 1 4.00 6.00 8.00 999.00 ▇▁▁▁▁
dance_days 54 1.00 2.29 12.64 0 0.00 0.00 0.00 560.00 ▇▁▁▁▁
base_days 52 1.00 2.58 9.11 0 0.00 0.00 0.67 266.67 ▇▁▁▁▁
basket_days 74 0.99 1.42 5.58 0 0.00 0.00 0.00 147.00 ▇▁▁▁▁
climb_days 28 1.00 0.14 1.93 0 0.00 0.00 0.00 128.00 ▇▁▁▁▁
fball_days 35 1.00 1.02 6.11 0 0.00 0.00 0.00 245.00 ▇▁▁▁▁
fhock_days 11 1.00 0.02 0.67 0 0.00 0.00 0.00 55.00 ▇▁▁▁▁
gym_days 54 1.00 1.99 12.01 0 0.00 0.00 0.00 392.00 ▇▁▁▁▁
ihock_days 14 1.00 0.35 3.31 0 0.00 0.00 0.00 75.00 ▇▁▁▁▁
polo_days 23 1.00 0.17 2.54 0 0.00 0.00 0.00 135.00 ▇▁▁▁▁
iskate_days 28 1.00 0.21 2.14 0 0.00 0.00 0.00 108.00 ▇▁▁▁▁
m_arts_days 48 1.00 1.35 5.69 0 0.00 0.00 0.00 175.00 ▇▁▁▁▁
lax_days 18 1.00 0.16 1.68 0 0.00 0.00 0.00 60.00 ▇▁▁▁▁
rugby_days 10 1.00 0.02 0.53 0 0.00 0.00 0.00 26.67 ▇▁▁▁▁
skate_days 23 1.00 0.29 3.97 0 0.00 0.00 0.00 261.33 ▇▁▁▁▁
sboard_days 28 1.00 0.85 4.93 0 0.00 0.00 0.00 128.00 ▇▁▁▁▁
soc_days 107 0.99 3.83 10.12 0 0.00 0.00 3.00 196.00 ▇▁▁▁▁
surf_days 13 1.00 0.03 0.87 0 0.00 0.00 0.00 48.00 ▇▁▁▁▁
wpolo_days 90 0.99 2.97 10.38 0 0.00 0.00 1.25 256.00 ▇▁▁▁▁
tennis_days 31 1.00 0.31 4.56 0 0.00 0.00 0.00 420.00 ▇▁▁▁▁
run_days 36 1.00 0.30 2.29 0 0.00 0.00 0.00 96.00 ▇▁▁▁▁
mma_days 25 1.00 0.28 3.06 0 0.00 0.00 0.00 175.00 ▇▁▁▁▁
vball_days 20 1.00 0.06 0.73 0 0.00 0.00 0.00 44.00 ▇▁▁▁▁
yoga_days 17 1.00 0.07 1.05 0 0.00 0.00 0.00 48.00 ▇▁▁▁▁
music_days 82 0.99 2.34 7.09 0 0.00 0.00 1.67 224.00 ▇▁▁▁▁
art_days 70 0.99 3.68 17.62 0 0.00 0.00 0.00 504.00 ▇▁▁▁▁
drama_days 46 1.00 0.69 5.10 0 0.00 0.00 0.00 280.00 ▇▁▁▁▁
craft_days 43 1.00 1.11 8.54 0 0.00 0.00 0.00 280.00 ▇▁▁▁▁
chess_days 59 1.00 0.58 4.60 0 0.00 0.00 0.00 200.00 ▇▁▁▁▁
collect_days 39 1.00 0.62 7.08 0 0.00 0.00 0.00 448.00 ▇▁▁▁▁
phys_ind_days_sum 259 0.98 8.07 21.57 0 0.00 1.00 8.00 560.00 ▇▁▁▁▁
phys_team_days_sum 322 0.97 12.60 22.06 0 0.00 4.00 15.50 367.67 ▇▁▁▁▁
art_days_sum 263 0.98 8.85 26.38 0 0.00 0.33 6.00 600.00 ▇▁▁▁▁
sport_act_all_days_sum 735 0.94 29.34 43.86 0 3.33 15.33 38.00 826.17 ▇▁▁▁▁
phys_ind_daypweek_sum 93 0.99 1.84 2.42 0 0.00 1.00 3.00 44.75 ▇▁▁▁▁
phys_team_daypweek_sum 112 0.99 3.67 3.75 0 0.00 3.00 6.00 36.00 ▇▁▁▁▁
art_daypweek_sum 118 0.99 2.15 3.23 0 0.00 1.00 3.00 36.00 ▇▁▁▁▁
sport_act_all_daypweek_sum 273 0.98 7.63 6.40 0 3.00 6.12 11.00 103.25 ▇▁▁▁▁

1.14.2 ABCD Sum Scores Parent Sports and Activities Involvement

Method 2: the method used by Kerlic et al., who focus only on days per week. The days-per-week responses are therefore converted differently here than in the previous section.

# Load the parent-reported Sports and Activities Involvement summary-score table.
sport_act <- as_tibble(read.csv(paste0(dataFold, "ABCD_SPACSS01_DATA_TABLE.csv")))

sport_act_kerlic_sum <- sport_act %>%
  # change 999 to 0. "Don't know" seems to imply that the child doesn't do that activity
  mutate_at(vars(starts_with("SAI_SS_")), ~ replace(., which(. == 999), 0)) %>%
  # *_PERWK_P raw coding: 0 = 0; 1 = 1; 2 = 2; 3 = 3; 4 = 4; 5 = 5; 6 = 6; 7 = 7;
  # 8 = Once every 2 weeks; 9 = One day every month; 10 = Less than one day per month;
  # 999 = Don't know | When ballet/dance was not endorsed, values for the follow-up
  # questions are missing. Here, missing values for the "... how many...?" follow-up
  # questions have been replaced with "0".
  # Recode to an ordinal 0-10 frequency rank (not days per week): 0 stays 0,
  # 1 = Less than one day per month, 2 = One day every month, 3 = Once every 2 weeks,
  # 4-10 = 1-7 days per week.
  # (A purrr-style lambda replaces the deprecated funs() wrapper; the result is the same.)
  mutate_at(
    vars(ends_with("_PERWK_P")),
    ~ case_when(
      . == 10 ~ 1,
      . == 9 ~ 2,
      . == 8 ~ 3,
      . == 1 ~ 4,
      . == 2 ~ 5,
      . == 3 ~ 6,
      . == 4 ~ 7,
      . == 5 ~ 8,
      . == 6 ~ 9,
      . == 7 ~ 10,
      TRUE ~ as.numeric(.)
    )
  ) %>%
  # Sum the recoded frequencies within each activity family, then overall.
  # Plain `+` is used, so an NA in any addend makes that sum NA.
  mutate(
    phys_ind_daypweek_sum = SAI_SS_SBOARD_PERWK_P + SAI_SS_CLIMB_PERWK_P +
      SAI_SS_GYM_PERWK_P + SAI_SS_ISKATE_PERWK_P + SAI_SS_M_ARTS_PERWK_P +
      SAI_SS_SKATE_PERWK_P + SAI_SS_DANCE_PERWK_P + SAI_SS_SURF_PERWK_P +
      SAI_SS_TENNIS_PERWK_P + SAI_SS_RUN_PERWK_P + SAI_SS_MMA_PERWK_P +
      SAI_SS_YOGA_PERWK_P,
    phys_team_daypweek_sum = SAI_SS_BASE_PERWK_P + SAI_SS_BASKET_PERWK_P +
      SAI_SS_FHOCK_PERWK_P + SAI_SS_FBALL_PERWK_P + SAI_SS_IHOCK_PERWK_P +
      SAI_SS_POLO_PERWK_P + SAI_SS_LAX_PERWK_P + SAI_SS_RUGBY_PERWK_P +
      SAI_SS_SOC_PERWK_P + SAI_SS_WPOLO_PERWK_P + SAI_SS_VBALL_PERWK_P,
    art_daypweek_sum = SAI_SS_COLLECT_PERWK_P + SAI_SS_MUSIC_PERWK_P +
      SAI_SS_ART_PERWK_P + SAI_SS_DRAMA_PERWK_P + SAI_SS_CRAFTS_PERWK_P +
      SAI_SS_CHESS_PERWK_P,
    sport_act_all_daypweek_sum = phys_ind_daypweek_sum + phys_team_daypweek_sum +
      art_daypweek_sum
  ) %>%
  select(SUBJECTKEY, EVENTNAME, ends_with("_daypweek_sum"))

# Descriptive summary of the four sum scores at the baseline visit only.
sport_act_kerlic_sum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(ends_with("_daypweek_sum")) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 4
_______________________
Column type frequency:
numeric 4
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
phys_ind_daypweek_sum 93 0.99 4.89 5.64 0 0 4 8 70 ▇▁▁▁▁
phys_team_daypweek_sum 112 0.99 8.12 7.60 0 0 6 13 54 ▇▃▁▁▁
art_daypweek_sum 118 0.99 4.96 6.24 0 0 4 8 54 ▇▁▁▁▁
sport_act_all_daypweek_sum 273 0.98 17.89 13.72 0 8 16 26 170 ▇▁▁▁▁

1.14.3 physical activity

ABCD Youth Risk Behavior Survey Exercise Physical Activity

# Load the ABCD Youth Risk Behavior Survey table and keep only the identifiers plus the
# physical-activity item, renamed to physc_act_days.
# Item wording: During the past 7 days, on how many days were you physically active for a
# total of at least 60 minutes per day? (Add up all the time you spent in any kind of
# physical activity that increased your heart rate and made you breathe hard some of the time)
phyc_act <- paste0(dataFold, "ABCD_YRB01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  select(SUBJECTKEY, EVENTNAME, physc_act_days = PHYSICAL_ACTIVITY1_Y)

# Descriptive summary of the item at the baseline visit only.
phyc_act %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(physc_act_days) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 1
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
physc_act_days 28 1 3.49 2.32 0 2 3 5 7 ▇▅▇▅▇

1.14.4 BMI and Waist

ABCD Youth Anthropometrics Modified From PhenX. The values are questionable, even after deleting outliers, so we ended up not using them.

# Load the anthropometrics table as a tibble.
anthro <- paste0(dataFold, "ABCD_ANT01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# Tabulate the cast flag that is used below to blank out unreliable BMI values.
anthro %>% count(ANTHROWEIGHTCAST)
## # A tibble: 3 × 2
##   ANTHROWEIGHTCAST     n
##              <int> <int>
## 1                0 28185
## 2                1    82
## 3               NA  1417
# Compute BMI and keep the waist measurement next to the raw weight and height.
# Records flagged ANTHROWEIGHTCAST == 1 (the 82 weighed with a cast) get a
# missing BMI because it would not be accurate; records whose flag is 0 or
# missing keep the computed value.
# NOTE(review): 703 is the imperial BMI factor, so weight is presumably in
# pounds and height in inches -- confirm against the data dictionary.
bmi_waist <- anthro %>%
  # Outlier filters that were tried and left disabled:
  # filter(ANTHROHEIGHTCALC > 30) %>% # remove those who are unusually short. Potentially error in data entering
  # filter(ANTHROWEIGHTCALC < 500) %>%
  # filter(!rstatix::is_outlier(ANTHROHEIGHTCALC) & !rstatix::is_outlier(ANTHROWEIGHTCALC)) %>%
  mutate(
    bmi = if_else(
      !is.na(ANTHROWEIGHTCAST) & ANTHROWEIGHTCAST != 0,
      NA_real_,
      (ANTHROWEIGHTCALC / (ANTHROHEIGHTCALC^2)) * 703
    )
  ) %>%
  select(
    SUBJECTKEY,
    EVENTNAME,
    bmi,
    waist = ANTHRO_WAIST_CM,
    ANTHROWEIGHTCALC,
    ANTHROHEIGHTCALC
  )
  
# anthro %>% 
#   mutate(bmi = ifelse(ANTHROWEIGHTCAST == 0 | is.na(ANTHROWEIGHTCAST),
#                       (ANTHROWEIGHTCALC/(ANTHROHEIGHTCALC^2))*703,NA)) %>%
#   rename(waist = ANTHRO_WAIST_CM) %>%
#   arrange(desc(bmi)) %>% glimpse()

# bmi_waist %>% arrange(desc(bmi)) %>% glimpse()
# bmi_waist %>% arrange(bmi) %>% glimpse()
# 
# anthro %>% rstatix::identify_outliers(ANTHROHEIGHTCALC) %>%  arrange(ANTHROHEIGHTCALC) %>% View()
# anthro %>% rstatix::identify_outliers(ANTHROHEIGHTCALC) %>%  arrange(desc(ANTHROHEIGHTCALC)) %>% View()

# boxplot(anthro$ANTHROHEIGHTCALC)$out
# boxplot(anthro$ANTHROWEIGHTCALC)$out
# 
# boxplot(bmi_waist$ANTHROWEIGHTCALC)$out

# Baseline-only summary of BMI, waist, weight and height.
bmi_waist %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11878
Number of columns 4
_______________________
Column type frequency:
numeric 4
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
bmi 39 1 19.29 36.65 2.08 15.94 17.64 20.65 3017.04 ▇▁▁▁▁
waist 17 1 26.48 4.30 0.00 23.50 25.50 28.70 73.00 ▁▇▂▁▁
ANTHROWEIGHTCALC 11 1 82.56 23.76 0.00 66.05 76.50 93.00 272.00 ▁▇▁▁▁
ANTHROHEIGHTCALC 9 1 55.24 3.33 0.00 53.00 55.10 57.20 82.00 ▁▁▁▇▁

1.15 Join data

# Full-join every per-domain summary table on participant and visit, then
# exclude rows flagged with an eyesight problem (visionProb == 1). Rows where
# the flag is missing are retained, and the flag column itself is dropped.
all_sum_vars <- list(
  MriandSite,
  ACSselected,
  sumCog,
  CBCLPrecomputedSelected,
  ASRPrecomputedSelected,
  maniaParent,
  BISBAS,
  UPPS,
  sleepSum,
  youthScreenSum,
  momSubstanceUse,
  adversitySum,
  bilingualSum,
  demograpSum,
  ResidHistDer,
  NeighboSafety,
  school_risk_sum,
  ParentMonitoring,
  FamilyConflict_sum,
  prosocial_sum,
  sport_act_kerlic_sum,
  phyc_act
) %>%
  plyr::join_all(by = c("SUBJECTKEY", "EVENTNAME"), type = "full") %>%
  filter(is.na(visionProb) | visionProb != 1) %>%
  select(-visionProb)


# Baseline-only summary of the joined table. The identifier columns are
# dropped by name rather than by position (`-1:-2`), so the summary does not
# depend on the column order produced by the join.
all_sum_vars %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-SUBJECTKEY, -EVENTNAME) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11852
Number of columns 121
_______________________
Column type frequency:
character 2
factor 13
numeric 106
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SITE_ID_L 90 0.99 6 6 0 22 0
MRI_INFO_DEVICESERIALNUMBER 90 0.99 0 12 201 30 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
SEX 0 1.00 FALSE 2 M: 6190, F: 5662
RACE_ETHNICITY 2 1.00 FALSE 5 Whi: 6175, His: 2403, Bla: 1776, Oth: 1243
REL_FAMILY_ID 0 1.00 FALSE 9834 373: 5, 749: 4, 872: 4, 11: 3
tobacco_before_preg 276 0.98 FALSE 2 0: 9966, 1: 1610
tobacco_after_preg 263 0.98 FALSE 2 0: 10968, 1: 621
alcohol_before_preg 681 0.94 FALSE 2 0: 8294, 1: 2877
alcohol_after_preg 291 0.98 FALSE 2 0: 11245, 1: 316
marijuana_before_preg 337 0.97 FALSE 2 0: 10829, 1: 686
marijuana_after_preg 275 0.98 FALSE 2 0: 11333, 1: 244
deveplopment_prematurity 145 0.99 FALSE 2 0: 9504, 1: 2203
bilingual_status 1028 0.91 FALSE 2 0: 6746, 1: 4078
bilingual_degree 1028 0.91 FALSE 3 0: 6746, 2: 2538, 1: 1540
marital 96 0.99 FALSE 6 mar: 7977, nev: 1455, div: 1078, liv: 685

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
INTERVIEW_AGE 0 1.00 118.98 7.49 107.00 112.00 119.00 126.00 133.00 ▇▆▆▆▆
ACS_RAKED_PROPENSITY_SCORE 0 1.00 691.39 351.18 161.36 448.94 619.31 821.83 1778.92 ▅▇▂▂▁
NIHTBX_FLANKER_UNCORRECTED 156 0.99 94.00 9.14 51.00 89.00 95.00 100.00 116.00 ▁▁▃▇▂
NIHTBX_CARDSORT_UNCORRECTED 155 0.99 92.52 9.51 50.00 88.00 93.00 99.00 120.00 ▁▁▆▇▁
NIHTBX_PATTERN_UNCORRECTED 174 0.99 88.06 14.58 30.00 80.00 88.00 99.00 140.00 ▁▃▇▅▁
NIHTBX_PICVOCAB_UNCORRECTED 150 0.99 84.46 8.11 29.00 79.00 84.00 90.00 119.00 ▁▁▇▇▁
NIHTBX_READING_UNCORRECTED 164 0.99 90.86 6.90 59.00 87.00 91.00 95.00 119.00 ▁▁▇▂▁
NIHTBX_PICTURE_UNCORRECTED 162 0.99 102.81 12.08 76.00 94.00 102.00 111.00 136.00 ▃▇▇▅▁
PEA_RAVLT_LD_TRIAL_VII_TC 263 0.98 9.18 3.20 0.00 7.00 9.00 12.00 15.00 ▁▃▇▇▃
NIHTBX_LIST_UNCORRECTED 199 0.98 96.65 12.09 36.00 90.00 97.00 105.00 136.00 ▁▁▅▇▁
LMT_SCR_PERC_CORRECT 336 0.97 0.59 0.17 0.00 0.47 0.56 0.72 1.00 ▁▂▇▅▂
PEA_WISCV_TRS 243 0.98 17.91 3.84 0.00 15.00 18.00 20.00 32.00 ▁▁▇▅▁
NIHTBX_FLUIDCOMP_UNCORRECTED 238 0.98 91.55 10.65 44.00 85.00 92.00 99.00 131.00 ▁▂▇▅▁
NIHTBX_CRYST_UNCORRECTED 182 0.98 86.37 7.06 51.00 82.00 86.00 91.00 115.00 ▁▁▇▃▁
NIHTBX_TOTALCOMP_UNCORRECTED 242 0.98 86.23 9.13 44.00 81.00 87.00 92.00 117.00 ▁▂▇▇▁
CBCL_SCR_SYN_ANXDEP_R 8 1.00 2.52 3.06 0.00 0.00 1.00 4.00 26.00 ▇▁▁▁▁
CBCL_SCR_SYN_WITHDEP_R 8 1.00 1.04 1.71 0.00 0.00 0.00 1.00 15.00 ▇▁▁▁▁
CBCL_SCR_SYN_SOMATIC_R 8 1.00 1.49 1.95 0.00 0.00 1.00 2.00 16.00 ▇▁▁▁▁
CBCL_SCR_SYN_SOCIAL_R 8 1.00 1.63 2.28 0.00 0.00 1.00 2.00 18.00 ▇▁▁▁▁
CBCL_SCR_SYN_THOUGHT_R 8 1.00 1.62 2.20 0.00 0.00 1.00 2.00 18.00 ▇▁▁▁▁
CBCL_SCR_SYN_ATTENTION_R 8 1.00 2.98 3.49 0.00 0.00 2.00 5.00 20.00 ▇▂▁▁▁
CBCL_SCR_SYN_RULEBREAK_R 8 1.00 1.19 1.86 0.00 0.00 0.00 2.00 20.00 ▇▁▁▁▁
CBCL_SCR_SYN_AGGRESSIVE_R 8 1.00 3.26 4.35 0.00 0.00 2.00 5.00 36.00 ▇▁▁▁▁
CBCL_SCR_SYN_INTERNAL_R 8 1.00 5.05 5.53 0.00 1.00 3.00 7.00 51.00 ▇▁▁▁▁
CBCL_SCR_SYN_EXTERNAL_R 8 1.00 4.46 5.87 0.00 0.00 2.00 6.00 49.00 ▇▁▁▁▁
CBCL_SCR_SYN_TOTPROB_R 8 1.00 18.19 17.98 0.00 5.00 13.00 25.00 139.00 ▇▂▁▁▁
CBCL_SCR_DSM5_DEPRESS_R 8 1.00 1.27 2.01 0.00 0.00 0.00 2.00 19.00 ▇▁▁▁▁
CBCL_SCR_DSM5_ANXDISORD_R 8 1.00 2.06 2.43 0.00 0.00 1.00 3.00 17.00 ▇▂▁▁▁
CBCL_SCR_DSM5_SOMATICPR_R 8 1.00 1.08 1.51 0.00 0.00 0.00 2.00 11.00 ▇▁▁▁▁
CBCL_SCR_DSM5_ADHD_R 8 1.00 2.62 2.97 0.00 0.00 2.00 4.00 14.00 ▇▃▂▁▁
CBCL_SCR_DSM5_OPPOSIT_R 8 1.00 1.77 2.04 0.00 0.00 1.00 3.00 10.00 ▇▂▁▁▁
CBCL_SCR_DSM5_CONDUCT_R 8 1.00 1.28 2.36 0.00 0.00 0.00 2.00 25.00 ▇▁▁▁▁
CBCL_SCR_07_SCT_R 8 1.00 0.52 1.00 0.00 0.00 0.00 1.00 8.00 ▇▁▁▁▁
CBCL_SCR_07_OCD_R 8 1.00 1.34 1.82 0.00 0.00 1.00 2.00 14.00 ▇▂▁▁▁
CBCL_SCR_07_STRESS_R 8 1.00 2.90 3.35 0.00 0.00 2.00 4.00 24.00 ▇▂▁▁▁
ASR_SCR_PERSTR_T 106 0.99 47.90 9.37 20.00 42.00 49.00 56.00 60.00 ▁▂▃▆▇
ASR_SCR_ANXDEP_T 106 0.99 53.46 5.71 50.00 50.00 50.00 55.00 98.00 ▇▁▁▁▁
ASR_SCR_WITHDRAWN_T 106 0.99 52.82 5.10 50.00 50.00 51.00 53.00 97.00 ▇▁▁▁▁
ASR_SCR_SOMATIC_T 106 0.99 54.78 6.18 50.00 50.00 52.00 57.00 98.00 ▇▂▁▁▁
ASR_SCR_THOUGHT_T 106 0.99 52.93 5.09 50.00 50.00 51.00 54.00 95.00 ▇▁▁▁▁
ASR_SCR_ATTENTION_T 106 0.99 53.90 5.88 50.00 50.00 51.00 57.00 94.00 ▇▂▁▁▁
ASR_SCR_AGGRESSIVE_T 106 0.99 53.35 5.06 50.00 50.00 51.00 55.00 89.00 ▇▂▁▁▁
ASR_SCR_RULEBREAK_T 106 0.99 52.56 4.71 50.00 50.00 50.00 52.00 83.00 ▇▁▁▁▁
ASR_SCR_INTRUSIVE_T 106 0.99 51.65 3.41 50.00 50.00 50.00 51.00 76.00 ▇▁▁▁▁
ASR_SCR_INTERNAL_T 106 0.99 48.13 10.55 30.00 40.00 48.00 55.00 95.00 ▆▇▃▁▁
ASR_SCR_EXTERNAL_T 106 0.99 45.96 9.62 30.00 38.00 46.00 52.00 90.00 ▇▇▃▁▁
ASR_SCR_TOTPROB_T 106 0.99 43.00 10.23 25.00 36.00 43.00 50.00 89.00 ▆▇▃▁▁
ASR_SCR_DEPRESS_T 106 0.99 54.03 5.98 50.00 50.00 51.00 57.00 100.00 ▇▁▁▁▁
ASR_SCR_ANXDISORD_T 106 0.99 53.51 5.38 50.00 50.00 51.00 54.00 80.00 ▇▁▁▁▁
ASR_SCR_SOMATICPR_T 106 0.99 54.78 6.52 50.00 50.00 51.00 58.00 100.00 ▇▂▁▁▁
ASR_SCR_AVOIDANT_T 106 0.99 53.21 5.42 50.00 50.00 51.00 54.00 90.00 ▇▁▁▁▁
ASR_SCR_ADHD_T 106 0.99 53.25 5.57 50.00 50.00 51.00 53.00 98.00 ▇▁▁▁▁
ASR_SCR_ANTISOCIAL_T 106 0.99 53.01 4.68 50.00 50.00 51.00 54.00 83.00 ▇▁▁▁▁
ASR_SCR_INATTENTION_T 106 0.99 54.30 6.51 50.00 50.00 51.00 57.00 90.00 ▇▂▁▁▁
ASR_SCR_HYPERACTIVE_T 106 0.99 52.03 4.27 50.00 50.00 50.00 51.00 80.00 ▇▁▁▁▁
mania_parent 8 1.00 1.30 2.77 0.00 0.00 0.00 1.00 28.00 ▇▁▁▁▁
BISAvg 22 1.00 1.38 0.71 0.00 0.75 1.25 2.00 3.00 ▃▅▇▃▂
BASRRAvg 23 1.00 2.20 0.62 0.00 1.75 2.25 2.75 3.00 ▁▁▅▆▇
BASDriveAvg 23 1.00 1.04 0.77 0.00 0.50 1.00 1.50 3.00 ▇▆▅▂▂
BASFunAvg 23 1.00 1.43 0.66 0.00 1.00 1.50 1.75 3.00 ▂▅▇▃▂
BASAllAvg 23 1.00 1.55 0.54 0.00 1.17 1.50 1.92 3.00 ▁▅▇▅▂
UPPS_Y_SS_NEGATIVE_URGENCY 23 1.00 8.49 2.65 4.00 7.00 8.00 10.00 16.00 ▆▇▇▂▁
UPPS_Y_SS_LACK_OF_PLANNING 23 1.00 7.74 2.38 4.00 6.00 8.00 9.00 16.00 ▆▇▅▁▁
UPPS_Y_SS_SENSATION_SEEKING 23 1.00 9.77 2.68 4.00 8.00 10.00 12.00 16.00 ▂▅▇▃▂
UPPS_Y_SS_POSITIVE_URGENCY 23 1.00 7.99 2.96 4.00 6.00 8.00 10.00 16.00 ▇▆▆▂▁
UPPS_Y_SS_LACK_OF_PERSEVERANCE 23 1.00 7.04 2.25 4.00 5.00 7.00 8.00 16.00 ▇▆▃▁▁
sleep_hours 5 1.00 1.72 0.81 1.00 1.00 2.00 2.00 5.00 ▇▆▂▁▁
sleep_disturb 5 1.00 1.93 0.98 1.00 1.00 2.00 2.00 5.00 ▇▇▂▁▁
sleep_initiate_maintain 5 1.00 11.75 3.75 7.00 9.00 11.00 13.00 35.00 ▇▃▁▁▁
sleep_breath 5 1.00 3.77 1.25 3.00 3.00 3.00 4.00 15.00 ▇▁▁▁▁
sleep_arousal 5 1.00 3.44 0.92 3.00 3.00 3.00 4.00 15.00 ▇▁▁▁▁
sleep_transition 32 1.00 8.18 2.63 6.00 6.00 7.00 9.00 30.00 ▇▁▁▁▁
sleep_somnolence 6 1.00 6.95 2.44 5.00 5.00 6.00 8.00 25.00 ▇▁▁▁▁
sleep_hyperhydrosis 5 1.00 2.44 1.18 2.00 2.00 2.00 2.00 10.00 ▇▁▁▁▁
sleep_total 33 1.00 36.53 8.24 26.00 31.00 35.00 40.00 126.00 ▇▁▁▁▁
matureGames_Screen 20 1.00 0.57 0.87 0.00 0.00 0.00 1.00 3.00 ▇▃▁▁▁
matureMovies_Screen 21 1.00 0.38 0.64 0.00 0.00 0.00 1.00 3.00 ▇▃▁▁▁
wkdySum_Screen 37 1.00 3.46 3.10 0.00 1.25 2.50 4.75 24.00 ▇▂▁▁▁
wkndSum_Screen 42 1.00 4.62 3.63 0.00 2.00 3.50 6.25 24.00 ▇▃▁▁▁
deveplopment_birth_complications 761 0.94 0.37 0.74 0.00 0.00 0.00 1.00 8.00 ▇▁▁▁▁
deveplopment_pregnancy_complications 744 0.94 0.61 1.02 0.00 0.00 0.00 1.00 12.00 ▇▁▁▁▁
bilingual_use 1028 0.91 1.01 1.69 0.00 0.00 0.00 1.00 9.00 ▇▂▁▁▁
education1stPar 17 1.00 16.60 2.77 1.00 15.00 18.00 19.00 21.00 ▁▁▂▅▇
education2ndPar 2460 0.79 16.38 3.06 0.00 15.00 18.00 18.00 21.00 ▁▁▁▅▇
educationAvg 14 1.00 16.38 2.70 3.00 15.00 17.00 18.50 21.00 ▁▁▂▇▇
combinedIncome 1015 0.91 7.23 2.42 1.00 6.00 8.00 9.00 10.00 ▂▂▃▆▇
householdSize 279 0.98 4.70 1.55 0.00 4.00 4.00 5.00 19.00 ▂▇▁▁▁
econ_insecurities_sum 134 0.99 0.47 1.10 0.00 0.00 0.00 0.00 7.00 ▇▁▁▁▁
area_deprivation_index 694 0.94 92.71 24.86 0.00 86.64 98.48 108.15 125.75 ▁▁▂▇▇
lead_risk 698 0.94 5.10 3.11 0.00 2.00 5.00 8.00 10.00 ▇▆▅▅▆
quartic_uniform_crime_reports 694 0.94 12.09 5.79 0.00 9.41 12.28 15.20 24.29 ▂▃▇▅▁
neighbo_safety_parent_sum 47 1.00 11.67 2.93 3.00 10.00 12.00 14.00 15.00 ▁▁▃▆▇
neighbo_safety_child_sum 24 1.00 4.03 1.10 1.00 3.00 4.00 5.00 5.00 ▁▁▃▆▇
sumSchool_environment 27 1.00 19.93 2.83 6.00 18.00 20.00 22.00 24.00 ▁▁▂▇▇
sumSchool_involvement 26 1.00 13.06 2.37 4.00 12.00 13.00 15.00 16.00 ▁▁▃▅▇
sumSchool_disengagement 25 1.00 3.74 1.46 2.00 3.00 4.00 5.00 8.00 ▇▃▃▁▁
parent_monitor_mean 23 1.00 4.38 0.52 1.00 4.20 4.40 4.80 5.00 ▁▁▁▃▇
fam_conflict_parent 12 1.00 2.54 1.96 0.00 1.00 2.00 4.00 9.00 ▇▇▅▂▁
fam_conflict_children 27 1.00 2.04 1.95 0.00 0.00 2.00 3.00 9.00 ▇▅▂▁▁
prosocial_parent_mean 62 0.99 1.75 0.40 0.00 1.67 2.00 2.00 2.00 ▁▁▁▁▇
prosocial_youth_mean 33 1.00 1.68 0.37 0.00 1.33 1.67 2.00 2.00 ▁▁▁▂▇
phys_ind_daypweek_sum 93 0.99 4.89 5.64 0.00 0.00 4.00 8.00 70.00 ▇▁▁▁▁
phys_team_daypweek_sum 112 0.99 8.13 7.61 0.00 0.00 6.00 13.00 54.00 ▇▃▁▁▁
art_daypweek_sum 118 0.99 4.96 6.24 0.00 0.00 4.00 8.00 54.00 ▇▁▁▁▁
sport_act_all_daypweek_sum 273 0.98 17.90 13.72 0.00 8.00 16.00 26.00 170.00 ▇▁▁▁▁
physc_act_days 28 1.00 3.50 2.32 0.00 2.00 3.00 5.00 7.00 ▇▅▇▅▇

1.16 preprocess site

Make sure that no members of the same family were assessed at different sites.

# Restrict the joined table to the baseline visit.
all_sum_vars_baseline <- filter(
  all_sum_vars,
  EVENTNAME == "baseline_year_1_arm_1"
)

# Number of baseline rows per site.
count(all_sum_vars_baseline, SITE_ID_L)
##    SITE_ID_L    n
## 1     site01  401
## 2     site02  554
## 3     site03  629
## 4     site04  747
## 5     site05  375
## 6     site06  580
## 7     site07  339
## 8     site08  350
## 9     site09  431
## 10    site10  732
## 11    site11  454
## 12    site12  595
## 13    site13  721
## 14    site14  599
## 15    site15  441
## 16    site16 1007
## 17    site17  575
## 18    site18  384
## 19    site19  533
## 20    site20  686
## 21    site21  595
## 22    site22   34
## 23      <NA>   90
# Check if there are members from the same family at different sites.
# There are 6 of them.
# Build a family x site count matrix and take its cross-product: a non-zero
# off-diagonal cell means at least one family has members at both sites.
# pivot_wider() replaces the superseded spread(); names_sort = TRUE keeps the
# site columns in sorted order, as spread() did.
all_sum_vars_baseline %>%
  drop_na(SITE_ID_L) %>%
  filter(SITE_ID_L != "site22") %>%
  count(REL_FAMILY_ID, SITE_ID_L) %>%
  pivot_wider(
    names_from = SITE_ID_L,
    values_from = n,
    values_fill = 0,
    names_sort = TRUE
  ) %>%
  select(-REL_FAMILY_ID) %>%
  as.matrix() %>%
  crossprod()
##        site01 site02 site03 site04 site05 site06 site07 site08 site09 site10
## site01    491      0      0      0      0      0      0      0      0      0
## site02      0   1034      0      0      0      0      0      0      0      0
## site03      0      0    751      0      0      0      0      0      0      0
## site04      0      0      0    957      0      0      0      0      0      0
## site05      0      0      0      0    471      0      0      0      0      0
## site06      0      0      0      0      0    686      0      1      0      0
## site07      0      0      0      0      0      0    425      0      0      0
## site08      0      0      0      0      0      1      0    434      0      0
## site09      0      0      0      0      0      0      0      0    479      0
## site10      0      0      0      0      0      0      0      0      0    906
## site11      0      0      0      0      0      0      0      0      0      0
## site12      0      0      0      0      0      0      0      0      0      0
## site13      0      0      0      1      0      0      0      0      0      0
## site14      0      0      0      0      0      0      0      0      0      0
## site15      0      0      0      0      0      0      0      0      0      0
## site16      0      0      0      0      0      0      0      0      0      0
## site17      0      1      0      0      0      0      0      0      0      0
## site18      0      0      0      0      0      0      0      0      0      0
## site19      0      0      0      0      0      1      0      0      0      0
## site20      0      0      0      0      0      0      0      1      0      0
## site21      0      0      0      0      0      0      0      0      0      0
##        site11 site12 site13 site14 site15 site16 site17 site18 site19 site20
## site01      0      0      0      0      0      0      0      0      0      0
## site02      0      0      0      0      0      0      1      0      0      0
## site03      0      0      0      0      0      0      0      0      0      0
## site04      0      0      1      0      0      0      0      0      0      0
## site05      0      0      0      0      0      0      0      0      0      0
## site06      0      0      0      0      0      0      0      0      1      0
## site07      0      0      0      0      0      0      0      0      0      0
## site08      0      0      0      0      0      0      0      0      0      1
## site09      0      0      0      0      0      0      0      0      0      0
## site10      0      0      0      0      0      0      0      0      0      0
## site11    582      0      0      0      0      0      0      0      0      0
## site12      0    727      0      0      0      0      0      0      0      0
## site13      0      0    879      0      0      0      0      0      0      0
## site14      0      0      0   1083      0      1      0      0      0      0
## site15      0      0      0      0    525      0      0      0      0      0
## site16      0      0      0      1      0   1389      0      0      0      0
## site17      0      0      0      0      0      0    693      0      0      0
## site18      0      0      0      0      0      0      0    448      0      0
## site19      0      0      0      0      0      0      0      0    967      0
## site20      0      0      0      0      0      0      0      0      0   1148
## site21      0      0      0      0      0      0      0      0      0      0
##        site21
## site01      0
## site02      0
## site03      0
## site04      0
## site05      0
## site06      0
## site07      0
## site08      0
## site09      0
## site10      0
## site11      0
## site12      0
## site13      0
## site14      0
## site15      0
## site16      0
## site17      0
## site18      0
## site19      0
## site20      0
## site21    719
# Remove the families whose members were assessed at more than one site
# (the 6 families identified by the cross-check), after dropping rows with no
# site and the rows from site22.
# `dup` is 1 for every member of a family seen at more than one distinct site
# and 0 otherwise. It is computed with a grouped n_distinct() instead of the
# deprecated nest(SITE_ID_L, .key = ...) round-trip, which raised a warning.
# relocate() keeps SITE_ID_L and dup as the last two columns, which is the
# column layout the nest()/unnest() version produced.
all_sum_vars_baseline_no_dup <- all_sum_vars_baseline %>%
  drop_na(SITE_ID_L) %>%
  filter(SITE_ID_L != "site22") %>%
  group_by(REL_FAMILY_ID) %>%
  mutate(dup = if (n_distinct(SITE_ID_L) > 1) 1 else 0) %>%
  ungroup() %>%
  filter(dup != 1) %>%
  relocate(SITE_ID_L, dup, .after = last_col())
## Warning: All elements of `...` must be named.
## Did you want `SITE_ID_L = c(SITE_ID_L)`?

1.17 summary of targets and features

cognition NIHTBX_FLANKER_UNCORRECTED,NIHTBX_CARDSORT_UNCORRECTED,NIHTBX_PATTERN_UNCORRECTED,NIHTBX_PICVOCAB_UNCORRECTED,NIHTBX_READING_UNCORRECTED,NIHTBX_PICTURE_UNCORRECTED,PEA_RAVLT_LD_TRIAL_VII_TC,NIHTBX_LIST_UNCORRECTED,LMT_SCR_PERC_CORRECT,PEA_WISCV_TRS,NIHTBX_FLUIDCOMP_UNCORRECTED,NIHTBX_CRYST_UNCORRECTED,NIHTBX_TOTALCOMP_UNCORRECTED

cognition Sum NIHTBX_FLUIDCOMP_UNCORRECTED,NIHTBX_CRYST_UNCORRECTED,NIHTBX_TOTALCOMP_UNCORRECTED

features CBCL_SCR_SYN_ANXDEP_R, CBCL_SCR_SYN_WITHDEP_R, CBCL_SCR_SYN_SOMATIC_R, CBCL_SCR_SYN_SOCIAL_R, CBCL_SCR_SYN_THOUGHT_R, CBCL_SCR_SYN_ATTENTION_R, CBCL_SCR_SYN_RULEBREAK_R, CBCL_SCR_SYN_AGGRESSIVE_R, ASR_SCR_PERSTR_T, ASR_SCR_ANXDEP_T,ASR_SCR_WITHDRAWN_T, ASR_SCR_SOMATIC_T,ASR_SCR_THOUGHT_T, ASR_SCR_ATTENTION_T, ASR_SCR_AGGRESSIVE_T,ASR_SCR_RULEBREAK_T,ASR_SCR_INTRUSIVE_T,mania_parent,BISAvg,BASRRAvg,BASDriveAvg,BASFunAvg,UPPS_Y_SS_NEGATIVE_URGENCY,UPPS_Y_SS_LACK_OF_PLANNING,UPPS_Y_SS_SENSATION_SEEKING,UPPS_Y_SS_POSITIVE_URGENCY,UPPS_Y_SS_LACK_OF_PERSEVERANCE,sleep_hours,sleep_disturb,sleep_initiate_maintain,sleep_breath,sleep_arousal,sleep_transition,sleep_somnolence,sleep_hyperhydrosis,matureGames_Screen,matureMovies_Screen,wkdySum_Screen,wkndSum_Screen,tobacco_before_preg,tobacco_after_preg,alcohol_before_preg,alcohol_after_preg,marijuana_before_preg,marijuana_after_preg,deveplopment_prematurity,deveplopment_birth_complications,deveplopment_pregnancy_complications,SEX,RACE_ETHNICITY,bilingual_use,marital,educationAvg,combinedIncome,householdSize,econ_insecurities_sum,area_deprivation_index,lead_risk,quartic_uniform_crime_reports,neighbo_safety_parent_sum,neighbo_safety_child_sum,sumSchool_environment,sumSchool_involvement,sumSchool_disengagement,parent_monitor_mean,fam_conflict_parent,fam_conflict_children,prosocial_parent_mean,prosocial_youth_mean, phys_ind_daypweek_sum, phys_team_daypweek_sum, art_daypweek_sum, physc_act_days

# Column names of the individual cognitive task scores.
cognition <- c(
  "NIHTBX_FLANKER_UNCORRECTED",
  "NIHTBX_CARDSORT_UNCORRECTED",
  "NIHTBX_PATTERN_UNCORRECTED",
  "NIHTBX_PICVOCAB_UNCORRECTED",
  "NIHTBX_READING_UNCORRECTED",
  "NIHTBX_PICTURE_UNCORRECTED",
  "PEA_RAVLT_LD_TRIAL_VII_TC",
  "NIHTBX_LIST_UNCORRECTED",
  "LMT_SCR_PERC_CORRECT",
  "PEA_WISCV_TRS"
)

# The six task scores used as indicators in the g-factor CFA.
gmodel <- c(
  "NIHTBX_PICVOCAB_UNCORRECTED",
  "NIHTBX_READING_UNCORRECTED",
  "NIHTBX_FLANKER_UNCORRECTED",
  "NIHTBX_PATTERN_UNCORRECTED",
  "NIHTBX_PICTURE_UNCORRECTED",
  "PEA_RAVLT_LD_TRIAL_VII_TC"
)

# Column names of the precomputed composite scores.
cognition_sum <- c(
  "NIHTBX_FLUIDCOMP_UNCORRECTED",
  "NIHTBX_CRYST_UNCORRECTED",
  "NIHTBX_TOTALCOMP_UNCORRECTED"
)

# Names of the predictor columns, listed one per line and grouped by their
# source prefix. The *_before_preg columns are deliberately left out
# (kept below as comments).
features <- c(
  # CBCL_* columns
  "CBCL_SCR_SYN_ANXDEP_R",
  "CBCL_SCR_SYN_WITHDEP_R",
  "CBCL_SCR_SYN_SOMATIC_R",
  "CBCL_SCR_SYN_SOCIAL_R",
  "CBCL_SCR_SYN_THOUGHT_R",
  "CBCL_SCR_SYN_ATTENTION_R",
  "CBCL_SCR_SYN_RULEBREAK_R",
  "CBCL_SCR_SYN_AGGRESSIVE_R",
  # ASR_* columns
  "ASR_SCR_PERSTR_T",
  "ASR_SCR_ANXDEP_T",
  "ASR_SCR_WITHDRAWN_T",
  "ASR_SCR_SOMATIC_T",
  "ASR_SCR_THOUGHT_T",
  "ASR_SCR_ATTENTION_T",
  "ASR_SCR_AGGRESSIVE_T",
  "ASR_SCR_RULEBREAK_T",
  "ASR_SCR_INTRUSIVE_T",
  # mania
  "mania_parent",
  # BIS/BAS averages
  "BISAvg",
  "BASRRAvg",
  "BASDriveAvg",
  "BASFunAvg",
  # UPPS_* columns
  "UPPS_Y_SS_NEGATIVE_URGENCY",
  "UPPS_Y_SS_LACK_OF_PLANNING",
  "UPPS_Y_SS_SENSATION_SEEKING",
  "UPPS_Y_SS_POSITIVE_URGENCY",
  "UPPS_Y_SS_LACK_OF_PERSEVERANCE",
  # sleep_* columns
  "sleep_hours",
  "sleep_disturb",
  "sleep_initiate_maintain",
  "sleep_breath",
  "sleep_arousal",
  "sleep_transition",
  "sleep_somnolence",
  "sleep_hyperhydrosis",
  # *_Screen columns
  "matureGames_Screen",
  "matureMovies_Screen",
  "wkdySum_Screen",
  "wkndSum_Screen",
  # substance use around pregnancy (before-pregnancy columns excluded)
  # "tobacco_before_preg",
  "tobacco_after_preg",
  # "alcohol_before_preg",
  "alcohol_after_preg",
  # "marijuana_before_preg",
  "marijuana_after_preg",
  # deveplopment_* columns
  "deveplopment_prematurity",
  "deveplopment_birth_complications",
  "deveplopment_pregnancy_complications",
  # bilingualism
  "bilingual_use",
  # sex and race/ethnicity
  "SEX",
  "RACE_ETHNICITY",
  # household and neighbourhood
  "marital",
  "educationAvg",
  "combinedIncome",
  "householdSize",
  "econ_insecurities_sum",
  "area_deprivation_index",
  "lead_risk",
  "quartic_uniform_crime_reports",
  "neighbo_safety_parent_sum",
  "neighbo_safety_child_sum",
  # sumSchool_* columns
  "sumSchool_environment",
  "sumSchool_involvement",
  "sumSchool_disengagement",
  # parenting and family
  "parent_monitor_mean",
  "fam_conflict_parent",
  "fam_conflict_children",
  # prosocial behaviour
  "prosocial_parent_mean",
  "prosocial_youth_mean",
  # activities
  "phys_ind_daypweek_sum",
  "phys_team_daypweek_sum",
  "art_daypweek_sum",
  "physc_act_days"
)

1.18 visualise missing value

The missing column is the visual impairment variable; we won’t use it anyway. There appear to be a lot of missing values for bilingual_use; we treat those missing values as zero so as not to lose data. For the other variables we will use kmean imputation for continuous variables and mode imputation for factors.

# Plot the missingness pattern of the feature columns.
# all_of() makes it explicit that `features` is an external character vector
# of column names, and the logical flag is spelled out as FALSE rather than F.
all_sum_vars_baseline_no_dup %>%
  select(all_of(features)) %>%
  naniar::vis_miss(warn_large_data = FALSE)
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(features)` instead of `features` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.

# Plot the share of missing values for each feature column.
# all_of() makes it explicit that `features` is an external character vector
# of column names rather than a column called "features".
all_sum_vars_baseline_no_dup %>%
  select(all_of(features)) %>%
  DataExplorer::plot_missing()

# Store the same per-feature missingness profile as a table.
missing_feature_values <- all_sum_vars_baseline_no_dup %>%
  select(all_of(features)) %>%
  DataExplorer::profile_missing()

# Change missing bilingual_use to 0 so that these rows are not lost later.
all_sum_vars_baseline_no_dup_bilin0 <- all_sum_vars_baseline_no_dup %>%
  tidyr::replace_na(list(bilingual_use = 0))

1.19 need imputation

Otherwise, more than half of the data would be lost.

# Number of rows before any listwise deletion.
nrow(all_sum_vars_baseline_no_dup_bilin0)
## [1] 11716
# Number of rows left if every row with any missing value were dropped.
nrow(drop_na(all_sum_vars_baseline_no_dup_bilin0))
## [1] 5633

1.20 cfa on all subjects

# lavaan syntax for a second-order model: three first-order factors, each
# measured by two task scores, load on a single higher-order factor `g`.
# The first loading of `g` is freed (NA*) and the variance of `g` is fixed to 1.
NeuroCog2ndOrder <- '
Language =~ NIHTBX_PICVOCAB_UNCORRECTED + NIHTBX_READING_UNCORRECTED 
CognitiveFlexibity =~ NIHTBX_FLANKER_UNCORRECTED + NIHTBX_PATTERN_UNCORRECTED 
MemoryRecall =~ NIHTBX_PICTURE_UNCORRECTED + PEA_RAVLT_LD_TRIAL_VII_TC
g =~ NA*Language + CognitiveFlexibity  + MemoryRecall #estimate the loading of GenAbi -> as opposed to using it as a marker
g ~~ 1*g #need to constrain variance to 1'

# Keep only the rows that have all six indicator scores listed in `gmodel`.
all_sum_vars_baseline_no_dup_bilin0_no_na_G <- drop_na(
  all_sum_vars_baseline_no_dup_bilin0,
  all_of(gmodel)
)
# glimpse(all_sum_vars_baseline_no_dup_bilin0_no_na_G)

# Fit the CFA with the MLR estimator, then predict factor scores for the same
# rows the model was fitted on.
NeuroCog2ndOrder.Fit <- lavaan::cfa(
  model = NeuroCog2ndOrder,
  data = all_sum_vars_baseline_no_dup_bilin0_no_na_G,
  estimator = "MLR"
)
second_order_output <- lavaan::lavPredict(
  NeuroCog2ndOrder.Fit,
  newdata = all_sum_vars_baseline_no_dup_bilin0_no_na_G
)

# Attach the predicted `g` scores to the data as the column `gfactor`.
all_sum_vars_baseline_no_dup_bilin0_no_na_G_factor_all_sub <- bind_cols(
  all_sum_vars_baseline_no_dup_bilin0_no_na_G,
  gfactor = second_order_output[, "g"]
)

1.21 visualise with correlation heatmap

#all_sum_vars_baseline_no_dup_bilin0_no_na_G_factor_all_sub %>% 
#  select(gfactor, features) %>% dlookr::plot_correlate()

2 Modeling for Socio-Demographic and Psychological Factors

Samples: REL_FAMILY_ID (9856 levels); SITE_ID_L (the 22nd site needs to be removed because it has too few subjects). Also make sure to check EVENTNAME.

Target: Factor analysis of cognition: gfactor

70 Features: CBCL_SCR_SYN_ANXDEP_R CBCL_SCR_SYN_WITHDEP_R CBCL_SCR_SYN_SOMATIC_R CBCL_SCR_SYN_SOCIAL_R CBCL_SCR_SYN_THOUGHT_R CBCL_SCR_SYN_ATTENTION_R CBCL_SCR_SYN_RULEBREAK_R CBCL_SCR_SYN_AGGRESSIVE_R ASR_SCR_PERSTR_T ASR_SCR_ANXDEP_T ASR_SCR_WITHDRAWN_T ASR_SCR_SOMATIC_T ASR_SCR_THOUGHT_T ASR_SCR_ATTENTION_T ASR_SCR_AGGRESSIVE_T ASR_SCR_RULEBREAK_T ASR_SCR_INTRUSIVE_T mania_parent BISAvg BASRRAvg BASDriveAvg BASFunAvg UPPS_Y_SS_NEGATIVE_URGENCY UPPS_Y_SS_LACK_OF_PLANNING UPPS_Y_SS_SENSATION_SEEKING UPPS_Y_SS_POSITIVE_URGENCY UPPS_Y_SS_LACK_OF_PERSEVERANCE sleep_hours sleep_disturb sleep_initiate_maintain sleep_breath sleep_arousal sleep_transition sleep_somnolence sleep_hyperhydrosis matureGames_Screen matureMovies_Screen wkdySum_Screen wkndSum_Screen tobacco_before_preg tobacco_after_preg alcohol_before_preg alcohol_after_preg marijuana_before_preg marijuana_after_preg deveplopment_prematurity deveplopment_birth_complications deveplopment_pregnancy_complications bilingual_use marital educationAvg combinedIncome householdSize econ_insecurities_sum area_deprivation_index lead_risk quartic_uniform_crime_reports neighbo_safety_parent_sum neighbo_safety_child_sum sumSchool_environment sumSchool_involvement sumSchool_disengagement parent_monitor_mean fam_conflict_parent fam_conflict_children prosocial_parent_mean prosocial_youth_mean phys_ind_daypweek_sum phys_team_daypweek_sum art_daypweek_sum physc_act_days


Features by categories:

Child Mental Health (9): CBCL_SCR_SYN_ANXDEP_R CBCL_SCR_SYN_WITHDEP_R CBCL_SCR_SYN_SOMATIC_R CBCL_SCR_SYN_SOCIAL_R CBCL_SCR_SYN_THOUGHT_R CBCL_SCR_SYN_ATTENTION_R CBCL_SCR_SYN_RULEBREAK_R CBCL_SCR_SYN_AGGRESSIVE_R

Parent Mental Health(10): ASR_SCR_PERSTR_T ASR_SCR_ANXDEP_T ASR_SCR_WITHDRAWN_T ASR_SCR_SOMATIC_T ASR_SCR_THOUGHT_T ASR_SCR_ATTENTION_T ASR_SCR_AGGRESSIVE_T ASR_SCR_RULEBREAK_T ASR_SCR_INTRUSIVE_T mania_parent

Child Personality(9): BISAvg BASRRAvg BASDriveAvg BASFunAvg UPPS_Y_SS_NEGATIVE_URGENCY UPPS_Y_SS_LACK_OF_PLANNING UPPS_Y_SS_SENSATION_SEEKING UPPS_Y_SS_POSITIVE_URGENCY UPPS_Y_SS_LACK_OF_PERSEVERANCE

Child Sleep (8): sleep_hours sleep_disturb sleep_initiate_maintain sleep_breath sleep_arousal sleep_transition sleep_somnolence sleep_hyperhydrosis

Physical Activity (4): phys_ind_daypweek_sum phys_team_daypweek_sum art_daypweek_sum physc_act_days

Child Screen Use (4): matureGames_Screen matureMovies_Screen wkdySum_Screen wkndSum_Screen

Parent Drug Use (6): tobacco_before_preg tobacco_after_preg alcohol_before_preg alcohol_after_preg marijuana_before_preg marijuana_after_preg

Child Developmental Adversity (3): deveplopment_prematurity deveplopment_birth_complications deveplopment_pregnancy_complications

Child Socio-Demographics (15): bilingual_use marital educationAvg combinedIncome householdSize econ_insecurities_sum area_deprivation_index lead_risk quartic_uniform_crime_reports neighbo_safety_parent_sum neighbo_safety_child_sum sumSchool_environment sumSchool_involvement sumSchool_disengagement

Social Interaction (5): parent_monitor_mean fam_conflict_parent fam_conflict_children prosocial_parent_mean prosocial_youth_mean

2.1 set up parallel processing

# Register a doParallel backend that leaves one physical core free.
# library() is used instead of require() so a missing package fails loudly.
library(doParallel)
cores <- parallel::detectCores(logical = FALSE)
# detectCores() may return NA, and a single-core machine would otherwise
# request 0 workers; always register at least one.
registerDoParallel(cores = max(1L, cores - 1L, na.rm = TRUE))

2.2 set up formula

# Model formula: gfactor regressed on every entry of `features`,
# joined with " + " on the right-hand side.
gfactor_all_features_formular <- as.formula(
  paste0("gfactor", " ~ ", paste(features, collapse = " + "))
)
print(gfactor_all_features_formular)
## gfactor ~ CBCL_SCR_SYN_ANXDEP_R + CBCL_SCR_SYN_WITHDEP_R + CBCL_SCR_SYN_SOMATIC_R + 
##     CBCL_SCR_SYN_SOCIAL_R + CBCL_SCR_SYN_THOUGHT_R + CBCL_SCR_SYN_ATTENTION_R + 
##     CBCL_SCR_SYN_RULEBREAK_R + CBCL_SCR_SYN_AGGRESSIVE_R + ASR_SCR_PERSTR_T + 
##     ASR_SCR_ANXDEP_T + ASR_SCR_WITHDRAWN_T + ASR_SCR_SOMATIC_T + 
##     ASR_SCR_THOUGHT_T + ASR_SCR_ATTENTION_T + ASR_SCR_AGGRESSIVE_T + 
##     ASR_SCR_RULEBREAK_T + ASR_SCR_INTRUSIVE_T + mania_parent + 
##     BISAvg + BASRRAvg + BASDriveAvg + BASFunAvg + UPPS_Y_SS_NEGATIVE_URGENCY + 
##     UPPS_Y_SS_LACK_OF_PLANNING + UPPS_Y_SS_SENSATION_SEEKING + 
##     UPPS_Y_SS_POSITIVE_URGENCY + UPPS_Y_SS_LACK_OF_PERSEVERANCE + 
##     sleep_hours + sleep_disturb + sleep_initiate_maintain + sleep_breath + 
##     sleep_arousal + sleep_transition + sleep_somnolence + sleep_hyperhydrosis + 
##     matureGames_Screen + matureMovies_Screen + wkdySum_Screen + 
##     wkndSum_Screen + tobacco_after_preg + alcohol_after_preg + 
##     marijuana_after_preg + deveplopment_prematurity + deveplopment_birth_complications + 
##     deveplopment_pregnancy_complications + bilingual_use + SEX + 
##     RACE_ETHNICITY + marital + educationAvg + combinedIncome + 
##     householdSize + econ_insecurities_sum + area_deprivation_index + 
##     lead_risk + quartic_uniform_crime_reports + neighbo_safety_parent_sum + 
##     neighbo_safety_child_sum + sumSchool_environment + sumSchool_involvement + 
##     sumSchool_disengagement + parent_monitor_mean + fam_conflict_parent + 
##     fam_conflict_children + prosocial_parent_mean + prosocial_youth_mean + 
##     phys_ind_daypweek_sum + phys_team_daypweek_sum + art_daypweek_sum + 
##     physc_act_days

2.3 create a list of sites

# One row per distinct site, sorted by site id.
site_col <- arrange(
  distinct(all_sum_vars_baseline_no_dup_bilin0, SITE_ID_L),
  SITE_ID_L
)

# The same site ids as a list (for mapping over) ...
site_list <- as.list(site_col[["SITE_ID_L"]])

# ... and as a character vector (used later to name per-site results).
site_char <- as.character(unlist(site_col[["SITE_ID_L"]]))

2.4 test cfa

# Second-order CFA specification (lavaan syntax): three first-order factors
# loading on a general factor g whose variance is fixed to 1.
NeuroCog2ndOrder <- '
Language =~ NIHTBX_PICVOCAB_UNCORRECTED + NIHTBX_READING_UNCORRECTED 
CognitiveFlexibity =~ NIHTBX_FLANKER_UNCORRECTED + NIHTBX_PATTERN_UNCORRECTED 
MemoryRecall =~ NIHTBX_PICTURE_UNCORRECTED + PEA_RAVLT_LD_TRIAL_VII_TC
g =~ NA*Language + CognitiveFlexibity  + MemoryRecall #estimate the loading of GenAbi -> as opposed to using it as a marker
g ~~ 1*g #need to constrain variance to 1'

# Trial fit on all subjects with complete g-factor indicators.
NeuroCog2ndOrder.Fit <- lavaan::cfa(
  model = NeuroCog2ndOrder,
  data = all_sum_vars_baseline_no_dup_bilin0_no_na_G,
  estimator = "MLR"
)

# Factor scores for the same subjects the model was fitted on.
second_order_output <- lavaan::lavPredict(
  NeuroCog2ndOrder.Fit,
  newdata = all_sum_vars_baseline_no_dup_bilin0_no_na_G
)

# Append the g scores as `gfactor`.
all_sum_vars_baseline_no_dup_bilin0_no_na_G_factor_all_sub <- bind_cols(
  all_sum_vars_baseline_no_dup_bilin0_no_na_G,
  gfactor = second_order_output[, "g"]
)

2.5 create a list of splits based on sites

# Build one leave-one-site-out split.
#
# site: a single site id; rows with this SITE_ID_L form the assessment set,
#       all other rows form the analysis set.
#
# The CFA is fitted on the analysis rows only, and that fitted model is then
# used to score both the analysis and the assessment rows. The scores are
# written to a `gfactor` column on a function-local copy of the data (the
# global data frame is not modified).
#
# Returns the object produced by make_splits() for these indices and data.
split_func <- function(site) {
  site_data <- all_sum_vars_baseline_no_dup_bilin0_no_na_G

  is_held_out <- site_data$SITE_ID_L == site
  test_indices <- which(is_held_out)
  train_indices <- which(!is_held_out)

  train_rows <- site_data[train_indices, ]
  test_rows <- site_data[test_indices, ]

  # fit on the training sites only
  cfa_fit <- lavaan::cfa(
    model = NeuroCog2ndOrder,
    data = train_rows,
    estimator = "MLR"
  )

  # score both partitions with the training-site model
  g_train <- lavaan::lavPredict(cfa_fit, newdata = train_rows)[, "g"]
  g_test <- lavaan::lavPredict(cfa_fit, newdata = test_rows)[, "g"]

  site_data$gfactor <- NA
  site_data$gfactor[train_indices] <- g_train
  site_data$gfactor[test_indices] <- g_test

  make_splits(
    list(analysis = train_indices, assessment = test_indices),
    site_data
  )
}

# one split per site, in the order of site_list
split_list <- map(site_list, split_func)

2.6 create a list of recipe based on sites

Note: step_naomit causes an error, so missing values in the target variable are dropped before this step instead.

# Build the (unprepped) preprocessing recipe for one split.
#
# split: a split object as produced by split_func(); only its training
#        portion is handed to recipe().
#
# Steps, in order:
#   1. mode-impute nominal predictors
#   2. dummy-code nominal predictors
#   3. normalise all numeric columns (this includes the outcome);
#      all_nominal() is also passed here -- presumably a no-op once the
#      nominal predictors have been dummy-coded (TODO confirm)
#   4. KNN-impute numeric predictors with 5 neighbours
# No step_naomit() is applied to the outcome.
recipe_func <- function(split) {
  train_set <- training(split)

  recipe(gfactor_all_features_formular, data = train_set) %>%
    step_impute_mode(all_nominal(), -all_outcomes()) %>%
    step_dummy(all_nominal(), -all_outcomes()) %>%
    step_normalize(all_numeric(), all_nominal()) %>%
    step_impute_knn(all_numeric(), -all_outcomes(), neighbors = 5)
}

# one recipe per leave-one-site-out split
recipe_list <- map(split_list, recipe_func)

2.7 create a list of workflow, bestmodel and best hyperpara based on sites

# (Re-)register the doParallel backend used by tune_grid(), leaving one
# physical core free. library() fails loudly if the package is missing.
library(doParallel)
cores <- parallel::detectCores(logical = FALSE)
# Guard against detectCores() returning NA or 1, which would otherwise
# request NA or 0 workers.
registerDoParallel(cores = max(1L, cores - 1L, na.rm = TRUE))

# Tune and fit an elastic net on the training portion of one split.
#
# split:                a split object (see split_func()).
# preprocessing_recipe: the recipe built for the same split.
#
# Hyperparameters are tuned with 10-fold CV on the training data over a
# regular grid (200 penalty levels on a log10 scale from 1e-10 to 1e1,
# 11 mixture levels), selecting on MAE.
#
# Returns a list with:
#   elastic_net_wfl_final      - workflow finalised with the best parameters
#   best_elastic_net_model     - result of select_best()
#   best_elastic_net_hyperpara - result of show_best()
#   elastic_net_model_val      - model fit pulled from the finalised workflow
#                                after fitting it to the training data
elastic_net_tuning_func <- function(split, preprocessing_recipe) {
  train_set <- training(split)

  # folds for hyperparameter tuning (seeded so the folds are reproducible)
  set.seed(123)
  tuning_cv_folds <- vfold_cv(train_set, v = 10)

  # parsnip model specification with both hyperparameters left to tune
  elastic_net_spec <- linear_reg(penalty = tune(), mixture = tune()) %>%
    set_engine("glmnet") %>%
    set_mode("regression")

  # workflow = recipe + model
  elastic_net_wfl <- workflow() %>%
    add_recipe(preprocessing_recipe) %>%
    add_model(elastic_net_spec)

  # 200 levels of lambda and 11 levels of alpha
  elastic_net_grid <- grid_regular(
    parameters(
      penalty(range = c(-10, 1), trans = log10_trans()),
      mixture()
    ),
    levels = c(200, 11)
  )

  # parallelise over resamples (not over "everything")
  elastic_net_tune <- tune_grid(
    elastic_net_wfl,
    resamples = tuning_cv_folds,
    grid = elastic_net_grid,
    metrics = metric_set(mae),
    control = control_grid(
      save_pred = TRUE,
      verbose = TRUE,
      parallel_over = "resamples"
    )
  )

  # best lambda/alpha, plus the table of top candidates
  best_params <- select_best(elastic_net_tune, metric = "mae")
  top_candidates <- show_best(elastic_net_tune, metric = "mae")

  final_wfl <- finalize_workflow(elastic_net_wfl, best_params)

  # fit the finalised workflow on the training data and pull out the model
  # NOTE(review): pull_workflow_fit() is deprecated in newer workflows
  # releases in favour of extract_fit_parsnip() -- confirm installed version.
  fitted_model <- pull_workflow_fit(fit(final_wfl, train_set))

  return(list(
    elastic_net_wfl_final = final_wfl,
    best_elastic_net_model = best_params,
    best_elastic_net_hyperpara = top_candidates,
    elastic_net_model_val = fitted_model
  ))
}

# Tune one elastic net per leave-one-site-out split and time the whole run.
start_time <- Sys.time()
elastic_net_tuned_list <- map2(
  split_list,
  recipe_list,
  elastic_net_tuning_func
)
stop_time <- Sys.time()

# elapsed tuning time
stop_time - start_time

# Unpack the per-site results into one list per component.
elastic_net_wfl_final_list <- lapply(
  elastic_net_tuned_list, `[[`, "elastic_net_wfl_final"
)
best_elastic_net_model_list <- lapply(
  elastic_net_tuned_list, `[[`, "best_elastic_net_model"
)
best_elastic_net_hyperpara_list <- lapply(
  elastic_net_tuned_list, `[[`, "best_elastic_net_hyperpara"
)
elastic_net_model_val_list <- lapply(
  elastic_net_tuned_list, `[[`, "elastic_net_model_val"
)

2.8 apply best models to the hold-out sites

# Fit the finalised workflow on the training sites and evaluate it on the
# held-out site of one split.
#
# split:                 a split object (see split_func()).
# preprocessing_recipe:  the recipe for this split; prepped here on the
#                        training data to bake the held-out data.
# elastic_net_wfl_final: the finalised workflow for this split.
#
# Returns a list with:
#   elastic_net_final_fit     - the last_fit() result
#   elastic_net_final_metrics - rsq_trad, mae and rmse on the held-out site
#   predicted_df              - predictions bound to the raw held-out data
#   predicted_df_baked        - predictions bound to the baked held-out data
elastic_net_fit_func <- function(split, preprocessing_recipe, elastic_net_wfl_final) {
  elastic_net_final_fit <- last_fit(
    elastic_net_wfl_final,
    split = split,
    metrics = metric_set(rsq_trad, mae, rmse)
  )

  elastic_net_final_metrics <- collect_metrics(elastic_net_final_fit)

  # held-out predictions, kept as a single renamed column
  site_predictions <- elastic_net_final_fit %>%
    collect_predictions() %>%
    select(non_brain_predicted = .pred)

  held_out_raw <- testing(split)

  # held-out data after the recipe has been prepped on the training data
  held_out_baked <- bake(
    prep(preprocessing_recipe, training = training(split)),
    new_data = held_out_raw
  )

  return(list(
    elastic_net_final_fit = elastic_net_final_fit,
    elastic_net_final_metrics = elastic_net_final_metrics,
    predicted_df = bind_cols(site_predictions, held_out_raw),
    predicted_df_baked = bind_cols(site_predictions, held_out_baked)
  ))
}
 
# Forked processing ("multicore") is disabled in future (>= 1.13.0) when R is
# run from RStudio because it is considered unstable, so a multisession plan
# is used instead of: future::plan("multicore", workers = cores - 1)
# `cores` comes from the parallel set-up earlier in the script.
future::plan("multisession", workers = cores - 1)

# per-site inputs, passed element-wise to elastic_net_fit_func()
input_list <- list(
  split = split_list,
  preprocessing_recipe = recipe_list,
  elastic_net_wfl_final = elastic_net_wfl_final_list
)

# evaluate every site in parallel with a fixed seed
elastic_net_fit_list <- furrr::future_pmap(
  input_list,
  ~ elastic_net_fit_func(
    split = ..1,
    preprocessing_recipe = ..2,
    elastic_net_wfl_final = ..3
  ),
  .options = furrr::furrr_options(seed = 123)
)

# Unpack the per-site results into one list per component.
elastic_net_final_fit_list <- lapply(
  elastic_net_fit_list, `[[`, "elastic_net_final_fit"
)
elastic_net_final_metrics_list <- lapply(
  elastic_net_fit_list, `[[`, "elastic_net_final_metrics"
)
predicted_df_list <- lapply(elastic_net_fit_list, `[[`, "predicted_df")
predicted_df_baked_list <- lapply(
  elastic_net_fit_list, `[[`, "predicted_df_baked"
)

2.9 create dataframes of the modeling output

# Name each per-site list by site id, then stack it into a single data frame
# whose SITE_ID_L column is filled from those names.

# best hyperparameters
best_elastic_net_model_list <- setNames(best_elastic_net_model_list, site_char)
best_elastic_net_model_all_sites <- bind_rows(
  best_elastic_net_model_list,
  .id = "SITE_ID_L"
)

# held-out metrics
elastic_net_final_metrics_list <- setNames(
  elastic_net_final_metrics_list, site_char
)
elastic_net_final_metrics_all_sites <- bind_rows(
  elastic_net_final_metrics_list,
  .id = "SITE_ID_L"
)

# predictions alongside the raw held-out data
predicted_df_list <- setNames(predicted_df_list, site_char)
predicted_df_all_sites <- bind_rows(predicted_df_list, .id = "SITE_ID_L")

# predictions alongside the baked held-out data
predicted_df_baked_list <- setNames(predicted_df_baked_list, site_char)
predicted_df_baked_all_sites <- bind_rows(
  predicted_df_baked_list,
  .id = "SITE_ID_L"
)

# Inspect the selected hyperparameters for each site
glimpse(best_elastic_net_model_all_sites)
## Rows: 21
## Columns: 4
## $ SITE_ID_L <chr> "site01", "site02", "site03", "site04", "site05", "site06", …
## $ penalty   <dbl> 0.019563983, 0.022219469, 0.019563983, 0.019563983, 0.013354…
## $ mixture   <dbl> 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, …
## $ .config   <chr> "Preprocessor1_Model0351", "Preprocessor1_Model0352", "Prepr…
# Inspect the held-out metrics (three rows per site: rsq_trad, mae, rmse)
glimpse(elastic_net_final_metrics_all_sites)
## Rows: 63
## Columns: 5
## $ SITE_ID_L  <chr> "site01", "site01", "site01", "site02", "site02", "site02",…
## $ .metric    <chr> "rsq_trad", "mae", "rmse", "rsq_trad", "mae", "rmse", "rsq_…
## $ .estimator <chr> "standard", "standard", "standard", "standard", "standard",…
## $ .estimate  <dbl> 0.3214087, 0.7187623, 0.9158668, 0.2323321, 0.5805358, 0.74…
## $ .config    <chr> "Preprocessor1_Model1", "Preprocessor1_Model1", "Preprocess…
# Inspect the predictions bound to the raw held-out data
glimpse(predicted_df_all_sites) 
## Rows: 11,278
## Columns: 126
## $ non_brain_predicted                  <dbl> -0.01874527, -1.66706903, -0.6442…
## $ SUBJECTKEY                           <chr> "NDAR_INVCL18941F", "NDAR_INV4BAP…
## $ EVENTNAME                            <chr> "baseline_year_1_arm_1", "baselin…
## $ MRI_INFO_DEVICESERIALNUMBER          <chr> "HASH6b4422a7", "HASH6b4422a7", "…
## $ SEX                                  <fct> F, M, F, M, F, F, F, M, F, M, F, …
## $ INTERVIEW_AGE                        <dbl> 118, 118, 119, 114, 117, 131, 120…
## $ RACE_ETHNICITY                       <fct> Hispanic, Hispanic, Hispanic, His…
## $ REL_FAMILY_ID                        <fct> 9522, 9387, 9406, 9689, 9449, 947…
## $ ACS_RAKED_PROPENSITY_SCORE           <dbl> 306.1648, 626.4339, 684.5424, 668…
## $ NIHTBX_FLANKER_UNCORRECTED           <dbl> 102, 102, 92, 94, 98, 106, 100, 1…
## $ NIHTBX_CARDSORT_UNCORRECTED          <dbl> 86, 98, 90, 81, 99, 109, 106, 95,…
## $ NIHTBX_PATTERN_UNCORRECTED           <dbl> 97, 65, 101, 82, 86, 103, 101, 10…
## $ NIHTBX_PICVOCAB_UNCORRECTED          <dbl> 87, 88, 74, 80, 78, 81, 92, 74, 7…
## $ NIHTBX_READING_UNCORRECTED           <dbl> 92, 90, 86, 76, 88, 102, 99, 87, …
## $ NIHTBX_PICTURE_UNCORRECTED           <dbl> 104, 91, 97, 101, 110, 106, 131, …
## $ PEA_RAVLT_LD_TRIAL_VII_TC            <int> 7, 10, 5, 10, 14, 13, 12, 6, 7, 1…
## $ NIHTBX_LIST_UNCORRECTED              <dbl> 101, 97, 86, 90, 97, 113, 120, 78…
## $ LMT_SCR_PERC_CORRECT                 <dbl> 0.50000, 0.43750, 0.34375, 0.4687…
## $ PEA_WISCV_TRS                        <int> 19, 19, 16, 16, 17, 17, 27, 12, 1…
## $ NIHTBX_FLUIDCOMP_UNCORRECTED         <dbl> 96, 86, 89, 84, 96, 109, 115, 90,…
## $ NIHTBX_CRYST_UNCORRECTED             <dbl> 89, 88, 78, 76, 82, 90, 95, 79, 7…
## $ NIHTBX_TOTALCOMP_UNCORRECTED         <dbl> 90, 84, 80, 76, 86, 99, 105, 81, …
## $ CBCL_SCR_SYN_ANXDEP_R                <dbl> 15, 8, 1, NA, 3, 0, 0, 0, 3, 0, 1…
## $ CBCL_SCR_SYN_WITHDEP_R               <dbl> 5, 4, 1, NA, 0, 0, 0, 1, 1, 0, 1,…
## $ CBCL_SCR_SYN_SOMATIC_R               <dbl> 8, 2, 0, NA, 2, 2, 0, 2, 1, 0, 1,…
## $ CBCL_SCR_SYN_SOCIAL_R                <dbl> 9, 7, 0, NA, 2, 0, 0, 0, 2, 0, 3,…
## $ CBCL_SCR_SYN_THOUGHT_R               <dbl> 8, 4, 0, NA, 2, 1, 0, 0, 1, 1, 0,…
## $ CBCL_SCR_SYN_ATTENTION_R             <dbl> 14, 10, 0, NA, 0, 4, 0, 0, 4, 1, …
## $ CBCL_SCR_SYN_RULEBREAK_R             <dbl> 9, 5, 0, NA, 0, 2, 0, 0, 0, 0, 3,…
## $ CBCL_SCR_SYN_AGGRESSIVE_R            <dbl> 15, 10, 0, NA, 2, 7, 0, 0, 0, 0, …
## $ CBCL_SCR_SYN_INTERNAL_R              <dbl> 28, 14, 2, NA, 5, 2, 0, 3, 5, 0, …
## $ CBCL_SCR_SYN_EXTERNAL_R              <dbl> 24, 15, 0, NA, 2, 9, 0, 0, 0, 0, …
## $ CBCL_SCR_SYN_TOTPROB_R               <dbl> 88, 57, 2, NA, 13, 17, 0, 5, 14, …
## $ CBCL_SCR_DSM5_DEPRESS_R              <dbl> 6, 7, 0, NA, 2, 0, 0, 2, 0, 0, 1,…
## $ CBCL_SCR_DSM5_ANXDISORD_R            <dbl> 10, 6, 1, NA, 0, 0, 0, 0, 3, 0, 2…
## $ CBCL_SCR_DSM5_SOMATICPR_R            <dbl> 5, 2, 0, NA, 2, 1, 0, 2, 1, 0, 1,…
## $ CBCL_SCR_DSM5_ADHD_R                 <dbl> 13, 8, 0, NA, 1, 4, 0, 0, 2, 0, 4…
## $ CBCL_SCR_DSM5_OPPOSIT_R              <dbl> 6, 5, 0, NA, 2, 6, 0, 0, 0, 0, 1,…
## $ CBCL_SCR_DSM5_CONDUCT_R              <dbl> 11, 5, 0, NA, 0, 2, 0, 0, 0, 0, 1…
## $ CBCL_SCR_07_SCT_R                    <dbl> 2, 1, 0, NA, 0, 0, 0, 0, 0, 0, 1,…
## $ CBCL_SCR_07_OCD_R                    <dbl> 10, 4, 0, NA, 1, 0, 0, 0, 1, 0, 0…
## $ CBCL_SCR_07_STRESS_R                 <dbl> 13, 10, 1, NA, 1, 3, 0, 0, 2, 0, …
## $ ASR_SCR_PERSTR_T                     <dbl> 60, 53, 46, 40, 45, 42, 53, 34, 3…
## $ ASR_SCR_ANXDEP_T                     <dbl> 67, 67, 50, 52, 50, 50, 50, 53, 5…
## $ ASR_SCR_WITHDRAWN_T                  <dbl> 61, 67, 50, 54, 50, 51, 50, 50, 5…
## $ ASR_SCR_SOMATIC_T                    <dbl> 69, 69, 54, 70, 51, 50, 50, 63, 5…
## $ ASR_SCR_THOUGHT_T                    <dbl> 72, 62, 50, 50, 50, 50, 50, 54, 5…
## $ ASR_SCR_ATTENTION_T                  <dbl> 62, 66, 50, 56, 50, 50, 50, 52, 5…
## $ ASR_SCR_AGGRESSIVE_T                 <dbl> 75, 63, 50, 51, 54, 50, 50, 50, 5…
## $ ASR_SCR_RULEBREAK_T                  <dbl> 72, 71, 50, 50, 52, 50, 50, 50, 5…
## $ ASR_SCR_INTRUSIVE_T                  <dbl> 50, 68, 50, 51, 50, 50, 50, 50, 5…
## $ ASR_SCR_INTERNAL_T                   <dbl> 69, 71, 43, 60, 42, 40, 30, 55, 5…
## $ ASR_SCR_EXTERNAL_T                   <dbl> 70, 70, 32, 47, 50, 32, 32, 32, 4…
## $ ASR_SCR_TOTPROB_T                    <dbl> 66, 67, 31, 49, 41, 32, 25, 45, 4…
## $ ASR_SCR_DEPRESS_T                    <dbl> 62, 67, 50, 57, 51, 50, 50, 55, 5…
## $ ASR_SCR_ANXDISORD_T                  <dbl> 75, 73, 50, 55, 51, 50, 50, 63, 5…
## $ ASR_SCR_SOMATICPR_T                  <dbl> 70, 70, 58, 70, 51, 50, 50, 63, 5…
## $ ASR_SCR_AVOIDANT_T                   <dbl> 70, 64, 50, 54, 51, 58, 50, 51, 5…
## $ ASR_SCR_ADHD_T                       <dbl> 64, 69, 50, 52, 50, 50, 50, 50, 5…
## $ ASR_SCR_ANTISOCIAL_T                 <dbl> 73, 70, 50, 51, 52, 50, 50, 50, 5…
## $ ASR_SCR_INATTENTION_T                <dbl> 58, 68, 50, 57, 50, 50, 50, 52, 5…
## $ ASR_SCR_HYPERACTIVE_T                <dbl> 66, 67, 50, 50, 50, 50, 50, 50, 5…
## $ mania_parent                         <int> 5, 10, 0, NA, 7, 0, 0, 0, 0, 0, 0…
## $ BISAvg                               <dbl> 2.25, 1.50, 0.50, 0.50, 0.50, 1.7…
## $ BASRRAvg                             <dbl> 2.00, 3.00, 1.75, 0.25, 2.25, 2.2…
## $ BASDriveAvg                          <dbl> 0.50, 2.00, 0.75, 1.25, 0.75, 0.5…
## $ BASFunAvg                            <dbl> 1.75, 2.00, 1.50, 1.00, 0.25, 2.2…
## $ BASAllAvg                            <dbl> 1.4166667, 2.3333333, 1.3333333, …
## $ UPPS_Y_SS_NEGATIVE_URGENCY           <int> 9, 6, 9, 6, 13, 5, 10, 9, 9, 6, 1…
## $ UPPS_Y_SS_LACK_OF_PLANNING           <int> 8, 8, 5, 6, 5, 4, 8, 6, 4, 5, 7, …
## $ UPPS_Y_SS_SENSATION_SEEKING          <int> 11, 10, 8, 10, 13, 8, 13, 13, 9, …
## $ UPPS_Y_SS_POSITIVE_URGENCY           <int> 15, 8, 8, 10, 9, 6, 12, 9, 8, 4, …
## $ UPPS_Y_SS_LACK_OF_PERSEVERANCE       <int> 6, 11, 4, 9, 6, 7, 4, 6, 7, 6, 9,…
## $ sleep_hours                          <int> 2, 2, 3, NA, 4, 2, 2, 3, 3, 3, 1,…
## $ sleep_disturb                        <int> 1, 1, 1, NA, 2, 1, 1, 2, 2, 2, 2,…
## $ sleep_initiate_maintain              <int> 11, 8, 9, NA, 13, 12, 9, 15, 11, …
## $ sleep_breath                         <int> 4, 5, 3, NA, 3, 3, 4, 3, 7, 4, 3,…
## $ sleep_arousal                        <int> 4, 5, 3, NA, 3, 3, 3, 3, 3, 3, 3,…
## $ sleep_transition                     <int> 19, 10, 8, NA, 9, 7, 7, 10, 10, 6…
## $ sleep_somnolence                     <int> 11, 8, 5, NA, 5, 7, 5, 8, 6, 8, 8…
## $ sleep_hyperhydrosis                  <int> 2, 2, 2, NA, 2, 2, 2, 2, 2, 2, 3,…
## $ sleep_total                          <int> 51, 38, 30, NA, 35, 34, 30, 41, 3…
## $ matureGames_Screen                   <int> 0, 1, 0, 1, 1, 0, 0, 2, 0, 2, 1, …
## $ matureMovies_Screen                  <int> 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, …
## $ wkdySum_Screen                       <dbl> 1.75, 3.50, 1.75, 1.50, 2.00, 2.7…
## $ wkndSum_Screen                       <dbl> 2.00, 5.50, 1.75, 2.00, 2.00, 5.5…
## $ tobacco_before_preg                  <fct> 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 1,…
## $ tobacco_after_preg                   <fct> 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0,…
## $ alcohol_before_preg                  <fct> 0, 0, 0, NA, 0, 1, 0, 0, 0, 0, 1,…
## $ alcohol_after_preg                   <fct> 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0,…
## $ marijuana_before_preg                <fct> 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0,…
## $ marijuana_after_preg                 <fct> 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0,…
## $ deveplopment_prematurity             <fct> 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0,…
## $ deveplopment_birth_complications     <dbl> 0, 0, 0, NA, 0, 1, 0, 0, 0, 0, 0,…
## $ deveplopment_pregnancy_complications <dbl> 0, 0, 2, NA, 0, 0, 0, 1, 1, 0, NA…
## $ bilingual_status                     <fct> NA, NA, 1, 1, 1, NA, 1, 1, NA, NA…
## $ bilingual_degree                     <fct> NA, NA, 1, 1, 1, NA, 1, 1, NA, NA…
## $ bilingual_use                        <dbl> 0, 0, 6, 4, 5, 0, 3, 4, 0, 0, 2, …
## $ marital                              <fct> married, livingWithPartner, separ…
## $ education1stPar                      <int> 15, 6, 8, 12, 10, 19, 18, 13, 12,…
## $ education2ndPar                      <int> 15, 3, NA, NA, NA, 18, 15, 13, 15…
## $ educationAvg                         <dbl> 15.0, 4.5, 8.0, 12.0, 10.0, 18.5,…
## $ combinedIncome                       <int> 9, 3, NA, NA, 4, 10, NA, 6, 8, 4,…
## $ householdSize                        <int> 4, 5, NA, 3, 7, 5, 3, 7, 4, 4, 8,…
## $ econ_insecurities_sum                <dbl> 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, …
## $ area_deprivation_index               <dbl> 86.44, 102.27, 0.00, 97.47, 98.32…
## $ lead_risk                            <int> 7, 10, 10, 10, 10, 6, 6, 9, NA, 1…
## $ quartic_uniform_crime_reports        <dbl> 24.28903, 24.28903, 24.28903, 24.…
## $ neighbo_safety_parent_sum            <dbl> 10, 8, 7, 6, 8, 14, 12, 3, 9, 11,…
## $ neighbo_safety_child_sum             <int> 4, 3, 4, 4, 3, 5, 5, 4, 2, 5, 4, …
## $ sumSchool_environment                <dbl> 20, 21, 24, 23, 23, 22, 24, 21, 2…
## $ sumSchool_involvement                <dbl> 15, 13, 16, 16, 16, 15, 12, 13, 1…
## $ sumSchool_disengagement              <dbl> 2, 6, 2, 2, 2, 3, 2, 5, 2, 2, 2, …
## $ parent_monitor_mean                  <dbl> 4.4, 4.2, 4.6, 3.0, 4.8, 5.0, 3.8…
## $ fam_conflict_parent                  <dbl> 0, 4, 1, 6, 3, 2, 3, 3, 1, 0, 4, …
## $ fam_conflict_children                <dbl> 0, 0, 1, 0, 1, 1, 1, 3, 4, 1, 3, …
## $ prosocial_parent_mean                <dbl> 1.666667, 2.000000, 2.000000, NA,…
## $ prosocial_youth_mean                 <dbl> 1.666667, 1.333333, 2.000000, 1.0…
## $ phys_ind_daypweek_sum                <dbl> 10, 0, 0, NA, 4, 0, 9, 0, 4, 0, 5…
## $ phys_team_daypweek_sum               <dbl> 14, 0, 8, NA, 0, 12, 0, 0, 4, 6, …
## $ art_daypweek_sum                     <dbl> 4, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0,…
## $ sport_act_all_daypweek_sum           <dbl> 28, 0, 8, NA, 4, 12, 9, 0, 8, 6, …
## $ physc_act_days                       <int> 2, 1, 1, 3, 0, 7, 5, 7, 0, 1, 0, …
## $ SITE_ID_L                            <chr> "site01", "site01", "site01", "si…
## $ dup                                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ gfactor                              <dbl> 0.19958234, -0.03410110, -0.85882…
# Inspect the predictions bound to the baked (preprocessed) held-out data
glimpse(predicted_df_baked_all_sites)
## Rows: 11,278
## Columns: 80
## $ SITE_ID_L                            <chr> "site01", "site01", "site01", "si…
## $ non_brain_predicted                  <dbl> -0.01874527, -1.66706903, -0.6442…
## $ CBCL_SCR_SYN_ANXDEP_R                <dbl> 4.0469948, 1.7752001, -0.4965946,…
## $ CBCL_SCR_SYN_WITHDEP_R               <dbl> 2.31248684, 1.72912794, -0.020948…
## $ CBCL_SCR_SYN_SOMATIC_R               <dbl> 3.3136168, 0.2523049, -0.7681324,…
## $ CBCL_SCR_SYN_SOCIAL_R                <dbl> 3.2275942, 2.3519174, -0.7129514,…
## $ CBCL_SCR_SYN_THOUGHT_R               <dbl> 2.8676630, 1.0635359, -0.7405912,…
## $ CBCL_SCR_SYN_ATTENTION_R             <dbl> 3.131137152, 1.992712479, -0.8533…
## $ CBCL_SCR_SYN_RULEBREAK_R             <dbl> 4.1567932, 2.0249091, -0.6399461,…
## $ CBCL_SCR_SYN_AGGRESSIVE_R            <dbl> 2.6816012, 1.5377476, -0.7499598,…
## $ ASR_SCR_PERSTR_T                     <dbl> 1.287236908, 0.534889327, -0.2174…
## $ ASR_SCR_ANXDEP_T                     <dbl> 2.34195330, 2.34195330, -0.607354…
## $ ASR_SCR_WITHDRAWN_T                  <dbl> 1.59961503, 2.77322736, -0.552007…
## $ ASR_SCR_SOMATIC_T                    <dbl> 2.2898535, 2.2898535, -0.1289775,…
## $ ASR_SCR_THOUGHT_T                    <dbl> 3.7189976, 1.7655906, -0.5784979,…
## $ ASR_SCR_ATTENTION_T                  <dbl> 1.3590482, 2.0341371, -0.6662184,…
## $ ASR_SCR_AGGRESSIVE_T                 <dbl> 4.24679033, 1.88858580, -0.666135…
## $ ASR_SCR_RULEBREAK_T                  <dbl> 4.1150086, 3.9032372, -0.5439627,…
## $ ASR_SCR_INTRUSIVE_T                  <dbl> -0.4867646, 4.7207486, -0.4867646…
## $ mania_parent                         <dbl> 1.3241907, 3.1194834, -0.4711019,…
## $ BISAvg                               <dbl> 1.2209757, 0.1658362, -1.2410165,…
## $ BASRRAvg                             <dbl> -0.32251573, 1.28784986, -0.72510…
## $ BASDriveAvg                          <dbl> -0.69320511, 1.27454672, -0.36524…
## $ BASFunAvg                            <dbl> 0.4822646, 0.8601754, 0.1043539, …
## $ UPPS_Y_SS_NEGATIVE_URGENCY           <dbl> 0.1875367, -0.9457830, 0.1875367,…
## $ UPPS_Y_SS_LACK_OF_PLANNING           <dbl> 0.1041536, 0.1041536, -1.1588821,…
## $ UPPS_Y_SS_SENSATION_SEEKING          <dbl> 0.44795430, 0.07466475, -0.671914…
## $ UPPS_Y_SS_POSITIVE_URGENCY           <dbl> 2.3587127608, -0.0005867521, -0.0…
## $ UPPS_Y_SS_LACK_OF_PERSEVERANCE       <dbl> -0.4633063, 1.7560254, -1.3510390…
## $ sleep_hours                          <dbl> 0.3579279, 0.3579279, 1.5884740, …
## $ sleep_disturb                        <dbl> -0.95463743, -0.95463743, -0.9546…
## $ sleep_initiate_maintain              <dbl> -0.20835221, -1.00678383, -0.7406…
## $ sleep_breath                         <dbl> 0.18492738, 0.98843794, -0.618583…
## $ sleep_arousal                        <dbl> 0.6055256, 1.6895942, -0.4785430,…
## $ sleep_transition                     <dbl> 4.10587162, 0.68697951, -0.072774…
## $ sleep_somnolence                     <dbl> 1.64482380, 0.42245781, -0.799908…
## $ sleep_hyperhydrosis                  <dbl> -0.3697340, -0.3697340, -0.369734…
## $ matureGames_Screen                   <dbl> -0.6484096, 0.5010773, -0.6484096…
## $ matureMovies_Screen                  <dbl> -0.5908421, -0.5908421, -0.590842…
## $ wkdySum_Screen                       <dbl> -0.55111237, 0.01488049, -0.55111…
## $ wkndSum_Screen                       <dbl> -0.7215559, 0.2427317, -0.7904336…
## $ deveplopment_birth_complications     <dbl> -0.4961850, -0.4961850, -0.496185…
## $ deveplopment_pregnancy_complications <dbl> -0.6000502, -0.6000502, 1.3712680…
## $ bilingual_use                        <dbl> -0.54934102, -0.54934102, 3.20543…
## $ educationAvg                         <dbl> -0.566530119, -4.601676348, -3.25…
## $ combinedIncome                       <dbl> 0.7220577, -1.7937491, -0.9551468…
## $ householdSize                        <dbl> -0.45720752, 0.19578002, 0.065182…
## $ econ_insecurities_sum                <dbl> -0.4251869, 1.4018488, -0.4251869…
## $ area_deprivation_index               <dbl> -0.278164178, 0.367542661, -3.804…
## $ lead_risk                            <dbl> 0.66052285, 1.63507717, 1.6350771…
## $ quartic_uniform_crime_reports        <dbl> 2.33990202, 2.33990202, 2.3399020…
## $ neighbo_safety_parent_sum            <dbl> -0.60584355, -1.29867284, -1.6450…
## $ neighbo_safety_child_sum             <dbl> -0.0339937, -0.9524127, -0.033993…
## $ sumSchool_environment                <dbl> 0.02510678, 0.38090575, 1.4483026…
## $ sumSchool_involvement                <dbl> 0.82448335, -0.02310619, 1.248278…
## $ sumSchool_disengagement              <dbl> -1.1981619, 1.5477542, -1.1981619…
## $ parent_monitor_mean                  <dbl> 0.03098195, -0.35734179, 0.419305…
## $ fam_conflict_parent                  <dbl> -1.2981374, 0.7390790, -0.7888333…
## $ fam_conflict_children                <dbl> -1.04553814, -1.04553814, -0.5348…
## $ prosocial_parent_mean                <dbl> -0.2092621, 0.6193245, 0.6193245,…
## $ prosocial_youth_mean                 <dbl> -0.02414115, -0.92288397, 0.87460…
## $ phys_ind_daypweek_sum                <dbl> 0.91005294, -0.87793090, -0.87793…
## $ phys_team_daypweek_sum               <dbl> 0.76237360, -1.07259390, -0.02404…
## $ art_daypweek_sum                     <dbl> -0.1555468, -0.7995276, -0.799527…
## $ physc_act_days                       <dbl> -0.6571645, -1.0897235, -1.089723…
## $ gfactor                              <dbl> 0.2542287, -0.0434381, -1.0939692…
## $ tobacco_after_preg_X1                <dbl> -0.2383455, -0.2383455, -0.238345…
## $ alcohol_after_preg_X1                <dbl> -0.1663372, -0.1663372, -0.166337…
## $ marijuana_after_preg_X1              <dbl> -0.1473317, -0.1473317, -0.147331…
## $ deveplopment_prematurity_X1          <dbl> -0.4807505, -0.4807505, -0.480750…
## $ SEX_M                                <dbl> -1.0488617, 0.9533272, -1.0488617…
## $ RACE_ETHNICITY_Black                 <dbl> -0.4206907, -0.4206907, -0.420690…
## $ RACE_ETHNICITY_Hispanic              <dbl> 2.0817662, 2.0817662, 2.0817662, …
## $ RACE_ETHNICITY_Asian                 <dbl> -0.1447151, -0.1447151, -0.144715…
## $ RACE_ETHNICITY_Other                 <dbl> -0.3445854, -0.3445854, -0.344585…
## $ marital_widowed                      <dbl> -0.08909534, -0.08909534, -0.0890…
## $ marital_divorced                     <dbl> -0.3175018, -0.3175018, -0.317501…
## $ marital_separated                    <dbl> -0.1992563, -0.1992563, 5.0182014…
## $ marital_neverMarried                 <dbl> -0.3708123, -0.3708123, -0.370812…
## $ marital_livingWithPartner            <dbl> -0.2396301, 4.1727160, -0.2396301…
# Mean and SD of each held-out metric across sites
elastic_net_final_metrics_all_sites %>%
  group_by(.metric) %>%
  summarise(
    across(
      .estimate,
      list(mean = mean, sd = sd),
      .names = "{.fn}_estimate"
    )
  )
## # A tibble: 3 × 3
##   .metric  mean_estimate sd_estimate
##   <chr>            <dbl>       <dbl>
## 1 mae              0.653      0.0392
## 2 rmse             0.829      0.0498
## 3 rsq_trad         0.282      0.0747
# Correlation between predicted and observed g within each held-out site,
# then the mean and SD of those per-site correlations.
predicted_df_all_sites %>%
  group_by(SITE_ID_L) %>%
  dplyr::summarize(
    cor_non_brain_g = cor(non_brain_predicted, gfactor)
  ) %>%
  ungroup() %>%
  dplyr::summarize(
    across(
      cor_non_brain_g,
      list(mean = mean, sd = sd),
      .names = "{.fn}_cor"
    )
  )
## # A tibble: 1 × 2
##   mean_cor sd_cor
##      <dbl>  <dbl>
## 1    0.534 0.0680

2.10 plot predicted vs observed across sites

# Scatter plot of z-scored out-of-site predictions against z-scored observed
# g-factor, pooled over all sites, with a linear fit line.
ggplot(
  predicted_df_all_sites,
  aes(x = scale(non_brain_predicted), y = scale(gfactor))
) +
  geom_point(alpha = .3) +
  geom_smooth(method = "lm") +
  labs(
    color = "",
    x = "out-of-site g-factor (Z) predicted by\nsocio-demographic-psychological factors",
    y = "observed g-factor (Z)"
  ) +
  theme_classic(base_size = 24)
## `geom_smooth()` using formula 'y ~ x'

2.11 plot top features

# Tidy each site's fitted model into a data frame of coefficients
elastic_net_model_val_df_list <- map(elastic_net_model_val_list, tidy)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loaded glmnet 4.1-3
# Mean and SD of each coefficient across the per-site models.
# Estimates are combined row by row, so the terms must appear in the same
# order in every site's table; this is checked before averaging instead of
# being silently assumed. The estimate matrix is built once and reused.
elastic_net_model_val_df <- local({
  coef_terms <- elastic_net_model_val_df_list[[1]][["term"]]
  terms_aligned <- vapply(
    elastic_net_model_val_df_list,
    function(site_coefs) identical(site_coefs[["term"]], coef_terms),
    logical(1)
  )
  stopifnot(all(terms_aligned))

  # one column of estimates per held-out site
  coef_estimates <- do.call(
    cbind,
    lapply(elastic_net_model_val_df_list, function(site_coefs) {
      site_coefs[["estimate"]]
    })
  )

  tibble(
    term = coef_terms,
    mean_estimate = rowMeans(coef_estimates),
    sd_estimate = matrixStats::rowSds(coef_estimates)
  )
})

# Lookup table mapping original feature names (`original`) to
# human-readable labels (`readable`)
NonBrainFeaturesRead <- tibble::as_tibble(
  read.csv(paste0(utilFold, "NonBrainFeaturesRead.csv"))
)

# After the join, `term` holds the readable label and `original` the
# model's own term name.
elastic_net_model_val_df_named <- NonBrainFeaturesRead %>%
  rename(term = original) %>%
  full_join(elastic_net_model_val_df, by = "term") %>%
  rename(original = term) %>%
  rename(term = readable)

# Plot the five features with the largest absolute mean coefficient.
# NOTE(review): the title states a |coefficient| >= .1 threshold, but the
# code selects the top 5 -- confirm the two agree.
elastic_net_model_val_df_named %>%
  filter(term != "(Intercept)") %>%
  top_n(5, abs(mean_estimate)) %>%
  ggplot(aes(fct_reorder(term, mean_estimate), mean_estimate,
             fill = mean_estimate > 0)) +
  geom_col(alpha = 0.8, show.legend = FALSE) +
  geom_errorbar(aes(ymin = mean_estimate - sd_estimate,
                    ymax = mean_estimate + sd_estimate),
                width = .2,
                position = position_dodge(.9)) +
  coord_flip() +
  theme_minimal(base_size = 17) +
  labs(x = NULL,
       y = "Std Coefficients\nAcross Folds (M ±SD)",
       title = "Socio-Demographic\n& Psychological features\nthat Predict G-Factor\nWith |Std Coefficients| ≥ .1")

## top 20

# Rebuild the list of tidy coefficient tables, one per element of
# elastic_net_model_val_list.
elastic_net_model_val_df_list <- purrr::map(
  .x = elastic_net_model_val_list,
  .f = tidy
)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loaded glmnet 4.1-3
# Summarise each coefficient across the tidied models: build the
# terms x models matrix of estimates once, then take row means and row SDs.
# vapply() (instead of sapply()) fails loudly if a model reports a different
# number of terms than the first one, rather than silently returning a list.
elastic_net_model_val_df <- local({
  n_terms <- nrow(elastic_net_model_val_df_list[[1]])
  estimate_mat <- vapply(
    elastic_net_model_val_df_list,
    function(model_df) model_df$estimate,
    numeric(n_terms)
  )
  tibble(
    term = elastic_net_model_val_df_list[[1]]$term,
    mean_estimate = rowMeans(estimate_mat),
    sd_estimate = matrixStats::rowSds(estimate_mat)
  )
})

# Lookup table pairing the model's term names (`original`) with
# display labels (`readable`).
NonBrainFeaturesRead <- tibble::as_tibble(
  read.csv(paste0(utilFold, "NonBrainFeaturesRead.csv"))
)

# Attach the readable label to every term; afterwards `term` holds the
# readable label and `original` holds the model's own term name.
elastic_net_model_val_df_named <- NonBrainFeaturesRead %>%
  rename(term = original) %>%
  full_join(elastic_net_model_val_df, by = "term") %>%
  rename(original = term) %>%
  rename(term = readable)

# Bar chart of the twenty terms with the largest absolute mean coefficient
# (ties kept), with +/- 1 SD error bars.
elastic_net_model_val_df_named %>%
  filter(term != "(Intercept)") %>%
  #  group_by(mean_estimate > 0) %>%
  top_n(20, abs(mean_estimate)) %>%
  ungroup() %>%
  ggplot(aes(
    x = fct_reorder(term, mean_estimate),
    y = mean_estimate,
    fill = mean_estimate > 0
  )) +
  geom_col(alpha = 0.8, show.legend = FALSE) +
  geom_errorbar(
    aes(
      ymin = mean_estimate - sd_estimate,
      ymax = mean_estimate + sd_estimate
    ),
    width = .2,
    position = position_dodge(.9)
  ) +
  coord_flip() +
  theme_minimal(base_size = 17) +
  labs(
    x = NULL,
    y = "Coefficients Across Folds (M ±SD)",
    title = "Top 20 Socio-Demographic\n& Psychological features\nthat Predict G-Factor"
  )

2.12 load the predicted G-Factor from brain variables

and look at correlations among observed gfactor and predicted gfactor from brain and non-brain variables (collapsed across sites)

# Read the brain-based prediction table (columns used later in this file:
# SUBJECTKEY, SITE_ID_L, Stacked, Nback, SST, MID, rsmri, smri, DTI).
brainPredicted <- readr::read_csv(
  file = "~/OneDrive - University of Otago/ABCD3/Analysis/FeatureExploration/brainData/All_pred_results05_01_2021.csv"
)
## New names:
## * `` -> ...1
## Rows: 10624 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): SUBJECTKEY, SITE_ID_L
## dbl (8): ...1, Stacked, Nback, SST, MID, rsmri, smri, DTI
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Merge the non-brain and brain predictions on participant and site (keeping
# rows present in either table), give the prediction/outcome columns their
# analysis names, and add dummy codes for sex and race/ethnicity.
predicted_brain_no_brain_all_sites <- predicted_df_all_sites %>%
  full_join(brainPredicted, by = c("SUBJECTKEY", "SITE_ID_L")) %>%
  rename(
    predicted_G_brain = Stacked,
    predicted_G_non_brain = non_brain_predicted,
    observed_G = gfactor
  ) %>%
  fastDummies::dummy_cols(
    select_columns = c("SEX", "RACE_ETHNICITY"),
    remove_first_dummy = TRUE
  ) %>%
  # 0 for "White", 1 for every other level, NA where RACE_ETHNICITY is NA
  mutate(race_non_white = as.numeric(RACE_ETHNICITY != "White"))

#glimpse(predicted_brain_no_brain_all_sites)

# Pairwise correlation chart (with histograms) for the observed g-factor and
# its brain-based and non-brain-based predictions.
PerformanceAnalytics::chart.Correlation(
  select(
    predicted_brain_no_brain_all_sites,
    observed_G, predicted_G_brain, predicted_G_non_brain
  ),
  histogram = TRUE,
  pch = 19
)

2.13 fit the mediation model using soc-dem-psyc as the ind

brain is mediator
g-factor is the dv

# Fix the RNG state so the bootstrap draws below are reproducible.
set.seed(1234)

# Mediation model in lavaan syntax:
#   a: predicted_G_non_brain -> predicted_G_brain
#   b: predicted_G_brain     -> observed_G
#   c: predicted_G_non_brain -> observed_G (direct path)
# Defined parameters: indirect effect (ab), total effect, proportion mediated.
model_brain_soc_psyc <- '

observed_G~b*predicted_G_brain+c*predicted_G_non_brain
predicted_G_brain~a*predicted_G_non_brain

#indirect and total effects between
ab:=a*b
total:=ab+c
prop:=ab/total
'

# Fit with bootstrap standard errors (5000 draws) and a Bollen-Stine test.
fit_model_brain_soc_psyc <- lavaan::sem(
  model = model_brain_soc_psyc,
  data = predicted_brain_no_brain_all_sites,
  se = "bootstrap",
  bootstrap = 5000,
  test = "bollen.stine"
)

# Print fit measures, standardized solution and R-square.
lavaan::summary(
  fit_model_brain_soc_psyc,
  fit.measures = TRUE,
  standardized = TRUE,
  rsquare = TRUE
)
## lavaan 0.6-9 ended normally after 17 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
##                                                       
##                                                   Used       Total
##   Number of observations                         10628       11278
##                                                                   
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Model Test Baseline Model:
## 
##   Test statistic                              7012.371
##   Degrees of freedom                                 3
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -15257.212
##   Loglikelihood unrestricted model (H1)     -15257.212
##                                                       
##   Akaike (AIC)                               30524.424
##   Bayesian (BIC)                             30560.781
##   Sample-size adjusted Bayesian (BIC)        30544.891
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent confidence interval - lower         0.000
##   90 Percent confidence interval - upper         0.000
##   P-value RMSEA <= 0.05                             NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000
## 
## Parameter Estimates:
## 
##   Standard errors                            Bootstrap
##   Number of requested bootstrap draws             5000
##   Number of successful bootstrap draws            5000
## 
## Regressions:
##                       Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   observed_G ~                                                             
##     prdctd_G_b (b)       0.486    0.015   33.291    0.000    0.486    0.275
##     prdctd_G__ (c)       0.635    0.012   52.839    0.000    0.635    0.460
##   predicted_G_brain ~                                                      
##     prdctd_G__ (a)       0.309    0.007   45.962    0.000    0.309    0.395
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .observed_G        0.371    0.005   67.740    0.000    0.371    0.613
##    .predictd_G_brn    0.163    0.002   84.655    0.000    0.163    0.844
## 
## R-Square:
##                    Estimate
##     observed_G        0.387
##     predictd_G_brn    0.156
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     ab                0.150    0.006   27.208    0.000    0.150    0.109
##     total             0.785    0.012   67.765    0.000    0.785    0.569
##     prop              0.191    0.007   26.753    0.000    0.191    0.191
# Parameter table with 95% bootstrap confidence intervals
# (boot.ci.type = "bca.simple") and standardized estimates.
fit_model_brain_soc_psyc_para <- lavaan::parameterEstimates(
  object = fit_model_brain_soc_psyc,
  ci = TRUE,
  level = 0.95,
  boot.ci.type = "bca.simple",
  standardized = TRUE
)

knitr::kable(fit_model_brain_soc_psyc_para)
lhs op rhs label est se z pvalue ci.lower ci.upper std.lv std.all std.nox
observed_G ~ predicted_G_brain b 0.4855600 0.0145851 33.29148 0 0.4571685 0.5142848 0.4855600 0.2747331 0.2747331
observed_G ~ predicted_G_non_brain c 0.6348281 0.0120144 52.83872 0 0.6120880 0.6585293 0.6348281 0.4601710 0.8163206
predicted_G_brain ~ predicted_G_non_brain a 0.3086663 0.0067156 45.96233 0 0.2960008 0.3219493 0.3086663 0.3954433 0.7014969
observed_G ~~ observed_G 0.3705897 0.0054708 67.73981 0 0.3597279 0.3812804 0.3705897 0.6127772 0.6127772
predicted_G_brain ~~ predicted_G_brain 0.1633338 0.0019294 84.65457 0 0.1596145 0.1671936 0.1633338 0.8436246 0.8436246
predicted_G_non_brain ~~ predicted_G_non_brain 0.3177730 0.0000000 NA NA 0.3177730 0.3177730 0.3177730 1.0000000 0.3177730
ab := a*b ab 0.1498760 0.0055085 27.20826 0 0.1395263 0.1611217 0.1498760 0.1086414 0.1927244
total := ab+c total 0.7847041 0.0115797 67.76544 0 0.7620716 0.8074552 0.7847041 0.5688123 1.0090450
prop := ab/total prop 0.1909968 0.0071391 26.75344 0 0.1775028 0.2055419 0.1909968 0.1909968 0.1909968

3 Genetics Data Prep and Modeling

3.1 load genetics data

# Folder locations for the genetics inputs used in this section.
prsScoreFold <- "~/OneDrive - University of Otago/ABCD3/genetics/data-bundle-abcd3.0/abcd-release-3.0_chrall_0.8-mac5-hg19-eur-qc-v9-genotypes/"
GeneticQCfold <- "~/OneDrive - University of Otago/ABCD3/ABCD3GenotypeWithoutImputed/genomics_sample03/ABCD_genotype/"
Geneticfold <- "~/OneDrive - University of Otago/ABCD3/genetics/data-bundle-abcd3.0-afr/"

# Genotyping batch per participant (batch = Axiom_Plate column).
batch_info <- read.delim(
  paste0(GeneticQCfold, "ABCD_release3.0_.batch_info.txt")
) %>%
  rename(SUBJECTKEY = abcd.id_redcap, batch = Axiom_Plate)

# One ID may carry a stray leading backtick; replace it with the clean ID.
batch_info$SUBJECTKEY[batch_info$SUBJECTKEY %in% "`NDAR_INVF3FYXH1G"] <- "NDAR_INVF3FYXH1G"

# IDs listed in the "bim2unrelated.keep" file, flagged with EuNotRelated = 1.
# Each line is read as a single field (V1) and the "ABCD<tab>AB0nnnnnn_"
# prefix is stripped to leave the SUBJECTKEY.
eur_bim2unrelated <- read.csv(
  paste0(
    Geneticfold,
    "abcd-release-3.0_chrall_0.8-mac5-hg19-eur-qc-v9/abcd-release-3.0_chrall_0.8-mac5-hg19-eur-qc-v9_bim2unrelated.keep"
  ),
  header = FALSE
) %>%
  mutate(SUBJECTKEY = str_remove_all(V1, "ABCD\tAB0[:digit:]{6}_")) %>%
  select(SUBJECTKEY) %>%
  mutate(EuNotRelated = 1)

eur_bim2unrelated$SUBJECTKEY[eur_bim2unrelated$SUBJECTKEY %in% "`NDAR_INVF3FYXH1G"] <- "NDAR_INVF3FYXH1G"

# Participants flagged as bad in QC (SUBJ_QC_BAD = 1 for each listed ID).
SUBJ_QC_BAD <- c("NDAR_INVA7RNTEHU", "NDAR_INVV7NEVHLK")
badImputed <- tibble(
  SUBJECTKEY = SUBJ_QC_BAD,
  SUBJ_QC_BAD = 1
)

# Baseline vision screener: visionProb is 1 when SNELLEN_VA_Y is 0 or 1, or
# VIS_FLG is 2; otherwise 0 (or NA when the inputs are missing).
vision_idx <- tibble::as_tibble(
  read.csv(paste0(dataFold, "ABCD_SVS01_DATA_TABLE.CSV"))
) %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  mutate(visionProb = ifelse(SNELLEN_VA_Y == 0 | SNELLEN_VA_Y == 1 | VIS_FLG == 2, 1, 0))

# Full join of all QC sources on SUBJECTKEY.
PRSQcEuNotRelated <- plyr::join_all(
  list(batch_info, eur_bim2unrelated, badImputed, vision_idx),
  by = "SUBJECTKEY",
  type = "full"
)

# Cross-tabulate the four inclusion criteria applied later.
PRSQcEuNotRelated %>%
  count(
    EuNotRelated == 1,
    batch != 461,
    is.na(SUBJ_QC_BAD),
    visionProb != 1 | is.na(visionProb)
  ) %>%
  knitr::kable()
EuNotRelated == 1 batch != 461 is.na(SUBJ_QC_BAD) visionProb != 1 | is.na(visionProb) n
TRUE FALSE TRUE TRUE 32
TRUE TRUE TRUE FALSE 8
TRUE TRUE TRUE TRUE 4814
NA FALSE TRUE TRUE 50
NA TRUE TRUE FALSE 20
NA TRUE TRUE TRUE 6175
NA NA FALSE TRUE 2
NA NA TRUE FALSE 3
NA NA TRUE TRUE 810
# Number of rows passing all four inclusion criteria: flagged EuNotRelated,
# not in batch 461, not flagged SUBJ_QC_BAD, and no recorded vision problem.
finalParticipantNum <- nrow(
  dplyr::filter(
    PRSQcEuNotRelated,
    EuNotRelated == 1,
    batch != 461,
    is.na(SUBJ_QC_BAD),
    visionProb != 1 | is.na(visionProb)
  )
)

# Polygenic score profiles; `iid` is reduced to the SUBJECTKEY by stripping
# the "ABnnnnnnn_" prefix.
leeCognition <- read.table(
  paste0(
    prsScoreFold,
    "lee-wedow-okbay-2018-cognitive-gwas/abcd-release-3.0_chrall_0.8-mac5-hg19-eur-qc-v9-genotypes-lee-wedow-okbay-2018-cognitive-gwas-profiles.csv"
  ),
  header = TRUE
) %>%
  tibble::as_tibble() %>%
  rename(SUBJECTKEY = iid) %>%
  mutate(SUBJECTKEY = str_remove_all(SUBJECTKEY, "AB[:digit:]{7}_"))

# One ID may carry a stray leading backtick; replace it with the clean ID.
leeCognition$SUBJECTKEY[leeCognition$SUBJECTKEY %in% "`NDAR_INVF3FYXH1G"] <- "NDAR_INVF3FYXH1G"

# Prefix every column whose name matches 'p1|p5' with "leeCognition".
leeCognitionOri <- leeCognition %>%
  select(matches("p1|p5")) %>%
  colnames()
leeCognitionNew <- paste0("leeCognition", leeCognitionOri)
leeCognition.renamed <- leeCognition %>%
  rename_with(~ paste0("leeCognition", .x), .cols = all_of(leeCognitionOri))

# Full join of predictions, polygenic scores and QC flags on SUBJECTKEY.
predicted_brain_no_brain_all_sites_leeCog <- plyr::join_all(
  list(predicted_brain_no_brain_all_sites, leeCognition.renamed, PRSQcEuNotRelated),
  by = "SUBJECTKEY",
  type = "full"
)

# The joined tables share column names; keep the first occurrence of each.
predicted_brain_no_brain_all_sites_leeCog <- predicted_brain_no_brain_all_sites_leeCog[
  , !duplicated(colnames(predicted_brain_no_brain_all_sites_leeCog))
]

# Add a row ID, apply the four inclusion criteria, and code sex numerically
# (1 = "F", 0 = otherwise).
predicted_brain_no_brain_all_sites_leeCog <- predicted_brain_no_brain_all_sites_leeCog %>%
  tibble::rowid_to_column("ID") %>%
  filter(
    EuNotRelated == 1,
    batch != 461,
    is.na(SUBJ_QC_BAD),
    visionProb != 1 | is.na(visionProb)
  ) %>%
  mutate(SEXnum = ifelse(SEX == "F", 1, 0))

# Z-score the polygenic scores, the pc* columns, observed_G and the stacked
# brain prediction. across() replaces the superseded mutate_at().
# NOTE(review): predicted_G_non_brain is not in this list, so it enters the
# later models unscaled -- confirm that is intended.
predicted_brain_no_brain_all_sites_leeCog_scaled <- predicted_brain_no_brain_all_sites_leeCog %>%
  mutate(across(
    c(
      ends_with("_score") & starts_with("lee"),
      starts_with("pc"),
      observed_G, predicted_G_brain
    ),
    ~ scale(.x, center = TRUE, scale = TRUE)
  ))

# Same idea for the per-modality brain predictions: keep only rows with all
# six modalities present, then z-score scores, pc*, observed_G and modalities.
predicted_brain_no_brain_all_sites_leeCog_mulimodalscaled <- predicted_brain_no_brain_all_sites_leeCog %>%
  drop_na(Nback, SST, MID, rsmri, smri, DTI) %>%
  mutate(across(
    c(
      ends_with("_score") & starts_with("lee"),
      starts_with("pc"),
      observed_G,
      Nback, SST, MID, rsmri, smri, DTI
    ),
    ~ scale(.x, center = TRUE, scale = TRUE)
  ))

3.2 correlation between PGS of cog performance and observed G across thresholds

# Lower bound of the confidence interval reported by cor.test() for the
# correlation between vec_a and vec_b (cor.test() defaults are used).
calc_pearson_conflint_low <- function(vec_a, vec_b) {
  ci_bounds <- cor.test(vec_a, vec_b)$conf.int
  ci_bounds[[1]]
}

# Upper bound of the confidence interval reported by cor.test() for the
# correlation between vec_a and vec_b (cor.test() defaults are used).
calc_pearson_conflint_high <- function(vec_a, vec_b) {
  ci_bounds <- cor.test(vec_a, vec_b)$conf.int
  ci_bounds[[2]]
}

# Lower CI bound of the correlation between each polygenic score column
# ("lee..._score") and observed_G; one row per score column.
leeCog_ci_low <- predicted_brain_no_brain_all_sites_leeCog %>%
  select(ends_with("_score") & starts_with("lee"), observed_G) %>%
  corrr::colpair_map(calc_pearson_conflint_low) %>%
  select(term, observed_G_ci_low = observed_G) %>%
  filter(term != "observed_G")

# Upper CI bound for the same column pairs.
leeCog_ci_high <- predicted_brain_no_brain_all_sites_leeCog %>%
  select(ends_with("_score") & starts_with("lee"), observed_G) %>%
  corrr::colpair_map(calc_pearson_conflint_high) %>%
  select(term, observed_G_ci_high = observed_G) %>%
  filter(term != "observed_G")

# Point estimates of the same correlations (corrr::correlate() defaults).
leeCog_corr <- predicted_brain_no_brain_all_sites_leeCog %>%
  select(ends_with("_score") & starts_with("lee"), observed_G) %>%
  corrr::correlate() %>%
  select(term, observed_G) %>%
  filter(term != "observed_G")
## 
## Correlation method: 'pearson'
## Missing treated using: 'pairwise.complete.obs'
# Bar chart of the correlation between observed_G and the polygenic score at
# each p-value threshold, with the confidence bounds as error bars.
# NOTE(review): the Threshold labels are attached by row position, so they
# assume the joined rows come out in the order p=.1, .01, ..., 1e-8, .5, .05
# -- verify against the order of the score columns.
plyr::join_all(
  list(leeCog_corr, leeCog_ci_low, leeCog_ci_high),
  by = "term", type = "full", match = "all"
) %>%
  bind_cols(
    Threshold = c(
      ".1", ".01", ".001", ".0001", ".00001", ".000001",
      ".0000001", ".00000001", ".5", ".05"
    )
  ) %>%
  arrange(desc(Threshold)) %>%
  ggplot(aes(x = Threshold, y = observed_G)) +
  geom_col(alpha = 0.8, show.legend = FALSE) +
  geom_errorbar(
    aes(ymin = observed_G_ci_low, ymax = observed_G_ci_high),
    width = 0.2
  ) +
  coord_flip() +
  theme_minimal(base_size = 20) +
  labs(
    x = "PGS Threshold at p<",
    y = "Pearson Correlation with CI95%",
    title = "Correlation between the G-Factor\nand PGS of Cognitive Ability"
  )

3.3 plot a scatter between PGS at .01 and G-Factor

# Scatter plot of the z-scored observed g-factor against the polygenic score
# column leeCognitionp1e_2_score, with a fitted linear trend line.
# Only the y variable is standardized here; x is plotted as stored.
ggplot(
  data = predicted_brain_no_brain_all_sites_leeCog,
  mapping = aes(x = leeCognitionp1e_2_score, y = scale(observed_G))
) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm") +
  labs(
    color = "",
    x = "PGS of Cognitive Ability\nat p<.01 PGS Threshold",
    y = "observed g-factor (Z)"
  ) +
  theme_classic(base_size = 24)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 204 rows containing non-finite values (stat_smooth).
## Warning: Removed 204 rows containing missing values (geom_point).

# Pearson correlation test (cor.test() defaults) between the polygenic score
# column leeCognitionp1e_2_score and observed_G. The argument expressions are
# echoed verbatim in the printed "data:" line, so they are left as written.
cor.test(predicted_brain_no_brain_all_sites_leeCog$leeCognitionp1e_2_score,
         predicted_brain_no_brain_all_sites_leeCog$observed_G)
## 
##  Pearson's product-moment correlation
## 
## data:  predicted_brain_no_brain_all_sites_leeCog$leeCognitionp1e_2_score and predicted_brain_no_brain_all_sites_leeCog$observed_G
## t = 14.528, df = 4609, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1814861 0.2366902
## sample estimates:
##       cor 
## 0.2092549

3.4 fit the mediation model using PGS at .01 as the ind

brain is mediator
g-factor is the dv

## Observations are treated as independent here (no clustering is modelled).
# Fix the RNG state so the bootstrap draws below are reproducible.
set.seed(1234)

# Mediation model in lavaan syntax:
#   a: leeCognitionp1e_2_score -> predicted_G_brain
#   b: predicted_G_brain       -> observed_G
#   c: leeCognitionp1e_2_score -> observed_G (direct path)
# pc1-pc4 enter both regressions as covariates.
# Defined parameters: indirect effect (ab), total effect, proportion mediated.
leecog_lavaan <- '

observed_G~b*predicted_G_brain+c*leeCognitionp1e_2_score 
predicted_G_brain~a*leeCognitionp1e_2_score 

observed_G~pc1 + pc2 + pc3 + pc4 
predicted_G_brain~pc1 + pc2 + pc3 + pc4  

#indirect and total effects between
ab:=a*b
total:=ab+c
prop:=ab/total

'

# Fit on the z-scored data with bootstrap standard errors (5000 draws) and a
# Bollen-Stine test.
fit_leecog_lavaan <- lavaan::sem(
  model = leecog_lavaan,
  data = predicted_brain_no_brain_all_sites_leeCog_scaled,
  se = "bootstrap",
  bootstrap = 5000,
  test = "bollen.stine"
)

# Print fit measures, standardized solution and R-square.
lavaan::summary(
  fit_leecog_lavaan,
  fit.measures = TRUE,
  standardized = TRUE,
  rsquare = TRUE
)
## lavaan 0.6-9 ended normally after 26 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        13
##                                                       
##                                                   Used       Total
##   Number of observations                          4389        4815
##                                                                   
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Model Test Baseline Model:
## 
##   Test statistic                               851.124
##   Degrees of freedom                                11
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -11988.454
##   Loglikelihood unrestricted model (H1)     -11988.454
##                                                       
##   Akaike (AIC)                               24002.908
##   Bayesian (BIC)                             24085.937
##   Sample-size adjusted Bayesian (BIC)        24044.628
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent confidence interval - lower         0.000
##   90 Percent confidence interval - upper         0.000
##   P-value RMSEA <= 0.05                             NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000
## 
## Parameter Estimates:
## 
##   Standard errors                            Bootstrap
##   Number of requested bootstrap draws             5000
##   Number of successful bootstrap draws            5000
## 
## Regressions:
##                       Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   observed_G ~                                                             
##     prdctd_G_b (b)       0.342    0.014   24.851    0.000    0.342    0.345
##     lCgntn1_2_ (c)       0.174    0.014   12.220    0.000    0.174    0.175
##   predicted_G_brain ~                                                      
##     lCgntn1_2_ (a)       0.094    0.015    6.333    0.000    0.094    0.094
##   observed_G ~                                                             
##     pc1                  0.125    0.060    2.093    0.036    0.125    0.124
##     pc2                 -0.247    0.198   -1.250    0.211   -0.247   -0.069
##     pc3                  0.005    0.104    0.045    0.964    0.005    0.005
##     pc4                 -0.020    0.119   -0.170    0.865   -0.020   -0.021
##   predicted_G_brain ~                                                      
##     pc1                  0.008    0.051    0.148    0.882    0.008    0.007
##     pc2                 -0.033    0.169   -0.196    0.845   -0.033   -0.009
##     pc3                 -0.015    0.079   -0.190    0.849   -0.015   -0.016
##     pc4                 -0.005    0.122   -0.040    0.968   -0.005   -0.005
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .observed_G        0.816    0.019   43.411    0.000    0.816    0.831
##    .predictd_G_brn    0.991    0.017   57.384    0.000    0.991    0.991
## 
## R-Square:
##                    Estimate
##     observed_G        0.169
##     predictd_G_brn    0.009
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     ab                0.032    0.005    6.089    0.000    0.032    0.032
##     total             0.207    0.015   13.611    0.000    0.207    0.208
##     prop              0.156    0.024    6.393    0.000    0.156    0.156
# Parameter table with 95% bootstrap confidence intervals
# (boot.ci.type = "bca.simple") and standardized estimates.
fit_leecog_lavaan_para <- lavaan::parameterEstimates(
  object = fit_leecog_lavaan,
  ci = TRUE,
  level = 0.95,
  boot.ci.type = "bca.simple",
  standardized = TRUE
)

knitr::kable(fit_leecog_lavaan_para)
lhs op rhs label est se z pvalue ci.lower ci.upper std.lv std.all std.nox
observed_G ~ predicted_G_brain b 0.3421572 0.0137684 24.8509171 0.0000000 0.3154736 0.3689894 0.3421572 0.3453233 0.3453233
observed_G ~ leeCognitionp1e_2_score c 0.1743477 0.0142668 12.2204832 0.0000000 0.1472803 0.2017802 0.1743477 0.1751125 0.1759811
predicted_G_brain ~ leeCognitionp1e_2_score a 0.0944815 0.0149186 6.3331215 0.0000000 0.0662443 0.1245356 0.0944815 0.0940259 0.0944923
observed_G ~ pc1 0.1249980 0.0597175 2.0931528 0.0363355 0.0042824 0.2391465 0.1249980 0.1238580 0.1261690
observed_G ~ pc2 -0.2474684 0.1979319 -1.2502706 0.2112007 -0.6549932 0.1144279 -0.2474684 -0.0693871 -0.2497868
observed_G ~ pc3 0.0046927 0.1035110 0.0453348 0.9638404 -0.1527567 0.2852511 0.0046927 0.0049345 0.0047366
observed_G ~ pc4 -0.0202014 0.1187176 -0.1701633 0.8648817 -0.3187973 0.1561772 -0.0202014 -0.0213472 -0.0203906
predicted_G_brain ~ pc1 0.0075282 0.0508248 0.1481203 0.8822479 -0.0877917 0.1121372 0.0075282 0.0073911 0.0075290
predicted_G_brain ~ pc2 -0.0330395 0.1686961 -0.1958523 0.8447257 -0.3785987 0.2795089 -0.0330395 -0.0091789 -0.0330433
predicted_G_brain ~ pc3 -0.0149924 0.0788559 -0.1901241 0.8492119 -0.2187669 0.1473727 -0.0149924 -0.0156206 -0.0149941
predicted_G_brain ~ pc4 -0.0049251 0.1217863 -0.0404406 0.9677419 -0.1393776 0.3776521 -0.0049251 -0.0051567 -0.0049257
observed_G ~~ observed_G 0.8160268 0.0187978 43.4108302 0.0000000 0.7822946 0.8559610 0.8160268 0.8313882 0.8313882
predicted_G_brain ~~ predicted_G_brain 0.9905535 0.0172619 57.3837648 0.0000000 0.9589689 1.0272917 0.9905535 0.9907792 0.9907792
leeCognitionp1e_2_score ~~ leeCognitionp1e_2_score 0.9901527 0.0000000 NA NA 0.9901527 0.9901527 0.9901527 1.0000000 0.9901527
leeCognitionp1e_2_score ~~ pc1 0.1164586 0.0000000 NA NA 0.1164586 0.1164586 0.1164586 0.1192200 0.1164586
leeCognitionp1e_2_score ~~ pc2 0.0298778 0.0000000 NA NA 0.0298778 0.0298778 0.0298778 0.1080907 0.0298778
leeCognitionp1e_2_score ~~ pc3 -0.0334960 0.0000000 NA NA -0.0334960 -0.0334960 -0.0334960 -0.0323121 -0.0334960
leeCognitionp1e_2_score ~~ pc4 0.0200220 0.0000000 NA NA 0.0200220 0.0200220 0.0200220 0.0192197 0.0200220
pc1 ~~ pc1 0.9637024 0.0000000 NA NA 0.9637024 0.9637024 0.9637024 1.0000000 0.9637024
pc1 ~~ pc2 0.2553123 0.0000000 NA NA 0.2553123 0.2553123 0.2553123 0.9362483 0.2553123
pc1 ~~ pc3 -0.0813157 0.0000000 NA NA -0.0813157 -0.0813157 -0.0813157 -0.0795109 -0.0813157
pc1 ~~ pc4 0.0206747 0.0000000 NA NA 0.0206747 0.0206747 0.0206747 0.0201167 0.0206747
pc2 ~~ pc2 0.0771646 0.0000000 NA NA 0.0771646 0.0771646 0.0771646 1.0000000 0.0771646
pc2 ~~ pc3 0.0266052 0.0000000 NA NA 0.0266052 0.0266052 0.0266052 0.0919352 0.0266052
pc2 ~~ pc4 0.0041940 0.0000000 NA NA 0.0041940 0.0041940 0.0041940 0.0144216 0.0041940
pc3 ~~ pc3 1.0853069 0.0000000 NA NA 1.0853069 1.0853069 1.0853069 1.0000000 1.0853069
pc3 ~~ pc4 -0.0169518 0.0000000 NA NA -0.0169518 -0.0169518 -0.0169518 -0.0155427 -0.0169518
pc4 ~~ pc4 1.0960262 0.0000000 NA NA 1.0960262 1.0960262 1.0960262 1.0000000 1.0960262
ab := a*b ab 0.0323275 0.0053094 6.0887242 0.0000000 0.0225296 0.0432952 0.0323275 0.0324693 0.0326304
total := ab+c total 0.2066753 0.0151844 13.6110637 0.0000000 0.1772344 0.2355815 0.2066753 0.2075818 0.2086115
prop := ab/total prop 0.1564170 0.0244688 6.3925058 0.0000000 0.1102938 0.2074586 0.1564170 0.1564170 0.1564170

4 final mediation model

Social_Dem_Psy and PGS are the independent variables; brain is the mediator
g-factor is the dv

# Fix the RNG state so the bootstrap draws below are reproducible.
set.seed(1234)

# Two-predictor mediation model in lavaan syntax. predicted_G_non_brain
# (socdem) and leeCognitionp1e_2_score (pgs) each have a path to the mediator
# predicted_G_brain (a_*) and a direct path to observed_G (c_*); b is the
# mediator -> outcome path. pc1-pc4 enter both regressions as covariates.
# Defined parameters give the indirect effect, total effect and proportion
# mediated separately for each predictor.
socdem_leecog_lavaan <- '

observed_G~ b*predicted_G_brain + c_socdem*predicted_G_non_brain + c_pgs*leeCognitionp1e_2_score 
predicted_G_brain~a_socdem*predicted_G_non_brain + a_pgs*leeCognitionp1e_2_score 

observed_G~pc1 + pc2 + pc3 + pc4 
predicted_G_brain~pc1 + pc2 + pc3 + pc4  

#indirect and total effects
a_socdem_b:=a_socdem*b
a_pgs_b:=a_pgs*b
total_socdem:=a_socdem*b+c_socdem
total_pgs:=a_pgs*b+c_pgs
prop_socdem :=a_socdem_b/total_socdem
prop_pgs :=a_pgs_b/total_pgs

'

# Fit on the z-scored data with bootstrap standard errors (5000 draws) and a
# Bollen-Stine test.
fit_socdem_leecog_lavaan <- lavaan::sem(
  model = socdem_leecog_lavaan,
  data = predicted_brain_no_brain_all_sites_leeCog_scaled,
  se = "bootstrap",
  bootstrap = 5000,
  test = "bollen.stine"
)

# Print fit measures, standardized solution and R-square.
lavaan::summary(
  fit_socdem_leecog_lavaan,
  fit.measures = TRUE,
  standardized = TRUE,
  rsquare = TRUE
)
## lavaan 0.6-9 ended normally after 31 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        15
##                                                       
##                                                   Used       Total
##   Number of observations                          4389        4815
##                                                                   
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Model Test Baseline Model:
## 
##   Test statistic                              1728.306
##   Degrees of freedom                                13
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -11549.863
##   Loglikelihood unrestricted model (H1)     -11549.863
##                                                       
##   Akaike (AIC)                               23129.725
##   Bayesian (BIC)                             23225.528
##   Sample-size adjusted Bayesian (BIC)        23177.864
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent confidence interval - lower         0.000
##   90 Percent confidence interval - upper         0.000
##   P-value RMSEA <= 0.05                             NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000
## 
## Parameter Estimates:
## 
##   Standard errors                            Bootstrap
##   Number of requested bootstrap draws             5000
##   Number of successful bootstrap draws            5000
## 
## Regressions:
##                       Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   observed_G ~                                                             
##     prdc_G_    (b)       0.263    0.013   20.323    0.000    0.263    0.266
##     prd_G__ (c_sc)       0.803    0.032   25.033    0.000    0.803    0.353
##     lCg1_2_ (c_pg)       0.129    0.013    9.812    0.000    0.129    0.130
##   predicted_G_brain ~                                                      
##     prd_G__ (a_sc)       0.539    0.033   16.271    0.000    0.539    0.235
##     lCg1_2_ (a_pg)       0.059    0.015    4.035    0.000    0.059    0.059
##   observed_G ~                                                             
##     pc1                  0.040    0.051    0.787    0.431    0.040    0.040
##     pc2                 -0.087    0.174   -0.503    0.615   -0.087   -0.025
##     pc3                  0.008    0.083    0.097    0.923    0.008    0.008
##     pc4                 -0.010    0.122   -0.083    0.934   -0.010   -0.011
##   predicted_G_brain ~                                                      
##     pc1                 -0.050    0.051   -0.972    0.331   -0.050   -0.049
##     pc2                  0.076    0.170    0.449    0.653    0.076    0.021
##     pc3                 -0.012    0.077   -0.155    0.877   -0.012   -0.012
##     pc4                  0.002    0.113    0.019    0.985    0.002    0.002
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .observed_G        0.706    0.016   45.112    0.000    0.706    0.719
##    .predictd_G_brn    0.938    0.017   55.888    0.000    0.938    0.938
## 
## R-Square:
##                    Estimate
##     observed_G        0.281
##     predictd_G_brn    0.062
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     a_socdem_b        0.142    0.011   12.789    0.000    0.142    0.062
##     a_pgs_b           0.016    0.004    3.916    0.000    0.016    0.016
##     total_socdem      0.945    0.033   28.609    0.000    0.945    0.415
##     total_pgs         0.145    0.014   10.570    0.000    0.145    0.146
##     prop_socdem       0.150    0.012   12.929    0.000    0.150    0.150
##     prop_pgs          0.108    0.027    3.994    0.000    0.108    0.108
# Parameter table with 95% bootstrap confidence intervals
# (boot.ci.type = "bca.simple") and standardized estimates.
socdem_leecog_lavaan_para <- lavaan::parameterEstimates(
  object = fit_socdem_leecog_lavaan,
  ci = TRUE,
  level = 0.95,
  boot.ci.type = "bca.simple",
  standardized = TRUE
)

knitr::kable(socdem_leecog_lavaan_para)
lhs op rhs label est se z pvalue ci.lower ci.upper std.lv std.all std.nox
observed_G ~ predicted_G_brain b 0.2631146 0.0129467 20.3228779 0.0000000 0.2367382 0.2876208 0.2631146 0.2655493 0.2655493
observed_G ~ predicted_G_non_brain c_socdem 0.8027280 0.0320671 25.0327529 0.0000000 0.7416864 0.8663541 0.8027280 0.3525100 0.8102483
observed_G ~ leeCognitionp1e_2_score c_pgs 0.1293150 0.0131793 9.8120012 0.0000000 0.1041945 0.1556013 0.1293150 0.1298822 0.1305265
predicted_G_brain ~ predicted_G_non_brain a_socdem 0.5392928 0.0331442 16.2711033 0.0000000 0.4749679 0.6062260 0.5392928 0.2346537 0.5393542
predicted_G_brain ~ leeCognitionp1e_2_score a_pgs 0.0592101 0.0146736 4.0351396 0.0000546 0.0318080 0.0883781 0.0592101 0.0589246 0.0592169
observed_G ~ pc1 0.0404987 0.0514325 0.7874142 0.4310394 -0.0626762 0.1406440 0.0404987 0.0401294 0.0408781
observed_G ~ pc2 -0.0874500 0.1737491 -0.5033118 0.6147451 -0.4307179 0.2390868 -0.0874500 -0.0245199 -0.0882693
observed_G ~ pc3 0.0080095 0.0825964 0.0969721 0.9227486 -0.1510781 0.2527761 0.0080095 0.0084224 0.0080846
observed_G ~ pc4 -0.0101223 0.1223925 -0.0827035 0.9340873 -0.3333312 0.0936016 -0.0101223 -0.0106964 -0.0102171
predicted_G_brain ~ pc1 -0.0496401 0.0510824 -0.9717650 0.3311675 -0.1439103 0.0582183 -0.0496401 -0.0487364 -0.0496458
predicted_G_brain ~ pc2 0.0762187 0.1697452 0.4490182 0.6534185 -0.2659863 0.4011320 0.0762187 0.0211748 0.0762274
predicted_G_brain ~ pc3 -0.0119679 0.0773923 -0.1546392 0.8771057 -0.2275455 0.1474684 -0.0119679 -0.0124693 -0.0119692
predicted_G_brain ~ pc4 0.0021078 0.1126057 0.0187185 0.9850657 -0.1779045 0.3231063 0.0021078 0.0022069 0.0021081
observed_G ~~ observed_G 0.7056734 0.0156427 45.1121209 0.0000000 0.6769521 0.7380331 0.7056734 0.7189574 0.7189574
predicted_G_brain ~~ predicted_G_brain 0.9379524 0.0167826 55.8884897 0.0000000 0.9072665 0.9736656 0.9379524 0.9381661 0.9381661
predicted_G_non_brain ~~ predicted_G_non_brain 0.1892808 0.0000000 NA NA 0.1892808 0.1892808 0.1892808 1.0000000 0.1892808
predicted_G_non_brain ~~ leeCognitionp1e_2_score 0.0709779 0.0000000 NA NA 0.0709779 0.0709779 0.0709779 0.1639528 0.0709779
predicted_G_non_brain ~~ pc1 0.0582364 0.0000000 NA NA 0.0582364 0.0582364 0.0582364 0.1363546 0.0582364
predicted_G_non_brain ~~ pc2 0.0131816 0.0000000 NA NA 0.0131816 0.0131816 0.0131816 0.1090704 0.0131816
predicted_G_non_brain ~~ pc3 -0.0220665 0.0000000 NA NA -0.0220665 -0.0220665 -0.0220665 -0.0486859 -0.0220665
predicted_G_non_brain ~~ pc4 -0.0115468 0.0000000 NA NA -0.0115468 -0.0115468 -0.0115468 -0.0253511 -0.0115468
leeCognitionp1e_2_score ~~ leeCognitionp1e_2_score 0.9901527 0.0000000 NA NA 0.9901527 0.9901527 0.9901527 1.0000000 0.9901527
leeCognitionp1e_2_score ~~ pc1 0.1164586 0.0000000 NA NA 0.1164586 0.1164586 0.1164586 0.1192200 0.1164586
leeCognitionp1e_2_score ~~ pc2 0.0298778 0.0000000 NA NA 0.0298778 0.0298778 0.0298778 0.1080907 0.0298778
leeCognitionp1e_2_score ~~ pc3 -0.0334960 0.0000000 NA NA -0.0334960 -0.0334960 -0.0334960 -0.0323121 -0.0334960
leeCognitionp1e_2_score ~~ pc4 0.0200220 0.0000000 NA NA 0.0200220 0.0200220 0.0200220 0.0192197 0.0200220
pc1 ~~ pc1 0.9637024 0.0000000 NA NA 0.9637024 0.9637024 0.9637024 1.0000000 0.9637024
pc1 ~~ pc2 0.2553123 0.0000000 NA NA 0.2553123 0.2553123 0.2553123 0.9362483 0.2553123
pc1 ~~ pc3 -0.0813157 0.0000000 NA NA -0.0813157 -0.0813157 -0.0813157 -0.0795109 -0.0813157
pc1 ~~ pc4 0.0206747 0.0000000 NA NA 0.0206747 0.0206747 0.0206747 0.0201167 0.0206747
pc2 ~~ pc2 0.0771646 0.0000000 NA NA 0.0771646 0.0771646 0.0771646 1.0000000 0.0771646
pc2 ~~ pc3 0.0266052 0.0000000 NA NA 0.0266052 0.0266052 0.0266052 0.0919352 0.0266052
pc2 ~~ pc4 0.0041940 0.0000000 NA NA 0.0041940 0.0041940 0.0041940 0.0144216 0.0041940
pc3 ~~ pc3 1.0853069 0.0000000 NA NA 1.0853069 1.0853069 1.0853069 1.0000000 1.0853069
pc3 ~~ pc4 -0.0169518 0.0000000 NA NA -0.0169518 -0.0169518 -0.0169518 -0.0155427 -0.0169518
pc4 ~~ pc4 1.0960262 0.0000000 NA NA 1.0960262 1.0960262 1.0960262 1.0000000 1.0960262
a_socdem_b := a_socdem*b a_socdem_b 0.1418958 0.0110950 12.7891718 0.0000000 0.1215286 0.1649183 0.1418958 0.0623121 0.1432251
a_pgs_b := a_pgs*b a_pgs_b 0.0155791 0.0039780 3.9163226 0.0000899 0.0083469 0.0238579 0.0155791 0.0156474 0.0157250
total_socdem := a_socdem*b+c_socdem total_socdem 0.9446238 0.0330180 28.6093655 0.0000000 0.8811028 1.0102441 0.9446238 0.4148221 0.9534735
total_pgs := a_pgs*b+c_pgs total_pgs 0.1448941 0.0137075 10.5704508 0.0000000 0.1187712 0.1723074 0.1448941 0.1455296 0.1462515
prop_socdem := a_socdem_b/total_socdem prop_socdem 0.1502141 0.0116186 12.9287596 0.0000000 0.1288758 0.1742519 0.1502141 0.1502141 0.1502141
prop_pgs := a_pgs_b/total_pgs prop_pgs 0.1075203 0.0269231 3.9936053 0.0000651 0.0583395 0.1637417 0.1075203 0.1075203 0.1075203
# Correlation test between the `predicted_G_non_brain` and
# `leeCognitionp1e_2_score` columns of the scaled data frame.
# The two `$` expressions are kept as-is so the "data:" line of the
# printed result stays the same.
cor.test(
  x = predicted_brain_no_brain_all_sites_leeCog_scaled$predicted_G_non_brain,
  y = predicted_brain_no_brain_all_sites_leeCog_scaled$leeCognitionp1e_2_score
)
## 
##  Pearson's product-moment correlation
## 
## data:  predicted_brain_no_brain_all_sites_leeCog_scaled$predicted_G_non_brain and predicted_brain_no_brain_all_sites_leeCog_scaled$leeCognitionp1e_2_score
## t = 11.202, df = 4609, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1345642 0.1907654
## sample estimates:
##       cor 
## 0.1627969
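# NOTE(review): as written this call names no objects, so uncommenting it would
# save nothing; list the objects to keep or use save.image() -- confirm intent.
#save(file = "mediation_gfactor_new_prepocessing_sex_race.RData")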