1 Socio-Demographic & Psychological Data Preparation

1.1 Reset workspace and load libraries

This analysis uses ABCD Release 3

# Start from a clean slate: remove every object in the current environment,
# then run the garbage collector so the freed memory is released.
rm(list = ls())
gc()

library(tidyverse)
library(qgraph)
library(pander)
library(summarytools)
library(sjPlot)
library(sjmisc)
library(sjlabelled)
library(tidymodels)
library(knitr)

1.2 Setting up paths

1.3 Family relationship

# Read the ACSPSW03 table from `dataFold`; the demographic, family-ID and
# propensity-score columns used further down are selected from it.
ACS <- dataFold %>%
  paste0("ACSPSW03_DATA_TABLE.csv") %>%
  read_csv()

## Rows: 23113 Columns: 32

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (8): SUBJECTKEY, SRC_SUBJECT_ID, INTERVIEW_DATE, SEX, EVENTNAME, GENETI...
## dbl (16): ACSPSW03_ID, DATASET_ID, INTERVIEW_AGE, RACE_ETHNICITY, REL_FAMILY...
## lgl  (8): GENETIC_PAIRED_SUBJECTID_4, GENETIC_PI_HAT_2, GENETIC_PI_HAT_3, GE...

## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

#knitr::kable(glimpse(ACS))

#race_ethnicity
#1 = White; 2 = Black; 3 = Hispanic; 4 = Asian; 5 = Other

# guardian-report relationship
# Relationship of the participant in his or her family
# 0 = single; 1 = sibling; 2 = twin; 3 = triplet
# ACS %>% count(REL_RELATIONSHIP)

# Keep the identifiers, demographics, family ID and propensity score, and turn
# the categorical columns into factors.
#
# RACE_ETHNICITY is labelled with an explicit code -> label map
# (1 = White, 2 = Black, 3 = Hispanic, 4 = Asian, 5 = Other). Any code outside
# 1-5 becomes NA instead of being silently labelled "White", which is what the
# previous `.default = "White"` did. Missing values stay NA as before, and the
# level order (White, Black, Hispanic, Asian, Other) is unchanged.
ACSselected <- ACS %>%
  select(SUBJECTKEY, EVENTNAME, SEX, INTERVIEW_AGE, RACE_ETHNICITY,
         REL_FAMILY_ID, ACS_RAKED_PROPENSITY_SCORE) %>%
  mutate(
    RACE_ETHNICITY = factor(
      RACE_ETHNICITY,
      levels = 1:5,
      labels = c("White", "Black", "Hispanic", "Asian", "Other")
    ),
    SEX = as.factor(SEX),
    REL_FAMILY_ID = as.factor(REL_FAMILY_ID)
  )

# Column-level summary of the baseline rows.
ACSselected %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	7
_______________________
Column type frequency:
character	2
factor	3
numeric	2
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
SUBJECTKEY	0	1	12	16	0	11878	0
EVENTNAME	0	1	21	21	0	1	0

Variable type: factor

skim_variable	n_missing	complete_rate	ordered	n_unique	top_counts
SEX	0	1	FALSE	2	M: 6196, F: 5682
RACE_ETHNICITY	2	1	FALSE	5	Whi: 6182, His: 2411, Bla: 1784, Oth: 1247
REL_FAMILY_ID	0	1	FALSE	9856	373: 5, 749: 4, 11: 3, 400: 3

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
INTERVIEW_AGE	0	1	118.98	7.50	107.00	112.00	119.00	126.00	133.00	▇▆▆▆▆
ACS_RAKED_PROPENSITY_SCORE	0	1	691.33	350.96	161.36	449.42	619.31	821.72	1778.92	▅▇▂▂▁

1.4 site information

# Load the site and scanner information tables as tibbles.
MRIinfo <- paste0(dataFold, "ABCD_MRI01_DATA_TABLE.csv") %>%
  read.csv() %>%
  tibble::as_tibble()
Siteinfo <- paste0(dataFold, "ABCD_LT01_DATA_TABLE.csv") %>%
  read.csv() %>%
  tibble::as_tibble()

# Number of MRI rows per event and sex.
count(MRIinfo, EVENTNAME, SEX)

## # A tibble: 4 × 3
##   EVENTNAME                SEX       n
##   <chr>                    <chr> <int>
## 1 2_year_follow_up_y_arm_1 F      2617
## 2 2_year_follow_up_y_arm_1 M      3076
## 3 baseline_year_1_arm_1    F      5631
## 4 baseline_year_1_arm_1    M      6161

# Attach the site table to every MRI row (matched on subject and event), then
# keep only the identifiers, the site ID and the device serial number.
MriandSite <- MRIinfo %>%
  left_join(Siteinfo, by = c("SUBJECTKEY", "EVENTNAME")) %>%
  select(SUBJECTKEY, EVENTNAME, SITE_ID_L, MRI_INFO_DEVICESERIALNUMBER)
# glimpse(MriandSite)

# Baseline rows per site.
# probably drop site22 with 34 subjects
MriandSite %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  count(SITE_ID_L)

## # A tibble: 22 × 2
##    SITE_ID_L     n
##    <chr>     <int>
##  1 site01      402
##  2 site02      554
##  3 site03      631
##  4 site04      747
##  5 site05      376
##  6 site06      584
##  7 site07      339
##  8 site08      350
##  9 site09      431
## 10 site10      735
## # … with 12 more rows

# Column-level summary of the baseline site/scanner rows.
# NOTE(review): read.csv() keeps blank strings as "", so blank serial numbers
# are reported as `empty` rather than missing -- confirm this is intended.
MriandSite %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11792
Number of columns	4
_______________________
Column type frequency:
character	4
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	empty	n_unique
SUBJECTKEY	1	12	16	0	11787
EVENTNAME	1	21	21	0	1
SITE_ID_L	1	6	6	0	22
MRI_INFO_DEVICESERIALNUMBER	1	0	12	201	30

1.5 Cognition

1.5.1 Load neuro cognitive measures

# Read one table from `dataFold` with read.csv() and return it as a tibble.
read_abcd_table <- function(file_name) {
  as_tibble(read.csv(paste0(dataFold, file_name)))
}

NIH_TB <- read_abcd_table("ABCD_TBSS01_DATA_TABLE.csv")
CashChoice <- read_abcd_table("CCT01_DATA_TABLE.csv")
LittleMan <- read_abcd_table("LMTP201_DATA_TABLE.csv")
Pearson <- read_abcd_table("ABCD_PS01_DATA_TABLE.csv")

# The following tasks are only used in the follow-up data.
# ABCD Youth Delay Discounting Scores
DelayDis <- read_abcd_table("ABCD_YDDSS01_DATA_TABLE.csv")
# ABCD Emotional Stroop Task
EmoStroop <- read_abcd_table("ABCD_YEST01_DATA_TABLE.csv")
# ABCD Game of Dice Task (abcd_gdss01)
GameOfDice <- read_abcd_table("ABCD_GDSS01_DATA_TABLE.csv")
# ABCD Social Influence Task (abcd_siss01)
SocialInfluence <- read_abcd_table("ABCD_SISS01_DATA_TABLE.csv")

# Vision screening: visionProb is 1 when SNELLEN_VA_Y is 0 or 1, or when
# VIS_FLG is 2; otherwise 0 (NA when the comparisons cannot be decided).
vision_idx <- read_abcd_table("ABCD_SVS01_DATA_TABLE.CSV") %>%
  mutate(
    visionProb = ifelse(
      SNELLEN_VA_Y == 0 | SNELLEN_VA_Y == 1 | VIS_FLG == 2,
      1,
      0
    )
  )

# vision_idx %>% select(SNELLEN_VA_Y, VIS_FLG, visionProb) %>% arrange(SNELLEN_VA_Y)

1.5.2 summary of NIH toolbox cognition

# Summarise the uncorrected NIH Toolbox scores at baseline.
NIH_TB %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(ends_with("_UNCORRECTED")) %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	10
_______________________
Column type frequency:
numeric	10
________________________
Group variables	None

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
NIHTBX_PICVOCAB_UNCORRECTED	149	0.99	84.46	8.12	29	79	84	90	119	▁▁▇▇▁
NIHTBX_FLANKER_UNCORRECTED	155	0.99	94.00	9.14	51	89	95	100	116	▁▁▃▇▂
NIHTBX_LIST_UNCORRECTED	198	0.98	96.65	12.09	36	90	97	105	136	▁▁▅▇▁
NIHTBX_CARDSORT_UNCORRECTED	154	0.99	92.52	9.51	50	88	93	99	120	▁▁▆▇▁
NIHTBX_PATTERN_UNCORRECTED	173	0.99	88.06	14.58	30	80	88	99	140	▁▃▇▅▁
NIHTBX_PICTURE_UNCORRECTED	161	0.99	102.81	12.07	76	94	102	111	136	▃▇▇▅▁
NIHTBX_READING_UNCORRECTED	163	0.99	90.86	6.91	59	87	91	95	119	▁▁▇▂▁
NIHTBX_FLUIDCOMP_UNCORRECTED	237	0.98	91.55	10.66	44	85	92	99	131	▁▂▇▅▁
NIHTBX_CRYST_UNCORRECTED	181	0.98	86.36	7.07	51	82	86	91	115	▁▁▇▃▁
NIHTBX_TOTALCOMP_UNCORRECTED	241	0.98	86.22	9.14	44	81	87	92	117	▁▂▇▇▁

1.5.3 distribution of other cognitive tasks

ABCD Cash Choice Task - Impulsivity, delayed gratification; this single-item task asked the child “Let’s pretend a kind person wanted to give you some money. Would you rather have $75 in three days or $115 in 3 months?”. The child indicates one of these two options or a third “can’t decide” option. See Wulfert, E., Block, J.A., Santa Ana, E., Rodriguez, M.L., & Colsman, M., 2002; Anokhin, A.P., Golosheykin, S., Grant, J.D. & Heath, A.C., 2011. Administered in Baseline assessment only.

ABCD Little Man Task - Visuospatial processing flexibility, attention; participants view pictures of a figure (little man) presented in different orientations and holding a suitcase and must use mental rotation skills to assess which hand (left or right) is holding the suitcase. Accuracy and latency scores are provided for each trial. For details, see Acker, W., & Acker, W., 1982; Nixon, S. J., Prather, R. A., & Lewis, B., 2014.

ABCD Pearson Scores - Rey Auditory Verbal Learning Test – Verbal learning and memory; the task is administered according to standard instructions using a 15-item word list; there are five learning trials (Trials I-V), a distractor trial (List B), measures of immediate recall (Trial VI) and 30-minute delayed recall (Trial VII); for all trials, the total correct is recorded together with the number of perseverations and intrusions. Details can be found in Strauss, E., Sherman, E.M.S., & Spreen, O., 2006; Lezak, M.D., Howieson, D.B., Bigler, E.D., & Tranel, D., 2012. Matrix Reasoning Task – Measures fluid intelligence, visuospatial reasoning; the task is from the Wechsler Intelligence Scale for Children-V and administered using Pearson Clinical Assessment's Q-interactive platform. Total raw scores, scaled scores (ranging from 0-19; mean = 10, SD = 3) and scores for each item are available. See Wechsler, D., 2014; Daniel, M.H., Wahlstrom, D. & Zhang, O., 2014. Administered in Baseline assessment only.

# Summarise the single Cash Choice item at baseline.
CashChoice %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(CASH_CHOICE_TASK) %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	1
_______________________
Column type frequency:
numeric	1
________________________
Group variables	None

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
CASH_CHOICE_TASK	20	1	1.62	0.52	1	1	2	2	3	▆▁▇▁▁

# Summarise the Little Man Task columns at baseline, leaving out the first
# seven columns of the table.
LittleMan %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-(1:7)) %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	52
_______________________
Column type frequency:
character	2
logical	2
numeric	48
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
EVENTNAME	0	1	21	21	0	1	0
STUDY_COHORT_NAME	0	1	21	21	0	1	0

Variable type: logical

skim_variable	n_missing	complete_rate	mean	count
LMT_RUN	11878	0	NaN	:
LMT_SCR_EFFICIENCY	11878	0	NaN	:

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
LMT_SCR_PERC_CORRECT	337	0.97	0.59	0.17	0.00	0.47	0.56	0.72	1.00	▁▂▇▅▂
LMT_SCR_PERC_WRONG	337	0.97	0.31	0.17	0.00	0.16	0.31	0.44	0.84	▅▆▇▂▁
LMT_SCR_NUM_CORRECT	337	0.97	18.84	5.45	0.00	15.00	18.00	23.00	32.00	▁▂▇▅▂
LMT_SCR_NUM_WRONG	337	0.97	9.81	5.30	0.00	5.00	10.00	14.00	27.00	▅▆▇▂▁
LMT_SCR_NUM_TIMED_OUT	337	0.97	3.34	2.72	0.00	1.00	3.00	5.00	32.00	▇▁▁▁▁
LMT_SCR_AVG_RT	342	0.97	2658.38	474.80	1063.88	2355.68	2700.25	2989.48	4450.19	▁▃▇▂▁
LMT_SCR_RT_CORRECT	343	0.97	2661.99	470.65	1091.88	2357.26	2697.14	2982.15	4695.50	▁▅▇▂▁
LMT_SCR_RT_WRONG	653	0.95	2688.16	625.58	824.00	2233.53	2681.00	3100.91	4990.00	▁▆▇▂▁
LMT_SCR_CORRECT_RT_STIMTYPE1	1270	0.89	2727.44	739.10	789.00	2200.00	2671.00	3192.00	17963.00	▇▁▁▁▁
LMT_SCR_CORRECT_RT_STIMTYPE2	1235	0.90	2796.90	768.13	550.00	2243.50	2765.00	3309.00	5001.00	▁▅▇▅▁
LMT_SCR_CORRECT_RT_STIMTYPE3	3050	0.74	2874.75	661.98	526.00	2444.00	2845.00	3287.00	5006.00	▁▂▇▃▁
LMT_SCR_CORRECT_RT_STIMTYPE4	2900	0.76	2722.26	679.02	591.00	2248.00	2674.00	3147.00	6100.00	▁▇▆▁▁
LMT_SCR_CORRECT_RT_STIMTYPE5	3674	0.69	3050.74	760.48	347.00	2534.00	3052.50	3572.00	4999.00	▁▂▇▇▂
LMT_SCR_CORRECT_RT_STIMTYPE6	3590	0.70	2957.41	779.92	313.00	2429.00	2951.00	3480.25	5006.00	▁▃▇▆▂
LMT_SCR_CORRECT_RT_STIMTYPE7	717	0.94	2353.55	581.18	798.00	1944.00	2270.00	2672.00	4982.00	▁▇▅▁▁
LMT_SCR_CORRECT_RT_STIMTYPE8	733	0.94	2576.56	633.26	649.00	2138.00	2513.00	2962.00	5000.00	▁▇▇▂▁
LMT_SCR_CORRECT_NUM_STIMTYPE1	338	0.97	2.46	1.27	0.00	1.00	3.00	4.00	4.00	▂▅▆▇▇
LMT_SCR_CORRECT_NUM_STIMTYPE2	338	0.97	2.46	1.27	0.00	1.00	3.00	4.00	4.00	▂▅▆▇▇
LMT_SCR_CORRECT_NUM_STIMTYPE3	338	0.97	2.17	1.51	0.00	1.00	2.00	4.00	4.00	▇▃▅▇▇
LMT_SCR_CORRECT_NUM_STIMTYPE4	338	0.97	2.24	1.51	0.00	1.00	3.00	4.00	4.00	▆▃▅▇▇
LMT_SCR_CORRECT_NUM_STIMTYPE5	338	0.97	1.59	1.33	0.00	0.00	2.00	3.00	4.00	▇▆▆▅▃
LMT_SCR_CORRECT_NUM_STIMTYPE6	338	0.97	1.56	1.30	0.00	0.00	1.00	3.00	4.00	▇▆▆▅▂
LMT_SCR_CORRECT_NUM_STIMTYPE7	338	0.97	3.23	1.09	0.00	3.00	4.00	4.00	4.00	▁▁▂▃▇
LMT_SCR_CORRECT_NUM_STIMTYPE8	338	0.97	3.13	1.11	0.00	3.00	4.00	4.00	4.00	▁▁▂▃▇
LMT_SCR_WRONG_RT_STIMTYPE1	5529	0.53	2766.92	843.40	43.00	2149.00	2678.00	3311.00	5003.00	▁▃▇▅▂
LMT_SCR_WRONG_RT_STIMTYPE2	5260	0.56	2974.29	824.25	108.00	2384.25	2924.00	3540.00	5006.00	▁▂▇▆▂
LMT_SCR_WRONG_RT_STIMTYPE3	4761	0.60	2445.67	799.05	510.00	1851.00	2317.00	2892.00	5000.00	▁▇▆▂▁
LMT_SCR_WRONG_RT_STIMTYPE4	4799	0.60	2431.67	789.55	314.00	1844.00	2301.00	2896.50	5002.00	▁▇▇▃▁
LMT_SCR_WRONG_RT_STIMTYPE5	2787	0.77	2699.10	843.32	523.00	2048.00	2616.00	3272.50	5003.00	▁▇▇▅▂
LMT_SCR_WRONG_RT_STIMTYPE6	2671	0.78	2626.09	794.87	681.00	2019.00	2524.00	3132.00	5003.00	▁▇▇▃▁
LMT_SCR_WRONG_RT_STIMTYPE7	8283	0.30	2548.21	830.29	82.00	1966.00	2470.00	3056.50	5003.00	▁▅▇▃▁
LMT_SCR_WRONG_RT_STIMTYPE8	7965	0.33	2727.80	865.44	175.00	2107.00	2675.00	3306.00	5005.00	▁▅▇▅▂
LMT_SCR_WRONG_NUM_STIMTYPE1	338	0.97	0.97	1.08	0.00	0.00	1.00	2.00	4.00	▇▅▃▂▁
LMT_SCR_WRONG_NUM_STIMTYPE2	338	0.97	1.05	1.13	0.00	0.00	1.00	2.00	4.00	▇▅▃▂▁
LMT_SCR_WRONG_NUM_STIMTYPE3	338	0.97	1.46	1.50	0.00	0.00	1.00	3.00	4.00	▇▅▂▂▃
LMT_SCR_WRONG_NUM_STIMTYPE4	338	0.97	1.45	1.50	0.00	0.00	1.00	3.00	4.00	▇▅▂▂▃
LMT_SCR_WRONG_NUM_STIMTYPE5	338	0.97	1.89	1.40	0.00	1.00	2.00	3.00	4.00	▇▇▇▆▆
LMT_SCR_WRONG_NUM_STIMTYPE6	338	0.97	1.89	1.37	0.00	1.00	2.00	3.00	4.00	▇▇▇▆▆
LMT_SCR_WRONG_NUM_STIMTYPE7	338	0.97	0.54	0.95	0.00	0.00	0.00	1.00	4.00	▇▂▁▁▁
LMT_SCR_WRONG_NUM_STIMTYPE8	338	0.97	0.56	0.93	0.00	0.00	0.00	1.00	4.00	▇▂▁▁▁
LMT_SCR_TOUT_NUM_STIMTYPE1	338	0.97	0.56	0.73	0.00	0.00	0.00	1.00	4.00	▇▅▁▁▁
LMT_SCR_TOUT_NUM_STIMTYPE2	338	0.97	0.49	0.72	0.00	0.00	0.00	1.00	4.00	▇▃▁▁▁
LMT_SCR_TOUT_NUM_STIMTYPE3	338	0.97	0.37	0.63	0.00	0.00	0.00	1.00	4.00	▇▃▁▁▁
LMT_SCR_TOUT_NUM_STIMTYPE4	338	0.97	0.31	0.58	0.00	0.00	0.00	1.00	4.00	▇▂▁▁▁
LMT_SCR_TOUT_NUM_STIMTYPE5	338	0.97	0.52	0.74	0.00	0.00	0.00	1.00	4.00	▇▃▁▁▁
LMT_SCR_TOUT_NUM_STIMTYPE6	338	0.97	0.54	0.73	0.00	0.00	0.00	1.00	4.00	▇▅▁▁▁
LMT_SCR_TOUT_NUM_STIMTYPE7	338	0.97	0.23	0.50	0.00	0.00	0.00	0.00	4.00	▇▂▁▁▁
LMT_SCR_TOUT_NUM_STIMTYPE8	338	0.97	0.31	0.58	0.00	0.00	0.00	1.00	4.00	▇▂▁▁▁

# Summarise the Pearson (RAVLT and WISC-V) columns at baseline, leaving out the
# first eleven columns of the table.
Pearson %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-(1:11)) %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	61
_______________________
Column type frequency:
character	1
numeric	60
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
STUDY_COHORT_NAME	0	1	21	21	0	1	0

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
PEA_RAVLT_SD_TRIAL_I_TC	234	0.98	5.05	1.79	0	4	5	6	14	▁▇▅▁▁
PEA_RAVLT_SD_TRIAL_I_TR	143	0.99	0.36	0.91	0	0	0	0	15	▇▁▁▁▁
PEA_RAVLT_SD_TRIAL_I_TI	143	0.99	0.57	1.02	0	0	0	1	22	▇▁▁▁▁
PEA_RAVLT_SD_TRIAL_II_TC	199	0.98	7.85	2.27	0	6	8	9	15	▁▅▇▃▁
PEA_RAVLT_SD_TRIAL_II_TR	145	0.99	0.69	1.32	0	0	0	1	31	▇▁▁▁▁
PEA_RAVLT_SD_TRIAL_II_TI	145	0.99	0.31	0.77	0	0	0	0	28	▇▁▁▁▁
PEA_RAVLT_SD_TRIAL_III_TC	193	0.98	9.50	2.65	0	8	10	11	15	▁▂▇▇▂
PEA_RAVLT_SD_TRIAL_III_TR	145	0.99	1.16	1.90	0	0	1	2	32	▇▁▁▁▁
PEA_RAVLT_SD_TRIAL_III_TI	145	0.99	0.26	0.68	0	0	0	0	14	▇▁▁▁▁
PEA_RAVLT_SD_TRIAL_IV_TC	192	0.98	10.54	2.68	0	9	11	13	15	▁▁▅▇▅
PEA_RAVLT_SD_TRIAL_IV_TR	147	0.99	1.41	2.26	0	0	1	2	41	▇▁▁▁▁
PEA_RAVLT_SD_TRIAL_IV_TI	147	0.99	0.24	0.63	0	0	0	0	9	▇▁▁▁▁
PEA_RAVLT_SD_TRIAL_V_TC	188	0.98	11.20	2.62	0	10	12	13	15	▁▁▃▇▇
PEA_RAVLT_SD_TRIAL_V_TR	147	0.99	1.55	2.63	0	0	1	2	96	▇▁▁▁▁
PEA_RAVLT_SD_TRIAL_V_TI	147	0.99	0.23	0.66	0	0	0	0	21	▇▁▁▁▁
PEA_RAVLT_SD_LISTB_TC	210	0.98	4.85	1.69	0	4	5	6	13	▁▇▃▁▁
PEA_RAVLT_SD_LISTB_TR	147	0.99	0.24	0.68	0	0	0	0	13	▇▁▁▁▁
PEA_RAVLT_SD_LISTB_TI	147	0.99	0.46	1.02	0	0	0	1	16	▇▁▁▁▁
PEA_RAVLT_SD_TRIAL_VI_TC	210	0.98	9.66	3.05	0	8	10	12	15	▁▂▆▇▃
PEA_RAVLT_SD_TRIAL_VI_TR	147	0.99	1.01	1.89	0	0	0	1	35	▇▁▁▁▁
PEA_RAVLT_SD_TRIAL_VI_TI	147	0.99	0.40	0.92	0	0	0	1	27	▇▁▁▁▁
PEA_RAVLT_LD_TRIAL_VII_TC	264	0.98	9.18	3.20	0	7	9	12	15	▁▃▇▇▃
PEA_RAVLT_LD_TRIAL_VII_TR	171	0.99	0.83	1.79	0	0	0	1	45	▇▁▁▁▁
PEA_RAVLT_LD_TRIAL_VII_TI	171	0.99	0.49	0.97	0	0	0	1	17	▇▁▁▁▁
PEA_WISCV_TRS	243	0.98	17.91	3.84	0	15	18	20	32	▁▁▇▅▁
PEA_WISCV_TSS	249	0.98	9.86	2.99	1	8	10	12	19	▁▆▇▅▁
PEA_WISCV_ITEM_A_RS	219	0.98	0.98	0.12	0	1	1	1	1	▁▁▁▁▇
PEA_WISCV_ITEM_B_RS	209	0.98	0.98	0.14	0	1	1	1	1	▁▁▁▁▇
PEA_WISCV_ITEM_1_RS	11625	0.02	0.92	0.26	0	1	1	1	1	▁▁▁▁▇
PEA_WISCV_ITEM_2_RS	11585	0.02	0.95	0.21	0	1	1	1	1	▁▁▁▁▇
PEA_WISCV_ITEM_3_RS	11380	0.04	0.90	0.30	0	1	1	1	1	▁▁▁▁▇
PEA_WISCV_ITEM_4_RS	10670	0.10	0.94	0.24	0	1	1	1	1	▁▁▁▁▇
PEA_WISCV_ITEM_5_RS	203	0.98	0.98	0.15	0	1	1	1	1	▁▁▁▁▇
PEA_WISCV_ITEM_6_RS	219	0.98	0.93	0.25	0	1	1	1	1	▁▁▁▁▇
PEA_WISCV_ITEM_7_RS	221	0.98	0.96	0.19	0	1	1	1	1	▁▁▁▁▇
PEA_WISCV_ITEM_8_RS	226	0.98	0.97	0.16	0	1	1	1	1	▁▁▁▁▇
PEA_WISCV_ITEM_9_RS	234	0.98	0.95	0.22	0	1	1	1	1	▁▁▁▁▇
PEA_WISCV_ITEM_10_RS	250	0.98	0.97	0.17	0	1	1	1	1	▁▁▁▁▇
PEA_WISCV_ITEM_11_RS	267	0.98	0.94	0.23	0	1	1	1	1	▁▁▁▁▇
PEA_WISCV_ITEM_12_RS	306	0.97	0.93	0.25	0	1	1	1	1	▁▁▁▁▇
PEA_WISCV_ITEM_13_RS	323	0.97	0.92	0.28	0	1	1	1	1	▁▁▁▁▇
PEA_WISCV_ITEM_14_RS	367	0.97	0.80	0.40	0	1	1	1	1	▂▁▁▁▇
PEA_WISCV_ITEM_15_RS	470	0.96	0.74	0.44	0	0	1	1	1	▃▁▁▁▇
PEA_WISCV_ITEM_16_RS	758	0.94	0.70	0.46	0	0	1	1	1	▃▁▁▁▇
PEA_WISCV_ITEM_17_RS	1186	0.90	0.58	0.49	0	0	1	1	1	▆▁▁▁▇
PEA_WISCV_ITEM_18_RS	1626	0.86	0.56	0.50	0	0	1	1	1	▆▁▁▁▇
PEA_WISCV_ITEM_19_RS	2147	0.82	0.56	0.50	0	0	1	1	1	▆▁▁▁▇
PEA_WISCV_ITEM_20_RS	2833	0.76	0.53	0.50	0	0	1	1	1	▇▁▁▁▇
PEA_WISCV_ITEM_21_RS	3462	0.71	0.47	0.50	0	0	0	1	1	▇▁▁▁▇
PEA_WISCV_ITEM_22_RS	4189	0.65	0.32	0.46	0	0	0	1	1	▇▁▁▁▃
PEA_WISCV_ITEM_23_RS	5086	0.57	0.43	0.50	0	0	0	1	1	▇▁▁▁▆
PEA_WISCV_ITEM_24_RS	6025	0.49	0.31	0.46	0	0	0	1	1	▇▁▁▁▃
PEA_WISCV_ITEM_25_RS	7052	0.41	0.22	0.41	0	0	0	0	1	▇▁▁▁▂
PEA_WISCV_ITEM_26_RS	7813	0.34	0.30	0.46	0	0	0	1	1	▇▁▁▁▃
PEA_WISCV_ITEM_27_RS	8983	0.24	0.31	0.46	0	0	0	1	1	▇▁▁▁▃
PEA_WISCV_ITEM_28_RS	9694	0.18	0.22	0.41	0	0	0	0	1	▇▁▁▁▂
PEA_WISCV_ITEM_29_RS	10058	0.15	0.28	0.45	0	0	0	1	1	▇▁▁▁▃
PEA_WISCV_ITEM_30_RS	10532	0.11	0.10	0.30	0	0	0	0	1	▇▁▁▁▁
PEA_WISCV_ITEM_31_RS	10969	0.08	0.15	0.36	0	0	0	0	1	▇▁▁▁▂
PEA_WISCV_ITEM_32_RS	11206	0.06	0.06	0.24	0	0	0	0	1	▇▁▁▁▁

1.5.4 sum cognition

# Full join of all cognitive tables on subject and event, reduced to the
# summary scores carried forward plus the vision-problem flag.
sumCog <- list(NIH_TB, CashChoice, LittleMan, Pearson, vision_idx) %>%
  plyr::join_all(by = c("SUBJECTKEY", "EVENTNAME"), type = "full") %>%
  select(
    SUBJECTKEY,
    EVENTNAME,
    NIHTBX_FLANKER_UNCORRECTED,
    NIHTBX_CARDSORT_UNCORRECTED,
    NIHTBX_PATTERN_UNCORRECTED,
    NIHTBX_PICVOCAB_UNCORRECTED,
    NIHTBX_READING_UNCORRECTED,
    NIHTBX_PICTURE_UNCORRECTED,
    PEA_RAVLT_LD_TRIAL_VII_TC,
    NIHTBX_LIST_UNCORRECTED,
    LMT_SCR_PERC_CORRECT,
    PEA_WISCV_TRS,
    NIHTBX_FLUIDCOMP_UNCORRECTED,
    NIHTBX_CRYST_UNCORRECTED,
    NIHTBX_TOTALCOMP_UNCORRECTED,
    visionProb
  )

# Baseline summary of the selected scores (identifier columns left out).
sumCog %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-SUBJECTKEY, -EVENTNAME) %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	14
_______________________
Column type frequency:
numeric	14
________________________
Group variables	None

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
NIHTBX_FLANKER_UNCORRECTED	155	0.99	94.00	9.14	51	89.00	95.00	100.00	116	▁▁▃▇▂
NIHTBX_CARDSORT_UNCORRECTED	154	0.99	92.52	9.51	50	88.00	93.00	99.00	120	▁▁▆▇▁
NIHTBX_PATTERN_UNCORRECTED	173	0.99	88.06	14.58	30	80.00	88.00	99.00	140	▁▃▇▅▁
NIHTBX_PICVOCAB_UNCORRECTED	149	0.99	84.46	8.12	29	79.00	84.00	90.00	119	▁▁▇▇▁
NIHTBX_READING_UNCORRECTED	163	0.99	90.86	6.91	59	87.00	91.00	95.00	119	▁▁▇▂▁
NIHTBX_PICTURE_UNCORRECTED	161	0.99	102.81	12.07	76	94.00	102.00	111.00	136	▃▇▇▅▁
PEA_RAVLT_LD_TRIAL_VII_TC	264	0.98	9.18	3.20	0	7.00	9.00	12.00	15	▁▃▇▇▃
NIHTBX_LIST_UNCORRECTED	198	0.98	96.65	12.09	36	90.00	97.00	105.00	136	▁▁▅▇▁
LMT_SCR_PERC_CORRECT	337	0.97	0.59	0.17	0	0.47	0.56	0.72	1	▁▂▇▅▂
PEA_WISCV_TRS	243	0.98	17.91	3.84	0	15.00	18.00	20.00	32	▁▁▇▅▁
NIHTBX_FLUIDCOMP_UNCORRECTED	237	0.98	91.55	10.66	44	85.00	92.00	99.00	131	▁▂▇▅▁
NIHTBX_CRYST_UNCORRECTED	181	0.98	86.36	7.07	51	82.00	86.00	91.00	115	▁▁▇▃▁
NIHTBX_TOTALCOMP_UNCORRECTED	241	0.98	86.22	9.14	44	81.00	87.00	92.00	117	▁▂▇▇▁
visionProb	11085	0.07	0.04	0.19	0	0.00	0.00	0.00	1	▇▁▁▁▁

# Two-year follow-up summary of the same scores (identifier columns left out).
sumCog %>%
  filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  select(-SUBJECTKEY, -EVENTNAME) %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	6571
Number of columns	14
_______________________
Column type frequency:
numeric	14
________________________
Group variables	None

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
NIHTBX_FLANKER_UNCORRECTED	166	0.97	100.26	7.49	54.00	96.00	101.00	105.00	117	▁▁▂▇▅
NIHTBX_CARDSORT_UNCORRECTED	6550	0.00	94.71	8.53	76.00	90.00	93.00	100.00	109	▁▃▇▅▃
NIHTBX_PATTERN_UNCORRECTED	200	0.97	103.53	15.12	35.00	94.00	103.00	113.00	153	▁▂▇▇▁
NIHTBX_PICVOCAB_UNCORRECTED	280	0.96	89.18	8.33	60.00	84.00	89.00	95.00	123	▁▅▇▂▁
NIHTBX_READING_UNCORRECTED	293	0.96	94.87	6.59	67.00	91.00	95.00	98.00	180	▂▇▁▁▁
NIHTBX_PICTURE_UNCORRECTED	183	0.97	109.48	11.98	76.00	101.00	109.00	118.00	133	▁▃▇▇▃
PEA_RAVLT_LD_TRIAL_VII_TC	213	0.97	9.28	2.86	0.00	7.00	9.00	11.00	15	▁▃▇▇▃
NIHTBX_LIST_UNCORRECTED	6544	0.00	96.00	17.87	36.00	90.00	97.00	105.00	128	▁▁▃▇▂
LMT_SCR_PERC_CORRECT	77	0.99	0.74	0.18	0.03	0.59	0.75	0.91	1	▁▁▅▅▇
PEA_WISCV_TRS	6571	0.00	NaN	NA	NA	NA	NA	NA	NA
NIHTBX_FLUIDCOMP_UNCORRECTED	6547	0.00	94.58	13.66	75.00	84.75	91.50	99.00	127	▇▇▃▃▂
NIHTBX_CRYST_UNCORRECTED	192	0.97	91.03	7.09	66.00	86.00	91.00	95.00	125	▁▆▇▁▁
NIHTBX_TOTALCOMP_UNCORRECTED	6547	0.00	89.08	10.28	77.00	82.00	86.50	91.50	114	▇▅▂▂▂
visionProb	6176	0.06	0.06	0.23	0.00	0.00	0.00	0.00	1	▇▁▁▁▁

1.6 mental health

1.6.1 Parent’s report child’s mental health CBCL

CBCL definitions from Michelini et al. Translational Psychiatry (2019)

# Item-level CBCL responses and the lookup table that flags, per CBCL item,
# whether it is low-frequency and which composite (if any) it belongs to.
CBCL <- read_csv(paste0(dataFold, "ABCD_CBCL01_DATA_TABLE.csv"))
LowFreqComp <- read_csv(paste0(utilFold, "CBCLLowFreqCompositMichelini.csv")) %>%
  mutate(CBCL_COL = as.factor(CBCL_COL))

# Names of the CBCL items whose `flag_col` entry in LowFreqComp equals 1,
# returned in factor-level order. Rows with a missing flag are skipped.
cbcl_items_flagged <- function(flag_col) {
  is_flagged <- LowFreqComp[[flag_col]] == 1
  flagged <- LowFreqComp$CBCL_COL[is_flagged & !is.na(is_flagged)]
  levels(droplevels(flagged))
}

LowFreq <- cbcl_items_flagged("LowFreq")
Com1_Attack <- cbcl_items_flagged("Com1_Attack")
Com2_Destroy <- cbcl_items_flagged("Com2_Destroy")
Com3_Disobeys <- cbcl_items_flagged("Com3_Disobeys")
Com4_Steals <- cbcl_items_flagged("Com4_Steals")
Com5_Peer <- cbcl_items_flagged("Com5_Peer")
Com6_Distracted <- cbcl_items_flagged("Com6_Distracted")
Com7_Hallucinations <- cbcl_items_flagged("Com7_Hallucinations")
Com8_SexPlay <- cbcl_items_flagged("Com8_SexPlay")
Com9_Weight <- cbcl_items_flagged("Com9_Weight")

# Composite name -> member items, in the order the composites are appended.
compositeNames <- c(
  "Com1_Attack", "Com2_Destroy", "Com3_Disobeys", "Com4_Steals", "Com5_Peer",
  "Com6_Distracted", "Com7_Hallucinations", "Com8_SexPlay", "Com9_Weight"
)
compositeItems <- mget(compositeNames)

# Append one composite column: the rounded row mean of its member items.
# add .01 so that rounding .5 becomes 1 as opposed to 0
add_composite <- function(tbl, composite) {
  members <- compositeItems[[composite]]
  composite_score <- round(rowMeans(select(tbl, one_of(members))) + .01)
  mutate(tbl, !!composite := !!composite_score)
}

# Drop the low-frequency items, then add the nine composites one after another.
CBCLLowFreqDroppedCompAdded <- Reduce(
  add_composite,
  compositeNames,
  CBCL %>% select(-one_of(LowFreq))
)

# All items that were folded into a composite.
comNames <- unlist(compositeItems, use.names = FALSE)

# Note the name: "...CompDropped..." additionally removes the member items of
# the composites, leaving the composite columns in their place.
CBCLLowFreqCompDroppedCompAdded <- CBCLLowFreqDroppedCompAdded %>%
  select(-one_of(comNames))

# Complete cases only (TIMEPT is removed first so it cannot trigger a drop).
CBCLLowFreqCompDroppedCompAddedNoNa <- CBCLLowFreqCompDroppedCompAdded %>%
  select(-TIMEPT) %>%
  drop_na()

# Names of all remaining CBCL items and composites.
CBCLNamesAll <- CBCLLowFreqCompDroppedCompAddedNoNa %>%
  select(matches('CBCL_Q|Com')) %>%
  colnames()

# The remaining item names joined with their LowFreqComp definitions.
CBCLLowFreqCompDroppedCompAddedNoNaDef <-
  plyr::join_all(list(data.frame(CBCL_COL = CBCLNamesAll), LowFreqComp),
                 by = 'CBCL_COL', type = 'left')

# Item -> factor assignment table; `prevFactor` is "F1".."F5" or "drop".
prevFac <- as_tibble(read.csv(paste0(
  utilFold, "CBCLLowFreqCompDroppedCompAddedNoNaDefMicheliniFac.csv"
))) %>%
  mutate(CBCL_COL = as.factor(CBCL_COL))

# Item names whose `prevFactor` equals `factor_code`, in factor-level order.
# Rows with a missing `prevFactor` are skipped.
items_in_factor <- function(factor_code) {
  in_factor <- prevFac$prevFactor == factor_code
  levels(droplevels(prevFac$CBCL_COL[in_factor & !is.na(in_factor)]))
}

dropFCol <- items_in_factor("drop")
F1Externalizing <- items_in_factor("F1")
F2Internalizing <- items_in_factor("F2")
F3NeuroDevelopmental <- items_in_factor("F3")
F4Somatoform <- items_in_factor("F4")
F5Detachment <- items_in_factor("F5")

# Baseline rows without the items marked "drop".
CBCLMicheliniPruned <- CBCLLowFreqCompDroppedCompAddedNoNa %>%
  select(-one_of(dropFCol)) %>%
  filter(EVENTNAME == "baseline_year_1_arm_1")

CBCLNames <- CBCLMicheliniPruned %>%
  select(matches('CBCL_Q|Com')) %>%
  colnames()

# Rename the columns listed in `items` to <prefix>1, <prefix>2, ... in the
# order of `items`. sprintf() with seq_along() yields no names for an empty
# `items`, where `1:length(items)` would have produced the bogus sequence 1, 0.
rename_factor_items <- function(tbl, items, prefix) {
  rename_with(
    tbl,
    ~ sprintf("%s%d", prefix, seq_along(.x)),
    .cols = all_of(items)
  )
}

# Same pruned baseline table, with items renamed by factor membership.
# Built from CBCLMicheliniPruned instead of repeating the prune/filter steps.
CBCLMicheliniPruned.NewNames <- CBCLMicheliniPruned %>%
  rename_factor_items(F1Externalizing, "Ext") %>%
  rename_factor_items(F2Internalizing, "Int") %>%
  rename_factor_items(F3NeuroDevelopmental, "NDe") %>%
  rename_factor_items(F4Somatoform, "Som") %>%
  rename_factor_items(F5Detachment, "Det")

# Factor-analysis data: exactly the renamed item columns. matches() ignores
# case by default, so an unanchored 'Int' also picks up INTERVIEW_* columns;
# the anchored, case-sensitive pattern selects only <prefix><number> names.
CBCLfaDat.NewNames <- CBCLMicheliniPruned.NewNames %>%
  select(matches("^(Ext|Int|NDe|Som|Det)[0-9]+$", ignore.case = FALSE))

1.6.2 Load precomputed CBCL

Social, thought, attention, int and ext?

# Read the ABCD_CBCLS01 table, the source of the CBCL `_R` score columns
# selected afterwards.
CBCLPrecomputed <- dataFold %>%
  paste0("ABCD_CBCLS01_DATA_TABLE.csv") %>%
  read_csv()

## Warning: One or more parsing issues, see `problems()` for details

# glimpse(CBCLPrecomputed)

# Keep the identifiers plus every column whose name ends in "_R".
CBCLPrecomputedSelected <- CBCLPrecomputed %>%
  select(SUBJECTKEY, EVENTNAME, ends_with("_R"))

# Column-level summary across all events.
CBCLPrecomputedSelected %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	29684
Number of columns	22
_______________________
Column type frequency:
character	2
numeric	20
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
SUBJECTKEY	0	1	12	16	0	11878	0
EVENTNAME	0	1	21	24	0	3	0

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p25	p50	p75	p100	hist
CBCL_SCR_SYN_ANXDEP_R	71	1	2.48	3.05	0	1	4	26	▇▁▁▁▁
CBCL_SCR_SYN_WITHDEP_R	71	1	1.10	1.78	0	0	2	16	▇▁▁▁▁
CBCL_SCR_SYN_SOMATIC_R	71	1	1.46	1.94	0	1	2	18	▇▁▁▁▁
CBCL_SCR_SYN_SOCIAL_R	71	1	1.51	2.20	0	1	2	19	▇▁▁▁▁
CBCL_SCR_SYN_THOUGHT_R	71	1	1.58	2.18	0	1	2	22	▇▁▁▁▁
CBCL_SCR_SYN_ATTENTION_R	71	1	2.87	3.43	0	2	5	20	▇▂▁▁▁
CBCL_SCR_SYN_RULEBREAK_R	71	1	1.13	1.84	0	0	2	23	▇▁▁▁▁
CBCL_SCR_SYN_AGGRESSIVE_R	71	1	3.10	4.21	0	2	4	36	▇▁▁▁▁
CBCL_SCR_SYN_INTERNAL_R	71	1	5.05	5.56	1	3	7	51	▇▁▁▁▁
CBCL_SCR_SYN_EXTERNAL_R	71	1	4.23	5.71	0	2	6	49	▇▁▁▁▁
CBCL_SCR_SYN_TOTPROB_R	71	1	17.55	17.62	5	12	24	161	▇▁▁▁▁
CBCL_SCR_DSM5_DEPRESS_R	71	1	1.36	2.13	0	1	2	19	▇▁▁▁▁
CBCL_SCR_DSM5_ANXDISORD_R	71	1	2.01	2.42	0	1	3	18	▇▂▁▁▁
CBCL_SCR_DSM5_SOMATICPR_R	71	1	1.07	1.50	0	0	2	13	▇▁▁▁▁
CBCL_SCR_DSM5_ADHD_R	71	1	2.49	2.90	0	1	4	14	▇▃▁▁▁
CBCL_SCR_DSM5_OPPOSIT_R	71	1	1.70	2.00	0	1	3	10	▇▂▁▁▁
CBCL_SCR_DSM5_CONDUCT_R	71	1	1.21	2.29	0	0	2	25	▇▁▁▁▁
CBCL_SCR_07_SCT_R	71	1	0.51	0.99	0	0	1	8	▇▁▁▁▁
CBCL_SCR_07_OCD_R	71	1	1.33	1.81	0	1	2	16	▇▁▁▁▁
CBCL_SCR_07_STRESS_R	71	1	2.87	3.34	0	2	4	28	▇▂▁▁▁

1.6.3 Parent’s report parent’s mental health

Parent Adult Self Report Raw Scores Aseba

Adaptive Functioning and Strengths Scales: Education; Friends; Spouse/Partner; Family; Job; Personal Strengths

Syndrome Scales: Anxious/Depressed; Withdrawn; Somatic Complaints; Thought Problems; Attention Problems; Aggressive Behavior; Rule-breaking Behavior; and Intrusive

DSM-5-oriented Scales: Depressive Problems; Anxiety Problems; Somatic Problems; Avoidant Personality Problems; Attention Deficit/Hyperactivity Problems (Inattention and Hyperactivity/Impulsivity subscales); and Antisocial Personality Problems

Substance Use Scales: Tobacco; Alcohol; Drugs

Internalizing (INT), Attention Problems (ATT), Externalizing (EXT), and Total Problems (TOT) scales.

# Read the ABCD_ASRS01 table (parent Adult Self Report scores).
ASRPrecomputed <- dataFold %>%
  paste0("ABCD_ASRS01_DATA_TABLE.csv") %>%
  read_csv()
# glimpse(ASRPrecomputed)

# Keep the identifiers plus every column whose name ends in "_T".
ASRPrecomputedSelected <- ASRPrecomputed %>%
  select(SUBJECTKEY, EVENTNAME, ends_with("_T"))

# Column-level summary across all events.
ASRPrecomputedSelected %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	18449
Number of columns	22
_______________________
Column type frequency:
character	2
numeric	20
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
SUBJECTKEY	0	1	12	16	0	11878	0
EVENTNAME	0	1	21	24	0	2	0

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
ASR_SCR_PERSTR_T	163	0.99	47.49	9.44	20	42	49	55	60	▁▂▃▇▇
ASR_SCR_ANXDEP_T	163	0.99	53.49	5.74	50	50	51	55	98	▇▁▁▁▁
ASR_SCR_WITHDRAWN_T	163	0.99	52.82	5.03	50	50	51	53	100	▇▁▁▁▁
ASR_SCR_SOMATIC_T	163	0.99	54.81	6.18	50	50	52	57	98	▇▂▁▁▁
ASR_SCR_THOUGHT_T	163	0.99	52.82	4.97	50	50	51	54	95	▇▁▁▁▁
ASR_SCR_ATTENTION_T	163	0.99	53.94	5.89	50	50	51	57	94	▇▂▁▁▁
ASR_SCR_AGGRESSIVE_T	163	0.99	53.32	5.02	50	50	51	54	89	▇▂▁▁▁
ASR_SCR_RULEBREAK_T	163	0.99	52.53	4.68	50	50	50	52	87	▇▁▁▁▁
ASR_SCR_INTRUSIVE_T	163	0.99	51.65	3.40	50	50	50	51	76	▇▁▁▁▁
ASR_SCR_INTERNAL_T	163	0.99	48.23	10.53	30	40	48	55	95	▆▇▃▁▁
ASR_SCR_EXTERNAL_T	163	0.99	45.91	9.57	30	38	46	52	90	▇▇▃▁▁
ASR_SCR_TOTPROB_T	163	0.99	43.02	10.21	25	36	43	50	89	▆▇▃▁▁
ASR_SCR_DEPRESS_T	163	0.99	54.07	6.02	50	50	51	57	100	▇▁▁▁▁
ASR_SCR_ANXDISORD_T	163	0.99	53.46	5.36	50	50	51	54	80	▇▁▁▁▁
ASR_SCR_SOMATICPR_T	163	0.99	54.82	6.51	50	50	51	58	100	▇▂▁▁▁
ASR_SCR_AVOIDANT_T	163	0.99	53.18	5.35	50	50	51	54	90	▇▁▁▁▁
ASR_SCR_ADHD_T	163	0.99	53.24	5.53	50	50	51	53	98	▇▁▁▁▁
ASR_SCR_ANTISOCIAL_T	163	0.99	53.03	4.63	50	50	51	55	84	▇▁▁▁▁
ASR_SCR_INATTENTION_T	163	0.99	54.35	6.50	50	50	51	57	90	▇▂▁▁▁
ASR_SCR_HYPERACTIVE_T	163	0.99	51.98	4.25	50	50	50	51	80	▇▁▁▁▁

1.6.4 Other mental health surveys from parents

Only the Parent General Behavior Inventory - Mania is available at baseline.

# ABCD Sum Scores Mental Health Parent
# Read the raw table and convert it to a tibble.
mentalHealthParent <- paste0(dataFold, "ABCD_MHP02_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# Baseline overview: drop the first eight columns and STUDY_COHORT_NAME,
# then summarise every remaining column.
mentalHealthParent %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-(1:8), -STUDY_COHORT_NAME) %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	76
_______________________
Column type frequency:
logical	8
numeric	68
________________________
Group variables	None

Variable type: logical

skim_variable	n_missing	mean	count
PLE_P_SS_TOTAL_GOOD_NT	11878	NaN	:
PLE_P_SS_TOTAL_BAD_NT	11878	NaN	:
PLE_P_SS_AFFECTED_GOOD_SUM_NT	11878	NaN	:
PLE_P_SS_AFFECTED_GOOD_MEAN_NT	11878	NaN	:
PLE_P_SS_AFFECTED_BAD_SUM_NT	11878	NaN	:
PLE_P_SS_AFFECTED_BAD_MEAN_NT	11878	NaN	:
PLE_P_SS_AFFECTED_MEAN_NT	11878	NaN	:
PLE_P_SS_AFFECT_SUM_NT	11878	NaN	:

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
PGBI_P_SS_SCORE	8	1	1.30	2.77	0	0	0	1	28	▇▁▁▁▁
PGBI_P_SS_SCORE_NM	0	1	0.01	0.22	0	0	0	0	10	▇▁▁▁▁
PGBI_P_SS_SCORE_NT	0	1	10.00	0.00	10	10	10	10	10	▁▁▇▁▁
PLE_P_SS_TOTAL_NUMBER	11878	0	NaN	NA	NA	NA	NA	NA	NA
PLE_P_SS_TOTAL_NUMBER_NM	11878	0	NaN	NA	NA	NA	NA	NA	NA
PLE_P_SS_TOTAL_NUMBER_NT	11878	0	NaN	NA	NA	NA	NA	NA	NA
PLE_P_SS_TOTAL_GOOD	11878	0	NaN	NA	NA	NA	NA	NA	NA
PLE_P_SS_TOTAL_BAD	11878	0	NaN	NA	NA	NA	NA	NA	NA
PLE_P_SS_AFFECTED_GOOD_SUM	11878	0	NaN	NA	NA	NA	NA	NA	NA
PLE_P_SS_AFFECTED_GOOD_MEAN	11878	0	NaN	NA	NA	NA	NA	NA	NA
PLE_P_SS_AFFECTED_BAD_SUM	11878	0	NaN	NA	NA	NA	NA	NA	NA
PLE_P_SS_AFFECTED_BAD_MEAN	11878	0	NaN	NA	NA	NA	NA	NA	NA
PLE_P_SS_AFFECTED_MEAN	11878	0	NaN	NA	NA	NA	NA	NA	NA
PLE_P_SS_AFFECT_SUM	11878	0	NaN	NA	NA	NA	NA	NA	NA
SSRS_P_SS_SUM	11878	0	NaN	NA	NA	NA	NA	NA	NA
SSRS_P_SS_SUM_NM	11878	0	NaN	NA	NA	NA	NA	NA	NA
SSRS_SS_SUM_NT	11878	0	NaN	NA	NA	NA	NA	NA	NA
GISH_P_SS_M_SUM	11878	0	NaN	NA	NA	NA	NA	NA	NA
GISH_P_SS_M_SUM_NM	11878	0	NaN	NA	NA	NA	NA	NA	NA
GISH_P_SS_M_SUM_NT	11878	0	NaN	NA	NA	NA	NA	NA	NA
GISH_P_SS_F_SUM	11878	0	NaN	NA	NA	NA	NA	NA	NA
GISH_P_SS_F_SUM_NM	11878	0	NaN	NA	NA	NA	NA	NA	NA
GISH_P_SS_F_SUM_NT	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_AGGRESSION_NM	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_AGGRESSION_NT	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_ATTENTION	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_ATTENTION_NM	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_ATTENTION_NT	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_DEPRESSIVE_MOOD	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_DEPRESSIVE_MOOD_NM	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_DEPRESSIVE_MOOD_NT	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_EFFORT_CONT_SS	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_EFFORT_CONT_SS_NM	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_EFFORT_CONT_SS_NT	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_FEAR	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_FEAR_NM	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_FEAR_NT	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_FRUSTRATION	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_FRUSTRATION_NM	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_FRUSTRATION_NT	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_INHIBITORY	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_INHIBITORY_NM	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_INHIBITORY_NT	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_ACTIVATION	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_NEG_AFFECT_SS	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_NEG_AFFECT_SS_NM	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_NEG_AFFECT_SS_NT	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_SHYNESS	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_SHYNESS_NM	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_SHYNESS_NT	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_SURGENCY	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_SURGENCY_NM	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_SURGENCY_NT	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_SURGENCY_SS	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_ACTIVATION_NM	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_SURGENCY_SS_NM	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_SURGENCY_SS_NT	11878	0	NaN	NA	NA	NA	NA	NA	NA
GISH_P_SS_F_SUM2	11878	0	NaN	NA	NA	NA	NA	NA	NA
GISH_P_SS_F_SUM2_NM	11878	0	NaN	NA	NA	NA	NA	NA	NA
GISH_P_SS_F_SUM2_NT	11878	0	NaN	NA	NA	NA	NA	NA	NA
GISH_P_SS_M_SUM2	11878	0	NaN	NA	NA	NA	NA	NA	NA
GISH_P_SS_M_SUM2_NM	11878	0	NaN	NA	NA	NA	NA	NA	NA
GISH_P_SS_M_SUM2_NT	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_ACTIVATION_NT	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_AFFILIATION	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_AFFILIATION_NM	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_AFFILIATION_NT	11878	0	NaN	NA	NA	NA	NA	NA	NA
EATQ_P_SS_AGGRESSION	11878	0	NaN	NA	NA	NA	NA	NA	NA

# Histogram of the parent-reported mania score.
# No event filter is applied, so every row of mentalHealthParent is plotted.
mentalHealthParent %>%
  ggplot(aes(x = PGBI_P_SS_SCORE)) +
  geom_histogram(colour = "black", fill = "white") +
  labs(x = "Parent General Behavior-Mania") +
  theme_classic(base_size = 30)

## Warning: Removed 73 rows containing non-finite values (stat_bin).

# Tabulate how many rows have a mania score above zero (NA = score missing).
count(mentalHealthParent, PGBI_P_SS_SCORE > 0)

## # A tibble: 3 × 2
##   `PGBI_P_SS_SCORE > 0`     n
##   <lgl>                 <int>
## 1 FALSE                 19013
## 2 TRUE                  10598
## 3 NA                       73

# Keep only the identifiers and the mania score, renaming the score to
# mania_parent in the same step. Rows from all events are retained.
maniaParent <- mentalHealthParent %>%
  select(
    SUBJECTKEY,
    EVENTNAME,
    mania_parent = PGBI_P_SS_SCORE
  )

1.7 Personality

1.7.1 Load BIS/BAS

# Load the item-level BIS/BAS table and compute per-row subscale averages.
# No event filter is applied here; later chunks filter to
# "baseline_year_1_arm_1" where needed.
#
# na.rm = FALSE (spelled out rather than the reassignable shorthand `F`):
# a row gets NA for a subscale as soon as any contributing item is missing.
#
# NOTE(review): the reward-responsiveness average uses items 8, 10, 11 and 12;
# confirm this item set against the scale definition in the data dictionary.
BISBAS <- as_tibble(read.csv(paste0(dataFold, "ABCD_BISBAS01_DATA_TABLE.csv"))) %>%
  mutate(
    BISAvg = rowMeans(
      cbind(BISBAS2_Y, BISBAS3_Y, BISBAS4_Y, BISBAS6_Y),
      na.rm = FALSE
    ),
    BASRRAvg = rowMeans(
      cbind(BISBAS8_Y, BISBAS10_Y, BISBAS11_Y, BISBAS12_Y),
      na.rm = FALSE
    ),
    BASDriveAvg = rowMeans(
      cbind(BISBAS13_Y, BISBAS14_Y, BISBAS15_Y, BISBAS16_Y),
      na.rm = FALSE
    ),
    BASFunAvg = rowMeans(
      cbind(BISBAS17_Y, BISBAS18_Y, BISBAS19_Y, BISBAS20_Y),
      na.rm = FALSE
    ),
    # Overall BAS score: mean of the three BAS subscale averages above.
    BASAllAvg = rowMeans(
      cbind(BASRRAvg, BASDriveAvg, BASFunAvg),
      na.rm = FALSE
    )
  ) %>%
  select(SUBJECTKEY, EVENTNAME, BISAvg, BASRRAvg, BASDriveAvg, BASFunAvg, BASAllAvg)

1.7.2 Histogram of BIS/BAS

# Baseline histogram of the averaged BIS score.
# `bins` is a geom_histogram() argument; when it is passed to ggplot() it is
# ignored and the histogram falls back to the default of 30 bins.
BISBAS %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  ggplot(aes(x = BISAvg)) +
  geom_histogram(color = "black", fill = "white", bins = 100) +
  xlab("averaged BIS") +
  theme_classic(base_size = 30)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning: Removed 22 rows containing non-finite values (stat_bin).

# Baseline histogram of the overall BAS average, drawn with 100 bins.
BISBAS %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  ggplot(mapping = aes(x = BASAllAvg)) +
  geom_histogram(bins = 100, colour = "black", fill = "white") +
  labs(x = "averaged BAS") +
  theme_classic(base_size = 30)

## Warning: Removed 23 rows containing non-finite values (stat_bin).

### Other mental health surveys from youth: besides BIS/BAS, only the ABCD Prodromal Psychosis Scale (PPS) and the UPPS appear to be available at baseline.
However, 4595 PPS observations are missing, so we will not use the PPS and will focus only on the UPPS.

# ABCD Sum Scores Mental Health Youth
# Read the raw table and convert it to a tibble.
mentalHealthYouth <- paste0(dataFold, "ABCD_MHY02_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# Baseline overview: drop the first eight columns and STUDY_COHORT_NAME,
# then summarise every remaining column.
mentalHealthYouth %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-(1:8), -STUDY_COHORT_NAME) %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	97
_______________________
Column type frequency:
logical	8
numeric	89
________________________
Group variables	None

Variable type: logical

skim_variable	n_missing	mean	count
PLE_Y_SS_TOTAL_GOOD_NT	11878	NaN	:
PLE_Y_SS_TOTAL_BAD_NT	11878	NaN	:
PLE_Y_SS_AFFECT_SUM_NT	11878	NaN	:
PLE_Y_SS_AFFECTED_MEAN_NT	11878	NaN	:
PLE_Y_SS_AFFECTED_GOOD_SUM_NT	11878	NaN	:
PLE_Y_SS_AFFECTED_GOOD_MEAN_NT	11878	NaN	:
PLE_Y_SS_AFFECTED_BAD_SUM_NT	11878	NaN	:
PLE_Y_SS_AFFECTED_BAD_MEAN_NT	11878	NaN	:

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
PLE_Y_SS_TOTAL_NUMBER	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PLE_Y_SS_TOTAL_NUMBER_NM	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PLE_Y_SS_TOTAL_NUMBER_NT	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PLE_Y_SS_TOTAL_GOOD	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PLE_Y_SS_TOTAL_BAD	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PLE_Y_SS_AFFECT_SUM	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PLE_Y_SS_AFFECTED_GOOD_SUM	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PLE_Y_SS_AFFECTED_GOOD_MEAN	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PLE_Y_SS_AFFECTED_BAD_SUM	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PLE_Y_SS_AFFECTED_BAD_MEAN	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PPS_Y_SS_NUMBER	11	1.00	2.63	3.56	0	0	1	4	21	▇▂▁▁▁
PPS_Y_SS_NUMBER_NM	0	1.00	0.02	0.68	0	0	0	0	21	▇▁▁▁▁
PPS_Y_SS_NUMBER_NT	0	1.00	21.00	0.00	21	21	21	21	21	▁▁▇▁▁
PPS_Y_SS_BOTHER_SUM	4595	0.61	2.14	2.62	0	0	1	3	19	▇▂▁▁▁
PPS_Y_SS_BOTHER_SUM_NM	0	1.00	18.37	3.56	0	17	20	21	21	▁▁▁▂▇
PPS_Y_SS_BOTHER_SUM_NT	0	1.00	21.00	0.00	21	21	21	21	21	▁▁▇▁▁
PPS_Y_SS_BOTHER_N_1	4595	0.61	2.15	2.07	0	1	2	3	17	▇▂▁▁▁
PPS_Y_SS_BOTHER_N_1_NM	0	1.00	18.37	3.56	0	17	20	21	21	▁▁▁▂▇
PPS_Y_SS_BOTHER_N_1_NT	0	1.00	21.00	0.00	21	21	21	21	21	▁▁▇▁▁
PPS_Y_SS_SEVERITY_SCORE	10	1.00	6.32	10.61	0	0	2	8	104	▇▁▁▁▁
PPS_Y_SS_SEVERITY_SCORE_NM	0	1.00	38.05	5.67	3	36	40	42	42	▁▁▁▁▇
PPS_Y_SS_SEVERITY_SCORE_NT	0	1.00	42.00	0.00	42	42	42	42	42	▁▁▇▁▁
PPS_SS_MEAN_SEVERITY	4596	0.61	2.15	1.10	1	1	2	3	6	▇▃▂▁▁
UPPS_Y_SS_NEGATIVE_URGENCY	23	1.00	8.49	2.65	4	7	8	10	16	▆▇▇▂▁
UPPS_Y_SS_NEGATIVE_URGENCY_NM	0	1.00	0.01	0.17	0	0	0	0	4	▇▁▁▁▁
UPPS_Y_SS_NEGATIVE_URGENCY_NT	0	1.00	4.00	0.00	4	4	4	4	4	▁▁▇▁▁
UPPS_Y_SS_LACK_OF_PLANNING	23	1.00	7.74	2.38	4	6	8	9	16	▆▇▅▁▁
UPPS_Y_SS_LACK_OF_PLANNING_NM	0	1.00	0.01	0.18	0	0	0	0	4	▇▁▁▁▁
UPPS_Y_SS_LACK_OF_PLANNING_NT	0	1.00	4.00	0.00	4	4	4	4	4	▁▁▇▁▁
UPPS_Y_SS_SENSATION_SEEKING	23	1.00	9.77	2.68	4	8	10	12	16	▂▅▇▃▂
UPPS_Y_SS_SENSATION_SEEKING_NM	0	1.00	0.01	0.18	0	0	0	0	4	▇▁▁▁▁
UPPS_Y_SS_SENSATION_SEEKING_NT	0	1.00	4.00	0.00	4	4	4	4	4	▁▁▇▁▁
UPPS_Y_SS_POSITIVE_URGENCY	23	1.00	7.99	2.96	4	6	8	10	16	▇▆▆▂▁
UPPS_Y_SS_POSITIVE_URGENCY_NM	0	1.00	0.01	0.18	0	0	0	0	4	▇▁▁▁▁
UPPS_Y_SS_POSITIVE_URGENCY_NT	0	1.00	4.00	0.00	4	4	4	4	4	▁▁▇▁▁
UPPS_Y_SS_LACK_OF_PERSEVERANCE	23	1.00	7.04	2.25	4	5	7	8	16	▇▆▃▁▁
UPPS_Y_SS_LACK_OF_PERS_NM	0	1.00	0.01	0.18	0	0	0	0	4	▇▁▁▁▁
UPPS_Y_SS_LACK_OF_PERS_NT	0	1.00	4.00	0.00	4	4	4	4	4	▁▁▇▁▁
BIS_Y_SS_BIS_SUM	22	1.00	9.51	3.75	0	7	9	12	21	▂▇▇▃▁
BIS_Y_SS_BIS_SUM_NM	22	1.00	0.00	0.00	0	0	0	0	0	▁▁▇▁▁
BIS_Y_SS_BIS_SUM_NT	22	1.00	7.00	0.00	7	7	7	7	7	▁▁▇▁▁
BIS_Y_SS_BAS_RR	23	1.00	11.00	2.92	0	9	11	13	15	▁▂▅▇▇
BIS_Y_SS_BAS_RR_NM	23	1.00	0.00	0.00	0	0	0	0	0	▁▁▇▁▁
BIS_Y_SS_BAS_RR_NT	23	1.00	5.00	0.00	5	5	5	5	5	▁▁▇▁▁
BIS_Y_SS_BAS_DRIVE	23	1.00	4.14	3.06	0	2	4	6	12	▇▆▅▂▂
BIS_Y_SS_BAS_DRIVE_NM	23	1.00	0.00	0.00	0	0	0	0	0	▁▁▇▁▁
BIS_Y_SS_BAS_DRIVE_NT	23	1.00	4.00	0.00	4	4	4	4	4	▁▁▇▁▁
BIS_Y_SS_BAS_FS	23	1.00	5.71	2.64	0	4	6	7	12	▂▅▇▃▂
BIS_Y_SS_BAS_FS_NM	23	1.00	0.00	0.00	0	0	0	0	0	▁▁▇▁▁
BIS_Y_SS_BAS_FS_NT	23	1.00	4.00	0.00	4	4	4	4	4	▁▁▇▁▁
BIS_Y_SS_BISM_SUM	22	1.00	5.53	2.84	0	3	5	8	12	▃▅▇▃▂
BIS_Y_SS_BISM_SUM_NM	22	1.00	0.00	0.00	0	0	0	0	0	▁▁▇▁▁
BIS_Y_SS_BISM_SUM_NT	22	1.00	4.00	0.00	4	4	4	4	4	▁▁▇▁▁
BIS_Y_SS_BASM_RR	23	1.00	8.82	2.39	0	7	9	11	12	▁▁▅▆▇
BIS_Y_SS_BASM_RR_NM	23	1.00	0.00	0.00	0	0	0	0	0	▁▁▇▁▁
BIS_Y_SS_BASM_RR_NT	23	1.00	4.00	0.00	4	4	4	4	4	▁▁▇▁▁
BIS_Y_SS_BASM_DRIVE	23	1.00	4.14	3.06	0	2	4	6	12	▇▆▅▂▂
BIS_Y_SS_BASM_DRIVE_NM	0	1.00	0.01	0.18	0	0	0	0	4	▇▁▁▁▁
BIS_Y_SS_BASM_DRIVE_NT	0	1.00	4.00	0.00	4	4	4	4	4	▁▁▇▁▁
DELQ_Y_SS_SUM	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
DELQ_Y_SS_SUM_NM	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
DELQ_Y_SS_SUM_NT	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
SUP_Y_SS_SUM	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
SUP_Y_SS_SUM_NM	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
SUP_Y_SS_SUM_NT	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
GISH_Y_SS_M_SUM	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
GISH_Y_SS_M_SUM_NM	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
GISH_Y_SS_M_SUM_NT	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
GISH_Y_SS_F_SUM	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
GISH_Y_SS_F_SUM_NM	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
GISH_Y_SS_F_SUM_NT	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PEQ_SS_RELATIONAL_AGGS_NM	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PEQ_SS_RELATIONAL_AGGS_NT	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PEQ_SS_RELATIONAL_VICTIM	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PEQ_SS_RELATIONAL_VICTIM_NM	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PEQ_SS_RELATIONAL_VICTIM_NT	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PEQ_SS_REPUTATION_AGGS	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PEQ_SS_REPUTATION_AGGS_NM	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PEQ_SS_REPUTATION_AGGS_NT	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PEQ_SS_REPUTATION_VICTIM	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PEQ_SS_REPUTATION_VICTIM_NM	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PEQ_SS_REPUTATION_VICTIM_NT	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PEQ_SS_OVERT_AGGRESSION	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PEQ_SS_OVERT_AGGRESSION_NM	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PEQ_SS_OVERT_AGGRESSION_NT	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PEQ_SS_OVERT_VICTIM	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PEQ_SS_OVERT_VICTIM_NM	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PEQ_SS_OVERT_VICTIM_NT	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
PEQ_SS_RELATIONAL_AGGS	11878	0.00	NaN	NA	NA	NA	NA	NA	NA

# Keep the identifiers and the five UPPS subscale sum scores.
# The companion *_NM / *_NT columns are not selected. All events are retained.
UPPS <- mentalHealthYouth %>%
  select(
    SUBJECTKEY,
    EVENTNAME,
    UPPS_Y_SS_NEGATIVE_URGENCY,
    UPPS_Y_SS_LACK_OF_PLANNING,
    UPPS_Y_SS_SENSATION_SEEKING,
    UPPS_Y_SS_POSITIVE_URGENCY,
    UPPS_Y_SS_LACK_OF_PERSEVERANCE
  )

1.8 Physical

Basically, only the sleep-related measures are useful.

Not very relevant: ABCD Sum Scores Traumatic Brain Injury (abcd_tbi01); ABCD Longitudinal Summary Scores Traumatic Brain Injury (abcd_lsstbi01); ABCD Sum Scores Parent Sports and Activities Involvement (abcd_spacss01); ABCD Longitudinal Summary Scores Sports Activity (abcd_lsssa01); ABCD Sum Scores Parent Medical History (abcd_medhxss01); ABCD Longitudinal Summary Scores Medical History (abcd_lssmh01); ABCD Sum Scores Developmental History (abcd_devhxss01).

this is mainly about puberty: ABCD Sum Scores Physical Health Youth abcd_ssphy01

Sleep scores: ABCD Parent Sleep Disturbance Scale for Children (abcd_sds01). Diet, only at the one-year follow-up: ABCD Child Nutrition Assessment (abcd_cna01). Summed sleep scores + diet: ABCD Sum Scores Physical Health Parent (abcd_ssphp01).

# ABCD Parent Sleep Disturbance Scale for Children
# SLEEPDISTURB1_P
# How many hours of sleep does your child get on most nights? ¿Cuántas horas duerme su niño(a) la mayoría de las noches?
# 1 = 9-11 hours / 9 a 11 horas; 2 = 8-9 hours / 8 a 9 horas; 3 = 7-8 hours / 7 a 8 horas; 4 = 5-7 hours / 5 a 7 horas; 5 = Less than 5 hours / Menos de 5 horas // Consider each question pertaining to the PAST 6 MONTHS of the child's life
# SLEEPDISTURB2_P
# How long after going to bed does your child usually fall asleep? Después de acostarse ¿generalmente cuánto tiempo tarda su niño(a) en quedarse dormido(a)?
# 1 = Less than 15 minutes / Menos de 15 minutos; 2 = 15-30 minutes / 15 a 30 minutos; 3 = 30-45 minutes / 30 a 45 minutos; 4 = 45-60 minutes / 45 a 60 minutos; 5 = More than 60 minutes / Más de 60 minutos // Consider each question pertaining to the PAST 6 MONTHS of the child's life
#
# Rows that are identical once ABCD_SDS01_ID and DATASET_ID are ignored count
# as duplicates; only the first of each such group is kept, with all of its
# columns (including the two ID columns).
sleepDis <- paste0(dataFold, "ABCD_SDS01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  distinct(across(-c(ABCD_SDS01_ID, DATASET_ID)), .keep_all = TRUE)

# sleepDis %>% filter(EVENTNAME =="baseline_year_1_arm_1") %>% 
#   distinct(select(.,-ABCD_SDS01_ID, -DATASET_ID),.keep_all = TRUE) %>% 
#   arrange(SUBJECTKEY)

# Baseline summary of the two single sleep items (hours slept, time to fall asleep).
sleepDis %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(c(SLEEPDISTURB1_P, SLEEPDISTURB2_P)) %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	2
_______________________
Column type frequency:
numeric	2
________________________
Group variables	None

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
SLEEPDISTURB1_P	5	1	1.72	0.81	1	1	2	2	5	▇▆▂▁▁
SLEEPDISTURB2_P	5	1	1.93	0.98	1	1	2	2	5	▇▇▂▁▁

# Load the parent physical-health sum-score table; only baseline rows are kept.
PhysicalSum <- paste0(dataFold, "ABCD_SSPHP01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  filter(EVENTNAME == "baseline_year_1_arm_1")

# Overview of the score columns: drop the first eight columns and
# STUDY_COHORT_NAME, then summarise the rest.
PhysicalSum %>%
  select(-(1:8), -STUDY_COHORT_NAME) %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	36
_______________________
Column type frequency:
numeric	36
________________________
Group variables	None

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
SDS_P_SS_DIMS	5	1.00	11.75	3.75	7	9	11	13	35	▇▃▁▁▁
SDS_P_SS_DIMS_NM	0	1.00	0.00	0.14	0	0	0	0	7	▇▁▁▁▁
SDS_P_SS_DIMS_NT	0	1.00	7.00	0.00	7	7	7	7	7	▁▁▇▁▁
SDS_P_SS_SBD	5	1.00	3.77	1.26	3	3	3	4	15	▇▁▁▁▁
SDS_P_SS_SBD_NM	0	1.00	0.00	0.06	0	0	0	0	3	▇▁▁▁▁
SDS_P_SS_SBD_NT	0	1.00	3.00	0.00	3	3	3	3	3	▁▁▇▁▁
SDS_P_SS_DA	5	1.00	3.44	0.92	3	3	3	4	15	▇▁▁▁▁
SDS_P_SS_DA_NM	0	1.00	0.00	0.06	0	0	0	0	3	▇▁▁▁▁
SDS_P_SS_DA_NT	0	1.00	3.00	0.00	3	3	3	3	3	▁▁▇▁▁
SDS_P_SS_SWTD	32	1.00	8.18	2.63	6	6	7	9	30	▇▁▁▁▁
SDS_P_SS_SWTD_NM	0	1.00	0.00	0.13	0	0	0	0	6	▇▁▁▁▁
SDS_P_SS_SWTD_NT	0	1.00	6.00	0.00	6	6	6	6	6	▁▁▇▁▁
SDS_P_SS_DOES	6	1.00	6.95	2.44	5	5	6	8	25	▇▁▁▁▁
SDS_P_SS_DOES_NM	0	1.00	0.00	0.10	0	0	0	0	5	▇▁▁▁▁
SDS_P_SS_DOES_NT	0	1.00	5.00	0.00	5	5	5	5	5	▁▁▇▁▁
SDS_P_SS_SHY	5	1.00	2.44	1.18	2	2	2	2	10	▇▁▁▁▁
SDS_P_SS_SHY_NM	0	1.00	0.00	0.04	0	0	0	0	2	▇▁▁▁▁
SDS_P_SS_SHY_NT	0	1.00	2.00	0.00	2	2	2	2	2	▁▁▇▁▁
SDS_P_SS_TOTAL	33	1.00	36.53	8.24	26	31	35	40	126	▇▁▁▁▁
SDS_P_SS_TOTAL_NM	0	1.00	0.01	0.54	0	0	0	0	26	▇▁▁▁▁
SDS_P_SS_TOTAL_NT	0	1.00	26.00	0.00	26	26	26	26	26	▁▁▇▁▁
PDS_P_SS_MALE_CATEGORY	5924	0.50	1.37	0.61	1	1	1	2	5	▇▃▁▁▁
PDS_P_SS_MALE_CAT_NM	5714	0.52	0.04	0.26	0	0	0	0	3	▇▁▁▁▁
PDS_P_SS_MALE_CAT_NT	5714	0.52	3.00	0.00	3	3	3	3	3	▁▁▇▁▁
PDS_P_SS_FEMALE_CATEGORY	6384	0.46	2.18	0.91	1	1	2	3	5	▆▅▇▁▁
PDS_P_SS_FEMALE_CAT_NM	6172	0.48	0.05	0.27	0	0	0	0	3	▇▁▁▁▁
PDS_P_SS_FEMALE_CAT_NT	6172	0.48	3.00	0.00	3	3	3	3	3	▁▁▇▁▁
CNA_P_SS_SUM	11878	0.00	NaN	NA	NA	NA	NA	NA	NA
CNA_P_SS_SUM_NM	1166	0.90	14.00	0.00	14	14	14	14	14	▁▁▇▁▁
CNA_P_SS_SUM_NT	1166	0.90	14.00	0.00	14	14	14	14	14	▁▁▇▁▁
PDS_P_SS_FEMALE_CAT_2_NM	6192	0.48	0.05	0.29	0	0	0	0	3	▇▁▁▁▁
PDS_P_SS_FEMALE_CAT_2_NT	6192	0.48	3.00	0.00	3	3	3	3	3	▁▁▇▁▁
PDS_P_SS_FEMALE_CATEGORY_2	6414	0.46	2.18	0.91	1	1	2	3	5	▆▅▇▁▁
PDS_P_SS_MALE_CATEGORY_2	5934	0.50	1.36	0.61	1	1	1	2	5	▇▃▁▁▁
PDS_P_SS_MALE_CAT_2_NM	5676	0.52	0.06	0.32	0	0	0	0	3	▇▁▁▁▁
PDS_P_SS_MALE_CAT_2_NT	5676	0.52	3.00	0.00	3	3	3	3	3	▁▁▇▁▁

# sds_p_ss_dims
# Disorders of Initiating and Maintaining Sleep (DIMS) SUM:  sleepdisturb1_p +  sleepdisturb2_p + sleepdisturb3_p + sleepdisturb4_p + sleepdisturb5_p + sleepdisturb10_p + sleepdisturb11_p;  Validation: All items must be answered
# 
# sds_p_ss_sbd
# Sleep Breathing disorders (SBD):  SUM sleepdisturb13_p +  sleepdisturb14_p + sleepdisturb15_p; Validation: All items must be answered
# 
# sds_p_ss_da
# Disorder of Arousal (DA) SUM: sleepdisturb17_p +  sleepdisturb20_p + sleepdisturb21_p;  Validation: All items must be answered
# 
# sds_p_ss_swtd
# Sleep-Wake transition Disorders (SWTD) SUM: sleepdisturb6_p + sleepdisturb7_p + sleepdisturb8_p + sleepdisturb12_p +  sleepdisturb18_p + sleepdisturb19_p; Validation: All items must be answered
# 
# sds_p_ss_does
# Disorders of Excessive Somnolence (DOES) SUM:  sleepdisturb22_p + sleepdisturb23_p +  sleepdisturb24_p +  sleepdisturb25_p + sleepdisturb26_p; Validation: All items must be answered
# 
# sds_p_ss_shy
# Sleep Hyperhydrosis (SHY) SUM: sleepdisturb9_p + sleepdisturb16_p; Validation: All items must be answered
# 
# sds_p_ss_total
# Total Score (Sum of 6 Factors): sds_p_ss_dims + sds_p_ss_sbd + sds_p_ss_da + sds_p_ss_swtd + sds_p_ss_does + sds_p_ss_shy; Validation: All items must be answered

# Combine the two single sleep items (from sleepDis) with the seven sleep
# summary scores (from PhysicalSum), matching rows on subject and event, and
# give every kept column a readable sleep_* name while selecting it.
# NOTE(review): PhysicalSum was filtered to baseline when it was loaded, so any
# non-baseline sleepDis rows carry NA summary scores after this full join --
# confirm that is intended.
sleepSum <- full_join(sleepDis, PhysicalSum, by = c("SUBJECTKEY", "EVENTNAME")) %>%
  select(
    SUBJECTKEY,
    EVENTNAME,
    sleep_hours = SLEEPDISTURB1_P,
    sleep_disturb = SLEEPDISTURB2_P,
    sleep_initiate_maintain = SDS_P_SS_DIMS,
    sleep_breath = SDS_P_SS_SBD,
    sleep_arousal = SDS_P_SS_DA,
    sleep_transition = SDS_P_SS_SWTD,
    sleep_somnolence = SDS_P_SS_DOES,
    sleep_hyperhydrosis = SDS_P_SS_SHY,
    sleep_total = SDS_P_SS_TOTAL
  )

# Baseline summary of every sleep variable (identifier columns removed).
sleepSum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	9
_______________________
Column type frequency:
numeric	9
________________________
Group variables	None

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
sleep_hours	5	1	1.72	0.81	1	1	2	2	5	▇▆▂▁▁
sleep_disturb	5	1	1.93	0.98	1	1	2	2	5	▇▇▂▁▁
sleep_initiate_maintain	5	1	11.75	3.75	7	9	11	13	35	▇▃▁▁▁
sleep_breath	5	1	3.77	1.26	3	3	3	4	15	▇▁▁▁▁
sleep_arousal	5	1	3.44	0.92	3	3	3	4	15	▇▁▁▁▁
sleep_transition	32	1	8.18	2.63	6	6	7	9	30	▇▁▁▁▁
sleep_somnolence	6	1	6.95	2.44	5	5	6	8	25	▇▁▁▁▁
sleep_hyperhydrosis	5	1	2.44	1.18	2	2	2	2	10	▇▁▁▁▁
sleep_total	33	1	36.53	8.24	26	31	35	40	126	▇▁▁▁▁

1.9 behavioral

1.9.1 screen time

ABCD Youth Screen Time Survey abcd_stq01

Youth Screen Time Survey This measure includes customized questions about the overall amount of time that the youth spends using visual media, on a typical weekday and weekend day. Media activities assessed include: (1) Watching TV shows or movies; (2) Watching videos (such as YouTube); (3) Playing video games on a computer, console, phone or other device; (4) Texting on a cell phone, tablet, or computer; (5) Visiting social networking sites like Facebook, Twitter, Instagram; (6) Video chat. Seven response options were: none, < 30 minutes, 30 minutes, 1 hour, 2 hours, 3 hours, and 4+ hours.

# Load the youth screen-time survey table. No event filter is applied here.
youthScreen <- paste0(dataFold, "ABCD_STQ01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()
# filter(EVENTNAME =="baseline_year_1_arm_1")

#On a typical weekend/weekday, how many hours do you
#0 = None; .25 = < 30 minutes; 0.5 = 30 minutes; 1 = 1 hour; 2 = 2 hours; 3 = 3 hours; 4 = 4+ hours //Example: 1½ hours would be coded as 1 hour, rather than 2 hours.  
#How often do you play mature-rated video games (e.g., Call of Duty, Grand Theft Auto, Assassin's Creed, etc.)?
#How often do you watch R-rated movies?

# Add total screen time per row: one sum over the columns ending in "WKDY_Y"
# and one over the columns ending in "WKND_Y". rowSums() keeps its default
# na.rm = FALSE, so a row with any missing item gets an NA total.
# SCREEN13_Y and SCREEN14_Y are renamed with a "_Screen" suffix so that they
# are picked up by the selection below.
youthScreenAdded <- youthScreen %>%
  mutate(
    wkdySum_Screen = rowSums(across(ends_with("WKDY_Y"))),
    wkndSum_Screen = rowSums(across(ends_with("WKND_Y")))
  ) %>%
  rename(
    matureGames_Screen = SCREEN13_Y,
    matureMovies_Screen = SCREEN14_Y
  )

# Identifiers plus the four "_Screen" variables.
youthScreenSum <- select(
  youthScreenAdded,
  SUBJECTKEY, EVENTNAME, ends_with("_Screen")
)

# Baseline summary of the screen-time variables (identifier columns removed).
youthScreenSum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	4
_______________________
Column type frequency:
numeric	4
________________________
Group variables	None

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p25	p50	p75	p100	hist
matureGames_Screen	20	1	0.57	0.87	0.00	0.0	1.00	3	▇▃▁▁▁
matureMovies_Screen	21	1	0.38	0.64	0.00	0.0	1.00	3	▇▃▁▁▁
wkdySum_Screen	38	1	3.46	3.10	1.25	2.5	4.75	24	▇▂▁▁▁
wkndSum_Screen	43	1	4.62	3.63	2.00	3.5	6.25	24	▇▃▁▁▁

1.9.2 maternal substance use

# ABCD Developmental History Questionnaire
# This table stores the event label in VISIT; it is renamed to EVENTNAME so the
# table uses the same key column as the other tables in this file.
# No event filter is applied here.
DevHis <- paste0(dataFold, "DHX01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  rename(EVENTNAME = VISIT)

#glimpse(DevHis)


# Baseline summary of every column whose name starts with "DEVHX_8".
DevHis %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(starts_with("DEVHX_8")) %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	83
_______________________
Column type frequency:
character	3
logical	10
numeric	70
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	max	empty	n_unique
DEVHX_8_RXNORM_MED1	1	94	10580	541
DEVHX_8_OTHER1_NAME_OTH	1	36	11856	19
DEVHX_8_OTHER3_NAME_OTH	1	30	11874	5

Variable type: logical

skim_variable	n_missing	mean	count
DEVHX_8_PRESCRIPT_YES	11878	NaN	:
DEVHX_8_OTHER2_NAME_OTH	11878	NaN	:
DEVHX_8_OTHER4_NAME_OTH	11878	NaN	:
DEVHX_8_OTHER4_TIMES	11878	NaN	:
DEVHX_8_OTHER4_AMT	11878	NaN	:
DEVHX_8_OTHER4_UNIT	11878	NaN	:
DEVHX_8_OTHER5_NAME_OTH	11878	NaN	:
DEVHX_8_OTHER5_TIMES	11878	NaN	:
DEVHX_8_OTHER5_AMT	11878	NaN	:
DEVHX_8_OTHER5_UNIT	11878	NaN	:

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
DEVHX_8_PRESCRIPT_MED	5	1.00	73.99	261.41	0	0.00	0.0	0.00	999	▇▁▁▁▁
DEVHX_8_MED1_PRN	11247	0.05	36.69	187.31	0	0.00	0.0	1.00	999	▇▁▁▁▁
DEVHX_8_MED1_TIMES	10808	0.09	1.26	0.75	0	1.00	1.0	1.00	10	▇▁▁▁▁
DEVHX_8_MED1_TIMES_DK	11720	0.01	999.00	0.00	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEVHX_8_MED1_HOW_MUCH	10971	0.08	82.50	191.78	0	1.00	10.0	88.00	2000	▇▁▁▁▁
DEVHX_8_MED1_HOW_MUCH_DK	11454	0.04	999.00	0.00	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEVHX_8_MED1_UNIT	11046	0.07	2.69	0.79	1	2.75	3.0	3.00	10	▃▇▁▁▁
DEVHX_8_MED1_FU	11101	0.07	0.25	0.43	0	0.00	0.0	1.00	1	▇▁▁▁▃
DEVHX_8_RXNORM_MED2	11693	0.02	621559.43	539911.02	281	196496.00	372861.0	1151133.00	1806414	▇▁▂▃▁
DEVHX_8_MED2_PRN	11711	0.01	36.19	186.43	0	0.00	0.0	1.00	999	▇▁▁▁▁
DEVHX_8_MED2_TIMES	11732	0.01	1.40	0.77	0	1.00	1.0	2.00	6	▇▂▁▁▁
DEVHX_8_MED2_TIMES_DK	11846	0.00	999.00	0.00	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEVHX_8_MED2_HOW_MUCH	11784	0.01	87.57	247.66	0	1.00	2.5	50.00	2000	▇▁▁▁▁
DEVHX_8_MED2_HOW_MUCH_DK	11790	0.01	999.00	0.00	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEVHX_8_MED2_UNIT	11793	0.01	2.74	1.09	1	2.00	3.0	3.00	10	▃▇▁▁▁
DEVHX_8_MED2_FU	11700	0.01	0.26	0.44	0	0.00	0.0	1.00	1	▇▁▁▁▃
DEVHX_8_RXNORM_MED3	11833	0.00	537198.29	483611.33	1191	82728.00	324026.0	1151133.00	1187033	▇▁▁▂▅
DEVHX_8_MED3_PRN	11848	0.00	0.27	0.45	0	0.00	0.0	0.75	1	▇▁▁▁▃
DEVHX_8_MED3_TIMES	11821	0.00	1.30	0.80	0	1.00	1.0	1.00	4	▁▇▂▁▁
DEVHX_8_MED3_TIMES_DK	11868	0.00	999.00	0.00	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEVHX_8_MED3_HOW_MUCH	11854	0.00	189.33	478.06	0	1.00	1.5	42.50	2000	▇▁▁▁▁
DEVHX_8_MED3_HOW_MUCH_DK	11855	0.00	999.00	0.00	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEVHX_8_MED3_UNIT	11858	0.00	2.55	0.76	1	2.00	3.0	3.00	3	▂▁▂▁▇
DEVHX_8_TOBACCO	5	1.00	23.11	149.72	0	0.00	0.0	0.00	999	▇▁▁▁▁
DEVHX_8_CIGS_PER_DAY	10454	0.12	8.35	6.44	0	4.00	6.0	10.00	80	▇▁▁▁▁
DEVHX_8_CIGS_PER_DAY_DK	11691	0.02	999.00	0.00	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEVHX_8_ALCOHOL	5	1.00	57.29	231.76	0	0.00	0.0	1.00	999	▇▁▁▁▁
DEVHX_8_ALCHOHOL_MAX	9336	0.21	2.40	1.50	0	1.00	2.0	3.00	20	▇▁▁▁▁
DEVHX_8_ALCHOHOL_MAX_DK	11538	0.03	999.00	0.00	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEVHX_8_ALCHOHOL_AVG	9371	0.21	3.97	4.38	0	1.00	3.0	5.00	52	▇▁▁▁▁
DEVHX_8_ALCHOHOL_AVG_DK	11503	0.03	999.00	0.00	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEVHX_8_ALCOHOL_EFFECTS	9514	0.20	2.08	1.24	0	1.00	2.0	2.00	20	▇▁▁▁▁
DEVHX_8_ALCOHOL_EFFECTS_DK	11360	0.04	999.00	0.00	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEVHX_8_MARIJUANA	5	1.00	28.24	165.42	0	0.00	0.0	0.00	999	▇▁▁▁▁
DEVHX_8_MARIJUANA_AMT	11389	0.04	2.09	2.49	0	1.00	2.0	3.00	40	▇▁▁▁▁
DEVHX_8_MARIJUANA_AMT_DK	11687	0.02	999.00	0.00	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEVHX_8_COC_CRACK	5	1.00	23.74	152.13	0	0.00	0.0	0.00	999	▇▁▁▁▁
DEVHX_8_COC_CRACK_AMT	11845	0.00	3.00	3.71	0	1.00	2.0	3.00	20	▇▁▁▁▁
DEVHX_8_COC_CRACK_AMT_DK	11817	0.01	999.00	0.00	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEVHX_8_HER_MORPH	5	1.00	24.40	154.22	0	0.00	0.0	0.00	999	▇▁▁▁▁
DEVHX_8_HER_MORPH_AMT	11875	0.00	2.67	1.15	2	2.00	2.0	3.00	4	▇▁▁▁▃
DEVHX_8_HER_MORPH_AMT_DK	11857	0.00	999.00	0.00	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEVHX_8_OXYCONT	5	1.00	25.08	156.28	0	0.00	0.0	0.00	999	▇▁▁▁▁
DEVHX_8_OXYCONT_AMT	11863	0.00	2.13	1.19	0	1.00	2.0	3.00	4	▂▇▇▇▃
DEVHX_8_OXYCONT_AMT_DK	11851	0.00	999.00	0.00	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEVHX_8_OTHER_DRUGS	5	1.00	32.07	176.07	0	0.00	0.0	0.00	999	▇▁▁▁▁
DEVHX_8_OTHER1_NAME_2	11781	0.01	46.53	198.62	0	1.00	3.0	12.00	999	▇▁▁▁▁
DEVHX_8_OTHER1_TIMES	11831	0.00	1.94	1.98	0	1.00	1.0	2.00	10	▇▂▁▁▁
DEVHX_8_OTHER1_TIMES_DK	11826	0.00	999.00	0.00	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEVHX_8_OTHER1_AMT	11847	0.00	92.94	286.57	0	1.00	2.0	16.50	1500	▇▁▁▁▁
DEVHX_8_OTHER1_AMT_DK	11810	0.01	999.00	0.00	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEVHX_8_OTHER1_UNIT	11848	0.00	3.20	2.11	1	1.25	3.0	3.00	7	▆▇▁▁▃
DEVHX_8_OTHER2_NAME_2	11799	0.01	13.77	112.32	0	0.00	0.0	0.00	999	▇▁▁▁▁
DEVHX_8_OTHER2_TIMES	11873	0.00	1.40	0.89	1	1.00	1.0	1.00	3	▇▁▁▁▂
DEVHX_8_OTHER2_TIMES_DK	11871	0.00	999.00	0.00	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEVHX_8_OTHER2_AMT	11876	0.00	6.00	2.83	4	5.00	6.0	7.00	8	▇▁▁▁▇
DEVHX_8_OTHER2_AMT_DK	11868	0.00	999.00	0.00	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEVHX_8_OTHER2_UNIT	11876	0.00	5.00	2.83	3	4.00	5.0	6.00	7	▇▁▁▁▇
DEVHX_8_OTHER3_NAME_2	11870	0.00	3.00	5.26	0	0.00	0.0	3.50	12	▇▁▁▁▂
DEVHX_8_OTHER3_TIMES	11876	0.00	2.00	1.41	1	1.50	2.0	2.50	3	▇▁▁▁▇
DEVHX_8_OTHER3_TIMES_DK	11877	0.00	999.00	NA	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEVHX_8_OTHER3_AMT	11877	0.00	10.00	NA	10	10.00	10.0	10.00	10	▁▁▇▁▁
DEVHX_8_OTHER3_AMT_DK	11876	0.00	999.00	0.00	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEVHX_8_OTHER3_UNIT	11877	0.00	3.00	NA	3	3.00	3.0	3.00	3	▁▁▇▁▁
DEVHX_8_OTHER4_NAME_2	11875	0.00	0.33	0.58	0	0.00	0.0	0.50	1	▇▁▁▁▃
DEVHX_8_OTHER4_TIMES_DK	11877	0.00	999.00	NA	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEVHX_8_OTHER4_AMT_DK	11877	0.00	999.00	NA	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEVHX_8_OTHER5_NAME_2	11877	0.00	1.00	NA	1	1.00	1.0	1.00	1	▁▁▇▁▁
DEVHX_8_OTHER5_TIMES_DK	11877	0.00	999.00	NA	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEVHX_8_OTHER5_AMT_DK	11877	0.00	999.00	NA	999	999.00	999.0	999.00	999	▁▁▇▁▁

#devhx_8_tobacco
#Before knowing of pregnancy. Tobacco? How many times per day? / ¿Cuántas veces al día?

#devhx_9_tobacco
#Knowing of pregnancy. Tobacco? How many times per day? / ¿Cuántas veces al día?

#devhx_8_alcohol
#Before knowing of pregnancy. Alcohol? / ¿Alcohol?

#devhx_9_alcohol
#Knowing of pregnancy. Alcohol? / ¿Alcohol?

#devhx_8_marijuana
#Before knowing of pregnancy. Marijuana? / ¿Marihuana?

#devhx_9_marijuana
#Knowing of pregnancy. Marijuana? / ¿Marihuana?

# Maternal tobacco / alcohol / marijuana use before (DEVHX_8_*) and after
# (DEVHX_9_*) the pregnancy was known: rename the six items, replace the 999
# code with NA, and store each item as a factor.
#
# The 999 -> NA recode is done on the raw numeric codes *before* converting to
# a factor. Calling na_if() on a factor with a numeric `y` fails under
# dplyr >= 1.1, where `y` must be castable to the type of `x`.
momSubstanceUse <- DevHis %>%
  select(
    SUBJECTKEY,
    EVENTNAME,
    tobacco_before_preg = DEVHX_8_TOBACCO,
    tobacco_after_preg = DEVHX_9_TOBACCO,
    alcohol_before_preg = DEVHX_8_ALCOHOL,
    alcohol_after_preg = DEVHX_9_ALCOHOL,
    marijuana_before_preg = DEVHX_8_MARIJUANA,
    marijuana_after_preg = DEVHX_9_MARIJUANA
  ) %>%
  mutate(across(ends_with("_preg"), ~ as.factor(na_if(.x, 999)))) %>%
  # Kept from the original pipeline; guarantees no unused factor level remains.
  droplevels()

# Baseline summary of the six maternal substance-use factors.
momSubstanceUse %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	6
_______________________
Column type frequency:
factor	6
________________________
Group variables	None

Variable type: factor

skim_variable	n_missing	complete_rate	ordered	n_unique	top_counts
tobacco_before_preg	278	0.98	FALSE	2	0: 9989, 1: 1611
tobacco_after_preg	265	0.98	FALSE	2	0: 10993, 1: 620
alcohol_before_preg	683	0.94	FALSE	2	0: 8313, 1: 2882
alcohol_after_preg	293	0.98	FALSE	2	0: 11270, 1: 315
marijuana_before_preg	340	0.97	FALSE	2	0: 10851, 1: 687
marijuana_after_preg	277	0.98	FALSE	2	0: 11356, 1: 245

1.9.3 developmental adversity

# Baseline summary of the birth-weight columns and the DEVHX_10 / 12 / 13 / 14
# item groups, selected by name prefix.
DevHis %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(
    starts_with(c("BIRTH_WEIGHT", "DEVHX_10", "DEVHX_12", "DEVHX_13", "DEVHX_14"))
  ) %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	27
_______________________
Column type frequency:
logical	2
numeric	25
________________________
Group variables	None

Variable type: logical

skim_variable	n_missing	complete_rate	mean	count
BIRTH_WEIGHT_LBS	11878	0	NaN	:
BIRTH_WEIGHT_OZ	11878	0	NaN	:

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
DEVHX_10	9	1.00	39.30	191.84	-1	1	1	1	999	▇▁▁▁▁
DEVHX_10A3_P	4	1.00	32.53	176.93	0	0	0	0	999	▇▁▁▁▁
DEVHX_10B3_P	4	1.00	27.22	162.51	0	0	0	0	999	▇▁▁▁▁
DEVHX_10C3_P	4	1.00	32.47	176.94	0	0	0	0	999	▇▁▁▁▁
DEVHX_10D3_P	4	1.00	27.94	164.70	0	0	0	0	999	▇▁▁▁▁
DEVHX_10E3_P	4	1.00	34.16	181.55	0	0	0	0	999	▇▁▁▁▁
DEVHX_10F3_P	4	1.00	26.25	159.80	0	0	0	0	999	▇▁▁▁▁
DEVHX_10G3_P	4	1.00	31.51	174.48	0	0	0	0	999	▇▁▁▁▁
DEVHX_10H3_P	4	1.00	39.36	194.18	0	0	0	0	999	▇▁▁▁▁
DEVHX_10I3_P	4	1.00	30.86	172.66	0	0	0	0	999	▇▁▁▁▁
DEVHX_10J3_P	4	1.00	34.00	180.88	0	0	0	0	999	▇▁▁▁▁
DEVHX_10K3_P	4	1.00	30.07	170.60	0	0	0	0	999	▇▁▁▁▁
DEVHX_10L3_P	4	1.00	26.69	161.04	0	0	0	0	999	▇▁▁▁▁
DEVHX_10M3_P	4	1.00	29.02	167.55	0	0	0	0	999	▇▁▁▁▁
DEVHX_12A_P	5	1.00	11.97	107.82	0	0	0	0	999	▇▁▁▁▁
DEVHX_12_P	9670	0.19	17.10	109.30	1	3	4	6	999	▇▁▁▁▁
DEVHX_13_3_P	5	1.00	12.58	109.69	0	0	0	1	999	▇▁▁▁▁
DEVHX_14A3_P	5	1.00	26.70	161.04	0	0	0	0	999	▇▁▁▁▁
DEVHX_14B3_P	5	1.00	28.55	166.38	0	0	0	0	999	▇▁▁▁▁
DEVHX_14C3_P	5	1.00	25.04	156.01	0	0	0	0	999	▇▁▁▁▁
DEVHX_14D3_P	5	1.00	18.43	134.43	0	0	0	0	999	▇▁▁▁▁
DEVHX_14E3_P	5	1.00	24.22	153.15	0	0	0	0	999	▇▁▁▁▁
DEVHX_14F3_P	5	1.00	25.08	156.01	0	0	0	0	999	▇▁▁▁▁
DEVHX_14G3_P	5	1.00	17.42	130.76	0	0	0	0	999	▇▁▁▁▁
DEVHX_14H3_P	6	1.00	34.36	181.99	0	0	0	0	999	▇▁▁▁▁

# Developmental adversity summaries.
# devhx_12a_p: "Was the child born prematurely?"
# Step 1 recodes 999 to NA in every DEVHX_ column; it must stay in its own
# mutate() so that the `.` used by rowSums() below already holds cleaned data.
# Step 2 derives:
#   deveplopment_prematurity             - DEVHX_12A_P as a factor
#   deveplopment_birth_complications     - row sum of the DEVHX_14* items
#   deveplopment_pregnancy_complications - row sum of DEVHX_10A3_P..DEVHX_10L3_P
# Row sums are NA when any contributing item is NA.
# Birth weight in kg (BIRTH_WEIGHT_LBS * 0.453592) is not derived because the
# BIRTH_WEIGHT_LBS column is entirely NA in this release.
adversitySum <- DevHis %>%
  mutate(across(starts_with("DEVHX_"), ~ na_if(.x, 999))) %>%
  mutate(
    deveplopment_prematurity = as.factor(DEVHX_12A_P),
    deveplopment_birth_complications =
      rowSums(dplyr::select(., starts_with("DEVHX_14"))),
    deveplopment_pregnancy_complications =
      rowSums(dplyr::select(., DEVHX_10A3_P:DEVHX_10L3_P))
  ) %>%
  select(SUBJECTKEY, EVENTNAME, starts_with("deveplopment_"))

# Baseline summary of the derived adversity variables (ID columns dropped).
adversitySum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	3
_______________________
Column type frequency:
factor	1
numeric	2
________________________
Group variables	None

Variable type: factor

skim_variable	n_missing	complete_rate	ordered	n_unique	top_counts
deveplopment_prematurity	145	0.99	FALSE	2	0: 9525, 1: 2208

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
deveplopment_birth_complications	760	0.94	0.37	0.75	0	0	0	1	8	▇▁▁▁▁
deveplopment_pregnancy_complications	744	0.94	0.61	1.02	0	0	0	1	12	▇▁▁▁▁

1.9.4 brain trauma

most events are quite rare.

# Load the traumatic brain injury table as a tibble.
brainTruma <- paste0(dataFold, "ABCD_OTBI01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# Baseline summary, skipping the first eight columns.
brainTruma %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-(1:8)) %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	40
_______________________
Column type frequency:
character	1
logical	3
numeric	36
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
STUDY_COHORT_NAME	0	1	21	21	0	1	0

Variable type: logical

skim_variable	n_missing	mean	count
TBI_8I	11878	NaN	:
TBI_8K	11878	NaN	:
TBI_8L	11878	NaN	:

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
TBI_SELECT_LANGUAGE___1	0	1.00	0.05	0.23	0.0	0.00	0.0	0.00	1	▇▁▁▁▁
TBI_1	4	1.00	0.12	0.33	0.0	0.00	0.0	0.00	1	▇▁▁▁▁
TBI_1B	10436	0.12	0.07	0.28	0.0	0.00	0.0	0.00	3	▇▁▁▁▁
TBI_1C	10438	0.12	0.17	0.37	0.0	0.00	0.0	0.00	1	▇▁▁▁▂
TBI_1D	10437	0.12	4.98	2.63	0.0	3.00	5.0	7.00	11	▇▇▇▇▃
TBI_2	4	1.00	0.02	0.12	0.0	0.00	0.0	0.00	1	▇▁▁▁▁
TBI_2B	11690	0.02	0.04	0.19	0.0	0.00	0.0	0.00	1	▇▁▁▁▁
TBI_2C	11690	0.02	0.12	0.33	0.0	0.00	0.0	0.00	1	▇▁▁▁▁
TBI_2D	11690	0.02	6.61	2.18	0.0	5.00	7.0	8.00	10	▁▂▅▇▃
TBI_3	4	1.00	0.12	0.33	0.0	0.00	0.0	0.00	1	▇▁▁▁▁
TBI_3B	10398	0.12	0.04	0.21	0.0	0.00	0.0	0.00	3	▇▁▁▁▁
TBI_3C	10399	0.12	0.16	0.37	0.0	0.00	0.0	0.00	1	▇▁▁▁▂
TBI_3D	10400	0.12	5.86	2.70	0.0	4.00	6.0	8.00	11	▅▅▆▇▅
TBI_4	4	1.00	0.00	0.07	0.0	0.00	0.0	0.00	1	▇▁▁▁▁
TBI_4B	11819	0.00	0.02	0.13	0.0	0.00	0.0	0.00	1	▇▁▁▁▁
TBI_4C	11819	0.00	0.17	0.38	0.0	0.00	0.0	0.00	1	▇▁▁▁▂
TBI_4D	11819	0.00	6.53	2.44	0.0	5.00	7.0	8.00	10	▂▂▅▇▅
TBI_5	4	1.00	0.00	0.03	0.0	0.00	0.0	0.00	1	▇▁▁▁▁
TBI_5B	11868	0.00	0.00	0.00	0.0	0.00	0.0	0.00	0	▁▁▇▁▁
TBI_5C	11868	0.00	0.00	0.00	0.0	0.00	0.0	0.00	0	▁▁▇▁▁
TBI_5D	11868	0.00	5.10	2.91	1.5	2.25	5.5	7.75	9	▇▁▃▂▆
TBI_6O	4	1.00	0.00	0.03	0.0	0.00	0.0	0.00	1	▇▁▁▁▁
TBI_6P	11869	0.00	1.67	1.12	1.0	1.00	1.0	2.00	4	▇▁▁▁▁
TBI_6Q	11869	0.00	1.78	1.20	0.0	1.00	2.0	2.00	4	▂▇▇▂▂
TBI_6R	11869	0.00	0.00	0.00	0.0	0.00	0.0	0.00	0	▁▁▇▁▁
TBI_6S	11869	0.00	4.56	3.71	1.0	1.00	3.0	8.00	10	▇▂▂▂▃
TBI_7A	4	1.00	0.01	0.09	0.0	0.00	0.0	0.00	1	▇▁▁▁▁
TBI_7C1	11771	0.01	0.07	0.37	0.0	0.00	0.0	0.00	3	▇▁▁▁▁
TBL_7C2	11775	0.01	0.08	0.27	0.0	0.00	0.0	0.00	1	▇▁▁▁▁
TBI_7E	11771	0.01	4.50	3.63	0.0	1.00	5.0	8.00	10	▇▁▂▅▃
TBI_7F	11771	0.01	5.62	4.11	0.0	0.50	8.0	9.00	10	▆▂▁▂▇
TBI_7G	11771	0.01	0.02	0.14	0.0	0.00	0.0	0.00	1	▇▁▁▁▁
TBI_7I	11876	0.00	1.00	0.00	1.0	1.00	1.0	1.00	1	▁▁▇▁▁
TBI_7K	11876	0.00	4.00	5.66	0.0	2.00	4.0	6.00	8	▇▁▁▁▇
TBI_7L	11876	0.00	4.00	5.66	0.0	2.00	4.0	6.00	8	▇▁▁▁▇
TBI_8G	11876	0.00	0.00	0.00	0.0	0.00	0.0	0.00	0	▁▁▇▁▁

1.10 culture

1.10.1 bilingual

Based on https://www.nature.com/articles/s41562-019-0609-3 and https://github.com/anthonystevendick/bilingual_abcd/blob/master/bilingual_analysis.r
accult_q1_y How well do you speak English?
1 = Poor (Mal); 2 = Fair (Regular); 3 = Good (Bien); 4 = Excellent (Excelente); 777 = Refused (Niego contestar); 999 = Don’t Know (No sé)
accult_q2_y Besides English, do you speak or understand another language or dialect? If child asks about languages learned in school, the RA should state: That’s OK, as long as it is a language or dialect that you speak or understand.
In the data used below this item is coded 0 = No; 1 = Yes, with 777 = Refused.
accult_q4_y What language do you speak with most of your friends?
1 = (Other language) all the time; 2 = (Other language) most of the time; 3 = (Other language) and English equally; 4 = English most of the time; 5 = English all the time
accult_q5_y What language do you speak with most of your family? 1 = (Other language) all the time; 2 = (Other language) most of the time; 3 = (Other language) and English equally; 4 = English most of the time; 5 = English all the time

# Load the youth acculturation survey as a tibble; all events are kept.
bilingual <- paste0(dataFold, "YACC01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()
# %>% filter(EVENTNAME == "baseline_year_1_arm_1")

#bilingual_status
# #recode the accult_q2_y variable into a binary "Bilingual Status", 0 = not bilingual; 1 = bilingual
# 
# bilingual_status <- biLingual$ACCULT_Q2_Y
# sum(is.na(bilingual_status))

#bilingual_degree
# #dimension a 'bilingual degree' variable, where 1 = participant said they were bilingual, and they speak the other language with friends all the time, most of the time,
# #or equally, OR they speak the other language with family all the time, most of the time, or equally.
# 
# bilingual_degree <- ifelse(bilingual_status == 0, 0, ifelse(bilingual_status == 1 & (as.numeric(accult_q4_y) <= 3 | as.numeric(accult_q5_y) <= 3), 1, NA))
# count(bilingual_degree) #check the data
# sum(is.na(bilingual_degree))
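#### here I change it such that 0 = non-bilingual, 1 = bilingual who use (Other language) >= English with friends or family (accult_q4_y <= 3 or accult_q5_y <= 3), 2 = bilingual who use (Other language) < English with both (accult_q4_y > 3 and accult_q5_y > 3)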

#bilingual_use
# 
# #dimension a continuous 'bilingual use' variable, and reverse-score so that if participants speak the other language with friends all the time, most of the time...,
# #they will receive high scores on this measure (range 0-8, with 8 indicating a high-degree of other language use)
# 
# bilingual_use<-10-(as.numeric(abcd_subset$accult_q4_y)+as.numeric(abcd_subset$accult_q5_y))
# sum(is.na(bilingual_use))
#### here I change it such that 0 = non-bilingual; bilinguals get 11 - (accult_q4_y + accult_q5_y), i.e. 1-9, with higher scores indicating more (Other language) use


# Derive bilingualism measures from the youth acculturation survey.
#   bilingual_status: ACCULT_Q2_Y as a factor (0/1). The non-substantive codes
#     777 (refused) and 999 (don't know) are set to NA so that they can never
#     become factor levels.
#   bilingual_degree: 0 = status 0;
#     1 = status 1 and the other language is used at least as much as English
#         with friends or with family (ACCULT_Q4_Y <= 3 or ACCULT_Q5_Y <= 3);
#     2 = status 1 and the condition above is FALSE (both items > 3).
#     The nested ifelse() is kept on purpose: an NA test yields NA rather than
#     falling through to the next branch.
#   bilingual_use: 0 for status 0; otherwise 11 - (ACCULT_Q4_Y + ACCULT_Q5_Y),
#     so higher scores mean more use of the other language.
bilingualAdded <- bilingual %>%
  mutate(
    bilingual_status = factor(
      ifelse(ACCULT_Q2_Y %in% c(777, 999), NA, ACCULT_Q2_Y)
    )
  ) %>%
  mutate(
    bilingual_degree = factor(ifelse(
      bilingual_status == 0, 0,
      ifelse(
        bilingual_status == 1 &
          (as.numeric(ACCULT_Q4_Y) <= 3 | as.numeric(ACCULT_Q5_Y) <= 3), 1,
        ifelse(
          bilingual_status == 1 &
            (as.numeric(ACCULT_Q4_Y) > 3 | as.numeric(ACCULT_Q5_Y) > 3), 2,
          NA
        )
      )
    ))
  ) %>%
  mutate(
    bilingual_use = ifelse(
      bilingual_status == 0, 0,
      11 - (as.numeric(ACCULT_Q4_Y) + as.numeric(ACCULT_Q5_Y))
    )
  )

# Keep only the identifiers and the derived bilingual_* variables.
bilingualSum <- select(
  bilingualAdded,
  SUBJECTKEY, EVENTNAME, starts_with("bilingual_")
)

# Baseline summary of the derived variables (ID columns dropped).
bilingualSum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	10920
Number of columns	3
_______________________
Column type frequency:
factor	2
numeric	1
________________________
Group variables	None

Variable type: factor

skim_variable	n_missing	complete_rate	ordered	n_unique	top_counts
bilingual_status	74	0.99	FALSE	2	0: 6756, 1: 4090
bilingual_degree	74	0.99	FALSE	3	0: 6756, 2: 2544, 1: 1546

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
bilingual_use	74	0.99	1.02	1.69	0	0	0	1	9	▇▂▁▁▁

1.11 social demographics

Race/ethnicity and sex are taken from ACS. The remaining variables are: family income, family type, household size, parents’ work status (demo_prnt_empl_v2; a bit too much to include), parents’ education, and the sum of economic insecurities (sumEcon_insecurities).

1.11.1 ABCD Parent Demographics Survey

# Load the parent demographics survey as a tibble.
demograp <- paste0(dataFold, "PDEM02_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# Baseline summary, skipping the first eight columns.
demograp %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-(1:8)) %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	124
_______________________
Column type frequency:
character	2
logical	2
numeric	120
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
EVENTNAME	0	1	21	21	0	1	0
STUDY_COHORT_NAME	0	1	21	21	0	1	0

Variable type: logical

skim_variable	n_missing	complete_rate	mean	count
DEMO_RACE_NOTES_V2	11878	0	NaN	:
DEMO_RELIG2_V2	11878	0	NaN	:

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
DEMO_PRIM	0	1.00	1.23	0.67	1	1.00	1.0	1.00	5	▇▁▁▁▁
DEMO_BRTHDAT_V2	11	1.00	9.48	0.51	8	9.00	9.0	10.00	11	▁▇▁▇▁
DEMO_ED_V2	2	1.00	4.21	0.79	0	4.00	4.0	5.00	12	▁▇▅▁▁
DEMO_ADOPT_AGEX_V2	11603	0.02	2.17	2.43	0	0.00	1.0	3.00	10	▇▂▁▁▁
DEMO_ADOPT_AGEX_V2_BL_DK	11874	0.00	999.00	0.00	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEMO_SEX_V2	0	1.00	1.48	0.50	1	1.00	1.0	2.00	3	▇▁▇▁▁
DEMO_GENDER_ID_V2	2	1.00	2.20	26.22	1	1.00	1.0	2.00	999	▇▁▁▁▁
DEMO_RACE_A_P___10	0	1.00	0.74	0.44	0	0.00	1.0	1.00	1	▃▁▁▁▇
DEMO_RACE_A_P___11	0	1.00	0.21	0.41	0	0.00	0.0	0.00	1	▇▁▁▁▂
DEMO_RACE_A_P___12	0	1.00	0.03	0.18	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_RACE_A_P___13	0	1.00	0.00	0.02	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_RACE_A_P___14	0	1.00	0.00	0.04	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_RACE_A_P___15	0	1.00	0.00	0.01	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_RACE_A_P___16	0	1.00	0.00	0.03	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_RACE_A_P___17	0	1.00	0.00	0.06	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_RACE_A_P___18	0	1.00	0.01	0.10	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_RACE_A_P___19	0	1.00	0.02	0.13	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_RACE_A_P___20	0	1.00	0.01	0.12	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_RACE_A_P___21	0	1.00	0.01	0.08	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_RACE_A_P___22	0	1.00	0.01	0.09	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_RACE_A_P___23	0	1.00	0.01	0.07	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_RACE_A_P___24	0	1.00	0.01	0.09	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_RACE_A_P___25	0	1.00	0.07	0.25	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_RACE_A_P___77	0	1.00	0.00	0.07	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_RACE_A_P___99	0	1.00	0.01	0.09	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_ETHN_V2	10	1.00	12.93	101.49	1	2.00	2.0	2.00	999	▇▁▁▁▁
DEMO_ETHN2_V2	9479	0.20	34.04	124.33	10	13.00	14.0	20.00	999	▇▁▁▁▁
DEMO_ORIGIN_V2	2	1.00	187.28	31.07	1	189.00	189.0	189.00	999	▇▁▁▁▁
DEMO_YEARS_US_V2	8016	0.33	9.47	0.94	0	9.00	10.0	10.00	19	▁▁▇▁▁
DEMO_YEARS_US_V2_DK	11859	0.00	999.00	0.00	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEMO_RELIG_V2	4	1.00	52.61	197.16	1	4.00	11.0	17.00	999	▇▁▁▁▁
DEMO_PRNT_AGE_V2	91	0.99	39.96	6.84	23	35.00	40.0	44.00	80	▃▇▂▁▁
DEMO_PRNT_AGE_V2_BL_REFUSE	11791	0.01	863.76	108.95	777	777.00	777.0	999.00	999	▇▁▁▁▅
DEMO_PRNT_GENDER_ID_V2	2	1.00	2.24	16.91	1	2.00	2.0	2.00	999	▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___10	0	1.00	0.73	0.44	0	0.00	1.0	1.00	1	▃▁▁▁▇
DEMO_PRNT_RACE_A_V2___11	0	1.00	0.17	0.38	0	0.00	0.0	0.00	1	▇▁▁▁▂
DEMO_PRNT_RACE_A_V2___12	0	1.00	0.03	0.16	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___13	0	1.00	0.00	0.01	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___14	0	1.00	0.00	0.04	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___15	0	1.00	0.00	0.01	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___16	0	1.00	0.00	0.02	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___17	0	1.00	0.00	0.05	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___18	0	1.00	0.01	0.08	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___19	0	1.00	0.01	0.11	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___20	0	1.00	0.01	0.10	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___21	0	1.00	0.01	0.07	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___22	0	1.00	0.01	0.07	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___23	0	1.00	0.00	0.05	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___24	0	1.00	0.01	0.07	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___25	0	1.00	0.06	0.24	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___77	0	1.00	0.01	0.08	0	0.00	0.0	0.00	1	▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___99	0	1.00	0.01	0.11	0	0.00	0.0	0.00	1	▇▁▁▁▁
NAAS_ID	11472	0.03	100.79	297.31	1	2.00	2.0	4.00	999	▇▁▁▁▁
NAAS_MOM_ID	11472	0.03	155.13	358.70	1	2.00	4.0	5.00	999	▇▁▁▁▂
NAAS_ID_DAD	11472	0.03	268.13	440.53	1	2.00	5.0	999.00	999	▇▁▁▁▃
NAAS_BIRTHPLACE	11472	0.03	249.36	429.07	1	5.00	5.0	5.00	999	▇▁▁▁▂
NAAS_RAISED	11472	0.03	227.40	415.23	1	5.00	5.0	5.00	999	▇▁▁▁▂
NAAS_COMM_CONTACT	11472	0.03	209.88	403.54	1	4.00	5.0	5.00	999	▇▁▁▁▂
NAAS_PRIDE	11472	0.03	115.17	316.32	1	1.00	2.0	3.00	999	▇▁▁▁▁
NAAS_SELF_RATING	11472	0.03	59.91	230.42	1	3.00	4.0	4.00	999	▇▁▁▁▁
NAAS_TRADITIONS	11472	0.03	53.31	215.53	1	4.00	5.0	5.00	999	▇▁▁▁▁
DEMO_PRNT_ETHN_V2	2	1.00	7.28	71.20	1	2.00	2.0	2.00	999	▇▁▁▁▁
DEMO_PRNT_ETHN2_V2	9879	0.17	21.38	62.54	10	13.00	14.0	20.00	999	▇▁▁▁▁
DEMO_PRNT_16	2	1.00	0.32	0.47	0	0.00	0.0	1.00	1	▇▁▁▁▃
DEMO_PRNT_16A	3817	0.68	0.99	0.11	0	1.00	1.0	1.00	1	▁▁▁▁▇
DEMO_PRNT_ORIGIN_V2	7965	0.33	142.07	62.37	1	111.00	185.0	189.00	999	▇▁▁▁▁
DEMO_BIOFATHER_V2	7965	0.33	149.84	102.20	1	111.00	186.0	189.00	999	▇▁▁▁▁
DEMO_BIOMOTHER_V2	7965	0.33	149.56	80.95	1	111.00	189.0	189.00	999	▇▁▁▁▁
DEMO_MATGRANDM_V2	7965	0.33	137.89	103.11	1	82.00	125.0	189.00	999	▇▁▁▁▁
DEMO_MATGRANDF_V2	7965	0.33	141.08	116.07	1	82.00	125.0	189.00	999	▇▁▁▁▁
DEMO_PATGRANDM_V2	7965	0.33	149.74	144.75	1	81.00	125.0	189.00	999	▇▁▁▁▁
DEMO_PATGRANDF_V2	7965	0.33	154.67	158.47	1	82.00	125.0	189.00	999	▇▁▁▁▁
DEMO_PRNT_YEARS_US_V2	1948	0.84	35.90	10.77	1	31.00	38.0	43.00	100	▂▇▅▁▁
DEMO_PRNT_YEARS_US_V2_DK	11747	0.01	999.00	0.00	999	999.00	999.0	999.00	999	▁▁▇▁▁
DEMO_PRNT_MARITAL_V2	2	1.00	8.23	68.69	1	1.00	1.0	3.00	777	▇▁▁▁▁
DEMO_PRNT_ED_V2	0	1.00	17.68	28.88	1	15.00	18.0	19.00	777	▇▁▁▁▁
DEMO_PRNT_EMPL_V2	2	1.00	5.97	52.17	1	1.00	1.0	5.00	777	▇▁▁▁▁
DEMO_PRNT_EMPL_TIME	3693	0.69	1.27	0.44	1	1.00	1.0	2.00	2	▇▁▁▁▃
DEMO_PRNT_INCOME_V2	2	1.00	84.94	253.45	1	3.00	6.0	8.00	999	▇▁▁▁▁
DEMO_PRNT_PRTNR_V2	2	1.00	9.23	78.55	1	1.00	1.0	1.00	777	▇▁▁▁▁
DEMO_PRNT_PRTNR_BIO	2398	0.80	2.53	32.49	1	1.00	1.0	1.00	999	▇▁▁▁▁
DEMO_PRNT_PRTNR_ADOPT	10096	0.15	0.20	0.40	0	0.00	0.0	0.00	1	▇▁▁▁▂
DEMO_PRTNR_ED_V2	2401	0.80	22.91	78.49	0	15.00	18.0	18.00	999	▇▁▁▁▁
DEMO_PRTNR_EMPL_V2	2454	0.79	11.16	90.79	1	1.00	1.0	1.00	999	▇▁▁▁▁
DEMO_PRTNR_EMPL_TIME	3665	0.69	1.06	0.23	1	1.00	1.0	1.00	2	▇▁▁▁▁
DEMO_PRTNR_INCOME_V2	2397	0.80	95.58	269.81	1	6.00	7.0	9.00	999	▇▁▁▁▁
DEMO_COMB_INCOME_V2	2	1.00	82.50	248.26	1	6.00	8.0	9.00	999	▇▁▁▁▁
DEMO_FAM_EXP1_V2	2	1.00	4.92	61.14	0	0.00	0.0	0.00	777	▇▁▁▁▁
DEMO_FAM_EXP2_V2	2	1.00	2.94	47.21	0	0.00	0.0	0.00	777	▇▁▁▁▁
DEMO_FAM_EXP3_V2	2	1.00	3.97	54.63	0	0.00	0.0	0.00	777	▇▁▁▁▁
DEMO_FAM_EXP4_V2	3	1.00	1.98	39.01	0	0.00	0.0	0.00	777	▇▁▁▁▁
DEMO_FAM_EXP5_V2	3	1.00	2.74	45.58	0	0.00	0.0	0.00	777	▇▁▁▁▁
DEMO_FAM_EXP6_V2	2	1.00	2.54	43.88	0	0.00	0.0	0.00	777	▇▁▁▁▁
DEMO_FAM_EXP7_V2	2	1.00	3.18	48.78	0	0.00	0.0	0.00	777	▇▁▁▁▁
DEMO_ROSTER_V2	279	0.98	4.72	1.77	0	4.00	4.0	5.00	77	▇▁▁▁▁
DEMO_ROSTER_V2_REFUSE	11600	0.02	850.47	104.65	777	777.00	777.0	999.00	999	▇▁▁▁▃
FAM_ROSTER_2C_V2	575	0.95	2.39	2.82	1	1.00	1.0	3.00	15	▇▁▁▁▁
FAM_ROSTER_3C_V2	1045	0.91	3.54	1.96	1	3.00	3.0	3.00	15	▇▁▁▁▁
FAM_ROSTER_4C_V2	2493	0.79	3.50	1.87	1	3.00	3.0	3.00	15	▇▁▁▁▁
FAM_ROSTER_5C_V2	6272	0.47	4.02	2.69	1	3.00	3.0	3.00	15	▇▁▁▁▁
FAM_ROSTER_6C_V2	9110	0.23	4.56	3.22	1	3.00	3.0	4.00	15	▇▁▁▁▁
FAM_ROSTER_7C_V2	10718	0.10	5.10	3.64	1	3.00	3.0	6.00	15	▇▂▁▁▁
FAM_ROSTER_8C_V2	11378	0.04	5.23	3.67	1	3.00	3.0	7.00	15	▇▂▂▁▁
FAM_ROSTER_9C_V2	11647	0.02	5.44	3.73	2	3.00	4.0	7.00	15	▇▁▁▁▁
FAM_ROSTER_10C_V2	11750	0.01	5.00	3.48	1	3.00	3.5	4.25	14	▇▅▁▁▂
FAM_ROSTER_11C_V2	11827	0.00	4.98	3.08	3	3.00	4.0	5.00	14	▇▁▁▁▁
FAM_ROSTER_12C_V2	11847	0.00	5.03	3.19	3	3.00	4.0	5.00	14	▇▁▁▁▁
FAM_ROSTER_13C_V2	11864	0.00	5.50	3.65	3	3.25	4.0	5.50	14	▇▁▁▁▂
FAM_ROSTER_14C_V2	11867	0.00	6.18	3.87	3	4.00	4.0	7.00	14	▇▁▁▁▂
FAM_ROSTER_15C_V2	11870	0.00	8.12	5.28	1	4.00	7.0	13.25	15	▂▇▂▁▇
DEMO_CHILD_TIME_V2	3	1.00	5.41	63.95	0	0.00	0.0	0.00	777	▇▁▁▁▁
DEMO_CHILD_TIME2_V2	10632	0.10	52.67	38.70	0	24.00	48.0	84.00	168	▇▆▆▁▁
DEMO_CHILD_TIME2_V2_DK	11797	0.01	960.63	84.46	777	999.00	999.0	999.00	999	▂▁▁▁▇
DEMO_CHILD_TIME3_V2	10576	0.11	2.29	1.99	1	1.00	1.0	4.00	8	▇▁▃▁▁
DEMO_YRS_1	3	1.00	20.84	129.14	0	0.00	1.0	3.00	999	▇▁▁▁▁
DEMO_YRS_2	5	1.00	35.91	173.28	1	2.00	3.0	4.00	999	▇▁▁▁▁
DEMO_YRS_2A_2	5430	0.54	85.46	278.99	0	0.00	0.0	1.00	999	▇▁▁▁▁
DEMO_YRS_2B_2	5432	0.54	128.37	333.80	0	0.00	0.0	1.00	999	▇▁▁▁▁
DEMO_YRS_2_NO_DISPLAY___1	0	1.00	0.46	0.50	0	0.00	0.0	1.00	1	▇▁▁▁▇
DEMO_RACE_A_P___0	0	1.00	0.00	0.01	0	0.00	0.0	0.00	1	▇▁▁▁▁

#demo_prnt_marital_v2 
#marital 
#Are you now married, widowed, divorced, separated, never married or living with a partner? ¿Usted actualmente está casado(a), viudo(a), divorciado(a), separado(a), nunca se ha casado(a) o vive con una pareja?
#1 = Married Casado(a) ; 2 = Widowed Viudo(a) ; 3 = Divorced Divorciado(a) ; 4 = Separated Separado(a) ; 5 = Never married Nunca me he casado ; 6 = Living with partner Vivo con una pareja ; 777 = Refused to answer Prefiero no responder

#demo_prnt_ed_v2
#What is the highest grade or level of school you have completed or the highest degree you have received? ¿Cuál es su máximo nivel de estudios completados o el máximo título que ha recibido?
#0 = Never attended/Kindergarten only Nunca asistí/Kinder solamente ; 1 = 1st grade 1.er grado ; 2 = 2nd grade 2.º grado ; 3 = 3rd grade 3.er grado ; 4 = 4th grade 4.º grado ; 5 = 5th grade 5.º grado ; 6 = 6th grade 6.º grado ; 7 = 7th grade 7.º grado ; 8 = 8th grade 8.º grado ; 9 = 9th grade 9.º grado ; 10 = 10th grade 10.º grado ; 11 = 11th grade 11.º grado ; 12 = 12th grade; 13 = High school graduate Preparatoria terminada ; 14 = GED or equivalent Diploma General de Equivalencia (GED) o equivalente ; 15 = Some college; 16 = Associate degree: Occupational; 17 = Associate degree: Academic Program Título de asociado: programa académico ; 18 = Bachelor's degree (ex. BA; 19 = Master's degree (ex. MA; 20 = Professional School degree (ex. MD; 21 = Doctoral degree (ex. PhD; 777 = Refused to answer Prefiero no responder // The following questions are about your partner. Your "partner" refers to any significant figure in your life that helps you in raising your child or has helped you for more than 2 years. This person should be involved 40% or more of the daily activities your child does. For example, your partner could be your spouse. However, your partner could also be your boyfriend/girlfriend or relative.

#demo_comb_income_v2
#What is your TOTAL COMBINED FAMILY INCOME for the past 12 months? This should include income (before taxes and deductions) from all sources, wages, rent from properties, social security, disability and/or veteran's benefits, unemployment benefits, workman's compensation, help from relative (include child payments and alimony), and so on. ¿Cuál de estas categorías es la que mejor describe su INGRESO FAMILIAR TOTAL COMBINADO de los últimos 12 meses? Este debe incluir los ingresos (antes de impuestos y deducciones) provenientes de todas las fuentes, salarios, renta de propiedades, seguro social, pagos por incapacidad o subsidios para veteranos, subsidios por desempleo, compensación por accidentes de trabajo, ayuda de familiares (incluya pensiones alimenticias para hijos y cónyuges divorciados), etc.
#1= Less than $5,000; 2=$5,000 through $11,999; 3=$12,000 through $15,999; 4=$16,000 through $24,999; 5=$25,000 through $34,999; 6=$35,000 through $49,999; 7=$50,000 through $74,999; 8= $75,000 through $99,999; 9=$100,000 through $199,999; 10=$200,000 and greater. 999 = Don't know No lo sé ; 777 = Refuse to answer No deseo responder  | If Separated/Divorced, please average the two household incomes. Si es Separado(a) / Divorciado(a), por favor promedie los dos ingresos familiares

#demo_roster_v2
#How many people are living at your address? INCLUDE everyone who is living or staying at your address for more than 2 months. ¿Cuántas personas están viviendo o quedándose en su domicilio? INCLUYA a todas las personas que lleven viviendo o quedándose en su domicilio durante más de 2 meses.

#demo_fam_exp1_v2
#demo_fam_exp2_v2
#demo_fam_exp3_v2
#demo_fam_exp4_v2
#demo_fam_exp5_v2
#demo_fam_exp6_v2
#demo_fam_exp7_v2
# In the past 12 months, has there been a time when you and your immediate family experienced any of the following: Needed food but couldn't afford to buy it or couldn't afford to go out to get it? ¿Necesitaban comida pero no les alcanzaba el dinero para comprarla o para salir a comprarla?
# 0 = No No; 1 = Yes Sí; 777 = Refuse to answer Niego contestar

# Build family-level socio-demographic summaries from the parent demographics
# survey. Throughout, 777 (refused) and 999 (don't know) are treated as missing.
#   marital               - labelled factor of DEMO_PRNT_MARITAL_V2; any code
#                           outside 1-6 becomes NA instead of being silently
#                           counted as "married"
#   education1stPar/2ndPar - respondent / partner education codes
#   educationAvg          - row mean of the two education codes, ignoring NA
#                           (NaN when both are missing)
#   combinedIncome        - combined family income bracket
#   householdSize         - household roster size; values > 20 are trimmed to NA
#   econ_insecurities_sum - row sum of the DEMO_FAM_* items (NA if any is NA)
demograpSum <- demograp %>%
  select(
    SUBJECTKEY, EVENTNAME, DEMO_PRNT_MARITAL_V2, DEMO_PRNT_ED_V2,
    DEMO_PRTNR_ED_V2, DEMO_COMB_INCOME_V2, DEMO_ROSTER_V2,
    starts_with("DEMO_FAM_")
  ) %>%
  mutate(marital = recode_factor(
    as.factor(DEMO_PRNT_MARITAL_V2),
    `1` = "married", `2` = "widowed", `3` = "divorced",
    `4` = "separated", `5` = "neverMarried", `6` = "livingWithPartner",
    `777` = NA_character_, `999` = NA_character_,
    # unknown codes must not be imputed as a substantive category
    .default = NA_character_
  )) %>%
  mutate(
    education1stPar = ifelse(DEMO_PRNT_ED_V2 %in% c(777, 999), NA, DEMO_PRNT_ED_V2),
    education2ndPar = ifelse(DEMO_PRTNR_ED_V2 %in% c(777, 999), NA, DEMO_PRTNR_ED_V2),
    educationAvg = rowMeans(cbind(education1stPar, education2ndPar), na.rm = TRUE),
    combinedIncome = ifelse(DEMO_COMB_INCOME_V2 %in% c(777, 999), NA, DEMO_COMB_INCOME_V2),
    # trim people who live with > 20 ppl (2 people in total)
    householdSize = ifelse(
      DEMO_ROSTER_V2 %in% c(777, 999) | DEMO_ROSTER_V2 > 20, NA, DEMO_ROSTER_V2
    )
  ) %>%
  # recode both missing-value codes in one pass; kept as a separate mutate()
  # so that the `.` used by rowSums() below sees the cleaned columns
  mutate(across(
    starts_with("DEMO_FAM_"),
    ~ replace(.x, .x %in% c(777, 999), NA)
  )) %>%
  mutate(
    econ_insecurities_sum =
      rowSums(select(., starts_with("DEMO_FAM_")), na.rm = FALSE)
  ) %>%
  select(-starts_with("DEMO_"))
  
  # Baseline summary of the derived socio-demographic variables.
  demograpSum %>%
    filter(EVENTNAME == "baseline_year_1_arm_1") %>%
    skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	9
_______________________
Column type frequency:
character	2
factor	1
numeric	6
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
SUBJECTKEY	0	1	12	16	0	11878	0
EVENTNAME	0	1	21	21	0	1	0

Variable type: factor

skim_variable	n_missing	complete_rate	ordered	n_unique	top_counts
marital	96	0.99	FALSE	6	mar: 7991, nev: 1460, div: 1082, liv: 688

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
education1stPar	17	1.00	16.59	2.77	1	15	18	19.0	21	▁▁▂▅▇
education2ndPar	2467	0.79	16.37	3.06	0	15	18	18.0	21	▁▁▁▅▇
educationAvg	14	1.00	16.38	2.70	3	15	17	18.5	21	▁▁▂▇▇
combinedIncome	1018	0.91	7.22	2.42	1	6	8	9.0	10	▂▂▃▆▇
householdSize	281	0.98	4.70	1.55	0	4	4	5.0	19	▂▇▁▁▁
econ_insecurities_sum	135	0.99	0.47	1.10	0	0	0	0.0	7	▇▁▁▁▁

1.11.2 more Social Demographics from Residential History Derived Scores

“RESHIST_ADDR1_ADI_WSUM”: residential history derived Area Deprivation Index, a scaled weighted sum based on Kind et al., Annals of Internal Medicine, 2014; “RESHIST_ADDR1_GRNDTOT”: the grand total from the Uniform Crime Reports; “RESHIST_ADDR1_LEADRISK”: the estimated lead risk in the census tract of the primary residential address.

# Load the residential history derived scores as a tibble.
RHDS <- paste0(dataFold, "ABCD_RHDS01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# RHDS %>% filter(EVENTNAME =="baseline_year_1_arm_1") %>% select(RESHIST_ADDR1_GRNDTOT) %>%
#   #select(-1:-8) %>%
#   summarytools::dfSummary(
#                         style = 'grid', graph.magnif = 0.75,
#                         valid.col = FALSE, tmp.img.dir = "/tmp")

# The Uniform Crime Reports grand total has some very high values, so a
# fourth-root transformation, x^(1/4), is used (the derived column is
# nevertheless named "quartic_...").
# Histogram of the raw totals:
hist(RHDS$RESHIST_ADDR1_GRNDTOT, breaks = 100)

# Histogram after the fourth-root transformation:
hist((RHDS$RESHIST_ADDR1_GRNDTOT)^(1/4), breaks = 100)

# Neighbourhood-level indicators from the residential history table:
# area deprivation index, lead risk, and the fourth root (x^(1/4)) of the
# Uniform Crime Reports grand total. The raw total is dropped after use.
ResidHistDer <- RHDS %>%
  select(
    SUBJECTKEY, EVENTNAME,
    area_deprivation_index = RESHIST_ADDR1_ADI_WSUM,
    lead_risk = RESHIST_ADDR1_LEADRISK,
    RESHIST_ADDR1_GRNDTOT
  ) %>%
  mutate(quartic_uniform_crime_reports = RESHIST_ADDR1_GRNDTOT^(1 / 4)) %>%
  select(-RESHIST_ADDR1_GRNDTOT)

# Baseline summary of the derived indicators.
ResidHistDer %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	5
_______________________
Column type frequency:
character	2
numeric	3
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
SUBJECTKEY	0	1	12	16	0	11878	0
EVENTNAME	0	1	21	21	0	1	0

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p25	p50	p75	p100	hist
area_deprivation_index	697	0.94	92.73	24.82	86.64	98.48	108.14	125.75	▁▁▂▇▇
lead_risk	701	0.94	5.10	3.11	2.00	5.00	8.00	10.00	▇▆▅▅▆
quartic_uniform_crime_reports	697	0.94	12.09	5.78	9.41	12.28	15.20	24.29	▂▃▇▅▁

1.12 Proximal Environment

From Zhang et al. Translational Psychiatry (2020) https://doi.org/10.1038/s41398-020-0761-6: The “Safety from Crime” items from the PhenX Toolkit were used to assess neighborhood safety and crime reports. Additionally, children reported their school risk and protective factors via a 12-item Inventory for School Risk and Protective Factors of the PhenX toolkit.

1.12.1 ABCD Neighborhood Safety/Crime Survey Modified from PhenX (NSC)

from parents and children

# Parent-reported neighbourhood safety: sum of all NEIGHBORHOOD* items.
# The sum is NA when any item is missing (na.rm = FALSE).
NeighboSafety_parent <- paste0(dataFold, "ABCD_PNSC01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  mutate(
    neighbo_safety_parent_sum =
      rowSums(select(., starts_with("NEIGHBORHOOD")), na.rm = FALSE)
  ) %>%
  select(SUBJECTKEY, EVENTNAME, neighbo_safety_parent_sum)

# I feel safe walking in my neighborhood, day or night. Me siento seguro(a) caminando por mi vecindario, de día o de noche.
# Violence is not a problem in my neighborhood./ La violencia no es un problema en mi vecindario.
# My neighborhood is safe from crime. Mi vecindario está a salvo de la delincuencia.
#1 = Strongly Disagree /Muy en desacuerdo; 2 = Disagree /En desacuerdo; 3 = Neutral (neither agree nor disagree)/ Neutral (ni de acuerdo ni en desacuerdo); 4 = Agree /De acuerdo; 5 = Strongly Agree/ Muy de acuerdo//The following questions are about your neighborhood. Your neighborhood is the area within about a 20-minute walk (or about a mile) from your home. For each of the statements please indicate whether you strongly agree, agree, neither agree nor disagree, disagree, or strongly disagree

# Youth-reported neighbourhood safety: the single NEIGHBORHOOD_CRIME_Y item,
# renamed while selecting.
NeighboSafety_children <- paste0(dataFold, "ABCD_NSC01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  select(
    SUBJECTKEY, EVENTNAME,
    neighbo_safety_child_sum = NEIGHBORHOOD_CRIME_Y
  )

#My neighborhood is safe from crime.

# Full join of the parent and youth safety scores on subject and event.
NeighboSafety <- plyr::join_all(
  list(NeighboSafety_parent, NeighboSafety_children),
  by = c("SUBJECTKEY", "EVENTNAME"),
  type = "full"
)

# Baseline summary of the joined scores.
NeighboSafety %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	4
_______________________
Column type frequency:
character	2
numeric	2
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
SUBJECTKEY	0	1	12	16	0	11878	0
EVENTNAME	0	1	21	21	0	1	0

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
neighbo_safety_parent_sum	47	1	11.67	2.93	3	10	12	14	15	▁▁▃▆▇
neighbo_safety_child_sum	24	1	4.03	1.09	1	3	4	5	5	▁▁▃▆▇

1.12.2 School Risk and Protective Factors Survey

# Load the school risk and protective factors survey; its event column is
# called VISIT, so rename it to EVENTNAME to match the other tables.
SchRisk <- paste0(dataFold, "SRPF01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  rename(EVENTNAME = VISIT)

# Three school subscale sums; each is NA when any contributing item is NA.
#   sumSchool_environment   - SCHOOL_2_Y .. SCHOOL_7_Y
#   sumSchool_involvement   - SCHOOL_8_Y, SCHOOL_9_Y, SCHOOL_10_Y, SCHOOL_12_Y
#   sumSchool_disengagement - SCHOOL_15_Y, SCHOOL_17_Y
school_risk_sum <- SchRisk %>%
  mutate(
    sumSchool_environment = rowSums(dplyr::select(., all_of(c(
      "SCHOOL_2_Y", "SCHOOL_3_Y", "SCHOOL_4_Y",
      "SCHOOL_5_Y", "SCHOOL_6_Y", "SCHOOL_7_Y"
    )))),
    sumSchool_involvement = rowSums(dplyr::select(., all_of(c(
      "SCHOOL_8_Y", "SCHOOL_9_Y", "SCHOOL_10_Y", "SCHOOL_12_Y"
    )))),
    sumSchool_disengagement = rowSums(dplyr::select(., all_of(c(
      "SCHOOL_15_Y", "SCHOOL_17_Y"
    ))))
  ) %>%
  select(SUBJECTKEY, EVENTNAME, starts_with("sumSchool"))

# Baseline summary of the subscale sums.
school_risk_sum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	5
_______________________
Column type frequency:
character	2
numeric	3
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
SUBJECTKEY	0	1	12	16	0	11878	0
EVENTNAME	0	1	21	21	0	1	0

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
sumSchool_environment	27	1	19.93	2.83	6	18	20	22	24	▁▁▂▇▇
sumSchool_involvement	26	1	13.06	2.37	4	12	13	15	16	▁▁▃▅▇
sumSchool_disengagement	25	1	3.74	1.46	2	3	4	5	8	▇▃▃▁▁

1.13 Social Interaction

From Zhang et al. Translational Psychiatry (2020) https://doi.org/10.1038/s41398-020-0761-6: The child-reported parental monitoring and acceptance, as well as the child- and parent-reported prosocial tendency and family conflicts were included to measure social interactions. Parent monitoring was assessed by a 5-item summary score of the Parental Monitoring Scale [24]. Parent acceptance was evaluated by the Acceptance Scale, a subscale of the Child Report of Behavior Inventory (CRPBI) [25]. Prosocial behavior (e.g., being nice, helping, caring) was assessed using the Prosocial Behavior Scale, a subscale from the “Strengths and Difficulties Questionnaire” (SDQ) [26]. Both parents and youth reported on the youth’s prosocial behavior (e.g., being considerate of other people’s feelings, often offering to help others). In order to assess the family conflicts, the ABCD protocol utilized a 9-item Family Conflict subscale of the Moos Family Environment Scale (FES) for the baseline protocol [27].

1.13.1 ABCD Parental Monitoring Survey

# Load the parental monitoring survey as a tibble.
ParMonSur <- paste0(dataFold, "PMQ01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# How often do your parents/guardians know where you are?
# How often do your parents know who you are with when you are not at school and away from home?
# If you are at home when your parents or guardians are not, how often do you know how to get in touch with them?
# How often do you talk to your mom/dad or guardian about your plans for the coming day, such as your plans about what will happen at school or what you are going to do with friends?
# In an average week, how many times do you and your parents/guardians, eat dinner together?
#1 = Never; 2 = Almost Never; 3 = Sometimes; 4 = Often; 5 = Always or Almost Always

ParentMonitoring <- ParMonSur %>% 
  mutate(parent_monitor_mean = rowMeans(select(., starts_with("PARENT_MONITOR_")))) %>%
  select(SUBJECTKEY,EVENTNAME, parent_monitor_mean)

ParentMonitoring %>%  filter(EVENTNAME =="baseline_year_1_arm_1") %>%
      skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	3
_______________________
Column type frequency:
character	2
numeric	1
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
SUBJECTKEY	0	1	12	16	0	11878	0
EVENTNAME	0	1	21	21	0	1	0

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
parent_monitor_mean	23	1	4.38	0.52	1	4.2	4.4	4.8	5	▁▁▁▃▇

1.13.2 ABCD Family Conflict

Parent report: ABCD Parents Reported Parent Family Environment Scale-Family Conflict Subscale Modified from PhenX (FES), version 2 (short name: fes02). Youth report: ABCD Youth Reported Parent Family Environment Scale-Family Conflict Subscale Modified from PhenX (FES), version 2 (short name: fes01).

# Parent-reported Family Conflict subscale table (FES02).
# distinct() is keyed on every column except FES02_ID and DATASET_ID, so rows that
# differ only in those two IDs collapse to their first occurrence
# (for some reason the file contains such a duplicate).
FamilyConflict_Parents <- as_tibble(read.csv(paste0(dataFold, "FES02_DATA_TABLE.csv"))) %>%
  distinct(select(., -FES02_ID, -DATASET_ID), .keep_all = TRUE)

# Parent items (English wording of the bilingual English/Spanish form):
# We fight a lot in our family.
# Family members rarely become openly angry.
# Family members sometimes get so angry they throw things.
# Family members hardly ever lose their tempers.
# Family members often criticize each other.
# Family members sometimes hit each other.
# If there is a disagreement in our family, we try hard to smooth things over and keep the peace.
# Family members often try to one-up or outdo each other.
# In our family, we believe you don't ever get anywhere by raising your voice.
# Coding: 1 = True; 0 = False, or 0 = True; 1 = False
# (presumably the second coding applies to the reverse-scored `...R_P` items -- TODO confirm)

# Parent-reported family conflict score: row sum over the columns positioned from
# FAM_ENVIRO1_P through FAM_ENVIRO9R_P. na.rm = FALSE keeps the score NA whenever
# any item in that range is missing.
FamilyConflict_parents_sum <- FamilyConflict_Parents %>%
  mutate(fam_conflict_parent = rowSums(select(., FAM_ENVIRO1_P:FAM_ENVIRO9R_P), na.rm = FALSE)) %>%
  select(SUBJECTKEY, EVENTNAME, fam_conflict_parent)

# FamilyConflict_parents_sum  %>%  filter(EVENTNAME =="baseline_year_1_arm_1") %>%
#    summarytools::dfSummary(
#                          style = 'grid', graph.magnif = 0.75,
#                          valid.col = FALSE, tmp.img.dir = "/tmp")

# Youth-reported Family Conflict subscale table (ABCD_FES01).
FamilyConflict_Children <- as_tibble(read.csv(paste0(dataFold, "ABCD_FES01_DATA_TABLE.csv")))

# Youth items:
# We fight a lot in our family.
# Family members rarely become openly angry.
# Family members sometimes get so angry they throw things.
# Family members hardly ever lose their tempers.
# Family members often criticize each other.
# Family members sometimes hit each other.
# If there's a disagreement in our family, we try hard to smooth things over and keep the peace.
# Family members often try to one-up or outdo each other.
# In our family, we believe you don't ever get anywhere by raising your voice.

# Youth-reported family conflict score: row sum over FES_YOUTH_Q1 through FES_YOUTH_Q9,
# NA when any item is missing.
# NOTE(review): unlike the parent columns, the youth column names carry no reverse-score
# ("R") suffix -- confirm that the positively worded items are already reverse-coded
# in this table before they are summed.
FamilyConflict_children_sum <- FamilyConflict_Children %>%
  mutate(fam_conflict_children = rowSums(select(., FES_YOUTH_Q1:FES_YOUTH_Q9), na.rm = FALSE)) %>%
  select(SUBJECTKEY, EVENTNAME, fam_conflict_children)

# FamilyConflict_children_sum %>%  filter(EVENTNAME =="baseline_year_1_arm_1") %>%
#    summarytools::dfSummary(
#                          style = 'grid', graph.magnif = 0.75,
#                          valid.col = FALSE, tmp.img.dir = "/tmp")

# Full join of both reporters on participant and visit, so a participant-visit
# present in only one table is retained with NA for the other score.
FamilyConflict_sum <- plyr::join_all(
  list(FamilyConflict_parents_sum, FamilyConflict_children_sum),
  by = c("SUBJECTKEY", "EVENTNAME"),
  type = "full"
)

# Summarise the baseline visit only.
FamilyConflict_sum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	4
_______________________
Column type frequency:
character	2
numeric	2
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
SUBJECTKEY	0	1	12	16	0	11878	0
EVENTNAME	0	1	21	21	0	1	0

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
fam_conflict_parent	12	1	2.54	1.96	0	1	2	4	9	▇▇▅▂▁
fam_conflict_children	27	1	2.05	1.95	0	0	2	3	9	▇▅▂▁▁

# FamilyConflict_Parents %>%  filter(EVENTNAME =="baseline_year_1_arm_1") %>% arrange(SUBJECTKEY)
# FamilyConflict_Children %>%  filter(EVENTNAME =="baseline_year_1_arm_1") %>% arrange(SUBJECTKEY)
# FamilyConflict_sum  %>%  filter(EVENTNAME =="baseline_year_1_arm_1") %>% arrange(SUBJECTKEY)

1.13.3 Prosocial Tendency

Parent Prosocial Behavior Survey and Youth Prosocial Behavior Survey.

# Parent Prosocial Behavior Survey table (PSB01).
ParPS <- paste0(dataFold, "PSB01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# Youth Prosocial Behavior Survey table (ABCD_PSB01).
YouthPS <- paste0(dataFold, "ABCD_PSB01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# Full-join both reporters on participant and visit, then take the mean of the
# three items per reporter. rowMeans() uses its default na.rm = FALSE, so a mean
# is NA when any of its three items is missing.
prosocial_sum <- list(ParPS, YouthPS) %>%
  plyr::join_all(by = c("SUBJECTKEY", "EVENTNAME"), type = "full") %>%
  mutate(
    prosocial_parent_mean = rowMeans(
      dplyr::select(., "PROSOCIAL_Q1_P", "PROSOCIAL_Q2_P", "PROSOCIAL_Q3_P")
    ),
    prosocial_youth_mean = rowMeans(
      dplyr::select(., "PROSOCIAL_Q1_Y", "PROSOCIAL_Q2_Y", "PROSOCIAL_Q3_Y")
    )
  ) %>%
  select(SUBJECTKEY, EVENTNAME, prosocial_parent_mean, prosocial_youth_mean)

# Summarise the baseline visit only.
prosocial_sum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	4
_______________________
Column type frequency:
character	2
numeric	2
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
SUBJECTKEY	0	1	12	16	0	11878	0
EVENTNAME	0	1	21	21	0	1	0

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
prosocial_parent_mean	62	0.99	1.75	0.40	0	1.67	2.00	2	2	▁▁▁▁▇
prosocial_youth_mean	33	1.00	1.68	0.37	0	1.33	1.67	2	2	▁▁▁▂▇

1.14 Parent Sports and Activities

1.14.1 ABCD Sum Scores Parent Sports and Activities Involvement

Each activity total is computed as hours × days per week × 4 weeks × months × years, divided by 24 to express it in days. This method produces a large number of zeros, possibly because 999 ("Don't know") is recoded to 0.

# Parent Sports and Activities Involvement summary-score table (ABCD_SPACSS01).
sport_act <- as_tibble(read.csv(paste0(dataFold, "ABCD_SPACSS01_DATA_TABLE.csv")))

# Recode the questionnaire responses, then derive for every activity a lifetime
# "days" total (hours x days per week x 4 weeks x months x years / 24) plus group
# sums. The work is wrapped in local() so the helpers below do not leak into the
# workspace; only `sport_act_multiplied_sum` is assigned.
sport_act_multiplied_sum <- local({
  # Derived-column prefix (`<prefix>_days`) -> activity code used in the
  # SAI_SS_<CODE>_{TSPENT,PERWK,NMONTH,NYR}_P source columns. The order here is the
  # order in which the `*_days` columns are appended.
  # Listening to music and reading are not included since they are on a different scale.
  activity_code <- c(
    dance = "DANCE", base = "BASE", basket = "BASKET", climb = "CLIMB",
    fball = "FBALL", fhock = "FHOCK", gym = "GYM", ihock = "IHOCK",
    polo = "POLO", iskate = "ISKATE", m_arts = "M_ARTS", lax = "LAX",
    rugby = "RUGBY", skate = "SKATE", sboard = "SBOARD", soc = "SOC",
    surf = "SURF", wpolo = "WPOLO", tennis = "TENNIS", run = "RUN",
    mma = "MMA", vball = "VBALL", yoga = "YOGA", music = "MUSIC",
    art = "ART", drama = "DRAMA", craft = "CRAFTS", chess = "CHESS",
    collect = "COLLECT"
  )

  # Activity groupings (summary based on kerlic's child dev paper). The order
  # inside each group is the order in which the terms are added.
  activity_group <- list(
    phys_ind = c("sboard", "climb", "gym", "iskate", "m_arts", "skate", "dance",
                 "surf", "tennis", "run", "mma", "yoga"),
    phys_team = c("base", "basket", "fhock", "fball", "ihock", "polo", "lax",
                  "rugby", "soc", "wpolo", "vball"),
    art = c("collect", "music", "art", "drama", "craft", "chess")
  )

  # Full source-column name for one activity code and question suffix.
  sai_col <- function(code, suffix) paste0("SAI_SS_", code, "_", suffix)

  # Element-wise left-to-right sum of a set of columns (same as a + b + c + ...),
  # so an NA in any term makes that row's sum NA.
  add_columns <- function(cols) Reduce(`+`, cols)

  recoded <- sport_act %>%
    # Change 999 ("Don't know") to 0: "don't know" is taken to mean that the child
    # doesn't do that activity.
    mutate(across(starts_with("SAI_SS_"), ~ replace(.x, which(.x == 999), 0))) %>%
    # Days-per-week coding: 0 = 0; 1 = 1; 2 = 2; 3 = 3; 4 = 4; 5 = 5; 6 = 6; 7 = 7;
    # 8 = Once every 2 weeks; 9 = One day every month; 10 = Less than one day per month;
    # 999 = Don't know | When an activity was not endorsed, missing values for the
    # "... how many...?" follow-up questions have been replaced with "0".
    # Recode so that 0 = nothing, .125 = less than one day per month,
    # .25 = one day every month, .5 = once every 2 weeks, 1..7 = days per week.
    mutate(across(
      ends_with("_PERWK_P"),
      ~ case_when(
        .x == 10 ~ .125,
        .x == 9 ~ .25,
        .x == 8 ~ .5,
        TRUE ~ as.numeric(.x)
      )
    )) %>%
    # Time-spent coding: 0 = 0; 1 = less than 30 minutes; 2 = 30; 3 = 45; 4 = 60 (1 hr);
    # 5 = 90 (1.5 hrs); 6 = 120 (2 hrs); 7 = 150 (2.5 hrs); 8 = 180 (3 hrs);
    # 9 = greater than 3 hours; 999 = Don't know.
    # Recode to hours, assuming "less than 30 minutes" is .25 hour (15 mins) and
    # "greater than 3 hours" is 4 hours.
    mutate(across(
      ends_with("_TSPENT_P"),
      ~ case_when(
        .x == 1 ~ .25,
        .x == 2 ~ .5,
        .x == 3 ~ .75,
        .x == 4 ~ 1,
        .x == 5 ~ 1.5,
        .x == 6 ~ 2,
        .x == 7 ~ 2.5,
        .x == 8 ~ 3,
        .x == 9 ~ 4,
        TRUE ~ as.numeric(.x)
      )
    ))

  # Fail early with a readable message if an expected source column is absent.
  needed <- as.vector(outer(
    paste0("SAI_SS_", activity_code, "_"),
    c("TSPENT_P", "PERWK_P", "NMONTH_P", "NYR_P"),
    paste0
  ))
  absent <- setdiff(needed, names(recoded))
  if (length(absent) > 0) {
    stop("Missing activity columns: ", paste(absent, collapse = ", "), call. = FALSE)
  }

  # <prefix>_days = hours x days per week x 4 weeks x months x years / 24
  for (prefix in names(activity_code)) {
    code <- activity_code[[prefix]]
    recoded[[paste0(prefix, "_days")]] <-
      recoded[[sai_col(code, "TSPENT_P")]] * recoded[[sai_col(code, "PERWK_P")]] * 4 *
      recoded[[sai_col(code, "NMONTH_P")]] * recoded[[sai_col(code, "NYR_P")]] / 24
  }

  # Group sums of the derived days, then their grand total.
  for (group in names(activity_group)) {
    recoded[[paste0(group, "_days_sum")]] <-
      add_columns(recoded[paste0(activity_group[[group]], "_days")])
  }
  recoded[["sport_act_all_days_sum"]] <-
    recoded[["phys_ind_days_sum"]] + recoded[["phys_team_days_sum"]] + recoded[["art_days_sum"]]

  # Group sums of the recoded days-per-week values, then their grand total.
  for (group in names(activity_group)) {
    perwk_cols <- sai_col(activity_code[activity_group[[group]]], "PERWK_P")
    recoded[[paste0(group, "_daypweek_sum")]] <- add_columns(recoded[perwk_cols])
  }
  recoded[["sport_act_all_daypweek_sum"]] <-
    recoded[["phys_ind_daypweek_sum"]] + recoded[["phys_team_daypweek_sum"]] +
    recoded[["art_daypweek_sum"]]

  recoded
})

# Summarise the baseline visit only.
sport_act_multiplied_sum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	168
_______________________
Column type frequency:
character	6
numeric	162
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
SUBJECTKEY	1	12	16	11878
SRC_SUBJECT_ID	1	16	16	11878
INTERVIEW_DATE	1	9	9	756
SEX	1	1	1	2
EVENTNAME	1	21	21	1
STUDY_COHORT_NAME	1	21	21	1

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
ABCD_SPACSS01_ID	0	1.00	22337.50	3429.03	16399	19368.25	22337.50	25306.75	28276.00	▇▇▇▇▇
DATASET_ID	0	1.00	34516.00	0.00	34516	34516.00	34516.00	34516.00	34516.00	▁▁▇▁▁
INTERVIEW_AGE	0	1.00	118.98	7.50	107	112.00	119.00	126.00	133.00	▇▆▆▆▆
SAI_SS_DANCE_NYR_P	24	1.00	0.64	1.44	0	0.00	0.00	0.00	10.00	▇▁▁▁▁
SAI_SS_DANCE_NMONTH_P	26	1.00	1.93	3.68	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_DANCE_PERWK_P	20	1.00	0.42	0.89	0	0.00	0.00	0.25	7.00	▇▁▁▁▁
SAI_SS_DANCE_TSPENT_P	30	1.00	0.26	0.51	0	0.00	0.00	0.00	4.00	▇▂▁▁▁
SAI_SS_BASE_NYR_P	17	1.00	0.74	1.50	0	0.00	0.00	1.00	10.00	▇▁▁▁▁
SAI_SS_BASE_NMONTH_P	23	1.00	1.14	2.16	0	0.00	0.00	2.00	12.00	▇▂▁▁▁
SAI_SS_BASE_PERWK_P	23	1.00	0.71	1.26	0	0.00	0.00	2.00	7.00	▇▁▂▁▁
SAI_SS_BASE_TSPENT_P	27	1.00	0.37	0.66	0	0.00	0.00	1.00	4.00	▇▂▁▁▁
SAI_SS_BASKET_NYR_P	21	1.00	0.59	1.23	0	0.00	0.00	1.00	10.00	▇▁▁▁▁
SAI_SS_BASKET_NMONTH_P	25	1.00	1.07	2.14	0	0.00	0.00	1.00	12.00	▇▂▁▁▁
SAI_SS_BASKET_PERWK_P	26	1.00	0.62	1.17	0	0.00	0.00	1.00	7.00	▇▂▁▁▁
SAI_SS_BASKET_TSPENT_P	50	1.00	0.29	0.54	0	0.00	0.00	0.50	4.00	▇▂▁▁▁
SAI_SS_CLIMB_NYR_P	15	1.00	0.08	0.53	0	0.00	0.00	0.00	10.00	▇▁▁▁▁
SAI_SS_CLIMB_NMONTH_P	14	1.00	0.16	1.09	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_CLIMB_PERWK_P	19	1.00	0.06	0.40	0	0.00	0.00	0.00	7.00	▇▁▁▁▁
SAI_SS_CLIMB_TSPENT_P	19	1.00	0.04	0.24	0	0.00	0.00	0.00	4.00	▇▁▁▁▁
SAI_SS_FHOCK_NYR_P	10	1.00	0.01	0.18	0	0.00	0.00	0.00	7.00	▇▁▁▁▁
SAI_SS_FHOCK_NMONTH_P	11	1.00	0.02	0.34	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_FHOCK_PERWK_P	10	1.00	0.01	0.18	0	0.00	0.00	0.00	5.00	▇▁▁▁▁
SAI_SS_FHOCK_TSPENT_P	10	1.00	0.01	0.10	0	0.00	0.00	0.00	3.00	▇▁▁▁▁
SAI_SS_FBALL_NYR_P	18	1.00	0.25	0.86	0	0.00	0.00	0.00	10.00	▇▁▁▁▁
SAI_SS_FBALL_NMONTH_P	21	1.00	0.44	1.42	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_FBALL_PERWK_P	20	1.00	0.35	1.10	0	0.00	0.00	0.00	7.00	▇▁▁▁▁
SAI_SS_FBALL_TSPENT_P	24	1.00	0.17	0.51	0	0.00	0.00	0.00	4.00	▇▁▁▁▁
SAI_SS_GYM_NYR_P	15	1.00	0.49	1.16	0	0.00	0.00	0.00	9.00	▇▁▁▁▁
SAI_SS_GYM_NMONTH_P	19	1.00	1.71	3.59	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_GYM_PERWK_P	21	1.00	0.33	0.77	0	0.00	0.00	0.00	7.00	▇▁▁▁▁
SAI_SS_GYM_TSPENT_P	31	1.00	0.26	0.56	0	0.00	0.00	0.00	4.00	▇▂▁▁▁
SAI_SS_IHOCK_NYR_P	10	1.00	0.08	0.56	0	0.00	0.00	0.00	9.00	▇▁▁▁▁
SAI_SS_IHOCK_NMONTH_P	13	1.00	0.14	1.00	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_IHOCK_PERWK_P	10	1.00	0.07	0.48	0	0.00	0.00	0.00	6.00	▇▁▁▁▁
SAI_SS_IHOCK_TSPENT_P	11	1.00	0.03	0.20	0	0.00	0.00	0.00	4.00	▇▁▁▁▁
SAI_SS_POLO_NYR_P	11	1.00	0.08	0.52	0	0.00	0.00	0.00	10.00	▇▁▁▁▁
SAI_SS_POLO_NMONTH_P	14	1.00	0.22	1.38	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_POLO_PERWK_P	14	1.00	0.05	0.36	0	0.00	0.00	0.00	7.00	▇▁▁▁▁
SAI_SS_POLO_TSPENT_P	17	1.00	0.04	0.25	0	0.00	0.00	0.00	4.00	▇▁▁▁▁
SAI_SS_ISKATE_NYR_P	11	1.00	0.13	0.64	0	0.00	0.00	0.00	9.00	▇▁▁▁▁
SAI_SS_ISKATE_NMONTH_P	16	1.00	0.27	1.38	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_ISKATE_PERWK_P	18	1.00	0.08	0.40	0	0.00	0.00	0.00	7.00	▇▁▁▁▁
SAI_SS_ISKATE_TSPENT_P	19	1.00	0.06	0.26	0	0.00	0.00	0.00	4.00	▇▁▁▁▁
SAI_SS_M_ARTS_NYR_P	16	1.00	0.36	0.97	0	0.00	0.00	0.00	10.00	▇▁▁▁▁
SAI_SS_M_ARTS_NMONTH_P	23	1.00	1.49	3.62	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_M_ARTS_PERWK_P	19	1.00	0.36	0.91	0	0.00	0.00	0.00	7.00	▇▁▁▁▁
SAI_SS_M_ARTS_TSPENT_P	25	1.00	0.17	0.40	0	0.00	0.00	0.00	4.00	▇▁▁▁▁
SAI_SS_LAX_NYR_P	10	1.00	0.06	0.41	0	0.00	0.00	0.00	8.00	▇▁▁▁▁
SAI_SS_LAX_NMONTH_P	12	1.00	0.11	0.76	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_LAX_PERWK_P	15	1.00	0.07	0.42	0	0.00	0.00	0.00	5.00	▇▁▁▁▁
SAI_SS_LAX_TSPENT_P	13	1.00	0.04	0.23	0	0.00	0.00	0.00	4.00	▇▁▁▁▁
SAI_SS_RUGBY_NYR_P	10	1.00	0.01	0.14	0	0.00	0.00	0.00	7.00	▇▁▁▁▁
SAI_SS_RUGBY_NMONTH_P	10	1.00	0.01	0.28	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_RUGBY_PERWK_P	10	1.00	0.01	0.15	0	0.00	0.00	0.00	7.00	▇▁▁▁▁
SAI_SS_RUGBY_TSPENT_P	10	1.00	0.00	0.09	0	0.00	0.00	0.00	4.00	▇▁▁▁▁
SAI_SS_SKATE_NYR_P	13	1.00	0.09	0.56	0	0.00	0.00	0.00	10.00	▇▁▁▁▁
SAI_SS_SKATE_NMONTH_P	17	1.00	0.22	1.33	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_SKATE_PERWK_P	14	1.00	0.10	0.62	0	0.00	0.00	0.00	7.00	▇▁▁▁▁
SAI_SS_SKATE_TSPENT_P	18	1.00	0.03	0.20	0	0.00	0.00	0.00	4.00	▇▁▁▁▁
SAI_SS_SBOARD_NYR_P	10	1.00	0.33	1.25	0	0.00	0.00	0.00	10.00	▇▁▁▁▁
SAI_SS_SBOARD_NMONTH_P	20	1.00	0.27	0.99	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_SBOARD_PERWK_P	12	1.00	0.10	0.42	0	0.00	0.00	0.00	7.00	▇▁▁▁▁
SAI_SS_SBOARD_TSPENT_P	16	1.00	0.25	0.90	0	0.00	0.00	0.00	4.00	▇▁▁▁▁
SAI_SS_SOC_NYR_P	31	1.00	1.21	1.85	0	0.00	0.00	2.00	10.00	▇▁▁▁▁
SAI_SS_SOC_NMONTH_P	44	1.00	2.04	3.05	0	0.00	0.00	4.00	12.00	▇▂▁▁▁
SAI_SS_SOC_PERWK_P	38	1.00	0.95	1.27	0	0.00	0.00	2.00	7.00	▇▂▂▁▁
SAI_SS_SOC_TSPENT_P	58	1.00	0.47	0.62	0	0.00	0.00	1.00	4.00	▇▅▁▁▁
SAI_SS_SURF_NYR_P	10	1.00	0.01	0.21	0	0.00	0.00	0.00	9.00	▇▁▁▁▁
SAI_SS_SURF_NMONTH_P	12	1.00	0.02	0.31	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_SURF_PERWK_P	10	1.00	0.01	0.18	0	0.00	0.00	0.00	5.00	▇▁▁▁▁
SAI_SS_SURF_TSPENT_P	11	1.00	0.01	0.15	0	0.00	0.00	0.00	4.00	▇▁▁▁▁
SAI_SS_WPOLO_NYR_P	22	1.00	1.16	2.14	0	0.00	0.00	2.00	10.00	▇▁▁▁▁
SAI_SS_WPOLO_NMONTH_P	42	1.00	1.76	3.24	0	0.00	0.00	3.00	12.00	▇▁▁▁▁
SAI_SS_WPOLO_PERWK_P	35	1.00	0.78	1.43	0	0.00	0.00	1.00	7.00	▇▁▁▁▁
SAI_SS_WPOLO_TSPENT_P	47	1.00	0.31	0.56	0	0.00	0.00	0.50	4.00	▇▂▁▁▁
SAI_SS_TENNIS_NYR_P	15	1.00	0.14	0.63	0	0.00	0.00	0.00	10.00	▇▁▁▁▁
SAI_SS_TENNIS_NMONTH_P	17	1.00	0.31	1.46	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_TENNIS_PERWK_P	19	1.00	0.12	0.54	0	0.00	0.00	0.00	7.00	▇▁▁▁▁
SAI_SS_TENNIS_TSPENT_P	21	1.00	0.07	0.29	0	0.00	0.00	0.00	4.00	▇▁▁▁▁
SAI_SS_RUN_NYR_P	18	1.00	0.14	0.62	0	0.00	0.00	0.00	9.00	▇▁▁▁▁
SAI_SS_RUN_NMONTH_P	20	1.00	0.29	1.27	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_RUN_PERWK_P	24	1.00	0.16	0.68	0	0.00	0.00	0.00	7.00	▇▁▁▁▁
SAI_SS_RUN_TSPENT_P	20	1.00	0.08	0.31	0	0.00	0.00	0.00	4.00	▇▁▁▁▁
SAI_SS_MMA_NYR_P	16	1.00	0.06	0.45	0	0.00	0.00	0.00	7.00	▇▁▁▁▁
SAI_SS_MMA_NMONTH_P	20	1.00	0.17	1.16	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_MMA_PERWK_P	16	1.00	0.08	0.49	0	0.00	0.00	0.00	7.00	▇▁▁▁▁
SAI_SS_MMA_TSPENT_P	20	1.00	0.04	0.24	0	0.00	0.00	0.00	4.00	▇▁▁▁▁
SAI_SS_VBALL_NYR_P	13	1.00	0.04	0.27	0	0.00	0.00	0.00	7.00	▇▁▁▁▁
SAI_SS_VBALL_NMONTH_P	15	1.00	0.09	0.66	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_VBALL_PERWK_P	17	1.00	0.05	0.36	0	0.00	0.00	0.00	6.00	▇▁▁▁▁
SAI_SS_VBALL_TSPENT_P	14	1.00	0.03	0.20	0	0.00	0.00	0.00	3.00	▇▁▁▁▁
SAI_SS_YOGA_NYR_P	13	1.00	0.04	0.37	0	0.00	0.00	0.00	10.00	▇▁▁▁▁
SAI_SS_YOGA_NMONTH_P	13	1.00	0.12	0.96	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_YOGA_PERWK_P	14	1.00	0.03	0.30	0	0.00	0.00	0.00	7.00	▇▁▁▁▁
SAI_SS_YOGA_TSPENT_P	16	1.00	0.02	0.12	0	0.00	0.00	0.00	3.00	▇▁▁▁▁
SAI_SS_MUSIC_NYR_P	25	1.00	0.90	1.48	0	0.00	0.00	1.00	10.00	▇▁▁▁▁
SAI_SS_MUSIC_NMONTH_P	39	1.00	3.38	4.65	0	0.00	0.00	8.00	12.00	▇▁▁▁▂
SAI_SS_MUSIC_PERWK_P	32	1.00	0.84	1.44	0	0.00	0.00	1.00	7.00	▇▁▁▁▁
SAI_SS_MUSIC_TSPENT_P	42	1.00	0.29	0.42	0	0.00	0.00	0.50	4.00	▇▁▁▁▁
SAI_SS_ART_NYR_P	27	1.00	0.78	1.94	0	0.00	0.00	0.00	10.00	▇▁▁▁▁
SAI_SS_ART_NMONTH_P	36	1.00	1.60	3.67	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_ART_PERWK_P	41	1.00	0.54	1.39	0	0.00	0.00	0.00	7.00	▇▁▁▁▁
SAI_SS_ART_TSPENT_P	38	1.00	0.18	0.45	0	0.00	0.00	0.00	4.00	▇▁▁▁▁
SAI_SS_DRAMA_NYR_P	14	1.00	0.23	0.83	0	0.00	0.00	0.00	10.00	▇▁▁▁▁
SAI_SS_DRAMA_NMONTH_P	29	1.00	0.53	1.83	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_DRAMA_PERWK_P	27	1.00	0.24	0.83	0	0.00	0.00	0.00	7.00	▇▁▁▁▁
SAI_SS_DRAMA_TSPENT_P	24	1.00	0.16	0.54	0	0.00	0.00	0.00	4.00	▇▁▁▁▁
SAI_SS_CRAFTS_NYR_P	21	1.00	0.28	1.12	0	0.00	0.00	0.00	10.00	▇▁▁▁▁
SAI_SS_CRAFTS_NMONTH_P	28	1.00	0.62	2.40	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_CRAFTS_PERWK_P	27	1.00	0.21	0.85	0	0.00	0.00	0.00	7.00	▇▁▁▁▁
SAI_SS_CRAFTS_TSPENT_P	27	1.00	0.08	0.30	0	0.00	0.00	0.00	4.00	▇▁▁▁▁
SAI_SS_CHESS_NYR_P	22	1.00	0.27	0.96	0	0.00	0.00	0.00	10.00	▇▁▁▁▁
SAI_SS_CHESS_NMONTH_P	28	1.00	0.70	2.36	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_CHESS_PERWK_P	34	1.00	0.17	0.66	0	0.00	0.00	0.00	7.00	▇▁▁▁▁
SAI_SS_CHESS_TSPENT_P	33	1.00	0.10	0.32	0	0.00	0.00	0.00	4.00	▇▁▁▁▁
SAI_SS_COLLECT_NYR_P	17	1.00	0.21	0.93	0	0.00	0.00	0.00	10.00	▇▁▁▁▁
SAI_SS_COLLECT_NMONTH_P	28	1.00	0.53	2.32	0	0.00	0.00	0.00	12.00	▇▁▁▁▁
SAI_SS_COLLECT_PERWK_P	24	1.00	0.17	0.86	0	0.00	0.00	0.00	7.00	▇▁▁▁▁
SAI_SS_COLLECT_TSPENT_P	23	1.00	0.04	0.20	0	0.00	0.00	0.00	4.00	▇▁▁▁▁
SAI_SS_LMUSIC_YEARS_P	1220	0.90	3.84	3.22	0	1.00	3.00	6.00	10.00	▇▃▂▂▃
SAI_SS_LMUSIC_HOURS_P	1697	0.86	4.64	7.59	0	1.00	3.00	6.00	168.00	▇▁▁▁▁
SAI_SS_READ_YEARS_P	591	0.95	2.72	2.39	0	0.00	3.00	4.00	10.00	▇▅▃▁▁
SAI_SS_READ_HOURS_P	831	0.93	4.62	9.22	0	0.00	3.00	6.00	168.00	▇▁▁▁▁
SPORTS_ACTIVITY_SS_LMUSIC_P	2069	0.83	169.76	369.54	1	3.00	6.00	9.00	999.00	▇▁▁▁▂
SPORTS_ACTIVITY_SS_READ_P	3310	0.72	91.01	278.58	1	4.00	6.00	8.00	999.00	▇▁▁▁▁
dance_days	54	1.00	2.29	12.64	0	0.00	0.00	0.00	560.00	▇▁▁▁▁
base_days	52	1.00	2.58	9.11	0	0.00	0.00	0.67	266.67	▇▁▁▁▁
basket_days	74	0.99	1.42	5.58	0	0.00	0.00	0.00	147.00	▇▁▁▁▁
climb_days	28	1.00	0.14	1.93	0	0.00	0.00	0.00	128.00	▇▁▁▁▁
fball_days	35	1.00	1.02	6.11	0	0.00	0.00	0.00	245.00	▇▁▁▁▁
fhock_days	11	1.00	0.02	0.67	0	0.00	0.00	0.00	55.00	▇▁▁▁▁
gym_days	54	1.00	1.99	12.01	0	0.00	0.00	0.00	392.00	▇▁▁▁▁
ihock_days	14	1.00	0.35	3.31	0	0.00	0.00	0.00	75.00	▇▁▁▁▁
polo_days	23	1.00	0.17	2.54	0	0.00	0.00	0.00	135.00	▇▁▁▁▁
iskate_days	28	1.00	0.21	2.14	0	0.00	0.00	0.00	108.00	▇▁▁▁▁
m_arts_days	48	1.00	1.35	5.69	0	0.00	0.00	0.00	175.00	▇▁▁▁▁
lax_days	18	1.00	0.16	1.68	0	0.00	0.00	0.00	60.00	▇▁▁▁▁
rugby_days	10	1.00	0.02	0.53	0	0.00	0.00	0.00	26.67	▇▁▁▁▁
skate_days	23	1.00	0.29	3.97	0	0.00	0.00	0.00	261.33	▇▁▁▁▁
sboard_days	28	1.00	0.85	4.93	0	0.00	0.00	0.00	128.00	▇▁▁▁▁
soc_days	107	0.99	3.83	10.12	0	0.00	0.00	3.00	196.00	▇▁▁▁▁
surf_days	13	1.00	0.03	0.87	0	0.00	0.00	0.00	48.00	▇▁▁▁▁
wpolo_days	90	0.99	2.97	10.38	0	0.00	0.00	1.25	256.00	▇▁▁▁▁
tennis_days	31	1.00	0.31	4.56	0	0.00	0.00	0.00	420.00	▇▁▁▁▁
run_days	36	1.00	0.30	2.29	0	0.00	0.00	0.00	96.00	▇▁▁▁▁
mma_days	25	1.00	0.28	3.06	0	0.00	0.00	0.00	175.00	▇▁▁▁▁
vball_days	20	1.00	0.06	0.73	0	0.00	0.00	0.00	44.00	▇▁▁▁▁
yoga_days	17	1.00	0.07	1.05	0	0.00	0.00	0.00	48.00	▇▁▁▁▁
music_days	82	0.99	2.34	7.09	0	0.00	0.00	1.67	224.00	▇▁▁▁▁
art_days	70	0.99	3.68	17.62	0	0.00	0.00	0.00	504.00	▇▁▁▁▁
drama_days	46	1.00	0.69	5.10	0	0.00	0.00	0.00	280.00	▇▁▁▁▁
craft_days	43	1.00	1.11	8.54	0	0.00	0.00	0.00	280.00	▇▁▁▁▁
chess_days	59	1.00	0.58	4.60	0	0.00	0.00	0.00	200.00	▇▁▁▁▁
collect_days	39	1.00	0.62	7.08	0	0.00	0.00	0.00	448.00	▇▁▁▁▁
phys_ind_days_sum	259	0.98	8.07	21.57	0	0.00	1.00	8.00	560.00	▇▁▁▁▁
phys_team_days_sum	322	0.97	12.60	22.06	0	0.00	4.00	15.50	367.67	▇▁▁▁▁
art_days_sum	263	0.98	8.85	26.38	0	0.00	0.33	6.00	600.00	▇▁▁▁▁
sport_act_all_days_sum	735	0.94	29.34	43.86	0	3.33	15.33	38.00	826.17	▇▁▁▁▁
phys_ind_daypweek_sum	93	0.99	1.84	2.42	0	0.00	1.00	3.00	44.75	▇▁▁▁▁
phys_team_daypweek_sum	112	0.99	3.67	3.75	0	0.00	3.00	6.00	36.00	▇▁▁▁▁
art_daypweek_sum	118	0.99	2.15	3.23	0	0.00	1.00	3.00	36.00	▇▁▁▁▁
sport_act_all_daypweek_sum	273	0.98	7.63	6.40	0	3.00	6.12	11.00	103.25	▇▁▁▁▁

1.14.2 ABCD Sum Scores Parent Sports and Activities Involvement

method used by Kerlic et al. They focus only on days per week. The data were converted differently.

# Parent Sports and Activities Involvement summary-score table (ABCD_SPACSS01).
sport_act <- as_tibble(read.csv(paste0(dataFold, "ABCD_SPACSS01_DATA_TABLE.csv")))

# Days-per-week group sums after recoding the frequency responses to an ordinal
# rank. Only participant, visit and the four `*_daypweek_sum` columns are kept.
# Each sum is a plain `+` chain, so it is NA when any contributing item is NA.
sport_act_kerlic_sum <- sport_act %>%
  # Change 999 ("Don't know") to 0: "don't know" is taken to mean that the child
  # doesn't do that activity.
  mutate(across(starts_with("SAI_SS_"), ~ replace(.x, which(.x == 999), 0))) %>%
  # Raw coding: 0 = 0; 1 = 1; 2 = 2; 3 = 3; 4 = 4; 5 = 5; 6 = 6; 7 = 7;
  # 8 = Once every 2 weeks; 9 = One day every month; 10 = Less than one day per month;
  # 999 = Don't know | When an activity was not endorsed, missing values for the
  # "... how many...?" follow-up questions have been replaced with "0".
  # Recode to a frequency rank: 0 = nothing, 1 = less than one day per month,
  # 2 = one day every month, 3 = once every 2 weeks, 4..10 = 1..7 days per week.
  mutate(across(
    ends_with("_PERWK_P"),
    ~ case_when(
      .x == 10 ~ 1,
      .x == 9 ~ 2,
      .x == 8 ~ 3,
      .x == 1 ~ 4,
      .x == 2 ~ 5,
      .x == 3 ~ 6,
      .x == 4 ~ 7,
      .x == 5 ~ 8,
      .x == 6 ~ 9,
      .x == 7 ~ 10,
      TRUE ~ as.numeric(.x)
    )
  )) %>%
  mutate(
    phys_ind_daypweek_sum =
      SAI_SS_SBOARD_PERWK_P + SAI_SS_CLIMB_PERWK_P + SAI_SS_GYM_PERWK_P +
      SAI_SS_ISKATE_PERWK_P + SAI_SS_M_ARTS_PERWK_P + SAI_SS_SKATE_PERWK_P +
      SAI_SS_DANCE_PERWK_P + SAI_SS_SURF_PERWK_P + SAI_SS_TENNIS_PERWK_P +
      SAI_SS_RUN_PERWK_P + SAI_SS_MMA_PERWK_P + SAI_SS_YOGA_PERWK_P,
    phys_team_daypweek_sum =
      SAI_SS_BASE_PERWK_P + SAI_SS_BASKET_PERWK_P + SAI_SS_FHOCK_PERWK_P +
      SAI_SS_FBALL_PERWK_P + SAI_SS_IHOCK_PERWK_P + SAI_SS_POLO_PERWK_P +
      SAI_SS_LAX_PERWK_P + SAI_SS_RUGBY_PERWK_P + SAI_SS_SOC_PERWK_P +
      SAI_SS_WPOLO_PERWK_P + SAI_SS_VBALL_PERWK_P,
    art_daypweek_sum =
      SAI_SS_COLLECT_PERWK_P + SAI_SS_MUSIC_PERWK_P + SAI_SS_ART_PERWK_P +
      SAI_SS_DRAMA_PERWK_P + SAI_SS_CRAFTS_PERWK_P + SAI_SS_CHESS_PERWK_P,
    sport_act_all_daypweek_sum =
      phys_ind_daypweek_sum + phys_team_daypweek_sum + art_daypweek_sum
  ) %>%
  select(SUBJECTKEY, EVENTNAME, ends_with("_daypweek_sum"))

# Summarise the four sums at the baseline visit only.
sport_act_kerlic_sum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-SUBJECTKEY, -EVENTNAME) %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	4
_______________________
Column type frequency:
numeric	4
________________________
Group variables	None

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p25	p50	p75	p100	hist
phys_ind_daypweek_sum	93	0.99	4.89	5.64	0	4	8	70	▇▁▁▁▁
phys_team_daypweek_sum	112	0.99	8.12	7.60	0	6	13	54	▇▃▁▁▁
art_daypweek_sum	118	0.99	4.96	6.24	0	4	8	54	▇▁▁▁▁
sport_act_all_daypweek_sum	273	0.98	17.89	13.72	8	16	26	170	▇▁▁▁▁

1.14.3 physical activity

ABCD Youth Risk Behavior Survey Exercise Physical Activity

# Youth Risk Behavior Survey table (ABCD_YRB01): keep participant, visit and the
# single physical-activity item, renamed to `physc_act_days`.
phyc_act <- paste0(dataFold, "ABCD_YRB01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  select(SUBJECTKEY, EVENTNAME, physc_act_days = PHYSICAL_ACTIVITY1_Y)

# Item wording: During the past 7 days, on how many days were you physically active for a total of at least 60 minutes per day? (Add up all the time you spent in any kind of physical activity that increased your heart rate and made you breathe hard some of the time)

# Summarise the item at the baseline visit only.
phyc_act %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(physc_act_days) %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	1
_______________________
Column type frequency:
numeric	1
________________________
Group variables	None

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
physc_act_days	28	1	3.49	2.32	0	2	3	5	7	▇▅▇▅▇

1.14.4 BMI and Waist

ABCD Youth Anthropometrics Modified From PhenX. The values are questionable, even after deleting outliers, so we ended up not using them.

# Load the anthropometrics table as a tibble.
anthro <- read.csv(paste0(dataFold, "ABCD_ANT01_DATA_TABLE.csv")) %>%
  as_tibble()

# Tabulate the ANTHROWEIGHTCAST flag (across all events) before deriving BMI.
anthro %>% count(ANTHROWEIGHTCAST)

## # A tibble: 3 × 2
##   ANTHROWEIGHTCAST     n
##              <int> <int>
## 1                0 28185
## 2                1    82
## 3               NA  1417

# BMI is set to NA for rows flagged ANTHROWEIGHTCAST == 1 (82 rows in the
# count above), because a cast makes the weight-based BMI inaccurate. Rows
# whose flag is 0 or missing keep their computed BMI.
# NOTE(review): the * 703 factor presumably assumes weight in pounds and
# height in inches -- confirm against the data dictionary.
# Outlier filters that were tried and left disabled:
#   filter(ANTHROHEIGHTCALC > 30)  # unusually short; potential data-entry error
#   filter(ANTHROWEIGHTCALC < 500)
#   filter(!rstatix::is_outlier(ANTHROHEIGHTCALC) & !rstatix::is_outlier(ANTHROWEIGHTCALC))
bmi_waist <- anthro %>%
  mutate(
    no_cast = is.na(ANTHROWEIGHTCAST) | ANTHROWEIGHTCAST == 0,
    bmi = ifelse(no_cast, (ANTHROWEIGHTCALC / (ANTHROHEIGHTCALC^2)) * 703, NA)
  ) %>%
  # The helper column no_cast is dropped here; waist is renamed on selection.
  select(SUBJECTKEY, EVENTNAME, bmi, waist = ANTHRO_WAIST_CM,
         ANTHROWEIGHTCALC, ANTHROHEIGHTCALC)
  
# anthro %>% 
#   mutate(bmi = ifelse(ANTHROWEIGHTCAST == 0 | is.na(ANTHROWEIGHTCAST),
#                       (ANTHROWEIGHTCALC/(ANTHROHEIGHTCALC^2))*703,NA)) %>%
#   rename(waist = ANTHRO_WAIST_CM) %>%
#   arrange(desc(bmi)) %>% glimpse()

# bmi_waist %>% arrange(desc(bmi)) %>% glimpse()
# bmi_waist %>% arrange(bmi) %>% glimpse()
# 
# anthro %>% rstatix::identify_outliers(ANTHROHEIGHTCALC) %>%  arrange(ANTHROHEIGHTCALC) %>% View()
# anthro %>% rstatix::identify_outliers(ANTHROHEIGHTCALC) %>%  arrange(desc(ANTHROHEIGHTCALC)) %>% View()

# boxplot(anthro$ANTHROHEIGHTCALC)$out
# boxplot(anthro$ANTHROWEIGHTCALC)$out
# 
# boxplot(bmi_waist$ANTHROWEIGHTCALC)$out

# Baseline-only summary of BMI, waist and the raw weight/height columns.
bmi_waist %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11878
Number of columns	4
_______________________
Column type frequency:
numeric	4
________________________
Group variables	None

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
bmi	39	1	19.29	36.65	2.08	15.94	17.64	20.65	3017.04	▇▁▁▁▁
waist	17	1	26.48	4.30	0.00	23.50	25.50	28.70	73.00	▁▇▂▁▁
ANTHROWEIGHTCALC	11	1	82.56	23.76	0.00	66.05	76.50	93.00	272.00	▁▇▁▁▁
ANTHROHEIGHTCALC	9	1	55.24	3.33	0.00	53.00	55.10	57.20	82.00	▁▁▁▇▁

1.15 Join data

# Full-join every prepared table on subject and event, then exclude subjects
# flagged with eyesight problems (visionProb == 1; missing flags are kept) and
# drop the flag column. bmi_waist is not part of the join.
all_sum_vars <- list(
  MriandSite, ACSselected, sumCog, CBCLPrecomputedSelected,
  ASRPrecomputedSelected, maniaParent, BISBAS,
  UPPS, sleepSum, youthScreenSum,
  momSubstanceUse, adversitySum, bilingualSum,
  demograpSum, ResidHistDer, NeighboSafety,
  school_risk_sum, ParentMonitoring, FamilyConflict_sum, prosocial_sum,
  sport_act_kerlic_sum, phyc_act
) %>%
  plyr::join_all(by = c("SUBJECTKEY", "EVENTNAME"), type = "full") %>%
  filter(is.na(visionProb) | visionProb != 1) %>%
  select(-visionProb)


# Baseline-only summary of the joined table, without its first two columns.
all_sum_vars %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  select(-c(1, 2)) %>%
  skimr::skim()

Data summary
Name	Piped data
Number of rows	11852
Number of columns	121
_______________________
Column type frequency:
character	2
factor	13
numeric	106
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
SITE_ID_L	90	0.99	6	6	0	22	0
MRI_INFO_DEVICESERIALNUMBER	90	0.99	0	12	201	30	0

Variable type: factor

skim_variable	n_missing	complete_rate	ordered	n_unique	top_counts
SEX	0	1.00	FALSE	2	M: 6190, F: 5662
RACE_ETHNICITY	2	1.00	FALSE	5	Whi: 6175, His: 2403, Bla: 1776, Oth: 1243
REL_FAMILY_ID	0	1.00	FALSE	9834	373: 5, 749: 4, 872: 4, 11: 3
tobacco_before_preg	276	0.98	FALSE	2	0: 9966, 1: 1610
tobacco_after_preg	263	0.98	FALSE	2	0: 10968, 1: 621
alcohol_before_preg	681	0.94	FALSE	2	0: 8294, 1: 2877
alcohol_after_preg	291	0.98	FALSE	2	0: 11245, 1: 316
marijuana_before_preg	337	0.97	FALSE	2	0: 10829, 1: 686
marijuana_after_preg	275	0.98	FALSE	2	0: 11333, 1: 244
deveplopment_prematurity	145	0.99	FALSE	2	0: 9504, 1: 2203
bilingual_status	1028	0.91	FALSE	2	0: 6746, 1: 4078
bilingual_degree	1028	0.91	FALSE	3	0: 6746, 2: 2538, 1: 1540
marital	96	0.99	FALSE	6	mar: 7977, nev: 1455, div: 1078, liv: 685

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
INTERVIEW_AGE	0	1.00	118.98	7.49	107.00	112.00	119.00	126.00	133.00	▇▆▆▆▆
ACS_RAKED_PROPENSITY_SCORE	0	1.00	691.39	351.18	161.36	448.94	619.31	821.83	1778.92	▅▇▂▂▁
NIHTBX_FLANKER_UNCORRECTED	156	0.99	94.00	9.14	51.00	89.00	95.00	100.00	116.00	▁▁▃▇▂
NIHTBX_CARDSORT_UNCORRECTED	155	0.99	92.52	9.51	50.00	88.00	93.00	99.00	120.00	▁▁▆▇▁
NIHTBX_PATTERN_UNCORRECTED	174	0.99	88.06	14.58	30.00	80.00	88.00	99.00	140.00	▁▃▇▅▁
NIHTBX_PICVOCAB_UNCORRECTED	150	0.99	84.46	8.11	29.00	79.00	84.00	90.00	119.00	▁▁▇▇▁
NIHTBX_READING_UNCORRECTED	164	0.99	90.86	6.90	59.00	87.00	91.00	95.00	119.00	▁▁▇▂▁
NIHTBX_PICTURE_UNCORRECTED	162	0.99	102.81	12.08	76.00	94.00	102.00	111.00	136.00	▃▇▇▅▁
PEA_RAVLT_LD_TRIAL_VII_TC	263	0.98	9.18	3.20	0.00	7.00	9.00	12.00	15.00	▁▃▇▇▃
NIHTBX_LIST_UNCORRECTED	199	0.98	96.65	12.09	36.00	90.00	97.00	105.00	136.00	▁▁▅▇▁
LMT_SCR_PERC_CORRECT	336	0.97	0.59	0.17	0.00	0.47	0.56	0.72	1.00	▁▂▇▅▂
PEA_WISCV_TRS	243	0.98	17.91	3.84	0.00	15.00	18.00	20.00	32.00	▁▁▇▅▁
NIHTBX_FLUIDCOMP_UNCORRECTED	238	0.98	91.55	10.65	44.00	85.00	92.00	99.00	131.00	▁▂▇▅▁
NIHTBX_CRYST_UNCORRECTED	182	0.98	86.37	7.06	51.00	82.00	86.00	91.00	115.00	▁▁▇▃▁
NIHTBX_TOTALCOMP_UNCORRECTED	242	0.98	86.23	9.13	44.00	81.00	87.00	92.00	117.00	▁▂▇▇▁
CBCL_SCR_SYN_ANXDEP_R	8	1.00	2.52	3.06	0.00	0.00	1.00	4.00	26.00	▇▁▁▁▁
CBCL_SCR_SYN_WITHDEP_R	8	1.00	1.04	1.71	0.00	0.00	0.00	1.00	15.00	▇▁▁▁▁
CBCL_SCR_SYN_SOMATIC_R	8	1.00	1.49	1.95	0.00	0.00	1.00	2.00	16.00	▇▁▁▁▁
CBCL_SCR_SYN_SOCIAL_R	8	1.00	1.63	2.28	0.00	0.00	1.00	2.00	18.00	▇▁▁▁▁
CBCL_SCR_SYN_THOUGHT_R	8	1.00	1.62	2.20	0.00	0.00	1.00	2.00	18.00	▇▁▁▁▁
CBCL_SCR_SYN_ATTENTION_R	8	1.00	2.98	3.49	0.00	0.00	2.00	5.00	20.00	▇▂▁▁▁
CBCL_SCR_SYN_RULEBREAK_R	8	1.00	1.19	1.86	0.00	0.00	0.00	2.00	20.00	▇▁▁▁▁
CBCL_SCR_SYN_AGGRESSIVE_R	8	1.00	3.26	4.35	0.00	0.00	2.00	5.00	36.00	▇▁▁▁▁
CBCL_SCR_SYN_INTERNAL_R	8	1.00	5.05	5.53	0.00	1.00	3.00	7.00	51.00	▇▁▁▁▁
CBCL_SCR_SYN_EXTERNAL_R	8	1.00	4.46	5.87	0.00	0.00	2.00	6.00	49.00	▇▁▁▁▁
CBCL_SCR_SYN_TOTPROB_R	8	1.00	18.19	17.98	0.00	5.00	13.00	25.00	139.00	▇▂▁▁▁
CBCL_SCR_DSM5_DEPRESS_R	8	1.00	1.27	2.01	0.00	0.00	0.00	2.00	19.00	▇▁▁▁▁
CBCL_SCR_DSM5_ANXDISORD_R	8	1.00	2.06	2.43	0.00	0.00	1.00	3.00	17.00	▇▂▁▁▁
CBCL_SCR_DSM5_SOMATICPR_R	8	1.00	1.08	1.51	0.00	0.00	0.00	2.00	11.00	▇▁▁▁▁
CBCL_SCR_DSM5_ADHD_R	8	1.00	2.62	2.97	0.00	0.00	2.00	4.00	14.00	▇▃▂▁▁
CBCL_SCR_DSM5_OPPOSIT_R	8	1.00	1.77	2.04	0.00	0.00	1.00	3.00	10.00	▇▂▁▁▁
CBCL_SCR_DSM5_CONDUCT_R	8	1.00	1.28	2.36	0.00	0.00	0.00	2.00	25.00	▇▁▁▁▁
CBCL_SCR_07_SCT_R	8	1.00	0.52	1.00	0.00	0.00	0.00	1.00	8.00	▇▁▁▁▁
CBCL_SCR_07_OCD_R	8	1.00	1.34	1.82	0.00	0.00	1.00	2.00	14.00	▇▂▁▁▁
CBCL_SCR_07_STRESS_R	8	1.00	2.90	3.35	0.00	0.00	2.00	4.00	24.00	▇▂▁▁▁
ASR_SCR_PERSTR_T	106	0.99	47.90	9.37	20.00	42.00	49.00	56.00	60.00	▁▂▃▆▇
ASR_SCR_ANXDEP_T	106	0.99	53.46	5.71	50.00	50.00	50.00	55.00	98.00	▇▁▁▁▁
ASR_SCR_WITHDRAWN_T	106	0.99	52.82	5.10	50.00	50.00	51.00	53.00	97.00	▇▁▁▁▁
ASR_SCR_SOMATIC_T	106	0.99	54.78	6.18	50.00	50.00	52.00	57.00	98.00	▇▂▁▁▁
ASR_SCR_THOUGHT_T	106	0.99	52.93	5.09	50.00	50.00	51.00	54.00	95.00	▇▁▁▁▁
ASR_SCR_ATTENTION_T	106	0.99	53.90	5.88	50.00	50.00	51.00	57.00	94.00	▇▂▁▁▁
ASR_SCR_AGGRESSIVE_T	106	0.99	53.35	5.06	50.00	50.00	51.00	55.00	89.00	▇▂▁▁▁
ASR_SCR_RULEBREAK_T	106	0.99	52.56	4.71	50.00	50.00	50.00	52.00	83.00	▇▁▁▁▁
ASR_SCR_INTRUSIVE_T	106	0.99	51.65	3.41	50.00	50.00	50.00	51.00	76.00	▇▁▁▁▁
ASR_SCR_INTERNAL_T	106	0.99	48.13	10.55	30.00	40.00	48.00	55.00	95.00	▆▇▃▁▁
ASR_SCR_EXTERNAL_T	106	0.99	45.96	9.62	30.00	38.00	46.00	52.00	90.00	▇▇▃▁▁
ASR_SCR_TOTPROB_T	106	0.99	43.00	10.23	25.00	36.00	43.00	50.00	89.00	▆▇▃▁▁
ASR_SCR_DEPRESS_T	106	0.99	54.03	5.98	50.00	50.00	51.00	57.00	100.00	▇▁▁▁▁
ASR_SCR_ANXDISORD_T	106	0.99	53.51	5.38	50.00	50.00	51.00	54.00	80.00	▇▁▁▁▁
ASR_SCR_SOMATICPR_T	106	0.99	54.78	6.52	50.00	50.00	51.00	58.00	100.00	▇▂▁▁▁
ASR_SCR_AVOIDANT_T	106	0.99	53.21	5.42	50.00	50.00	51.00	54.00	90.00	▇▁▁▁▁
ASR_SCR_ADHD_T	106	0.99	53.25	5.57	50.00	50.00	51.00	53.00	98.00	▇▁▁▁▁
ASR_SCR_ANTISOCIAL_T	106	0.99	53.01	4.68	50.00	50.00	51.00	54.00	83.00	▇▁▁▁▁
ASR_SCR_INATTENTION_T	106	0.99	54.30	6.51	50.00	50.00	51.00	57.00	90.00	▇▂▁▁▁
ASR_SCR_HYPERACTIVE_T	106	0.99	52.03	4.27	50.00	50.00	50.00	51.00	80.00	▇▁▁▁▁
mania_parent	8	1.00	1.30	2.77	0.00	0.00	0.00	1.00	28.00	▇▁▁▁▁
BISAvg	22	1.00	1.38	0.71	0.00	0.75	1.25	2.00	3.00	▃▅▇▃▂
BASRRAvg	23	1.00	2.20	0.62	0.00	1.75	2.25	2.75	3.00	▁▁▅▆▇
BASDriveAvg	23	1.00	1.04	0.77	0.00	0.50	1.00	1.50	3.00	▇▆▅▂▂
BASFunAvg	23	1.00	1.43	0.66	0.00	1.00	1.50	1.75	3.00	▂▅▇▃▂
BASAllAvg	23	1.00	1.55	0.54	0.00	1.17	1.50	1.92	3.00	▁▅▇▅▂
UPPS_Y_SS_NEGATIVE_URGENCY	23	1.00	8.49	2.65	4.00	7.00	8.00	10.00	16.00	▆▇▇▂▁
UPPS_Y_SS_LACK_OF_PLANNING	23	1.00	7.74	2.38	4.00	6.00	8.00	9.00	16.00	▆▇▅▁▁
UPPS_Y_SS_SENSATION_SEEKING	23	1.00	9.77	2.68	4.00	8.00	10.00	12.00	16.00	▂▅▇▃▂
UPPS_Y_SS_POSITIVE_URGENCY	23	1.00	7.99	2.96	4.00	6.00	8.00	10.00	16.00	▇▆▆▂▁
UPPS_Y_SS_LACK_OF_PERSEVERANCE	23	1.00	7.04	2.25	4.00	5.00	7.00	8.00	16.00	▇▆▃▁▁
sleep_hours	5	1.00	1.72	0.81	1.00	1.00	2.00	2.00	5.00	▇▆▂▁▁
sleep_disturb	5	1.00	1.93	0.98	1.00	1.00	2.00	2.00	5.00	▇▇▂▁▁
sleep_initiate_maintain	5	1.00	11.75	3.75	7.00	9.00	11.00	13.00	35.00	▇▃▁▁▁
sleep_breath	5	1.00	3.77	1.25	3.00	3.00	3.00	4.00	15.00	▇▁▁▁▁
sleep_arousal	5	1.00	3.44	0.92	3.00	3.00	3.00	4.00	15.00	▇▁▁▁▁
sleep_transition	32	1.00	8.18	2.63	6.00	6.00	7.00	9.00	30.00	▇▁▁▁▁
sleep_somnolence	6	1.00	6.95	2.44	5.00	5.00	6.00	8.00	25.00	▇▁▁▁▁
sleep_hyperhydrosis	5	1.00	2.44	1.18	2.00	2.00	2.00	2.00	10.00	▇▁▁▁▁
sleep_total	33	1.00	36.53	8.24	26.00	31.00	35.00	40.00	126.00	▇▁▁▁▁
matureGames_Screen	20	1.00	0.57	0.87	0.00	0.00	0.00	1.00	3.00	▇▃▁▁▁
matureMovies_Screen	21	1.00	0.38	0.64	0.00	0.00	0.00	1.00	3.00	▇▃▁▁▁
wkdySum_Screen	37	1.00	3.46	3.10	0.00	1.25	2.50	4.75	24.00	▇▂▁▁▁
wkndSum_Screen	42	1.00	4.62	3.63	0.00	2.00	3.50	6.25	24.00	▇▃▁▁▁
deveplopment_birth_complications	761	0.94	0.37	0.74	0.00	0.00	0.00	1.00	8.00	▇▁▁▁▁
deveplopment_pregnancy_complications	744	0.94	0.61	1.02	0.00	0.00	0.00	1.00	12.00	▇▁▁▁▁
bilingual_use	1028	0.91	1.01	1.69	0.00	0.00	0.00	1.00	9.00	▇▂▁▁▁
education1stPar	17	1.00	16.60	2.77	1.00	15.00	18.00	19.00	21.00	▁▁▂▅▇
education2ndPar	2460	0.79	16.38	3.06	0.00	15.00	18.00	18.00	21.00	▁▁▁▅▇
educationAvg	14	1.00	16.38	2.70	3.00	15.00	17.00	18.50	21.00	▁▁▂▇▇
combinedIncome	1015	0.91	7.23	2.42	1.00	6.00	8.00	9.00	10.00	▂▂▃▆▇
householdSize	279	0.98	4.70	1.55	0.00	4.00	4.00	5.00	19.00	▂▇▁▁▁
econ_insecurities_sum	134	0.99	0.47	1.10	0.00	0.00	0.00	0.00	7.00	▇▁▁▁▁
area_deprivation_index	694	0.94	92.71	24.86	0.00	86.64	98.48	108.15	125.75	▁▁▂▇▇
lead_risk	698	0.94	5.10	3.11	0.00	2.00	5.00	8.00	10.00	▇▆▅▅▆
quartic_uniform_crime_reports	694	0.94	12.09	5.79	0.00	9.41	12.28	15.20	24.29	▂▃▇▅▁
neighbo_safety_parent_sum	47	1.00	11.67	2.93	3.00	10.00	12.00	14.00	15.00	▁▁▃▆▇
neighbo_safety_child_sum	24	1.00	4.03	1.10	1.00	3.00	4.00	5.00	5.00	▁▁▃▆▇
sumSchool_environment	27	1.00	19.93	2.83	6.00	18.00	20.00	22.00	24.00	▁▁▂▇▇
sumSchool_involvement	26	1.00	13.06	2.37	4.00	12.00	13.00	15.00	16.00	▁▁▃▅▇
sumSchool_disengagement	25	1.00	3.74	1.46	2.00	3.00	4.00	5.00	8.00	▇▃▃▁▁
parent_monitor_mean	23	1.00	4.38	0.52	1.00	4.20	4.40	4.80	5.00	▁▁▁▃▇
fam_conflict_parent	12	1.00	2.54	1.96	0.00	1.00	2.00	4.00	9.00	▇▇▅▂▁
fam_conflict_children	27	1.00	2.04	1.95	0.00	0.00	2.00	3.00	9.00	▇▅▂▁▁
prosocial_parent_mean	62	0.99	1.75	0.40	0.00	1.67	2.00	2.00	2.00	▁▁▁▁▇
prosocial_youth_mean	33	1.00	1.68	0.37	0.00	1.33	1.67	2.00	2.00	▁▁▁▂▇
phys_ind_daypweek_sum	93	0.99	4.89	5.64	0.00	0.00	4.00	8.00	70.00	▇▁▁▁▁
phys_team_daypweek_sum	112	0.99	8.13	7.61	0.00	0.00	6.00	13.00	54.00	▇▃▁▁▁
art_daypweek_sum	118	0.99	4.96	6.24	0.00	0.00	4.00	8.00	54.00	▇▁▁▁▁
sport_act_all_daypweek_sum	273	0.98	17.90	13.72	0.00	8.00	16.00	26.00	170.00	▇▁▁▁▁
physc_act_days	28	1.00	3.50	2.32	0.00	2.00	3.00	5.00	7.00	▇▅▇▅▇

1.16 preprocess site

Make sure that no members of the same family are recorded at different sites.

# Keep only the baseline visit.
all_sum_vars_baseline <- filter(all_sum_vars, EVENTNAME == "baseline_year_1_arm_1")

# Number of baseline rows per site (including rows with a missing site).
count(all_sum_vars_baseline, SITE_ID_L)

##    SITE_ID_L    n
## 1     site01  401
## 2     site02  554
## 3     site03  629
## 4     site04  747
## 5     site05  375
## 6     site06  580
## 7     site07  339
## 8     site08  350
## 9     site09  431
## 10    site10  732
## 11    site11  454
## 12    site12  595
## 13    site13  721
## 14    site14  599
## 15    site15  441
## 16    site16 1007
## 17    site17  575
## 18    site18  384
## 19    site19  533
## 20    site20  686
## 21    site21  595
## 22    site22   34
## 23      <NA>   90

# Check whether any family has members at more than one site.
# A family x site count table is widened to one column per site and its
# cross-product taken: a non-zero off-diagonal cell means the two sites share
# at least one family. The output below shows 6 such site pairs.
all_sum_vars_baseline %>%
  drop_na(SITE_ID_L) %>%
  filter(SITE_ID_L != "site22") %>%
  count(REL_FAMILY_ID, SITE_ID_L) %>%
  # pivot_wider() replaces the superseded spread(); names_sort = TRUE keeps the
  # sorted site column order that spread() produced.
  pivot_wider(names_from = SITE_ID_L, values_from = n,
              values_fill = 0, names_sort = TRUE) %>%
  select(-REL_FAMILY_ID) %>%
  as.matrix() %>%
  crossprod()

##        site01 site02 site03 site04 site05 site06 site07 site08 site09 site10
## site01    491      0      0      0      0      0      0      0      0      0
## site02      0   1034      0      0      0      0      0      0      0      0
## site03      0      0    751      0      0      0      0      0      0      0
## site04      0      0      0    957      0      0      0      0      0      0
## site05      0      0      0      0    471      0      0      0      0      0
## site06      0      0      0      0      0    686      0      1      0      0
## site07      0      0      0      0      0      0    425      0      0      0
## site08      0      0      0      0      0      1      0    434      0      0
## site09      0      0      0      0      0      0      0      0    479      0
## site10      0      0      0      0      0      0      0      0      0    906
## site11      0      0      0      0      0      0      0      0      0      0
## site12      0      0      0      0      0      0      0      0      0      0
## site13      0      0      0      1      0      0      0      0      0      0
## site14      0      0      0      0      0      0      0      0      0      0
## site15      0      0      0      0      0      0      0      0      0      0
## site16      0      0      0      0      0      0      0      0      0      0
## site17      0      1      0      0      0      0      0      0      0      0
## site18      0      0      0      0      0      0      0      0      0      0
## site19      0      0      0      0      0      1      0      0      0      0
## site20      0      0      0      0      0      0      0      1      0      0
## site21      0      0      0      0      0      0      0      0      0      0
##        site11 site12 site13 site14 site15 site16 site17 site18 site19 site20
## site01      0      0      0      0      0      0      0      0      0      0
## site02      0      0      0      0      0      0      1      0      0      0
## site03      0      0      0      0      0      0      0      0      0      0
## site04      0      0      1      0      0      0      0      0      0      0
## site05      0      0      0      0      0      0      0      0      0      0
## site06      0      0      0      0      0      0      0      0      1      0
## site07      0      0      0      0      0      0      0      0      0      0
## site08      0      0      0      0      0      0      0      0      0      1
## site09      0      0      0      0      0      0      0      0      0      0
## site10      0      0      0      0      0      0      0      0      0      0
## site11    582      0      0      0      0      0      0      0      0      0
## site12      0    727      0      0      0      0      0      0      0      0
## site13      0      0    879      0      0      0      0      0      0      0
## site14      0      0      0   1083      0      1      0      0      0      0
## site15      0      0      0      0    525      0      0      0      0      0
## site16      0      0      0      1      0   1389      0      0      0      0
## site17      0      0      0      0      0      0    693      0      0      0
## site18      0      0      0      0      0      0      0    448      0      0
## site19      0      0      0      0      0      0      0      0    967      0
## site20      0      0      0      0      0      0      0      0      0   1148
## site21      0      0      0      0      0      0      0      0      0      0
##        site21
## site01      0
## site02      0
## site03      0
## site04      0
## site05      0
## site06      0
## site07      0
## site08      0
## site09      0
## site10      0
## site11      0
## site12      0
## site13      0
## site14      0
## site15      0
## site16      0
## site17      0
## site18      0
## site19      0
## site20      0
## site21    719

# Remove every member of the families that are recorded at more than one site
# (the 6 cross-site families found above). Rows with a missing site and the
# small site22 are excluded first.
# dup is 1 when the family spans more than one distinct site and 0 otherwise.
# This replaces the deprecated nest(SITE_ID_L, .key = ...) / unnest() round
# trip, which triggered an "All elements of `...` must be named" warning.
all_sum_vars_baseline_no_dup <- all_sum_vars_baseline %>%
  drop_na(SITE_ID_L) %>%
  filter(SITE_ID_L != "site22") %>%
  group_by(REL_FAMILY_ID) %>%
  mutate(dup = as.numeric(n_distinct(SITE_ID_L) > 1)) %>%
  ungroup() %>%
  # Keep the column layout of the previous nest()/unnest() version, where
  # SITE_ID_L ended up immediately before dup.
  relocate(SITE_ID_L, .before = dup) %>%
  filter(dup != 1)

1.17 summary of targets and features

cognition NIHTBX_FLANKER_UNCORRECTED,NIHTBX_CARDSORT_UNCORRECTED,NIHTBX_PATTERN_UNCORRECTED,NIHTBX_PICVOCAB_UNCORRECTED,NIHTBX_READING_UNCORRECTED,NIHTBX_PICTURE_UNCORRECTED,PEA_RAVLT_LD_TRIAL_VII_TC,NIHTBX_LIST_UNCORRECTED,LMT_SCR_PERC_CORRECT,PEA_WISCV_TRS,NIHTBX_FLUIDCOMP_UNCORRECTED,NIHTBX_CRYST_UNCORRECTED,NIHTBX_TOTALCOMP_UNCORRECTED

cognition Sum NIHTBX_FLUIDCOMP_UNCORRECTED,NIHTBX_CRYST_UNCORRECTED,NIHTBX_TOTALCOMP_UNCORRECTED

features CBCL_SCR_SYN_ANXDEP_R, CBCL_SCR_SYN_WITHDEP_R, CBCL_SCR_SYN_SOMATIC_R, CBCL_SCR_SYN_SOCIAL_R, CBCL_SCR_SYN_THOUGHT_R, CBCL_SCR_SYN_ATTENTION_R, CBCL_SCR_SYN_RULEBREAK_R, CBCL_SCR_SYN_AGGRESSIVE_R, ASR_SCR_PERSTR_T, ASR_SCR_ANXDEP_T,ASR_SCR_WITHDRAWN_T, ASR_SCR_SOMATIC_T,ASR_SCR_THOUGHT_T, ASR_SCR_ATTENTION_T, ASR_SCR_AGGRESSIVE_T,ASR_SCR_RULEBREAK_T,ASR_SCR_INTRUSIVE_T,mania_parent,BISAvg,BASRRAvg,BASDriveAvg,BASFunAvg,UPPS_Y_SS_NEGATIVE_URGENCY,UPPS_Y_SS_LACK_OF_PLANNING,UPPS_Y_SS_SENSATION_SEEKING,UPPS_Y_SS_POSITIVE_URGENCY,UPPS_Y_SS_LACK_OF_PERSEVERANCE,sleep_hours,sleep_disturb,sleep_initiate_maintain,sleep_breath,sleep_arousal,sleep_transition,sleep_somnolence,sleep_hyperhydrosis,matureGames_Screen,matureMovies_Screen,wkdySum_Screen,wkndSum_Screen,tobacco_before_preg,tobacco_after_preg,alcohol_before_preg,alcohol_after_preg,marijuana_before_preg,marijuana_after_preg,deveplopment_prematurity,deveplopment_birth_complications,deveplopment_pregnancy_complications,SEX,RACE_ETHNICITY,bilingual_use,marital,educationAvg,combinedIncome,householdSize,econ_insecurities_sum,area_deprivation_index,lead_risk,quartic_uniform_crime_reports,neighbo_safety_parent_sum,neighbo_safety_child_sum,sumSchool_environment,sumSchool_involvement,sumSchool_disengagement,parent_monitor_mean,fam_conflict_parent,fam_conflict_children,prosocial_parent_mean,prosocial_youth_mean, phys_ind_daypweek_sum, phys_team_daypweek_sum, art_daypweek_sum, physc_act_days

# Individual cognitive task scores.
cognition <- c(
  "NIHTBX_FLANKER_UNCORRECTED",
  "NIHTBX_CARDSORT_UNCORRECTED",
  "NIHTBX_PATTERN_UNCORRECTED",
  "NIHTBX_PICVOCAB_UNCORRECTED",
  "NIHTBX_READING_UNCORRECTED",
  "NIHTBX_PICTURE_UNCORRECTED",
  "PEA_RAVLT_LD_TRIAL_VII_TC",
  "NIHTBX_LIST_UNCORRECTED",
  "LMT_SCR_PERC_CORRECT",
  "PEA_WISCV_TRS"
)

# The six indicators that appear in the CFA model of the g-factor.
gmodel <- c(
  "NIHTBX_PICVOCAB_UNCORRECTED",
  "NIHTBX_READING_UNCORRECTED",
  "NIHTBX_FLANKER_UNCORRECTED",
  "NIHTBX_PATTERN_UNCORRECTED",
  "NIHTBX_PICTURE_UNCORRECTED",
  "PEA_RAVLT_LD_TRIAL_VII_TC"
)

# Composite cognition scores.
cognition_sum <- c(
  "NIHTBX_FLUIDCOMP_UNCORRECTED",
  "NIHTBX_CRYST_UNCORRECTED",
  "NIHTBX_TOTALCOMP_UNCORRECTED"
)

# Feature columns, one per line and grouped by variable-name prefix.
# The order is unchanged. The three *_before_preg variables stay disabled, so
# only the *_after_preg substance-use variables are included.
features <- c(
  # CBCL_* scores
  "CBCL_SCR_SYN_ANXDEP_R",
  "CBCL_SCR_SYN_WITHDEP_R",
  "CBCL_SCR_SYN_SOMATIC_R",
  "CBCL_SCR_SYN_SOCIAL_R",
  "CBCL_SCR_SYN_THOUGHT_R",
  "CBCL_SCR_SYN_ATTENTION_R",
  "CBCL_SCR_SYN_RULEBREAK_R",
  "CBCL_SCR_SYN_AGGRESSIVE_R",
  # ASR_* scores
  "ASR_SCR_PERSTR_T",
  "ASR_SCR_ANXDEP_T",
  "ASR_SCR_WITHDRAWN_T",
  "ASR_SCR_SOMATIC_T",
  "ASR_SCR_THOUGHT_T",
  "ASR_SCR_ATTENTION_T",
  "ASR_SCR_AGGRESSIVE_T",
  "ASR_SCR_RULEBREAK_T",
  "ASR_SCR_INTRUSIVE_T",
  # mania, BIS/BAS and UPPS
  "mania_parent",
  "BISAvg",
  "BASRRAvg",
  "BASDriveAvg",
  "BASFunAvg",
  "UPPS_Y_SS_NEGATIVE_URGENCY",
  "UPPS_Y_SS_LACK_OF_PLANNING",
  "UPPS_Y_SS_SENSATION_SEEKING",
  "UPPS_Y_SS_POSITIVE_URGENCY",
  "UPPS_Y_SS_LACK_OF_PERSEVERANCE",
  # sleep
  "sleep_hours",
  "sleep_disturb",
  "sleep_initiate_maintain",
  "sleep_breath",
  "sleep_arousal",
  "sleep_transition",
  "sleep_somnolence",
  "sleep_hyperhydrosis",
  # screen use
  "matureGames_Screen",
  "matureMovies_Screen",
  "wkdySum_Screen",
  "wkndSum_Screen",
  # substance use around pregnancy and development
  # "tobacco_before_preg",
  "tobacco_after_preg",
  # "alcohol_before_preg",
  "alcohol_after_preg",
  # "marijuana_before_preg",
  "marijuana_after_preg",
  "deveplopment_prematurity",
  "deveplopment_birth_complications",
  "deveplopment_pregnancy_complications",
  # bilingualism and demographics
  "bilingual_use",
  "SEX",
  "RACE_ETHNICITY",
  "marital",
  "educationAvg",
  "combinedIncome",
  "householdSize",
  "econ_insecurities_sum",
  "area_deprivation_index",
  "lead_risk",
  "quartic_uniform_crime_reports",
  "neighbo_safety_parent_sum",
  "neighbo_safety_child_sum",
  # school
  "sumSchool_environment",
  "sumSchool_involvement",
  "sumSchool_disengagement",
  # family and prosocial
  "parent_monitor_mean",
  "fam_conflict_parent",
  "fam_conflict_children",
  "prosocial_parent_mean",
  "prosocial_youth_mean",
  # activities
  "phys_ind_daypweek_sum",
  "phys_team_daypweek_sum",
  "art_daypweek_sum",
  "physc_act_days"
)

1.18 visualise missing value

The column that stands out as missing is the visual-impairment variable; we won't use it anyway. There also appear to be a lot of missing values for bilingual_use; we treat those missing values as zero so as not to lose data. For the other variables we will use kmean imputation for continuous features and mode imputation for factors.

# Missing-value overview restricted to the feature columns.
# all_of() makes it explicit that `features` is an external character vector
# (a bare vector in select() is ambiguous and triggers a tidyselect note), and
# it errors if a listed column is absent.
all_sum_vars_baseline_no_dup %>%
  select(all_of(features)) %>%
  naniar::vis_miss(warn_large_data = FALSE)

# Per-feature missingness plot.
all_sum_vars_baseline_no_dup %>%
  select(all_of(features)) %>%
  DataExplorer::plot_missing()

# Per-feature missingness profile, kept for later use.
missing_feature_values <- all_sum_vars_baseline_no_dup %>%
  select(all_of(features)) %>%
  DataExplorer::profile_missing()

# Treat a missing bilingual_use as 0; all other columns are left untouched.
all_sum_vars_baseline_no_dup_bilin0 <- all_sum_vars_baseline_no_dup %>%
  mutate(bilingual_use = tidyr::replace_na(bilingual_use, 0))

1.19 need imputation

otherwise more than half of the data will be lost

# Rows available before any listwise deletion.
nrow(all_sum_vars_baseline_no_dup_bilin0)

## [1] 11716

# Rows that would remain if every row with any missing value were dropped.
nrow(drop_na(all_sum_vars_baseline_no_dup_bilin0))

## [1] 5633

1.20 cfa on all subjects

# lavaan model syntax for a second-order CFA: three first-order factors
# (Language, CognitiveFlexibity, MemoryRecall), each measured by two task
# scores, all loading on a single higher-order factor g. Per the notes inside
# the string, NA* frees the Language loading on g and g's variance is fixed to 1.
# The string is passed to lavaan verbatim; do not edit it cosmetically.
NeuroCog2ndOrder <-'
Language =~ NIHTBX_PICVOCAB_UNCORRECTED + NIHTBX_READING_UNCORRECTED 
CognitiveFlexibity =~ NIHTBX_FLANKER_UNCORRECTED + NIHTBX_PATTERN_UNCORRECTED 
MemoryRecall =~ NIHTBX_PICTURE_UNCORRECTED + PEA_RAVLT_LD_TRIAL_VII_TC
g =~ NA*Language + CognitiveFlexibity  + MemoryRecall #estimate the loading of GenAbi -> as opposed to using it as a marker
g ~~ 1*g #need to constrain variance to 1'

# Keep only the rows that are complete on the six indicators listed in gmodel.
all_sum_vars_baseline_no_dup_bilin0_no_na_G<- all_sum_vars_baseline_no_dup_bilin0 %>% drop_na(all_of(gmodel))
#glimpse(all_sum_vars_baseline_no_dup_bilin0_no_na_G)

# Fit the model with the MLR estimator, then predict on the same rows that
# were used for fitting.
NeuroCog2ndOrder.Fit <- lavaan::cfa(model = NeuroCog2ndOrder, data = all_sum_vars_baseline_no_dup_bilin0_no_na_G,estimator="MLR")
second_order_output <- lavaan::lavPredict(NeuroCog2ndOrder.Fit, newdata = all_sum_vars_baseline_no_dup_bilin0_no_na_G)

# Attach the 'g' column of the prediction as gfactor.
# NOTE(review): bind_cols() matches by position, so this presumably relies on
# lavPredict() returning one row per input row in the same order -- confirm.
all_sum_vars_baseline_no_dup_bilin0_no_na_G_factor_all_sub <- 
  all_sum_vars_baseline_no_dup_bilin0_no_na_G %>% 
  bind_cols(gfactor = second_order_output[,'g'])

1.21 visualise with correlation heatmap

#all_sum_vars_baseline_no_dup_bilin0_no_na_G_factor_all_sub %>% 
#  select(gfactor, features) %>% dlookr::plot_correlate()

2 Modeling for Socio-Demographic and Psychological Factors

Samples: REL_FAMILY_ID (9856 Levels) SITE_ID_L (need to remove 22nd site. having too few subjects) ALSO make sure about EVENTNAME

Target: Factor analysis of cognition: gfactor

70 Features: CBCL_SCR_SYN_ANXDEP_R CBCL_SCR_SYN_WITHDEP_R CBCL_SCR_SYN_SOMATIC_R CBCL_SCR_SYN_SOCIAL_R CBCL_SCR_SYN_THOUGHT_R CBCL_SCR_SYN_ATTENTION_R CBCL_SCR_SYN_RULEBREAK_R CBCL_SCR_SYN_AGGRESSIVE_R ASR_SCR_PERSTR_T ASR_SCR_ANXDEP_T ASR_SCR_WITHDRAWN_T ASR_SCR_SOMATIC_T ASR_SCR_THOUGHT_T ASR_SCR_ATTENTION_T ASR_SCR_AGGRESSIVE_T ASR_SCR_RULEBREAK_T ASR_SCR_INTRUSIVE_T mania_parent BISAvg BASRRAvg BASDriveAvg BASFunAvg UPPS_Y_SS_NEGATIVE_URGENCY UPPS_Y_SS_LACK_OF_PLANNING UPPS_Y_SS_SENSATION_SEEKING UPPS_Y_SS_POSITIVE_URGENCY UPPS_Y_SS_LACK_OF_PERSEVERANCE sleep_hours sleep_disturb sleep_initiate_maintain sleep_breath sleep_arousal sleep_transition sleep_somnolence sleep_hyperhydrosis matureGames_Screen matureMovies_Screen wkdySum_Screen wkndSum_Screen tobacco_before_preg tobacco_after_preg alcohol_before_preg alcohol_after_preg marijuana_before_preg marijuana_after_preg deveplopment_prematurity deveplopment_birth_complications deveplopment_pregnancy_complications bilingual_use marital educationAvg combinedIncome householdSize econ_insecurities_sum area_deprivation_index lead_risk quartic_uniform_crime_reports neighbo_safety_parent_sum neighbo_safety_child_sum sumSchool_environment sumSchool_involvement sumSchool_disengagement parent_monitor_mean fam_conflict_parent fam_conflict_children prosocial_parent_mean prosocial_youth_mean phys_ind_daypweek_sum phys_team_daypweek_sum art_daypweek_sum physc_act_days

Features by categories:

Child Mental Health (8): CBCL_SCR_SYN_ANXDEP_R CBCL_SCR_SYN_WITHDEP_R CBCL_SCR_SYN_SOMATIC_R CBCL_SCR_SYN_SOCIAL_R CBCL_SCR_SYN_THOUGHT_R CBCL_SCR_SYN_ATTENTION_R CBCL_SCR_SYN_RULEBREAK_R CBCL_SCR_SYN_AGGRESSIVE_R

Parent Mental Health(10): ASR_SCR_PERSTR_T ASR_SCR_ANXDEP_T ASR_SCR_WITHDRAWN_T ASR_SCR_SOMATIC_T ASR_SCR_THOUGHT_T ASR_SCR_ATTENTION_T ASR_SCR_AGGRESSIVE_T ASR_SCR_RULEBREAK_T ASR_SCR_INTRUSIVE_T mania_parent

Child Personality(9): BISAvg BASRRAvg BASDriveAvg BASFunAvg UPPS_Y_SS_NEGATIVE_URGENCY UPPS_Y_SS_LACK_OF_PLANNING UPPS_Y_SS_SENSATION_SEEKING UPPS_Y_SS_POSITIVE_URGENCY UPPS_Y_SS_LACK_OF_PERSEVERANCE

Child Sleep (8): sleep_hours sleep_disturb sleep_initiate_maintain sleep_breath sleep_arousal sleep_transition sleep_somnolence sleep_hyperhydrosis

Physical Activity (4): phys_ind_daypweek_sum phys_team_daypweek_sum art_daypweek_sum physc_act_days

Child Screen Use (4): matureGames_Screen matureMovies_Screen wkdySum_Screen wkndSum_Screen

Parent Drug Use (6): tobacco_before_preg tobacco_after_preg alcohol_before_preg alcohol_after_preg marijuana_before_preg marijuana_after_preg

Child Developmental Adversity (3): deveplopment_prematurity deveplopment_birth_complications deveplopment_pregnancy_complications

Child Socio-Demographics (15): bilingual_use marital educationAvg combinedIncome householdSize econ_insecurities_sum area_deprivation_index lead_risk quartic_uniform_crime_reports neighbo_safety_parent_sum neighbo_safety_child_sum sumSchool_environment sumSchool_involvement sumSchool_disengagement

Social Interaction (5): parent_monitor_mean fam_conflict_parent fam_conflict_children prosocial_parent_mean prosocial_youth_mean

2.1 set up parallel processing

# Register a doParallel backend.
# library() stops with a clear error if doParallel is not installed, whereas
# require() only returns FALSE and lets the next call fail less clearly.
library(doParallel)
cores <- parallel::detectCores(logical = FALSE)
# Leave one physical core free, but never request fewer than one worker:
# detectCores() can return 1 (giving 0 workers) or NA on some platforms.
registerDoParallel(cores = max(1L, cores - 1L, na.rm = TRUE))

2.2 set up formula

# Model formula: gfactor regressed on every name in `features`
gfactor_all_features_formular <- reformulate(
  termlabels = features,
  response = "gfactor"
)
print(gfactor_all_features_formular)

## gfactor ~ CBCL_SCR_SYN_ANXDEP_R + CBCL_SCR_SYN_WITHDEP_R + CBCL_SCR_SYN_SOMATIC_R + 
##     CBCL_SCR_SYN_SOCIAL_R + CBCL_SCR_SYN_THOUGHT_R + CBCL_SCR_SYN_ATTENTION_R + 
##     CBCL_SCR_SYN_RULEBREAK_R + CBCL_SCR_SYN_AGGRESSIVE_R + ASR_SCR_PERSTR_T + 
##     ASR_SCR_ANXDEP_T + ASR_SCR_WITHDRAWN_T + ASR_SCR_SOMATIC_T + 
##     ASR_SCR_THOUGHT_T + ASR_SCR_ATTENTION_T + ASR_SCR_AGGRESSIVE_T + 
##     ASR_SCR_RULEBREAK_T + ASR_SCR_INTRUSIVE_T + mania_parent + 
##     BISAvg + BASRRAvg + BASDriveAvg + BASFunAvg + UPPS_Y_SS_NEGATIVE_URGENCY + 
##     UPPS_Y_SS_LACK_OF_PLANNING + UPPS_Y_SS_SENSATION_SEEKING + 
##     UPPS_Y_SS_POSITIVE_URGENCY + UPPS_Y_SS_LACK_OF_PERSEVERANCE + 
##     sleep_hours + sleep_disturb + sleep_initiate_maintain + sleep_breath + 
##     sleep_arousal + sleep_transition + sleep_somnolence + sleep_hyperhydrosis + 
##     matureGames_Screen + matureMovies_Screen + wkdySum_Screen + 
##     wkndSum_Screen + tobacco_after_preg + alcohol_after_preg + 
##     marijuana_after_preg + deveplopment_prematurity + deveplopment_birth_complications + 
##     deveplopment_pregnancy_complications + bilingual_use + SEX + 
##     RACE_ETHNICITY + marital + educationAvg + combinedIncome + 
##     householdSize + econ_insecurities_sum + area_deprivation_index + 
##     lead_risk + quartic_uniform_crime_reports + neighbo_safety_parent_sum + 
##     neighbo_safety_child_sum + sumSchool_environment + sumSchool_involvement + 
##     sumSchool_disengagement + parent_monitor_mean + fam_conflict_parent + 
##     fam_conflict_children + prosocial_parent_mean + prosocial_youth_mean + 
##     phys_ind_daypweek_sum + phys_team_daypweek_sum + art_daypweek_sum + 
##     physc_act_days

2.3 create a list of sites

# One-column table of the distinct site IDs, sorted
site_col <- arrange(
  distinct(all_sum_vars_baseline_no_dup_bilin0, SITE_ID_L),
  SITE_ID_L
)

# The same site IDs as a list (one element per site, used for mapping) ...
site_list <- as.list(site_col[["SITE_ID_L"]])

# ... and as a plain character vector (used to name per-site results)
site_char <- as.character(unlist(site_col[["SITE_ID_L"]]))

2.4 test cfa

# Second-order CFA in lavaan model syntax: Language, CognitiveFlexibity and
# MemoryRecall are first-order latent factors with two observed indicators
# each; `g` is the second-order factor measured by those three.
# `NA*` frees the first loading of g, and `g ~~ 1*g` fixes its variance to 1
# so that the model stays identified.
# Everything between the quotes (including the `#` notes) is the model string
# handed to lavaan.
NeuroCog2ndOrder <-'
Language =~ NIHTBX_PICVOCAB_UNCORRECTED + NIHTBX_READING_UNCORRECTED 
CognitiveFlexibity =~ NIHTBX_FLANKER_UNCORRECTED + NIHTBX_PATTERN_UNCORRECTED 
MemoryRecall =~ NIHTBX_PICTURE_UNCORRECTED + PEA_RAVLT_LD_TRIAL_VII_TC
g =~ NA*Language + CognitiveFlexibity  + MemoryRecall #estimate the loading of GenAbi -> as opposed to using it as a marker
g ~~ 1*g #need to constrain variance to 1'

# Trial run: fit the CFA on every subject with complete indicator data
NeuroCog2ndOrder.Fit <- lavaan::cfa(
  model = NeuroCog2ndOrder,
  data = all_sum_vars_baseline_no_dup_bilin0_no_na_G,
  estimator = "MLR"
)

# Factor scores for those same subjects
second_order_output <- lavaan::lavPredict(
  NeuroCog2ndOrder.Fit,
  newdata = all_sum_vars_baseline_no_dup_bilin0_no_na_G
)

# Store the second-order factor score next to the original columns
all_sum_vars_baseline_no_dup_bilin0_no_na_G_factor_all_sub <- bind_cols(
  all_sum_vars_baseline_no_dup_bilin0_no_na_G,
  gfactor = second_order_output[, "g"]
)

2.5 create a list of splits based on sites

# Build the leave-one-site-out split for `site`.
#
# The CFA is fitted on the training sites only; the fitted model is then used
# to score both the training rows and the held-out rows, so the held-out
# site's g-factor does not influence the measurement model.
#
# site: one site ID, compared against the SITE_ID_L column.
# Returns the object produced by make_splits(), with analysis = all other
# sites and assessment = `site`, over a copy of the data that carries the
# new `gfactor` column.
#
# Reads the globals all_sum_vars_baseline_no_dup_bilin0_no_na_G and
# NeuroCog2ndOrder; the global data frame itself is not modified.
split_func <- function(site) {
  site_data <- all_sum_vars_baseline_no_dup_bilin0_no_na_G

  held_out <- site_data$SITE_ID_L == site
  train_indices <- which(!held_out)
  test_indices <- which(held_out)

  cfa_fit <- lavaan::cfa(
    model = NeuroCog2ndOrder,
    data = site_data[train_indices, ],
    estimator = "MLR"
  )

  # Score a set of rows with the training-site CFA and keep only g
  score_g <- function(rows) {
    lavaan::lavPredict(cfa_fit, newdata = site_data[rows, ])[, "g"]
  }
  g_train <- score_g(train_indices)
  g_test <- score_g(test_indices)

  # Add the scores only after both predictions were made on the raw columns
  site_data$gfactor <- NA
  site_data$gfactor[train_indices] <- g_train
  site_data$gfactor[test_indices] <- g_test

  make_splits(
    list(analysis = train_indices, assessment = test_indices),
    site_data
  )
}

# One split per site, in the order of site_list
split_list <- map(site_list, split_func)

2.6 create a list of recipe based on sites

Note: step_naomit causes an error, so I decided to drop NAs from the target variable prior to this step.

# Build the (unprepped) preprocessing recipe for one split.
#
# split: a split object; only its training portion is used as the template.
# Returns a recipe based on gfactor_all_features_formular (global).
#
# Step order is kept deliberately:
#   1. mode-impute nominal predictors
#   2. dummy-code nominal predictors
#   3. normalise all numeric columns (no -all_outcomes(), so the outcome is
#      normalised too); the extra all_nominal() selector is kept as written
#      and presumably matches nothing after step 2 (TODO confirm)
#   4. kNN-impute (5 neighbours) the numeric predictors
# step_naomit() on the outcome is intentionally not used here; see the note
# above this chunk.
recipe_func <- function(split) {
  recipe(gfactor_all_features_formular, data = training(split)) %>%
    step_impute_mode(all_nominal(), -all_outcomes()) %>%
    step_dummy(all_nominal(), -all_outcomes()) %>%
    step_normalize(all_numeric(), all_nominal()) %>%
    step_impute_knn(all_numeric(), -all_outcomes(), neighbors = 5)
}

# One recipe per leave-one-site-out split
recipe_list <- map(split_list, recipe_func)

2.7 create a list of workflow, bestmodel and best hyperpara based on sites

# (Re-)register the doParallel backend before tuning.
# library() errors immediately if the package is missing; require() would
# only return FALSE.
library(doParallel)
cores <- parallel::detectCores(logical = FALSE)
# Keep one physical core free, but guard against 0 or NA workers
# (detectCores() may return 1 or NA).
registerDoParallel(cores = max(1L, cores - 1L, na.rm = TRUE))

# Tune and fit an elastic net for one leave-one-site-out split.
#
# split:                split object; only training(split) is used here.
# preprocessing_recipe: unprepped recipe added to the workflow.
#
# Returns a named list:
#   elastic_net_wfl_final      - workflow finalised with the best parameters
#   best_elastic_net_model     - result of select_best() (lowest MAE)
#   best_elastic_net_hyperpara - result of show_best() for MAE
#   elastic_net_model_val      - parsnip fit of the final workflow on the
#                                training data
#
# Side effect: calls set.seed(123), which resets the global RNG state.
elastic_net_tuning_func <- function(split, preprocessing_recipe) {

  # 10-fold CV on the training sites, with a fixed seed for reproducible folds
  set.seed(123)
  tuning_cv_folds <- training(split) %>%
    vfold_cv(v = 10)

  # Elastic net via glmnet; both penalty (lambda) and mixture (alpha) tuned
  elastic_net_model <- linear_reg(
    penalty = tune(),
    mixture = tune()
  ) %>%
    set_engine("glmnet") %>%
    set_mode("regression")

  # Bundle recipe and model so preprocessing is re-estimated in every fold
  elastic_net_wfl <- workflow() %>%
    add_recipe(preprocessing_recipe) %>%
    add_model(elastic_net_model)

  # Search space: penalty from 10^-10 to 10^1 on a log10 scale
  elastic_net_set <- parameters(
    penalty(range = c(-10, 1), trans = log10_trans()),
    mixture()
  )
  # 200 levels of lambda and 11 levels of alpha (0, .1 to 1)
  elastic_net_grid <- grid_regular(elastic_net_set,
                                   levels = c(200, 11))

  elastic_net_ctrl <- control_grid(save_pred = TRUE,
                                   verbose = TRUE,
                                   # alternative: parallel_over = "everything"
                                   parallel_over = "resamples")

  elastic_net_tune <- tune_grid(elastic_net_wfl,
                                resamples = tuning_cv_folds,
                                grid = elastic_net_grid,
                                metrics = metric_set(mae),
                                control = elastic_net_ctrl)

  # Best lambda/alpha by MAE, plus the top candidates for reporting
  best_elastic_net_model <- select_best(elastic_net_tune, metric = "mae")
  best_elastic_net_hyperpara <- show_best(elastic_net_tune, metric = "mae")

  # Plug the selected hyper-parameters into the workflow
  elastic_net_wfl_final <- elastic_net_wfl %>%
    finalize_workflow(best_elastic_net_model)

  # Refit on the full training set and keep the underlying parsnip fit.
  # extract_fit_parsnip() replaces the deprecated pull_workflow_fit().
  elastic_net_model_val <- elastic_net_wfl_final %>%
    fit(training(split)) %>%
    extract_fit_parsnip()

  list(elastic_net_wfl_final = elastic_net_wfl_final,
       best_elastic_net_model = best_elastic_net_model,
       best_elastic_net_hyperpara = best_elastic_net_hyperpara,
       elastic_net_model_val = elastic_net_model_val)
}

# Tune one elastic net per split/recipe pair and time the whole run
start_time <- Sys.time()
elastic_net_tuned_list <- map2(
  split_list,
  recipe_list,
  elastic_net_tuning_func
)
stop_time <- Sys.time()

# Elapsed wall-clock time of the tuning
stop_time - start_time

# Turn the list of per-site results into one list per result component
elastic_net_wfl_final_list <- map(
  elastic_net_tuned_list, pluck, "elastic_net_wfl_final"
)
best_elastic_net_model_list <- map(
  elastic_net_tuned_list, pluck, "best_elastic_net_model"
)
best_elastic_net_hyperpara_list <- map(
  elastic_net_tuned_list, pluck, "best_elastic_net_hyperpara"
)
elastic_net_model_val_list <- map(
  elastic_net_tuned_list, pluck, "elastic_net_model_val"
)

2.8 apply best models to the hold-out sites

# Evaluate one finalised workflow on its held-out site.
#
# split:                 split object (training sites / held-out site).
# preprocessing_recipe:  the recipe used in the workflow; prepped again here
#                        on the training data to bake the held-out rows.
# elastic_net_wfl_final: workflow with the selected hyper-parameters.
#
# Returns a named list:
#   elastic_net_final_fit     - last_fit() result
#   elastic_net_final_metrics - rsq_trad, mae and rmse on the held-out site
#   predicted_df              - prediction bound to the raw held-out rows
#   predicted_df_baked        - prediction bound to the preprocessed
#                               held-out rows
elastic_net_fit_func <- function(split, preprocessing_recipe, elastic_net_wfl_final) {
  elastic_net_final_fit <- last_fit(
    elastic_net_wfl_final,
    split = split,
    metrics = metric_set(rsq_trad, mae, rmse)
  )

  elastic_net_final_metrics <- collect_metrics(elastic_net_final_fit)

  # Held-out predictions as a one-column table, shared by both outputs
  held_out_pred <- elastic_net_final_fit %>%
    collect_predictions() %>%
    select(non_brain_predicted = .pred)

  # Held-out rows after the preprocessing learned on the training sites
  baked_test <- preprocessing_recipe %>%
    prep(training = training(split)) %>%
    bake(new_data = testing(split))

  list(
    elastic_net_final_fit = elastic_net_final_fit,
    elastic_net_final_metrics = elastic_net_final_metrics,
    predicted_df = bind_cols(held_out_pred, testing(split)),
    predicted_df_baked = bind_cols(held_out_pred, baked_test)
  )
}
 
# Parallel plan for the hold-out evaluation.
# Not used: future::plan("multicore", workers = cores - 1)
# Forked processing ('multicore') is disabled in future (>= 1.13.0) when
# running R from RStudio because it is considered unstable, so separate
# background R sessions are used instead.
future::plan("multisession", workers = cores - 1)

# Per-site inputs; the names match the arguments of elastic_net_fit_func
input_list <- list(
  split = split_list,
  preprocessing_recipe = recipe_list,
  elastic_net_wfl_final = elastic_net_wfl_final_list
)

# Evaluate every site's final workflow on its held-out data
elastic_net_fit_list <- furrr::future_pmap(
  input_list,
  function(split, preprocessing_recipe, elastic_net_wfl_final) {
    elastic_net_fit_func(
      split = split,
      preprocessing_recipe = preprocessing_recipe,
      elastic_net_wfl_final = elastic_net_wfl_final
    )
  },
  .options = furrr::furrr_options(seed = 123)
)

# Split the per-site results into one list per component
elastic_net_final_fit_list <- map(
  elastic_net_fit_list, pluck, "elastic_net_final_fit"
)
elastic_net_final_metrics_list <- map(
  elastic_net_fit_list, pluck, "elastic_net_final_metrics"
)
predicted_df_list <- map(elastic_net_fit_list, pluck, "predicted_df")
predicted_df_baked_list <- map(
  elastic_net_fit_list, pluck, "predicted_df_baked"
)

2.9 create dataframes of the modeling output

# Label each per-site list with its site ID, then stack it into one table
# whose SITE_ID_L column records the held-out site.

# Selected hyper-parameters
best_elastic_net_model_list <- setNames(best_elastic_net_model_list, site_char)
best_elastic_net_model_all_sites <- bind_rows(
  best_elastic_net_model_list,
  .id = "SITE_ID_L"
)

# Hold-out performance metrics
elastic_net_final_metrics_list <- setNames(
  elastic_net_final_metrics_list, site_char
)
elastic_net_final_metrics_all_sites <- bind_rows(
  elastic_net_final_metrics_list,
  .id = "SITE_ID_L"
)

# Predictions with the raw held-out rows
predicted_df_list <- setNames(predicted_df_list, site_char)
predicted_df_all_sites <- bind_rows(
  predicted_df_list,
  .id = "SITE_ID_L"
)

# Predictions with the preprocessed held-out rows
predicted_df_baked_list <- setNames(predicted_df_baked_list, site_char)
predicted_df_baked_all_sites <- bind_rows(
  predicted_df_baked_list,
  .id = "SITE_ID_L"
)

# Selected penalty/mixture per held-out site
best_elastic_net_model_all_sites %>% glimpse()

## Rows: 21
## Columns: 4
## $ SITE_ID_L <chr> "site01", "site02", "site03", "site04", "site05", "site06", …
## $ penalty   <dbl> 0.019563983, 0.022219469, 0.019563983, 0.019563983, 0.013354…
## $ mixture   <dbl> 0.1, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 1.0, 0.1, 0.1, 0.1, 0.1, …
## $ .config   <chr> "Preprocessor1_Model0351", "Preprocessor1_Model0352", "Prepr…

# Hold-out metrics, one row per site and metric
elastic_net_final_metrics_all_sites %>% glimpse()

## Rows: 63
## Columns: 5
## $ SITE_ID_L  <chr> "site01", "site01", "site01", "site02", "site02", "site02",…
## $ .metric    <chr> "rsq_trad", "mae", "rmse", "rsq_trad", "mae", "rmse", "rsq_…
## $ .estimator <chr> "standard", "standard", "standard", "standard", "standard",…
## $ .estimate  <dbl> 0.3214087, 0.7187623, 0.9158668, 0.2323321, 0.5805358, 0.74…
## $ .config    <chr> "Preprocessor1_Model1", "Preprocessor1_Model1", "Preprocess…

# Out-of-site predictions alongside the raw variables
predicted_df_all_sites %>% glimpse()

## Rows: 11,278
## Columns: 126
## $ non_brain_predicted                  <dbl> -0.01874527, -1.66706903, -0.6442…
## $ SUBJECTKEY                           <chr> "NDAR_INVCL18941F", "NDAR_INV4BAP…
## $ EVENTNAME                            <chr> "baseline_year_1_arm_1", "baselin…
## $ MRI_INFO_DEVICESERIALNUMBER          <chr> "HASH6b4422a7", "HASH6b4422a7", "…
## $ SEX                                  <fct> F, M, F, M, F, F, F, M, F, M, F, …
## $ INTERVIEW_AGE                        <dbl> 118, 118, 119, 114, 117, 131, 120…
## $ RACE_ETHNICITY                       <fct> Hispanic, Hispanic, Hispanic, His…
## $ REL_FAMILY_ID                        <fct> 9522, 9387, 9406, 9689, 9449, 947…
## $ ACS_RAKED_PROPENSITY_SCORE           <dbl> 306.1648, 626.4339, 684.5424, 668…
## $ NIHTBX_FLANKER_UNCORRECTED           <dbl> 102, 102, 92, 94, 98, 106, 100, 1…
## $ NIHTBX_CARDSORT_UNCORRECTED          <dbl> 86, 98, 90, 81, 99, 109, 106, 95,…
## $ NIHTBX_PATTERN_UNCORRECTED           <dbl> 97, 65, 101, 82, 86, 103, 101, 10…
## $ NIHTBX_PICVOCAB_UNCORRECTED          <dbl> 87, 88, 74, 80, 78, 81, 92, 74, 7…
## $ NIHTBX_READING_UNCORRECTED           <dbl> 92, 90, 86, 76, 88, 102, 99, 87, …
## $ NIHTBX_PICTURE_UNCORRECTED           <dbl> 104, 91, 97, 101, 110, 106, 131, …
## $ PEA_RAVLT_LD_TRIAL_VII_TC            <int> 7, 10, 5, 10, 14, 13, 12, 6, 7, 1…
## $ NIHTBX_LIST_UNCORRECTED              <dbl> 101, 97, 86, 90, 97, 113, 120, 78…
## $ LMT_SCR_PERC_CORRECT                 <dbl> 0.50000, 0.43750, 0.34375, 0.4687…
## $ PEA_WISCV_TRS                        <int> 19, 19, 16, 16, 17, 17, 27, 12, 1…
## $ NIHTBX_FLUIDCOMP_UNCORRECTED         <dbl> 96, 86, 89, 84, 96, 109, 115, 90,…
## $ NIHTBX_CRYST_UNCORRECTED             <dbl> 89, 88, 78, 76, 82, 90, 95, 79, 7…
## $ NIHTBX_TOTALCOMP_UNCORRECTED         <dbl> 90, 84, 80, 76, 86, 99, 105, 81, …
## $ CBCL_SCR_SYN_ANXDEP_R                <dbl> 15, 8, 1, NA, 3, 0, 0, 0, 3, 0, 1…
## $ CBCL_SCR_SYN_WITHDEP_R               <dbl> 5, 4, 1, NA, 0, 0, 0, 1, 1, 0, 1,…
## $ CBCL_SCR_SYN_SOMATIC_R               <dbl> 8, 2, 0, NA, 2, 2, 0, 2, 1, 0, 1,…
## $ CBCL_SCR_SYN_SOCIAL_R                <dbl> 9, 7, 0, NA, 2, 0, 0, 0, 2, 0, 3,…
## $ CBCL_SCR_SYN_THOUGHT_R               <dbl> 8, 4, 0, NA, 2, 1, 0, 0, 1, 1, 0,…
## $ CBCL_SCR_SYN_ATTENTION_R             <dbl> 14, 10, 0, NA, 0, 4, 0, 0, 4, 1, …
## $ CBCL_SCR_SYN_RULEBREAK_R             <dbl> 9, 5, 0, NA, 0, 2, 0, 0, 0, 0, 3,…
## $ CBCL_SCR_SYN_AGGRESSIVE_R            <dbl> 15, 10, 0, NA, 2, 7, 0, 0, 0, 0, …
## $ CBCL_SCR_SYN_INTERNAL_R              <dbl> 28, 14, 2, NA, 5, 2, 0, 3, 5, 0, …
## $ CBCL_SCR_SYN_EXTERNAL_R              <dbl> 24, 15, 0, NA, 2, 9, 0, 0, 0, 0, …
## $ CBCL_SCR_SYN_TOTPROB_R               <dbl> 88, 57, 2, NA, 13, 17, 0, 5, 14, …
## $ CBCL_SCR_DSM5_DEPRESS_R              <dbl> 6, 7, 0, NA, 2, 0, 0, 2, 0, 0, 1,…
## $ CBCL_SCR_DSM5_ANXDISORD_R            <dbl> 10, 6, 1, NA, 0, 0, 0, 0, 3, 0, 2…
## $ CBCL_SCR_DSM5_SOMATICPR_R            <dbl> 5, 2, 0, NA, 2, 1, 0, 2, 1, 0, 1,…
## $ CBCL_SCR_DSM5_ADHD_R                 <dbl> 13, 8, 0, NA, 1, 4, 0, 0, 2, 0, 4…
## $ CBCL_SCR_DSM5_OPPOSIT_R              <dbl> 6, 5, 0, NA, 2, 6, 0, 0, 0, 0, 1,…
## $ CBCL_SCR_DSM5_CONDUCT_R              <dbl> 11, 5, 0, NA, 0, 2, 0, 0, 0, 0, 1…
## $ CBCL_SCR_07_SCT_R                    <dbl> 2, 1, 0, NA, 0, 0, 0, 0, 0, 0, 1,…
## $ CBCL_SCR_07_OCD_R                    <dbl> 10, 4, 0, NA, 1, 0, 0, 0, 1, 0, 0…
## $ CBCL_SCR_07_STRESS_R                 <dbl> 13, 10, 1, NA, 1, 3, 0, 0, 2, 0, …
## $ ASR_SCR_PERSTR_T                     <dbl> 60, 53, 46, 40, 45, 42, 53, 34, 3…
## $ ASR_SCR_ANXDEP_T                     <dbl> 67, 67, 50, 52, 50, 50, 50, 53, 5…
## $ ASR_SCR_WITHDRAWN_T                  <dbl> 61, 67, 50, 54, 50, 51, 50, 50, 5…
## $ ASR_SCR_SOMATIC_T                    <dbl> 69, 69, 54, 70, 51, 50, 50, 63, 5…
## $ ASR_SCR_THOUGHT_T                    <dbl> 72, 62, 50, 50, 50, 50, 50, 54, 5…
## $ ASR_SCR_ATTENTION_T                  <dbl> 62, 66, 50, 56, 50, 50, 50, 52, 5…
## $ ASR_SCR_AGGRESSIVE_T                 <dbl> 75, 63, 50, 51, 54, 50, 50, 50, 5…
## $ ASR_SCR_RULEBREAK_T                  <dbl> 72, 71, 50, 50, 52, 50, 50, 50, 5…
## $ ASR_SCR_INTRUSIVE_T                  <dbl> 50, 68, 50, 51, 50, 50, 50, 50, 5…
## $ ASR_SCR_INTERNAL_T                   <dbl> 69, 71, 43, 60, 42, 40, 30, 55, 5…
## $ ASR_SCR_EXTERNAL_T                   <dbl> 70, 70, 32, 47, 50, 32, 32, 32, 4…
## $ ASR_SCR_TOTPROB_T                    <dbl> 66, 67, 31, 49, 41, 32, 25, 45, 4…
## $ ASR_SCR_DEPRESS_T                    <dbl> 62, 67, 50, 57, 51, 50, 50, 55, 5…
## $ ASR_SCR_ANXDISORD_T                  <dbl> 75, 73, 50, 55, 51, 50, 50, 63, 5…
## $ ASR_SCR_SOMATICPR_T                  <dbl> 70, 70, 58, 70, 51, 50, 50, 63, 5…
## $ ASR_SCR_AVOIDANT_T                   <dbl> 70, 64, 50, 54, 51, 58, 50, 51, 5…
## $ ASR_SCR_ADHD_T                       <dbl> 64, 69, 50, 52, 50, 50, 50, 50, 5…
## $ ASR_SCR_ANTISOCIAL_T                 <dbl> 73, 70, 50, 51, 52, 50, 50, 50, 5…
## $ ASR_SCR_INATTENTION_T                <dbl> 58, 68, 50, 57, 50, 50, 50, 52, 5…
## $ ASR_SCR_HYPERACTIVE_T                <dbl> 66, 67, 50, 50, 50, 50, 50, 50, 5…
## $ mania_parent                         <int> 5, 10, 0, NA, 7, 0, 0, 0, 0, 0, 0…
## $ BISAvg                               <dbl> 2.25, 1.50, 0.50, 0.50, 0.50, 1.7…
## $ BASRRAvg                             <dbl> 2.00, 3.00, 1.75, 0.25, 2.25, 2.2…
## $ BASDriveAvg                          <dbl> 0.50, 2.00, 0.75, 1.25, 0.75, 0.5…
## $ BASFunAvg                            <dbl> 1.75, 2.00, 1.50, 1.00, 0.25, 2.2…
## $ BASAllAvg                            <dbl> 1.4166667, 2.3333333, 1.3333333, …
## $ UPPS_Y_SS_NEGATIVE_URGENCY           <int> 9, 6, 9, 6, 13, 5, 10, 9, 9, 6, 1…
## $ UPPS_Y_SS_LACK_OF_PLANNING           <int> 8, 8, 5, 6, 5, 4, 8, 6, 4, 5, 7, …
## $ UPPS_Y_SS_SENSATION_SEEKING          <int> 11, 10, 8, 10, 13, 8, 13, 13, 9, …
## $ UPPS_Y_SS_POSITIVE_URGENCY           <int> 15, 8, 8, 10, 9, 6, 12, 9, 8, 4, …
## $ UPPS_Y_SS_LACK_OF_PERSEVERANCE       <int> 6, 11, 4, 9, 6, 7, 4, 6, 7, 6, 9,…
## $ sleep_hours                          <int> 2, 2, 3, NA, 4, 2, 2, 3, 3, 3, 1,…
## $ sleep_disturb                        <int> 1, 1, 1, NA, 2, 1, 1, 2, 2, 2, 2,…
## $ sleep_initiate_maintain              <int> 11, 8, 9, NA, 13, 12, 9, 15, 11, …
## $ sleep_breath                         <int> 4, 5, 3, NA, 3, 3, 4, 3, 7, 4, 3,…
## $ sleep_arousal                        <int> 4, 5, 3, NA, 3, 3, 3, 3, 3, 3, 3,…
## $ sleep_transition                     <int> 19, 10, 8, NA, 9, 7, 7, 10, 10, 6…
## $ sleep_somnolence                     <int> 11, 8, 5, NA, 5, 7, 5, 8, 6, 8, 8…
## $ sleep_hyperhydrosis                  <int> 2, 2, 2, NA, 2, 2, 2, 2, 2, 2, 3,…
## $ sleep_total                          <int> 51, 38, 30, NA, 35, 34, 30, 41, 3…
## $ matureGames_Screen                   <int> 0, 1, 0, 1, 1, 0, 0, 2, 0, 2, 1, …
## $ matureMovies_Screen                  <int> 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, …
## $ wkdySum_Screen                       <dbl> 1.75, 3.50, 1.75, 1.50, 2.00, 2.7…
## $ wkndSum_Screen                       <dbl> 2.00, 5.50, 1.75, 2.00, 2.00, 5.5…
## $ tobacco_before_preg                  <fct> 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 1,…
## $ tobacco_after_preg                   <fct> 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0,…
## $ alcohol_before_preg                  <fct> 0, 0, 0, NA, 0, 1, 0, 0, 0, 0, 1,…
## $ alcohol_after_preg                   <fct> 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0,…
## $ marijuana_before_preg                <fct> 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0,…
## $ marijuana_after_preg                 <fct> 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0,…
## $ deveplopment_prematurity             <fct> 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0,…
## $ deveplopment_birth_complications     <dbl> 0, 0, 0, NA, 0, 1, 0, 0, 0, 0, 0,…
## $ deveplopment_pregnancy_complications <dbl> 0, 0, 2, NA, 0, 0, 0, 1, 1, 0, NA…
## $ bilingual_status                     <fct> NA, NA, 1, 1, 1, NA, 1, 1, NA, NA…
## $ bilingual_degree                     <fct> NA, NA, 1, 1, 1, NA, 1, 1, NA, NA…
## $ bilingual_use                        <dbl> 0, 0, 6, 4, 5, 0, 3, 4, 0, 0, 2, …
## $ marital                              <fct> married, livingWithPartner, separ…
## $ education1stPar                      <int> 15, 6, 8, 12, 10, 19, 18, 13, 12,…
## $ education2ndPar                      <int> 15, 3, NA, NA, NA, 18, 15, 13, 15…
## $ educationAvg                         <dbl> 15.0, 4.5, 8.0, 12.0, 10.0, 18.5,…
## $ combinedIncome                       <int> 9, 3, NA, NA, 4, 10, NA, 6, 8, 4,…
## $ householdSize                        <int> 4, 5, NA, 3, 7, 5, 3, 7, 4, 4, 8,…
## $ econ_insecurities_sum                <dbl> 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, …
## $ area_deprivation_index               <dbl> 86.44, 102.27, 0.00, 97.47, 98.32…
## $ lead_risk                            <int> 7, 10, 10, 10, 10, 6, 6, 9, NA, 1…
## $ quartic_uniform_crime_reports        <dbl> 24.28903, 24.28903, 24.28903, 24.…
## $ neighbo_safety_parent_sum            <dbl> 10, 8, 7, 6, 8, 14, 12, 3, 9, 11,…
## $ neighbo_safety_child_sum             <int> 4, 3, 4, 4, 3, 5, 5, 4, 2, 5, 4, …
## $ sumSchool_environment                <dbl> 20, 21, 24, 23, 23, 22, 24, 21, 2…
## $ sumSchool_involvement                <dbl> 15, 13, 16, 16, 16, 15, 12, 13, 1…
## $ sumSchool_disengagement              <dbl> 2, 6, 2, 2, 2, 3, 2, 5, 2, 2, 2, …
## $ parent_monitor_mean                  <dbl> 4.4, 4.2, 4.6, 3.0, 4.8, 5.0, 3.8…
## $ fam_conflict_parent                  <dbl> 0, 4, 1, 6, 3, 2, 3, 3, 1, 0, 4, …
## $ fam_conflict_children                <dbl> 0, 0, 1, 0, 1, 1, 1, 3, 4, 1, 3, …
## $ prosocial_parent_mean                <dbl> 1.666667, 2.000000, 2.000000, NA,…
## $ prosocial_youth_mean                 <dbl> 1.666667, 1.333333, 2.000000, 1.0…
## $ phys_ind_daypweek_sum                <dbl> 10, 0, 0, NA, 4, 0, 9, 0, 4, 0, 5…
## $ phys_team_daypweek_sum               <dbl> 14, 0, 8, NA, 0, 12, 0, 0, 4, 6, …
## $ art_daypweek_sum                     <dbl> 4, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0,…
## $ sport_act_all_daypweek_sum           <dbl> 28, 0, 8, NA, 4, 12, 9, 0, 8, 6, …
## $ physc_act_days                       <int> 2, 1, 1, 3, 0, 7, 5, 7, 0, 1, 0, …
## $ SITE_ID_L                            <chr> "site01", "site01", "site01", "si…
## $ dup                                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ gfactor                              <dbl> 0.19958234, -0.03410110, -0.85882…

# Out-of-site predictions alongside the preprocessed variables
predicted_df_baked_all_sites %>% glimpse()

## Rows: 11,278
## Columns: 80
## $ SITE_ID_L                            <chr> "site01", "site01", "site01", "si…
## $ non_brain_predicted                  <dbl> -0.01874527, -1.66706903, -0.6442…
## $ CBCL_SCR_SYN_ANXDEP_R                <dbl> 4.0469948, 1.7752001, -0.4965946,…
## $ CBCL_SCR_SYN_WITHDEP_R               <dbl> 2.31248684, 1.72912794, -0.020948…
## $ CBCL_SCR_SYN_SOMATIC_R               <dbl> 3.3136168, 0.2523049, -0.7681324,…
## $ CBCL_SCR_SYN_SOCIAL_R                <dbl> 3.2275942, 2.3519174, -0.7129514,…
## $ CBCL_SCR_SYN_THOUGHT_R               <dbl> 2.8676630, 1.0635359, -0.7405912,…
## $ CBCL_SCR_SYN_ATTENTION_R             <dbl> 3.131137152, 1.992712479, -0.8533…
## $ CBCL_SCR_SYN_RULEBREAK_R             <dbl> 4.1567932, 2.0249091, -0.6399461,…
## $ CBCL_SCR_SYN_AGGRESSIVE_R            <dbl> 2.6816012, 1.5377476, -0.7499598,…
## $ ASR_SCR_PERSTR_T                     <dbl> 1.287236908, 0.534889327, -0.2174…
## $ ASR_SCR_ANXDEP_T                     <dbl> 2.34195330, 2.34195330, -0.607354…
## $ ASR_SCR_WITHDRAWN_T                  <dbl> 1.59961503, 2.77322736, -0.552007…
## $ ASR_SCR_SOMATIC_T                    <dbl> 2.2898535, 2.2898535, -0.1289775,…
## $ ASR_SCR_THOUGHT_T                    <dbl> 3.7189976, 1.7655906, -0.5784979,…
## $ ASR_SCR_ATTENTION_T                  <dbl> 1.3590482, 2.0341371, -0.6662184,…
## $ ASR_SCR_AGGRESSIVE_T                 <dbl> 4.24679033, 1.88858580, -0.666135…
## $ ASR_SCR_RULEBREAK_T                  <dbl> 4.1150086, 3.9032372, -0.5439627,…
## $ ASR_SCR_INTRUSIVE_T                  <dbl> -0.4867646, 4.7207486, -0.4867646…
## $ mania_parent                         <dbl> 1.3241907, 3.1194834, -0.4711019,…
## $ BISAvg                               <dbl> 1.2209757, 0.1658362, -1.2410165,…
## $ BASRRAvg                             <dbl> -0.32251573, 1.28784986, -0.72510…
## $ BASDriveAvg                          <dbl> -0.69320511, 1.27454672, -0.36524…
## $ BASFunAvg                            <dbl> 0.4822646, 0.8601754, 0.1043539, …
## $ UPPS_Y_SS_NEGATIVE_URGENCY           <dbl> 0.1875367, -0.9457830, 0.1875367,…
## $ UPPS_Y_SS_LACK_OF_PLANNING           <dbl> 0.1041536, 0.1041536, -1.1588821,…
## $ UPPS_Y_SS_SENSATION_SEEKING          <dbl> 0.44795430, 0.07466475, -0.671914…
## $ UPPS_Y_SS_POSITIVE_URGENCY           <dbl> 2.3587127608, -0.0005867521, -0.0…
## $ UPPS_Y_SS_LACK_OF_PERSEVERANCE       <dbl> -0.4633063, 1.7560254, -1.3510390…
## $ sleep_hours                          <dbl> 0.3579279, 0.3579279, 1.5884740, …
## $ sleep_disturb                        <dbl> -0.95463743, -0.95463743, -0.9546…
## $ sleep_initiate_maintain              <dbl> -0.20835221, -1.00678383, -0.7406…
## $ sleep_breath                         <dbl> 0.18492738, 0.98843794, -0.618583…
## $ sleep_arousal                        <dbl> 0.6055256, 1.6895942, -0.4785430,…
## $ sleep_transition                     <dbl> 4.10587162, 0.68697951, -0.072774…
## $ sleep_somnolence                     <dbl> 1.64482380, 0.42245781, -0.799908…
## $ sleep_hyperhydrosis                  <dbl> -0.3697340, -0.3697340, -0.369734…
## $ matureGames_Screen                   <dbl> -0.6484096, 0.5010773, -0.6484096…
## $ matureMovies_Screen                  <dbl> -0.5908421, -0.5908421, -0.590842…
## $ wkdySum_Screen                       <dbl> -0.55111237, 0.01488049, -0.55111…
## $ wkndSum_Screen                       <dbl> -0.7215559, 0.2427317, -0.7904336…
## $ deveplopment_birth_complications     <dbl> -0.4961850, -0.4961850, -0.496185…
## $ deveplopment_pregnancy_complications <dbl> -0.6000502, -0.6000502, 1.3712680…
## $ bilingual_use                        <dbl> -0.54934102, -0.54934102, 3.20543…
## $ educationAvg                         <dbl> -0.566530119, -4.601676348, -3.25…
## $ combinedIncome                       <dbl> 0.7220577, -1.7937491, -0.9551468…
## $ householdSize                        <dbl> -0.45720752, 0.19578002, 0.065182…
## $ econ_insecurities_sum                <dbl> -0.4251869, 1.4018488, -0.4251869…
## $ area_deprivation_index               <dbl> -0.278164178, 0.367542661, -3.804…
## $ lead_risk                            <dbl> 0.66052285, 1.63507717, 1.6350771…
## $ quartic_uniform_crime_reports        <dbl> 2.33990202, 2.33990202, 2.3399020…
## $ neighbo_safety_parent_sum            <dbl> -0.60584355, -1.29867284, -1.6450…
## $ neighbo_safety_child_sum             <dbl> -0.0339937, -0.9524127, -0.033993…
## $ sumSchool_environment                <dbl> 0.02510678, 0.38090575, 1.4483026…
## $ sumSchool_involvement                <dbl> 0.82448335, -0.02310619, 1.248278…
## $ sumSchool_disengagement              <dbl> -1.1981619, 1.5477542, -1.1981619…
## $ parent_monitor_mean                  <dbl> 0.03098195, -0.35734179, 0.419305…
## $ fam_conflict_parent                  <dbl> -1.2981374, 0.7390790, -0.7888333…
## $ fam_conflict_children                <dbl> -1.04553814, -1.04553814, -0.5348…
## $ prosocial_parent_mean                <dbl> -0.2092621, 0.6193245, 0.6193245,…
## $ prosocial_youth_mean                 <dbl> -0.02414115, -0.92288397, 0.87460…
## $ phys_ind_daypweek_sum                <dbl> 0.91005294, -0.87793090, -0.87793…
## $ phys_team_daypweek_sum               <dbl> 0.76237360, -1.07259390, -0.02404…
## $ art_daypweek_sum                     <dbl> -0.1555468, -0.7995276, -0.799527…
## $ physc_act_days                       <dbl> -0.6571645, -1.0897235, -1.089723…
## $ gfactor                              <dbl> 0.2542287, -0.0434381, -1.0939692…
## $ tobacco_after_preg_X1                <dbl> -0.2383455, -0.2383455, -0.238345…
## $ alcohol_after_preg_X1                <dbl> -0.1663372, -0.1663372, -0.166337…
## $ marijuana_after_preg_X1              <dbl> -0.1473317, -0.1473317, -0.147331…
## $ deveplopment_prematurity_X1          <dbl> -0.4807505, -0.4807505, -0.480750…
## $ SEX_M                                <dbl> -1.0488617, 0.9533272, -1.0488617…
## $ RACE_ETHNICITY_Black                 <dbl> -0.4206907, -0.4206907, -0.420690…
## $ RACE_ETHNICITY_Hispanic              <dbl> 2.0817662, 2.0817662, 2.0817662, …
## $ RACE_ETHNICITY_Asian                 <dbl> -0.1447151, -0.1447151, -0.144715…
## $ RACE_ETHNICITY_Other                 <dbl> -0.3445854, -0.3445854, -0.344585…
## $ marital_widowed                      <dbl> -0.08909534, -0.08909534, -0.0890…
## $ marital_divorced                     <dbl> -0.3175018, -0.3175018, -0.317501…
## $ marital_separated                    <dbl> -0.1992563, -0.1992563, 5.0182014…
## $ marital_neverMarried                 <dbl> -0.3708123, -0.3708123, -0.370812…
## $ marital_livingWithPartner            <dbl> -0.2396301, 4.1727160, -0.2396301…

# Mean and SD of each hold-out metric across sites
summarise(
  group_by(elastic_net_final_metrics_all_sites, .metric),
  mean_estimate = mean(.estimate),
  sd_estimate = sd(.estimate)
)

## # A tibble: 3 × 3
##   .metric  mean_estimate sd_estimate
##   <chr>            <dbl>       <dbl>
## 1 mae              0.653      0.0392
## 2 rmse             0.829      0.0498
## 3 rsq_trad         0.282      0.0747

# Correlation between predicted and observed g within each held-out site,
# then the mean and SD of those correlations across sites
predicted_df_all_sites %>%
  group_by(SITE_ID_L) %>%
  dplyr::summarise(
    cor_non_brain_g = cor(x = non_brain_predicted, y = gfactor)
  ) %>%
  dplyr::summarise(
    mean_cor = mean(cor_non_brain_g),
    sd_cor = sd(cor_non_brain_g)
  )

## # A tibble: 1 × 2
##   mean_cor sd_cor
##      <dbl>  <dbl>
## 1    0.534 0.0680

2.10 plot predicted vs observed across sites

# Scatter of predicted vs observed g-factor pooled over all held-out sites.
# Both axes are z-scored over the pooled data; the line is a linear fit.
predicted_df_all_sites %>%
  ggplot(aes(x = scale(non_brain_predicted), y = scale(gfactor))) +
  geom_point(alpha = .3) +
  geom_smooth(method = "lm") +
  labs(
    x = "out-of-site g-factor (Z) predicted by\nsocio-demographic-psychological factors",
    y = "observed g-factor (Z)",
    color = ""
  ) +
  theme_classic(base_size = 24)

## `geom_smooth()` using formula 'y ~ x'

2.11 plot top features

# Coefficient table (tidy output) of every site's final model
elastic_net_model_val_df_list <- map(elastic_net_model_val_list, tidy)

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## Loaded glmnet 4.1-3

# Mean and SD of every coefficient across the per-site models.
#
# Row-wise statistics are only meaningful if every site's coefficient table
# lists the same terms in the same order, so that is checked first instead of
# being assumed.
elastic_net_terms <- elastic_net_model_val_df_list[[1]]$term
stopifnot(all(vapply(
  elastic_net_model_val_df_list,
  function(site_df) identical(site_df$term, elastic_net_terms),
  logical(1)
)))

# terms x sites matrix of estimates, built once. vapply() guarantees a
# numeric matrix, whereas sapply() would silently return a list if the
# tables differed in length.
elastic_net_estimate_mat <- vapply(
  elastic_net_model_val_df_list,
  function(site_df) site_df$estimate,
  numeric(length(elastic_net_terms))
)

elastic_net_model_val_df <- tibble(
  term = elastic_net_terms,
  mean_estimate = rowMeans(elastic_net_estimate_mat),
  sd_estimate = matrixStats::rowSds(elastic_net_estimate_mat)
)

#load cont names used in ASEG 
NonBrainFeaturesRead<-tibble::as_tibble(read.csv(paste0(utilFold,"NonBrainFeaturesRead.csv")))

# Attach readable labels to the coefficient summary. After the join the model's
# own term name is kept in `original`, and `term` holds the readable label that
# is shown on the plot axis.
elastic_net_model_val_df_named <- NonBrainFeaturesRead %>%
  rename(term = original) %>%
  full_join(elastic_net_model_val_df, by = "term") %>%
  rename(original = term) %>%
  rename(term = readable)

# Bar chart of the five features with the largest absolute mean coefficient,
# with error bars of +/- 1 SD across models.
# NOTE(review): the title advertises a |coefficient| >= .1 cut-off, but the
# selection below is simply the top 5 by absolute mean -- confirm they coincide.
# NOTE(review): rows whose readable label is NA are dropped by the filter
# below, because `NA != "(Intercept)"` evaluates to NA.
elastic_net_model_val_df_named %>%
  filter(term != "(Intercept)") %>%
  top_n(5, abs(mean_estimate)) %>%
  ggplot(
    aes(
      fct_reorder(term, mean_estimate),
      mean_estimate,
      fill = mean_estimate > 0
    )
  ) +
  geom_col(alpha = 0.8, show.legend = FALSE) +
  geom_errorbar(
    aes(
      ymin = mean_estimate - sd_estimate,
      ymax = mean_estimate + sd_estimate
    ),
    width = .2,
    position = position_dodge(.9)
  ) +
  coord_flip() +
  theme_minimal(base_size = 17) +
  labs(
    x = NULL,
    y = "Std Coefficients\nAcross Folds (M ±SD)",
    # spelling fixed: "Coeffients" -> "Coefficients"
    title = "Socio-Demographic\n& Psychological features\nthat Predict G-Factor\nWith |Std Coefficients| ≥ .1"
  )

## top 20

# Apply tidy() to every fitted model in elastic_net_model_val_list; the `term`
# and `estimate` columns of each result are used below.
elastic_net_model_val_df_list <- purrr::map(
  .x = elastic_net_model_val_list,
  .f = tidy
)

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## Loaded glmnet 4.1-3

# Bind the per-model `estimate` columns into one matrix (rows = terms,
# columns = models) and summarise each term by its mean and SD across models.
# Term labels are taken from the first model, so this assumes every model
# lists its terms in the same order -- TODO confirm.
elastic_net_model_val_df <- elastic_net_model_val_df_list %>%
  sapply(`[[`, "estimate") %>%
  {
    tibble(
      term = elastic_net_model_val_df_list[[1]]$term,
      mean_estimate = rowMeans(.),
      sd_estimate = matrixStats::rowSds(.)
    )
  }

# Lookup table of feature names; its `original` and `readable` columns are
# used in the join that follows.
NonBrainFeaturesRead <- tibble::as_tibble(
  read.csv(paste0(utilFold, "NonBrainFeaturesRead.csv"))
)

# Attach readable labels to the coefficient summary. After the join the model's
# own term name is kept in `original`, and `term` holds the readable label that
# is shown on the plot axis.
elastic_net_model_val_df_named <- NonBrainFeaturesRead %>%
  rename(term = original) %>%
  full_join(elastic_net_model_val_df, by = "term") %>%
  rename(original = term) %>%
  rename(term = readable)

# Bar chart of the 20 features with the largest absolute mean coefficient,
# with error bars of +/- 1 SD across models.
elastic_net_model_val_df_named %>%
  filter(term != "(Intercept)") %>%
  top_n(20, abs(mean_estimate)) %>%
  ggplot(
    aes(
      x = fct_reorder(term, mean_estimate),
      y = mean_estimate,
      fill = mean_estimate > 0
    )
  ) +
  geom_col(alpha = 0.8, show.legend = FALSE) +
  geom_errorbar(
    aes(
      ymin = mean_estimate - sd_estimate,
      ymax = mean_estimate + sd_estimate
    ),
    width = .2,
    position = position_dodge(.9)
  ) +
  coord_flip() +
  theme_minimal(base_size = 17) +
  labs(
    x = NULL,
    y = "Coefficients Across Folds (M ±SD)",
    title = "Top 20 Socio-Demographic\n& Psychological features\nthat Predict G-Factor"
  )

2.12 load the predicted G-Factor from brain variables

Then examine the correlations among the observed g-factor and the g-factor predicted from brain variables and from non-brain variables (collapsed across sites).

# Brain-based prediction results; the `Stacked` column is used below as the
# brain-predicted g-factor.
brainPredicted <- read_csv(
  file = "~/OneDrive - University of Otago/ABCD3/Analysis/FeatureExploration/brainData/All_pred_results05_01_2021.csv"
)

## New names:
## * `` -> ...1

## Rows: 10624 Columns: 10

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): SUBJECTKEY, SITE_ID_L
## dbl (8): ...1, Stacked, Nback, SST, MID, rsmri, smri, DTI

## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Full-join the non-brain predictions with the brain predictions (unmatched
# rows from either table are kept), give the three g-factor columns descriptive
# names, and add dummy columns for sex and race/ethnicity.
predicted_brain_no_brain_all_sites <- predicted_df_all_sites %>%
  full_join(brainPredicted, by = c("SUBJECTKEY", "SITE_ID_L")) %>%
  rename(
    predicted_G_brain = Stacked,
    predicted_G_non_brain = non_brain_predicted,
    observed_G = gfactor
  ) %>%
  fastDummies::dummy_cols(
    select_columns = c("SEX", "RACE_ETHNICITY"),
    remove_first_dummy = TRUE
  ) %>%
  # binary indicator: 0 for "White", 1 for any other category (NA stays NA)
  mutate(race_non_white = ifelse(RACE_ETHNICITY != "White", 1, 0))

#glimpse(predicted_brain_no_brain_all_sites)

# Correlation chart (with histograms) of the observed g-factor and the two
# predicted g-factors.
PerformanceAnalytics::chart.Correlation(
  select(
    predicted_brain_no_brain_all_sites,
    observed_G, predicted_G_brain, predicted_G_non_brain
  ),
  histogram = TRUE,
  pch = 19
)

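2.13 fit the mediation model using socio-demographic-psychological factors as the independent variable

The brain-predicted g-factor is the mediator.
The observed g-factor is the dependent variable.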

# lavaan syntax for a single-mediator model:
#   a = non-brain prediction -> brain prediction
#   b = brain prediction     -> observed g
#   c = non-brain prediction -> observed g (direct path)
# `ab`, `total` and `prop` are the derived indirect effect, total effect and
# proportion mediated.
model_brain_soc_psyc<-'

observed_G~b*predicted_G_brain+c*predicted_G_non_brain
predicted_G_brain~a*predicted_G_non_brain

#indirect and total effects between
ab:=a*b
total:=ab+c
prop:=ab/total
'

# Fix the RNG state so the bootstrap draws are reproducible.
set.seed(1234)

fit_model_brain_soc_psyc <- lavaan::sem(
  model = model_brain_soc_psyc,
  data = predicted_brain_no_brain_all_sites,
  se = "bootstrap",
  bootstrap = 5000,
  test = "bollen.stine"
)

lavaan::summary(
  fit_model_brain_soc_psyc,
  fit.measures = TRUE,
  rsquare = TRUE,
  standardized = TRUE
)

## lavaan 0.6-9 ended normally after 17 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
##                                                       
##                                                   Used       Total
##   Number of observations                         10628       11278
##                                                                   
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Model Test Baseline Model:
## 
##   Test statistic                              7012.371
##   Degrees of freedom                                 3
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -15257.212
##   Loglikelihood unrestricted model (H1)     -15257.212
##                                                       
##   Akaike (AIC)                               30524.424
##   Bayesian (BIC)                             30560.781
##   Sample-size adjusted Bayesian (BIC)        30544.891
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent confidence interval - lower         0.000
##   90 Percent confidence interval - upper         0.000
##   P-value RMSEA <= 0.05                             NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000
## 
## Parameter Estimates:
## 
##   Standard errors                            Bootstrap
##   Number of requested bootstrap draws             5000
##   Number of successful bootstrap draws            5000
## 
## Regressions:
##                       Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   observed_G ~                                                             
##     prdctd_G_b (b)       0.486    0.015   33.291    0.000    0.486    0.275
##     prdctd_G__ (c)       0.635    0.012   52.839    0.000    0.635    0.460
##   predicted_G_brain ~                                                      
##     prdctd_G__ (a)       0.309    0.007   45.962    0.000    0.309    0.395
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .observed_G        0.371    0.005   67.740    0.000    0.371    0.613
##    .predictd_G_brn    0.163    0.002   84.655    0.000    0.163    0.844
## 
## R-Square:
##                    Estimate
##     observed_G        0.387
##     predictd_G_brn    0.156
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     ab                0.150    0.006   27.208    0.000    0.150    0.109
##     total             0.785    0.012   67.765    0.000    0.785    0.569
##     prop              0.191    0.007   26.753    0.000    0.191    0.191

# Parameter table with 95% bootstrap confidence intervals ("bca.simple" type)
# and standardized solutions, rendered as a table.
fit_model_brain_soc_psyc_para <- lavaan::parameterEstimates(
  fit_model_brain_soc_psyc,
  ci = TRUE,
  level = 0.95,
  boot.ci.type = "bca.simple",
  standardized = TRUE
)

knitr::kable(fit_model_brain_soc_psyc_para)

lhs	op	rhs	label	est	se	z	pvalue	ci.lower	ci.upper	std.lv	std.all	std.nox
observed_G	~	predicted_G_brain	b	0.4855600	0.0145851	33.29148	0	0.4571685	0.5142848	0.4855600	0.2747331	0.2747331
observed_G	~	predicted_G_non_brain	c	0.6348281	0.0120144	52.83872	0	0.6120880	0.6585293	0.6348281	0.4601710	0.8163206
predicted_G_brain	~	predicted_G_non_brain	a	0.3086663	0.0067156	45.96233	0	0.2960008	0.3219493	0.3086663	0.3954433	0.7014969
observed_G	~~	observed_G		0.3705897	0.0054708	67.73981	0	0.3597279	0.3812804	0.3705897	0.6127772	0.6127772
predicted_G_brain	~~	predicted_G_brain		0.1633338	0.0019294	84.65457	0	0.1596145	0.1671936	0.1633338	0.8436246	0.8436246
predicted_G_non_brain	~~	predicted_G_non_brain		0.3177730	0.0000000	NA	NA	0.3177730	0.3177730	0.3177730	1.0000000	0.3177730
ab	:=	a*b	ab	0.1498760	0.0055085	27.20826	0	0.1395263	0.1611217	0.1498760	0.1086414	0.1927244
total	:=	ab+c	total	0.7847041	0.0115797	67.76544	0	0.7620716	0.8074552	0.7847041	0.5688123	1.0090450
prop	:=	ab/total	prop	0.1909968	0.0071391	26.75344	0	0.1775028	0.2055419	0.1909968	0.1909968	0.1909968

3 Genetics Data Prep and Modeling

3.1 load genetics data

# Folders holding the genetics inputs used in this section.
prsScoreFold <- "~/OneDrive - University of Otago/ABCD3/genetics/data-bundle-abcd3.0/abcd-release-3.0_chrall_0.8-mac5-hg19-eur-qc-v9-genotypes/"
GeneticQCfold <- "~/OneDrive - University of Otago/ABCD3/ABCD3GenotypeWithoutImputed/genomics_sample03/ABCD_genotype/"
Geneticfold <- "~/OneDrive - University of Otago/ABCD3/genetics/data-bundle-abcd3.0-afr/"

# Genotyping batch per participant; columns renamed to match the other tables.
batch_info <- read.delim(
  "~/OneDrive - University of Otago/ABCD3/ABCD3GenotypeWithoutImputed/genomics_sample03/ABCD_genotype/ABCD_release3.0_.batch_info.txt"
) %>%
  rename(
    SUBJECTKEY = abcd.id_redcap,
    batch = Axiom_Plate
  )

# One subject key may carry a stray leading backtick; strip it if present.
batch_info$SUBJECTKEY[
  batch_info$SUBJECTKEY %in% "`NDAR_INVF3FYXH1G"
] <- "NDAR_INVF3FYXH1G"

# Keep-list from the "eur ... bim2unrelated" file. Each line is read into the
# single column V1; the "ABCD<tab>AB0dddddd_" prefix is removed to leave the
# subject key, and every listed participant is flagged with EuNotRelated = 1.
eur_bim2unrelated <- read.csv(
  "~/OneDrive - University of Otago/ABCD3/genetics/data-bundle-abcd3.0-afr/abcd-release-3.0_chrall_0.8-mac5-hg19-eur-qc-v9/abcd-release-3.0_chrall_0.8-mac5-hg19-eur-qc-v9_bim2unrelated.keep",
  header = FALSE
) %>%
  transmute(
    SUBJECTKEY = str_remove_all(V1, "ABCD\tAB0[:digit:]{6}_"),
    EuNotRelated = 1
  )

eur_bim2unrelated$SUBJECTKEY[
  eur_bim2unrelated$SUBJECTKEY %in% "`NDAR_INVF3FYXH1G"
] <- "NDAR_INVF3FYXH1G"

# Participants to exclude, each flagged with SUBJ_QC_BAD = 1.
SUBJ_QC_BAD <- c("NDAR_INVA7RNTEHU", "NDAR_INVV7NEVHLK")
badImputed <- tibble(
  SUBJECTKEY = SUBJ_QC_BAD,
  SUBJ_QC_BAD = 1 # recycled to one value per listed participant
)

# Baseline vision screening. visionProb is 1 when SNELLEN_VA_Y is 0 or 1 or
# when VIS_FLG is 2, 0 otherwise, and NA when the condition cannot be
# evaluated.
vision_idx <- paste0(dataFold, "ABCD_SVS01_DATA_TABLE.CSV") %>%
  read.csv() %>%
  tibble::as_tibble() %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  mutate(
    visionProb = ifelse(
      SNELLEN_VA_Y == 0 | SNELLEN_VA_Y == 1 | VIS_FLG == 2,
      1,
      0
    )
  )

# Full-join all QC tables on SUBJECTKEY so every participant from any table
# is retained.
PRSQcEuNotRelated <- plyr::join_all(
  dfs = list(batch_info, eur_bim2unrelated, badImputed, vision_idx),
  by = "SUBJECTKEY",
  type = "full"
)

# Cross-tabulate the four inclusion criteria applied below.
knitr::kable(
  count(
    PRSQcEuNotRelated,
    EuNotRelated == 1,
    batch != 461,
    is.na(SUBJ_QC_BAD),
    visionProb != 1 | is.na(visionProb)
  )
)

EuNotRelated == 1	batch != 461	is.na(SUBJ_QC_BAD)	visionProb != 1 \| is.na(visionProb)	n
TRUE	FALSE	TRUE	TRUE	32
TRUE	TRUE	TRUE	FALSE	8
TRUE	TRUE	TRUE	TRUE	4814
NA	FALSE	TRUE	TRUE	50
NA	TRUE	TRUE	FALSE	20
NA	TRUE	TRUE	TRUE	6175
NA	NA	FALSE	TRUE	2
NA	NA	TRUE	FALSE	3
NA	NA	TRUE	TRUE	810

# Number of participants passing all four criteria: on the keep-list, not in
# batch 461, not flagged in SUBJ_QC_BAD, and no flagged vision problem
# (missing vision data is allowed).
finalParticipantNum <- nrow(
  filter(
    PRSQcEuNotRelated,
    EuNotRelated == 1,
    batch != 461,
    is.na(SUBJ_QC_BAD),
    visionProb != 1 | is.na(visionProb)
  )
)

# Profile scores from the lee-wedow-okbay-2018 cognitive GWAS folder. `iid`
# becomes SUBJECTKEY after removing the "ABddddddd_" prefix.
leeCognition <- paste0(
  prsScoreFold,
  "lee-wedow-okbay-2018-cognitive-gwas/abcd-release-3.0_chrall_0.8-mac5-hg19-eur-qc-v9-genotypes-lee-wedow-okbay-2018-cognitive-gwas-profiles.csv"
) %>%
  read.table(header = TRUE) %>%
  tibble::as_tibble() %>%
  rename(SUBJECTKEY = iid) %>%
  mutate(SUBJECTKEY = str_remove_all(SUBJECTKEY, "AB[:digit:]{7}_"))

# One subject key may carry a stray leading backtick; strip it if present.
leeCognition$SUBJECTKEY[
  leeCognition$SUBJECTKEY %in% "`NDAR_INVF3FYXH1G"
] <- "NDAR_INVF3FYXH1G"

# Prefix every column whose name matches "p1" or "p5" with "leeCognition".
leeCognitionOri <- colnames(select(leeCognition, matches("p1|p5")))
leeCognitionNew <- paste0("leeCognition", leeCognitionOri)
leeCognition.renamed <- rename_with(
  leeCognition,
  .fn = ~ leeCognitionNew,
  .cols = all_of(leeCognitionOri)
)

# Combine predictions, polygenic scores and QC flags on SUBJECTKEY; keep only
# the first of any columns sharing a name; add a row ID; apply the four
# inclusion criteria; and code sex numerically (1 = "F", 0 otherwise).
predicted_brain_no_brain_all_sites_leeCog <- plyr::join_all(
  dfs = list(
    predicted_brain_no_brain_all_sites,
    leeCognition.renamed,
    PRSQcEuNotRelated
  ),
  by = "SUBJECTKEY",
  type = "full"
) %>%
  {
    .[, !duplicated(colnames(.))]
  } %>%
  tibble::rowid_to_column("ID") %>%
  filter(
    EuNotRelated == 1,
    batch != 461,
    is.na(SUBJ_QC_BAD),
    visionProb != 1 | is.na(visionProb)
  ) %>%
  mutate(SEXnum = ifelse(SEX == "F", 1, 0))

# Z-score the polygenic scores, the `pc*` columns, the observed g-factor and
# the brain-predicted g-factor.
# NOTE(review): predicted_G_non_brain is not in this list, so it stays on its
# original scale in the `_scaled` data -- confirm that this is intended.
predicted_brain_no_brain_all_sites_leeCog_scaled <-
  predicted_brain_no_brain_all_sites_leeCog %>%
  mutate(
    across(
      c(
        ends_with("_score") & starts_with("lee"),
        starts_with("pc"),
        observed_G, predicted_G_brain
      ),
      ~ scale(.x, center = TRUE, scale = TRUE)
    )
  )

# Same standardisation for the individual brain modalities, restricted to
# participants with no missing value in any of the six modality columns.
predicted_brain_no_brain_all_sites_leeCog_mulimodalscaled <-
  predicted_brain_no_brain_all_sites_leeCog %>%
  drop_na(Nback, SST, MID, rsmri, smri, DTI) %>%
  mutate(
    across(
      c(
        ends_with("_score") & starts_with("lee"),
        starts_with("pc"),
        observed_G,
        Nback, SST, MID, rsmri, smri, DTI
      ),
      ~ scale(.x, center = TRUE, scale = TRUE)
    )
  )

3.2 correlation between PGS of cog performance and observed G across thresholds

# Lower bound of the confidence interval that cor.test() reports for the
# correlation between vec_a and vec_b (cor.test defaults: Pearson, 95% level).
calc_pearson_conflint_low <- function(vec_a, vec_b) {
  interval <- cor.test(vec_a, vec_b)$conf.int
  interval[[1]]
}

# Upper bound of the confidence interval that cor.test() reports for the
# correlation between vec_a and vec_b (cor.test defaults: Pearson, 95% level).
calc_pearson_conflint_high <- function(vec_a, vec_b) {
  interval <- cor.test(vec_a, vec_b)$conf.int
  interval[[2]]
}

# Lower CI bound of the correlation between each polygenic-score column and
# the observed g-factor (one row per score column).
leeCog_ci_low <- predicted_brain_no_brain_all_sites_leeCog %>%
  select(ends_with("_score") & starts_with("lee"), observed_G) %>%
  corrr::colpair_map(calc_pearson_conflint_low) %>%
  select(term, observed_G) %>%
  filter(term != "observed_G") %>%
  rename(observed_G_ci_low = observed_G)

# Matching upper CI bounds.
leeCog_ci_high <- predicted_brain_no_brain_all_sites_leeCog %>%
  select(ends_with("_score") & starts_with("lee"), observed_G) %>%
  corrr::colpair_map(calc_pearson_conflint_high) %>%
  select(term, observed_G) %>%
  filter(term != "observed_G") %>%
  rename(observed_G_ci_high = observed_G)

# Point estimates of the same correlations.
leeCog_corr <- predicted_brain_no_brain_all_sites_leeCog %>%
  select(ends_with("_score") & starts_with("lee"), observed_G) %>%
  corrr::correlate() %>%
  select(term, observed_G) %>%
  filter(term != "observed_G")

## 
## Correlation method: 'pearson'
## Missing treated using: 'pairwise.complete.obs'

# Bar chart of the correlation between the observed g-factor and the
# polygenic score at each p-value threshold, with confidence-interval bars.
plyr::join_all(
  list(leeCog_corr, leeCog_ci_low, leeCog_ci_high),
  by = "term", type = "full", match = "all"
) %>%
  # Threshold labels are attached by row position.
  # NOTE(review): this assumes the score columns appear in exactly this order
  # (.1 ... .00000001, then .5, .05) -- confirm against the `term` column.
  bind_cols(
    Threshold = c(
      ".1", ".01", ".001", ".0001", ".00001", ".000001",
      ".0000001", ".00000001", ".5", ".05"
    )
  ) %>%
  arrange(desc(Threshold)) %>%
  ggplot(aes(x = Threshold, y = observed_G)) +
  geom_col(alpha = 0.8, show.legend = FALSE) +
  geom_errorbar(
    aes(ymin = observed_G_ci_low, ymax = observed_G_ci_high),
    width = 0.2
  ) +
  coord_flip() +
  theme_minimal(base_size = 20) +
  labs(
    x = "PGS Threshold at p<",
    y = "Pearson Correlation with CI95%",
    # spelling fixed: "Cogntive" -> "Cognitive"
    title = "Correlation between the G-Factor\nand PGS of Cognitive Ability"
  )

3.3 plot a scatter between PGS at .01 and G-Factor

# Scatter plot of the observed g-factor (z-scored here) against the polygenic
# score at the p < .01 threshold, with a linear fit.
predicted_brain_no_brain_all_sites_leeCog %>%
  ggplot(
    mapping = aes(
      x = leeCognitionp1e_2_score,
      y = scale(observed_G)
    )
  ) +
  geom_point(alpha = .3) +
  geom_smooth(method = "lm") +
  labs(
    color = "",
    # spelling fixed: "Cogntive" -> "Cognitive"
    x = "PGS of Cognitive Ability\nat p<.01 PGS Threshold",
    y = "observed g-factor (Z)"
  ) +
  theme_classic(base_size = 24)

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 204 rows containing non-finite values (stat_smooth).

## Warning: Removed 204 rows containing missing values (geom_point).

# Pearson correlation test between the p < .01 polygenic score and the
# observed g-factor.
cor.test(
  x = predicted_brain_no_brain_all_sites_leeCog$leeCognitionp1e_2_score,
  y = predicted_brain_no_brain_all_sites_leeCog$observed_G
)

## 
##  Pearson's product-moment correlation
## 
## data:  predicted_brain_no_brain_all_sites_leeCog$leeCognitionp1e_2_score and predicted_brain_no_brain_all_sites_leeCog$observed_G
## t = 14.528, df = 4609, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1814861 0.2366902
## sample estimates:
##       cor 
## 0.2092549

3.4 fit the mediation model using the PGS at the p < .01 threshold as the independent variable

The brain-predicted g-factor is the mediator.
The observed g-factor is the dependent variable.

# The model is fitted as if there were no clustering in the data.
# lavaan syntax for a single-mediator model with pc1-pc4 as covariates of both
# outcomes:
#   a = polygenic score  -> brain prediction
#   b = brain prediction -> observed g
#   c = polygenic score  -> observed g (direct path)
# `ab`, `total` and `prop` are the derived indirect effect, total effect and
# proportion mediated.
leecog_lavaan <-'

observed_G~b*predicted_G_brain+c*leeCognitionp1e_2_score 
predicted_G_brain~a*leeCognitionp1e_2_score 

observed_G~pc1 + pc2 + pc3 + pc4 
predicted_G_brain~pc1 + pc2 + pc3 + pc4  

#indirect and total effects between
ab:=a*b
total:=ab+c
prop:=ab/total

'

# Fix the RNG state so the bootstrap draws are reproducible.
set.seed(1234)

fit_leecog_lavaan <- lavaan::sem(
  model = leecog_lavaan,
  data = predicted_brain_no_brain_all_sites_leeCog_scaled,
  se = "bootstrap",
  bootstrap = 5000,
  test = "bollen.stine"
)

lavaan::summary(
  fit_leecog_lavaan,
  fit.measures = TRUE,
  rsquare = TRUE,
  standardized = TRUE
)

## lavaan 0.6-9 ended normally after 26 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        13
##                                                       
##                                                   Used       Total
##   Number of observations                          4389        4815
##                                                                   
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Model Test Baseline Model:
## 
##   Test statistic                               851.124
##   Degrees of freedom                                11
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -11988.454
##   Loglikelihood unrestricted model (H1)     -11988.454
##                                                       
##   Akaike (AIC)                               24002.908
##   Bayesian (BIC)                             24085.937
##   Sample-size adjusted Bayesian (BIC)        24044.628
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent confidence interval - lower         0.000
##   90 Percent confidence interval - upper         0.000
##   P-value RMSEA <= 0.05                             NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000
## 
## Parameter Estimates:
## 
##   Standard errors                            Bootstrap
##   Number of requested bootstrap draws             5000
##   Number of successful bootstrap draws            5000
## 
## Regressions:
##                       Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   observed_G ~                                                             
##     prdctd_G_b (b)       0.342    0.014   24.851    0.000    0.342    0.345
##     lCgntn1_2_ (c)       0.174    0.014   12.220    0.000    0.174    0.175
##   predicted_G_brain ~                                                      
##     lCgntn1_2_ (a)       0.094    0.015    6.333    0.000    0.094    0.094
##   observed_G ~                                                             
##     pc1                  0.125    0.060    2.093    0.036    0.125    0.124
##     pc2                 -0.247    0.198   -1.250    0.211   -0.247   -0.069
##     pc3                  0.005    0.104    0.045    0.964    0.005    0.005
##     pc4                 -0.020    0.119   -0.170    0.865   -0.020   -0.021
##   predicted_G_brain ~                                                      
##     pc1                  0.008    0.051    0.148    0.882    0.008    0.007
##     pc2                 -0.033    0.169   -0.196    0.845   -0.033   -0.009
##     pc3                 -0.015    0.079   -0.190    0.849   -0.015   -0.016
##     pc4                 -0.005    0.122   -0.040    0.968   -0.005   -0.005
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .observed_G        0.816    0.019   43.411    0.000    0.816    0.831
##    .predictd_G_brn    0.991    0.017   57.384    0.000    0.991    0.991
## 
## R-Square:
##                    Estimate
##     observed_G        0.169
##     predictd_G_brn    0.009
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     ab                0.032    0.005    6.089    0.000    0.032    0.032
##     total             0.207    0.015   13.611    0.000    0.207    0.208
##     prop              0.156    0.024    6.393    0.000    0.156    0.156

# Parameter table with 95% bootstrap confidence intervals ("bca.simple" type)
# and standardized solutions, rendered as a table.
fit_leecog_lavaan_para <- lavaan::parameterEstimates(
  fit_leecog_lavaan,
  ci = TRUE,
  level = 0.95,
  boot.ci.type = "bca.simple",
  standardized = TRUE
)

knitr::kable(fit_leecog_lavaan_para)

lhs	op	rhs	label	est	se	z	pvalue	ci.lower	ci.upper	std.lv	std.all	std.nox
observed_G	~	predicted_G_brain	b	0.3421572	0.0137684	24.8509171	0.0000000	0.3154736	0.3689894	0.3421572	0.3453233	0.3453233
observed_G	~	leeCognitionp1e_2_score	c	0.1743477	0.0142668	12.2204832	0.0000000	0.1472803	0.2017802	0.1743477	0.1751125	0.1759811
predicted_G_brain	~	leeCognitionp1e_2_score	a	0.0944815	0.0149186	6.3331215	0.0000000	0.0662443	0.1245356	0.0944815	0.0940259	0.0944923
observed_G	~	pc1		0.1249980	0.0597175	2.0931528	0.0363355	0.0042824	0.2391465	0.1249980	0.1238580	0.1261690
observed_G	~	pc2		-0.2474684	0.1979319	-1.2502706	0.2112007	-0.6549932	0.1144279	-0.2474684	-0.0693871	-0.2497868
observed_G	~	pc3		0.0046927	0.1035110	0.0453348	0.9638404	-0.1527567	0.2852511	0.0046927	0.0049345	0.0047366
observed_G	~	pc4		-0.0202014	0.1187176	-0.1701633	0.8648817	-0.3187973	0.1561772	-0.0202014	-0.0213472	-0.0203906
predicted_G_brain	~	pc1		0.0075282	0.0508248	0.1481203	0.8822479	-0.0877917	0.1121372	0.0075282	0.0073911	0.0075290
predicted_G_brain	~	pc2		-0.0330395	0.1686961	-0.1958523	0.8447257	-0.3785987	0.2795089	-0.0330395	-0.0091789	-0.0330433
predicted_G_brain	~	pc3		-0.0149924	0.0788559	-0.1901241	0.8492119	-0.2187669	0.1473727	-0.0149924	-0.0156206	-0.0149941
predicted_G_brain	~	pc4		-0.0049251	0.1217863	-0.0404406	0.9677419	-0.1393776	0.3776521	-0.0049251	-0.0051567	-0.0049257
observed_G	~~	observed_G		0.8160268	0.0187978	43.4108302	0.0000000	0.7822946	0.8559610	0.8160268	0.8313882	0.8313882
predicted_G_brain	~~	predicted_G_brain		0.9905535	0.0172619	57.3837648	0.0000000	0.9589689	1.0272917	0.9905535	0.9907792	0.9907792
leeCognitionp1e_2_score	~~	leeCognitionp1e_2_score		0.9901527	0.0000000	NA	NA	0.9901527	0.9901527	0.9901527	1.0000000	0.9901527
leeCognitionp1e_2_score	~~	pc1		0.1164586	0.0000000	NA	NA	0.1164586	0.1164586	0.1164586	0.1192200	0.1164586
leeCognitionp1e_2_score	~~	pc2		0.0298778	0.0000000	NA	NA	0.0298778	0.0298778	0.0298778	0.1080907	0.0298778
leeCognitionp1e_2_score	~~	pc3		-0.0334960	0.0000000	NA	NA	-0.0334960	-0.0334960	-0.0334960	-0.0323121	-0.0334960
leeCognitionp1e_2_score	~~	pc4		0.0200220	0.0000000	NA	NA	0.0200220	0.0200220	0.0200220	0.0192197	0.0200220
pc1	~~	pc1		0.9637024	0.0000000	NA	NA	0.9637024	0.9637024	0.9637024	1.0000000	0.9637024
pc1	~~	pc2		0.2553123	0.0000000	NA	NA	0.2553123	0.2553123	0.2553123	0.9362483	0.2553123
pc1	~~	pc3		-0.0813157	0.0000000	NA	NA	-0.0813157	-0.0813157	-0.0813157	-0.0795109	-0.0813157
pc1	~~	pc4		0.0206747	0.0000000	NA	NA	0.0206747	0.0206747	0.0206747	0.0201167	0.0206747
pc2	~~	pc2		0.0771646	0.0000000	NA	NA	0.0771646	0.0771646	0.0771646	1.0000000	0.0771646
pc2	~~	pc3		0.0266052	0.0000000	NA	NA	0.0266052	0.0266052	0.0266052	0.0919352	0.0266052
pc2	~~	pc4		0.0041940	0.0000000	NA	NA	0.0041940	0.0041940	0.0041940	0.0144216	0.0041940
pc3	~~	pc3		1.0853069	0.0000000	NA	NA	1.0853069	1.0853069	1.0853069	1.0000000	1.0853069
pc3	~~	pc4		-0.0169518	0.0000000	NA	NA	-0.0169518	-0.0169518	-0.0169518	-0.0155427	-0.0169518
pc4	~~	pc4		1.0960262	0.0000000	NA	NA	1.0960262	1.0960262	1.0960262	1.0000000	1.0960262
ab	:=	a*b	ab	0.0323275	0.0053094	6.0887242	0.0000000	0.0225296	0.0432952	0.0323275	0.0324693	0.0326304
total	:=	ab+c	total	0.2066753	0.0151844	13.6110637	0.0000000	0.1772344	0.2355815	0.2066753	0.2075818	0.2086115
prop	:=	ab/total	prop	0.1564170	0.0244688	6.3925058	0.0000000	0.1102938	0.2074586	0.1564170	0.1564170	0.1564170

4 final mediation model

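The socio-demographic-psychological prediction (Social_Dem_Psy) and the PGS are the independent variables; the brain-predicted g-factor is the mediator.
The observed g-factor is the dependent variable.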

# lavaan syntax for a mediation model with two predictors (non-brain
# prediction and polygenic score), the brain prediction as mediator, and
# pc1-pc4 as covariates of both outcomes. For each predictor the derived
# parameters give the indirect effect (a*b), the total effect and the
# proportion mediated.
# NOTE(review): predicted_G_non_brain does not appear to be standardised in
# the `_scaled` data (its variance in the output is about 0.19), so its
# unstandardised paths are on a different scale from the PGS paths -- confirm.
socdem_leecog_lavaan <-'

observed_G~ b*predicted_G_brain + c_socdem*predicted_G_non_brain + c_pgs*leeCognitionp1e_2_score 
predicted_G_brain~a_socdem*predicted_G_non_brain + a_pgs*leeCognitionp1e_2_score 

observed_G~pc1 + pc2 + pc3 + pc4 
predicted_G_brain~pc1 + pc2 + pc3 + pc4  

#indirect and total effects
a_socdem_b:=a_socdem*b
a_pgs_b:=a_pgs*b
total_socdem:=a_socdem*b+c_socdem
total_pgs:=a_pgs*b+c_pgs
prop_socdem :=a_socdem_b/total_socdem
prop_pgs :=a_pgs_b/total_pgs

'

# Fix the RNG state so the bootstrap draws are reproducible.
set.seed(1234)

fit_socdem_leecog_lavaan <- lavaan::sem(
  model = socdem_leecog_lavaan,
  data = predicted_brain_no_brain_all_sites_leeCog_scaled,
  se = "bootstrap",
  bootstrap = 5000,
  test = "bollen.stine"
)

lavaan::summary(
  fit_socdem_leecog_lavaan,
  fit.measures = TRUE,
  rsquare = TRUE,
  standardized = TRUE
)

## lavaan 0.6-9 ended normally after 31 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        15
##                                                       
##                                                   Used       Total
##   Number of observations                          4389        4815
##                                                                   
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Model Test Baseline Model:
## 
##   Test statistic                              1728.306
##   Degrees of freedom                                13
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -11549.863
##   Loglikelihood unrestricted model (H1)     -11549.863
##                                                       
##   Akaike (AIC)                               23129.725
##   Bayesian (BIC)                             23225.528
##   Sample-size adjusted Bayesian (BIC)        23177.864
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent confidence interval - lower         0.000
##   90 Percent confidence interval - upper         0.000
##   P-value RMSEA <= 0.05                             NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000
## 
## Parameter Estimates:
## 
##   Standard errors                            Bootstrap
##   Number of requested bootstrap draws             5000
##   Number of successful bootstrap draws            5000
## 
## Regressions:
##                       Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   observed_G ~                                                             
##     prdc_G_    (b)       0.263    0.013   20.323    0.000    0.263    0.266
##     prd_G__ (c_sc)       0.803    0.032   25.033    0.000    0.803    0.353
##     lCg1_2_ (c_pg)       0.129    0.013    9.812    0.000    0.129    0.130
##   predicted_G_brain ~                                                      
##     prd_G__ (a_sc)       0.539    0.033   16.271    0.000    0.539    0.235
##     lCg1_2_ (a_pg)       0.059    0.015    4.035    0.000    0.059    0.059
##   observed_G ~                                                             
##     pc1                  0.040    0.051    0.787    0.431    0.040    0.040
##     pc2                 -0.087    0.174   -0.503    0.615   -0.087   -0.025
##     pc3                  0.008    0.083    0.097    0.923    0.008    0.008
##     pc4                 -0.010    0.122   -0.083    0.934   -0.010   -0.011
##   predicted_G_brain ~                                                      
##     pc1                 -0.050    0.051   -0.972    0.331   -0.050   -0.049
##     pc2                  0.076    0.170    0.449    0.653    0.076    0.021
##     pc3                 -0.012    0.077   -0.155    0.877   -0.012   -0.012
##     pc4                  0.002    0.113    0.019    0.985    0.002    0.002
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .observed_G        0.706    0.016   45.112    0.000    0.706    0.719
##    .predictd_G_brn    0.938    0.017   55.888    0.000    0.938    0.938
## 
## R-Square:
##                    Estimate
##     observed_G        0.281
##     predictd_G_brn    0.062
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     a_socdem_b        0.142    0.011   12.789    0.000    0.142    0.062
##     a_pgs_b           0.016    0.004    3.916    0.000    0.016    0.016
##     total_socdem      0.945    0.033   28.609    0.000    0.945    0.415
##     total_pgs         0.145    0.014   10.570    0.000    0.145    0.146
##     prop_socdem       0.150    0.012   12.929    0.000    0.150    0.150
##     prop_pgs          0.108    0.027    3.994    0.000    0.108    0.108

# Parameter table with 95% bootstrap confidence intervals ("bca.simple" type)
# and standardized solutions, rendered as a table.
socdem_leecog_lavaan_para <- lavaan::parameterEstimates(
  fit_socdem_leecog_lavaan,
  ci = TRUE,
  level = 0.95,
  boot.ci.type = "bca.simple",
  standardized = TRUE
)

knitr::kable(socdem_leecog_lavaan_para)

lhs	op	rhs	label	est	se	z	pvalue	ci.lower	ci.upper	std.lv	std.all	std.nox
observed_G	~	predicted_G_brain	b	0.2631146	0.0129467	20.3228779	0.0000000	0.2367382	0.2876208	0.2631146	0.2655493	0.2655493
observed_G	~	predicted_G_non_brain	c_socdem	0.8027280	0.0320671	25.0327529	0.0000000	0.7416864	0.8663541	0.8027280	0.3525100	0.8102483
observed_G	~	leeCognitionp1e_2_score	c_pgs	0.1293150	0.0131793	9.8120012	0.0000000	0.1041945	0.1556013	0.1293150	0.1298822	0.1305265
predicted_G_brain	~	predicted_G_non_brain	a_socdem	0.5392928	0.0331442	16.2711033	0.0000000	0.4749679	0.6062260	0.5392928	0.2346537	0.5393542
predicted_G_brain	~	leeCognitionp1e_2_score	a_pgs	0.0592101	0.0146736	4.0351396	0.0000546	0.0318080	0.0883781	0.0592101	0.0589246	0.0592169
observed_G	~	pc1		0.0404987	0.0514325	0.7874142	0.4310394	-0.0626762	0.1406440	0.0404987	0.0401294	0.0408781
observed_G	~	pc2		-0.0874500	0.1737491	-0.5033118	0.6147451	-0.4307179	0.2390868	-0.0874500	-0.0245199	-0.0882693
observed_G	~	pc3		0.0080095	0.0825964	0.0969721	0.9227486	-0.1510781	0.2527761	0.0080095	0.0084224	0.0080846
observed_G	~	pc4		-0.0101223	0.1223925	-0.0827035	0.9340873	-0.3333312	0.0936016	-0.0101223	-0.0106964	-0.0102171
predicted_G_brain	~	pc1		-0.0496401	0.0510824	-0.9717650	0.3311675	-0.1439103	0.0582183	-0.0496401	-0.0487364	-0.0496458
predicted_G_brain	~	pc2		0.0762187	0.1697452	0.4490182	0.6534185	-0.2659863	0.4011320	0.0762187	0.0211748	0.0762274
predicted_G_brain	~	pc3		-0.0119679	0.0773923	-0.1546392	0.8771057	-0.2275455	0.1474684	-0.0119679	-0.0124693	-0.0119692
predicted_G_brain	~	pc4		0.0021078	0.1126057	0.0187185	0.9850657	-0.1779045	0.3231063	0.0021078	0.0022069	0.0021081
observed_G	~~	observed_G		0.7056734	0.0156427	45.1121209	0.0000000	0.6769521	0.7380331	0.7056734	0.7189574	0.7189574
predicted_G_brain	~~	predicted_G_brain		0.9379524	0.0167826	55.8884897	0.0000000	0.9072665	0.9736656	0.9379524	0.9381661	0.9381661
predicted_G_non_brain	~~	predicted_G_non_brain		0.1892808	0.0000000	NA	NA	0.1892808	0.1892808	0.1892808	1.0000000	0.1892808
predicted_G_non_brain	~~	leeCognitionp1e_2_score		0.0709779	0.0000000	NA	NA	0.0709779	0.0709779	0.0709779	0.1639528	0.0709779
predicted_G_non_brain	~~	pc1		0.0582364	0.0000000	NA	NA	0.0582364	0.0582364	0.0582364	0.1363546	0.0582364
predicted_G_non_brain	~~	pc2		0.0131816	0.0000000	NA	NA	0.0131816	0.0131816	0.0131816	0.1090704	0.0131816
predicted_G_non_brain	~~	pc3		-0.0220665	0.0000000	NA	NA	-0.0220665	-0.0220665	-0.0220665	-0.0486859	-0.0220665
predicted_G_non_brain	~~	pc4		-0.0115468	0.0000000	NA	NA	-0.0115468	-0.0115468	-0.0115468	-0.0253511	-0.0115468
leeCognitionp1e_2_score	~~	leeCognitionp1e_2_score		0.9901527	0.0000000	NA	NA	0.9901527	0.9901527	0.9901527	1.0000000	0.9901527
leeCognitionp1e_2_score	~~	pc1		0.1164586	0.0000000	NA	NA	0.1164586	0.1164586	0.1164586	0.1192200	0.1164586
leeCognitionp1e_2_score	~~	pc2		0.0298778	0.0000000	NA	NA	0.0298778	0.0298778	0.0298778	0.1080907	0.0298778
leeCognitionp1e_2_score	~~	pc3		-0.0334960	0.0000000	NA	NA	-0.0334960	-0.0334960	-0.0334960	-0.0323121	-0.0334960
leeCognitionp1e_2_score	~~	pc4		0.0200220	0.0000000	NA	NA	0.0200220	0.0200220	0.0200220	0.0192197	0.0200220
pc1	~~	pc1		0.9637024	0.0000000	NA	NA	0.9637024	0.9637024	0.9637024	1.0000000	0.9637024
pc1	~~	pc2		0.2553123	0.0000000	NA	NA	0.2553123	0.2553123	0.2553123	0.9362483	0.2553123
pc1	~~	pc3		-0.0813157	0.0000000	NA	NA	-0.0813157	-0.0813157	-0.0813157	-0.0795109	-0.0813157
pc1	~~	pc4		0.0206747	0.0000000	NA	NA	0.0206747	0.0206747	0.0206747	0.0201167	0.0206747
pc2	~~	pc2		0.0771646	0.0000000	NA	NA	0.0771646	0.0771646	0.0771646	1.0000000	0.0771646
pc2	~~	pc3		0.0266052	0.0000000	NA	NA	0.0266052	0.0266052	0.0266052	0.0919352	0.0266052
pc2	~~	pc4		0.0041940	0.0000000	NA	NA	0.0041940	0.0041940	0.0041940	0.0144216	0.0041940
pc3	~~	pc3		1.0853069	0.0000000	NA	NA	1.0853069	1.0853069	1.0853069	1.0000000	1.0853069
pc3	~~	pc4		-0.0169518	0.0000000	NA	NA	-0.0169518	-0.0169518	-0.0169518	-0.0155427	-0.0169518
pc4	~~	pc4		1.0960262	0.0000000	NA	NA	1.0960262	1.0960262	1.0960262	1.0000000	1.0960262
a_socdem_b	:=	a_socdem*b	a_socdem_b	0.1418958	0.0110950	12.7891718	0.0000000	0.1215286	0.1649183	0.1418958	0.0623121	0.1432251
a_pgs_b	:=	a_pgs*b	a_pgs_b	0.0155791	0.0039780	3.9163226	0.0000899	0.0083469	0.0238579	0.0155791	0.0156474	0.0157250
total_socdem	:=	a_socdem*b+c_socdem	total_socdem	0.9446238	0.0330180	28.6093655	0.0000000	0.8811028	1.0102441	0.9446238	0.4148221	0.9534735
total_pgs	:=	a_pgs*b+c_pgs	total_pgs	0.1448941	0.0137075	10.5704508	0.0000000	0.1187712	0.1723074	0.1448941	0.1455296	0.1462515
prop_socdem	:=	a_socdem_b/total_socdem	prop_socdem	0.1502141	0.0116186	12.9287596	0.0000000	0.1288758	0.1742519	0.1502141	0.1502141	0.1502141
prop_pgs	:=	a_pgs_b/total_pgs	prop_pgs	0.1075203	0.0269231	3.9936053	0.0000651	0.0583395	0.1637417	0.1075203	0.1075203	0.1075203

# Pearson correlation test between the `predicted_G_non_brain` and
# `leeCognitionp1e_2_score` columns of
# `predicted_brain_no_brain_all_sites_leeCog_scaled`.
cor.test(
  x = predicted_brain_no_brain_all_sites_leeCog_scaled$predicted_G_non_brain,
  y = predicted_brain_no_brain_all_sites_leeCog_scaled$leeCognitionp1e_2_score,
  method = "pearson"
)

## 
##  Pearson's product-moment correlation
## 
## data:  predicted_brain_no_brain_all_sites_leeCog_scaled$predicted_G_non_brain and predicted_brain_no_brain_all_sites_leeCog_scaled$leeCognitionp1e_2_score
## t = 11.202, df = 4609, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1345642 0.1907654
## sample estimates:
##       cor 
## 0.1627969

#save(file = "mediation_gfactor_new_prepocessing_sex_race.RData")

Mediation: Socio-Demographic, Psychological, Genetic and Brain on G-Factor

Narun

Feb/08/2022

1 Socio-Demographic & Psychological Data Preparation

1.1 Reset workspace and load libraries

1.2 Setting up paths

1.3 Family relationship

1.4 site information

1.5 Cognition

1.5.1 Load neuro cognitive measures

1.5.2 summary of NIH toolbox cognition

1.5.3 distribution of other cognitive tasks

1.5.4 sum cognition

1.6 mental health

1.6.1 Parent’s report child’s mental health CBCL

1.6.2 Load precomputed CBCL

1.6.3 Parent’s report parent’s mental health

1.6.4 other mental health surveys from parents

1.7 Personality

1.7.1 Load BIS/BAS

1.7.2 histogram of BIS/BAS

1.8 Physical

1.9 behavioral

1.9.1 screen time

1.9.2 maternal substance use

1.9.3 developmental adversity

1.9.4 brain trauma

1.10 culture

1.10.1 bilingual

1.11 social demographics

1.11.1 ABCD Parent Demographics Survey

1.11.2 more Social Demographics from Residential History Derived Scores

1.12 Proximal Environment

1.12.1 ABCD Neighborhood Safety/Crime Survey Modified from PhenX (NSC)

1.12.2 School Risk and Protective Factors Survey

1.13 Social Interaction

1.13.1 ABCD Parental Monitoring Survey

1.13.2 ABCD Family Conflict

1.13.3 Prosocial Tendency

1.14 Parent Sports and Activities

1.14.1 ABCD Sum Scores Parent Sports and Activities Involvement

1.14.2 ABCD Sum Scores Parent Sports and Activities Involvement

1.14.3 physical activity

1.14.4 BMI and Waist

1.15 Join data

1.16 preprocess site

1.17 summary of targets and features

1.18 visualise missing value

1.19 need imputation

1.20 cfa on all subjects

1.21 visualise with correlation heatmap

2 Modeling for Socio-Demographic and Psychological Factors

2.1 set up parallel processing

2.2 set up formula

2.3 create a list of sites

2.4 test cfa

2.5 create a list of splits based on sites

2.6 create a list of recipe based on sites

2.7 create a list of workflow, bestmodel and best hyperparameters based on sites

2.8 apply best models to the hold-out sites

2.9 create dataframes of the modeling output

2.10 plot predicted vs observed across sites

2.11 plot top features

2.12 load the predicted G-Factor from brain variables

2.13 fit the mediation model using soc-dem-psyc as the ind

3 Genetics Data Prep and Modeling

3.1 load genetics data

3.2 correlation between PGS of cog performance and observed G across thresholds

3.3 plot a scatter between PGS at .01 and G-Factor

3.4 fit the mediation model using PGS at .01 as the ind

4 final mediation model