--- title: "Post-Harmonization Downstream Analysis" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Post-Harmonization} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` # Introduction After harmonization, multiple-sites batch effect get controlled (or ideally eliminated). Some studies require further investigation of life span age trend of brain structures or other significant variable effects on brain structures. `ComBatFamQC` provides a post harmonization tool to: - generate age trend estimation adjusting sex and icv: `age_list_gen` - interactively visualize age trend: `age_shiny` - generate residuals eliminating specific covariates' effects: `residual_gen` - generate residuals from scratch - generate residuals based on existing regression model # Set up Import `ComBatFamQC` package and read in harmonized data set for age trend visualization. We use `age_df` data for age trend visualization in the vignette. To be noticed the read-in data set should be a data frame (not tibble). ```{r setup, eval = FALSE} library(ComBatFamQC) data(age_df) ``` # Life Span Age Trend Visualization In this step, we need to generate a list of data sets for all ROIs. Each ROI's data set contains four columns: - **roi.name**: ROI value - **age**: subject's age info - **sex**: subject's sex info - **icv**: subject's intracranial volume ```{r, eval = FALSE} age_df <- data.frame(age_df) features <- colnames(age_df)[c(6:56)] age <- "age" sex <- "sex" icv <- "ICV_baseline" age_df[[sex]] <- as.factor(age_df[[sex]]) ``` ## Create a list of data sets for all ROIs ```{r, eval = FALSE} # Create sub_df for different features sub_df_list <- lapply(seq_len(length(features)), function(i){ sub_df <- age_df[,c(features[i], age, sex, icv)] %>% na.omit() colnames(sub_df) <- c(features[i], "age", "sex", "icv") return(sub_df) }) ``` ## Create age trend estimation for all ROIs ```{r, eval = FALSE} # For MAC users library(parallel) age_list <- mclapply(seq_len(length(features)), function(w){ age_sub <- age_list_gen (sub_df = sub_df_list[[w]], lq = 0.25, hq = 0.75) return(age_sub) }, mc.cores = detectCores()) # For Windows users age_list <- mclapply(1:length(features), function(w){ age_sub <- age_list_gen (sub_df = sub_df_list[[w]], lq = 0.25, hq = 0.75) return(age_sub) }, mc.cores = 1) names(age_list) <- features quantile_type <- c(paste0("quantile_", 100*0.25), "median", paste0("quantile_", 100*0.75)) ``` ## Launch Shiny App for Visualization Users can choose to generate age trend plots using the `ggplot` package or the `plotly` package (if `plotly` is installed). ```{r, eval=FALSE} # plotly: interactive plot ComBatFamQC::age_shiny(age_list, features, quantile_type, use_plotly = TRUE) # ggplot: static plot ComBatFamQC::age_shiny(age_list, features, quantile_type, use_plotly = FALSE) ``` ## Save Age Trend Table and GAMLSS Model ```{r, eval=FALSE} # Save age trend table temp_dir <- tempfile() dir.create(temp_dir) age_save(path = temp_dir, age_list = age_list) # Save GAMLSS Model gamlss_model <- lapply(seq_len(length(age_list)), function(i){ g_model <- age_list[[i]]$model return(g_model)}) names(gamlss_model) <- names(age_list) saveRDS(gamlss_model, file = file.path(temp_dir, "gamlss_model.rds")) ``` # Residual Generation In this step, we would like to generate different sets of residuals removing specific covariates' effects. ## Get harmonized data set ```{r, eval=FALSE} features <- colnames(adni)[c(43:104)] covariates <- c("timedays", "AGE", "SEX", "DIAGNOSIS") interaction <- c("timedays,DIAGNOSIS") batch <- "manufac" combat_model <- combat_harm(type = "lm", features = features, batch = batch, covariates = covariates, interaction = interaction, smooth = NULL, random = NULL, df = adni) harmonized_df <- combat_model$harmonized_df ``` ## Generate residual data set Specify parameters carefully based on regression type and which covariates' effects to remove. ### Generate residuals from scratch ```{r, eval=FALSE} # generate residuals by removing timedays and DIAGNOSIS effects, while preserving AGE and SEX effects. result_residual <- residual_gen(type = "lm", features = features, covariates = covariates, interaction = interaction, smooth = NULL, df = harmonized_df, rm = c("timedays", "DIAGNOSIS")) # save residual data set write.csv(result_residual$residual, file.path(temp_dir, "residual.csv")) # save regression model saveRDS(result_residual$model, file.path(temp_dir, "regression_model.rds")) ``` ### Generate residuals from existing model ```{r, eval=FALSE} result_residual <- residual_gen(df = harmonized_df, rm = c("timedays", "DIAGNOSIS"), model = TRUE, model_path = file.path(temp_dir, "regression_model.rds")) # save residual data set write.csv(result_residual$residual, file.path(temp_dir, "residual.csv")) # Clean up the temporary file unlink(temp_dir, recursive = TRUE) ```