Skip to content

Commit f9a7f3c

Browse files
authored
Merge branch 'StatFunGen:main' into main
2 parents 19ccebd + 423fcfa commit f9a7f3c

File tree

2 files changed

+433
-183
lines changed

2 files changed

+433
-183
lines changed

code/pecotmr_integration/twas_ctwas.ipynb

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,8 @@
202202
" --regions data/twas/EUR_LD_blocks.bed \\\n",
203203
" --xqtl_meta_data data/twas/mwe_twas_pipeline_test_small.tsv \\\n",
204204
" --xqtl_type_table data/twas/data_type_table.txt \\\n",
205-
" --rsq_pval_cutoff 0.05 --rsq_cutoff 0.01 "
205+
" --rsq_pval_cutoff 0.05 --rsq_cutoff 0.01 \\\n",
206+
" --region-name chr11_84267999_86714492"
206207
]
207208
},
208209
{
@@ -585,6 +586,7 @@
585586
" }\n",
586587
" \n",
587588
" xqtl_meta_df <- fread(\"${xqtl_meta_data}\", data.table=FALSE)\n",
589+
" xqtl_meta_df <- meta_data_df[!duplicated(meta_data_df[, c(\"region_id\", \"TSS\")]), ]\n",
588590
" xqtl_type_table <- if (isTRUE(file.exists(\"${xqtl_type_table}\"))) fread(\"${xqtl_type_table}\") else NULL\n",
589591
" gene_list <- c(${', '.join([f\"'{gene}'\" for gene in _filtered_region_info[4]])})\n",
590592
" \n",
@@ -711,7 +713,7 @@
711713
" message(paste(\"Proceeding with TWAS analysis for\", length(twas_weights_results), \"batches\"))\n",
712714
"\n",
713715
" # TWAS analysis - allow this to fail with informative errors\n",
714-
" twas_results_db <- list()\n",
716+
" twas_results_db <- vector(\"list\", length(twas_weights_results))\n",
715717
" for (batch in 1:length(twas_weights_results)){\n",
716718
" message(paste(\"Processing batch\", batch, \"of\", length(twas_weights_results)))\n",
717719
" \n",
@@ -733,22 +735,23 @@
733735
" )\n",
734736
" \n",
735737
" # Report batch results\n",
736-
" if (!is.null(twas_results_db[[batch]])) {\n",
738+
" if (!(is.null(twas_results_db[[batch]][[1]]) | is.null(twas_results_db[[batch]]))) {\n",
737739
" if (!is.null(twas_results_db[[batch]]$twas_result)) {\n",
738740
" message(paste(\"Batch\", batch, \"produced\", nrow(twas_results_db[[batch]]$twas_result), \"TWAS results\"))\n",
739741
" } else {\n",
740742
" message(paste(\"Batch\", batch, \"produced NULL twas_result\"))\n",
741743
" }\n",
742744
" } else {\n",
743745
" message(paste(\"Batch\", batch, \"produced NULL results\"))\n",
746+
" twas_results_db[[batch]] <- NA\n",
744747
" }\n",
745748
" }\n",
746749
" \n",
747750
" rm(twas_weights_results)\n",
748751
" gc()\n",
749752
" \n",
750753
" # Filter and report final results\n",
751-
" twas_results_db <- Filter(Negate(is.null), twas_results_db)\n",
754+
" twas_results_db <- Filter(Negate(is.na), twas_results_db)\n",
752755
" message(paste(\"Final valid batches:\", length(twas_results_db)))\n",
753756
"\n",
754757
" if(length(twas_results_db) != 0){\n",
@@ -1140,10 +1143,15 @@
11401143
"parameter: prior_var_structure = \"shared_all\"\n",
11411144
"# A list of regions to be subset for screening and fine-mapping, for example: \"10_80126158_82231647\"\n",
11421145
"parameter: region_name =[]\n",
1146+
"parameter: subset_context=[] # only process specified list of contexts for single group cTWAS\n",
11431147
"parameter: numThreads = 4\n",
11441148
"parameter: multi_group = True\n",
11451149
"parameter: merge_regions=False\n",
11461150
"parameter: L=5\n",
1151+
"# sum of gene PIPs in the region should be larger than this threshold to get selected for finemapping\n",
1152+
"parameter: min_nonSNP_PIP=0.5\n",
1153+
"# additional name specification for fine-mapping result file names \n",
1154+
"parameter: alias=\"NULL\"\n",
11471155
"import glob\n",
11481156
"\n",
11491157
"skip_if(run_finemapping == False, \" Skip [ctwas_3] fine-mapping. \" )\n",
@@ -1175,7 +1183,7 @@
11751183
" \"params\": params\n",
11761184
" })\n",
11771185
"gwas_study = 'c(' + ', '.join(f'\"{x}\"' for x in gwas_study) + ')'\n",
1178-
"\n",
1186+
"subset_context = 'c(' + ', '.join(f'\"{x}\"' for x in subset_context) + ')'\n",
11791187
"input: region_info_list[_index][\"params\"], for_each = \"region_info_list\"\n",
11801188
"region_name = region_info_list[_index]['region_name']\n",
11811189
"weight_files = region_info_list[_index]['weights']\n",
@@ -1203,19 +1211,22 @@
12031211
" if (${\"TRUE\" if multi_group else \"FALSE\"}){\n",
12041212
" param <- param[!sapply(names(param), function(x) any(sapply(paste0(gwas_studies, \".\"), function(c) grepl(c, x))))] # gwas_study per each prior \n",
12051213
" } else {\n",
1206-
" param <- param[sapply(names(param), function(x) any(sapply(paste0(gwas_studies, \".\"), function(c) grepl(c, x))))] # gwas_study x context pair per each prior \n",
1214+
" param <- param[sapply(names(param), function(x) any(sapply(paste0(gwas_studies, \".\"), function(c) grepl(c, x))))] # gwas_study x context pair per each prior\n",
1215+
" param <- param[grepl(${subset_context},names(param))]\n",
12071216
" }\n",
12081217
"\n",
12091218
" region_data_files <- ${'c(' + ', '.join(f'\"{x}\"' for x in region_data_file) + ')'}\n",
12101219
" names(weight_files) <- gsub('^.*.ctwas_weights.\\\\s*|\\\\s*.${chrom}.*$', '', weight_files) # gwas study names regardless of single/multigroup\n",
12111220
" LD_map <- readRDS(\"${cwd}/${step_name.split('_')[0]}/${name}.LD_map.rds\")\n",
12121221
" snp_map <- readRDS(\"${cwd}/${step_name.split('_')[0]}/${name}.snp_map.${chrom}.rds\")\n",
12131222
" names(z_snp_files) <- gsub('^.*.z_gene_snp.\\\\s*|\\\\s*.${chrom}.*$', '', z_snp_files)\n",
1214-
"\n",
1223+
" alias = ${\"NULL\" if alias == \"NULL\" else f\"'{alias}'\"}\n",
1224+
" \n",
12151225
" ## loop through gwas studies (multigroup) / gwas_study_context groups (single_group)\n",
12161226
" for (study in names(param)){\n",
12171227
" region_data <- readRDS(region_data_files[grepl(study, region_data_files)])\n",
12181228
" finemap_res_file <- file.path(outputdir, paste0(\"${name}.ctwas_finemap_res.${prior_var_structure}.${region_name}.\", study, \".thin${thin}.tsv.gz\"))\n",
1229+
" if (length(alias)!=0) finemap_res_file <- gsub(\"${name}\", \"${name}_${alias}\", finemap_res_file)\n",
12191230
" susie_alpha_file <- gsub(\".tsv.gz\", \".rds\", gsub(\"ctwas_finemap_res\", \"ctwas_susie_alpha_res\", finemap_res_file))\n",
12201231
" if (nrow(region_data[[study]][[gsub(\"chr\", \"\", \"${region_name}\")]]$z_gene)==0) {\n",
12211232
" message(\"No z_gene data available for \", study, \" in ${region_name}. \")\n",
@@ -1247,12 +1258,13 @@
12471258
" ncore = ${numThreads})\n",
12481259
" }\n",
12491260
" screen_res <- screen_regions(region_data[[study]][gsub(\"chr\", \"\", \"${region_name}\")],\n",
1250-
" group_prior = group_prior, group_prior_var = group_prior_var, min_nonSNP_PIP = 0.5, \n",
1261+
" group_prior = group_prior, group_prior_var = group_prior_var, min_nonSNP_PIP = ${min_nonSNP_PIP}, \n",
12511262
" ncore = ${numThreads}, verbose = FALSE, logfile = file.path(outputdir, \n",
12521263
" paste0(\"${name}.screen_regions.${prior_var_structure}.${region_name}.\", study, \".thin${thin}.log\")))\n",
12531264
" screened_region_data <- screen_res$screened_region_data\n",
1254-
" # screen_summary <- screen_res$screen_summary\n",
1255-
" saveRDS(screen_res, file.path(outputdir, paste0(\"${name}.screen_regions.${prior_var_structure}.${region_name}.\", study, \".thin${thin}.rds\")))\n",
1265+
" screen_res_file <- paste0(\"${name}.screen_regions.${prior_var_structure}.${region_name}.\", study, \".thin${thin}.rds\")\n",
1266+
" if (length(alias)!=0) screen_res_file <- gsub(\"${name}\", \"${name}_${alias}\",screen_res_file)\n",
1267+
" saveRDS(screen_res, file.path(outputdir, screen_res_file ))\n",
12561268
" if (length(screened_region_data)==0) {\n",
12571269
" message(\"No region selected for \", study, \" in ${region_name}. \")\n",
12581270
" fwrite(data.frame(), finemap_res_file, sep = \"\\t\", compress = \"gzip\")\n",
@@ -1308,8 +1320,12 @@
13081320
" fwrite(finemap_res, finemap_res_file, sep = \"\\t\", compress = \"gzip\")\n",
13091321
" saveRDS(susie_alpha_res, susie_alpha_file, compress='xz')\n",
13101322
" }\n",
1311-
" saveRDS(ld_diag[1:3], file.path(outputdir, paste0(\"${name}.ctwas_ld_diag.${prior_var_structure}.${region_name}.\", study,\".thin${thin}.rds\")))\n",
1312-
" pdf(file.path(outputdir, paste0(\"${name}.ctwas_ld_diag_plot.${prior_var_structure}.${region_name}.\", study,\".thin${thin}.pdf\")), width = 7, height = 7)\n",
1323+
" ld_diag_file <- paste0(\"${name}.ctwas_ld_diag.${prior_var_structure}.${region_name}.\", study,\".thin${thin}.rds\")\n",
1324+
" if (length(alias)!=0) ld_diag_file <- gsub(\"${name}\", \"${name}_${alias}\",ld_diag_file)\n",
1325+
" ld_diag_plot_file <- gsub(\".ctwas_ld_diag.\", \".ctwas_ld_diag_plot.\", ld_diag_file )\n",
1326+
" ld_diag_plot_file <- gsub(\".rds\", \".pdf\", ld_diag_plot_file )\n",
1327+
" saveRDS(ld_diag[1:3], file.path(outputdir, ld_diag_file))\n",
1328+
" pdf(file.path(outputdir,ld_diag_plot_file), width = 7, height = 7)\n",
13131329
" print(ld_diag$plots)\n",
13141330
" dev.off()\n",
13151331
" message(\"Fine-mapping completed for region ${region_name} with \", study, \". \")\n",

0 commit comments

Comments
 (0)