|
202 | 202 | " --regions data/twas/EUR_LD_blocks.bed \\\n", |
203 | 203 | " --xqtl_meta_data data/twas/mwe_twas_pipeline_test_small.tsv \\\n", |
204 | 204 | " --xqtl_type_table data/twas/data_type_table.txt \\\n", |
205 | | - " --rsq_pval_cutoff 0.05 --rsq_cutoff 0.01 " |
| 205 | + " --rsq_pval_cutoff 0.05 --rsq_cutoff 0.01 \\\n", |
| 206 | + " --region-name chr11_84267999_86714492" |
206 | 207 | ] |
207 | 208 | }, |
208 | 209 | { |
|
585 | 586 | " }\n", |
586 | 587 | " \n", |
587 | 588 | " xqtl_meta_df <- fread(\"${xqtl_meta_data}\", data.table=FALSE)\n", |
| 589 | + " xqtl_meta_df <- meta_data_df[!duplicated(meta_data_df[, c(\"region_id\", \"TSS\")]), ]\n", |
588 | 590 | " xqtl_type_table <- if (isTRUE(file.exists(\"${xqtl_type_table}\"))) fread(\"${xqtl_type_table}\") else NULL\n", |
589 | 591 | " gene_list <- c(${', '.join([f\"'{gene}'\" for gene in _filtered_region_info[4]])})\n", |
590 | 592 | " \n", |
|
711 | 713 | " message(paste(\"Proceeding with TWAS analysis for\", length(twas_weights_results), \"batches\"))\n", |
712 | 714 | "\n", |
713 | 715 | " # TWAS analysis - allow this to fail with informative errors\n", |
714 | | - " twas_results_db <- list()\n", |
| 716 | + " twas_results_db <- vector(\"list\", length(twas_weights_results))\n", |
715 | 717 | " for (batch in 1:length(twas_weights_results)){\n", |
716 | 718 | " message(paste(\"Processing batch\", batch, \"of\", length(twas_weights_results)))\n", |
717 | 719 | " \n", |
|
733 | 735 | " )\n", |
734 | 736 | " \n", |
735 | 737 | " # Report batch results\n", |
736 | | - " if (!is.null(twas_results_db[[batch]])) {\n", |
| 738 | + " if (!(is.null(twas_results_db[[batch]][[1]]) | is.null(twas_results_db[[batch]]))) {\n", |
737 | 739 | " if (!is.null(twas_results_db[[batch]]$twas_result)) {\n", |
738 | 740 | " message(paste(\"Batch\", batch, \"produced\", nrow(twas_results_db[[batch]]$twas_result), \"TWAS results\"))\n", |
739 | 741 | " } else {\n", |
740 | 742 | " message(paste(\"Batch\", batch, \"produced NULL twas_result\"))\n", |
741 | 743 | " }\n", |
742 | 744 | " } else {\n", |
743 | 745 | " message(paste(\"Batch\", batch, \"produced NULL results\"))\n", |
| 746 | + " twas_results_db[[batch]] <- NA\n", |
744 | 747 | " }\n", |
745 | 748 | " }\n", |
746 | 749 | " \n", |
747 | 750 | " rm(twas_weights_results)\n", |
748 | 751 | " gc()\n", |
749 | 752 | " \n", |
750 | 753 | " # Filter and report final results\n", |
751 | | - " twas_results_db <- Filter(Negate(is.null), twas_results_db)\n", |
| 754 | + " twas_results_db <- Filter(Negate(is.na), twas_results_db)\n", |
752 | 755 | " message(paste(\"Final valid batches:\", length(twas_results_db)))\n", |
753 | 756 | "\n", |
754 | 757 | " if(length(twas_results_db) != 0){\n", |
|
1140 | 1143 | "parameter: prior_var_structure = \"shared_all\"\n", |
1141 | 1144 | "# A list of regions to be subset for screening and fine-mapping, for example: \"10_80126158_82231647\"\n", |
1142 | 1145 | "parameter: region_name =[]\n", |
| 1146 | + "parameter: subset_context=[] # only process specified list of contexts for single group cTWAS\n", |
1143 | 1147 | "parameter: numThreads = 4\n", |
1144 | 1148 | "parameter: multi_group = True\n", |
1145 | 1149 | "parameter: merge_regions=False\n", |
1146 | 1150 | "parameter: L=5\n", |
| 1151 | + "# the sum of gene PIPs in a region must be larger than this threshold for the region to be selected for fine-mapping\n", |
| 1152 | + "parameter: min_nonSNP_PIP=0.5\n", |
| 1153 | + "# optional alias appended to the output name prefix in result file names; \"NULL\" (default) adds none\n", |
| 1154 | + "parameter: alias=\"NULL\"\n", |
1147 | 1155 | "import glob\n", |
1148 | 1156 | "\n", |
1149 | 1157 | "skip_if(run_finemapping == False, \" Skip [ctwas_3] fine-mapping. \" )\n", |
|
1175 | 1183 | " \"params\": params\n", |
1176 | 1184 | " })\n", |
1177 | 1185 | "gwas_study = 'c(' + ', '.join(f'\"{x}\"' for x in gwas_study) + ')'\n", |
1178 | | - "\n", |
| 1186 | + "subset_context = 'c(' + ', '.join(f'\"{x}\"' for x in subset_context) + ')'\n", |
1179 | 1187 | "input: region_info_list[_index][\"params\"], for_each = \"region_info_list\"\n", |
1180 | 1188 | "region_name = region_info_list[_index]['region_name']\n", |
1181 | 1189 | "weight_files = region_info_list[_index]['weights']\n", |
|
1203 | 1211 | " if (${\"TRUE\" if multi_group else \"FALSE\"}){\n", |
1204 | 1212 | " param <- param[!sapply(names(param), function(x) any(sapply(paste0(gwas_studies, \".\"), function(c) grepl(c, x))))] # gwas_study per each prior \n", |
1205 | 1213 | " } else {\n", |
1206 | | - " param <- param[sapply(names(param), function(x) any(sapply(paste0(gwas_studies, \".\"), function(c) grepl(c, x))))] # gwas_study x context pair per each prior \n", |
| 1214 | + " param <- param[sapply(names(param), function(x) any(sapply(paste0(gwas_studies, \".\"), function(c) grepl(c, x))))] # gwas_study x context pair per each prior\n", |
| 1215 | + " param <- param[grepl(${subset_context},names(param))]\n", |
1207 | 1216 | " }\n", |
1208 | 1217 | "\n", |
1209 | 1218 | " region_data_files <- ${'c(' + ', '.join(f'\"{x}\"' for x in region_data_file) + ')'}\n", |
1210 | 1219 | " names(weight_files) <- gsub('^.*.ctwas_weights.\\\\s*|\\\\s*.${chrom}.*$', '', weight_files) # gwas study names regardless of single/multigroup\n", |
1211 | 1220 | " LD_map <- readRDS(\"${cwd}/${step_name.split('_')[0]}/${name}.LD_map.rds\")\n", |
1212 | 1221 | " snp_map <- readRDS(\"${cwd}/${step_name.split('_')[0]}/${name}.snp_map.${chrom}.rds\")\n", |
1213 | 1222 | " names(z_snp_files) <- gsub('^.*.z_gene_snp.\\\\s*|\\\\s*.${chrom}.*$', '', z_snp_files)\n", |
1214 | | - "\n", |
| 1223 | + " alias = ${\"NULL\" if alias == \"NULL\" else f\"'{alias}'\"}\n", |
| 1224 | + " \n", |
1215 | 1225 | " ## loop through gwas studies (multigroup) / gwas_study_context groups (single_group)\n", |
1216 | 1226 | " for (study in names(param)){\n", |
1217 | 1227 | " region_data <- readRDS(region_data_files[grepl(study, region_data_files)])\n", |
1218 | 1228 | " finemap_res_file <- file.path(outputdir, paste0(\"${name}.ctwas_finemap_res.${prior_var_structure}.${region_name}.\", study, \".thin${thin}.tsv.gz\"))\n", |
| 1229 | + " if (length(alias)!=0) finemap_res_file <- gsub(\"${name}\", \"${name}_${alias}\", finemap_res_file)\n", |
1219 | 1230 | " susie_alpha_file <- gsub(\".tsv.gz\", \".rds\", gsub(\"ctwas_finemap_res\", \"ctwas_susie_alpha_res\", finemap_res_file))\n", |
1220 | 1231 | " if (nrow(region_data[[study]][[gsub(\"chr\", \"\", \"${region_name}\")]]$z_gene)==0) {\n", |
1221 | 1232 | " message(\"No z_gene data available for \", study, \" in ${region_name}. \")\n", |
|
1247 | 1258 | " ncore = ${numThreads})\n", |
1248 | 1259 | " }\n", |
1249 | 1260 | " screen_res <- screen_regions(region_data[[study]][gsub(\"chr\", \"\", \"${region_name}\")],\n", |
1250 | | - " group_prior = group_prior, group_prior_var = group_prior_var, min_nonSNP_PIP = 0.5, \n", |
| 1261 | + " group_prior = group_prior, group_prior_var = group_prior_var, min_nonSNP_PIP = ${min_nonSNP_PIP}, \n", |
1251 | 1262 | " ncore = ${numThreads}, verbose = FALSE, logfile = file.path(outputdir, \n", |
1252 | 1263 | " paste0(\"${name}.screen_regions.${prior_var_structure}.${region_name}.\", study, \".thin${thin}.log\")))\n", |
1253 | 1264 | " screened_region_data <- screen_res$screened_region_data\n", |
1254 | | - " # screen_summary <- screen_res$screen_summary\n", |
1255 | | - " saveRDS(screen_res, file.path(outputdir, paste0(\"${name}.screen_regions.${prior_var_structure}.${region_name}.\", study, \".thin${thin}.rds\")))\n", |
| 1265 | + " screen_res_file <- paste0(\"${name}.screen_regions.${prior_var_structure}.${region_name}.\", study, \".thin${thin}.rds\")\n", |
| 1266 | + " if (length(alias)!=0) screen_res_file <- gsub(\"${name}\", \"${name}_${alias}\",screen_res_file)\n", |
| 1267 | + " saveRDS(screen_res, file.path(outputdir, screen_res_file ))\n", |
1256 | 1268 | " if (length(screened_region_data)==0) {\n", |
1257 | 1269 | " message(\"No region selected for \", study, \" in ${region_name}. \")\n", |
1258 | 1270 | " fwrite(data.frame(), finemap_res_file, sep = \"\\t\", compress = \"gzip\")\n", |
|
1308 | 1320 | " fwrite(finemap_res, finemap_res_file, sep = \"\\t\", compress = \"gzip\")\n", |
1309 | 1321 | " saveRDS(susie_alpha_res, susie_alpha_file, compress='xz')\n", |
1310 | 1322 | " }\n", |
1311 | | - " saveRDS(ld_diag[1:3], file.path(outputdir, paste0(\"${name}.ctwas_ld_diag.${prior_var_structure}.${region_name}.\", study,\".thin${thin}.rds\")))\n", |
1312 | | - " pdf(file.path(outputdir, paste0(\"${name}.ctwas_ld_diag_plot.${prior_var_structure}.${region_name}.\", study,\".thin${thin}.pdf\")), width = 7, height = 7)\n", |
| 1323 | + " ld_diag_file <- paste0(\"${name}.ctwas_ld_diag.${prior_var_structure}.${region_name}.\", study,\".thin${thin}.rds\")\n", |
| 1324 | + " if (length(alias)!=0) ld_diag_file <- gsub(\"${name}\", \"${name}_${alias}\",ld_diag_file)\n", |
| 1325 | + " ld_diag_plot_file <- gsub(\".ctwas_ld_diag.\", \".ctwas_ld_diag_plot.\", ld_diag_file )\n", |
| 1326 | + " ld_diag_plot_file <- gsub(\".rds\", \".pdf\", ld_diag_plot_file )\n", |
| 1327 | + " saveRDS(ld_diag[1:3], file.path(outputdir, ld_diag_file))\n", |
| 1328 | + " pdf(file.path(outputdir,ld_diag_plot_file), width = 7, height = 7)\n", |
1313 | 1329 | " print(ld_diag$plots)\n", |
1314 | 1330 | " dev.off()\n", |
1315 | 1331 | " message(\"Fine-mapping completed for region ${region_name} with \", study, \". \")\n", |
|
0 commit comments