|
7 | 7 | "kernel": "R" |
8 | 8 | }, |
9 | 9 | "source": [ |
10 | | - "# OPERA: simulation using original proportion configuration" |
| 10 | + "# Running OPERA: OPERA: simulation using original proportion configuration" |
11 | 11 | ] |
12 | 12 | }, |
13 | 13 | { |
|
42 | 42 | }, |
43 | 43 | "outputs": [], |
44 | 44 | "source": [ |
45 | | - "# 3 trait index\n", |
| 45 | + "# Generate partitioned indices for 3-trait simulation\n", |
| 46 | + "# (Mixed proportions across different causal configurations)\n", |
| 47 | + "\n", |
| 48 | + "# Load required libraries\n", |
46 | 49 | "library(tidyverse)\n", |
47 | | - "total_indices <- 500\n", |
48 | | - "proportions <- c(0.78, 0.05, 0.05, 0.05, 0.02, 0.02, 0.02, 0.01)\n", |
49 | 50 | "\n", |
50 | | - "# Calculate the exact number of indices for each group\n", |
51 | | - "group_sizes <- floor(total_indices * proportions)\n", |
| 51 | + "# ----------------------\n", |
| 52 | + "# Step 1: Partition indices based on predefined proportions\n", |
| 53 | + "# ----------------------\n", |
52 | 54 | "\n", |
53 | | - "# Adjust for any rounding errors\n", |
54 | | - "group_sizes[1] <- group_sizes[1] + (total_indices - sum(group_sizes))\n", |
55 | | - "\n", |
56 | | - "# Create a vector of group assignments\n", |
57 | | - "groups <- rep(1:8, times = group_sizes)\n", |
| 55 | + "total_indices <- 500 # Total number of samples\n", |
| 56 | + "proportions <- c(0.78, 0.05, 0.05, 0.05, 0.02, 0.02, 0.02, 0.01) # Group proportions\n", |
58 | 57 | "\n", |
| 58 | + "# Calculate number of samples in each group\n", |
| 59 | + "group_sizes <- floor(total_indices * proportions)\n", |
| 60 | + "group_sizes[1] <- group_sizes[1] + (total_indices - sum(group_sizes)) # Correct rounding error\n", |
59 | 61 | "\n", |
60 | | - "groups <- sample(groups)\n", |
| 62 | + "# Assign each sample to a group\n", |
| 63 | + "groups <- rep(1:8, times = group_sizes)\n", |
| 64 | + "groups <- sample(groups) # Shuffle assignments\n", |
61 | 65 | "\n", |
62 | | - "# Create a list to store the indices for each group\n", |
| 66 | + "# Create a list of indices for each group\n", |
63 | 67 | "partitioned_indices <- lapply(1:8, function(i) which(groups == i) - 1)\n", |
64 | 68 | "\n", |
65 | | - "# Name the list elements for clarity\n", |
| 69 | + "# Name the groups\n", |
66 | 70 | "names(partitioned_indices) <- paste(\"Group\", 1:8)\n", |
67 | 71 | "\n", |
68 | | - "# Print the number of indices in each group\n", |
| 72 | + "# Print number of indices per group\n", |
69 | 73 | "sapply(partitioned_indices, length)\n", |
70 | | - " \n", |
71 | | - " \n", |
72 | | - "config = list(c(1,0,0,0),\n", |
73 | | - " c(1,1,0,0),\n", |
74 | | - " c(1,0,1,0),\n", |
75 | | - " c(1, 0, 0, 1),\n", |
76 | | - " c(1,1,1,0),\n", |
77 | | - " c(1,1,0,1),\n", |
78 | | - " c(1,0,1,1),\n", |
79 | | - " c(1,1,1,1))\n", |
80 | | - "\n", |
81 | | - "\n", |
82 | | - "saveRDS(list(partitioned_indices = partitioned_indices, config = config), \"/home/hs3393/cloud_colocalization/simulation_data/opera_original_design/index/partitioned_index_3trait.rds\")\n" |
| 74 | + "\n", |
| 75 | + "# ----------------------\n", |
| 76 | + "# Step 2: Define causal configurations for each group\n", |
| 77 | + "# ----------------------\n", |
| 78 | + "\n", |
| 79 | + "\n", |
| 80 | + "config = list(\n", |
| 81 | + " c(1, 0, 0, 0), \n", |
| 82 | + " c(1, 1, 0, 0), \n", |
| 83 | + " c(1, 0, 1, 0), \n", |
| 84 | + " c(1, 0, 0, 1), \n", |
| 85 | + " c(1, 1, 1, 0), \n", |
| 86 | + " c(1, 1, 0, 1), \n", |
| 87 | + " c(1, 0, 1, 1), \n", |
| 88 | + " c(1, 1, 1, 1) \n", |
| 89 | + ")\n", |
| 90 | + "\n", |
| 91 | + "# ----------------------\n", |
| 92 | + "# Step 3: Save partitioned index and configuration\n", |
| 93 | + "# ----------------------\n", |
| 94 | + "\n", |
| 95 | + "saveRDS(\n", |
| 96 | + " list(partitioned_indices = partitioned_indices, config = config),\n", |
| 97 | + " \"/home/hs3393/cloud_colocalization/simulation_data/opera_original_design/index/partitioned_index_3trait.rds\"\n", |
| 98 | + ")\n" |
83 | 99 | ] |
84 | 100 | }, |
85 | 101 | { |
|
198 | 214 | "output: f'{cwd:a}/{step_name}/sample_{_index}_opera_3trait.rds'\n", |
199 | 215 | "task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output[0]:bn}'\n", |
200 | 216 | "R: expand = '${ }', stdout = f\"{_output:n}.stdout\", stderr = f\"{_output:n}.stderr\", container = container \n", |
| 217 | + " # --- Load necessary libraries ---\n", |
201 | 218 | " library(\"MASS\")\n", |
202 | 219 | " library(\"plink2R\")\n", |
203 | 220 | " library(\"dplyr\")\n", |
204 | 221 | " library(\"readr\")\n", |
205 | 222 | " library(\"tidyverse\")\n", |
206 | | - " # source some functions to read matrix and inpute the missing data\n", |
207 | | - " source(\"~/cloud_colocalization/simulation_code/simulate_linreg.R\")\n", |
208 | | - " source(\"~/cloud_colocalization/simulation_code/misc.R\")\n", |
| 223 | + " \n", |
| 224 | + " # install simulation package\n", |
| 225 | + " # devtools::install_github(\"StatFunGen/simxQTL\", build_vignettes = FALSE)\n", |
| 226 | + " # BiocManager::install(\"StatFunGen/pecotmr\")\n", |
| 227 | + " library(\"pecotmr\")\n", |
| 228 | + " library(\"simxQTL\")\n", |
| 229 | + " \n", |
209 | 230 | " calculate_sumstat = function(X, Y){\n", |
210 | 231 | " Beta = c()\n", |
211 | 232 | " se = c()\n", |
|
242 | 263 | "\n", |
243 | 264 | " indep = ${\"TRUE\" if independent else \"FALSE\"}\n", |
244 | 265 | " if (indep) {\n", |
245 | | - " LD_vars = 1 # Initialize LD_vars\n", |
| 266 | + " LD_vars = 1 # Initialize LD check\n", |
246 | 267 | "\n", |
247 | 268 | " if (ncausal == 1) {\n", |
248 | | - " # If only one causal variant, just sample it\n", |
| 269 | + " # Only one causal variant needed\n", |
249 | 270 | " vars = sample(1:ncol(Xmat), size = ncausal)\n", |
250 | 271 | " } else {\n", |
251 | | - " # Repeat sampling until selected variables are quasi independent\n", |
| 272 | + " # Ensure selected variants are approximately independent (LD < 0.3)\n", |
252 | 273 | " while (length(LD_vars != 0)) {\n", |
253 | | - " vars = sample(1:ncol(Xmat), size = ncausal) \n", |
254 | | - " cor_mat = cor(Xmat[, vars]) \n", |
| 274 | + " vars = sample(1:ncol(Xmat), size = ncausal)\n", |
| 275 | + " cor_mat = cor(Xmat[, vars])\n", |
255 | 276 | " LD_vars = which(colSums(abs(cor_mat) > 0.3) > 1)\n", |
256 | 277 | " }\n", |
257 | 278 | " }\n", |
258 | 279 | " } else {\n", |
259 | | - " vars = sample(1:ncol(Xmat), size = ncausal)\n", |
| 280 | + " LD_vars = 1 # Initialize LD check\n", |
| 281 | + "\n", |
| 282 | + " if (ncausal == 1) {\n", |
| 283 | + " vars = sample(1:ncol(Xmat), size = ncausal)\n", |
| 284 | + " } else {\n", |
| 285 | + " # Avoid perfectly correlated variants (|cor| = 1)\n", |
| 286 | + " while (length(LD_vars != 0)) {\n", |
| 287 | + " vars = sample(1:ncol(Xmat), size = ncausal)\n", |
| 288 | + " cor_mat = cor(Xmat[, vars])\n", |
| 289 | + " LD_vars = which(colSums(abs(cor_mat) == 1) > 1)\n", |
| 290 | + " }\n", |
| 291 | + " }\n", |
260 | 292 | " }\n", |
261 | 293 | " \n", |
262 | 294 | " result <- sapply(partitioned_res$partitioned_indices, function(x) any(x == tad_number))\n", |
|
326 | 358 | "output: f'{cwd:a}/{step_name}/sample_{_index}_opera_5trait.rds'\n", |
327 | 359 | "task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output[0]:bn}'\n", |
328 | 360 | "R: expand = '${ }', stdout = f\"{_output:n}.stdout\", stderr = f\"{_output:n}.stderr\", container = container \n", |
| 361 | + " # --- Load necessary libraries ---\n", |
329 | 362 | " library(\"MASS\")\n", |
330 | 363 | " library(\"plink2R\")\n", |
331 | 364 | " library(\"dplyr\")\n", |
332 | 365 | " library(\"readr\")\n", |
333 | 366 | " library(\"tidyverse\")\n", |
334 | | - " # source some functions to read matrix and inpute the missing data\n", |
335 | | - " source(\"~/cloud_colocalization/simulation_code/simulate_linreg.R\")\n", |
336 | | - " source(\"~/cloud_colocalization/simulation_code/misc.R\")\n", |
| 367 | + " \n", |
| 368 | + " # install simulation package\n", |
| 369 | + " # devtools::install_github(\"StatFunGen/simxQTL\", build_vignettes = FALSE)\n", |
| 370 | + " # BiocManager::install(\"StatFunGen/pecotmr\")\n", |
| 371 | + " library(\"pecotmr\")\n", |
| 372 | + " library(\"simxQTL\")\n", |
| 373 | + " \n", |
| 374 | + " # --- Load helper functions ---\n", |
| 375 | + " source(\"~/simxQTL/simulate_linreg.R\")\n", |
337 | 376 | " calculate_sumstat = function(X, Y){\n", |
338 | 377 | " Beta = c()\n", |
339 | 378 | " se = c()\n", |
|
370 | 409 | "\n", |
371 | 410 | " indep = ${\"TRUE\" if independent else \"FALSE\"}\n", |
372 | 411 | " if (indep) {\n", |
373 | | - " LD_vars = 1 # Initialize LD_vars\n", |
| 412 | + " LD_vars = 1 # Initialize LD check\n", |
374 | 413 | "\n", |
375 | 414 | " if (ncausal == 1) {\n", |
376 | | - " # If only one causal variant, just sample it\n", |
| 415 | + " # Only one causal variant needed\n", |
377 | 416 | " vars = sample(1:ncol(Xmat), size = ncausal)\n", |
378 | 417 | " } else {\n", |
379 | | - " # Repeat sampling until selected variables are quasi independent\n", |
| 418 | + " # Ensure selected variants are approximately independent (LD < 0.3)\n", |
380 | 419 | " while (length(LD_vars != 0)) {\n", |
381 | | - " vars = sample(1:ncol(Xmat), size = ncausal) \n", |
382 | | - " cor_mat = cor(Xmat[, vars]) \n", |
| 420 | + " vars = sample(1:ncol(Xmat), size = ncausal)\n", |
| 421 | + " cor_mat = cor(Xmat[, vars])\n", |
383 | 422 | " LD_vars = which(colSums(abs(cor_mat) > 0.3) > 1)\n", |
384 | 423 | " }\n", |
385 | 424 | " }\n", |
386 | 425 | " } else {\n", |
387 | | - " vars = sample(1:ncol(Xmat), size = ncausal)\n", |
| 426 | + " LD_vars = 1 # Initialize LD check\n", |
| 427 | + "\n", |
| 428 | + " if (ncausal == 1) {\n", |
| 429 | + " vars = sample(1:ncol(Xmat), size = ncausal)\n", |
| 430 | + " } else {\n", |
| 431 | + " # Avoid perfectly correlated variants (|cor| = 1)\n", |
| 432 | + " while (length(LD_vars != 0)) {\n", |
| 433 | + " vars = sample(1:ncol(Xmat), size = ncausal)\n", |
| 434 | + " cor_mat = cor(Xmat[, vars])\n", |
| 435 | + " LD_vars = which(colSums(abs(cor_mat) == 1) > 1)\n", |
| 436 | + " }\n", |
| 437 | + " }\n", |
388 | 438 | " }\n", |
389 | 439 | " \n", |
390 | 440 | " result <- sapply(partitioned_res$partitioned_indices, function(x) any(x == tad_number))\n", |
|
998 | 1048 | "output: f'{cwd:a}/{_input[0]:bn}_ntrait_{trait}_{step_name}.rds'\n", |
999 | 1049 | "task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output[0]:bn}'\n", |
1000 | 1050 | "R: expand = '${ }', stdout = f\"{_output:n}.stdout\", stderr = f\"{_output:n}.stderr\", container = container \n", |
1001 | | - " for(file in list.files(\"/home/xc2270/COLOCBoost/code_COLOCBoost/colocboost_updating/\", full.names = T)) {source(file)}\n", |
1002 | 1051 | " library(tidyverse)\n", |
1003 | | - " data = readRDS(${_input:ar})\n", |
| 1052 | + " # --- Load colocboost function files ---\n", |
| 1053 | + " # devtools::install_github(\"StatFunGen/colocboost\")\n", |
| 1054 | + " # for(file in list.files(\"/home/xc2270/COLOCBoost/code_COLOCBoost/release\", full.names = T)) {source(file)}\n", |
| 1055 | + " library(\"colocboost\")\n", |
1004 | 1056 | " if(length(data$trait) < 5){\n", |
1005 | 1057 | " trait1 = data$trait[[1]] %>% mutate(beta = b, sebeta = se, n = N, variant = SNP) %>% select(beta, sebeta, n, variant) %>% as.data.frame()\n", |
1006 | 1058 | " trait2 = data$trait[[2]] %>% mutate(beta = Beta, sebeta = se, n = 1160, variant = SNP)%>% select(beta, sebeta, n, variant) %>% as.data.frame()\n", |
|
0 commit comments