Skip to content

Commit 5ca3995

Browse files
authored
Merge pull request #17 from SamuelSun2000/main
Annotating, changing the code to library version
2 parents 92302c7 + 651ccf9 commit 5ca3995

25 files changed

+130540
-2207
lines changed

Simulation_Studies/10_OPERA_simulation.ipynb renamed to Simulation_Studies/10_Run_OPERA.ipynb

Lines changed: 260 additions & 166 deletions
Large diffs are not rendered by default.

Simulation_Studies/11_OPERA_original_design.ipynb renamed to Simulation_Studies/11_Run_OPERA_Original_Proportion.ipynb

Lines changed: 100 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"kernel": "R"
88
},
99
"source": [
10-
"# OPERA: simulation using original proportion configuration"
10+
"# Running OPERA: OPERA: simulation using original proportion configuration"
1111
]
1212
},
1313
{
@@ -42,44 +42,60 @@
4242
},
4343
"outputs": [],
4444
"source": [
45-
"# 3 trait index\n",
45+
"# Generate partitioned indices for 3-trait simulation\n",
46+
"# (Mixed proportions across different causal configurations)\n",
47+
"\n",
48+
"# Load required libraries\n",
4649
"library(tidyverse)\n",
47-
"total_indices <- 500\n",
48-
"proportions <- c(0.78, 0.05, 0.05, 0.05, 0.02, 0.02, 0.02, 0.01)\n",
4950
"\n",
50-
"# Calculate the exact number of indices for each group\n",
51-
"group_sizes <- floor(total_indices * proportions)\n",
51+
"# ----------------------\n",
52+
"# Step 1: Partition indices based on predefined proportions\n",
53+
"# ----------------------\n",
5254
"\n",
53-
"# Adjust for any rounding errors\n",
54-
"group_sizes[1] <- group_sizes[1] + (total_indices - sum(group_sizes))\n",
55-
"\n",
56-
"# Create a vector of group assignments\n",
57-
"groups <- rep(1:8, times = group_sizes)\n",
55+
"total_indices <- 500 # Total number of samples\n",
56+
"proportions <- c(0.78, 0.05, 0.05, 0.05, 0.02, 0.02, 0.02, 0.01) # Group proportions\n",
5857
"\n",
58+
"# Calculate number of samples in each group\n",
59+
"group_sizes <- floor(total_indices * proportions)\n",
60+
"group_sizes[1] <- group_sizes[1] + (total_indices - sum(group_sizes)) # Correct rounding error\n",
5961
"\n",
60-
"groups <- sample(groups)\n",
62+
"# Assign each sample to a group\n",
63+
"groups <- rep(1:8, times = group_sizes)\n",
64+
"groups <- sample(groups) # Shuffle assignments\n",
6165
"\n",
62-
"# Create a list to store the indices for each group\n",
66+
"# Create a list of indices for each group\n",
6367
"partitioned_indices <- lapply(1:8, function(i) which(groups == i) - 1)\n",
6468
"\n",
65-
"# Name the list elements for clarity\n",
69+
"# Name the groups\n",
6670
"names(partitioned_indices) <- paste(\"Group\", 1:8)\n",
6771
"\n",
68-
"# Print the number of indices in each group\n",
72+
"# Print number of indices per group\n",
6973
"sapply(partitioned_indices, length)\n",
70-
" \n",
71-
" \n",
72-
"config = list(c(1,0,0,0),\n",
73-
" c(1,1,0,0),\n",
74-
" c(1,0,1,0),\n",
75-
" c(1, 0, 0, 1),\n",
76-
" c(1,1,1,0),\n",
77-
" c(1,1,0,1),\n",
78-
" c(1,0,1,1),\n",
79-
" c(1,1,1,1))\n",
80-
"\n",
81-
"\n",
82-
"saveRDS(list(partitioned_indices = partitioned_indices, config = config), \"/home/hs3393/cloud_colocalization/simulation_data/opera_original_design/index/partitioned_index_3trait.rds\")\n"
74+
"\n",
75+
"# ----------------------\n",
76+
"# Step 2: Define causal configurations for each group\n",
77+
"# ----------------------\n",
78+
"\n",
79+
"\n",
80+
"config = list(\n",
81+
" c(1, 0, 0, 0), \n",
82+
" c(1, 1, 0, 0), \n",
83+
" c(1, 0, 1, 0), \n",
84+
" c(1, 0, 0, 1), \n",
85+
" c(1, 1, 1, 0), \n",
86+
" c(1, 1, 0, 1), \n",
87+
" c(1, 0, 1, 1), \n",
88+
" c(1, 1, 1, 1) \n",
89+
")\n",
90+
"\n",
91+
"# ----------------------\n",
92+
"# Step 3: Save partitioned index and configuration\n",
93+
"# ----------------------\n",
94+
"\n",
95+
"saveRDS(\n",
96+
" list(partitioned_indices = partitioned_indices, config = config),\n",
97+
" \"/home/hs3393/cloud_colocalization/simulation_data/opera_original_design/index/partitioned_index_3trait.rds\"\n",
98+
")\n"
8399
]
84100
},
85101
{
@@ -198,14 +214,19 @@
198214
"output: f'{cwd:a}/{step_name}/sample_{_index}_opera_3trait.rds'\n",
199215
"task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output[0]:bn}'\n",
200216
"R: expand = '${ }', stdout = f\"{_output:n}.stdout\", stderr = f\"{_output:n}.stderr\", container = container \n",
217+
" # --- Load necessary libraries ---\n",
201218
" library(\"MASS\")\n",
202219
" library(\"plink2R\")\n",
203220
" library(\"dplyr\")\n",
204221
" library(\"readr\")\n",
205222
" library(\"tidyverse\")\n",
206-
" # source some functions to read matrix and inpute the missing data\n",
207-
" source(\"~/cloud_colocalization/simulation_code/simulate_linreg.R\")\n",
208-
" source(\"~/cloud_colocalization/simulation_code/misc.R\")\n",
223+
" \n",
224+
" # install simulation package\n",
225+
" # devtools::install_github(\"StatFunGen/simxQTL\", build_vignettes = FALSE)\n",
226+
" # BiocManager::install(\"StatFunGen/pecotmr\")\n",
227+
" library(\"pecotmr\")\n",
228+
" library(\"simxQTL\")\n",
229+
" \n",
209230
" calculate_sumstat = function(X, Y){\n",
210231
" Beta = c()\n",
211232
" se = c()\n",
@@ -242,21 +263,32 @@
242263
"\n",
243264
" indep = ${\"TRUE\" if independent else \"FALSE\"}\n",
244265
" if (indep) {\n",
245-
" LD_vars = 1 # Initialize LD_vars\n",
266+
" LD_vars = 1 # Initialize LD check\n",
246267
"\n",
247268
" if (ncausal == 1) {\n",
248-
" # If only one causal variant, just sample it\n",
269+
" # Only one causal variant needed\n",
249270
" vars = sample(1:ncol(Xmat), size = ncausal)\n",
250271
" } else {\n",
251-
" # Repeat sampling until selected variables are quasi independent\n",
272+
" # Ensure selected variants are approximately independent (LD < 0.3)\n",
252273
" while (length(LD_vars != 0)) {\n",
253-
" vars = sample(1:ncol(Xmat), size = ncausal) \n",
254-
" cor_mat = cor(Xmat[, vars]) \n",
274+
" vars = sample(1:ncol(Xmat), size = ncausal)\n",
275+
" cor_mat = cor(Xmat[, vars])\n",
255276
" LD_vars = which(colSums(abs(cor_mat) > 0.3) > 1)\n",
256277
" }\n",
257278
" }\n",
258279
" } else {\n",
259-
" vars = sample(1:ncol(Xmat), size = ncausal)\n",
280+
" LD_vars = 1 # Initialize LD check\n",
281+
"\n",
282+
" if (ncausal == 1) {\n",
283+
" vars = sample(1:ncol(Xmat), size = ncausal)\n",
284+
" } else {\n",
285+
" # Avoid perfectly correlated variants (|cor| = 1)\n",
286+
" while (length(LD_vars != 0)) {\n",
287+
" vars = sample(1:ncol(Xmat), size = ncausal)\n",
288+
" cor_mat = cor(Xmat[, vars])\n",
289+
" LD_vars = which(colSums(abs(cor_mat) == 1) > 1)\n",
290+
" }\n",
291+
" }\n",
260292
" }\n",
261293
" \n",
262294
" result <- sapply(partitioned_res$partitioned_indices, function(x) any(x == tad_number))\n",
@@ -326,14 +358,21 @@
326358
"output: f'{cwd:a}/{step_name}/sample_{_index}_opera_5trait.rds'\n",
327359
"task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output[0]:bn}'\n",
328360
"R: expand = '${ }', stdout = f\"{_output:n}.stdout\", stderr = f\"{_output:n}.stderr\", container = container \n",
361+
" # --- Load necessary libraries ---\n",
329362
" library(\"MASS\")\n",
330363
" library(\"plink2R\")\n",
331364
" library(\"dplyr\")\n",
332365
" library(\"readr\")\n",
333366
" library(\"tidyverse\")\n",
334-
" # source some functions to read matrix and inpute the missing data\n",
335-
" source(\"~/cloud_colocalization/simulation_code/simulate_linreg.R\")\n",
336-
" source(\"~/cloud_colocalization/simulation_code/misc.R\")\n",
367+
" \n",
368+
" # install simulation package\n",
369+
" # devtools::install_github(\"StatFunGen/simxQTL\", build_vignettes = FALSE)\n",
370+
" # BiocManager::install(\"StatFunGen/pecotmr\")\n",
371+
" library(\"pecotmr\")\n",
372+
" library(\"simxQTL\")\n",
373+
" \n",
374+
" # --- Load helper functions ---\n",
375+
" source(\"~/simxQTL/simulate_linreg.R\")\n",
337376
" calculate_sumstat = function(X, Y){\n",
338377
" Beta = c()\n",
339378
" se = c()\n",
@@ -370,21 +409,32 @@
370409
"\n",
371410
" indep = ${\"TRUE\" if independent else \"FALSE\"}\n",
372411
" if (indep) {\n",
373-
" LD_vars = 1 # Initialize LD_vars\n",
412+
" LD_vars = 1 # Initialize LD check\n",
374413
"\n",
375414
" if (ncausal == 1) {\n",
376-
" # If only one causal variant, just sample it\n",
415+
" # Only one causal variant needed\n",
377416
" vars = sample(1:ncol(Xmat), size = ncausal)\n",
378417
" } else {\n",
379-
" # Repeat sampling until selected variables are quasi independent\n",
418+
" # Ensure selected variants are approximately independent (LD < 0.3)\n",
380419
" while (length(LD_vars != 0)) {\n",
381-
" vars = sample(1:ncol(Xmat), size = ncausal) \n",
382-
" cor_mat = cor(Xmat[, vars]) \n",
420+
" vars = sample(1:ncol(Xmat), size = ncausal)\n",
421+
" cor_mat = cor(Xmat[, vars])\n",
383422
" LD_vars = which(colSums(abs(cor_mat) > 0.3) > 1)\n",
384423
" }\n",
385424
" }\n",
386425
" } else {\n",
387-
" vars = sample(1:ncol(Xmat), size = ncausal)\n",
426+
" LD_vars = 1 # Initialize LD check\n",
427+
"\n",
428+
" if (ncausal == 1) {\n",
429+
" vars = sample(1:ncol(Xmat), size = ncausal)\n",
430+
" } else {\n",
431+
" # Avoid perfectly correlated variants (|cor| = 1)\n",
432+
" while (length(LD_vars != 0)) {\n",
433+
" vars = sample(1:ncol(Xmat), size = ncausal)\n",
434+
" cor_mat = cor(Xmat[, vars])\n",
435+
" LD_vars = which(colSums(abs(cor_mat) == 1) > 1)\n",
436+
" }\n",
437+
" }\n",
388438
" }\n",
389439
" \n",
390440
" result <- sapply(partitioned_res$partitioned_indices, function(x) any(x == tad_number))\n",
@@ -998,9 +1048,11 @@
9981048
"output: f'{cwd:a}/{_input[0]:bn}_ntrait_{trait}_{step_name}.rds'\n",
9991049
"task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output[0]:bn}'\n",
10001050
"R: expand = '${ }', stdout = f\"{_output:n}.stdout\", stderr = f\"{_output:n}.stderr\", container = container \n",
1001-
" for(file in list.files(\"/home/xc2270/COLOCBoost/code_COLOCBoost/colocboost_updating/\", full.names = T)) {source(file)}\n",
10021051
" library(tidyverse)\n",
1003-
" data = readRDS(${_input:ar})\n",
1052+
" # --- Load colocboost function files ---\n",
1053+
" # devtools::install_github(\"StatFunGen/colocboost\")\n",
1054+
" # for(file in list.files(\"/home/xc2270/COLOCBoost/code_COLOCBoost/release\", full.names = T)) {source(file)}\n",
1055+
" library(\"colocboost\")\n",
10041056
" if(length(data$trait) < 5){\n",
10051057
" trait1 = data$trait[[1]] %>% mutate(beta = b, sebeta = se, n = N, variant = SNP) %>% select(beta, sebeta, n, variant) %>% as.data.frame()\n",
10061058
" trait2 = data$trait[[2]] %>% mutate(beta = Beta, sebeta = se, n = 1160, variant = SNP)%>% select(beta, sebeta, n, variant) %>% as.data.frame()\n",

0 commit comments

Comments
 (0)