-
Notifications
You must be signed in to change notification settings - Fork 1k
Rmats prep PR #10128
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Rmats prep PR #10128
Changes from all commits
5ccd85b
4f2ad83
b1ca50f
413229d
7e11e0a
c85ce8e
dd0443f
7957efd
eb7662b
5150e5f
c985940
a772926
028d1c6
86c8a8c
5b92e0a
4c4cfdc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| --- | ||
| # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json | ||
| channels: | ||
| - conda-forge | ||
| - bioconda | ||
| dependencies: | ||
| - "bioconda::rmats=4.3.0" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,62 @@ | ||
| process RMATS_PREP { | ||
| tag "${meta.id}" | ||
| label 'process_single' | ||
|
|
||
| conda "${moduleDir}/environment.yml" | ||
| container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container | ||
| ? 'https://depot.galaxyproject.org/singularity/rmats:4.3.0--py311hf2f0b74_5' | ||
| : 'biocontainers/rmats:4.3.0--py311hf2f0b74_5'}" | ||
|
|
||
| input: | ||
| tuple val(meta), path(genome_bam) | ||
| // NOTES - post seems to need only the BAM *names*, not the actual files. Could we just get the first line of each file to get the names? | ||
| // for file in `ls multi_bam_rmats_prep_tmp/*.rmats`; do head -1 $file; done | tr '\n' ',' | ||
| // possible suggestions from @SPPearce - pass ${prefix}.prep.b1.txt as output | ||
| // NOTES - for stats, it should be possible to parse the formula using patsy, but if we include PAIRADISE we might have R - just do this in R, first pass | ||
| tuple val(meta2), path(reference_gtf) | ||
| val rmats_read_len | ||
|
|
||
| output: | ||
| tuple val(meta), path("*.rmats"), emit: prep_rmats_file | ||
| tuple val(meta), path("*read_outcomes_by_bam.txt"), emit: prep_read_outcomes_file | ||
| tuple val("${task.process}"), val('rmats'), eval('rmats.py --version | sed -e "s/v//g"'), emit: versions_rmats, topic: versions | ||
|
|
||
| when: | ||
| task.ext.when == null || task.ext.when | ||
|
|
||
| script: | ||
| def args = task.ext.args ?: '' | ||
| def prefix = task.ext.prefix ?: "${meta.id}" | ||
akaviaLab marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| // NOTES --readLength READLENGTH | ||
| // The length of each read. Required parameter, with the | ||
| // value set according to the RNA-seq read length | ||
| // I should change it by read length (in workflow)! Look at Samtools stats! | ||
| """ | ||
| echo ${genome_bam} > ${prefix}.prep.b1.txt | ||
|
|
||
| rmats.py \\ | ||
| --task prep \\ | ||
| ${args} \\ | ||
| --nthread ${task.cpus} \\ | ||
| --b1 ${prefix}.prep.b1.txt \\ | ||
| --gtf ${reference_gtf} \\ | ||
| --readLength ${rmats_read_len} \\ | ||
| --tmp ${prefix}_rmats_tmp \\ | ||
| --od ${prefix}_rmats_prep | ||
|
|
||
| cp ${prefix}_rmats_tmp/*.txt ${prefix}_read_outcomes_by_bam.txt | ||
| cp ${prefix}_rmats_tmp/*.rmats ${prefix}.rmats | ||
| """ | ||
|
|
||
| // NOTES for post - post requires the rmats files to be in the tmp directory, otherwise it fails | ||
|
|
||
| stub: | ||
| def args = task.ext.args ?: '' | ||
| def prefix = task.ext.prefix ?: "${meta.id}" | ||
| """ | ||
| echo ${args} | ||
|
|
||
| touch ${prefix}.rmats | ||
| touch ${prefix}_read_outcomes_by_bam.txt | ||
| """ | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,89 @@ | ||
| name: "rmats_prep" | ||
| description: MATS is a computational tool to detect differential alternative | ||
| splicing events from RNA-Seq data. | ||
| keywords: | ||
| - splicing | ||
| - RNA-Seq | ||
| - alternative splicing | ||
| - exon | ||
| - intron | ||
| - rMATS | ||
| tools: | ||
| - "rmats": | ||
| description: "MATS is a computational tool to detect differential alternative | ||
| splicing events from RNA-Seq data." | ||
| homepage: "https://github.com/Xinglab/rmats-turbo" | ||
| documentation: "https://github.com/Xinglab/rmats-turbo/blob/v4.3.0/README.md" | ||
| doi: "10.1038/s41596-023-00944-2" | ||
| licence: | ||
| - "FreeBSD for non-commercial use, see LICENSE file" | ||
| identifier: biotools:rmats | ||
| input: | ||
| - - meta: | ||
| type: map | ||
| description: Groovy Map containing sample information. e.g. `[ | ||
| id:'sample1', single_end:false, strandness:'auto']` | ||
| - genome_bam: | ||
| type: file | ||
| description: BAM file aligned to the genome | ||
| pattern: "*.{bam}" | ||
| ontologies: | ||
| - edam: http://edamontology.org/format_2572 | ||
| - reference_gtf: | ||
| type: file | ||
| description: Annotation GTF file | ||
| pattern: "*.{gtf}" | ||
| ontologies: | ||
| - edam: http://edamontology.org/format_2306 | ||
| - rmats_read_len: | ||
| type: integer | ||
| description: Read length in bases | ||
| output: | ||
| prep_rmats_file: | ||
| - - meta: | ||
| type: map | ||
| description: Groovy Map containing sample information. e.g. `[ | ||
| id:'sample1', single_end:false, strandness:'auto']` | ||
| - "*.rmats": | ||
| type: file | ||
| description: rmats junction count information, after processing the BAM | ||
| file | ||
| pattern: "*.rmats" | ||
| ontologies: [] | ||
| prep_read_outcomes_file: | ||
| - - meta: | ||
| type: map | ||
| description: Groovy Map containing sample information. e.g. `[ | ||
| id:'sample1', single_end:false, strandness:'auto']` | ||
| - "*read_outcomes_by_bam.txt": | ||
| type: file | ||
| description: text file detailing number of reads used and not used for | ||
| various reasons (clipped, not paired, wrong length, etc.) | ||
| pattern: "*read_outcomes_by_bam.txt" | ||
| ontologies: | ||
| - edam: http://edamontology.org/format_2330 | ||
| versions_rmats: | ||
| - - ${task.process}: | ||
| type: string | ||
| description: The process the versions were collected from | ||
| - rmats: | ||
| type: string | ||
| description: The tool name | ||
| - rmats.py --version | sed -e "s/v//g": | ||
| type: eval | ||
| description: The command used to generate the version of the tool | ||
| topics: | ||
| versions: | ||
| - - ${task.process}: | ||
| type: string | ||
| description: The process the versions were collected from | ||
| - rmats: | ||
| type: string | ||
| description: The tool name | ||
| - rmats.py --version | sed -e "s/v//g": | ||
| type: eval | ||
| description: The command used to generate the version of the tool | ||
| authors: | ||
| - "@akaviaLab" | ||
| maintainers: | ||
| - "@akaviaLab" |
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,213 @@ | ||||||||
| nextflow_process { | ||||||||
|
|
||||||||
| name "Test Process RMATS_PREP" | ||||||||
| script "../main.nf" | ||||||||
| process "RMATS_PREP" | ||||||||
|
|
||||||||
| tag "modules" | ||||||||
| tag "modules_nfcore" | ||||||||
| tag "rmats" | ||||||||
| tag "rmats/prep" | ||||||||
|
|
||||||||
| test("homo_sapiens - paired unstranded rmats prep") { | ||||||||
|
|
||||||||
| config "./nextflow.config" | ||||||||
|
|
||||||||
| when { | ||||||||
| process { | ||||||||
| """ | ||||||||
| input[0] = [ | ||||||||
| [ id:'test', single_end:false, strandness:"unstranded" ], // meta map | ||||||||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.bam', checkIfExists: true), | ||||||||
| ] | ||||||||
| input[1] = [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true)] | ||||||||
| input[2] = 150 | ||||||||
| """ | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| then { | ||||||||
| def lines = path(process.out.prep_read_outcomes_file[0][1]).readLines() | ||||||||
| def reads_used = lines[1] =~ /USED: (\d+)/ | ||||||||
| def reads_clipped = lines[9] =~ /CLIPPED: (\d+)/ | ||||||||
| assertAll( | ||||||||
| { assert process.success }, | ||||||||
| { assert snapshot(process.out).match() }, | ||||||||
| { assert reads_used[0][1] as Integer > 0 }, | ||||||||
| { assert reads_clipped[0][1] as Integer == 0 } | ||||||||
| ) | ||||||||
| } | ||||||||
|
|
||||||||
| } | ||||||||
|
|
||||||||
| test("homo_sapiens - single-end unstranded rmats prep") { | ||||||||
|
|
||||||||
| config "./nextflow.config" | ||||||||
| when { | ||||||||
| process { | ||||||||
| """ | ||||||||
| input[0] = [ | ||||||||
| [ id:'test', single_end:true, strandness:"unstranded" ], // meta map | ||||||||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.bam', checkIfExists: true), | ||||||||
| ] | ||||||||
| input[1] = [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true)] | ||||||||
| input[2] = 150 | ||||||||
| """ | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| then { | ||||||||
| def lines = path(process.out.prep_read_outcomes_file[0][1]).readLines() | ||||||||
| def reads_used = lines[1] =~ /USED: (\d+)/ | ||||||||
| assertAll( | ||||||||
| { assert process.success }, | ||||||||
| { assert snapshot(process.out).match() }, | ||||||||
| { assert reads_used[0][1] as Integer > 0 } | ||||||||
| ) | ||||||||
| } | ||||||||
|
|
||||||||
| } | ||||||||
|
|
||||||||
| test("homo_sapiens - paired forward rmats prep") { | ||||||||
|
|
||||||||
| config "./nextflow.config" | ||||||||
| when { | ||||||||
| process { | ||||||||
| """ | ||||||||
| input[0] = [ | ||||||||
| [ id:'test', single_end:false, strandness:"forward" ], // meta map | ||||||||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.bam', checkIfExists: true), | ||||||||
| ] | ||||||||
| input[1] = [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true)] | ||||||||
| input[2] = 150 | ||||||||
| """ | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| then { | ||||||||
| def lines = path(process.out.prep_read_outcomes_file[0][1]).readLines() | ||||||||
| def reads_used = lines[1] =~ /USED: (\d+)/ | ||||||||
| assertAll( | ||||||||
| { assert process.success }, | ||||||||
| { assert snapshot(process.out).match() }, | ||||||||
| { assert reads_used[0][1] as Integer > 0 } ) | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
| } | ||||||||
|
|
||||||||
| } | ||||||||
|
|
||||||||
| test("homo_sapiens - paired reverse rmats prep") { | ||||||||
|
|
||||||||
| config "./nextflow.config" | ||||||||
| when { | ||||||||
| process { | ||||||||
| """ | ||||||||
| input[0] = [ | ||||||||
| [ id:'test', single_end:false, strandness:"reverse" ], // meta map | ||||||||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.bam', checkIfExists: true), | ||||||||
| ] | ||||||||
| input[1] = [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true)] | ||||||||
| input[2] = 150 | ||||||||
| """ | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| then { | ||||||||
| def lines = path(process.out.prep_read_outcomes_file[0][1]).readLines() | ||||||||
| def reads_used = lines[1] =~ /USED: (\d+)/ | ||||||||
| assertAll( | ||||||||
| { assert process.success }, | ||||||||
| { assert snapshot(process.out).match() }, | ||||||||
| { assert reads_used[0][1] as Integer > 0 } | ||||||||
| ) | ||||||||
| } | ||||||||
|
|
||||||||
| } | ||||||||
|
|
||||||||
| test("homo_sapiens - paired unstranded novel splice rmats prep") { | ||||||||
|
|
||||||||
| config "./nextflow.config" | ||||||||
| when { | ||||||||
| process { | ||||||||
| """ | ||||||||
| input[0] = [ | ||||||||
| [ id:'test', single_end:false, strandness:"unstranded" ], // meta map | ||||||||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.bam', checkIfExists: true), | ||||||||
| ] | ||||||||
| input[1] = [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true)] | ||||||||
| input[2] = 150 | ||||||||
| """ | ||||||||
| } | ||||||||
| params { | ||||||||
| novel_splice_site = true | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| then { | ||||||||
| def lines = path(process.out.prep_read_outcomes_file[0][1]).readLines() | ||||||||
| def reads_used = lines[1] =~ /USED: (\d+)/ | ||||||||
| assertAll( | ||||||||
| { assert process.success }, | ||||||||
| { assert snapshot(process.out).match() }, | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to clean up the snapshot a bit (removes the numbered channels)
Suggested change
|
||||||||
| { assert reads_used[0][1] as Integer > 0 } | ||||||||
| ) | ||||||||
| } | ||||||||
|
|
||||||||
| } | ||||||||
|
|
||||||||
| test("homo_sapiens - paired unstranded no clipping rmats prep") { | ||||||||
|
|
||||||||
| when { | ||||||||
| process { | ||||||||
| """ | ||||||||
| input[0] = [ | ||||||||
| [ id:'test', single_end:false, strandness:"unstranded" ], // meta map | ||||||||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.bam', checkIfExists: true), | ||||||||
| ] | ||||||||
| input[1] = [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true)] | ||||||||
| input[2] = 150 | ||||||||
| """ | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| then { | ||||||||
| def lines = path(process.out.prep_read_outcomes_file[0][1]).readLines() | ||||||||
| def reads_used = lines[1] =~ /USED: (\d+)/ | ||||||||
| def reads_clipped = lines[9] =~ /CLIPPED: (\d+)/ | ||||||||
| assertAll( | ||||||||
| { assert process.success }, | ||||||||
| { assert snapshot(process.out).match() }, | ||||||||
| { assert reads_used[0][1] as Integer > 0 }, | ||||||||
| { assert reads_clipped[0][1] as Integer > 0}, | ||||||||
| { assert reads_clipped[0][1] as Integer > reads_used[0][1] as Integer } | ||||||||
| ) | ||||||||
| } | ||||||||
|
|
||||||||
| } | ||||||||
|
|
||||||||
| test("homo_sapiens - prep - stub") { | ||||||||
|
|
||||||||
| options "-stub" | ||||||||
|
|
||||||||
| when { | ||||||||
| process { | ||||||||
| """ | ||||||||
| input[0] = [ | ||||||||
| [ id:'test', single_end:false, strandness:"unstranded" ], // meta map | ||||||||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.bam', checkIfExists: true), | ||||||||
| ] | ||||||||
| input[1] = [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true)] | ||||||||
| input[2] = 150 | ||||||||
| """ | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| then { | ||||||||
| assertAll( | ||||||||
| { assert process.success }, | ||||||||
| { assert snapshot(process.out).match() } | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
| ) | ||||||||
| } | ||||||||
|
|
||||||||
| } | ||||||||
|
|
||||||||
| } | ||||||||
Uh oh!
There was an error while loading. Please reload this page.