Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions modules/nf-core/rmats/prep/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment used by the rmats/prep module.
channels:
  - conda-forge
  - bioconda
dependencies:
  # Single pinned dependency: rMATS 4.3.0 from the bioconda channel.
  - bioconda::rmats=4.3.0
62 changes: 62 additions & 0 deletions modules/nf-core/rmats/prep/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Runs `rmats.py --task prep` on the BAM supplied for one sample and exposes
// the resulting .rmats file and the read-outcome summary as outputs.
process RMATS_PREP {
    tag "${meta.id}"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
        ? 'https://depot.galaxyproject.org/singularity/rmats:4.3.0--py311hf2f0b74_5'
        : 'biocontainers/rmats:4.3.0--py311hf2f0b74_5'}"

    input:
    tuple val(meta), path(genome_bam)
    // NOTE: the `post` task seems to need only the BAM *names*, not the BAM
    // files themselves. The names could possibly be recovered from the first
    // line of each .rmats file, e.g.:
    //   for file in `ls multi_bam_rmats_prep_tmp/*.rmats`; do head -1 $file; done | tr '\n' ','
    // Suggestion from @SPPearce: emit ${prefix}.prep.b1.txt as an output.
    // NOTE: for the stats step it should be possible to parse the formula with
    // patsy, but if PAIRADISE is included R may be needed anyway - so just do
    // it in R for the first pass.
    tuple val(meta2), path(reference_gtf)
    val rmats_read_len

    output:
    tuple val(meta), path("*.rmats"), emit: prep_rmats_file
    tuple val(meta), path("*read_outcomes_by_bam.txt"), emit: prep_read_outcomes_file
    tuple val("${task.process}"), val('rmats'), eval('rmats.py --version | sed -e "s/v//g"'), emit: versions_rmats, topic: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def prefix = task.ext.prefix ?: "${meta.id}"
    def args = task.ext.args ?: ''
    // NOTE: --readLength is a required rMATS parameter giving the length of
    // each read; it must be set according to the RNA-seq read length.
    // TODO: derive it in the workflow from the data (look at samtools stats)
    // instead of relying on a hand-supplied value.
    //
    // Steps performed by the script below:
    //   1. write the staged BAM name into the --b1 list file
    //   2. run the prep task, using <prefix>_rmats_tmp as the tmp directory
    //   3. copy the *.txt and *.rmats files out of the tmp directory under
    //      prefix-based names that match the declared output globs
    """
    echo ${genome_bam} > ${prefix}.prep.b1.txt

    rmats.py \\
        --task prep \\
        ${args} \\
        --nthread ${task.cpus} \\
        --b1 ${prefix}.prep.b1.txt \\
        --gtf ${reference_gtf} \\
        --readLength ${rmats_read_len} \\
        --tmp ${prefix}_rmats_tmp \\
        --od ${prefix}_rmats_prep

    cp ${prefix}_rmats_tmp/*.txt ${prefix}_read_outcomes_by_bam.txt
    cp ${prefix}_rmats_tmp/*.rmats ${prefix}.rmats
    """

    // NOTE for the `post` module: post requires the .rmats files to be in the
    // tmp directory, otherwise it fails.

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    def args = task.ext.args ?: ''
    // The stub only echoes the extra arguments and creates empty placeholder
    // files for the two file outputs.
    """
    echo ${args}

    touch ${prefix}.rmats
    touch ${prefix}_read_outcomes_by_bam.txt
    """
}
89 changes: 89 additions & 0 deletions modules/nf-core/rmats/prep/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
name: "rmats_prep"
description: MATS is a computational tool to detect differential alternative
  splicing events from RNA-Seq data.
keywords:
  - splicing
  - RNA-Seq
  - alternative splicing
  - exon
  - intron
  - rMATS
tools:
  - "rmats":
      description: "MATS is a computational tool to detect differential alternative
        splicing events from RNA-Seq data."
      homepage: "https://github.com/Xinglab/rmats-turbo"
      documentation: "https://github.com/Xinglab/rmats-turbo/blob/v4.3.0/README.md"
      doi: "10.1038/s41596-023-00944-2"
      licence:
        - "FreeBSD for non-commercial use, see LICENSE file"
      identifier: biotools:rmats
# One entry per input channel of the process, in declaration order:
#   1. tuple val(meta), path(genome_bam)
#   2. tuple val(meta2), path(reference_gtf)
#   3. val rmats_read_len
input:
  - - meta:
        type: map
        description: Groovy Map containing sample information. e.g. `[
          id:'sample1', single_end:false, strandness:'auto']`
    - genome_bam:
        type: file
        description: BAM file aligned to the genome
        pattern: "*.{bam}"
        ontologies:
          - edam: http://edamontology.org/format_2572
  - - meta2:
        type: map
        description: Groovy Map containing reference annotation information. e.g.
          `[ id:'genome' ]`
    - reference_gtf:
        type: file
        description: Annotation GTF file
        pattern: "*.{gtf}"
        ontologies:
          - edam: http://edamontology.org/format_2306
  - rmats_read_len:
      type: integer
      description: Read length in bases
output:
  prep_rmats_file:
    - - meta:
          type: map
          description: Groovy Map containing sample information. e.g. `[
            id:'sample1', single_end:false, strandness:'auto']`
      - "*.rmats":
          type: file
          description: rmats junction count information, after processing the BAM
            file
          pattern: "*.rmats"
          ontologies: []
  prep_read_outcomes_file:
    - - meta:
          type: map
          description: Groovy Map containing sample information. e.g. `[
            id:'sample1', single_end:false, strandness:'auto']`
      - "*read_outcomes_by_bam.txt":
          type: file
          description: text file detailing number of reads used and not used for
            various reasons (clipped, not paired, wrong length, etc.)
          pattern: "*read_outcomes_by_bam.txt"
          ontologies:
            - edam: http://edamontology.org/format_2330
  versions_rmats:
    - - ${task.process}:
          type: string
          description: The process the versions were collected from
      - rmats:
          type: string
          description: The tool name
      - rmats.py --version | sed -e "s/v//g":
          type: eval
          description: The command used to generate the version of the tool
topics:
  versions:
    - - ${task.process}:
          type: string
          description: The process the versions were collected from
      - rmats:
          type: string
          description: The tool name
      - rmats.py --version | sed -e "s/v//g":
          type: eval
          description: The command used to generate the version of the tool
authors:
  - "@akaviaLab"
maintainers:
  - "@akaviaLab"
213 changes: 213 additions & 0 deletions modules/nf-core/rmats/prep/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
nextflow_process {

name "Test Process RMATS_PREP"
script "../main.nf"
process "RMATS_PREP"

tag "modules"
tag "modules_nfcore"
tag "rmats"
tag "rmats/prep"

test("homo_sapiens - paired unstranded rmats prep") {

    config "./nextflow.config"

    when {
        process {
            """
            input[0] = [
                [ id:'test', single_end:false, strandness:"unstranded" ], // meta map
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.bam', checkIfExists: true),
            ]
            // RMATS_PREP declares `tuple val(meta2), path(reference_gtf)`, so the GTF must be paired with a meta map
            input[1] = [
                [ id:'genome' ], // meta2 map
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true),
            ]
            input[2] = 150
            """
        }
    }

    then {
        // Pull the USED and CLIPPED read counts out of the read-outcome summary
        // (the test expects them on line indices 1 and 9 respectively).
        def lines = path(process.out.prep_read_outcomes_file[0][1]).readLines()
        def reads_used = lines[1] =~ /USED: (\d+)/
        def reads_clipped = lines[9] =~ /CLIPPED: (\d+)/
        assertAll(
            { assert process.success },
            { assert snapshot(process.out).match() },
            { assert (reads_used[0][1] as Integer) > 0 },
            // With ./nextflow.config applied, no read is expected to be reported as CLIPPED
            { assert (reads_clipped[0][1] as Integer) == 0 }
        )
    }

}

test("homo_sapiens - single-end unstranded rmats prep") {

    config "./nextflow.config"

    when {
        process {
            """
            input[0] = [
                [ id:'test', single_end:true, strandness:"unstranded" ], // meta map
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.bam', checkIfExists: true),
            ]
            // RMATS_PREP declares `tuple val(meta2), path(reference_gtf)`, so the GTF must be paired with a meta map
            input[1] = [
                [ id:'genome' ], // meta2 map
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true),
            ]
            input[2] = 150
            """
        }
    }

    then {
        // Pull the USED read count out of the read-outcome summary (expected on line index 1)
        def lines = path(process.out.prep_read_outcomes_file[0][1]).readLines()
        def reads_used = lines[1] =~ /USED: (\d+)/
        assertAll(
            { assert process.success },
            { assert snapshot(process.out).match() },
            { assert (reads_used[0][1] as Integer) > 0 }
        )
    }

}

test("homo_sapiens - paired forward rmats prep") {

    config "./nextflow.config"

    when {
        process {
            """
            input[0] = [
                [ id:'test', single_end:false, strandness:"forward" ], // meta map
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.bam', checkIfExists: true),
            ]
            // RMATS_PREP declares `tuple val(meta2), path(reference_gtf)`, so the GTF must be paired with a meta map
            input[1] = [
                [ id:'genome' ], // meta2 map
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true),
            ]
            input[2] = 150
            """
        }
    }

    then {
        // Pull the USED read count out of the read-outcome summary (expected on line index 1)
        def lines = path(process.out.prep_read_outcomes_file[0][1]).readLines()
        def reads_used = lines[1] =~ /USED: (\d+)/
        assertAll(
            { assert process.success },
            { assert snapshot(process.out).match() },
            { assert (reads_used[0][1] as Integer) > 0 }
        )
    }

}

test("homo_sapiens - paired reverse rmats prep") {

    config "./nextflow.config"

    when {
        process {
            """
            input[0] = [
                [ id:'test', single_end:false, strandness:"reverse" ], // meta map
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.bam', checkIfExists: true),
            ]
            // RMATS_PREP declares `tuple val(meta2), path(reference_gtf)`, so the GTF must be paired with a meta map
            input[1] = [
                [ id:'genome' ], // meta2 map
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true),
            ]
            input[2] = 150
            """
        }
    }

    then {
        // Pull the USED read count out of the read-outcome summary (expected on line index 1)
        def lines = path(process.out.prep_read_outcomes_file[0][1]).readLines()
        def reads_used = lines[1] =~ /USED: (\d+)/
        assertAll(
            { assert process.success },
            { assert snapshot(process.out).match() },
            { assert (reads_used[0][1] as Integer) > 0 }
        )
    }

}

test("homo_sapiens - paired unstranded novel splice rmats prep") {

    config "./nextflow.config"

    when {
        process {
            """
            input[0] = [
                [ id:'test', single_end:false, strandness:"unstranded" ], // meta map
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.bam', checkIfExists: true),
            ]
            // RMATS_PREP declares `tuple val(meta2), path(reference_gtf)`, so the GTF must be paired with a meta map
            input[1] = [
                [ id:'genome' ], // meta2 map
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true),
            ]
            input[2] = 150
            """
        }
        // Test-level parameter; presumably read by ./nextflow.config to switch on novel splice site detection - confirm
        params {
            novel_splice_site = true
        }
    }

    then {
        // Pull the USED read count out of the read-outcome summary (expected on line index 1)
        def lines = path(process.out.prep_read_outcomes_file[0][1]).readLines()
        def reads_used = lines[1] =~ /USED: (\d+)/
        assertAll(
            { assert process.success },
            // NOTE(review): a reviewer suggested snapshot(sanitizeOutput(process.out)) here to drop the
            // numbered channels from the snapshot; adopting it requires regenerating the snapshot file.
            { assert snapshot(process.out).match() },
            { assert (reads_used[0][1] as Integer) > 0 }
        )
    }

}

test("homo_sapiens - paired unstranded no clipping rmats prep") {

    // No `config` directive here: this test runs the process without ./nextflow.config.

    when {
        process {
            """
            input[0] = [
                [ id:'test', single_end:false, strandness:"unstranded" ], // meta map
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.bam', checkIfExists: true),
            ]
            // RMATS_PREP declares `tuple val(meta2), path(reference_gtf)`, so the GTF must be paired with a meta map
            input[1] = [
                [ id:'genome' ], // meta2 map
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true),
            ]
            input[2] = 150
            """
        }
    }

    then {
        // Pull the USED and CLIPPED read counts out of the read-outcome summary
        // (the test expects them on line indices 1 and 9 respectively).
        def lines = path(process.out.prep_read_outcomes_file[0][1]).readLines()
        def reads_used = lines[1] =~ /USED: (\d+)/
        def reads_clipped = lines[9] =~ /CLIPPED: (\d+)/
        assertAll(
            { assert process.success },
            { assert snapshot(process.out).match() },
            { assert (reads_used[0][1] as Integer) > 0 },
            { assert (reads_clipped[0][1] as Integer) > 0 },
            // Both sides are coerced explicitly so the comparison is numeric, not String-based
            { assert (reads_clipped[0][1] as Integer) > (reads_used[0][1] as Integer) }
        )
    }

}

test("homo_sapiens - prep - stub") {

    // Run the process's `stub:` section instead of the real script
    options "-stub"

    when {
        process {
            """
            input[0] = [
                [ id:'test', single_end:false, strandness:"unstranded" ], // meta map
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.bam', checkIfExists: true),
            ]
            // RMATS_PREP declares `tuple val(meta2), path(reference_gtf)`, so the GTF must be paired with a meta map
            input[1] = [
                [ id:'genome' ], // meta2 map
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true),
            ]
            input[2] = 150
            """
        }
    }

    then {
        assertAll(
            { assert process.success },
            // NOTE(review): a reviewer suggested snapshot(sanitizeOutput(process.out)) here to drop the
            // numbered channels from the snapshot; adopting it requires regenerating the snapshot file.
            { assert snapshot(process.out).match() }
        )
    }

}

}
Loading
Loading