nf-core · akaviaLab · Jan 18, 2026 · Feb 3, 2026 · Feb 3, 2026 · Feb 11, 2026
diff --git a/modules/nf-core/rmats/prep/environment.yml b/modules/nf-core/rmats/prep/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+# Conda environment for the RMATS_PREP process; main.nf loads it through
+# `conda "${moduleDir}/environment.yml"`.
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  # Keep this version in step with the container tags in main.nf (4.3.0).
+  - "bioconda::rmats=4.3.0"
diff --git a/modules/nf-core/rmats/prep/main.nf b/modules/nf-core/rmats/prep/main.nf
@@ -0,0 +1,62 @@
+// RMATS_PREP: runs `rmats.py --task prep` on the BAM supplied in `genome_bam`.
+//
+// Inputs:
+//   meta, genome_bam     - sample meta map and the BAM to process
+//   meta2, reference_gtf - meta map and the annotation GTF passed to --gtf
+//                          (meta2 is not referenced in the script)
+//   rmats_read_len       - value passed to --readLength
+// Outputs:
+//   prep_rmats_file         - the *.rmats file copied out of the rMATS --tmp directory
+//   prep_read_outcomes_file - the *read_outcomes_by_bam.txt file copied out of the --tmp directory
+//   versions_rmats          - version reported by `rmats.py --version`, emitted on the `versions` topic
+process RMATS_PREP {
+    tag "${meta.id}"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://depot.galaxyproject.org/singularity/rmats:4.3.0--py311hf2f0b74_5'
+        : 'biocontainers/rmats:4.3.0--py311hf2f0b74_5'}"
+
+    input:
+    tuple val(meta), path(genome_bam)
+    // NOTE: the post step seems to need only the BAM *names*, not the actual files.
+    //   The names might be recoverable from the first line of each .rmats file, e.g.:
+    //     for file in `ls multi_bam_rmats_prep_tmp/*.rmats`; do head -1 $file; done | tr '\n' ','
+    //   Possible alternative suggested by @SPPearce: pass ${prefix}.prep.b1.txt as an output.
+    // NOTE: for stats, it should be possible to parse the formula using patsy, but if
+    //   PAIRADISE is included R may be needed anyway - just do this in R on a first pass.
+    tuple val(meta2), path(reference_gtf)
+    val rmats_read_len
+
+    output:
+    tuple val(meta), path("*.rmats"), emit: prep_rmats_file
+    tuple val(meta), path("*read_outcomes_by_bam.txt"), emit: prep_read_outcomes_file
+    tuple val("${task.process}"), val('rmats'), eval('rmats.py --version | sed -e "s/v//g"'), emit: versions_rmats, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // NOTE: rMATS help text for the read length option:
+    //   --readLength READLENGTH
+    //       The length of each read. Required parameter, with the
+    //       value set according to the RNA-seq read length
+    // TODO: derive the read length in the calling workflow (e.g. from samtools stats).
+    //
+    // The script writes the BAM path into the file handed to --b1, runs the prep
+    // task, then copies the results out of the --tmp directory under
+    // ${prefix}-based names so that they match the output globs declared above.
+    """
+    echo ${genome_bam} > ${prefix}.prep.b1.txt
+
+    rmats.py \\
+        --task prep \\
+        ${args} \\
+        --nthread ${task.cpus} \\
+        --b1 ${prefix}.prep.b1.txt \\
+        --gtf ${reference_gtf} \\
+        --readLength ${rmats_read_len} \\
+        --tmp ${prefix}_rmats_tmp \\
+        --od ${prefix}_rmats_prep
+
+    cp ${prefix}_rmats_tmp/*.txt ${prefix}_read_outcomes_by_bam.txt
+    cp ${prefix}_rmats_tmp/*.rmats ${prefix}.rmats
+    """
+
+    // NOTE for the post step: it requires the .rmats files to be in the tmp directory, otherwise it fails.
+
+    // Stub: creates empty placeholders for both file outputs without running rMATS.
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    echo ${args}
+
+    touch ${prefix}.rmats
+    touch ${prefix}_read_outcomes_by_bam.txt
+    """
+}
diff --git a/modules/nf-core/rmats/prep/meta.yml b/modules/nf-core/rmats/prep/meta.yml
@@ -0,0 +1,89 @@
+name: "rmats_prep"
+# The module description covers the prep task specifically; the generic tool
+# description lives under `tools` below.
+description: Run the rMATS prep task on a BAM file, producing the .rmats
+  junction count file and the read outcomes summary. rMATS is a computational
+  tool to detect differential alternative splicing events from RNA-Seq data.
+keywords:
+  - splicing
+  - RNA-Seq
+  - alternative splicing
+  - exon
+  - intron
+  - rMATS
+tools:
+  - "rmats":
+      description: "MATS is a computational tool to detect differential alternative
+        splicing events from RNA-Seq data."
+      homepage: "https://github.com/Xinglab/rmats-turbo"
+      documentation: "https://github.com/Xinglab/rmats-turbo/blob/v4.3.0/README.md"
+      doi: "10.1038/s41596-023-00944-2"
+      licence:
+        - "FreeBSD for non-commercial use, see LICENSE file"
+      identifier: biotools:rmats
+input:
+  - - meta:
+        type: map
+        description: Groovy Map containing sample information. e.g. `[
+          id:'sample1', single_end:false, strandness:'auto']`
+    - genome_bam:
+        type: file
+        description: BAM file aligned to the genome
+        pattern: "*.{bam}"
+        ontologies:
+          - edam: http://edamontology.org/format_2572
+  # Second channel is a tuple, matching `tuple val(meta2), path(reference_gtf)`
+  # in main.nf.
+  - - meta2:
+        type: map
+        description: Groovy Map containing reference information. e.g. `[
+          id:'genome' ]`
+    - reference_gtf:
+        type: file
+        description: Annotation GTF file
+        pattern: "*.{gtf}"
+        ontologies:
+          - edam: http://edamontology.org/format_2306
+  - rmats_read_len:
+      type: integer
+      description: Read length in bases
+output:
+  prep_rmats_file:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information. e.g. `[
+            id:'sample1', single_end:false, strandness:'auto']`
+      - "*.rmats":
+          type: file
+          description: rmats junction count information, after processing the BAM
+            file
+          pattern: "*.rmats"
+          ontologies: []
+  prep_read_outcomes_file:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information. e.g. `[
+            id:'sample1', single_end:false, strandness:'auto']`
+      - "*read_outcomes_by_bam.txt":
+          type: file
+          description: text file detailing number of reads used and not used for
+            various reasons (clipped, not paired, wrong length, etc.)
+          pattern: "*read_outcomes_by_bam.txt"
+          ontologies:
+            - edam: http://edamontology.org/format_2330
+  versions_rmats:
+    - - ${task.process}:
+          type: string
+          description: The process the versions were collected from
+      - rmats:
+          type: string
+          description: The tool name
+      - rmats.py --version | sed -e "s/v//g":
+          type: eval
+          description: The command used to generate the version of the tool
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The process the versions were collected from
+      - rmats:
+          type: string
+          description: The tool name
+      - rmats.py --version | sed -e "s/v//g":
+          type: eval
+          description: The command used to generate the version of the tool
+authors:
+  - "@akaviaLab"
+maintainers:
+  - "@akaviaLab"
diff --git a/modules/nf-core/rmats/prep/tests/main.nf.test b/modules/nf-core/rmats/prep/tests/main.nf.test
@@ -0,0 +1,213 @@
+nextflow_process {
+
+    name "Test Process RMATS_PREP"
+    script "../main.nf"
+    process "RMATS_PREP"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "rmats"
+    tag "rmats/prep"
+
+    test("homo_sapiens - paired unstranded rmats prep") {
+
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false, strandness:"unstranded" ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.bam', checkIfExists: true),
+                ]
+                // RMATS_PREP declares `tuple val(meta2), path(reference_gtf)`, so a meta map precedes the GTF
+                input[1] = [
+                    [ id:'genome' ], // meta2 map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true),
+                ]
+                input[2] = 150
+                """
+            }
+        }
+
+        then {
+            // The USED and CLIPPED counts are read from fixed line positions
+            // (indices 1 and 9) of the read outcomes file.
+            def lines = path(process.out.prep_read_outcomes_file[0][1]).readLines()
+            def reads_used = lines[1] =~ /USED: (\d+)/
+            def reads_clipped = lines[9] =~ /CLIPPED: (\d+)/
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+                { assert reads_used[0][1] as Integer  > 0 },
+                // With ./nextflow.config applied, no reads are expected to be reported as clipped
+                { assert reads_clipped[0][1] as Integer  == 0 }
+            )
+        }
+
+    }
+
+    test("homo_sapiens - single-end unstranded rmats prep") {
+
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:true, strandness:"unstranded" ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.bam', checkIfExists: true),
+                ]
+                // RMATS_PREP declares `tuple val(meta2), path(reference_gtf)`, so a meta map precedes the GTF
+                input[1] = [
+                    [ id:'genome' ], // meta2 map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true),
+                ]
+                input[2] = 150
+                """
+            }
+        }
+
+        then {
+            // The USED count is read from a fixed line position (index 1) of the read outcomes file.
+            def lines = path(process.out.prep_read_outcomes_file[0][1]).readLines()
+            def reads_used = lines[1] =~ /USED: (\d+)/
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+                { assert reads_used[0][1] as Integer  > 0 }
+            )
+        }
+
+    }
+
+    test("homo_sapiens - paired forward rmats prep") {
+
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false, strandness:"forward" ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.bam', checkIfExists: true),
+                ]
+                // RMATS_PREP declares `tuple val(meta2), path(reference_gtf)`, so a meta map precedes the GTF
+                input[1] = [
+                    [ id:'genome' ], // meta2 map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true),
+                ]
+                input[2] = 150
+                """
+            }
+        }
+
+        then {
+            // The USED count is read from a fixed line position (index 1) of the read outcomes file.
+            def lines = path(process.out.prep_read_outcomes_file[0][1]).readLines()
+            def reads_used = lines[1] =~ /USED: (\d+)/
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+                { assert reads_used[0][1] as Integer  > 0 }
+            )
+        }
+
+    }
+
+    test("homo_sapiens - paired reverse rmats prep") {
+
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false, strandness:"reverse" ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.bam', checkIfExists: true),
+                ]
+                // RMATS_PREP declares `tuple val(meta2), path(reference_gtf)`, so a meta map precedes the GTF
+                input[1] = [
+                    [ id:'genome' ], // meta2 map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true),
+                ]
+                input[2] = 150
+                """
+            }
+        }
+
+        then {
+            // The USED count is read from a fixed line position (index 1) of the read outcomes file.
+            def lines = path(process.out.prep_read_outcomes_file[0][1]).readLines()
+            def reads_used = lines[1] =~ /USED: (\d+)/
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+                { assert reads_used[0][1] as Integer  > 0 }
+            )
+        }
+
+    }
+
+    test("homo_sapiens - paired unstranded novel splice rmats prep") {
+
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false, strandness:"unstranded" ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.bam', checkIfExists: true),
+                ]
+                // RMATS_PREP declares `tuple val(meta2), path(reference_gtf)`, so a meta map precedes the GTF
+                input[1] = [
+                    [ id:'genome' ], // meta2 map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true),
+                ]
+                input[2] = 150
+                """
+            }
+            // Parameter set only for this test; NOTE(review): presumably consumed by ./nextflow.config - confirm
+            params {
+                novel_splice_site = true
+            }
+        }
+
+        then {
+            // The USED count is read from a fixed line position (index 1) of the read outcomes file.
+            def lines = path(process.out.prep_read_outcomes_file[0][1]).readLines()
+            def reads_used = lines[1] =~ /USED: (\d+)/
+            assertAll(
+                { assert process.success },
+                // A single snapshot assertion; the output is passed through sanitizeOutput first
+                { assert snapshot(sanitizeOutput(process.out)).match() },
+                { assert reads_used[0][1] as Integer  > 0 }
+            )
+        }
+
+    }
+
+    test("homo_sapiens - paired unstranded no clipping rmats prep") {
+
+        // No `config` is applied in this test, unlike the other non-stub tests.
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false, strandness:"unstranded" ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.bam', checkIfExists: true),
+                ]
+                // RMATS_PREP declares `tuple val(meta2), path(reference_gtf)`, so a meta map precedes the GTF
+                input[1] = [
+                    [ id:'genome' ], // meta2 map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true),
+                ]
+                input[2] = 150
+                """
+            }
+        }
+
+        then {
+            // The USED and CLIPPED counts are read from fixed line positions
+            // (indices 1 and 9) of the read outcomes file.
+            def lines = path(process.out.prep_read_outcomes_file[0][1]).readLines()
+            def reads_used = lines[1] =~ /USED: (\d+)/
+            def reads_clipped = lines[9] =~ /CLIPPED: (\d+)/
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+                { assert reads_used[0][1] as Integer  > 0 },
+                { assert reads_clipped[0][1] as Integer > 0},
+                // Parenthesised so each `as Integer` cast visibly applies to its own operand
+                { assert (reads_clipped[0][1] as Integer) > (reads_used[0][1] as Integer) }
+            )
+        }
+
+    }
+
+    test("homo_sapiens - prep - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false, strandness:"unstranded" ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.bam', checkIfExists: true),
+                ]
+                // RMATS_PREP declares `tuple val(meta2), path(reference_gtf)`, so a meta map precedes the GTF
+                input[1] = [
+                    [ id:'genome' ], // meta2 map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true),
+                ]
+                input[2] = 150
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                // A single snapshot assertion; the output is passed through sanitizeOutput first
+                { assert snapshot(sanitizeOutput(process.out)).match() }
+            )
+        }
+
+    }
+
+}