diff --git a/modules/nf-core/rmats/prep/environment.yml b/modules/nf-core/rmats/prep/environment.yml
new file mode 100644
index 00000000000..35217f7adac
--- /dev/null
+++ b/modules/nf-core/rmats/prep/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::rmats=4.3.0"
diff --git a/modules/nf-core/rmats/prep/main.nf b/modules/nf-core/rmats/prep/main.nf
new file mode 100644
index 00000000000..98bb1f66175
--- /dev/null
+++ b/modules/nf-core/rmats/prep/main.nf
@@ -0,0 +1,74 @@
+process RMATS_PREP {
+    tag "${meta.id}"
+    label 'process_single'
+
+    // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below.
+    conda "${moduleDir}/environment.yml"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://depot.galaxyproject.org/singularity/rmats:4.3.0--py311hf2f0b74_5'
+        : 'biocontainers/rmats:4.3.0--py311hf2f0b74_5'}"
+
+    input:
+    // TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct
+
+    tuple val(meta), path(genome_bam)
+    // TODO - post seems to need only the BAM *names*, not the actual files. Could we just get the first line of each file to get the names?
+    // for file in `ls multi_bam_rmats_prep_tmp/*.rmats`; do head -1 $file; done | tr '\n' ','
+    // TODO - for stats, it should be possible to parse the formula using patsy, but if we include PAIRADISE we might have R - just do this in R, first pass
+    path reference_gtf
+    val rmats_read_len
+
+    output:
+    // TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct
+    tuple val(meta), path("*.rmats"), emit: prep_rmats_file
+    tuple val(meta), path("*outcomes_by_bam.txt"), emit: prep_read_outcomes_file
+    tuple val("${task.process}"), val('rmats'), eval('rmats.py --version | sed -e "s/v//g"'), emit: versions_rmats, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10
+    //               If the software is unable to output a version number on the command-line then it can be manually specified
+    //               e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf
+    //               Each software used MUST provide the software name and version number in the YAML version file (versions.yml)
+
+    // --readLength READLENGTH
+    //     The length of each read. Required parameter, with the
+    //     value set according to the RNA-seq read length
+    // TODO - question. Does this definition mean I should change it by read length? If so, look at a samtools command to figure it out. Samtools stats!
+    // TODO - should I modify the prefix to include rmats_prep only in a subworkflow via modules.config? It seems so, see example at https://github.com/nf-core/rnaseq/blob/e049f51f0214b2aef7624b9dd496a404a7c34d14/conf/modules.config#L576
+    // Everything written to the --tmp directory is copied into the task directory as <prefix>_prep_<name> so that the output globs above can match it.
+    """
+    echo ${genome_bam} > ${prefix}.prep.b1.txt
+
+    rmats.py \\
+        --task prep \\
+        ${args} \\
+        --nthread ${task.cpus} \\
+        --b1 ${prefix}.prep.b1.txt \\
+        --gtf ${reference_gtf} \\
+        --readLength ${rmats_read_len} \\
+        --tmp ${prefix}_rmats_tmp \\
+        --od ${prefix}_rmats_prep
+
+    for file in ${prefix}_rmats_tmp/*
+    do
+        cp \${file} ${prefix}_prep_\$(basename \${file})
+    done
+    """
+
+    // NOTES for post - post requires the rmats files to be in the tmp directory, otherwise it fails
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    echo ${args}
+
+    touch ${prefix}.rmats
+    touch ${prefix}_outcomes_by_bam.txt
+    """
+}
diff --git a/modules/nf-core/rmats/prep/meta.yml b/modules/nf-core/rmats/prep/meta.yml
new file mode 100644
index 00000000000..caf656f3b58
--- /dev/null
+++ b/modules/nf-core/rmats/prep/meta.yml
@@ -0,0 +1,88 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+# # TODO nf-core: Add a description of the module and list keywords
+name: "rmats_prep"
+description: MATS is a computational tool to detect differential alternative splicing events from RNA-Seq data.
+keywords:
+  - splicing
+  - RNA-Seq
+  - alternative splicing
+  - exon
+  - intron
+  - rMATS
+tools:
+  ## TODO nf-core: Add a description and other details for the software below
+  - "rmats":
+      description: "MATS is a computational tool to detect differential alternative
+        splicing events from RNA-Seq data."
+      homepage: "https://github.com/Xinglab/rmats-turbo"
+      documentation: "https://github.com/Xinglab/rmats-turbo/blob/v4.3.0/README.md"
+      doi: "10.1038/s41596-023-00944-2"
+      licence: ["FreeBSD for non-commercial use, see LICENSE file"]
+      identifier: biotools:rmats
+
+input:
+  # TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct
+  - - meta:
+        type: map
+        description: Groovy Map containing sample information. e.g. `[ id:'sample1', single_end:false, strandness:'auto']`
+    - genome_bam:
+        type: file
+        description: BAM file aligned to the genome
+        pattern: "*.{bam}"
+        ontologies:
+          - edam: http://edamontology.org/format_2572 # BAM
+  - reference_gtf:
+      type: file
+      description: Annotation GTF file
+      pattern: "*.{gtf}"
+      ontologies:
+        - edam: http://edamontology.org/format_2306 # GTF
+  - rmats_read_len:
+      type: integer
+      description: Read length in bases
+output:
+  # TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct
+  prep_rmats_file:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information. e.g. `[ id:'sample1', single_end:false, strandness:'auto']`
+      - "*.rmats":
+          type: file
+          description: text file containing rmats processed splice junctions
+          pattern: "*.rmats"
+          ontologies: []
+  prep_read_outcomes_file:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information. e.g. `[ id:'sample1', single_end:false, strandness:'auto']`
+      - "*outcomes_by_bam.txt":
+          type: file
+          description: text file containing the numbers of reads for each outcome (USED, NOT_PAIRED, etc.)
+          pattern: "*outcomes_by_bam.txt"
+          ontologies:
+            - edam: http://edamontology.org/format_2330
+  versions_rmats:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - rmats:
+          type: string
+          description: The name of the tool
+      - rmats.py --version | sed -e "s/v//g":
+          type: eval
+          description: The expression to obtain the version of the tool
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - rmats:
+          type: string
+          description: The name of the tool
+      - rmats.py --version | sed -e "s/v//g":
+          type: eval
+          description: The expression to obtain the version of the tool
+authors:
+  - "@akaviaLab"
+maintainers:
+  - "@akaviaLab"
diff --git a/modules/nf-core/rmats/prep/optional_parameters b/modules/nf-core/rmats/prep/optional_parameters
new file mode 100644
index 00000000000..ec57fb0638a
--- /dev/null
+++ b/modules/nf-core/rmats/prep/optional_parameters
@@ -0,0 +1,2 @@
+--variable-read-length
+--allow-clipping
diff --git a/modules/nf-core/rmats/prep/tests/main.nf.test b/modules/nf-core/rmats/prep/tests/main.nf.test
new file mode 100644
index 00000000000..ac929ae5691
--- /dev/null
+++ b/modules/nf-core/rmats/prep/tests/main.nf.test
@@ -0,0 +1,79 @@
+// TODO nf-core: Once you have added the required tests, please run the following command to build this file:
+// nf-core modules test rmats/prep
+nextflow_process {
+
+    name "Test Process RMATS_PREP"
+    script "../main.nf"
+    process "RMATS_PREP"
+    config "./nextflow.config"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "rmats"
+    tag "rmats/prep"
+
+    // TODO nf-core: Change the test name preferably indicating the test-data and file-format used
+    test("sarscov2 - bam") {
+
+        // TODO nf-core: If you are created a test for a chained module
+        // (the module requires running more than one process to generate the required output)
+        // add the 'setup' method here.
+        // You can find more information about how to use a 'setup' method in the docs (https://nf-co.re/docs/contributing/modules#steps-for-creating-nf-test-for-chained-modules).
+
+        when {
+            process {
+                """
+                // TODO nf-core: define inputs of the process here. Example:
+
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+                ]
+                input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) // reference_gtf
+                input[2] = 150 // rmats_read_len - TODO confirm against the read length of the test BAM
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+                //TODO nf-core: Add all required assertions to verify the test output.
+                // See https://nf-co.re/docs/contributing/tutorials/nf-test_assertions for more information and examples.
+            )
+        }
+
+    }
+
+    // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix.
+    test("sarscov2 - bam - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                // TODO nf-core: define inputs of the process here. Example:
+
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+                ]
+                input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) // reference_gtf
+                input[2] = 150 // rmats_read_len - TODO confirm against the read length of the test BAM
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+                //TODO nf-core: Add all required assertions to verify the test output.
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/rmats/prep/tests/nextflow.config b/modules/nf-core/rmats/prep/tests/nextflow.config
new file mode 100644
index 00000000000..ad690e8dedd
--- /dev/null
+++ b/modules/nf-core/rmats/prep/tests/nextflow.config
@@ -0,0 +1,17 @@
+process {
+
+    withName: RMATS_PREP {
+        ext.args = {[
+            "--variable-read-length --allow-clipping",
+            meta.single_end ? '-t single' : '',
+            // "forward" libraries are fr-secondstrand and "reverse" (dUTP-style) libraries are fr-firststrand.
+            // NOTE(review): nf-core pipelines usually carry this value as meta.strandedness - confirm the key name.
+            meta.strandness == "forward" ? "--libType fr-secondstrand" : '',
+            meta.strandness == "reverse" ? "--libType fr-firststrand" : '',
+            params.novel_splice_site ? "--novelSS" : "",
+            (params.novel_splice_site && params.minimum_intron_length) ? "--mil ${params.minimum_intron_length}" : "",
+            (params.novel_splice_site && params.max_exon_length) ? "--mel ${params.max_exon_length}" : "",
+        ].join(' ').trim()}
+    }
+
+}