diff --git a/modules/nf-core/macsyfinder/download/environment.yml b/modules/nf-core/macsyfinder/download/environment.yml
new file mode 100644
index 00000000000..7feab3433e4
--- /dev/null
+++ b/modules/nf-core/macsyfinder/download/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::macsyfinder=2.1.6
diff --git a/modules/nf-core/macsyfinder/download/main.nf b/modules/nf-core/macsyfinder/download/main.nf
new file mode 100644
index 00000000000..6969294a3c7
--- /dev/null
+++ b/modules/nf-core/macsyfinder/download/main.nf
@@ -0,0 +1,45 @@
+// Install a MacSyFinder model package with `msf_data install` into ./models
+// and emit that directory as the `models` output.
+process MACSYFINDER_DOWNLOAD {
+    tag "${model_name}"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/macsyfinder:2.1.6--pyhdfd78af_0' :
+        'biocontainers/macsyfinder:2.1.6--pyhdfd78af_0' }"
+
+    input:
+    // Model package name, passed verbatim as the positional argument of `msf_data install`
+    val model_name
+
+    output:
+    path "models"                , emit: models
+    // Tool versions are obtained by evaluating the commands below inside the task
+    tuple val("${task.process}"), val('macsyfinder'), eval('macsyfinder --version 2>&1 | sed "1!d;s/^.*MacSyFinder //;s/ .*$//"'), topic: versions, emit: versions_macsyfinder
+    tuple val("${task.process}"), val('msf_data'), eval('msf_data --version 2>&1 | awk "NR==4" | sed "s/^- MacSyLib //;s/ *$//"'), topic: versions, emit: versions_macsydata
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Extra command-line options for `msf_data install`, taken from task.ext.args
+    def args = task.ext.args ?: ''
+    """
+    # Install into an explicit ./models directory (--target) so that the package
+    # lands inside the task work directory and is captured by the models output
+    mkdir -p models
+
+    msf_data install \\
+        --target models \\
+        ${args} \\
+        ${model_name}
+    """
+
+    stub:
+    // Placeholder model directory so the models output exists without running msf_data
+    """
+    mkdir -p models/${model_name}
+    touch models/${model_name}/definitions.txt
+    """
+}
diff --git a/modules/nf-core/macsyfinder/download/meta.yml 
b/modules/nf-core/macsyfinder/download/meta.yml
new file mode 100644
index 00000000000..66a1f0c41fa
--- /dev/null
+++ b/modules/nf-core/macsyfinder/download/meta.yml
@@ -0,0 +1,73 @@
+name: "macsyfinder_download"
+description: Download MacSyFinder models using msf_data
+keywords:
+  - genomics
+  - protein
+  - macromolecular systems
+  - models
+  - database
+tools:
+  - "macsyfinder":
+      description: "Detection of macromolecular systems in protein datasets using systems\
+        \ modelling and similarity search"
+      homepage: "https://github.com/gem-pasteur/macsyfinder"
+      documentation: "https://macsyfinder.readthedocs.io"
+      tool_dev_url: "https://github.com/gem-pasteur/macsyfinder"
+      doi: "10.24072/pcjournal.250"
+      licence:
+        - "GPL v3"
+      identifier: biotools:macsyfinder
+input:
+  - model_name:
+      type: string
+      description: Name of the MacSyFinder model to download (e.g., 'TXSScan', 'CasFinder')
+output:
+  models:
+    - models:
+        type: directory
+        description: Directory containing downloaded MacSyFinder model definitions
+        pattern: "models"
+  versions_macsyfinder:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - macsyfinder:
+          type: string
+          description: The name of the tool
+      - macsyfinder --version 2>&1 | sed "1!d;s/^.*MacSyFinder //;s/ .*$//":
+          type: eval
+          description: The expression to obtain the version of the tool
+  versions_macsydata:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - msf_data:
+          type: string
+          description: The name of the tool
+      - msf_data --version 2>&1 | awk "NR==4" | sed "s/^- MacSyLib //;s/ *$//":
+          type: eval
+          description: The expression to obtain the version of the tool
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - macsyfinder:
+          type: string
+          description: The name of the tool
+      - macsyfinder --version 2>&1 | sed "1!d;s/^.*MacSyFinder //;s/ .*$//":
+          type: eval
+          description: The expression to obtain the version of the tool
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - msf_data:
+          type: string
+          description: The name of the tool
+      - msf_data --version 2>&1 | awk "NR==4" | sed "s/^- MacSyLib //;s/ *$//":
+          type: eval
+          description: The expression to obtain the version of the tool
+authors:
+  - "@brovolia"
+maintainers:
+  - "@brovolia"
diff --git a/modules/nf-core/macsyfinder/download/tests/main.nf.test b/modules/nf-core/macsyfinder/download/tests/main.nf.test
new file mode 100644
index 00000000000..e458f4c43a5
--- /dev/null
+++ b/modules/nf-core/macsyfinder/download/tests/main.nf.test
@@ -0,0 +1,57 @@
+// nf-test suite for MACSYFINDER_DOWNLOAD: a real TXSScan download and a -stub run.
+nextflow_process {
+
+    name "Test Process MACSYFINDER_DOWNLOAD"
+    script "../main.nf"
+    process "MACSYFINDER_DOWNLOAD"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "macsyfinder"
+    tag "macsyfinder/download"
+
+    test("macsyfinder_download - TXSScan") {
+
+        when {
+            process {
+                """
+                input[0] = 'TXSScan'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                // Snapshot only the name of the emitted directory and the version tuples
+                { assert snapshot(
+                    process.out.models.collect { file(it).name },
+                    process.out.findAll { key, val -> key.startsWith("versions") }
+                    ).match() }
+            )
+        }
+    }
+
+    test("macsyfinder_download - TXSScan - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = 'TXSScan'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                // Same snapshot contents as the real run: directory name plus version tuples
+                { assert snapshot(
+                    process.out.models.collect { file(it).name },
+                    process.out.findAll { key, val -> key.startsWith("versions") }
+                    ).match() }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/macsyfinder/download/tests/main.nf.test.snap b/modules/nf-core/macsyfinder/download/tests/main.nf.test.snap
new file mode 100644
index 00000000000..461d3f382d3
--- /dev/null
+++ b/modules/nf-core/macsyfinder/download/tests/main.nf.test.snap
@@ -0,0 +1,58 @@
+{
+    "macsyfinder_download - TXSScan": {
+        "content": [
+            [
+                "models"
+            ],
+            {
+                "versions_macsydata": [
+                    [
+                        "MACSYFINDER_DOWNLOAD",
+                        "msf_data",
+                        "1.0.4"
+                    ]
+                ],
+                "versions_macsyfinder": [
+                    [
+                        "MACSYFINDER_DOWNLOAD",
+                        "macsyfinder",
+                        "2.1.6"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.0"
+        },
+        "timestamp": "2026-02-27T17:06:58.95757876"
+    },
+    "macsyfinder_download - TXSScan - stub": {
+        "content": [
+            [
+                "models"
+            ],
+            {
+                "versions_macsydata": [
+                    [
+                        "MACSYFINDER_DOWNLOAD",
+                        "msf_data",
+                        "1.0.4"
+                    ]
+                ],
+                "versions_macsyfinder": [
+                    [
+                        "MACSYFINDER_DOWNLOAD",
+                        "macsyfinder",
+                        "2.1.6"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.0"
+        },
+        "timestamp": "2026-02-27T17:07:09.322980182"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/macsyfinder/search/environment.yml b/modules/nf-core/macsyfinder/search/environment.yml
new file mode 100644
index 00000000000..7feab3433e4
--- /dev/null
+++ b/modules/nf-core/macsyfinder/search/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::macsyfinder=2.1.6
diff --git a/modules/nf-core/macsyfinder/search/main.nf b/modules/nf-core/macsyfinder/search/main.nf
new file mode 100644
index 00000000000..2fbeb2e80c0
--- /dev/null
+++ b/modules/nf-core/macsyfinder/search/main.nf
@@ -0,0 +1,67 @@
+// Run MacSyFinder on a protein FASTA file against a directory of installed models.
+// Everything is written to the ${prefix}/ directory, which is what the outputs glob.
+process MACSYFINDER_SEARCH {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/macsyfinder:2.1.6--pyhdfd78af_0' :
+        'biocontainers/macsyfinder:2.1.6--pyhdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(proteins)
+    path models
+    // Passed verbatim after --models; when empty or null the --models option is omitted
+    val model_names
+
+    output:
+    // `results` captures every entry of ${prefix}/; the optional channels below expose single entries
+    tuple val(meta), path("${prefix}/*")                   , emit: results
+    tuple val(meta), path("${prefix}/hmmer_results")       , emit: hmmer         , optional: true
+    tuple val(meta), path("${prefix}/macsyfinder.out")     , emit: stdout        , optional: true
+    tuple val(meta), path("${prefix}/macsyfinder.err")     , emit: stderr        , optional: true
+    tuple val(meta), path("${prefix}/all_systems.tsv")     , emit: summary       , optional: true
+    tuple val(meta), path("${prefix}/all_best_solutions*") , emit: best_solutions, optional: true
+    tuple val("${task.process}"), val('macsyfinder'), eval('macsyfinder --version 2>&1 | sed "1!d;s/^.*MacSyFinder //;s/ .*$//"'), topic: versions, emit: versions_macsyfinder
+    tuple val("${task.process}"), val('hmmer'), eval('hmmsearch -h 2>&1 | sed "2!d;s/^# HMMER //;s/ .*$//"'), topic: versions, emit: versions_hmmer
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // prefix is intentionally not declared with `def`: the output: patterns above reference it
+    prefix = task.ext.prefix ?: "${meta.id}"
+    def model_arg = model_names ? "--models ${model_names}" : ""
+    // stdout/stderr are tee'd to files in the task directory and moved into ${prefix}/ afterwards.
+    // NOTE(review): presumably because macsyfinder has to create --out-dir itself — confirm.
+    """
+    macsyfinder \\
+        --sequence-db ${proteins} \\
+        --models-dir ${models} \\
+        ${model_arg} \\
+        --out-dir ${prefix} \\
+        --worker ${task.cpus} \\
+        ${args} \\
+        2>| >( tee macsyfinder.err >&2 ) \\
+        | tee macsyfinder.out
+
+    mv macsyfinder.err ${prefix}/
+    mv macsyfinder.out ${prefix}/
+
+    # Remove empty error files to avoid snapshot pollution
+    find ${prefix} -name "*.err" -type f -size 0 -delete
+    """
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    // Create a placeholder for every declared output, including the optional hmmer_results directory
+    """
+    mkdir -p ${prefix}/hmmer_results
+    touch ${prefix}/macsyfinder.err
+    touch ${prefix}/macsyfinder.out
+    touch ${prefix}/all_systems.tsv
+    touch ${prefix}/all_best_solutions.txt
+    """
+}
diff --git a/modules/nf-core/macsyfinder/search/meta.yml b/modules/nf-core/macsyfinder/search/meta.yml
new file mode 100644
index 00000000000..31a477b1069
--- /dev/null
+++ b/modules/nf-core/macsyfinder/search/meta.yml
@@ -0,0 +1,149 @@
+name: "macsyfinder_search"
+description: Search for macromolecular systems in protein datasets using MacSyFinder
+keywords:
+  - genomics
+  - protein
+  - macromolecular systems
+  - hmmer
+  - search
+tools:
+  - "macsyfinder":
+      description: "Detection of macromolecular systems in protein datasets using systems\
+        \ modelling and similarity search"
+      homepage: "https://github.com/gem-pasteur/macsyfinder"
+      documentation: "https://macsyfinder.readthedocs.io"
+      tool_dev_url: "https://github.com/gem-pasteur/macsyfinder"
+      doi: "10.24072/pcjournal.250"
+      licence:
+        - "GPL v3"
+      identifier: biotools:macsyfinder
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. 
`[ id:'test' ]`
+    - proteins:
+        type: file
+        description: Protein sequence file in FASTA format (plain or gzip-compressed)
+        pattern: "*.{fasta,faa,fa,fasta.gz,faa.gz,fa.gz}"
+        ontologies:
+          - edam: "http://edamontology.org/format_1929"
+  - models:
+      type: directory
+      description: Directory containing MacSyFinder model(s)
+      pattern: "*"
+  - model_names:
+      type: string
+      description: Value passed verbatim to `--models` (model family, optionally followed
+        by definition names or 'all'), e.g. 'TXSScan'. If empty, `--models` is omitted.
+output:
+  results:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'test' ]`
+      - ${prefix}/*:
+          type: directory
+          description: All files and sub-directories written to the MacSyFinder output directory
+          pattern: "${prefix}/*"
+  hmmer:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'test' ]`
+      - ${prefix}/hmmer_results:
+          type: directory
+          description: Directory containing HMMER search results (may contain variable content like timestamps)
+          pattern: "${prefix}/hmmer_results"
+  stdout:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'test' ]`
+      - ${prefix}/macsyfinder.out:
+          type: file
+          description: MacSyFinder standard output
+          pattern: "${prefix}/macsyfinder.out"
+          ontologies: []
+  stderr:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'test' ]`
+      - ${prefix}/macsyfinder.err:
+          type: file
+          description: MacSyFinder standard error (absent when empty, since the module deletes empty *.err files)
+          pattern: "${prefix}/macsyfinder.err"
+          ontologies: []
+  summary:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'test' ]`
+      - ${prefix}/all_systems.tsv:
+          type: file
+          description: Summary table of all detected systems
+          pattern: "${prefix}/all_systems.tsv"
+          ontologies:
+            - edam: "http://edamontology.org/format_3475"
+  best_solutions:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'test' ]`
+      - ${prefix}/all_best_solutions*:
+          type: file
+          description: Best solution files for detected systems
+          pattern: "${prefix}/all_best_solutions*"
+          ontologies: []
+  versions_macsyfinder:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - macsyfinder:
+          type: string
+          description: The name of the tool
+      - macsyfinder --version 2>&1 | sed "1!d;s/^.*MacSyFinder //;s/ .*$//":
+          type: eval
+          description: The expression to obtain the version of the tool
+  versions_hmmer:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - hmmer:
+          type: string
+          description: The name of the tool
+      - hmmsearch -h 2>&1 | sed "2!d;s/^# HMMER //;s/ .*$//":
+          type: eval
+          description: The expression to obtain the version of the tool
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - macsyfinder:
+          type: string
+          description: The name of the tool
+      - macsyfinder --version 2>&1 | sed "1!d;s/^.*MacSyFinder //;s/ .*$//":
+          type: eval
+          description: The expression to obtain the version of the tool
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - hmmer:
+          type: string
+          description: The name of the tool
+      - hmmsearch -h 2>&1 | sed "2!d;s/^# HMMER //;s/ .*$//":
+          type: eval
+          description: The expression to obtain the version of the tool
+authors:
+  - "@brovolia"
+maintainers:
+  - "@brovolia"
diff --git a/modules/nf-core/macsyfinder/search/tests/main.nf.test b/modules/nf-core/macsyfinder/search/tests/main.nf.test
new file mode 100644
index 00000000000..c508e726cb9
--- /dev/null
+++ 
b/modules/nf-core/macsyfinder/search/tests/main.nf.test
@@ -0,0 +1,139 @@
+// nf-test suite for MACSYFINDER_SEARCH. Every test takes its models from a
+// MACSYFINDER_DOWNLOAD run of TXSScan declared in the shared setup block.
+nextflow_process {
+
+    name "Test Process MACSYFINDER_SEARCH"
+    script "../main.nf"
+    process "MACSYFINDER_SEARCH"
+    config "./nextflow.config"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "macsyfinder"
+    tag "macsyfinder/search"
+    tag "macsyfinder/download"
+    tag "pigz"
+    tag "pigz/compress"
+
+    setup {
+        run("MACSYFINDER_DOWNLOAD") {
+            script "../../download/main.nf"
+            process {
+                """
+                input[0] = 'TXSScan'
+                """
+            }
+        }
+    }
+
+    test("macsyfinder_search - proteins - fasta") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/proteome.fasta', checkIfExists: true)
+                ]
+                input[1] = MACSYFINDER_DOWNLOAD.out.models
+                input[2] = 'TXSScan'
+                """
+            }
+        }
+
+        then {
+            // Reduce a [ meta, path ] channel to [ meta, file name ] so that no file content is snapshotted
+            def fileNames = { emitted -> emitted.collect { sample, outPath -> [ sample, file(outPath).name ] } }
+
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    fileNames(process.out.best_solutions),
+                    fileNames(process.out.stderr),
+                    fileNames(process.out.stdout),
+                    fileNames(process.out.summary),
+                    process.out.findAll { channel, entries -> channel.startsWith("versions") }
+                    ).match() }
+            )
+        }
+    }
+
+    test("macsyfinder_search - proteins - fasta.gz") {
+
+        // Gzip the same proteome with PIGZ_COMPRESS to exercise compressed input
+        setup {
+            run("PIGZ_COMPRESS") {
+                script "../../../pigz/compress/main.nf"
+                process {
+                    """
+                    input[0] = [
+                        [ id:'test' ],
+                        file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/proteome.fasta', checkIfExists: true)
+                    ]
+                    """
+                }
+            }
+        }
+
+        when {
+            process {
+                """
+                input[0] = PIGZ_COMPRESS.out.archive.map { meta, file ->
+                    [[ id: 'test' ], file]
+                }
+                input[1] = MACSYFINDER_DOWNLOAD.out.models
+                input[2] = 'TXSScan'
+                """
+            }
+        }
+
+        then {
+            // Reduce a [ meta, path ] channel to [ meta, file name ] so that no file content is snapshotted
+            def fileNames = { emitted -> emitted.collect { sample, outPath -> [ sample, file(outPath).name ] } }
+
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    fileNames(process.out.best_solutions),
+                    fileNames(process.out.stderr),
+                    fileNames(process.out.stdout),
+                    fileNames(process.out.summary),
+                    process.out.findAll { channel, entries -> channel.startsWith("versions") }
+                    ).match() }
+            )
+        }
+    }
+
+    test("macsyfinder_search - proteins - fasta - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/proteome.fasta', checkIfExists: true)
+                ]
+                input[1] = MACSYFINDER_DOWNLOAD.out.models
+                input[2] = 'TXSScan'
+                """
+            }
+        }
+
+        then {
+            // Reduce a [ meta, path ] channel to [ meta, file name ] so that no file content is snapshotted
+            def fileNames = { emitted -> emitted.collect { sample, outPath -> [ sample, file(outPath).name ] } }
+
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    fileNames(process.out.best_solutions),
+                    fileNames(process.out.stderr),
+                    fileNames(process.out.stdout),
+                    fileNames(process.out.summary),
+                    process.out.findAll { channel, entries -> channel.startsWith("versions") }
+                    ).match() }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/macsyfinder/search/tests/main.nf.test.snap b/modules/nf-core/macsyfinder/search/tests/main.nf.test.snap
new file mode 100644
index 00000000000..6d6e60ae623
--- /dev/null
+++ b/modules/nf-core/macsyfinder/search/tests/main.nf.test.snap
@@ -0,0 +1,153 @@
+{
+    "macsyfinder_search - proteins - fasta": {
+        "content": [
+            [
+
+            ],
+            [
+
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "macsyfinder.out"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "all_systems.tsv"
+                ]
+            ],
+            {
+                "versions_hmmer": [
+                    [
+                        "MACSYFINDER_SEARCH",
+                        "hmmer",
+                        "3.4"
+                    ]
+                ],
+                "versions_macsyfinder": [
+                    [
+                        "MACSYFINDER_SEARCH",
+                        "macsyfinder",
+                        "2.1.6"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.0"
+        },
+        "timestamp": "2026-02-27T17:00:17.757460737"
+    },
+    "macsyfinder_search - proteins - fasta.gz": {
+        "content": [
+            [
+
+            ],
+            [
+
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "macsyfinder.out"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "all_systems.tsv"
+                ]
+            ],
+            {
+                "versions_hmmer": [
+                    [
+                        "MACSYFINDER_SEARCH",
+                        "hmmer",
+                        "3.4"
+                    ]
+                ],
+                "versions_macsyfinder": [
+                    [
+                        "MACSYFINDER_SEARCH",
+                        "macsyfinder",
+                        "2.1.6"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.0"
+        },
+        "timestamp": "2026-02-27T17:00:42.224448749"
+    },
+    "macsyfinder_search - proteins - fasta - stub": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "all_best_solutions.txt"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "macsyfinder.err"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "macsyfinder.out"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "all_systems.tsv"
+                ]
+            ],
+            {
+                "versions_hmmer": [
+                    [
+                        "MACSYFINDER_SEARCH",
+                        "hmmer",
+                        "3.4"
+                    ]
+                ],
+                "versions_macsyfinder": [
+                    [
+                        "MACSYFINDER_SEARCH",
+                        "macsyfinder",
+                        "2.1.6"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.0"
+        },
+        "timestamp": "2026-02-27T17:00:55.062170192"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/macsyfinder/search/tests/nextflow.config b/modules/nf-core/macsyfinder/search/tests/nextflow.config
new file mode 100644
index 00000000000..d805fb7ed47
--- /dev/null
+++ b/modules/nf-core/macsyfinder/search/tests/nextflow.config
@@ -0,0 +1,6 @@
+// Test-only configuration: extra macsyfinder options are injected through task.ext.args
+process {
+    withName: MACSYFINDER_SEARCH {
+        ext.args = '--db-type unordered'
+    }
+}