diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c43dc9e..f8aeefa0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- Substituted VCF_TO_CSV local module with GATK4/VARIANTSTOTABLE nf-core module [#280](https://github.com/nf-core/variantbenchmarking/issues/280) - RTGtools update to 3.13 [#261](https://github.com/nf-core/variantbenchmarking/issues/261) - Transforming local modules with bcftools and tabix to standard nf-core modules [#267](https://github.com/nf-core/variantbenchmarking/pull/267) - Replace local modules SORT_BED and REFORMAT_HEADER with nf-core ones. [#268](https://github.com/nf-core/variantbenchmarking/pull/268) diff --git a/assets/nf-core-variantbenchmarking_logo_light.png b/assets/nf-core-variantbenchmarking_logo_light.png index 65a860ba..bcd4532c 100644 Binary files a/assets/nf-core-variantbenchmarking_logo_light.png and b/assets/nf-core-variantbenchmarking_logo_light.png differ diff --git a/bin/merge_sompy_features.py b/bin/merge_sompy_features.py index 1322879f..d222fdb7 100755 --- a/bin/merge_sompy_features.py +++ b/bin/merge_sompy_features.py @@ -75,8 +75,8 @@ def get_sample_names(files): sample_names.append(gt_column.replace('.GT', '')) return sample_names -def write_merged_csv(merged_data, output_file, sample_names): - """Write merged dictionary to CSV.""" +def write_merged_file(merged_data, output_file, sample_names, delimiter): + """Write merged dictionary to file according to delimiter.""" sorted_keys = sorted(merged_data.keys(), key=lambda x: (x[0], int(x[1]))) fixed_fields = ["CHROM", "POS", "REF", "ALT", "FILTER"] @@ -84,7 +84,7 @@ def write_merged_csv(merged_data, output_file, sample_names): fieldnames = fixed_fields + dynamic_fields with open(output_file, 'w', newline='') as f: - writer = csv.DictWriter(f, fieldnames=fieldnames, restval='./.') + writer = csv.DictWriter(f, fieldnames=fieldnames, restval='./.', delimiter = 
delimiter) writer.writeheader() for key in sorted_keys: writer.writerow({field: merged_data[key].get(field, "./.") for field in fieldnames}) @@ -94,7 +94,7 @@ def main(): description="Merge CSVs by CHROM, POS, and handle dynamic GT columns." ) parser.add_argument("files", nargs='+', help="Input CSV files (e.g. *_TP.csv)") - parser.add_argument("--output", required=True, help="Output merged CSV file") + parser.add_argument("--output", required=True, help="Output merged TSV file") args = parser.parse_args() all_dicts = [] @@ -106,7 +106,7 @@ def main(): all_dicts.append(sample_dict) merged = merge_dicts_by_key(all_dicts, all_sample_names) - write_merged_csv(merged, args.output, all_sample_names) + write_merged_file(merged, args.output, all_sample_names, delimiter = "\t") print(f"Merged CSV written to {args.output}") if __name__ == "__main__": diff --git a/bin/plot_upset.py b/bin/plot_upset.py index a6a7bff4..f6db57b3 100755 --- a/bin/plot_upset.py +++ b/bin/plot_upset.py @@ -28,7 +28,7 @@ def parse_file(file_path, category_name): return {} try: - df = pd.read_csv(file_path, sep=',') + df = pd.read_csv(file_path, sep='\t') except pd.errors.EmptyDataError: print(f"Warning: File {file_path} is empty.") return {} diff --git a/conf/modules.config b/conf/modules.config index 224ec28b..35cb4536 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -676,13 +676,18 @@ process { ] } + withName: BCFTOOLS_SORT_COMPARE{ + ext.prefix = { vcf.baseName - ".vcf" + ".sort"} + publishDir = [ + enabled: false + ] + } - withName: VCF_TO_CSV { - ext.prefix = {"${meta.id}.${meta.tag}"} + withName: "BCFTOOLS_REHEADER_COMPARE" { + ext.args2 = {"--output-type z" } + ext.prefix = { vcf.baseName - ".vcf" + ".reheader"} publishDir = [ - path: {"${params.outdir}/${params.variant_type}/summary/comparisons/${meta.id}"}, - pattern: "*{.csv}", - mode: params.publish_dir_mode + enabled: false ] } diff --git a/modules.json b/modules.json index 116e4314..bce57fa4 100644 --- a/modules.json +++ 
b/modules.json @@ -15,6 +15,11 @@ "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", "installed_by": ["modules"] }, + "bcftools/index": { + "branch": "master", + "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", + "installed_by": ["modules"] + }, "bcftools/merge": { "branch": "master", "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", @@ -66,6 +71,11 @@ "installed_by": ["modules"], "patch": "modules/nf-core/gatk4/concordance/gatk4-concordance.diff" }, + "gatk4/variantstotable": { + "branch": "master", + "git_sha": "52b7843bd92ccf833cc12e71cd84aeccf2583852", + "installed_by": ["modules"] + }, "gawk": { "branch": "master", "git_sha": "76b1f53edcf72798d8515c82f4728ad44b3dd902", diff --git a/modules/local/custom/merge_sompy_features/main.nf b/modules/local/custom/merge_sompy_features/main.nf index 6d8d6793..2247e443 100644 --- a/modules/local/custom/merge_sompy_features/main.nf +++ b/modules/local/custom/merge_sompy_features/main.nf @@ -11,7 +11,7 @@ process MERGE_SOMPY_FEATURES { tuple val(meta), path(csvs) output: - tuple val(meta), path("*.csv") , emit: output + tuple val(meta), path("*.tsv") , emit: output path "versions.yml" , emit: versions when: @@ -22,7 +22,7 @@ process MERGE_SOMPY_FEATURES { def prefix = task.ext.prefix ?: "${meta.id}" """ - merge_sompy_features.py $csvs --output ${prefix}.${meta.tag}.csv + merge_sompy_features.py $csvs --output ${prefix}.${meta.tag}.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -32,7 +32,7 @@ process MERGE_SOMPY_FEATURES { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.${meta.tag}.summary.csv + touch ${prefix}.${meta.tag}.summary.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/custom/plot_upset/main.nf b/modules/local/custom/plot_upset/main.nf index 4bff91a4..5f74cf49 100644 --- a/modules/local/custom/plot_upset/main.nf +++ b/modules/local/custom/plot_upset/main.nf @@ -21,10 +21,10 @@ process PLOT_UPSET { def prefix = task.ext.prefix 
?: "${meta.id}" """ plot_upset.py \\ - --fp ${meta.id}.FP.csv \\ - --fn ${meta.id}.FN.csv \\ - --tp-base ${meta.id}.TP_base.csv \\ - --tp-comp ${meta.id}.TP_comp.csv \\ + --fp ${meta.id}.FP.tsv \\ + --fn ${meta.id}.FN.tsv \\ + --tp-base ${meta.id}.TP_base.tsv \\ + --tp-comp ${meta.id}.TP_comp.tsv \\ --output ${prefix} \\ --title "Upset plot for ${meta.id}" diff --git a/modules/local/custom/vcf_to_csv/environment.yml b/modules/local/custom/vcf_to_csv/environment.yml deleted file mode 100644 index a03f0fd7..00000000 --- a/modules/local/custom/vcf_to_csv/environment.yml +++ /dev/null @@ -1,9 +0,0 @@ -channels: - - conda-forge - - bioconda -dependencies: - - bioconda::pysam=0.22.1 - - conda-forge::pandas=2.2.2 - - pip - - pip: - - variant-extractor==4.0.6 diff --git a/modules/local/custom/vcf_to_csv/main.nf b/modules/local/custom/vcf_to_csv/main.nf deleted file mode 100644 index 7607a28a..00000000 --- a/modules/local/custom/vcf_to_csv/main.nf +++ /dev/null @@ -1,43 +0,0 @@ -process VCF_TO_CSV { - tag "$meta.id" - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ab/ab3b0054e3111812d8f2deb12345d5b7ca7ea7b18a2dbcbf174d46274c28deba/data': - 'community.wave.seqera.io/library/pip_pandas:40d2e76c16c136f0' }" - - input: - tuple val(meta), path(input) - - output: - tuple val(meta), path("*.csv") , emit: output - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - vcf_to_csv.py \\ - $input \\ - ${prefix}.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ - -} diff --git a/modules/nf-core/bcftools/index/environment.yml b/modules/nf-core/bcftools/index/environment.yml new file mode 100644 index 00000000..ba863b38 --- /dev/null +++ b/modules/nf-core/bcftools/index/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/bcftools + - bioconda::bcftools=1.22 diff --git a/modules/nf-core/bcftools/index/main.nf b/modules/nf-core/bcftools/index/main.nf new file mode 100644 index 00000000..8635a1a2 --- /dev/null +++ b/modules/nf-core/bcftools/index/main.nf @@ -0,0 +1,40 @@ +process BCFTOOLS_INDEX { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" + + input: + tuple val(meta), path(vcf) + + output: + tuple val(meta), path("*.csi"), emit: csi, optional: true + tuple val(meta), path("*.tbi"), emit: tbi, optional: true + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + bcftools \\ + index \\ + ${args} \\ + --threads ${task.cpus} \\ + ${vcf} + """ + + stub: + def args = task.ext.args ?: '' + def extension = args.contains("--tbi") || args.contains("-t") + ? "tbi" + : "csi" + """ + touch ${vcf}.${extension} + """ +} diff --git a/modules/nf-core/bcftools/index/meta.yml b/modules/nf-core/bcftools/index/meta.yml new file mode 100644 index 00000000..4e4bdc8d --- /dev/null +++ b/modules/nf-core/bcftools/index/meta.yml @@ -0,0 +1,82 @@ +name: bcftools_index +description: Index VCF tools +keywords: + - vcf + - index + - bcftools + - csi + - tbi +tools: + - bcftools: + description: BCFtools is a set of utilities that manipulate variant calls in the + Variant Call Format (VCF) and its binary counterpart BCF. All commands work + transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed. Most + commands accept VCF, bgzipped VCF and BCF with filetype detected automatically + even when streaming from a pipe. Indexed VCF and BCF will work in all situations. + Un-indexed VCF and BCF and streams will work in most, but not all situations. 
+ homepage: https://samtools.github.io/bcftools/ + documentation: https://samtools.github.io/bcftools/howtos/index.html + tool_dev_url: https://github.com/samtools/bcftools + doi: "10.1093/gigascience/giab008" + licence: ["MIT", "GPL-3.0-or-later"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF file (optionally GZIPPED) + pattern: "*.{vcf,vcf.gz}" + ontologies: [] +output: + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: Default VCF file index file + pattern: "*.csi" + ontologies: [] + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Alternative VCF file index file for larger files (activated with + -t parameter) + pattern: "*.tbi" + ontologies: [] + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/bcftools/index/tests/main.nf.test b/modules/nf-core/bcftools/index/tests/main.nf.test new file mode 100644 index 00000000..b38c6adb --- /dev/null +++ b/modules/nf-core/bcftools/index/tests/main.nf.test @@ -0,0 +1,108 @@ 
+nextflow_process { + + name "Test Process BCFTOOLS_INDEX" + script "../main.nf" + process "BCFTOOLS_INDEX" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/index" + + test("sarscov2 - vcf - csi") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") }).match() + } + ) + } + } + + test("sarscov2 - vcf - tbi") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.tbi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") }).match() + } + ) + } + } + + test("sarscov2 - vcf - csi - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - vcf - tbi - stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/bcftools/index/tests/main.nf.test.snap b/modules/nf-core/bcftools/index/tests/main.nf.test.snap new file mode 100644 index 00000000..2074e974 --- /dev/null +++ b/modules/nf-core/bcftools/index/tests/main.nf.test.snap @@ -0,0 +1,148 @@ +{ + "sarscov2 - vcf - csi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + [ + "BCFTOOLS_INDEX", + "bcftools", + "1.22" + ] + ], + "csi": [ + [ + { + "id": "test" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "versions_bcftools": [ + [ + "BCFTOOLS_INDEX", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:03:32.869223843" + }, + "sarscov2 - vcf - tbi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_INDEX", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": 
"2026-01-20T12:03:24.708477718" + }, + "sarscov2 - vcf - tbi - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "BCFTOOLS_INDEX", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_INDEX", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:03:40.444304193" + }, + "sarscov2 - vcf - csi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.vcf.gz.csi" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_INDEX", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:03:17.363152216" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/index/tests/nextflow.config b/modules/nf-core/bcftools/index/tests/nextflow.config new file mode 100644 index 00000000..db83f7e5 --- /dev/null +++ b/modules/nf-core/bcftools/index/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = '--tbi' +} diff --git a/modules/nf-core/gatk4/variantstotable/environment.yml b/modules/nf-core/gatk4/variantstotable/environment.yml new file mode 100644 index 00000000..67e0eb86 --- /dev/null +++ b/modules/nf-core/gatk4/variantstotable/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/variantstotable/main.nf b/modules/nf-core/gatk4/variantstotable/main.nf new file mode 100644 index 00000000..fb16d40e 
--- /dev/null +++ b/modules/nf-core/gatk4/variantstotable/main.nf @@ -0,0 +1,55 @@ +process GATK4_VARIANTSTOTABLE { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" + + input: + tuple val(meta), path(vcf), path(tbi), path(arguments_file), path(include_intervals), path(exclude_intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + + output: + tuple val(meta), path("*.tsv"), emit: table + tuple val("${task.process}"), val('gatk4'), eval("gatk --version | sed -n '/GATK.*v/s/.*v//p'"), topic: versions, emit: versions_gatk4 + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def arguments_file_arg = arguments_file ? "--arguments_file ${arguments_file}" : "" + def include_intervals_arg = include_intervals ? "-L ${include_intervals}" : "" + def exclude_intervals_arg = exclude_intervals ? "-XL ${exclude_intervals}" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info('[GATK VariantsToTable] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + VariantsToTable \\ + ${args} \\ + --variant ${vcf} \\ + --output ${prefix}.tsv \\ + --reference ${fasta} \\ + --tmp-dir . 
\\ + ${arguments_file_arg} \\ + ${include_intervals_arg} \\ + ${exclude_intervals_arg} + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.tsv + """ +} diff --git a/modules/nf-core/gatk4/variantstotable/meta.yml b/modules/nf-core/gatk4/variantstotable/meta.yml new file mode 100644 index 00000000..d60b63a2 --- /dev/null +++ b/modules/nf-core/gatk4/variantstotable/meta.yml @@ -0,0 +1,118 @@ +name: gatk4_variantstotable +description: Extract fields from a VCF file to a tab-delimited table +keywords: + - filter + - gatk4 + - table + - vcf +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing VCF information. Attribute `gatk_args` can be used to add arguments to gatk. + e.g. [ id:'test', gatk_args:'-F CHROM -F POS -F TYPE -GF AD'] + - vcf: + type: file + description: VCF file + pattern: "*.{vcf,vcf.gz}" + ontologies: [] + - tbi: + type: file + description: Index of VCF file. 
+ pattern: "*.{idx,tbi}" + ontologies: [] + - arguments_file: + type: file + description: "optional GATK arguments file" + pattern: "*.{txt,list,args,arguments}" + ontologies: [] + - include_intervals: + type: file + description: "optional GATK region file" + pattern: "*.{bed,bed.gz,interval,interval_list}" + ontologies: [] + - exclude_intervals: + type: file + description: "optional GATK exclude region file" + pattern: "*.{bed,bed.gz,interval,interval_list}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Fasta file of reference genome + pattern: "*.fasta" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of fasta file + pattern: "*.fasta.fai" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - dict: + type: file + description: Sequence dictionary of fasta file + pattern: "*.dict" + ontologies: [] +output: + table: + - - meta: + type: file + description: GATK output + pattern: "*.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + - "*.tsv": + type: file + description: GATK output + pattern: "*.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + versions_gatk4: + - - ${task.process}: + type: string + description: The name of the process + - gatk4: + type: string + description: The name of the tool + - gatk --version | sed -n '/GATK.*v/s/.*v//p': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - gatk4: + type: string + description: The name of the tool + - gatk --version | sed -n '/GATK.*v/s/.*v//p': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@lindenb" +maintainers: + - "@lindenb" diff --git a/modules/nf-core/gatk4/variantstotable/tests/main.nf.test b/modules/nf-core/gatk4/variantstotable/tests/main.nf.test new file mode 100644 index 00000000..0163bbd9 --- /dev/null +++ b/modules/nf-core/gatk4/variantstotable/tests/main.nf.test @@ -0,0 +1,88 @@ +nextflow_process { + + name "Test Process GATK4_VARIANTSTOTABLE" + script "../main.nf" + process "GATK4_VARIANTSTOTABLE" + config './nextflow.config' + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/variantstotable" + + test("test1_gatk4_variant_to_table") { + + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.idx', checkIfExists: true), + [], + [], + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match() } + ) + } + } + + test("test1_gatk4_variant_to_table - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.idx', checkIfExists: true), + [], + [], + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gatk4/variantstotable/tests/main.nf.test.snap b/modules/nf-core/gatk4/variantstotable/tests/main.nf.test.snap new file mode 100644 index 00000000..7b2be81d --- /dev/null +++ b/modules/nf-core/gatk4/variantstotable/tests/main.nf.test.snap @@ -0,0 +1,84 @@ +{ + "test1_gatk4_variant_to_table": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.tsv:md5,fd8256465233e335beb509b0a8b43536" + ] + ], + 
"1": [ + [ + "GATK4_VARIANTSTOTABLE", + "gatk4", + "4.6.2.0" + ] + ], + "table": [ + [ + { + "id": "test" + }, + "test.tsv:md5,fd8256465233e335beb509b0a8b43536" + ] + ], + "versions_gatk4": [ + [ + "GATK4_VARIANTSTOTABLE", + "gatk4", + "4.6.2.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-05T18:04:11.295114463" + }, + "test1_gatk4_variant_to_table - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "GATK4_VARIANTSTOTABLE", + "gatk4", + "4.6.2.0" + ] + ], + "table": [ + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gatk4": [ + [ + "GATK4_VARIANTSTOTABLE", + "gatk4", + "4.6.2.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-05T18:04:17.38237653" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/variantstotable/tests/nextflow.config b/modules/nf-core/gatk4/variantstotable/tests/nextflow.config new file mode 100644 index 00000000..0c5fde34 --- /dev/null +++ b/modules/nf-core/gatk4/variantstotable/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + + withName: GATK4_VARIANTSTOTABLE { + ext.args = "-F CHROM -F POS -F TYPE -GF AD" + } +} diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 6771a454..69dcdd29 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -23,7 +23,7 @@ "@type": "Dataset", "creativeWorkStatus": "InProgress", "datePublished": "2025-12-03T16:04:22+00:00", - "description": "

\n \n \n \"nf-core/variantbenchmarking\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/variantbenchmarking)\n[![GitHub Actions CI Status](https://github.com/nf-core/variantbenchmarking/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/variantbenchmarking/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/variantbenchmarking/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/variantbenchmarking/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/variantbenchmarking/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.14916661-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.14916661)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera 
Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/variantbenchmarking)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23variantbenchmarking-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/variantbenchmarking)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n![HiRSE Code Promo Badge](https://img.shields.io/badge/Promo-8db427?label=HiRSE&labelColor=005aa0&link=https%3A%2F%2Fgo.fzj.de%2FCodePromo)\n\n## Introduction\n\n**nf-core/variantbenchmarking** is designed to evaluate and validate the accuracy of variant calling methods in genomic research. Initially, the pipeline is tuned well for available gold standard truth sets (for example, Genome in a Bottle and SEQC2 samples) but it can be used to compare any two variant calling results. The workflow provides benchmarking tools for small variants including SNVs and INDELs, Structural Variants (SVs) and Copy Number Variations (CNVs) for germline and somatic analysis.\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. 
Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\n

\n \n

\n\nThe workflow involves several key processes to ensure reliable and reproducible results as follows:\n\n### Standardization and normalization of variants:\n\nThis initial step ensures consistent formatting and alignment of variants in test and truth VCF files for accurate comparison.\n\n- Subsample if input test vcf is multisample ([bcftools view](https://samtools.github.io/bcftools/bcftools.html#view))\n- Homogenization of multi-allelic variants, MNPs and SVs (including imprecise paired breakends and single breakends) ([variant-extractor](https://github.com/EUCANCan/variant-extractor))\n- Reformatting test VCF files from different SV callers ([svync](https://github.com/nvnieuwk/svync))\n- Standardize SV variants to BND ([SVTK standardize](https://github.com/broadinstitute/gatk-sv/blob/main/src/svtk/scripts/svtk))\n- Decompose SVs to BND [rtgtools svdecompose](https://cn.animalgenome.org/bioinfo/resources/manuals/RTGOperationsManual.pdf)\n- Rename sample names in test and truth VCF files ([bcftools reheader](https://samtools.github.io/bcftools/bcftools.html#reheader))\n- Splitting multi-allelic variants in test and truth VCF files ([bcftools norm](https://samtools.github.io/bcftools/bcftools.html#norm))\n- Deduplication of variants in test and truth VCF files ([bcftools norm](https://samtools.github.io/bcftools/bcftools.html#norm))\n- Left aligning of variants in test and truth VCF files ([bcftools norm](https://samtools.github.io/bcftools/bcftools.html#norm))\n- Use prepy in order to normalize test files. This option is only applicable for happy benchmarking of germline analysis ([prepy](https://github.com/Illumina/hap.py/tree/master))\n- Split SNVs and indels if the given test VCF contains both. 
This is only applicable for somatic analysis ([bcftools view](https://samtools.github.io/bcftools/bcftools.html#view))\n\n### Filtering options:\n\nApplying filtering on the process of benchmarking itself might makes it impossible to compare different benchmarking strategies. Therefore, for whom like to compare benchmarking methods this subworkflow aims to provide filtering options for variants.\n\n- Filtration of contigs ([bcftools view](https://samtools.github.io/bcftools/bcftools.html#view))\n- Include or exclude SNVs and INDELs ([bcftools filter](https://samtools.github.io/bcftools/bcftools.html#filter))\n- Size and quality filtering for SVs ([SURVIVOR filter](https://github.com/fritzsedlazeck/SURVIVOR/wiki))\n\n### Liftover of vcfs:\n\nThis sub-workflow provides option to convert genome coordinates of truth VCF and test VCFs and high confidence BED file to a new assembly. Golden standard truth files are build upon specific reference genomes which makes the necessity of lifting over depending on the test VCF in query. Lifting over one or more test VCFs is also possible.\n\n- Create sequence dictionary for the reference ([picard CreateSequenceDictionary](https://gatk.broadinstitute.org/hc/en-us/articles/360037068312-CreateSequenceDictionary-Picard)). 
This file can be saved and reused.\n- Lifting over VCFs ([picard LiftoverVcf](https://gatk.broadinstitute.org/hc/en-us/articles/360037060932-LiftoverVcf-Picard))\n- Lifting over high confidence coordinates ([UCSC liftover](http://hgdownload.cse.ucsc.edu/admin/exe))\n\n### Statistical inference of input test and truth variants:\n\nThis step provides insights into the distribution of variants before benchmarking by extracting variant statistics:.\n\n- SNVs, INDELs and complex variants ([bcftools stats](https://samtools.github.io/bcftools/bcftools.html#stats))\n- SVs by type ([SURVIVOR stats](https://github.com/fritzsedlazeck/SURVIVOR/wiki))\n\n### Benchmarking of variants:\n\nActual benchmarking of variants are split between SVs and small variants:\n\nAvailable methods for germline and somatic _structural variant (SV)_ benchmarking are:\n\n- Truvari ([truvari bench](https://github.com/acenglish/truvari/wiki/bench))\n- SVanalyzer ([svanalyzer benchmark](https://github.com/nhansen/SVanalyzer/blob/master/docs/svbenchmark.rst))\n- Rtgtools (only for BND) ([rtg bndeval](https://realtimegenomics.com/products/rtg-tools))\n\n> [!NOTE]\n> Please note that there is no somatic specific tool for SV benchmarking in this pipeline.\n\nAvailable methods for germline and somatic _CNVs (copy number variations)_ are:\n\n- Truvari ([truvari bench](https://github.com/acenglish/truvari/wiki/bench))\n- Wittyer ([witty.er](https://github.com/Illumina/witty.er/tree/master))\n- Intersection ([bedtools intersect](https://bedtools.readthedocs.io/en/latest/content/tools/intersect.html))\n\n> [!NOTE]\n> Please note that there is no somatic specific tool for CNV benchmarking in this pipeline.\n\n> [!NOTE]\n> Wittyer does not support BND type of variants. 
It is recommended to either exclude (filter) them out or convert them to other types before analysis.\n\nAvailable methods for *small variants: SNVs and INDEL*s:\n\n- Germline variant benchmarking using ([rtg vcfeval](https://realtimegenomics.com/products/rtg-tools))\n- Germline variant benchmarking using ([hap.py](https://github.com/Illumina/hap.py/blob/master/doc/happy.md))\n- Somatic variant benchmarking using ([rtg vcfeval --squash-ploidy](https://realtimegenomics.com/products/rtg-tools))\n- Somatic variant benchmarking using ([som.py](https://github.com/Illumina/hap.py/tree/master?tab=readme-ov-file#sompy))\n\n> [!NOTE]\n> Please note that using happ.py and som.py with rtgtools as comparison engine is also possible. Check conf/tests/test_ga4gh.config as an example.\n\n### Intersection of benchmark regions:\n\nIntersecting test and truth BED regions produces benchmark metrics. Intersection analysis is especially recommended for _CNV benchmarking_ where result reports may variate per tool.\n\n- Convert SV or CNV VCF file to BED file, if no regions file is provided for test case using ([SVTK vcf2bed](https://github.com/broadinstitute/gatk-sv/blob/main/src/svtk/scripts/svtk))\n- Convert VCF file to BED file, if no regions file is provided for test case using ([Bedops convert2bed](https://bedops.readthedocs.io/en/latest/content/reference/file-management/conversion/convert2bed.html#convert2bed))\n- Intersect the regions and gether benchmarking statistics using ([bedtools intersect](https://bedtools.readthedocs.io/en/latest/content/tools/intersect.html))\n\n### Concordance analysis between test VCFs:\n\n- Concordance analysis enables comparison of test VCFs with each other and it can be coupled to benchmarking analysis ([GATK4 concordance](https://gatk.broadinstitute.org/hc/en-us/articles/360040509811-Concordance))\n\n### Comparison of benchmarking results per TP, FP and FN files\n\nIt is essential to compare benchmarking results in order to infer uniquely or 
commonly seen TPs, FPs and FNs.\n\n- Merging TP, FP and FN results for happy, rtgtools and sompy ([bcftools merge](https://samtools.github.io/bcftools/bcftools.html#merge))\n- Merging TP, FP and FN results for Truvari and SVanalyzer ([SURVIVOR merge](https://github.com/fritzsedlazeck/SURVIVOR/wiki))\n- Conversion of VCF files to CSV to infer common and unique variants per caller (python script)\n\n### Reporting of benchmark results\n\nThe generation of comprehensive report that consolidates all benchmarking results.\n\n- Merging summary statistics per benchmarking tool (python script)\n- Plotting benchmark metrics per benchmarking tool (R script)\n- Create visual HTML report for the integration of NCBENCH ([datavzrd](https://datavzrd.github.io/docs/index.html))\n- Apply _MultiQC_ to visualize results\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nid,test_vcf,caller\ntest1,test1.vcf.gz,delly\ntest2,test2.vcf,gatk\ntest3,test3.vcf.gz,cnvkit\n```\n\nEach row represents a vcf file (test-query file). For each vcf file and variant calling method (caller) have to be defined.\n\nUser _has to provide truth_vcf and truth_id in config files_.\n\n> [!NOTE]\n> There are publicly available truth sources. For germline analysis, it is common to use [genome in a bottle (GiAB)](https://www.nist.gov/programs-projects/genome-bottle) variants. There are variate type of golden truths and high confidence regions for hg37 and hg38 references. 
Please select and use carefully.\n> For somatic analysis, [SEQC2 project](https://sites.google.com/view/seqc2/home/data-analysis/high-confidence-somatic-snv-and-indel-v1-2) released SNV, INDEL and CNV regions. One, can select and use those files.\n\nHere you can find example combinations of [truth files](docs/truth.md)\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/variantbenchmarking/usage) and the [parameter documentation](https://nf-co.re/variantbenchmarking/parameters).\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/variantbenchmarking \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --genome GRCh37 \\\n --analysis germline \\\n --truth_id HG002 \\\n --truth_vcf truth.vcf.gz\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n> Conda profile is not available for SVanalyzer (SVBenchmark) tool, if you are planing to use the tool either choose docker or singularity.\n\n### Example usages\n\nThis pipeline enables quite a number of subworkflows suitable for different benchmarking senarios. 
Please go through [this documentation](docs/testcases.md) to learn some example usages which discusses about the test config files under conf/tests and tests/.\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/variantbenchmarking/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/variantbenchmarking/output).\n\nThis pipeline outputs benchmarking results per method besides to the inferred and compared statistics.\n\n## Credits\n\nnf-core/variantbenchmarking was originally written by K\u00fcbra Narc\u0131 ([@kubranarci](https://github.com/kubranarci)) as a part of benchmarking studies in German Human Genome Phenome Archieve Project ([GHGA](https://www.ghga.de/)).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- Nicolas Vannieuwkerke ([@nvnienwk](https://github.com/nvnieuwk))\n- Maxime Garcia ([@maxulysse](https://github.com/maxulysse))\n- Sameesh Kher ([@khersameesh24](https://github.com/khersameesh24))\n- Florian Heyl ([@heylf](https://github.com/heyl))\n- Kre\u0161imir Be\u0161tak ([@kbestak](https://github.com/kbestak))\n- Ata Jadidahari ([@AtaJadidAhari](https://github.com/AtaJadidAhari))\n- Elad Herz ([@EladH1](https://github.com/EladH1))\n- Victor Didier Perez ([@VictorDidier](https://github.com/VictorDidier))\n\n## Acknowledgements\n\n\n \"GHGA\"\n\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#variantbenchmarking` channel](https://nfcore.slack.com/channels/variantbenchmarking) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/variantbenchmarking for your analysis, please 
cite it using the following doi: [110.5281/zenodo.14916661](https://doi.org/10.5281/zenodo.14916661)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "

\n \n \n \"nf-core/variantbenchmarking\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/variantbenchmarking)\n[![GitHub Actions CI Status](https://github.com/nf-core/variantbenchmarking/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/variantbenchmarking/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/variantbenchmarking/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/variantbenchmarking/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/variantbenchmarking/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.14916661-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.14916661)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera 
Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/variantbenchmarking)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23variantbenchmarking-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/variantbenchmarking)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n![HiRSE Code Promo Badge](https://img.shields.io/badge/Promo-8db427?label=HiRSE&labelColor=005aa0&link=https%3A%2F%2Fgo.fzj.de%2FCodePromo)\n\n## Introduction\n\n**nf-core/variantbenchmarking** is designed to evaluate and validate the accuracy of variant calling methods in genomic research. Initially, the pipeline is tuned well for available gold standard truth sets (for example, Genome in a Bottle and SEQC2 samples) but it can be used to compare any two variant calling results. The workflow provides benchmarking tools for small variants including SNVs and INDELs, Structural Variants (SVs) and Copy Number Variations (CNVs) for germline and somatic analysis.\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. 
Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\n\n \n \"nf-core/variantbenchmarking\n\n\nThe workflow involves several key processes to ensure reliable and reproducible results as follows:\n\n### Standardization and normalization of test (query/comparison) variants:\n\nThis initial step ensures consistent formatting and alignment of variants in test and truth VCF files for accurate comparison.\n\n- Subsample if input vcf is multisample ([bcftools view](https://samtools.github.io/bcftools/bcftools.html#view))\n- Homogenization of multi-allelic variants, MNPs and SVs (including imprecise paired breakends and single breakends) ([variant-extractor](https://github.com/EUCANCan/variant-extractor))\n- Reformatting VCF files from different SV callers ([svync](https://github.com/nvnieuwk/svync))\n- Standardize SV variants to BND ([SVTK standardize](https://github.com/broadinstitute/gatk-sv/blob/main/src/svtk/scripts/svtk))\n- Decompose SVs to BND [rtgtools svdecompose](https://cn.animalgenome.org/bioinfo/resources/manuals/RTGOperationsManual.pdf)\n- Rename sample names ([bcftools reheader](https://samtools.github.io/bcftools/bcftools.html#reheader))\n- Splitting multi-allelic variants([bcftools norm](https://samtools.github.io/bcftools/bcftools.html#norm))\n- Deduplication of variants ([bcftools norm](https://samtools.github.io/bcftools/bcftools.html#norm))\n- Left aligning of variants ([bcftools norm](https://samtools.github.io/bcftools/bcftools.html#norm))\n- Use prepy in order to normalize. This option is only applicable for happy benchmarking of germline analysis ([prepy](https://github.com/Illumina/hap.py/tree/master))\n- Split SNVs and indels if the given test VCF contains both. 
This is only applicable for somatic analysis ([bcftools view](https://samtools.github.io/bcftools/bcftools.html#view))\n\n### Standardization and normalization of truth (baseline) variants:\n\n- Decompose SVs to BND [rtgtools svdecompose](https://cn.animalgenome.org/bioinfo/resources/manuals/RTGOperationsManual.pdf)\n- Rename sample names ([bcftools reheader](https://samtools.github.io/bcftools/bcftools.html#reheader))\n- Splitting multi-allelic variants ([bcftools norm](https://samtools.github.io/bcftools/bcftools.html#norm))\n- Deduplication of variants ([bcftools norm](https://samtools.github.io/bcftools/bcftools.html#norm))\n- Left aligning of variants ([bcftools norm](https://samtools.github.io/bcftools/bcftools.html#norm))\n\n### Ensemble (majority rule) approach to prepare truth variants:\n\nWhen a \"Gold Standard\" (a high-confidence, validated set of variants) is not available, you can create a Proxy Ground Truth by looking for agreement between different tools. This Majority Rule approach assumes that if multiple independent variant callers identify the same mutation, it is more likely to be a real biological variant rather than a technical error from a single pipeline. Only variants found by at least the minimum number of callers specified in your threshold are kept as the \"truth\" for the final benchmark.\n\nIf the `--ensemble_truth` threshold is set higher than 0, the pipeline performs the following steps:\n\n- Merge small variants (SNVs and INDELs) using ([bcftools merge](https://samtools.github.io/bcftools/bcftools.html#merge))\n- Merge Structural Variants using ([SURVIVOR merge](https://github.com/fritzsedlazeck/SURVIVOR/wiki))\n- Consensus filtering the variants according to `--ensemble_truth`.\n\n### Filtering options:\n\nApplying filtering on the process of benchmarking itself might make it impossible to compare different benchmarking strategies. 
Therefore, for whom like to compare benchmarking methods this subworkflow aims to provide filtering options for variants.\n\n- Filtration of contigs ([bcftools view](https://samtools.github.io/bcftools/bcftools.html#view))\n- Include or exclude SNVs and INDELs ([bcftools filter](https://samtools.github.io/bcftools/bcftools.html#filter))\n- Size and quality filtering for SVs ([SURVIVOR filter](https://github.com/fritzsedlazeck/SURVIVOR/wiki))\n\n### Liftover of vcfs:\n\nThis sub-workflow provides option to convert genome coordinates of truth VCF and test VCFs and high confidence BED file to a new assembly. Golden standard truth files are build upon specific reference genomes which makes the necessity of lifting over depending on the test VCF in query. Lifting over one or more test VCFs is also possible.\n\n- Create sequence dictionary for the reference ([picard CreateSequenceDictionary](https://gatk.broadinstitute.org/hc/en-us/articles/360037068312-CreateSequenceDictionary-Picard)). This file can be saved and reused.\n- Lifting over VCFs ([picard LiftoverVcf](https://gatk.broadinstitute.org/hc/en-us/articles/360037060932-LiftoverVcf-Picard))\n- Lifting over high confidence coordinates ([UCSC liftover](http://hgdownload.cse.ucsc.edu/admin/exe))\n\n### Statistical inference of input test and truth variants:\n\nThis step provides insights into the distribution of variants before benchmarking by extracting variant statistics:.\n\n- SNVs, INDELs and complex variants ([bcftools stats](https://samtools.github.io/bcftools/bcftools.html#stats))\n- SVs by type ([SURVIVOR stats](https://github.com/fritzsedlazeck/SURVIVOR/wiki))\n\n### Benchmarking of variants:\n\nActual benchmarking of variants are split between SVs and small variants:\n\nAvailable methods for germline and somatic _structural variant (SV)_ benchmarking are:\n\n- Truvari ([truvari bench](https://github.com/acenglish/truvari/wiki/bench))\n- SVanalyzer ([svanalyzer 
benchmark](https://github.com/nhansen/SVanalyzer/blob/master/docs/svbenchmark.rst))\n- RTGtools (only for BND) ([rtg bndeval](https://realtimegenomics.com/products/rtg-tools))\n\n> [!NOTE]\n> Please note that there is no somatic-specific tool for SV benchmarking in this pipeline.\n\nAvailable methods for germline and somatic _CNVs (copy number variations)_ are:\n\n- Truvari ([truvari bench](https://github.com/acenglish/truvari/wiki/bench))\n- Wittyer ([witty.er](https://github.com/Illumina/witty.er/tree/master))\n- Intersection ([bedtools intersect](https://bedtools.readthedocs.io/en/latest/content/tools/intersect.html))\n\n> [!NOTE]\n> Please note that there is no somatic-specific tool for CNV benchmarking in this pipeline.\n\n> [!NOTE]\n> Wittyer does not support BND-type variants. It is recommended to either exclude (filter) them out or convert them to other types before analysis.\n\nAvailable methods for _small variants (SNVs and INDELs)_ are:\n\n- Germline variant benchmarking using ([rtg vcfeval](https://realtimegenomics.com/products/rtg-tools))\n- Germline variant benchmarking using ([hap.py](https://github.com/Illumina/hap.py/blob/master/doc/happy.md))\n- Somatic variant benchmarking using ([rtg vcfeval --squash-ploidy](https://realtimegenomics.com/products/rtg-tools))\n- Somatic variant benchmarking using ([som.py](https://github.com/Illumina/hap.py/tree/master?tab=readme-ov-file#sompy))\n\n> [!NOTE]\n> Please note that using hap.py and som.py with rtgtools as comparison engine is also possible. Check conf/tests/test_ga4gh.config as an example.\n\n### Intersection of benchmark regions:\n\nIntersecting test and truth BED regions produces benchmark metrics. 
Intersection analysis is especially recommended for _CNV benchmarking_ where result reports may vary per tool.\n\n- Convert SV or CNV VCF file to BED file, if no regions file is provided for test case using ([SVTK vcf2bed](https://github.com/broadinstitute/gatk-sv/blob/main/src/svtk/scripts/svtk))\n- Convert VCF file to BED file, if no regions file is provided for test case using ([Bedops convert2bed](https://bedops.readthedocs.io/en/latest/content/reference/file-management/conversion/convert2bed.html#convert2bed))\n- Intersect the regions and gather benchmarking statistics using ([bedtools intersect](https://bedtools.readthedocs.io/en/latest/content/tools/intersect.html))\n\n### Concordance analysis between test VCFs:\n\n- Concordance analysis enables comparison of test VCFs with each other and it can be coupled to benchmarking analysis ([GATK4 concordance](https://gatk.broadinstitute.org/hc/en-us/articles/360040509811-Concordance))\n\n### Comparison of benchmarking results per TP, FP and FN files\n\nIt is essential to compare benchmarking results in order to infer uniquely or commonly seen TPs, FPs and FNs.\n\n- Merging TP, FP and FN results for happy, rtgtools and sompy ([bcftools merge](https://samtools.github.io/bcftools/bcftools.html#merge))\n- Merging TP, FP and FN results for Truvari and SVanalyzer ([SURVIVOR merge](https://github.com/fritzsedlazeck/SURVIVOR/wiki))\n- Conversion of VCF files to CSV to infer common and unique variants per caller (python script)\n\n### Reporting of benchmark results\n\nGeneration of a comprehensive report that consolidates all benchmarking results.\n\n- Merging summary statistics per benchmarking tool (python script)\n- Plotting benchmark metrics per benchmarking tool (R script)\n- Create visual HTML report for the integration of NCBENCH ([datavzrd](https://datavzrd.github.io/docs/index.html))\n- Apply _MultiQC_ to visualize results\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this 
page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nid,test_vcf,caller\ntest1,test1.vcf.gz,delly\ntest2,test2.vcf,gatk\ntest3,test3.vcf.gz,cnvkit\n```\n\nEach row represents a vcf file (test-query file). For each vcf file and variant calling method (caller) have to be defined.\n\nUser _has to provide truth_vcf and truth_id in config files_.\n\n> [!NOTE]\n> There are publicly available truth sources. For germline analysis, it is common to use [genome in a bottle (GiAB)](https://www.nist.gov/programs-projects/genome-bottle) variants. There are variate type of golden truths and high confidence regions for hg37 and hg38 references. Please select and use carefully.\n> For somatic analysis, [SEQC2 project](https://sites.google.com/view/seqc2/home/data-analysis/high-confidence-somatic-snv-and-indel-v1-2) released SNV, INDEL and CNV regions. One, can select and use those files.\n\nHere you can find example combinations of [truth files](docs/truth.md)\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/variantbenchmarking/usage) and the [parameter documentation](https://nf-co.re/variantbenchmarking/parameters).\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/variantbenchmarking \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --genome GRCh37 \\\n --analysis germline \\\n --truth_id HG002 \\\n --truth_vcf truth.vcf.gz\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n> The Conda profile is not available for the SVanalyzer (SVBenchmark) tool; if you are planning to use it, choose either docker or singularity.\n\n### Example usages\n\nThis pipeline enables quite a number of subworkflows suitable for different benchmarking scenarios. Please go through [this documentation](docs/testcases.md) to learn some example usages which discuss the test config files under conf/tests and tests/.\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/variantbenchmarking/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/variantbenchmarking/output).\n\nThis pipeline outputs benchmarking results per method in addition to the inferred and compared statistics.\n\n## Credits\n\nnf-core/variantbenchmarking was originally written by K\u00fcbra Narc\u0131 ([@kubranarci](https://github.com/kubranarci)) as a part of benchmarking studies in German Human Genome Phenome Archive Project ([GHGA](https://www.ghga.de/)).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- Nicolas Vannieuwkerke ([@nvnieuwk](https://github.com/nvnieuwk))\n- Maxime Garcia ([@maxulysse](https://github.com/maxulysse))\n- Georgia Kesisoglou ([@georgiakes](https://github.com/georgiakes))\n- Sameesh Kher ([@khersameesh24](https://github.com/khersameesh24))\n- Florian Heyl ([@heylf](https://github.com/heyl))\n- Kre\u0161imir Be\u0161tak ([@kbestak](https://github.com/kbestak))\n- Ata Jadidahari ([@AtaJadidAhari](https://github.com/AtaJadidAhari))\n- Elad Herz ([@EladH1](https://github.com/EladH1))\n- Victor 
Didier Perez ([@VictorDidier](https://github.com/VictorDidier))\n\n## Acknowledgements\n\n\n \"GHGA\"\n\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#variantbenchmarking` channel](https://nfcore.slack.com/channels/variantbenchmarking) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/variantbenchmarking for your analysis, please cite it using the following doi: [10.5281/zenodo.14916661](https://doi.org/10.5281/zenodo.14916661)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" diff --git a/subworkflows/local/compare_benchmark_results/main.nf b/subworkflows/local/compare_benchmark_results/main.nf index 7a38f83f..adff9d59 100644 --- a/subworkflows/local/compare_benchmark_results/main.nf +++ b/subworkflows/local/compare_benchmark_results/main.nf @@ -3,14 +3,17 @@ // COMPARE_BENCHMARK_RESULTS: SUBWORKFLOW to merge TP/FP/FN results from different tools. 
// -include { GAWK as REFORMAT_HEADER } from '../../../modules/nf-core/gawk' -include { TABIX_BGZIP as TABIX_BGZIP_UNZIP } from '../../../modules/nf-core/tabix/bgzip' -include { TABIX_BGZIPTABIX } from '../../../modules/nf-core/tabix/bgziptabix' -include { BCFTOOLS_MERGE } from '../../../modules/nf-core/bcftools/merge' -include { SURVIVOR_MERGE } from '../../../modules/nf-core/survivor/merge' -include { VCF_TO_CSV } from '../../../modules/local/custom/vcf_to_csv' -include { MERGE_SOMPY_FEATURES } from '../../../modules/local/custom/merge_sompy_features' -include { PLOT_UPSET } from '../../../modules/local/custom/plot_upset' +include { GAWK as REFORMAT_HEADER } from '../../../modules/nf-core/gawk' +include { TABIX_BGZIP as TABIX_BGZIP_UNZIP } from '../../../modules/nf-core/tabix/bgzip' +include { TABIX_BGZIPTABIX } from '../../../modules/nf-core/tabix/bgziptabix' +include { BCFTOOLS_MERGE } from '../../../modules/nf-core/bcftools/merge' +include { BCFTOOLS_REHEADER as BCFTOOLS_REHEADER_COMPARE } from '../../../modules/nf-core/bcftools/reheader' +include { BCFTOOLS_SORT as BCFTOOLS_SORT_COMPARE } from '../../../modules/nf-core/bcftools/sort' +include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index' +include { SURVIVOR_MERGE } from '../../../modules/nf-core/survivor/merge' +include { GATK4_VARIANTSTOTABLE as VARIANTSTOTABLE } from '../../../modules/nf-core/gatk4/variantstotable' +include { MERGE_SOMPY_FEATURES } from '../../../modules/local/custom/merge_sompy_features' +include { PLOT_UPSET } from '../../../modules/local/custom/plot_upset' workflow COMPARE_BENCHMARK_RESULTS { @@ -19,11 +22,13 @@ workflow COMPARE_BENCHMARK_RESULTS { evaluations_csv // channel: [val(meta), csv] fasta // reference channel [val(meta), ref.fa] fai // reference channel [val(meta), ref.fa.fai] + dictionary // reference channel [val(meta), genome.dict] main: - versions = channel.empty() - merged_vcfs = channel.empty() - ch_plots = channel.empty() + versions = channel.empty() 
+ merged_vcfs = channel.empty() + merged_tbis = channel.empty() + ch_plots = channel.empty() if (params.variant_type == "small" | params.variant_type == "snv" | params.variant_type == "indel"){ @@ -45,7 +50,9 @@ workflow COMPARE_BENCHMARK_RESULTS { fai, [[],[]] ) + merged_vcfs = merged_vcfs.mix(BCFTOOLS_MERGE.out.vcf) + merged_tbis = merged_tbis.mix(BCFTOOLS_MERGE.out.index) } else{ // SV part @@ -68,16 +75,34 @@ workflow COMPARE_BENCHMARK_RESULTS { 0, 30 ) + merged_vcfs = merged_vcfs.mix(SURVIVOR_MERGE.out.vcf) + // fix header + BCFTOOLS_REHEADER_COMPARE( + merged_vcfs.map { meta, vcf -> [ meta, vcf, [], [] ] }, + fai + ) + + BCFTOOLS_SORT_COMPARE( + BCFTOOLS_REHEADER_COMPARE.out.vcf + ) + + merged_tbis = merged_tbis.mix(BCFTOOLS_SORT_COMPARE.out.tbi) } - // convert vcf files to csv - VCF_TO_CSV( - merged_vcfs - ) - versions = versions.mix(VCF_TO_CSV.out.versions.first()) + variantstotable_input_ch = merged_vcfs + .join(merged_tbis) + .map{ meta, vcf, tbi -> [meta, vcf, tbi, [], [], []] } + // convert vcf files to tsv + VARIANTSTOTABLE( + variantstotable_input_ch, + fasta, + fai, + dictionary + ) + versions = versions.mix(VARIANTSTOTABLE.out.versions_gatk4.first()) MERGE_SOMPY_FEATURES( evaluations_csv.groupTuple() @@ -85,11 +110,11 @@ workflow COMPARE_BENCHMARK_RESULTS { versions = versions.mix(MERGE_SOMPY_FEATURES.out.versions.first()) if (!params.skip_plots.contains("upset")){ - VCF_TO_CSV.out.output.mix(MERGE_SOMPY_FEATURES.out.output).map{ - meta, csv -> + VARIANTSTOTABLE.out.table.mix(MERGE_SOMPY_FEATURES.out.output).map{ + meta, tsv -> def newMeta = meta.clone() newMeta.remove('tag') - tuple(newMeta,csv) + tuple(newMeta,tsv) }.set{upset_input} PLOT_UPSET( diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 01f3817c..6c49d78d 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -1,7 +1,7 @@ { "Params: --analysis 'germline' --variant_type 'structural' --method 'svanalyzer'": { "content": [ - 73, + 76, { 
"BCFTOOLS_DEDUP": { "bcftools": 1.22 @@ -9,6 +9,9 @@ "BCFTOOLS_NORM": { "bcftools": 1.22 }, + "BCFTOOLS_REHEADER_COMPARE": { + "bcftools": 1.22 + }, "BCFTOOLS_REHEADER_QUERY": { "bcftools": 1.22 }, @@ -18,6 +21,9 @@ "BCFTOOLS_SORT": { "bcftools": 1.22 }, + "BCFTOOLS_SORT_COMPARE": { + "bcftools": 1.22 + }, "BCFTOOLS_STATS": { "bcftools": 1.22 }, @@ -39,9 +45,6 @@ "PLOT_SVLEN_DIST": { "python": "3.14.0" }, - "PLOT_UPSET": { - "python": "3.13.7" - }, "SURVIVOR_MERGE": { "survivor": "1.0.7" }, @@ -57,9 +60,6 @@ "TABIX_TABIX_2": { "tabix": 1.21 }, - "VCF_TO_CSV": { - "python": "3.13.0" - }, "Workflow": { "nf-core/variantbenchmarking": "v1.5.0dev" } @@ -105,12 +105,6 @@ "structural/multiqc/multiqc_plots/svg/variant_calling_summary.svg", "structural/multiqc/multiqc_report.html", "structural/summary", - "structural/summary/comparisons", - "structural/summary/comparisons/svbenchmark", - "structural/summary/comparisons/svbenchmark/svbenchmark.FN.csv", - "structural/summary/comparisons/svbenchmark/svbenchmark.FP.csv", - "structural/summary/comparisons/svbenchmark/svbenchmark.TP_base.csv", - "structural/summary/comparisons/svbenchmark/svbenchmark.TP_comp.csv", "structural/summary/datavzrd", "structural/summary/datavzrd/svbenchmark", "structural/summary/datavzrd/svbenchmark/index.html", @@ -132,8 +126,6 @@ "structural/summary/plots/svbenchmark/svbenchmark.FP.structural.mqc.png", "structural/summary/plots/svbenchmark/svbenchmark.TP_base.structural.mqc.png", "structural/summary/plots/svbenchmark/svbenchmark.TP_comp.structural.mqc.png", - "structural/summary/plots/svbenchmark/upset_svbenchmark_tp_fn_mqc.png", - "structural/summary/plots/svbenchmark/upset_svbenchmark_tp_fp_mqc.png", "structural/summary/plots/svbenchmark/variants_by_tool_svbenchmark_mqc.png", "structural/summary/tables", "structural/summary/tables/svbenchmark", @@ -202,7 +194,7 @@ "test3.manta_mqc.stats:md5,c6d5b9f40c4aa8c7b30155464eb52e3c" ] ], - "timestamp": "2026-02-27T11:34:29.828485302", + "timestamp": 
"2026-03-14T15:23:44.610507714", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -210,7 +202,7 @@ }, "-stub": { "content": [ - 73, + 76, { "BCFTOOLS_DEDUP": { "bcftools": 1.22 @@ -218,6 +210,9 @@ "BCFTOOLS_NORM": { "bcftools": 1.22 }, + "BCFTOOLS_REHEADER_COMPARE": { + "bcftools": 1.22 + }, "BCFTOOLS_REHEADER_QUERY": { "bcftools": 1.22 }, @@ -227,6 +222,9 @@ "BCFTOOLS_SORT": { "bcftools": 1.22 }, + "BCFTOOLS_SORT_COMPARE": { + "bcftools": 1.22 + }, "BCFTOOLS_STATS": { "bcftools": 1.22 }, @@ -248,9 +246,6 @@ "PLOT_SVLEN_DIST": { "python": "3.14.0" }, - "PLOT_UPSET": { - "python": "3.13.7" - }, "SURVIVOR_MERGE": { "survivor": "1.0.7" }, @@ -266,9 +261,6 @@ "TABIX_TABIX_2": { "tabix": 1.21 }, - "VCF_TO_CSV": { - "python": "3.13.0" - }, "Workflow": { "nf-core/variantbenchmarking": "v1.5.0dev" } @@ -291,12 +283,6 @@ "structural/multiqc/multiqc_plots", "structural/multiqc/multiqc_report.html", "structural/summary", - "structural/summary/comparisons", - "structural/summary/comparisons/svbenchmark", - "structural/summary/comparisons/svbenchmark/svbenchmark.FN.csv", - "structural/summary/comparisons/svbenchmark/svbenchmark.FP.csv", - "structural/summary/comparisons/svbenchmark/svbenchmark.TP_base.csv", - "structural/summary/comparisons/svbenchmark/svbenchmark.TP_comp.csv", "structural/summary/datavzrd", "structural/summary/datavzrd/svbenchmark", "structural/summary/datavzrd/svbenchmark/index.html", @@ -323,7 +309,6 @@ "structural/summary/plots/svbenchmark/svbenchmark.FP.svlen.png", "structural/summary/plots/svbenchmark/svbenchmark.TP_base.svlen.png", "structural/summary/plots/svbenchmark/svbenchmark.TP_comp.svlen.png", - "structural/summary/plots/svbenchmark/upset_svbenchmark.upset.mqc.png", "structural/summary/plots/svbenchmark/variants_by_tool_svbenchmark_mqc.png", "structural/summary/tables", "structural/summary/tables/svbenchmark", @@ -389,7 +374,7 @@ "test3.manta_mqc.stats:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "timestamp": "2026-02-27T11:38:41.451821202", 
+ "timestamp": "2026-03-14T15:25:18.63508426", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" diff --git a/tests/germline_small.nf.test.snap b/tests/germline_small.nf.test.snap index 0da4b357..dd3a2ed4 100644 --- a/tests/germline_small.nf.test.snap +++ b/tests/germline_small.nf.test.snap @@ -1,7 +1,7 @@ { "-stub": { "content": [ - 116, + 106, { "BCFTOOLS_DEDUP": { "bcftools": 1.22 @@ -72,9 +72,6 @@ "PLOT_SVLEN_DIST": { "python": "3.14.0" }, - "PLOT_UPSET": { - "python": "3.13.7" - }, "REFORMAT_HEADER": { "gawk": "5.3.0" }, @@ -88,9 +85,6 @@ "bgzip": 1.21, "tabix": 1.21 }, - "VCF_TO_CSV": { - "python": "3.13.0" - }, "Workflow": { "nf-core/variantbenchmarking": "v1.5.0dev" } @@ -114,17 +108,6 @@ "small/multiqc/multiqc_plots", "small/multiqc/multiqc_report.html", "small/summary", - "small/summary/comparisons", - "small/summary/comparisons/happy", - "small/summary/comparisons/happy/happy.FN.csv", - "small/summary/comparisons/happy/happy.FP.csv", - "small/summary/comparisons/happy/happy.TP_base.csv", - "small/summary/comparisons/happy/happy.TP_comp.csv", - "small/summary/comparisons/rtgtools", - "small/summary/comparisons/rtgtools/rtgtools.FN.csv", - "small/summary/comparisons/rtgtools/rtgtools.FP.csv", - "small/summary/comparisons/rtgtools/rtgtools.TP_base.csv", - "small/summary/comparisons/rtgtools/rtgtools.TP_comp.csv", "small/summary/datavzrd", "small/summary/datavzrd/happy", "small/summary/datavzrd/happy/index.html", @@ -169,7 +152,6 @@ "small/summary/plots/happy/happy.TP_base.svlen.png", "small/summary/plots/happy/happy.TP_comp.svlen.png", "small/summary/plots/happy/metric_by_tool_happy_mqc.png", - "small/summary/plots/happy/upset_happy.upset.mqc.png", "small/summary/plots/happy/variants_by_tool_happy_mqc.png", "small/summary/plots/rtgtools", "small/summary/plots/rtgtools/metric_by_tool_rtgtools_mqc.png", @@ -177,7 +159,6 @@ "small/summary/plots/rtgtools/rtgtools.FP.svlen.png", "small/summary/plots/rtgtools/rtgtools.TP_base.svlen.png", 
"small/summary/plots/rtgtools/rtgtools.TP_comp.svlen.png", - "small/summary/plots/rtgtools/upset_rtgtools.upset.mqc.png", "small/summary/plots/rtgtools/variants_by_tool_rtgtools_mqc.png", "small/summary/tables", "small/summary/tables/happy", @@ -280,7 +261,7 @@ "test7.bcftools.bcftools_stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "timestamp": "2026-02-27T11:49:52.227964314", + "timestamp": "2026-03-15T10:18:43.182353215", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -288,7 +269,7 @@ }, "Params: --analysis 'germline' --variant_type 'small' --method 'happy,rtgtools'": { "content": [ - 116, + 106, { "BCFTOOLS_DEDUP": { "bcftools": 1.22 @@ -359,9 +340,6 @@ "PLOT_SVLEN_DIST": { "python": "3.14.0" }, - "PLOT_UPSET": { - "python": "3.13.7" - }, "REFORMAT_HEADER": { "gawk": "5.3.0" }, @@ -375,9 +353,6 @@ "bgzip": 1.21, "tabix": 1.21 }, - "VCF_TO_CSV": { - "python": "3.13.0" - }, "Workflow": { "nf-core/variantbenchmarking": "v1.5.0dev" } @@ -480,17 +455,6 @@ "small/multiqc/multiqc_plots/svg/variant_calling_summary.svg", "small/multiqc/multiqc_report.html", "small/summary", - "small/summary/comparisons", - "small/summary/comparisons/happy", - "small/summary/comparisons/happy/happy.FN.csv", - "small/summary/comparisons/happy/happy.FP.csv", - "small/summary/comparisons/happy/happy.TP_base.csv", - "small/summary/comparisons/happy/happy.TP_comp.csv", - "small/summary/comparisons/rtgtools", - "small/summary/comparisons/rtgtools/rtgtools.FN.csv", - "small/summary/comparisons/rtgtools/rtgtools.FP.csv", - "small/summary/comparisons/rtgtools/rtgtools.TP_base.csv", - "small/summary/comparisons/rtgtools/rtgtools.TP_comp.csv", "small/summary/datavzrd", "small/summary/datavzrd/happy", "small/summary/datavzrd/happy/index.html", @@ -534,8 +498,6 @@ "small/summary/plots/happy/happy.FP.small.mqc.png", "small/summary/plots/happy/happy.TP_base.small.mqc.png", "small/summary/plots/happy/happy.TP_comp.small.mqc.png", - "small/summary/plots/happy/upset_happy_tp_fn_mqc.png", - 
"small/summary/plots/happy/upset_happy_tp_fp_mqc.png", "small/summary/plots/rtgtools", "small/summary/plots/rtgtools/f1_by_tool_rtgtools_mqc.png", "small/summary/plots/rtgtools/pr_recall_by_tool_rtgtools_mqc.png", @@ -543,8 +505,6 @@ "small/summary/plots/rtgtools/rtgtools.FP.small.mqc.png", "small/summary/plots/rtgtools/rtgtools.TP_base.small.mqc.png", "small/summary/plots/rtgtools/rtgtools.TP_comp.small.mqc.png", - "small/summary/plots/rtgtools/upset_rtgtools_tp_fn_mqc.png", - "small/summary/plots/rtgtools/upset_rtgtools_tp_fp_mqc.png", "small/summary/plots/rtgtools/variants_by_tool_rtgtools_mqc.png", "small/summary/tables", "small/summary/tables/happy", @@ -672,7 +632,7 @@ "test7.bcftools.bcftools_stats.txt:md5,ccc59737c476e1f77dcef59c50d0e56a" ] ], - "timestamp": "2026-02-27T11:45:33.824921772", + "timestamp": "2026-03-15T10:17:05.518302564", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" diff --git a/tests/germline_sv.nf.test.snap b/tests/germline_sv.nf.test.snap index 02b9d505..9629b5c8 100644 --- a/tests/germline_sv.nf.test.snap +++ b/tests/germline_sv.nf.test.snap @@ -1,7 +1,7 @@ { "Params: --analysis 'germline' --variant_type 'structural' --method 'truvari,svbenchmark,wittyer'": { "content": [ - 140, + 146, { "BCFTOOLS_DEDUP": { "bcftools": 1.22 @@ -24,6 +24,9 @@ "BCFTOOLS_REHEADER_4": { "bcftools": 1.22 }, + "BCFTOOLS_REHEADER_COMPARE": { + "bcftools": 1.22 + }, "BCFTOOLS_REHEADER_QUERY": { "bcftools": 1.22 }, @@ -33,6 +36,9 @@ "BCFTOOLS_SORT": { "bcftools": 1.22 }, + "BCFTOOLS_SORT_COMPARE": { + "bcftools": 1.22 + }, "BCFTOOLS_SPLIT_MULTI": { "bcftools": 1.22 }, @@ -54,9 +60,6 @@ "PLOT_SVLEN_DIST": { "python": "3.14.0" }, - "PLOT_UPSET": { - "python": "3.13.7" - }, "SURVIVOR_FILTER": { "survivor": "1.0.7" }, @@ -88,9 +91,6 @@ "TRUVARI_BENCH": { "truvari": "5.4.0" }, - "VCF_TO_CSV": { - "python": "3.13.0" - }, "WITTYER": { "wittyer": "0.5.2.0" }, @@ -172,17 +172,6 @@ "structural/multiqc/multiqc_plots/svg/variant_calling_summary.svg", 
"structural/multiqc/multiqc_report.html", "structural/summary", - "structural/summary/comparisons", - "structural/summary/comparisons/svbenchmark", - "structural/summary/comparisons/svbenchmark/svbenchmark.FN.csv", - "structural/summary/comparisons/svbenchmark/svbenchmark.FP.csv", - "structural/summary/comparisons/svbenchmark/svbenchmark.TP_base.csv", - "structural/summary/comparisons/svbenchmark/svbenchmark.TP_comp.csv", - "structural/summary/comparisons/truvari", - "structural/summary/comparisons/truvari/truvari.FN.csv", - "structural/summary/comparisons/truvari/truvari.FP.csv", - "structural/summary/comparisons/truvari/truvari.TP_base.csv", - "structural/summary/comparisons/truvari/truvari.TP_comp.csv", "structural/summary/datavzrd", "structural/summary/datavzrd/svbenchmark", "structural/summary/datavzrd/svbenchmark/index.html", @@ -228,8 +217,6 @@ "structural/summary/plots/svbenchmark/svbenchmark.FP.structural.mqc.png", "structural/summary/plots/svbenchmark/svbenchmark.TP_base.structural.mqc.png", "structural/summary/plots/svbenchmark/svbenchmark.TP_comp.structural.mqc.png", - "structural/summary/plots/svbenchmark/upset_svbenchmark_tp_fn_mqc.png", - "structural/summary/plots/svbenchmark/upset_svbenchmark_tp_fp_mqc.png", "structural/summary/plots/svbenchmark/variants_by_tool_svbenchmark_mqc.png", "structural/summary/plots/truvari", "structural/summary/plots/truvari/f1_by_tool_truvari_mqc.png", @@ -238,8 +225,6 @@ "structural/summary/plots/truvari/truvari.FP.structural.mqc.png", "structural/summary/plots/truvari/truvari.TP_base.structural.mqc.png", "structural/summary/plots/truvari/truvari.TP_comp.structural.mqc.png", - "structural/summary/plots/truvari/upset_truvari_tp_fn_mqc.png", - "structural/summary/plots/truvari/upset_truvari_tp_fp_mqc.png", "structural/summary/plots/truvari/variants_by_tool_truvari_mqc.png", "structural/summary/plots/wittyer", "structural/summary/plots/wittyer/Base_f1_by_tool_wittyer_mqc.png", @@ -361,7 +346,7 @@ 
"test3.delly_mqc.stats:md5,5afa28bb298f049d0e3127fccb97ceb0" ] ], - "timestamp": "2026-02-27T11:56:19.466420274", + "timestamp": "2026-03-14T15:34:52.569091421", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -369,7 +354,7 @@ }, "-stub": { "content": [ - 140, + 146, { "BCFTOOLS_DEDUP": { "bcftools": 1.22 @@ -392,6 +377,9 @@ "BCFTOOLS_REHEADER_4": { "bcftools": 1.22 }, + "BCFTOOLS_REHEADER_COMPARE": { + "bcftools": 1.22 + }, "BCFTOOLS_REHEADER_QUERY": { "bcftools": 1.22 }, @@ -401,6 +389,9 @@ "BCFTOOLS_SORT": { "bcftools": 1.22 }, + "BCFTOOLS_SORT_COMPARE": { + "bcftools": 1.22 + }, "BCFTOOLS_SPLIT_MULTI": { "bcftools": 1.22 }, @@ -422,9 +413,6 @@ "PLOT_SVLEN_DIST": { "python": "3.14.0" }, - "PLOT_UPSET": { - "python": "3.13.7" - }, "SURVIVOR_FILTER": { "survivor": "1.0.7" }, @@ -456,9 +444,6 @@ "TRUVARI_BENCH": { "truvari": "5.4.0" }, - "VCF_TO_CSV": { - "python": "3.13.0" - }, "WITTYER": { "wittyer": "0.5.2.0" }, @@ -484,17 +469,6 @@ "structural/multiqc/multiqc_plots", "structural/multiqc/multiqc_report.html", "structural/summary", - "structural/summary/comparisons", - "structural/summary/comparisons/svbenchmark", - "structural/summary/comparisons/svbenchmark/svbenchmark.FN.csv", - "structural/summary/comparisons/svbenchmark/svbenchmark.FP.csv", - "structural/summary/comparisons/svbenchmark/svbenchmark.TP_base.csv", - "structural/summary/comparisons/svbenchmark/svbenchmark.TP_comp.csv", - "structural/summary/comparisons/truvari", - "structural/summary/comparisons/truvari/truvari.FN.csv", - "structural/summary/comparisons/truvari/truvari.FP.csv", - "structural/summary/comparisons/truvari/truvari.TP_base.csv", - "structural/summary/comparisons/truvari/truvari.TP_comp.csv", "structural/summary/datavzrd", "structural/summary/datavzrd/svbenchmark", "structural/summary/datavzrd/svbenchmark/index.html", @@ -557,7 +531,6 @@ "structural/summary/plots/svbenchmark/svbenchmark.FP.svlen.png", "structural/summary/plots/svbenchmark/svbenchmark.TP_base.svlen.png", 
"structural/summary/plots/svbenchmark/svbenchmark.TP_comp.svlen.png", - "structural/summary/plots/svbenchmark/upset_svbenchmark.upset.mqc.png", "structural/summary/plots/svbenchmark/variants_by_tool_svbenchmark_mqc.png", "structural/summary/plots/truvari", "structural/summary/plots/truvari/metric_by_tool_truvari_mqc.png", @@ -565,7 +538,6 @@ "structural/summary/plots/truvari/truvari.FP.svlen.png", "structural/summary/plots/truvari/truvari.TP_base.svlen.png", "structural/summary/plots/truvari/truvari.TP_comp.svlen.png", - "structural/summary/plots/truvari/upset_truvari.upset.mqc.png", "structural/summary/plots/truvari/variants_by_tool_truvari_mqc.png", "structural/summary/plots/wittyer", "structural/summary/plots/wittyer/metric_by_tool_wittyer_mqc.png", @@ -682,7 +654,7 @@ "test3.delly_mqc.stats:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "timestamp": "2026-02-27T12:00:34.844914569", + "timestamp": "2026-03-14T15:36:35.789463062", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" diff --git a/tests/liftover_truth.nf.test.snap b/tests/liftover_truth.nf.test.snap index d05e921e..88f9947f 100644 --- a/tests/liftover_truth.nf.test.snap +++ b/tests/liftover_truth.nf.test.snap @@ -1,7 +1,7 @@ { "Params: --analysis 'germline' --variant_type 'small' --method 'happy,rtgtools' --liftover 'truth'": { "content": [ - 110, + 102, { "BCFTOOLS_ANNOTATE": { "bcftools": 1.22 @@ -94,9 +94,6 @@ "UCSC_LIFTOVER": { "ucsc": 482 }, - "VCF_TO_CSV": { - "python": "3.13.0" - }, "Workflow": { "nf-core/variantbenchmarking": "v1.5.0dev" } @@ -204,17 +201,6 @@ "small/multiqc/multiqc_plots/svg/variant_calling_summary.svg", "small/multiqc/multiqc_report.html", "small/summary", - "small/summary/comparisons", - "small/summary/comparisons/happy", - "small/summary/comparisons/happy/happy.FN.csv", - "small/summary/comparisons/happy/happy.FP.csv", - "small/summary/comparisons/happy/happy.TP_base.csv", - "small/summary/comparisons/happy/happy.TP_comp.csv", - "small/summary/comparisons/rtgtools", - 
"small/summary/comparisons/rtgtools/rtgtools.FN.csv", - "small/summary/comparisons/rtgtools/rtgtools.FP.csv", - "small/summary/comparisons/rtgtools/rtgtools.TP_base.csv", - "small/summary/comparisons/rtgtools/rtgtools.TP_comp.csv", "small/summary/datavzrd", "small/summary/datavzrd/happy", "small/summary/datavzrd/happy/index.html", @@ -367,7 +353,7 @@ "test7.bcftools.bcftools_stats.txt:md5,ccc59737c476e1f77dcef59c50d0e56a" ] ], - "timestamp": "2026-02-27T12:08:05.574223607", + "timestamp": "2026-03-15T10:27:07.895499559", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" diff --git a/tests/somatic_cnv.nf.test.snap b/tests/somatic_cnv.nf.test.snap index 6715dc91..2a311486 100644 --- a/tests/somatic_cnv.nf.test.snap +++ b/tests/somatic_cnv.nf.test.snap @@ -1,7 +1,7 @@ { "Params: --analysis 'somatic' --variant_type 'copynumber' --method 'truvari,intersect'": { "content": [ - 44, + 48, { "BCFTOOLS_REHEADER_1": { "bcftools": 1.22 @@ -15,12 +15,18 @@ "BCFTOOLS_REHEADER_4": { "bcftools": 1.22 }, + "BCFTOOLS_REHEADER_COMPARE": { + "bcftools": 1.22 + }, "BCFTOOLS_REHEADER_QUERY": { "bcftools": 1.22 }, "BCFTOOLS_REHEADER_TRUTH": { "bcftools": 1.22 }, + "BCFTOOLS_SORT_COMPARE": { + "bcftools": 1.22 + }, "BCFTOOLS_SPLIT_MULTI": { "bcftools": 1.22 }, @@ -60,9 +66,6 @@ "TRUVARI_BENCH": { "truvari": "5.4.0" }, - "VCF_TO_CSV": { - "python": "3.13.0" - }, "WITTYER": { "wittyer": "0.5.2.0" }, @@ -101,12 +104,6 @@ "copynumber/multiqc/multiqc_plots/svg/variant_calling_summary.svg", "copynumber/multiqc/multiqc_report.html", "copynumber/summary", - "copynumber/summary/comparisons", - "copynumber/summary/comparisons/truvari", - "copynumber/summary/comparisons/truvari/truvari.FN.csv", - "copynumber/summary/comparisons/truvari/truvari.FP.csv", - "copynumber/summary/comparisons/truvari/truvari.TP_base.csv", - "copynumber/summary/comparisons/truvari/truvari.TP_comp.csv", "copynumber/summary/datavzrd", "copynumber/summary/datavzrd/intersect", 
"copynumber/summary/datavzrd/intersect/index.html", @@ -237,7 +234,7 @@ "test15.SEQC2.ascat_stats.csv:md5,55108a9ed1b1c3aa0687e73b3f89115a" ] ], - "timestamp": "2026-02-27T12:13:20.996617982", + "timestamp": "2026-03-14T15:44:52.953383768", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" diff --git a/tests/somatic_indel.nf.test.snap b/tests/somatic_indel.nf.test.snap index 0829aa8b..3c577e86 100644 --- a/tests/somatic_indel.nf.test.snap +++ b/tests/somatic_indel.nf.test.snap @@ -1,59 +1,8 @@ { "Params: --analysis 'somatic' --variant_type 'indel' --method 'rtgtools'": { "content": [ - 101, + 49, { - "ADD_GT_STRELKA": { - "gawk": "5.3.0" - }, - "BCFTOOLS_FILTER": { - "bcftools": 1.22 - }, - "BCFTOOLS_MERGE": { - "bcftools": 1.22 - }, - "BCFTOOLS_NORM": { - "bcftools": 1.22 - }, - "BCFTOOLS_REHEADER_1": { - "bcftools": 1.22 - }, - "BCFTOOLS_REHEADER_2": { - "bcftools": 1.22 - }, - "BCFTOOLS_REHEADER_3": { - "bcftools": 1.22 - }, - "BCFTOOLS_REHEADER_4": { - "bcftools": 1.22 - }, - "BCFTOOLS_REHEADER_QUERY": { - "bcftools": 1.22 - }, - "BCFTOOLS_REHEADER_TRUTH": { - "bcftools": 1.22 - }, - "BCFTOOLS_SORT": { - "bcftools": 1.22 - }, - "BCFTOOLS_STATS": { - "bcftools": 1.22 - }, - "BCFTOOLS_VIEW_CONTIGS": { - "bcftools": 1.22 - }, - "BCFTOOLS_VIEW_FILTERMISSING": { - "bcftools": 1.22 - }, - "BCFTOOLS_VIEW_SUBSAMPLE": { - "bcftools": 1.22 - }, - "DATAVZRD": { - "datavzrd": "2.63.3" - }, - "HAPPY_SOMPY": { - "happy": "0.3.15" - }, "MERGE_REPORTS": { "python": "3.13.0" }, @@ -66,29 +15,9 @@ "PLOT_UPSET": { "python": "3.13.7" }, - "REFORMAT_HEADER": { - "gawk": "5.3.0" - }, - "RTGTOOLS_FORMAT": { - "rtgtools": 3.13 - }, - "RTGTOOLS_VCFEVAL": { - "rtgtools": 3.13 - }, "SPLIT_SOMPY_FEATURES": { "python": "3.13.0" }, - "TABIX_BGZIPTABIX": { - "bgzip": 1.21, - "tabix": 1.21 - }, - "TABIX_BGZIPTABIX_GT": { - "bgzip": 1.21, - "tabix": 1.21 - }, - "VCF_TO_CSV": { - "python": "3.13.0" - }, "Workflow": { "nf-core/variantbenchmarking": "v1.5.0dev" } @@ -102,65 +31,10 @@ 
"indel/SEQC2/stats", "indel/SEQC2/stats/bcftools", "indel/SEQC2/stats/bcftools/SEQC2.bcftools_stats.txt", - "indel/multiqc", - "indel/multiqc/multiqc_data", - "indel/multiqc/multiqc_data/llms-full.txt", - "indel/multiqc/multiqc_data/multiqc.log", - "indel/multiqc/multiqc_data/multiqc.parquet", - "indel/multiqc/multiqc_data/multiqc_bcftools_stats.txt", - "indel/multiqc/multiqc_data/multiqc_citations.txt", - "indel/multiqc/multiqc_data/multiqc_data.json", - "indel/multiqc/multiqc_data/multiqc_general_stats.txt", - "indel/multiqc/multiqc_data/multiqc_software_versions.txt", - "indel/multiqc/multiqc_data/multiqc_sompy_combined_data.txt", - "indel/multiqc/multiqc_data/multiqc_sompy_indel_data.txt", - "indel/multiqc/multiqc_data/multiqc_sompy_snv_data.txt", - "indel/multiqc/multiqc_data/multiqc_sources.txt", - "indel/multiqc/multiqc_data/multiqc_variant_calling_summary.txt", - "indel/multiqc/multiqc_data/sompy_combined_plot.txt", - "indel/multiqc/multiqc_data/sompy_indel_plot.txt", - "indel/multiqc/multiqc_data/sompy_snv_plot.txt", - "indel/multiqc/multiqc_plots", - "indel/multiqc/multiqc_plots/pdf", - "indel/multiqc/multiqc_plots/pdf/sompy_combined_plot.pdf", - "indel/multiqc/multiqc_plots/pdf/sompy_indel_plot.pdf", - "indel/multiqc/multiqc_plots/pdf/sompy_snv_plot.pdf", - "indel/multiqc/multiqc_plots/pdf/variant_calling_summary.pdf", - "indel/multiqc/multiqc_plots/png", - "indel/multiqc/multiqc_plots/png/sompy_combined_plot.png", - "indel/multiqc/multiqc_plots/png/sompy_indel_plot.png", - "indel/multiqc/multiqc_plots/png/sompy_snv_plot.png", - "indel/multiqc/multiqc_plots/png/variant_calling_summary.png", - "indel/multiqc/multiqc_plots/svg", - "indel/multiqc/multiqc_plots/svg/sompy_combined_plot.svg", - "indel/multiqc/multiqc_plots/svg/sompy_indel_plot.svg", - "indel/multiqc/multiqc_plots/svg/sompy_snv_plot.svg", - "indel/multiqc/multiqc_plots/svg/variant_calling_summary.svg", - "indel/multiqc/multiqc_report.html", "indel/summary", "indel/summary/comparisons", - 
"indel/summary/comparisons/rtgtools", - "indel/summary/comparisons/rtgtools/rtgtools.FN.csv", - "indel/summary/comparisons/rtgtools/rtgtools.FP.csv", - "indel/summary/comparisons/rtgtools/rtgtools.TP_base.csv", - "indel/summary/comparisons/rtgtools/rtgtools.TP_comp.csv", "indel/summary/comparisons/sompy", - "indel/summary/comparisons/sompy/sompy.FN.csv", - "indel/summary/comparisons/sompy/sompy.FP.csv", - "indel/summary/comparisons/sompy/sompy.TP_comp.csv", "indel/summary/datavzrd", - "indel/summary/datavzrd/rtgtools", - "indel/summary/datavzrd/rtgtools/index.html", - "indel/summary/datavzrd/rtgtools/static", - "indel/summary/datavzrd/rtgtools/static/bundle.js", - "indel/summary/datavzrd/rtgtools/test", - "indel/summary/datavzrd/rtgtools/test/config.js", - "indel/summary/datavzrd/rtgtools/test/data", - "indel/summary/datavzrd/rtgtools/test/data/data_1.js", - "indel/summary/datavzrd/rtgtools/test/functions.js", - "indel/summary/datavzrd/rtgtools/test/index_1.html", - "indel/summary/datavzrd/rtgtools/test/plots", - "indel/summary/datavzrd/rtgtools/test/plots/plots.js", "indel/summary/datavzrd/sompy", "indel/summary/datavzrd/sompy/index.html", "indel/summary/datavzrd/sompy/static", @@ -174,12 +48,6 @@ "indel/summary/datavzrd/sompy/test/plots", "indel/summary/datavzrd/sompy/test/plots/plots.js", "indel/summary/plots", - "indel/summary/plots/rtgtools", - "indel/summary/plots/rtgtools/f1_by_tool_rtgtools_mqc.png", - "indel/summary/plots/rtgtools/pr_recall_by_tool_rtgtools_mqc.png", - "indel/summary/plots/rtgtools/upset_rtgtools_tp_fn_mqc.png", - "indel/summary/plots/rtgtools/upset_rtgtools_tp_fp_mqc.png", - "indel/summary/plots/rtgtools/variants_by_tool_rtgtools_mqc.png", "indel/summary/plots/sompy", "indel/summary/plots/sompy/f1_by_tool_sompy_mqc.png", "indel/summary/plots/sompy/pr_recall_by_tool_sompy_mqc.png", @@ -187,27 +55,11 @@ "indel/summary/plots/sompy/upset_sompy_tp_fp_mqc.png", "indel/summary/plots/sompy/variants_by_tool_sompy_mqc.png", "indel/summary/tables", 
- "indel/summary/tables/rtgtools", - "indel/summary/tables/rtgtools/rtgtools.summary.csv", "indel/summary/tables/sompy", "indel/summary/tables/sompy/sompy.regions.csv", "indel/summary/tables/sompy/sompy.summary.csv", "indel/test10", "indel/test10/benchmarks", - "indel/test10/benchmarks/rtgtools", - "indel/test10/benchmarks/rtgtools/test10.SEQC2.strelka.fn.vcf.gz", - "indel/test10/benchmarks/rtgtools/test10.SEQC2.strelka.fn.vcf.gz.tbi", - "indel/test10/benchmarks/rtgtools/test10.SEQC2.strelka.fp.vcf.gz", - "indel/test10/benchmarks/rtgtools/test10.SEQC2.strelka.fp.vcf.gz.tbi", - "indel/test10/benchmarks/rtgtools/test10.SEQC2.strelka.non_snp_roc.tsv.gz", - "indel/test10/benchmarks/rtgtools/test10.SEQC2.strelka.phasing.txt", - "indel/test10/benchmarks/rtgtools/test10.SEQC2.strelka.snp_roc.tsv.gz", - "indel/test10/benchmarks/rtgtools/test10.SEQC2.strelka.summary.txt", - "indel/test10/benchmarks/rtgtools/test10.SEQC2.strelka.tp-baseline.vcf.gz", - "indel/test10/benchmarks/rtgtools/test10.SEQC2.strelka.tp-baseline.vcf.gz.tbi", - "indel/test10/benchmarks/rtgtools/test10.SEQC2.strelka.tp.vcf.gz", - "indel/test10/benchmarks/rtgtools/test10.SEQC2.strelka.tp.vcf.gz.tbi", - "indel/test10/benchmarks/rtgtools/test10.SEQC2.strelka.weighted_roc.tsv.gz", "indel/test10/benchmarks/sompy", "indel/test10/benchmarks/sompy/test10.SEQC2.strelka.features.csv", "indel/test10/benchmarks/sompy/test10.SEQC2.strelka.metrics.json", @@ -246,20 +98,6 @@ "indel/test8/stats/bcftools/test8.freebayes.bcftools_stats.txt", "indel/test9", "indel/test9/benchmarks", - "indel/test9/benchmarks/rtgtools", - "indel/test9/benchmarks/rtgtools/test9.SEQC2.mutect2.fn.vcf.gz", - "indel/test9/benchmarks/rtgtools/test9.SEQC2.mutect2.fn.vcf.gz.tbi", - "indel/test9/benchmarks/rtgtools/test9.SEQC2.mutect2.fp.vcf.gz", - "indel/test9/benchmarks/rtgtools/test9.SEQC2.mutect2.fp.vcf.gz.tbi", - "indel/test9/benchmarks/rtgtools/test9.SEQC2.mutect2.non_snp_roc.tsv.gz", - 
"indel/test9/benchmarks/rtgtools/test9.SEQC2.mutect2.phasing.txt", - "indel/test9/benchmarks/rtgtools/test9.SEQC2.mutect2.snp_roc.tsv.gz", - "indel/test9/benchmarks/rtgtools/test9.SEQC2.mutect2.summary.txt", - "indel/test9/benchmarks/rtgtools/test9.SEQC2.mutect2.tp-baseline.vcf.gz", - "indel/test9/benchmarks/rtgtools/test9.SEQC2.mutect2.tp-baseline.vcf.gz.tbi", - "indel/test9/benchmarks/rtgtools/test9.SEQC2.mutect2.tp.vcf.gz", - "indel/test9/benchmarks/rtgtools/test9.SEQC2.mutect2.tp.vcf.gz.tbi", - "indel/test9/benchmarks/rtgtools/test9.SEQC2.mutect2.weighted_roc.tsv.gz", "indel/test9/benchmarks/sompy", "indel/test9/benchmarks/sompy/test9.SEQC2.mutect2.features.csv", "indel/test9/benchmarks/sompy/test9.SEQC2.mutect2.metrics.json", @@ -299,23 +137,10 @@ ], [ "SEQC2.bcftools_stats.txt:md5,47f7b5da20bbab097d28fc71b898b143", - "multiqc_bcftools_stats.txt:md5,cd952a789de12d189c98539e5d5d04a4", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_sompy_combined_data.txt:md5,7b23ddc824ec4d13127874aa09892938", - "multiqc_sompy_indel_data.txt:md5,7f9c671fba3312dd5a9b707e13d03d98", - "multiqc_sompy_snv_data.txt:md5,30a0d0c90c0fb7f730f133a8924a1f86", - "multiqc_variant_calling_summary.txt:md5,c303501030f6adea87e0a1ea99b2cbe7", - "sompy_combined_plot.txt:md5,aeb950aa8f723d1c32ee68ce5842a889", - "sompy_indel_plot.txt:md5,585a4cf684e12d25ec72bbd25d4df769", - "sompy_snv_plot.txt:md5,370db91dd8d8ec6b98a0c20e969e29ee", - "test10.SEQC2.strelka.phasing.txt:md5,38920536b8c3e241e873c07ba61762e6", - "test10.SEQC2.strelka.summary.txt:md5,9fc3ceee950fe0e89ad2ba6c6a2dea54", "test10.strelka.bcftools_stats.txt:md5,f84f5db1fc3a3edc5b203781af1fa98e", "test8.SEQC2.freebayes.phasing.txt:md5,38920536b8c3e241e873c07ba61762e6", "test8.SEQC2.freebayes.summary.txt:md5,98291a704b23bc72494df77ee647d015", "test8.freebayes.bcftools_stats.txt:md5,cabd582340677dbbfd4e6002feffc390", - "test9.SEQC2.mutect2.phasing.txt:md5,38920536b8c3e241e873c07ba61762e6", - 
"test9.SEQC2.mutect2.summary.txt:md5,0c3107b6083a60f4ee57fe97e9297a9c", "test9.mutect2.bcftools_stats.txt:md5,88dc79172f15baa6f24e197f14cf09cb", "nameIndex0:md5,720ad6dfad3d1ee07a3a2bc3ed953395", "namedata0:md5,758c4a60948aed6c6a4c78323aa73ab3", @@ -335,7 +160,7 @@ "suffixpointer0:md5,468281ffb10d7dd934289af762a03781" ] ], - "timestamp": "2026-03-04T11:20:38.975739693", + "timestamp": "2026-03-16T21:46:13.245340893", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" diff --git a/tests/somatic_snv.nf.test.snap b/tests/somatic_snv.nf.test.snap index ada3bfe0..3365e88e 100644 --- a/tests/somatic_snv.nf.test.snap +++ b/tests/somatic_snv.nf.test.snap @@ -1,94 +1,11 @@ { "Params: --analysis 'somatic' --variant_type 'snv' --method 'sompy'": { "content": [ - 101, + 40, { - "ADD_GT_STRELKA": { - "gawk": "5.3.0" - }, - "BCFTOOLS_FILTER": { - "bcftools": 1.22 - }, - "BCFTOOLS_MERGE": { - "bcftools": 1.22 - }, - "BCFTOOLS_NORM": { - "bcftools": 1.22 - }, - "BCFTOOLS_REHEADER_1": { - "bcftools": 1.22 - }, - "BCFTOOLS_REHEADER_2": { - "bcftools": 1.22 - }, - "BCFTOOLS_REHEADER_3": { - "bcftools": 1.22 - }, - "BCFTOOLS_REHEADER_4": { - "bcftools": 1.22 - }, - "BCFTOOLS_REHEADER_QUERY": { - "bcftools": 1.22 - }, - "BCFTOOLS_REHEADER_TRUTH": { - "bcftools": 1.22 - }, - "BCFTOOLS_SORT": { - "bcftools": 1.22 - }, - "BCFTOOLS_STATS": { - "bcftools": 1.22 - }, - "BCFTOOLS_VIEW_CONTIGS": { - "bcftools": 1.22 - }, - "BCFTOOLS_VIEW_FILTERMISSING": { - "bcftools": 1.22 - }, - "BCFTOOLS_VIEW_SUBSAMPLE": { - "bcftools": 1.22 - }, - "DATAVZRD": { - "datavzrd": "2.63.3" - }, - "HAPPY_SOMPY": { - "happy": "0.3.15" - }, - "MERGE_REPORTS": { - "python": "3.13.0" - }, - "MERGE_SOMPY_FEATURES": { - "python": "3.13.0" - }, - "PLOTS": { - "r-base": "4.3.1" - }, - "PLOT_UPSET": { - "python": "3.13.7" - }, - "REFORMAT_HEADER": { - "gawk": "5.3.0" - }, - "RTGTOOLS_FORMAT": { - "rtgtools": 3.13 - }, - "RTGTOOLS_VCFEVAL": { - "rtgtools": 3.13 - }, "SPLIT_SOMPY_FEATURES": { "python": "3.13.0" }, - 
"TABIX_BGZIPTABIX": { - "bgzip": 1.21, - "tabix": 1.21 - }, - "TABIX_BGZIPTABIX_GT": { - "bgzip": 1.21, - "tabix": 1.21 - }, - "VCF_TO_CSV": { - "python": "3.13.0" - }, "Workflow": { "nf-core/variantbenchmarking": "v1.5.0dev" } @@ -128,146 +45,8 @@ "snv/SEQC2/stats", "snv/SEQC2/stats/bcftools", "snv/SEQC2/stats/bcftools/SEQC2.bcftools_stats.txt", - "snv/multiqc", - "snv/multiqc/multiqc_data", - "snv/multiqc/multiqc_data/bcftools-stats-subtypes.txt", - "snv/multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", - "snv/multiqc/multiqc_data/bcftools_stats_variant_depths.txt", - "snv/multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", - "snv/multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", - "snv/multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", - "snv/multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", - "snv/multiqc/multiqc_data/llms-full.txt", - "snv/multiqc/multiqc_data/multiqc.log", - "snv/multiqc/multiqc_data/multiqc.parquet", - "snv/multiqc/multiqc_data/multiqc_bcftools_stats.txt", - "snv/multiqc/multiqc_data/multiqc_citations.txt", - "snv/multiqc/multiqc_data/multiqc_data.json", - "snv/multiqc/multiqc_data/multiqc_general_stats.txt", - "snv/multiqc/multiqc_data/multiqc_software_versions.txt", - "snv/multiqc/multiqc_data/multiqc_sompy_combined_data.txt", - "snv/multiqc/multiqc_data/multiqc_sompy_indel_data.txt", - "snv/multiqc/multiqc_data/multiqc_sompy_snv_data.txt", - "snv/multiqc/multiqc_data/multiqc_sources.txt", - "snv/multiqc/multiqc_data/multiqc_variant_calling_summary.txt", - "snv/multiqc/multiqc_data/sompy_combined_plot.txt", - "snv/multiqc/multiqc_data/sompy_indel_plot.txt", - "snv/multiqc/multiqc_data/sompy_snv_plot.txt", - "snv/multiqc/multiqc_plots", - "snv/multiqc/multiqc_plots/pdf", - "snv/multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", - "snv/multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", - "snv/multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", - 
"snv/multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", - "snv/multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", - "snv/multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", - "snv/multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", - "snv/multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", - "snv/multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", - "snv/multiqc/multiqc_plots/pdf/sompy_combined_plot.pdf", - "snv/multiqc/multiqc_plots/pdf/sompy_indel_plot.pdf", - "snv/multiqc/multiqc_plots/pdf/sompy_snv_plot.pdf", - "snv/multiqc/multiqc_plots/pdf/variant_calling_summary.pdf", - "snv/multiqc/multiqc_plots/png", - "snv/multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", - "snv/multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", - "snv/multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", - "snv/multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", - "snv/multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", - "snv/multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", - "snv/multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", - "snv/multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", - "snv/multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", - "snv/multiqc/multiqc_plots/png/sompy_combined_plot.png", - "snv/multiqc/multiqc_plots/png/sompy_indel_plot.png", - "snv/multiqc/multiqc_plots/png/sompy_snv_plot.png", - "snv/multiqc/multiqc_plots/png/variant_calling_summary.png", - "snv/multiqc/multiqc_plots/svg", - "snv/multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", - "snv/multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", - "snv/multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", - "snv/multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", - "snv/multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", - 
"snv/multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", - "snv/multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", - "snv/multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", - "snv/multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", - "snv/multiqc/multiqc_plots/svg/sompy_combined_plot.svg", - "snv/multiqc/multiqc_plots/svg/sompy_indel_plot.svg", - "snv/multiqc/multiqc_plots/svg/sompy_snv_plot.svg", - "snv/multiqc/multiqc_plots/svg/variant_calling_summary.svg", - "snv/multiqc/multiqc_report.html", - "snv/summary", - "snv/summary/comparisons", - "snv/summary/comparisons/rtgtools", - "snv/summary/comparisons/rtgtools/rtgtools.FN.csv", - "snv/summary/comparisons/rtgtools/rtgtools.FP.csv", - "snv/summary/comparisons/rtgtools/rtgtools.TP_base.csv", - "snv/summary/comparisons/rtgtools/rtgtools.TP_comp.csv", - "snv/summary/comparisons/sompy", - "snv/summary/comparisons/sompy/sompy.FN.csv", - "snv/summary/comparisons/sompy/sompy.FP.csv", - "snv/summary/comparisons/sompy/sompy.TP_comp.csv", - "snv/summary/datavzrd", - "snv/summary/datavzrd/rtgtools", - "snv/summary/datavzrd/rtgtools/index.html", - "snv/summary/datavzrd/rtgtools/static", - "snv/summary/datavzrd/rtgtools/static/bundle.js", - "snv/summary/datavzrd/rtgtools/test", - "snv/summary/datavzrd/rtgtools/test/config.js", - "snv/summary/datavzrd/rtgtools/test/data", - "snv/summary/datavzrd/rtgtools/test/data/data_1.js", - "snv/summary/datavzrd/rtgtools/test/functions.js", - "snv/summary/datavzrd/rtgtools/test/index_1.html", - "snv/summary/datavzrd/rtgtools/test/plots", - "snv/summary/datavzrd/rtgtools/test/plots/plots.js", - "snv/summary/datavzrd/sompy", - "snv/summary/datavzrd/sompy/index.html", - "snv/summary/datavzrd/sompy/static", - "snv/summary/datavzrd/sompy/static/bundle.js", - "snv/summary/datavzrd/sompy/test", - "snv/summary/datavzrd/sompy/test/config.js", - "snv/summary/datavzrd/sompy/test/data", - "snv/summary/datavzrd/sompy/test/data/data_1.js", - 
"snv/summary/datavzrd/sompy/test/functions.js", - "snv/summary/datavzrd/sompy/test/index_1.html", - "snv/summary/datavzrd/sompy/test/plots", - "snv/summary/datavzrd/sompy/test/plots/plots.js", - "snv/summary/plots", - "snv/summary/plots/rtgtools", - "snv/summary/plots/rtgtools/f1_by_tool_rtgtools_mqc.png", - "snv/summary/plots/rtgtools/pr_recall_by_tool_rtgtools_mqc.png", - "snv/summary/plots/rtgtools/upset_rtgtools_tp_fn_mqc.png", - "snv/summary/plots/rtgtools/upset_rtgtools_tp_fp_mqc.png", - "snv/summary/plots/rtgtools/variants_by_tool_rtgtools_mqc.png", - "snv/summary/plots/sompy", - "snv/summary/plots/sompy/f1_by_tool_sompy_mqc.png", - "snv/summary/plots/sompy/pr_recall_by_tool_sompy_mqc.png", - "snv/summary/plots/sompy/upset_sompy_tp_fn_mqc.png", - "snv/summary/plots/sompy/upset_sompy_tp_fp_mqc.png", - "snv/summary/plots/sompy/variants_by_tool_sompy_mqc.png", - "snv/summary/tables", - "snv/summary/tables/rtgtools", - "snv/summary/tables/rtgtools/rtgtools.summary.csv", - "snv/summary/tables/sompy", - "snv/summary/tables/sompy/sompy.regions.csv", - "snv/summary/tables/sompy/sompy.summary.csv", "snv/test10", "snv/test10/benchmarks", - "snv/test10/benchmarks/rtgtools", - "snv/test10/benchmarks/rtgtools/test10.SEQC2.strelka.fn.vcf.gz", - "snv/test10/benchmarks/rtgtools/test10.SEQC2.strelka.fn.vcf.gz.tbi", - "snv/test10/benchmarks/rtgtools/test10.SEQC2.strelka.fp.vcf.gz", - "snv/test10/benchmarks/rtgtools/test10.SEQC2.strelka.fp.vcf.gz.tbi", - "snv/test10/benchmarks/rtgtools/test10.SEQC2.strelka.non_snp_roc.tsv.gz", - "snv/test10/benchmarks/rtgtools/test10.SEQC2.strelka.phasing.txt", - "snv/test10/benchmarks/rtgtools/test10.SEQC2.strelka.snp_roc.tsv.gz", - "snv/test10/benchmarks/rtgtools/test10.SEQC2.strelka.summary.txt", - "snv/test10/benchmarks/rtgtools/test10.SEQC2.strelka.tp-baseline.vcf.gz", - "snv/test10/benchmarks/rtgtools/test10.SEQC2.strelka.tp-baseline.vcf.gz.tbi", - "snv/test10/benchmarks/rtgtools/test10.SEQC2.strelka.tp.vcf.gz", - 
"snv/test10/benchmarks/rtgtools/test10.SEQC2.strelka.tp.vcf.gz.tbi", - "snv/test10/benchmarks/rtgtools/test10.SEQC2.strelka.weighted_roc.tsv.gz", "snv/test10/benchmarks/sompy", "snv/test10/benchmarks/sompy/test10.SEQC2.strelka.features.csv", "snv/test10/benchmarks/sompy/test10.SEQC2.strelka.metrics.json", @@ -280,20 +59,6 @@ "snv/test10/stats/bcftools/test10.strelka.bcftools_stats.txt", "snv/test8", "snv/test8/benchmarks", - "snv/test8/benchmarks/rtgtools", - "snv/test8/benchmarks/rtgtools/test8.SEQC2.freebayes.fn.vcf.gz", - "snv/test8/benchmarks/rtgtools/test8.SEQC2.freebayes.fn.vcf.gz.tbi", - "snv/test8/benchmarks/rtgtools/test8.SEQC2.freebayes.fp.vcf.gz", - "snv/test8/benchmarks/rtgtools/test8.SEQC2.freebayes.fp.vcf.gz.tbi", - "snv/test8/benchmarks/rtgtools/test8.SEQC2.freebayes.non_snp_roc.tsv.gz", - "snv/test8/benchmarks/rtgtools/test8.SEQC2.freebayes.phasing.txt", - "snv/test8/benchmarks/rtgtools/test8.SEQC2.freebayes.snp_roc.tsv.gz", - "snv/test8/benchmarks/rtgtools/test8.SEQC2.freebayes.summary.txt", - "snv/test8/benchmarks/rtgtools/test8.SEQC2.freebayes.tp-baseline.vcf.gz", - "snv/test8/benchmarks/rtgtools/test8.SEQC2.freebayes.tp-baseline.vcf.gz.tbi", - "snv/test8/benchmarks/rtgtools/test8.SEQC2.freebayes.tp.vcf.gz", - "snv/test8/benchmarks/rtgtools/test8.SEQC2.freebayes.tp.vcf.gz.tbi", - "snv/test8/benchmarks/rtgtools/test8.SEQC2.freebayes.weighted_roc.tsv.gz", "snv/test8/benchmarks/sompy", "snv/test8/benchmarks/sompy/test8.SEQC2.freebayes.features.csv", "snv/test8/benchmarks/sompy/test8.SEQC2.freebayes.metrics.json", @@ -349,34 +114,14 @@ "suffixdata0:md5,f2876dd730673cd49c4de191001f634e", "suffixpointer0:md5,468281ffb10d7dd934289af762a03781", "SEQC2.bcftools_stats.txt:md5,689717d58bc2faefb1701a5ee983c593", - "bcftools-stats-subtypes.txt:md5,bba32c82e906895f9740f84e11216252", - "bcftools_stats_indel-lengths.txt:md5,bd2e91c35c8442abd21e80763d575afc", - "bcftools_stats_variant_depths.txt:md5,e37a8bb9b62c27cae98870b0342ac37a", - 
"bcftools_stats_vqc_Count_Indels.txt:md5,c9f26ff78232b4ca262e16b558959fdd", - "bcftools_stats_vqc_Count_SNP.txt:md5,0c07768ed6f2942f49becee0b55d3983", - "bcftools_stats_vqc_Count_Transitions.txt:md5,e27493661d676f42e5833eb0b81f986f", - "bcftools_stats_vqc_Count_Transversions.txt:md5,392e38613596b1aade889f4334a55608", - "multiqc_bcftools_stats.txt:md5,e8890d56756d5aa88b4d7345bd6b2142", - "multiqc_citations.txt:md5,5cbab4ecbe14049d965fd97bd61d252b", - "multiqc_sompy_combined_data.txt:md5,41fdcf2904b4a0debcfc7422e3f7ec71", - "multiqc_sompy_indel_data.txt:md5,003f324b11e554fbab958ae7483afed4", - "multiqc_sompy_snv_data.txt:md5,5b29846dd04d4695acc78209698336f2", - "multiqc_variant_calling_summary.txt:md5,88cfa882945f5027361446684edb831b", - "sompy_combined_plot.txt:md5,918077ad1dd8a6c53c1759ef4c9114ef", - "sompy_indel_plot.txt:md5,072e830d52939faeae5795e6f1cbaed8", - "sompy_snv_plot.txt:md5,91a3003c7a0a0898cccdee5394a19d8d", - "test10.SEQC2.strelka.phasing.txt:md5,38920536b8c3e241e873c07ba61762e6", - "test10.SEQC2.strelka.summary.txt:md5,3c356b5cd2542d288c4a7d22d81d9f9f", "test10.strelka.bcftools_stats.txt:md5,0064a9f57d02f1e8154f5d81eaf2dfe9", - "test8.SEQC2.freebayes.phasing.txt:md5,38920536b8c3e241e873c07ba61762e6", - "test8.SEQC2.freebayes.summary.txt:md5,538928aaa876747a4b5b80a043804153", "test8.freebayes.bcftools_stats.txt:md5,98e3627e2d15d7f31661bd56c021447e", "test9.SEQC2.mutect2.phasing.txt:md5,6d29664d5d20e216220ae051a677eec5", "test9.SEQC2.mutect2.summary.txt:md5,0839c17833ad17028b7c2cd5c64c87d7", "test9.mutect2.bcftools_stats.txt:md5,13f07551d60c681e81499dbd189b4537" ] ], - "timestamp": "2026-03-04T11:33:50.887098807", + "timestamp": "2026-03-16T21:48:43.907342995", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -384,7 +129,7 @@ }, "-stub": { "content": [ - 101, + 96, { "ADD_GT_STRELKA": { "gawk": "5.3.0" @@ -469,9 +214,6 @@ "bgzip": 1.21, "tabix": 1.21 }, - "VCF_TO_CSV": { - "python": "3.13.0" - }, "Workflow": { "nf-core/variantbenchmarking": 
"v1.5.0dev" } @@ -496,15 +238,7 @@ "snv/multiqc/multiqc_report.html", "snv/summary", "snv/summary/comparisons", - "snv/summary/comparisons/rtgtools", - "snv/summary/comparisons/rtgtools/rtgtools.FN.csv", - "snv/summary/comparisons/rtgtools/rtgtools.FP.csv", - "snv/summary/comparisons/rtgtools/rtgtools.TP_base.csv", - "snv/summary/comparisons/rtgtools/rtgtools.TP_comp.csv", "snv/summary/comparisons/sompy", - "snv/summary/comparisons/sompy/sompy.FN.summary.csv", - "snv/summary/comparisons/sompy/sompy.FP.summary.csv", - "snv/summary/comparisons/sompy/sompy.TP_comp.summary.csv", "snv/summary/datavzrd", "snv/summary/datavzrd/rtgtools", "snv/summary/datavzrd/rtgtools/index.html", @@ -545,7 +279,6 @@ "snv/summary/plots", "snv/summary/plots/rtgtools", "snv/summary/plots/rtgtools/metric_by_tool_rtgtools_mqc.png", - "snv/summary/plots/rtgtools/upset_rtgtools.upset.mqc.png", "snv/summary/plots/rtgtools/variants_by_tool_rtgtools_mqc.png", "snv/summary/plots/sompy", "snv/summary/plots/sompy/metric_by_tool_sompy_mqc.png", @@ -651,7 +384,7 @@ "test9.mutect2.bcftools_stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "timestamp": "2026-03-04T11:42:30.978115087", + "timestamp": "2026-03-16T21:50:57.853572329", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" diff --git a/tests/somatic_snv_ensemble.nf.test.snap b/tests/somatic_snv_ensemble.nf.test.snap index 28d3f819..df28498e 100644 --- a/tests/somatic_snv_ensemble.nf.test.snap +++ b/tests/somatic_snv_ensemble.nf.test.snap @@ -1,7 +1,7 @@ { "Params: --analysis 'somatic' --variant_type 'snv' --method 'sompy' --ensemble_truth 2": { "content": [ - 102, + 97, { "ADD_GT_STRELKA": { "gawk": "5.3.0" @@ -96,9 +96,6 @@ "bgzip": 1.21, "tabix": 1.21 }, - "VCF_TO_CSV": { - "python": "3.13.0" - }, "Workflow": { "nf-core/variantbenchmarking": "v1.5.0dev" } @@ -202,15 +199,7 @@ "snv/multiqc/multiqc_report.html", "snv/summary", "snv/summary/comparisons", - "snv/summary/comparisons/rtgtools", - 
"snv/summary/comparisons/rtgtools/rtgtools.FN.csv", - "snv/summary/comparisons/rtgtools/rtgtools.FP.csv", - "snv/summary/comparisons/rtgtools/rtgtools.TP_base.csv", - "snv/summary/comparisons/rtgtools/rtgtools.TP_comp.csv", "snv/summary/comparisons/sompy", - "snv/summary/comparisons/sompy/sompy.FN.csv", - "snv/summary/comparisons/sompy/sompy.FP.csv", - "snv/summary/comparisons/sompy/sompy.TP_comp.csv", "snv/summary/datavzrd", "snv/summary/datavzrd/rtgtools", "snv/summary/datavzrd/rtgtools/index.html", @@ -240,8 +229,6 @@ "snv/summary/plots/rtgtools", "snv/summary/plots/rtgtools/f1_by_tool_rtgtools_mqc.png", "snv/summary/plots/rtgtools/pr_recall_by_tool_rtgtools_mqc.png", - "snv/summary/plots/rtgtools/upset_rtgtools_tp_fn_mqc.png", - "snv/summary/plots/rtgtools/upset_rtgtools_tp_fp_mqc.png", "snv/summary/plots/rtgtools/variants_by_tool_rtgtools_mqc.png", "snv/summary/plots/sompy", "snv/summary/plots/sompy/f1_by_tool_sompy_mqc.png", @@ -385,7 +372,7 @@ "truth.bcftools_stats.txt:md5,3bd6461cc52798f85d95cdd9ecb6dfa2" ] ], - "timestamp": "2026-03-04T11:53:32.817033457", + "timestamp": "2026-03-15T10:37:36.561055124", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" diff --git a/tests/somatic_sv.nf.test.snap b/tests/somatic_sv.nf.test.snap index 869c9460..fed14a39 100644 --- a/tests/somatic_sv.nf.test.snap +++ b/tests/somatic_sv.nf.test.snap @@ -1,7 +1,7 @@ { "Params: --analysis 'somatic' --variant_type 'structural' --method 'truvari,svbenchmark'": { "content": [ - 61, + 64, { "BCFTOOLS_NORM": { "bcftools": 1.22 @@ -18,12 +18,18 @@ "BCFTOOLS_REHEADER_4": { "bcftools": 1.22 }, + "BCFTOOLS_REHEADER_COMPARE": { + "bcftools": 1.22 + }, "BCFTOOLS_REHEADER_QUERY": { "bcftools": 1.22 }, "BCFTOOLS_REHEADER_TRUTH": { "bcftools": 1.22 }, + "BCFTOOLS_SORT_COMPARE": { + "bcftools": 1.22 + }, "BCFTOOLS_STATS": { "bcftools": 1.22 }, @@ -45,9 +51,6 @@ "PLOT_SVLEN_DIST": { "python": "3.14.0" }, - "PLOT_UPSET": { - "python": "3.13.7" - }, "SURVIVOR_FILTER": { "survivor": "1.0.7" }, @@ 
-73,9 +76,6 @@ "TRUVARI_BENCH": { "truvari": "5.4.0" }, - "VCF_TO_CSV": { - "python": "3.13.0" - }, "Workflow": { "nf-core/variantbenchmarking": "v1.5.0dev" } @@ -154,12 +154,6 @@ "structural/multiqc/multiqc_plots/svg/variant_calling_summary.svg", "structural/multiqc/multiqc_report.html", "structural/summary", - "structural/summary/comparisons", - "structural/summary/comparisons/truvari", - "structural/summary/comparisons/truvari/truvari.FN.csv", - "structural/summary/comparisons/truvari/truvari.FP.csv", - "structural/summary/comparisons/truvari/truvari.TP_base.csv", - "structural/summary/comparisons/truvari/truvari.TP_comp.csv", "structural/summary/datavzrd", "structural/summary/datavzrd/truvari", "structural/summary/datavzrd/truvari/index.html", @@ -181,8 +175,6 @@ "structural/summary/plots/truvari/truvari.FP.structural.mqc.png", "structural/summary/plots/truvari/truvari.TP_base.structural.mqc.png", "structural/summary/plots/truvari/truvari.TP_comp.structural.mqc.png", - "structural/summary/plots/truvari/upset_truvari_tp_fn_mqc.png", - "structural/summary/plots/truvari/upset_truvari_tp_fp_mqc.png", "structural/summary/plots/truvari/variants_by_tool_truvari_mqc.png", "structural/summary/tables", "structural/summary/tables/truvari", @@ -241,7 +233,7 @@ "test12.tiddit_mqc.stats:md5,950514053b461a3868a12fd5ced0a3d8" ] ], - "timestamp": "2026-02-27T12:40:11.847413253", + "timestamp": "2026-03-14T15:59:37.062119089", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" diff --git a/tests/somatic_sv_ensemble.nf.test.snap b/tests/somatic_sv_ensemble.nf.test.snap index 184f81af..a2393ce3 100644 --- a/tests/somatic_sv_ensemble.nf.test.snap +++ b/tests/somatic_sv_ensemble.nf.test.snap @@ -1,7 +1,7 @@ { "Params: --analysis 'somatic' --variant_type 'structural' --method 'truvari' --ensemble_truth 2": { "content": [ - 63, + 66, { "BCFTOOLS_NORM": { "bcftools": 1.22 @@ -18,9 +18,15 @@ "BCFTOOLS_REHEADER_4": { "bcftools": 1.22 }, + "BCFTOOLS_REHEADER_COMPARE": { + "bcftools": 
1.22 + }, "BCFTOOLS_REHEADER_QUERY": { "bcftools": 1.22 }, + "BCFTOOLS_SORT_COMPARE": { + "bcftools": 1.22 + }, "BCFTOOLS_SORT_SV": { "bcftools": 1.22 }, @@ -48,9 +54,6 @@ "PLOT_SVLEN_DIST": { "python": "3.14.0" }, - "PLOT_UPSET": { - "python": "3.13.7" - }, "REFORMAT_TRUTH_SV": { "gawk": "5.3.0" }, @@ -82,9 +85,6 @@ "TRUVARI_BENCH": { "truvari": "5.4.0" }, - "VCF_TO_CSV": { - "python": "3.13.0" - }, "Workflow": { "nf-core/variantbenchmarking": "v1.5.0dev" } @@ -154,12 +154,6 @@ "structural/multiqc/multiqc_plots/svg/variant_calling_summary.svg", "structural/multiqc/multiqc_report.html", "structural/summary", - "structural/summary/comparisons", - "structural/summary/comparisons/truvari", - "structural/summary/comparisons/truvari/truvari.FN.csv", - "structural/summary/comparisons/truvari/truvari.FP.csv", - "structural/summary/comparisons/truvari/truvari.TP_base.csv", - "structural/summary/comparisons/truvari/truvari.TP_comp.csv", "structural/summary/datavzrd", "structural/summary/datavzrd/truvari", "structural/summary/datavzrd/truvari/index.html", @@ -181,8 +175,6 @@ "structural/summary/plots/truvari/truvari.FP.structural.mqc.png", "structural/summary/plots/truvari/truvari.TP_base.structural.mqc.png", "structural/summary/plots/truvari/truvari.TP_comp.structural.mqc.png", - "structural/summary/plots/truvari/upset_truvari_tp_fn_mqc.png", - "structural/summary/plots/truvari/upset_truvari_tp_fp_mqc.png", "structural/summary/plots/truvari/variants_by_tool_truvari_mqc.png", "structural/summary/tables", "structural/summary/tables/truvari", @@ -250,7 +242,7 @@ "truth_mqc.stats:md5,8fc181e15516e21c860075306937484e" ] ], - "timestamp": "2026-03-04T12:19:43.987701275", + "timestamp": "2026-03-15T10:07:48.48522284", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" diff --git a/workflows/variantbenchmarking.nf b/workflows/variantbenchmarking.nf index 599f1b40..729d9bfb 100644 --- a/workflows/variantbenchmarking.nf +++ b/workflows/variantbenchmarking.nf @@ -30,7 +30,7 @@ 
include { COMPARE_BENCHMARK_RESULTS } from '../subworkflows/local/compare_benc include { INTERSECT_STATISTICS } from '../subworkflows/local/intersect_statistics' include { BND_BENCHMARK } from '../subworkflows/local/bnd_benchmark' include { CONCORDANCE_ANALYSIS } from '../subworkflows/local/concordance_analysis' -include { ENSEMLE_TEST_VCFS } from '../subworkflows/local/ensemble_test_vcfs' +include { ENSEMLE_TEST_VCFS } from '../subworkflows/local/ensemble_test_vcfs' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -338,7 +338,8 @@ workflow VARIANTBENCHMARKING { evals_ch, evals_csv_ch, fasta, - fai + fai, + dictionary ) ch_versions = ch_versions.mix(COMPARE_BENCHMARK_RESULTS.out.versions)