diff --git a/.gitignore b/.gitignore index c5a1bb45..d2dce263 100644 --- a/.gitignore +++ b/.gitignore @@ -116,8 +116,15 @@ venv.bak/ # Ignore real-world test samples test_input/SRR* +test_input/DB* # Ignore dev output TEST_*/ *.output +STELIOS_TEST/ +marine_sediment_dbh/ + +slurm_run.sh + + diff --git a/RunTimeResearchObject-f0b553d37e4255a3291393948f3e308bd88ed301.zip b/RunTimeResearchObject-f0b553d37e4255a3291393948f3e308bd88ed301.zip deleted file mode 100644 index bb6ad039..00000000 Binary files a/RunTimeResearchObject-f0b553d37e4255a3291393948f3e308bd88ed301.zip and /dev/null differ diff --git a/config.yml b/config.yml index b4ff4169..6a1e358a 100644 --- a/config.yml +++ b/config.yml @@ -1,15 +1,15 @@ # Steps to go for qc_and_merge_step: true -taxonomic_inventory: false -cgc_step: false -reads_functional_annotation: false +taxonomic_inventory: true +cgc_step: true +reads_functional_annotation: true assemble: false # Global -threads: 20 +threads: 40 -# For parallelization cases might worth decrease global value of threads -interproscan_threads: 20 +# As a rule of thumb, keep this at floor(threads/8), where threads is the previous parameter +interproscan_threads: 4 # fastp parameters detect_adapter_for_pe: false diff --git a/learning_cwl.md b/learning_cwl.md deleted file mode 100644 index 400e7229..00000000 --- a/learning_cwl.md +++ /dev/null @@ -1,212 +0,0 @@ -# CWL in a nutshell (not 😜) - - -## Conditionals - -To run a conditional, the input parameter that is used in the -expression to be evaluated, needs to be part of the input of the step!! -Not only in the `.cwl` main params. - - - - -## Data concepts - -An **`object`** is a data structure equivalent to the "object" type in JSON, consisting of a *unordered* set of **name/value pairs** (referred to here as **`fields`**) and where the name is a `string` and the value is a `string`, `number`, `boolean`, `array`, or `object`. - -> **`fields`** is a key term! 
- -A **`document`** is a file containing a serialized `object`, or an `array` of `objects`. - -A **`process`** is a basic unit of computation which accepts input data, performs some computation, and produces output data. -Examples include `CommandLineTools`, `Workflows`, and `ExpressionTools`. - -An **`input object`** is an object describing the inputs to an invocation of a process. -The **fields** of the input object are referred to as "input **parameters**". -Likewise for the **`output object`**. - - -An **`input schema`** describes the valid format (required fields, data types) for an input object. -Similarly for the case of `output schema`. - - - -The **`inputs`** section describes the inputs of the tool. -This is a ***mapped** list of input parameters* -(see the [YAML Guide](https://www.commonwl.org/user_guide/yaml/#maps) for more about the format) and each parameter includes an **identifier**, a **data type**, and *optionally* an **inputBinding**. ->The **`inputBinding`** describes how this input parameter should appear on the command line. - -For example: - -```bash= - inputBinding: - position: 2 - prefix: -i - separate: false -``` -where - -- `position`: the value of position is used to determine where parameter should appear on the command line -- `separate`: when `false`, the prefix and value are combined into a single argument -- `prefix` : argument on the command line before the parameter - - - -## Execution concepts - -A parameter is a named symbolic input or output of process, with an associated datatype or schema. During execution, values are assigned to parameters to make the input object or output object used for concrete process invocation. - -A CommandLineTool is a process characterized by the execution of a standalone, non-interactive program which is invoked on some input, produces output, and then terminates. 
- -A workflow is a process characterized by multiple subprocess steps, where step outputs are connected to the inputs of downstream steps to form a directed acylic graph, and independent steps may run concurrently. - -A runtime environment is the actual hardware and software environment when executing a command line tool. It includes, but is not limited to, the hardware architecture, hardware resources, operating system, software runtime (if applicable, such as the specific Python interpreter or the specific Java virtual machine), libraries, modules, packages, utilities, and data files required to run the tool. - -A workflow platform is a specific hardware and software implementation capable of interpreting CWL documents and executing the processes specified by the document. The responsibilities of the workflow platform may include scheduling process invocation, setting up the necessary runtime environment, making input data available, invoking the tool process, and collecting output. - -A workflow platform may choose to only implement the Command Line Tool Description part of the CWL specification. - - - -## [Document context](https://www.commonwl.org/v1.0/SchemaSalad.html#Document_model) - -The implicit context consists of the vocabulary defined by the schema and the base URI. By default, the base URI must be the URI that was used to load the document. It may be overridden by an explicit context. - -If a document consists of a root object, this object may contain the fields `$base`, `$namespaces`, `$schemas`, and `$graph`: - - - - - -## Runtime environment (!) - -Output files produced by tool execution must be written to the designated output directory. The initial current working directory when executing the tool must be the designated output directory. - - - - - -### Requirements & hints - -A process `requirement` modifies the semantics or runtime environment of a process. 
-If an implementation cannot satisfy all requirements, or a requirement is listed which is not recognized by the implementation, it is a fatal error and the implementation must not attempt to run the process, unless overridden at user option. - -A `hint` is similar to a requirement; however, it is not an error if an implementation cannot satisfy all hints. -The implementation may report a warning if a hint cannot be satisfied. - - -Often tool descriptions will be written for a specific version of a software. -To make it easier for others to use your descriptions, you can include a `SoftwareRequirement` field in the `hints` section. -This may also help to avoid confusion about which version of a tool the description was written for. -Here is an example: - -```cwl -hints: - SoftwareRequirement: - packages: - interproscan: - specs: [ "https://identifiers.org/rrid/RRID:SCR_005829" ] - version: [ "5.21-60" ] -``` - - -**Do not confuse with `requirements`**. -`requirements` are - -requirements: - ResourceRequirement: - ramMin: 10240 - coresMin: 3 - SchemaDefRequirement: - types: - - $import: InterProScan-apps.yml - - - -Optionally, implementations may allow requirements to be specified in the input object document as an array of requirements under the field name `cwl:requirements`. -If implementations allow this, then such requirements should be combined with any requirements present in the corresponding Process as if they were specified there. - -**Requirements specified in a parent Workflow are inherited by step processes if they are valid for that step**. If the substep is a `CommandLineTool` only the `InlineJavascriptRequirement`, `SchemaDefRequirement`, `DockerRequirement`, `SoftwareRequirement`, `InitialWorkDirRequirement`, `EnvVarRequirement`, `ShellCommandRequirement`, `ResourceRequirement` are valid. 
- -*As good practice, it is best to have process requirements be self-contained, such that each process can run successfully by itself.* - -**`Requirements` override `hints`**. If a process implementation provides a process requirement in hints which is also provided in requirements by an enclosing workflow or workflow step, the enclosing requirements takes precedence. - -> When a tool runs under CWL, the starting working directory is the designated output directory. - - -### Test your tool - -Once you have built your `.cwl` and your `.yml` files, you need to figure out whether your `tool` is working ok. -To do that, run the step your `tool` implements and keep track of its exact output. -Then, you may build a second `.yml` file (we will call it `tools-tests.yml`) which is like that: - -```yaml= -- job: tools/my_tool/my_tool_test.yml - tool: ../../tools/tool/my_tool.cwl - short_name: my_tool.cwl - doc: "TOOL" - output: - tool_output: - location: Any - basename: toul_output - class: Directory - listing: - - class: File - .. - .. - .. - -``` - -For a more complete example, you may see [here](https://github.com/mberacochea/microbetag/blob/78140c451ff7034a3bbc6ac1ec34efe9d0b8b742/tests/cwl/tools-tests.yml). - - - -`cwltest` does: -- runs the cwl file with the .yml input file -- compares the output values - - - -```cwl= - -``` - -## Important links to guide you - -A few rather important links to get to know the CWL framework: - -- [Runtime environment](https://www.commonwl.org/v1.0/CommandLineTool.html#Runtime_environment) -- [Writing workflows](https://www.commonwl.org/user_guide/21-1st-workflow/index.html) -- [Best practicies](https://doc.arvados.org/v1.3/user/cwl/cwl-style.html) - - - - -Tutorials: - -- [Getting started with CWL](https://docs.dockstore.org/en/1.11.0/getting-started/getting-started-with-cwl.html) - - -```cwl= -outputs: - compiled_class: - type: File - outputSource: compile/classfile -``` -The `outputs` section describes the outputs of the workflow. 
-This is a **list** of output parameters where each parameter consists of an identifier and a data type. -The `outputSource` connects the output parameter `classfile` of the `compile` step to the workflow output parameter `compiled_class`. - - - - -## Toil - -Note! Toil checks if the docker image specified by TOIL_APPLIANCE_SELF exists prior to - launching by using the docker v2 schema. This should be valid for any major docker - repository, but there is an option to override this if desired using the option: - `--forceDockerAppliance`. - - diff --git a/ro-crate-metadata-example.json b/ro-crate-metadata-example.json new file mode 100644 index 00000000..1cb33d9a --- /dev/null +++ b/ro-crate-metadata-example.json @@ -0,0 +1,512 @@ +{ + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.1" + } + }, + { + "@id": "./", + "@type": "Dataset", + "datePublished": "2023-03-25T19:45:41+00:00", + "hasPart": [ + { + "@id": "results/" + }, + { + "@id": "config.yml" + }, + { + "@id": "mini_dataset.yml" + }, + { + "@id": "results/functional-annotation/" + }, + { + "@id": "results/sequence-categorisation/" + }, + { + "@id": "results/taxonomy-summary/" + }, + { + "@id": "results/wgs-paired-SRR1620013.merged.unfiltered_fasta" + }, + { + "@id": "results/wgs-paired-SRR1620013.merged.fasta" + }, + { + "@id": "results/wgs-paired-SRR1620013.merged.qc_summary" + }, + { + "@id": "results/wgs-paired-SRR1620013_1.fastq.trimmed.fasta" + }, + { + "@id": "results/wgs-paired-SRR1620013_1.fastq.trimmed.qc_summary" + }, + { + "@id": "results/wgs-paired-SRR1620013_2.fastq.trimmed.fasta" + }, + { + "@id": "results/wgs-paired-SRR1620013_2.fastq.trimmed.qc_summary" + }, + { + "@id": "results/wgs-paired-SRR1620013.merged_CDS.ffn" + }, + { + "@id": "results/wgs-paired-SRR1620013.merged_CDS.faa" + }, + { + "@id": "results/fastp.html" + }, 
+ { + "@id": "results/wgs-paired-SRR1620013.merged.cmsearch.all.tblout.deoverlapped" + }, + { + "@id": "results/RNA-counts" + }, + { + "@id": "results/wgs-paired-SRR1620013.merged.motus.tsv" + }, + { + "@id": "results/functional-annotation/stats/" + }, + { + "@id": "results/functional-annotation/wgs-paired-SRR1620013.merged.summary.go" + }, + { + "@id": "results/functional-annotation/wgs-paired-SRR1620013.merged.summary.go_slim" + }, + { + "@id": "results/functional-annotation/wgs-paired-SRR1620013.merged_CDS.I5.tsv.gz" + }, + { + "@id": "results/functional-annotation/wgs-paired-SRR1620013.merged.hmm.tsv.gz" + }, + { + "@id": "results/functional-annotation/wgs-paired-SRR1620013.merged.summary.ips" + }, + { + "@id": "results/functional-annotation/wgs-paired-SRR1620013.merged.summary.ko" + }, + { + "@id": "results/functional-annotation/wgs-paired-SRR1620013.merged.summary.pfam" + }, + { + "@id": "results/functional-annotation/stats/interproscan.stats" + }, + { + "@id": "results/functional-annotation/stats/go.stats" + }, + { + "@id": "results/functional-annotation/stats/ko.stats" + }, + { + "@id": "results/functional-annotation/stats/pfam.stats" + }, + { + "@id": "results/functional-annotation/stats/orf.stats" + }, + { + "@id": "results/sequence-categorisation/tRNA.RF00005.fasta.gz" + }, + { + "@id": "results/sequence-categorisation/LSU_rRNA_bacteria.RF02541.fa.gz" + }, + { + "@id": "results/sequence-categorisation/SSU_rRNA_bacteria.RF00177.fa.gz" + }, + { + "@id": "results/taxonomy-summary/SSU/" + }, + { + "@id": "results/taxonomy-summary/LSU/" + }, + { + "@id": "results/taxonomy-summary/SSU/wgs-paired-SRR1620013.merged_SSU.fasta.mseq.gz" + }, + { + "@id": "results/taxonomy-summary/SSU/wgs-paired-SRR1620013.merged_SSU.fasta.mseq.tsv" + }, + { + "@id": "results/taxonomy-summary/SSU/wgs-paired-SRR1620013.merged_SSU.fasta.mseq.txt" + }, + { + "@id": "results/taxonomy-summary/SSU/krona.html" + }, + { + "@id": 
"results/taxonomy-summary/SSU/wgs-paired-SRR1620013.merged_SSU.fasta.mseq_hdf5.biom" + }, + { + "@id": "results/taxonomy-summary/SSU/wgs-paired-SRR1620013.merged_SSU.fasta.mseq_json.biom" + }, + { + "@id": "results/taxonomy-summary/LSU/wgs-paired-SRR1620013.merged_LSU.fasta.mseq.gz" + }, + { + "@id": "results/taxonomy-summary/LSU/wgs-paired-SRR1620013.merged_LSU.fasta.mseq.tsv" + }, + { + "@id": "results/taxonomy-summary/LSU/wgs-paired-SRR1620013.merged_LSU.fasta.mseq.txt" + }, + { + "@id": "results/taxonomy-summary/LSU/krona.html" + }, + { + "@id": "results/taxonomy-summary/LSU/wgs-paired-SRR1620013.merged_LSU.fasta.mseq_hdf5.biom" + }, + { + "@id": "results/taxonomy-summary/LSU/wgs-paired-SRR1620013.merged_LSU.fasta.mseq_json.biom" + } + ], + "name": "MetaGoFlow Results", + "publisher": { + "@id": "https://ror.org/0038zss60" + } + }, + { + "@id": "results/", + "@type": "Dataset" + }, + { + "@id": "config.yml", + "@type": "File", + "description": "The configuration file through which the user sets the values of the metaGOflow parameters.", + "encodingFormat": "text/yaml", + "name": "MetaGOflow configuration file" + }, + { + "@id": "mini_dataset.yml", + "@type": "File" + }, + { + "@id": "results/functional-annotation/", + "@type": "Dataset" + }, + { + "@id": "results/sequence-categorisation/", + "@type": "Dataset", + "description": "Identify specific loci in the sample.", + "name": "Sequence categorisation" + }, + { + "@id": "results/taxonomy-summary/", + "@type": "Dataset" + }, + { + "@id": "results/wgs-paired-SRR1620013.merged.unfiltered_fasta", + "@type": "File", + "encodingFormat": "text/plain", + "name": "" + }, + { + "@id": "results/wgs-paired-SRR1620013.merged.fasta", + "@type": "File", + "encodingFormat": "text/plain", + "name": "Merged filtered reads." + }, + { + "@id": "results/wgs-paired-SRR1620013.merged.qc_summary", + "@type": "File", + "encodingFormat": "text/plain", + "name": "Summary with statistics of the merged reads." 
+ }, + { + "@id": "results/wgs-paired-SRR1620013_1.fastq.trimmed.fasta", + "@type": "File", + "encodingFormat": "text/plain", + "name": "Filtered .fastq file of the single-end reads (forward/reverse)." + }, + { + "@id": "results/wgs-paired-SRR1620013_1.fastq.trimmed.qc_summary", + "@type": "File", + "encodingFormat": "text/plain", + "name": "Summary with statistics of the single-end reads (forward/reverse)." + }, + { + "@id": "results/wgs-paired-SRR1620013_2.fastq.trimmed.fasta", + "@type": "File", + "encodingFormat": "text/plain", + "name": "Filtered .fastq file of the single-end reads (forward/reverse)." + }, + { + "@id": "results/wgs-paired-SRR1620013_2.fastq.trimmed.qc_summary", + "@type": "File", + "encodingFormat": "text/plain", + "name": "Summary with statistics of the single-end reads (forward/reverse)." + }, + { + "@id": "results/wgs-paired-SRR1620013.merged_CDS.ffn", + "@type": "File", + "encodingFormat": "text/plain", + "name": "Coding sequences with nucleotides." + }, + { + "@id": "results/wgs-paired-SRR1620013.merged_CDS.faa", + "@type": "File", + "encodingFormat": "text/plain", + "name": "Coding sequences with amino acids." + }, + { + "@id": "results/fastp.html", + "@type": "File", + "description": "Quality control and preprocessing of FASTQ files", + "encodingFormat": "text/html", + "name": "FASTP analysis of raw sequence data" + }, + { + "@id": "results/wgs-paired-SRR1620013.merged.cmsearch.all.tblout.deoverlapped", + "@type": "File", + "encodingFormat": "text/plain", + "name": "Sequence hits against covariance model databases. Mandatory to run partially the functional annotation step of metaGOflow." 
+ }, + { + "@id": "results/RNA-counts", + "@type": "File", + "encodingFormat": "text/plain", + "name": "Number of RNAs counted" + }, + { + "@id": "results/wgs-paired-SRR1620013.merged.motus.tsv", + "@type": "File", + "encodingFormat": "text/plain", + "name": "" + }, + { + "@id": "results/functional-annotation/stats/", + "@type": "Dataset" + }, + { + "@id": "results/functional-annotation/wgs-paired-SRR1620013.merged.summary.go", + "@type": "File", + "encodingFormat": "text/plain", + "name": "Merged contigs GO summary" + }, + { + "@id": "results/functional-annotation/wgs-paired-SRR1620013.merged.summary.go_slim", + "@type": "File", + "encodingFormat": "text/plain", + "name": "Merged contigs GO slim summary" + }, + { + "@id": "results/functional-annotation/wgs-paired-SRR1620013.merged_CDS.I5.tsv.gz", + "@type": "File", + "encodingFormat": "application/zip", + "name": "Merged contigs CDS I5 summary" + }, + { + "@id": "results/functional-annotation/wgs-paired-SRR1620013.merged.hmm.tsv.gz", + "@type": "File", + "encodingFormat": "application/zip", + "name": "Merged contigs HMM summary" + }, + { + "@id": "results/functional-annotation/wgs-paired-SRR1620013.merged.summary.ips", + "@type": "File", + "encodingFormat": "text/plain", + "name": "Merged contigs InterProScan" + }, + { + "@id": "results/functional-annotation/wgs-paired-SRR1620013.merged.summary.ko", + "@type": "File", + "encodingFormat": "text/plain", + "name": "Merged contigs KO summary" + }, + { + "@id": "results/functional-annotation/wgs-paired-SRR1620013.merged.summary.pfam", + "@type": "File", + "encodingFormat": "text/plain", + "name": "Merged contigs PFAM summary" + }, + { + "@id": "results/functional-annotation/stats/interproscan.stats", + "@type": "File", + "encodingFormat": "text/plain", + "name": "InterProScan summary statistics" + }, + { + "@id": "results/functional-annotation/stats/go.stats", + "@type": "File", + "encodingFormat": "text/plain", + "name": "Gene Ontology summary statistics" + }, + { + 
"@id": "results/functional-annotation/stats/ko.stats", + "@type": "File", + "encodingFormat": "text/plain", + "name": "KEGG Orthology summary statistics" + }, + { + "@id": "results/functional-annotation/stats/pfam.stats", + "@type": "File", + "encodingFormat": "text/plain", + "name": "Pfam summary statistics" + }, + { + "@id": "results/functional-annotation/stats/orf.stats", + "@type": "File", + "encodingFormat": "text/plain", + "name": "ORF summary statistics" + }, + { + "@id": "results/sequence-categorisation/tRNA.RF00005.fasta.gz", + "@type": "File", + "description": "Identify specific loci in the sample.", + "encodingFormat": "application/zip", + "name": "Sequence categorisation" + }, + { + "@id": "results/sequence-categorisation/LSU_rRNA_bacteria.RF02541.fa.gz", + "@type": "File", + "description": "Identify specific loci in the sample.", + "encodingFormat": "application/zip", + "name": "Sequence categorisation" + }, + { + "@id": "results/sequence-categorisation/SSU_rRNA_bacteria.RF00177.fa.gz", + "@type": "File", + "description": "Identify specific loci in the sample.", + "encodingFormat": "application/zip", + "name": "Sequence categorisation" + }, + { + "@id": "results/taxonomy-summary/SSU/", + "@type": "Dataset" + }, + { + "@id": "results/taxonomy-summary/LSU/", + "@type": "Dataset" + }, + { + "@id": "results/taxonomy-summary/SSU/wgs-paired-SRR1620013.merged_SSU.fasta.mseq.gz", + "@type": "File", + "encodingFormat": "application/zip", + "name": "SSU sequences used for identification" + }, + { + "@id": "results/taxonomy-summary/SSU/wgs-paired-SRR1620013.merged_SSU.fasta.mseq.tsv", + "@type": "File", + "encodingFormat": "application/json-ld", + "name": "Tab-separated formatted taxon counts for SSU sequences" + }, + { + "@id": "results/taxonomy-summary/SSU/wgs-paired-SRR1620013.merged_SSU.fasta.mseq.txt", + "@type": "File", + "encodingFormat": "application/json-ld", + "name": "Text-based formatted taxon counts for SSU sequences" + }, + { + "@id": 
"results/taxonomy-summary/SSU/krona.html", + "@type": "File", + "encodingFormat": "application/html", + "name": "Krona summary of SSU taxonomic inventory" + }, + { + "@id": "results/taxonomy-summary/SSU/wgs-paired-SRR1620013.merged_SSU.fasta.mseq_hdf5.biom", + "@type": "File", + "encodingFormat": "application/json-ld", + "name": "BIOM formatted hdf5 taxon counts for SSU sequences" + }, + { + "@id": "results/taxonomy-summary/SSU/wgs-paired-SRR1620013.merged_SSU.fasta.mseq_json.biom", + "@type": "File", + "encodingFormat": "application/json-ld", + "name": "BIOM formatted taxon counts for SSU sequences" + }, + { + "@id": "results/taxonomy-summary/LSU/wgs-paired-SRR1620013.merged_LSU.fasta.mseq.gz", + "@type": "File", + "encodingFormat": "application/zip", + "name": "LSU sequences used for identification" + }, + { + "@id": "results/taxonomy-summary/LSU/wgs-paired-SRR1620013.merged_LSU.fasta.mseq.tsv", + "@type": "File", + "encodingFormat": "application/json-ld", + "name": "Tab-separated formatted taxon counts for LSU sequences" + }, + { + "@id": "results/taxonomy-summary/LSU/wgs-paired-SRR1620013.merged_LSU.fasta.mseq.txt", + "@type": "File", + "encodingFormat": "application/json-ld", + "name": "Text-based taxon counts for LSU sequences" + }, + { + "@id": "results/taxonomy-summary/LSU/krona.html", + "@type": "File", + "encodingFormat": "application/html", + "name": "Krona summary of LSU taxonomic inventory" + }, + { + "@id": "results/taxonomy-summary/LSU/wgs-paired-SRR1620013.merged_LSU.fasta.mseq_hdf5.biom", + "@type": "File", + "encodingFormat": "application/json-ld", + "name": "BIOM formatted hdf5 taxon counts for LSU sequences" + }, + { + "@id": "results/taxonomy-summary/LSU/wgs-paired-SRR1620013.merged_LSU.fasta.mseq_json.biom", + "@type": "File", + "encodingFormat": "application/json-ld", + "name": "BIOM formatted taxon counts for LSU sequences" + }, + { + "@id": "https://www.apache.org/licenses/LICENSE-2.0", + "@type": "CreativeWork", + "identifier": 
"https://spdx.org/licenses/Apache-2.0.html", + "name": "Apache License 2.0" + }, + { + "@id": "https://creativecommons.org/licenses/by/4.0/legalcode", + "@type": "CreativeWork", + "identifier": "https://spdx.org/licenses/CC-BY-4.0.html", + "name": "Creative Commons (CC-BY 4.0)" + }, + { + "@id": "mailto:help@embrc.org", + "@type": "ContactPoint", + "contactType": "Help Desk", + "email": "help@embrc.org", + "identifier": "help@embrc.org", + "url": "https://www.embrc.eu/about-us/contact-us" + }, + { + "@id": "https://ror.org/0038zss60", + "@type": "Organization", + "contactPoint": { + "@id": "mailto:help@embrc.org" + }, + "name": "European Marine Biological Resource Centre", + "url": "https://ror.org/0038zss60" + }, + { + "@id": "#workflow/metaGOflow", + "@type": [ + "File", + "SoftwareSourceCode", + "ComputationalWorkflow" + ], + "affiliation": "University of Flatland", + "author": { + "@id": "EMO BON" + }, + "hasPart": [ + { + "@id": "config.yml" + }, + { + "@id": "MINI_DATASET_FA/mini_dataset.yml" + } + ], + "license": { + "@id": "https://www.apache.org/licenses/LICENSE-2.0" + }, + "name": "metaGOflow", + "url": "https://github.com/emo-bon/MetaGOflow/releases/tag/v1.0.0" + } + ] +} \ No newline at end of file diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json deleted file mode 100644 index 33381669..00000000 --- a/ro-crate-metadata.json +++ /dev/null @@ -1,3277 +0,0 @@ -{ - "@context": "https://w3id.org/ro/crate/1.1/context", - "@graph": [ - { - "@id": "./ro-crate-metadata.json", - "@type": "CreativeWork", - "about": { - "@id": "./" - }, - "conformsTo": { - "@id": "https://w3id.org/ro/crate/1.1" - } - }, - { - "@id": "./", - "@type": "Dataset", - "datePublished": "2021-11-25T11:27:08+00:00", - "hasPart": [ - { - "@id": "./docker/" - }, - { - "@id": "./environment/" - }, - { - "@id": "./Installation/" - }, - { - "@id": "./profiling/" - }, - { - "@id": "./test_input/" - }, - { - "@id": "./tools/" - }, - { - "@id": "./utils/" - }, - { - "@id": 
"./workflows/" - }, - { - "@id": "./config.yml" - }, - { - "@id": "./create_yml.py" - }, - { - "@id": "./dependencies.md" - }, - { - "@id": "./learning_cwl.md" - }, - { - "@id": "./LICENSE" - }, - { - "@id": "./README.md" - }, - { - "@id": "./run_wf.sh" - }, - { - "@id": "./slurm_run.sh" - } - ] - }, - { - "@id": "./docker/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./docker/scripts_bash/" - }, - { - "@id": "./docker/scripts_python2/" - }, - { - "@id": "./docker/scripts_python3/" - }, - { - "@id": "./docker/docker_build.sh" - }, - { - "@id": "./docker/docker_push.sh" - } - ] - }, - { - "@id": "./docker/scripts_bash/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./docker/scripts_bash/add_header" - }, - { - "@id": "./docker/scripts_bash/add_symbol_json.sh" - }, - { - "@id": "./docker/scripts_bash/antismash_json_generation" - }, - { - "@id": "./docker/scripts_bash/awk_tool" - }, - { - "@id": "./docker/scripts_bash/biom-convert.sh" - }, - { - "@id": "./docker/scripts_bash/clean_motus_output.sh" - }, - { - "@id": "./docker/scripts_bash/diamond_post_run_join.sh" - }, - { - "@id": "./docker/scripts_bash/Dockerfile" - }, - { - "@id": "./docker/scripts_bash/empty_tax.sh" - }, - { - "@id": "./docker/scripts_bash/esl-ssplit.sh" - }, - { - "@id": "./docker/scripts_bash/format_bedfile" - }, - { - "@id": "./docker/scripts_bash/pull_ncrnas.sh" - }, - { - "@id": "./docker/scripts_bash/remove_symbol.sh" - }, - { - "@id": "./docker/scripts_bash/run_antismash_short.sh" - }, - { - "@id": "./docker/scripts_bash/run_samtools_docker.sh" - } - ] - }, - { - "@id": "./docker/scripts_bash/add_header", - "@type": "File" - }, - { - "@id": "./docker/scripts_bash/add_symbol_json.sh", - "@type": "File" - }, - { - "@id": "./docker/scripts_bash/antismash_json_generation", - "@type": "File" - }, - { - "@id": "./docker/scripts_bash/awk_tool", - "@type": "File" - }, - { - "@id": "./docker/scripts_bash/biom-convert.sh", - "@type": "File" - }, - { - "@id": 
"./docker/scripts_bash/clean_motus_output.sh", - "@type": "File" - }, - { - "@id": "./docker/scripts_bash/diamond_post_run_join.sh", - "@type": "File" - }, - { - "@id": "./docker/scripts_bash/Dockerfile", - "@type": "File" - }, - { - "@id": "./docker/scripts_bash/empty_tax.sh", - "@type": "File" - }, - { - "@id": "./docker/scripts_bash/esl-ssplit.sh", - "@type": "File" - }, - { - "@id": "./docker/scripts_bash/format_bedfile", - "@type": "File" - }, - { - "@id": "./docker/scripts_bash/pull_ncrnas.sh", - "@type": "File" - }, - { - "@id": "./docker/scripts_bash/remove_symbol.sh", - "@type": "File" - }, - { - "@id": "./docker/scripts_bash/run_antismash_short.sh", - "@type": "File" - }, - { - "@id": "./docker/scripts_bash/run_samtools_docker.sh", - "@type": "File" - }, - { - "@id": "./docker/scripts_python2/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./docker/scripts_python2/Dockerfile" - }, - { - "@id": "./docker/scripts_python2/MGRAST_base.py" - }, - { - "@id": "./docker/scripts_python2/run_quality_filtering.py" - } - ] - }, - { - "@id": "./docker/scripts_python2/Dockerfile", - "@type": "File" - }, - { - "@id": "./docker/scripts_python2/MGRAST_base.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python2/run_quality_filtering.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./docker/scripts_python3/antismash_rename_contigs.py" - }, - { - "@id": "./docker/scripts_python3/antismash_to_gff.py" - }, - { - "@id": "./docker/scripts_python3/antismash_to_gff_nojson.py" - }, - { - "@id": "./docker/scripts_python3/build_assembly_gff.py" - }, - { - "@id": "./docker/scripts_python3/change_antismash_output.py" - }, - { - "@id": "./docker/scripts_python3/change_geneclusters_ctg.py" - }, - { - "@id": "./docker/scripts_python3/change_geneclusters_ctg_js.py" - }, - { - "@id": "./docker/scripts_python3/count_fastq.py" - }, - { - "@id": "./docker/scripts_python3/count_lines.py" - }, - { - "@id": 
"./docker/scripts_python3/Dockerfile" - }, - { - "@id": "./docker/scripts_python3/fastq_to_fasta.py" - }, - { - "@id": "./docker/scripts_python3/functional_stats.py" - }, - { - "@id": "./docker/scripts_python3/generate_checksum.py" - }, - { - "@id": "./docker/scripts_python3/generate_mapfile_prodigal.py" - }, - { - "@id": "./docker/scripts_python3/get_subunits.py" - }, - { - "@id": "./docker/scripts_python3/get_subunits_coords.py" - }, - { - "@id": "./docker/scripts_python3/give_pathways.py" - }, - { - "@id": "./docker/scripts_python3/hmmscan_tab.py" - }, - { - "@id": "./docker/scripts_python3/its-length-new.py" - }, - { - "@id": "./docker/scripts_python3/its-length.py" - }, - { - "@id": "./docker/scripts_python3/make_csv.py" - }, - { - "@id": "./docker/scripts_python3/move_antismash_summary.py" - }, - { - "@id": "./docker/scripts_python3/parsing_hmmscan.py" - }, - { - "@id": "./docker/scripts_python3/reformat-antismash.py" - }, - { - "@id": "./docker/scripts_python3/split_to_chunks.py" - }, - { - "@id": "./docker/scripts_python3/write_summaries.py" - } - ] - }, - { - "@id": "./docker/scripts_python3/antismash_rename_contigs.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/antismash_to_gff.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/antismash_to_gff_nojson.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/build_assembly_gff.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/change_antismash_output.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/change_geneclusters_ctg.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/change_geneclusters_ctg_js.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/count_fastq.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/count_lines.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/Dockerfile", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/fastq_to_fasta.py", 
- "@type": "File" - }, - { - "@id": "./docker/scripts_python3/functional_stats.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/generate_checksum.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/generate_mapfile_prodigal.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/get_subunits.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/get_subunits_coords.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/give_pathways.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/hmmscan_tab.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/its-length-new.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/its-length.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/make_csv.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/move_antismash_summary.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/parsing_hmmscan.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/reformat-antismash.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/split_to_chunks.py", - "@type": "File" - }, - { - "@id": "./docker/scripts_python3/write_summaries.py", - "@type": "File" - }, - { - "@id": "./docker/docker_build.sh", - "@type": "File" - }, - { - "@id": "./docker/docker_push.sh", - "@type": "File" - }, - { - "@id": "./environment/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./environment/antismash.yml" - }, - { - "@id": "./environment/interproscan.yml" - }, - { - "@id": "./environment/README.md" - }, - { - "@id": "./environment/toil.yml" - } - ] - }, - { - "@id": "./environment/antismash.yml", - "@type": "File" - }, - { - "@id": "./environment/interproscan.yml", - "@type": "File" - }, - { - "@id": "./environment/README.md", - "@type": "File" - }, - { - "@id": "./environment/toil.yml", - "@type": "File" - }, - { - "@id": "./Installation/", - "@type": "Dataset", - "hasPart": [ - { - 
"@id": "./Installation/templates/" - }, - { - "@id": "./Installation/download_dbs.sh" - } - ] - }, - { - "@id": "./Installation/templates/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./Installation/templates/default.yml" - } - ] - }, - { - "@id": "./Installation/templates/default.yml", - "@type": "File" - }, - { - "@id": "./Installation/download_dbs.sh", - "@type": "File" - }, - { - "@id": "./profiling/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./profiling/profiling_parser.py" - }, - { - "@id": "./profiling/run_profiling.sh" - }, - { - "@id": "./profiling/__init__.py" - } - ] - }, - { - "@id": "./profiling/profiling_parser.py", - "@type": "File" - }, - { - "@id": "./profiling/run_profiling.sh", - "@type": "File" - }, - { - "@id": "./profiling/__init__.py", - "@type": "File" - }, - { - "@id": "./test_input/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./test_input/wgs-paired-SRR1620013_1.fastq.gz" - }, - { - "@id": "./test_input/wgs-paired-SRR1620013_2.fastq.gz" - } - ] - }, - { - "@id": "./test_input/wgs-paired-SRR1620013_1.fastq.gz", - "@type": "File" - }, - { - "@id": "./test_input/wgs-paired-SRR1620013_2.fastq.gz", - "@type": "File" - }, - { - "@id": "./tools/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/" - }, - { - "@id": "./tools/chunks/" - }, - { - "@id": "./tools/Combined_gene_caller/" - }, - { - "@id": "./tools/CRISPRCasFinder/" - }, - { - "@id": "./tools/fastp/" - }, - { - "@id": "./tools/GO-slim/" - }, - { - "@id": "./tools/hmmer/" - }, - { - "@id": "./tools/InterProScan/" - }, - { - "@id": "./tools/mask-for-ITS/" - }, - { - "@id": "./tools/Pfam-Parse/" - }, - { - "@id": "./tools/qc-filtering/" - }, - { - "@id": "./tools/qc-stats/" - }, - { - "@id": "./tools/Raw_reads/" - }, - { - "@id": "./tools/RNA_prediction/" - }, - { - "@id": "./tools/SeqPrep/" - }, - { - "@id": "./tools/summaries/" - }, - { - "@id": "./tools/Trimmomatic/" - } - ] - }, - { - "@id": "./tools/Assembly/", - "@type": "Dataset", - 
"hasPart": [ - { - "@id": "./tools/Assembly/accessioning/" - }, - { - "@id": "./tools/Assembly/antismash/" - }, - { - "@id": "./tools/Assembly/Diamond/" - }, - { - "@id": "./tools/Assembly/EggNOG/" - }, - { - "@id": "./tools/Assembly/generate_mapfile/" - }, - { - "@id": "./tools/Assembly/Genome_properties/" - }, - { - "@id": "./tools/Assembly/GFF/" - }, - { - "@id": "./tools/Assembly/index_fasta/" - }, - { - "@id": "./tools/Assembly/KEGG_analysis/" - } - ] - }, - { - "@id": "./tools/Assembly/accessioning/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/accessioning/add_run_to_db/" - }, - { - "@id": "./tools/Assembly/accessioning/assign_MGYC/" - }, - { - "@id": "./tools/Assembly/accessioning/assign_MGYP/" - }, - { - "@id": "./tools/Assembly/accessioning/scripts/" - }, - { - "@id": "./tools/Assembly/accessioning/Dockerfile" - } - ] - }, - { - "@id": "./tools/Assembly/accessioning/add_run_to_db/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/accessioning/add_run_to_db/add_run_db.cwl" - } - ] - }, - { - "@id": "./tools/Assembly/accessioning/add_run_to_db/add_run_db.cwl", - "@type": "File" - }, - { - "@id": "./tools/Assembly/accessioning/assign_MGYC/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/accessioning/assign_MGYC/assign_mgyc.cwl" - } - ] - }, - { - "@id": "./tools/Assembly/accessioning/assign_MGYC/assign_mgyc.cwl", - "@type": "File" - }, - { - "@id": "./tools/Assembly/accessioning/assign_MGYP/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/accessioning/assign_MGYP/assign_mgyp.cwl" - } - ] - }, - { - "@id": "./tools/Assembly/accessioning/assign_MGYP/assign_mgyp.cwl", - "@type": "File" - }, - { - "@id": "./tools/Assembly/accessioning/scripts/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/accessioning/scripts/add_run.py" - }, - { - "@id": "./tools/Assembly/accessioning/scripts/assign_mgyc.py" - }, - { - "@id": 
"./tools/Assembly/accessioning/scripts/assign_mgyp_db.py" - } - ] - }, - { - "@id": "./tools/Assembly/accessioning/scripts/add_run.py", - "@type": "File" - }, - { - "@id": "./tools/Assembly/accessioning/scripts/assign_mgyc.py", - "@type": "File" - }, - { - "@id": "./tools/Assembly/accessioning/scripts/assign_mgyp_db.py", - "@type": "File" - }, - { - "@id": "./tools/Assembly/accessioning/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/" - }, - { - "@id": "./tools/Assembly/antismash/move_antismash_summary/" - }, - { - "@id": "./tools/Assembly/antismash/__init__.py" - } - ] - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/antismash/" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/check_value/" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/filtering_fasta/" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/rename_contigs/" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/Dockerfile" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/__init__.py" - } - ] - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/antismash/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/antismash/antismash_v4.cwl" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/antismash/install_4.2.0.sh" - }, - { - "@id": 
"./tools/Assembly/antismash/chunking_antismash_with_conditionals/antismash/run_antismash_short.sh" - } - ] - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/antismash/antismash_v4.cwl", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/antismash/install_4.2.0.sh", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/antismash/run_antismash_short.sh", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/check_value/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/check_value/check_value.cwl" - } - ] - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/check_value/check_value.cwl", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/filtering_fasta/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/filtering_fasta/filtering_fasta_for_antismash.cwl" - } - ] - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/filtering_fasta/filtering_fasta_for_antismash.cwl", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/fix_embl_gbk/" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/fix_geneclusters_txt/" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/GFF_antismash/" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/reformat_antismash/" - }, - { - "@id": 
"./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/__init__.py" - } - ] - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/fix_embl_gbk/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/fix_embl_gbk/change_antismash_output.py" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/fix_embl_gbk/change_output.cwl" - } - ] - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/fix_embl_gbk/change_antismash_output.py", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/fix_embl_gbk/change_output.cwl", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/fix_geneclusters_txt/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/fix_geneclusters_txt/change_geneclusters_ctg.py" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/fix_geneclusters_txt/change_geneclusters_txt.cwl" - } - ] - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/fix_geneclusters_txt/change_geneclusters_ctg.py", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/fix_geneclusters_txt/change_geneclusters_txt.cwl", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/GFF_antismash/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/GFF_antismash/antismash_to_gff.cwl" - }, - { - "@id": 
"./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/GFF_antismash/antismash_to_gff_nojson.py" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/GFF_antismash/__init__.py" - } - ] - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/GFF_antismash/antismash_to_gff.cwl", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/GFF_antismash/antismash_to_gff_nojson.py", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/GFF_antismash/__init__.py", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/reformat_antismash/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/reformat_antismash/reformat-antismash.cwl" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/reformat_antismash/reformat-antismash.py" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/reformat_antismash/__init__.py" - } - ] - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/reformat_antismash/reformat-antismash.cwl", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/reformat_antismash/reformat-antismash.py", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/reformat_antismash/__init__.py", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/post-processing/__init__.py", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/rename_contigs/", - 
"@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/rename_contigs/antismash_rename_contigs.py" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/rename_contigs/rename_contigs.cwl" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/rename_contigs/__init__.py" - } - ] - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/rename_contigs/antismash_rename_contigs.py", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/rename_contigs/rename_contigs.cwl", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/rename_contigs/__init__.py", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/chunking_antismash_with_conditionals/__init__.py", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/move_antismash_summary/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/antismash/move_antismash_summary/move_antismash_summary.cwl" - }, - { - "@id": "./tools/Assembly/antismash/move_antismash_summary/move_antismash_summary.py" - }, - { - "@id": "./tools/Assembly/antismash/move_antismash_summary/__init__.py" - } - ] - }, - { - "@id": "./tools/Assembly/antismash/move_antismash_summary/move_antismash_summary.cwl", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/move_antismash_summary/move_antismash_summary.py", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/move_antismash_summary/__init__.py", - "@type": "File" - }, - { - "@id": "./tools/Assembly/antismash/__init__.py", - "@type": "File" - }, - { - "@id": "./tools/Assembly/Diamond/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/Diamond/Diamond-Post-Processing/" - }, - { - "@id": 
"./tools/Assembly/Diamond/yml-formats/" - }, - { - "@id": "./tools/Assembly/Diamond/Diamond.blastp.cwl" - }, - { - "@id": "./tools/Assembly/Diamond/Dockerfile" - } - ] - }, - { - "@id": "./tools/Assembly/Diamond/Diamond-Post-Processing/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/Diamond/Diamond-Post-Processing/diamond_join.cwl" - }, - { - "@id": "./tools/Assembly/Diamond/Diamond-Post-Processing/diamond_post_run_join.sh" - }, - { - "@id": "./tools/Assembly/Diamond/Diamond-Post-Processing/diamond_sorting.cwl" - } - ] - }, - { - "@id": "./tools/Assembly/Diamond/Diamond-Post-Processing/diamond_join.cwl", - "@type": "File" - }, - { - "@id": "./tools/Assembly/Diamond/Diamond-Post-Processing/diamond_post_run_join.sh", - "@type": "File" - }, - { - "@id": "./tools/Assembly/Diamond/Diamond-Post-Processing/diamond_sorting.cwl", - "@type": "File" - }, - { - "@id": "./tools/Assembly/Diamond/yml-formats/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/Diamond/yml-formats/Diamond-output_formats.yaml" - }, - { - "@id": "./tools/Assembly/Diamond/yml-formats/Diamond-strand_values.yaml" - } - ] - }, - { - "@id": "./tools/Assembly/Diamond/yml-formats/Diamond-output_formats.yaml", - "@type": "File" - }, - { - "@id": "./tools/Assembly/Diamond/yml-formats/Diamond-strand_values.yaml", - "@type": "File" - }, - { - "@id": "./tools/Assembly/Diamond/Diamond.blastp.cwl", - "@type": "File" - }, - { - "@id": "./tools/Assembly/Diamond/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/Assembly/EggNOG/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/EggNOG/Dockerfile" - }, - { - "@id": "./tools/Assembly/EggNOG/eggnog.cwl" - } - ] - }, - { - "@id": "./tools/Assembly/EggNOG/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/Assembly/EggNOG/eggnog.cwl", - "@type": "File" - }, - { - "@id": "./tools/Assembly/generate_mapfile/", - "@type": "Dataset", - "hasPart": [ - { - "@id": 
"./tools/Assembly/generate_mapfile/generate_mapfile_prodigal.cwl" - }, - { - "@id": "./tools/Assembly/generate_mapfile/generate_mapfile_prodigal.py" - }, - { - "@id": "./tools/Assembly/generate_mapfile/__init__.py" - } - ] - }, - { - "@id": "./tools/Assembly/generate_mapfile/generate_mapfile_prodigal.cwl", - "@type": "File" - }, - { - "@id": "./tools/Assembly/generate_mapfile/generate_mapfile_prodigal.py", - "@type": "File" - }, - { - "@id": "./tools/Assembly/generate_mapfile/__init__.py", - "@type": "File" - }, - { - "@id": "./tools/Assembly/Genome_properties/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/Genome_properties/Dockerfile" - }, - { - "@id": "./tools/Assembly/Genome_properties/genome_properties.cwl" - } - ] - }, - { - "@id": "./tools/Assembly/Genome_properties/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/Assembly/Genome_properties/genome_properties.cwl", - "@type": "File" - }, - { - "@id": "./tools/Assembly/GFF/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/GFF/build_assembly_gff.py" - }, - { - "@id": "./tools/Assembly/GFF/gff_generation.cwl" - } - ] - }, - { - "@id": "./tools/Assembly/GFF/build_assembly_gff.py", - "@type": "File" - }, - { - "@id": "./tools/Assembly/GFF/gff_generation.cwl", - "@type": "File" - }, - { - "@id": "./tools/Assembly/index_fasta/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/index_fasta/fasta_index.cwl" - }, - { - "@id": "./tools/Assembly/index_fasta/run_samtools_docker.sh" - } - ] - }, - { - "@id": "./tools/Assembly/index_fasta/fasta_index.cwl", - "@type": "File" - }, - { - "@id": "./tools/Assembly/index_fasta/run_samtools_docker.sh", - "@type": "File" - }, - { - "@id": "./tools/Assembly/KEGG_analysis/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/KEGG_analysis/KEGG_pathways/" - }, - { - "@id": "./tools/Assembly/KEGG_analysis/Modification/" - }, - { - "@id": "./tools/Assembly/KEGG_analysis/Parsing_hmmscan/" - } - ] - 
}, - { - "@id": "./tools/Assembly/KEGG_analysis/KEGG_pathways/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/KEGG_analysis/KEGG_pathways/give_pathways.py" - }, - { - "@id": "./tools/Assembly/KEGG_analysis/KEGG_pathways/kegg_pathways.cwl" - }, - { - "@id": "./tools/Assembly/KEGG_analysis/KEGG_pathways/make_graphs.py" - } - ] - }, - { - "@id": "./tools/Assembly/KEGG_analysis/KEGG_pathways/give_pathways.py", - "@type": "File" - }, - { - "@id": "./tools/Assembly/KEGG_analysis/KEGG_pathways/kegg_pathways.cwl", - "@type": "File" - }, - { - "@id": "./tools/Assembly/KEGG_analysis/KEGG_pathways/make_graphs.py", - "@type": "File" - }, - { - "@id": "./tools/Assembly/KEGG_analysis/Modification/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/KEGG_analysis/Modification/modification_table.cwl" - } - ] - }, - { - "@id": "./tools/Assembly/KEGG_analysis/Modification/modification_table.cwl", - "@type": "File" - }, - { - "@id": "./tools/Assembly/KEGG_analysis/Parsing_hmmscan/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Assembly/KEGG_analysis/Parsing_hmmscan/parsing_hmmscan.cwl" - }, - { - "@id": "./tools/Assembly/KEGG_analysis/Parsing_hmmscan/parsing_hmmscan.py" - } - ] - }, - { - "@id": "./tools/Assembly/KEGG_analysis/Parsing_hmmscan/parsing_hmmscan.cwl", - "@type": "File" - }, - { - "@id": "./tools/Assembly/KEGG_analysis/Parsing_hmmscan/parsing_hmmscan.py", - "@type": "File" - }, - { - "@id": "./tools/chunks/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/chunks/dna_chunker/" - }, - { - "@id": "./tools/chunks/protein_chunker.cwl" - }, - { - "@id": "./tools/chunks/split_to_chunks.py" - } - ] - }, - { - "@id": "./tools/chunks/dna_chunker/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/chunks/dna_chunker/Dockerfile" - }, - { - "@id": "./tools/chunks/dna_chunker/esl-ssplit.pl" - }, - { - "@id": "./tools/chunks/dna_chunker/esl-ssplit.sh" - }, - { - "@id": 
"./tools/chunks/dna_chunker/fasta_chunker.cwl" - } - ] - }, - { - "@id": "./tools/chunks/dna_chunker/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/chunks/dna_chunker/esl-ssplit.pl", - "@type": "File" - }, - { - "@id": "./tools/chunks/dna_chunker/esl-ssplit.sh", - "@type": "File" - }, - { - "@id": "./tools/chunks/dna_chunker/fasta_chunker.cwl", - "@type": "File" - }, - { - "@id": "./tools/chunks/protein_chunker.cwl", - "@type": "File" - }, - { - "@id": "./tools/chunks/split_to_chunks.py", - "@type": "File" - }, - { - "@id": "./tools/Combined_gene_caller/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Combined_gene_caller/CGC/" - }, - { - "@id": "./tools/Combined_gene_caller/FragGeneScan/" - }, - { - "@id": "./tools/Combined_gene_caller/Prodigal/" - }, - { - "@id": "./tools/Combined_gene_caller/Dockerfile" - }, - { - "@id": "./tools/Combined_gene_caller/faselector" - }, - { - "@id": "./tools/Combined_gene_caller/post-processing.cwl" - }, - { - "@id": "./tools/Combined_gene_caller/unite_protein_predictions.py" - }, - { - "@id": "./tools/Combined_gene_caller/__init__.py" - } - ] - }, - { - "@id": "./tools/Combined_gene_caller/CGC/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Combined_gene_caller/CGC/combined_gene_caller.cwl" - }, - { - "@id": "./tools/Combined_gene_caller/CGC/combined_gene_caller.py" - }, - { - "@id": "./tools/Combined_gene_caller/CGC/combined_gene_caller_conf.json" - }, - { - "@id": "./tools/Combined_gene_caller/CGC/combined_gene_caller_docker.py" - }, - { - "@id": "./tools/Combined_gene_caller/CGC/Dockerfile" - }, - { - "@id": "./tools/Combined_gene_caller/CGC/__init__.py" - } - ] - }, - { - "@id": "./tools/Combined_gene_caller/CGC/combined_gene_caller.cwl", - "@type": "File" - }, - { - "@id": "./tools/Combined_gene_caller/CGC/combined_gene_caller.py", - "@type": "File" - }, - { - "@id": "./tools/Combined_gene_caller/CGC/combined_gene_caller_conf.json", - "@type": "File" - }, - { - "@id": 
"./tools/Combined_gene_caller/CGC/combined_gene_caller_docker.py", - "@type": "File" - }, - { - "@id": "./tools/Combined_gene_caller/CGC/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/Combined_gene_caller/CGC/__init__.py", - "@type": "File" - }, - { - "@id": "./tools/Combined_gene_caller/FragGeneScan/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Combined_gene_caller/FragGeneScan/Dockerfile" - }, - { - "@id": "./tools/Combined_gene_caller/FragGeneScan/FGS.cwl" - } - ] - }, - { - "@id": "./tools/Combined_gene_caller/FragGeneScan/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/Combined_gene_caller/FragGeneScan/FGS.cwl", - "@type": "File" - }, - { - "@id": "./tools/Combined_gene_caller/Prodigal/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Combined_gene_caller/Prodigal/Dockerfile" - }, - { - "@id": "./tools/Combined_gene_caller/Prodigal/prodigal.cwl" - } - ] - }, - { - "@id": "./tools/Combined_gene_caller/Prodigal/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/Combined_gene_caller/Prodigal/prodigal.cwl", - "@type": "File" - }, - { - "@id": "./tools/Combined_gene_caller/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/Combined_gene_caller/faselector", - "@type": "File" - }, - { - "@id": "./tools/Combined_gene_caller/post-processing.cwl", - "@type": "File" - }, - { - "@id": "./tools/Combined_gene_caller/unite_protein_predictions.py", - "@type": "File" - }, - { - "@id": "./tools/Combined_gene_caller/__init__.py", - "@type": "File" - }, - { - "@id": "./tools/CRISPRCasFinder/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/CRISPRCasFinder/CRISPRCasFinder.cwl" - }, - { - "@id": "./tools/CRISPRCasFinder/CRISPRCasFinder2Fasta.cwl" - }, - { - "@id": "./tools/CRISPRCasFinder/CRISPRCasFinder2GFF.cwl" - }, - { - "@id": "./tools/CRISPRCasFinder/crisprcf2fasta.py" - }, - { - "@id": "./tools/CRISPRCasFinder/crisprcf2gff.py" - }, - { - "@id": "./tools/CRISPRCasFinder/Dockerfile" - }, - { - "@id": 
"./tools/CRISPRCasFinder/runCRISPRCasFinderl.cwl" - } - ] - }, - { - "@id": "./tools/CRISPRCasFinder/CRISPRCasFinder.cwl", - "@type": "File" - }, - { - "@id": "./tools/CRISPRCasFinder/CRISPRCasFinder2Fasta.cwl", - "@type": "File" - }, - { - "@id": "./tools/CRISPRCasFinder/CRISPRCasFinder2GFF.cwl", - "@type": "File" - }, - { - "@id": "./tools/CRISPRCasFinder/crisprcf2fasta.py", - "@type": "File" - }, - { - "@id": "./tools/CRISPRCasFinder/crisprcf2gff.py", - "@type": "File" - }, - { - "@id": "./tools/CRISPRCasFinder/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/CRISPRCasFinder/runCRISPRCasFinderl.cwl", - "@type": "File" - }, - { - "@id": "./tools/fastp/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/fastp/Dockerfile" - }, - { - "@id": "./tools/fastp/fastp.cwl" - }, - { - "@id": "./tools/fastp/fastp.yml" - } - ] - }, - { - "@id": "./tools/fastp/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/fastp/fastp.cwl", - "@type": "File" - }, - { - "@id": "./tools/fastp/fastp.yml", - "@type": "File" - }, - { - "@id": "./tools/GO-slim/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/GO-slim/owltools/" - }, - { - "@id": "./tools/GO-slim/Dockerfile" - }, - { - "@id": "./tools/GO-slim/go-basic-metagenomics_release_20160705.obo" - }, - { - "@id": "./tools/GO-slim/go-basic-metagenomics_release_20160705.txt" - }, - { - "@id": "./tools/GO-slim/go_summary-config.json" - }, - { - "@id": "./tools/GO-slim/go_summary.cwl" - }, - { - "@id": "./tools/GO-slim/go_summary_pipeline-1.0.py" - }, - { - "@id": "./tools/GO-slim/metagenomics_go_slim_banding.txt" - }, - { - "@id": "./tools/GO-slim/metagenomics_go_slim_ids.txt" - } - ] - }, - { - "@id": "./tools/GO-slim/owltools/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/GO-slim/owltools/owltools" - }, - { - "@id": "./tools/GO-slim/owltools/owltools-runner-all.jar" - }, - { - "@id": "./tools/GO-slim/owltools/owltools.vmoptions" - } - ] - }, - { - "@id": 
"./tools/GO-slim/owltools/owltools", - "@type": "File" - }, - { - "@id": "./tools/GO-slim/owltools/owltools-runner-all.jar", - "@type": "File" - }, - { - "@id": "./tools/GO-slim/owltools/owltools.vmoptions", - "@type": "File" - }, - { - "@id": "./tools/GO-slim/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/GO-slim/go-basic-metagenomics_release_20160705.obo", - "@type": "File" - }, - { - "@id": "./tools/GO-slim/go-basic-metagenomics_release_20160705.txt", - "@type": "File" - }, - { - "@id": "./tools/GO-slim/go_summary-config.json", - "@type": "File" - }, - { - "@id": "./tools/GO-slim/go_summary.cwl", - "@type": "File" - }, - { - "@id": "./tools/GO-slim/go_summary_pipeline-1.0.py", - "@type": "File" - }, - { - "@id": "./tools/GO-slim/metagenomics_go_slim_banding.txt", - "@type": "File" - }, - { - "@id": "./tools/GO-slim/metagenomics_go_slim_ids.txt", - "@type": "File" - }, - { - "@id": "./tools/hmmer/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/hmmer/hmmer_tab_modification/" - }, - { - "@id": "./tools/hmmer/hmmscan/" - }, - { - "@id": "./tools/hmmer/hmmsearch/" - }, - { - "@id": "./tools/hmmer/Dockerfile" - }, - { - "@id": "./tools/hmmer/__init__.py" - } - ] - }, - { - "@id": "./tools/hmmer/hmmer_tab_modification/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/hmmer/hmmer_tab_modification/hmmer_tab_modification.cwl" - }, - { - "@id": "./tools/hmmer/hmmer_tab_modification/hmmscan_tab.py" - }, - { - "@id": "./tools/hmmer/hmmer_tab_modification/__init__.py" - } - ] - }, - { - "@id": "./tools/hmmer/hmmer_tab_modification/hmmer_tab_modification.cwl", - "@type": "File" - }, - { - "@id": "./tools/hmmer/hmmer_tab_modification/hmmscan_tab.py", - "@type": "File" - }, - { - "@id": "./tools/hmmer/hmmer_tab_modification/__init__.py", - "@type": "File" - }, - { - "@id": "./tools/hmmer/hmmscan/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/hmmer/hmmscan/hmmscan-subwf.cwl" - }, - { - "@id": "./tools/hmmer/hmmscan/hmmscan.cwl" 
- }, - { - "@id": "./tools/hmmer/hmmscan/__init__.py" - } - ] - }, - { - "@id": "./tools/hmmer/hmmscan/hmmscan-subwf.cwl", - "@type": "File" - }, - { - "@id": "./tools/hmmer/hmmscan/hmmscan.cwl", - "@type": "File" - }, - { - "@id": "./tools/hmmer/hmmscan/__init__.py", - "@type": "File" - }, - { - "@id": "./tools/hmmer/hmmsearch/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/hmmer/hmmsearch/hmmsearch.cwl" - } - ] - }, - { - "@id": "./tools/hmmer/hmmsearch/hmmsearch.cwl", - "@type": "File" - }, - { - "@id": "./tools/hmmer/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/hmmer/__init__.py", - "@type": "File" - }, - { - "@id": "./tools/InterProScan/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/InterProScan/types-ymls/" - }, - { - "@id": "./tools/InterProScan/Dockerfile" - }, - { - "@id": "./tools/InterProScan/Dockerfile_data" - }, - { - "@id": "./tools/InterProScan/InterProScan-v5.cwl" - } - ] - }, - { - "@id": "./tools/InterProScan/types-ymls/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/InterProScan/types-ymls/InterProScan-apps.yaml" - }, - { - "@id": "./tools/InterProScan/types-ymls/InterProScan-protein_formats.yaml" - } - ] - }, - { - "@id": "./tools/InterProScan/types-ymls/InterProScan-apps.yaml", - "@type": "File" - }, - { - "@id": "./tools/InterProScan/types-ymls/InterProScan-protein_formats.yaml", - "@type": "File" - }, - { - "@id": "./tools/InterProScan/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/InterProScan/Dockerfile_data", - "@type": "File" - }, - { - "@id": "./tools/InterProScan/InterProScan-v5.cwl", - "@type": "File" - }, - { - "@id": "./tools/mask-for-ITS/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/mask-for-ITS/bash_scripts/" - }, - { - "@id": "./tools/mask-for-ITS/bedtools/" - }, - { - "@id": "./tools/mask-for-ITS/bedtools.yml" - }, - { - "@id": "./tools/mask-for-ITS/format-bedfile.cwl" - }, - { - "@id": "./tools/mask-for-ITS/its-length-new.py" - }, - { - "@id": 
"./tools/mask-for-ITS/suppress_tax.cwl" - } - ] - }, - { - "@id": "./tools/mask-for-ITS/bash_scripts/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/mask-for-ITS/bash_scripts/format_bedfile" - } - ] - }, - { - "@id": "./tools/mask-for-ITS/bash_scripts/format_bedfile", - "@type": "File" - }, - { - "@id": "./tools/mask-for-ITS/bedtools/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/mask-for-ITS/bedtools/bedtools.cwl" - }, - { - "@id": "./tools/mask-for-ITS/bedtools/Dockerfile" - } - ] - }, - { - "@id": "./tools/mask-for-ITS/bedtools/bedtools.cwl", - "@type": "File" - }, - { - "@id": "./tools/mask-for-ITS/bedtools/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/mask-for-ITS/bedtools.yml", - "@type": "File" - }, - { - "@id": "./tools/mask-for-ITS/format-bedfile.cwl", - "@type": "File" - }, - { - "@id": "./tools/mask-for-ITS/its-length-new.py", - "@type": "File" - }, - { - "@id": "./tools/mask-for-ITS/suppress_tax.cwl", - "@type": "File" - }, - { - "@id": "./tools/Pfam-Parse/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Pfam-Parse/pfam_annotations.cwl" - } - ] - }, - { - "@id": "./tools/Pfam-Parse/pfam_annotations.cwl", - "@type": "File" - }, - { - "@id": "./tools/qc-filtering/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/qc-filtering/qc-filtering.cwl" - }, - { - "@id": "./tools/qc-filtering/run_quality_filtering.py" - } - ] - }, - { - "@id": "./tools/qc-filtering/qc-filtering.cwl", - "@type": "File" - }, - { - "@id": "./tools/qc-filtering/run_quality_filtering.py", - "@type": "File" - }, - { - "@id": "./tools/qc-stats/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/qc-stats/MGRAST_base.py" - }, - { - "@id": "./tools/qc-stats/qc-stats.cwl" - } - ] - }, - { - "@id": "./tools/qc-stats/MGRAST_base.py", - "@type": "File" - }, - { - "@id": "./tools/qc-stats/qc-stats.cwl", - "@type": "File" - }, - { - "@id": "./tools/Raw_reads/", - "@type": "Dataset", - "hasPart": [ - { - "@id": 
"./tools/Raw_reads/mOTUs/" - } - ] - }, - { - "@id": "./tools/Raw_reads/mOTUs/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Raw_reads/mOTUs/clean_motus_output.cwl" - }, - { - "@id": "./tools/Raw_reads/mOTUs/clean_motus_output.sh" - }, - { - "@id": "./tools/Raw_reads/mOTUs/Dockerfile" - }, - { - "@id": "./tools/Raw_reads/mOTUs/mOTUs.cwl" - }, - { - "@id": "./tools/Raw_reads/mOTUs/mOTUs_download_db.py" - } - ] - }, - { - "@id": "./tools/Raw_reads/mOTUs/clean_motus_output.cwl", - "@type": "File" - }, - { - "@id": "./tools/Raw_reads/mOTUs/clean_motus_output.sh", - "@type": "File" - }, - { - "@id": "./tools/Raw_reads/mOTUs/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/Raw_reads/mOTUs/mOTUs.cwl", - "@type": "File" - }, - { - "@id": "./tools/Raw_reads/mOTUs/mOTUs_download_db.py", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/RNA_prediction/biom-convert/" - }, - { - "@id": "./tools/RNA_prediction/cmsearch/" - }, - { - "@id": "./tools/RNA_prediction/cmsearch-deoverlap/" - }, - { - "@id": "./tools/RNA_prediction/easel/" - }, - { - "@id": "./tools/RNA_prediction/extract-coords/" - }, - { - "@id": "./tools/RNA_prediction/get_subunits_coords/" - }, - { - "@id": "./tools/RNA_prediction/get_subunits_fasta/" - }, - { - "@id": "./tools/RNA_prediction/krona/" - }, - { - "@id": "./tools/RNA_prediction/mapseq/" - }, - { - "@id": "./tools/RNA_prediction/mapseq2biom/" - }, - { - "@id": "./tools/RNA_prediction/pull_ncrnas/" - } - ] - }, - { - "@id": "./tools/RNA_prediction/biom-convert/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/RNA_prediction/biom-convert/biom-convert-table.yaml" - }, - { - "@id": "./tools/RNA_prediction/biom-convert/biom-convert.cwl" - }, - { - "@id": "./tools/RNA_prediction/biom-convert/biom-convert.sh" - }, - { - "@id": "./tools/RNA_prediction/biom-convert/Dockerfile" - } - ] - }, - { - "@id": 
"./tools/RNA_prediction/biom-convert/biom-convert-table.yaml", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/biom-convert/biom-convert.cwl", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/biom-convert/biom-convert.sh", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/biom-convert/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/cmsearch/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/RNA_prediction/cmsearch/Dockerfile" - }, - { - "@id": "./tools/RNA_prediction/cmsearch/infernal-cmsearch-v1.1.2.cwl" - } - ] - }, - { - "@id": "./tools/RNA_prediction/cmsearch/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/cmsearch/infernal-cmsearch-v1.1.2.cwl", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/cmsearch-deoverlap/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/RNA_prediction/cmsearch-deoverlap/cmsearch-deoverlap-v0.02.cwl" - }, - { - "@id": "./tools/RNA_prediction/cmsearch-deoverlap/cmsearch-deoverlap.pl" - }, - { - "@id": "./tools/RNA_prediction/cmsearch-deoverlap/Dockerfile" - } - ] - }, - { - "@id": "./tools/RNA_prediction/cmsearch-deoverlap/cmsearch-deoverlap-v0.02.cwl", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/cmsearch-deoverlap/cmsearch-deoverlap.pl", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/cmsearch-deoverlap/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/easel/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/RNA_prediction/easel/Dockerfile" - }, - { - "@id": "./tools/RNA_prediction/easel/esl-index.sh" - }, - { - "@id": "./tools/RNA_prediction/easel/esl-sfetch-index.cwl" - }, - { - "@id": "./tools/RNA_prediction/easel/esl-sfetch-manyseqs.cwl" - } - ] - }, - { - "@id": "./tools/RNA_prediction/easel/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/easel/esl-index.sh", - "@type": "File" - }, - { - "@id": 
"./tools/RNA_prediction/easel/esl-sfetch-index.cwl", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/easel/esl-sfetch-manyseqs.cwl", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/extract-coords/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/RNA_prediction/extract-coords/awk_tool" - }, - { - "@id": "./tools/RNA_prediction/extract-coords/Dockerfile" - }, - { - "@id": "./tools/RNA_prediction/extract-coords/extract-coords.cwl" - }, - { - "@id": "./tools/RNA_prediction/extract-coords/extract-coords_awk.cwl" - } - ] - }, - { - "@id": "./tools/RNA_prediction/extract-coords/awk_tool", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/extract-coords/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/extract-coords/extract-coords.cwl", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/extract-coords/extract-coords_awk.cwl", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/get_subunits_coords/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/RNA_prediction/get_subunits_coords/get_subunits_coords.cwl" - }, - { - "@id": "./tools/RNA_prediction/get_subunits_coords/get_subunits_coords.py" - } - ] - }, - { - "@id": "./tools/RNA_prediction/get_subunits_coords/get_subunits_coords.cwl", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/get_subunits_coords/get_subunits_coords.py", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/get_subunits_fasta/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/RNA_prediction/get_subunits_fasta/get_subunits.cwl" - }, - { - "@id": "./tools/RNA_prediction/get_subunits_fasta/get_subunits.py" - }, - { - "@id": "./tools/RNA_prediction/get_subunits_fasta/__init__.py" - } - ] - }, - { - "@id": "./tools/RNA_prediction/get_subunits_fasta/get_subunits.cwl", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/get_subunits_fasta/get_subunits.py", - "@type": "File" - }, - { - "@id": 
"./tools/RNA_prediction/get_subunits_fasta/__init__.py", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/krona/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/RNA_prediction/krona/Dockerfile" - }, - { - "@id": "./tools/RNA_prediction/krona/krona.cwl" - } - ] - }, - { - "@id": "./tools/RNA_prediction/krona/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/krona/krona.cwl", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/mapseq/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/RNA_prediction/mapseq/Dockerfile" - }, - { - "@id": "./tools/RNA_prediction/mapseq/mapseq.cwl" - } - ] - }, - { - "@id": "./tools/RNA_prediction/mapseq/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/mapseq/mapseq.cwl", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/mapseq2biom/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/RNA_prediction/mapseq2biom/Dockerfile" - }, - { - "@id": "./tools/RNA_prediction/mapseq2biom/mapseq2biom.cwl" - }, - { - "@id": "./tools/RNA_prediction/mapseq2biom/mapseq2biom.pl" - } - ] - }, - { - "@id": "./tools/RNA_prediction/mapseq2biom/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/mapseq2biom/mapseq2biom.cwl", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/mapseq2biom/mapseq2biom.pl", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/pull_ncrnas/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/RNA_prediction/pull_ncrnas/pull_ncrnas.cwl" - }, - { - "@id": "./tools/RNA_prediction/pull_ncrnas/pull_ncrnas.sh" - } - ] - }, - { - "@id": "./tools/RNA_prediction/pull_ncrnas/pull_ncrnas.cwl", - "@type": "File" - }, - { - "@id": "./tools/RNA_prediction/pull_ncrnas/pull_ncrnas.sh", - "@type": "File" - }, - { - "@id": "./tools/SeqPrep/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/SeqPrep/Dockerfile" - }, - { - "@id": "./tools/SeqPrep/seqprep.cwl" - } - ] - }, - { - "@id": 
"./tools/SeqPrep/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/SeqPrep/seqprep.cwl", - "@type": "File" - }, - { - "@id": "./tools/summaries/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/summaries/functional_stats.cwl" - }, - { - "@id": "./tools/summaries/functional_stats.py" - }, - { - "@id": "./tools/summaries/write_summaries.cwl" - }, - { - "@id": "./tools/summaries/write_summaries.py" - } - ] - }, - { - "@id": "./tools/summaries/functional_stats.cwl", - "@type": "File" - }, - { - "@id": "./tools/summaries/functional_stats.py", - "@type": "File" - }, - { - "@id": "./tools/summaries/write_summaries.cwl", - "@type": "File" - }, - { - "@id": "./tools/summaries/write_summaries.py", - "@type": "File" - }, - { - "@id": "./tools/Trimmomatic/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Trimmomatic/ymls-types/" - }, - { - "@id": "./tools/Trimmomatic/Dockerfile" - }, - { - "@id": "./tools/Trimmomatic/LICENSE" - }, - { - "@id": "./tools/Trimmomatic/trimmomatic" - }, - { - "@id": "./tools/Trimmomatic/trimmomatic-0.36.jar" - }, - { - "@id": "./tools/Trimmomatic/Trimmomatic-v0.36-PE.cwl" - }, - { - "@id": "./tools/Trimmomatic/Trimmomatic-v0.36-SE.cwl" - }, - { - "@id": "./tools/Trimmomatic/trimmomatic.jar" - }, - { - "@id": "./tools/Trimmomatic/trimmomatic_help.txt" - } - ] - }, - { - "@id": "./tools/Trimmomatic/ymls-types/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./tools/Trimmomatic/ymls-types/trimmomatic-end_mode.yaml" - }, - { - "@id": "./tools/Trimmomatic/ymls-types/trimmomatic-illumina_clipping.yaml" - }, - { - "@id": "./tools/Trimmomatic/ymls-types/trimmomatic-max_info.yaml" - }, - { - "@id": "./tools/Trimmomatic/ymls-types/trimmomatic-phred.yaml" - }, - { - "@id": "./tools/Trimmomatic/ymls-types/trimmomatic-sliding_window.yaml" - } - ] - }, - { - "@id": "./tools/Trimmomatic/ymls-types/trimmomatic-end_mode.yaml", - "@type": "File" - }, - { - "@id": 
"./tools/Trimmomatic/ymls-types/trimmomatic-illumina_clipping.yaml", - "@type": "File" - }, - { - "@id": "./tools/Trimmomatic/ymls-types/trimmomatic-max_info.yaml", - "@type": "File" - }, - { - "@id": "./tools/Trimmomatic/ymls-types/trimmomatic-phred.yaml", - "@type": "File" - }, - { - "@id": "./tools/Trimmomatic/ymls-types/trimmomatic-sliding_window.yaml", - "@type": "File" - }, - { - "@id": "./tools/Trimmomatic/Dockerfile", - "@type": "File" - }, - { - "@id": "./tools/Trimmomatic/LICENSE", - "@type": "File" - }, - { - "@id": "./tools/Trimmomatic/trimmomatic", - "@type": "File" - }, - { - "@id": "./tools/Trimmomatic/trimmomatic-0.36.jar", - "@type": "File" - }, - { - "@id": "./tools/Trimmomatic/Trimmomatic-v0.36-PE.cwl", - "@type": "File" - }, - { - "@id": "./tools/Trimmomatic/Trimmomatic-v0.36-SE.cwl", - "@type": "File" - }, - { - "@id": "./tools/Trimmomatic/trimmomatic.jar", - "@type": "File" - }, - { - "@id": "./tools/Trimmomatic/trimmomatic_help.txt", - "@type": "File" - }, - { - "@id": "./utils/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./utils/add_header/" - }, - { - "@id": "./utils/count_lines/" - }, - { - "@id": "./utils/fastq_to_fasta/" - }, - { - "@id": "./utils/generate_checksum/" - }, - { - "@id": "./utils/make_csv/" - }, - { - "@id": "./utils/pigz/" - }, - { - "@id": "./utils/remove_header/" - }, - { - "@id": "./utils/result-file-chunker/" - }, - { - "@id": "./utils/return_directory/" - }, - { - "@id": "./utils/clean_fasta_headers.cwl" - }, - { - "@id": "./utils/concatenate.cwl" - }, - { - "@id": "./utils/count_fasta.cwl" - }, - { - "@id": "./utils/count_number_lines.cwl" - }, - { - "@id": "./utils/make_flatten.cwl" - }, - { - "@id": "./utils/make_tab_sep.cwl" - }, - { - "@id": "./utils/move.cwl" - }, - { - "@id": "./utils/multiple-gunzip.cwl" - }, - { - "@id": "./utils/qc-flag.cwl" - }, - { - "@id": "./utils/size_file.cwl" - }, - { - "@id": "./utils/touch_file.cwl" - } - ] - }, - { - "@id": "./utils/add_header/", - "@type": "Dataset", - 
"hasPart": [ - { - "@id": "./utils/add_header/add_header" - }, - { - "@id": "./utils/add_header/add_header.cwl" - } - ] - }, - { - "@id": "./utils/add_header/add_header", - "@type": "File" - }, - { - "@id": "./utils/add_header/add_header.cwl", - "@type": "File" - }, - { - "@id": "./utils/count_lines/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./utils/count_lines/count_lines.cwl" - }, - { - "@id": "./utils/count_lines/count_lines.py" - } - ] - }, - { - "@id": "./utils/count_lines/count_lines.cwl", - "@type": "File" - }, - { - "@id": "./utils/count_lines/count_lines.py", - "@type": "File" - }, - { - "@id": "./utils/fastq_to_fasta/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./utils/fastq_to_fasta/fastq_to_fasta.cwl" - }, - { - "@id": "./utils/fastq_to_fasta/fastq_to_fasta.py" - } - ] - }, - { - "@id": "./utils/fastq_to_fasta/fastq_to_fasta.cwl", - "@type": "File" - }, - { - "@id": "./utils/fastq_to_fasta/fastq_to_fasta.py", - "@type": "File" - }, - { - "@id": "./utils/generate_checksum/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./utils/generate_checksum/generate_checksum.cwl" - }, - { - "@id": "./utils/generate_checksum/generate_checksum.py" - } - ] - }, - { - "@id": "./utils/generate_checksum/generate_checksum.cwl", - "@type": "File" - }, - { - "@id": "./utils/generate_checksum/generate_checksum.py", - "@type": "File" - }, - { - "@id": "./utils/make_csv/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./utils/make_csv/make_csv.cwl" - }, - { - "@id": "./utils/make_csv/make_csv.py" - } - ] - }, - { - "@id": "./utils/make_csv/make_csv.cwl", - "@type": "File" - }, - { - "@id": "./utils/make_csv/make_csv.py", - "@type": "File" - }, - { - "@id": "./utils/pigz/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./utils/pigz/gzip.cwl" - } - ] - }, - { - "@id": "./utils/pigz/gzip.cwl", - "@type": "File" - }, - { - "@id": "./utils/remove_header/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./utils/remove_header/remove_headers.cwl" 
- }, - { - "@id": "./utils/remove_header/remove_headers.yml" - } - ] - }, - { - "@id": "./utils/remove_header/remove_headers.cwl", - "@type": "File" - }, - { - "@id": "./utils/remove_header/remove_headers.yml", - "@type": "File" - }, - { - "@id": "./utils/result-file-chunker/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./utils/result-file-chunker/old_scripts/" - }, - { - "@id": "./utils/result-file-chunker/create_chunks_file.cwl" - }, - { - "@id": "./utils/result-file-chunker/Dockerfile" - }, - { - "@id": "./utils/result-file-chunker/result_chunker.cwl" - }, - { - "@id": "./utils/result-file-chunker/result_chunker_subwf.cwl" - }, - { - "@id": "./utils/result-file-chunker/split_fasta.cwl" - }, - { - "@id": "./utils/result-file-chunker/split_fasta_by_size.sh" - }, - { - "@id": "./utils/result-file-chunker/split_tsv.cwl" - } - ] - }, - { - "@id": "./utils/result-file-chunker/old_scripts/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./utils/result-file-chunker/old_scripts/chunkFastaResultFileUtil.py" - }, - { - "@id": "./utils/result-file-chunker/old_scripts/chunkTSVFileUtil.py" - }, - { - "@id": "./utils/result-file-chunker/old_scripts/cleaningUtils.py" - }, - { - "@id": "./utils/result-file-chunker/old_scripts/run_result_file_chunker.py" - } - ] - }, - { - "@id": "./utils/result-file-chunker/old_scripts/chunkFastaResultFileUtil.py", - "@type": "File" - }, - { - "@id": "./utils/result-file-chunker/old_scripts/chunkTSVFileUtil.py", - "@type": "File" - }, - { - "@id": "./utils/result-file-chunker/old_scripts/cleaningUtils.py", - "@type": "File" - }, - { - "@id": "./utils/result-file-chunker/old_scripts/run_result_file_chunker.py", - "@type": "File" - }, - { - "@id": "./utils/result-file-chunker/create_chunks_file.cwl", - "@type": "File" - }, - { - "@id": "./utils/result-file-chunker/Dockerfile", - "@type": "File" - }, - { - "@id": "./utils/result-file-chunker/result_chunker.cwl", - "@type": "File" - }, - { - "@id": 
"./utils/result-file-chunker/result_chunker_subwf.cwl", - "@type": "File" - }, - { - "@id": "./utils/result-file-chunker/split_fasta.cwl", - "@type": "File" - }, - { - "@id": "./utils/result-file-chunker/split_fasta_by_size.sh", - "@type": "File" - }, - { - "@id": "./utils/result-file-chunker/split_tsv.cwl", - "@type": "File" - }, - { - "@id": "./utils/return_directory/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./utils/return_directory/return_directory.cwl" - } - ] - }, - { - "@id": "./utils/return_directory/return_directory.cwl", - "@type": "File" - }, - { - "@id": "./utils/clean_fasta_headers.cwl", - "@type": "File" - }, - { - "@id": "./utils/concatenate.cwl", - "@type": "File" - }, - { - "@id": "./utils/count_fasta.cwl", - "@type": "File" - }, - { - "@id": "./utils/count_number_lines.cwl", - "@type": "File" - }, - { - "@id": "./utils/make_flatten.cwl", - "@type": "File" - }, - { - "@id": "./utils/make_tab_sep.cwl", - "@type": "File" - }, - { - "@id": "./utils/move.cwl", - "@type": "File" - }, - { - "@id": "./utils/multiple-gunzip.cwl", - "@type": "File" - }, - { - "@id": "./utils/qc-flag.cwl", - "@type": "File" - }, - { - "@id": "./utils/size_file.cwl", - "@type": "File" - }, - { - "@id": "./utils/touch_file.cwl", - "@type": "File" - }, - { - "@id": "./workflows/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./workflows/conditionals/" - }, - { - "@id": "./workflows/subworkflows/" - }, - { - "@id": "./workflows/gos_wf.cwl" - } - ] - }, - { - "@id": "./workflows/conditionals/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./workflows/conditionals/megahit.cwl" - }, - { - "@id": "./workflows/conditionals/qc.cwl" - }, - { - "@id": "./workflows/conditionals/raw-reads-2.cwl" - }, - { - "@id": "./workflows/conditionals/rna-prediction.cwl" - } - ] - }, - { - "@id": "./workflows/conditionals/megahit.cwl", - "@type": "File" - }, - { - "@id": "./workflows/conditionals/qc.cwl", - "@type": "File" - }, - { - "@id": 
"./workflows/conditionals/raw-reads-2.cwl", - "@type": "File" - }, - { - "@id": "./workflows/conditionals/rna-prediction.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./workflows/subworkflows/amplicon/" - }, - { - "@id": "./workflows/subworkflows/assembly/" - }, - { - "@id": "./workflows/subworkflows/cmsearch/" - }, - { - "@id": "./workflows/subworkflows/functional-annotation/" - }, - { - "@id": "./workflows/subworkflows/hmmer/" - }, - { - "@id": "./workflows/subworkflows/raw_reads/" - }, - { - "@id": "./workflows/subworkflows/chunking-subwf-IPS.cwl" - }, - { - "@id": "./workflows/subworkflows/classify-otu-visualise.cwl" - }, - { - "@id": "./workflows/subworkflows/final_chunking.cwl" - }, - { - "@id": "./workflows/subworkflows/func_summaries.cwl" - }, - { - "@id": "./workflows/subworkflows/other_ncrnas.cwl" - }, - { - "@id": "./workflows/subworkflows/rna_prediction-sub-wf.cwl" - }, - { - "@id": "./workflows/subworkflows/seqprep-qc-cond-subwf.cwl" - }, - { - "@id": "./workflows/subworkflows/seqprep-subwf.cwl" - } - ] - }, - { - "@id": "./workflows/subworkflows/amplicon/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./workflows/subworkflows/amplicon/ITS-wf.cwl" - }, - { - "@id": "./workflows/subworkflows/amplicon/trimming-not-empty-subwf.cwl" - }, - { - "@id": "./workflows/subworkflows/amplicon/trim_and_reformat_reads.cwl" - } - ] - }, - { - "@id": "./workflows/subworkflows/amplicon/ITS-wf.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/amplicon/trimming-not-empty-subwf.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/amplicon/trim_and_reformat_reads.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/assembly/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./workflows/subworkflows/assembly/antismash/" - }, - { - "@id": "./workflows/subworkflows/assembly/cgc/" - }, - { - "@id": "./workflows/subworkflows/assembly/diamond/" - 
}, - { - "@id": "./workflows/subworkflows/assembly/accessioning-prediction_subwf.cwl" - }, - { - "@id": "./workflows/subworkflows/assembly/change_formats_and_names.cwl" - }, - { - "@id": "./workflows/subworkflows/assembly/deal_with_functional_annotation.cwl" - }, - { - "@id": "./workflows/subworkflows/assembly/eggnog-subwf.cwl" - }, - { - "@id": "./workflows/subworkflows/assembly/Func_ann_and_post_processing-subwf.cwl" - }, - { - "@id": "./workflows/subworkflows/assembly/kegg_analysis.cwl" - } - ] - }, - { - "@id": "./workflows/subworkflows/assembly/antismash/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./workflows/subworkflows/assembly/antismash/antismash-annotation-subsubwf.cwl" - }, - { - "@id": "./workflows/subworkflows/assembly/antismash/antismash_chunking_subwf.cwl" - }, - { - "@id": "./workflows/subworkflows/assembly/antismash/main_antismash_subwf.cwl" - }, - { - "@id": "./workflows/subworkflows/assembly/antismash/no_antismash_subwf.cwl" - }, - { - "@id": "./workflows/subworkflows/assembly/antismash/split_subwf_perl.cwl" - } - ] - }, - { - "@id": "./workflows/subworkflows/assembly/antismash/antismash-annotation-subsubwf.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/assembly/antismash/antismash_chunking_subwf.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/assembly/antismash/main_antismash_subwf.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/assembly/antismash/no_antismash_subwf.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/assembly/antismash/split_subwf_perl.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/assembly/cgc/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./workflows/subworkflows/assembly/cgc/CGC-subwf.cwl" - }, - { - "@id": "./workflows/subworkflows/assembly/cgc/predict_proteins_assemblies.cwl" - } - ] - }, - { - "@id": "./workflows/subworkflows/assembly/cgc/CGC-subwf.cwl", - "@type": "File" - }, - { - "@id": 
"./workflows/subworkflows/assembly/cgc/predict_proteins_assemblies.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/assembly/diamond/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./workflows/subworkflows/assembly/diamond/diamond-postprocessing_subwf.cwl" - }, - { - "@id": "./workflows/subworkflows/assembly/diamond/diamond-subwf.cwl" - } - ] - }, - { - "@id": "./workflows/subworkflows/assembly/diamond/diamond-postprocessing_subwf.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/assembly/diamond/diamond-subwf.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/assembly/accessioning-prediction_subwf.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/assembly/change_formats_and_names.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/assembly/deal_with_functional_annotation.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/assembly/eggnog-subwf.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/assembly/Func_ann_and_post_processing-subwf.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/assembly/kegg_analysis.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/cmsearch/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./workflows/subworkflows/cmsearch/cmsearch-condition.cwl" - }, - { - "@id": "./workflows/subworkflows/cmsearch/cmsearch-multimodel-assembly.cwl" - }, - { - "@id": "./workflows/subworkflows/cmsearch/cmsearch-multimodel-raw-data.cwl" - } - ] - }, - { - "@id": "./workflows/subworkflows/cmsearch/cmsearch-condition.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/cmsearch/cmsearch-multimodel-assembly.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/cmsearch/cmsearch-multimodel-raw-data.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/functional-annotation/", - "@type": "Dataset", - "hasPart": [ - { - "@id": 
"./workflows/subworkflows/functional-annotation/functional-annotation.cwl" - }, - { - "@id": "./workflows/subworkflows/functional-annotation/post-proccessing-go-pfam-stats-subwf.cwl" - } - ] - }, - { - "@id": "./workflows/subworkflows/functional-annotation/functional-annotation.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/functional-annotation/post-proccessing-go-pfam-stats-subwf.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/hmmer/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./workflows/subworkflows/hmmer/db/" - }, - { - "@id": "./workflows/subworkflows/hmmer/assembly-SRR1620013_smallhmmscan_subwf" - }, - { - "@id": "./workflows/subworkflows/hmmer/assembly-SRR1620013_small_CDS.faa" - }, - { - "@id": "./workflows/subworkflows/hmmer/chunking-subwf-hmmscan.cwl" - }, - { - "@id": "./workflows/subworkflows/hmmer/chunking-subwf-hmmsearch.cwl" - }, - { - "@id": "./workflows/subworkflows/hmmer/hmmscan.yml" - } - ] - }, - { - "@id": "./workflows/subworkflows/hmmer/db/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./workflows/subworkflows/hmmer/db/test.hmm" - }, - { - "@id": "./workflows/subworkflows/hmmer/db/test.hmm.h3f" - }, - { - "@id": "./workflows/subworkflows/hmmer/db/test.hmm.h3i" - }, - { - "@id": "./workflows/subworkflows/hmmer/db/test.hmm.h3m" - }, - { - "@id": "./workflows/subworkflows/hmmer/db/test.hmm.h3p" - } - ] - }, - { - "@id": "./workflows/subworkflows/hmmer/db/test.hmm", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/hmmer/db/test.hmm.h3f", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/hmmer/db/test.hmm.h3i", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/hmmer/db/test.hmm.h3m", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/hmmer/db/test.hmm.h3p", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/hmmer/assembly-SRR1620013_smallhmmscan_subwf", - "@type": "File" - }, - { - "@id": 
"./workflows/subworkflows/hmmer/assembly-SRR1620013_small_CDS.faa", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/hmmer/chunking-subwf-hmmscan.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/hmmer/chunking-subwf-hmmsearch.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/hmmer/hmmscan.yml", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/raw_reads/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./workflows/subworkflows/raw_reads/cgc/" - }, - { - "@id": "./workflows/subworkflows/raw_reads/Func_ann_and_post_proccessing-subwf.cwl" - }, - { - "@id": "./workflows/subworkflows/raw_reads/mOTUs-workflow.cwl" - } - ] - }, - { - "@id": "./workflows/subworkflows/raw_reads/cgc/", - "@type": "Dataset", - "hasPart": [ - { - "@id": "./workflows/subworkflows/raw_reads/cgc/CGC-subwf.cwl" - }, - { - "@id": "./workflows/subworkflows/raw_reads/cgc/predict_proteins_reads.cwl" - } - ] - }, - { - "@id": "./workflows/subworkflows/raw_reads/cgc/CGC-subwf.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/raw_reads/cgc/predict_proteins_reads.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/raw_reads/Func_ann_and_post_proccessing-subwf.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/raw_reads/mOTUs-workflow.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/chunking-subwf-IPS.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/classify-otu-visualise.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/final_chunking.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/func_summaries.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/other_ncrnas.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/rna_prediction-sub-wf.cwl", - "@type": "File" - }, - { - "@id": "./workflows/subworkflows/seqprep-qc-cond-subwf.cwl", - "@type": "File" - }, - { - "@id": 
"./workflows/subworkflows/seqprep-subwf.cwl", - "@type": "File" - }, - { - "@id": "./workflows/gos_wf.cwl", - "@type": "File" - }, - { - "@id": "./config.yml", - "@type": "File" - }, - { - "@id": "./create_yml.py", - "@type": "File" - }, - { - "@id": "./dependencies.md", - "@type": "File" - }, - { - "@id": "./learning_cwl.md", - "@type": "File" - }, - { - "@id": "./LICENSE", - "@type": "File" - }, - { - "@id": "./README.md", - "@type": "File" - }, - { - "@id": "./run_wf.sh", - "@type": "File" - }, - { - "@id": "./slurm_run.sh", - "@type": "File" - } - ] -} \ No newline at end of file diff --git a/run_wf.sh b/run_wf.sh index abf35ed8..2e0082ff 100755 --- a/run_wf.sh +++ b/run_wf.sh @@ -1,5 +1,7 @@ #!/bin/bash +METAGOFLOW_VERSION="https://github.com/emo-bon/MetaGOflow/releases/tag/v1.0.0" + # default values # SCRIPT_PATH=$(realpath "$0") PIPELINE_DIR=$(dirname "${SCRIPT_PATH}") @@ -29,6 +31,7 @@ Script arguments. " } +# [TODO] Consider adding a -t argument to run using toil. while getopts :y:f:r:e:u:k:c:d:m:n:l:sph option; do case "${option}" in y) YML=${OPTARG} ;; @@ -82,10 +85,9 @@ _check_mandatory() { } _check_reads() { - #check forward and reverse reads both present - #check if single reads then no other readsgiven - - # BASH SYNTAX: + # check forward and reverse reads both present + # check if single reads then no other readsgiven + # BASH SYNTAX: # to check if a variable has value: # [ -z "$var" ] && echo "Empty" @@ -107,12 +109,12 @@ _check_mandatory "$NAME" "-n" _check_mandatory "$RUN_DIR" "-d" _check_reads "$FORWARD_READS" "$REVERSE_READS" - - # ----------------------------- environment & variables ----------------------------- # # load required environments and packages before running +export CWD=$(pwd) + export TOIL_SLURM_ARGS="--array=1-${LIMIT_QUEUE}%20" #schedule 100 jobs 20 running at one time export CWL="${PIPELINE_DIR}/workflows/gos_wf.cwl" @@ -125,13 +127,11 @@ export TMPDIR=${RUN_DIR}/tmp export OUT_DIR=${RUN_DIR} export 
LOG_DIR=${OUT_DIR}/log-dir/${NAME} export OUT_DIR_FINAL=${OUT_DIR}/results -export PROV_DIR=${OUT_DIR}/prov export CACHE_DIR=${OUT_DIR}/cache -mkdir -p "${OUT_DIR_FINAL}" "${TMPDIR}" "${PROV_DIR}" - #"${CACHE_DIR}" ${JOB_TOIL_FOLDER}" "${LOG_DIR}" +mkdir -p "${OUT_DIR_FINAL}" "${TMPDIR}" -export RENAMED_YML_TMP=${RUN_DIR}/"${NAME}"_temp.yml -export RENAMED_YML=${RUN_DIR}/"${NAME}".yml +export EXTENDED_CONFIG_YAML_TMP=${RUN_DIR}/"${NAME}"_temp.yml +export EXTENDED_CONFIG_YAML=${RUN_DIR}/"${NAME}".yml # Get study id in case of ENA fetch tool if [[ $ENA_RUN_ID != "" ]]; @@ -158,18 +158,16 @@ then export PATH_ENA_RAW_DATA=${PIPELINE_DIR}/${OUT_DIR}/raw_data_from_ENA/${ENA_STUDY_ID}/raw/ - fi - # ----------------------------- prepare yml file ----------------------------- # echo "Writing yaml file" # DO NOT leave spaces after "\" in the end of a line -python3 create_yml.py \ +python utils/create_yml.py \ -y "${YML}" \ - -o "${RENAMED_YML_TMP}" \ + -o "${EXTENDED_CONFIG_YAML_TMP}" \ -l "${PATH_ENA_RAW_DATA}" \ -f "${PIPELINE_DIR}/${FORWARD_READS}" \ -r "${PIPELINE_DIR}/${REVERSE_READS}" \ @@ -178,10 +176,10 @@ python3 create_yml.py \ -e "${ENA_RUN_ID}" mv eosc-wf.yml ${RUN_DIR}/ -cat ${RUN_DIR}/eosc-wf.yml ${RENAMED_YML_TMP} > ${RENAMED_YML} -rm ${RENAMED_YML_TMP} +cat ${RUN_DIR}/eosc-wf.yml ${EXTENDED_CONFIG_YAML_TMP} > ${EXTENDED_CONFIG_YAML} +rm ${EXTENDED_CONFIG_YAML_TMP} rm ${RUN_DIR}/eosc-wf.yml - +cp config.yml ${RUN_DIR}/ # ----------------------------- running pipeline ----------------------------- # @@ -192,7 +190,6 @@ TOIL_PARAMS+=( --preserve-entire-environment --batchSystem slurm --disableChaining - # --provenance "${PROV_DIR}" --disableCaching --logFile "${LOG_DIR}/${NAME}.log" --jobStore "${JOB_TOIL_FOLDER}/${NAME}" @@ -203,24 +200,64 @@ TOIL_PARAMS+=( --retryCount 2 --logDebug "$CWL" - "$RENAMED_YML" + "$EXTENDED_CONFIG_YAML" ) -# Toil parameters documentation +# Toil parameters documentation - just for your information # --disableChaining Disables 
chaining of jobs (chaining uses one job's resource allocation for its successor job if possible). # --preserve-entire-environment Need to propagate the env vars for Singularity, etc., into the HPC jobs # --disableProgress Disables the progress bar shown when standard error is a terminal. # --retryCount Number of times to retry a failing job before giving up and labeling job failed. default=1 # --disableCaching Disables caching in the file store. This flag must be set to use a batch system that does not support caching such as Grid Engine, Parasol, LSF, or Slurm. - -# # COMMENT IN TO RUN THE TOIL VERSION +# COMMENT IN TO RUN THE TOIL VERSION and MUTE the cwltool case in line 222. # echo "toil-cwl-runner" "${TOIL_PARAMS[@]}" # toil-cwl-runner "${TOIL_PARAMS[@]}" +# -------------------------------------------- + +# Run the metaGOflow workflow using cwltool +cwltool --parallel ${SINGULARITY} --outdir ${OUT_DIR_FINAL} ${CWL} ${EXTENDED_CONFIG_YAML} + +# -------------------------------------------- + +# Edit output structure +rm -rf ${TMPDIR} -cwltool --debug ${SINGULARITY} --provenance ${PROV_DIR} --outdir ${OUT_DIR_FINAL} ${CWL} ${RENAMED_YML} +cd ${OUT_DIR}/results/functional-annotation/ + +count=`ls -1 *.chunks 2>/dev/null | wc -l` +if [ $count != 0 ] +then + rm *.chunks +fi + +count=`ls -1 *CDS.I5_001.tsv.gz 2>/dev/null | wc -l` +if [ $count != 0 ] +then + + fullfile=*.merged.CDS.I5_001.tsv.gz + prefix=$(echo $fullfile | sed 's/[^_]*$//') + prefix=${prefix::-1} + + ls *.merged.CDS.I5_*.tsv.gz | xargs -I {} cat {} > allfiles.gz + ls *.merged.CDS.I5_*.tsv.gz | xargs -I {} rm {} + mv allfiles.gz ${prefix}".tsv.gz" +fi + +cd ${CWD} + + +# -------------------------------------------- + +# Build RO-crate +rocrate init -c ${RUN_DIR} + +if [ -z "$ENA_RUN_ID" ]; then + ENA_RUN_ID="None" +fi +python utils/edit-ro-crate.py ${OUT_DIR} ${EXTENDED_CONFIG_YAML} ${ENA_RUN_ID} ${METAGOFLOW_VERSION} -# --cachedir ${CACHE_DIR} -# --leave-tmpdir --leave-outputs +# 
-------------------------------------------- +rm -r ${OUT_DIR} diff --git a/slurm_run.sh b/slurm_run.sh index ce4e2603..c29387fd 100755 --- a/slurm_run.sh +++ b/slurm_run.sh @@ -8,18 +8,33 @@ #SBATCH --mail-user=haris.zafr@gmail.com #SBATCH --mail-type=ALL #SBATCH --requeue -#SBATCH --job-name="taxInv" -#SBATCH --output=tax_invent_fat_water.output +#SBATCH --job-name="rocr-tax" +#SBATCH --output=rocrates_tax.output + +# Deactivate conda if already there +conda deactivate # Load module module load python/3.7.8 module load singularity/3.7.1 -# Run the wf -#./run_wf.sh -f test_input/wgs-paired-SRR1620013_1.fastq.gz -r test_input/wgs-paired-SRR1620013_2.fastq.gz -n wgs-SRR1620013 -d MOTUS -s -./run_wf.sh -f test_input/DBB_AABVOSDA_1_1_HMNJKDSX3.UDI256_clean.fastq.gz -r test_input/DBB_AABVOSDA_1_2_HMNJKDSX3.UDI256_clean.fastq.gz -n water-sample-DBB_AABVOSDA_1_2_HMNJKDSX3.UDI256 -d WATER_SAMPLE_TAX_FAT -s +# Run the wf with mini dataset +./run_wf.sh -f test_input/wgs-paired-SRR1620013_1.fastq.gz -r test_input/wgs-paired-SRR1620013_2.fastq.gz -n mini_dataset -d MINI_DATASET -s + +# Run the wf with short dataset +# ./run_wf.sh -f test_input/test_1_fwd_HWLTKDRXY_600000.fastq.gz -r test_input/test_2_rev_HWLTKDRXY_600000.fastq.gz -n dev_dataset -d DEV_DATASET -s + +# To run the manuscript use cases: +# marine sediment +# ./run_wf.sh -f test_input/DBH_AAAIOSDA_1_1_HMNJKDSX3.UDI224_clean.fastq.gz -r test_input/DBH_AAAIOSDA_1_2_HMNJKDSX3.UDI224_clean.fastq.gz -n DBH_dataset -d marine_sediment_dbh -s + +# column water +# ./run_wf.sh -f test_input/DBB_AABVOSDA_1_1_HMNJKDSX3.UDI256_clean.fastq.gz -r test_input/DBB_AABVOSDA_1_2_HMNJKDSX3.UDI256_clean.fastq.gz -n DBB_dataset -d water_column_dbb -s + +# To run an ENA run # ./run_wf.sh -e ERR855786 -d TEST_SIMPLIFIED_PFAM -n ERR855786 -s + # Disable the module module purge diff --git a/software_versions.tsv b/software_versions.tsv new file mode 100644 index 00000000..93e780ae --- /dev/null +++ b/software_versions.tsv @@ -0,0 +1,23 @@ 
+resource type description version step in metaGOflow official_url Docker pull command +fetch_tool software Allows downloading data from ENA 1 - https://github.com/EBI-Metagenomics/fetch_tool hariszaf/fetch-tool:latest +debian_slim OS Debian is a Linux distribution that's composed entirely of free and open-source software 11.5 taxonomy inventory https://www.debian.org/releases/ debian:stable-slim +fastp software 0.20.0 quality control https://github.com/OpenGene/fastp microbiomeinformatics/pipeline-v5.fastp:0.20.0 +easel software computational analysis of biological sequences using probabilistic models. Easel is used by HMMER, the profile hidden Markov model software that underlies several protein and DNA sequence family databases such as Pfam, and by Infernal. 0.45h taxonomy inventory https://github.com/EddyRivasLab/easel microbiomeinformatics/pipeline-v5.easel:v0.45h +python3 programming language a high-level, general-purpose programming language 3.3 - https://www.python.org microbiomeinformatics/pipeline-v5.python3:v3.1 +python2 programming language a high-level, general-purpose programming language 2.7.10 - https://www.python.org microbiomeinformatics/pipeline-v5.python2:v1 +bash-scripts script set of stand-alone scripts for several tasks 1.3 taxonomy inventory https://hub.docker.com/r/microbiomeinformatics/pipeline-v5.bash-scripts microbiomeinformatics/pipeline-v5.bash-scripts:v1.3 +cmsearch software search CM(s) against a nucleotide sequence database, using profile HMM(s) to prefilter the database. 1.1.2 taxonomy inventory https://github.com/EddyRivasLab/infernal microbiomeinformatics/pipeline-v5.cmsearch:v1.1.2 +cmsearch-deoverlap software convert cmsearch output 0.02 taxonomy inventory https://github.com/nawrockie/cmsearch_tblout_deoverlap microbiomeinformatics/pipeline-v5.cmsearch-deoverlap:v0.02 +alpine OS Alpine Linux is a security-oriented, lightweight Linux distribution based on musl libc and busybox.
3.7 taxonomy inventory, CGC https://www.alpinelinux.org alpine:3.7 +mapseq software a set of fast and accurate sequence read classification tools designed to assign taxonomy and OTU classifications to ribosomal RNA sequences. 1.2.3 taxonomy inventory https://github.com/jfmrod/MAPseq microbiomeinformatics/pipeline-v5.mapseq:v1.2.3 +mapseq2biom software convert mapseq output 1 taxonomy inventory https://github.com/EBI-Metagenomics/pipeline-v5/blob/develop/tools/RNA_prediction/mapseq2biom/mapseq2biom.pl microbiomeinformatics/pipeline-v5.mapseq2biom:v1.0 +KronaTools software Krona allows hierarchical data to be explored with zooming, multi-layered pie charts. 2.7.1 taxonomy inventory https://github.com/marbl/Krona microbiomeinformatics/pipeline-v5.krona:v2.7.1 +biom convert software Biom convert wrapper 2.1.6 taxonomy inventory https://pypi.org/project/biom-format/ microbiomeinformatics/pipeline-v5.biom-convert:v2.1.6 +FragGeneScan software FragGeneScan is an application for finding (fragmented) genes in short reads. 1.31 CGC https://github.com/EBI-Metagenomics/fraggenescan hariszaf/pipeline-v5.fraggenescan:v1.31.1 +Combined Gene Caller software script from the MGnify group for post-processing of FGS and Prodigal 1.0.1 CGC https://github.com/EBI-Metagenomics/pipeline-v5/tree/pipeline_5.1/tools/Combined_gene_caller microbiomeinformatics/pipeline-v5.protein-post-processing:v1.0.1 +eggNOG-mapper software EggNOG-mapper is a tool for fast functional annotation of novel sequences. It uses precomputed orthologous groups and phylogenies from the eggNOG database ( http://eggnog5.embl.de ) to transfer functional information from fine-grained orthologs only. 
2.1.8 functional annotation https://github.com/eggnogdb/eggnog-mapper/ hariszaf/pipeline-v5.eggnog:v2.1.8 +interproscan database InterPro is a database which integrates together predictive information about proteins’ function from a number of partner resources, giving an overview of the families that a protein belongs to and the domains and sites it contains. 5.57-90.0 functional annotation https://github.com/ebi-pf-team/interproscan hariszaf/pipeline-v5.interproscan:v5.57-90.0 +hmmer software HMMER searches biological sequence databases for homologous sequences, using either single sequences or multiple sequence alignments as queries. HMMER implements a technology called "profile hidden Markov models" (profile HMMs). 3.2.1 functional annotation https://github.com/EddyRivasLab/hmmer microbiomeinformatics/pipeline-v5.hmmer:v3.2.1 +megahit software MEGAHIT is an ultra-fast and memory-efficient NGS assembler. It is optimized for metagenomes, but also works well on generic single genome assembly (small or mammalian size) and single-cell assembly. 1.2.9 assembly https://github.com/voutcn/megahit quay.io/biocontainers/megahit:1.2.9--h2e03b76_1 +mOTUs software The mOTU profiler is a computational tool that estimates relative taxonomic abundance of known and currently unknown microbial community members using metagenomic shotgun sequencing data.
2.5.1 taxonomy inventory https://github.com/motu-tool/mOTUs microbiomeinformatics/pipeline-v5.motus:v2.5.1 +GO-slim script Format IPS output 1.0.0 functional annotation https://github.com/EBI-Metagenomics/pipeline-v5/blob/master/tools/GO-slim/go_summary_pipeline-1.0.py microbiomeinformatics/pipeline-v5.go-summary:v1.0 \ No newline at end of file diff --git a/tools/qc-filtering/qc-filtering.cwl b/tools/qc-filtering/qc-filtering.cwl index 6dd1a63d..d875ee33 100644 --- a/tools/qc-filtering/qc-filtering.cwl +++ b/tools/qc-filtering/qc-filtering.cwl @@ -30,12 +30,12 @@ inputs: doc: > Number of originally submitted sequences as in the user submitted FASTQ file - single end FASTQ or pair end merged FASTQ file. - stats_file_name: - type: string - default: stats_summary - label: 'Post QC stats output file name' - doc: > - Give a name for the file which will hold the stats after QC. + # stats_file_name: + # type: string + # default: stats_summary + # label: 'Post QC stats output file name' + # doc: > + # Give a name for the file which will hold the stats after QC. 
min_length: type: int default: 100 # For assemblies we need to set this in the input YAML to 500 @@ -56,13 +56,13 @@ outputs: label: Stats summary output file type: File outputBinding: - glob: $(inputs.stats_file_name) + glob: $(inputs.seq_file.nameroot).qc_summary arguments: - position: 2 valueFrom: $(inputs.seq_file.nameroot).fasta - position: 3 - valueFrom: $(inputs.stats_file_name) + valueFrom: $(inputs.seq_file.nameroot).qc_summary - position: 4 valueFrom: $(inputs.submitted_seq_count) - position: 5 diff --git a/create_yml.py b/utils/create_yml.py similarity index 90% rename from create_yml.py rename to utils/create_yml.py index 52ce221e..f0939742 100755 --- a/create_yml.py +++ b/utils/create_yml.py @@ -5,7 +5,6 @@ import os import yaml as yml - db_fields = [ "ssu_db", "lsu_db", @@ -90,13 +89,6 @@ def db_dir(db_path, tools_path, yaml_path): help="The accession number in ENA of the run to be analysed", required=False ) - # parser.add_argument( - # "-s", - # "--study_accession_number", - # dest="study_accession_number", - # help="The accession number in ENA of the study of the run", - # required=False - # ) parser.add_argument( "-p", "--private", @@ -126,10 +118,6 @@ def db_dir(db_path, tools_path, yaml_path): # load template yml file and append database path template_yml = db_dir(args.db_dir, args.tools_dir, args.yml) - # paired_reads = [args.fr.split("/")[-1].split(".fastq.gz")[0], args.rr.split("/")[-1].split(".fastq.gz")[0]] - # paired_reads_names = '"' + paired_reads[0] + '", "' + paired_reads[1] + '"' - - # Building the .yml file with open(args.output, "w") as output_yml: @@ -137,7 +125,9 @@ def db_dir(db_path, tools_path, yaml_path): yaml.dump(template_yml, output_yml) - with open("config.yml", "r") as config_yml: + cwd = os.getcwd() + config_file = os.path.join(cwd, "config.yml") + with open(config_file, "r") as config_yml: config = yml.safe_load(config_yml) diff --git a/utils/edit-ro-crate.py b/utils/edit-ro-crate.py new file mode 100755 index 
00000000..832e680c --- /dev/null +++ b/utils/edit-ro-crate.py @@ -0,0 +1,470 @@ +#! /usr/bin/env python3 + +import sys +import argparse +import textwrap +from rocrate.rocrate import ROCrate +from rocrate.model.person import Person +import datetime + +ena_accession_raw_data= "Raw sequence data and laboratory sequence generation metadata", + +descriptions = [ + { + "@id": "sequence-categorisation/", + "@type": "Dataset", + "name": "Sequence categorisation", + "description": "Identify specific loci in the sample." + }, + { "@id": "functional_annotation/", + "@type": "Dataset", + "name": "Functional annotation results", + "description": "Functional annotation of merged reads"}, + { + "@id": "config.yml", + "@type": "File", + "name": "MetaGOflow configuration file", + "description": "The configuration file through which the user sets the values of the metaGOflow parameters.", + "encodingFormat": "text/yaml" + }, + { + "@id": "fastp.html", + "@type": "File", + "name": "FASTP analysis of raw sequence data", + "description": "Quality control and preprocessing of FASTQ files", + "encodingFormat": "text/html" + }, + { + "@id": "final.contigs.fa", + "@type": "File", + "name": "FASTA formatted contig sequences", + "description": "These are the assembled contig sequences from the merged reads in FASTA format", + "encodingFormat": "text/plain" + }, + { + "@id": "go.stats", + "@type": "File", + "name": "Geno Ontology summary statistics", + "encodingFormat": "text/plain" + }, + { + "@id": "interproscan.stats", + "@type": "File", + "name": "InterProScan summary statistics", + "encodingFormat": "text/plain" + }, + { + "@id": "ko.stats", + "@type": "File", + "name": "Kegg Ontology summary statistics", + "encodingFormat": "text/plain" + }, + { + "@id": "orf.stats", + "@type": "File", + "name": "ORF summary statistics", + "encodingFormat": "text/plain" + }, + { + "@id": "pfam.stats", + "@type": "File", + "name": "Pfam summary statistcs", + "encodingFormat": "text/plain" + }, + { + 
"@id": ".merged_CDS.I5.tsv.gz", + "@type": "File", + "name": "Merged contigs CDS I5 summary", + "encodingFormat": "application/zip" + }, + { + "@id": ".merged.hmm.tsv.gz", + "@type": "File", + "name": "Merged contigs HMM summary", + "encodingFormat": "application/zip" + }, + { + "@id": ".merged.summary.go", + "@type": "File", + "name": "Merged contigs GO summary", + "encodingFormat": "text/plain" + }, + { + "@id": ".merged.summary.go_slim", + "@type": "File", + "name": "Merged contigs InterProScan slim", + "encodingFormat": "text/plain" + }, + { + "@id": ".merged.summary.ips", + "@type": "File", + "name": "Merged contigs InterProScan", + "encodingFormat": "text/plain" + }, + { + "@id": ".merged.summary.ko", + "@type": "File", + "name": "Merged contigs KO summary", + "encodingFormat": "text/plain" + }, + { + "@id": ".merged.summary.pfam", + "@type": "File", + "name": "Merged contigs PFAM summary", + "encodingFormat": "text/plain" + }, + { + "@id": "RNA-counts", + "@type": "File", + "name": "Numbers of RNA's counted", + "encodingFormat": "text/plain" + }, + { + "@id": "krona.html", + "@type": "File", + "name": "Krona summary of LSU taxonomic inventory", + "encodingFormat": "application/html" + }, + { + "@id": ".merged_LSU.fasta.mseq.gz", + "@type": "File", + "name": "LSU sequences used for indentification", + "encodingFormat": "application/zip" + }, + { + "@id": ".merged_LSU.fasta.mseq_hdf5.biom", + "@type": "File", + "name": "BIOM formatted hdf5 taxon counts for LSU sequences", + "encodingFormat": "application/json-ld" + }, + { + "@id": ".merged_LSU.fasta.mseq_json.biom", + "@type": "File", + "name": "BIOM formatted taxon counts for LSU sequences", + "encodingFormat": "application/json-ld" + }, + { + "@id": ".merged_LSU.fasta.mseq.tsv", + "@type": "File", + "name": "Tab-separated formatted taxon counts for LSU sequences", + "encodingFormat": "application/json-ld" + }, + { + "@id": ".merged_LSU.fasta.mseq.txt", + "@type": "File", + "name": "Text-based taxon counts 
for LSU sequences", + "encodingFormat": "application/json-ld" + }, + { + "@id": "krona.html", + "@type": "File", + "name": "Krona summary of SSU taxonomic inventory", + "encodingFormat": "text/html" + }, + { + "@id": ".merged_SSU.fasta.mseq.gz", + "@type": "File", + "name": "LSU sequences used for indentification", + "encodingFormat": "application/zip" + }, + { + "@id": ".merged_SSU.fasta.mseq_hdf5.biom", + "@type": "File", + "name": "BIOM formatted hdf5 taxon counts for SSU sequences", + "encodingFormat": "application/json-ld" + }, + { + "@id": ".merged_SSU.fasta.mseq_json.biom", + "@type": "File", + "name": "BIOM formatted taxon counts for SSU sequences", + "encodingFormat": "application/json-ld" + }, + { + "@id": ".merged_SSU.fasta.mseq.tsv", + "@type": "File", + "name": "Tab-separated formatted taxon counts for SSU sequences", + "encodingFormat": "application/json-ld" + }, + { + "@id": ".merged_SSU.fasta.mseq.txt", + "@type": "File", + "name": "Text-based formatted taxon counts for SSU sequences", + "encodingFormat": "application/json-ld" + }, + { + "@id": ".all.tblout.deoverlapped", + "@type": "File", + "name": "Sequence hits against covariance model databases. 
Mandatory to run partially the functional annotation step of metaGOflow.", + "encodingFormat": "text/plain" + }, + { + "@id": ".merged.fasta", + "@type": "File", + "name": "Merged filtered reads.", + "encodingFormat": "text/plain" + }, + { + "@id": ".fastq.trimmed.fasta", + "@type": "File", + "name": "Filtered .fastq file of the single-end reads (forward/reverse).", + "encodingFormat": "text/plain" + }, + { + "@id": ".fastq.trimmed.qc_summary", + "@type": "File", + "name": "Summary with statistics of the single-end reads (forward/reverse).", + "encodingFormat": "text/plain" + }, + { + "@id": ".all.tblout.deoverlapped", + "@type": "File", + "name": "Sequence hits against covariance model databases", + "encodingFormat": "text/plain" + }, + { + "@id": ".merged.unfiltered_fasta", + "@type": "File", + "name": "", + "encodingFormat": "text/plain" + }, + { + "@id": ".motus.tsv", + "@type": "File", + "name": "", + "encodingFormat": "text/plain" + }, + { + "@id": ".merged.qc_summary", + "@type": "File", + "name": "Summary with statistics of the merged reads.", + "encodingFormat": "text/plain" + }, + { + "@id": "SSU.fasta.gz", + "@type": "File", + "name": "SSU sequences.", + "encodingFormat": "application/zip" + }, + { + "@id": "SSU_rRNA_archaea.RF01959.fa.gz", + "@type": "File", + "name": "SSU sequences mapping to Archaea.", + "encodingFormat": "application/zip" + }, + { + "@id": "SSU_rRNA_bacteria.RF00177.fa.gz", + "@type": "File", + "name": "SSU sequences mapping to Bacteria.", + "encodingFormat": "application/zip" + }, + { + "@id": "SSU_rRNA_eukarya.RF01960.fa.gz", + "@type": "File", + "name": "SSU sequences mapping to Eukaryotes.", + "encodingFormat": "application/zip" + }, + { + "@id": "tmRNA.RF00023.fasta.gz", + "@type": "File", + "name": "Sequences mapping to transfer-messenger RNAs", + "encodingFormat": "application/zip" + }, + { + "@id": "tRNA.RF00005.fasta.gz", + "@type": "File", + "name": "Sequences mapping to tranfer RNAs", + "encodingFormat": "application/zip" 
+ }, + { + "@id": "tRNA-Sec.RF01852.fasta.gz", + "@type": "File", + "name": "Sequences mapping to selenocysteine tRNAs.", + "encodingFormat": "application/zip" + }, + { + "@id": "LSU.fasta.gz", + "@type": "File", + "name": "LSU sequences.", + "encodingFormat": "application/zip" + }, + { + "@id": "LSU_rRNA_archaea.RF02540.fa.gz", + "@type": "File", + "name": "LSU sequences mapping to Archaea.", + "encodingFormat": "application/zip" + }, + { + "@id": "LSU_rRNA_bacteria.RF02541.fa.gz", + "@type": "File", + "name": "LSU sequences mapping to Bacteria.", + "encodingFormat": "application/zip" + }, + { + "@id": "LSU_rRNA_eukarya.RF02543.fa.gz", + "@type": "File", + "name": "LSU sequences mapping to Eukaryotes.", + "encodingFormat": "application/zip" + }, + { + "@id": "RNaseP_bact_a.RF00010.fasta.gz", + "@type": "File", + "name": "Sequences mapping to ribonucleus P bacterial sequences.", + "encodingFormat": "application/zip" + }, + { + "@id": "5_8S.fa.gz", + "@type": "File", + "name": "Sequences mapping to ribonucleus 5_8S rRNA gene.", + "encodingFormat": "application/zip" + }, + { + "@id": "alpha_tmRNA.RF01849.fasta.gz", + "@type": "File", + "name": "Sequences mapping to alpha transfer-messenger RNA.", + "encodingFormat": "application/zip" + }, + { + "@id": "Bacteria_large_SRP.RF01854.fasta.gz", + "@type": "File", + "name": "Sequences mapping to bacterial large signal recognition particle RNAs.", + "encodingFormat": "application/zip" + }, + { + "@id": "Bacteria_small_SRP.RF00169.fasta.gz", + "@type": "File", + "name": "Sequences mapping to bacterial small signal recognition particle RNAs.", + "encodingFormat": "application/zip" + }, + { + "@id": ".merged_CDS.faa", + "@type": "File", + "name": "Coding sequences with amino acids.", + "encodingFormat": "text/plain" + }, + { + "@id": ".merged_CDS.ffn", + "@type": "File", + "name": "Coding sequences with nucleotides.", + "encodingFormat": "text/plain" + } +] + +def main(target_directory, extended_config_yaml, 
ena_run_accession_id, metagoflow_version ): + """ + Edit the output of the `rocrate init` tool. + Map files descriptions ; should be done through the cwl descriptions but we go that way at a later point. + Add extra ids on the ro-crate describing the metaGOflow workflow and the ENA data (if used). + """ + + crate = ROCrate(target_directory) # here we use a complete directory, could we use just the json..? + + for entry in crate.get_entities(): + for description in descriptions: + try: + pk = description["@id"] in entry.id + except: + pass + finally: + if pk : + for k,v in description.items(): + if k not in entry.properties().keys(): + entry.properties()[k] = v + + metagoflow_id = "workflow/metaGOflow" + mg_license_id = "https://www.apache.org/licenses/LICENSE-2.0" + embrc_id = "https://ror.org/0038zss60" + mail_id = "mailto:help@embrc.org" + metagoflow_product_license_id = "https://creativecommons.org/licenses/by/4.0/legalcode" + + + mg_license = crate.add(Person(crate, mg_license_id, properties={ + "@type": "CreativeWork", + "name": "Apache License 2.0", + "identifier": "https://spdx.org/licenses/Apache-2.0.html" + })) + + metagoflow_product_license = crate.add(Person(crate,metagoflow_product_license_id, properties={ + "@type": "CreativeWork", + "name": "Creative Commons (CC-BY 4.0)", + "identifier": "https://spdx.org/licenses/CC-BY-4.0.html" + })) + + embrc_mail = crate.add(Person(crate, mail_id, properties={ + "@type": "ContactPoint", + "contactType": "Help Desk", + "email": "help@embrc.org", + "identifier": "help@embrc.org", + "url": "https://www.embrc.eu/about-us/contact-us" + })) + + + embrc = crate.add(Person(crate, embrc_id, properties={ + "@type": "Organization", + "name": "European Marine Biological Resource Centre", + "url": embrc_id, + "contactPoint": {"@id": mail_id} + })) + + + metagoflow = crate.add(Person(crate, metagoflow_id, properties={ + "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], + "name": "metaGOflow", + "affiliation": 
"University of Flatland", + "author": {"@id": "EMO BON"}, + "url": metagoflow_version, + "license": { "@id": mg_license_id}, + "hasPart": [ + {"@id": "config.yml"}, + {"@id": extended_config_yaml} + ] + })) + + + if ena_run_accession_id != "None": + + ena_id = crate.add(Person(crate, ena_run_accession_id, properties={ + + "@id": ena_accession_raw_data, + "@type": "File", + "name": "ENA accession for run raw sequence data", + "description": "Link to the ENA entry of the raw data used for this analysis.", + "url": "https://www.ebi.ac.uk/ena/browser/view/" + ena_run_accession_id, + "encodingFormat": "text/xml" + })) + + crate.root_dataset.properties()["name"] = "MetaGoFlow Results" + # crate.root_dataset.properties()["license"] = {"@id": metagoflow_product_license_id} + crate.root_dataset.properties()["publisher"] = {"@id": embrc_id} + + print("export...") + + crate.write_zip("".join([target_directory,".zip"])) + + print("..ro-crate as .zip ready.") + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter + # description=textwrap.dedent(desc), + ) + parser.add_argument("target_directory", + help="Name of target directory containing MetaGOflow" +\ + "output" + ) + parser.add_argument("extended_config_yaml", + help="The extened YAML file metaGOflow built and used based on user's config" +\ + "output" + ) + parser.add_argument("ena_run_accession_id", + help="Run accession id in ENA." 
+ ) + parser.add_argument("metagoflow_version", + help="URL pointing to the metaGOflow version used" + ) + + args = parser.parse_args() + + # Run main function + main(args.target_directory, args.extended_config_yaml, args.ena_run_accession_id, args.metagoflow_version) diff --git a/workflows/conditionals/qc.cwl b/workflows/conditionals/qc.cwl index ce205600..a0707d54 100644 --- a/workflows/conditionals/qc.cwl +++ b/workflows/conditionals/qc.cwl @@ -48,8 +48,12 @@ outputs: - m_convert_trimmed_reads_to_fasta/fasta pickValue: first_non_null + m_qc_summary: + type: File + outputSource: m_length_filter/stats_summary_file + # outputs for each of the 2 trimmed seq file - fastp_filtering_json: + fastp_filtering_html: type: File? outputSource: fastp_trim_and_overlap/html_report @@ -154,7 +158,6 @@ steps: in: seq_file: m_convert_trimmed_reads_to_fasta/fasta submitted_seq_count: count_submitted_reads/count - stats_file_name: {default: 'qc_summary'} min_length: min_length_required input_file_format: { default: 'fasta' } out: [ filtered_file, stats_summary_file ] @@ -180,7 +183,7 @@ steps: QCed_reads: m_length_filter/filtered_file sequence_count: m_count_processed_reads/count out_dir_name: qc_stats_folder_for_merged - out: [ output_dir ] + out: [ output_dir, summary_out ] clean_merged_fasta_headers: @@ -189,8 +192,6 @@ steps: sequences: fastp_trim_and_overlap/merged_fastq out: [ sequences_with_cleaned_headers ] - - # WITH REPSECT TO EACH OF THE 2 TRIMMED SEQ FILES # ------------------------------------------------- @@ -217,12 +218,10 @@ steps: in: seq_file: convert_trimmed_reads_to_fasta/fasta submitted_seq_count: count_submitted_reads/count - stats_file_name: {default: 'qc_summary' } min_length: min_length_required input_file_format: { default: 'fasta' } out: [ filtered_file, stats_summary_file ] - count_processed_reads: run: ../../utils/count_fasta.cwl scatter: sequences diff --git a/workflows/conditionals/rna-prediction.cwl b/workflows/conditionals/rna-prediction.cwl index 
a2b5de54..9f327ee2 100644 --- a/workflows/conditionals/rna-prediction.cwl +++ b/workflows/conditionals/rna-prediction.cwl @@ -134,7 +134,7 @@ steps: in: file_list: source: - - tax_chunking/chunked_by_size_files + # - tax_chunking/chunked_by_size_files - rna_prediction/compressed_rnas - other_ncrnas/ncrnas linkMerge: merge_flattened diff --git a/workflows/gos_wf.cwl b/workflows/gos_wf.cwl index 58db8f3d..7e187120 100644 --- a/workflows/gos_wf.cwl +++ b/workflows/gos_wf.cwl @@ -67,16 +67,12 @@ inputs: doc: | metaGOflow will perform the functional annotation step - assemble: type: boolean default: false doc: | metaGOflow will perform the assembly step using MEGAHIT - # # Files to run partially the wf - # ncrna_tab_file: {type: File?} - # Pre-process overrepresentation_analysis: type: boolean @@ -118,7 +114,6 @@ inputs: doc: | fastp parameter setting the percentage of bases allowed to be unqualified (0~100). Default 40 means 40%. - min_length_required: type: int default: 100 @@ -302,13 +297,14 @@ steps: # Merged sequence file - m_qc_stats - m_filtered_fasta + - m_qc_summary # Trimmed PE files - qc-statistics - qc_summary - qc-status - input_files_hashsum_paired - - fastp_filtering_json + - fastp_filtering_html - filtered_fasta - motus_input @@ -513,26 +509,18 @@ steps: - options outputs: - # QC FOR RNA PREDICTION + + # QUALITY CONTROL # --------------------- - qc-statistics: - type: Directory[]? - outputSource: qc_and_merge/qc-statistics - pickValue: all_non_null qc_summary: type: File[]? outputSource: qc_and_merge/qc_summary pickValue: all_non_null - hashsum_paired: - type: File[]? - outputSource: qc_and_merge/input_files_hashsum_paired - pickValue: all_non_null - - fastp_filtering_json_report: + fastp_filtering_html_report: type: File? 
- outputSource: qc_and_merge/fastp_filtering_json + outputSource: qc_and_merge/fastp_filtering_html pickValue: all_non_null m_filtered_fasta: # this is the filtered merged seq file @@ -544,9 +532,9 @@ outputs: outputSource: qc_and_merge/filtered_fasta pickValue: all_non_null - m_qc_stats: - type: Directory? - outputSource: qc_and_merge/m_qc_stats + m_qc_summary: + type: File? + outputSource: qc_and_merge/m_qc_summary motus_input: type: File? @@ -554,12 +542,14 @@ outputs: # mOTUs STEP # ---------------------- + motus: type: File outputSource: motus_taxonomy/motus # RNA PREDICTION STEP # ---------------------- + sequence-categorisation_folder: type: Directory? outputSource: rna_prediction/sequence_categorisation_folder @@ -623,4 +613,5 @@ $schemas: s:license: "https://www.apache.org/licenses/LICENSE-2.0" s:copyrightHolder: "European Marine Biological Resource Centre" -s:author: "Haris Zafeiropoulos" +s:author: "EMO BON team" +s:version: "v1.0.0"