epi2me-labs
diff --git a/‎.gitlab-ci.yml‎
Lines changed: 1 addition & 5 deletions b/‎.gitlab-ci.yml‎
Lines changed: 1 addition & 5 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 3 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 2 additions & 28 deletions b/‎README.md‎
Lines changed: 2 additions & 28 deletions
diff --git a/‎bin/workflow_glue/report.py‎
Lines changed: 10 additions & 17 deletions b/‎bin/workflow_glue/report.py‎
Lines changed: 10 additions & 17 deletions
diff --git a/‎bin/workflow_glue/run_isonclust2.py‎
Lines changed: 0 additions & 138 deletions b/‎bin/workflow_glue/run_isonclust2.py‎
Lines changed: 0 additions & 138 deletions
diff --git a/‎docs/header.md‎
Lines changed: 1 addition & 2 deletions b/‎docs/header.md‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎docs/intro.md‎
Lines changed: 0 additions & 10 deletions b/‎docs/intro.md‎
Lines changed: 0 additions & 10 deletions
diff --git a/‎docs/links.md‎
Lines changed: 1 addition & 4 deletions b/‎docs/links.md‎
Lines changed: 1 addition & 4 deletions
diff --git a/‎docs/quickstart.md‎
Lines changed: 1 addition & 12 deletions b/‎docs/quickstart.md‎
Lines changed: 1 addition & 12 deletions
diff --git a/‎evaluation/tests.sh‎
Lines changed: 0 additions & 12 deletions b/‎evaluation/tests.sh‎
Lines changed: 0 additions & 12 deletions
@@ -45,7 +45,7 @@ docker-run:
             - MATRIX_NAME: [
                 "fusions", "differential_expression", "isoforms",
                 "only_differential_expression", "differential_expression_gff3",
-                "ncbi_gzip", "denovo", "ncbi_no_gene_id", "ensembl_with_versions",
+                "ncbi_gzip", "ncbi_no_gene_id", "ensembl_with_versions",
                 "differential_expression_mouse"
             ]
     rules:
@@ -60,10 +60,6 @@ docker-run:
               NF_WORKFLOW_OPTS: "--fastq  ERR6053095_chr20.fastq --transcriptome-source reference-guided \
                   --ref_genome chr20/hg38_chr20.fa --ref_annotation chr20/gencode.v22.annotation.chr20.gtf"
               NF_IGNORE_PROCESSES: preprocess_reads,merge_transcriptomes,decompress_annotation,decompress_ref,decompress_transcriptome,preprocess_ref_transcriptome
-        - if: $MATRIX_NAME == "denovo"
-          variables:
-              NF_WORKFLOW_OPTS: "--fastq test_data/fastq/SIRV_E0_PCS109_50.fq.gz --transcriptome_source denovo"
-              NF_IGNORE_PROCESSES: preprocess_reads,merge_transcriptomes,decompress_annotation,decompress_ref,build_minimap_index,decompress_transcriptome,preprocess_ref_transcriptome
         - if: $MATRIX_NAME == "fusions"
           variables:
               NF_BEFORE_SCRIPT: wget -O test_data.tar.gz https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-isoforms/wf-isoforms_test_data.tar.gz && tar -xzvf  test_data.tar.gz
 
@@ -5,7 +5,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [unreleased]
+### Fixed
 - Remove dead links from README
+### Removed
+- Denovo `--transcriptome_source` option.
 
 ## [v0.3.1]
 ### Added
 
@@ -5,8 +5,6 @@ for assembly and annotation of transcripts from Oxford Nanopore cDNA or direct R
 
 
 
-
-
 ## Introduction
 
 This workflow identifies RNA isoforms using either cDNA or direct RNA (dRNA) 
@@ -27,16 +25,6 @@ in long read mode (with or without a guide reference annotation) to generate the
 * The annotation generated by the pipeline is compared to the reference annotation. 
 using [gffcompare](http://ccb.jhu.edu/software/stringtie/gffcompare.shtml)
 
-#### de novo-based transcript assembly (experimental!)
-* Sequence clusters are generated using [isONclust2](https://github.com/nanoporetech/isONclust2)
-  * If a reference genome is supplied, cluster quality metrics are determined by comparing    
-  with clusters generated from a minimap2 alignment.
-* A consensus sequence for each cluster is generated using [spoa](https://github.com/rvaser/spoa)
-* Three rounds of polishing using racon and minimap2 to give a final polished CDS for each gene.
-* Full-length reads are then mapped to these polished CDS.
-* Transcripts are assembled by stringtie as for the reference-based approach.
-* __Note__: This approach is currently not supported with direct RNA reads.
-
 ### Fusion gene detection
 Fusion gene detection is performed using [JAFFA](https://github.com/Oshlack/JAFFA), with the JAFFAL extension for use 
 with ONT long reads. 
@@ -134,25 +122,14 @@ nextflow run epi2me-labs/wf-transcriptomes \
   --out_dir outdir -w workspace_dir
 ```
 
-**Example workflow for denovo transcript assembly**
-```
-OUTPUT=~/output
-nextflow run epi2me-labs/wf-transcriptomes \
-  --fastq test_data/fastq \
-  --transcriptome_source denovo \
-  --out_dir ${OUTPUT} \
-  -w ${OUTPUT}/workspace \
-  --sample sample_id
-```
 A full list of options can be seen in nextflow_schema.json. 
 Parameters can be specified either in a config like `parameter = value` or on the command line like `--parameter value`.
 Below are some commonly used parameters in the format used in config files.
 
 Select how the transcriptome used for analysis should be prepared:
 
 - To create a reference transcriptome using an existing reference genome `--transcriptome_source reference-guided` (default)
-- Use a a supplied transcriptome `--transcriptome_source precomputed"`
-- Gnerate transcriptome via the denovo pipeline `--transcriptome_source denovo"` 
+- Use a supplied transcriptome `--transcriptome_source precomputed`
 
 
 To run the workflow with direct RNA reads `--direct_rna true` (this just skips the pychopper step).
@@ -297,7 +274,4 @@ in `${out_dir}/jaffal_output_${sample_id}` you will find:
 * [nextflow](https://www.nextflow.io/)
 * [docker](https://www.docker.com/products/docker-desktop)
 * [Singularity](https://sylabs.io/singularity/)
-* [racon](https://github.com/isovic/racon)
-* [spoa](https://github.com/rvaser/spoa)
-* [inONclust](https://github.com/ksahlin/isONclust)
-* [isONclust2](https://github.com/nanoporetech/isONclust2)
+* [racon](https://github.com/isovic/racon)
@@ -74,7 +74,6 @@ def argparser():
     parser.add_argument(
         "--de_stats", required=False, type=str, default=None, nargs='*',
         help="Differential expression report optional")
-    parser.add_argument('--denovo', dest='denovo', action='store_true')
 
     return parser
 
@@ -699,21 +698,16 @@ def transcript_table(report, df_tmaps, max_rows):
     section.table(df, index=False)
 
 
-def transcriptome_summary(report, gffs, sample_ids, denovo=False):
+def transcriptome_summary(report, gffs, sample_ids):
     """
     Plot transcriptome summaries.
 
     Some of this data is available via gffcompare output, but the de novo
     pipeline skips that, so we do it all here.
 
-    We do not report exon number for the denovo assembly yet. This is because
-    in this case, the gff annotation is generated by aligning to the CDS not
-    the genome.
-
     :param report: aplanat WFReport
     :param gffs: list of paths to gff transcriptome annotations
     :param sample_ids: list of sample ids
-    :param denovo: whether annotation was generated by de novo pipeline or not
     """
     # test.db gets written to the git repo.
     section = report.add_section()
@@ -771,17 +765,16 @@ def transcriptome_summary(report, gffs, sample_ids, denovo=False):
             title='transcript lengths')
         plots.append(box)
 
-        if not denovo:
-            x, y = zip(*sorted(exons_per_transcript.items()))
+        x, y = zip(*sorted(exons_per_transcript.items()))
 
-            fig = figure(title="Exons per transcript")
-            fig.vbar(
-                x, top=list(y), color=Colors.cerulean)
-            fig.xaxis.axis_label = 'Num. exons'
-            fig.yaxis.axis_label = 'Num. genes'
+        fig = figure(title="Exons per transcript")
+        fig.vbar(
+            x, top=list(y), color=Colors.cerulean)
+        fig.xaxis.axis_label = 'Num. exons'
+        fig.yaxis.axis_label = 'Num. genes'
 
-            fig.xaxis.major_label_orientation = math.pi / 2.8
-            plots.append(fig)
+        fig.xaxis.major_label_orientation = math.pi / 2.8
+        plots.append(fig)
 
         df_sum = pd.DataFrame.from_dict(
             {'Total genes': [num_genes],
@@ -929,7 +922,7 @@ def main(args):
     # Results
     if args.gff_annotation is not None:
         transcriptome_summary(
-            report, args.gff_annotation, sample_ids, denovo=args.denovo)
+            report, args.gff_annotation, sample_ids)
 
     if args.gffcompare_dir is not None:
         df_tmaps = gff_compare_plots(
 
@@ -1,5 +1,4 @@
 # wf-transcriptomes
 
 This repository contains a [nextflow](https://www.nextflow.io/) workflow
-for assembly and annotation of transcripts from Oxford Nanopore cDNA or direct RNA reads.
-
+for assembly and annotation of transcripts from Oxford Nanopore cDNA or direct RNA reads.
@@ -18,16 +18,6 @@ in long read mode (with or without a guide reference annotation) to generate the
 * The annotation generated by the pipeline is compared to the reference annotation. 
 using [gffcompare](http://ccb.jhu.edu/software/stringtie/gffcompare.shtml)
 
-#### de novo-based transcript assembly (experimental!)
-* Sequence clusters are generated using [isONclust2](https://github.com/nanoporetech/isONclust2)
-  * If a reference genome is supplied, cluster quality metrics are determined by comparing    
-  with clusters generated from a minimap2 alignment.
-* A consensus sequence for each cluster is generated using [spoa](https://github.com/rvaser/spoa)
-* Three rounds of polishing using racon and minimap2 to give a final polished CDS for each gene.
-* Full-length reads are then mapped to these polished CDS.
-* Transcripts are assembled by stringtie as for the reference-based approach.
-* __Note__: This approach is currently not supported with direct RNA reads.
-
 ### Fusion gene detection
 Fusion gene detection is performed using [JAFFA](https://github.com/Oshlack/JAFFA), with the JAFFAL extension for use 
 with ONT long reads. 
 
@@ -3,7 +3,4 @@
 * [nextflow](https://www.nextflow.io/)
 * [docker](https://www.docker.com/products/docker-desktop)
 * [Singularity](https://sylabs.io/singularity/)
-* [racon](https://github.com/isovic/racon)
-* [spoa](https://github.com/rvaser/spoa)
-* [inONclust](https://github.com/ksahlin/isONclust)
-* [isONclust2](https://github.com/nanoporetech/isONclust2)
+* [racon](https://github.com/isovic/racon)
@@ -47,25 +47,14 @@ nextflow run epi2me-labs/wf-transcriptomes \
   --out_dir outdir -w workspace_dir
 ```
 
-**Example workflow for denovo transcript assembly**
-```
-OUTPUT=~/output
-nextflow run epi2me-labs/wf-transcriptomes \
-  --fastq test_data/fastq \
-  --transcriptome_source denovo \
-  --out_dir ${OUTPUT} \
-  -w ${OUTPUT}/workspace \
-  --sample sample_id
-```
 A full list of options can be seen in nextflow_schema.json. 
 Parameters can be specified either in a config like `parameter = value` or on the command line like `--parameter value`.
 Below are some commonly used parameters in the format used in config files.
 
 Select how the transcriptome used for analysis should be prepared:
 
 - To create a reference transcriptome using an existing reference genome `--transcriptome_source reference-guided` (default)
-- Use a a supplied transcriptome `--transcriptome_source precomputed"`
-- Gnerate transcriptome via the denovo pipeline `--transcriptome_source denovo"` 
+- Use a supplied transcriptome `--transcriptome_source precomputed`
 
 
 To run the workflow with direct RNA reads `--direct_rna true` (this just skips the pychopper step).
 
@@ -26,18 +26,6 @@ multisampledir="test_data/demultiplexed_fastq"
 #"--minimap2_opts '-uf --splice-flank=no'"
 results=()
 
-OUTPUT=$1/denovo_multi_sample_no_ref_genome;
-nextflow run . --fastq $multisampledir $config --denovo --ref_genome test_data/SIRV_150601a.fasta  -profile local --out_dir ${OUTPUT} -w ${OUTPUT}/workspace \
---sample_sheet test_data/sample_sheet -resume;
-r=$?
-results+=("$(basename $OUTPUT): $r")
-
-OUTPUT=$1/denovo_single;
-nextflow run . --fastq $singledir $config --denovo --ref_genome test_data/SIRV_150601a.fasta -profile local --out_dir ${OUTPUT} -w ${OUTPUT}/workspace \
---sample_sheet test_data/sample_sheet -resume;
-r=$?
-results+=("$(basename $OUTPUT): $r")
-
 # Reference based tests
 OUTPUT=$1/reference_single_dir;
 nextflow run . --fastq $singledir $config --ref_genome test_data/SIRV_150601a.fasta --minimap2_opts '-uf --splice-flank=no' \