From 04d70df1bf1b6e8cf6ddb4555916018240f4c08b Mon Sep 17 00:00:00 2001 From: Alison Meynert Date: Wed, 22 Apr 2026 13:50:47 -0700 Subject: [PATCH 1/5] feat: add gnomAD 3.1.2 genomes to comparison to DivRef 1.1 --- .../divref/tools/extract_gnomad_single_afs.py | 48 +++++++++++++++++-- workflows/compare_divref_gnomad.smk | 3 +- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/divref/divref/tools/extract_gnomad_single_afs.py b/divref/divref/tools/extract_gnomad_single_afs.py index 0d060bc..00a84a9 100644 --- a/divref/divref/tools/extract_gnomad_single_afs.py +++ b/divref/divref/tools/extract_gnomad_single_afs.py @@ -16,6 +16,9 @@ class GnomadVersion(StrEnum): """Mapping of gnomAD versions to GCS sites tables.""" JOINT_41 = "gs://gcp-public-data--gnomad/release/4.1/ht/joint/gnomad.joint.v4.1.sites.ht" + GENOMES_312 = ( + "gs://gcp-public-data--gnomad/release/3.1.2/ht/genomes/gnomad.genomes.v3.1.2.sites.ht" + ) HGDP_1KG_312 = "gs://gcp-public-data--gnomad/release/3.1.2/ht/genomes/gnomad.genomes.v3.1.2.hgdp_1kg_subset_variant_annotations.ht" @@ -54,14 +57,21 @@ def extract_gnomad_single_afs( ht: hl.Table match gnomad_version: case GnomadVersion.JOINT_41: - ht = extract_from_joint_41( + ht = extract_from_gnomad_41_joint( + contig=contig, + freq_threshold=freq_threshold, + populations=populations, + reference_genome=reference_genome, + ) + case GnomadVersion.GENOMES_312: + ht = extract_from_gnomad_312_genomes( contig=contig, freq_threshold=freq_threshold, populations=populations, reference_genome=reference_genome, ) case GnomadVersion.HGDP_1KG_312: - ht = extract_from_hgdp_1kg_312( + ht = extract_from_gnomad_312_hgdp_1kg( contig=contig, freq_threshold=freq_threshold, populations=populations, @@ -84,7 +94,7 @@ def extract_gnomad_single_afs( ).export(str(out_sites_tsv)) -def extract_from_joint_41( +def extract_from_gnomad_41_joint( contig: str, freq_threshold: float, populations: list[str], @@ -114,7 +124,37 @@ def extract_from_joint_41( return va.select("locus", "alleles", "pop_freqs") -def extract_from_hgdp_1kg_312( +def extract_from_gnomad_312_genomes( + contig: str, + freq_threshold: float, + populations: list[str], + reference_genome: str, +) -> hl.Table: + """Use the gnomAD 3.1.2 HGDP+1KG sites schema.""" + va_all = hl.read_table(GnomadVersion.GENOMES_312.value) + interval = hl.parse_locus_interval(contig, reference_genome=reference_genome) + va = hl.filter_intervals(va_all, [interval]) + + freq_meta = va.globals.freq_meta.collect()[0] + map_to_index = {to_hashable_items(x): i for i, x in enumerate(freq_meta)} + + pop_indices = [] + for pop in populations: + idx = map_to_index.get(to_hashable_items({"group": "adj", "pop": pop})) + if idx is None: + raise ValueError(f"Population {pop!r} not found in gnomAD frequency metadata") + pop_indices.append(idx) + + va = va.select_globals(pops=populations) + va = va.select(pop_freqs=hl.literal(pop_indices).map(lambda i: va.freq[i])) + if freq_threshold > 0: + va = va.filter(hl.any(lambda x: x.AF > freq_threshold, va.pop_freqs)) + va = va.key_by() + + return va.select("locus", "alleles", "pop_freqs") + + +def extract_from_gnomad_312_hgdp_1kg( contig: str, freq_threshold: float, populations: list[str], diff --git a/workflows/compare_divref_gnomad.smk b/workflows/compare_divref_gnomad.smk index 8e407ff..6168758 100644 --- a/workflows/compare_divref_gnomad.smk +++ b/workflows/compare_divref_gnomad.smk @@ -14,11 +14,12 @@ CONTIG: str = "chr22" DIVREF_DUCKDB_URL: str = ( "https://zenodo.org/records/14802613/files/" "DivRef-v1.1.haplotypes_gnomad_merge.index.duckdb" ) -GNOMAD_VERSIONS: list[str] = ["joint_41", "hgdp_1kg_312"] +GNOMAD_VERSIONS: list[str] = ["joint_41", "genomes_312", "hgdp_1kg_312"] # Maps filename wildcard → plot label used by compare_divref_gnomad.R GNOMAD_LABEL: dict[str, str] = { "joint_41": "gnomAD 4.1 joint AF", + "genomes_312": "gnomAD 3.1.2 genomes AF", "hgdp_1kg_312": "gnomAD 3.1.2 HGDP+1KG AF", } From 0a2b00dae7acfb748acd079763177c10a922c76d Mon Sep 17 00:00:00 2001 From: Alison Meynert Date: Wed, 22 Apr 2026 14:47:43 -0700 Subject: [PATCH 2/5] feat: reformatted evidence --- README.md | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 7ea5dee..93638fd 100644 --- a/README.md +++ b/README.md @@ -54,18 +54,28 @@ pixi run -e analysis setup-r-packages > DivRef is constructed by computing empirical phased haplotypes within 25 BPs over 0.5% allele frequency from the Human Genome Diversity Panel (HGDP) using the phased Hail dataset provided by the gnomAD team at the Broad Institute, merged with single variants over 0.5% AF from the gnomAD v4.1.0 summary release. Some gnomAD v4.1.0 variants that we expected to see represented in DivRef 1.1 were not. -We checked all 'gnomAD_variant' variants on chr22 from DivRef 1.1 against the gnomAD 4.1 joint (exomes and genomes) variants and the gnomAD 3.1.2 HGDP+1KG subset variants. -The latter is the source used for the haplotypes present in DivRef. +We checked all 'gnomAD_variant' variants on chr22 from DivRef 1.1 against: + +- gnomAD 3.1.2 HGDP+1KG subset +- gnomAD 3.1.2 genomes (~76K genomes) +- gnomAD 4.1 joint (~730K exomes and ~76K genomes) + +gnomAD 3.1.2 HGDP+1KG subset is the source used for the haplotypes present in DivRef 1.1. ```bash pixi run -e analysis snakemake -j1 -s workflows/compare_divref_gnomad.smk ``` -We found that all DivRef 1.1 variants were present in the gnomAD 3.1.2 HGDP+1KG subset, while 16 were missing from the gnomAD 4.1 joint set. +**DivRef 1.1 variants present in gnomAD datasets** + +We found that all DivRef 1.1 'gnomAD_variant' variants were present in the gnomAD 3.1.2 HGDP+1KG subset and in the gnomAD 3.1.2 genomes set, while 16 were missing from the gnomAD 4.1 joint set. + We further compared the allele frequencies for the 5 populations recorded in the DivRef 1.1 DuckDB index against the frequencies for those populations in the two gnomAD sets, using the Hail tables as input. -The allele frequencies between DivRef 1.1 and the gnomAD 3.1.2 HGDP+1KG subset were very close, within a rounding error of 5e06, for all variants. +- gnomAD 3.1.2 HGDP+1KG subset: within a rounding error of 5e06, for all variants. +- gnomAD 3.1.2 genomes: 1,400 variants with an AF difference >= 0.001, all of which had lower AF in the genomes set compared to HGDP+1KG, indicating more stringent filtering in the genomes set +- gnomAD 4.1 joint: 60,424 variants with an AF difference >= 0.001, evenly distributed both lower and higher AF -The allele frequencies between DivRef 1.1 and gnomAD 4.1 joint set were significantly different. 60,424 variants were found with an allele frequence difference >= 0.001. +There are 506,983 DivRef 1.1 'gnomAD_variant' variants. When we run the original DivRef script [extract_gnomad_afs.py](https://github.com/e9genomics/human-diversity-reference/blob/main/scripts/extract_gnomad_afs.py) (uses gnomAD 3.1.2 HGDP+1KG sites as input, hard-coded) followed by lines 156-177 of [create_fasta_and_index.py](https://github.com/e9genomics/human-diversity-reference/blob/main/scripts/create_fasta_and_index.py) (lines 156-177) using the parameters specified in the [Makefile](https://github.com/e9genomics/human-diversity-reference/blob/main/Makefile), we also get 506,983 variants. We concluded that the DivRef 1.1 documentation was incorrect, and that the actual source of the gnomAD variants in the dataset was the gnomAD 3.1.2 HGDP+1KG subset, the same as for the haplotypes. From 386b3d6635c61e366b9ff46d850af11d383b7c69 Mon Sep 17 00:00:00 2001 From: Alison Meynert Date: Wed, 22 Apr 2026 14:57:02 -0700 Subject: [PATCH 3/5] fix: typos --- README.md | 2 +- divref/divref/tools/extract_gnomad_single_afs.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 93638fd..7905ce8 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,7 @@ We found that all DivRef 1.1 'gnomAD_variant' variants were present in the gnomA We further compared the allele frequencies for the 5 populations recorded in the DivRef 1.1 DuckDB index against the frequencies for those populations in the two gnomAD sets, using the Hail tables as input. -- gnomAD 3.1.2 HGDP+1KG subset: within a rounding error of 5e06, for all variants. +- gnomAD 3.1.2 HGDP+1KG subset: within a rounding error of 5e-6, for all variants. - gnomAD 3.1.2 genomes: 1,400 variants with an AF difference >= 0.001, all of which had lower AF in the genomes set compared to HGDP+1KG, indicating more stringent filtering in the genomes set - gnomAD 4.1 joint: 60,424 variants with an AF difference >= 0.001, evenly distributed both lower and higher AF diff --git a/divref/divref/tools/extract_gnomad_single_afs.py b/divref/divref/tools/extract_gnomad_single_afs.py index 00a84a9..8fda049 100644 --- a/divref/divref/tools/extract_gnomad_single_afs.py +++ b/divref/divref/tools/extract_gnomad_single_afs.py @@ -130,7 +130,7 @@ def extract_from_gnomad_312_genomes( populations: list[str], reference_genome: str, ) -> hl.Table: - """Use the gnomAD 3.1.2 HGDP+1KG sites schema.""" + """Use the gnomAD 3.1.2 genomes sites schema.""" va_all = hl.read_table(GnomadVersion.GENOMES_312.value) interval = hl.parse_locus_interval(contig, reference_genome=reference_genome) va = hl.filter_intervals(va_all, [interval]) From 775ca565c934539894d93777cfaab84e5846032a Mon Sep 17 00:00:00 2001 From: Alison Meynert Date: Wed, 22 Apr 2026 16:34:22 -0700 Subject: [PATCH 4/5] feat: additional plot --- scripts/compare_divref_gnomad.R | 18 ++++++++++++++++-- workflows/compare_divref_gnomad.smk | 5 +++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/scripts/compare_divref_gnomad.R b/scripts/compare_divref_gnomad.R index 0b99146..d4125b2 100644 --- a/scripts/compare_divref_gnomad.R +++ b/scripts/compare_divref_gnomad.R @@ -113,11 +113,25 @@ p <- divref_in_gnomad_with_af_diffs %>% scale_y_log10() + theme_bw() + xlab(paste0(opts$gnomad_label, " - DivRef 1.1 AF")) + - ylab("Variants") + - labs(title = paste0("DivRef 1.1 'gnomAD_variant' variants found in ", opts$gnomad_label)) + ylab("Variants") ggsave(paste0(opts$output_base, ".af_diffs.png"), p, height = 10, width = 6) +p <- divref_in_gnomad_with_af_diffs %>% + select(variants, diff_afr, diff_amr, diff_eas, diff_sas, diff_nfe) %>% + pivot_longer( + cols = c(diff_afr, diff_amr, diff_eas, diff_sas, diff_nfe), + names_to = "population", values_to = "diff_freq", names_prefix = "diff_" + ) %>% + ggplot(aes(x = diff_freq)) + + geom_histogram() + + scale_y_log10() + + theme_bw() + + xlab(paste0(opts$gnomad_label, " - DivRef 1.1 AF")) + + ylab("Variants") + +ggsave(paste0(opts$output_base, ".af_diffs_all.png"), p, height = 6, width = 6) + # Count: variants with large AF differences ---- n_large_af_diff <- divref_in_gnomad_with_af_diffs %>% diff --git a/workflows/compare_divref_gnomad.smk b/workflows/compare_divref_gnomad.smk index 6168758..a8038b7 100644 --- a/workflows/compare_divref_gnomad.smk +++ b/workflows/compare_divref_gnomad.smk @@ -37,6 +37,10 @@ rule all: f"{OUTPUT_DIR}/compare_divref_gnomad/{CONTIG}.{{gnomad_version}}.af_diffs.png", gnomad_version=GNOMAD_VERSIONS, ), + expand( + f"{OUTPUT_DIR}/compare_divref_gnomad/{CONTIG}.{{gnomad_version}}.af_diffs_all.png", + gnomad_version=GNOMAD_VERSIONS, + ), expand( f"{OUTPUT_DIR}/compare_divref_gnomad/{CONTIG}.{{gnomad_version}}.not_in_gnomad_afs.png", gnomad_version=GNOMAD_VERSIONS, @@ -105,6 +109,7 @@ rule compare_divref_gnomad: tsv=f"{OUTPUT_DIR}/compare_divref_gnomad/{CONTIG}.{{gnomad_version}}.tsv", output: af_diffs_png=f"{OUTPUT_DIR}/compare_divref_gnomad/{CONTIG}.{{gnomad_version}}.af_diffs.png", + af_diffs_all_png=f"{OUTPUT_DIR}/compare_divref_gnomad/{CONTIG}.{{gnomad_version}}.af_diffs_all.png", not_in_gnomad_png=f"{OUTPUT_DIR}/compare_divref_gnomad/{CONTIG}.{{gnomad_version}}.not_in_gnomad_afs.png", not_in_gnomad_tsv=f"{OUTPUT_DIR}/compare_divref_gnomad/{CONTIG}.{{gnomad_version}}.divref_not_in_gnomad.tsv", log: From 5eb1cc4d48bc65958cfc12191abcf9c492502527 Mon Sep 17 00:00:00 2001 From: Alison Meynert Date: Wed, 22 Apr 2026 16:43:33 -0700 Subject: [PATCH 5/5] feat: make outputs optional --- .../divref/tools/extract_gnomad_single_afs.py | 188 +++++++----------- workflows/compare_divref_gnomad.smk | 2 - 2 files changed, 69 insertions(+), 121 deletions(-) diff --git a/divref/divref/tools/extract_gnomad_single_afs.py b/divref/divref/tools/extract_gnomad_single_afs.py index 8fda049..dc89222 100644 --- a/divref/divref/tools/extract_gnomad_single_afs.py +++ b/divref/divref/tools/extract_gnomad_single_afs.py @@ -1,10 +1,13 @@ """Tool to extract gnomAD variant and sample frequency data for the DivRef pipeline.""" +import operator +from dataclasses import dataclass from enum import StrEnum from enum import unique from pathlib import Path import hail as hl +from fgpyo.io import assert_path_is_writable from divref import defaults from divref.hail import hail_init @@ -22,163 +25,110 @@ class GnomadVersion(StrEnum): HGDP_1KG_312 = "gs://gcp-public-data--gnomad/release/3.1.2/ht/genomes/gnomad.genomes.v3.1.2.hgdp_1kg_subset_variant_annotations.ht" +@dataclass(frozen=True) +class _GnomadSchema: + """Per-version schema config for reading population AFs from a gnomAD sites table.""" + + freq_meta_path: str # dot-path into table globals, e.g. "joint_globals.freq_meta" + row_freq_field: str # dot-path on table row, e.g. "joint.freq" + pop_key: str # key in freq_meta entries for the population code, e.g. "gen_anc" or "pop" + + +_GNOMAD_SCHEMA: dict[GnomadVersion, _GnomadSchema] = { + GnomadVersion.JOINT_41: _GnomadSchema( + freq_meta_path="joint_globals.freq_meta", + row_freq_field="joint.freq", + pop_key="gen_anc", + ), + GnomadVersion.GENOMES_312: _GnomadSchema( + freq_meta_path="freq_meta", + row_freq_field="freq", + pop_key="pop", + ), + GnomadVersion.HGDP_1KG_312: _GnomadSchema( + freq_meta_path="gnomad_freq_meta", + row_freq_field="gnomad_freq", + pop_key="pop", + ), +} + + def extract_gnomad_single_afs( *, gnomad_version: GnomadVersion, - out_sites_hail_table: Path, - out_sites_tsv: Path, contig: str, freq_threshold: float = 0, populations: list[str] = defaults.POPULATIONS, reference_genome: str = defaults.REFERENCE_GENOME, + out_sites_hail_table: Path | None, + out_sites_tsv: Path | None = None, gcs_credentials_path: Path = Path("~/.config/gcloud/application_default_credentials.json"), ) -> None: """ Extract gnomAD variant and sample frequency data for downstream pipeline tools. Reads a gnomAD sites table and filters to variants above the frequency threshold in at least one - population. Writes two outputs: a Hail table at `out_sites_hail_table` for downstream pipeline - tools, and a flat TSV at `out_sites_tsv` with columns `variant` (contig:pos:ref:alt) and one - allele-frequency column per population. + population. Writes up to two outputs: a Hail table at `out_sites_hail_table` for downstream + pipeline tools, and a flat TSV at `out_sites_tsv` with columns `variant` (contig:pos:ref:alt) + and one allele-frequency column per population. + + At least one of `out_sites_hail_table` or `out_sites_tsv` must be defined. Args: - out_sites_hail_table: Output path for the Hail table. - out_sites_tsv: Output path for the TSV file. - gnomad_version: gnomAD sites table to use - the schema varies. + gnomad_version: gnomAD sites table to use - the schema varies per version. contig: Contig to extract sites. freq_threshold: Minimum allele frequency in any population to retain a variant. Defaults 0, all variants returned. populations: List of population codes to extract frequencies for. reference_genome: Reference genome to use. Defaults to "GRCh38". + out_sites_hail_table: Output path for the Hail table. Optional. + out_sites_tsv: Output path for the TSV file. Optional. gcs_credentials_path: Path to GCS default credentials JSON file. """ - hail_init(gcs_credentials_path.expanduser()) - - ht: hl.Table - match gnomad_version: - case GnomadVersion.JOINT_41: - ht = extract_from_gnomad_41_joint( - contig=contig, - freq_threshold=freq_threshold, - populations=populations, - reference_genome=reference_genome, - ) - case GnomadVersion.GENOMES_312: - ht = extract_from_gnomad_312_genomes( - contig=contig, - freq_threshold=freq_threshold, - populations=populations, - reference_genome=reference_genome, - ) - case GnomadVersion.HGDP_1KG_312: - ht = extract_from_gnomad_312_hgdp_1kg( - contig=contig, - freq_threshold=freq_threshold, - populations=populations, - reference_genome=reference_genome, - ) - case _: - raise ValueError(f"Unrecognized gnomAD sites table version: {gnomad_version}") - - ht.naive_coalesce(64).write(str(out_sites_hail_table), overwrite=True) - - ht.select( - variant=hl.format( - "%s:%s:%s:%s", - ht.locus.contig, - hl.str(ht.locus.position), - ht.alleles[0], - ht.alleles[1], - ), - **{pop: ht.pop_freqs[i].AF for i, pop in enumerate(populations)}, - ).export(str(out_sites_tsv)) - - -def extract_from_gnomad_41_joint( - contig: str, - freq_threshold: float, - populations: list[str], - reference_genome: str, -) -> hl.Table: - """Use the gnomAD 4.1 joint sites schema.""" - va_all = hl.read_table(GnomadVersion.JOINT_41.value) - interval = hl.parse_locus_interval(contig, reference_genome=reference_genome) - va = hl.filter_intervals(va_all, [interval]) + if out_sites_hail_table is None and out_sites_tsv is None: + raise ValueError("At least one of out_sites_hail_table or out_sites_tsv must be provided") - freq_meta = va.globals.joint_globals.freq_meta.collect()[0] - map_to_index = {to_hashable_items(x): i for i, x in enumerate(freq_meta)} - - pop_indices = [] - for pop in populations: - idx = map_to_index.get(to_hashable_items({"group": "adj", "gen_anc": pop})) - if idx is None: - raise ValueError(f"Population {pop!r} not found in gnomAD frequency metadata") - pop_indices.append(idx) - - va = va.select_globals(pops=populations) - va = va.select(pop_freqs=hl.literal(pop_indices).map(lambda i: va.joint.freq[i])) - if freq_threshold > 0: - va = va.filter(hl.any(lambda x: x.AF > freq_threshold, va.pop_freqs)) - va = va.key_by() + # validate output paths before starting Hail + if out_sites_tsv is not None: + assert_path_is_writable(out_sites_tsv) - return va.select("locus", "alleles", "pop_freqs") - - -def extract_from_gnomad_312_genomes( - contig: str, - freq_threshold: float, - populations: list[str], - reference_genome: str, -) -> hl.Table: - """Use the gnomAD 3.1.2 genomes sites schema.""" - va_all = hl.read_table(GnomadVersion.GENOMES_312.value) - interval = hl.parse_locus_interval(contig, reference_genome=reference_genome) - va = hl.filter_intervals(va_all, [interval]) - - freq_meta = va.globals.freq_meta.collect()[0] - map_to_index = {to_hashable_items(x): i for i, x in enumerate(freq_meta)} - - pop_indices = [] - for pop in populations: - idx = map_to_index.get(to_hashable_items({"group": "adj", "pop": pop})) - if idx is None: - raise ValueError(f"Population {pop!r} not found in gnomAD frequency metadata") - pop_indices.append(idx) - - va = va.select_globals(pops=populations) - va = va.select(pop_freqs=hl.literal(pop_indices).map(lambda i: va.freq[i])) - if freq_threshold > 0: - va = va.filter(hl.any(lambda x: x.AF > freq_threshold, va.pop_freqs)) - va = va.key_by() - - return va.select("locus", "alleles", "pop_freqs") + hail_init(gcs_credentials_path.expanduser()) + schema = _GNOMAD_SCHEMA[gnomad_version] -def extract_from_gnomad_312_hgdp_1kg( - contig: str, - freq_threshold: float, - populations: list[str], - reference_genome: str, -) -> hl.Table: - """Use the gnomAD 3.1.2 HGDP+1KG sites schema.""" - va_all = hl.read_table(GnomadVersion.HGDP_1KG_312.value) + va_all = hl.read_table(gnomad_version.value) interval = hl.parse_locus_interval(contig, reference_genome=reference_genome) va = hl.filter_intervals(va_all, [interval]) - freq_meta = va.globals.gnomad_freq_meta.collect()[0] + freq_meta = operator.attrgetter(schema.freq_meta_path)(va.globals).collect()[0] map_to_index = {to_hashable_items(x): i for i, x in enumerate(freq_meta)} pop_indices = [] for pop in populations: - idx = map_to_index.get(to_hashable_items({"group": "adj", "pop": pop})) + idx = map_to_index.get(to_hashable_items({"group": "adj", schema.pop_key: pop})) if idx is None: raise ValueError(f"Population {pop!r} not found in gnomAD frequency metadata") pop_indices.append(idx) + row_freq = operator.attrgetter(schema.row_freq_field)(va) va = va.select_globals(pops=populations) - va = va.select(pop_freqs=hl.literal(pop_indices).map(lambda i: va.gnomad_freq[i])) + va = va.select(pop_freqs=hl.literal(pop_indices).map(lambda i: row_freq[i])) if freq_threshold > 0: va = va.filter(hl.any(lambda x: x.AF > freq_threshold, va.pop_freqs)) va = va.key_by() - - return va.select("locus", "alleles", "pop_freqs") + va = va.select("locus", "alleles", "pop_freqs") + + if out_sites_hail_table is not None: + va.naive_coalesce(64).write(str(out_sites_hail_table), overwrite=True) + + if out_sites_tsv is not None: + va.select( + variant=hl.format( + "%s:%s:%s:%s", + va.locus.contig, + hl.str(va.locus.position), + va.alleles[0], + va.alleles[1], + ), + **{pop: va.pop_freqs[i].AF for i, pop in enumerate(populations)}, + ).export(str(out_sites_tsv)) diff --git a/workflows/compare_divref_gnomad.smk b/workflows/compare_divref_gnomad.smk index a8038b7..c1852a2 100644 --- a/workflows/compare_divref_gnomad.smk +++ b/workflows/compare_divref_gnomad.smk @@ -79,7 +79,6 @@ rule download_divref_index: #################################################################################################### rule extract_gnomad_single_afs: output: - ht=directory(f"{OUTPUT_DIR}/compare_divref_gnomad/{CONTIG}.{{gnomad_version}}.ht"), tsv=f"{OUTPUT_DIR}/compare_divref_gnomad/{CONTIG}.{{gnomad_version}}.tsv", log: f"logs/compare_divref_gnomad/extract_gnomad_single_afs.{{gnomad_version}}.log", @@ -92,7 +91,6 @@ rule extract_gnomad_single_afs: divref extract-gnomad-single-afs \ --contig {params.contig} \ --gnomad-version {params.gnomad_version} \ - --out-sites-hail-table {output.ht} \ --out-sites-tsv {output.tsv} ) &> {log} """