Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion src/anyvlm/functions/ingest_vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,14 @@ def _yield_expression_af_batches(
msg = f"One or more required INFO column is missing: {'AC' in info}, {'AN' in info}, {'AC_Het' in info}, {'AC_Hom' in info}, {'AC_Hemi' in info}"
_logger.exception(msg)
raise VcfAfColumnsError(msg) from e
if af.an == 0:
_logger.debug(
"Encountered AN=0 in VCF at %s-%s-%s-%s; this will be skipped during ingest.",
record.chrom,
record.pos,
record.ref,
alt,
)
batch.append((expression, af))
if len(batch) >= batch_size:
_logger.debug("Yielding next batch")
Expand Down Expand Up @@ -106,11 +114,15 @@ def ingest_vcf(
for variant_id, af in zip(variant_ids, afs, strict=True):
if variant_id is None:
continue
try:
allele_frequency = af.ac / af.an
except ZeroDivisionError:
continue
caf = AnyVlmCohortAlleleFrequencyResult(
focusAllele=iriReference(variant_id),
focusAlleleCount=af.ac,
locusAlleleCount=af.an,
focusAlleleFrequency=af.ac / af.an,
focusAlleleFrequency=allele_frequency,
qualityMeasures=QualityMeasures(qcFilters=af.filters),
ancillaryResults=AncillaryResults(
heterozygotes=af.ac_het,
Expand Down
15 changes: 15 additions & 0 deletions tests/data/vcf/vcf_an_0.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
##fileformat=VCFv4.2
##FILTER=<ID=LowQual,Description="Low quality">
##FILTER=<ID=NO_HQ_GENOTYPES,Description="Site has no high quality variant genotypes">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
##INFO=<ID=AC_Hemi,Number=A,Type=Integer,Description="Allele counts in hemizygous genotypes">
##INFO=<ID=AC_Het,Number=A,Type=Integer,Description="Allele counts in heterozygous genotypes">
##INFO=<ID=AC_Hom,Number=A,Type=Integer,Description="Allele counts in homozygous genotypes">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=AS_QUALapprox,Number=1,Type=String,Description="Allele-specific QUAL approximations">
##contig=<ID=chr14,length=107043718>
##source=SelectVariants
##INFO=<ID=VRS_Allele_IDs,Number=R,Type=String,Description="The computed identifiers for the GA4GH VRS Alleles corresponding to the GT indexes of the REF and ALT alleles">
#CHROM POS ID REF ALT QUAL FILTER INFO
chr14 18223529 . C A . LowQual;NO_HQ_GENOTYPES AC=0;AC_Hemi=0;AC_Het=0;AC_Hom=0;AF=0.00;AN=0;AS_QUALapprox=0|55;VRS_Allele_IDs=ga4gh:VA.8OSPHYmhyg9hJTpFQ8aNcmLgYMR77ZyJ,ga4gh:VA.slgr2fnRKaUnQrJZvYNDGMrfZHw6QCr6
11 changes: 11 additions & 0 deletions tests/unit/functions/test_ingest_vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,14 @@ def test_ingest_vcf_infocol_missing(
stub_anyvar_client,
postgres_storage,
)


def test_ingest_vcf_an_zero(
    stub_anyvar_client: BaseAnyVarClient, test_data_dir: Path, postgres_storage: Storage
):
    """Check that ingesting a VCF containing an AN=0 row completes without raising.

    The test makes no assertions of its own; it passes as long as
    ``ingest_vcf`` returns normally for the ``vcf_an_0.vcf`` fixture.
    """
    # Fixture file lives under the shared test data directory.
    an_zero_vcf = test_data_dir / "vcf" / "vcf_an_0.vcf"
    ingest_vcf(an_zero_vcf, stub_anyvar_client, postgres_storage)