diff --git a/src/anyvlm/functions/ingest_vcf.py b/src/anyvlm/functions/ingest_vcf.py index 76d621d..bdbc245 100644 --- a/src/anyvlm/functions/ingest_vcf.py +++ b/src/anyvlm/functions/ingest_vcf.py @@ -62,6 +62,14 @@ def _yield_expression_af_batches( msg = f"One or more required INFO column is missing: {'AC' in info}, {'AN' in info}, {'AC_Het' in info}, {'AC_Hom' in info}, {'AC_Hemi' in info}" _logger.exception(msg) raise VcfAfColumnsError(msg) from e + if af.an == 0: + _logger.debug( + "Encountered AN=0 in VCF at %s-%s-%s-%s; this will be skipped during ingest.", + record.chrom, + record.pos, + record.ref, + alt, + ) batch.append((expression, af)) if len(batch) >= batch_size: _logger.debug("Yielding next batch") @@ -106,11 +114,15 @@ def ingest_vcf( for variant_id, af in zip(variant_ids, afs, strict=True): if variant_id is None: continue + try: + allele_frequency = af.ac / af.an + except ZeroDivisionError: + continue caf = AnyVlmCohortAlleleFrequencyResult( focusAllele=iriReference(variant_id), focusAlleleCount=af.ac, locusAlleleCount=af.an, - focusAlleleFrequency=af.ac / af.an, + focusAlleleFrequency=allele_frequency, qualityMeasures=QualityMeasures(qcFilters=af.filters), ancillaryResults=AncillaryResults( heterozygotes=af.ac_het, diff --git a/tests/data/vcf/vcf_an_0.vcf b/tests/data/vcf/vcf_an_0.vcf new file mode 100644 index 0000000..b166e3f --- /dev/null +++ b/tests/data/vcf/vcf_an_0.vcf @@ -0,0 +1,15 @@ +##fileformat=VCFv4.2 +##FILTER= +##FILTER= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +##source=SelectVariants +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO +chr14 18223529 . C A . 
LowQual;NO_HQ_GENOTYPES AC=0;AC_Hemi=0;AC_Het=0;AC_Hom=0;AF=0.00;AN=0;AS_QUALapprox=0|55;VRS_Allele_IDs=ga4gh:VA.8OSPHYmhyg9hJTpFQ8aNcmLgYMR77ZyJ,ga4gh:VA.slgr2fnRKaUnQrJZvYNDGMrfZHw6QCr6 diff --git a/tests/unit/functions/test_ingest_vcf.py b/tests/unit/functions/test_ingest_vcf.py index 71b6d74..b0e071b 100644 --- a/tests/unit/functions/test_ingest_vcf.py +++ b/tests/unit/functions/test_ingest_vcf.py @@ -115,3 +115,14 @@ def test_ingest_vcf_infocol_missing( stub_anyvar_client, postgres_storage, ) + + +def test_ingest_vcf_an_zero( + stub_anyvar_client: BaseAnyVarClient, test_data_dir: Path, postgres_storage: Storage +): + """Ingest a VCF whose single record has AN=0 and expect no exception (stored results are not asserted)""" + ingest_vcf( + test_data_dir / "vcf" / "vcf_an_0.vcf", + stub_anyvar_client, + postgres_storage, + )