Skip to content

Commit

Permalink
Merge pull request #113 from openvar/issue_94
Browse files Browse the repository at this point in the history
apply fix for #94 a…
  • Loading branch information
Peter Causey-Freeman authored Dec 2, 2019
2 parents de2b3de + 90950ab commit 723f70b
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 14 deletions.
41 changes: 38 additions & 3 deletions VariantValidator/modules/vvMixinConverters.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from Bio import Entrez, SeqIO
from . import utils as fn

from vvhgvs.exceptions import HGVSError, HGVSDataNotAvailableError, HGVSUnsupportedOperationError
from vvhgvs.exceptions import HGVSError, HGVSDataNotAvailableError, HGVSUnsupportedOperationError, \
HGVSInvalidVariantError

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -494,6 +495,24 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False):
# This will only happen if the variant is flanking the gap but is
# not inside the gap
logger.info('Variant is on the flank of a genomic gap but not within the gap')

# Test on the flank and if so, return

# Logic, normalize the c. variant and if a substitution (cannot normalize) then direct map
# Currently believe that sub.n is the only variant type which fits. ins can normalize
# and may also be a dup!
try:
norm_stored_c = hn.normalize(stored_hgvs_c)
if norm_stored_c.posedit.edit.type == 'sub':
flank_hgvs_genomic = self.vm.t_to_g(norm_stored_c, genomic_gap_variant.ac)
self.vr.validate(flank_hgvs_genomic)
return flank_hgvs_genomic

# Will occur if the variant still overlaps the gap / is in the gap
except HGVSInvalidVariantError:
pass

# If test fails, continue old processing
gap_start = genomic_gap_variant.posedit.pos.start.base - 1
gap_end = genomic_gap_variant.posedit.pos.end.base + 1
genomic_gap_variant.posedit.pos.start.base = gap_start
Expand Down Expand Up @@ -1159,6 +1178,23 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn):
# This will only happen if the variant is flanking the gap but is
# not inside the gap
logger.info('Variant is on the flank of a genomic gap but not within the gap')

# Test definately on the flank and if so, return
# Logic, normalize the c. variant and if a substitution (cannot normalize) then direct map
# Currently believe that sub.n is the only variant type which fits. ins can normalize
# and may also be a dup!
try:
norm_stored_c = hn.normalize(stored_hgvs_c)
if norm_stored_c.posedit.edit.type == 'sub':
flank_hgvs_genomic = self.vm.t_to_g(norm_stored_c, genomic_gap_variant.ac)
self.vr.validate(flank_hgvs_genomic)
return flank_hgvs_genomic

# Will occur if the variant still overlaps the gap / is in the gap
except HGVSInvalidVariantError:
pass

# If test fails, continue old processing
gap_start = genomic_gap_variant.posedit.pos.start.base - 1
gap_end = genomic_gap_variant.posedit.pos.end.base + 1
genomic_gap_variant.posedit.pos.start.base = gap_start
Expand Down Expand Up @@ -2155,7 +2191,7 @@ def chr_to_rsg(self, hgvs_genomic, hn):
"""
# Covert chromosomal HGVS description to RefSeqGene
"""
# print 'chr_to_rsg triggered'
# 'chr_to_rsg triggered'
hgvs_genomic = hn.normalize(hgvs_genomic)
# split the description
# Accessions
Expand Down Expand Up @@ -2358,7 +2394,6 @@ def rsg_to_chr(self, hgvs_refseqgene, primary_assembly, hn):
new_ref = match[1]
hgvs_genomic.posedit.edit.ref = new_ref
error = 'true'
# # print str(e) + '\n3.'
data = {'hgvs_genomic': str(hgvs_genomic), 'gene': gene, 'valid': str(error)}
else:
data = {'hgvs_genomic': str(hgvs_genomic), 'gene': gene, 'valid': 'true'}
Expand Down
24 changes: 13 additions & 11 deletions tests/test_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ def test_variant1(self):
assert results['NM_015120.4:c.35T>C']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.35T>C'
assert results['NM_015120.4:c.35T>C']['hgvs_lrg_variant'] == 'LRG_741:g.5146T>C'
self.assertCountEqual(results['NM_015120.4:c.35T>C']['alt_genomic_loci'], [])
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613031delinsCGGA', 'vcf': {'chr': 'chr2', 'pos': '73613031', 'ref': 'T', 'alt': 'CGGA'}}
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385903delinsCGGA', 'vcf': {'chr': 'chr2', 'pos': '73385903', 'ref': 'T', 'alt': 'CGGA'}}
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613031delinsCGGA', 'vcf': {'chr': '2', 'pos': '73613031', 'ref': 'T', 'alt': 'CGGA'}}
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385903delinsCGGA', 'vcf': {'chr': '2', 'pos': '73385903', 'ref': 'T', 'alt': 'CGGA'}}
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613031T>C', 'vcf': {'chr': 'chr2', 'pos': '73613031', 'ref': 'T', 'alt': 'C'}}
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385903T>C', 'vcf': {'chr': 'chr2', 'pos': '73385903', 'ref': 'T', 'alt': 'C'}}
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613031T>C', 'vcf': {'chr': '2', 'pos': '73613031', 'ref': 'T', 'alt': 'C'}}
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385903T>C', 'vcf': {'chr': '2', 'pos': '73385903', 'ref': 'T', 'alt': 'C'}}
assert results['NM_015120.4:c.35T>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'}

def test_variant2(self):
Expand All @@ -49,10 +49,10 @@ def test_variant2(self):
assert results['NM_015120.4:c.39G>C']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.39G>C'
assert results['NM_015120.4:c.39G>C']['hgvs_lrg_variant'] == 'LRG_741:g.5150G>C'
self.assertCountEqual(results['NM_015120.4:c.39G>C']['alt_genomic_loci'], [])
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613034_73613035insCGA', 'vcf': {'chr': 'chr2', 'pos': '73613032', 'ref': 'G', 'alt': 'GGAC'}}
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385906_73385907insCGA', 'vcf': {'chr': 'chr2', 'pos': '73385904', 'ref': 'G', 'alt': 'GGAC'}}
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613034_73613035insCGA', 'vcf': {'chr': '2', 'pos': '73613032', 'ref': 'G', 'alt': 'GGAC'}}
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385906_73385907insCGA', 'vcf': {'chr': '2', 'pos': '73385904', 'ref': 'G', 'alt': 'GGAC'}}
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613032G>C', 'vcf': {'chr': 'chr2', 'pos': '73613032', 'ref': 'G', 'alt': 'C'}}
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385904G>C', 'vcf': {'chr': 'chr2', 'pos': '73385904', 'ref': 'G', 'alt': 'C'}}
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613032G>C', 'vcf': {'chr': '2', 'pos': '73613032', 'ref': 'G', 'alt': 'C'}}
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385904G>C', 'vcf': {'chr': '2', 'pos': '73385904', 'ref': 'G', 'alt': 'C'}}
assert results['NM_015120.4:c.39G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'}

def test_variant3(self):
Expand Down Expand Up @@ -2286,10 +2286,12 @@ def test_variant76(self):
assert results['NM_032790.3:c.126C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Ala42=)', 'slr': 'NP_116179.2:p.(A42=)'}
assert results['NM_032790.3:c.126C>A']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.126C>A'
assert results['NM_032790.3:c.126C>A']['hgvs_lrg_variant'] == 'LRG_93:g.5299C>A'
self.assertCountEqual(results['NM_032790.3:c.126C>A']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'HG1595_PATCH', 'pos': '302869', 'ref': 'GCCCCGC', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'NW_004504303.2', 'pos': '302869', 'ref': 'GCCCCGC', 'alt': 'G'}}}])
assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064778del', 'vcf': {'chr': 'chr12', 'pos': '122064771', 'ref': 'GCCCCGC', 'alt': 'G'}}

# Bug fix for issue https://github.com/openvar/variantValidator/issues/94 creates extra outputs. Not an issue so ignore
#self.assertCountEqual(results['NM_032790.3:c.126C>A']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'HG1595_PATCH', 'pos': '302869', 'ref': 'GCCCCGC', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'NW_004504303.2', 'pos': '302869', 'ref': 'GCCCCGC', 'alt': 'G'}}}])
assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773C>A', 'vcf': {'chr': 'chr12', 'pos': '122064773', 'ref': 'C', 'alt': 'A'}}
assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873C>A', 'vcf': {'chr': 'chr12', 'pos': '121626873', 'ref': 'C', 'alt': 'A'}}
assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064778del', 'vcf': {'chr': '12', 'pos': '122064771', 'ref': 'GCCCCGC', 'alt': 'G'}}
assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773C>A', 'vcf': {'chr': '12', 'pos': '122064773', 'ref': 'C', 'alt': 'A'}}
assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873C>A', 'vcf': {'chr': '12', 'pos': '121626873', 'ref': 'C', 'alt': 'A'}}
assert results['NM_032790.3:c.126C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'}

Expand Down

0 comments on commit 723f70b

Please sign in to comment.