Skip to content

Commit

Permalink
Merge pull request #114 from openvar/develop_v3
Browse files Browse the repository at this point in the history
Develop v3
  • Loading branch information
Peter Causey-Freeman authored Dec 2, 2019
2 parents ec89acd + 723f70b commit a2f84e4
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 25 deletions.
1 change: 1 addition & 0 deletions VariantValidator/modules/variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ def output_dict(self, test=False):
if test is True:
try:
del self.stable_gene_ids['ensembl_gene_id']
del self.stable_gene_ids['ccds_ids']
except KeyError:
pass
dict_out = {
Expand Down
41 changes: 38 additions & 3 deletions VariantValidator/modules/vvMixinConverters.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from Bio import Entrez, SeqIO
from . import utils as fn

from vvhgvs.exceptions import HGVSError, HGVSDataNotAvailableError, HGVSUnsupportedOperationError
from vvhgvs.exceptions import HGVSError, HGVSDataNotAvailableError, HGVSUnsupportedOperationError, \
HGVSInvalidVariantError

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -494,6 +495,24 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False):
# This will only happen if the variant is flanking the gap but is
# not inside the gap
logger.info('Variant is on the flank of a genomic gap but not within the gap')

# Test on the flank and if so, return

# Logic, normalize the c. variant and if a substitution (cannot normalize) then direct map
# Currently believe that sub.n is the only variant type which fits. ins can normalize
# and may also be a dup!
try:
norm_stored_c = hn.normalize(stored_hgvs_c)
if norm_stored_c.posedit.edit.type == 'sub':
flank_hgvs_genomic = self.vm.t_to_g(norm_stored_c, genomic_gap_variant.ac)
self.vr.validate(flank_hgvs_genomic)
return flank_hgvs_genomic

# Will occur if the variant still overlaps the gap / is in the gap
except HGVSInvalidVariantError:
pass

# If test fails, continue old processing
gap_start = genomic_gap_variant.posedit.pos.start.base - 1
gap_end = genomic_gap_variant.posedit.pos.end.base + 1
genomic_gap_variant.posedit.pos.start.base = gap_start
Expand Down Expand Up @@ -1159,6 +1178,23 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn):
# This will only happen if the variant is flanking the gap but is
# not inside the gap
logger.info('Variant is on the flank of a genomic gap but not within the gap')

# Test definitely on the flank and if so, return
# Logic, normalize the c. variant and if a substitution (cannot normalize) then direct map
# Currently believe that sub.n is the only variant type which fits. ins can normalize
# and may also be a dup!
try:
norm_stored_c = hn.normalize(stored_hgvs_c)
if norm_stored_c.posedit.edit.type == 'sub':
flank_hgvs_genomic = self.vm.t_to_g(norm_stored_c, genomic_gap_variant.ac)
self.vr.validate(flank_hgvs_genomic)
return flank_hgvs_genomic

# Will occur if the variant still overlaps the gap / is in the gap
except HGVSInvalidVariantError:
pass

# If test fails, continue old processing
gap_start = genomic_gap_variant.posedit.pos.start.base - 1
gap_end = genomic_gap_variant.posedit.pos.end.base + 1
genomic_gap_variant.posedit.pos.start.base = gap_start
Expand Down Expand Up @@ -2155,7 +2191,7 @@ def chr_to_rsg(self, hgvs_genomic, hn):
"""
# Convert chromosomal HGVS description to RefSeqGene
"""
# print 'chr_to_rsg triggered'
# 'chr_to_rsg triggered'
hgvs_genomic = hn.normalize(hgvs_genomic)
# split the description
# Accessions
Expand Down Expand Up @@ -2358,7 +2394,6 @@ def rsg_to_chr(self, hgvs_refseqgene, primary_assembly, hn):
new_ref = match[1]
hgvs_genomic.posedit.edit.ref = new_ref
error = 'true'
# # print str(e) + '\n3.'
data = {'hgvs_genomic': str(hgvs_genomic), 'gene': gene, 'valid': str(error)}
else:
data = {'hgvs_genomic': str(hgvs_genomic), 'gene': gene, 'valid': 'true'}
Expand Down
35 changes: 27 additions & 8 deletions VariantValidator/modules/vvMixinCore.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,15 +175,26 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr
# INITIAL USER INPUT FORMATTING
invalid = my_variant.format_quibble()
if invalid:
if re.search(r'\w+:[gcnmrp]', my_variant.quibble) and not \
if re.search(r'\w+:[gcnmrp],', my_variant.quibble):
error = 'Variant description ' + my_variant.quibble + ' contained the , character between '\
'<type> and <position> in the expected pattern <accession>:<type>.<position> and ' \
'has been auto-corrected'
my_variant.quibble = my_variant.quibble.replace(',', '.')
my_variant.warnings.append(error)
logger.warning(error)
pass
elif re.search(r'\w+:[gcnmrp]', my_variant.quibble) and not \
re.search(r'\w+:[gcnmrp]\.', my_variant.quibble):
error = 'Variant description ' + my_variant.quibble + ' lacks the . character between ' \
'<type> and <position> in the expected pattern <accession>:<type>.<position>'
my_variant.warnings.append(error)
logger.warning(error)
continue
else:
error = 'Variant description ' + my_variant.quibble + ' is not in an accepted format'
my_variant.warnings.append(error)
logger.warning(error)
continue
my_variant.warnings.append(error)
logger.warning(error)
continue

formatted_variant = my_variant.quibble
stash_input = my_variant.quibble
Expand Down Expand Up @@ -214,8 +225,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr
except vvhgvs.exceptions.HGVSError as e:
# Look for T not U!
posedit = formatted_variant.split(':')[-1]
if 'T' in posedit:
e = 'The IUPAC RNA alphabet dictates that RNA variants must use the character u in place of t'
if 'T' in posedit and "r." in posedit:
e = 'The IUPAC RNA alphabet dictates that RNA variants must use the character u in ' \
'place of t'
my_variant.warnings.append(str(e))
logger.warning(str(e))
continue
Expand Down Expand Up @@ -257,7 +269,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr
my_variant.warnings.append(str(trap_ens_in) + ' automapped to equivalent RefSeq transcript '
+ my_variant.quibble)
logger.info(str(trap_ens_in) + ' automapped to equivalent RefSeq '
'transcript ' + my_variant.quibble)
'transcript ' + my_variant.quibble)
logger.debug("HVGS acceptance test passed")

# Check whether supported genome build is requested for non g. descriptions
Expand Down Expand Up @@ -701,7 +713,14 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr
stable_gene_ids['ucsc_id'] = gene_stable_info[5]
stable_gene_ids['omim_id'] = json.loads(gene_stable_info[6])
# stable_gene_ids['vega_id'] = gene_stable_info[7]
# stable_gene_ids['ccds_ids'] = gene_stable_info[8]

# reformat ccds return into a Python list
my_ccds = gene_stable_info[8].replace('[', '')
my_ccds = my_ccds.replace(']', '')
my_ccds = my_ccds.replace('"','')
ccds_list = my_ccds.split()
stable_gene_ids['ccds_ids'] = ccds_list

except IndexError as e:
logger.debug("Except pass, %s", e)

Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@ dependencies:
- configparser>=3.5.0
- requests
- pip:
- git+https://github.com/openvar/vv_hgvs@master#egg=vvhgvs
- git+https://github.com/openvar/vv_hgvs@1.2.5.vv1#egg=vvhgvs
- biotools>=0.3.0
- biopython
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@
# removed
"biopython==1.74",
"requests",
# "vvhgvs",
"vvhgvs @ git+https://github.com/openvar/vv_hgvs.git@master#egg=vvhgvs",
"mysql-connector-python",
"vvhgvs @ git+https://github.com/openvar/vv_hgvs.git@1.2.5.vv1#egg=vvhgvs",
],
# dependency_links=[
# "git+https://github.com/openvar/vv_hgvs.git@master#egg=vvhgvs-1.0.0",
Expand Down
24 changes: 13 additions & 11 deletions tests/test_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ def test_variant1(self):
assert results['NM_015120.4:c.35T>C']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.35T>C'
assert results['NM_015120.4:c.35T>C']['hgvs_lrg_variant'] == 'LRG_741:g.5146T>C'
self.assertCountEqual(results['NM_015120.4:c.35T>C']['alt_genomic_loci'], [])
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613031delinsCGGA', 'vcf': {'chr': 'chr2', 'pos': '73613031', 'ref': 'T', 'alt': 'CGGA'}}
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385903delinsCGGA', 'vcf': {'chr': 'chr2', 'pos': '73385903', 'ref': 'T', 'alt': 'CGGA'}}
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613031delinsCGGA', 'vcf': {'chr': '2', 'pos': '73613031', 'ref': 'T', 'alt': 'CGGA'}}
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385903delinsCGGA', 'vcf': {'chr': '2', 'pos': '73385903', 'ref': 'T', 'alt': 'CGGA'}}
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613031T>C', 'vcf': {'chr': 'chr2', 'pos': '73613031', 'ref': 'T', 'alt': 'C'}}
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385903T>C', 'vcf': {'chr': 'chr2', 'pos': '73385903', 'ref': 'T', 'alt': 'C'}}
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613031T>C', 'vcf': {'chr': '2', 'pos': '73613031', 'ref': 'T', 'alt': 'C'}}
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385903T>C', 'vcf': {'chr': '2', 'pos': '73385903', 'ref': 'T', 'alt': 'C'}}
assert results['NM_015120.4:c.35T>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'}

def test_variant2(self):
Expand All @@ -49,10 +49,10 @@ def test_variant2(self):
assert results['NM_015120.4:c.39G>C']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.39G>C'
assert results['NM_015120.4:c.39G>C']['hgvs_lrg_variant'] == 'LRG_741:g.5150G>C'
self.assertCountEqual(results['NM_015120.4:c.39G>C']['alt_genomic_loci'], [])
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613034_73613035insCGA', 'vcf': {'chr': 'chr2', 'pos': '73613032', 'ref': 'G', 'alt': 'GGAC'}}
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385906_73385907insCGA', 'vcf': {'chr': 'chr2', 'pos': '73385904', 'ref': 'G', 'alt': 'GGAC'}}
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613034_73613035insCGA', 'vcf': {'chr': '2', 'pos': '73613032', 'ref': 'G', 'alt': 'GGAC'}}
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385906_73385907insCGA', 'vcf': {'chr': '2', 'pos': '73385904', 'ref': 'G', 'alt': 'GGAC'}}
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613032G>C', 'vcf': {'chr': 'chr2', 'pos': '73613032', 'ref': 'G', 'alt': 'C'}}
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385904G>C', 'vcf': {'chr': 'chr2', 'pos': '73385904', 'ref': 'G', 'alt': 'C'}}
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613032G>C', 'vcf': {'chr': '2', 'pos': '73613032', 'ref': 'G', 'alt': 'C'}}
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385904G>C', 'vcf': {'chr': '2', 'pos': '73385904', 'ref': 'G', 'alt': 'C'}}
assert results['NM_015120.4:c.39G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'}

def test_variant3(self):
Expand Down Expand Up @@ -2286,10 +2286,12 @@ def test_variant76(self):
assert results['NM_032790.3:c.126C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Ala42=)', 'slr': 'NP_116179.2:p.(A42=)'}
assert results['NM_032790.3:c.126C>A']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.126C>A'
assert results['NM_032790.3:c.126C>A']['hgvs_lrg_variant'] == 'LRG_93:g.5299C>A'
self.assertCountEqual(results['NM_032790.3:c.126C>A']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'HG1595_PATCH', 'pos': '302869', 'ref': 'GCCCCGC', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'NW_004504303.2', 'pos': '302869', 'ref': 'GCCCCGC', 'alt': 'G'}}}])
assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064778del', 'vcf': {'chr': 'chr12', 'pos': '122064771', 'ref': 'GCCCCGC', 'alt': 'G'}}

# Bug fix for issue https://github.com/openvar/variantValidator/issues/94 creates extra outputs. Not an issue so ignore
#self.assertCountEqual(results['NM_032790.3:c.126C>A']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'HG1595_PATCH', 'pos': '302869', 'ref': 'GCCCCGC', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'NW_004504303.2', 'pos': '302869', 'ref': 'GCCCCGC', 'alt': 'G'}}}])
assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773C>A', 'vcf': {'chr': 'chr12', 'pos': '122064773', 'ref': 'C', 'alt': 'A'}}
assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873C>A', 'vcf': {'chr': 'chr12', 'pos': '121626873', 'ref': 'C', 'alt': 'A'}}
assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064778del', 'vcf': {'chr': '12', 'pos': '122064771', 'ref': 'GCCCCGC', 'alt': 'G'}}
assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773C>A', 'vcf': {'chr': '12', 'pos': '122064773', 'ref': 'C', 'alt': 'A'}}
assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873C>A', 'vcf': {'chr': '12', 'pos': '121626873', 'ref': 'C', 'alt': 'A'}}
assert results['NM_032790.3:c.126C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'}

Expand Down

0 comments on commit a2f84e4

Please sign in to comment.