forked from juzishengwu/decoder
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparse_geno.py
More file actions
81 lines (69 loc) · 2.03 KB
/
parse_geno.py
File metadata and controls
81 lines (69 loc) · 2.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#coding=utf-8
import os, sys
import mwparserfromhell
def parse(genofn):
    """Parse a wiki-text genotype page into a plain dict.

    Only the first template found on the page is inspected, and only when
    its name (ignoring surrounding whitespace) is ``Genotype``.

    Args:
        genofn: Path of the wiki-text file to read.

    Returns:
        A dict holding whichever of ``rsid``, ``allele1``, ``allele2``,
        ``magnitude``, ``repute`` and ``summary`` the template defines,
        each stripped of surrounding whitespace.  ``magnitude`` is always
        present as a float (``0.0`` when missing, empty or not numeric).
        An empty dict is returned when the page has no templates, when the
        first template is not ``Genotype``, or when ``rsid`` is missing or
        empty.
    """
    # Use a context manager so the handle is closed even if parsing fails.
    with open(genofn) as handle:
        wikicode = mwparserfromhell.parse(handle.read())

    templates = wikicode.filter_templates()
    if not templates:
        return {}
    template = templates[0]
    if template.name.strip() != 'Genotype':
        return {}

    geno = {}
    for key in ('rsid', 'allele1', 'allele2',
                'magnitude', 'repute', 'summary'):
        try:
            param = template.get(key)
        except ValueError:
            # Template.get() raises ValueError when the parameter is absent.
            continue
        # Read the parameter's value directly rather than splitting the
        # rendered "name=value" text on '=': splitting truncates values that
        # themselves contain '=' and keeps whitespace following the '='.
        geno[key] = param.value.strip()

    if not geno.get('rsid'):
        return {}

    try:
        # A missing or empty magnitude falls back to 0.
        geno['magnitude'] = float(geno.get('magnitude') or 0)
    except ValueError:
        geno['magnitude'] = 0.0
    return geno
if __name__ == '__main__':
    # Command-line entry point: load one genotype file, or every
    # "Rs*txt" file in a directory, into the database.
    #
    # The settings module is set and django.setup() is called before the
    # model imports below.
    os.environ['DJANGO_SETTINGS_MODULE'] = 'decoder.settings'
    import django
    django.setup()
    from django.db import IntegrityError
    from baike.models import Genotype, Snp

    def geno2db(geno):
        """Save one parsed genotype dict as a Genotype row linked to its Snp.

        Args:
            geno: Dict as returned by ``parse``; must contain ``rsid``.

        Nothing is saved (a message is printed instead) when no Snp with
        that rsid exists or when both alleles are missing or empty.  An
        IntegrityError raised while saving is printed, not propagated.
        """
        # Work on a copy so the caller's dict is left untouched.
        fields = dict(geno)
        rsid = fields.pop('rsid')
        try:
            snp = Snp.objects.get(rsid=rsid)
        except Snp.DoesNotExist:
            print('no snp %s' % rsid)
            return

        fields['allele'] = fields.get('allele1', '') + fields.get('allele2', '')
        if not fields['allele']:
            print('no allele %s' % rsid)
            return

        try:
            genotype = Genotype(**fields)
            genotype.snp = snp
            genotype.save()
        except IntegrityError as e:
            print(e)
            print(fields)

    if len(sys.argv) < 2:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit('usage: %s <genotype-file-or-directory>' % sys.argv[0])
    target = sys.argv[1]

    if os.path.isfile(target):
        record = parse(target)
        if record:
            geno2db(record)
    elif os.path.isdir(target):
        for fn in os.listdir(target):
            if fn.startswith('Rs') and fn.endswith('txt'):
                genofn = os.path.join(target, fn)
                print(genofn)
                record = parse(genofn)
                if record:
                    geno2db(record)