Skip to content

Commit 51a2c5b

Browse files
committed
changed order of RSCU_gene + add csv extension to RSCU_tw
1 parent 0efab1b commit 51a2c5b

File tree

6 files changed

+679
-7
lines changed

6 files changed

+679
-7
lines changed
476 Bytes
Binary file not shown.

Compute_RSCU_gene/__main__.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
1+
#author Rhondene Wint
22
#!/usr/bin/python
33

44
import argparse
@@ -80,14 +80,12 @@ def compute_rscu_weights(df_codcnt:pd.DataFrame)->pd.DataFrame:
8080
parser = argparse.ArgumentParser(description=about,epilog=epi_note)
8181
parser.add_argument('-CDS', help='Path to fasta file with coding sequences', type=str, required=True, metavar='')
8282
parser.add_argument('-out', help='Path of destination folder for output file', type=str, default='./file_out.rscu', metavar='')
83-
8483
args=parser.parse_args()
8584

86-
8785
headers,seqs=fix_fasta.fix_fasta(args.CDS)##preprocess fasta to a paired list of headers and sequences
8886

8987
df_list=[]
90-
for i in range(len(seqs)):
88+
for i,cds in enumerate(seqs):
9189
ID = headers[i].split(' ')[0]
9290
if len(cds)%3 !=0:
9391
ID = headers[i].split(' ')[0]
@@ -108,5 +106,8 @@ def compute_rscu_weights(df_codcnt:pd.DataFrame)->pd.DataFrame:
108106
##add a gene information column
109107
r3['SeqID']= rscu['SeqID'].values[0]
110108
r3['Length']=rscu['Length'].values[0]
109+
#reorder columns so seqID and length are first
110+
cols = r3.columns.tolist()
111+
r3 = r3[cols[-2:]+cols[:-2]]
111112
comb.append(r3)
112113
pd.concat(comb,axis=0).reset_index(drop=True).to_csv(args.out+'_rscu.csv',index=False)
32 KB
Binary file not shown.

Compute_RSCU_tw/__main__.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ def get_cod_freq(headers:list, seqs:list):
4545
non_deg=['AUG', "UAA","UAG", "UGA", "UGG" ]
4646
codon_count=dict()
4747
codon_count = {codon: 0 for codon in codon_aa if codon not in non_deg }
48-
4948
for i,cds in enumerate(seqs):
5049
if len(cds)%3 !=0:
5150
ID = headers[i].split(' ')[0]
@@ -88,13 +87,12 @@ def compute_rscu_weights(df_codcount:pd.DataFrame)->pd.DataFrame:
8887

8988
args=parser.parse_args()
9089

91-
9290
headers,seqs=fix_fasta.fix_fasta(args.CDS) ##formats fasta into csv of sequences
9391
df_codcount = get_cod_freq(headers,seqs) ##computes absolute codon frequencies
9492
rscu = compute_rscu_weights(df_codcount) ##computes RSCU and adaptive weights
9593

9694
#save the file
97-
rscu.to_csv('{}.rscu'.format(args.out), index=False)
95+
rscu.to_csv('{}.rscu.csv'.format(args.out), index=False)
9896

9997

10098

per_gene_absolute_codon_counts.csv

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
Codon,Obs_Freq,Amino_acid,Length,SeqID
2+
UUU,10,Phe,2166,>FBpp0423064
3+
UUC,8,Phe,2166,>FBpp0423064
4+
UCU,8,Ser4,2166,>FBpp0423064
5+
UCC,10,Ser4,2166,>FBpp0423064
6+
UCA,11,Ser4,2166,>FBpp0423064
7+
UCG,11,Ser4,2166,>FBpp0423064
8+
AGU,17,Ser2,2166,>FBpp0423064
9+
AGC,13,Ser2,2166,>FBpp0423064
10+
CUU,6,Leu4,2166,>FBpp0423064
11+
CUC,7,Leu4,2166,>FBpp0423064
12+
CUA,10,Leu4,2166,>FBpp0423064
13+
CUG,7,Leu4,2166,>FBpp0423064
14+
UUA,7,Leu2,2166,>FBpp0423064
15+
UUG,19,Leu2,2166,>FBpp0423064
16+
UAU,11,Tyr,2166,>FBpp0423064
17+
UAC,3,Tyr,2166,>FBpp0423064
18+
UGU,7,Cys,2166,>FBpp0423064
19+
UGC,3,Cys,2166,>FBpp0423064
20+
CGU,4,Arg4,2166,>FBpp0423064
21+
CGC,6,Arg4,2166,>FBpp0423064
22+
CGA,5,Arg4,2166,>FBpp0423064
23+
CGG,4,Arg4,2166,>FBpp0423064
24+
AGA,13,Arg2,2166,>FBpp0423064
25+
AGG,9,Arg2,2166,>FBpp0423064
26+
CCU,7,Pro,2166,>FBpp0423064
27+
CCC,14,Pro,2166,>FBpp0423064
28+
CCA,22,Pro,2166,>FBpp0423064
29+
CCG,7,Pro,2166,>FBpp0423064
30+
CAU,7,His,2166,>FBpp0423064
31+
CAC,4,His,2166,>FBpp0423064
32+
CAA,16,Gln,2166,>FBpp0423064
33+
CAG,18,Gln,2166,>FBpp0423064
34+
AUU,14,Ile,2166,>FBpp0423064
35+
AUC,13,Ile,2166,>FBpp0423064
36+
AUA,15,Ile,2166,>FBpp0423064
37+
ACU,9,Thr,2166,>FBpp0423064
38+
ACC,10,Thr,2166,>FBpp0423064
39+
ACA,9,Thr,2166,>FBpp0423064
40+
ACG,6,Thr,2166,>FBpp0423064
41+
AAU,31,Asn,2166,>FBpp0423064
42+
AAC,14,Asn,2166,>FBpp0423064
43+
AAA,41,Lys,2166,>FBpp0423064
44+
AAG,38,Lys,2166,>FBpp0423064
45+
GUU,11,Val,2166,>FBpp0423064
46+
GUC,12,Val,2166,>FBpp0423064
47+
GUA,7,Val,2166,>FBpp0423064
48+
GUG,8,Val,2166,>FBpp0423064
49+
GCU,11,Ala,2166,>FBpp0423064
50+
GCC,12,Ala,2166,>FBpp0423064
51+
GCA,8,Ala,2166,>FBpp0423064
52+
GCG,4,Ala,2166,>FBpp0423064
53+
GAU,31,Asp,2166,>FBpp0423064
54+
GAC,7,Asp,2166,>FBpp0423064
55+
GAA,36,Glu,2166,>FBpp0423064
56+
GAG,18,Glu,2166,>FBpp0423064
57+
GGU,9,Gly,2166,>FBpp0423064
58+
GGC,7,Gly,2166,>FBpp0423064
59+
GGA,10,Gly,2166,>FBpp0423064
60+
GGG,2,Gly,2166,>FBpp0423064

0 commit comments

Comments
 (0)