This repository was archived by the owner on Jan 22, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathgeneral_stats_function.py
More file actions
48 lines (40 loc) · 2.12 KB
/
general_stats_function.py
File metadata and controls
48 lines (40 loc) · 2.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
##################################
### Loading Necessary Packages ###
##################################
import pandas as pd
import yaml
from pathlib import Path
################################
### check general annotation ###
################################
def general_stats(bacannot_summary):
    """Attach general annotation statistics to every sample of the summary.

    For each sample key in ``bacannot_summary`` three files located below the
    sample's ``results_dir`` are read:

    * ``annotation/<sample>.txt`` -- parsed with ``yaml.safe_load``; the
      ``CDS``, ``rRNA``, ``tRNA`` and ``tmRNA`` entries are looked up and
      default to 0 when absent.
    * ``MLST/<sample>_mlst_analysis.txt`` -- headerless, tab-separated table
      whose third column (index 2) is stored as the MLST value.
    * ``refseq_masher/refseq_masher_results.txt`` -- tab-separated table with
      a header; the row with the smallest ``distance`` is reported as the
      closest reference.

    The collected values are written in place to
    ``bacannot_summary[sample]['general_annotation']``.

    Args:
        bacannot_summary: Mapping of sample name to a dict holding at least a
            ``results_dir`` entry. The per-sample dicts are mutated in place.

    Returns:
        None. The results are only available through ``bacannot_summary``.

    Raises:
        FileNotFoundError: If one of the expected result files is missing.
        ValueError: Raised by ``Series.item`` when the MLST table does not
            hold exactly one row or the refseq_masher table holds no rows.
    """
    # iterate over available samples
    for sample, sample_summary in bacannot_summary.items():
        # load dir of samples' results
        results_dir = sample_summary['results_dir']

        # load annotation stats
        # An empty stats file parses to None; fall back to an empty mapping so
        # the feature counts default to 0 instead of raising AttributeError.
        general_results = yaml.safe_load(
            Path(f"{results_dir}/annotation/{sample}.txt").read_text()
        ) or {}

        # load MLST
        mlst_results = pd.read_csv(
            f"{results_dir}/MLST/{sample}_mlst_analysis.txt",
            sep='\t', header=None
        )
        sequence_type = str(mlst_results[2].item())
        # Only a bare '-' (presumably the "no sequence type" placeholder) is
        # mapped to 'null'; values that merely contain a hyphen are kept as is.
        mlst_value = 'null' if sequence_type == '-' else sequence_type

        # load refseq_masher
        refseq_masher_results = pd.read_csv(
            f"{results_dir}/refseq_masher/refseq_masher_results.txt",
            sep='\t'
        )
        # Sort once and keep the single best hit. A stable sort makes ties in
        # 'distance' resolve to the first row in file order.
        closest_hit = refseq_masher_results.sort_values(
            by='distance', ascending=True, kind='mergesort'
        ).head(1)

        # save annotation stats
        # Assigned in one step so a failing lookup above never leaves a
        # half-filled 'general_annotation' entry behind.
        sample_summary['general_annotation'] = {
            'mlst': mlst_value,
            'cds': general_results.get('CDS', 0),
            'rrna': general_results.get('rRNA', 0),
            'trna': general_results.get('tRNA', 0),
            'tmrna': general_results.get('tmRNA', 0),
            'closest_reference': {
                'strain': closest_hit['top_taxonomy_name'].item(),
                'distance': closest_hit['distance'].item(),
                'accession': closest_hit['assembly_accession'].item(),
            },
        }