Skip to content

Commit da3065a

Browse files
committed
fix merge
1 parent 6961836 commit da3065a

File tree

10 files changed

+21
-120
lines changed

10 files changed

+21
-120
lines changed

LeafletSC.egg-info/PKG-INFO

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Metadata-Version: 2.1
22
Name: LeafletSC
3-
Version: 0.2.11
3+
Version: 0.2.12
44
Summary: Alternative splicing quantification in single cells with Leaflet
55
Home-page: https://github.com/daklab/Leaflet
66
Author: Karin Isaev, Columbia University and NYGC
161 Bytes
Binary file not shown.
Binary file not shown.

LeafletSC/clustering/find_intron_clusters.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,9 @@ def mapping_juncs_exons(juncs_gr, gtf_exons_gr, singletons):
302302
return juncs_gr, juncs_coords_unique, clusters
303303

304304
def visualize_junctions(dat, junc_id):
305+
306+
#
307+
305308
# Filter data for the specific junction ID
306309
dat = dat[dat.Cluster == dat[dat.junction_id == junc_id].Cluster.values[0]]
307310

@@ -438,7 +441,7 @@ def main(junc_files, gtf_file, output_file, sequencing_type, junc_bed_file, thre
438441
junc_scores_all = refine_clusters(clust_info)
439442
junc_scores_all = junc_scores_all[junc_scores_all.min_usage < threshold_inc]
440443
# add 5ss and 3ss usage ratio of each junction to all_juncs
441-
all_juncs = all_juncs.merge(clust_info[['junction_id', 'total_5ss_counts', 'total_3ss_counts', "5SS_usage", "3SS_usage"]], on='junction_id')
444+
all_juncs = all_juncs.merge(junc_scores_all[['junction_id', 'total_5ss_counts', 'total_3ss_counts', "5SS_usage", "3SS_usage"]], on='junction_id')
442445

443446
# remove junctions that are in junc_scores_all from juncs_gr, clusters, all_juncs and juncs_coords_unique
444447
juncs_gr = juncs_gr[~juncs_gr.junction_id.isin(junc_scores_all.junction_id)]

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,14 @@ The full method can be found in our [paper](https://www.biorxiv.org/content/10.1
7272
}
7373
```
7474

75+
### Potential errors:
76+
77+
If you run into errors with the Polars package, please make sure that polars-lts-cpu is installed:
78+
79+
```
80+
pip install polars-lts-cpu
81+
```
82+
7583
### To-do:
7684

7785
1. Add documentation and some tests for how to run the simulation code

Tutorials/01_run_intron_clustering.ipynb

Lines changed: 7 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,9 @@
99
},
1010
{
1111
"cell_type": "code",
12-
"execution_count": 21,
12+
"execution_count": null,
1313
"metadata": {},
14-
"outputs": [
15-
{
16-
"ename": "SyntaxError",
17-
"evalue": "invalid syntax (712172458.py, line 2)",
18-
"output_type": "error",
19-
"traceback": [
20-
"\u001b[0;36m Cell \u001b[0;32mIn[21], line 2\u001b[0;36m\u001b[0m\n\u001b[0;31m import LeafletSC.0.2.11\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
21-
]
22-
}
23-
],
14+
"outputs": [],
2415
"source": [
2516
"# Load LeafletSC \n",
2617
"import LeafletSC\n",
@@ -47,7 +38,7 @@
4738
},
4839
{
4940
"cell_type": "code",
50-
"execution_count": 16,
41+
"execution_count": null,
5142
"metadata": {},
5243
"outputs": [],
5344
"source": [
@@ -85,76 +76,9 @@
8576
},
8677
{
8778
"cell_type": "code",
88-
"execution_count": 17,
79+
"execution_count": null,
8980
"metadata": {},
90-
"outputs": [
91-
{
92-
"name": "stdout",
93-
"output_type": "stream",
94-
"text": [
95-
"Loading files obtained from single_cell sequencing\n",
96-
"Reading in junction files from /gpfs/commons/home/kisaev/LeafletSC/data/raw/junctions\n",
97-
"The number of junction files to be processed is 2\n"
98-
]
99-
},
100-
{
101-
"name": "stderr",
102-
"output_type": "stream",
103-
"text": [
104-
"100%|██████████| 2/2 [00:01<00:00, 1.15it/s]\n"
105-
]
106-
},
107-
{
108-
"name": "stdout",
109-
"output_type": "stream",
110-
"text": [
111-
"The gtf file you provided is /gpfs/commons/groups/knowles_lab/Karin/genome_files/gencode.v43.basic.annotation.gtf\n",
112-
"Reading the gtf may take a while depending on the size of your gtf file\n"
113-
]
114-
},
115-
{
116-
"name": "stderr",
117-
"output_type": "stream",
118-
"text": [
119-
"INFO:root:Extracted GTF attributes: ['gene_id', 'gene_type', 'gene_name', 'level', 'tag', 'transcript_id', 'transcript_type', 'transcript_name', 'transcript_support_level', 'havana_transcript', 'exon_number', 'exon_id', 'hgnc_id', 'havana_gene', 'ont', 'protein_id', 'ccdsid', 'artif_dupl']\n"
120-
]
121-
},
122-
{
123-
"name": "stdout",
124-
"output_type": "stream",
125-
"text": [
126-
"Reading gtf file took 124.31 seconds\n",
127-
"+++++++++++++++++++++++++++++++++++++++++++++++++++++++\n",
128-
"The number of unique exons is 411865\n",
129-
"The number of unique transcript ids is 115526\n",
130-
"The number of unique gene ids is 62668\n",
131-
"+++++++++++++++++++++++++++++++++++++++++++++++++++++++\n",
132-
"Done extracting exons from gtf file\n",
133-
"Cleaning up 'chrom' column\n",
134-
"Making gr object from all junctions across all cell types\n",
135-
"The number of junctions prior to assessing distance to exons is 12876\n",
136-
"Annotating junctions with known exons based on input gtf file\n",
137-
"The number of junctions after assessing distance to exons is 11689\n",
138-
"Clustering intron splicing events by gene_id\n",
139-
"The number of clusters after clustering by gene_id is 11189\n",
140-
"The number of clusters after clustering by gene_id is 11188\n",
141-
"The number of junctions after removing singletons is 11688\n",
142-
"The number of clusters after filtering for shared splice sites is 214\n",
143-
"The number of junctions after filtering for shared splice sites is 703\n",
144-
"Refining intron clusters to account for junction usage ratio threshold, this may take a while...\n",
145-
"Done refining clusters!\n",
146-
"The number of clusters after removing low confidence junctions is 214\n",
147-
"Reclustering intron splicing events after low confidence junction removal\n",
148-
"The number of clusters after removing singletons is 118\n",
149-
"Confirming that junctions in each cluster share splice sites\n",
150-
"The number of clusters after filtering for shared splice sites is 117\n",
151-
"The number of clusters to be finally evaluated is 117\n",
152-
"The number of junctions to be finally evaluated is 370\n",
153-
"You can find the output file here: /gpfs/commons/home/kisaev/LeafletSC/data/processed/test_intron_clusters_50_500000_2_20240312_single_cell.gz\n",
154-
"Finished obtaining intron cluster files!\n"
155-
]
156-
}
157-
],
81+
"outputs": [],
15882
"source": [
15983
"all_juncs_df = find_intron_clusters(junc_files=junc_files, gtf_file=gtf_file, output_file=output_file, \n",
16084
" sequencing_type=sequencing_type, junc_bed_file=junc_bed_file, \n",
@@ -167,43 +91,9 @@
16791
},
16892
{
16993
"cell_type": "code",
170-
"execution_count": 19,
94+
"execution_count": null,
17195
"metadata": {},
172-
"outputs": [
173-
{
174-
"data": {
175-
"text/plain": [
176-
"['junction_id',\n",
177-
" 'counts_total',\n",
178-
" 'chrom',\n",
179-
" 'chromStart',\n",
180-
" 'chromEnd',\n",
181-
" 'name',\n",
182-
" 'score',\n",
183-
" 'strand',\n",
184-
" 'thickStart',\n",
185-
" 'thickEnd',\n",
186-
" 'itemRgb',\n",
187-
" 'blockCount',\n",
188-
" 'blockSizes',\n",
189-
" 'blockStarts',\n",
190-
" 'num_cells_wjunc',\n",
191-
" 'cell_readcounts',\n",
192-
" 'file_name',\n",
193-
" 'cell_type',\n",
194-
" 'block_add_start',\n",
195-
" 'block_subtract_end',\n",
196-
" 'intron_length',\n",
197-
" 'Cluster',\n",
198-
" 'gene_id',\n",
199-
" 'Count']"
200-
]
201-
},
202-
"execution_count": 19,
203-
"metadata": {},
204-
"output_type": "execute_result"
205-
}
206-
],
96+
"outputs": [],
20797
"source": [
20898
"# visualize junctions in an intron cluster \n",
20999
"list(all_juncs_df)"

dist/LeafletSC-0.2.11.tar.gz

-25.3 KB
Binary file not shown.

dist/LeafletSC-0.2.12.tar.gz

25.3 KB
Binary file not shown.

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ def read_requirements():
77

88
setup(
99
name='LeafletSC',
10-
version='0.2.11',
10+
version='0.2.13',
1111
author='Karin Isaev, Columbia University and NYGC',
1212
author_email='ki2255@cumc.columbia.edu',
1313
description='Alternative splicing quantification in single cells with Leaflet',

0 commit comments

Comments
 (0)