|
9 | 9 | }, |
10 | 10 | { |
11 | 11 | "cell_type": "code", |
12 | | - "execution_count": 21, |
| 12 | + "execution_count": null, |
13 | 13 | "metadata": {}, |
14 | | - "outputs": [ |
15 | | - { |
16 | | - "ename": "SyntaxError", |
17 | | - "evalue": "invalid syntax (712172458.py, line 2)", |
18 | | - "output_type": "error", |
19 | | - "traceback": [ |
20 | | - "\u001b[0;36m Cell \u001b[0;32mIn[21], line 2\u001b[0;36m\u001b[0m\n\u001b[0;31m import LeafletSC.0.2.11\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" |
21 | | - ] |
22 | | - } |
23 | | - ], |
| 14 | + "outputs": [], |
24 | 15 | "source": [ |
25 | 16 | "# Load LeafletSC \n", |
26 | 17 | "import LeafletSC\n", |
|
47 | 38 | }, |
48 | 39 | { |
49 | 40 | "cell_type": "code", |
50 | | - "execution_count": 16, |
| 41 | + "execution_count": null, |
51 | 42 | "metadata": {}, |
52 | 43 | "outputs": [], |
53 | 44 | "source": [ |
|
85 | 76 | }, |
86 | 77 | { |
87 | 78 | "cell_type": "code", |
88 | | - "execution_count": 17, |
| 79 | + "execution_count": null, |
89 | 80 | "metadata": {}, |
90 | | - "outputs": [ |
91 | | - { |
92 | | - "name": "stdout", |
93 | | - "output_type": "stream", |
94 | | - "text": [ |
95 | | - "Loading files obtained from single_cell sequencing\n", |
96 | | - "Reading in junction files from /gpfs/commons/home/kisaev/LeafletSC/data/raw/junctions\n", |
97 | | - "The number of junction files to be processed is 2\n" |
98 | | - ] |
99 | | - }, |
100 | | - { |
101 | | - "name": "stderr", |
102 | | - "output_type": "stream", |
103 | | - "text": [ |
104 | | - "100%|██████████| 2/2 [00:01<00:00, 1.15it/s]\n" |
105 | | - ] |
106 | | - }, |
107 | | - { |
108 | | - "name": "stdout", |
109 | | - "output_type": "stream", |
110 | | - "text": [ |
111 | | - "The gtf file you provided is /gpfs/commons/groups/knowles_lab/Karin/genome_files/gencode.v43.basic.annotation.gtf\n", |
112 | | - "Reading the gtf may take a while depending on the size of your gtf file\n" |
113 | | - ] |
114 | | - }, |
115 | | - { |
116 | | - "name": "stderr", |
117 | | - "output_type": "stream", |
118 | | - "text": [ |
119 | | - "INFO:root:Extracted GTF attributes: ['gene_id', 'gene_type', 'gene_name', 'level', 'tag', 'transcript_id', 'transcript_type', 'transcript_name', 'transcript_support_level', 'havana_transcript', 'exon_number', 'exon_id', 'hgnc_id', 'havana_gene', 'ont', 'protein_id', 'ccdsid', 'artif_dupl']\n" |
120 | | - ] |
121 | | - }, |
122 | | - { |
123 | | - "name": "stdout", |
124 | | - "output_type": "stream", |
125 | | - "text": [ |
126 | | - "Reading gtf file took 124.31 seconds\n", |
127 | | - "+++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", |
128 | | - "The number of unique exons is 411865\n", |
129 | | - "The number of unique transcript ids is 115526\n", |
130 | | - "The number of unique gene ids is 62668\n", |
131 | | - "+++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", |
132 | | - "Done extracting exons from gtf file\n", |
133 | | - "Cleaning up 'chrom' column\n", |
134 | | - "Making gr object from all junctions across all cell types\n", |
135 | | - "The number of junctions prior to assessing distance to exons is 12876\n", |
136 | | - "Annotating junctions with known exons based on input gtf file\n", |
137 | | - "The number of junctions after assessing distance to exons is 11689\n", |
138 | | - "Clustering intron splicing events by gene_id\n", |
139 | | - "The number of clusters after clustering by gene_id is 11189\n", |
140 | | - "The number of clusters after clustering by gene_id is 11188\n", |
141 | | - "The number of junctions after removing singletons is 11688\n", |
142 | | - "The number of clusters after filtering for shared splice sites is 214\n", |
143 | | - "The number of junctions after filtering for shared splice sites is 703\n", |
144 | | - "Refining intron clusters to account for junction usage ratio threshold, this may take a while...\n", |
145 | | - "Done refining clusters!\n", |
146 | | - "The number of clusters after removing low confidence junctions is 214\n", |
147 | | - "Reclustering intron splicing events after low confidence junction removal\n", |
148 | | - "The number of clusters after removing singletons is 118\n", |
149 | | - "Confirming that junctions in each cluster share splice sites\n", |
150 | | - "The number of clusters after filtering for shared splice sites is 117\n", |
151 | | - "The number of clusters to be finally evaluated is 117\n", |
152 | | - "The number of junctions to be finally evaluated is 370\n", |
153 | | - "You can find the output file here: /gpfs/commons/home/kisaev/LeafletSC/data/processed/test_intron_clusters_50_500000_2_20240312_single_cell.gz\n", |
154 | | - "Finished obtaining intron cluster files!\n" |
155 | | - ] |
156 | | - } |
157 | | - ], |
| 81 | + "outputs": [], |
158 | 82 | "source": [ |
159 | 83 | "all_juncs_df = find_intron_clusters(junc_files=junc_files, gtf_file=gtf_file, output_file=output_file, \n", |
160 | 84 | " sequencing_type=sequencing_type, junc_bed_file=junc_bed_file, \n", |
|
167 | 91 | }, |
168 | 92 | { |
169 | 93 | "cell_type": "code", |
170 | | - "execution_count": 19, |
| 94 | + "execution_count": null, |
171 | 95 | "metadata": {}, |
172 | | - "outputs": [ |
173 | | - { |
174 | | - "data": { |
175 | | - "text/plain": [ |
176 | | - "['junction_id',\n", |
177 | | - " 'counts_total',\n", |
178 | | - " 'chrom',\n", |
179 | | - " 'chromStart',\n", |
180 | | - " 'chromEnd',\n", |
181 | | - " 'name',\n", |
182 | | - " 'score',\n", |
183 | | - " 'strand',\n", |
184 | | - " 'thickStart',\n", |
185 | | - " 'thickEnd',\n", |
186 | | - " 'itemRgb',\n", |
187 | | - " 'blockCount',\n", |
188 | | - " 'blockSizes',\n", |
189 | | - " 'blockStarts',\n", |
190 | | - " 'num_cells_wjunc',\n", |
191 | | - " 'cell_readcounts',\n", |
192 | | - " 'file_name',\n", |
193 | | - " 'cell_type',\n", |
194 | | - " 'block_add_start',\n", |
195 | | - " 'block_subtract_end',\n", |
196 | | - " 'intron_length',\n", |
197 | | - " 'Cluster',\n", |
198 | | - " 'gene_id',\n", |
199 | | - " 'Count']" |
200 | | - ] |
201 | | - }, |
202 | | - "execution_count": 19, |
203 | | - "metadata": {}, |
204 | | - "output_type": "execute_result" |
205 | | - } |
206 | | - ], |
| 96 | + "outputs": [], |
207 | 97 | "source": [ |
208 | 98 | "# visualize junctions in an intron cluster \n", |
209 | 99 | "list(all_juncs_df)" |
|
0 commit comments