File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -302,14 +302,16 @@ define([
// Click import library
$(this.wrapSelector('#popupImport')).on('click', function() {
    // generateImportCode() is expected to return a list of code snippets.
    // Overrides that still return a single string (the previous contract)
    // are wrapped in a list so that .forEach below does not throw.
    var generated = that.generateImportCode();
    if (!generated) {
        return;
    }
    var codes = Array.isArray(generated) ? generated : [generated];

    codes.forEach(code => {
        // nothing to run for an empty snippet
        if (!code) {
            return;
        }
        // create one block per snippet and run it
        $('#vp_wrapper').trigger({
            type: 'create_option_page',
            blockType: 'block',
            menuId: 'lgExe_code',
            menuState: { taskState: { code: code } },
            afterAction: 'run'
        });
    });
});
315317
@@ -598,7 +600,7 @@ define([
598600
599601 generateImportCode ( ) {
600602 /** Implementation needed - Generated on clicking Import Library button */
601- return '' ;
603+ return [ ] ;
602604 }
603605
604606 generateCode ( ) {
Original file line number Diff line number Diff line change @@ -30,6 +30,37 @@ import fitz
3030import nltk
3131nltk.download('punkt')` ;
3232
/**
 * Python source of the `vp_pdf_get_sentence` helper.
 *
 * The generated function takes a list of file names, skips everything that
 * does not end in `pdf` (case-insensitively), extracts the text blocks of
 * each page with PyMuPDF (`fitz`), splits them into sentences with nltk and
 * returns a DataFrame with the columns `fname` and `sentence`.
 *
 * - The document is opened with `with`, so it is closed even when reading a
 *   page raises; a failing file is printed and skipped as before.
 * - Per-file frames are collected and concatenated once with
 *   `ignore_index=True` instead of growing a DataFrame inside the loop.
 * - When no file yields a frame, an empty DataFrame that still has the
 *   `fname` / `sentence` columns is returned.
 *
 * The Python code relies on `pd`, `fitz` and `nltk` already being imported
 * in the kernel.
 */
const PDF_FUNC = `def vp_pdf_get_sentence(fname_lst):
    '''
    Get sentence from pdf file by PyMuPDF
    '''
    frame_lst = []
    for fname in fname_lst:
        if fname.split('.')[-1].lower() != 'pdf': continue
        try:
            sentence_lst = []
            with fitz.open(fname) as doc:
                for page in doc:
                    block_lst = page.get_text('blocks')

                    text_lst = [block[4] for block in block_lst if block[6] == 0]
                    text = '\\n'.join(text_lst)

                    sentence_lst.extend(nltk.sent_tokenize(text))
        except Exception as e:
            print(e)
            continue

        frame_lst.append(pd.DataFrame({
            'fname': fname.split('/')[-1],
            'sentence': sentence_lst
        }))

    if not frame_lst:
        return pd.DataFrame(columns=['fname', 'sentence'])
    return pd.concat(frame_lst, ignore_index=True)`;
63+
3364 const PDF_CMD = 'df = vp_pdf_get_sentence(pdf_lst)\ndf'
3465 /**
3566 * PDF
@@ -93,7 +124,10 @@ nltk.download('punkt')`;
93124 }
94125
95126 generateImportCode ( ) {
96- return PDF_IMPORT ;
127+ return [
128+ PDF_IMPORT ,
129+ PDF_FUNC
130+ ] ;
97131 }
98132
99133 generateCode ( ) {
Original file line number Diff line number Diff line change @@ -147,7 +147,9 @@ define([
147147 }
148148
149149 generateImportCode ( ) {
150- return 'from pandas_profiling import ProfileReport' ;
150+ return [
151+ 'from pandas_profiling import ProfileReport'
152+ ] ;
151153 }
152154
153155 generateCode ( ) {
Original file line number Diff line number Diff line change @@ -238,7 +238,7 @@ define([
238238 }
239239
240240 generateImportCode ( ) {
241- return 'from sklearn import metrics' ;
241+ return [ 'from sklearn import metrics' ] ;
242242 }
243243
244244 generateCode ( ) {
Original file line number Diff line number Diff line change @@ -775,7 +775,7 @@ define([
775775 }
776776 code . append ( "rcParams['axes.unicode_minus'] = False" ) ;
777777
778- return code . toString ( ) ;
778+ return [ code . toString ( ) ] ;
779779 }
780780
781781
Original file line number Diff line number Diff line change @@ -114,7 +114,7 @@ define([
114114 var code = new com_String ( ) ;
115115 code . appendLine ( 'import matplotlib.pyplot as plt' ) ;
116116 code . append ( 'import seaborn as sns' ) ;
117- return code . toString ( ) ;
117+ return [ code . toString ( ) ] ;
118118 }
119119
120120 generateCode ( ) {
Original file line number Diff line number Diff line change @@ -507,7 +507,7 @@ define([
507507 code . append ( "rcParams['axes.unicode_minus'] = False" ) ;
508508 }
509509
510- return code . toString ( ) ;
510+ return [ code . toString ( ) ] ;
511511 }
512512
513513 generateCode ( preview = false ) {
You can’t perform that action at this time.
0 commit comments