1010import fitz
1111import nltk
1212nltk .download ('punkt' )
13+
1314######
1415# Visual Python: Data Analysis > PDF
1516######
@@ -43,6 +44,7 @@ def vp_pdf_get_sentence(fname_lst):
4344 df = _vp_pd .concat ([df ,df_doc ])
4445
4546 return df .reset_index ().drop ('index' , axis = 1 )
47+
4648######
4749# Visual Python: Data Analysis > Frame
4850######
@@ -63,6 +65,7 @@ def vp_drop_outlier(df, col, weight=1.5):
6365 df_res = df .drop (outlier_index ).copy ()
6466
6567 return df_res
68+
6669######
6770# Visual Python: Machine Learning > Model Info
6871######
@@ -74,10 +77,12 @@ def vp_create_feature_importances(model, X_train=None, sort=False):
7477
7578 df_i = _vp_pd .DataFrame (model .feature_importances_ , index = feature_names , columns = ['Feature_importance' ])
7679 df_i ['Percentage' ] = 100 * (df_i ['Feature_importance' ] / df_i ['Feature_importance' ].max ())
77- if sort : df_i .sort_values (by = 'Feature_importance' , ascending = False , inplace = True )
80+ if sort :
81+ df_i .sort_values (by = 'Feature_importance' , ascending = False , inplace = True )
7882 df_i = df_i .round (2 )
7983
8084 return df_i
85+
8186######
8287# Visual Python: Machine Learning > Model Info
8388######
@@ -91,10 +96,13 @@ def vp_plot_feature_importances(model, X_train=None, sort=False, top_count=0):
9196 df_i ['Percentage' ].sort_values ().plot (kind = 'barh' )
9297 else :
9398 df_i ['Percentage' ].plot (kind = 'barh' )
99+
94100 _vp_plt .xlabel ('Feature importance Percentage' )
95101 _vp_plt .ylabel ('Features' )
96-
97102 _vp_plt .show ()
103+
104+ return
105+
98106######
99107# Visual Python: Visualization > Seaborn
100108######
@@ -134,4 +142,6 @@ def _single(ax):
134142 for idx , ax in _vp_np .ndenumerate (axs ):
135143 _single (ax )
136144 else :
137- _single (axs )
145+ _single (axs )
146+
147+ return
0 commit comments