Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions python/pandasCommand.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,14 @@ def _vp_get_columns_list(df):
cInfo['value'] = "'{}'".format(c)
elif type(c).__name__ == 'Timestamp':
cInfo['value'] = str(c)

# category - iopub data rate limit issue...
cInfo['category'] = []
if str(df[c].dtype) == 'object':
uniqValues = df[c].dropna().unique()
if len(uniqValues) <= 20:
cInfo['category'] = [{ "value": "'{}'".format(u) if type(u) == str else u, "label": u } for u in uniqValues]
else:
cInfo['category'] = []
else:
cInfo['category'] = []

colList.append(cInfo)
return colList

Expand Down
16 changes: 13 additions & 3 deletions python/userCommand.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import fitz
import nltk
nltk.download('punkt')

######
# Visual Python: Data Analysis > PDF
######
Expand Down Expand Up @@ -43,6 +44,7 @@ def vp_pdf_get_sentence(fname_lst):
df = _vp_pd.concat([df,df_doc])

return df.reset_index().drop('index', axis=1)

######
# Visual Python: Data Analysis > Frame
######
Expand All @@ -63,6 +65,7 @@ def vp_drop_outlier(df, col, weight=1.5):
df_res = df.drop(outlier_index).copy()

return df_res

######
# Visual Python: Machine Learning > Model Info
######
Expand All @@ -74,10 +77,12 @@ def vp_create_feature_importances(model, X_train=None, sort=False):

df_i = _vp_pd.DataFrame(model.feature_importances_, index=feature_names, columns=['Feature_importance'])
df_i['Percentage'] = 100 * (df_i['Feature_importance'] / df_i['Feature_importance'].max())
if sort: df_i.sort_values(by='Feature_importance', ascending=False, inplace=True)
if sort:
df_i.sort_values(by='Feature_importance', ascending=False, inplace=True)
df_i = df_i.round(2)

return df_i

######
# Visual Python: Machine Learning > Model Info
######
Expand All @@ -91,10 +96,13 @@ def vp_plot_feature_importances(model, X_train=None, sort=False, top_count=0):
df_i['Percentage'].sort_values().plot(kind='barh')
else:
df_i['Percentage'].plot(kind='barh')

_vp_plt.xlabel('Feature importance Percentage')
_vp_plt.ylabel('Features')

_vp_plt.show()

return

######
# Visual Python: Visualization > Seaborn
######
Expand Down Expand Up @@ -134,4 +142,6 @@ def _single(ax):
for idx, ax in _vp_np.ndenumerate(axs):
_single(ax)
else:
_single(axs)
_single(axs)

return
13 changes: 5 additions & 8 deletions python/variableCommand.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import numpy as _vp_np
import random as _vp_rd
"""
Search Variables
"""
Expand Down Expand Up @@ -77,8 +79,6 @@ def _vp_get_profiling_list():

return result

import numpy as _vp_np
import random as _vp_rd
def _vp_sample(data, sample_cnt):
"""
Sampling data
Expand All @@ -94,16 +94,13 @@ def _vp_sample(data, sample_cnt):
return data[_vp_np.random.choice(data.shape[0], sample_cnt, replace=False)]
elif dataType == 'list':
return _vp_rd.choices(data, k=sample_cnt)

return data

def _vp_check_module_loaded(fname_list):
    """
    Check if this module is loaded

    For every name in ``fname_list``, report whether that name is currently
    present in this module's global namespace (``globals()``).

    Parameters
    ----------
    fname_list : iterable
        Names to look up in the global namespace.

    Returns
    -------
    list of bool
        One flag per entry of ``fname_list``, in the same order; ``True``
        when the name is a key of ``globals()``, ``False`` otherwise.
    """
    # Fetch the namespace once rather than calling globals() for each name.
    namespace = globals()
    # `in` already yields a bool, so no `True if ... else False` wrapper is needed.
    return [fname in namespace for fname in fname_list]
10 changes: 4 additions & 6 deletions python/visualizationCommand.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,10 @@ def _vp_seaborn_show_values(axs, precision=1, space=0.01):
pstr = '{:.' + str(precision) + 'f}'

def _single(ax):
# check orient
# check orient / if 0
orient = 'v'
if len(ax.patches) == 1:
# check if 0
if ax.patches[0].get_x() == 0:
orient = 'h'
if len(ax.patches) == 1 and ax.patches[0].get_x() == 0:
orient = 'h'
else:
# compare 0, 1 patches
p0 = ax.patches[0]
Expand All @@ -36,7 +34,7 @@ def _single(ax):
ax.text(_x, _y, value, ha='left')

if isinstance(axs, _vp_np.ndarray):
for idx, ax in _vp_np.ndenumerate(axs):
for _, ax in _vp_np.ndenumerate(axs):
_single(ax)
else:
_single(axs)