Skip to content

Commit b62ea84

Browse files
author
minjk-bl
committed
Edit valuecounts output
1 parent 5fc1348 commit b62ea84

1 file changed

Lines changed: 29 additions & 17 deletions

File tree

visualpython/js/m_apps/Information.js

Lines changed: 29 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -74,24 +74,36 @@ define([
7474
code: "pd.DataFrame({'Null Count': ${data}.isnull().sum(), 'Non-Null Count': ${data}.notnull().sum()})", dtype: ['DataFrame', 'Series'], toframe: true },
7575
// { id: 'duplicates', label: 'Duplicated', code: '${data}.duplicated()', dtype: ['DataFrame', 'Series'] },
7676
{ id: 'duplicates', label: 'Duplicated', code: "_duplicated = ([${data}.duplicated().sum()] + [${data}[col].duplicated().sum() for col in ${data}.columns])\
77-
\n_duplicated_df = pd.DataFrame({\
78-
\n 'Rows':[len(${data})]*len(_duplicated),\
79-
\n 'Unique':[len(${data}) - dups for dups in _duplicated],\
80-
\n 'Duplicated': _duplicated,\
81-
\n 'Duplicated by': ['All columns'] + ${data}.columns.to_list()\
82-
\n}, index=['Combination']+${data}.columns.to_list())\
83-
\n_duplicated_df", dtype: ['DataFrame', 'Series'], toframe: true },
77+
\n_duplicated_df = pd.DataFrame({\
78+
\n 'Rows':[len(${data})]*len(_duplicated),\
79+
\n 'Unique':[len(${data}) - dups for dups in _duplicated],\
80+
\n 'Duplicated': _duplicated,\
81+
\n 'Duplicated by': ['All columns'] + ${data}.columns.to_list()\
82+
\n}, index=['Combination']+${data}.columns.to_list())\
83+
\n_duplicated_df", dtype: ['DataFrame', 'Series'], toframe: true },
8484
{ id: 'unique', label: 'Unique', code: '${data}.unique()', dtype: ['Series'] },
85-
{ id: 'value_counts', label: 'Value counts', code: "_value_counts_dict = {}\
86-
\nfor col in ${data}.columns:\
87-
\n if pd.api.types.is_numeric_dtype(${data}[col]):\
88-
\n _value_counts = ${data}[col].value_counts(bins=10, sort=False)\
89-
\n _value_counts_dict[(col, 'bins')] = list(_value_counts.index) + ['']*(10 - len(_value_counts))\
90-
\n else:\
91-
\n _value_counts = ${data}[col].value_counts()\
92-
\n _value_counts_dict[(col, 'category')] = list(_value_counts.index) + ['']*(10 - len(_value_counts))\
93-
\n _value_counts_dict[(col, 'count')] = list(_value_counts.values) + ['']*(10 - len(_value_counts))\
94-
\npd.DataFrame(_value_counts_dict)", dtype: ['DataFrame', 'Series'], toframe: true },
85+
{ id: 'value_counts', label: 'Value counts',
86+
// code: "_value_counts_dict = {}\
87+
// \nfor col in ${data}.columns:\
88+
// \n if pd.api.types.is_numeric_dtype(${data}[col]):\
89+
// \n _value_counts = ${data}[col].value_counts(bins=10, sort=False)\
90+
// \n _value_counts_dict[(col, 'bins')] = list(_value_counts.index) + ['']*(10 - len(_value_counts))\
91+
// \n else:\
92+
// \n _value_counts = ${data}[col].value_counts()\
93+
// \n _value_counts_dict[(col, 'category')] = list(_value_counts.index) + ['']*(10 - len(_value_counts))\
94+
// \n _value_counts_dict[(col, 'count')] = list(_value_counts.values) + ['']*(10 - len(_value_counts))\
95+
// \npd.DataFrame(_value_counts_dict)",
96+
code: "_dfr = pd.DataFrame()\
97+
\nfor col in ${data}.columns:\
98+
\n if pd.api.types.is_numeric_dtype(${data}[col]) and ${data}[col].value_counts().size > 10:\
99+
\n _value_counts = ${data}[col].value_counts(bins=10, sort=False)\
100+
\n _dfr = pd.concat([_dfr, pd.DataFrame({(col,'bins'): _value_counts.index})], axis=1)\
101+
\n else:\
102+
\n _value_counts = ${data}[col].value_counts()\
103+
\n _dfr = pd.concat([_dfr, pd.DataFrame({(col,'category'): _value_counts.index})], axis=1)\
104+
\n _dfr = pd.concat([_dfr, pd.DataFrame({(col,'count'): _value_counts.values})], axis=1)\
105+
\n_dfr.replace(np.nan,'')",
106+
dtype: ['DataFrame', 'Series'], toframe: true },
95107
]
96108
},
97109
{

0 commit comments

Comments
 (0)