@@ -74,24 +74,36 @@ define([
7474 code : "pd.DataFrame({'Null Count': ${data}.isnull().sum(), 'Non-Null Count': ${data}.notnull().sum()})" , dtype : [ 'DataFrame' , 'Series' ] , toframe : true } ,
7575 // { id: 'duplicates', label: 'Duplicated', code: '${data}.duplicated()', dtype: ['DataFrame', 'Series'] },
// 'duplicates' entry. The `code` string is a Python/pandas snippet; ${data} is a placeholder
// (presumably replaced with the selected variable name by the code generator - confirm).
// The snippet first builds `_duplicated`: the duplicated-row count over the whole frame followed by
// the duplicated count of each individual column. It then assembles `_duplicated_df` with the columns
// 'Rows', 'Unique' (row count minus duplicates), 'Duplicated' and 'Duplicated by', indexed by
// 'Combination' plus the column names, and ends with `_duplicated_df` as the final expression.
// NOTE(review): dtype lists 'Series', yet the snippet reads ${data}.columns - presumably `toframe`
// causes a Series to be converted first; verify against the caller.
// NOTE(review): the removed (-) and added (+) lines below appear to differ only in whitespace.
7676 { id : 'duplicates' , label : 'Duplicated' , code : "_duplicated = ([${data}.duplicated().sum()] + [${data}[col].duplicated().sum() for col in ${data}.columns])\
77- \n_duplicated_df = pd.DataFrame({\
78- \n 'Rows':[len(${data})]*len(_duplicated),\
79- \n 'Unique':[len(${data}) - dups for dups in _duplicated],\
80- \n 'Duplicated': _duplicated,\
81- \n 'Duplicated by': ['All columns'] + ${data}.columns.to_list()\
82- \n}, index=['Combination']+${data}.columns.to_list())\
83- \n_duplicated_df", dtype : [ 'DataFrame' , 'Series' ] , toframe : true } ,
77+ \n_duplicated_df = pd.DataFrame({\
78+ \n 'Rows':[len(${data})]*len(_duplicated),\
79+ \n 'Unique':[len(${data}) - dups for dups in _duplicated],\
80+ \n 'Duplicated': _duplicated,\
81+ \n 'Duplicated by': ['All columns'] + ${data}.columns.to_list()\
82+ \n}, index=['Combination']+${data}.columns.to_list())\
83+ \n_duplicated_df" , dtype : [ 'DataFrame' , 'Series' ] , toframe : true } ,
// 'unique' entry: generates `${data}.unique()`. Its dtype list contains only 'Series'
// (presumably meaning the entry is offered only for Series inputs - confirm), and it sets no `toframe` flag.
8484 { id : 'unique' , label : 'Unique' , code : '${data}.unique()' , dtype : [ 'Series' ] } ,
// 'value_counts' entry. The `code` string is a Python/pandas snippet; ${data} is a placeholder
// (presumably replaced with the selected variable name by the code generator - confirm).
//
// Previous snippet (removed lines, also kept below as a commented-out copy): collected per-column lists
// in `_value_counts_dict`, always used value_counts(bins=10, sort=False) for numeric columns, and padded
// every list with '' up to 10 entries before building the DataFrame.
//
// Current snippet: starts from an empty DataFrame `_dfr` and, for each column of ${data}:
//   - if the column is numeric AND value_counts() reports more than 10 distinct values, it counts with
//     value_counts(bins=10, sort=False) and appends a (col, 'bins') column holding the bin index;
//   - otherwise it uses plain value_counts() and appends a (col, 'category') column holding the index;
//   - a (col, 'count') column holding the counts is appended as well.
// Columns are joined with pd.concat(axis=1); the final expression replaces NaN with '' (presumably the
// padding introduced when the concatenated columns have different lengths).
// NOTE(review): the snippet references `np` - assumes numpy is already imported as np where it runs; confirm.
// NOTE(review): dtype lists 'Series', yet the snippet iterates ${data}.columns - presumably `toframe`
// causes a Series to be converted first; verify against the caller.
// NOTE(review): Python indentation inside the string is not recoverable from this view, so the exact
// nesting of the 'count' line relative to the else-branch should be checked in the real file.
85- { id : 'value_counts' , label : 'Value counts' , code : "_value_counts_dict = {}\
86- \nfor col in ${data}.columns:\
87- \n if pd.api.types.is_numeric_dtype(${data}[col]):\
88- \n _value_counts = ${data}[col].value_counts(bins=10, sort=False)\
89- \n _value_counts_dict[(col, 'bins')] = list(_value_counts.index) + ['']*(10 - len(_value_counts))\
90- \n else:\
91- \n _value_counts = ${data}[col].value_counts()\
92- \n _value_counts_dict[(col, 'category')] = list(_value_counts.index) + ['']*(10 - len(_value_counts))\
93- \n _value_counts_dict[(col, 'count')] = list(_value_counts.values) + ['']*(10 - len(_value_counts))\
94- \npd.DataFrame(_value_counts_dict)" , dtype : [ 'DataFrame' , 'Series' ] , toframe : true } ,
85+ { id : 'value_counts' , label : 'Value counts' ,
86+ // code: "_value_counts_dict = {}\
87+ // \nfor col in ${data}.columns:\
88+ // \n if pd.api.types.is_numeric_dtype(${data}[col]):\
89+ // \n _value_counts = ${data}[col].value_counts(bins=10, sort=False)\
90+ // \n _value_counts_dict[(col, 'bins')] = list(_value_counts.index) + ['']*(10 - len(_value_counts))\
91+ // \n else:\
92+ // \n _value_counts = ${data}[col].value_counts()\
93+ // \n _value_counts_dict[(col, 'category')] = list(_value_counts.index) + ['']*(10 - len(_value_counts))\
94+ // \n _value_counts_dict[(col, 'count')] = list(_value_counts.values) + ['']*(10 - len(_value_counts))\
95+ // \npd.DataFrame(_value_counts_dict)",
96+ code : "_dfr = pd.DataFrame()\
97+ \nfor col in ${data}.columns:\
98+ \n if pd.api.types.is_numeric_dtype(${data}[col]) and ${data}[col].value_counts().size > 10:\
99+ \n _value_counts = ${data}[col].value_counts(bins=10, sort=False)\
100+ \n _dfr = pd.concat([_dfr, pd.DataFrame({(col,'bins'): _value_counts.index})], axis=1)\
101+ \n else:\
102+ \n _value_counts = ${data}[col].value_counts()\
103+ \n _dfr = pd.concat([_dfr, pd.DataFrame({(col,'category'): _value_counts.index})], axis=1)\
104+ \n _dfr = pd.concat([_dfr, pd.DataFrame({(col,'count'): _value_counts.values})], axis=1)\
105+ \n_dfr.replace(np.nan,'')" ,
106+ dtype : [ 'DataFrame' , 'Series' ] , toframe : true } ,
95107 ]
96108 } ,
97109 {
0 commit comments