@@ -41,7 +41,9 @@ class calls the ``fit`` method of each sub-estimator on random samples
4141
4242from __future__ import division
4343
44+ import warnings
4445from warnings import warn
46+
4547from abc import ABCMeta , abstractmethod
4648
4749import numpy as np
@@ -89,7 +91,11 @@ def _parallel_build_trees(tree, forest, X, y, sample_weight, tree_idx, n_trees,
8991 curr_sample_weight *= sample_counts
9092
9193 if class_weight == 'subsample' :
92- curr_sample_weight *= compute_sample_weight ('auto' , y , indices )
94+ with warnings .catch_warnings ():
95+ warnings .simplefilter ('ignore' , DeprecationWarning )
96+ curr_sample_weight *= compute_sample_weight ('auto' , y , indices )
97+ elif class_weight == 'balanced_subsample' :
98+ curr_sample_weight *= compute_sample_weight ('balanced' , y , indices )
9399
94100 tree .fit (X , y , sample_weight = curr_sample_weight , check_input = False )
95101
@@ -414,30 +420,40 @@ def _validate_y_class_weight(self, y):
414420 self .n_classes_ .append (classes_k .shape [0 ])
415421
416422 if self .class_weight is not None :
417- valid_presets = ('auto' , 'subsample' )
423+ valid_presets = ('auto', 'balanced', 'balanced_subsample', 'subsample', 'auto')
418424 if isinstance (self .class_weight , six .string_types ):
419425 if self .class_weight not in valid_presets :
420426 raise ValueError ('Valid presets for class_weight include '
421- '"auto" and "subsample". Given "%s".'
427+ '"balanced" and "balanced_subsample". Given "%s".'
422428 % self .class_weight )
429+ if self .class_weight == "subsample" :
430+ warn ("class_weight='subsample' is deprecated and will be removed in 0.18."
431+ " It was replaced by class_weight='balanced_subsample' "
432+ "using the balanced strategy." , DeprecationWarning )
423433 if self .warm_start :
424- warn('class_weight presets "auto" or "subsample" are '
434+ warn('class_weight presets "balanced" or "balanced_subsample" are '
425435 'not recommended for warm_start if the fitted data '
426436 'differs from the full dataset. In order to use '
427- '"auto" weights, use compute_class_weight("auto", '
437+ '"balanced" weights, use compute_class_weight("balanced", '
428438 'classes, y). In place of y you can use a large '
429439 'enough sample of the full training set target to '
430440 'properly estimate the class frequency '
431441 'distributions. Pass the resulting weights as the '
432442 'class_weight parameter.' )
433443
434- if self .class_weight != 'subsample' or not self .bootstrap :
444+ if (self .class_weight not in ['subsample' , 'balanced_subsample' ] or
445+ not self .bootstrap ):
435446 if self .class_weight == 'subsample' :
436447 class_weight = 'auto'
448+ elif self .class_weight == "balanced_subsample" :
449+ class_weight = "balanced"
437450 else :
438451 class_weight = self .class_weight
439- expanded_class_weight = compute_sample_weight (class_weight ,
440- y_original )
452+ with warnings .catch_warnings ():
453+ if class_weight == "auto" :
454+ warnings .simplefilter ('ignore' , DeprecationWarning )
455+ expanded_class_weight = compute_sample_weight (class_weight ,
456+ y_original )
441457
442458 return y , expanded_class_weight
443459
@@ -758,17 +774,18 @@ class RandomForestClassifier(ForestClassifier):
758774 and add more estimators to the ensemble, otherwise, just fit a whole
759775 new forest.
760776
761- class_weight : dict, list of dicts, "auto", "subsample" or None, optional
777+ class_weight : dict, list of dicts, "balanced", "balanced_subsample" or None, optional
762778
763779 Weights associated with classes in the form ``{class_label: weight}``.
764780 If not given, all classes are supposed to have weight one. For
765781 multi-output problems, a list of dicts can be provided in the same
766782 order as the columns of y.
767783
768- The "auto" mode uses the values of y to automatically adjust
769- weights inversely proportional to class frequencies in the input data.
784+ The "balanced" mode uses the values of y to automatically adjust
785+ weights inversely proportional to class frequencies in the input data
786+ as ``n_samples / (n_classes * np.bincount(y))``
770787
771- The "subsample" mode is the same as "auto" except that weights are
788+ The "balanced_subsample" mode is the same as "balanced" except that weights are
772789 computed based on the bootstrap sample for every tree grown.
773790
774791 For multi-output, the weights of each column of y will be multiplied.
@@ -1100,17 +1117,18 @@ class ExtraTreesClassifier(ForestClassifier):
11001117 and add more estimators to the ensemble, otherwise, just fit a whole
11011118 new forest.
11021119
1103- class_weight : dict, list of dicts, "auto", "subsample" or None, optional
1120+ class_weight : dict, list of dicts, "balanced", "balanced_subsample" or None, optional
11041121
11051122 Weights associated with classes in the form ``{class_label: weight}``.
11061123 If not given, all classes are supposed to have weight one. For
11071124 multi-output problems, a list of dicts can be provided in the same
11081125 order as the columns of y.
11091126
1110- The "auto" mode uses the values of y to automatically adjust
1111- weights inversely proportional to class frequencies in the input data.
1127+ The "balanced" mode uses the values of y to automatically adjust
1128+ weights inversely proportional to class frequencies in the input data
1129+ as ``n_samples / (n_classes * np.bincount(y))``
11121130
1113- The "subsample" mode is the same as "auto" except that weights are
1131+ The "balanced_subsample" mode is the same as "balanced" except that weights are
11141132 computed based on the bootstrap sample for every tree grown.
11151133
11161134 For multi-output, the weights of each column of y will be multiplied.
0 commit comments