
Commit 652b950

Merge pull request scikit-learn#4242 from MechCoder/select_from_model
[MRG+1] Implemented SelectFromModel meta-transformer
2 parents 84e9e10 + c805fbc commit 652b950

File tree

16 files changed: +516 -102 lines changed

doc/modules/classes.rst

Lines changed: 1 addition & 0 deletions
@@ -463,6 +463,7 @@ From text
    feature_selection.SelectKBest
    feature_selection.SelectFpr
    feature_selection.SelectFdr
+   feature_selection.SelectFromModel
    feature_selection.SelectFwe
    feature_selection.RFE
    feature_selection.RFECV

doc/modules/feature_selection.rst

Lines changed: 37 additions & 13 deletions
@@ -131,33 +131,52 @@ number of features.
   elimination example with automatic tuning of the number of features
   selected with cross-validation.
 
+.. _select_from_model:
 
-.. _l1_feature_selection:
+Feature selection using SelectFromModel
+=======================================
+
+:class:`SelectFromModel` is a meta-transformer that can be used along with any
+estimator that has a ``coef_`` or ``feature_importances_`` attribute after fitting.
+Features are considered unimportant and removed if the corresponding
+``coef_`` or ``feature_importances_`` values are below the provided
+``threshold`` parameter. Apart from specifying the threshold numerically,
+there are built-in heuristics for finding a threshold using a string argument.
+Available heuristics are "mean", "median" and float multiples of these like
+"0.1*mean".
+
+For examples of how it is used, refer to the sections below.
+
+.. topic:: Examples
+
+   * :ref:`example_feature_selection_plot_select_from_model_boston.py`: Selecting the two
+     most important features from the Boston dataset without knowing the
+     threshold beforehand.
 
 L1-based feature selection
-==========================
+--------------------------
 
 .. currentmodule:: sklearn
 
-Selecting non-zero coefficients
----------------------------------
-
 :ref:`Linear models <linear_model>` penalized with the L1 norm have
 sparse solutions: many of their estimated coefficients are zero. When the goal
 is to reduce the dimensionality of the data to use with another classifier,
-they expose a ``transform`` method to select the non-zero coefficient. In
-particular, sparse estimators useful for this purpose are the
-:class:`linear_model.Lasso` for regression, and
+they can be used along with :class:`feature_selection.SelectFromModel`
+to select the non-zero coefficients. In particular, sparse estimators useful for
+this purpose are the :class:`linear_model.Lasso` for regression, and
 of :class:`linear_model.LogisticRegression` and :class:`svm.LinearSVC`
 for classification::
 
     >>> from sklearn.svm import LinearSVC
    >>> from sklearn.datasets import load_iris
+    >>> from sklearn.feature_selection import SelectFromModel
    >>> iris = load_iris()
    >>> X, y = iris.data, iris.target
    >>> X.shape
    (150, 4)
-    >>> X_new = LinearSVC(C=0.01, penalty="l1", dual=False).fit_transform(X, y)
+    >>> lsvc = LinearSVC(C=0.01, penalty="l1", dual=False).fit(X, y)
+    >>> model = SelectFromModel(lsvc, prefit=True)
+    >>> X_new = model.transform(X)
    >>> X_new.shape
    (150, 3)
@@ -241,23 +260,27 @@ of features non zero.
    http://hal.inria.fr/hal-00354771/
 
 Tree-based feature selection
-============================
+----------------------------
 
 Tree-based estimators (see the :mod:`sklearn.tree` module and forest
 of trees in the :mod:`sklearn.ensemble` module) can be used to compute
 feature importances, which in turn can be used to discard irrelevant
-features::
+features (when coupled with the :class:`sklearn.feature_selection.SelectFromModel`
+meta-transformer)::
 
    >>> from sklearn.ensemble import ExtraTreesClassifier
    >>> from sklearn.datasets import load_iris
+    >>> from sklearn.feature_selection import SelectFromModel
    >>> iris = load_iris()
    >>> X, y = iris.data, iris.target
    >>> X.shape
    (150, 4)
    >>> clf = ExtraTreesClassifier()
-    >>> X_new = clf.fit(X, y).transform(X)
+    >>> clf = clf.fit(X, y)
    >>> clf.feature_importances_  # doctest: +SKIP
    array([ 0.04...,  0.05...,  0.4...,  0.4...])
+    >>> model = SelectFromModel(clf, prefit=True)
+    >>> X_new = model.transform(X)
    >>> X_new.shape               # doctest: +SKIP
    (150, 2)
@@ -278,12 +301,13 @@ the actual learning. The recommended way to do this in scikit-learn is
 to use a :class:`sklearn.pipeline.Pipeline`::
 
   clf = Pipeline([
-    ('feature_selection', LinearSVC(penalty="l1")),
+    ('feature_selection', SelectFromModel(LinearSVC(penalty="l1"))),
    ('classification', RandomForestClassifier())
  ])
  clf.fit(X, y)
 
 In this snippet we make use of a :class:`sklearn.svm.LinearSVC`
+coupled with :class:`sklearn.feature_selection.SelectFromModel`
 to evaluate feature importances and select the most relevant features.
 Then, a :class:`sklearn.ensemble.RandomForestClassifier` is trained on the
 transformed output, i.e. using only relevant features. You can perform
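
A minimal sketch of the string threshold heuristics ("mean", "median", scaled
variants such as "0.5*mean") described in the documentation added above, written
against the SelectFromModel API introduced in this PR; the exact number of
selected features depends on the fitted importances, so the printed shapes are
illustrative only:

    # Keep features whose importance exceeds half the mean importance.
    from sklearn.datasets import load_iris
    from sklearn.ensemble import ExtraTreesClassifier
    from sklearn.feature_selection import SelectFromModel

    iris = load_iris()
    X, y = iris.data, iris.target

    clf = ExtraTreesClassifier(n_estimators=50, random_state=0)
    model = SelectFromModel(clf, threshold="0.5*mean")
    X_reduced = model.fit_transform(X, y)

    print(X_reduced.shape)      # fewer columns than X.shape
    print(model.get_support())  # boolean mask of the kept features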

doc/whats_new.rst

Lines changed: 12 additions & 0 deletions
@@ -210,6 +210,11 @@ Enhancements
    - Altered :func:`metrics.roc_curve` to drop unnecessary thresholds by
      default. By `Graham Clenaghan`_.
 
+   - Added :class:`feature_selection.SelectFromModel` meta-transformer which can
+     be used along with estimators that have a `coef_` or `feature_importances_`
+     attribute to select important features of the input data. By
+     `Maheshakya Wijewardena`_, `Joel Nothman`_ and `Manoj Kumar`_.
+
 Bug fixes
 .........
 

@@ -283,6 +288,13 @@ API changes summary
      fit method to the constructor in
      :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`.
 
+   - Models inheriting from ``_LearntSelectorMixin`` will no longer support the
+     transform methods (i.e., RandomForests, GradientBoosting, LogisticRegression,
+     DecisionTrees, SVMs and SGD-related models). Wrap these models with the
+     :class:`feature_selection.SelectFromModel` meta-transformer instead to remove
+     features (according to `coef_` or `feature_importances_`)
+     that are below a certain threshold value.
+
 .. _changes_0_1_16:
 
 Version 0.16.1
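
A rough before/after sketch of the migration implied by this API change
(a hypothetical snippet, not part of the diff; the deprecated call keeps
working with a DeprecationWarning until 0.19):

    from sklearn.datasets import load_iris
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.feature_selection import SelectFromModel

    iris = load_iris()
    X, y = iris.data, iris.target
    clf = RandomForestClassifier(n_estimators=50, random_state=0).fit(X, y)

    # Before (deprecated, removed in 0.19):
    #   X_new = clf.transform(X, threshold="mean")

    # After: wrap the already-fitted estimator with prefit=True.
    X_new = SelectFromModel(clf, prefit=True, threshold="mean").transform(X)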

examples/ensemble/plot_feature_transformation.py

Lines changed: 4 additions & 3 deletions
@@ -34,6 +34,7 @@
 from sklearn.linear_model import LogisticRegression
 from sklearn.ensemble import (RandomTreesEmbedding, RandomForestClassifier,
                               GradientBoostingClassifier)
+from sklearn.feature_selection import SelectFromModel
 from sklearn.preprocessing import OneHotEncoder
 from sklearn.cross_validation import train_test_split
 from sklearn.metrics import roc_curve

@@ -53,12 +54,12 @@
 rt = RandomTreesEmbedding(max_depth=3, n_estimators=n_estimator)
 rt_lm = LogisticRegression()
 rt.fit(X_train, y_train)
-rt_lm.fit(rt.transform(X_train_lr), y_train_lr)
+rt_lm.fit(SelectFromModel(rt, prefit=True).transform(X_train_lr), y_train_lr)
 
-y_pred_rt = rt_lm.predict_proba(rt.transform(X_test))[:, 1]
+y_pred_rt = rt_lm.predict_proba(
+    SelectFromModel(rt, prefit=True).transform(X_test))[:, 1]
 fpr_rt_lm, tpr_rt_lm, _ = roc_curve(y_test, y_pred_rt)
 
-
 # Supervised transformation based on random forests
 rf = RandomForestClassifier(max_depth=3, n_estimators=n_estimator)
 rf_enc = OneHotEncoder()

examples/ensemble/plot_random_forest_embedding.py

Lines changed: 4 additions & 1 deletion
@@ -30,14 +30,17 @@
 from sklearn.datasets import make_circles
 from sklearn.ensemble import RandomTreesEmbedding, ExtraTreesClassifier
 from sklearn.decomposition import TruncatedSVD
+from sklearn.feature_selection import SelectFromModel
 from sklearn.naive_bayes import BernoulliNB
 
 # make a synthetic dataset
 X, y = make_circles(factor=0.5, random_state=0, noise=0.05)
 
 # use RandomTreesEmbedding to transform data
 hasher = RandomTreesEmbedding(n_estimators=10, random_state=0, max_depth=3)
-X_transformed = hasher.fit_transform(X)
+hasher.fit(X)
+model = SelectFromModel(hasher, prefit=True)
+X_transformed = model.transform(X)
 
 # Visualize result using PCA
 pca = TruncatedSVD(n_components=2)

examples/feature_selection/plot_select_from_model_boston.py

Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
+"""
+===================================================
+Feature selection using SelectFromModel and LassoCV
+===================================================
+
+Use the SelectFromModel meta-transformer along with Lasso to select the best
+couple of features from the Boston dataset.
+"""
+# Author: Manoj Kumar <mks542@nyu.edu>
+# License: BSD 3 clause
+
+print(__doc__)
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+from sklearn.datasets import load_boston
+from sklearn.feature_selection import SelectFromModel
+from sklearn.linear_model import LassoCV
+
+# Load the boston dataset.
+boston = load_boston()
+X, y = boston['data'], boston['target']
+
+# We use the base estimator LassoCV since the L1 norm promotes sparsity of features.
+clf = LassoCV()
+
+# Set a minimum threshold of 0.25
+sfm = SelectFromModel(clf, threshold=0.25)
+sfm.fit(X, y)
+n_features = sfm.transform(X).shape[1]
+
+# Reset the threshold till the number of features equals two.
+# Note that the attribute can be set directly instead of repeatedly
+# fitting the meta-transformer.
+while n_features > 2:
+    sfm.threshold += 0.1
+    X_transform = sfm.transform(X)
+    n_features = X_transform.shape[1]
+
+# Plot the selected two features from X.
+plt.title(
+    "Features selected from Boston using SelectFromModel with "
+    "threshold %0.3f." % sfm.threshold)
+feature1 = X_transform[:, 0]
+feature2 = X_transform[:, 1]
+plt.plot(feature1, feature2, 'r.')
+plt.xlabel("Feature number 1")
+plt.ylabel("Feature number 2")
+plt.ylim([np.min(feature2), np.max(feature2)])
+plt.show()

sklearn/ensemble/tests/test_forest.py

Lines changed: 8 additions & 3 deletions
@@ -19,6 +19,7 @@
 from scipy.sparse import csc_matrix
 from scipy.sparse import coo_matrix
 
+from sklearn.utils import warnings
 from sklearn.utils.testing import assert_almost_equal
 from sklearn.utils.testing import assert_array_almost_equal
 from sklearn.utils.testing import assert_array_equal

@@ -194,15 +195,19 @@ def test_probability():
 def check_importances(X, y, name, criterion):
     ForestEstimator = FOREST_ESTIMATORS[name]
 
-    est = ForestEstimator(n_estimators=20, criterion=criterion,random_state=0)
+    est = ForestEstimator(n_estimators=20, criterion=criterion,
+                          random_state=0)
     est.fit(X, y)
     importances = est.feature_importances_
     n_important = np.sum(importances > 0.1)
     assert_equal(importances.shape[0], 10)
     assert_equal(n_important, 3)
 
-    X_new = est.transform(X, threshold="mean")
-    assert_less(X_new.shape[1], X.shape[1])
+    # XXX: Remove this test in 0.19 after transform support to estimators
+    # is removed.
+    X_new = assert_warns(
+        DeprecationWarning, est.transform, X, threshold="mean")
+    assert_less(0 < X_new.shape[1], X.shape[1])
 
     # Check with parallel
     importances = est.feature_importances_

sklearn/ensemble/tests/test_gradient_boosting.py

Lines changed: 7 additions & 3 deletions
@@ -26,6 +26,7 @@
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_true
 from sklearn.utils.testing import assert_warns
+from sklearn.utils.testing import ignore_warnings
 from sklearn.utils.validation import DataConversionWarning
 from sklearn.utils.validation import NotFittedError
 

@@ -296,10 +297,13 @@ def test_feature_importances():
     clf.fit(X, y)
     assert_true(hasattr(clf, 'feature_importances_'))
 
-    X_new = clf.transform(X, threshold="mean")
+    # XXX: Remove this test in 0.19 after transform support to estimators
+    # is removed.
+    X_new = assert_warns(
+        DeprecationWarning, clf.transform, X, threshold="mean")
     assert_less(X_new.shape[1], X.shape[1])
-
-    feature_mask = clf.feature_importances_ > clf.feature_importances_.mean()
+    feature_mask = (
+        clf.feature_importances_ > clf.feature_importances_.mean())
     assert_array_almost_equal(X_new, X[:, feature_mask])
 

sklearn/feature_selection/__init__.py

Lines changed: 4 additions & 1 deletion
@@ -20,6 +20,8 @@
 from .rfe import RFE
 from .rfe import RFECV
 
+from .from_model import SelectFromModel
+
 __all__ = ['GenericUnivariateSelect',
            'RFE',
            'RFECV',

@@ -32,4 +34,5 @@
            'chi2',
            'f_classif',
            'f_oneway',
-           'f_regression']
+           'f_regression',
+           'SelectFromModel']

sklearn/feature_selection/base.py

Lines changed: 1 addition & 1 deletion
@@ -81,7 +81,7 @@ def transform(self, X):
             return np.empty(0).reshape((X.shape[0], 0))
         if len(mask) != X.shape[1]:
             raise ValueError("X has a different shape than during fitting.")
-        return check_array(X, accept_sparse='csr')[:, safe_mask(X, mask)]
+        return X[:, safe_mask(X, mask)]
 
     def inverse_transform(self, X):
         """
