|
22 | 22 |
|
23 | 23 | import numpy as np |
24 | 24 | import matplotlib.pyplot as plt |
| 25 | +import pandas as pd |
25 | 26 |
|
26 | | -from sklearn import linear_model, decomposition, datasets |
| 27 | +from sklearn import datasets |
| 28 | +from sklearn.decomposition import PCA |
| 29 | +from sklearn.linear_model import SGDClassifier |
27 | 30 | from sklearn.pipeline import Pipeline |
28 | 31 | from sklearn.model_selection import GridSearchCV |
29 | 32 |
|
30 | | -logistic = linear_model.LogisticRegression() |
31 | 33 |
|
32 | | -pca = decomposition.PCA() |
| 34 | +# Define a pipeline to search for the best combination of PCA truncation |
| 35 | +# and classifier regularization. |
| 36 | +logistic = SGDClassifier(loss='log_loss', penalty='l2', early_stopping=True,
| 37 | +                          max_iter=10000, tol=1e-5, random_state=0)
| 38 | +pca = PCA() |
33 | 39 | pipe = Pipeline(steps=[('pca', pca), ('logistic', logistic)]) |
34 | 40 |
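Note: the same two-step chain can also be built with sklearn's make_pipeline, which derives step names from the lowercased class names ('pca', 'sgdclassifier'). A minimal sketch, not part of this change, reusing the PCA and SGDClassifier imports above:

    from sklearn.pipeline import make_pipeline

    # Step names are generated automatically from the class names
    pipe2 = make_pipeline(PCA(), SGDClassifier(max_iter=10000, tol=1e-5,
                                               random_state=0))
    print(list(pipe2.named_steps))  # ['pca', 'sgdclassifier']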
|
35 | 41 | digits = datasets.load_digits() |
36 | 42 | X_digits = digits.data |
37 | 43 | y_digits = digits.target |
38 | 44 |
|
| 45 | +# Parameters of pipelines can be set using '__'-separated parameter names:
| 46 | +param_grid = { |
| 47 | + 'pca__n_components': [5, 20, 30, 40, 50, 64], |
| 48 | + 'logistic__alpha': np.logspace(-4, 4, 5), |
| 49 | +} |
| 50 | +search = GridSearchCV(pipe, param_grid, cv=5,
| 51 | +                      return_train_score=False)
| 52 | +search.fit(X_digits, y_digits) |
| 53 | +print("Best parameter (CV score=%0.3f):" % search.best_score_) |
| 54 | +print(search.best_params_) |
| 55 | + |
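As a quick illustration of the '__' convention used in param_grid above (an editor's sketch, reusing the pipe object from this diff): every step parameter is addressable on the pipeline as <step>__<parameter>.

    # List all tunable names, e.g. 'pca__n_components' and 'logistic__alpha'
    print(sorted(pipe.get_params().keys()))

    # The same names work for setting values directly on the pipeline
    pipe.set_params(pca__n_components=30, logistic__alpha=1e-3)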
39 | 56 | # Plot the PCA spectrum |
40 | 57 | pca.fit(X_digits) |
41 | 58 |
|
42 | | -plt.figure(1, figsize=(4, 3)) |
43 | | -plt.clf() |
44 | | -plt.axes([.2, .2, .7, .7]) |
45 | | -plt.plot(pca.explained_variance_, linewidth=2) |
46 | | -plt.axis('tight') |
47 | | -plt.xlabel('n_components') |
48 | | -plt.ylabel('explained_variance_') |
| 59 | +fig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, figsize=(6, 6)) |
| 60 | +ax0.plot(pca.explained_variance_ratio_, linewidth=2) |
| 61 | +ax0.set_ylabel('PCA explained variance') |
| 62 | + |
| 63 | +ax0.axvline(search.best_estimator_.named_steps['pca'].n_components, |
| 64 | + linestyle=':', label='n_components chosen') |
| 65 | +ax0.legend(prop=dict(size=12)) |
49 | 66 |
|
50 | | -# Prediction |
51 | | -n_components = [20, 40, 64] |
52 | | -Cs = np.logspace(-4, 4, 3) |
| 67 | +# For each number of components, find the best classifier results |
| 68 | +results = pd.DataFrame(search.cv_results_) |
| 69 | +components_col = 'param_pca__n_components' |
| 70 | +best_clfs = results.groupby(components_col).apply( |
| 71 | + lambda g: g.nlargest(1, 'mean_test_score')) |
53 | 72 |
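The groupby(...).apply(...) idiom above keeps the single best-scoring row per component count. An equivalent sketch (assuming the same results frame) that avoids apply: sort once by score, then take the first row of each group.

    # Equivalent: best mean_test_score per n_components value
    best_alt = (results.sort_values('mean_test_score', ascending=False)
                       .groupby(components_col).head(1)
                       .sort_values(components_col))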
|
54 | | -# Parameters of pipelines can be set using ‘__’ separated parameter names: |
55 | | -estimator = GridSearchCV(pipe, |
56 | | - dict(pca__n_components=n_components, |
57 | | - logistic__C=Cs), cv=5) |
58 | | -estimator.fit(X_digits, y_digits) |
| 73 | +best_clfs.plot(x=components_col, y='mean_test_score', yerr='std_test_score', |
| 74 | + legend=False, ax=ax1) |
| 75 | +ax1.set_ylabel('Classification accuracy (val)') |
| 76 | +ax1.set_xlabel('n_components') |
59 | 77 |
|
60 | | -plt.axvline(estimator.best_estimator_.named_steps['pca'].n_components, |
61 | | - linestyle=':', label='n_components chosen') |
62 | | -plt.legend(prop=dict(size=12)) |
| 78 | +plt.tight_layout() |
63 | 79 | plt.show() |
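Usage note: because GridSearchCV refits the best pipeline on the full dataset by default (refit=True), the fitted search object can be used for prediction directly. A small sketch, comparing against the known labels only for illustration:

    # Predict with the refit best pipeline (PCA + SGD classifier)
    print(search.predict(X_digits[:5]))  # predicted labels
    print(y_digits[:5])                  # true labels for comparison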