peterjc
diff --git a/‎doc/whats_new/v0.20.rst‎
Lines changed: 5 additions & 0 deletions b/‎doc/whats_new/v0.20.rst‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎sklearn/decomposition/tests/test_dict_learning.py‎
Lines changed: 20 additions & 0 deletions b/‎sklearn/decomposition/tests/test_dict_learning.py‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎sklearn/linear_model/omp.py‎
Lines changed: 5 additions & 2 deletions b/‎sklearn/linear_model/omp.py‎
Lines changed: 5 additions & 2 deletions
diff --git a/‎sklearn/linear_model/tests/test_omp.py‎
Lines changed: 14 additions & 0 deletions b/‎sklearn/linear_model/tests/test_omp.py‎
Lines changed: 14 additions & 0 deletions
@@ -506,6 +506,11 @@ Decomposition, manifold learning and clustering
  :class:`mixture.BayesianGaussianMixture`. :issue:`10740` by :user:`Erich
  Schubert <kno10>` and :user:`Guillaume Lemaitre <glemaitre>`.
 
+- Fixed a bug in :class:`decomposition.SparseCoder` when running OMP sparse
+ coding in parallel using readonly memory mapped datastructures. :issue:`5956`
+ by :user:`Vighnesh Birodkar <vighneshbirodkar>` and
+ :user:`Olivier Grisel <ogrisel>`.
+
 Metrics
 
 - Fixed a bug in :func:`metrics.precision_recall_fscore_support`
 
@@ -1,3 +1,4 @@
+from __future__ import division
 import pytest
 
 import numpy as np
@@ -366,3 +367,22 @@ def test_sparse_coder_estimator():
  transform_alpha=0.001).transform(X)
  assert_true(not np.all(code == 0))
  assert_less(np.sqrt(np.sum((np.dot(code, V) - X) ** 2)), 0.1)
+
+
+def test_sparse_coder_parallel_mmap():
+ # Non-regression test for:
+ # https://github.com/scikit-learn/scikit-learn/issues/5956
+ # Test that SparseCoder does not error by passing reading only
+ # arrays to child processes
+
+ rng = np.random.RandomState(777)
+ n_components, n_features = 40, 64
+ init_dict = rng.rand(n_components, n_features)
+ # Ensure that `data` is >2M. Joblib memory maps arrays
+ # if they are larger than 1MB. The 4 accounts for float32
+ # data type
+ n_samples = int(2e6) // (4 * n_features)
+ data = np.random.rand(n_samples, n_features).astype(np.float32)
+
+ sc = SparseCoder(init_dict, transform_algorithm='omp', n_jobs=2)
+ sc.fit_transform(data)
@@ -191,7 +191,7 @@ def _gram_omp(Gram, Xy, n_nonzero_coefs, tol_0=None, tol=None,
  """
  Gram = Gram.copy('F') if copy_Gram else np.asfortranarray(Gram)
 
- if copy_Xy:
+ if copy_Xy or not Xy.flags.writeable:
  Xy = Xy.copy()
 
  min_float = np.finfo(Gram.dtype).eps
@@ -491,6 +491,9 @@ def orthogonal_mp_gram(Gram, Xy, n_nonzero_coefs=None, tol=None,
  Xy = Xy[:, np.newaxis]
  if tol is not None:
  norms_squared = [norms_squared]
+ if copy_Xy or not Xy.flags.writeable:
+ # Make the copy once instead of many times in _gram_omp itself.
+ Xy = Xy.copy()
 
  if n_nonzero_coefs is None and tol is None:
  n_nonzero_coefs = int(0.1 * len(Gram))
@@ -515,7 +518,7 @@ def orthogonal_mp_gram(Gram, Xy, n_nonzero_coefs=None, tol=None,
  out = _gram_omp(
  Gram, Xy[:, k], n_nonzero_coefs,
  norms_squared[k] if tol is not None else None, tol,
- copy_Gram=copy_Gram, copy_Xy=copy_Xy,
+ copy_Gram=copy_Gram, copy_Xy=False,
  return_path=return_path)
  if return_path:
  _, idx, coefs, n_iter = out
 
@@ -104,6 +104,20 @@ def test_perfect_signal_recovery():
  assert_array_almost_equal(gamma[:, 0], gamma_gram, decimal=2)
 
 
+def test_orthogonal_mp_gram_readonly():
+ # Non-regression test for:
+ # https://github.com/scikit-learn/scikit-learn/issues/5956
+ idx, = gamma[:, 0].nonzero()
+ G_readonly = G.copy()
+ G_readonly.setflags(write=False)
+ Xy_readonly = Xy.copy()
+ Xy_readonly.setflags(write=False)
+ gamma_gram = orthogonal_mp_gram(G_readonly, Xy_readonly[:, 0], 5,
+ copy_Gram=False, copy_Xy=False)
+ assert_array_equal(idx, np.flatnonzero(gamma_gram))
+ assert_array_almost_equal(gamma[:, 0], gamma_gram, decimal=2)
+
+
 def test_estimator():
  omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
  omp.fit(X, y[:, 0])