Skip to content

Commit 174f4ae

Browse files
authored
[MRG] FIX SparseCoder with readonly parallel mmap (scikit-learn#11346)
1 parent f43dd0e commit 174f4ae

File tree

4 files changed

+44
-2
lines changed

4 files changed

+44
-2
lines changed

doc/whats_new/v0.20.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,11 @@ Decomposition, manifold learning and clustering
506506
:class:`mixture.BayesianGaussianMixture`. :issue:`10740` by :user:`Erich
507507
Schubert <kno10>` and :user:`Guillaume Lemaitre <glemaitre>`.
508508

509+
- Fixed a bug in :class:`decomposition.SparseCoder` when running OMP sparse
510+
coding in parallel using read-only memory-mapped data structures. :issue:`5956`
511+
by :user:`Vighnesh Birodkar <vighneshbirodkar>` and
512+
:user:`Olivier Grisel <ogrisel>`.
513+
509514
Metrics
510515

511516
- Fixed a bug in :func:`metrics.precision_recall_fscore_support`

sklearn/decomposition/tests/test_dict_learning.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from __future__ import division
12
import pytest
23

34
import numpy as np
@@ -366,3 +367,22 @@ def test_sparse_coder_estimator():
366367
transform_alpha=0.001).transform(X)
367368
assert_true(not np.all(code == 0))
368369
assert_less(np.sqrt(np.sum((np.dot(code, V) - X) ** 2)), 0.1)
370+
371+
372+
def test_sparse_coder_parallel_mmap():
373+
# Non-regression test for:
374+
# https://github.com/scikit-learn/scikit-learn/issues/5956
375+
# Test that SparseCoder does not error when passing read-only
376+
# arrays to child processes
377+
378+
rng = np.random.RandomState(777)
379+
n_components, n_features = 40, 64
380+
init_dict = rng.rand(n_components, n_features)
381+
# Ensure that `data` is ~2MB. Joblib memory maps arrays
382+
# if they are larger than 1MB. The 4 accounts for float32
383+
# data type
384+
n_samples = int(2e6) // (4 * n_features)
385+
data = np.random.rand(n_samples, n_features).astype(np.float32)
386+
387+
sc = SparseCoder(init_dict, transform_algorithm='omp', n_jobs=2)
388+
sc.fit_transform(data)

sklearn/linear_model/omp.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ def _gram_omp(Gram, Xy, n_nonzero_coefs, tol_0=None, tol=None,
191191
"""
192192
Gram = Gram.copy('F') if copy_Gram else np.asfortranarray(Gram)
193193

194-
if copy_Xy:
194+
if copy_Xy or not Xy.flags.writeable:
195195
Xy = Xy.copy()
196196

197197
min_float = np.finfo(Gram.dtype).eps
@@ -491,6 +491,9 @@ def orthogonal_mp_gram(Gram, Xy, n_nonzero_coefs=None, tol=None,
491491
Xy = Xy[:, np.newaxis]
492492
if tol is not None:
493493
norms_squared = [norms_squared]
494+
if copy_Xy or not Xy.flags.writeable:
495+
# Make the copy once instead of many times in _gram_omp itself.
496+
Xy = Xy.copy()
494497

495498
if n_nonzero_coefs is None and tol is None:
496499
n_nonzero_coefs = int(0.1 * len(Gram))
@@ -515,7 +518,7 @@ def orthogonal_mp_gram(Gram, Xy, n_nonzero_coefs=None, tol=None,
515518
out = _gram_omp(
516519
Gram, Xy[:, k], n_nonzero_coefs,
517520
norms_squared[k] if tol is not None else None, tol,
518-
copy_Gram=copy_Gram, copy_Xy=copy_Xy,
521+
copy_Gram=copy_Gram, copy_Xy=False,
519522
return_path=return_path)
520523
if return_path:
521524
_, idx, coefs, n_iter = out

sklearn/linear_model/tests/test_omp.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,20 @@ def test_perfect_signal_recovery():
104104
assert_array_almost_equal(gamma[:, 0], gamma_gram, decimal=2)
105105

106106

107+
def test_orthogonal_mp_gram_readonly():
108+
# Non-regression test for:
109+
# https://github.com/scikit-learn/scikit-learn/issues/5956
110+
idx, = gamma[:, 0].nonzero()
111+
G_readonly = G.copy()
112+
G_readonly.setflags(write=False)
113+
Xy_readonly = Xy.copy()
114+
Xy_readonly.setflags(write=False)
115+
gamma_gram = orthogonal_mp_gram(G_readonly, Xy_readonly[:, 0], 5,
116+
copy_Gram=False, copy_Xy=False)
117+
assert_array_equal(idx, np.flatnonzero(gamma_gram))
118+
assert_array_almost_equal(gamma[:, 0], gamma_gram, decimal=2)
119+
120+
107121
def test_estimator():
108122
omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
109123
omp.fit(X, y[:, 0])

0 commit comments

Comments
 (0)