1010from sklearn .utils .testing import SkipTest
1111from sklearn .utils .testing import assert_almost_equal
1212from sklearn .utils .testing import assert_raises
13- from sklearn .utils .testing import assert_raises_regexp
13+ from sklearn .utils .testing import assert_raises_regex
1414from sklearn .utils .testing import assert_true
1515from sklearn .utils .testing import assert_greater
1616from sklearn .utils .testing import assert_less
@@ -257,8 +257,30 @@ def test_k_means_n_init():
257257
258258 # two regression tests on bad n_init argument
259259 # previous bug: n_init <= 0 threw non-informative TypeError (#3858)
260- assert_raises_regexp (ValueError , "n_init" , KMeans (n_init = 0 ).fit , X )
261- assert_raises_regexp (ValueError , "n_init" , KMeans (n_init = - 1 ).fit , X )
260+ assert_raises_regex (ValueError , "n_init" , KMeans (n_init = 0 ).fit , X )
261+ assert_raises_regex (ValueError , "n_init" , KMeans (n_init = - 1 ).fit , X )
262+
263+
def test_k_means_explicit_init_shape():
    """Check that an explicit ``init`` of the wrong shape is rejected.

    The data has 40 samples and 3 features.  For both ``KMeans`` and
    ``MiniBatchKMeans`` the test supplies, as an array and as a callable,

    * an ``init`` with only 2 columns (feature-count mismatch), and
    * an ``init`` with only 2 rows while ``n_clusters=3``
      (cluster-count mismatch),

    and expects ``fit`` to raise a ``ValueError`` whose message matches the
    corresponding text.
    """
    rng = np.random.RandomState(0)
    X = rng.normal(size=(40, 3))
    n_samples = len(X)

    def two_feature_init(X_, k, random_state):
        # Callable init returning centers with one feature too few.
        return X_[:, :2]

    def two_center_init(X_, k, random_state):
        # Callable init returning fewer centers than n_clusters.
        return X_[:2, :]

    feature_msg = "does not match the number of features of the data"
    cluster_msg = "does not match the number of clusters"

    for Estimator in (KMeans, MiniBatchKMeans):
        # (expected message, offending init, n_clusters), checked in order.
        bad_inits = (
            (feature_msg, X[:, :2], n_samples),
            (feature_msg, two_feature_init, n_samples),
            (cluster_msg, X[:2, :], 3),
            (cluster_msg, two_center_init, 3),
        )
        for expected_msg, bad_init, n_clusters in bad_inits:
            model = Estimator(n_init=1, init=bad_init, n_clusters=n_clusters)
            assert_raises_regex(ValueError, expected_msg, model.fit, X)
262284
263285
264286def test_k_means_fortran_aligned_data ():
@@ -267,7 +289,7 @@ def test_k_means_fortran_aligned_data():
267289 centers = np .array ([[0 , 0 ], [0 , 1 ]])
268290 labels = np .array ([0 , 1 , 1 ])
269291 km = KMeans (n_init = 1 , init = centers , precompute_distances = False ,
270- random_state = 42 )
292+ random_state = 42 , n_clusters = 2 )
271293 km .fit (X )
272294 assert_array_equal (km .cluster_centers_ , centers )
273295 assert_array_equal (km .labels_ , labels )
@@ -437,8 +459,10 @@ def test_init(X, k, random_state):
437459
438460 # Small test to check that giving the wrong number of centers
439461 # raises a meaningful error
440- assert_raises (ValueError ,
441- MiniBatchKMeans (init = test_init , random_state = 42 ).fit , X_csr )
462+ msg = "does not match the number of clusters"
463+ assert_raises_regex (ValueError , msg , MiniBatchKMeans (init = test_init ,
464+ random_state = 42 ).fit ,
465+ X_csr )
442466
443467 # Now check that the fit actually works
444468 mb_k_means = MiniBatchKMeans (n_clusters = 3 , init = test_init ,
0 commit comments