added new class SinkhornL1l2Transport() + dedicated test

PythonOT · rflamary · Sep 14, 2017 · Aug 28, 2017 · Aug 28, 2017 · Aug 31, 2017
commit 2005a09548a6f6d42cd9aafadbb4583e4029936c
diff --git a/ot/da.py b/ot/da.py
@@ -1369,6 +1369,10 @@ class SinkhornLpl1Transport(BaseTransport):
 
  Parameters
  ----------
+ reg_e : float, optional (default=1)
+ Entropic regularization parameter
+ reg_cl : float, optional (default=0.1)
+ Class regularization parameter
  mode : string, optional (default="unsupervised")
  The DA mode. If "unsupervised" no target labels are taken into account
  to modify the cost matrix. If "semisupervised" the target labels
@@ -1384,6 +1388,11 @@ class SinkhornLpl1Transport(BaseTransport):
  The ground metric for the Wasserstein problem
  distribution : string, optional (default="uniform")
  The kind of distribution estimation to employ
+ max_iter : int, float, optional (default=10)
+ The maximum number of iterations before stopping the optimization
+ algorithm if it has not converged
+ max_inner_iter : int, float, optional (default=200)
+ The number of iterations in the inner loop
  verbose : int, optional (default=0)
  Controls the verbosity of the optimization algorithm
  log : int, optional (default=0)
@@ -1452,3 +1461,103 @@ def fit(self, Xs, ys=None, Xt=None, yt=None):
  verbose=self.verbose, log=self.log)
 
  return self
+
+
class SinkhornL1l2Transport(BaseTransport):
    """Domain Adaptation OT method based on sinkhorn algorithm +
    l1l2 class regularization.

    Parameters
    ----------
    reg_e : float, optional (default=1)
        Entropic regularization parameter
    reg_cl : float, optional (default=0.1)
        Class regularization parameter
    mode : string, optional (default="unsupervised")
        The DA mode. If "unsupervised" no target labels are taken into account
        to modify the cost matrix. If "semisupervised" the target labels
        are taken into account to set coefficients of the pairwise distance
        matrix to 0 for row and columns indices that correspond to source and
        target samples which share the same labels.
    max_iter : int, float, optional (default=10)
        The maximum number of iterations before stopping the optimization
        algorithm if it has not converged (forwarded as ``numItermax``)
    max_inner_iter : int, float, optional (default=200)
        The number of iterations in the inner loop (forwarded as
        ``numInnerItermax``)
    tol : float, optional (default=10e-9)
        Stopping threshold forwarded to the solver as ``stopInnerThr``
    verbose : int, optional (default=0)
        Controls the verbosity of the optimization algorithm
    log : int, optional (default=0)
        Controls the logs of the optimization algorithm
    metric : string, optional (default="sqeuclidean")
        The ground metric for the Wasserstein problem
    distribution_estimation : callable, optional
        (default=distribution_estimation_uniform)
        The kind of distribution estimation to employ.
        NOTE(review): only stored here; presumably consumed by
        ``BaseTransport.fit`` to build ``mu_s`` / ``mu_t`` -- confirm.
    out_of_sample_map : string, optional (default="ferradans")
        Name of the out-of-sample mapping. Only stored by this class.

    Attributes
    ----------
    Coupling_ : the optimal coupling
    log_ : dict
        The solver log when ``log`` is truthy, an empty dict otherwise.

    References
    ----------

    .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
           "Optimal Transport for Domain Adaptation," in IEEE
           Transactions on Pattern Analysis and Machine Intelligence ,
           vol.PP, no.99, pp.1-1
    .. [2] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015).
           Generalized conditional gradient: analysis of convergence
           and applications. arXiv preprint arXiv:1510.06567.

    """

    def __init__(self, reg_e=1., reg_cl=0.1, mode="unsupervised",
                 max_iter=10, max_inner_iter=200,
                 tol=10e-9, verbose=False, log=False,
                 metric="sqeuclidean",
                 distribution_estimation=distribution_estimation_uniform,
                 out_of_sample_map='ferradans'):

        self.reg_e = reg_e
        self.reg_cl = reg_cl
        self.mode = mode
        self.max_iter = max_iter
        self.max_inner_iter = max_inner_iter
        self.tol = tol
        self.verbose = verbose
        self.log = log
        self.metric = metric
        self.distribution_estimation = distribution_estimation
        self.out_of_sample_map = out_of_sample_map

    def fit(self, Xs, ys=None, Xt=None, yt=None):
        """Build a coupling matrix from source and target sets of samples
        (Xs, ys) and (Xt, yt)

        Parameters
        ----------
        Xs : array-like of shape = [n_source_samples, n_features]
            The training input samples.
        ys : array-like, shape = [n_source_samples]
            The class labels
        Xt : array-like of shape = [n_target_samples, n_features]
            The training input samples.
        yt : array-like, shape = [n_labeled_target_samples]
            The class labels

        Returns
        -------
        self : object
            Returns self.
        """

        # The parent fit is expected to populate self.mu_s, self.mu_t and
        # self.Cost, which are read just below.
        super(SinkhornL1l2Transport, self).fit(Xs, ys, Xt, yt)

        solver_output = sinkhorn_l1l2_gl(
            a=self.mu_s, labels_a=ys, b=self.mu_t, M=self.Cost,
            reg=self.reg_e, eta=self.reg_cl, numItermax=self.max_iter,
            numInnerItermax=self.max_inner_iter, stopInnerThr=self.tol,
            verbose=self.verbose, log=self.log)

        # With logging enabled the solver returns (coupling, log) rather than
        # the bare coupling; keep Coupling_ a matrix in both cases so that
        # transform / inverse_transform keep working.
        if self.log:
            self.Coupling_, self.log_ = solver_output
        else:
            self.Coupling_ = solver_output
            self.log_ = dict()

        return self
diff --git a/test/test_da.py b/test/test_da.py
@@ -63,6 +63,56 @@ def test_sinkhorn_lpl1_transport_class():
  assert_equal(transp_Xs.shape, Xs.shape)
 
 
def test_sinkhorn_l1l2_transport_class():
    """Exercise SinkhornL1l2Transport: fit, coupling shape and marginals,
    transform / inverse_transform and fit_transform.
    """

    ns = 150
    nt = 200

    Xs, ys = get_data_classif('3gauss', ns)
    Xt, yt = get_data_classif('3gauss2', nt)

    transport = ot.da.SinkhornL1l2Transport()

    # fitting must run through without error
    transport.fit(Xs=Xs, ys=ys, Xt=Xt)

    # cost and coupling are both (n_source, n_target) matrices
    expected_shape = (Xs.shape[0], Xt.shape[0])
    assert_equal(transport.Cost.shape, expected_shape)
    assert_equal(transport.Coupling_.shape, expected_shape)

    # marginals of the coupling must match the uniform weights
    source_weights = unif(ns)
    target_weights = unif(nt)
    column_sums = np.sum(transport.Coupling_, axis=0)
    row_sums = np.sum(transport.Coupling_, axis=1)
    assert_allclose(column_sums, target_weights, rtol=1e-3, atol=1e-3)
    assert_allclose(row_sums, source_weights, rtol=1e-3, atol=1e-3)

    # transporting the fitted source samples keeps their shape
    mapped_source = transport.transform(Xs=Xs)
    assert_equal(mapped_source.shape, Xs.shape)

    # unseen source samples: no out-of-sample mapping, input comes back as is
    unseen_source, _ = get_data_classif('3gauss', ns + 1)
    mapped_unseen_source = transport.transform(unseen_source)
    assert_equal(mapped_unseen_source, unseen_source)

    # transporting the fitted target samples back keeps their shape
    mapped_target = transport.inverse_transform(Xt=Xt)
    assert_equal(mapped_target.shape, Xt.shape)

    # unseen target samples: same behaviour, the input data is returned
    unseen_target, _ = get_data_classif('3gauss2', nt + 1)
    mapped_unseen_target = transport.inverse_transform(Xt=unseen_target)
    assert_equal(mapped_unseen_target, unseen_target)

    # fit_transform in one call yields source-shaped output
    mapped_source = transport.fit_transform(Xs=Xs, ys=ys, Xt=Xt)
    assert_equal(mapped_source.shape, Xs.shape)
+
+
 def test_sinkhorn_transport_class():
  """test_sinkhorn_transport
  """