lucasimi
diff --git a/.github/workflows/test-unit.yml b/.github/workflows/test-unit.yml
Lines changed: 4 additions & 0 deletions
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 6 additions & 0 deletions
diff --git a/src/tdamapper/_common.py b/src/tdamapper/_common.py
Lines changed: 26 additions & 45 deletions
diff --git a/src/tdamapper/app.py b/src/tdamapper/app.py
Lines changed: 4 additions & 2 deletions
diff --git a/src/tdamapper/core.py b/src/tdamapper/core.py
Lines changed: 37 additions & 90 deletions
@@ -49,6 +49,10 @@ jobs:
  run: | 
  python -m pip install -e .[dev]
 
+ - name: Run typechecks
+ run: | 
+ mypy src tests --ignore-missing-imports
+
  - name: Run tests and code coverage
  run: | 
  coverage run --source=src -m pytest tests/test_unit_*.py
 
@@ -72,6 +72,12 @@ Homepage = "https://github.com/lucasimi/tda-mapper-python"
 Documentation = "https://tda-mapper.readthedocs.io"
 Issues = "https://github.com/lucasimi/tda-mapper-python/issues"
 
+[tool.setuptools.packages.find]
+where = ["src"]
+
+[tool.setuptools.package-data]
+"tdamapper" = ["py.typed"]
+
 [tool.coverage.run]
 omit = [
  "**/_*.py",
 
@@ -8,37 +8,14 @@
 import io
 import pstats
 import warnings
-from typing import Any, Callable, Iterator, Protocol, TypeVar
+from typing import Any, Callable
 
 import numpy as np
 from numpy.typing import NDArray
 
-warnings.filterwarnings("default", category=DeprecationWarning, module=r"^tdamapper\.")
-
-T = TypeVar("T")
-
-
-class Array(Protocol[T]):
-
- def __getitem__(self, index: int) -> T:
- """
- Get an item from the array.
- """
-
- def __len__(self) -> int:
- """
- Get the length of the array.
- """
-
- def __setitem__(self, index: int, value: T) -> None:
- """
- Set an item in the array.
- """
+from tdamapper.protocols import Array, ArrayRead
 
- def __iter__(self) -> Iterator[T]:
- """
- Iterate over the array.
- """
+warnings.filterwarnings("default", category=DeprecationWarning, module=r"^tdamapper\.")
 
 
 def deprecated(msg: str) -> Callable[..., Any]:
@@ -58,48 +35,52 @@ def warn_user(msg: str) -> None:
 
 class EstimatorMixin:
 
- def _is_sparse(self, X: Array[Any]) -> bool:
+ def _is_sparse(self, X: ArrayRead[Any]) -> bool:
  # simple alternative use scipy.sparse.issparse
  return hasattr(X, "toarray")
 
  def _validate_X_y(
- self, X: Array[Any], y: Array[Any]
+ self, X: ArrayRead[Any], y: ArrayRead[Any]
  ) -> tuple[NDArray[np.float64], NDArray[np.float64]]:
  if self._is_sparse(X):
  raise ValueError("Sparse data not supported.")
 
- X = np.asarray(X)
- y = np.asarray(y)
+ X_ = np.asarray(X)
+ y_ = np.asarray(y)
 
- if X.size == 0:
- msg = f"0 feature(s) (shape={X.shape}) while a minimum of 1 is " "required."
+ if X_.size == 0:
+ msg = (
+ f"0 feature(s) (shape={X_.shape}) while a minimum of 1 is " "required."
+ )
  raise ValueError(msg)
 
- if y.size == 0:
- msg = f"0 feature(s) (shape={y.shape}) while a minimum of 1 is " "required."
+ if y_.size == 0:
+ msg = (
+ f"0 feature(s) (shape={y_.shape}) while a minimum of 1 is " "required."
+ )
  raise ValueError(msg)
 
- if X.ndim == 1:
+ if X_.ndim == 1:
  raise ValueError("1d-arrays not supported.")
 
- if np.iscomplexobj(X) or np.iscomplexobj(y):
+ if np.iscomplexobj(X_) or np.iscomplexobj(y_):
  raise ValueError("Complex data not supported.")
 
- if X.dtype == np.object_:
- X = np.array(X, dtype=float)
+ if X_.dtype == np.object_:
+ X_ = np.array(X_, dtype=float)
 
- if y.dtype == np.object_:
- y = np.array(y, dtype=float)
+ if y_.dtype == np.object_:
+ y_ = np.array(y_, dtype=float)
 
  if (
- np.isnan(X).any()
- or np.isinf(X).any()
- or np.isnan(y).any()
- or np.isinf(y).any()
+ np.isnan(X_).any()
+ or np.isinf(X_).any()
+ or np.isnan(y_).any()
+ or np.isinf(y_).any()
  ):
  raise ValueError("NaNs or infinite values not supported.")
 
- return X, y
+ return X_, y_
 
  def _set_n_features_in(self, X: Array[Any]) -> None:
  if hasattr(X, "shape"):
 
@@ -15,10 +15,11 @@
 from sklearn.preprocessing import StandardScaler
 from umap import UMAP
 
-from tdamapper.core import Cover, TrivialClustering
+from tdamapper.core import TrivialClustering
 from tdamapper.cover import BallCover, CubicalCover, KNNCover
 from tdamapper.learn import MapperAlgorithm
 from tdamapper.plot import MapperPlot
+from tdamapper.protocols import Clustering, Cover
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -164,7 +165,7 @@ def run_mapper(
  elif lens_type == LENS_UMAP:
  lens = lens_umap(n_components=lens_umap_n_components)
 
- cover: Cover
+ cover: Cover[NDArray[np.float64]]  # np.float_ alias was removed in NumPy 2.0
  if cover_type == COVER_CUBICAL:
  cover = CubicalCover(
  n_intervals=cover_cubical_n_intervals,
@@ -178,6 +179,7 @@ def run_mapper(
  logger.error(f"Unknown cover type: {cover_type}")
  return None
 
+ clustering: Clustering[NDArray[np.float64]]  # np.float_ alias was removed in NumPy 2.0
  if clustering_type == CLUSTERING_TRIVIAL:
  clustering = TrivialClustering()
  elif clustering_type == CLUSTERING_KMEANS:
 
@@ -31,12 +31,13 @@
 from __future__ import annotations
 
 import logging
-from typing import Any, Callable, Iterator, Optional, Protocol
+from typing import Any, Callable, Generic, Iterator, Optional, TypeVar
 
 import networkx as nx
 from joblib import Parallel, delayed
 
-from tdamapper._common import Array, ParamsMixin, clone
+from tdamapper._common import ParamsMixin, clone
+from tdamapper.protocols import ArrayRead, Clustering, Cover, SpatialSearch
 from tdamapper.utils.unionfind import UnionFind
 
 ATTR_IDS = "ids"
@@ -53,9 +54,17 @@
  handlers=[logging.StreamHandler()],
 )
 
+S = TypeVar("S")
+
+T = TypeVar("T")
+
 
 def mapper_labels(
- X: Array[Any], y: Array[Any], cover: Cover, clustering: Clustering, n_jobs: int = 1
+ X: ArrayRead[S],
+ y: ArrayRead[T],
+ cover: Cover[T],
+ clustering: Clustering[S],
+ n_jobs: int = 1,
 ) -> list[list[int]]:
  """
  Identify the nodes of the Mapper graph.
@@ -85,7 +94,7 @@ def mapper_labels(
  """
 
  def _run_clustering(
- local_ids: list[int], X_local: Array[Any], clust: Clustering
+ local_ids: list[int], X_local: ArrayRead[S], clust: Clustering[S]
  ) -> tuple[list[int], list[int]]:
  local_lbls = clust.fit(X_local).labels_
  return local_ids, local_lbls
@@ -110,7 +119,11 @@ def _run_clustering(
 
 
 def mapper_connected_components(
- X: Array[Any], y: Array[Any], cover: Cover, clustering: Clustering, n_jobs: int = 1
+ X: ArrayRead[S],
+ y: ArrayRead[T],
+ cover: Cover[T],
+ clustering: Clustering[S],
+ n_jobs: int = 1,
 ) -> list[int]:
  """
  Identify the connected components of the Mapper graph.
@@ -155,7 +168,11 @@ def mapper_connected_components(
 
 
 def mapper_graph(
- X: Array[Any], y: Array[Any], cover: Cover, clustering: Clustering, n_jobs: int = 1
+ X: ArrayRead[S],
+ y: ArrayRead[T],
+ cover: Cover[T],
+ clustering: Clustering[S],
+ n_jobs: int = 1,
 ) -> nx.Graph:
  """
  Create the Mapper graph.
@@ -201,7 +218,7 @@ def mapper_graph(
 
 
 def aggregate_graph(
- X: Array[Any], graph: nx.Graph, agg: Callable[..., Any]
+ X: ArrayRead[S], graph: nx.Graph, agg: Callable[..., Any]
 ) -> dict[int, Any]:
  """
  Apply an aggregation function to the nodes of a graph.
@@ -229,81 +246,7 @@ def aggregate_graph(
  return agg_values
 
 
-class Cover(Protocol):
- """
- Abstract interface for cover algorithms.
-
- This is a naive implementation. Subclasses should override the methods of
- this class to implement more meaningful cover algorithms.
- """
-
- def apply(self, X: Array[Any]) -> Iterator[list[int]]:
- """
- Covers the dataset with a single open set.
-
- This is a naive implementation that returns a generator producing a
- single list containing all the ids if the original dataset. This
- method should be overridden by subclasses to implement more meaningful
- cover algorithms.
-
- :param X: A dataset of n points.
- :return: A generator of lists of ids.
- """
-
-
-class Clustering(Protocol):
- """
- Abstract interface for clustering algorithms.
-
- A clustering algorithm is a method for grouping data points into clusters.
- Each cluster is represented by a unique integer label, and the labels are
- assigned to the points in the dataset. The labels are typically non-negative
- integers, starting from zero. The labels are assigned such that the points
- in the same cluster have the same label, and the points in different clusters
- have different labels. The labels are not necessarily contiguous, and there
- may be gaps in the sequence of labels.
- """
-
- labels_: list[int]
-
- def fit(self, X: Array[Any], y: Optional[Array[Any]] = None) -> Clustering:
- """
- Fit the clustering algorithm to the data.
-
- :param X: A dataset of n points.
- :param y: A dataset of targets. Typically ignored and present for
- compatibility with scikit-learn's clustering interface.
- :return: The fitted clustering object.
- """
-
-
-class SpatialSearch(Protocol):
- """
- Abstract interface for search algorithms.
-
- A spatial search algorithm is a method for finding neighbors of a
- query point in a dataset.
- """
-
- def fit(self, X: Array[Any]) -> SpatialSearch:
- """
- Train internal parameters.
-
- :param X: A dataset of n points.
- :return: The object itself.
- """
-
- def search(self, x: Any) -> list[int]:
- """
- Return a list of neighbors for the query point.
-
- :param x: A query point for which we want to find neighbors.
- :return: A list containing all the indices of the points in the
- dataset.
- """
-
-
-def proximity_net(search: SpatialSearch, X: Array[Any]) -> Iterator[list[int]]:
+def proximity_net(search: SpatialSearch[S], X: ArrayRead[S]) -> Iterator[list[int]]:
  """
  Covers the dataset using proximity-net.
 
@@ -331,7 +274,7 @@ def proximity_net(search: SpatialSearch, X: Array[Any]) -> Iterator[list[int]]:
  yield neigh_ids
 
 
-class TrivialCover(ParamsMixin):
+class TrivialCover(ParamsMixin, Generic[T]):
  """
  Cover algorithm that covers data with a single subset containing the whole
  dataset.
@@ -340,7 +283,7 @@ class TrivialCover(ParamsMixin):
  dataset.
  """
 
- def apply(self, X: Array[Any]) -> Iterator[list[int]]:
+ def apply(self, X: ArrayRead[T]) -> Iterator[list[int]]:
  """
  Covers the dataset with a single open set.
 
@@ -350,7 +293,7 @@ def apply(self, X: Array[Any]) -> Iterator[list[int]]:
  yield list(range(0, len(X)))
 
 
-class FailSafeClustering(ParamsMixin):
+class FailSafeClustering(ParamsMixin, Generic[T]):
  """
  A delegating clustering algorithm that prevents failure.
 
@@ -364,17 +307,19 @@ class FailSafeClustering(ParamsMixin):
  enable logging, or False to suppress it. Defaults to True.
  """
 
- _clustering: Optional[Clustering]
+ _clustering: Optional[Clustering[T]]
  _verbose: bool
  labels_: list[int]
 
  def __init__(
- self, clustering: Optional[Clustering] = None, verbose: bool = True
+ self, clustering: Optional[Clustering[T]] = None, verbose: bool = True
  ) -> None:
  self.clustering = clustering
  self.verbose = verbose
 
- def fit(self, X: Array[Any], y: Optional[Array[Any]] = None) -> FailSafeClustering:
+ def fit(
+ self, X: ArrayRead[T], y: Optional[ArrayRead[T]] = None
+ ) -> FailSafeClustering[T]:
  self._clustering = (
  TrivialClustering() if self.clustering is None else self.clustering
  )
@@ -389,7 +334,7 @@ def fit(self, X: Array[Any], y: Optional[Array[Any]] = None) -> FailSafeClusteri
  return self
 
 
-class TrivialClustering(ParamsMixin):
+class TrivialClustering(ParamsMixin, Generic[T]):
  """
  A clustering algorithm that returns a single cluster.
 
@@ -404,7 +349,9 @@ class TrivialClustering(ParamsMixin):
  def __init__(self) -> None:
  pass
 
- def fit(self, X: Array[Any], _y: Optional[Array[Any]] = None) -> TrivialClustering:
+ def fit(
+ self, X: ArrayRead[T], _y: Optional[ArrayRead[T]] = None
+ ) -> TrivialClustering[T]:
  """
  Fit the clustering algorithm to the data.