lucasimi
diff --git a/.gitignore b/.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/Makefile b/Makefile
Lines changed: 23 additions & 0 deletions
diff --git a/app/streamlit_app.py b/app/streamlit_app.py
Lines changed: 101 additions & 9 deletions
diff --git a/src/tdamapper/_plot_plotly.py b/src/tdamapper/_plot_plotly.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/tdamapper/clustering.py b/src/tdamapper/clustering.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/tdamapper/utils/heap.py b/src/tdamapper/utils/heap.py
Lines changed: 29 additions & 29 deletions
@@ -24,3 +24,4 @@
 .idea
 dist/
 build/
+coverage.xml
@@ -0,0 +1,23 @@
+PYTHON = python
+PIP = pip
+
+.PHONY: all
+all: install ## Default target: same as `install`
+
+.PHONY: install
+install: ## Editable install with the `dev` extras (quoted so the shell cannot glob-expand `.[dev]`)
+	$(PIP) install -e ".[dev]"
+
+.PHONY: test
+test: ## Run the unit tests under coverage, then write the XML coverage report
+	coverage run --source=src -m pytest tests/test_unit_*.py
+	coverage xml
+
+.PHONY: bench
+bench: ## Run the benchmark tests with live INFO logging and uncaptured output
+	$(PYTHON) -m pytest tests/test_bench_*.py -s -o log_cli=true --log-level=INFO
+
+.PHONY: clean
+clean: ## Remove `__pycache__` directories and stray `*.pyc` files
+	find . -type d -name "__pycache__" -exec rm -r {} +
+	find . -type f -name "*.pyc" -delete
@@ -23,8 +23,8 @@
 from umap import UMAP
 
 from tdamapper.core import aggregate_graph
-from tdamapper.cover import BallCover, CubicalCover
-from tdamapper.learn import MapperAlgorithm
+from tdamapper.cover import BallCover, CubicalCover, KNNCover
+from tdamapper.learn import MapperAlgorithm, MapperClustering
 from tdamapper.plot import MapperPlot
 
 LIMITS_ENABLED = bool(os.environ.get("LIMITS_ENABLED", False))
@@ -63,8 +63,12 @@
 
 V_COVER_CUBICAL = "Cubical"
 
+V_COVER_KNN = "KNN"
+
 V_CLUSTERING_TRIVIAL = "Trivial"
 
+V_CLUSTERING_COVER = "Cover"
+
 V_CLUSTERING_AGGLOMERATIVE = "Agglomerative"
 
 V_CLUSTERING_DBSCAN = "DBSCAN"
@@ -198,7 +202,10 @@ def _get_data_summary(df_X, df_y):
  }
  ).T
  df_summary = pd.DataFrame(
- {V_DATA_SUMMARY_FEAT: df.columns, V_DATA_SUMMARY_HIST: df_hist.values.tolist()}
+ {
+ V_DATA_SUMMARY_FEAT: df.columns,
+ V_DATA_SUMMARY_HIST: df_hist.values.tolist(),
+ }
  )
  return df_summary
 
@@ -316,9 +323,10 @@ def mapper_lens_input_section(X):
  if pca_n > n_feats:
  lens = X
  else:
- lens = PCA(n_components=pca_n, random_state=pca_random_state).fit_transform(
- X
- )
+ lens = PCA(
+ n_components=pca_n,
+ random_state=pca_random_state,
+ ).fit_transform(X)
  elif lens_type == V_LENS_UMAP:
  umap_n = st.number_input(
  "UMAP Components",
@@ -343,7 +351,12 @@ def mapper_cover_input_section():
  st.header("🌐 Cover")
  cover_type = st.selectbox(
  "Type",
- options=[V_COVER_TRIVIAL, V_COVER_BALL, V_COVER_CUBICAL],
+ options=[
+ V_COVER_TRIVIAL,
+ V_COVER_BALL,
+ V_COVER_CUBICAL,
+ V_COVER_KNN,
+ ],
  index=2,
  )
  cover = None
@@ -379,9 +392,79 @@ def mapper_cover_input_section():
  "Overlap", value=0.25, min_value=0.0, max_value=1.0
  )
  cover = CubicalCover(n_intervals=cubical_n, overlap_frac=cubical_p)
+ elif cover_type == V_COVER_KNN:
+ knn_k = st.number_input("Neighbors", value=10, min_value=1)
+ cover = KNNCover(neighbors=knn_k)
  return cover
 
 
+def mapper_clustering_cover():
+    """
+    Render the widgets for cover-based clustering and build the estimator.
+
+    A select box offers the Trivial, Ball, Cubical and KNN covers (Cubical
+    is preselected) and the parameter widgets of the chosen cover are shown
+    below it. Every widget is given an explicit ``key``; presumably this
+    keeps them distinct from the identically labelled widgets rendered by
+    ``mapper_cover_input_section`` (verify against Streamlit's widget
+    identity rules).
+
+    :return: A ``MapperClustering`` built with the selected cover (``None``
+        when the trivial cover is selected) and ``n_jobs=-2``.
+    """
+    cover_type = st.selectbox(
+        "Type",
+        options=[
+            V_COVER_TRIVIAL,
+            V_COVER_BALL,
+            V_COVER_CUBICAL,
+            V_COVER_KNN,
+        ],
+        index=2,
+        key="mapper_clustering_cover_type",
+    )
+    # V_COVER_TRIVIAL has no parameters: the cover simply stays None.
+    cover = None
+    if cover_type == V_COVER_BALL:
+        ball_r = st.number_input(
+            "Radius",
+            value=100.0,
+            min_value=0.0,
+            key="mapper_clustering_radius",
+        )
+        metric = st.selectbox(
+            "Metric",
+            options=[
+                "euclidean",
+                "chebyshev",
+                "manhattan",
+                "cosine",
+            ],
+            key="mapper_clustering_cover_metric",
+        )
+        cover = BallCover(radius=ball_r, metric=metric)
+    elif cover_type == V_COVER_CUBICAL:
+        # NOTE(review): min_value=0 lets the user pick zero intervals;
+        # confirm CubicalCover handles that or raise the minimum to 1.
+        cubical_n = st.number_input(
+            "Intervals",
+            value=10,
+            min_value=0,
+            key="mapper_clustering_cover_intervals",
+        )
+        cubical_overlap = st.checkbox(
+            "Set overlap",
+            value=False,
+            help="Uses a dimension-dependent default overlap when unchecked",
+            key="mapper_clustering_cover_set_overlap",
+        )
+        # None is passed through as overlap_frac unless the user opts in.
+        cubical_p = None
+        if cubical_overlap:
+            cubical_p = st.number_input(
+                "Overlap",
+                value=0.25,
+                min_value=0.0,
+                max_value=1.0,
+                key="mapper_clustering_cover_overlap",
+            )
+        cover = CubicalCover(n_intervals=cubical_n, overlap_frac=cubical_p)
+    elif cover_type == V_COVER_KNN:
+        knn_k = st.number_input(
+            "Neighbors",
+            value=10,
+            min_value=1,
+            key="mapper_clustering_knn_k",
+        )
+        cover = KNNCover(neighbors=knn_k)
+    return MapperClustering(cover=cover, n_jobs=-2)
+
+
+
+
 def mapper_clustering_kmeans():
  clust_num = st.number_input(
  "Clusters",
@@ -485,17 +568,20 @@ def mapper_clustering_input_section():
  "Type",
  options=[
  V_CLUSTERING_TRIVIAL,
+ V_CLUSTERING_COVER,
  V_CLUSTERING_KMEANS,
  V_CLUSTERING_AGGLOMERATIVE,
  V_CLUSTERING_DBSCAN,
  V_CLUSTERING_HDBSCAN,
  V_CLUSTERING_AFFINITY_PROPAGATION,
  ],
- index=1,
+ index=0,
  )
  clustering = None
  if clustering_type == V_CLUSTERING_TRIVIAL:
  clustering = None
+ elif clustering_type == V_CLUSTERING_COVER:
+ clustering = mapper_clustering_cover()
  elif clustering_type == V_CLUSTERING_AGGLOMERATIVE:
  clustering = mapper_clustering_agglomerative()
  elif clustering_type == V_CLUSTERING_KMEANS:
@@ -625,7 +711,13 @@ def compute_mapper_fig(mapper_plot, colors, node_size, cmap, _agg, agg_name):
  logger.info("Generating Mapper figure")
  mapper_fig = mapper_plot.plot_plotly(
  colors,
- node_size=node_size,
+ node_size=[
+ 0.0,
+ node_size / 2.0,
+ node_size,
+ node_size * 1.5,
+ node_size * 2.0,
+ ],
  agg=_agg,
  title=[f"{c}" for c in colors.columns],
  cmap=cmap,
 
@@ -73,7 +73,7 @@ def plot_plotly(
  titles = [title for _ in range(colors_num)]
  elif isinstance(title, list) and len(title) == colors_num:
  titles = title
- node_sizes = [node_size] if isinstance(node_size, int) else node_size
+ node_sizes = [node_size] if isinstance(node_size, (int, float)) else node_size
  fig = _figure(mapper_plot, width, height, node_sizes, colors, titles, agg, cmaps)
  _add_ui_to_layout(mapper_plot, fig, colors, titles, node_sizes, agg, cmaps)
  return fig
 
@@ -43,6 +43,7 @@ def __init__(self, cover=None, clustering=None, n_jobs=1):
  self.n_jobs = n_jobs
 
  def fit(self, X, y=None):
+ y = X if y is None else y
  X, y = self._validate_X_y(X, y)
  cover = TrivialCover() if self.cover is None else self.cover
  cover = clone(cover)
@@ -53,7 +54,6 @@ def fit(self, X, y=None):
  )
  clustering = clone(clustering)
  n_jobs = self.n_jobs
- y = X if y is None else y
  itm_lbls = mapper_connected_components(
  X,
  y,
 
@@ -13,91 +13,91 @@ def _parent(i):
 class _HeapNode:
 
  def __init__(self, key, value):
- self.__key = key
- self.__value = value
+ self._key = key
+ self._value = value
 
  def get(self):
- return self.__key, self.__value
+ return self._key, self._value
 
  def __lt__(self, other):
- return self.__key < other
+ return self._key < other._key
 
  def __le__(self, other):
- return self.__key <= other
+ return self._key <= other._key
 
  def __gt__(self, other):
- return self.__key > other
+ return self._key > other._key
 
  def __ge__(self, other):
- return self.__key >= other
+ return self._key >= other._key
 
 
 class MaxHeap:
 
  def __init__(self):
- self.__heap = []
- self.__iter = None
+ self._heap = []
+ self._iter = None
 
  def __iter__(self):
- self.__iter = iter(self.__heap)
+ self._iter = iter(self._heap)
  return self
 
  def __next__(self):
- node = next(self.__iter)
+ node = next(self._iter)
  return node.get()
 
  def __len__(self):
- return len(self.__heap)
+ return len(self._heap)
 
  def top(self):
- if not self.__heap:
+ if not self._heap:
  return (None, None)
- return self.__heap[0].get()
+ return self._heap[0].get()
 
  def pop(self):
- if not self.__heap:
+ if not self._heap:
  return
- max_val = self.__heap[0]
- self.__heap[0] = self.__heap[-1]
- self.__heap.pop()
+ max_val = self._heap[0]
+ self._heap[0] = self._heap[-1]
+ self._heap.pop()
  self._bubble_down()
  return max_val.get()
 
  def add(self, key, val):
- self.__heap.append(_HeapNode(key, val))
+ self._heap.append(_HeapNode(key, val))
  self._bubble_up()
 
  def _get_local_max(self, i):
- heap_len = len(self.__heap)
+ heap_len = len(self._heap)
  left = _left(i)
  right = _right(i)
  if left >= heap_len:
  return i
  if right >= heap_len:
- if self.__heap[i] < self.__heap[left]:
+ if self._heap[i] < self._heap[left]:
  return left
  return i
  max_child = left
- if self.__heap[left] < self.__heap[right]:
+ if self._heap[left] < self._heap[right]:
  max_child = right
- if self.__heap[i] < self.__heap[max_child]:
+ if self._heap[i] < self._heap[max_child]:
  return max_child
  return i
 
  def _fix_down(self, i):
  local_max = self._get_local_max(i)
  if i < local_max:
- self.__heap[i], self.__heap[local_max] = (
- self.__heap[local_max],
- self.__heap[i],
+ self._heap[i], self._heap[local_max] = (
+ self._heap[local_max],
+ self._heap[i],
  )
  return local_max
  return i
 
  def _fix_up(self, i):
  parent = _parent(i)
- if self.__heap[parent] < self.__heap[i]:
- self.__heap[i], self.__heap[parent] = self.__heap[parent], self.__heap[i]
+ if self._heap[parent] < self._heap[i]:
+ self._heap[i], self._heap[parent] = self._heap[parent], self._heap[i]
  return parent
  return i
 
@@ -110,7 +110,7 @@ def _bubble_down(self):
  current = local_max
 
  def _bubble_up(self):
- current = len(self.__heap) - 1
+ current = len(self._heap) - 1
  done = False
  while not done:
  local_max = self._fix_up(current)
-Original file line number
+Diff line change
 .idea
 dist/
 build/
 +coverage.xml