py-pdf · bosd · Nov 5, 2024 · Nov 2, 2024 · Nov 2, 2024 · Nov 2, 2024
diff --git a/camelot/core.py b/camelot/core.py
@@ -25,7 +25,6 @@
 
 from .backends import ImageConversionBackend
 from .utils import build_file_path_in_temp_dir
-from .utils import compute_whitespace
 from .utils import get_index_closest_point
 from .utils import get_textline_coords
 
@@ -611,20 +610,6 @@ def parsing_report(self):
  }
  return report
 
- def record_metadata(self, parser):
- """Record data about the origin of the table."""
- self.flavor = parser.id
- self.filename = parser.filename
- self.debug_info = parser.debug_info
- if parser.copy_text is not None:
- self.copy_spanning_text(parser.copy_text)
- data = self.data
- self.df = pd.DataFrame(data)
- self.shape = self.df.shape
-
- self.whitespace = compute_whitespace(data)
- self.pdf_size = (parser.pdf_width, parser.pdf_height)
-
  def get_pdf_image(self):
  """Compute pdf image and cache it."""
  if self._image is None:

diff --git a/camelot/handlers.py b/camelot/handlers.py
@@ -4,7 +4,6 @@
 
 import multiprocessing as mp
 import os
-import sys
 from pathlib import Path
 
 from pypdf import PdfReader
@@ -71,8 +70,6 @@ def __init__(
  self.password = "" # noqa: S105
  else:
  self.password = password
- if sys.version_info[0] < 3:
- self.password = self.password.encode("ascii")
  self.pages = self._get_pages(pages)
 
  def _get_pages(self, pages):

diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py
@@ -86,7 +86,6 @@ def _nurminen_table_detection(self, textlines):
  Assumes that tables are situated relatively far apart
  vertically.
  """
- # TODO: add support for arabic text #141
  # sort textlines in reading order
  textlines.sort(key=lambda x: (-x.y0, x.x0))
  textedges = TextEdges(edge_tol=self.edge_tol)

diff --git a/camelot/utils.py b/camelot/utils.py
@@ -747,32 +747,6 @@ def find_columns_boundaries(tls, min_gap=1.0):
  return cols_bounds
 
 
-def find_rows_boundaries(tls, min_gap=1.0):
- """Make a list of disjunct rows boundaries for a list of text objects.
-
- Parameters
- ----------
- tls : list of PDFMiner text object.
-
- min_gap : minimum distance between rows. Any elements closer than
- this threshold are merged together.
-
- Returns
- -------
- boundaries : list
- List y-coordinates for rows.
- [(1st row bottom, 1st row top), (2nd row bottom, 2nd row top), ...]
- """
- rows_bounds = []
- tls.sort(key=lambda tl: tl.y0)
- for tl in tls:
- if (not rows_bounds) or rows_bounds[-1][1] + min_gap < tl.y0:
- rows_bounds.append([tl.y0, tl.y1])
- else:
- rows_bounds[-1][1] = max(rows_bounds[-1][1], tl.y1)
- return rows_bounds
-
-
 def boundaries_to_split_lines(boundaries):
  """Find split lines given a list of boundaries between rows or cols.
 

diff --git a/pypdf_table_extraction/utils.py b/pypdf_table_extraction/utils.py
@@ -14,7 +14,6 @@
 from camelot.utils import download_url # noqa F401
 from camelot.utils import expand_bbox_with_textline # noqa F401
 from camelot.utils import find_columns_boundaries # noqa F401
-from camelot.utils import find_rows_boundaries # noqa F401
 from camelot.utils import flag_font_size # noqa F401
 from camelot.utils import flavor_to_kwargs # noqa F401
 from camelot.utils import get_index_closest_point # noqa F401