Skip to content
This repository was archived by the owner on Apr 11, 2025. It is now read-only.
Merged
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 39 additions & 109 deletions camelot/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import numpy as np
import pandas as pd


# minimum number of vertical textline intersections for a textedge
# to be considered valid
TEXTEDGE_REQUIRED_ELEMENTS = 4
Expand Down Expand Up @@ -398,125 +397,56 @@ def set_edges(self, vertical, horizontal, joint_tol=2):
List of detected horizontal lines.

"""

def find_close_point(over, coord, joint_tol):
    """Return the index of the first entry in ``over`` that lies near ``coord``.

    Each entry of ``over`` is indexed with ``[0]`` and that value is compared
    against ``coord`` using ``math.isclose`` with ``abs_tol=joint_tol`` (the
    default relative tolerance of ``math.isclose`` still applies).

    Parameters
    ----------
    over : iterable of indexable
        Candidates to scan, in order; only element ``[0]`` of each is read.
    coord : float
        Coordinate to match.
    joint_tol : float
        Absolute tolerance passed to ``math.isclose``.

    Returns
    -------
    int or None
        Position of the first matching entry, or ``None`` when no entry
        is within tolerance.

    """
    # Lazily scan so the search stops at the first match, exactly like an
    # early ``return`` from an explicit loop.
    matching_positions = (
        position
        for position, entry in enumerate(over)
        if math.isclose(coord, entry[0], abs_tol=joint_tol)
    )
    return next(matching_positions, None)

for v in vertical:
# find closest x coord
# iterate over y coords and find closest start and end points
i = [
i
for i, t in enumerate(self.cols)
if np.isclose(v[0], t[0], atol=joint_tol)
]
j = [
j
for j, t in enumerate(self.rows)
if np.isclose(v[3], t[0], atol=joint_tol)
]
k = [
k
for k, t in enumerate(self.rows)
if np.isclose(v[1], t[0], atol=joint_tol)
]
if not j:
start = find_close_point(self.rows, v[3], joint_tol)
if start is None:
continue
J = j[0]
if i == [0]: # only left edge
L = i[0]
if k:
K = k[0]
while J < K:
self.cells[J][L].left = True
J += 1
else:
K = len(self.rows)
while J < K:
self.cells[J][L].left = True
J += 1
elif i == []: # only right edge
L = len(self.cols) - 1
if k:
K = k[0]
while J < K:
self.cells[J][L].right = True
J += 1
else:
K = len(self.rows)
while J < K:
self.cells[J][L].right = True
J += 1
end = find_close_point(self.rows, v[1], joint_tol)
if end is None:
end = len(self.rows)
i = find_close_point(self.cols, v[0], joint_tol)
if i is None: # only right edge
i = len(self.cols) - 1
for j in range(start, end):
self.cells[j][i].right = True
elif i == 0: # only left edge
for j in range(start, end):
self.cells[j][0].left = True
else: # both left and right edges
L = i[0]
if k:
K = k[0]
while J < K:
self.cells[J][L].left = True
self.cells[J][L - 1].right = True
J += 1
else:
K = len(self.rows)
while J < K:
self.cells[J][L].left = True
self.cells[J][L - 1].right = True
J += 1
for j in range(start, end):
self.cells[j][i].left = True
self.cells[j][i - 1].right = True

for h in horizontal:
# find closest y coord
# iterate over x coords and find closest start and end points
i = [
i
for i, t in enumerate(self.rows)
if np.isclose(h[1], t[0], atol=joint_tol)
]
j = [
j
for j, t in enumerate(self.cols)
if np.isclose(h[0], t[0], atol=joint_tol)
]
k = [
k
for k, t in enumerate(self.cols)
if np.isclose(h[2], t[0], atol=joint_tol)
]
if not j:
start = find_close_point(self.cols, h[0], joint_tol)
if start is None:
continue
J = j[0]
if i == [0]: # only top edge
L = i[0]
if k:
K = k[0]
while J < K:
self.cells[L][J].top = True
J += 1
else:
K = len(self.cols)
while J < K:
self.cells[L][J].top = True
J += 1
elif i == []: # only bottom edge
L = len(self.rows) - 1
if k:
K = k[0]
while J < K:
self.cells[L][J].bottom = True
J += 1
else:
K = len(self.cols)
while J < K:
self.cells[L][J].bottom = True
J += 1
end = find_close_point(self.cols, h[2], joint_tol)
if end is None:
end = len(self.cols)
i = find_close_point(self.rows, h[1], joint_tol)
if i is None: # only bottom edge
i = len(self.rows) - 1
for j in range(start, end):
self.cells[i][j].bottom = True
elif i == 0: # only top edge
for j in range(start, end):
self.cells[0][j].top = True
else: # both top and bottom edges
L = i[0]
if k:
K = k[0]
while J < K:
self.cells[L][J].top = True
self.cells[L - 1][J].bottom = True
J += 1
else:
K = len(self.cols)
while J < K:
self.cells[L][J].top = True
self.cells[L - 1][J].bottom = True
J += 1
for j in range(start, end):
self.cells[i][j].top = True
self.cells[i - 1][j].bottom = True

return self

Expand Down