Skip to content

Commit 5101978

Browse files
symlink patch
1 parent b720dd0 commit 5101978

File tree

1 file changed

+22
-129
lines changed

1 file changed

+22
-129
lines changed

src/gitprobe/analysis/repo_analyzer.py

Lines changed: 22 additions & 129 deletions
Original file line numberDiff line numberDiff line change
@@ -14,70 +14,22 @@
1414

1515

1616
class RepoAnalyzer:
17-
"""
18-
A comprehensive repository analyzer that generates structured file trees.
19-
20-
This class analyzes local repository directories and creates detailed file tree
21-
structures with metadata including file sizes, extensions, and estimated token counts.
22-
It supports flexible filtering through include/exclude patterns to focus on
23-
relevant files and directories.
24-
25-
Attributes:
26-
include_patterns (List[str]): Glob patterns for files to include in analysis.
27-
If None, defaults to DEFAULT_INCLUDE_PATTERNS.
28-
exclude_patterns (List[str]): Glob patterns for files/directories to exclude.
29-
Merged with DEFAULT_IGNORE_PATTERNS.
30-
"""
31-
3217
def __init__(
3318
self,
3419
include_patterns: Optional[List[str]] = None,
3520
exclude_patterns: Optional[List[str]] = None,
3621
) -> None:
37-
"""
38-
Initialize the RepoAnalyzer with custom filtering patterns.
39-
40-
Args:
41-
include_patterns: List of glob patterns for files to include.
42-
If None, uses DEFAULT_INCLUDE_PATTERNS.
43-
exclude_patterns: List of glob patterns for files/directories to exclude.
44-
These are added to DEFAULT_IGNORE_PATTERNS.
45-
"""
4622
self.include_patterns = (
4723
include_patterns if include_patterns is not None else DEFAULT_INCLUDE_PATTERNS
4824
)
49-
5025
self.exclude_patterns = (
5126
list(DEFAULT_IGNORE_PATTERNS) + exclude_patterns
5227
if exclude_patterns is not None
5328
else list(DEFAULT_IGNORE_PATTERNS)
5429
)
5530

5631
def analyze_repository_structure(self, repo_dir: str) -> Dict:
57-
"""
58-
Perform complete analysis of repository structure.
59-
60-
Analyzes the given repository directory and returns a comprehensive
61-
structure including the file tree and summary statistics.
62-
63-
Args:
64-
repo_dir: Path to the repository directory to analyze.
65-
66-
Returns:
67-
Dict containing:
68-
- file_tree: Nested dictionary representing the directory structure
69-
- summary: Dictionary with total_files and total_size_kb
70-
71-
Raises:
72-
FileNotFoundError: If the specified repository directory doesn't exist.
73-
PermissionError: If access to the directory is denied.
74-
75-
Example:
76-
>>> result = analyzer.analyze_repository_structure('/path/to/repo')
77-
>>> print(result['summary']['total_files'])
78-
"""
7932
file_tree = self._build_file_tree(repo_dir)
80-
8133
return {
8234
"file_tree": file_tree,
8335
"summary": {
@@ -87,33 +39,22 @@ def analyze_repository_structure(self, repo_dir: str) -> Dict:
8739
}
8840

8941
def _build_file_tree(self, repo_dir: str) -> Dict:
90-
"""
91-
Build hierarchical file tree structure with intelligent filtering.
92-
93-
Creates a nested dictionary representation of the directory structure,
94-
applying include/exclude patterns to filter relevant files and directories.
95-
96-
Args:
97-
repo_dir: Root directory path to analyze.
98-
99-
Returns:
100-
Dict representing the file tree structure with metadata.
101-
"""
102-
10342
def build_tree(path: Path, base_path: Path) -> Optional[Dict]:
104-
"""
105-
Recursively build tree structure for a given path.
106-
107-
Args:
108-
path: Current path being processed.
109-
base_path: Root path for calculating relative paths.
110-
111-
Returns:
112-
Dict representing the current path's tree structure, or None if excluded.
113-
"""
11443
relative_path = path.relative_to(base_path)
11544
relative_path_str = str(relative_path)
11645

46+
# 🚫 Reject symlinks
47+
if path.is_symlink():
48+
return None
49+
50+
# 🚫 Reject escaped paths (e.g., symlinks pointing outside)
51+
try:
52+
if not path.resolve().is_relative_to(base_path.resolve()):
53+
return None
54+
except AttributeError:
55+
if not str(path.resolve()).startswith(str(base_path.resolve())):
56+
return None
57+
11758
if self._should_exclude_path(relative_path_str, path.name):
11859
return None
11960

@@ -129,6 +70,7 @@ def build_tree(path: Path, base_path: Path) -> Optional[Dict]:
12970
"extension": path.suffix,
13071
"_size_bytes": size,
13172
}
73+
13274
elif path.is_dir():
13375
children = []
13476
try:
@@ -147,87 +89,38 @@ def build_tree(path: Path, base_path: Path) -> Optional[Dict]:
14789
"children": children,
14890
}
14991
return None
150-
else:
151-
return None
92+
93+
# Other types (sockets, devices, etc.)
94+
return None
15295

15396
return build_tree(Path(repo_dir), Path(repo_dir))
15497

15598
def _should_exclude_path(self, path: str, filename: str) -> bool:
156-
"""
157-
Determine if a path should be excluded based on exclusion patterns.
158-
159-
Checks the given path and filename against all configured exclude patterns
160-
using various matching strategies including glob patterns and path prefixes.
161-
162-
Args:
163-
path: Relative path of the file/directory.
164-
filename: Name of the file/directory.
165-
166-
Returns:
167-
True if the path should be excluded, False otherwise.
168-
"""
16999
for pattern in self.exclude_patterns:
170100
if fnmatch.fnmatch(path, pattern) or fnmatch.fnmatch(filename, pattern):
171101
return True
172-
173-
if pattern.endswith("/"):
174-
if path.startswith(pattern.rstrip("/")):
175-
return True
176-
else:
177-
if path.startswith(pattern + "/") or path == pattern:
178-
return True
179-
180-
path_parts = path.split("/")
181-
if pattern in path_parts:
182-
return True
102+
if pattern.endswith("/") and path.startswith(pattern.rstrip("/")):
103+
return True
104+
if path.startswith(pattern + "/") or path == pattern:
105+
return True
106+
if pattern in path.split("/"):
107+
return True
183108
return False
184109

185110
def _should_include_file(self, path: str, filename: str) -> bool:
186-
"""
187-
Determine if a file should be included based on inclusion patterns.
188-
189-
If no include patterns are specified, all files are included by default.
190-
Otherwise, files must match at least one include pattern.
191-
192-
Args:
193-
path: Relative path of the file.
194-
filename: Name of the file.
195-
196-
Returns:
197-
True if the file should be included, False otherwise.
198-
"""
199111
if not self.include_patterns:
200112
return True
201-
202113
for pattern in self.include_patterns:
203114
if fnmatch.fnmatch(path, pattern) or fnmatch.fnmatch(filename, pattern):
204115
return True
205116
return False
206117

207118
def _count_files(self, tree: Dict) -> int:
208-
"""
209-
Recursively count total number of files in the tree structure.
210-
211-
Args:
212-
tree: File tree dictionary to count files in.
213-
214-
Returns:
215-
Total number of files in the tree.
216-
"""
217119
if tree["type"] == "file":
218120
return 1
219121
return sum(self._count_files(child) for child in tree.get("children", []))
220122

221123
def _calculate_size(self, tree: Dict) -> float:
222-
"""
223-
Recursively calculate total size of all files in the tree structure.
224-
225-
Args:
226-
tree: File tree dictionary to calculate size for.
227-
228-
Returns:
229-
Total size in kilobytes of all files in the tree.
230-
"""
231124
if tree["type"] == "file":
232125
return tree.get("_size_bytes", 0) / 1024
233126
return sum(self._calculate_size(child) for child in tree.get("children", []))

0 commit comments

Comments
 (0)