1414
1515
1616class RepoAnalyzer :
17- """
18- A comprehensive repository analyzer that generates structured file trees.
19-
20- This class analyzes local repository directories and creates detailed file tree
21- structures with metadata including file sizes, extensions, and estimated token counts.
22- It supports flexible filtering through include/exclude patterns to focus on
23- relevant files and directories.
24-
25- Attributes:
26- include_patterns (List[str]): Glob patterns for files to include in analysis.
27- If None, defaults to DEFAULT_INCLUDE_PATTERNS.
28- exclude_patterns (List[str]): Glob patterns for files/directories to exclude.
29- Merged with DEFAULT_IGNORE_PATTERNS.
30- """
31-
def __init__(
    self,
    include_patterns: Optional[List[str]] = None,
    exclude_patterns: Optional[List[str]] = None,
) -> None:
    """Store the glob patterns used to filter the repository walk.

    Args:
        include_patterns: Glob patterns that files are matched against
            for inclusion. ``None`` selects ``DEFAULT_INCLUDE_PATTERNS``.
        exclude_patterns: Extra glob patterns to exclude. They are
            appended to a copy of ``DEFAULT_IGNORE_PATTERNS``; ``None``
            means the defaults are used on their own.
    """
    if include_patterns is None:
        include_patterns = DEFAULT_INCLUDE_PATTERNS
    self.include_patterns = include_patterns

    # Work on a fresh list so the module-level default ignore list is
    # never shared with (or mutated through) this instance.
    ignored = list(DEFAULT_IGNORE_PATTERNS)
    if exclude_patterns is not None:
        ignored = ignored + exclude_patterns
    self.exclude_patterns = ignored
5530
5631 def analyze_repository_structure (self , repo_dir : str ) -> Dict :
57- """
58- Perform complete analysis of repository structure.
59-
60- Analyzes the given repository directory and returns a comprehensive
61- structure including the file tree and summary statistics.
62-
63- Args:
64- repo_dir: Path to the repository directory to analyze.
65-
66- Returns:
67- Dict containing:
68- - file_tree: Nested dictionary representing the directory structure
69- - summary: Dictionary with total_files and total_size_kb
70-
71- Raises:
72- FileNotFoundError: If the specified repository directory doesn't exist.
73- PermissionError: If access to the directory is denied.
74-
75- Example:
76- >>> result = analyzer.analyze_repository_structure('/path/to/repo')
77- >>> print(result['summary']['total_files'])
78- """
7932 file_tree = self ._build_file_tree (repo_dir )
80-
8133 return {
8234 "file_tree" : file_tree ,
8335 "summary" : {
@@ -87,33 +39,22 @@ def analyze_repository_structure(self, repo_dir: str) -> Dict:
8739 }
8840
8941 def _build_file_tree (self , repo_dir : str ) -> Dict :
90- """
91- Build hierarchical file tree structure with intelligent filtering.
92-
93- Creates a nested dictionary representation of the directory structure,
94- applying include/exclude patterns to filter relevant files and directories.
95-
96- Args:
97- repo_dir: Root directory path to analyze.
98-
99- Returns:
100- Dict representing the file tree structure with metadata.
101- """
102-
10342 def build_tree (path : Path , base_path : Path ) -> Optional [Dict ]:
104- """
105- Recursively build tree structure for a given path.
106-
107- Args:
108- path: Current path being processed.
109- base_path: Root path for calculating relative paths.
110-
111- Returns:
112- Dict representing the current path's tree structure, or None if excluded.
113- """
11443 relative_path = path .relative_to (base_path )
11544 relative_path_str = str (relative_path )
11645
46+ # 🚫 Reject symlinks
47+ if path .is_symlink ():
48+ return None
49+
50+ # 🚫 Reject escaped paths (e.g., symlinks pointing outside)
51+ try :
52+ if not path .resolve ().is_relative_to (base_path .resolve ()):
53+ return None
54+ except AttributeError :
55+ if not str (path .resolve ()).startswith (str (base_path .resolve ())):
56+ return None
57+
11758 if self ._should_exclude_path (relative_path_str , path .name ):
11859 return None
11960
@@ -129,6 +70,7 @@ def build_tree(path: Path, base_path: Path) -> Optional[Dict]:
12970 "extension" : path .suffix ,
13071 "_size_bytes" : size ,
13172 }
73+
13274 elif path .is_dir ():
13375 children = []
13476 try :
@@ -147,87 +89,38 @@ def build_tree(path: Path, base_path: Path) -> Optional[Dict]:
14789 "children" : children ,
14890 }
14991 return None
150- else :
151- return None
92+
93+ # Other types (sockets, devices, etc.)
94+ return None
15295
15396 return build_tree (Path (repo_dir ), Path (repo_dir ))
15497
15598 def _should_exclude_path (self , path : str , filename : str ) -> bool :
156- """
157- Determine if a path should be excluded based on exclusion patterns.
158-
159- Checks the given path and filename against all configured exclude patterns
160- using various matching strategies including glob patterns and path prefixes.
161-
162- Args:
163- path: Relative path of the file/directory.
164- filename: Name of the file/directory.
165-
166- Returns:
167- True if the path should be excluded, False otherwise.
168- """
16999 for pattern in self .exclude_patterns :
170100 if fnmatch .fnmatch (path , pattern ) or fnmatch .fnmatch (filename , pattern ):
171101 return True
172-
173- if pattern .endswith ("/" ):
174- if path .startswith (pattern .rstrip ("/" )):
175- return True
176- else :
177- if path .startswith (pattern + "/" ) or path == pattern :
178- return True
179-
180- path_parts = path .split ("/" )
181- if pattern in path_parts :
182- return True
102+ if pattern .endswith ("/" ) and path .startswith (pattern .rstrip ("/" )):
103+ return True
104+ if path .startswith (pattern + "/" ) or path == pattern :
105+ return True
106+ if pattern in path .split ("/" ):
107+ return True
183108 return False
184109
185110 def _should_include_file (self , path : str , filename : str ) -> bool :
186- """
187- Determine if a file should be included based on inclusion patterns.
188-
189- If no include patterns are specified, all files are included by default.
190- Otherwise, files must match at least one include pattern.
191-
192- Args:
193- path: Relative path of the file.
194- filename: Name of the file.
195-
196- Returns:
197- True if the file should be included, False otherwise.
198- """
199111 if not self .include_patterns :
200112 return True
201-
202113 for pattern in self .include_patterns :
203114 if fnmatch .fnmatch (path , pattern ) or fnmatch .fnmatch (filename , pattern ):
204115 return True
205116 return False
206117
207118 def _count_files (self , tree : Dict ) -> int :
208- """
209- Recursively count total number of files in the tree structure.
210-
211- Args:
212- tree: File tree dictionary to count files in.
213-
214- Returns:
215- Total number of files in the tree.
216- """
217119 if tree ["type" ] == "file" :
218120 return 1
219121 return sum (self ._count_files (child ) for child in tree .get ("children" , []))
220122
221123 def _calculate_size (self , tree : Dict ) -> float :
222- """
223- Recursively calculate total size of all files in the tree structure.
224-
225- Args:
226- tree: File tree dictionary to calculate size for.
227-
228- Returns:
229- Total size in kilobytes of all files in the tree.
230- """
231124 if tree ["type" ] == "file" :
232125 return tree .get ("_size_bytes" , 0 ) / 1024
233126 return sum (self ._calculate_size (child ) for child in tree .get ("children" , []))
0 commit comments