7
7
from enum import Enum , auto
8
8
from pathlib import Path
9
9
10
- from gitingest .exceptions import InvalidNotebookError
11
10
from gitingest .utils .ingestion_utils import _get_encoding_list
12
11
from gitingest .utils .notebook_utils import process_notebook
13
12
from gitingest .utils .textfile_checker_utils import is_textfile
14
13
15
- SEPARATOR = "=" * 48 + " \n "
14
+ SEPARATOR = "=" * 48 # Tiktoken, the tokenizer OpenAI uses, counts 2 tokens if we have more than 48 "=" characters
16
15
17
16
18
17
class FileSystemNodeType (Enum ):
@@ -36,108 +35,105 @@ class FileSystemNode: # pylint: disable=too-many-instance-attributes
36
35
"""
37
36
Class representing a node in the file system (either a file or directory).
38
37
39
- This class has more than the recommended number of attributes because it needs to
40
- track various properties of files and directories for comprehensive analysis.
38
+ Tracks properties of files/directories for comprehensive analysis.
41
39
"""
42
40
43
41
name : str
44
- type : FileSystemNodeType # e.g., "directory" or "file"
42
+ type : FileSystemNodeType
45
43
path_str : str
46
44
path : Path
47
45
size : int = 0
48
46
file_count : int = 0
49
47
dir_count : int = 0
50
48
depth : int = 0
51
- children : list [FileSystemNode ] = field (default_factory = list ) # Using default_factory instead of empty list
49
+ children : list [FileSystemNode ] = field (default_factory = list )
52
50
53
51
def sort_children (self ) -> None :
54
52
"""
55
53
Sort the children nodes of a directory according to a specific order.
56
54
57
55
Order of sorting:
58
- 1. README.md first
59
- 2. Regular files (not starting with dot)
60
- 3. Hidden files (starting with dot)
61
- 4. Regular directories (not starting with dot)
62
- 5. Hidden directories (starting with dot)
63
- All groups are sorted alphanumerically within themselves.
64
- """
65
- # Separate files and directories
66
- files = [child for child in self .children if child .type == FileSystemNodeType .FILE ]
67
- directories = [child for child in self .children if child .type == FileSystemNodeType .DIRECTORY ]
56
+ 1. README.md first
+ 2. Regular files (not starting with dot)
57
+ 3. Hidden files (starting with dot)
58
+ 4. Regular directories (not starting with dot)
59
+ 5. Hidden directories (starting with dot)
68
60
69
- # Find README.md
70
- readme_files = [f for f in files if f .name .lower () == "readme.md" ]
71
- other_files = [f for f in files if f .name .lower () != "readme.md" ]
61
+ All groups are sorted alphanumerically (case-insensitively) within themselves.
72
62
73
- # Separate hidden and regular files/directories
74
- regular_files = [f for f in other_files if not f .name .startswith ("." )]
75
- hidden_files = [f for f in other_files if f .name .startswith ("." )]
76
- regular_dirs = [d for d in directories if not d .name .startswith ("." )]
77
- hidden_dirs = [d for d in directories if d .name .startswith ("." )]
63
+ Raises
64
+ ------
65
+ ValueError
66
+ If the node is not a directory.
67
+ """
68
+ if self .type != FileSystemNodeType .DIRECTORY :
69
+ raise ValueError ("Cannot sort children of a non-directory node" )
78
70
79
- # Sort each group alphanumerically
80
- regular_files .sort (key = lambda x : x .name )
81
- hidden_files .sort (key = lambda x : x .name )
82
- regular_dirs .sort (key = lambda x : x .name )
83
- hidden_dirs .sort (key = lambda x : x .name )
71
+ def _sort_key (child : FileSystemNode ) -> tuple [int , str ]:
72
+ # Return the (group, lowercased name) sort key; lower group numbers sort first
73
+ # Groups: 0=README, 1=regular file, 2=hidden file, 3=regular dir, 4=hidden dir
74
+ name = child .name .lower ()
75
+ if child .type == FileSystemNodeType .FILE :
76
+ if name == "readme.md" :
77
+ return (0 , name )
78
+ return (1 if not name .startswith ("." ) else 2 , name )
79
+ return (3 if not name .startswith ("." ) else 4 , name )
84
80
85
- self .children = readme_files + regular_files + hidden_files + regular_dirs + hidden_dirs
81
+ self .children . sort ( key = _sort_key )
86
82
87
83
@property
88
84
def content_string (self ) -> str :
89
85
"""
90
- Return the content of the node as a string.
91
-
92
- This property returns the content of the node as a string, including the path and content.
86
+ Return the content of the node as a string, including path and content.
93
87
94
88
Returns
95
89
-------
96
90
str
97
91
A string representation of the node's content.
98
92
"""
99
- content_repr = SEPARATOR
93
+ parts = [
94
+ SEPARATOR ,
95
+ f"File: { str (self .path_str ).replace (os .sep , '/' )} " ,
96
+ SEPARATOR ,
97
+ f"{ self .content } " ,
98
+ ]
100
99
101
- # Use forward slashes in output paths
102
- content_repr += f"File: { str (self .path_str ).replace (os .sep , '/' )} \n "
103
- content_repr += SEPARATOR
104
- content_repr += f"{ self .content } \n \n "
105
- return content_repr
100
+ return "\n " .join (parts ) + "\n \n "
106
101
107
102
@property
108
103
def content (self ) -> str : # pylint: disable=too-many-return-statements
109
104
"""
110
- Read the content of a file.
111
-
112
- This function attempts to open a file and read its contents using UTF-8 encoding.
113
- If an error occurs during reading (e.g., file is not found or permission error),
114
- it returns an error message.
105
+ Read the content of a text file (or a notebook). Return "[Non-text file]" for non-text files, or an error message if reading fails.
115
106
116
107
Returns
117
108
-------
118
109
str
119
110
The content of the file, or an error message if the file could not be read.
111
+
112
+ Raises
113
+ ------
114
+ ValueError
115
+ If the node is a directory.
120
116
"""
121
- if self .type == FileSystemNodeType .FILE and not is_textfile (self .path ):
117
+ if self .type == FileSystemNodeType .DIRECTORY :
118
+ raise ValueError ("Cannot read content of a directory node" )
119
+
120
+ if not is_textfile (self .path ):
122
121
return "[Non-text file]"
123
122
124
- try :
125
- if self .path .suffix == ".ipynb" :
126
- try :
127
- return process_notebook (self .path )
128
- except Exception as exc :
129
- return f"Error processing notebook: { exc } "
130
-
131
- for encoding in _get_encoding_list ():
132
- try :
133
- with self .path .open (encoding = encoding ) as f :
134
- return f .read ()
135
- except UnicodeDecodeError :
136
- continue
137
- except OSError as exc :
138
- return f"Error reading file: { exc } "
139
-
140
- return "Error: Unable to decode file with available encodings"
141
-
142
- except (OSError , InvalidNotebookError ) as exc :
143
- return f"Error reading file: { exc } "
123
+ if self .path .suffix == ".ipynb" :
124
+ try :
125
+ return process_notebook (self .path )
126
+ except Exception as exc :
127
+ return f"Error processing notebook: { exc } "
128
+
129
+ # Try multiple encodings
130
+ for encoding in _get_encoding_list ():
131
+ try :
132
+ with self .path .open (encoding = encoding ) as f :
133
+ return f .read ()
134
+ except UnicodeDecodeError :
135
+ continue
136
+ except OSError as exc :
137
+ return f"Error reading file: { exc } "
138
+
139
+ return "Error: Unable to decode file with available encodings"
0 commit comments