@@ -163,9 +163,14 @@ def __str__(self) -> str:
         return "\n ".join(lines)
 
 
+@dataclass
+class State:
+    has_comments: bool = False
+
+
 def to_items(
     text: str, line_offset: int = 0, column_offset: int = 0
-) -> Iterable[tuple[str, str]]:
+) -> tuple[list[tuple[str, str]], State]:
    """Parse a directive option block into (key, value) tuples.
 
    :param text: The directive option text.
@@ -174,12 +179,17 @@ def to_items(
 
    :raises: `TokenizeError`
    """
-    for key_token, value_token in to_tokens(text, line_offset, column_offset):
-        yield key_token.value, value_token.value if value_token is not None else ""
+    output = []
+    state = State()
+    for key_token, value_token in _to_tokens(text, state, line_offset, column_offset):
+        output.append(
+            (key_token.value, value_token.value if value_token is not None else "")
+        )
+    return output, state
 
 
-def to_tokens(
-    text: str, line_offset: int = 0, column_offset: int = 0
+def _to_tokens(
+    text: str, state: State, line_offset: int = 0, column_offset: int = 0
 ) -> Iterable[tuple[KeyToken, ValueToken | None]]:
    """Parse a directive option, and yield key/value token pairs.
 
@@ -191,7 +201,7 @@ def to_tokens(
    """
    key_token: KeyToken | None = None
    try:
-        for token in tokenize(text):
+        for token in _tokenize(text, state):
            if isinstance(token, KeyToken):
                if key_token is not None:
                    yield key_token, None
@@ -207,12 +217,12 @@ def to_tokens(
        raise
 
 
-def tokenize(text: str) -> Iterable[Token]:
+def _tokenize(text: str, state: State) -> Iterable[Token]:
    """Yield tokens from a directive option stream."""
    stream = StreamBuffer(text)
 
    while True:
-        _scan_to_next_token(stream)
+        _scan_to_next_token(stream, state)
 
        if stream.peek() == _CHARS_END:
            break
@@ -227,9 +237,9 @@ def tokenize(text: str) -> Iterable[Token]:
        if ch in ("'", '"'):
            yield _scan_flow_scalar(stream, cast(Literal['"', "'"], ch), is_key=True)
        else:
-            yield _scan_plain_scalar(stream, is_key=True)
+            yield _scan_plain_scalar(stream, state, is_key=True)
 
-        _scan_to_next_token(stream)
+        _scan_to_next_token(stream, state)
 
        # check next char is colon + space
        if stream.peek() != ":":
@@ -240,21 +250,21 @@ def tokenize(text: str) -> Iterable[Token]:
        end_mark = stream.get_position()
        yield ColonToken(start_mark, end_mark)
 
-        _scan_to_next_token(stream)
+        _scan_to_next_token(stream, state)
 
        # now find value
        ch = stream.peek()
        if stream.column == 0:
            pass
        elif ch in ("|", ">"):
-            yield _scan_block_scalar(stream, cast(Literal["|", ">"], ch))
+            yield _scan_block_scalar(stream, cast(Literal["|", ">"], ch), state)
        elif ch in ("'", '"'):
            yield _scan_flow_scalar(stream, cast(Literal['"', "'"], ch), is_key=False)
        else:
-            yield _scan_plain_scalar(stream, is_key=False)
+            yield _scan_plain_scalar(stream, state, is_key=False)
 
 
-def _scan_to_next_token(stream: StreamBuffer) -> None:
+def _scan_to_next_token(stream: StreamBuffer, state: State) -> None:
    """Skip spaces, line breaks and comments.
 
    The byte order mark is also stripped,
@@ -267,14 +277,15 @@ def _scan_to_next_token(stream: StreamBuffer) -> None:
        while stream.peek() == " ":
            stream.forward()
        if stream.peek() == "#":
+            state.has_comments = True
            while stream.peek() not in _CHARS_END_NEWLINE:
                stream.forward()
        if not _scan_line_break(stream):
            found = True
 
 
 def _scan_plain_scalar(
-    stream: StreamBuffer, is_key: bool = False
+    stream: StreamBuffer, state: State, is_key: bool = False
 ) -> KeyToken | ValueToken:
    chunks = []
    start_mark = stream.get_position()
@@ -284,6 +295,7 @@ def _scan_plain_scalar(
    while True:
        length = 0
        if stream.peek() == "#":
+            state.has_comments = True
            break
        while True:
            ch = stream.peek(length)
@@ -302,6 +314,8 @@ def _scan_plain_scalar(
        end_mark = stream.get_position()
        spaces = _scan_plain_spaces(stream, allow_newline=(not is_key))
        if not spaces or stream.peek() == "#" or (stream.column < indent):
+            if stream.peek() == "#":
+                state.has_comments = True
            break
 
    return (
@@ -472,7 +486,9 @@ def _scan_flow_scalar_breaks(stream: StreamBuffer) -> list[str]:
    return chunks
 
 
-def _scan_block_scalar(stream: StreamBuffer, style: Literal["|", ">"]) -> ValueToken:
+def _scan_block_scalar(
+    stream: StreamBuffer, style: Literal["|", ">"], state: State
+) -> ValueToken:
    indent = 0
    folded = style == ">"
    chunks = []
@@ -481,7 +497,7 @@ def _scan_block_scalar(stream: StreamBuffer, style: Literal["|", ">"]) -> ValueToken:
    # Scan the header.
    stream.forward()
    chomping, increment = _scan_block_scalar_indicators(stream, start_mark)
-    _scan_block_scalar_ignored_line(stream, start_mark)
+    _scan_block_scalar_ignored_line(stream, start_mark, state)
 
    # Determine the indentation level and go to the first non-empty line.
    min_indent = indent + 1
@@ -575,10 +591,13 @@ def _scan_block_scalar_indicators(
    return chomping, increment
 
 
-def _scan_block_scalar_ignored_line(stream: StreamBuffer, start_mark: Position) -> None:
+def _scan_block_scalar_ignored_line(
+    stream: StreamBuffer, start_mark: Position, state: State
+) -> None:
    while stream.peek() == " ":
        stream.forward()
    if stream.peek() == "#":
+        state.has_comments = True
        while stream.peek() not in _CHARS_END_NEWLINE:
            stream.forward()
    ch = stream.peek()
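
For reference, a minimal sketch of how a caller adapts to the new `to_items` signature. It assumes only the names visible in this diff (`to_items`, `State`); the directive option text and the expected output are illustrative, not taken from the test suite:

```python
# Hypothetical directive option block: the full-line "#" comment is
# skipped by _scan_to_next_token, which now also records it on State.
options_text = "# set a caption\ncaption: A code block\nlinenos: true\n"

# to_items no longer lazily yields (key, value) pairs; it returns the
# fully parsed list together with the State gathered while tokenizing.
items, state = to_items(options_text)

print(items)               # expected: [('caption', 'A code block'), ('linenos', 'true')]
print(state.has_comments)  # expected: True, since a "#" comment was skipped
```

Threading a mutable `State` through the scanners (now private as `_to_tokens`/`_tokenize`) lets every comment-skipping site set the flag without altering any intermediate return type; only the public `to_items` changes shape, from an iterable of pairs to a `(list, State)` tuple.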