@@ -456,11 +456,108 @@ extension Source {
456456 return AST . Trivia ( trivia)
457457 }
458458
/// Attempt to lex a single matching option at the current position.
///
///     MatchingOption -> 'i' | 'J' | 'm' | 'n' | 's' | 'U' | 'x' | 'xx' | 'w'
///                     | 'D' | 'P' | 'S' | 'W' | 'y{' ('g' | 'w') '}'
///
/// - Returns: The lexed option together with its source location, or `nil`
///   if the input is exhausted or the next character does not begin an
///   option. Nothing is consumed in the `nil` case.
/// - Throws: Whatever `expect` throws when a `y` is not followed by a
///   complete `{g}` or `{w}`.
mutating func lexMatchingOption() throws -> AST.MatchingOption? {
  typealias Kind = AST.MatchingOption.Kind

  let located = try recordLoc { input -> Kind? in
    guard let next = input.peek() else { return nil }

    // Options that are spelled with exactly one character.
    let singleCharacter: Kind?
    switch next {
    // PCRE options.
    case "i": singleCharacter = .caseInsensitive
    case "J": singleCharacter = .allowDuplicateGroupNames
    case "m": singleCharacter = .multiline
    case "n": singleCharacter = .noAutoCapture
    case "s": singleCharacter = .singleLine
    case "U": singleCharacter = .reluctantByDefault

    // ICU options.
    case "w": singleCharacter = .unicodeWordBoundaries

    // Oniguruma options.
    case "D": singleCharacter = .asciiOnlyDigit
    case "P": singleCharacter = .asciiOnlyPOSIXProps
    case "S": singleCharacter = .asciiOnlySpace
    case "W": singleCharacter = .asciiOnlyWord

    default: singleCharacter = nil
    }
    if let kind = singleCharacter {
      input.advance()
      return kind
    }

    // Options that may span more than one character.
    switch next {
    case "x":
      // PCRE: a second 'x' upgrades 'extended' to 'extra extended'.
      input.advance()
      if input.tryEat("x") {
        return .extraExtended
      }
      return .extended

    case "y":
      // Oniguruma text segment mode: 'y{w}' or 'y{g}'.
      input.advance()
      try input.expect("{")
      guard input.tryEat("w") else {
        try input.expect("g")
        try input.expect("}")
        return .textSegmentGraphemeMode
      }
      try input.expect("}")
      return .textSegmentWordMode

    default:
      // Not the start of any known option; leave the input untouched.
      return nil
    }
  }
  return located.map { AST.MatchingOption($0.value, location: $0.location) }
}
513+
/// Attempt to lex a sequence of matching options.
///
///     MatchingOptionSeq -> '^' MatchingOption* | MatchingOption+
///                        | MatchingOption* '-' MatchingOption*
///
/// A leading `^` is followed only by options to add. Otherwise, options to
/// add may be followed by `-` and a (possibly empty) list of options to
/// remove.
///
/// - Returns: The lexed sequence, or `nil` if there was no caret, no minus,
///   and no option could be lexed.
/// - Throws: `ParseError.cannotRemoveTextSegmentOptions` if a text segment
///   mode option appears after `-`, plus any error thrown by
///   `lexMatchingOption`.
mutating func lexMatchingOptionSequence(
) throws -> AST.MatchingOptionSequence? {
  let caret = recordLoc { $0.tryEat("^") }

  // TODO: Warn on duplicate options, and options appearing in both adding
  // and removing lists?
  var added: [AST.MatchingOption] = []
  while let option = try lexMatchingOption() {
    added.append(option)
  }

  // A sequence that began with a caret '^' can only add options, so there
  // is no removal list to look for.
  if caret.value {
    return .init(caretLoc: caret.location, adding: added, minusLoc: nil,
                 removing: [])
  }

  // Without a '-', the sequence consists solely of the added options, and
  // must contain at least one of them to count as a sequence at all.
  let minus = recordLoc { $0.tryEat("-") }
  guard minus.value else {
    if added.isEmpty {
      return nil
    }
    return .init(caretLoc: nil, adding: added, minusLoc: nil, removing: [])
  }

  // Everything after the '-' is an option to remove.
  var removed: [AST.MatchingOption] = []
  while let option = try lexMatchingOption() {
    // Text segment options cannot be removed with (?-); they have to be
    // switched to a different mode instead.
    guard !option.isTextSegmentMode else {
      throw ParseError.cannotRemoveTextSegmentOptions
    }
    removed.append(option)
  }
  return .init(caretLoc: nil, adding: added, minusLoc: minus.location,
               removing: removed)
}
459555
460556 /// Try to consume the start of a group
461557 ///
462558 /// GroupStart -> '(?' GroupKind | '('
463- /// GroupKind -> Named | ':' | '|' | '>' | '=' | '!' | '<=' | '<!'
559+ /// GroupKind -> Named | ':' | '|' | '>' | '=' | '!' | '*' | '<=' | '<!'
560+ /// | '<*' | MatchingOptionSeq (':' | ')')
464561 /// Named -> '<' [^'>']+ '>' | 'P<' [^'>']+ '>'
465562 /// | '\'' [^'\'']+ '\''
466563 ///
@@ -502,8 +599,25 @@ extension Source {
502599 return . namedCapture( name)
503600 }
504601
505- throw ParseError . misc (
506- " Unknown group kind '(? \( src. peek ( ) !) ' " )
602+ // Matching option changing group (?iJmnsUxxxDPSWy{..}-iJmnsUxxxDPSW:).
603+ if let seq = try src. lexMatchingOptionSequence ( ) {
604+ if src. tryEat ( " : " ) {
605+ return . changeMatchingOptions( seq, hasImplicitScope: false )
606+ }
607+ // If this isn't start of an explicit group, we should have an
608+ // implicit group that covers the remaining elements of the current
609+ // group.
610+ // TODO: This implicit scoping behavior matches Oniguruma, but PCRE
611+ // also does it across alternations, which will require additional
612+ // handling.
613+ try src. expect ( " ) " )
614+ return . changeMatchingOptions( seq, hasImplicitScope: true )
615+ }
616+
617+ guard let next = src. peek ( ) else {
618+ throw ParseError . expectedGroupSpecifier
619+ }
620+ throw ParseError . misc ( " Unknown group kind '(? \( next) ' " )
507621 }
508622
509623 // Explicitly spelled out PCRE2 syntax for some groups.
0 commit comments