Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
0b38ca9
Atomically load the lowered program (#610)
natecook1000 Oct 6, 2022
335a0c2
Add tests for line start/end word boundary diffs (#616)
natecook1000 Dec 2, 2022
54ff516
Add tweaks for Android
finagolfin Dec 5, 2022
eb7f801
Fix documentation typo (#615)
ole Dec 6, 2022
c51e8f2
Fix abstract for Regex.dotMatchesNewlines(_:). (#614)
amartini51 Dec 6, 2022
45f752a
Remove `RegexConsumer` and fix its dependencies (#617)
natecook1000 Dec 14, 2022
ed95066
Improve StringProcessing and RegexBuilder documentation (#611)
natecook1000 Dec 14, 2022
c34cea5
Set availability for inverted character class test (#621)
natecook1000 Dec 16, 2022
3ca8b13
Merge pull request #618 from buttaface/droid
Azoy Dec 18, 2022
3a3dc7a
Add type annotations in RegexBuilder tests
natecook1000 Feb 1, 2023
6c4f291
Workaround for fileprivate array issue
natecook1000 Feb 1, 2023
7e059b7
Merge pull request #628 from apple/result_builder_changes_workaround
natecook1000 Feb 1, 2023
6a4077f
Fix an issue where named character classes weren't getting converted …
DaveEwing Feb 1, 2023
8184fc0
Merge pull request #629 from apple/dewing/CharacterClassDSLConversion
DaveEwing Feb 2, 2023
2a78475
Stop at end of search string in TwoWaySearcher (#631)
natecook1000 Feb 8, 2023
d5a6cec
Correct misspelling in DSL renderer (#627)
natecook1000 Feb 8, 2023
7756942
Fix output type mismatch with RegexBuilder (#626)
natecook1000 Feb 9, 2023
070e0ec
Revert "Merge pull request #628 from apple/result_builder_changes_wor…
natecook1000 Feb 15, 2023
1358fc0
Use `some` syntax in variadics
natecook1000 Feb 15, 2023
083d32a
Type checker workaround: adjust test
milseman Apr 2, 2023
ca92db7
Further refactor to work around type checker regression
milseman Apr 3, 2023
336f9c5
Merge pull request #643 from milseman/typechecker_workaround
milseman Apr 3, 2023
852b890
Align availability macro with OS versions (#641)
milseman Apr 4, 2023
236b47c
Speed up general character class matching (#642)
milseman Apr 4, 2023
348e6c3
Test for \s matching CRLF when scalar matching (#648)
natecook1000 Apr 4, 2023
a7ba701
General ascii fast paths for character classes (#644)
milseman Apr 4, 2023
e01e43d
Remove the unsupported `anyScalar` case (#650)
natecook1000 Apr 4, 2023
e0352a2
Fix range-based quantification fast path (#653)
natecook1000 Apr 11, 2023
923cf5e
Add in ASCII fast-path for anyNonNewline (#654)
milseman Apr 11, 2023
9ea9936
Avoid long expression type checks (#657)
natecook1000 Apr 11, 2023
58626cc
Processor cleanup (#655)
milseman Apr 14, 2023
4418183
Fix `firstRange(of:)` search (#656)
natecook1000 Apr 14, 2023
57b343d
Bug fix and hot path for quantified `.` (#658)
milseman Apr 18, 2023
6695027
Run scalar-semantic benchmark variants (#659)
milseman Apr 18, 2023
8eafd55
Refactor operations to be on String (#664)
milseman Apr 19, 2023
0354667
Provide unique generic method parameter names (#669)
natecook1000 May 16, 2023
98d5ddc
Enable quantification optimizations for scalar semantics (#671)
milseman May 22, 2023
84bc9c8
Merge remote-tracking branch 'origin/main' into swift/main
milseman May 22, 2023
59fce2f
Remove redundant test
milseman May 24, 2023
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Remove the unsupported anyScalar case (#650)
We decided not to support the `anyScalar` character class, which would match a single Unicode scalar regardless of matching mode. However, its representation was still included in the various character class types in the regex engine, leading to unreachable code and unclear requirements when changing or adding new code. This change removes that representation where possible. The `DSLTree.Atom.CharacterClass` enum is left unchanged, since it is marked `@_spi(RegexBuilder) public`. Any use of its `anyUnicodeScalar` case is handled with a `fatalError("Unsupported")`, and that case isn't produced on any code path.
  • Loading branch information
natecook1000 authored Apr 4, 2023
commit e01e43d3f753cee6e25fbaadc26b0d624e5c6ad9
3 changes: 0 additions & 3 deletions Sources/_StringProcessing/ByteCodeGen.swift
Original file line number Diff line number Diff line change
Expand Up @@ -702,9 +702,6 @@ fileprivate extension Compiler.ByteCodeGen {
case .characterClass(let cc):
// Custom character class that consumes a single grapheme
let model = cc.asRuntimeModel(options)
guard model.consumesSingleGrapheme else {
return false
}
builder.buildQuantify(
model: model,
kind,
Expand Down
8 changes: 0 additions & 8 deletions Sources/_StringProcessing/Engine/MEBuiltins.swift
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,6 @@ extension String {
switch (isScalarSemantics, cc) {
case (_, .anyGrapheme):
next = index(after: currentPosition)
case (_, .anyScalar):
next = unicodeScalars.index(after: currentPosition)
case (true, _):
next = unicodeScalars.index(after: currentPosition)
case (false, _):
Expand All @@ -204,12 +202,6 @@ extension String {
switch cc {
case .any, .anyGrapheme:
matched = true
case .anyScalar:
if isScalarSemantics {
matched = true
} else {
matched = isOnGraphemeClusterBoundary(next)
}
case .digit:
if isScalarSemantics {
matched = scalar.properties.numericType != nil && asciiCheck
Expand Down
4 changes: 2 additions & 2 deletions Sources/_StringProcessing/PrintAsPattern.swift
Original file line number Diff line number Diff line change
Expand Up @@ -760,8 +760,6 @@ extension DSLTree.Atom.CharacterClass {
switch self {
case .anyGrapheme:
return ".anyGraphemeCluster"
case .anyUnicodeScalar:
return ".anyUnicodeScalar"
case .digit:
return ".digit"
case .notDigit:
Expand All @@ -786,6 +784,8 @@ extension DSLTree.Atom.CharacterClass {
return ".whitespace"
case .notWhitespace:
return ".whitespace.inverted"
case .anyUnicodeScalar:
fatalError("Unsupported")
}
}
}
Expand Down
1 change: 0 additions & 1 deletion Sources/_StringProcessing/Regex/ASTConversion.swift
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,6 @@ extension AST.Atom.EscapedBuiltin {
case .wordCharacter: return .word
case .notWordCharacter: return .notWord
case .graphemeCluster: return .anyGrapheme
case .trueAnychar: return .anyUnicodeScalar
default: return nil
}
}
Expand Down
3 changes: 2 additions & 1 deletion Sources/_StringProcessing/Regex/DSLTree.swift
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,6 @@ extension DSLTree.Atom.CharacterClass {
public var inverted: DSLTree.Atom.CharacterClass? {
switch self {
case .anyGrapheme: return nil
case .anyUnicodeScalar: return nil
case .digit: return .notDigit
case .notDigit: return .digit
case .word: return .notWord
Expand All @@ -273,6 +272,8 @@ extension DSLTree.Atom.CharacterClass {
case .notVerticalWhitespace: return .verticalWhitespace
case .whitespace: return .notWhitespace
case .notWhitespace: return .whitespace
case .anyUnicodeScalar:
fatalError("Unsupported")
}
}
}
Expand Down
3 changes: 1 addition & 2 deletions Sources/_StringProcessing/Unicode/ASCII.swift
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,7 @@ extension String {

// TODO: bitvectors
switch cc {
case .any, .anyGrapheme, .anyScalar:
// TODO: should any scalar not consume CR-LF in scalar semantic mode?
case .any, .anyGrapheme:
return (next, true)

case .digit:
Expand Down
14 changes: 1 addition & 13 deletions Sources/_StringProcessing/_CharacterClassModel.swift
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@ struct _CharacterClassModel: Hashable {
case any = 0
/// Any grapheme cluster
case anyGrapheme
/// Any Unicode scalar
case anyScalar
/// Character.isDigit
case digit
/// Horizontal whitespace: `[:blank:]`, i.e
Expand Down Expand Up @@ -90,15 +88,6 @@ struct _CharacterClassModel: Hashable {
}
}

extension _CharacterClassModel {
var consumesSingleGrapheme: Bool {
switch self.cc {
case .anyScalar: return false
default: return true
}
}
}

extension _CharacterClassModel.Representation {
/// Returns true if this CharacterClass should be matched by strict ascii under the given options
func isStrictAscii(options: MatchingOptions) -> Bool {
Expand All @@ -119,7 +108,6 @@ extension _CharacterClassModel.Representation: CustomStringConvertible {
switch self {
case .any: return "<any>"
case .anyGrapheme: return "<any grapheme>"
case .anyScalar: return "<any scalar>"
case .digit: return "<digit>"
case .horizontalWhitespace: return "<horizontal whitespace>"
case .newlineSequence: return "<newline sequence>"
Expand Down Expand Up @@ -185,7 +173,7 @@ extension DSLTree.Atom.CharacterClass {
case .anyGrapheme:
cc = .anyGrapheme
case .anyUnicodeScalar:
cc = .anyScalar
fatalError("Unsupported")
}
return _CharacterClassModel(cc: cc, options: options, isInverted: inverted)
}
Expand Down