Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
0b38ca9
Atomically load the lowered program (#610)
natecook1000 Oct 6, 2022
335a0c2
Add tests for line start/end word boundary diffs (#616)
natecook1000 Dec 2, 2022
54ff516
Add tweaks for Android
finagolfin Dec 5, 2022
eb7f801
Fix documentation typo (#615)
ole Dec 6, 2022
c51e8f2
Fix abstract for Regex.dotMatchesNewlines(_:). (#614)
amartini51 Dec 6, 2022
45f752a
Remove `RegexConsumer` and fix its dependencies (#617)
natecook1000 Dec 14, 2022
ed95066
Improve StringProcessing and RegexBuilder documentation (#611)
natecook1000 Dec 14, 2022
c34cea5
Set availability for inverted character class test (#621)
natecook1000 Dec 16, 2022
3ca8b13
Merge pull request #618 from buttaface/droid
Azoy Dec 18, 2022
3a3dc7a
Add type annotations in RegexBuilder tests
natecook1000 Feb 1, 2023
6c4f291
Workaround for fileprivate array issue
natecook1000 Feb 1, 2023
7e059b7
Merge pull request #628 from apple/result_builder_changes_workaround
natecook1000 Feb 1, 2023
6a4077f
Fix an issue where named character classes weren't getting converted …
DaveEwing Feb 1, 2023
8184fc0
Merge pull request #629 from apple/dewing/CharacterClassDSLConversion
DaveEwing Feb 2, 2023
2a78475
Stop at end of search string in TwoWaySearcher (#631)
natecook1000 Feb 8, 2023
d5a6cec
Correct misspelling in DSL renderer (#627)
natecook1000 Feb 8, 2023
7756942
Fix output type mismatch with RegexBuilder (#626)
natecook1000 Feb 9, 2023
070e0ec
Revert "Merge pull request #628 from apple/result_builder_changes_wor…
natecook1000 Feb 15, 2023
1358fc0
Use `some` syntax in variadics
natecook1000 Feb 15, 2023
083d32a
Type checker workaround: adjust test
milseman Apr 2, 2023
ca92db7
Further refactor to work around type checker regression
milseman Apr 3, 2023
336f9c5
Merge pull request #643 from milseman/typechecker_workaround
milseman Apr 3, 2023
852b890
Align availability macro with OS versions (#641)
milseman Apr 4, 2023
236b47c
Speed up general character class matching (#642)
milseman Apr 4, 2023
348e6c3
Test for \s matching CRLF when scalar matching (#648)
natecook1000 Apr 4, 2023
a7ba701
General ascii fast paths for character classes (#644)
milseman Apr 4, 2023
e01e43d
Remove the unsupported `anyScalar` case (#650)
natecook1000 Apr 4, 2023
e0352a2
Fix range-based quantification fast path (#653)
natecook1000 Apr 11, 2023
923cf5e
Add in ASCII fast-path for anyNonNewline (#654)
milseman Apr 11, 2023
9ea9936
Avoid long expression type checks (#657)
natecook1000 Apr 11, 2023
58626cc
Processor cleanup (#655)
milseman Apr 14, 2023
4418183
Fix `firstRange(of:)` search (#656)
natecook1000 Apr 14, 2023
57b343d
Bug fix and hot path for quantified `.` (#658)
milseman Apr 18, 2023
6695027
Run scalar-semantic benchmark variants (#659)
milseman Apr 18, 2023
8eafd55
Refactor operations to be on String (#664)
milseman Apr 19, 2023
0354667
Provide unique generic method parameter names (#669)
natecook1000 May 16, 2023
98d5ddc
Enable quantification optimizations for scalar semantics (#671)
milseman May 22, 2023
84bc9c8
Merge remote-tracking branch 'origin/main' into swift/main
milseman May 22, 2023
59fce2f
Remove redundant test
milseman May 24, 2023
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Improve StringProcessing and RegexBuilder documentation (#611)
This includes documentation improvements for core types/methods, RegexBuilder types along with their generated variadic initializers, and adds some curation. It also includes tests of the documentation code samples.
  • Loading branch information
natecook1000 authored Dec 14, 2022
commit ed95066932177db1079c4add619fcfb3105fbead
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,6 @@ fastlane/test_output
# https://github.com/johnno1962/injectionforxcode

iOSInjectionProject/

# DocC build folder
*.docc-build
8 changes: 8 additions & 0 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,14 @@ let package = Package(
swiftSettings: [
.unsafeFlags(["-Xfrontend", "-disable-availability-checking"])
]),
.testTarget(
name: "DocumentationTests",
dependencies: ["_StringProcessing", "RegexBuilder"],
swiftSettings: [
.unsafeFlags(["-Xfrontend", "-disable-availability-checking"]),
.unsafeFlags(["-enable-bare-slash-regex"]),
]),

// FIXME: Disabled due to rdar://94763190.
// .testTarget(
// name: "Prototypes",
Expand Down
4 changes: 2 additions & 2 deletions Sources/RegexBuilder/Anchor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ extension Anchor {
///
/// Word boundaries are identified using the Unicode default word boundary
/// algorithm by default. To specify a different word boundary algorithm,
/// see the `RegexComponent.wordBoundaryKind(_:)` method.
/// use the `wordBoundaryKind(_:)` method.
///
/// This anchor is equivalent to `\b` in regex syntax.
public static var wordBoundary: Anchor {
Expand All @@ -157,7 +157,7 @@ extension Anchor {
/// The inverse of this anchor, which matches at every position that this
/// anchor does not.
///
/// For the `wordBoundary` and `textSegmentBoundary` anchors, the inverted
/// For the ``wordBoundary`` and ``textSegmentBoundary`` anchors, the inverted
/// version corresponds to `\B` and `\Y`, respectively.
public var inverted: Anchor {
var result = self
Expand Down
9 changes: 9 additions & 0 deletions Sources/RegexBuilder/Builder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@

@_spi(RegexBuilder) import _StringProcessing

/// A custom parameter attribute that constructs regular expressions from
/// closures.
///
/// You typically see `RegexComponentBuilder` as a parameter attribute for
/// `Regex`- or `RegexComponent`-producing closure parameters, allowing those
/// closures to combine multiple regular expression components. Type
/// initializers and string algorithm methods in the RegexBuilder framework
/// include a builder closure parameter, so that you can use regular expression
/// components together.
@available(SwiftStdlib 5.7, *)
@resultBuilder
public enum RegexComponentBuilder {
Expand Down
102 changes: 94 additions & 8 deletions Sources/RegexBuilder/CharacterClass.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@
@_implementationOnly import _RegexParser
@_spi(RegexBuilder) import _StringProcessing

/// A class of characters that match in a regex.
///
/// A character class can represent individual characters, a group of
/// characters, the set of characters that match some set of criteria, or
/// a set algebraic combination of all of the above.
@available(SwiftStdlib 5.7, *)
public struct CharacterClass {
internal var ccc: DSLTree.CustomCharacterClass
Expand Down Expand Up @@ -42,6 +47,20 @@ extension CharacterClass: RegexComponent {

@available(SwiftStdlib 5.7, *)
extension CharacterClass {
/// A character class that matches any character that does not match this
/// character class.
///
/// For example, you can use the `inverted` property to create a character
/// class that excludes a specific group of characters:
///
/// let validCharacters = CharacterClass("a"..."z", .anyOf("-_"))
/// let invalidCharacters = validCharacters.inverted
///
/// let username = "user123"
/// if username.contains(invalidCharacters) {
/// print("Invalid username: '\(username)'")
/// }
/// // Prints "Invalid username: 'user123'"
public var inverted: CharacterClass {
if let inv = builtin?.inverted {
return CharacterClass(builtin: inv)
Expand All @@ -53,26 +72,50 @@ extension CharacterClass {

@available(SwiftStdlib 5.7, *)
extension RegexComponent where Self == CharacterClass {
/// A character class matching every element, newlines included.
///
/// The `dotMatchesNewlines()` method has no effect on this character class.
/// If you need to match any character except a newline, use
/// ``anyNonNewline`` instead.
///
/// In regex syntax, this corresponds to the "dot" metacharacter in
/// single-line mode: `(?s:.)`.
public static var any: CharacterClass {
  let anyMember = DSLTree.CustomCharacterClass(members: [.atom(.any)])
  return CharacterClass(anyMember)
}

/// A character class matching every element except a newline.
///
/// The `dotMatchesNewlines()` method has no effect on this character class.
/// If you need to match any character, newlines included, use ``any``
/// instead.
///
/// In regex syntax, this corresponds to the "dot" metacharacter with
/// single-line mode disabled: `(?-s:.)`.
public static var anyNonNewline: CharacterClass {
  let nonNewlineMember = DSLTree.CustomCharacterClass(
    members: [.atom(.anyNonNewline)])
  return CharacterClass(nonNewlineMember)
}

/// A character class matching exactly one `Character` (that is, one extended
/// grapheme cluster), no matter which semantic level is in effect.
///
/// In regex syntax, this corresponds to `\X`.
public static var anyGraphemeCluster: CharacterClass {
  return CharacterClass(builtin: .anyGrapheme)
}

public static var whitespace: CharacterClass {
.init(builtin: .whitespace)
}

/// A character class matching any digit.
///
/// In regex syntax, this corresponds to `\d`.
public static var digit: CharacterClass {
  return CharacterClass(builtin: .digit)
}

/// A character class that matches any hexadecimal digit.
///
/// `hexDigit` matches the ASCII characters `0` through `9`, and upper- or
/// lowercase `a` through `f`. The corresponding characters in the "Halfwidth
/// and Fullwidth Forms" Unicode block are not matched by this character
/// class.
public static var hexDigit: CharacterClass {
.init(DSLTree.CustomCharacterClass(members: [
.range(.char("A"), .char("F")),
Expand All @@ -81,27 +124,56 @@ extension RegexComponent where Self == CharacterClass {
]))
}

/// A character class matching any element considered a "word character".
///
/// In regex syntax, this corresponds to `\w`.
public static var word: CharacterClass {
  return CharacterClass(builtin: .word)
}

/// A character class matching any element classified as whitespace.
///
/// In regex syntax, this corresponds to `\s`.
public static var whitespace: CharacterClass {
  return CharacterClass(builtin: .whitespace)
}

/// A character class matching any element classified as horizontal
/// whitespace.
///
/// In regex syntax, this corresponds to `\h`.
public static var horizontalWhitespace: CharacterClass {
  return CharacterClass(builtin: .horizontalWhitespace)
}

/// A character class matching any newline sequence.
///
/// In regex syntax, this corresponds to `\R` or `\n`.
public static var newlineSequence: CharacterClass {
  return CharacterClass(builtin: .newlineSequence)
}

/// A character class matching any element classified as vertical
/// whitespace.
///
/// In regex syntax, this corresponds to `\v`.
public static var verticalWhitespace: CharacterClass {
  return CharacterClass(builtin: .verticalWhitespace)
}

public static var word: CharacterClass {
.init(builtin: .word)
}
}

@available(SwiftStdlib 5.7, *)
extension RegexComponent where Self == CharacterClass {
/// Returns a character class that matches any character in the given string
/// or sequence.
///
/// Calling this method with a group of characters is equivalent to listing
/// those characters in a custom character class in regex syntax. For example,
/// the two regexes in this example are equivalent:
///
/// let regex1 = /[abcd]+/
/// let regex2 = OneOrMore(.anyOf("abcd"))
public static func anyOf<S: Sequence>(_ s: S) -> CharacterClass
where S.Element == Character
{
Expand All @@ -111,6 +183,9 @@ extension RegexComponent where Self == CharacterClass {

/// Returns a character class that matches any Unicode scalar in the given
/// sequence.
///
/// Calling this method with a group of Unicode scalars is equivalent to
/// listing them in a custom character class in regex syntax.
public static func anyOf<S: Sequence>(_ s: S) -> CharacterClass
where S.Element == UnicodeScalar
{
Expand All @@ -122,6 +197,11 @@ extension RegexComponent where Self == CharacterClass {
// Unicode properties
@available(SwiftStdlib 5.7, *)
extension CharacterClass {
/// Creates a character class matching every element that has the given
/// Unicode general category.
///
/// Passing `.uppercaseLetter`, for instance, yields a class equivalent to
/// `/\p{Uppercase_Letter}/` or `/\p{Lu}/`.
///
/// - Parameter category: The Unicode general category to match.
/// - Returns: A character class for elements in `category`.
public static func generalCategory(
  _ category: Unicode.GeneralCategory
) -> CharacterClass {
  CharacterClass(.generalCategory(category))
}
Expand All @@ -148,6 +228,7 @@ public func ...(lhs: UnicodeScalar, rhs: UnicodeScalar) -> CharacterClass {

@available(SwiftStdlib 5.7, *)
extension RegexComponent where Self == CharacterClass {
/// Creates a character class that combines the given classes in a union.
public init(_ first: CharacterClass, _ rest: CharacterClass...) {
if rest.isEmpty {
self.init(first.ccc)
Expand All @@ -161,24 +242,29 @@ extension RegexComponent where Self == CharacterClass {

@available(SwiftStdlib 5.7, *)
extension CharacterClass {
/// Returns a character class formed as the union of this class and the
/// given class.
///
/// - Parameter other: The character class to combine with this one.
/// - Returns: A character class wrapping both classes as custom members.
public func union(_ other: CharacterClass) -> CharacterClass {
  let combined = DSLTree.CustomCharacterClass(
    members: [.custom(ccc), .custom(other.ccc)])
  return CharacterClass(combined)
}

/// Returns a character class formed as the intersection of this class and
/// the given class.
///
/// - Parameter other: The character class to intersect with this one.
/// - Returns: A character class holding a single intersection member.
public func intersection(_ other: CharacterClass) -> CharacterClass {
  let intersected = DSLTree.CustomCharacterClass(
    members: [.intersection(ccc, other.ccc)])
  return CharacterClass(intersected)
}

/// Returns a character class formed by removing the given class from this
/// class.
///
/// - Parameter other: The character class to subtract from this one.
/// - Returns: A character class holding a single subtraction member.
public func subtracting(_ other: CharacterClass) -> CharacterClass {
  let difference = DSLTree.CustomCharacterClass(
    members: [.subtraction(ccc, other.ccc)])
  return CharacterClass(difference)
}

/// Returns a character class matching elements in one or the other, but not both,
/// of this class and the given class.
public func symmetricDifference(_ other: CharacterClass) -> CharacterClass {
CharacterClass(.init(members: [
.symmetricDifference(self.ccc, other.ccc)
Expand Down
Loading