Skip to content

Commit d4eb173

Browse files
committed
Untested draft of native -rangeOfString: for bridged Swift Strings
1 parent 63124b5 commit d4eb173

File tree

2 files changed

+218
-2
lines changed

2 files changed

+218
-2
lines changed

stdlib/public/core/StringBridge.swift

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -727,6 +727,17 @@ extension StringProtocol {
727727
let upperbound = _toUTF16Index(range.lowerBound + range.count)
728728
return Range(uncheckedBounds: (lower: lowerbound, upper: upperbound))
729729
}
730+
731+
/// Converts an optional range of string indices into a `_SwiftNSRange`
/// expressed in UTF-16 offsets.
///
/// - Parameter indices: The range to convert, or `nil` for "no match".
/// - Returns: A range whose `location`/`length` come from
///   `_toUTF16Offsets(indices)`, or `(location: _cocoaNotFound, length: 0)`
///   when `indices` is `nil`.
func _toNSRange(_ indices: Range<Index>?) -> _SwiftNSRange {
  if let found = indices {
    let utf16Offsets = _toUTF16Offsets(found)
    return _SwiftNSRange(
      location: utf16Offsets.lowerBound,
      length: utf16Offsets.count
    )
  }
  // No range supplied: report the "not found" sentinel with zero length.
  return _SwiftNSRange(location: _cocoaNotFound, length: 0)
}
730741
}
731742

732743
extension String {

stdlib/public/core/StringStorageBridge.swift

Lines changed: 207 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,132 @@ import SwiftShims
1616

1717
internal let _cocoaASCIIEncoding: UInt = 1 /* NSASCIIStringEncoding */
internal let _cocoaUTF8Encoding: UInt = 4 /* NSUTF8StringEncoding */
// Sentinel `location` for "no match" ranges.
// NOTE(review): presumably mirrors NSNotFound (NSIntegerMax) — confirm.
internal let _cocoaNotFound: Int = Int.max
20+
21+
extension Collection {
  /// Core Boyer–Moore–Horspool scan shared by the `boyerMooreSearch`
  /// overloads.
  ///
  /// A window the size of `needle` slides over `self`. After each failed
  /// comparison the window advances by the shift that `skipTableLookup`
  /// reports for the *last element of the current window*.
  ///
  /// - Parameters:
  ///   - needle: The sequence of elements to look for.
  ///   - skipTableLookup: Maps an element to a Horspool shift: the distance
  ///     from the element's rightmost occurrence in `needle` (excluding the
  ///     final position) to the end of `needle`, or `needle.count` when the
  ///     element does not occur there. Results below 1 are clamped to 1 so
  ///     the scan always makes progress.
  /// - Returns: The range of the first occurrence of `needle`, `nil` if
  ///   there is none, or `startIndex ..< startIndex` for an empty needle.
  @inline(__always)
  @_effects(readonly)
  func _boyerMooreSearch<C>(
    for needle: C, skipTableLookup: (Element) -> Int) -> Range<Index>?
  where C: BidirectionalCollection, C.Element == Element, Element: Hashable {
    let needleCount = needle.count
    guard let initialSearchEnd = index(
      startIndex,
      offsetBy: needleCount,
      limitedBy: endIndex
    ) else {
      // The needle is longer than the haystack.
      return nil
    }
    var searchRange = startIndex ..< initialSearchEnd
    guard needleCount > 0 else {
      // An empty needle trivially matches the empty prefix.
      return searchRange
    }
    // Index of the final element of the current window; it selects the shift.
    var windowLast = index(startIndex, offsetBy: needleCount - 1)
    while true {
      // Forward comparison avoids materializing a reversed copy per window.
      if self[searchRange].elementsEqual(needle) {
        return searchRange
      }
      let skip = Swift.max(1, skipTableLookup(self[windowLast]))
      guard let newEnd = index(
        searchRange.upperBound,
        offsetBy: skip,
        limitedBy: endIndex
      ) else {
        // Went off the end, no match.
        return nil
      }
      // `newEnd` is in bounds, so the smaller indices can advance unchecked.
      let newStart = index(searchRange.lowerBound, offsetBy: skip)
      windowLast = index(windowLast, offsetBy: skip)
      searchRange = newStart ..< newEnd
    }
  }

  /// Finds the first occurrence of `needle` using a dictionary-backed
  /// Horspool shift table.
  ///
  /// - Parameter needle: The elements to search for.
  /// - Returns: The range of the first match, or `nil` if there is none.
  @_effects(readonly)
  func boyerMooreSearch<C>(for needle: C) -> Range<Index>?
  where C: BidirectionalCollection, C.Element == Element, Element: Hashable {
    let needleCount = needle.count
    var skipTable: [Element: Int] = [:]
    skipTable.reserveCapacity(needleCount)
    // Later (rightmost) occurrences overwrite earlier ones, leaving the
    // smallest safe shift. The final element is excluded so that a window
    // ending in it never gets a zero shift.
    for (offset, element) in needle.dropLast().enumerated() {
      skipTable[element] = needleCount - 1 - offset
    }
    return _boyerMooreSearch(for: needle) { skipTable[$0] ?? needleCount }
  }

  // 256 bytes
  typealias TableStorage =
    (UInt64, UInt64, UInt64, UInt64, UInt64, UInt64, UInt64, UInt64,
     UInt64, UInt64, UInt64, UInt64, UInt64, UInt64, UInt64, UInt64,
     UInt64, UInt64, UInt64, UInt64, UInt64, UInt64, UInt64, UInt64,
     UInt64, UInt64, UInt64, UInt64, UInt64, UInt64, UInt64, UInt64)

  /// Byte-specialized search using a 256-entry shift table indexed directly
  /// by the byte value.
  ///
  /// Needles shorter than 256 elements use a tuple-backed table of `UInt8`
  /// shifts (every shift is at most `needle.count`, so it fits in a byte);
  /// longer needles use an `[Int]` table.
  ///
  /// - Parameter needle: The bytes to search for.
  /// - Returns: The range of the first match, or `nil` if there is none.
  @_effects(readonly)
  func boyerMooreSearch<C>(for needle: C) -> Range<Index>?
  where C: BidirectionalCollection, C.Element == UInt8, Element == UInt8 {
    let needleCount = needle.count
    if needleCount < 256 {
      var skipTableStorage: TableStorage =
        (0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0)
      return withUnsafeMutableBytes(
        of: &skipTableStorage
      ) { rawBuffer -> Range<Index>? in
        let skipTable = rawBuffer.bindMemory(to: UInt8.self)
        // Bytes absent from the needle shift the window by its full length.
        skipTable.initialize(repeating: UInt8(needleCount))
        for (i, c) in needle.dropLast().enumerated() {
          skipTable[Int(c)] = UInt8(needleCount - 1 - i)
        }
        return _boyerMooreSearch(for: needle) { Int(skipTable[Int($0)]) }
      }
    } else {
      var skipTable = [Int](repeating: needleCount, count: 256)
      for (i, c) in needle.dropLast().enumerated() {
        skipTable[Int(c)] = needleCount - 1 - i
      }
      return _boyerMooreSearch(for: needle) { skipTable[Int($0)] }
    }
  }
}
109+
110+
/// A bidirectional, integer-indexed view over the UTF-16 code units of an
/// opaque string object.
///
/// Elements are read through a direct character pointer when
/// `_stdlib_binary_CFStringGetCharactersPtr` provides one, and through
/// `_cocoaStringSubscript` otherwise.
struct NSStringUTF16View : BidirectionalCollection {
  typealias Index = Int
  typealias Element = UTF16.CodeUnit

  /// The wrapped string object.
  let str: AnyObject
  /// Direct pointer to the code units, when one was obtained in `init`.
  var ptr: UnsafePointer<UTF16.CodeUnit>? = nil
  /// The length reported by `_stdlib_binary_CFStringGetLength`.
  let endIndex: Int

  init(_ opaque: AnyObject) {
    str = opaque
    endIndex = _stdlib_binary_CFStringGetLength(opaque)
    guard let direct = _stdlib_binary_CFStringGetCharactersPtr(opaque) else {
      // No contiguous storage exposed; `ptr` keeps its default of nil.
      return
    }
    ptr = UnsafePointer(direct)
  }

  @inline(__always)
  subscript(position: Int) -> UTF16.CodeUnit {
    if let direct = ptr {
      // Go through a buffer pointer spanning the whole string for the read.
      let codeUnits = UnsafeBufferPointer(start: direct, count: endIndex)
      return codeUnits[position]
    }
    return _cocoaStringSubscript(str, position)
  }

  var startIndex: Int {
    return 0
  }

  func index(after i: Index) -> Int {
    i + 1
  }

  func index(before i: Index) -> Int {
    i - 1
  }
}
19145

20146
// ObjC interfaces.
21147
extension _AbstractStringStorage {
@@ -132,13 +258,80 @@ extension _AbstractStringStorage {
132258
return _cocoaStringCompare(self, other) == 0 ? 1 : 0
133259
}
134260
}
261+
262+
/// Forwards to `asString._toNSRange(_:)` to convert an optional index range
/// into a `_SwiftNSRange`.
@inline(__always)
func _toNSRange(_ indices: Range<String.Index>?) -> _SwiftNSRange {
  let string = asString
  return string._toNSRange(indices)
}
266+
267+
/// The UTF-8 view of `asString`.
@inline(__always)
var utf8: String.UTF8View {
  return asString.utf8
}

/// The UTF-16 view of `asString`.
@inline(__always)
var utf16: String.UTF16View {
  return asString.utf16
}
269+
270+
/// Searches this storage's UTF-8 view for the UTF-8 contents of
/// `nativeOther` and returns the result as a `_SwiftNSRange`.
@_effects(readonly)
internal func _nativeRange<T:_AbstractStringStorage>(
  of nativeOther: T
) -> _SwiftNSRange {
  let match = utf8.boyerMooreSearch(for: nativeOther.utf8)
  return _toNSRange(match)
}
276+
277+
/// Searches for a non-Swift NSString `needle` inside this storage.
///
/// The needle's ASCII bytes are tried first against the UTF-8 view. If that
/// yields no range, the needle's UTF-16 code units are searched for in the
/// UTF-16 view.
///
/// - Parameter needle: An object for which `_isNSString` holds (enforced by
///   `precondition`).
/// - Returns: The match as a `_SwiftNSRange`, or the `_toNSRange(nil)` value
///   when nothing is found.
@_effects(readonly)
internal func _foreignRange(of needle: AnyObject) -> _SwiftNSRange {
  precondition(_isNSString(needle))
  // At this point we've proven that it is a non-Swift NSString

  // CFString will only give us ASCII bytes here, but that's fine.
  // We already handled non-ASCII UTF8 strings earlier since they're Swift.
  if let asciiMatch = withCocoaASCIIPointer(
    needle,
    work: { bytes -> Range<String.Index>? in
      let needleLength = _stdlib_binary_CFStringGetLength(needle)
      let needleBytes = UnsafeBufferPointer(start: bytes, count: needleLength)
      return utf8.boyerMooreSearch(for: needleBytes)
    }) {
    return _toNSRange(asciiMatch)
  }

  // UTF-16 fallback; a nil search result converts to the not-found range.
  let foreignUnits = NSStringUTF16View(needle)
  return _toNSRange(utf16.boyerMooreSearch(for: foreignUnits))
}
302+
303+
/// Shared implementation behind the `rangeOfString:` entry points.
///
/// - Parameter other: The string object to look for.
/// - Returns: The `_toNSRange(nil)` value when `other` is `nil` or `count`
///   is zero; the full UTF-16 range when `other` is this very object;
///   otherwise the result of the native or foreign search selected by
///   `_KnownCocoaString(other)`.
@inline(__always)
@_effects(readonly)
internal func _range(of other: AnyObject?) -> _SwiftNSRange {
  guard let needle = other, count > 0 else {
    return _toNSRange(nil)
  }

  // Searching for ourselves: the whole string is the match.
  guard self !== needle else {
    let whole = utf16.startIndex ..< utf16.endIndex
    return _toNSRange(whole)
  }

  switch _KnownCocoaString(needle) {
  case .storage:
    let native = _unsafeUncheckedDowncast(needle, to: __StringStorage.self)
    return _nativeRange(of: native)
  case .shared:
    let native = _unsafeUncheckedDowncast(
      needle, to: __SharedStringStorage.self)
    return _nativeRange(of: native)
  default:
    return _foreignRange(of: needle)
  }
}
135328
}
136329

137330
extension __StringStorage {
138331
/// The number of UTF-16 code units in this storage, exposed to Objective-C
/// as `length`.
@objc(length)
final internal var UTF16Length: Int {
  @_effects(readonly) @inline(__always) get {
    // UTF16View special-cases ASCII for us.
    let codeUnitCount = utf16.count
    return codeUnitCount
  }
}
144337

@@ -219,6 +412,12 @@ extension __StringStorage {
219412
final internal func isEqual(to other: AnyObject?) -> Int8 {
220413
return _isEqual(other)
221414
}
415+
416+
/// Objective-C `rangeOfString:` entry point; forwards to `_range(of:)`.
@objc(rangeOfString:)
@_effects(readonly)
final internal func range(of other: AnyObject?) -> _SwiftNSRange {
  _range(of: other)
}
222421

223422
@objc(copyWithZone:)
224423
final internal func copy(with zone: _SwiftNSZone?) -> AnyObject {
@@ -234,7 +433,7 @@ extension __SharedStringStorage {
234433
/// The number of UTF-16 code units in this storage, exposed to Objective-C
/// as `length`.
@objc(length)
final internal var UTF16Length: Int {
  @_effects(readonly) get {
    // UTF16View special-cases ASCII for us.
    let codeUnitCount = utf16.count
    return codeUnitCount
  }
}
240439

@@ -315,6 +514,12 @@ extension __SharedStringStorage {
315514
final internal func isEqual(to other: AnyObject?) -> Int8 {
316515
return _isEqual(other)
317516
}
517+
518+
/// Objective-C `rangeOfString:` entry point; forwards to `_range(of:)`.
@objc(rangeOfString:)
@_effects(readonly)
final internal func range(of other: AnyObject?) -> _SwiftNSRange {
  _range(of: other)
}
318523

319524
@objc(copyWithZone:)
320525
final internal func copy(with zone: _SwiftNSZone?) -> AnyObject {

0 commit comments

Comments
 (0)