Skip to content

Commit b11be4d

Browse files
Add Injected Language Support (#150)
# Description This PR adds support for injected languages using tree-sitter as described in #16 and in the [tree-sitter documentation](https://tree-sitter.github.io/tree-sitter/syntax-highlighting#language-injection). Languages can contain _injected_ languages, which are described in a language's `injections.scm` file. Some examples of injected languages are: - HTML contains CSS and JavaScript in `style` and `script` tags - JavaScript contains Regex literals - PHP contains HTML between the `<?php` tags - C++ can contain raw string literals of arbitrary languages # Details This PR is a rework of the `TreeSitterClient` class. Specifically it: - Adds a `layers` array and `primaryLayer` property. `layers` contains all language layers in the document, and `primaryLayer` is the ID of the document's primary language. - Each layer is a `LanguageLayer` object. These objects represent an injected language-range(s) combination. Each layer can have one or more ranges associated with it, depending on whether it should be [parsed as one document or multiple](https://tree-sitter.github.io/tree-sitter/syntax-highlighting#language-injection). - When editing: - Each layer's ranges are updated using a similar algorithm to tree-sitter, and edits are applied to make use of the incremental parsing tree-sitter gives. - Each layer is checked for any injections, inserting any new injected layers and keeping track of any 'untouched' layers to remove. - Any layers that were not touched are removed. The highlight query algorithm is largely the same, but keeps track of any ranges not used by any injected layers, and only queries the primary layer for those ranges so as not to override any injected highlights. # Related Issues - Closes #16 # Screenshots Before, other languages were detected but parsed and highlighted as normal text. 
<img width="1104" alt="Screenshot 2023-02-28 at 3 08 52 PM" src="https://user-images.githubusercontent.com/35942988/221980502-3aa61b6a-136a-43b9-a545-8fd835945002.png"> With Injected languages, in this case CSS and JS embedded in HTML and a second layer of Regex embedded in JS embedded in HTML: <img width="889" alt="Screenshot 2023-03-24 at 2 45 15 PM" src="https://user-images.githubusercontent.com/35942988/227628557-df55d986-a104-4a24-ab6c-97c8e69f6136.png">
1 parent 36a5f79 commit b11be4d

12 files changed

+668
-142
lines changed

Sources/CodeEditTextView/Controller/STTextViewController.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ public class STTextViewController: NSViewController, STTextViewDelegate, ThemeAt
320320
return self?.textView.textContentStorage.textStorage?.mutableString.substring(with: range)
321321
}
322322

323-
provider = try? TreeSitterClient(codeLanguage: language, textProvider: textProvider)
323+
provider = TreeSitterClient(codeLanguage: language, textProvider: textProvider)
324324
}
325325

326326
if let provider = provider {
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
//
2+
// NSRange+Comparable.swift
3+
//
4+
//
5+
// Created by Khan Winter on 3/15/23.
6+
//
7+
8+
import Foundation
9+
10+
/// Orders `NSRange` values by `location`, breaking ties by `length`.
///
/// The tie-break keeps `<` consistent with `==`: for any two ranges exactly one of
/// `a == b`, `a < b`, or `b < a` holds, which is what `Comparable` requires. Ordering by
/// `location` alone would leave two ranges with the same start but different lengths
/// neither equal nor ordered, making sort results for such ranges unspecified.
extension NSRange: Comparable {
    /// Two ranges are equal when both their `location` and `length` match.
    public static func == (lhs: NSRange, rhs: NSRange) -> Bool {
        return lhs.location == rhs.location && lhs.length == rhs.length
    }

    /// A range sorts before another when it starts earlier, or when both start at the
    /// same location and it is shorter.
    public static func < (lhs: NSRange, rhs: NSRange) -> Bool {
        if lhs.location != rhs.location {
            return lhs.location < rhs.location
        }
        return lhs.length < rhs.length
    }
}

Sources/CodeEditTextView/Extensions/NSRange+/NSRange+InputEdit.swift

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,35 @@ extension InputEdit {
2929
newEndPoint: newEndPoint)
3030
}
3131
}
32+
33+
extension NSRange {
    // swiftlint:disable line_length
    /// Modifies the range to account for an edit.
    /// Largely based on code from
    /// [tree-sitter](https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720)
    ///
    /// The edit's byte offsets are halved before being compared against this range's
    /// `location` and `length`. The resulting `length` is never negative: when the edit
    /// removes text beyond the end of this range, the remaining length is clamped to zero
    /// instead of underflowing.
    /// - Parameter edit: The edit to apply to this range.
    mutating func applyInputEdit(_ edit: InputEdit) {
        // swiftlint:enable line_length
        let rangeEnd = NSMaxRange(self)
        let editStart = Int(edit.startByte) / 2
        let editOldEnd = Int(edit.oldEndByte) / 2
        let editNewEnd = Int(edit.newEndByte) / 2
        let isPureInsertion = edit.oldEndByte == edit.startByte

        if editStart > rangeEnd {
            // Edit is after the range, nothing to do.
            return
        } else if editOldEnd < location {
            // The edit is entirely before this range: shift by the edit's size delta.
            location += (Int(edit.newEndByte) - Int(edit.oldEndByte)) / 2
        } else if editStart < location {
            // The edit starts in the space before this range and extends into this range.
            // Drop the overlapped part; clamp so an edit that swallows the whole range
            // leaves an empty range rather than a negative length.
            length = max(0, length - (editOldEnd - location))
            location = editNewEnd
        } else if editStart == location && isPureInsertion {
            // The edit is *only* an insertion right at the beginning of the range.
            location = editNewEnd
        } else if editStart < rangeEnd || (editStart == rangeEnd && isPureInsertion) {
            // Otherwise, the edit starts within this range: keep the part before the edit's
            // new end plus whatever survives after the old end (clamped at zero when the
            // edit runs past the end of this range).
            length = (editNewEnd - location) + max(0, length - (editOldEnd - location))
        }
    }
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
//
2+
// NSRange+TSRange.swift
3+
//
4+
//
5+
// Created by Khan Winter on 2/26/23.
6+
//
7+
8+
import Foundation
9+
import SwiftTreeSitter
10+
11+
extension NSRange {
    /// A `TSRange` covering this range.
    ///
    /// The byte bounds are this range's start and end multiplied by two
    /// (presumably because the text is addressed as two-byte UTF-16 units; confirm against callers).
    /// The point bounds are both left at `.zero`.
    var tsRange: TSRange {
        let emptyPoints = .zero..<(.zero)
        let startByte = UInt32(location) * 2
        let endByte = UInt32(location + length) * 2
        return TSRange(points: emptyPoints, bytes: startByte..<endByte)
    }
}
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
//
2+
// Tree+prettyPrint.swift
3+
//
4+
//
5+
// Created by Khan Winter on 3/16/23.
6+
//
7+
8+
import SwiftTreeSitter
9+
10+
#if DEBUG
11+
extension Tree {
    /// Prints a parenthesized dump of this tree to standard output.
    ///
    /// Only named nodes are printed. Each one is written as `(type range ...)`, prefixed by
    /// its field name when the cursor reports one, with children nested inside. A root node
    /// without children is printed on a single line instead.
    func prettyPrint() {
        guard let cursor = rootNode?.treeCursor else {
            print("NO ROOT NODE")
            return
        }
        guard let startNode = cursor.currentNode else {
            print("NO CURRENT NODE")
            return
        }

        /// Prints the node under `cursor` and, recursively, all of its children.
        /// The cursor is back on the same node when this returns.
        func printSubtree(_ cursor: TreeCursor, depth: Int) {
            guard let node = cursor.currentNode else {
                return
            }

            let isVisible = node.isNamed

            if isVisible {
                let indent = String(repeating: " ", count: depth * 2)
                print(indent, terminator: "")
                if let fieldName = cursor.currentFieldName {
                    print(fieldName, ": ", separator: "", terminator: "")
                }
                print("(", node.nodeType ?? "NONE", " ", node.range, " ", separator: "", terminator: "")
            }

            if cursor.goToFirstChild() {
                repeat {
                    // Named children each start on their own line.
                    if cursor.currentNode?.isNamed == true {
                        print("")
                    }

                    printSubtree(cursor, depth: depth + 1)
                } while cursor.gotoNextSibling()

                guard cursor.gotoParent() else {
                    fatalError("Could not go to parent, this tree may be invalid.")
                }
            }

            if isVisible {
                print(")", terminator: "")
            }
        }

        if startNode.childCount == 0 {
            // Leaf root: braces mark an anonymous node, quotes mark a named one.
            let typeName = startNode.nodeType ?? "NONE"
            if startNode.isNamed {
                print("\"\(typeName)\"")
            } else {
                print("{\(typeName)}")
            }
        } else {
            printSubtree(cursor, depth: 1)
        }
    }
}
71+
#endif

Sources/CodeEditTextView/Filters/STTextViewController+TextFormation.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ extension STTextViewController {
7171
/// - whitespaceProvider: The whitespace providers to use.
7272
/// - indentationUnit: The unit of indentation to use.
7373
private func setUpNewlineTabFilters(whitespaceProvider: WhitespaceProviders, indentationUnit: String) {
74-
let newlineFilter: Filter = NewlineFilter(whitespaceProviders: whitespaceProvider)
74+
let newlineFilter: Filter = NewlineProcessingFilter(whitespaceProviders: whitespaceProvider)
7575
let tabReplacementFilter: Filter = TabReplacementFilter(indentationUnit: indentationUnit)
7676

7777
textFilters.append(contentsOf: [newlineFilter, tabReplacementFilter])

Sources/CodeEditTextView/Highlighting/HighlightProviding.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ public protocol HighlightProviding {
1717
/// - Note: This does not need to be *globally* unique, merely unique across all the highlighters used.
1818
var identifier: String { get }
1919

20+
/// Called once at editor initialization.
21+
func setUp(textView: HighlighterTextView)
22+
2023
/// Updates the highlighter's code language.
2124
/// - Parameters:
2225
/// - codeLanguage: The language that should be used by the highlighter.

Sources/CodeEditTextView/Highlighting/Highlighter.swift

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ class Highlighter: NSObject {
8888
}
8989

9090
textView.textContentStorage.textStorage?.delegate = self
91+
highlightProvider?.setUp(textView: textView)
9192

9293
if let scrollView = textView.enclosingScrollView {
9394
NotificationCenter.default.addObserver(self,
@@ -121,6 +122,7 @@ class Highlighter: NSObject {
121122
public func setHighlightProvider(_ provider: HighlightProviding) {
122123
self.highlightProvider = provider
123124
highlightProvider?.setLanguage(codeLanguage: language)
125+
highlightProvider?.setUp(textView: textView)
124126
invalidate()
125127
}
126128

@@ -282,7 +284,7 @@ extension Highlighter: NSTextStorageDelegate {
282284
delta: delta) { [weak self] invalidatedIndexSet in
283285
let indexSet = invalidatedIndexSet
284286
.union(IndexSet(integersIn: editedRange))
285-
// Only invalidate indices that aren't visible.
287+
// Only invalidate indices that are visible.
286288
.intersection(self?.visibleSet ?? .init())
287289

288290
for range in indexSet.rangeView {
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
//
2+
// TreeSitterClient+Edit.swift
3+
//
4+
//
5+
// Created by Khan Winter on 3/10/23.
6+
//
7+
8+
import Foundation
9+
import SwiftTreeSitter
10+
import CodeEditLanguages
11+
12+
extension TreeSitterClient {
13+
14+
/// Re-parses a layer after an edit and reports which ranges the edit invalidated.
///
/// The layer's `tree` is replaced as a side effect: with the freshly parsed tree when the
/// layer already had one, or with a newly created tree when it had none.
/// - Parameters:
///   - textView: The text view to use for text.
///   - edit: The edit to act on.
///   - layer: The language layer whose tree should be updated.
///   - readBlock: A callback for fetching blocks of text.
/// - Returns: An array of distinct `NSRanges` that need to be re-highlighted. When the layer
///            had no tree yet, this is the text view's entire document range.
func findChangedByteRanges(
    textView: HighlighterTextView,
    edit: InputEdit,
    layer: LanguageLayer,
    readBlock: @escaping Parser.ReadBlock
) -> [NSRange] {
    let (previousTree, updatedTree) = calculateNewState(
        tree: layer.tree,
        parser: layer.parser,
        edit: edit,
        readBlock: readBlock
    )

    let hadNoTree = previousTree == nil && updatedTree == nil
    guard !hadNoTree else {
        // Nothing to diff against: build a tree from scratch and invalidate everything.
        layer.tree = createTree(parser: layer.parser, readBlock: readBlock)
        let wholeDocument = NSRange(textView.documentRange.intRange)
        return [wholeDocument]
    }

    let invalidatedRanges = changedByteRanges(previousTree, rhs: updatedTree).map { byteRange in
        byteRange.range
    }
    layer.tree = updatedTree
    return invalidatedRanges
}
45+
46+
/// Applies `edit` to the given tree, re-parses using it, and returns both states.
///
/// The tree passed in is edited in place before being handed to the parser. The edit and
/// parse are bracketed by `semaphore.wait()` / `semaphore.signal()`.
/// - Parameters:
///   - tree: The tree before an edit used to parse the new tree.
///   - parser: The parser used to parse the new tree.
///   - edit: The edit to apply.
///   - readBlock: The block to use to read text.
/// - Returns: (A copy of the edited old tree, the newly parsed tree), or `(nil, nil)` when
///            `tree` is `nil`.
internal func calculateNewState(
    tree: Tree?,
    parser: Parser,
    edit: InputEdit,
    readBlock: @escaping Parser.ReadBlock
) -> (Tree?, Tree?) {
    guard let previousTree = tree else {
        return (nil, nil)
    }

    semaphore.wait()
    // The old tree must know about the edit before it is reused for parsing.
    previousTree.edit(edit)
    let reparsedTree = parser.parse(tree: previousTree, readBlock: readBlock)
    semaphore.signal()

    let previousSnapshot = previousTree.copy()
    return (previousSnapshot, reparsedTree)
}
74+
75+
/// Computes the byte ranges that differ between two trees.
/// - Parameters:
///   - lhs: The first (older) tree.
///   - rhs: The second (newer) tree.
/// - Returns: The changed ranges between both trees; the newer tree's root byte range
///            (if it has a root node) when there is no older tree; an empty array when
///            there is no newer tree.
internal func changedByteRanges(_ lhs: Tree?, rhs: Tree?) -> [Range<UInt32>] {
    guard let newerTree = rhs else {
        return []
    }

    guard let olderTree = lhs else {
        // No previous tree to compare with, so everything the new tree covers has changed.
        guard let wholeRange = newerTree.rootNode?.byteRange else {
            return []
        }
        return [wholeRange]
    }

    return olderTree.changedRanges(from: newerTree).map { changed in
        changed.bytes
    }
}
92+
93+
/// Runs the injections query for a language layer and reconciles the result with the
/// layers that already exist.
///
/// Injections that match an existing layer are removed from `touchedLayers`; injections
/// without a matching layer get a new layer that is parsed immediately. Injections whose
/// language is unknown, or is the primary layer's language, are ignored.
/// - Parameters:
///   - textView: The text view to use.
///   - layer: The language layer to perform the query on.
///   - layerSet: The set of layers that exist in the document.
///               Used for efficient lookup of existing `(language, range)` pairs.
///               Newly created layers are inserted into it.
///   - touchedLayers: The set of layers that existed before updating injected layers.
///                    Will have items removed as they are found.
///   - readBlock: A completion block for reading from text storage efficiently.
/// - Returns: An index set covering the ranges of any newly added layers.
@discardableResult
internal func updateInjectedLanguageLayers(
    textView: HighlighterTextView,
    layer: LanguageLayer,
    layerSet: inout Set<LanguageLayer>,
    touchedLayers: inout Set<LanguageLayer>,
    readBlock: @escaping Parser.ReadBlock
) -> IndexSet {
    var updatedRanges = IndexSet()

    guard let tree = layer.tree, let rootNode = tree.rootNode else {
        return updatedRanges
    }
    guard let cursor = layer.languageQuery?.execute(node: rootNode, in: tree) else {
        return updatedRanges
    }

    cursor.matchLimit = Constants.treeSitterMatchLimit

    let injections = self.injectedLanguagesFrom(cursor: cursor) { range, _ in
        textView.stringForRange(range)
    }

    for (languageName, injectedRanges) in injections {
        guard let languageId = TreeSitterLanguage(rawValue: languageName),
              languageId != primaryLayer else {
            continue
        }

        for injected in injectedRanges {
            let injectedRange = injected.range

            // Throwaway layer used only as a lookup key into the layer sets.
            let lookupKey = LanguageLayer(
                id: languageId,
                parser: Parser(),
                supportsInjections: false,
                ranges: [injectedRange]
            )

            guard !layerSet.contains(lookupKey) else {
                // If we've found this layer, it means it should exist after an edit.
                touchedLayers.remove(lookupKey)
                continue
            }

            // New range, make a new layer!
            guard let newLayer = addLanguageLayer(layerId: languageId, readBlock: readBlock) else {
                continue
            }
            newLayer.ranges = [injectedRange]
            newLayer.parser.includedRanges = newLayer.ranges.map { $0.tsRange }
            newLayer.tree = createTree(parser: newLayer.parser, readBlock: readBlock)

            layerSet.insert(newLayer)
            updatedRanges.insert(range: injectedRange)
        }
    }

    return updatedRanges
}
163+
}

0 commit comments

Comments
 (0)