@@ -29,23 +29,15 @@ static NSUInteger lowestSetBit(NSUInteger bitmask) {
/**
 * Whether a Unicode code point is a control character.
 *
 * Returns true for code points in U+0000..U+001F (the C0 controls) and in
 * U+007F..U+009F (DEL followed by the C1 controls); false for everything else.
 */
static bool IsControlCharacter(uint64_t character) {
  // `character` is unsigned, so a lower-bound check against 0x00 would always
  // be true; only the upper bound of the first range needs testing.
  return character <= 0x1f || (character >= 0x7f && character <= 0x9f);
}
3935
/**
 * Whether a code point lies in the range that this file treats as unprintable
 * keys: 0xF700 through 0xF8FF, both ends inclusive.
 */
static bool IsUnprintableKey(uint64_t character) {
  const uint64_t kRangeStart = 0xF700;
  const uint64_t kRangeEnd = 0xF8FF;
  if (character < kRangeStart) {
    return false;
  }
  return character <= kRangeEnd;
}
5042
5143/* *
@@ -113,6 +105,40 @@ static uint64_t toLower(uint64_t n) {
113105 return n;
114106}
115107
// Decode a UTF-16 sequence to an array of char32 (UTF-32).
//
// See https://en.wikipedia.org/wiki/UTF-16#Description for the algorithm.
//
// Ill-formed input is dropped rather than decoded: a high surrogate that is
// not immediately followed by a low surrogate, and a low surrogate that is not
// immediately preceded by a high surrogate, contribute nothing to the result.
//
// The returned character array must be deallocated with delete[] by the
// caller on every path, including when the decoded length is 0. The length of
// the result is stored in `out_length`, which must not be null.
//
// Although NSString has a dataUsingEncoding method, we implement our own
// because dataUsingEncoding outputs redundant characters for unknown reasons.
static uint32_t* DecodeUtf16(NSString* target, size_t* out_length) {
  const NSUInteger target_length = target.length;
  // Every UTF-16 code unit yields at most one code point, so the result always
  // has a length less than or equal to that of target.
  uint32_t* result = new uint32_t[target_length];
  size_t result_pos = 0;
  // A separate flag is needed because 0 is a valid high-surrogate payload
  // (code unit 0xD800).
  bool has_high_surrogate = false;
  uint32_t high_surrogate = 0;
  for (NSUInteger target_pos = 0; target_pos < target_length; target_pos += 1) {
    const uint16_t code_unit = [target characterAtIndex:target_pos];
    if (code_unit >= 0xD800 && code_unit <= 0xDBFF) {
      // High surrogate: remember its 10 payload bits and wait for the low
      // half. A previously pending high surrogate is discarded as unpaired.
      high_surrogate = code_unit - 0xD800;
      has_high_surrogate = true;
      continue;
    }
    if (code_unit >= 0xDC00 && code_unit <= 0xDFFF) {
      // Low surrogate: only meaningful directly after a high surrogate.
      // Without one there is no valid code point to emit, so skip it instead
      // of combining it with stale state.
      if (has_high_surrogate) {
        const uint32_t low_surrogate = code_unit - 0xDC00;
        result[result_pos] = (high_surrogate << 10) + low_surrogate + 0x10000;
        result_pos += 1;
      }
      has_high_surrogate = false;
      continue;
    }
    // BMP: the code unit is the code point. Any pending high surrogate was
    // unpaired and is dropped.
    result[result_pos] = code_unit;
    result_pos += 1;
    has_high_surrogate = false;
  }
  *out_length = result_pos;
  return result;
}
141+
116142/* *
117143 * Returns the logical key of a KeyUp or KeyDown event.
118144 *
@@ -125,30 +151,34 @@ static uint64_t GetLogicalKeyForEvent(NSEvent* event, uint64_t physicalKey) {
125151 return fromKeyCode.unsignedLongLongValue ;
126152 }
127153
128- NSString * keyLabel = event.charactersIgnoringModifiers ;
129- NSUInteger keyLabelLength = [keyLabel length ];
130- // If this key is printable, generate the logical key from its Unicode
131- // value. Control keys such as ESC, CTRL, and SHIFT are not printable. HOME,
132- // DEL, arrow keys, and function keys are considered modifier function keys,
133- // which generate invalid Unicode scalar values.
134- if (keyLabelLength != 0 && !IsControlCharacter (keyLabelLength, keyLabel) &&
135- !IsUnprintableKey (keyLabelLength, keyLabel)) {
136- // Given that charactersIgnoringModifiers can contain a string of arbitrary
137- // length, limit to a maximum of two Unicode scalar values. It is unlikely
138- // that a keyboard would produce a code point bigger than 32 bits, but it is
139- // still worth defending against this case.
140- NSCAssert ((keyLabelLength < 2 ), @"Unexpected long key label: |%@|.", keyLabel);
141-
142- uint64_t codeUnit = (uint64_t )[keyLabel characterAtIndex: 0 ];
143- if (keyLabelLength == 2 ) {
144- uint64_t secondCode = (uint64_t )[keyLabel characterAtIndex: 1 ];
145- codeUnit = (codeUnit << 16 ) | secondCode;
154+ // Convert `charactersIgnoringModifiers` to UTF32.
155+ NSString * keyLabelUtf16 = event.charactersIgnoringModifiers ;
156+
157+ // Check if this key is a single character, which will be used to generate the
158+ // logical key from its Unicode value.
159+ //
160+ // Multi-char keys will be minted onto the macOS plane because there are no
161+ // meaningful values for them. Control keys and unprintable keys have been
162+ // converted by `keyCodeToLogicalKey` earlier.
163+ uint32_t character = 0 ;
164+ if (keyLabelUtf16.length != 0 ) {
165+ size_t keyLabelLength;
166+ uint32_t * keyLabel = DecodeUtf16 (keyLabelUtf16, &keyLabelLength);
167+ if (keyLabelLength == 1 ) {
168+ uint32_t keyLabelChar = *keyLabel;
169+ delete[] keyLabel;
170+ NSCAssert (!IsControlCharacter(keyLabelChar) && !IsUnprintableKey(keyLabelChar),
171+ @"Unexpected control or unprintable keylabel 0x%x", keyLabelChar);
172+ NSCAssert (keyLabelChar <= 0x10FFFF , @" Out of range keylabel 0x%x " , keyLabelChar);
173+ character = keyLabelChar;
146174 }
147- return KeyOfPlane (toLower (codeUnit), kUnicodePlane );
175+ }
176+ if (character != 0 ) {
177+ return KeyOfPlane (toLower (character), kUnicodePlane );
148178 }
149179
150- // This is a non-printable key that is unrecognized , so a new code is minted
151- // to the macOS plane.
180+ // We can't represent this key with a single printable unicode , so a new code
181+ // is minted to the macOS plane.
152182 return KeyOfPlane (event.keyCode , kMacosPlane );
153183}
154184
0 commit comments