Skip to content
This repository was archived by the owner on Dec 17, 2018. It is now read-only.

Commit 22a6045

Browse files
authored
Merge pull request #4 from nkovacs/better-function-param-parsing
Improve function param parsing
2 parents 4b42d43 + fbf39e5 commit 22a6045

File tree

3 files changed

+103
-4
lines changed

3 files changed

+103
-4
lines changed

README.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,23 @@ A [PEG.js] parser for [ICU MessageFormat] strings – part of [messageformat.js]
44
Outputs an AST defined by [parser.pegjs].
55

66
The generated parser function takes two parameters, first the string to be
7-
parsed, and a second optional parameter `options`, an object containing arrays
7+
parsed, and a second optional parameter `options`, an object.
8+
9+
The options object contains arrays
810
of keywords for `cardinal` and `ordinal` rules for the current locale – these
911
are used to validate plural and selectordinal keys. If `options` or its fields
1012
are missing or set to false, the full set of valid [Unicode CLDR] keys is used:
1113
`'zero', 'one', 'two', 'few', 'many', 'other'`. To disable this check, pass in
1214
an empty array.
1315

16+
The `options` object also supports a setting that makes the parser
17+
follow the ICU MessageFormat spec more closely: `strictFunctionParams`.
18+
19+
By default, function parameters are split on commas and trimmed,
20+
so the parameters in `{x,fn, a, b }` are parsed as `['a','b']`.
21+
Setting `strictFunctionParams` to true will result in a params array
22+
with a single element: `[' a, b ']`.
23+
1424
[ICU MessageFormat]: https://messageformat.github.io/guide/
1525
[messageformat.js]: https://messageformat.github.io/
1626
[parser.pegjs]: ./parser.pegjs

parser.pegjs

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
start = token*
1+
start = token*
22

33
token
44
= argument / select / plural / function
@@ -37,7 +37,7 @@ plural = '{' _ arg:id _ ',' _ type:('plural'/'selectordinal') _ ',' _ offset:off
3737
};
3838
}
3939

40-
function = '{' _ arg:id _ ',' _ key:id _ params:functionParams* '}' {
40+
function = '{' _ arg:id _ ',' _ key:id _ params:functionParams '}' {
4141
return {
4242
type: 'function',
4343
arg: arg,
@@ -48,6 +48,10 @@ function = '{' _ arg:id _ ',' _ key:id _ params:functionParams* '}' {
4848

4949
id = $([0-9a-zA-Z$_][^ \t\n\r,.+={}]*)
5050

51+
// One default-mode parameter: one or more paramcharsDefault pieces concatenated into a single string.
paramDefault = pieces:paramcharsDefault+ { return pieces.join(''); }
52+
53+
// One strict-mode parameter: one or more paramcharsStrict pieces concatenated into a single string.
paramStrict = pieces:paramcharsStrict+ { return pieces.join(''); }
54+
5155
selectCase = _ key:id _ tokens:caseTokens { return { key: key, tokens: tokens }; }
5256

5357
pluralCase = _ key:pluralKey _ tokens:caseTokens { return { key: key, tokens: tokens }; }
@@ -60,7 +64,25 @@ pluralKey
6064
= id
6165
/ '=' d:digits { return d; }
6266

63-
functionParams = _ ',' _ p:id _ { return p; }
67+
// Parameter list of a function token; returns an array of parameter strings.
// The rule used depends on options.strictFunctionParams. The mode predicate is
// checked BEFORE the repetition so that strict mode does not first parse the
// whole list with the default rule only to throw the result away and backtrack.
functionParams
68+
= ! { return options.strictFunctionParams; } p:functionParamsDefault* { return p; }
69+
/ & { return options.strictFunctionParams; } p:functionParamsStrict* { return p; }
70+
71+
// Strict-mode parameter: `_`, a comma, then one paramStrict returned exactly as matched (no trimming).
// NOTE(review): `_` is defined outside this view — presumably the optional-whitespace rule.
functionParamsStrict = _ ',' param:paramStrict { return param; }
72+
73+
// Default-mode parameter: a comma followed by one paramDefault, with leading and
// trailing spaces, tabs, newlines and carriage returns stripped from the result.
functionParamsDefault = _ ',' _ raw:paramDefault _ { return raw.replace(/^[ \t\n\r]*|[ \t\n\r]*$/g, ''); }
74+
75+
// Two consecutive apostrophes in the input yield a single literal apostrophe.
doubleapos = "''" { return "'"; }
76+
77+
// Content inside a quoted section: an escaped apostrophe, or a run of non-apostrophe characters as text.
inapos = doubleapos / $([^']+)
78+
79+
// A quoted section that opens with a curly brace: '{...' or '}...' up to the closing apostrophe.
// Returns the brace followed by the unquoted content. The braces are written as
// \u007B / \u007D escapes — presumably to keep the action code's braces balanced.
quotedCurly
80+
= "'{" inner:inapos* "'" { return '\u007B' + inner.join(''); }
81+
/ "'}" inner:inapos* "'" { return '\u007D' + inner.join(''); }
82+
83+
// Apostrophe handling inside a function parameter: a quoted-curly section is
// unquoted by quotedCurly; any other apostrophe is kept as a literal character.
quotedFunctionParams
84+
= quotedCurly
85+
/ "'"
6486

6587
char
6688
= [^{}#\\\0-\x08\x0e-\x1f\x7f]
@@ -72,6 +94,18 @@ char
7294
return String.fromCharCode(parseInt('0x' + h1 + h2 + h3 + h4));
7395
}
7496

97+
// Apostrophe-related pieces shared by both parameter modes. doubleapos is tried
// first so that '' collapses to one apostrophe before quotedFunctionParams can
// consume a lone apostrophe.
paramcharsCommon
98+
= doubleapos
99+
/ quotedFunctionParams
100+
101+
// A piece of a default-mode parameter: a shared apostrophe sequence, or a run of
// characters containing no apostrophe, comma or closing brace (a comma ends the parameter).
paramcharsDefault
102+
= paramcharsCommon
103+
/ $([^',}]+)
104+
105+
// A piece of a strict-mode parameter: a shared apostrophe sequence, or a run of
// characters containing no apostrophe or closing brace (commas are ordinary characters here).
paramcharsStrict
106+
= paramcharsCommon
107+
/ $([^'}]+)
108+
75109
digits = $([0-9]+)
76110

77111
hexDigit = [0-9a-fA-F]

test.js

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,61 @@ describe("Ordinals", function() {
239239
});
240240

241241
});
242+
describe("Functions", function() {
243+
it("should accept no parameters", function() {
244+
expect(parse('{var,date}')[0].type).to.eql('function');
245+
expect(parse('{var,date}')[0].key).to.eql('date');
246+
expect(parse('{var,date}')[0].params).to.be.empty;
247+
})
248+
249+
it("should accept parameters", function() {
250+
expect(parse('{var,date,long}')[0].type).to.eql('function');
251+
expect(parse('{var,date,long}')[0].key).to.eql('date');
252+
expect(parse('{var,date,long}')[0].params[0]).to.eql('long');
253+
expect(parse('{var,date,long,short}')[0].params[0]).to.eql('long');
254+
expect(parse('{var,date,long,short}')[0].params[1]).to.eql('short');
255+
})
256+
257+
it("should accept parameters with whitespace", function() {
258+
expect(parse('{var,date,y-M-d HH:mm:ss zzzz}')[0].type).to.eql('function');
259+
expect(parse('{var,date,y-M-d HH:mm:ss zzzz}')[0].key).to.eql('date');
260+
expect(parse('{var,date,y-M-d HH:mm:ss zzzz}')[0].params[0]).to.eql('y-M-d HH:mm:ss zzzz');
261+
// This is not how ICU works. ICU does not trim whitespace,
262+
// but messageformat-parse must trim it to maintain backwards compatibility.
263+
expect(parse('{var,date, y-M-d HH:mm:ss zzzz }')[0].params[0]).to.eql('y-M-d HH:mm:ss zzzz');
264+
// This is how ICU works.
265+
expect(parse('{var,date, y-M-d HH:mm:ss zzzz }', { strictFunctionParams: true })[0].params[0]).to.eql(' y-M-d HH:mm:ss zzzz ');
266+
})
267+
268+
it("should accept parameters with special characters", function() {
269+
expect(parse("{var,date,y-M-d '{,}' '' HH:mm:ss zzzz}")[0].type).to.eql('function');
270+
expect(parse("{var,date,y-M-d '{,}' '' HH:mm:ss zzzz}")[0].key).to.eql('date');
271+
expect(parse("{var,date,y-M-d '{,}' '' HH:mm:ss zzzz}")[0].params[0]).to.eql("y-M-d {,} ' HH:mm:ss zzzz");
272+
expect(parse("{var,date,y-M-d '{,}' '' HH:mm:ss zzzz'}'}")[0].params[0]).to.eql("y-M-d {,} ' HH:mm:ss zzzz}");
273+
expect(parse("{var,date,y-M-d # HH:mm:ss zzzz}")[0].params[0]).to.eql("y-M-d # HH:mm:ss zzzz");
274+
expect(parse("{var,date,y-M-d '#' HH:mm:ss zzzz}")[0].params[0]).to.eql("y-M-d '#' HH:mm:ss zzzz");
275+
// This is not how ICU works.
276+
expect(parse("{var,date,y-M-d, HH:mm:ss zzzz}")[0].params[0]).to.eql("y-M-d");
277+
expect(parse("{var,date,y-M-d, HH:mm:ss zzzz}")[0].params[1]).to.eql("HH:mm:ss zzzz");
278+
// This is how ICU works, but this only allows a single argStyle parameter.
279+
expect(parse("{var,date,y-M-d, HH:mm:ss zzzz}", { strictFunctionParams: true })[0].params[0]).to.eql("y-M-d, HH:mm:ss zzzz");
280+
})
281+
282+
it("should be gracious with whitespace", function() {
283+
var firstRes = JSON.stringify(parse('{var, date, long, short}'));
284+
expect(JSON.stringify(parse('{ var, date, long, short }'))).to.eql(firstRes);
285+
expect(JSON.stringify(parse('{var,date,long,short}'))).to.eql(firstRes);
286+
expect(JSON.stringify(parse('{\nvar, \ndate,\n long\n\n,\n short\n\n\n}'))).to.eql(firstRes);
287+
expect(JSON.stringify(parse('{\tvar\t,\t\t\r date\t\n, \tlong\n, short\t\n\n\n\n}'))).to.eql(firstRes);
288+
289+
// This is not how ICU works. ICU does not trim whitespace.
290+
firstRes = JSON.stringify(parse('{var, date, y-M-d HH:mm:ss zzzz}'));
291+
expect(JSON.stringify(parse('{ var, date, y-M-d HH:mm:ss zzzz }'))).to.eql(firstRes);
292+
expect(JSON.stringify(parse('{var,date,y-M-d HH:mm:ss zzzz}'))).to.eql(firstRes);
293+
expect(JSON.stringify(parse('{\nvar, \ndate,\n \n\n\n y-M-d HH:mm:ss zzzz\n\n\n}'))).to.eql(firstRes);
294+
expect(JSON.stringify(parse('{\tvar\t,\t\t\r date\t\n, \t\ny-M-d HH:mm:ss zzzz\t\n\n\n\n}'))).to.eql(firstRes);
295+
});
296+
});
242297
describe("Nested/Recursive blocks", function() {
243298

244299
it("should allow a select statement inside of a select statement", function() {

0 commit comments

Comments
 (0)