-
Notifications
You must be signed in to change notification settings - Fork 257
fix(REN-5): raise MTParseErrorInvalidCharacter for non-ASCII literal input #233
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7342fe6
79a5f1e
ad020ca
d0e4b57
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -69,6 +69,8 @@ - (void)tearDown | |
| @[ @"x \\ y", @[ @(kMTMathAtomVariable), @(kMTMathAtomOrdinary), @(kMTMathAtomVariable)], @"x\\ y"], | ||
| // spacing | ||
| @[ @"x \\quad y \\; z \\! q", @[ @(kMTMathAtomVariable), @(kMTMathAtomSpace), @(kMTMathAtomVariable),@(kMTMathAtomSpace), @(kMTMathAtomVariable),@(kMTMathAtomSpace), @(kMTMathAtomVariable)], @"x\\quad y\\; z\\! q"], | ||
| // tilde is a non-breaking space (renders as an ordinary space, same as a literal space) | ||
| @[ @"x~y", @[ @(kMTMathAtomVariable), @(kMTMathAtomOrdinary), @(kMTMathAtomVariable)], @"x\\ y"], | ||
| ]; | ||
| } | ||
|
|
||
|
|
@@ -1488,6 +1490,16 @@ - (void) testDisplayLines | |
| @[@"x^\\choose y", @(MTParseErrorInvalidCommand)], | ||
| @[@"x^\\brack y", @(MTParseErrorInvalidCommand)], | ||
| @[@"x^\\brace y", @(MTParseErrorInvalidCommand)], | ||
| // REN-5: non-ASCII literal characters should produce MTParseErrorInvalidCharacter | ||
| @[@"π", @(MTParseErrorInvalidCharacter)], // π (U+03C0) | ||
| @[@"3 × 4", @(MTParseErrorInvalidCharacter)], // 3 × 4 | ||
| @[@"x ≤ y", @(MTParseErrorInvalidCharacter)], // x ≤ y | ||
|
Comment on lines
+1493
to
+1496
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Once the
@[@"\\left π (", @(MTParseErrorInvalidCharacter)],
@[@"\\begin π {matrix}", @(MTParseErrorInvalidCharacter)],
Currently, these cases will either parse successfully (silently dropping the character) or fail with a different error because |
||
| @[@"x 𝑎 y", @(MTParseErrorInvalidCharacter)], // above-BMP literal (U+1D44E, surrogate pair) | ||
| // Special characters with no meaning in math mode are errors (match LaTeX: | ||
| // % is a comment, # is a macro parameter, $ toggles math mode - none valid here). | ||
| @[@"a % b", @(MTParseErrorInvalidCharacter)], | ||
| @[@"a # b", @(MTParseErrorInvalidCharacter)], | ||
| @[@"a $ b", @(MTParseErrorInvalidCharacter)], | ||
| ]; | ||
| }; | ||
|
|
||
|
|
@@ -1508,6 +1520,26 @@ - (void) testErrors | |
| } | ||
| } | ||
|
|
||
| // REN-5: characters TeX silently discards (whitespace catcode 10/5 and NUL | ||
| // catcode 9) must continue to parse without error. Each input places a tab, newline, carriage return, or embedded NUL between 'x' and 'y'. | ||
| // Every input must build a non-nil list with a nil error and exactly two variable atoms. Guards against the error path swallowing legitimate whitespace. | ||
| - (void) testIgnoredWhitespaceCharacters | ||
| { | ||
| unichar nulChars[3] = { 'x', 0x0000, 'y' }; | ||
| NSString* withNul = [NSString stringWithCharacters:nulChars length:3]; | ||
| NSArray* inputs = @[ @"x\ty", @"x\ny", @"x\ry", withNul ]; | ||
| for (NSString* str in inputs) { | ||
| NSError* error = nil; | ||
| MTMathList* list = [MTMathListBuilder buildFromString:str error:&error]; | ||
| NSString* desc = [NSString stringWithFormat:@"whitespace input %@", str]; | ||
| XCTAssertNotNil(list, @"%@", desc); | ||
| XCTAssertNil(error, @"%@", desc); | ||
| XCTAssertEqual(list.atoms.count, 2u, @"%@", desc); | ||
| XCTAssertEqual([list.atoms[0] type], kMTMathAtomVariable, @"%@", desc); | ||
| XCTAssertEqual([list.atoms[1] type], kMTMathAtomVariable, @"%@", desc); | ||
| } | ||
| } | ||
|
|
||
| // REN-6: \over inside an explicit-brace script group must still parse correctly. | ||
| - (void) testOverInScriptBraces | ||
| { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When formatting the error message for an invalid character, if the character `ch` is a UTF-16 high surrogate (e.g., for characters outside the Basic Multilingual Plane like mathematical symbols), formatting it alone with `%C` creates an unpaired surrogate. This results in an invalid `NSString` which can cause crashes or rendering issues when displayed, logged, or serialized.
We should detect if `ch` is a high surrogate, consume the corresponding low surrogate to form a complete surrogate pair, and format the full Unicode codepoint and character string safely. If the next character is not a valid low surrogate, ensure we restore the lookahead character by calling `unlookCharacter`.
References
unlookCharacter) before breaking the parsing loop.