This repository was archived by the owner on Jul 24, 2024. It is now read-only.
- Notifications
You must be signed in to change notification settings - Fork 15
Lex and parse in terms of bytes rather than characters #57
Merged
Merged
Changes from 1 commit
Commits
Show all changes
6 commits Select commit Hold shift + click to select a range
4c7047f Lex and parse in terms of bytes rather than characters
f345389 Allow lexer to accept bytes as input
f0fc4f1 Add a test case for non-UTF-8 variable name
843a00c Add ByteString type
447b1be Use ByteString through lexer and parser
d803e53 Remove serde dependency
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Add ByteString type
- Loading branch information
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,97 @@ | ||
| use std::cmp::{Eq, PartialEq}; | ||
| use std::fmt::{Debug, Formatter, Result}; | ||
| use std::ops::Deref; | ||
| | ||
| use serde::Serialize; | ||
| | ||
| /// A wrapper for Vec<u8> that provides a human-readable Debug impl and | ||
| /// a few other conveniences. | ||
| /// | ||
| /// The Trunk lexer and parser work mainly with byte strings because | ||
| /// valid PHP code is not required to be valid UTF-8. | ||
| #[derive(Clone, Eq, PartialEq, Serialize)] | ||
| pub struct ByteString(pub(crate) Vec<u8>); | ||
| | ||
| impl ByteString { | ||
| pub fn new(bytes: Vec<u8>) -> Self { | ||
| ByteString(bytes) | ||
| } | ||
| } | ||
| | ||
| impl Debug for ByteString { | ||
| fn fmt(&self, f: &mut Formatter<'_>) -> Result { | ||
| write!(f, "\"")?; | ||
| for &b in &self.0 { | ||
| match b { | ||
| 0 => write!(f, "\\0")?, | ||
| b'\n' | b'\r' | b'\t' => write!(f, "{}", b.escape_ascii())?, | ||
| 0x01..=0x19 | 0x7f..=0xff => write!(f, "\\x{:02x}", b)?, | ||
| _ => write!(f, "{}", b as char)?, | ||
| } | ||
| } | ||
| write!(f, "\"")?; | ||
| Ok(()) | ||
| } | ||
| } | ||
| | ||
| impl<const N: usize> PartialEq<&[u8; N]> for ByteString { | ||
| fn eq(&self, other: &&[u8; N]) -> bool { | ||
| &self.0 == other | ||
| } | ||
| } | ||
| | ||
| impl From<Vec<u8>> for ByteString { | ||
| fn from(bytes: Vec<u8>) -> Self { | ||
| ByteString::new(bytes) | ||
| } | ||
| } | ||
| | ||
| impl From<&[u8]> for ByteString { | ||
| fn from(bytes: &[u8]) -> Self { | ||
| ByteString::new(bytes.to_vec()) | ||
| } | ||
| } | ||
| | ||
| impl<const N: usize> From<&[u8; N]> for ByteString { | ||
| fn from(bytes: &[u8; N]) -> Self { | ||
| ByteString::new(bytes.to_vec()) | ||
| } | ||
| } | ||
| | ||
| impl From<&str> for ByteString { | ||
| fn from(bytes: &str) -> Self { | ||
| ByteString::new(bytes.as_bytes().to_vec()) | ||
| } | ||
| } | ||
| | ||
| impl From<String> for ByteString { | ||
| fn from(bytes: String) -> Self { | ||
| ByteString::new(bytes.into_bytes()) | ||
| } | ||
| } | ||
| | ||
| impl Deref for ByteString { | ||
| type Target = Vec<u8>; | ||
| | ||
| fn deref(&self) -> &Vec<u8> { | ||
| &self.0 | ||
| } | ||
| } | ||
| | ||
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the `Debug` rendering of plain ASCII, of NUL plus the
    /// whitespace escapes, and of bytes rendered as `\xNN`.
    #[test]
    fn test_byte_string_debug() {
        let cases = [
            (ByteString::from("abc"), r#""abc""#),
            (ByteString::from("\0\n\r\t"), r#""\0\n\r\t""#),
            (
                ByteString::from(b"\x01\x10\x7f\xff"),
                r#""\x01\x10\x7f\xff""#,
            ),
        ];

        for (input, expected) in cases {
            assert_eq!(format!("{:?}", input), expected);
        }
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,7 @@ | ||
| mod byte_string; | ||
| mod lexer; | ||
| mod token; | ||
| | ||
| pub use byte_string::ByteString; | ||
| pub use lexer::{Lexer, LexerError}; | ||
| pub use token::{OpenTagKind, Span, Token, TokenKind}; |
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.