url_cleaner_engine/glue/parse/js/
string_literal.rs1use serde::{Serialize, Deserialize};
4use thiserror::Error;
5
6use crate::util::*;
7
/// The state [`string_literal_prefix`]'s state machine is in after consuming the previous character.
///
/// Reported in [`StringLiteralPrefixError::SyntaxError`] to show where parsing stopped.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub enum StringLiteralPrefixLastState {
    /// The initial state: the opening quote has not been consumed yet.
    Outside,
    /// Inside the literal and not in the middle of an escape sequence.
    Inside,
    /// A backslash was just consumed; the next character decides which escape this is.
    Start,
    /// The first digit of a three-digit octal escape was consumed.
    Octal1,
    /// The second digit of a three-digit octal escape was consumed.
    Octal2,
    /// The `x` of a `\xHH` escape was consumed.
    AsciiHexx,
    /// The first hex digit of a `\xHH` escape was consumed.
    AsciiHex1,
    /// The `u` of a `\uHHHH` or `\u{...}` escape was consumed.
    UnicodeU,
    /// The first hex digit of a `\uHHHH` escape was consumed.
    Unicode41,
    /// The second hex digit of a `\uHHHH` escape was consumed.
    Unicode42,
    /// The third hex digit of a `\uHHHH` escape was consumed.
    Unicode43,
    /// The `{` of a `\u{...}` escape was consumed.
    UnicodeLeftBrace,
    /// The first hex digit of a `\u{...}` escape was consumed.
    Unicode51,
    /// The second hex digit of a `\u{...}` escape was consumed.
    Unicode52,
    /// The third hex digit of a `\u{...}` escape was consumed.
    Unicode53,
    /// The fourth hex digit of a `\u{...}` escape was consumed.
    Unicode54,
    /// The fifth hex digit of a `\u{...}` escape was consumed; only `}` may follow.
    Unicode55
}
49
/// The error returned by [`string_literal_prefix`].
#[derive(Debug, Error)]
pub enum StringLiteralPrefixError {
    /// A character was read that is not allowed in the parser's current state.
    ///
    /// The fields are a snapshot of the parser at the moment of failure.
    #[error("A syntax error was encountered.")]
    SyntaxError {
        /// The state the parser was in when `c` was read.
        last_state: StringLiteralPrefixLastState,
        /// The zero-based position of `c`, counted in `char`s (not bytes).
        i: usize,
        /// The character that could not be handled.
        c: char,
        /// The numeric-escape accumulator; it is only overwritten when a numeric escape starts, so it may hold a value from an earlier escape.
        scratchspace: u32,
        /// The quote that opened the literal, or `"` if no opening quote had been read yet.
        quote: char,
        /// The literal's contents decoded before the error.
        partial: String
    },
    /// A numeric escape evaluated to a value that is not a valid [`char`].
    #[error("An invalid codepoint was encountered: {0}.")]
    InvalidCodepoint(u32)
}
73
74#[allow(clippy::missing_panics_doc, reason = "Shouldn't ever happen.")]
95#[allow(clippy::unwrap_used, reason = "Who cares?")]
96pub fn string_literal_prefix(s: &str) -> Result<String, StringLiteralPrefixError> {
97 debug!(prefix::js::string_literal_prefix, &(), s);
98 let mut ret = String::new();
99 let mut last_state = StringLiteralPrefixLastState::Outside;
100
101 let mut scratchspace: u32 = 0;
102 let mut quote = '"';
103
104 for (i, c) in s.chars().enumerate() {
105 debug!(prefix::js::string_literal_prefix, &(), i, c, last_state, scratchspace, quote, ret);
106 #[allow(clippy::arithmetic_side_effects, reason = "Shouldn't ever happen.")]
107 match (last_state, c) {
108 (StringLiteralPrefixLastState::Outside , '"' | '\'' ) => {last_state = StringLiteralPrefixLastState::Inside ; quote = c;},
109 (StringLiteralPrefixLastState::Inside , '\\' ) => {last_state = StringLiteralPrefixLastState::Start ;},
110 (StringLiteralPrefixLastState::Start , '0' ) => {last_state = StringLiteralPrefixLastState::Inside ; ret.push('\0');},
111 (StringLiteralPrefixLastState::Start , 'b' ) => {last_state = StringLiteralPrefixLastState::Inside ; ret.push('\u{0008}');},
112 (StringLiteralPrefixLastState::Start , 'g' ) => {last_state = StringLiteralPrefixLastState::Inside ; ret.push('\u{000c}');},
113 (StringLiteralPrefixLastState::Start , 'n' ) => {last_state = StringLiteralPrefixLastState::Inside ; ret.push('\n');},
114 (StringLiteralPrefixLastState::Start , '\n' ) => {last_state = StringLiteralPrefixLastState::Inside ;},
115 (StringLiteralPrefixLastState::Start , 'r' ) => {last_state = StringLiteralPrefixLastState::Inside ; ret.push('\r');},
116 (StringLiteralPrefixLastState::Start , 't' ) => {last_state = StringLiteralPrefixLastState::Inside ; ret.push('\t');},
117 (StringLiteralPrefixLastState::Start , 'v' ) => {last_state = StringLiteralPrefixLastState::Inside ; ret.push('\u{000b}');},
118 (StringLiteralPrefixLastState::Start , '\'' ) => {last_state = StringLiteralPrefixLastState::Inside ; ret.push('\'');},
119 (StringLiteralPrefixLastState::Start , '"' ) => {last_state = StringLiteralPrefixLastState::Inside ; ret.push('"') ;},
120 (StringLiteralPrefixLastState::Start , '\\' ) => {last_state = StringLiteralPrefixLastState::Inside ; ret.push('\\');},
121 (StringLiteralPrefixLastState::Start , '0'..='7' ) => {last_state = StringLiteralPrefixLastState::Octal1 ; scratchspace = c.to_digit( 8).unwrap();},
122 (StringLiteralPrefixLastState::Octal1 , '0'..='7' ) => {last_state = StringLiteralPrefixLastState::Octal2 ; scratchspace = scratchspace * 8 + c.to_digit( 8).unwrap();},
123 (StringLiteralPrefixLastState::Octal2 , '0'..='7' ) => {last_state = StringLiteralPrefixLastState::Inside ; scratchspace = scratchspace * 8 + c.to_digit( 8).unwrap(); ret.push(char::from_u32(scratchspace).ok_or(StringLiteralPrefixError::InvalidCodepoint(scratchspace))?);},
124 (StringLiteralPrefixLastState::Start , 'x' ) => {last_state = StringLiteralPrefixLastState::AsciiHexx ;},
125 (StringLiteralPrefixLastState::AsciiHexx , '0'..='7' | 'A'..='F' | 'a'..='f') => {last_state = StringLiteralPrefixLastState::AsciiHex1 ; scratchspace = c.to_digit(16).unwrap();},
126 (StringLiteralPrefixLastState::AsciiHex1 , '0'..='9' | 'A'..='F' | 'a'..='f') => {last_state = StringLiteralPrefixLastState::Inside ; scratchspace = scratchspace * 16 + c.to_digit(16).unwrap(); ret.push(char::from_u32(scratchspace).ok_or(StringLiteralPrefixError::InvalidCodepoint(scratchspace))?);},
127 (StringLiteralPrefixLastState::Start , 'u' ) => {last_state = StringLiteralPrefixLastState::UnicodeU ;},
128 (StringLiteralPrefixLastState::UnicodeU , '{' ) => {last_state = StringLiteralPrefixLastState::UnicodeLeftBrace;},
129 (StringLiteralPrefixLastState::UnicodeLeftBrace, '0'..='9' | 'A'..='F' | 'a'..='f') => {last_state = StringLiteralPrefixLastState::Unicode51 ; scratchspace = c.to_digit(16).unwrap();},
130 (StringLiteralPrefixLastState::Unicode51 , '0'..='9' | 'A'..='F' | 'a'..='f') => {last_state = StringLiteralPrefixLastState::Unicode52 ; scratchspace = scratchspace * 16 + c.to_digit(16).unwrap();},
131 (StringLiteralPrefixLastState::Unicode52 , '0'..='9' | 'A'..='F' | 'a'..='f') => {last_state = StringLiteralPrefixLastState::Unicode53 ; scratchspace = scratchspace * 16 + c.to_digit(16).unwrap();},
132 (StringLiteralPrefixLastState::Unicode53 , '0'..='9' | 'A'..='F' | 'a'..='f') => {last_state = StringLiteralPrefixLastState::Unicode54 ; scratchspace = scratchspace * 16 + c.to_digit(16).unwrap();},
133 (StringLiteralPrefixLastState::Unicode54 , '0'..='9' | 'A'..='F' | 'a'..='f') => {last_state = StringLiteralPrefixLastState::Unicode55 ; scratchspace = scratchspace * 16 + c.to_digit(16).unwrap();},
134 (StringLiteralPrefixLastState::Unicode51
135 | StringLiteralPrefixLastState::Unicode52
136 | StringLiteralPrefixLastState::Unicode53
137 | StringLiteralPrefixLastState::Unicode54
138 | StringLiteralPrefixLastState::Unicode55 , '}' ) => {last_state = StringLiteralPrefixLastState::Inside ; ret.push(char::from_u32(scratchspace).ok_or(StringLiteralPrefixError::InvalidCodepoint(scratchspace))?);},
139 (StringLiteralPrefixLastState::UnicodeU , '0'..='9' | 'A'..='F' | 'a'..='f') => {last_state = StringLiteralPrefixLastState::Unicode41 ; scratchspace = c.to_digit(16).unwrap();},
140 (StringLiteralPrefixLastState::Unicode41 , '0'..='9' | 'A'..='F' | 'a'..='f') => {last_state = StringLiteralPrefixLastState::Unicode42 ; scratchspace = scratchspace * 16 + c.to_digit(16).unwrap();},
141 (StringLiteralPrefixLastState::Unicode42 , '0'..='9' | 'A'..='F' | 'a'..='f') => {last_state = StringLiteralPrefixLastState::Unicode43 ; scratchspace = scratchspace * 16 + c.to_digit(16).unwrap();},
142 (StringLiteralPrefixLastState::Unicode43 , '0'..='9' | 'A'..='F' | 'a'..='f') => {last_state = StringLiteralPrefixLastState::Inside ; scratchspace = scratchspace * 16 + c.to_digit(16).unwrap(); ret.push(char::from_u32(scratchspace).ok_or(StringLiteralPrefixError::InvalidCodepoint(scratchspace))?);},
143 (StringLiteralPrefixLastState::Inside , '"' | '\'' ) if c == quote => break,
144 (StringLiteralPrefixLastState::Start , _ ) => {last_state = StringLiteralPrefixLastState::Inside ; ret.push(c);},
145 (StringLiteralPrefixLastState::Inside , _ ) => {ret.push(c);}
146 _ => Err(StringLiteralPrefixError::SyntaxError {last_state, i, c, scratchspace, quote, partial: ret.clone()})?
147 };
148 }
149
150 debug!(prefix::js::string_literal_prefix, &(), ret);
151
152 Ok(ret)
153}
154