/// Translates a byte offset into `doc` into a `(line, column, char_offset)` triple.
///
/// * `line` is 1-based and increases after every `'\n'` that precedes the offset.
/// * `column` is 1-based and counts characters (not bytes) since the start of
///   the line. A `'\n'` itself is reported at the end of the line it terminates.
/// * `char_offset` is the 0-based number of characters that precede the offset.
///
/// `byte_offset == doc.len()` (end of input) is accepted. The position is
/// derived only from the text before the offset, so an offset that directly
/// follows a `'\n'` is reported as column 1 of the next line whether or not
/// any text follows it.
///
/// # Panics
///
/// Panics if `byte_offset` is greater than `doc.len()` or does not fall on a
/// character boundary.
pub(crate) fn get_line_col_char(doc: &str, byte_offset: usize) -> (usize, usize, usize) {
    assert!(
        byte_offset <= doc.len(),
        "requested byteoffset {} is not less than or equal to doc length ({})",
        byte_offset,
        doc.len()
    );
    assert!(
        doc.is_char_boundary(byte_offset),
        "Byteoffset lands in the middle of a character"
    );

    let mut lineno: usize = 1;
    let mut colno: usize = 1;
    let mut codepoint_off: usize = 0;
    // Walking only the prefix keeps the answer independent of what comes after
    // the offset, so the end-of-input case needs no special handling.
    for ch in doc[..byte_offset].chars() {
        codepoint_off += 1;
        if ch == '\n' {
            lineno += 1;
            colno = 1;
        } else {
            colno += 1;
        }
    }
    (lineno, colno, codepoint_off)
}
/// Escapes `input` for use as the body of a double-quoted string literal.
///
/// The double quote, backslash, newline, carriage return, tab, forward slash,
/// backspace and form feed get their short backslash escapes. U+2028 and
/// U+2029 are written as `\u2028` / `\u2029`. Every other character,
/// including `'`, is copied through unchanged.
pub(crate) fn escape_double_quoted(input: &str) -> String {
    // Every escape is at most twice as long (in bytes) as the character it
    // replaces, so this capacity is never exceeded.
    let mut out = String::with_capacity(input.len() * 2);
    for ch in input.chars() {
        let replacement = match ch {
            '"' => "\\\"",
            '\\' => "\\\\",
            '\n' => "\\n",
            '\r' => "\\r",
            '\t' => "\\t",
            '/' => "\\/",
            '\u{0008}' => "\\b",
            '\u{000c}' => "\\f",
            '\u{2028}' => "\\u2028",
            '\u{2029}' => "\\u2029",
            other => {
                out.push(other);
                continue;
            }
        };
        out.push_str(replacement);
    }
    out
}
/// Escapes `input` for use as the body of a single-quoted string literal.
///
/// The single quote, backslash, newline, carriage return, tab, forward slash,
/// backspace and form feed get their short backslash escapes. U+2028 and
/// U+2029 are written as `\u2028` / `\u2029`. Every other character,
/// including `"`, is copied through unchanged.
#[allow(dead_code)]
pub(crate) fn escape_single_quoted(input: &str) -> String {
    // Every escape is at most twice as long (in bytes) as the character it
    // replaces, so this capacity is never exceeded.
    let mut out = String::with_capacity(input.len() * 2);
    for ch in input.chars() {
        let replacement = match ch {
            '\'' => "\\'",
            '\\' => "\\\\",
            '\n' => "\\n",
            '\r' => "\\r",
            '\t' => "\\t",
            '/' => "\\/",
            '\u{0008}' => "\\b",
            '\u{000c}' => "\\f",
            '\u{2028}' => "\\u2028",
            '\u{2029}' => "\\u2029",
            other => {
                out.push(other);
                continue;
            }
        };
        out.push_str(replacement);
    }
    out
}
/// Decodes the backslash escape sequences in `input`.
///
/// Characters other than `\` are copied through unchanged. After a backslash
/// the following forms are recognised:
///
/// * `\a` `\b` `\f` `\n` `\r` `\t` `\v` `\0` — U+0007, U+0008, U+000C, U+000A,
///   U+000D, U+0009, U+000B and U+0000 respectively;
/// * `\\` `\'` `\"` `\/` — the character after the backslash;
/// * `\xHH` — the code point given by exactly two hex digits;
/// * `\uHHHH` — the code point given by exactly four hex digits. A high
///   surrogate (`D800`–`DBFF`) immediately followed by a `\uHHHH` low
///   surrogate (`DC00`–`DFFF`) is combined into the single character the
///   pair encodes;
/// * a backslash followed by a raw `'\n'`, `'\r'`, U+2028 or U+2029 — the
///   backslash is dropped and the line terminator is kept.
///
/// # Errors
///
/// Returns a message describing the problem when the input ends right after a
/// backslash, a `\x` / `\u` escape has too few or non-hex digits, an escape
/// yields a value that is not a valid `char` (for example a lone surrogate),
/// or the character after the backslash is not one of the forms above.
pub fn unescape(input: &str) -> Result<String, String> {
    let mut output = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();
    while let Some(ch) = chars.next() {
        if ch != '\\' {
            output.push(ch);
            continue;
        }
        let esc = chars
            .next()
            .ok_or_else(|| err("Incomplete escape at end of string"))?;
        match esc {
            'a' => output.push('\x07'),
            'n' => output.push('\n'),
            'r' => output.push('\r'),
            't' => output.push('\t'),
            'b' => output.push('\x08'),
            'f' => output.push('\x0C'),
            'v' => output.push('\x0B'),
            '0' => output.push('\0'),
            '\\' => output.push('\\'),
            '\'' => output.push('\''),
            '"' => output.push('"'),
            '/' => output.push('/'),
            // NOTE(review): ECMAScript 5.1 / JSON5 treat backslash + line
            // terminator as a line continuation that contributes nothing to
            // the string value. The existing behaviour (keep the terminator)
            // is preserved here; confirm whether that is intended.
            '\n' | '\r' | '\u{2028}' | '\u{2029}' => output.push(esc),
            'x' => {
                let val = read_hex_digits(&mut chars, 2, "\\x")?;
                output.push(char_from_u32(val)?);
            }
            'u' => {
                let unit = read_hex_digits(&mut chars, 4, "\\u")?;
                let mut code_point = unit;
                if (0xD800..=0xDBFF).contains(&unit) {
                    // A high surrogate is only meaningful as the first half of
                    // a pair. Probe for the second half on a copy of the
                    // iterator so nothing is consumed unless the pair is
                    // well-formed.
                    let mut lookahead = chars.clone();
                    if lookahead.next() == Some('\\') && lookahead.next() == Some('u') {
                        if let Ok(low) = read_hex_digits(&mut lookahead, 4, "\\u") {
                            if (0xDC00..=0xDFFF).contains(&low) {
                                code_point =
                                    0x10000 + ((unit - 0xD800) << 10) + (low - 0xDC00);
                                chars = lookahead;
                            }
                        }
                    }
                }
                // An unpaired surrogate is not a valid `char` and is rejected here.
                output.push(char_from_u32(code_point)?);
            }
            _ => {
                return Err(format!("Unknown escape character: {esc}"));
            }
        }
    }
    Ok(output)
}
/// Consumes exactly `count` characters from `chars` and interprets them as a
/// big-endian hexadecimal number.
///
/// `context` names the escape being parsed (for example `\x` or `\u`) and is
/// only used in error messages.
///
/// # Errors
///
/// Returns an error if the iterator runs out before `count` characters were
/// read, or if a character is not a hexadecimal digit. Characters read up to
/// and including the offending one stay consumed.
pub(crate) fn read_hex_digits<I: Iterator<Item = char>>(
    chars: &mut std::iter::Peekable<I>,
    count: usize,
    context: &str,
) -> Result<u32, String> {
    (0..count).try_fold(0u32, |acc, _| {
        let ch = match chars.next() {
            Some(ch) => ch,
            None => return Err(err(format!("Incomplete {context} escape"))),
        };
        match ch.to_digit(16) {
            // Each digit contributes the next four low-order bits.
            Some(nibble) => Ok((acc << 4) | nibble),
            None => Err(err(format!("Invalid hex digit '{ch}' in {context} escape"))),
        }
    })
}
/// Converts a numeric code point to a `char`.
///
/// # Errors
///
/// Returns an "Invalid Unicode code point" message (with the value in
/// upper-case hex) when `u` is not a valid `char`.
fn char_from_u32(u: u32) -> Result<char, String> {
    match std::char::from_u32(u) {
        Some(ch) => Ok(ch),
        None => Err(err(format!("Invalid Unicode code point U+{u:X}"))),
    }
}
/// Turns any string-like message into the owned `String` that the functions in
/// this file use as their error value.
fn err<S>(message: S) -> String
where
    S: Into<String>,
{
    Into::<String>::into(message)
}
/// Depth limit selected at compile time.
///
/// | build                        | value        |
/// |------------------------------|--------------|
/// | `unlimited_depth` feature on | `usize::MAX` |
/// | non-Windows, debug           | 1000         |
/// | non-Windows, release         | 3000         |
/// | Windows, debug               | 700          |
/// | Windows, release             | 2000         |
///
/// NOTE(review): presumably the maximum nesting depth the parser accepts, with
/// lower values where less stack is available — the use sites are outside
/// this view; confirm.
pub(crate) const MAX_DEPTH: usize = if cfg!(feature = "unlimited_depth") {
    usize::MAX
} else {
    match (cfg!(target_os = "windows"), cfg!(debug_assertions)) {
        (false, true) => 1000,
        (false, false) => 3000,
        (true, true) => 700,
        (true, false) => 2000,
    }
};
/// Unit tests for the position, escaping and unescaping helpers in this file.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- get_line_col_char -------------------------------------------------

    #[test]
    fn test_get_line_col_char_start_of_document() {
        assert_eq!(get_line_col_char("", 0), (1, 1, 0));
        assert_eq!(get_line_col_char("hello", 0), (1, 1, 0));
    }

    #[test]
    fn test_get_line_col_char_within_lines() {
        let doc = "ab\ncd";
        assert_eq!(get_line_col_char(doc, 1), (1, 2, 1));
        // The newline itself sits at the end of the line it terminates.
        assert_eq!(get_line_col_char(doc, 2), (1, 3, 2));
        assert_eq!(get_line_col_char(doc, 3), (2, 1, 3));
        assert_eq!(get_line_col_char(doc, 4), (2, 2, 4));
    }

    #[test]
    fn test_get_line_col_char_end_of_document() {
        assert_eq!(get_line_col_char("a", 1), (1, 2, 1));
        assert_eq!(get_line_col_char("ab\ncd", 5), (2, 3, 5));
    }

    #[test]
    fn test_get_line_col_char_counts_chars_not_bytes() {
        // 'é' occupies two bytes, so 'x' starts at byte 3 but is character 2.
        assert_eq!(get_line_col_char("é\nx", 3), (2, 1, 2));
        assert_eq!(get_line_col_char("éx", 2), (1, 2, 1));
    }

    #[test]
    #[should_panic]
    fn test_get_line_col_char_offset_out_of_range() {
        get_line_col_char("abc", 4);
    }

    // ---- escaping ----------------------------------------------------------

    #[test]
    fn test_escape_single_quote_bug() {
        let input = "Hello'World";
        let result = escape_single_quoted(input);
        let expected = "Hello\\'World";
        assert_eq!(result, expected, "Single quote should be escaped as \\' not \\\"");
    }

    #[test]
    fn test_escape_double_quoted_comprehensive() {
        let input = "Hello\"World\n\t\r\\";
        let result = escape_double_quoted(input);
        let expected = "Hello\\\"World\\n\\t\\r\\\\";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_escape_single_quoted_comprehensive() {
        let input = "Hello'World\n\t\r\\";
        let result = escape_single_quoted(input);
        let expected = "Hello\\'World\\n\\t\\r\\\\";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_escape_slash_and_control_characters() {
        assert_eq!(escape_double_quoted("a/b\u{8}\u{c}"), "a\\/b\\b\\f");
        assert_eq!(escape_single_quoted("a/b\u{8}\u{c}"), "a\\/b\\b\\f");
    }

    #[test]
    fn test_escape_unicode_line_separators() {
        assert_eq!(escape_double_quoted("\u{2028}\u{2029}"), "\\u2028\\u2029");
        assert_eq!(escape_single_quoted("\u{2028}\u{2029}"), "\\u2028\\u2029");
    }

    #[test]
    fn test_escape_leaves_other_quote_style_alone() {
        assert_eq!(escape_double_quoted("it's"), "it's");
        assert_eq!(escape_single_quoted("say \"hi\""), "say \"hi\"");
    }

    #[test]
    fn test_escape_unescape_round_trip() {
        let original = "q\" a' b\\ n\n r\r t\t /s \u{8}\u{c}\u{2028}\u{2029} ❤";
        assert_eq!(unescape(&escape_double_quoted(original)).unwrap(), original);
        assert_eq!(unescape(&escape_single_quoted(original)).unwrap(), original);
    }

    // ---- unescape ----------------------------------------------------------

    #[test]
    fn test_unescape_basic_escapes() {
        let input = "Hello\\nWorld\\t\\r\\\\";
        let result = unescape(input).unwrap();
        let expected = "Hello\nWorld\t\r\\";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_unescape_quotes() {
        let input = "He said \\\"Hello\\\" and she said \\'Hi\\'";
        let result = unescape(input).unwrap();
        let expected = "He said \"Hello\" and she said 'Hi'";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_unescape_unicode_valid() {
        let input = "Unicode: \\u0041\\u0042\\u2764";
        let result = unescape(input).unwrap();
        let expected = "Unicode: AB❤";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_unescape_hex_valid() {
        let input = "Hex: \\x41\\x42\\x21";
        let result = unescape(input).unwrap();
        let expected = "Hex: AB!";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_unescape_invalid_unicode_short() {
        let input = "Invalid: \\u12G";
        let result = unescape(input);
        assert!(result.is_err(), "Should fail on invalid unicode escape");
    }

    #[test]
    fn test_unescape_invalid_unicode_incomplete() {
        let input = "Incomplete: \\u123";
        let result = unescape(input);
        assert!(result.is_err(), "Should fail on incomplete unicode escape");
    }

    #[test]
    fn test_unescape_invalid_hex_char() {
        let input = "Invalid hex: \\xZZ";
        let result = unescape(input);
        assert!(result.is_err(), "Should fail on invalid hex escape");
    }

    #[test]
    fn test_unescape_invalid_hex_incomplete() {
        let input = "Incomplete hex: \\x1";
        let result = unescape(input);
        assert!(result.is_err(), "Should fail on incomplete hex escape");
    }

    #[test]
    fn test_unescape_unknown_escape() {
        let input = "Unknown: \\z";
        let result = unescape(input);
        assert!(result.is_err(), "Should fail on unknown escape sequence");
    }

    #[test]
    fn test_unescape_incomplete_escape_at_end() {
        let input = "Incomplete: \\";
        let result = unescape(input);
        assert!(result.is_err(), "Should fail on incomplete escape at end");
    }

    // Exercises the `\n` escape between two words; a backslash followed by a
    // raw line terminator is covered by the next test.
    #[test]
    fn test_unescape_newline_escape() {
        let input = "Line\\ncontinuation";
        let result = unescape(input).unwrap();
        let expected = "Line\ncontinuation";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_unescape_accepts_backslash_before_raw_line_terminator() {
        for terminator in ['\n', '\r', '\u{2028}', '\u{2029}'] {
            let input = format!("a\\{terminator}b");
            assert!(
                unescape(&input).is_ok(),
                "Backslash followed by {terminator:?} should be accepted"
            );
        }
    }

    #[test]
    fn test_unescape_all_special_chars() {
        let input = "\\a\\b\\f\\n\\r\\t\\v\\0\\\\\\'\\\"";
        let result = unescape(input).unwrap();
        let expected = "\x07\x08\x0C\n\r\t\x0B\0\\'\"";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_unescape_unicode_line_separators() {
        let input = "\\u2028\\u2029";
        let result = unescape(input).unwrap();
        let expected = "\u{2028}\u{2029}";
        assert_eq!(result, expected);
    }

    // ---- read_hex_digits ---------------------------------------------------

    #[test]
    fn test_read_hex_digits_valid() {
        let mut chars = "ABCD".chars().peekable();
        let result = read_hex_digits(&mut chars, 4, "\\u").unwrap();
        assert_eq!(result, 0xABCD);
    }

    #[test]
    fn test_read_hex_digits_invalid_char() {
        let mut chars = "12G4".chars().peekable();
        let result = read_hex_digits(&mut chars, 4, "\\u");
        assert!(result.is_err(), "Should fail on invalid hex character");
    }

    #[test]
    fn test_read_hex_digits_incomplete() {
        let mut chars = "12".chars().peekable();
        let result = read_hex_digits(&mut chars, 4, "\\u");
        assert!(result.is_err(), "Should fail on incomplete hex sequence");
    }
}