use logos::Skip;
use super::Token;
/// Returns the text of the current token with its `///` marker removed.
///
/// After the marker, at most one leading space is dropped, and any trailing
/// whitespace is trimmed. If the token does not start with `///`, the whole
/// slice is treated as the body.
pub(super) fn parse_doc_comment(lex: &logos::Lexer<'_, Token>) -> String {
    let token_text = lex.slice();
    let after_marker = token_text.strip_prefix("///").unwrap_or(token_text);
    after_marker
        .strip_prefix(' ')
        .unwrap_or(after_marker)
        .trim_end()
        .to_owned()
}
/// Returns the text of the current token with its `//!` marker removed.
///
/// After the marker, at most one leading space is dropped, and any trailing
/// whitespace is trimmed. If the token does not start with `//!`, the whole
/// slice is treated as the body.
pub(super) fn parse_inner_doc_comment(lex: &logos::Lexer<'_, Token>) -> String {
    let token_text = lex.slice();
    let after_marker = token_text.strip_prefix("//!").unwrap_or(token_text);
    after_marker
        .strip_prefix(' ')
        .unwrap_or(after_marker)
        .trim_end()
        .to_owned()
}
/// Consumes the rest of a (possibly nested) block comment and skips it.
///
/// Scanning starts at the lexer's remainder with a nesting depth of 1, so
/// this is presumably invoked right after the opening `/*` has been matched
/// (NOTE(review): confirm against the token definition). Every further `/*`
/// raises the depth and every `*/` lowers it; when the depth reaches zero the
/// lexer is bumped past the closing delimiter.
///
/// If the input ends first, the range from the current token's start to the
/// end of the input is pushed onto `lex.extras.unterminated_block_comments`
/// and the whole remainder is consumed.
pub(super) fn skip_block_comment(lex: &mut logos::Lexer<'_, Token>) -> Skip {
    let rest = lex.remainder().as_bytes();
    let total = rest.len();
    let mut nesting: usize = 1;
    let mut pos: usize = 0;

    while pos < total {
        let current = rest.get(pos).copied();
        // `None` when `pos` is the final byte, so no delimiter can match there.
        let following = rest.get(pos.saturating_add(1)).copied();
        match (current, following) {
            (Some(b'/'), Some(b'*')) => {
                nesting = nesting.saturating_add(1);
                pos = pos.saturating_add(2);
            }
            (Some(b'*'), Some(b'/')) => {
                nesting = nesting.saturating_sub(1);
                pos = pos.saturating_add(2);
                if nesting == 0 {
                    // `pos` now sits just past the matching `*/`.
                    lex.bump(pos);
                    return Skip;
                }
            }
            _ => pos = pos.saturating_add(1),
        }
    }

    // Ran out of input before the comment was closed: record it and swallow
    // everything that is left.
    let opener = lex.span();
    let comment_end = opener.end.saturating_add(total);
    lex.extras
        .unterminated_block_comments
        .push((opener.start, comment_end));
    lex.bump(total);
    Skip
}
/// Parses the text of a numeric literal into a `NumberLiteral`.
///
/// A trailing type suffix is split off first via `strip_numeric_suffix`.
/// The remaining text is classified as a float when it contains `.`, `e` or
/// `E`, and as an integer otherwise. `_` separators are removed before
/// parsing. Integers are parsed as `i128` and floats as `f64`.
///
/// Returns `None` when the cleaned text fails to parse as the chosen kind.
pub(super) fn parse_number(s: &str) -> Option<crate::ast::NumberLiteral> {
    use crate::ast::{NumberLiteral, NumberSourceKind, NumberValue};

    let (digits, suffix) = strip_numeric_suffix(s);
    let is_float = digits.chars().any(|c| matches!(c, '.' | 'e' | 'E'));
    let without_separators: String = digits.chars().filter(|&c| c != '_').collect();

    let (kind, value) = if is_float {
        let parsed = without_separators.parse::<f64>().ok()?;
        (NumberSourceKind::Float, NumberValue::Float(parsed))
    } else {
        let parsed = without_separators.parse::<i128>().ok()?;
        (NumberSourceKind::Integer, NumberValue::Integer(parsed))
    };

    Some(NumberLiteral::from_lex(value, suffix, kind))
}
/// Splits a recognised type suffix off the end of a numeric literal.
///
/// The suffixes `I32`, `I64`, `F32` and `F64` are tried in that order.
/// Returns the text before the suffix together with the matching
/// `NumericSuffix`, or the input unchanged and `None` when no suffix matches.
fn strip_numeric_suffix(s: &str) -> (&str, Option<crate::ast::NumericSuffix>) {
    use crate::ast::NumericSuffix as N;

    const SUFFIXES: [(&str, N); 4] = [
        ("I32", N::I32),
        ("I64", N::I64),
        ("F32", N::F32),
        ("F64", N::F64),
    ];

    for &(text, suffix) in SUFFIXES.iter() {
        if let Some(digits) = s.strip_suffix(text) {
            return (digits, Some(suffix));
        }
    }
    (s, None)
}
/// Decodes the current token as a `"`-delimited string literal.
///
/// The surrounding quotes are removed (a slice lacking either quote yields an
/// empty body), escape sequences are expanded by `process_escapes`, and any
/// invalid unicode escapes it reports are recorded on the lexer via
/// `record_bad_escapes`.
pub(super) fn parse_string(lex: &mut logos::Lexer<'_, Token>) -> String {
    let token_text = lex.slice();
    let unquoted = token_text
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .unwrap_or("");
    let (decoded, invalid) = process_escapes(unquoted);
    record_bad_escapes(lex, invalid);
    decoded
}
/// Decodes the current token as a `"""`-delimited string literal.
///
/// The surrounding triple quotes are removed (a slice lacking either
/// delimiter yields an empty body), escape sequences are expanded by
/// `process_escapes`, and any invalid unicode escapes it reports are recorded
/// on the lexer via `record_bad_escapes`.
pub(super) fn parse_multiline_string(lex: &mut logos::Lexer<'_, Token>) -> String {
    let token_text = lex.slice();
    let unquoted = token_text
        .strip_prefix("\"\"\"")
        .and_then(|rest| rest.strip_suffix("\"\"\""))
        .unwrap_or("");
    let (decoded, invalid) = process_escapes(unquoted);
    record_bad_escapes(lex, invalid);
    decoded
}
/// Records each invalid unicode escape against the current token's span.
///
/// For every entry in `bad`, a `(span start, span end, hex text)` tuple is
/// pushed onto `lex.extras.invalid_unicode_escapes`. Does nothing when `bad`
/// is empty.
fn record_bad_escapes(lex: &mut logos::Lexer<'_, Token>, bad: Vec<String>) {
    if !bad.is_empty() {
        let token_span = lex.span();
        let (start, end) = (token_span.start, token_span.end);
        let sink = &mut lex.extras.invalid_unicode_escapes;
        for hex in bad {
            sink.push((start, end, hex));
        }
    }
}
/// Expands backslash escapes in the body of a string literal.
///
/// Recognised escapes are `\"`, `\\`, `\n`, `\t`, `\r` and `\uXXXX`, where
/// `XXXX` must be exactly four ASCII hexadecimal digits naming a valid Unicode
/// scalar value. Any other escape is kept verbatim (backslash included), and a
/// lone trailing backslash is kept as well.
///
/// Returns the decoded text together with the hex text of every rejected
/// `\u` escape. Each rejected escape is replaced by U+FFFD in the output.
fn process_escapes(s: &str) -> (String, Vec<String>) {
    let mut result = String::with_capacity(s.len());
    let mut bad_escapes: Vec<String> = Vec::new();
    let mut chars = s.chars();

    while let Some(ch) = chars.next() {
        if ch != '\\' {
            result.push(ch);
            continue;
        }
        match chars.next() {
            Some(c @ ('"' | '\\')) => result.push(c),
            Some('n') => result.push('\n'),
            Some('t') => result.push('\t'),
            Some('r') => result.push('\r'),
            Some('u') => {
                let hex: String = chars.by_ref().take(4).collect();
                // `u32::from_str_radix` alone is too lenient: it accepts a
                // leading `+` and any shorter run of digits, so a malformed or
                // truncated escape would silently decode. Insist on exactly
                // four hex digits before converting.
                let well_formed = hex.len() == 4 && hex.bytes().all(|b| b.is_ascii_hexdigit());
                let decoded = if well_formed {
                    u32::from_str_radix(&hex, 16).ok().and_then(char::from_u32)
                } else {
                    None
                };
                match decoded {
                    Some(unicode_char) => result.push(unicode_char),
                    None => {
                        // Malformed digits or a non-scalar value (e.g. a
                        // surrogate): report it and substitute U+FFFD.
                        bad_escapes.push(hex);
                        result.push('\u{FFFD}');
                    }
                }
            }
            Some(c) => {
                result.push('\\');
                result.push(c);
            }
            None => result.push('\\'),
        }
    }

    (result, bad_escapes)
}
/// Splits the text of a regex literal of the form `r/pattern/flags`.
///
/// The pattern is everything between the `r/` prefix and the last `/`, and
/// the flags are whatever follows that last `/` (possibly empty). Returns
/// `None` when the `r/` prefix is missing or no `/` follows it.
#[must_use]
pub fn parse_regex(s: &str) -> Option<(String, String)> {
    let body = s.strip_prefix("r/")?;
    // Split at the final slash so that slashes inside the pattern are kept.
    let (pattern, flags) = body.rsplit_once('/')?;
    Some((pattern.to_owned(), flags.to_owned()))
}