1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
//! AST node representation and parsing implementation for string literals.
use std::rc::Rc;
use crate::parser::{ast::{metadata::AstNodeMeta, expression::literal::escapes::unescape}, state::ParserState, util::NodeParserResult, lexer::{tokens::{TokenTy, Token}, IndexedToken}, error::{ParserError, ParserErrorVariant}};
/// The text held by a string literal once it has been read from source code.
#[derive(Debug, Clone)]
pub enum StringLiteralValue<'src> {
    /// A literal containing no escape sequences. Its text can be borrowed straight from the
    /// source code; the slice refers to the literal without its opening and closing quotation
    /// marks.
    WithoutEscapes(&'src str),
    /// A literal containing escape sequences. Resolving the escapes yields new Unicode text that
    /// no longer matches the source, so it has to be owned. The text is kept in an [`Rc`] to
    /// make cloning cheap, since it never needs to be mutated while it is in the AST.
    WithEscapes(Rc<str>),
}

impl<'src> StringLiteralValue<'src> {
    /// Borrow the literal's text as a plain string slice, whichever variant is holding it.
    pub fn as_str(&self) -> &str {
        match *self {
            // The borrowed slice is `Copy`, so it can be handed back directly.
            Self::WithoutEscapes(borrowed) => borrowed,
            // Deref coercion turns the `&Rc<str>` into a `&str`.
            Self::WithEscapes(ref shared) => shared,
        }
    }
}
/// AST node for a string literal (or format string) in source code.
#[derive(Debug)]
pub struct StringLit<'src> {
/// The metadata about this node.
pub meta: AstNodeMeta<'src>,
/// The text of the literal, stored as a [`StringLiteralValue`]: either a slice borrowed from the
/// source code or a reference counted string holding the text with its escapes resolved.
pub value: StringLiteralValue<'src>,
/// Whether this literal is a format string. Format strings are denoted using '`' instead of '"'
/// and are otherwise treated similarly to string literals.
pub is_format_string: bool,
}
impl<'src> StringLit<'src> {
    /// Parse a string literal from source code.
    ///
    /// This function only peeks at the parser state's lexer; it never consumes a token itself.
    ///
    /// # Errors
    ///
    /// - [`ParserErrorVariant::Expected`] if there is no next token, or if the next token is
    ///   not a [`TokenTy::StringLit`].
    /// - [`ParserErrorVariant::UnterminatedStringLiteral`] if the next token is a string literal
    ///   that is not terminated.
    ///
    /// # Panics
    ///
    /// Handling of terminated string literals is not finished: after the literal's body has been
    /// passed to `unescape`, this function currently always panics via `unimplemented!()`.
    pub fn parse(parser_state: &mut ParserState<'src>) -> NodeParserResult<Self> {
        // Peek the type of the next token or error out if there is not one.
        let peeked_token_ty = parser_state
            .peek_token_ty()
            // Copy the token type out of the reference so that the borrow of the parser state
            // ends here and the state can be used again below.
            .copied()
            // Build the error lazily: the byte range is only looked up when there really is no
            // next token.
            .ok_or_else(|| ParserError {
                byte_range: parser_state.peek_byte_range(),
                ty: ParserErrorVariant::Expected("string literal"),
            })?;

        // Match on the next token type available from the lexer.
        match peeked_token_ty {
            // Unterminated string literals produce an error.
            TokenTy::StringLit { is_terminated: false, .. } => Err(parser_state
                .peek_byte_range_into_error(ParserErrorVariant::UnterminatedStringLiteral)),

            // Terminated string literals produce a value.
            // TODO: `_is_format` is not used yet; it is presumably meant to populate
            // `is_format_string` once the node is constructed.
            TokenTy::StringLit { is_format: _is_format, .. } => {
                // Peek the important parts of the token.
                let IndexedToken { index, token: Token { length, .. } } = *parser_state
                    .peek_token()
                    .expect("a token type was just peeked, so the token itself must be available");

                // Get the associated part of source code, making an immutable reference into the
                // parser state.
                let full_matching_source: &str = &parser_state.source[index..index + length];

                // Get a reference to the body of the string literal itself (without the quotes,
                // or backticks for format strings). Each delimiter is one byte long.
                let string_lit_body: &str =
                    &full_matching_source[1..(full_matching_source.len() - 1)];

                // Try to unescape the string literal.
                // TODO: neither outcome is handled yet; both arms are placeholders.
                match unescape(string_lit_body) {
                    Ok(_str_lit_value) => {}
                    Err(_str_lit_errors) => {}
                }

                unimplemented!()
            }

            // All other token types produce an error.
            _ => Err(parser_state
                .peek_byte_range_into_error(ParserErrorVariant::Expected("string literal"))),
        }
    }
}