1use std::ops::Range;
4use std::{borrow::Cow, num::ParseIntError};
5
6use rustc_literal_escaper::{
7 EscapeError, MixedUnit, unescape_byte, unescape_byte_str, unescape_c_str, unescape_char,
8 unescape_str,
9};
10use stdx::always;
11
12use crate::{
13 TextRange, TextSize,
14 ast::{self, AstToken},
15};
16
17impl ast::Comment {
18 pub fn kind(&self) -> CommentKind {
19 CommentKind::from_text(self.text())
20 }
21
22 pub fn is_doc(&self) -> bool {
23 self.kind().doc.is_some()
24 }
25
26 pub fn is_inner(&self) -> bool {
27 self.kind().doc == Some(CommentPlacement::Inner)
28 }
29
30 pub fn is_outer(&self) -> bool {
31 self.kind().doc == Some(CommentPlacement::Outer)
32 }
33
34 pub fn prefix(&self) -> &'static str {
35 let &(prefix, _kind) = CommentKind::BY_PREFIX
36 .iter()
37 .find(|&(prefix, kind)| self.kind() == *kind && self.text().starts_with(prefix))
38 .unwrap();
39 prefix
40 }
41
42 pub fn doc_comment(&self) -> Option<&str> {
45 let kind = self.kind();
46 match kind {
47 CommentKind { shape, doc: Some(_) } => {
48 let prefix = kind.prefix();
49 let text = &self.text()[prefix.len()..];
50 let text = if shape == CommentShape::Block {
51 text.strip_suffix("*/").unwrap_or(text)
52 } else {
53 text
54 };
55 Some(text)
56 }
57 _ => None,
58 }
59 }
60}
61
/// Classification of a comment: its syntactic shape plus, for doc comments, its placement.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct CommentKind {
    /// Whether this is a `//` line comment or a `/* */` block comment.
    pub shape: CommentShape,
    /// `Some(..)` for doc comments, `None` for ordinary comments.
    pub doc: Option<CommentPlacement>,
}

/// Syntactic shape of a comment.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum CommentShape {
    /// A `//`-style comment.
    Line,
    /// A `/* */`-style comment.
    Block,
}

impl CommentShape {
    /// Returns `true` for [`CommentShape::Line`].
    pub fn is_line(self) -> bool {
        matches!(self, CommentShape::Line)
    }

    /// Returns `true` for [`CommentShape::Block`].
    pub fn is_block(self) -> bool {
        matches!(self, CommentShape::Block)
    }
}

/// Placement of a doc comment relative to the item it documents.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum CommentPlacement {
    /// `//!` or `/*!`.
    Inner,
    /// `///` or `/**`.
    Outer,
}

impl CommentKind {
    /// Prefix table used for classification.
    ///
    /// Order matters: lookups take the first matching entry, so longer prefixes that are *not*
    /// doc comments (`/**/`, `/***`, `////`) come before the doc prefixes they would otherwise
    /// be mistaken for, and the bare `//` / `/*` fallbacks come last.
    const BY_PREFIX: [(&'static str, CommentKind); 9] = [
        ("/**/", CommentKind { shape: CommentShape::Block, doc: None }),
        ("/***", CommentKind { shape: CommentShape::Block, doc: None }),
        ("////", CommentKind { shape: CommentShape::Line, doc: None }),
        ("///", CommentKind { shape: CommentShape::Line, doc: Some(CommentPlacement::Outer) }),
        ("//!", CommentKind { shape: CommentShape::Line, doc: Some(CommentPlacement::Inner) }),
        ("/**", CommentKind { shape: CommentShape::Block, doc: Some(CommentPlacement::Outer) }),
        ("/*!", CommentKind { shape: CommentShape::Block, doc: Some(CommentPlacement::Inner) }),
        ("//", CommentKind { shape: CommentShape::Line, doc: None }),
        ("/*", CommentKind { shape: CommentShape::Block, doc: None }),
    ];

    /// Classifies comment `text` by the first table prefix it starts with.
    ///
    /// # Panics
    ///
    /// Panics if `text` starts with none of the table prefixes, i.e. with neither `//` nor `/*`.
    pub(crate) fn from_text(text: &str) -> CommentKind {
        Self::BY_PREFIX
            .iter()
            .find_map(|&(prefix, kind)| text.starts_with(prefix).then_some(kind))
            .unwrap()
    }

    /// Returns the canonical prefix for this kind.
    ///
    /// The table is searched back to front, so ordinary comments map to the short `//` / `/*`
    /// forms rather than to `////`, `/***` or `/**/`.
    pub fn prefix(&self) -> &'static str {
        Self::BY_PREFIX
            .iter()
            .rev()
            .find_map(|&(prefix, kind)| (kind == *self).then_some(prefix))
            .unwrap()
    }
}
117
118impl ast::Whitespace {
119 pub fn spans_multiple_lines(&self) -> bool {
120 let text = self.text();
121 text.find('\n').is_some_and(|idx| text[idx + 1..].contains('\n'))
122 }
123}
124
125#[derive(Debug)]
126pub struct QuoteOffsets {
127 pub quotes: (TextRange, TextRange),
128 pub contents: TextRange,
129}
130
131impl QuoteOffsets {
132 fn new(literal: &str) -> Option<QuoteOffsets> {
133 let left_quote = literal.find('"')?;
134 let right_quote = literal.rfind('"')?;
135 if left_quote == right_quote {
136 return None;
138 }
139
140 let start = TextSize::from(0);
141 let left_quote = TextSize::try_from(left_quote).unwrap() + TextSize::of('"');
142 let right_quote = TextSize::try_from(right_quote).unwrap();
143 let end = TextSize::of(literal);
144
145 let res = QuoteOffsets {
146 quotes: (TextRange::new(start, left_quote), TextRange::new(right_quote, end)),
147 contents: TextRange::new(left_quote, right_quote),
148 };
149 Some(res)
150 }
151}
152
153pub trait IsString: AstToken {
154 const RAW_PREFIX: &'static str;
155 fn unescape(s: &str, callback: impl FnMut(Range<usize>, Result<char, EscapeError>));
156 fn is_raw(&self) -> bool {
157 self.text().starts_with(Self::RAW_PREFIX)
158 }
159 fn quote_offsets(&self) -> Option<QuoteOffsets> {
160 let text = self.text();
161 let offsets = QuoteOffsets::new(text)?;
162 let o = self.syntax().text_range().start();
163 let offsets = QuoteOffsets {
164 quotes: (offsets.quotes.0 + o, offsets.quotes.1 + o),
165 contents: offsets.contents + o,
166 };
167 Some(offsets)
168 }
169 fn text_range_between_quotes(&self) -> Option<TextRange> {
170 self.quote_offsets().map(|it| it.contents)
171 }
172 fn text_without_quotes(&self) -> &str {
173 let text = self.text();
174 let Some(offsets) = self.text_range_between_quotes() else { return text };
175 &text[offsets - self.syntax().text_range().start()]
176 }
177 fn open_quote_text_range(&self) -> Option<TextRange> {
178 self.quote_offsets().map(|it| it.quotes.0)
179 }
180 fn close_quote_text_range(&self) -> Option<TextRange> {
181 self.quote_offsets().map(|it| it.quotes.1)
182 }
183 fn escaped_char_ranges(&self, cb: &mut dyn FnMut(TextRange, Result<char, EscapeError>)) {
184 let Some(text_range_no_quotes) = self.text_range_between_quotes() else { return };
185
186 let start = self.syntax().text_range().start();
187 let text = &self.text()[text_range_no_quotes - start];
188 let offset = text_range_no_quotes.start() - start;
189
190 Self::unescape(text, &mut |range: Range<usize>, unescaped_char| {
191 if let Some((s, e)) = range.start.try_into().ok().zip(range.end.try_into().ok()) {
192 cb(TextRange::new(s, e) + offset, unescaped_char);
193 }
194 });
195 }
196 fn map_range_up(&self, range: TextRange) -> Option<TextRange> {
197 let contents_range = self.text_range_between_quotes()?;
198 if always!(TextRange::up_to(contents_range.len()).contains_range(range)) {
199 Some(range + contents_range.start())
200 } else {
201 None
202 }
203 }
204}
205
206impl IsString for ast::String {
207 const RAW_PREFIX: &'static str = "r";
208 fn unescape(s: &str, cb: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
209 unescape_str(s, cb)
210 }
211}
212
213impl ast::String {
214 pub fn value(&self) -> Result<Cow<'_, str>, EscapeError> {
215 let text = self.text();
216 let text_range = self.text_range_between_quotes().ok_or(EscapeError::LoneSlash)?;
217 let text = &text[text_range - self.syntax().text_range().start()];
218 if self.is_raw() {
219 return Ok(Cow::Borrowed(text));
220 }
221
222 let mut buf = String::new();
223 let mut prev_end = 0;
224 let mut has_error = None;
225 unescape_str(text, |char_range, unescaped_char| {
226 match (unescaped_char, buf.capacity() == 0) {
227 (Ok(c), false) => buf.push(c),
228 (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
229 prev_end = char_range.end
230 }
231 (Ok(c), true) => {
232 buf.reserve_exact(text.len());
233 buf.push_str(&text[..prev_end]);
234 buf.push(c);
235 }
236 (Err(e), _) => has_error = Some(e),
237 }
238 });
239
240 match (has_error, buf.capacity() == 0) {
241 (Some(e), _) => Err(e),
242 (None, true) => Ok(Cow::Borrowed(text)),
243 (None, false) => Ok(Cow::Owned(buf)),
244 }
245 }
246}
247
248impl IsString for ast::ByteString {
249 const RAW_PREFIX: &'static str = "br";
250 fn unescape(s: &str, mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
251 unescape_byte_str(s, |range, res| callback(range, res.map(char::from)))
252 }
253}
254
255impl ast::ByteString {
256 pub fn value(&self) -> Result<Cow<'_, [u8]>, EscapeError> {
257 let text = self.text();
258 let text_range = self.text_range_between_quotes().ok_or(EscapeError::LoneSlash)?;
259 let text = &text[text_range - self.syntax().text_range().start()];
260 if self.is_raw() {
261 return Ok(Cow::Borrowed(text.as_bytes()));
262 }
263
264 let mut buf: Vec<u8> = Vec::new();
265 let mut prev_end = 0;
266 let mut has_error = None;
267 unescape_byte_str(text, |char_range, unescaped_byte| {
268 match (unescaped_byte, buf.capacity() == 0) {
269 (Ok(b), false) => buf.push(b),
270 (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
271 prev_end = char_range.end
272 }
273 (Ok(b), true) => {
274 buf.reserve_exact(text.len());
275 buf.extend_from_slice(&text.as_bytes()[..prev_end]);
276 buf.push(b);
277 }
278 (Err(e), _) => has_error = Some(e),
279 }
280 });
281
282 match (has_error, buf.capacity() == 0) {
283 (Some(e), _) => Err(e),
284 (None, true) => Ok(Cow::Borrowed(text.as_bytes())),
285 (None, false) => Ok(Cow::Owned(buf)),
286 }
287 }
288}
289
290impl IsString for ast::CString {
291 const RAW_PREFIX: &'static str = "cr";
292 fn unescape(s: &str, mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
295 unescape_c_str(s, |range, _res| callback(range, Ok('_')))
296 }
297}
298
299impl ast::CString {
300 pub fn value(&self) -> Result<Cow<'_, [u8]>, EscapeError> {
301 let text = self.text();
302 let text_range = self.text_range_between_quotes().ok_or(EscapeError::LoneSlash)?;
303 let text = &text[text_range - self.syntax().text_range().start()];
304 if self.is_raw() {
305 return Ok(Cow::Borrowed(text.as_bytes()));
306 }
307
308 let mut buf = Vec::new();
309 let mut prev_end = 0;
310 let mut has_error = None;
311 let extend_unit = |buf: &mut Vec<u8>, unit: MixedUnit| match unit {
312 MixedUnit::Char(c) => buf.extend(c.encode_utf8(&mut [0; 4]).as_bytes()),
313 MixedUnit::HighByte(b) => buf.push(b),
314 };
315 unescape_c_str(text, |char_range, unescaped| match (unescaped, buf.capacity() == 0) {
316 (Ok(u), false) => extend_unit(&mut buf, u),
317 (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
318 prev_end = char_range.end
319 }
320 (Ok(u), true) => {
321 buf.reserve_exact(text.len());
322 buf.extend(&text.as_bytes()[..prev_end]);
323 extend_unit(&mut buf, u);
324 }
325 (Err(e), _) => has_error = Some(e),
326 });
327
328 match (has_error, buf.capacity() == 0) {
329 (Some(e), _) => Err(e),
330 (None, true) => Ok(Cow::Borrowed(text.as_bytes())),
331 (None, false) => Ok(Cow::Owned(buf)),
332 }
333 }
334}
335
336impl ast::IntNumber {
337 pub fn radix(&self) -> Radix {
338 match self.text().get(..2).unwrap_or_default() {
339 "0b" => Radix::Binary,
340 "0o" => Radix::Octal,
341 "0x" => Radix::Hexadecimal,
342 _ => Radix::Decimal,
343 }
344 }
345
346 pub fn split_into_parts(&self) -> (&str, &str, &str) {
347 let radix = self.radix();
348 let (prefix, mut text) = self.text().split_at(radix.prefix_len());
349
350 let is_suffix_start: fn(&(usize, char)) -> bool = match radix {
351 Radix::Hexadecimal => |(_, c)| matches!(c, 'g'..='z' | 'G'..='Z'),
352 _ => |(_, c)| c.is_ascii_alphabetic(),
353 };
354
355 let mut suffix = "";
356 if let Some((suffix_start, _)) = text.char_indices().find(is_suffix_start) {
357 let (text2, suffix2) = text.split_at(suffix_start);
358 text = text2;
359 suffix = suffix2;
360 };
361
362 (prefix, text, suffix)
363 }
364
365 pub fn value(&self) -> Result<u128, ParseIntError> {
366 let (_, text, _) = self.split_into_parts();
367 u128::from_str_radix(&text.replace('_', ""), self.radix() as u32)
368 }
369
370 pub fn suffix(&self) -> Option<&str> {
371 let (_, _, suffix) = self.split_into_parts();
372 if suffix.is_empty() { None } else { Some(suffix) }
373 }
374
375 pub fn value_string(&self) -> String {
376 let (_, text, _) = self.split_into_parts();
377 text.replace('_', "")
378 }
379}
380
381impl ast::FloatNumber {
382 pub fn split_into_parts(&self) -> (&str, &str) {
383 let text = self.text();
384 let mut float_text = self.text();
385 let mut suffix = "";
386 let mut indices = text.char_indices();
387 if let Some((mut suffix_start, c)) = indices.by_ref().find(|(_, c)| c.is_ascii_alphabetic())
388 {
389 if c == 'e' || c == 'E' {
390 if let Some(suffix_start_tuple) = indices.find(|(_, c)| c.is_ascii_alphabetic()) {
391 suffix_start = suffix_start_tuple.0;
392
393 float_text = &text[..suffix_start];
394 suffix = &text[suffix_start..];
395 }
396 } else {
397 float_text = &text[..suffix_start];
398 suffix = &text[suffix_start..];
399 }
400 }
401
402 (float_text, suffix)
403 }
404
405 pub fn suffix(&self) -> Option<&str> {
406 let (_, suffix) = self.split_into_parts();
407 if suffix.is_empty() { None } else { Some(suffix) }
408 }
409
410 pub fn value_string(&self) -> String {
411 let (text, _) = self.split_into_parts();
412 text.replace('_', "")
413 }
414}
415
/// Radix of an integer literal; each discriminant is the numeric base.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum Radix {
    /// Base 2.
    Binary = 2,
    /// Base 8.
    Octal = 8,
    /// Base 10.
    Decimal = 10,
    /// Base 16.
    Hexadecimal = 16,
}

impl Radix {
    /// Every radix, in ascending order of base.
    pub const ALL: &'static [Radix] =
        &[Self::Binary, Self::Octal, Self::Decimal, Self::Hexadecimal];

    /// Length in bytes of the literal prefix for this radix (decimal literals have none).
    const fn prefix_len(self) -> usize {
        match self {
            Self::Binary | Self::Octal | Self::Hexadecimal => 2,
            Self::Decimal => 0,
        }
    }
}
435
436impl ast::Char {
437 pub fn value(&self) -> Result<char, EscapeError> {
438 let mut text = self.text();
439 if text.starts_with('\'') {
440 text = &text[1..];
441 } else {
442 return Err(EscapeError::ZeroChars);
443 }
444 if text.ends_with('\'') {
445 text = &text[0..text.len() - 1];
446 }
447
448 unescape_char(text)
449 }
450}
451
452impl ast::Byte {
453 pub fn value(&self) -> Result<u8, EscapeError> {
454 let mut text = self.text();
455 if text.starts_with("b\'") {
456 text = &text[2..];
457 } else {
458 return Err(EscapeError::ZeroChars);
459 }
460 if text.ends_with('\'') {
461 text = &text[0..text.len() - 1];
462 }
463
464 unescape_byte(text)
465 }
466}
467
#[cfg(test)]
mod tests {
    use rustc_apfloat::ieee::Quad as f128;

    use crate::ast::{self, FloatNumber, IntNumber, make};

    /// Asserts the suffix reported for `lit` when it is treated as a float literal.
    fn check_float_suffix<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let token = FloatNumber { syntax: make::tokens::literal(lit) };
        assert_eq!(token.suffix(), expected.into());
    }

    /// Asserts the suffix reported for `lit` when it is treated as an integer literal.
    fn check_int_suffix<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let token = IntNumber { syntax: make::tokens::literal(lit) };
        assert_eq!(token.suffix(), expected.into());
    }

    /// Asserts that the `value_string()` of `lit`, read both as a float and as an integer
    /// token, parses to the same quad-precision value as `expected`.
    fn check_float_value(lit: &str, expected: &str) {
        let want = Some(expected.parse::<f128>().unwrap());

        let float_digits = FloatNumber { syntax: make::tokens::literal(lit) }.value_string();
        assert_eq!(float_digits.parse::<f128>().ok(), want);

        let int_digits = IntNumber { syntax: make::tokens::literal(lit) }.value_string();
        assert_eq!(int_digits.parse::<f128>().ok(), want);
    }

    /// Asserts the parsed value of `lit` as an integer literal (`None` if parsing fails).
    fn check_int_value(lit: &str, expected: impl Into<Option<u128>>) {
        let token = IntNumber { syntax: make::tokens::literal(lit) };
        assert_eq!(token.value().ok(), expected.into());
    }

    #[test]
    fn test_float_number_suffix() {
        check_float_suffix("123.0", None);
        check_float_suffix("123f32", "f32");
        check_float_suffix("123.0e", None);
        check_float_suffix("123.0e4", None);
        check_float_suffix("123.0ef16", "f16");
        check_float_suffix("123.0E4f32", "f32");
        check_float_suffix("1_2_3.0_f128", "f128");
    }

    #[test]
    fn test_int_number_suffix() {
        check_int_suffix("123", None);
        check_int_suffix("123i32", "i32");
        check_int_suffix("1_0_1_l_o_l", "l_o_l");
        check_int_suffix("0b11", None);
        check_int_suffix("0o11", None);
        check_int_suffix("0xff", None);
        check_int_suffix("0b11u32", "u32");
        check_int_suffix("0o11u32", "u32");
        check_int_suffix("0xffu32", "u32");
    }

    /// Wraps `lit` in double quotes and asserts the unescaped value (`None` on error).
    fn check_string_value<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let token = ast::String { syntax: make::tokens::literal(&format!("\"{lit}\"")) };
        let value = token.value();
        assert_eq!(value.as_deref().ok(), expected.into());
    }

    #[test]
    fn test_string_escape() {
        check_string_value(r"foobar", "foobar");
        check_string_value(r"\foobar", None);
        check_string_value(r"\nfoobar", "\nfoobar");
        check_string_value(r"C:\\Windows\\System32\\", "C:\\Windows\\System32\\");
        check_string_value(r"\x61bcde", "abcde");
        check_string_value(
            r"a\
bcde", "abcde",
        );
    }

    /// Wraps `lit` in `b"…"` and asserts the unescaped bytes (`None` on error).
    fn check_byte_string_value<'a, const N: usize>(
        lit: &str,
        expected: impl Into<Option<&'a [u8; N]>>,
    ) {
        let token = ast::ByteString { syntax: make::tokens::literal(&format!("b\"{lit}\"")) };
        let value = token.value();
        assert_eq!(value.as_deref().ok(), expected.into().map(|value| &value[..]));
    }

    #[test]
    fn test_byte_string_escape() {
        check_byte_string_value(r"foobar", b"foobar");
        check_byte_string_value(r"\foobar", None::<&[u8; 0]>);
        check_byte_string_value(r"\nfoobar", b"\nfoobar");
        check_byte_string_value(r"C:\\Windows\\System32\\", b"C:\\Windows\\System32\\");
        check_byte_string_value(r"\x61bcde", b"abcde");
        check_byte_string_value(
            r"a\
bcde", b"abcde",
        );
    }

    #[test]
    fn test_value_underscores() {
        check_float_value("1.3_4665449586950493453___6_f128", "1.346654495869504934536");
        check_float_value("1.234567891011121_f64", "1.234567891011121");
        check_float_value("1__0.__0__f32", "10.0");
        check_float_value("3._0_f16", "3.0");
        check_int_value("0b__1_0_", 2);
        check_int_value("1_1_1_1_1_1", 111111);
    }
}