1use ratatui::style::Style;
18use ratatui::text::Span;
19
20use crate::theme::Theme;
21
22pub fn highlight_line(line: &str, lang: &str, theme: Theme) -> Vec<Span<'static>> {
24 let grammar = Grammar::for_lang(lang);
25 if grammar.is_none() {
26 return vec![Span::styled(line.to_string(), theme.code_style())];
27 }
28 let grammar = grammar.unwrap();
29 scan(line, grammar, theme)
30}
31
/// Static description of how one language family is tokenized for
/// highlighting.
#[derive(Clone, Copy, Debug)]
struct Grammar {
    /// Words rendered with the theme's keyword style.
    keywords: &'static [&'static str],
    /// Words rendered with the theme's type style; consulted only when the
    /// word is not in `keywords`.
    types: &'static [&'static str],
    /// Prefixes that turn the remainder of the line into a comment.
    line_comments: &'static [&'static str],
    /// Characters that open a string literal; the same character closes it.
    strings: &'static [char],
    /// Whether `keywords` and `types` must match with exact case
    /// (otherwise the comparison ignores ASCII case).
    case_sensitive: bool,
    /// Whether an otherwise plain identifier directly followed by `(` is
    /// rendered with the theme's function style.
    fn_call_highlight: bool,
}
46
47impl Grammar {
48 fn for_lang(raw: &str) -> Option<&'static Self> {
49 let lang = raw.trim().to_ascii_lowercase();
50 let key = lang
51 .split(|c: char| c == ',' || c.is_whitespace())
52 .next()
53 .unwrap_or("");
54 match key {
55 "rust" | "rs" => Some(&RUST),
56 "python" | "py" => Some(&PYTHON),
57 "js" | "javascript" | "jsx" | "ts" | "typescript" | "tsx" => Some(&JS),
58 "go" => Some(&GO),
59 "bash" | "sh" | "shell" | "zsh" => Some(&BASH),
60 "json" => Some(&JSON),
61 "yaml" | "yml" => Some(&YAML),
62 "toml" | "ini" => Some(&TOML),
63 "html" | "xml" | "svg" => Some(&HTML),
64 "c" | "h" => Some(&C),
65 "cpp" | "c++" | "hpp" | "cxx" | "hxx" => Some(&CPP),
66 "java" => Some(&JAVA),
67 _ => None,
68 }
69 }
70}
71
72static RUST: Grammar = Grammar {
73 keywords: &[
74 "as", "async", "await", "break", "const", "continue", "crate", "dyn", "else", "enum",
75 "extern", "false", "fn", "for", "if", "impl", "in", "let", "loop", "match", "mod", "move",
76 "mut", "pub", "ref", "return", "self", "Self", "static", "struct", "super", "trait",
77 "true", "type", "union", "unsafe", "use", "where", "while", "yield",
78 ],
79 types: &[
80 "bool", "char", "f32", "f64", "i8", "i16", "i32", "i64", "i128", "isize", "u8", "u16",
81 "u32", "u64", "u128", "usize", "str", "String", "Vec", "Option", "Result", "Box", "Rc",
82 "Arc", "HashMap", "BTreeMap",
83 ],
84 line_comments: &["//"],
85 strings: &['"'],
86 case_sensitive: true,
87 fn_call_highlight: true,
88};
89
90static PYTHON: Grammar = Grammar {
91 keywords: &[
92 "False", "None", "True", "and", "as", "assert", "async", "await", "break", "class",
93 "continue", "def", "del", "elif", "else", "except", "finally", "for", "from", "global",
94 "if", "import", "in", "is", "lambda", "nonlocal", "not", "or", "pass", "raise", "return",
95 "try", "while", "with", "yield", "match", "case",
96 ],
97 types: &[
98 "int", "float", "str", "bool", "list", "dict", "tuple", "set", "bytes",
99 ],
100 line_comments: &["#"],
101 strings: &['"', '\''],
102 case_sensitive: true,
103 fn_call_highlight: true,
104};
105
106static JS: Grammar = Grammar {
107 keywords: &[
108 "async",
109 "await",
110 "break",
111 "case",
112 "catch",
113 "class",
114 "const",
115 "continue",
116 "debugger",
117 "default",
118 "delete",
119 "do",
120 "else",
121 "enum",
122 "export",
123 "extends",
124 "false",
125 "finally",
126 "for",
127 "function",
128 "if",
129 "import",
130 "in",
131 "instanceof",
132 "interface",
133 "let",
134 "new",
135 "null",
136 "of",
137 "return",
138 "static",
139 "super",
140 "switch",
141 "this",
142 "throw",
143 "true",
144 "try",
145 "type",
146 "typeof",
147 "undefined",
148 "var",
149 "void",
150 "while",
151 "with",
152 "yield",
153 ],
154 types: &[
155 "boolean", "number", "string", "object", "symbol", "bigint", "any", "unknown", "never",
156 "void",
157 ],
158 line_comments: &["//"],
159 strings: &['"', '\'', '`'],
160 case_sensitive: true,
161 fn_call_highlight: true,
162};
163
164static GO: Grammar = Grammar {
165 keywords: &[
166 "break",
167 "case",
168 "chan",
169 "const",
170 "continue",
171 "default",
172 "defer",
173 "else",
174 "fallthrough",
175 "for",
176 "func",
177 "go",
178 "goto",
179 "if",
180 "import",
181 "interface",
182 "map",
183 "package",
184 "range",
185 "return",
186 "select",
187 "struct",
188 "switch",
189 "type",
190 "var",
191 "true",
192 "false",
193 "nil",
194 ],
195 types: &[
196 "bool",
197 "byte",
198 "rune",
199 "string",
200 "error",
201 "int",
202 "int8",
203 "int16",
204 "int32",
205 "int64",
206 "uint",
207 "uint8",
208 "uint16",
209 "uint32",
210 "uint64",
211 "uintptr",
212 "float32",
213 "float64",
214 "complex64",
215 "complex128",
216 "any",
217 ],
218 line_comments: &["//"],
219 strings: &['"', '`'],
220 case_sensitive: true,
221 fn_call_highlight: true,
222};
223
224static BASH: Grammar = Grammar {
225 keywords: &[
226 "if", "then", "else", "elif", "fi", "case", "esac", "for", "select", "while", "until",
227 "do", "done", "function", "in", "time", "return", "break", "continue", "export", "local",
228 "readonly", "source", "alias", "unset", "trap",
229 ],
230 types: &[],
231 line_comments: &["#"],
232 strings: &['"', '\''],
233 case_sensitive: true,
234 fn_call_highlight: false,
235};
236
237static JSON: Grammar = Grammar {
238 keywords: &["true", "false", "null"],
239 types: &[],
240 line_comments: &[],
241 strings: &['"'],
242 case_sensitive: true,
243 fn_call_highlight: false,
244};
245
246static YAML: Grammar = Grammar {
247 keywords: &["true", "false", "null", "yes", "no", "on", "off"],
248 types: &[],
249 line_comments: &["#"],
250 strings: &['"', '\''],
251 case_sensitive: false,
252 fn_call_highlight: false,
253};
254
255static TOML: Grammar = Grammar {
256 keywords: &["true", "false"],
257 types: &[],
258 line_comments: &["#"],
259 strings: &['"', '\''],
260 case_sensitive: true,
261 fn_call_highlight: false,
262};
263
264static HTML: Grammar = Grammar {
265 keywords: &[],
266 types: &[],
267 line_comments: &[],
268 strings: &['"', '\''],
269 case_sensitive: false,
270 fn_call_highlight: false,
271};
272
273static C: Grammar = Grammar {
274 keywords: &[
275 "auto", "break", "case", "char", "const", "continue", "default", "do", "double", "else",
276 "enum", "extern", "float", "for", "goto", "if", "inline", "int", "long", "register",
277 "restrict", "return", "short", "signed", "sizeof", "static", "struct", "switch", "typedef",
278 "union", "unsigned", "void", "volatile", "while",
279 ],
280 types: &[
281 "int8_t",
282 "int16_t",
283 "int32_t",
284 "int64_t",
285 "uint8_t",
286 "uint16_t",
287 "uint32_t",
288 "uint64_t",
289 "size_t",
290 "ssize_t",
291 "ptrdiff_t",
292 "intptr_t",
293 "uintptr_t",
294 "bool",
295 "FILE",
296 ],
297 line_comments: &["//"],
298 strings: &['"', '\''],
299 case_sensitive: true,
300 fn_call_highlight: true,
301};
302
303static CPP: Grammar = Grammar {
304 keywords: &[
305 "alignas",
306 "alignof",
307 "and",
308 "auto",
309 "bool",
310 "break",
311 "case",
312 "catch",
313 "char",
314 "class",
315 "const",
316 "constexpr",
317 "continue",
318 "decltype",
319 "default",
320 "delete",
321 "do",
322 "double",
323 "else",
324 "enum",
325 "explicit",
326 "export",
327 "extern",
328 "false",
329 "final",
330 "float",
331 "for",
332 "friend",
333 "goto",
334 "if",
335 "inline",
336 "int",
337 "long",
338 "mutable",
339 "namespace",
340 "new",
341 "noexcept",
342 "not",
343 "nullptr",
344 "operator",
345 "override",
346 "private",
347 "protected",
348 "public",
349 "register",
350 "return",
351 "short",
352 "signed",
353 "sizeof",
354 "static",
355 "struct",
356 "switch",
357 "template",
358 "this",
359 "throw",
360 "true",
361 "try",
362 "typedef",
363 "typeid",
364 "typename",
365 "union",
366 "unsigned",
367 "using",
368 "virtual",
369 "void",
370 "volatile",
371 "while",
372 ],
373 types: &[
374 "int8_t",
375 "int16_t",
376 "int32_t",
377 "int64_t",
378 "uint8_t",
379 "uint16_t",
380 "uint32_t",
381 "uint64_t",
382 "size_t",
383 "string",
384 "vector",
385 "map",
386 "unordered_map",
387 "set",
388 ],
389 line_comments: &["//"],
390 strings: &['"', '\''],
391 case_sensitive: true,
392 fn_call_highlight: true,
393};
394
395static JAVA: Grammar = Grammar {
396 keywords: &[
397 "abstract",
398 "assert",
399 "boolean",
400 "break",
401 "byte",
402 "case",
403 "catch",
404 "char",
405 "class",
406 "const",
407 "continue",
408 "default",
409 "do",
410 "double",
411 "else",
412 "enum",
413 "extends",
414 "final",
415 "finally",
416 "float",
417 "for",
418 "goto",
419 "if",
420 "implements",
421 "import",
422 "instanceof",
423 "int",
424 "interface",
425 "long",
426 "native",
427 "new",
428 "null",
429 "package",
430 "private",
431 "protected",
432 "public",
433 "return",
434 "short",
435 "static",
436 "strictfp",
437 "super",
438 "switch",
439 "synchronized",
440 "this",
441 "throw",
442 "throws",
443 "transient",
444 "true",
445 "false",
446 "try",
447 "void",
448 "volatile",
449 "while",
450 "var",
451 ],
452 types: &[
453 "String", "Integer", "Long", "Double", "Float", "Boolean", "List", "Map", "Set", "Object",
454 ],
455 line_comments: &["//"],
456 strings: &['"', '\''],
457 case_sensitive: true,
458 fn_call_highlight: true,
459};
460
/// Returns the character that starts at byte offset `i` of `line`.
///
/// Yields `None` when `i` is at or beyond the end of `line`, or when `i` does
/// not fall on a UTF-8 character boundary. Checked slicing is used so that a
/// bad offset ends the caller's scan loop instead of panicking.
fn char_at(line: &str, i: usize) -> Option<char> {
    line.get(i..)?.chars().next()
}
465
466fn scan(line: &str, g: &'static Grammar, theme: Theme) -> Vec<Span<'static>> {
467 let mut spans: Vec<Span<'static>> = Vec::new();
468 let base = theme.code_style();
469 let mut i = 0usize;
470
471 for prefix in g.line_comments {
473 let trimmed = line.trim_start();
474 if trimmed.starts_with(prefix) {
475 let indent_len = line.len() - trimmed.len();
476 if indent_len > 0 {
477 spans.push(Span::styled(line[..indent_len].to_string(), base));
478 }
479 spans.push(Span::styled(
480 line[indent_len..].to_string(),
481 theme.comment_style(),
482 ));
483 return spans;
484 }
485 }
486
487 while i < line.len() {
488 let mut matched_comment = false;
490 for prefix in g.line_comments {
491 if line[i..].starts_with(prefix) {
492 spans.push(Span::styled(line[i..].to_string(), theme.comment_style()));
493 i = line.len();
494 matched_comment = true;
495 break;
496 }
497 }
498 if matched_comment {
499 break;
500 }
501
502 let Some(ch) = char_at(line, i) else { break };
505 let ch_len = ch.len_utf8();
506
507 if g.strings.contains(&ch) {
509 let (span, end) = read_string(line, i, ch, theme);
510 spans.push(span);
511 i = end;
512 continue;
513 }
514
515 if ch.is_ascii_digit()
517 || (ch == '.' && char_at(line, i + 1).is_some_and(|c| c.is_ascii_digit()))
518 {
519 let (span, end) = read_number(line, i, theme);
520 spans.push(span);
521 i = end;
522 continue;
523 }
524
525 if is_ident_start(ch) {
527 let start = i;
528 let mut j = i;
529 while let Some(c) = char_at(line, j) {
530 if !is_ident_continue(c) {
531 break;
532 }
533 j += c.len_utf8();
534 }
535 let word = &line[start..j];
536 let style = classify_word(word, g, theme);
537 let final_style =
538 if g.fn_call_highlight && style == base && char_at(line, j) == Some('(') {
539 theme.fn_style()
540 } else {
541 style
542 };
543 spans.push(Span::styled(word.to_string(), final_style));
544 i = j;
545 continue;
546 }
547
548 let start = i;
550 let mut j = i;
551 while let Some(c) = char_at(line, j) {
552 if g.strings.contains(&c)
553 || is_ident_start(c)
554 || c.is_ascii_digit()
555 || g.line_comments.iter().any(|p| line[j..].starts_with(*p))
556 {
557 break;
558 }
559 j += c.len_utf8();
560 }
561 if j == start {
562 j = start + ch_len;
564 }
565 spans.push(Span::styled(line[start..j].to_string(), base));
566 i = j;
567 }
568
569 if spans.is_empty() {
570 spans.push(Span::styled(String::new(), base));
571 }
572 spans
573}
574
575fn read_string(line: &str, start: usize, delim: char, theme: Theme) -> (Span<'static>, usize) {
576 let mut i = start + delim.len_utf8();
577 let mut escape = false;
578 while let Some(c) = char_at(line, i) {
579 let cl = c.len_utf8();
580 if escape {
581 escape = false;
582 i += cl;
583 continue;
584 }
585 if c == '\\' {
586 escape = true;
587 i += cl;
588 continue;
589 }
590 if c == delim {
591 i += cl;
592 return (
593 Span::styled(line[start..i].to_string(), theme.string_style()),
594 i,
595 );
596 }
597 i += cl;
598 }
599 (
601 Span::styled(line[start..].to_string(), theme.string_style()),
602 line.len(),
603 )
604}
605
606fn read_number(line: &str, start: usize, theme: Theme) -> (Span<'static>, usize) {
607 let mut i = start;
608 let mut saw_dot = false;
609 let mut saw_e = false;
610 if line[i..].starts_with("0x") || line[i..].starts_with("0X") {
612 i += 2;
613 while let Some(c) = char_at(line, i) {
614 if c.is_ascii_hexdigit() || c == '_' {
615 i += c.len_utf8();
616 } else {
617 break;
618 }
619 }
620 } else if line[i..].starts_with("0b") || line[i..].starts_with("0B") {
621 i += 2;
622 while let Some(c) = char_at(line, i) {
623 if matches!(c, '0' | '1' | '_') {
624 i += c.len_utf8();
625 } else {
626 break;
627 }
628 }
629 } else {
630 while let Some(c) = char_at(line, i) {
631 if c.is_ascii_digit() || c == '_' {
632 i += c.len_utf8();
633 } else if c == '.' && !saw_dot && !saw_e {
634 saw_dot = true;
635 i += 1;
636 } else if (c == 'e' || c == 'E') && !saw_e {
637 saw_e = true;
638 i += 1;
639 if matches!(char_at(line, i), Some('+' | '-')) {
640 i += 1;
641 }
642 } else {
643 break;
644 }
645 }
646 }
647 while let Some(c) = char_at(line, i) {
649 if is_ident_continue(c) {
650 i += c.len_utf8();
651 } else {
652 break;
653 }
654 }
655 (
656 Span::styled(line[start..i].to_string(), theme.number_style()),
657 i,
658 )
659}
660
/// Whether `c` may begin an identifier: an ASCII letter or an underscore.
fn is_ident_start(c: char) -> bool {
    matches!(c, '_' | 'a'..='z' | 'A'..='Z')
}
664
/// Whether `c` may appear inside an identifier: an ASCII letter, an ASCII
/// digit or an underscore.
fn is_ident_continue(c: char) -> bool {
    matches!(c, '_' | '0'..='9' | 'a'..='z' | 'A'..='Z')
}
668
669fn classify_word(word: &str, g: &'static Grammar, theme: Theme) -> Style {
670 let cmp: Box<dyn Fn(&&&str) -> bool> = if g.case_sensitive {
671 Box::new(|k: &&&str| **k == word)
672 } else {
673 let lw = word.to_ascii_lowercase();
674 Box::new(move |k: &&&str| k.eq_ignore_ascii_case(&lw))
675 };
676 if g.keywords.iter().any(|k| cmp(&k)) {
677 return theme.keyword_style();
678 }
679 if g.types.iter().any(|k| cmp(&k)) {
680 return theme.type_style();
681 }
682 theme.code_style()
683}
684
#[cfg(test)]
mod tests {
    use super::*;
    use crate::theme::{Theme, ThemeName};

    /// Theme shared by all tests in this module.
    fn plain() -> Theme {
        Theme::resolve(ThemeName::Plain)
    }

    /// Highlights `line` and returns the text of each span, in order.
    fn pieces(line: &str, lang: &str) -> Vec<String> {
        highlight_line(line, lang, plain())
            .iter()
            .map(|span| span.content.to_string())
            .collect()
    }

    /// Asserts that the span texts concatenate back to `line` and returns
    /// them for further checks.
    fn assert_lossless(line: &str, lang: &str) -> Vec<String> {
        let parts = pieces(line, lang);
        assert_eq!(parts.concat(), line);
        parts
    }

    #[test]
    fn unknown_lang_passthrough() {
        let parts = pieces("hello world", "klingon");
        assert_eq!(parts.len(), 1);
        assert_eq!(parts[0], "hello world");
    }

    #[test]
    fn rust_keyword_and_string() {
        let parts = assert_lossless(r#"let x = "hi";"#, "rust");
        assert!(parts.iter().any(|p| p.as_str() == "let"));
        assert!(parts.iter().any(|p| p.as_str() == r#""hi""#));
    }

    #[test]
    fn python_comment() {
        assert_lossless("x = 1 # comment", "python");
    }

    #[test]
    fn hex_number() {
        let parts = assert_lossless("let n = 0xFF;", "rust");
        assert!(parts.iter().any(|p| p.as_str() == "0xFF"));
    }

    #[test]
    fn unterminated_string_does_not_panic() {
        assert_lossless(r#"let s = "oops"#, "rust");
    }

    #[test]
    fn fn_call_highlighted() {
        assert_lossless("println!(foo())", "rust");
    }

    #[test]
    fn handles_multibyte_chars_without_panicking() {
        // The line ends in a multi-byte ellipsis character.
        let line = "if let Err(e) = auth::validate_token(token, &state.conf\u{2026}";
        assert_lossless(line, "rust");
    }

    #[test]
    fn handles_cjk_and_emoji_in_comments() {
        assert_lossless("let x = 1; // 日本語 🎉 comment", "rust");
    }

    #[test]
    fn handles_multibyte_in_string_literal() {
        assert_lossless(r#"let s = "héllo — world";"#, "rust");
    }
}