// oxihuman_core/syntax_highlighter.rs
// Copyright (C) 2026 COOLJAPAN OU (Team KitaSan)
// SPDX-License-Identifier: Apache-2.0
#![allow(dead_code)]

//! Syntax highlight token classifier stub.
//!
//! Given a stream of tokens (string slices) and a language hint, assigns
//! a `HighlightKind` to each token for downstream rendering.
/// A syntax highlighting category.
///
/// Fieldless enum, so it derives `Copy`, `Eq`, and `Hash` in addition to
/// the original traits — this lets kinds be passed by value and used as
/// `HashMap`/`HashSet` keys without cloning.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HighlightKind {
    Keyword,
    Identifier,
    Literal,
    StringLit,
    Comment,
    Punctuation,
    Operator,
    Number,
    Whitespace,
    /// Fallback when no other rule matches.
    Unknown,
}

25/// A token with an associated highlight kind.
26#[derive(Debug, Clone)]
27pub struct HighlightToken {
28    pub text: String,
29    pub kind: HighlightKind,
30}
31
/// Supported language modes for highlighting.
///
/// Fieldless enum; derives `Copy`, `Eq`, and `Hash` (new) so modes can be
/// passed by value and used as map keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Language {
    Rust,
    Python,
    Json,
    /// No keyword table; most tokens classify as identifier/punctuation.
    Plain,
}

/// Configuration for the syntax highlighter.
#[derive(Debug, Clone)]
pub struct HighlighterConfig {
    /// Language mode; selects which keyword table `classify_token` uses.
    pub language: Language,
    // Intended to control case sensitivity of keyword matching.
    // NOTE(review): not currently consulted by `classify_token` — confirm
    // whether case-insensitive matching should be wired in.
    pub case_sensitive_keywords: bool,
}

48impl Default for HighlighterConfig {
49    fn default() -> Self {
50        Self {
51            language: Language::Plain,
52            case_sensitive_keywords: true,
53        }
54    }
55}
56
/// Reserved words recognized in `Language::Rust` mode.
/// NOTE(review): not exhaustive — e.g. `dyn`, `ref`, `unsafe`, `break`,
/// `continue` are absent; confirm whether the stub should cover them.
static RUST_KEYWORDS: &[&str] = &[
    "fn", "let", "mut", "pub", "use", "mod", "struct", "enum", "impl", "trait", "if", "else",
    "match", "return", "for", "while", "loop", "in", "as", "where", "type", "const", "static",
    "self", "Self", "super", "crate", "async", "await", "move",
];

/// Reserved words recognized in `Language::Python` mode.
/// Includes the literal constants `None`/`True`/`False`, which Python
/// itself treats as keywords.
static PYTHON_KEYWORDS: &[&str] = &[
    "def", "class", "import", "from", "return", "if", "elif", "else", "for", "while", "in", "not",
    "and", "or", "with", "as", "pass", "break", "continue", "try", "except", "finally", "lambda",
    "yield", "None", "True", "False",
];

69/// Classify a single token string given the language config.
70pub fn classify_token(token: &str, cfg: &HighlighterConfig) -> HighlightKind {
71    let keywords: &[&str] = match cfg.language {
72        Language::Rust => RUST_KEYWORDS,
73        Language::Python => PYTHON_KEYWORDS,
74        Language::Json | Language::Plain => &[],
75    };
76
77    if keywords.contains(&token) {
78        return HighlightKind::Keyword;
79    }
80    if token.starts_with("//") || token.starts_with('#') {
81        return HighlightKind::Comment;
82    }
83    if (token.starts_with('"') && token.ends_with('"'))
84        || (token.starts_with('\'') && token.ends_with('\''))
85    {
86        return HighlightKind::StringLit;
87    }
88    if token
89        .chars()
90        .all(|c| c.is_ascii_digit() || c == '.' || c == '_')
91        && !token.is_empty()
92    {
93        return HighlightKind::Number;
94    }
95    if token.chars().all(char::is_whitespace) && !token.is_empty() {
96        return HighlightKind::Whitespace;
97    }
98    if token.chars().all(|c| c.is_alphanumeric() || c == '_') && !token.is_empty() {
99        return HighlightKind::Identifier;
100    }
101    if token.len() == 1 && "{}[]();,.<>".contains(token) {
102        return HighlightKind::Punctuation;
103    }
104    if token.len() == 1 && "+-*/=!&|^~%".contains(token) {
105        return HighlightKind::Operator;
106    }
107    HighlightKind::Unknown
108}
109
110/// Highlight a list of tokens, returning a `HighlightToken` per entry.
111pub fn highlight_tokens(tokens: &[&str], cfg: &HighlighterConfig) -> Vec<HighlightToken> {
112    tokens
113        .iter()
114        .map(|&t| HighlightToken {
115            text: t.to_string(),
116            kind: classify_token(t, cfg),
117        })
118        .collect()
119}
120
121/// Count tokens of a given kind in the result.
122pub fn count_kind(tokens: &[HighlightToken], kind: &HighlightKind) -> usize {
123    tokens.iter().filter(|t| &t.kind == kind).count()
124}
125
126/// Return a simple ANSI-colored representation (stub — only marks keywords).
127pub fn to_ansi_string(tokens: &[HighlightToken]) -> String {
128    let mut out = String::new();
129    for t in tokens {
130        if t.kind == HighlightKind::Keyword {
131            out.push_str("\x1b[1;34m");
132            out.push_str(&t.text);
133            out.push_str("\x1b[0m");
134        } else {
135            out.push_str(&t.text);
136        }
137    }
138    out
139}
140
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared Rust-mode configuration used by most tests.
    fn rust_cfg() -> HighlighterConfig {
        HighlighterConfig {
            language: Language::Rust,
            case_sensitive_keywords: true,
        }
    }

    #[test]
    fn test_keyword_classified() {
        assert_eq!(classify_token("fn", &rust_cfg()), HighlightKind::Keyword);
    }

    #[test]
    fn test_identifier_classified() {
        let kind = classify_token("my_var", &rust_cfg());
        assert_eq!(kind, HighlightKind::Identifier);
    }

    #[test]
    fn test_number_classified() {
        let kind = classify_token("42", &rust_cfg());
        assert_eq!(kind, HighlightKind::Number);
    }

    #[test]
    fn test_comment_classified() {
        let kind = classify_token("// a comment", &rust_cfg());
        assert_eq!(kind, HighlightKind::Comment);
    }

    #[test]
    fn test_string_classified() {
        let kind = classify_token("\"hello\"", &rust_cfg());
        assert_eq!(kind, HighlightKind::StringLit);
    }

    #[test]
    fn test_highlight_tokens_count() {
        let ht = highlight_tokens(&["fn", "main", "(", ")"], &rust_cfg());
        assert_eq!(ht.len(), 4);
    }

    #[test]
    fn test_count_kind() {
        let ht = highlight_tokens(&["fn", "let", "x"], &rust_cfg());
        assert_eq!(count_kind(&ht, &HighlightKind::Keyword), 2);
    }

    #[test]
    fn test_ansi_string_contains_escape() {
        let ht = highlight_tokens(&["fn"], &rust_cfg());
        assert!(to_ansi_string(&ht).contains("\x1b["));
    }

    #[test]
    fn test_python_keyword() {
        let cfg = HighlighterConfig {
            language: Language::Python,
            case_sensitive_keywords: true,
        };
        assert_eq!(classify_token("def", &cfg), HighlightKind::Keyword);
    }
}