rustik-highlight 0.1.0

Rustik code highlighter.
Documentation
//! Raw TextMate JSON data structures.
//!
//! These types mirror the loose JSON shapes accepted by TextMate and VS Code
//! grammar/theme files before the crate compiles them into stricter runtime
//! data. Keeping this layer separate lets parsing tolerate source-file quirks
//! without leaking those shapes into tokenization and styling code.

use std::collections::BTreeMap;
use std::str::FromStr;

use serde::{Deserialize, json as serde_json};

use crate::Error;

/// An uncompiled TextMate grammar, exactly as deserialized from JSON.
///
/// JSON keys are read in camelCase (`displayName`, `scopeName`, `fileTypes`,
/// `firstLineMatch`) and mapped onto the snake_case fields below.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RawGrammar {
    /// Display name intended for humans, when the grammar supplies one.
    pub display_name: Option<String>,
    /// Grammar name (required key).
    pub name: String,
    /// Scope name of the grammar root (required key).
    pub scope_name: String,
    /// File extensions and special file names associated with the grammar.
    pub file_types: Option<Vec<String>>,
    /// First-line match expressions, kept as raw JSON because the value may
    /// be either a single string or an array of strings.
    pub first_line_match: Option<serde_json::Value>,
    /// Top-level patterns (required key).
    pub patterns: Vec<RawPattern>,
    /// Repository of named patterns.
    pub repository: Option<BTreeMap<String, RawPattern>>,
}

/// An uncompiled TextMate pattern, exactly as deserialized from JSON.
///
/// Every field is optional. JSON keys are read in camelCase
/// (`beginCaptures`, `endCaptures`); the `match` key is renamed explicitly
/// because `match` is a Rust keyword.
#[derive(Debug, Clone, Default, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RawPattern {
    /// Scope name applied to the matched text.
    pub name: Option<String>,
    /// Expression for a single-line match rule (JSON key `match`).
    #[serde(rename = "match")]
    pub match_rule: Option<String>,
    /// Expression that opens a stateful rule.
    pub begin: Option<String>,
    /// Expression that closes a stateful rule.
    pub end: Option<String>,
    /// Child patterns nested under this one.
    pub patterns: Option<Vec<RawPattern>>,
    /// Include reference such as `$self`, `$base`, or `#name`.
    pub include: Option<String>,
    /// Captures for the match expression.
    pub captures: Option<BTreeMap<String, RawCapture>>,
    /// Captures for the begin expression.
    pub begin_captures: Option<BTreeMap<String, RawCapture>>,
    /// Captures for the end expression.
    pub end_captures: Option<BTreeMap<String, RawCapture>>,
}

/// An uncompiled TextMate capture entry, exactly as deserialized from JSON.
#[derive(Debug, Clone, Deserialize)]
pub struct RawCapture {
    /// Scope name applied to the captured group, if one is given.
    pub name: Option<String>,
}

/// An uncompiled TextMate theme, exactly as deserialized from JSON.
///
/// Rules may arrive under `settings` (TextMate) or `tokenColors` (VS Code);
/// both lists are kept as parsed.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RawTheme {
    /// Name of the theme (required key).
    pub name: String,
    /// Rules listed under the TextMate `settings` key.
    pub settings: Option<Vec<RawThemeRule>>,
    /// Rules listed under the VS Code `tokenColors` key.
    pub token_colors: Option<Vec<RawThemeRule>>,
}

/// A single rule from a TextMate theme, exactly as deserialized from JSON.
#[derive(Debug, Clone, Deserialize)]
pub struct RawThemeRule {
    /// Selector value kept as raw JSON, since it may be absent, a string, or
    /// an array of strings.
    pub scope: Option<serde_json::Value>,
    /// Style settings attached to the rule (required key).
    pub settings: RawStyle,
}

/// Style settings of a TextMate theme rule, exactly as deserialized from JSON.
///
/// JSON keys are read in camelCase (`fontStyle`).
#[derive(Debug, Clone, Default, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RawStyle {
    /// Foreground color written as `#rrggbb`.
    pub foreground: Option<String>,
    /// Text style flags, separated by spaces.
    pub font_style: Option<String>,
}

impl RawGrammar {
    /// Parses a raw grammar from JSON.
    pub fn parse(input: &str) -> Result<Self, Error> {
        input.parse()
    }
}

impl FromStr for RawGrammar {
    type Err = Error;

    fn from_str(input: &str) -> Result<Self, Self::Err> {
        serde_json::from_str(input).map_err(|_| Error::InvalidGrammar)
    }
}

impl RawTheme {
    /// Parses a raw theme from JSON.
    pub fn parse(input: &str) -> Result<Self, Error> {
        input.parse()
    }
}

impl FromStr for RawTheme {
    type Err = Error;

    fn from_str(input: &str) -> Result<Self, Self::Err> {
        serde_json::from_str(input).map_err(|_| Error::InvalidTheme)
    }
}

impl RawThemeRule {
    /// Collects the selector strings found in this rule's `scope` value.
    ///
    /// A string scope is split via `push_selectors`; an array scope has each
    /// of its string elements split the same way, in order. Non-string array
    /// elements, any other JSON value, and a missing scope contribute
    /// nothing, so the result may be empty.
    pub(crate) fn scope_selectors(&self) -> Vec<String> {
        let mut collected = Vec::new();
        match &self.scope {
            Some(serde_json::Value::String(single)) => push_selectors(single, &mut collected),
            Some(serde_json::Value::Array(entries)) => {
                for entry in entries {
                    // Only string elements carry selectors; skip the rest.
                    let serde_json::Value::String(text) = entry else {
                        continue;
                    };
                    push_selectors(text, &mut collected);
                }
            }
            _ => {}
        }
        collected
    }
}

/// Flattens a `firstLineMatch` value into a list of pattern strings.
///
/// A JSON string yields a one-element list; a JSON array yields its string
/// elements in order, ignoring non-string elements. `None` and every other
/// JSON value yield an empty list.
pub(crate) fn first_line_patterns(value: Option<&serde_json::Value>) -> Vec<String> {
    let Some(raw) = value else {
        return Vec::new();
    };
    match raw {
        serde_json::Value::String(single) => vec![single.clone()],
        serde_json::Value::Array(items) => {
            let mut patterns = Vec::new();
            for item in items {
                if let serde_json::Value::String(text) = item {
                    patterns.push(text.clone());
                }
            }
            patterns
        }
        _ => Vec::new(),
    }
}

/// Appends each selector of a comma-separated TextMate selector list.
///
/// `input` is split on `,`; every piece is trimmed of surrounding whitespace
/// and pushed onto `selectors` as an owned string. Pieces that are empty
/// after trimming are dropped. Existing contents of `selectors` are kept.
fn push_selectors(input: &str, selectors: &mut Vec<String>) {
    for piece in input.split(',') {
        let trimmed = piece.trim();
        if trimmed.is_empty() {
            continue;
        }
        selectors.push(trimmed.to_owned());
    }
}