Skip to main content

rag_rat_core/
language.rs

1use std::{fmt, str::FromStr};
2
3use serde::{Deserialize, Serialize};
4use thiserror::Error;
5
6#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
7#[serde(rename_all = "lowercase")]
8pub enum Language {
9    Rust,
10    TypeScript,
11    Kotlin,
12    C,
13    Cpp,
14    Markdown,
15}
16
17impl Language {
18    pub const ALL: [Self; 6] =
19        [Self::Rust, Self::TypeScript, Self::Kotlin, Self::C, Self::Cpp, Self::Markdown];
20
21    pub fn all() -> &'static [Self] {
22        &Self::ALL
23    }
24
25    pub fn as_str(self) -> &'static str {
26        match self {
27            Self::Rust => "rust",
28            Self::TypeScript => "typescript",
29            Self::Kotlin => "kotlin",
30            Self::C => "c",
31            Self::Cpp => "cpp",
32            Self::Markdown => "markdown",
33        }
34    }
35
36    pub fn simple_extensions(self) -> &'static [&'static str] {
37        match self {
38            Self::Rust => &["rs"],
39            Self::TypeScript => &["ts", "tsx"],
40            Self::Kotlin => &["kt", "kts"],
41            Self::C => &["c", "h"],
42            Self::Cpp => &["cc", "cpp", "cxx", "c++", "hh", "hpp", "hxx", "h++"],
43            Self::Markdown => &["md", "markdown"],
44        }
45    }
46
47    pub fn supports_embeddings(self) -> bool {
48        matches!(
49            self,
50            Self::Rust | Self::TypeScript | Self::Kotlin | Self::C | Self::Cpp | Self::Markdown
51        )
52    }
53
54    pub fn from_path(path: &std::path::Path) -> Option<Self> {
55        let ext = path.extension()?.to_str()?;
56        Self::all().iter().copied().find(|language| language.simple_extensions().contains(&ext))
57    }
58}
59
60impl fmt::Display for Language {
61    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
62        f.write_str(self.as_str())
63    }
64}
65
66impl FromStr for Language {
67    type Err = LanguageError;
68
69    fn from_str(value: &str) -> Result<Self, Self::Err> {
70        match value.trim().to_ascii_lowercase().as_str() {
71            "rust" | "rs" => Ok(Self::Rust),
72            "typescript" | "ts" | "tsx" => Ok(Self::TypeScript),
73            "kotlin" | "kt" => Ok(Self::Kotlin),
74            "c" => Ok(Self::C),
75            "cpp" | "c++" | "cc" | "cxx" => Ok(Self::Cpp),
76            "markdown" | "md" => Ok(Self::Markdown),
77            other => Err(LanguageError::Unknown(other.to_string())),
78        }
79    }
80}
81
82#[derive(Debug, Error)]
83pub enum LanguageError {
84    #[error("unknown language `{0}`")]
85    Unknown(String),
86}