use std::fmt;
use std::path::{Component, Path, PathBuf};
use crate::error::{ClaudixError, RecoveryHint, Result};
/// Numeric identifier carried by a [`Chunk`] in its `id` field.
///
/// Newtype over `u64` with a public inner value. How identifiers are allocated
/// is not decided in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct ChunkId(pub u64);
/// Formats the identifier as its plain decimal `u64` value.
///
/// Formatting is delegated to the inner integer so that width, fill, and
/// alignment flags supplied by the caller (for example `{:>8}`) are honored
/// instead of being silently dropped. Plain `{}` output is unchanged.
impl fmt::Display for ChunkId {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(&self.0, f)
    }
}
/// Fixed-size, 16-byte hash value stored on a [`Chunk`] as `file_hash`.
///
/// NOTE(review): presumably a digest of the source file's contents; the hashing
/// algorithm is not visible in this file — confirm against the code that
/// produces these values.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct FileHash(pub [u8; 16]);
impl fmt::Display for FileHash {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for b in &self.0 {
write!(f, "{b:02x}")?;
}
Ok(())
}
}
/// Newtype over `u16` with a public inner value.
///
/// NOTE(review): presumably the length of an embedding vector (compare
/// `EmbeddedChunk::vector`) — nothing in this file uses it, so confirm
/// against callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct Dimension(pub u16);
/// Path stored as a `String` that uses `/` as its separator.
///
/// The inner field is private; `RelativePath::new` and `RelativePath::from_path`
/// rewrite every `\` to `/` before storing. Construction does not check that
/// the path is actually relative — `reject_escape` performs that validation.
///
/// The derived `PartialOrd`/`Ord` compare the stored strings.
///
/// NOTE(review): the derived `Deserialize` fills the inner `String` directly and
/// therefore does not apply the separator rewrite done by `new` — confirm that
/// serialized data always originates from already-normalized values.
#[derive(
Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, serde::Serialize, serde::Deserialize,
)]
pub struct RelativePath(String);
/// Reports whether `path` lies at or under `prefix`, honoring name boundaries.
///
/// Trailing `/` characters on `prefix` are ignored. After removing the prefix
/// text from the front of `path`, the match succeeds only when the remainder is
///
/// * empty — `path` is exactly the prefix,
/// * a sub-path beginning with `/` — `path` is inside the prefix directory, or
/// * a single extension such as `.rs` — so `src/math` also selects `src/math.rs`.
///
/// Anything else (`src/math_extra.rs`, `src/math.rs.bak`) is rejected, as is a
/// `path` that does not start with the prefix text at all.
pub(crate) fn path_prefix_matches(path: &str, prefix: &str) -> bool {
    let stem = prefix.trim_end_matches('/');
    match path.strip_prefix(stem) {
        None => false,
        Some("") => true,
        Some(tail) if tail.starts_with('/') => true,
        Some(tail) => match tail.strip_prefix('.') {
            // Exactly one extension: no further dot and no directory separator.
            Some(extension) => !extension.contains('/') && !extension.contains('.'),
            None => false,
        },
    }
}
/// Rejects paths that could resolve outside the directory they are joined onto.
///
/// A path is refused when it is absolute or when any of its components is a
/// parent-directory reference (`..`), a root directory, or a platform prefix.
///
/// # Errors
///
/// Returns [`ClaudixError::PathTraversal`] carrying a copy of `path` and
/// `recovery` wrapped in a [`RecoveryHint`] when the path is refused.
pub(crate) fn reject_path_escape(path: &Path, recovery: &'static str) -> Result<()> {
    let escapes = path.is_absolute()
        || path.components().any(|part| {
            matches!(
                part,
                Component::ParentDir | Component::RootDir | Component::Prefix(_)
            )
        });

    if escapes {
        Err(ClaudixError::PathTraversal {
            path: path.to_path_buf(),
            recovery: RecoveryHint(recovery),
        })
    } else {
        Ok(())
    }
}
impl RelativePath {
    /// Checks that this path cannot escape the directory it is resolved against.
    ///
    /// Delegates to [`reject_path_escape`], borrowing the stored string as a
    /// [`Path`] instead of allocating a temporary [`PathBuf`].
    ///
    /// # Errors
    ///
    /// Propagates the error returned by [`reject_path_escape`]; `recovery` is
    /// forwarded to it unchanged.
    pub(crate) fn reject_escape(&self, recovery: &'static str) -> Result<()> {
        reject_path_escape(Path::new(&self.0), recovery)
    }

    /// Builds a path from `s`, rewriting every `\` separator to `/`.
    ///
    /// No other normalization or validation is performed.
    pub fn new(s: impl Into<String>) -> Self {
        let raw = s.into();
        // Reuse the caller's allocation when there is nothing to rewrite.
        if raw.contains('\\') {
            Self(raw.replace('\\', "/"))
        } else {
            Self(raw)
        }
    }

    /// Builds a path from a filesystem [`Path`].
    ///
    /// The conversion is lossy: byte sequences that are not valid Unicode are
    /// replaced as described for [`Path::to_string_lossy`]. Separators are then
    /// normalized exactly as in [`RelativePath::new`].
    pub fn from_path(path: &Path) -> Self {
        // Passing the `Cow` through lets an already-owned string be reused.
        Self::new(path.to_string_lossy())
    }

    /// Returns the stored, `/`-separated path text.
    pub fn as_str(&self) -> &str {
        &self.0
    }

    /// Returns whether this path lies at or under `prefix`, as decided by
    /// [`path_prefix_matches`] on the two stored strings.
    pub fn starts_with(&self, prefix: &RelativePath) -> bool {
        path_prefix_matches(&self.0, prefix.as_str())
    }

    /// Returns an owned [`PathBuf`] built from the stored text.
    pub fn to_path_buf(&self) -> PathBuf {
        PathBuf::from(&self.0)
    }
}
/// Displays the stored, `/`-separated path text.
///
/// Output goes through [`fmt::Formatter::pad`] so that width, fill, alignment,
/// and precision flags supplied by the caller are applied; plain `{}` output is
/// the stored string unchanged.
impl fmt::Display for RelativePath {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.pad(&self.0)
    }
}
/// Pair of `u32` byte offsets, stored on a [`Chunk`] as `byte_range`.
///
/// NOTE(review): whether `end` is inclusive or exclusive, and what the offsets
/// are relative to, is not established in this file — confirm with the
/// producer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct ByteRange {
/// Offset at which the range begins.
pub start: u32,
/// Offset at which the range ends.
pub end: u32,
}
/// Pair of `u32` line numbers, stored on a [`Chunk`] as `line_range`.
///
/// NOTE(review): whether lines are 0- or 1-based and whether `end` is inclusive
/// is not established in this file — confirm with the producer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct LineRange {
/// Line on which the range begins.
pub start: u32,
/// Line on which the range ends.
pub end: u32,
}
/// Programming language associated with a [`Chunk`].
///
/// With `#[serde(rename_all = "lowercase")]` each variant is serialized as its
/// lowercased name (for example `JavaScript` becomes `"javascript"`), which is
/// the same spelling `Language::as_str` returns.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Language {
Rust,
Python,
JavaScript,
TypeScript,
Go,
Java,
C,
Cpp,
CSharp,
Sql,
/// Fallback used by `from_extension` and `from_storage` for input they do
/// not recognize.
Unknown,
}
impl Language {
    /// Maps a file extension (without the leading dot) to a language.
    ///
    /// Matching is exact and case-sensitive; any extension not listed here
    /// yields [`Language::Unknown`].
    pub fn from_extension(ext: &str) -> Self {
        match ext {
            "rs" => Self::Rust,
            "py" => Self::Python,
            "js" | "mjs" | "cjs" => Self::JavaScript,
            "ts" | "tsx" => Self::TypeScript,
            "go" => Self::Go,
            "java" => Self::Java,
            "c" | "h" => Self::C,
            "cpp" | "cc" | "cxx" | "hpp" | "hxx" => Self::Cpp,
            "cs" => Self::CSharp,
            "sql" => Self::Sql,
            _ => Self::Unknown,
        }
    }

    /// Parses the canonical name produced by [`Language::as_str`].
    ///
    /// Matching is exact; anything unrecognized (including `"unknown"` itself)
    /// yields [`Language::Unknown`], so this conversion never fails.
    pub fn from_storage(value: &str) -> Self {
        match value {
            "rust" => Self::Rust,
            "python" => Self::Python,
            "javascript" => Self::JavaScript,
            "typescript" => Self::TypeScript,
            "go" => Self::Go,
            "java" => Self::Java,
            "c" => Self::C,
            "cpp" => Self::Cpp,
            "csharp" => Self::CSharp,
            "sql" => Self::Sql,
            _ => Self::Unknown,
        }
    }

    /// Parses a language name typed as a filter value.
    ///
    /// Surrounding whitespace is trimmed and ASCII case is ignored. Besides the
    /// canonical names from [`Language::as_str`], common short forms are
    /// accepted for every language that has one (`rs`, `py`, `js`, `ts`,
    /// `golang`, `c++`, `c#`, `cs`). Returns `None` for anything else, so the
    /// caller can tell a typo apart from an explicit `"unknown"`.
    pub fn from_filter_input(value: &str) -> Option<Self> {
        let language = match value.trim().to_ascii_lowercase().as_str() {
            "rust" | "rs" => Self::Rust,
            "python" | "py" => Self::Python,
            "javascript" | "js" => Self::JavaScript,
            "typescript" | "ts" => Self::TypeScript,
            "go" | "golang" => Self::Go,
            "java" => Self::Java,
            "c" => Self::C,
            "cpp" | "c++" => Self::Cpp,
            "csharp" | "c#" | "cs" => Self::CSharp,
            "sql" => Self::Sql,
            "unknown" => Self::Unknown,
            _ => return None,
        };
        Some(language)
    }

    /// Returns the canonical lowercase name of the language.
    ///
    /// This is the spelling accepted by [`Language::from_storage`].
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Rust => "rust",
            Self::Python => "python",
            Self::JavaScript => "javascript",
            Self::TypeScript => "typescript",
            Self::Go => "go",
            Self::Java => "java",
            Self::C => "c",
            Self::Cpp => "cpp",
            Self::CSharp => "csharp",
            Self::Sql => "sql",
            Self::Unknown => "unknown",
        }
    }
}
/// Displays the canonical lowercase name returned by `Language::as_str`.
///
/// Output goes through [`fmt::Formatter::pad`] so that width, fill, and
/// alignment flags supplied by the caller (for example `{:<12}` in a table) are
/// applied; plain `{}` output is unchanged.
impl fmt::Display for Language {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.pad(self.as_str())
    }
}
/// Category of the construct that a [`Chunk`] holds.
///
/// With `#[serde(rename_all = "lowercase")]` each variant is serialized as its
/// lowercased name, which is the same spelling `ChunkKind::as_str` returns.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ChunkKind {
Function,
Method,
Struct,
Class,
Enum,
Trait,
Interface,
Module,
Impl,
Macro,
Table,
View,
Trigger,
/// Fallback used by `from_storage` for any name it does not recognize.
Other,
}
impl ChunkKind {
pub fn as_str(self) -> &'static str {
match self {
Self::Function => "function",
Self::Method => "method",
Self::Struct => "struct",
Self::Class => "class",
Self::Enum => "enum",
Self::Trait => "trait",
Self::Interface => "interface",
Self::Module => "module",
Self::Impl => "impl",
Self::Macro => "macro",
Self::Table => "table",
Self::View => "view",
Self::Trigger => "trigger",
Self::Other => "other",
}
}
pub fn from_storage(value: &str) -> Self {
match value {
"function" => Self::Function,
"method" => Self::Method,
"struct" => Self::Struct,
"class" => Self::Class,
"enum" => Self::Enum,
"trait" => Self::Trait,
"interface" => Self::Interface,
"module" => Self::Module,
"impl" => Self::Impl,
"macro" => Self::Macro,
"table" => Self::Table,
"view" => Self::View,
"trigger" => Self::Trigger,
_ => Self::Other,
}
}
}
/// Displays the canonical lowercase name returned by `ChunkKind::as_str`.
///
/// Output goes through [`fmt::Formatter::pad`] so that width, fill, and
/// alignment flags supplied by the caller are applied; plain `{}` output is
/// unchanged.
impl fmt::Display for ChunkKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.pad(self.as_str())
    }
}
/// One unit of source content together with the metadata describing it.
///
/// NOTE(review): field meanings below are read off the field names and types;
/// the code that populates them is not in this file — confirm against it.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct Chunk {
/// Identifier of this chunk.
pub id: ChunkId,
/// Path of the file this chunk belongs to, in `/`-separated form.
pub file_path: RelativePath,
/// Language recorded for the chunk.
pub language: Language,
/// Category of construct recorded for the chunk.
pub kind: ChunkKind,
/// Optional name; `None` when no name was recorded (presumably for
/// anonymous constructs — TODO confirm).
pub name: Option<String>,
/// Line span recorded for the chunk.
pub line_range: LineRange,
/// Byte span recorded for the chunk.
pub byte_range: ByteRange,
/// 16-byte hash recorded for the chunk's file.
pub file_hash: FileHash,
/// Text of the chunk (presumably the source between `byte_range.start` and
/// `byte_range.end` — TODO confirm).
pub content: String,
}
/// A [`Chunk`] paired with a vector of `f32` values.
///
/// NOTE(review): presumably `vector` is the embedding computed for
/// `chunk.content`; its expected length is not enforced by this type — confirm
/// against the embedding code.
#[derive(Debug, Clone)]
pub struct EmbeddedChunk {
/// The chunk the vector belongs to.
pub chunk: Chunk,
/// The numeric vector associated with `chunk`.
pub vector: Vec<f32>,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Backslash separators are rewritten to `/` on construction.
    #[test]
    fn relative_path_normalizes_windows_separators() {
        let normalized = RelativePath::new(r"src\nested\file.rs");
        let expected = "src/nested/file.rs";
        assert_eq!(normalized.as_str(), expected);
        assert_eq!(normalized.to_string(), expected);
    }

    /// A prefix written with backslashes still matches a `/`-separated path.
    #[test]
    fn relative_path_prefix_match_uses_normalized_form() {
        let windows_style_prefix = RelativePath::new(r"src\nested");
        let candidate = RelativePath::new("src/nested/file.rs");
        assert!(candidate.starts_with(&windows_style_prefix));
    }

    /// The prefix matches itself, its children, and its single-extension file only.
    #[test]
    fn path_prefix_matches_exact_directory_and_extension_boundaries() {
        let cases = [
            ("src/math", true),
            ("src/math/add.rs", true),
            ("src/math.rs", true),
            ("src/math_extra.rs", false),
            ("src/math.rs.bak", false),
        ];
        for (path, expected) in cases {
            assert_eq!(
                path_prefix_matches(path, "src/math"),
                expected,
                "path: {path}"
            );
        }
    }

    #[test]
    fn file_hash_displays_as_lowercase_hex() {
        let rendered = FileHash([0xAB; 16]).to_string();
        assert_eq!(rendered, "abababababababababababababababab");
    }

    #[test]
    fn chunk_id_display_is_decimal() {
        let rendered = ChunkId(42).to_string();
        assert_eq!(rendered, "42");
    }

    #[test]
    fn language_detects_known_extensions() {
        let cases = [
            ("rs", Language::Rust),
            ("tsx", Language::TypeScript),
            ("hpp", Language::Cpp),
            ("cs", Language::CSharp),
            ("sql", Language::Sql),
            ("unknown", Language::Unknown),
        ];
        for (extension, expected) in cases {
            assert_eq!(Language::from_extension(extension), expected);
        }
    }

    #[test]
    fn csharp_and_sql_round_trip_through_storage_and_filters() {
        // Storage names round-trip for the two languages.
        for lang in [Language::CSharp, Language::Sql] {
            let stored = lang.as_str();
            assert_eq!(Language::from_storage(stored), lang);
        }

        // Filter input accepts canonical names and the `c#` alias.
        let filters = [
            ("c#", Language::CSharp),
            ("csharp", Language::CSharp),
            ("sql", Language::Sql),
        ];
        for (input, expected) in filters {
            assert_eq!(Language::from_filter_input(input), Some(expected));
        }

        // Storage names round-trip for the SQL-related chunk kinds.
        for kind in [ChunkKind::Table, ChunkKind::View, ChunkKind::Trigger] {
            let stored = kind.as_str();
            assert_eq!(ChunkKind::from_storage(stored), kind);
        }
    }

    #[test]
    fn language_display_matches_serialized_name() {
        let cases = [
            (Language::JavaScript, "javascript"),
            (Language::Unknown, "unknown"),
        ];
        for (lang, expected) in cases {
            assert_eq!(lang.to_string(), expected);
        }
    }

    #[test]
    fn chunk_kind_display_matches_kind_name() {
        let cases = [
            (ChunkKind::Function, "function"),
            (ChunkKind::Macro, "macro"),
            (ChunkKind::Other, "other"),
        ];
        for (kind, expected) in cases {
            assert_eq!(kind.to_string(), expected);
        }
    }

    #[test]
    fn relative_path_starts_with_respects_segment_boundary() {
        let is_under = |path: &str, prefix: &str| {
            RelativePath::new(path).starts_with(&RelativePath::new(prefix))
        };

        // Prefix without a trailing slash.
        assert!(is_under("src/math.rs", "src/math"));
        assert!(is_under("src/math/util.rs", "src/math"));
        assert!(is_under("src/math", "src/math"));
        assert!(!is_under("src/mathematics.rs", "src/math"));
        assert!(!is_under("src/mathx", "src/math"));

        // Prefix with a trailing slash behaves the same way.
        assert!(is_under("src/math/util.rs", "src/math/"));
        assert!(!is_under("src/mathematics.rs", "src/math/"));
    }
}