Skip to main content

git_semantic/embedding/
error.rs

1use thiserror::Error;
2
3#[derive(Debug, Error)]
4pub enum EmbeddingError {
5    #[error("failed to determine application data directory")]
6    ProjectDirsNotFound,
7
8    #[error("model not found — run 'git-semantic init' first")]
9    ModelNotDownloaded,
10
11    #[error("model not initialized — call init() before encoding")]
12    ModelNotInitialized,
13
14    #[error("tokenization failed: {0}")]
15    Tokenization(String),
16
17    #[error("failed to download {filename}: {reason}")]
18    DownloadFailed { filename: String, reason: String },
19
20    #[error("missing content length for download of {0}")]
21    MissingContentLength(String),
22
23    #[error("ONNX runtime error: {}", sanitize_ort_error(.0))]
24    Ort(#[from] ort::Error),
25
26    #[error("file I/O error: {}", sanitize_io_error(.0))]
27    Io(#[from] std::io::Error),
28
29    #[error("network error: {}", sanitize_http_error(.0))]
30    Http(#[from] reqwest::Error),
31
32    #[error("tensor shape mismatch: {0}")]
33    Shape(#[from] ndarray::ShapeError),
34}
35
36impl EmbeddingError {
37    /// Returns a user-facing hint for how to resolve this error.
38    pub fn hint(&self) -> Option<&'static str> {
39        match self {
40            Self::ProjectDirsNotFound => {
41                Some("Ensure your system supports standard data directories (e.g. $HOME is set).")
42            }
43            Self::ModelNotDownloaded => Some("Run: git-semantic init"),
44            Self::ModelNotInitialized => Some(
45                "This is an internal error. Please report it at https://github.com/yanxue06/git-semantic-search/issues",
46            ),
47            Self::Tokenization(_) => {
48                Some("The input text may contain unsupported characters. Try a simpler query.")
49            }
50            Self::DownloadFailed { .. } => Some(
51                "Check your internet connection and try again. If behind a proxy, ensure HTTPS_PROXY is set.",
52            ),
53            Self::MissingContentLength(_) => {
54                Some("The model server returned an unexpected response. Try again later.")
55            }
56            Self::Http(_) => Some("Check your internet connection and firewall settings."),
57            Self::Ort(_) => Some("The ONNX model may be corrupted. Try: git-semantic init --force"),
58            Self::Io(_) => Some("Check file permissions and available disk space."),
59            Self::Shape(_) => {
60                Some("The model produced unexpected output. Try: git-semantic init --force")
61            }
62        }
63    }
64
65    /// Returns an error code for programmatic identification.
66    pub fn code(&self) -> &'static str {
67        match self {
68            Self::ProjectDirsNotFound => "E1001",
69            Self::ModelNotDownloaded => "E1002",
70            Self::ModelNotInitialized => "E1003",
71            Self::Tokenization(_) => "E1004",
72            Self::DownloadFailed { .. } => "E1005",
73            Self::MissingContentLength(_) => "E1006",
74            Self::Ort(_) => "E1007",
75            Self::Io(_) => "E1008",
76            Self::Http(_) => "E1009",
77            Self::Shape(_) => "E1010",
78        }
79    }
80}
81
82/// Sanitize ONNX runtime errors to avoid leaking internal file paths.
83fn sanitize_ort_error(err: &ort::Error) -> String {
84    sanitize_path_in_message(&err.to_string())
85}
86
87/// Sanitize I/O errors to avoid leaking full file system paths.
88fn sanitize_io_error(err: &std::io::Error) -> String {
89    sanitize_path_in_message(&err.to_string())
90}
91
92/// Sanitize HTTP errors to avoid leaking URLs with potential tokens.
93fn sanitize_http_error(err: &reqwest::Error) -> String {
94    let msg = err.to_string();
95    // Strip query parameters which might contain tokens
96    if let Some(idx) = msg.find('?') {
97        format!("{}[query params redacted]", &msg[..idx])
98    } else {
99        msg
100    }
101}
102
103/// Replace absolute paths in error messages with redacted versions
104/// to avoid leaking usernames or directory structure.
105fn sanitize_path_in_message(msg: &str) -> String {
106    // Redact home directory paths (Unix and macOS)
107    let sanitized = if let Some(home) = std::env::var_os("HOME") {
108        msg.replace(&home.to_string_lossy().to_string(), "~")
109    } else {
110        msg.to_string()
111    };
112
113    // Redact Windows-style user paths
114    regex_lite_replace_user_paths(&sanitized)
115}
116
/// Replaces the username in every Windows-style user path
/// (`<drive>:\Users\<username>\...`) with the placeholder `<user>`.
///
/// All occurrences in `msg` are redacted, not just the first, so messages
/// that mention several paths (for example a copy source and destination) do
/// not leak a later username. A `:\Users\` marker whose username is not
/// terminated by a further `\` is left untouched, because the end of the
/// username cannot be determined.
///
/// Returns a new `String`; input without any marker is returned unchanged.
fn regex_lite_replace_user_paths(msg: &str) -> String {
    const MARKER: &str = ":\\Users\\";

    let mut redacted = String::with_capacity(msg.len());
    let mut rest = msg;

    while let Some(marker_at) = rest.find(MARKER) {
        let name_start = marker_at + MARKER.len();
        match rest[name_start..].find('\\') {
            Some(name_len) => {
                // Keep everything up to and including the marker, swap the
                // username for the placeholder, and resume at the backslash
                // that terminated the username.
                redacted.push_str(&rest[..name_start]);
                redacted.push_str("<user>");
                rest = &rest[name_start + name_len..];
            }
            // No backslash remains, so no later marker can exist either.
            None => break,
        }
    }

    redacted.push_str(rest);
    redacted
}