Skip to main content

ollama_file_find/
lib.rs

1use std::{
2    env, fs, io,
3    mem::take,
4    path::{Path, PathBuf},
5    time::SystemTime,
6};
7
8mod models;
9pub use models::{BlobPathInfo, LayerInfo, ListedModel};
10
11mod scan_args;
12pub use scan_args::ScanArgs;
13
14use crate::models::{ManifestData, ModelId};
15
/// Library wide result type.
///
/// The error parameter defaults to this crate's [`Error`], so most signatures can be
/// written as `Result<T>`; a different error type can still be supplied explicitly.
pub type Result<T, E = Error> = std::result::Result<T, E>;
18
/// Error enum describing all failure modes the library can encounter.
///
/// The `#[error(...)]` attributes supply the human-readable message for each variant.
#[derive(Debug, thiserror::Error)]
pub enum Error {
    /// An environment variable lookup failed (converted from `env::VarError` via `#[from]`).
    // NOTE(review): not constructed in the visible part of this file — confirm it is still used.
    #[error("Environment variable error: {0}")]
    EnvVar(#[from] env::VarError),
    /// The home directory could not be found.
    // NOTE(review): not constructed in the visible part of this file — confirm it is still used.
    #[error("Home directory not found")]
    HomeDirNotFound,
    /// A filesystem operation on `path` failed; `source` holds the underlying IO error.
    #[error("IO error at {path}: {source}")]
    Io { path: PathBuf, source: io::Error },
    /// Directory traversal failed (converted from `walkdir::Error` via `#[from]`).
    #[error("Walkdir error: {0}")]
    WalkDir(#[from] walkdir::Error),
    /// The file at `path` could not be parsed as JSON; `source` holds the parser error.
    #[error("JSON parse error at {path}: {source}")]
    Json {
        path: PathBuf,
        source: serde_json::Error,
    },
    /// The given path could not be turned into components relative to the scan root
    /// (it is not under the root, or the relative part is empty).
    #[error("Invalid path components for manifest under {0}")]
    InvalidComponentPath(PathBuf),
    /// The relative path components did not have the expected count (3 or 4);
    /// carries the offending components.
    #[error("Invalid components: {0:?}")]
    InvalidComponents(Vec<String>),
}
40
/// Outcome of a scan: the successfully parsed models plus any errors that occurred.
///
/// A scan does not stop at the first failure; problems are collected alongside the
/// models that could be read.
#[derive(Debug)]
pub struct ScanOutcome {
    /// Models that were parsed successfully.
    pub models: Vec<ListedModel>,
    /// Errors collected while walking the directory tree and processing entries.
    pub errors: Vec<Error>,
}
47
48/// Locate the models directory (`OLLAMA_MODELS` or fallback to $HOME/.ollama/models)
49#[must_use]
50pub fn ollama_models_dir() -> PathBuf {
51    if let Ok(p) = env::var("OLLAMA_MODELS")
52        && !p.is_empty()
53    {
54        return PathBuf::from(p);
55    }
56    // Fallback to home, but if not found just current directory relative path
57    let home = dirs::home_dir().unwrap_or_else(|| PathBuf::from("."));
58    home.join(".ollama").join("models")
59}
60
61/// Get the relative path components for a directory entry.
62fn relative_components(entry: &walkdir::DirEntry, root: &Path) -> Result<Vec<String>> {
63    if !entry.path().starts_with(root) {
64        return Err(Error::InvalidComponentPath(entry.path().to_path_buf()));
65    }
66    let rel = entry.path().strip_prefix(root).expect("Should be relative");
67    let comps: Vec<String> = rel
68        .iter()
69        .map(|c| c.to_string_lossy().to_string())
70        .collect();
71    if comps.is_empty() {
72        return Err(Error::InvalidComponentPath(entry.path().to_path_buf()));
73    }
74    Ok(comps)
75}
76
77/// Interpret path components as (host?, namespace, model, tag).
78fn parse_components(mut comps: Vec<String>, include_hidden: bool) -> Result<Option<ModelId>> {
79    // Accept either:
80    //   4 components: host / namespace / model / tag
81    //   3 components:          namespace / model / tag
82    match comps.len() {
83        3 | 4 => {}
84        _ => return Err(Error::InvalidComponents(comps)),
85    }
86
87    // Exclude any component starting with '.' unless explicitly allowed.
88    if !include_hidden && comps.iter().any(|c| c.starts_with('.')) {
89        return Ok(None);
90    }
91
92    // Destructure and clone only what we need.
93    let (host, namespace, model, tag) = match comps.as_mut_slice() {
94        [host, namespace, model, tag] => (
95            Some(take(host)),
96            Some(take(namespace)),
97            take(model),
98            take(tag),
99        ),
100        [namespace, model, tag] => (None, Some(take(namespace)), take(model), take(tag)),
101        _ => unreachable!("Lengths other than 3 or 4 already returned above"),
102    };
103
104    Ok(Some(ModelId {
105        host,
106        namespace,
107        model,
108        tag,
109    }))
110}
111
112/// Read & parse a manifest JSON file into a strongly typed structure.
113fn load_manifest(path: &Path) -> Result<ManifestData> {
114    let data = fs::read(path).map_err(|e| Error::Io {
115        path: path.to_path_buf(),
116        source: e,
117    })?;
118    let parsed = serde_json::from_slice(&data).map_err(|e| Error::Json {
119        path: path.to_path_buf(),
120        source: e,
121    })?;
122    Ok(parsed)
123}
124
125/// Sum layer + config sizes, returning None if no declared sizes exist.
126fn compute_total_size(layers: &[LayerInfo], config: Option<&LayerInfo>) -> Option<u64> {
127    let mut sum = 0u64;
128    let mut any = false;
129    for l in layers {
130        if let Some(sz) = l.size {
131            sum += sz;
132            any = true;
133        }
134    }
135    if let Some(cfg) = config
136        && let Some(sz) = cfg.size
137    {
138        sum += sz;
139        any = true;
140    }
141    if any { Some(sum) } else { None }
142}
143
/// Last-modification time of `path`, expressed as whole seconds since the Unix epoch.
///
/// This is a timestamp, not an age: it does not depend on the current time.
///
/// Returns `None` when the metadata cannot be read, when the modification time is
/// unavailable, or when it lies before the Unix epoch.
fn compute_mtime(path: &Path) -> Option<u64> {
    let modified = fs::metadata(path).ok()?.modified().ok()?;
    let since_epoch = modified.duration_since(SystemTime::UNIX_EPOCH).ok()?;
    Some(since_epoch.as_secs())
}
152
153/// Attempt to turn a filesystem entry into a `ListedModel` (only if it's a manifest file
154/// with valid components). Returns `None` for directories, hidden-excluded entries, or
155/// any IO / parse failures.
156fn process_entry(entry: &walkdir::DirEntry, args: &ScanArgs) -> Result<Option<ListedModel>> {
157    if entry.file_type().is_dir() {
158        return Ok(None);
159    }
160    let comps = relative_components(entry, &args.root)?;
161    let Some(id) = parse_components(comps, args.include_hidden)? else {
162        return Ok(None);
163    };
164    let manifest_path = entry.path();
165    let manifest = load_manifest(manifest_path)?;
166    let model = ListedModel::new(id, manifest_path);
167    if args.verbose {
168        Ok(Some(model.into_verbose(manifest, &args.blobs_root)))
169    } else {
170        Ok(Some(model))
171    }
172}
173
174/// Scan manifests and construct `ListedModel` entries.
175#[must_use]
176pub fn scan_manifests(args: &ScanArgs) -> ScanOutcome {
177    let mut models = Vec::new();
178    let mut errors = Vec::new();
179    for entry_res in walkdir::WalkDir::new(&args.root).follow_links(false) {
180        match entry_res {
181            Ok(entry) => match process_entry(&entry, args) {
182                Ok(Some(model)) => models.push(model),
183                Ok(None) => {}
184                Err(e) => errors.push(e),
185            },
186            Err(e) => errors.push(Error::WalkDir(e)),
187        }
188    }
189    models.sort_unstable_by(|a, b| a.name.cmp(&b.name));
190    ScanOutcome { models, errors }
191}
192
193/// Build blob path info list and decide primary digest.
194/// Build blob info records for layers + optional config, returning the primary digest chosen.
195/// Primary heuristic: largest (by declared size) layer; fall back to config if none.
196#[must_use]
197pub fn build_blob_infos<'a>(
198    layers: &'a [LayerInfo],
199    config: Option<&'a LayerInfo>,
200    blobs_root: &Path,
201) -> (Option<&'a str>, Vec<BlobPathInfo>) {
202    let mut primary_digest_idx: Option<usize> = None;
203    let mut max_size: u64 = 0;
204    for (i, l) in layers.iter().enumerate() {
205        if let Some(sz) = l.size
206            && sz > max_size
207        {
208            max_size = sz;
209            primary_digest_idx = Some(i);
210        }
211    }
212    let mut out = Vec::with_capacity(layers.len() + usize::from(config.is_some()));
213    let primary_digest = primary_digest_idx
214        .and_then(|i| layers.get(i).map(|l| l.digest.as_ref()))
215        .or_else(|| config.map(|c| c.digest.as_ref()));
216    for l in layers.iter().chain(config.iter().copied()) {
217        out.push(build_blob_path_info(l, blobs_root));
218    }
219    (primary_digest, out)
220}
221
222/// Produce a `BlobPathInfo` for the provided layer/config entry.
223#[must_use]
224pub fn build_blob_path_info(l: &LayerInfo, blobs_root: &Path) -> BlobPathInfo {
225    let path = digest_to_blob_path(blobs_root, &l.digest);
226    let (exists, actual_size, size_ok) = match fs::metadata(&path) {
227        Ok(meta) => {
228            let a = meta.len();
229            let ok = l.size.map(|decl| decl == a);
230            (true, Some(a), ok)
231        }
232        Err(_) => (false, None, None),
233    };
234    BlobPathInfo {
235        digest: l.digest.clone(),
236        media_type: l.media_type.clone(),
237        declared_size: l.size,
238        path,
239        exists,
240        size_ok,
241        actual_size,
242        primary: false,
243    }
244}
245
/// Map a content digest (e.g. `sha256:abcd...`) to the blob's file path under
/// `blobs_root`.
///
/// On disk the `:` separator is replaced by `-`, so `sha256:abcd` becomes
/// `<blobs_root>/sha256-abcd`. For digests starting with `sha256:` only that leading
/// separator is rewritten; for any other digest every `:` is replaced.
#[must_use]
pub fn digest_to_blob_path(blobs_root: &Path, digest: &str) -> PathBuf {
    let file_name = match digest.strip_prefix("sha256:") {
        // Usual case: "sha256:<hex>" is stored as "sha256-<hex>".
        Some(hex) => ["sha256-", hex].concat(),
        // Unusual digests: swap every ':' for '-'.
        None => digest.replace(':', "-"),
    };
    blobs_root.join(file_name)
}
258
#[cfg(test)]
mod tests {
    use super::*;

    /// The common `sha256:` form maps to a `sha256-` prefixed file under the blobs root.
    #[test]
    pub fn test_digest_to_blob_path() {
        let root = PathBuf::from("/tmp/blobs");
        let p = digest_to_blob_path(&root, "sha256:1234abcd");
        assert_eq!(p, PathBuf::from("/tmp/blobs/sha256-1234abcd"));
    }

    /// Digests without the `sha256:` prefix take the fallback branch, which replaces
    /// every `:` with `-`.
    #[test]
    fn test_digest_to_blob_path_fallback() {
        let root = PathBuf::from("/tmp/blobs");
        assert_eq!(
            digest_to_blob_path(&root, "sha512:beef"),
            root.join("sha512-beef")
        );
        assert_eq!(digest_to_blob_path(&root, "a:b:c"), root.join("a-b-c"));
        assert_eq!(digest_to_blob_path(&root, "nocolon"), root.join("nocolon"));
    }

    /// Only the leading `sha256:` separator is rewritten in the prefixed branch.
    #[test]
    fn test_digest_to_blob_path_sha256_only_prefix_rewritten() {
        let root = PathBuf::from("/tmp/blobs");
        assert_eq!(
            digest_to_blob_path(&root, "sha256:ab:cd"),
            root.join("sha256-ab:cd")
        );
    }
}