cooklang_fs/
lib.rs

//! `cooklang-rs` helper crate for the file system.
//!
//! Utilities for referencing recipes, images and other recipe-related data
//! that live in other files.
//!
//! It implements an index into the file system to efficiently resolve recipes
//! from a path. The index can be lazy or eager; both are created with
//! [`new_index`].
9
10mod walker;
11
12use std::{cell::RefCell, collections::HashMap, sync::OnceLock};
13
14use camino::{Utf8Component, Utf8Path, Utf8PathBuf};
15use cooklang::quantity::QuantityValue;
16use serde::Serialize;
17
18pub use walker::DirEntry;
19use walker::Walker;
20
/// Starts building a recipe index rooted at `base_path`.
///
/// This is a shorthand for [`FsIndexBuilder::new`]; finish the builder with
/// [`FsIndexBuilder::lazy`] or [`FsIndexBuilder::indexed`].
///
/// `max_depth` is forwarded unchanged to the directory walker.
///
/// # Errors
/// Returns an error if `base_path` is not valid UTF-8.
pub fn new_index(
    base_path: impl AsRef<std::path::Path>,
    max_depth: usize,
) -> Result<FsIndexBuilder, Error> {
    FsIndexBuilder::new(base_path, max_depth)
}
27
28pub struct FsIndexBuilder {
29    base_path: Utf8PathBuf,
30    walker: Walker,
31}
32
33impl FsIndexBuilder {
34    pub fn new(base_path: impl AsRef<std::path::Path>, max_depth: usize) -> Result<Self, Error> {
35        let base_path: &Utf8Path = base_path
36            .as_ref()
37            .try_into()
38            .map_err(|e: camino::FromPathError| e.into_io_error())?;
39
40        let walker = Walker::new(base_path, max_depth);
41        Ok(Self {
42            base_path: base_path.to_path_buf(),
43            walker,
44        })
45    }
46
47    /// Sets a config dir to the walker
48    ///
49    /// If this dir is found not in the top level, a warning will be printed.
50    ///
51    /// This also [ignores](Self::ignore) the dir.
52    pub fn config_dir(mut self, dir: String) -> Self {
53        self.walker.set_config_dir(dir);
54        self
55    }
56
57    /// Ignores a given file/dir
58    pub fn ignore(mut self, path: String) -> Self {
59        self.walker.ignore(path);
60        self
61    }
62
63    /// Create a new [lazy index](`LazyFsIndex`)
64    ///
65    /// The structure this creates is not completely thread safe, see
66    /// [`LazyFsIndex`].
67    pub fn lazy(self) -> LazyFsIndex {
68        LazyFsIndex {
69            base_path: self.base_path,
70            walker: RefCell::new(self.walker),
71            cache: RefCell::new(Cache::default()),
72        }
73    }
74
75    /// Create a new [complete index](`FsIndex`)
76    pub fn indexed(mut self) -> Result<FsIndex, Error> {
77        let mut cache = Cache::default();
78        index_all(&mut cache, &mut self.walker)?;
79        Ok(FsIndex {
80            base_path: self.base_path,
81            cache,
82        })
83    }
84}
85
86#[tracing::instrument(level = "debug", skip_all, ret)]
87fn index_all(cache: &mut Cache, walker: &mut Walker) -> Result<(), Error> {
88    for entry in walker {
89        let entry = entry?;
90        let Some((entry_name, path)) = process_entry(&entry) else {
91            continue;
92        };
93        cache.insert(entry_name, path);
94    }
95    Ok(())
96}
97
/// Lazy index of a directory for cooklang recipes
///
/// The index is lazy, so it will only search for things it needs when asked,
/// not when created. Also, it tries to walk the least amount possible in the
/// given directory.
///
/// Calling the methods on this structure when it's shared between threads can
/// panic. To avoid this issue put it behind a [`Mutex`](std::sync::Mutex) (not
/// [`RwLock`](std::sync::RwLock)) or use [`FsIndex`] by calling
/// [`Self::index_all`] or creating a new one.
#[derive(Debug)]
pub struct LazyFsIndex {
    // Directory the index is rooted at.
    base_path: Utf8PathBuf,
    // Recipes discovered so far; mutated through `&self` while looking up.
    cache: RefCell<Cache>,
    // Partially consumed directory walker; advanced through `&self` on cache
    // misses.
    walker: RefCell<Walker>,
}
114
/// Index of a directory for cooklang recipes
///
/// The index contains all recipes in the directory. It is created from a
/// fully consumed walker, and can later be adjusted by hand with
/// [`FsIndex::insert`] and [`FsIndex::remove`].
#[derive(Debug)]
pub struct FsIndex {
    // Directory the index is rooted at.
    base_path: Utf8PathBuf,
    // Every recipe known to the index.
    cache: Cache,
}
123
/// Recipe paths grouped by recipe name.
#[derive(Debug, Default)]
struct Cache {
    // Key: lowercased file stem of the recipe.
    // Value: every known path with that stem, kept ordered by number of path
    // components first and alphabetically second (see `Cache::insert`).
    recipes: HashMap<String, Vec<Utf8PathBuf>>,
}
128
/// Errors produced while building or querying a recipe index.
#[derive(Debug, thiserror::Error)]
pub enum Error {
    /// No recipe matching the query is known; holds the query.
    #[error("Recipe not found: '{0}'")]
    NotFound(String),
    /// An underlying I/O error, including non UTF-8 paths converted to an
    /// I/O error.
    #[error(transparent)]
    Io(#[from] std::io::Error),
    /// The query has no file stem to use as a recipe name, or it contains a
    /// path prefix component; holds the query.
    #[error("Invalid name: '{0}'")]
    InvalidName(String),
    /// The path exists but is not a cooklang recipe file.
    #[error(transparent)]
    NotRecipe(#[from] NotRecipe),
    /// The resolved path does not start with the base path; holds the query.
    #[error("Path points outside the base dir: '{0}'")]
    OutsideBase(String),
}
142
/// Error for a path that is not valid UTF-8; wraps the offending path.
///
/// NOTE(review): nothing in the visible part of this file constructs this
/// type; confirm whether it is still used elsewhere.
#[derive(Debug, thiserror::Error)]
#[error("Non UTF8 path")]
pub struct NonUtf8(std::path::PathBuf);
146
147impl FsIndex {
148    pub fn base_path(&self) -> &Utf8Path {
149        &self.base_path
150    }
151
152    pub fn contains(&self, recipe: &str) -> bool {
153        let Ok((name, path)) = into_name_path(recipe) else {
154            return false;
155        };
156        self.cache.get(&name, &path).is_some()
157    }
158
159    /// Resolves a recipe query first trying directly as a path and if it fails
160    /// performs a lookup in the index.
161    ///
162    /// The recipe cannot be outside the base path.
163    pub fn resolve(
164        &self,
165        recipe: &str,
166        relative_to: Option<&Utf8Path>,
167    ) -> Result<RecipeEntry, Error> {
168        try_path(recipe, relative_to, &self.base_path).or_else(|_| self.get(recipe))
169    }
170
171    pub fn get(&self, recipe: &str) -> Result<RecipeEntry, Error> {
172        let (name, path) = into_name_path(recipe)?;
173        match self.cache.get(&name, &path) {
174            Some(path) => Ok(RecipeEntry::new(path)),
175            None => Err(Error::NotFound(recipe.to_string())),
176        }
177    }
178
179    pub fn get_all(&self) -> impl Iterator<Item = RecipeEntry> + '_ {
180        self.cache
181            .recipes
182            .values()
183            .flatten()
184            .map(|p| RecipeEntry::new(p.to_path_buf()))
185    }
186
187    /// Remove a recipe from the index
188    ///
189    /// The parameter is the path in disk and has to be prefixed with the
190    /// base path.
191    ///
192    /// # Errors
193    /// The only possible is [`Error::InvalidName``].
194    ///
195    /// # Panics
196    /// If the path does not start with the base path.
197    pub fn remove(&mut self, path: &Utf8Path) -> Result<(), Error> {
198        tracing::trace!("manually removing {path}");
199        assert!(
200            path.starts_with(&self.base_path),
201            "path does not start with the base path"
202        );
203        let (name, path) = into_name_path(path.as_str())?;
204        self.cache.remove(&name, &path);
205        Ok(())
206    }
207
208    /// Manually add a recipe to the index
209    ///
210    /// This does not check if the path contains references to parent
211    /// dirs and therefore a recipe outside the base directory can be
212    /// refereced.
213    ///
214    /// # Errors
215    /// The only possible is [`Error::InvalidName``].
216    ///
217    /// # Panics
218    /// - If the path does not start with the base path
219    /// - If the file does not exist.
220    pub fn insert(&mut self, path: &Utf8Path) -> Result<(), Error> {
221        tracing::trace!("manually adding {path}");
222        assert!(
223            path.starts_with(&self.base_path),
224            "path does not start with the base path"
225        );
226        assert!(path.is_file(), "path does not exist or is not a file");
227
228        // if its known, do nothing
229        if self.get(path.as_str()).is_ok() {
230            return Ok(());
231        }
232
233        let (name, path) = into_name_path(path.as_str())?;
234        self.cache.insert(&name, &path);
235        Ok(())
236    }
237}
238
239impl LazyFsIndex {
240    pub fn base_path(&self) -> &Utf8Path {
241        &self.base_path
242    }
243
244    /// Check if the index contains a recipe
245    pub fn contains(&self, recipe: &str) -> bool {
246        self.get(recipe).is_ok()
247    }
248
249    /// Completes the lazy indexing returning a complete [`FsIndex`]
250    pub fn index_all(self) -> Result<FsIndex, Error> {
251        let mut cache = self.cache.into_inner();
252        let mut walker = self.walker.into_inner();
253        index_all(&mut cache, &mut walker)?;
254        Ok(FsIndex {
255            base_path: self.base_path,
256            cache,
257        })
258    }
259
260    /// Resolves a recipe query first trying directly as a path and if it fails
261    /// performs a lookup in the index.
262    ///
263    /// The recipe cannot be outside the base path.
264    pub fn resolve(
265        &self,
266        recipe: &str,
267        relative_to: Option<&Utf8Path>,
268    ) -> Result<RecipeEntry, Error> {
269        try_path(recipe, relative_to, &self.base_path).or_else(|_| self.get(recipe))
270    }
271
272    /// Get a recipe from the index
273    ///
274    /// The input recipe is a partial path with or without the .cook extension.
275    #[tracing::instrument(level = "debug", name = "lazy_index_get", skip(self))]
276    pub fn get(&self, recipe: &str) -> Result<RecipeEntry, Error> {
277        let (name, path) = into_name_path(recipe)?;
278
279        // Is in cache?
280        if let Some(path) = self.cache.borrow().get(&name, &path) {
281            return Ok(RecipeEntry::new(path));
282        }
283
284        // Walk until found or no more files
285        // as walk is breadth-first and sorted by filename, the first found will
286        // be the wanted: outermost alphabetically
287        let mut walker = self.walker.borrow_mut();
288        for entry in walker.by_ref() {
289            let entry = entry?;
290            let Some((entry_name, entry_path)) = process_entry(&entry) else {
291                continue;
292            };
293
294            // Add to cache
295            self.cache.borrow_mut().insert(entry_name, entry_path);
296
297            if compare_path(entry_path, &path) {
298                return Ok(RecipeEntry::new(entry_path));
299            }
300        }
301        Err(Error::NotFound(recipe.to_string()))
302    }
303}
304
305fn process_entry(dir_entry: &DirEntry) -> Option<(&str, &Utf8Path)> {
306    // Ignore non files or not .cook files
307    if !dir_entry.is_cooklang_file() {
308        return None;
309    }
310
311    let entry_name = dir_entry.file_stem();
312
313    Some((entry_name, dir_entry.path()))
314}
315
316impl Cache {
317    fn get(&self, name: &str, path: &Utf8Path) -> Option<Utf8PathBuf> {
318        let paths = self.recipes.get(&name.to_lowercase())?;
319        paths.iter().find(|p| compare_path(p, path)).cloned()
320    }
321
322    fn insert(&mut self, name: &str, path: &Utf8Path) {
323        tracing::trace!("adding {name}:{path} to index cache");
324        let recipes = self.recipes.entry(name.to_lowercase()).or_default();
325        let pos = recipes.partition_point(|p| {
326            // less components first. same, alphabetically
327            match p.components().count().cmp(&path.components().count()) {
328                std::cmp::Ordering::Less => true,
329                std::cmp::Ordering::Equal => p.as_str() < path.as_str(),
330                std::cmp::Ordering::Greater => false,
331            }
332        });
333        recipes.insert(pos, path.to_path_buf());
334    }
335
336    fn remove(&mut self, name: &str, path: &Utf8Path) {
337        tracing::trace!("removing {name}:{path} from index cache");
338        if let Some(recipes) = self.recipes.get_mut(&name.to_lowercase()) {
339            // can't do swap so "outer" recipes remain first
340            if let Some(index) = recipes.iter().position(|r| r == path) {
341                recipes.remove(index);
342            }
343        }
344    }
345}
346
347fn into_name_path(recipe: &str) -> Result<(String, Utf8PathBuf), Error> {
348    let path = Utf8PathBuf::from(recipe);
349    let name = path
350        .file_stem()
351        .ok_or_else(|| Error::InvalidName(recipe.into()))?
352        .to_string();
353    Ok((name, path))
354}
355
356fn compare_path_key(p: &Utf8Path) -> Utf8PathBuf {
357    Utf8PathBuf::from(p.as_str().to_lowercase()).with_extension("")
358}
359
360fn compare_path(full: &Utf8Path, suffix: &Utf8Path) -> bool {
361    // only compare the end, so partial paths are a valid form of referencing recipes
362    compare_path_key(full).ends_with(compare_path_key(suffix))
363}
364
365/// Get all recipes from a path with a depth limit
366pub fn all_recipes(
367    base_path: impl AsRef<std::path::Path>,
368    max_depth: usize,
369) -> Result<impl Iterator<Item = RecipeEntry>, std::io::Error> {
370    let base_path: &Utf8Path = base_path
371        .as_ref()
372        .try_into()
373        .map_err(|e: camino::FromPathError| e.into_io_error())?;
374    let walker = Walker::new(base_path, max_depth).flatten();
375    let grouped = group_images(walker);
376    Ok(grouped.filter_map(|e| match e {
377        Entry::Dir(_) => None,
378        Entry::Recipe(r) => Some(r),
379    }))
380}
381
382/// Walks a single directory retrieving recipes and other directories
383pub fn walk_dir(
384    path: impl AsRef<std::path::Path>,
385) -> Result<impl Iterator<Item = Entry>, std::io::Error> {
386    let path: &Utf8Path = path
387        .as_ref()
388        .try_into()
389        .map_err(|e: camino::FromPathError| e.into_io_error())?;
390    if !path.is_dir() {
391        return Err(std::io::Error::new(
392            std::io::ErrorKind::NotFound,
393            "dir not found",
394        ));
395    }
396    Ok(group_images(Walker::new(path, 0).flatten()))
397}
398
399fn group_images(walker: impl Iterator<Item = DirEntry>) -> impl Iterator<Item = Entry> {
400    struct ImageGrouper<I: Iterator<Item = DirEntry>> {
401        iter: std::iter::Peekable<I>,
402    }
403
404    impl<I: Iterator<Item = DirEntry>> Iterator for ImageGrouper<I> {
405        type Item = Entry;
406
407        fn next(&mut self) -> Option<Self::Item> {
408            let mut past_images = Vec::new();
409            loop {
410                match self.iter.next()? {
411                    dir if dir.file_type().is_dir() => return Some(Entry::Dir(dir)),
412                    r if r.is_cooklang_file() => {
413                        let recipe_name = r.file_stem();
414                        // because file are sorted by name, recipe images will be with the
415                        // recipes
416                        let mut images = past_images
417                            .into_iter()
418                            .filter_map(|e| Image::new(recipe_name, e))
419                            .collect::<Vec<_>>();
420                        while let Some(image_entry) = self.iter.next_if(|e| e.is_image()) {
421                            if let Some(image) = Image::new(recipe_name, image_entry) {
422                                images.push(image);
423                            }
424                        }
425                        return Some(Entry::Recipe(
426                            RecipeEntry::new(r.into_path()).set_images(images),
427                        ));
428                    }
429                    img if img.is_image() => {
430                        past_images.push(img);
431                    }
432                    _ => {}
433                }
434            }
435        }
436    }
437
438    ImageGrouper {
439        iter: walker.peekable(),
440    }
441}
442
/// Item produced when walking a directory with [`walk_dir`]
pub enum Entry {
    /// A sub directory
    Dir(DirEntry),
    /// A recipe, with its images already attached
    Recipe(RecipeEntry),
}
447
448#[tracing::instrument(level = "trace", ret)]
449fn try_path(
450    recipe: &str,
451    relative_to: Option<&Utf8Path>,
452    base_path: &Utf8Path,
453) -> Result<RecipeEntry, Error> {
454    let mut path = Utf8PathBuf::from(recipe).with_extension("cook");
455
456    if path
457        .components()
458        .any(|c| matches!(c, Utf8Component::Prefix(_)))
459    {
460        return Err(Error::InvalidName(recipe.to_string()));
461    }
462
463    if path.has_root() {
464        let no_root = path.as_str().trim_start_matches(['/', '\\']);
465        path = base_path.join(no_root);
466    } else if let Some(parent) = relative_to {
467        path = parent.join(&path);
468    }
469    path = norm_path(&path);
470
471    if !path.starts_with(base_path) {
472        return Err(Error::OutsideBase(recipe.to_string()));
473    }
474
475    DirEntry::new(&path)
476        .map_err(Error::from)
477        .and_then(|e| RecipeEntry::try_from(e).map_err(Error::from))
478}
479
480fn norm_path(path: &Utf8Path) -> Utf8PathBuf {
481    let mut components = path.components().peekable();
482    let mut ret = if let Some(c @ Utf8Component::Prefix(..)) = components.peek().cloned() {
483        components.next();
484        Utf8PathBuf::from(c.as_str())
485    } else {
486        Utf8PathBuf::new()
487    };
488
489    for component in components {
490        match component {
491            Utf8Component::Prefix(..) => unreachable!(),
492            Utf8Component::RootDir => {
493                ret.push(component.as_str());
494            }
495            Utf8Component::CurDir => {
496                if ret.components().count() == 0 {
497                    ret.push(component.as_str());
498                }
499            }
500            Utf8Component::ParentDir => {
501                if ret.components().count() > 0 {
502                    ret.pop();
503                } else {
504                    ret.push(component.as_str());
505                }
506            }
507            Utf8Component::Normal(c) => {
508                ret.push(c);
509            }
510        }
511    }
512    ret
513}
514
/// A recipe file in the file system
#[derive(Debug, Clone)]
pub struct RecipeEntry {
    // Path to the recipe file.
    path: Utf8PathBuf,
    // Images of the recipe; set once, either up front with `set_images` or
    // on the first call to `images`.
    images: OnceLock<Vec<Image>>,
}
520
521impl RecipeEntry {
522    /// Creates a new recipe entry
523    ///
524    /// The path is assumed to be a cooklang recipe file.
525    pub fn new(path: impl AsRef<Utf8Path>) -> Self {
526        Self {
527            path: path.as_ref().to_path_buf(),
528            images: OnceLock::new(),
529        }
530    }
531
532    pub fn set_images(self, images: Vec<Image>) -> Self {
533        _ = self.images.set(images);
534        self
535    }
536
537    pub fn path(&self) -> &Utf8Path {
538        &self.path
539    }
540
541    pub fn file_name(&self) -> &str {
542        self.path.file_name().unwrap()
543    }
544
545    pub fn name(&self) -> &str {
546        self.path.file_stem().unwrap()
547    }
548
549    pub fn relative_name(&self) -> &str {
550        self.path.as_str().trim_end_matches(".cook")
551    }
552
553    /// Reads the content of the entry
554    pub fn read(&self) -> std::io::Result<RecipeContent> {
555        let content = std::fs::read_to_string(&self.path)?;
556        Ok(RecipeContent::new(content))
557    }
558
559    /// Finds the images of the recipe
560    ///
561    /// The result is cached, use the [`recipe_images`] to get a fresh result
562    /// each call.
563    pub fn images(&self) -> &[Image] {
564        self.images.get_or_init(|| recipe_images(&self.path))
565    }
566}
567
568#[derive(Debug, thiserror::Error)]
569#[error("The entry is not a recipe: {0}")]
570pub struct NotRecipe(Utf8PathBuf);
571impl TryFrom<DirEntry> for RecipeEntry {
572    type Error = NotRecipe;
573
574    fn try_from(value: DirEntry) -> Result<Self, Self::Error> {
575        if !value.is_cooklang_file() {
576            return Err(NotRecipe(value.into_path()));
577        }
578        Ok(Self::new(value.into_path()))
579    }
580}
581
/// Text content of a recipe file, as returned by [`RecipeEntry::read`]
#[derive(Debug, Clone)]
pub struct RecipeContent {
    // Raw text of the recipe file.
    content: String,
}
586
impl RecipeContent {
    /// Wraps already read recipe text
    fn new(content: String) -> Self {
        Self { content }
    }

    /// Parses the metadata of the recipe
    pub fn metadata(&self, parser: &cooklang::CooklangParser) -> cooklang::MetadataResult {
        parser.parse_metadata(&self.content)
    }

    /// Same as [`Self::metadata`] but with extra options
    pub fn metadata_with_options(
        &self,
        parser: &cooklang::CooklangParser,
        options: cooklang::analysis::ParseOptions,
    ) -> cooklang::MetadataResult {
        parser.parse_metadata_with_options(&self.content, options)
    }

    /// Parses the recipe
    pub fn parse(&self, parser: &cooklang::CooklangParser) -> cooklang::RecipeResult {
        parser.parse(&self.content)
    }

    /// Same as [`Self::parse`] but with extra options
    pub fn parse_with_options(
        &self,
        parser: &cooklang::CooklangParser,
        options: cooklang::analysis::ParseOptions,
    ) -> cooklang::RecipeResult {
        parser.parse_with_options(&self.content, options)
    }

    /// Raw text of the recipe
    pub fn text(&self) -> &str {
        &self.content
    }

    /// Consumes the content returning the raw text of the recipe
    pub fn into_text(self) -> String {
        self.content
    }
}
628
/// An image of a recipe
///
/// The derived ordering follows the field order: `indexes` is compared
/// first and `path` second, so do not reorder the fields.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize)]
pub struct Image {
    /// Where in the recipe the image belongs. `None` when the file name has
    /// no section/step numbers.
    pub indexes: Option<ImageIndexes>,
    /// Path to the image file
    pub path: Utf8PathBuf,
}
634
/// Position in a recipe an [`Image`] refers to
///
/// Both numbers are used as zero based indexes by [`check_recipe_images`].
/// The derived ordering compares `section` first and `step` second.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize)]
pub struct ImageIndexes {
    // Index of the section; 0 when the file name only has a step number.
    section: u16,
    // Index of the step inside the section.
    step: u16,
}
640
641impl Image {
642    fn new(recipe_name: &str, entry: DirEntry) -> Option<Self> {
643        let parts = entry.file_name().rsplitn(4, '.').collect::<Vec<_>>();
644
645        // no dots, so no extension
646        if parts.len() == 1 {
647            return None;
648        }
649
650        let name = *parts.last().unwrap();
651        let ext = *parts.first().unwrap();
652
653        if name != recipe_name || !IMAGE_EXTENSIONS.contains(&ext) {
654            return None;
655        }
656
657        let indexes = match &parts[1..parts.len() - 1] {
658            [step, section] => {
659                let section = section.parse::<u16>().ok()?;
660                let step = step.parse::<u16>().ok()?;
661                Some(ImageIndexes { section, step })
662            }
663            [step] => {
664                let step = step.parse::<u16>().ok()?;
665                Some(ImageIndexes { section: 0, step })
666            }
667            _ => None,
668        };
669
670        Some(Image {
671            indexes,
672            path: entry.into_path(),
673        })
674    }
675}
676
/// Valid image extensions
///
/// Listed without the leading dot. When matching image file names to a
/// recipe, the extension has to be exactly one of these (the comparison is
/// case-sensitive).
pub const IMAGE_EXTENSIONS: &[&str] = &["jpeg", "jpg", "png", "heic", "gif", "webp"];
679
680/// Get a list of the images of the recipe
681///
682/// See [IMAGE_EXTENSIONS].
683pub fn recipe_images(path: &Utf8Path) -> Vec<Image> {
684    let Some(dir) = path.parent().and_then(|dir| dir.read_dir_utf8().ok()) else {
685        return vec![];
686    };
687
688    let Some(recipe_name) = path.file_stem() else {
689        return vec![];
690    };
691
692    let mut images = dir
693        .filter_map(|e| e.ok()) // skip error
694        .filter(|e| e.file_type().map(|t| t.is_file()).unwrap_or(false)) // skip non-file
695        .filter_map(|e| Image::new(recipe_name, DirEntry::new(e.path()).ok()?))
696        .collect::<Vec<_>>();
697    images.sort_unstable();
698    images
699}
700
/// Problems found by [`check_recipe_images`]
#[derive(Debug, thiserror::Error)]
pub enum RecipeImageError {
    /// The image refers to a section index the recipe does not have
    #[error("No section {section} in recipe, referenced from {image}")]
    MissingSection { section: u16, image: Utf8PathBuf },
    /// The section exists, but the step index is past the end of its content
    #[error("No step {step} in section {section}, referenced from {image}")]
    MissingStep {
        section: u16,
        step: u16,
        image: Utf8PathBuf,
    },
}
712
713/// Check that all images for a recipe actually can reference it.
714///
715/// For example the image `Recipe.14.jpeg` references step 15th, but the
716/// recipe may not have 15 steps, so this function returns an error.
717pub fn check_recipe_images<D, V: QuantityValue>(
718    images: &[Image],
719    recipe: &cooklang::Recipe<D, V>,
720) -> Result<(), Vec<RecipeImageError>> {
721    let mut errors = Vec::new();
722    for image in images {
723        if let Some(ImageIndexes { section, step }) = image.indexes {
724            let Some(recipe_section) = recipe.sections.get(section as usize) else {
725                errors.push(RecipeImageError::MissingSection {
726                    section,
727                    image: image.path.clone(),
728                });
729                continue;
730            };
731
732            if step as usize >= recipe_section.content.len() {
733                errors.push(RecipeImageError::MissingStep {
734                    section,
735                    step,
736                    image: image.path.clone(),
737                });
738            }
739        }
740    }
741    if errors.is_empty() {
742        Ok(())
743    } else {
744        Err(errors)
745    }
746}