noosphere_cli/native/
content.rs

1//! Helpers for working with the file system content within a workspace
2
3use anyhow::{anyhow, Result};
4use cid::Cid;
5use globset::{Glob, GlobSet, GlobSetBuilder};
6use noosphere_core::data::{BodyChunkIpld, ContentType};
7use noosphere_storage::{BlockStore, MemoryStore};
8use pathdiff::diff_paths;
9use std::collections::{BTreeMap, BTreeSet};
10use subtext::util::to_slug;
11use tokio::fs;
12use tokio_stream::StreamExt;
13
14use noosphere_core::context::SphereWalker;
15
16use super::{extension::infer_content_type, paths::SpherePaths, workspace::Workspace};
17
18/// Metadata that identifies some sphere content that is present on the file
19/// system
20pub struct FileReference {
21    /// The [Cid] of the file's body contents
22    pub cid: Cid,
23    /// The inferred [ContentType] of the file
24    pub content_type: ContentType,
25    /// The known extension of the file, if any
26    pub extension: Option<String>,
27}
28
29/// A delta manifest of changes to the local content space
30#[derive(Default)]
31pub struct ContentChanges {
32    /// Newly added files
33    pub new: BTreeMap<String, Option<ContentType>>,
34    /// Updated files
35    pub updated: BTreeMap<String, Option<ContentType>>,
36    /// Removed files
37    pub removed: BTreeMap<String, Option<ContentType>>,
38    /// Unchanged files
39    pub unchanged: BTreeSet<String>,
40}
41
42impl ContentChanges {
43    /// Returns true if there are no recorded changes
44    pub fn is_empty(&self) -> bool {
45        self.new.is_empty() && self.updated.is_empty() && self.removed.is_empty()
46    }
47}
48
49/// A manifest of content to apply some work to in the local content space
50#[derive(Default)]
51pub struct Content {
52    /// Content in the workspace that can be considered for inclusion in the
53    /// sphere's content space
54    pub matched: BTreeMap<String, FileReference>,
55    /// Content in the workspace that has been ignored
56    pub ignored: BTreeSet<String>,
57}
58
59impl Content {
60    /// Returns true if no content has been found that can be included in the
61    /// sphere's content space
62    pub fn is_empty(&self) -> bool {
63        self.matched.is_empty()
64    }
65
66    /// Produce a matcher that will match any path that should be ignored when
67    /// considering the files that make up the local workspace
68    fn get_ignored_patterns() -> Result<GlobSet> {
69        // TODO(#82): User-specified ignore patterns
70        let ignored_patterns = vec!["@*", ".*"];
71
72        let mut builder = GlobSetBuilder::new();
73
74        for pattern in ignored_patterns {
75            builder.add(Glob::new(pattern)?);
76        }
77
78        Ok(builder.build()?)
79    }
80
81    /// Read the local content of the workspace in its entirety.
82    /// This includes files that have not yet been saved to the sphere. All
83    /// files are chunked into blocks, and those blocks are persisted to the
84    /// provided store.
85    // TODO(#556): This is slow; we could probably do a concurrent traversal
86    // similar to how we traverse when rendering files to disk
87    pub async fn read_all<S: BlockStore>(paths: &SpherePaths, store: &mut S) -> Result<Content> {
88        let root_path = paths.root();
89        let mut directories = vec![(None, tokio::fs::read_dir(root_path).await?)];
90
91        let ignore_patterns = Content::get_ignored_patterns()?;
92        let mut content = Content::default();
93
94        while let Some((slug_prefix, mut directory)) = directories.pop() {
95            while let Some(entry) = directory.next_entry().await? {
96                let path = entry.path();
97                let relative_path = diff_paths(&path, root_path)
98                    .ok_or_else(|| anyhow!("Could not determine relative path to {:?}", path))?;
99
100                if ignore_patterns.is_match(&relative_path) {
101                    continue;
102                }
103
104                if path.is_dir() {
105                    let slug_prefix = relative_path.to_string_lossy().to_string();
106
107                    directories.push((Some(slug_prefix), tokio::fs::read_dir(path).await?));
108
109                    // TODO(#557): Limit the depth of the directory traversal to
110                    // some reasonable number
111
112                    continue;
113                }
114
115                let ignored = false;
116
117                let name = match path.file_stem() {
118                    Some(name) => name.to_string_lossy(),
119                    None => continue,
120                };
121
122                let name = match &slug_prefix {
123                    Some(prefix) => format!("{prefix}/{name}"),
124                    None => name.to_string(),
125                };
126
127                let slug = match to_slug(&name) {
128                    Ok(slug) if slug == name => slug,
129                    _ => continue,
130                };
131
132                if ignored {
133                    content.ignored.insert(slug);
134                    continue;
135                }
136
137                let extension = path
138                    .extension()
139                    .map(|extension| String::from(extension.to_string_lossy()));
140
141                let content_type = match &extension {
142                    Some(extension) => infer_content_type(extension).await?,
143                    None => ContentType::Bytes,
144                };
145
146                let file_bytes = fs::read(path).await?;
147                let body_cid = BodyChunkIpld::store_bytes(&file_bytes, store).await?;
148
149                content.matched.insert(
150                    slug,
151                    FileReference {
152                        cid: body_cid,
153                        content_type,
154                        extension,
155                    },
156                );
157            }
158        }
159
160        Ok(content)
161    }
162
163    /// Read all changed content in the sphere's workspace. Changed content will
164    /// include anything that has been modified, moved or deleted. The blocks
165    /// associated with the changed content will be included in the returned
166    /// [MemoryStore].
167    pub async fn read_changes(
168        workspace: &Workspace,
169    ) -> Result<Option<(Content, ContentChanges, MemoryStore)>> {
170        // TODO(#556): We need a better strategy than reading all changed
171        // content into memory at once
172        let mut new_blocks = MemoryStore::default();
173        let file_content =
174            Content::read_all(workspace.require_sphere_paths()?, &mut new_blocks).await?;
175
176        let sphere_context = workspace.sphere_context().await?;
177        let walker = SphereWalker::from(&sphere_context);
178
179        let content_stream = walker.content_stream();
180        tokio::pin!(content_stream);
181
182        let mut changes = ContentChanges::default();
183
184        while let Some((slug, sphere_file)) = content_stream.try_next().await? {
185            if file_content.ignored.contains(&slug) {
186                continue;
187            }
188
189            match file_content.matched.get(&slug) {
190                Some(FileReference {
191                    cid: body_cid,
192                    content_type,
193                    extension: _,
194                }) => {
195                    if &sphere_file.memo.body == body_cid {
196                        changes.unchanged.insert(slug.clone());
197                        continue;
198                    }
199
200                    changes
201                        .updated
202                        .insert(slug.clone(), Some(content_type.clone()));
203                }
204                None => {
205                    changes
206                        .removed
207                        .insert(slug.clone(), sphere_file.memo.content_type());
208                }
209            }
210        }
211
212        for (slug, FileReference { content_type, .. }) in &file_content.matched {
213            if changes.updated.contains_key(slug)
214                || changes.removed.contains_key(slug)
215                || changes.unchanged.contains(slug)
216            {
217                continue;
218            }
219
220            changes.new.insert(slug.clone(), Some(content_type.clone()));
221        }
222
223        if changes.is_empty() {
224            Ok(None)
225        } else {
226            Ok(Some((file_content, changes, new_blocks)))
227        }
228    }
229}