post_archiver/importer/
post.rs

1use std::{
2    collections::{HashMap, HashSet},
3    fmt::Debug,
4    path::PathBuf,
5};
6
7use chrono::{DateTime, Utc};
8
9use crate::{
10    manager::{PostArchiverConnection, PostArchiverManager},
11    AuthorId, CollectionId, Comment, Content, PlatformId, PostId, POSTS_PRE_CHUNK,
12};
13
14use super::{collection::UnsyncCollection, tag::UnsyncTag, UnsyncFileMeta};
15
16impl<T> PostArchiverManager<T>
17where
18    T: PostArchiverConnection,
19{
20    /// Import a post into the archive.
21    ///
22    /// If the post already exists (by source), it updates its title, platform, published date,
23    ///
24    /// # Parameters
25    ///
26    /// - `update_relation`: update the relations of authors and collections after importing.
27    ///
28    /// # Errors
29    ///
30    /// Returns `rusqlite::Error` if there was an error accessing the database.
31    ///
32    /// # Examples
33    ///
34    /// ```
35    /// # use post_archiver::manager::PostArchiverManager;
36    /// # use post_archiver::importer::UnsyncPost;
37    /// # use post_archiver::PlatformId;
38    /// # use std::collections::HashMap;
39    /// # fn example(manager: &PostArchiverManager) -> Result<(), rusqlite::Error> {
40    /// let post: UnsyncPost<()> = UnsyncPost::new(PlatformId(1), "https://blog.example.com/post/1".to_string(), "My First Post".to_string(), vec![]);
41    ///
42    /// let files_data = HashMap::<String,()>::new(); // You can provide file data if needed
43    ///    
44    /// let post = manager.import_post(post, true)?;
45    ///
46    /// Ok(())
47    /// # }
48    /// ```
49    pub fn import_post<U>(
50        &self,
51        post: UnsyncPost<U>,
52        update_relation: bool,
53    ) -> Result<(PostId, Vec<AuthorId>, Vec<CollectionId>, Vec<(PathBuf, U)>), rusqlite::Error>
54    {
55        macro_rules! import_many {
56            ($vec:expr => $method:ident) => {
57                $vec.into_iter()
58                    .map(|d| self.$method(d))
59                    .collect::<Result<Vec<_>, _>>()?
60            };
61        }
62
63        let id = match self.find_post(&post.source)? {
64            Some(id) => {
65                self.set_post_title(id, post.title)?;
66                self.set_post_platform(id, Some(post.platform))?;
67
68                self.set_post_published(id, post.published.unwrap_or_else(Utc::now))?;
69                self.set_post_updated_by_latest(id, post.updated.unwrap_or_else(Utc::now))?;
70                id
71            }
72            None => self.add_post(
73                post.title,
74                Some(post.source),
75                Some(post.platform),
76                post.published,
77                post.updated,
78            )?,
79        };
80
81        let mut thumb = post
82            .thumb
83            .as_ref()
84            .map(|thumb| self.import_file_meta(id, thumb))
85            .transpose()?;
86
87        let content = post
88            .content
89            .iter()
90            .map(|content| {
91                Ok(match content {
92                    UnsyncContent::Text(text) => Content::Text(text.clone()),
93                    UnsyncContent::File(file) => {
94                        let need_thumb = thumb.is_none() && file.mime.starts_with("image/");
95                        let file_meta = self.import_file_meta(id, file)?;
96                        need_thumb.then(|| thumb = Some(file_meta));
97                        Content::File(file_meta)
98                    }
99                })
100            })
101            .collect::<Result<Vec<_>, rusqlite::Error>>()?;
102        self.set_post_content(id, content)?;
103        self.set_post_thumb(id, thumb)?;
104
105        self.set_post_comments(id, post.comments)?;
106
107        let tags = import_many!(post.tags => import_tag);
108        self.add_post_tags(id, &tags)?;
109
110        let collections = import_many!(post.collections => import_collection);
111        self.add_post_collections(id, &collections)?;
112
113        self.add_post_authors(id, &post.authors)?;
114
115        //
116        let path = self
117            .path
118            .join((id.raw() / POSTS_PRE_CHUNK).to_string())
119            .join((id.raw() % POSTS_PRE_CHUNK).to_string());
120
121        let files = post
122            .content
123            .into_iter()
124            .flat_map(|c| match c {
125                UnsyncContent::Text(_) => None,
126                UnsyncContent::File(file) => Some(file),
127            })
128            .chain(post.thumb)
129            .map(|f| (path.join(f.filename), f.data))
130            .collect::<HashMap<_, _>>()
131            .into_iter()
132            .collect::<Vec<_>>();
133
134        if update_relation {
135            post.authors.iter().try_for_each(|&author| {
136                self.set_author_thumb_by_latest(author)?;
137                self.set_author_updated_by_latest(author)
138            })?;
139
140            collections
141                .iter()
142                .try_for_each(|&collection| self.set_collection_thumb_by_latest(collection))?;
143        }
144
145        Ok((
146            id,
147            post.authors.into_iter().collect(),
148            collections.into_iter().collect(),
149            files,
150        ))
151    }
152
153    /// Import multiple posts into the archive.
154    ///
155    /// This function processes each post, importing its authors, collections, and files.
156    ///
157    /// # Parameters
158    ///
159    /// - `update_relation`: update the relations of authors and collections after importing.
160    ///
161    /// # Errors
162    ///
163    /// Returns `rusqlite::Error` if there was an error accessing the database.
164    ///
165    /// # Examples
166    ///
167    /// ```
168    /// # use post_archiver::manager::PostArchiverManager;
169    /// # use post_archiver::importer::UnsyncPost;
170    /// # use post_archiver::PlatformId;
171    /// # use std::collections::HashMap;
172    /// # fn example(manager: &PostArchiverManager) -> Result<(), rusqlite::Error> {
173    /// let posts: Vec<UnsyncPost<()>> = vec![
174    ///     UnsyncPost::new(PlatformId(1), "https://blog.example.com/post/1".to_string(), "My First Post".to_string(), vec![]),
175    ///     UnsyncPost::new(PlatformId(1), "https://blog.example.com/post/2".to_string(), "My Second Post".to_string(), vec![]),
176    /// ];
177    ///
178    /// let post = manager.import_posts(posts, true)?;
179    ///
180    /// Ok(())
181    /// # }
182    /// ```
183    pub fn import_posts<U>(
184        &self,
185        posts: impl IntoIterator<Item = UnsyncPost<U>>,
186        update_relation: bool,
187    ) -> Result<(Vec<PostId>, Vec<(PathBuf, U)>), rusqlite::Error> {
188        let mut total_author = HashSet::new();
189        let mut total_collections = HashSet::new();
190        let mut total_files = Vec::new();
191        let mut results = Vec::new();
192
193        for post in posts {
194            let (id, authors, collections, files_data) = self.import_post(post, false)?;
195
196            results.push(id);
197            total_files.extend(files_data);
198            total_author.extend(authors);
199            total_collections.extend(collections);
200        }
201
202        if update_relation {
203            total_author.into_iter().try_for_each(|author| {
204                self.set_author_thumb_by_latest(author)?;
205                self.set_author_updated_by_latest(author)
206            })?;
207
208            total_collections
209                .into_iter()
210                .try_for_each(|collection| self.set_collection_thumb_by_latest(collection))?;
211        }
212
213        Ok((results, total_files))
214    }
215}
216
217#[derive(Debug, Clone)]
218/// Represents a post that is not yet synchronized with the archive.
219pub struct UnsyncPost<T> {
220    /// The original URL of this post (e.g., "https://example.com/blog/1")
221    pub source: String,
222    /// The title of the post
223    pub title: String,
224    /// The post's content items (text and file references)
225    pub content: Vec<UnsyncContent<T>>,
226    /// Optional thumbnail image for the post
227    pub thumb: Option<UnsyncFileMeta<T>>,
228    /// Comments on the post
229    pub comments: Vec<Comment>,
230    /// When the post was updated
231    pub updated: Option<DateTime<Utc>>,
232    /// When the post was published
233    pub published: Option<DateTime<Utc>>,
234    /// Platform associated with the post
235    pub platform: PlatformId,
236    /// Tags associated with the post
237    pub tags: Vec<UnsyncTag>,
238    /// The IDs of the author who created this post
239    pub authors: Vec<AuthorId>,
240    /// The collections this post belongs to
241    pub collections: Vec<UnsyncCollection>,
242}
243
244impl<T> UnsyncPost<T> {
245    pub fn new(
246        platform: PlatformId,
247        source: String,
248        title: String,
249        content: Vec<UnsyncContent<T>>,
250    ) -> Self {
251        Self {
252            source,
253            title,
254            content,
255            thumb: None,
256            comments: Vec::new(),
257            updated: None,
258            published: None,
259            platform,
260            tags: Vec::new(),
261            authors: Vec::new(),
262            collections: Vec::new(),
263        }
264    }
265
266    pub fn source(self, source: String) -> Self {
267        Self { source, ..self }
268    }
269
270    pub fn title(self, title: String) -> Self {
271        Self { title, ..self }
272    }
273
274    pub fn content(self, content: Vec<UnsyncContent<T>>) -> Self {
275        Self { content, ..self }
276    }
277
278    pub fn thumb(self, thumb: Option<UnsyncFileMeta<T>>) -> Self {
279        Self { thumb, ..self }
280    }
281
282    pub fn comments(self, comments: Vec<Comment>) -> Self {
283        Self { comments, ..self }
284    }
285
286    pub fn updated(self, updated: DateTime<Utc>) -> Self {
287        Self {
288            updated: Some(updated),
289            ..self
290        }
291    }
292
293    pub fn published(self, published: DateTime<Utc>) -> Self {
294        Self {
295            published: Some(published),
296            ..self
297        }
298    }
299
300    pub fn platform(self, platform: PlatformId) -> Self {
301        Self { platform, ..self }
302    }
303
304    pub fn tags(self, tags: Vec<UnsyncTag>) -> Self {
305        Self { tags, ..self }
306    }
307
308    pub fn authors(self, authors: Vec<AuthorId>) -> Self {
309        Self { authors, ..self }
310    }
311
312    pub fn collections(self, collections: Vec<UnsyncCollection>) -> Self {
313        Self {
314            collections,
315            ..self
316        }
317    }
318
319    /// import this post into the archive, synchronizing it with the database.
320    ///
321    /// This is abbreviation for [import_post](crate::PostArchiverManager::import_post)
322    ///
323    /// # Errors
324    ///
325    /// Returns `rusqlite::Error` if there was an error accessing the database.
326    pub fn sync<U>(
327        self,
328        manager: &PostArchiverManager<U>,
329    ) -> Result<(PostId, Vec<(PathBuf, T)>), rusqlite::Error>
330    where
331        U: PostArchiverConnection,
332    {
333        let (id, _, _, files_data) = manager.import_post(self, true)?;
334
335        Ok((id, files_data))
336    }
337}
338
339#[derive(Debug, Clone)]
340pub enum UnsyncContent<T> {
341    Text(String),
342    File(UnsyncFileMeta<T>),
343}