Skip to main content

post_archiver/importer/
post.rs

1use std::{
2    collections::{HashMap, HashSet},
3    fmt::Debug,
4    fs::File,
5    path::PathBuf,
6};
7
8use chrono::{DateTime, Utc};
9use rusqlite::params;
10
11use crate::{
12    error::Result,
13    manager::{
14        PostArchiverConnection, PostArchiverManager, UpdateAuthor, UpdateCollection, UpdatePost,
15        WritableFileMeta,
16    },
17    AuthorId, CollectionId, Comment, Content, PlatformId, PostId, POSTS_PRE_CHUNK,
18};
19
20use super::{collection::UnsyncCollection, tag::UnsyncTag, UnsyncFileMeta};
21
22impl<T> PostArchiverManager<T>
23where
24    T: PostArchiverConnection,
25{
    /// Import a post into the archive.
    ///
    /// The post is looked up by its `source`. If it already exists, its title, platform,
    /// published date and updated date are updated; otherwise a new row is inserted into the
    /// `posts` table. In both cases the content, thumbnail and comments are then stored, and
    /// the post's tags, collections and authors are attached to it.
    ///
    /// When `post.published` or `post.updated` is `None`, the current time (`Utc::now`) is used
    /// in its place, on the update path as well as on the insert path.
    ///
    /// If `post.thumb` is `None`, the first content file whose MIME type starts with `image/`
    /// is used as the thumbnail.
    ///
    /// # Parameters
    ///
    /// - `post`: the post to import.
    /// - `update_relation`: update the relations of authors and collections after importing.
    ///
    /// # Returns
    ///
    /// A tuple of:
    ///
    /// 1. the id of the imported post,
    /// 2. the author ids taken from `post.authors`,
    /// 3. the ids of the imported collections,
    /// 4. `(destination path, file data)` pairs for every content file and the thumbnail.
    ///    The pairs are deduplicated by destination path and come back in unspecified order.
    ///    This function only computes the paths; callers such as `import_post_with_files`
    ///    put the data on disk.
    ///
    /// # Errors
    ///
    /// Returns `Error` if there was an error accessing the database.
    pub fn import_post<U>(
        &self,
        post: UnsyncPost<U>,
        update_relation: bool,
    ) -> Result<(PostId, Vec<AuthorId>, Vec<CollectionId>, Vec<(PathBuf, U)>)> {
        // Run the fallible `self.$method` on every element of `$vec` and collect the results,
        // returning from `import_post` on the first error.
        macro_rules! import_many {
            ($vec:expr => $method:ident) => {
                $vec.into_iter()
                    .map(|d| self.$method(d))
                    .collect::<std::result::Result<Vec<_>, _>>()?
            };
        }

        // find post by source
        let existing: Option<PostId> = self.find_post(&post.source)?;

        let id = match existing {
            Some(id) => {
                // Existing post: refresh its metadata in place.
                let b = self.bind(id);
                b.update(
                    UpdatePost::default()
                        .title(post.title)
                        .platform(Some(post.platform))
                        .published(post.published.unwrap_or_else(Utc::now))
                        .updated_by_latest(post.updated.unwrap_or_else(Utc::now)),
                )?;
                id
            }
            None => {
                // insert new post
                let mut stmt = self.conn().prepare_cached(
                    "INSERT INTO posts (title, source, platform, published, updated) VALUES (?, ?, ?, ?, ?) RETURNING id",
                )?;
                let published = post.published.unwrap_or_else(Utc::now);
                let updated = post.updated.unwrap_or_else(Utc::now);
                stmt.query_row(
                    params![post.title, post.source, post.platform, published, updated],
                    |row| row.get(0),
                )?
            }
        };

        let b = self.bind(id);

        // Import the explicit thumbnail first, if one was supplied. `thumb` is mutable because
        // the content loop below may fill it in when it is still `None`.
        let mut thumb = post
            .thumb
            .as_ref()
            .map(|thumb| self.import_file_meta(id, thumb))
            .transpose()?;

        // Convert every unsynced content item into its stored form, importing file metadata
        // along the way.
        let content = post
            .content
            .iter()
            .map(|content| {
                Ok(match content {
                    UnsyncContent::Text(text) => Content::Text(text.clone()),
                    UnsyncContent::File(file) => {
                        // Fall back to the first image file when no thumbnail is set yet.
                        let need_thumb = thumb.is_none() && file.mime.starts_with("image/");
                        let file_meta = self.import_file_meta(id, file)?;
                        need_thumb.then(|| thumb = Some(file_meta));
                        Content::File(file_meta)
                    }
                })
            })
            .collect::<Result<Vec<_>>>()?;
        b.update(
            UpdatePost::default()
                .content(content)
                .thumb(thumb)
                .comments(post.comments),
        )?;

        let tags = import_many!(post.tags => import_tag);
        b.add_tags(&tags)?;

        let collections = import_many!(post.collections => import_collection);
        b.add_collections(&collections)?;

        b.add_authors(&post.authors)?;

        // Directory for this post's files:
        // `<archive path>/<id / POSTS_PRE_CHUNK>/<id % POSTS_PRE_CHUNK>`.
        let path = self
            .path
            .join((id.raw() / POSTS_PRE_CHUNK).to_string())
            .join((id.raw() % POSTS_PRE_CHUNK).to_string());

        // Pair each content file and the explicit thumbnail with its destination path.
        // Collecting through a `HashMap` keeps a single entry per destination path, which also
        // makes the order of the resulting `Vec` unspecified.
        let files = post
            .content
            .into_iter()
            .flat_map(|c| match c {
                UnsyncContent::Text(_) => None,
                UnsyncContent::File(file) => Some(file),
            })
            .chain(post.thumb)
            .map(|f| (path.join(f.filename), f.data))
            .collect::<HashMap<_, _>>()
            .into_iter()
            .collect::<Vec<_>>();

        if update_relation {
            // Refresh every related author and collection after the post has been stored.
            // NOTE(review): the exact meaning of `thumb_by_latest` / `updated_by_latest` is
            // defined in the manager module — presumably "derive from the latest post"; confirm.
            post.authors.iter().try_for_each(|&author| {
                self.bind(author).update(
                    UpdateAuthor::default()
                        .thumb_by_latest()
                        .updated_by_latest(),
                )
            })?;

            collections.iter().try_for_each(|&collection| {
                self.bind(collection)
                    .update(UpdateCollection::default().thumb_by_latest())
            })?;
        }

        Ok((
            id,
            post.authors.into_iter().collect(),
            collections.into_iter().collect(),
            files,
        ))
    }
158
159    pub fn import_post_with_files<U: WritableFileMeta>(
160        &self,
161        post: UnsyncPost<U>,
162    ) -> Result<PostId> {
163        let (id, _, _, contents) = self.import_post(post, true)?;
164
165        let mut first = true;
166        for (path, content) in &contents {
167            if first {
168                std::fs::create_dir_all(path.parent().unwrap())?;
169                first = false;
170            }
171
172            let mut file = File::create(path)?;
173            content.write_to_file(&mut file)?;
174        }
175
176        Ok(id)
177    }
178
179    pub fn import_post_with_rename_files(&self, post: UnsyncPost<PathBuf>) -> Result<PostId> {
180        let (id, _, _, contents) = self.import_post(post, true)?;
181
182        for (path, src) in &contents {
183            std::fs::create_dir_all(path.parent().unwrap())?;
184            std::fs::rename(src, path)?;
185        }
186
187        Ok(id)
188    }
189
190    /// Import multiple posts into the archive.
191    ///
192    /// This function processes each post, importing its authors, collections, and files.
193    ///
194    /// # Parameters
195    ///
196    /// - `update_relation`: update the relations of authors and collections after importing.
197    ///
198    /// # Errors
199    ///
200    /// Returns `Error` if there was an error accessing the database.
201    pub fn import_posts<U>(
202        &self,
203        posts: impl IntoIterator<Item = UnsyncPost<U>>,
204        update_relation: bool,
205    ) -> Result<(Vec<PostId>, Vec<(PathBuf, U)>)> {
206        let mut total_author = HashSet::new();
207        let mut total_collections = HashSet::new();
208        let mut total_files = Vec::new();
209        let mut results = Vec::new();
210
211        for post in posts {
212            let (id, authors, collections, files_data) = self.import_post(post, false)?;
213
214            results.push(id);
215            total_files.extend(files_data);
216            total_author.extend(authors);
217            total_collections.extend(collections);
218        }
219
220        if update_relation {
221            total_author.into_iter().try_for_each(|author| {
222                self.bind(author).update(
223                    UpdateAuthor::default()
224                        .thumb_by_latest()
225                        .updated_by_latest(),
226                )
227            })?;
228
229            total_collections.into_iter().try_for_each(|collection| {
230                self.bind(collection)
231                    .update(UpdateCollection::default().thumb_by_latest())
232            })?;
233        }
234
235        Ok((results, total_files))
236    }
237
238    pub fn import_posts_with_files<U: WritableFileMeta>(
239        &self,
240        posts: impl IntoIterator<Item = UnsyncPost<U>>,
241    ) -> Result<Vec<PostId>> {
242        let (ids, contents) = self.import_posts(posts, true)?;
243
244        for (path, content) in contents {
245            std::fs::create_dir_all(path.parent().unwrap())?;
246
247            let mut file = File::create(path)?;
248            content.write_to_file(&mut file)?;
249        }
250
251        Ok(ids)
252    }
253
254    pub fn import_posts_with_rename_files(
255        &self,
256        posts: impl IntoIterator<Item = UnsyncPost<PathBuf>>,
257    ) -> Result<Vec<PostId>> {
258        let (ids, contents) = self.import_posts(posts, true)?;
259
260        for (path, src) in contents {
261            std::fs::create_dir_all(path.parent().unwrap())?;
262            std::fs::rename(src, path)?;
263        }
264
265        Ok(ids)
266    }
267}
268
#[derive(Debug, Clone)]
/// Represents a post that is not yet synchronized with the archive.
///
/// `T` is the type of the data carried by each file of the post (for example something that
/// can be written to a file, or the path of a file to move).
pub struct UnsyncPost<T> {
    /// The original URL of this post (e.g., "https://example.com/blog/1").
    ///
    /// Used as the lookup key when importing: a post with the same source is updated
    /// instead of being inserted again.
    pub source: String,
    /// The title of the post
    pub title: String,
    /// The post's content items (text and file references), in display order
    pub content: Vec<UnsyncContent<T>>,
    /// Optional thumbnail image for the post
    pub thumb: Option<UnsyncFileMeta<T>>,
    /// Comments on the post
    pub comments: Vec<Comment>,
    /// When the post was last updated; the import time is used when `None`
    pub updated: Option<DateTime<Utc>>,
    /// When the post was published; the import time is used when `None`
    pub published: Option<DateTime<Utc>>,
    /// Platform associated with the post
    pub platform: PlatformId,
    /// Tags associated with the post
    pub tags: Vec<UnsyncTag>,
    /// The IDs of the authors who created this post
    pub authors: Vec<AuthorId>,
    /// The collections this post belongs to
    pub collections: Vec<UnsyncCollection>,
}
295
296impl<T> UnsyncPost<T> {
297    pub fn new(
298        platform: PlatformId,
299        source: String,
300        title: String,
301        content: Vec<UnsyncContent<T>>,
302    ) -> Self {
303        Self {
304            source,
305            title,
306            content,
307            thumb: None,
308            comments: Vec::new(),
309            updated: None,
310            published: None,
311            platform,
312            tags: Vec::new(),
313            authors: Vec::new(),
314            collections: Vec::new(),
315        }
316    }
317
318    pub fn source(self, source: String) -> Self {
319        Self { source, ..self }
320    }
321
322    pub fn title(self, title: String) -> Self {
323        Self { title, ..self }
324    }
325
326    pub fn content(self, content: Vec<UnsyncContent<T>>) -> Self {
327        Self { content, ..self }
328    }
329
330    pub fn thumb(self, thumb: Option<UnsyncFileMeta<T>>) -> Self {
331        Self { thumb, ..self }
332    }
333
334    pub fn comments(self, comments: Vec<Comment>) -> Self {
335        Self { comments, ..self }
336    }
337
338    pub fn updated(self, updated: DateTime<Utc>) -> Self {
339        Self {
340            updated: Some(updated),
341            ..self
342        }
343    }
344
345    pub fn published(self, published: DateTime<Utc>) -> Self {
346        Self {
347            published: Some(published),
348            ..self
349        }
350    }
351
352    pub fn platform(self, platform: PlatformId) -> Self {
353        Self { platform, ..self }
354    }
355
356    pub fn tags(self, tags: Vec<UnsyncTag>) -> Self {
357        Self { tags, ..self }
358    }
359
360    pub fn authors(self, authors: Vec<AuthorId>) -> Self {
361        Self { authors, ..self }
362    }
363
364    pub fn collections(self, collections: Vec<UnsyncCollection>) -> Self {
365        Self {
366            collections,
367            ..self
368        }
369    }
370
371    /// import this post into the archive, synchronizing it with the database.
372    ///
373    /// This is abbreviation for [import_post](crate::PostArchiverManager::import_post)
374    ///
375    /// # Errors
376    ///
377    /// Returns `Error` if there was an error accessing the database.
378    pub fn sync<U>(self, manager: &PostArchiverManager<U>) -> Result<(PostId, Vec<(PathBuf, T)>)>
379    where
380        U: PostArchiverConnection,
381    {
382        let (id, _, _, files_data) = manager.import_post(self, true)?;
383
384        Ok((id, files_data))
385    }
386}
387
#[derive(Debug, Clone)]
/// A single content item of an [`UnsyncPost`]: either inline text or a file.
///
/// `T` is the type of the data carried by a file item.
pub enum UnsyncContent<T> {
    /// A piece of text, stored as-is.
    Text(String),
    /// A file that still has to be imported into the archive.
    File(UnsyncFileMeta<T>),
}