post-archiver 0.5.7

A Unify Archive
Documentation
use std::{
    collections::{HashMap, HashSet},
    fmt::Debug,
    fs::File,
    path::PathBuf,
};

use chrono::{DateTime, Utc};
use rusqlite::params;

use crate::{
    error::Result,
    manager::{
        PostArchiverConnection, PostArchiverManager, UpdateAuthor, UpdateCollection, UpdatePost,
        WritableFileMeta,
    },
    AuthorId, CollectionId, Comment, Content, PlatformId, PostId, POSTS_PRE_CHUNK,
};

use super::{collection::UnsyncCollection, tag::UnsyncTag, UnsyncFileMeta};

impl<T> PostArchiverManager<T>
where
    T: PostArchiverConnection,
{
    /// Import a post into the archive.
    ///
    /// If a post with the same `source` already exists, its title, platform and published date
    /// are updated and the updated date is passed to `UpdatePost::updated_by_latest`; otherwise
    /// a new row is inserted into `posts`. In both cases the content, thumbnail, comments, tags,
    /// collections and authors carried by `post` are then attached to that post.
    ///
    /// Missing `published` / `updated` timestamps default to the current time.
    ///
    /// When `post.thumb` is `None`, the first file in `post.content` whose mime type starts with
    /// `image/` is used as the thumbnail.
    ///
    /// # Parameters
    ///
    /// - `post`: the post to import.
    /// - `update_relation`: update the relations of authors and collections after importing.
    ///
    /// # Returns
    ///
    /// A tuple of:
    ///
    /// 1. the id of the imported post,
    /// 2. the authors of the post (`post.authors`, unchanged),
    /// 3. the ids of the imported collections,
    /// 4. `(destination path, data)` pairs for every file in `post.content` plus `post.thumb`.
    ///    The pairs are de-duplicated by destination path and their order is unspecified.
    ///    These are the pairs that `import_post_with_files` writes to disk.
    ///
    /// # Errors
    ///
    /// Returns `Error` if there was an error accessing the database.
    pub fn import_post<U>(
        &self,
        post: UnsyncPost<U>,
        update_relation: bool,
    ) -> Result<(PostId, Vec<AuthorId>, Vec<CollectionId>, Vec<(PathBuf, U)>)> {
        // Run `self.$method` on every element and stop at the first error.
        macro_rules! import_many {
            ($vec:expr => $method:ident) => {
                $vec.into_iter()
                    .map(|d| self.$method(d))
                    .collect::<std::result::Result<Vec<_>, _>>()?
            };
        }

        // find post by source
        let existing: Option<PostId> = self.find_post(&post.source)?;

        // Both the update and the insert path fall back to "now" for missing timestamps.
        let published = post.published.unwrap_or_else(Utc::now);
        let updated = post.updated.unwrap_or_else(Utc::now);

        let id = match existing {
            Some(id) => {
                // refresh the metadata of the existing post
                self.bind(id).update(
                    UpdatePost::default()
                        .title(post.title)
                        .platform(Some(post.platform))
                        .published(published)
                        .updated_by_latest(updated),
                )?;
                id
            }
            None => {
                // insert new post
                let mut stmt = self.conn().prepare_cached(
                    "INSERT INTO posts (title, source, platform, published, updated) VALUES (?, ?, ?, ?, ?) RETURNING id",
                )?;
                stmt.query_row(
                    params![post.title, post.source, post.platform, published, updated],
                    |row| row.get(0),
                )?
            }
        };

        let b = self.bind(id);

        // An explicit thumbnail wins; otherwise the first image in the content is used (below).
        let mut thumb = post
            .thumb
            .as_ref()
            .map(|thumb| self.import_file_meta(id, thumb))
            .transpose()?;

        let content = post
            .content
            .iter()
            .map(|content| {
                Ok(match content {
                    UnsyncContent::Text(text) => Content::Text(text.clone()),
                    UnsyncContent::File(file) => {
                        let file_meta = self.import_file_meta(id, file)?;
                        // No thumbnail yet: fall back to the first image file.
                        if thumb.is_none() && file.mime.starts_with("image/") {
                            thumb = Some(file_meta);
                        }
                        Content::File(file_meta)
                    }
                })
            })
            .collect::<Result<Vec<_>>>()?;
        b.update(
            UpdatePost::default()
                .content(content)
                .thumb(thumb)
                .comments(post.comments),
        )?;

        let tags = import_many!(post.tags => import_tag);
        b.add_tags(&tags)?;

        let collections = import_many!(post.collections => import_collection);
        b.add_collections(&collections)?;

        b.add_authors(&post.authors)?;

        // Files of a post live under
        // `<archive path>/<id / POSTS_PRE_CHUNK>/<id % POSTS_PRE_CHUNK>/`.
        let path = self
            .path
            .join((id.raw() / POSTS_PRE_CHUNK).to_string())
            .join((id.raw() % POSTS_PRE_CHUNK).to_string());

        // Collecting through a `HashMap` keeps a single entry per destination path, so a file
        // that is both the thumbnail and part of the content is only reported once.
        let files = post
            .content
            .into_iter()
            .filter_map(|c| match c {
                UnsyncContent::Text(_) => None,
                UnsyncContent::File(file) => Some(file),
            })
            .chain(post.thumb)
            .map(|f| (path.join(f.filename), f.data))
            .collect::<HashMap<_, _>>()
            .into_iter()
            .collect::<Vec<_>>();

        if update_relation {
            post.authors.iter().try_for_each(|&author| {
                self.bind(author).update(
                    UpdateAuthor::default()
                        .thumb_by_latest()
                        .updated_by_latest(),
                )
            })?;

            collections.iter().try_for_each(|&collection| {
                self.bind(collection)
                    .update(UpdateCollection::default().thumb_by_latest())
            })?;
        }

        Ok((id, post.authors, collections, files))
    }

    /// Import a post and write its files to disk.
    ///
    /// Calls `import_post` with `update_relation = true`, then writes every returned
    /// `(destination path, data)` pair with `WritableFileMeta::write_to_file`, creating the
    /// destination directories as needed.
    ///
    /// # Errors
    ///
    /// Returns `Error` if importing the post fails, or if a directory or file cannot be
    /// created or written.
    pub fn import_post_with_files<U: WritableFileMeta>(
        &self,
        post: UnsyncPost<U>,
    ) -> Result<PostId> {
        let (id, _, _, contents) = self.import_post(post, true)?;

        // Files of one post normally share a directory, so only call `create_dir_all` when the
        // parent differs from the one handled last. Unlike creating the directory for the first
        // file only, this still works when the destinations do not all share one parent.
        let mut last_dir = None;
        for (path, content) in &contents {
            let dir = path.parent();
            if dir != last_dir {
                // A path without a parent is left to `File::create` to report as an error.
                if let Some(dir) = dir {
                    std::fs::create_dir_all(dir)?;
                }
                last_dir = dir;
            }

            let mut file = File::create(path)?;
            content.write_to_file(&mut file)?;
        }

        Ok(id)
    }

    /// Import a post whose file data are paths, and move those files into the archive.
    ///
    /// Calls `import_post` with `update_relation = true`. For every returned
    /// `(destination, source)` pair the destination's parent directory is created and the
    /// source file is moved there with `std::fs::rename`.
    ///
    /// # Errors
    ///
    /// Returns `Error` if importing the post fails, or if creating a directory or renaming a
    /// file fails.
    ///
    /// # Panics
    ///
    /// Panics if a destination path has no parent directory.
    pub fn import_post_with_rename_files(&self, post: UnsyncPost<PathBuf>) -> Result<PostId> {
        let (post_id, _, _, moves) = self.import_post(post, true)?;

        moves.iter().try_for_each(|(dest, src)| -> Result<()> {
            let dir = dest.parent().unwrap();
            std::fs::create_dir_all(dir)?;
            std::fs::rename(src, dest)?;
            Ok(())
        })?;

        Ok(post_id)
    }

    /// Import several posts into the archive in one go.
    ///
    /// Every post is imported through `import_post` with its own relation update switched off.
    /// The authors and collections touched by all posts are gathered in sets, so that each of
    /// them is refreshed at most once after the last post has been imported.
    ///
    /// # Parameters
    ///
    /// - `posts`: the posts to import, processed in iteration order.
    /// - `update_relation`: update the relations of authors and collections after importing.
    ///
    /// # Returns
    ///
    /// The ids of the imported posts (in input order) and the `(destination path, data)` pairs
    /// of all their files.
    ///
    /// # Errors
    ///
    /// Returns `Error` if there was an error accessing the database. Importing stops at the
    /// first post that fails.
    pub fn import_posts<U>(
        &self,
        posts: impl IntoIterator<Item = UnsyncPost<U>>,
        update_relation: bool,
    ) -> Result<(Vec<PostId>, Vec<(PathBuf, U)>)> {
        let mut ids = Vec::new();
        let mut files = Vec::new();
        let mut touched_authors = HashSet::new();
        let mut touched_collections = HashSet::new();

        for post in posts {
            // Relations are refreshed once at the end instead of once per post.
            let (id, authors, collections, post_files) = self.import_post(post, false)?;

            ids.push(id);
            files.extend(post_files);
            touched_authors.extend(authors);
            touched_collections.extend(collections);
        }

        if !update_relation {
            return Ok((ids, files));
        }

        for author in touched_authors {
            self.bind(author).update(
                UpdateAuthor::default()
                    .thumb_by_latest()
                    .updated_by_latest(),
            )?;
        }

        for collection in touched_collections {
            self.bind(collection)
                .update(UpdateCollection::default().thumb_by_latest())?;
        }

        Ok((ids, files))
    }

    /// Import several posts and write their files to disk.
    ///
    /// Calls `import_posts` with `update_relation = true`. For every returned
    /// `(destination path, data)` pair the parent directory is created, the file is created,
    /// and the data is written with `WritableFileMeta::write_to_file`.
    ///
    /// # Errors
    ///
    /// Returns `Error` if importing fails, or if a directory or file cannot be created or
    /// written.
    ///
    /// # Panics
    ///
    /// Panics if a destination path has no parent directory.
    pub fn import_posts_with_files<U: WritableFileMeta>(
        &self,
        posts: impl IntoIterator<Item = UnsyncPost<U>>,
    ) -> Result<Vec<PostId>> {
        let (post_ids, pending) = self.import_posts(posts, true)?;

        pending
            .into_iter()
            .try_for_each(|(dest, data)| -> Result<()> {
                std::fs::create_dir_all(dest.parent().unwrap())?;

                let mut out = File::create(dest)?;
                data.write_to_file(&mut out)?;
                Ok(())
            })?;

        Ok(post_ids)
    }

    /// Import several posts whose file data are paths, and move those files into the archive.
    ///
    /// Calls `import_posts` with `update_relation = true`. For every returned
    /// `(destination, source)` pair the destination's parent directory is created and the
    /// source file is moved there with `std::fs::rename`.
    ///
    /// # Errors
    ///
    /// Returns `Error` if importing fails, or if creating a directory or renaming a file fails.
    ///
    /// # Panics
    ///
    /// Panics if a destination path has no parent directory.
    pub fn import_posts_with_rename_files(
        &self,
        posts: impl IntoIterator<Item = UnsyncPost<PathBuf>>,
    ) -> Result<Vec<PostId>> {
        let (post_ids, moves) = self.import_posts(posts, true)?;

        moves
            .into_iter()
            .try_for_each(|(dest, src)| -> Result<()> {
                std::fs::create_dir_all(dest.parent().unwrap())?;
                std::fs::rename(src, dest)?;
                Ok(())
            })?;

        Ok(post_ids)
    }
}

/// Represents a post that is not yet synchronized with the archive.
///
/// `T` is the type of the data attached to each file of the post (for example a `PathBuf`
/// for `import_post_with_rename_files`, or any `WritableFileMeta` for
/// `import_post_with_files`).
#[derive(Debug, Clone)]
pub struct UnsyncPost<T> {
    /// The original URL of this post (e.g., "https://example.com/blog/1").
    ///
    /// On import, an existing post is looked up by this value.
    pub source: String,
    /// The title of the post
    pub title: String,
    /// The post's content items (text and file references), in order
    pub content: Vec<UnsyncContent<T>>,
    /// Optional thumbnail image for the post.
    ///
    /// When `None`, the import uses the first content file whose mime type starts with `image/`.
    pub thumb: Option<UnsyncFileMeta<T>>,
    /// Comments on the post
    pub comments: Vec<Comment>,
    /// When the post was last updated; the import falls back to the current time when `None`
    pub updated: Option<DateTime<Utc>>,
    /// When the post was published; the import falls back to the current time when `None`
    pub published: Option<DateTime<Utc>>,
    /// Platform associated with the post
    pub platform: PlatformId,
    /// Tags associated with the post
    pub tags: Vec<UnsyncTag>,
    /// The IDs of the authors of this post
    pub authors: Vec<AuthorId>,
    /// The collections this post belongs to
    pub collections: Vec<UnsyncCollection>,
}

impl<T> UnsyncPost<T> {
    /// Create a post from its required fields.
    ///
    /// The thumbnail and both timestamps start as `None`; comments, tags, authors and
    /// collections start empty. Use the builder methods below to fill them in.
    pub fn new(
        platform: PlatformId,
        source: String,
        title: String,
        content: Vec<UnsyncContent<T>>,
    ) -> Self {
        Self {
            platform,
            source,
            title,
            content,
            thumb: None,
            published: None,
            updated: None,
            comments: Vec::new(),
            tags: Vec::new(),
            authors: Vec::new(),
            collections: Vec::new(),
        }
    }

    /// Replace the source URL of the post.
    pub fn source(mut self, source: String) -> Self {
        self.source = source;
        self
    }

    /// Replace the title of the post.
    pub fn title(mut self, title: String) -> Self {
        self.title = title;
        self
    }

    /// Replace the content items of the post.
    pub fn content(mut self, content: Vec<UnsyncContent<T>>) -> Self {
        self.content = content;
        self
    }

    /// Set (or clear, with `None`) the thumbnail of the post.
    pub fn thumb(mut self, thumb: Option<UnsyncFileMeta<T>>) -> Self {
        self.thumb = thumb;
        self
    }

    /// Replace the comments of the post.
    pub fn comments(mut self, comments: Vec<Comment>) -> Self {
        self.comments = comments;
        self
    }

    /// Set the time the post was last updated.
    pub fn updated(mut self, updated: DateTime<Utc>) -> Self {
        self.updated = Some(updated);
        self
    }

    /// Set the time the post was published.
    pub fn published(mut self, published: DateTime<Utc>) -> Self {
        self.published = Some(published);
        self
    }

    /// Replace the platform of the post.
    pub fn platform(mut self, platform: PlatformId) -> Self {
        self.platform = platform;
        self
    }

    /// Replace the tags of the post.
    pub fn tags(mut self, tags: Vec<UnsyncTag>) -> Self {
        self.tags = tags;
        self
    }

    /// Replace the authors of the post.
    pub fn authors(mut self, authors: Vec<AuthorId>) -> Self {
        self.authors = authors;
        self
    }

    /// Replace the collections of the post.
    pub fn collections(mut self, collections: Vec<UnsyncCollection>) -> Self {
        self.collections = collections;
        self
    }

    /// Import this post into the archive, synchronizing it with the database.
    ///
    /// This is a shorthand for [import_post](crate::PostArchiverManager::import_post) with
    /// `update_relation = true`; the author and collection lists it returns are dropped.
    ///
    /// # Returns
    ///
    /// The id of the imported post and the `(destination path, data)` pairs of its files.
    ///
    /// # Errors
    ///
    /// Returns `Error` if there was an error accessing the database.
    pub fn sync<U>(self, manager: &PostArchiverManager<U>) -> Result<(PostId, Vec<(PathBuf, T)>)>
    where
        U: PostArchiverConnection,
    {
        manager
            .import_post(self, true)
            .map(|(post_id, _authors, _collections, files)| (post_id, files))
    }
}

/// A single content item of an [`UnsyncPost`] that is not yet synchronized with the archive.
#[derive(Debug, Clone)]
pub enum UnsyncContent<T> {
    /// Inline text; imported as `Content::Text`.
    Text(String),
    /// A file; its metadata is registered on import and it is stored as `Content::File`.
    File(UnsyncFileMeta<T>),
}