// wallabag-api 0.4.3

// Copyright 2018 Samuel Walladge <samuel@swalladge.net>
// SPDX-License-Identifier: Apache-2.0 OR MIT

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

use crate::utils::serde::parse_hashmap_with_null_values;
use crate::utils::serde::parse_intbool;

use super::annotations::Annotations;
use super::common::ID;
use super::tags::Tags;

/// Type alias: a list of entries, as returned from some endpoints.
pub type Entries = Vec<Entry>;

/// A struct representing an entry from wallabag (a full saved article including
/// all annotations and tags; annotations and tags do not need to be requested
/// separately).
///
/// Most fields are controlled by the server. When creating an entry, the server will send a
/// request to the given url and use the response to populate many of the fields. This response is
/// what `headers`, `http_status`, `mimetype`, etc. are referring to.
///
/// Field order is significant for the derived `Serialize` output, so keep it stable.
#[derive(Deserialize, Serialize, Debug)]
pub struct Entry {
    /// Annotation objects for this entry.
    pub annotations: Option<Annotations>,

    /// Content. Should be HTML if present.
    pub content: Option<String>,

    /// The timestamp of when the entry was created on the server.
    pub created_at: DateTime<Utc>,

    /// The resolved domain name of the url. Could be None if the server couldn't resolve the url.
    pub domain_name: Option<String>,

    /// A map of header name -> header value. These appear to be headers from the original source
    /// url.
    ///
    /// The custom deserializer tolerates header entries whose value is `null`. `default` is
    /// required alongside it: serde only treats a missing key as `None` for plain `Option`
    /// fields, and a field using `deserialize_with` would otherwise fail with "missing field"
    /// when the server omits `headers` entirely.
    #[serde(default, deserialize_with = "parse_hashmap_with_null_values")]
    pub headers: Option<HashMap<String, String>>,

    /// I'm guessing this is the status the server got when retrieving the content from the url.
    pub http_status: Option<String>,

    /// ID of the entry. Should be an integer. Should also be unique, so can use this directly as
    /// the local id if storing the entry in a DB.
    pub id: ID,

    /// The archived (or read) status of the entry. These boolean options are sometimes represented
    /// as 0 or 1 from the API, which makes parsing in a strongly typed language annoying.
    #[serde(deserialize_with = "parse_intbool")]
    pub is_archived: bool,

    /// The public shared status of the entry. If this is true, then there should be a public link
    /// to this exact entry on the server. The link will be based around the value of the `uid`
    /// field and (TODO: confirm if this can be relied on) formatted as BASE_URL/share/UID.
    #[serde(deserialize_with = "parse_intbool")]
    pub is_public: bool,

    /// The starred status of the entry.
    #[serde(deserialize_with = "parse_intbool")]
    pub is_starred: bool,

    /// The language of the entry - probably generated by the server from inspecting the response.
    pub language: Option<String>,

    /// The mimetype of the entry - probably generated by the server from inspecting the response.
    /// Not sure about the support status for other mimetypes. Observed behaviour suggests that the
    /// server converts everything to HTML - eg. a text/plain mimetype content will be plain text
    /// surrounded by `<pre>` tags.
    pub mimetype: Option<String>,

    /// Supposedly the original url given will be stored here. If a shortened link is submitted to
    /// the server, the short link will be here, but the resolved link will be in `url`. Observed
    /// behaviour is that this field is never set.
    pub origin_url: Option<String>,

    /// Optional url for an image related to the entry. Eg. for displaying as a background image to
    /// the entry tile.
    pub preview_picture: Option<String>,

    /// Data about when the entry was published (scraped from the original web page).
    pub published_at: Option<DateTime<Utc>>,

    /// Data about who published the entry (scraped from the original web page).
    pub published_by: Option<Vec<Option<String>>>,

    /// Estimated reading time in minutes. Generated by the server, probably based off your set
    /// reading speed or a default.
    pub reading_time: u32,

    /// Timestamp of when the entry was starred, if it is starred. Unstarring an entry sets this to
    /// None.
    pub starred_at: Option<DateTime<Utc>>,

    /// A list of tag objects associated with this entry.
    pub tags: Tags,

    /// An optional title for the entry.
    pub title: Option<String>,

    /// This will be only set by the server as a unique id to identify the entry if it has been
    /// shared. For example if you share via public link on framabag and the uid is FOO, then the
    /// public url will be framabag.org/share/FOO
    pub uid: Option<String>,

    /// Timestamp when the entry was last updated. This is bumped for any change to any field
    /// attached to the entry except for annotations.
    ///
    /// TODO: check if entry updates if a tag is globally edited (eg. renamed)
    pub updated_at: DateTime<Utc>,

    /// Resolved url of the entry. If the origin_url redirected to a different url (eg. via a
    /// shortened link), the final url will be stored here.
    pub url: Option<String>,

    /// Email of the user who owns this entry. Currently `user_*` fields are redundant since you
    /// can only access entries that belong to you. Entry sharing between users is planned for the
    /// future so this may become relevant soon.
    pub user_email: String,

    /// ID of the user who owns this entry.
    pub user_id: ID,

    /// Username of the user who owns this entry.
    pub user_name: String,
}

/// A struct representing a deleted entry from wallabag (a full saved article including
/// annotations and tags). The only difference from the full entry is that this
/// doesn't have an id. Only used internally because a full entry gets
/// reconstituted before being returned to the client.
///
/// Every field mirrors the field of the same name on `Entry`, including the custom
/// deserializers, so that a payload accepted as an `Entry` is also accepted here.
#[derive(Deserialize, Debug)]
pub(crate) struct DeletedEntry {
    pub annotations: Option<Annotations>,
    pub content: Option<String>,
    pub created_at: DateTime<Utc>,
    pub domain_name: Option<String>,

    // Same null-tolerant header parsing as `Entry::headers`; a plain `Option<HashMap<..>>`
    // rejects header entries whose value is `null`. `default` keeps a missing `headers` key
    // deserializing to `None`, as it did before the custom deserializer was attached.
    // NOTE(review): assumes `parse_hashmap_with_null_values` also accepts a `null` value for
    // the whole field, as it must for `Entry` — confirm against the helper.
    #[serde(default, deserialize_with = "parse_hashmap_with_null_values")]
    pub headers: Option<HashMap<String, String>>,
    pub http_status: Option<String>,

    // The API sometimes sends these flags as 0/1 rather than JSON booleans.
    #[serde(deserialize_with = "parse_intbool")]
    pub is_archived: bool,

    #[serde(deserialize_with = "parse_intbool")]
    pub is_public: bool,

    #[serde(deserialize_with = "parse_intbool")]
    pub is_starred: bool,
    pub language: Option<String>,
    pub mimetype: Option<String>,
    pub origin_url: Option<String>,
    pub preview_picture: Option<String>,
    pub published_at: Option<DateTime<Utc>>,
    pub published_by: Option<Vec<Option<String>>>,
    pub reading_time: u32,
    pub starred_at: Option<DateTime<Utc>>,
    pub tags: Tags,
    pub title: Option<String>,
    pub uid: Option<String>,
    pub updated_at: DateTime<Utc>,
    pub url: Option<String>,
    pub user_email: String,
    pub user_id: ID,
    pub user_name: String,
}

/// This is implemented so that an Entry can be used interchangeably with an ID
/// for some client methods. For convenience.
impl From<Entry> for ID {
    fn from(entry: Entry) -> Self {
        entry.id
    }
}

/// This is implemented so that an &Entry can be used interchangeably with an ID
/// for some client methods. For convenience.
impl From<&Entry> for ID {
    fn from(entry: &Entry) -> Self {
        entry.id
    }
}

/// Crate-internal shape of a paginated list-of-entries response from the api.
#[derive(Debug, Deserialize)]
pub(crate) struct PaginatedEntries {
    // NOTE(review): these four counters look like the source of the metadata
    // fields on `EntriesPage`; the conversion is not in this file — confirm.
    pub limit: u32,
    pub page: u32,
    pub pages: u32,
    pub total: u32,

    /// Wrapper holding the entries themselves; read from the JSON key `_embedded`.
    #[serde(rename = "_embedded")]
    pub embedded: EmbeddedEntries,
}

/// The `_embedded` object nested inside `PaginatedEntries`; it carries the
/// actual list of entries under the `items` key.
#[derive(Debug, Deserialize)]
pub(crate) struct EmbeddedEntries {
    /// The entries contained in this page of results.
    pub items: Entries,
}

/// Represents a single page of entries returned from a query. Includes both the payload
/// (`entries`) and the metadata describing where this page sits in the full result set.
#[derive(Debug)]
pub struct EntriesPage {
    /// Number of entries returned per page. This is set by the server; useful to know if you're
    /// accepting the server default because this will inform what the server default is.
    pub per_page: u32,

    /// The page number of this page of results.
    pub current_page: u32,

    /// Total number of pages in the result set.
    pub total_pages: u32,

    /// Total number of entries in the query set, across all pages.
    pub total_entries: u32,

    /// The list of entries returned for this page.
    pub entries: Entries,
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A header whose value is a plain string must be kept as-is in `headers`.
    #[test]
    fn test_entry_header_with_string_value() {
        let payload = r###"{
            "is_archived": 1,
            "is_starred": 0,
            "user_name": "sibben",
            "user_email": "detlef@posteo.org",
            "user_id": 15568,
            "tags": [],
            "is_public": false,
            "id": 10849650,
            "uid": null,
            "title": "Klettenwurzel\u00f6l",
            "url": "https:\/\/oelerini.com\/klettenwurzeloel",
            "hashed_url": "baff1dd17cb2cc15578cb9b6955971dfb8ada45a",
            "origin_url": null,
            "given_url": "https:\/\/oelerini.com\/klettenwurzeloel",
            "hashed_given_url": "baff1dd17cb2cc15578cb9b6955971dfb8ada45a",
            "archived_at": "2020-02-12T10:20:58+0100",
            "content": "Dummy content",
            "created_at": "2019-01-14T18:16:36+0100",
            "updated_at": "2020-02-12T10:20:58+0100",
            "published_at": null,
            "published_by": null,
            "starred_at": null,
            "annotations": [],
            "mimetype": "text\/html",
            "language": "de",
            "reading_time": 12,
            "domain_name": "oelerini.com",
            "preview_picture": "https:\/\/oelerini.com\/img\/klettenwurzeloel.jpg",
            "http_status": null,
            "headers": {
                "content-type": "text\/html"
            },
            "_links": {
                "self": {
                    "href": "\/api\/entries\/10849650"
                }
            }
        }
        "###;

        let parsed = serde_json::from_str::<Entry>(payload).unwrap();

        // The JSON-escaped "text\/html" must come out as the unescaped "text/html".
        let mut expected: HashMap<String, String> = HashMap::new();
        expected.insert(String::from("content-type"), String::from("text/html"));
        assert_eq!(parsed.headers, Some(expected));
    }

    /// A header whose value is `null` must not break parsing; the header is
    /// dropped, leaving an empty (but present) map.
    #[test]
    fn test_entry_header_with_null_value() {
        let payload = r###"{
            "is_archived": 1,
            "is_starred": 0,
            "user_name": "sibben",
            "user_email": "detlef@posteo.org",
            "user_id": 15568,
            "tags": [],
            "is_public": false,
            "id": 10849669,
            "uid": null,
            "title": "Erfahrungen beim Jurtenaufbau Es geht auch zu zweit!",
            "url": "https:\/\/www.jurte.com\/de\/berichte\/ammertal.html",
            "hashed_url": "f3c65c41ecef84d95e7a7afc12dbebdd04a8a471",
            "origin_url": null,
            "given_url": "https:\/\/www.jurte.com\/de\/berichte\/ammertal.html",
            "hashed_given_url": "f3c65c41ecef84d95e7a7afc12dbebdd04a8a471",
            "archived_at": "2020-02-12T10:20:59+0100",
            "content": "wallabag can't retrieve contents for this article. Please <a href=\"http:\/\/doc.wallabag.org\/en\/user\/errors_during_fetching.html#how-can-i-help-to-fix-that\">troubleshoot this issue<\/a>.",
            "created_at": "2018-08-23T22:28:58+0200",
            "updated_at": "2020-02-12T10:20:59+0100",
            "published_at": null,
            "published_by": null,
            "starred_at": null,
            "annotations": [],
            "mimetype": null,
            "language": null,
            "reading_time": 0,
            "domain_name": "www.jurte.com",
            "preview_picture": "https:\/\/www.jurte.com\/images\/ammertal\/aushub_192.jpg",
            "http_status": null,
            "headers": {
                "content-type": null
            },
            "_links": {
                "self": {
                    "href": "\/api\/entries\/10849669"
                }
            }
        }
        "###;

        let parsed = serde_json::from_str::<Entry>(payload).unwrap();

        let expected: HashMap<String, String> = HashMap::new();
        assert_eq!(parsed.headers, Some(expected));
    }
}