// dragoman 0.2.9
//
// DOI redirection and content negotiation server
use std::path::PathBuf;

use commonmeta::Data;
use rusqlite::{Connection, OpenFlags, params};

use crate::error::AppError;

/// DDL that creates the `works` table for throwaway databases built in tests.
///
/// Only compiled under `cfg(test)`. Every column is `NOT NULL` with a default,
/// so test inserts may supply just the columns they care about. The `metadata`
/// column is a BLOB; the test fixture in this file stores zstd-compressed
/// commonmeta JSON in it, which is what `lookup` decodes.
///
/// NOTE(review): presumably mirrors the production commonmeta `works` schema —
/// confirm against the upstream definition.
#[cfg(test)]
pub(crate) const TEST_DDL: &str = r#"
CREATE TABLE works (
    "id"             TEXT PRIMARY KEY NOT NULL,
    "type"           TEXT NOT NULL DEFAULT '',
    "url"            TEXT NOT NULL DEFAULT '',
    "title"          TEXT NOT NULL DEFAULT '',
    "subjects"       TEXT NOT NULL DEFAULT '[]',
    "language"       TEXT NOT NULL DEFAULT '',
    "date_published" TEXT NOT NULL DEFAULT '',
    "date_updated"   TEXT NOT NULL DEFAULT '',
    "provider"       TEXT NOT NULL DEFAULT '',
    "metadata"       BLOB NOT NULL DEFAULT x''
)
"#;

fn connect(path: &PathBuf) -> Result<Connection, AppError> {
    Connection::open_with_flags(path, OpenFlags::SQLITE_OPEN_READ_ONLY)
        .map_err(|e| AppError::Internal(format!("sqlite open '{}': {e}", path.display())))
}

/// Validate that `path` can be opened as a SQLite database with a `works` table.
///
/// Returns `Ok(None)` when the file does not exist or contains no `works` table
/// (e.g. a commonmeta database used only for organisations or settings).
/// Returns `Err` only when the file exists but cannot be opened at all.
pub fn open(path: &std::path::Path) -> Result<Option<PathBuf>, AppError> {
    if !path.exists() {
        tracing::warn!(path = %path.display(), "sqlite file not found, running without local database");
        return Ok(None);
    }
    let path = path.to_path_buf();
    let conn = connect(&path)?;
    let has_works: bool = conn
        .query_row(
            "SELECT 1 FROM sqlite_master WHERE type='table' AND name='works'",
            [],
            |_| Ok(true),
        )
        .unwrap_or(false);
    if !has_works {
        tracing::info!(path = %path.display(), "sqlite database has no works table, running without local database");
        return Ok(None);
    }
    Ok(Some(path))
}

/// Return a random DOI `id` from the works table, or `None` if the table is empty.
pub fn random_doi(path: &PathBuf) -> Result<Option<String>, AppError> {
    let conn = connect(path)?;
    let result = conn.query_row(
        "SELECT id FROM works ORDER BY RANDOM() LIMIT 1",
        [],
        |row| row.get(0),
    );
    match result {
        Ok(id) => Ok(Some(id)),
        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
        Err(e) => Err(AppError::Internal(format!("sqlite random: {e}"))),
    }
}

/// Look up a single DOI in a commonmeta SQLite database.
pub fn lookup(path: &PathBuf, doi: &str) -> Result<Option<Data>, AppError> {
    let id = commonmeta::doi_utils::normalize_doi(doi);
    if id.is_empty() {
        return Ok(None);
    }

    let conn = connect(path)?;

    let result = conn.query_row(
        "SELECT metadata FROM works WHERE id = ?1",
        params![id],
        |row| row.get::<_, Vec<u8>>(0),
    );

    let blob = match result {
        Ok(b) => b,
        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
        Err(e) => return Err(AppError::Internal(format!("sqlite query: {e}"))),
    };

    let json = zstd::decode_all(blob.as_slice())
        .map_err(|e| AppError::Internal(format!("zstd decompress: {e}")))?;
    let json_str = String::from_utf8(json)
        .map_err(|e| AppError::Internal(format!("metadata utf8: {e}")))?;

    commonmeta::read("commonmeta", &json_str)
        .map(Some)
        .map_err(|e| AppError::Internal(format!("metadata parse: {e}")))
}

#[cfg(test)]
pub(crate) mod tests {
    use super::*;
    use std::path::Path;

    /// Create a SQLite file at `path` with the test schema and one record.
    ///
    /// The record is the DOI `https://doi.org/10.1234/test`, with its metadata
    /// stored as zstd-compressed commonmeta output. Returns `path` as an owned
    /// `PathBuf`.
    pub(crate) fn make_test_db(path: &Path) -> PathBuf {
        let db = Connection::open(path).expect("open test db");
        db.execute_batch(TEST_DDL).expect("create schema");

        let record = Data {
            id: "https://doi.org/10.1234/test".to_string(),
            type_: "JournalArticle".to_string(),
            url: "https://example.com/test-article".to_string(),
            title: "Test Article on Content Negotiation".to_string(),
            date_published: "2024-01-15".to_string(),
            provider: "Crossref".to_string(),
            ..Data::default()
        };
        let serialized = commonmeta::write("commonmeta", &record).unwrap();
        let blob = zstd::encode_all(serialized.as_slice(), 0).unwrap();

        db.execute(
            r#"INSERT INTO works ("id","type","url","title","date_published","provider","metadata")
               VALUES (?1,?2,?3,?4,?5,?6,?7)"#,
            params![
                &record.id,
                &record.type_,
                &record.url,
                &record.title,
                &record.date_published,
                &record.provider,
                blob,
            ],
        )
        .expect("insert test record");

        path.to_path_buf()
    }

    /// Seed a fresh test database inside a new temporary directory.
    ///
    /// The returned `TempDir` guard must be kept alive for as long as the
    /// database path is in use; dropping it removes the directory.
    fn seeded_db() -> (tempfile::TempDir, PathBuf) {
        let dir = tempfile::tempdir().unwrap();
        let path = make_test_db(&dir.path().join("test.sqlite3"));
        (dir, path)
    }

    #[test]
    fn open_returns_none_for_missing_file() {
        let missing = Path::new("/nonexistent/path/db.sqlite3");
        let result = open(missing);
        assert!(matches!(result, Ok(None)), "expected Ok(None), got {result:?}");
    }

    #[test]
    fn open_returns_none_when_no_works_table() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("no_works.sqlite3");
        {
            // Scope the writer so it is closed before `open` inspects the file.
            let writer = Connection::open(&path).unwrap();
            writer
                .execute_batch("CREATE TABLE organizations (id TEXT PRIMARY KEY, name TEXT);")
                .unwrap();
        }
        let result = open(&path);
        assert!(matches!(result, Ok(None)), "expected Ok(None), got {result:?}");
    }

    #[test]
    fn lookup_returns_none_for_unknown_doi() {
        let (_dir, path) = seeded_db();
        let found = lookup(&path, "10.9999/does-not-exist").unwrap();
        assert!(found.is_none());
    }

    #[test]
    fn lookup_finds_existing_doi() {
        let (_dir, path) = seeded_db();
        let record = lookup(&path, "10.1234/test")
            .unwrap()
            .expect("should find DOI");
        assert_eq!(record.id, "https://doi.org/10.1234/test");
        assert_eq!(record.title, "Test Article on Content Negotiation");
        assert_eq!(record.url, "https://example.com/test-article");
        assert_eq!(record.type_, "JournalArticle");
    }

    #[test]
    fn lookup_normalises_doi_prefix_form() {
        let (_dir, path) = seeded_db();
        // Bare DOI, https resolver URL, and legacy dx.doi.org URL.
        let forms = [
            "10.1234/test",
            "https://doi.org/10.1234/test",
            "http://dx.doi.org/10.1234/test",
        ];
        for doi in forms {
            let found = lookup(&path, doi).unwrap();
            assert!(found.is_some(), "should find DOI in form '{doi}'");
        }
    }

    #[test]
    fn lookup_empty_string_returns_none() {
        let (_dir, path) = seeded_db();
        let found = lookup(&path, "").unwrap();
        assert!(found.is_none());
    }
}