Skip to main content

akribes_sdk/sub/
convert.rs

1use std::sync::Arc;
2
3use crate::client::{AkribesClient, Inner};
4use crate::error::Result;
5use crate::models::ConvertResult;
6
7/// Sub-client for document conversion via Docling. Obtained via [`AkribesClient::convert()`].
8#[derive(Clone, Debug)]
9pub struct ConvertClient {
10    pub(crate) inner: Arc<Inner>,
11}
12
13impl ConvertClient {
14    pub(crate) fn new(inner: Arc<Inner>) -> Self {
15        Self { inner }
16    }
17
18    fn c(&self) -> AkribesClient {
19        AkribesClient {
20            inner: Arc::clone(&self.inner),
21        }
22    }
23
24    /// Convert a document file to Markdown via the server's Docling integration.
25    ///
26    /// The server supports: PDF, DOCX, DOC, PPTX, XLSX, HTML, PNG, JPG, TIFF.
27    pub async fn convert_file(&self, filename: &str, data: Vec<u8>) -> Result<ConvertResult> {
28        let url = format!("{}/convert", self.inner.base_url);
29        self.post_convert(&url, filename, data).await
30    }
31
32    /// Project-scoped convert. Prefer this over [`convert_file`](Self::convert_file)
33    /// when uploading for a specific project so the server can enforce scope.
34    pub async fn convert_file_for_project(
35        &self,
36        project_id: i64,
37        filename: &str,
38        data: Vec<u8>,
39    ) -> Result<ConvertResult> {
40        let url = format!("{}/projects/{}/convert", self.inner.base_url, project_id);
41        self.post_convert(&url, filename, data).await
42    }
43
44    async fn post_convert(
45        &self,
46        url: &str,
47        filename: &str,
48        data: Vec<u8>,
49    ) -> Result<ConvertResult> {
50        let part = reqwest::multipart::Part::bytes(data)
51            .file_name(filename.to_string())
52            .mime_str(mime_for(filename))?;
53        let form = reqwest::multipart::Form::new().part("file", part);
54        self.c().post_multipart(url, form).await
55    }
56}
57
/// Picks the MIME type to send for an upload based on its filename extension.
///
/// The extension is the text after the last `.` in `filename`, lowercased
/// before comparison so `REPORT.PDF` and `report.pdf` are treated alike.
/// Names without a `.`, names ending in `.`, and unrecognised extensions all
/// map to `application/octet-stream`.
fn mime_for(filename: &str) -> &'static str {
    // `rsplit_once` returns `None` when the name contains no dot, so a bare
    // name such as "pdf" is not mistaken for a file with a ".pdf" extension.
    let ext = filename
        .rsplit_once('.')
        .map(|(_, ext)| ext.to_lowercase())
        .unwrap_or_default();

    match ext.as_str() {
        "pdf" => "application/pdf",
        "docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "doc" => "application/msword",
        "pptx" => "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "xlsx" => "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "html" | "htm" => "text/html",
        "png" => "image/png",
        "jpg" | "jpeg" => "image/jpeg",
        "tiff" | "tif" => "image/tiff",
        // Also reached for the empty extension (no dot, or a trailing dot).
        _ => "application/octet-stream",
    }
}