Skip to main content

papers_datalab/
lib.rs

1//! Async Rust client for the [DataLab Marker REST API](https://www.datalab.to).
2//!
3//! DataLab Marker converts PDF and other documents to Markdown, HTML, JSON,
4//! or structured chunks using a cloud-based ML pipeline. Conversion is async:
5//! submit a job with [`DatalabClient::submit_marker`] and poll for the result
6//! with [`DatalabClient::get_marker_result`], or use the convenience method
7//! [`DatalabClient::convert_document`], which handles polling automatically.
8//!
9//! # Quick start
10//!
11//! ```no_run
12//! # async fn example() -> papers_datalab::Result<()> {
13//! use papers_datalab::{DatalabClient, MarkerRequest, OutputFormat, ProcessingMode};
14//!
15//! let client = DatalabClient::from_env()?;
16//! let pdf_bytes = std::fs::read("paper.pdf").unwrap();
17//!
18//! let result = client.convert_document(MarkerRequest {
19//!     file: Some(pdf_bytes),
20//!     filename: Some("paper.pdf".into()),
21//!     output_format: vec![OutputFormat::Markdown],
22//!     mode: ProcessingMode::Accurate,
23//!     ..Default::default()
24//! }).await?;
25//!
26//! println!("{}", result.markdown.unwrap_or_default());
27//! # Ok(())
28//! # }
29//! ```
30//!
31//! # Authentication
32//!
33//! Set the `DATALAB_API_KEY` environment variable, or pass the key directly
34//! to [`DatalabClient::new`].
35
36pub mod client;
37pub mod error;
38pub mod types;
39
40pub use client::DatalabClient;
41pub use error::{DatalabError, Result};
42pub use types::{
43    MarkerPollResponse, MarkerRequest, MarkerStatus, MarkerSubmitResponse, OutputFormat,
44    ProcessingMode, StepType, StepTypesResponse,
45};