# Package identity and publishing metadata.
[package]
name = "transmutation"
version = "0.3.1"
authors = ["HiveLLM Team <team@hivellm.org>"]
categories = ["parser-implementations", "text-processing", "multimedia"]
documentation = "https://docs.rs/transmutation"
edition = "2024"
# Paths kept out of the packaged crate.
exclude = [
    "data/*",
    "docling_parse",
    "docling-parse/",
    "build_*",
    "libs/",
    "*.pdf",
    "*.mp3",
    "*.mp4",
    ".github/",
]
homepage = "https://hivellm.org/transmutation"
keywords = ["document", "conversion", "pdf", "llm", "embedding"]
license = "MIT"
readme = "README.md"
repository = "https://github.com/hivellm/transmutation"
# Minimum supported toolchain; edition 2024 needs Rust 1.85 or newer.
rust-version = "1.85"
description = "High-performance document conversion engine for AI/LLM embeddings - 27 formats supported"
# Windows installer settings (the `package.metadata.wix` table is read by cargo-wix).
[package.metadata.wix]
eula = true
license = "wix/License.rtf"
# NOTE(review): both GUIDs below look like placeholder patterns rather than
# generated values — confirm they are the identifiers actually intended to
# ship, since installer GUIDs are expected to be unique to this product.
path-guid = "87654321-4321-4321-4321-210987654321"
upgrade-guid = "12345678-1234-1234-1234-123456789012"
# Library target: crate root and the name it is linked under.
[lib]
path = "src/lib.rs"
name = "transmutation"
# Command-line binary. It is only built when the `cli` feature is enabled,
# because that feature pulls in the optional CLI dependencies.
[[bin]]
required-features = ["cli"]
path = "src/bin/transmutation.rs"
name = "transmutation"
# Runtime dependencies. Entries marked `optional = true` are compiled only when
# a feature from [features] (or, where no `dep:` reference exists, the
# dependency's implicit same-named feature) enables them.
[dependencies]
# Async runtime and helpers.
tokio = { version = "1.47", features = ["full"] }
async-trait = "0.1"
futures = "0.3"
# Error handling.
thiserror = "2.0"
anyhow = "1.0"
# Serialization.
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
# Filesystem helpers, MIME detection and hashing.
walkdir = "2.5"
tempfile = "3.20"
mime = "0.3"
mime_guess = "2.0"
sha2 = "0.10"
blake3 = "1.5"
# Logging and tracing.
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
# Optional, but not listed by any feature in [features]; it is reachable only
# through its implicit `tracing-opentelemetry` feature.
tracing-opentelemetry = { version = "0.30", optional = true }
file-format = "0.26"
# OCR bindings, enabled together by the `tesseract` feature.
tesseract = { version = "0.15", optional = true }
leptess = { version = "0.14", optional = true }
# Image decoding and processing.
image = { version = "0.25", features = ["png", "jpeg", "gif", "bmp", "tiff", "webp"] }
imageproc = "0.25"
# Enabled by the `docling-ffi` feature.
# `ort` is pinned exactly: a caret requirement on a pre-release also matches
# later 2.0.0 pre-releases and 2.x releases, and release candidates carry no
# compatibility guarantee, so an unpinned entry can break on `cargo update`
# or for downstream users resolving without this crate's lockfile.
ort = { version = "=2.0.0-rc.10", optional = true, features = ["download-binaries"] }
ndarray = { version = "0.15", optional = true }
rstar = { version = "0.12", optional = true }
# PDF handling. `pdfium-render` is enabled by `pdf-to-image` or `docling-ffi`.
lopdf = "0.35"
pdf-extract = "0.7"
pdfium-render = { version = "0.8", optional = true }
# HTML and XML parsing.
scraper = "0.21"
html5ever = "0.29"
quick-xml = "0.37"
# Optional, but not listed by any feature in [features]; it is reachable only
# through its implicit `roxmltree` feature.
roxmltree = { version = "0.21", optional = true }
# Archives. `tar`, `flate2` and `sevenz-rust` are enabled by `archives-extended`.
zip = "6.0"
tar = { version = "0.4", optional = true }
flate2 = { version = "1.0", optional = true }
sevenz-rust = { version = "0.6", optional = true }
# Office formats, enabled by the `office` feature (part of `default`).
docx-rs = { version = "0.4", optional = true }
umya-spreadsheet = { version = "2.3", optional = true }
# Markdown.
pulldown-cmark = "0.13"
comrak = "0.29"
# General utilities.
regex = "1.11"
once_cell = "1.20"
rayon = "1.10"
num_cpus = "1.16"
dirs = "5.0"
# Command-line interface, enabled by the `cli` feature.
clap = { version = "4.5", features = ["derive", "cargo", "env"], optional = true }
indicatif = { version = "0.17", optional = true }
console = { version = "0.15", optional = true }
colored = { version = "2.2", optional = true }
# Dependencies used only by tests, examples and benchmarks.
# `tempfile` is intentionally not repeated here: it is already a non-optional
# entry in [dependencies], and those are available to test targets as well.
[dev-dependencies]
# NOTE(review): criterion benchmarks need a `[[bench]]` target with
# `harness = false`; no such target is visible in this manifest — confirm
# whether benchmarks are declared elsewhere or not yet wired up.
criterion = { version = "0.6", features = ["async_tokio", "html_reports"] }
pretty_assertions = "1.4"
proptest = "1.6"
mockall = "0.13"
tokio-test = "0.4"
# Dependencies of the build script. This table is not target-specific, so
# `winres` is compiled for the build script on every host platform.
# NOTE(review): presumably used by build.rs to embed Windows resources — confirm.
[build-dependencies]
winres = "0.1"
# Cargo features. `default` comes first; the rest are listed alphabetically.
[features]
default = ["office"]
# Extra archive formats beyond the always-on `zip` dependency.
archives-extended = [
    "tar",
    "flate2",
    "sevenz-rust",
]
# Enables no dependencies in this manifest; acts purely as a cfg flag.
audio = []
# Everything needed by the `transmutation` binary (see its `required-features`).
cli = [
    "clap",
    "indicatif",
    "console",
    "colored",
]
docling-ffi = [
    "dep:ort",
    "dep:ndarray",
    "dep:rstar",
    "dep:pdfium-render",
]
# Convenience umbrella. It does not include `docling-ffi`.
full = [
    "pdf-to-image",
    "office",
    "image-ocr",
    "audio",
    "video",
    "archives-extended",
    "cli",
]
# Alias that forwards to the `tesseract` feature.
image-ocr = ["tesseract"]
office = [
    "docx-rs",
    "umya-spreadsheet",
]
pdf-to-image = ["dep:pdfium-render"]
tesseract = [
    "dep:tesseract",
    "dep:leptess",
]
# Enables no dependencies in this manifest; acts purely as a cfg flag.
video = []
# Release builds: maximum optimization, whole-program ("fat") LTO in a single
# codegen unit, with symbols stripped from the binary.
[profile.release]
codegen-units = 1
lto = "fat"
opt-level = 3
strip = "symbols"
# Benchmark builds: fully optimized with whole-program ("fat") LTO.
[profile.bench]
lto = "fat"
opt-level = 3
# docs.rs build configuration: document every feature and pass the `docsrs`
# cfg to rustdoc.
# NOTE(review): `all-features` also turns on the optional native and
# download-dependent dependencies (e.g. `ort` with `download-binaries`, the
# tesseract bindings) — confirm the documentation build succeeds in the
# docs.rs sandbox.
[package.metadata.docs.rs]
rustdoc-args = [
    "--cfg",
    "docsrs",
]
all-features = true
# Additional Clippy lints, all reported as warnings. Kept in alphabetical order.
[lints.clippy]
cast_lossless = "warn"
doc_link_with_quotes = "warn"
enum_glob_use = "warn"
explicit_into_iter_loop = "warn"
filter_map_next = "warn"
flat_map_option = "warn"
from_iter_instead_of_collect = "warn"
implicit_clone = "warn"
inconsistent_struct_constructor = "warn"
inefficient_to_string = "warn"
manual_is_variant_and = "warn"
manual_let_else = "warn"
match_wildcard_for_single_variants = "warn"
needless_continue = "warn"
needless_pass_by_ref_mut = "warn"
needless_raw_string_hashes = "warn"
ptr_as_ptr = "warn"
ref_option_ref = "warn"
uninlined_format_args = "warn"
unnecessary_wraps = "warn"
unused_self = "warn"
used_underscore_binding = "warn"
# Warn on unknown `cfg` names, while declaring `tokio_unstable` as an expected one.
[lints.rust]
unexpected_cfgs.level = "warn"
unexpected_cfgs.check-cfg = ["cfg(tokio_unstable)"]
# Rustdoc lints: silence the lint about intra-doc links that point at private items.
[lints.rustdoc]
private_intra_doc_links = "allow"
# Debian packaging settings (the `package.metadata.deb` table is read by cargo-deb).
[package.metadata.deb]
maintainer = "HiveLLM Team <team@hivellm.org>"
section = "text"
priority = "optional"
depends = "$auto"
license-file = ["LICENSE", "0"]
maintainer-scripts = "debian/"
conf-files = []
# Each entry is [source path, destination inside the package, file mode].
assets = [
    ["target/release/transmutation", "usr/bin/", "755"],
    ["README.md", "usr/share/doc/transmutation/README", "644"],
    ["LICENSE", "usr/share/doc/transmutation/LICENSE", "644"],
]
# The line-ending backslashes fold this into a single-line description.
extended-description = """\
Transmutation is a high-performance document conversion engine for AI/LLM embeddings. \
Supports 27+ formats including PDF, Office docs, images, audio, and video. \
Built in Rust for maximum performance and safety.\
"""