[package]
name = "omniparse"
version = "0.4.1"
authors = ["Chris Olson"]
# Target auto-discovery is switched off: the lib, bin, example and test
# targets are all declared explicitly further down in this manifest.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
# No build script is used.
build = false
categories = ["parsing", "text-processing"]
edition = "2024"
keywords = ["parser", "extraction", "pdf", "ocr", "metadata"]
license = "MIT OR Apache-2.0"
readme = "README.md"
repository = "https://github.com/sirhco/omniparse"
rust-version = "1.88"
description = "A Rust toolkit for detecting and extracting metadata, text, and content from various file formats"

[features]
# Active unless the consumer sets `default-features = false`.
default = ["pdf", "markdown", "svg", "webp", "epub", "mp3"]

# Per-format features. Each one switches on the optional crates it lists.
epub = ["dep:rbook"]
markdown = ["dep:pulldown-cmark"]
mp3 = ["dep:id3"]
pdf = ["dep:lopdf", "dep:weezl", "dep:ascii85"]
pdf-extract = ["pdf", "dep:pdf-extract"]
# These two enable no optional dependency.
# NOTE(review): presumably they only gate code through `cfg(feature = ...)` - confirm in src.
svg = []
webp = []

# OCR features. The `ocr-*` variants all build on the base `ocr` feature.
ocr = ["dep:imageproc", "dep:symspell", "dep:log"]
ocr-ml = [
    "ocr",
    "dep:ocrs",
    "dep:rten",
    "dep:rten-imageproc",
    "dep:dirs",
    "dep:ureq",
    "dep:sha2",
]
ocr-parallel = ["ocr", "parallel"]
ocr-train = ["ocr", "dep:ab_glyph"]

# These reference their optional dependency by bare name (no `dep:` prefix),
# i.e. through the implicit feature Cargo creates for it, so `tokio` and
# `rayon` are themselves valid feature names of this crate. Switching to
# `dep:` would remove those names.
async = ["tokio"]
parallel = ["rayon"]

[lib]
# Declared explicitly because `autolib = false` in [package].
name = "omniparse"
path = "src/lib.rs"

[[bin]]
# Declared explicitly because `autobins = false`; shares its name with the library target.
name = "omniparse"
path = "src/main.rs"

[[example]]
# Examples are listed one by one because `autoexamples = false` in [package].
# NOTE(review): no example sets `required-features`. If any of them uses an API
# gated behind a non-default feature (the names `async_extraction`, `ocr_*`,
# `create_ocr_fixtures` and `train_prototypes` hint at `async`, `ocr`,
# `ocr-train`), it will not build with default features - confirm against the
# example sources and add `required-features` where needed.
name = "async_extraction"
path = "examples/async_extraction.rs"

[[example]]
name = "basic_extraction"
path = "examples/basic_extraction.rs"

[[example]]
name = "batch_processing"
path = "examples/batch_processing.rs"

[[example]]
name = "create_large_test_fixtures"
path = "examples/create_large_test_fixtures.rs"

[[example]]
name = "create_large_test_fixtures_v2"
path = "examples/create_large_test_fixtures_v2.rs"

[[example]]
name = "create_ocr_fixtures"
path = "examples/create_ocr_fixtures.rs"

[[example]]
name = "create_phase3_fixtures"
path = "examples/create_phase3_fixtures.rs"

[[example]]
name = "create_test_fixtures"
path = "examples/create_test_fixtures.rs"

[[example]]
name = "css_extraction"
path = "examples/css_extraction.rs"

[[example]]
name = "custom_parser"
path = "examples/custom_parser.rs"

[[example]]
name = "html_extraction"
path = "examples/html_extraction.rs"

[[example]]
name = "legacy_office_extraction"
path = "examples/legacy_office_extraction.rs"

[[example]]
name = "ocr_basic"
path = "examples/ocr_basic.rs"

[[example]]
name = "ocr_validate"
path = "examples/ocr_validate.rs"

[[example]]
name = "presentation_extraction"
path = "examples/presentation_extraction.rs"

[[example]]
name = "rtf_extraction"
path = "examples/rtf_extraction.rs"

[[example]]
name = "spreadsheet_extraction"
path = "examples/spreadsheet_extraction.rs"

[[example]]
name = "train_prototypes"
path = "examples/train_prototypes.rs"

[[example]]
name = "web_client"
path = "examples/web_client.rs"

[[example]]
name = "web_service"
path = "examples/web_service.rs"

[[example]]
name = "web_service_prod"
path = "examples/web_service_prod.rs"

[[test]]
# Integration tests are listed one by one because `autotests = false` in [package].
# NOTE(review): no test sets `required-features`. `ocr_test` and
# `ocr_train_test` look like they exercise the non-default `ocr` / `ocr-train`
# features - confirm whether they gate themselves with `cfg` or need
# `required-features` here.
name = "cli_test"
path = "tests/cli_test.rs"

[[test]]
name = "core_types_test"
path = "tests/core_types_test.rs"

[[test]]
name = "enhancements_test"
path = "tests/enhancements_test.rs"

[[test]]
name = "integration_test"
path = "tests/integration_test.rs"

[[test]]
name = "new_formats_test"
path = "tests/new_formats_test.rs"

[[test]]
name = "ocr_test"
path = "tests/ocr_test.rs"

[[test]]
name = "ocr_train_test"
path = "tests/ocr_train_test.rs"

[[test]]
name = "parser_registration_test"
path = "tests/parser_registration_test.rs"

[[test]]
name = "parser_tests"
path = "tests/parser_tests.rs"

[[test]]
name = "performance_test"
path = "tests/performance_test.rs"

[[test]]
name = "type_detection_test"
path = "tests/type_detection_test.rs"

[dependencies]
# Alphabetical. Entries marked `optional = true` are only compiled when the
# feature named in the trailing comment is enabled (see [features]).
ab_glyph = { version = "0.2", optional = true }  # feature: ocr-train
ascii85 = { version = "0.2", optional = true }  # feature: pdf
calamine = "0.24"
chrono = { version = "0.4", features = ["serde"] }
clap = { version = "4.5", features = ["derive"] }
cssparser = "0.31"
csv = "1.3"
dirs = { version = "5", optional = true }  # feature: ocr-ml
flate2 = "1.0"
id3 = { version = "1", optional = true }  # feature: mp3
image = "0.24"
imageproc = { version = "0.23", optional = true }  # feature: ocr
kamadak-exif = "0.5"
log = { version = "0.4", optional = true }  # feature: ocr
lopdf = { version = "0.32", optional = true }  # feature: pdf
ocrs = { version = "0.12", optional = true }  # feature: ocr-ml
pdf-extract = { version = "0.7", optional = true }  # feature: pdf-extract
pulldown-cmark = { version = "0.10", optional = true, default-features = false }  # feature: markdown
quick-xml = "0.31"
rayon = { version = "1.10", optional = true }  # feature: parallel
rbook = { version = "0.7", optional = true, default-features = false }  # feature: epub
rten = { version = "0.24", optional = true }  # feature: ocr-ml
rten-imageproc = { version = "0.24", optional = true }  # feature: ocr-ml
scraper = "0.18"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
serde_yaml = "0.9"
sha2 = { version = "0.10", optional = true }  # feature: ocr-ml
symspell = { version = "0.5", optional = true }  # feature: ocr
tar = "0.4"
thiserror = "1.0"
tokio = { version = "1.0", features = ["fs", "io-util", "macros", "rt-multi-thread"], optional = true }  # feature: async
ureq = { version = "2", features = ["tls"], optional = true }  # feature: ocr-ml
weezl = { version = "0.1", optional = true }  # feature: pdf
zip = "0.6"

[dev-dependencies]
# Crates needed only to build tests and examples; they are not part of the
# published library's dependency graph. Alphabetical.
#
# `serde_json` is intentionally not repeated here: it is already an
# unconditional entry in the regular dependency list, which tests and examples
# can use directly.
assert_cmd = "2.0"
axum = { version = "0.7", features = ["multipart"] }
futures = "0.3"
metrics = "0.23"
metrics-exporter-prometheus = { version = "0.15", default-features = false }
num_cpus = "1"
predicates = "3.0"
reqwest = { version = "0.12", features = ["multipart", "json", "rustls-tls", "blocking"], default-features = false }
subtle = "2"
tempfile = "3.8"
# tokio's `full` feature already includes `signal`, so it is not listed separately.
tokio = { version = "1.0", features = ["full"] }
tower = { version = "0.5", features = ["limit", "timeout", "util"] }
tower-http = { version = "0.6", features = ["trace", "timeout", "limit", "cors", "catch-panic", "request-id", "util"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
uuid = { version = "1", features = ["v4"] }