[package]
name = "web2llm"
version = "0.3.0"
# Automatic target discovery is switched off; the library, examples, tests
# and benchmark are each declared explicitly further down in this manifest.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
# Build-script detection is disabled.
build = false
categories = ["web-programming", "text-processing"]
edition = "2024"
homepage = "https://github.com/Quippy22/web2llm/releases"
keywords = ["web", "scraping", "markdown", "llm", "rag"]
license = "MIT"
readme = "README.md"
repository = "https://github.com/Quippy22/web2llm"
description = "Fetch web pages and convert to clean Markdown for LLM pipelines"
# `rendered` is part of the default feature set and activates the two
# optional dependencies chromiumoxide and tempfile.
# NOTE(review): presumably this gates browser-rendered page fetching —
# confirm against the crate sources.
[features]
default = ["rendered"]
rendered = ["dep:chromiumoxide", "dep:tempfile"]
# Library target, declared explicitly because `autolib = false` in [package].
[lib]
name = "web2llm"
path = "src/lib.rs"
# Example targets, listed one by one because `autoexamples = false` in
# [package]. Each can be run with `cargo run --example <name>`.
[[example]]
name = "basic"
path = "examples/basic.rs"
[[example]]
name = "batch"
path = "examples/batch.rs"
[[example]]
name = "chunks"
path = "examples/chunks.rs"
[[example]]
name = "links"
path = "examples/links.rs"
# Integration-test targets, listed one by one because `autotests = false` in
# [package]. Each can be run with `cargo test --test <name>`.
[[test]]
name = "batch"
path = "tests/batch.rs"
[[test]]
name = "content_scoring"
path = "tests/content_scoring.rs"
[[test]]
name = "pipeline"
path = "tests/pipeline.rs"
# Benchmark target, declared explicitly because `autobenches = false` in
# [package]. Note the source lives under `benchmarks/`, not Cargo's default
# `benches/` directory, so the explicit `path` is required.
[[bench]]
name = "extraction_bench"
path = "benchmarks/extraction_bench.rs"
# Disable the built-in libtest harness; the benchmark file supplies its own
# entry point (presumably via the criterion dev-dependency — confirm).
harness = false
# Runtime dependencies, one inline table per crate, sorted alphabetically.
[dependencies]
bumpalo = { version = "3.20.2", features = ["collections"] }
# Optional: only compiled when the `rendered` feature is enabled.
chromiumoxide = { version = "0.9.1", optional = true }
# Default features are disabled; only `clock` is requested.
chrono = { version = "0.4.44", default-features = false, features = ["clock"] }
futures = { version = "0.3.32" }
governor = { version = "0.10.4" }
htmd = { version = "0.5.0" }
# Default features are disabled; only `rustls` is requested.
reqwest = { version = "0.13", default-features = false, features = ["rustls"] }
scraper = { version = "0.25.0" }
# Optional: only compiled when the `rendered` feature is enabled.
tempfile = { version = "3.10", optional = true }
texting_robots = { version = "0.2.2" }
thiserror = { version = "2.0.18" }
tokio = { version = "1.50.0", features = ["rt-multi-thread", "macros", "sync", "time"] }
url = { version = "2.5.8" }
# Dependencies used only when building tests, examples and benchmarks.
[dev-dependencies]
criterion = { version = "0.8.2", features = ["html_reports", "async_tokio"] }
# Also declared as an optional runtime dependency; listing it here makes it
# available to test/example/bench targets regardless of enabled features.
tempfile = { version = "3.10" }
wiremock = { version = "0.6.5" }
# Release profile settings.
# Cargo reads profiles only from the manifest at the root of the workspace
# being built, so these apply when this package is built directly and are
# ignored when it is compiled as a dependency of another crate.
[profile.release]
# `true` selects whole-program ("fat") link-time optimization.
lto = true
# Compile each crate as a single codegen unit: slower builds, but gives the
# optimizer the whole crate at once.
codegen-units = 1
# Abort the process on panic instead of unwinding.
panic = "abort"
# `true` is equivalent to "symbols": strip symbols and debug info from the
# produced binary.
strip = true