# Crate metadata: identity first, then publishing information, then target
# discovery switches, with the description last.
[package]
name = "wikipedia-article-transform"
version = "0.3.0"
edition = "2024"
license = "MIT"
readme = "README.md"
repository = "https://github.com/santhoshtr/wikipedia-article-transform"
keywords = ["wikipedia", "html", "text-extraction", "nlp", "tree-sitter"]
categories = ["text-processing", "parsing"]
# No build script is used, and automatic target discovery is switched off for
# every target kind, so only the targets declared explicitly in this manifest
# ([lib] and the [[bin]] entries) are built.
build = false
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "Transform Wikipedia articles in html to plaintext and markdown formats"
# Optional functionality. Each feature only enables optional dependencies via
# the `dep:` syntax; the library itself needs none of them.
[features]
# Nothing optional is enabled unless a feature is requested explicitly.
default = []
# Required by the `wikipedia-article-transform` binary (src/main.rs).
cli = ["dep:clap", "dep:reqwest", "dep:tokio"]
# Required by the `wikipedia-article-transform-web` binary (src/web.rs).
web = ["dep:actix-web", "dep:reqwest", "dep:tokio"]
# Library target. It is declared explicitly because `autolib = false` in
# [package] turns off automatic discovery. The crate name uses underscores,
# unlike the hyphenated package name.
[lib]
name = "wikipedia_article_transform"
path = "src/lib.rs"
# Command-line binary. `required-features` means this target is only built
# when the `cli` feature is enabled (it is not part of `default`).
[[bin]]
name = "wikipedia-article-transform"
path = "src/main.rs"
required-features = ["cli"]
# Web binary. `required-features` means this target is only built when the
# `web` feature is enabled (it is not part of `default`).
[[bin]]
name = "wikipedia-article-transform-web"
path = "src/web.rs"
required-features = ["web"]
# Dependencies, sorted alphabetically. Entries marked `optional = true` are
# only compiled when a feature in [features] enables them through `dep:<name>`.
[dependencies]
# Enabled by the `web` feature.
actix-web = { version = "4", optional = true }
anyhow = "1.0"
# Enabled by the `cli` feature.
clap = { version = "4.5", features = ["derive"], optional = true }
# Enabled by both `cli` and `web`. The crate's default feature set is turned
# off and only the listed features are enabled.
reqwest = { version = "0.13", default-features = false, features = ["json", "stream", "default-tls"], optional = true }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
# Enabled by both `cli` and `web`.
tokio = { version = "1", features = ["rt-multi-thread", "macros"], optional = true }
tree-sitter = "0.26.6"
tree-sitter-html = "0.23.2"