# content-extractor-rl 0.1.2
#
# RL-based article extraction from HTML using Deep Q-Networks and heuristic fallback
# Documentation
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

# Crate metadata. Keys follow the usual Cargo ordering: name and version
# first, the rest alphabetically, description last.
[package]
name = "content-extractor-rl"
version = "0.1.2"
authors = ["Sandeep Singh Sandhu <sandeep.sandhu@gmx.com>"]
# Target auto-discovery is switched off for every target kind; the library,
# test and bench targets are declared explicitly further down in this file.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
# No build script.
build = false
categories = ["text-processing", "web-programming", "science"]
documentation = "https://docs.rs/content-extractor-rl"
edition = "2021"
# NOTE(review): same URL as `repository` below — confirm a separate
# homepage entry is wanted.
homepage = "https://github.com/sandeepsandhu/content-extractor-rl"
keywords = ["article", "extraction", "html", "reinforcement", "nlp"]
license = "MIT OR Apache-2.0"
publish = true
readme = "README.md"
repository = "https://github.com/sandeepsandhu/content-extractor-rl"
description = "RL-based article extraction from HTML using Deep Q-Networks and heuristic fallback"

# Opt-in functionality; nothing is enabled by default.
[features]
default = []
# Forwards to the `cuda` feature of both candle crates.
cuda = ["candle-core/cuda", "candle-nn/cuda"]
# Turns on the optional trs-mlflow, tokio and futures dependencies.
mlflow-rs = ["trs-mlflow", "tokio", "futures"]

# Library target, listed explicitly (`autolib = false` in [package]).
[lib]
name = "content_extractor_rl"
path = "src/lib.rs"

# Integration-test targets, one per source file under tests/. They are
# listed explicitly because `autotests = false` in [package].
[[test]]
name = "algorithm_save_load_tests"
path = "tests/algorithm_save_load_tests.rs"

[[test]]
name = "integration_tests"
path = "tests/integration_tests.rs"

[[test]]
name = "model_integration_tests"
path = "tests/model_integration_tests.rs"

[[test]]
name = "ppo_shape_test"
path = "tests/ppo_shape_test.rs"

[[test]]
name = "sac_agent_tests"
path = "tests/sac_agent_tests.rs"

# Benchmark targets, one per source file under benches/.
# `harness = false` disables the default libtest bench harness, so each file
# supplies its own entry point — presumably through Criterion, which is
# declared as a dev-dependency; confirm against the bench sources.
[[bench]]
name = "concurrency_benchmark"
path = "benches/concurrency_benchmark.rs"
harness = false

[[bench]]
name = "extraction_benchmark"
path = "benches/extraction_benchmark.rs"
harness = false

[[bench]]
name = "memory_benchmark"
path = "benches/memory_benchmark.rs"
harness = false

# Runtime dependencies, sorted alphabetically. A bare string is the version
# requirement; an inline table is used when extra keys are needed.
[dependencies]
anyhow = "1.0.100"
bzip2 = "0.6.1"
candle-core = "0.9.2"
candle-nn = "0.9.2"
chrono = { version = "0.4.42", features = ["serde"] }
# Optional; enabled by the `mlflow-rs` feature.
futures = { version = "0.3.31", optional = true }
html5ever = "0.36.1"
indicatif = "0.18.4"
lazy_static = "1.5.0"
ndarray = "0.17.1"
plotters = "0.3.7"
rand = "0.10"
rand_distr = "0.6.0"
rayon = "1.11.0"
regex = "1.12.2"
safetensors = "0.7.0"
scraper = "0.25.0"
selectors = "0.36.1"
serde = { version = "1.0.228", features = ["derive"] }
serde_json = "1.0.147"
sha2 = "0.10"
tempfile = "3.27.0"
thiserror = "2.0.17"
# Optional; enabled by the `mlflow-rs` feature.
tokio = { version = "1.48.0", features = ["full"], optional = true }
tracing = "0.1.44"
# NOTE(review): tract-core and tract-onnx are optional but no entry in
# [features] names them; they are presumably reachable only through their
# implicit same-named features — confirm this is intended.
tract-core = { version = "0.22.0", optional = true }
tract-onnx = { version = "0.22.0", optional = true }
# Optional; enabled by the `mlflow-rs` feature.
trs-mlflow = { version = "0.6.7", optional = true }
url = "2.5.8"
walkdir = "2.5.0"

# Dependencies needed only when building tests, examples and benchmarks.
[dev-dependencies]
criterion = { version = "0.8.2", features = ["html_reports"] }
# NOTE(review): tempfile is also a regular dependency with the same
# requirement — confirm the duplicate entry is needed.
tempfile = "3.27.0"
# NOTE(review): the optional tokio entry under [dependencies] asks for
# 1.48.0 while this one asks for 1.50.0 — confirm the difference is intended.
tokio = { version = "1.50.0", features = ["full"] }