# txtfp 0.2.0
#
# Text fingerprinting: MinHash + LSH, SimHash, and ONNX semantic embeddings
# Documentation: https://docs.rs/txtfp
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

# Crate identity and registry metadata.
[package]
name = "txtfp"
version = "0.2.0"
authors = ["bravo1goingdark <kumarashutosh34169@gmail.com>"]
# Target auto-discovery and build-script detection are all switched off;
# the only target is the one declared explicitly in `[lib]`.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
build = false
categories = ["text-processing", "algorithms"]
documentation = "https://docs.rs/txtfp"
edition = "2024"
# Paths left out of the published package.
exclude = [
    ".github/**",
    "benches/**",
    "examples/**",
    "tests/**",
    "*.sh",
]
homepage = "https://github.com/themankindproject/txtfp"
keywords = ["fingerprint", "minhash", "simhash", "lsh", "deduplication"]
license = "MIT"
readme = "README.md"
repository = "https://github.com/themankindproject/txtfp"
# Minimum supported Rust toolchain.
rust-version = "1.88"
description = "Text fingerprinting: MinHash + LSH, SimHash, and ONNX semantic embeddings"

# docs.rs build settings: document every feature and pass `--cfg docsrs`
# to rustdoc.
[package.metadata.docs.rs]
rustdoc-args = ["--cfg", "docsrs"]
all-features = true

# Cargo feature flags. `dep:NAME` entries activate the optional dependency
# NAME; `crate/feature` entries turn on a feature of that dependency.
[features]
# Active unless the dependent sets `default-features = false`.
default = ["std", "minhash", "simhash", "lsh"]

# Enables the `std` feature of three always-present dependencies.
std = ["thiserror/std", "unicode-normalization/std", "unicode-bidi/std"]

# Flag-only features: they pull in no dependencies and no other features.
minhash = []
simhash = []

# Requires `minhash` and adds hashbrown + smallvec.
lsh = ["minhash", "dep:hashbrown", "dep:smallvec"]

# Single-dependency opt-ins.
security = ["dep:unicode-security"]
serde = ["dep:serde"]
tlsh = ["dep:tlsh2"]

# Opt-ins that also require `std`.
markup = ["std", "dep:html2text", "dep:pulldown-cmark"]
parallel = ["std", "dep:rayon"]
pdf = ["std", "dep:pdf-extract"]

# `cjk` adds jieba-rs; the two language variants build on it and add lindera
# with the matching embedded dictionary feature.
cjk = ["std", "dep:jieba-rs"]
cjk-japanese = ["cjk", "dep:lindera", "lindera/embed-ipadic"]
cjk-korean = ["cjk", "dep:lindera", "lindera/embed-ko-dic"]

# `semantic` adds ort, tokenizers, hf-hub and ndarray on top of `std`.
semantic = ["std", "dep:ort", "dep:tokenizers", "dep:hf-hub", "dep:ndarray"]

# Three features with identical contents: each layers reqwest, serde_json and
# tokio on top of `semantic`.
cohere = ["semantic", "dep:reqwest", "dep:serde_json", "dep:tokio"]
openai = ["semantic", "dep:reqwest", "dep:serde_json", "dep:tokio"]
voyage = ["semantic", "dep:reqwest", "dep:serde_json", "dep:tokio"]

# The crate's single library target, declared explicitly.
[lib]
path = "src/lib.rs"
name = "txtfp"

# Runtime dependencies, one inline table per crate. Entries marked
# `optional = true` are only built when a feature lists them as `dep:NAME`.
[dependencies]
# Always compiled.
ahash = { version = "0.8.12", default-features = false }
blake3 = { version = "1.8.5", default-features = false }
bytemuck = { version = "1.25.0", features = ["derive"] }
caseless = { version = "0.2.2", default-features = false }
thiserror = { version = "2.0.18", default-features = false }
unicode-bidi = { version = "0.3.18", default-features = false }
unicode-normalization = { version = "0.1.25", default-features = false }
unicode-segmentation = { version = "1.13.2" }
wide = { version = "0.7", default-features = false }
xxhash-rust = { version = "0.8.15", default-features = false, features = ["xxh3", "const_xxh3"] }

# Optional: activated by `lsh`.
hashbrown = { version = "0.17.0", default-features = false, features = ["default-hasher", "inline-more"], optional = true }
smallvec = { version = "1.15.1", optional = true }

# Optional: activated by `markup`.
html2text = { version = "0.17.1", optional = true }
pulldown-cmark = { version = "0.13.3", default-features = false, optional = true }

# Optional: activated by `cjk` (jieba-rs) and `cjk-japanese` / `cjk-korean` (lindera).
jieba-rs = { version = "0.9", optional = true }
lindera = { version = "3.0", default-features = false, optional = true }

# Optional: activated by `semantic`. `ort` is pinned to one exact pre-release.
hf-hub = { version = "0.5.0", default-features = false, features = ["ureq", "rustls-tls"], optional = true }
ndarray = { version = "0.16", optional = true }
ort = { version = "=2.0.0-rc.10", default-features = false, features = ["std", "download-binaries", "ndarray"], optional = true }
tokenizers = { version = "0.22.2", default-features = false, features = ["onig"], optional = true }

# Optional: activated by `cohere`, `openai` and `voyage`.
# NOTE(review): reqwest has default features off and only `json` + `blocking`
# enabled; no TLS feature is listed here — confirm HTTPS requests work.
reqwest = { version = "0.13.2", default-features = false, features = ["json", "blocking"], optional = true }
serde_json = { version = "1.0.149", optional = true }
tokio = { version = "1.52.1", features = ["rt", "macros"], optional = true }

# Optional: one feature each (`pdf`, `parallel`, `serde`, `tlsh`, `security`).
pdf-extract = { version = "0.10.0", optional = true }
rayon = { version = "1.12.0", optional = true }
serde = { version = "1.0.228", default-features = false, features = ["derive", "alloc"], optional = true }
tlsh2 = { version = "1.1.0", features = ["diff"], optional = true }
unicode-security = { version = "0.1.2", optional = true }

# Dependencies used only when building this crate's own tests, benchmarks and
# examples; they are not passed on to crates that depend on txtfp.
[dev-dependencies]
criterion = { version = "0.5", default-features = false, features = ["html_reports"] }
hex = { version = "0.4.3" }
mimalloc = { version = "0.1.50", default-features = false }
proptest = { version = "1.11.0" }
serde_json = { version = "1.0.149" }
tempfile = { version = "3.27.0" }

# Benchmark profile: full optimization, whole-program ("fat") LTO, a single
# codegen unit, and no debug info. Cargo reads profile settings only from the
# workspace-root manifest, so this has no effect when txtfp is a dependency.
[profile.bench]
debug = 0
codegen-units = 1
lto = "fat"
opt-level = 3

# Release profile: thin LTO with a single codegen unit. Cargo reads profile
# settings only from the workspace-root manifest, so this has no effect when
# txtfp is a dependency.
[profile.release]
codegen-units = 1
lto = "thin"