# Package metadata. Target auto-discovery is switched off below, so the
# library, binaries, tests and benches are each declared explicitly.
[package]
name = "libgrammstein"
version = "0.1.0"
authors = ["Dylon Edwards"]
categories = ["text-processing", "science"]
edition = "2021"
keywords = ["nlp", "language-model", "ngram", "embeddings", "wfst"]
license = "Apache-2.0"
readme = "README.md"
repository = "https://github.com/vinary-tree/libgrammstein"
# NOTE(review): confirm this MSRV still holds for the dependency versions in use.
rust-version = "1.70"
# No build script is run for this package.
build = false
# Disable auto-discovery for every target kind.
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "Hybrid language model (N-gram + Embeddings) for WFST text correction"
# Optional functionality. Entries prefixed with `dep:` switch on an optional
# dependency; bare entries switch on another feature of this crate.
[features]
# Pulls in the rustfft and realfft crates.
acoustic = ["dep:rustfft", "dep:realfft"]
async = ["dep:tokio", "dep:futures"]
# candle-core and candle-nn, plus everything `acoustic` enables.
candle-model = [
    "dep:candle-core",
    "dep:candle-nn",
    "acoustic",
]
# Required by all three binaries (`grammstein`, `compare_artries`,
# `dump_checkpoint`).
cli = [
    # Features of this crate.
    "http-corpus",
    "subword",
    "serde-extras",
    # Optional dependencies.
    "dep:clap",
    "dep:indicatif",
    "dep:console",
    "dep:rustyline",
    "dep:comfy-table",
    "dep:humansize",
    "dep:ctrlc",
    "dep:zstd",
    "dep:whatlang",
    "dep:unic-langid",
    "dep:env_logger",
    "dep:serde_json",
    "dep:shellexpand",
    "dep:dirs",
]
# `code` is the base feature; every other `code-*` feature enables it,
# directly or through the features it lists.
code = ["dep:tree-sitter", "dep:petgraph", "dep:walkdir"]
# Umbrella: `code-neural` plus the three grammars in `code-mainstream`.
code-full = ["code-neural", "code-mainstream"]
code-javascript = ["code", "dep:tree-sitter-javascript"]
code-mainstream = ["code-python", "code-rust", "code-javascript"]
code-neural = ["code", "neural-rescore", "dep:ort"]
code-python = ["code", "dep:tree-sitter-python"]
code-rust = ["code", "dep:tree-sitter-rust"]
# Nothing is enabled by default; every capability is opt-in.
default = []
# Required by the `compare_artries` and `dump_checkpoint` binaries and by the
# `checkpoint_ops`, `mkn_frequency_counts` and `overlay_eviction` benches.
google-books = [
    "mimalloc-alloc",
    "dep:reqwest",
    "dep:tokio",
    "dep:tokio-util",
    "dep:tokio-stream",
    "dep:futures",
    "dep:async-compression",
    "dep:async-stream",
    "dep:async-channel",
    "dep:flate2",
    "dep:indicatif",
    "dep:ratatui",
    "dep:chrono",
    "dep:serde_json",
    "dep:tracing",
    "dep:tracing-subscriber",
    "dep:tracing-appender",
    "dep:tracing-log",
    "serde-extras",
]
gpu = ["dep:wgpu", "dep:bytemuck", "dep:pollster"]
http-corpus = ["dep:ureq", "dep:fs2", "dep:tempfile"]
language-full = ["dep:whatlang", "dep:unic-langid"]
# `latex` enables no dependencies or other features on its own.
latex = []
latex-full = ["latex-neural", "latex-rag"]
latex-neural = ["latex", "neural-rescore"]
# NOTE(review): unlike `latex-neural` and `latex-rag`, this does not enable
# `latex`, and `latex-full` does not include it. Confirm that is intended.
latex-ocr = ["ocr", "neural-rescore"]
latex-rag = ["latex", "rag"]
# Switches on the optional lling-llang dependency.
lling-llang-integration = ["dep:lling-llang"]
# Enables no dependencies or other features.
loom-tests = []
mimalloc-alloc = ["dep:mimalloc"]
ner = ["dep:ort", "dep:tokenizers"]
# Also enabled by `code-neural`, `latex-neural`, `latex-ocr` and `rag`.
neural-rescore = [
    "dep:candle-core",
    "dep:candle-nn",
    "dep:candle-transformers",
    "dep:tokenizers",
    "dep:hf-hub",
    "dep:serde_json",
]
ocr = ["dep:tesseract-rs", "dep:image", "dep:pdf"]
pdf-extraction = ["dep:serde_json"]
rag = [
    "neural-rescore",
    "dep:bytemuck",
    "dep:serde_json",
    "dep:bincode",
]
rag-hnsw = ["rag", "dep:hnsw_rs"]
serde-extras = [
    "dep:bincode",
    "dep:chrono",
    # Turns on the `serialization` feature of the liblevenshtein dependency.
    "liblevenshtein/serialization",
]
subword = ["dep:tokenizers"]
# Enables `lling-llang-integration` and nothing else.
wfst-export = ["lling-llang-integration"]
# Library target, declared by hand because `autolib` is off.
[lib]
path = "src/lib.rs"
name = "libgrammstein"
# Binary targets. Each one declares `required-features`, so none of them is
# built with the (empty) default feature set.
[[bin]]
name = "compare_artries"
path = "src/bin/compare_artries.rs"
required-features = ["cli", "google-books"]

[[bin]]
name = "dump_checkpoint"
path = "src/bin/dump_checkpoint.rs"
required-features = ["cli", "google-books"]

[[bin]]
name = "grammstein"
path = "src/bin/grammstein.rs"
required-features = ["cli"]
# Integration test targets.
# NOTE(review): none of these declares `required-features`, although the
# `dump_checkpoint` binary needs `cli` + `google-books` and a `loom-tests`
# feature exists. Confirm the test sources gate themselves with `cfg`.
[[test]]
name = "code_integration"
path = "tests/code_integration.rs"

[[test]]
name = "dump_checkpoint"
path = "tests/dump_checkpoint.rs"

[[test]]
name = "google_books_sharding_integration"
path = "tests/google_books_sharding_integration.rs"

[[test]]
name = "loom_formal_alignment"
path = "tests/loom_formal_alignment.rs"
# Benchmark targets. Every bench sets `harness = false`, opting out of the
# built-in libtest harness. Three of them need the `google-books` feature.
[[bench]]
name = "checkpoint_ops"
path = "benches/checkpoint_ops.rs"
required-features = ["google-books"]
harness = false

[[bench]]
name = "embedding_similarity"
path = "benches/embedding_similarity.rs"
harness = false

[[bench]]
name = "hash_comparison"
path = "benches/hash_comparison.rs"
harness = false

[[bench]]
name = "mkn_frequency_counts"
path = "benches/mkn_frequency_counts.rs"
required-features = ["google-books"]
harness = false

[[bench]]
name = "ngram_query"
path = "benches/ngram_query.rs"
harness = false

[[bench]]
name = "overlay_eviction"
path = "benches/overlay_eviction.rs"
required-features = ["google-books"]
harness = false

[[bench]]
name = "training"
path = "benches/training.rs"
harness = false

[[bench]]
name = "varint_encoding"
path = "benches/varint_encoding.rs"
harness = false
# Runtime dependencies, sorted alphabetically. Entries with `optional = true`
# are only compiled when a feature lists them as `dep:<name>`.
[dependencies]
arc-swap = "1.7"
async-channel = { version = "2.3", optional = true }
async-compression = { version = "0.4", features = ["tokio", "gzip"], optional = true }
async-stream = { version = "0.3", optional = true }
bincode = { version = "1.3", optional = true }
bytemuck = { version = "1.14", features = ["derive"], optional = true }
bzip2 = "0.5"
candle-core = { version = "0.8", optional = true }
candle-nn = { version = "0.8", optional = true }
candle-transformers = { version = "0.8", optional = true }
chrono = { version = "0.4", features = ["serde"], optional = true }
clap = { version = "4.5", features = ["derive", "env", "wrap_help"], optional = true }
comfy-table = { version = "7", optional = true }
console = { version = "0.15", optional = true }
crossbeam-channel = "0.5"
ctrlc = { version = "3.4", optional = true }
dashmap = "6.1"
dirs = { version = "5.0", optional = true }
env_logger = { version = "0.11", optional = true }
flate2 = { version = "1.0", optional = true }
fs2 = { version = "0.4", optional = true }
futures = { version = "0.3", optional = true }
gxhash = "3.5"
hf-hub = { version = "0.3", optional = true }
hnsw_rs = { version = "0.3", features = ["simdeez_f"], optional = true }
humansize = { version = "2", optional = true }
image = { version = "0.25", optional = true }
indicatif = { version = "0.17", optional = true }
lazy_static = "1.4"
libdictenstein = { version = "0.1", features = ["persistent-artrie", "parking_lot"] }
liblevenshtein = { version = "0.9", features = ["pathmap-backend", "persistent-artrie", "embedded-rules"] }
lling-llang = { version = "0.1", features = ["lm-rerank"], optional = true }
log = "0.4"
lru = "0.12"
memmap2 = "0.9"
mimalloc = { version = "0.1", default-features = false, optional = true }
ndarray = { version = "0.16", features = ["serde"] }
ordered-float = "4.0"
ort = { version = "2.0.0-rc.10", optional = true }
parking_lot = "0.12"
pathmap = "0.2"
pdf = { version = "0.9", optional = true }
petgraph = { version = "0.6", optional = true }
pollster = { version = "0.4", optional = true }
quick-xml = "0.37"
rand = "0.8"
ratatui = { version = "0.29", default-features = false, features = ["crossterm"], optional = true }
rayon = "1.10"
realfft = { version = "3.3", optional = true }
regex = "1.11"
reqwest = { version = "0.12", features = ["stream", "gzip"], optional = true }
rustfft = { version = "6.2", optional = true }
rustyline = { version = "14", optional = true }
serde = { version = "1.0", features = ["derive", "rc"] }
serde_json = { version = "1.0", optional = true }
shellexpand = { version = "3.1", optional = true }
smallvec = "1.13"
sysinfo = { version = "0.32", default-features = false, features = ["system"] }
tempfile = { version = "3.14", optional = true }
tesseract-rs = { version = "0.1", optional = true }
thiserror = "2.0"
tokenizers = { version = "0.20", optional = true }
tokio = { version = "1", features = ["rt-multi-thread", "sync", "time", "signal", "macros"], optional = true }
tokio-stream = { version = "0.1", features = ["io-util"], optional = true }
tokio-util = { version = "0.7", features = ["io"], optional = true }
tracing = { version = "0.1", optional = true }
tracing-appender = { version = "0.2", optional = true }
tracing-log = { version = "0.2", optional = true }
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"], optional = true }
tree-sitter = { version = "0.25", optional = true }
tree-sitter-javascript = { version = "0.23", optional = true }
tree-sitter-python = { version = "0.23", optional = true }
tree-sitter-rust = { version = "0.23", optional = true }
unic-langid = { version = "0.9", optional = true }
unicode-normalization = "0.1"
unicode-segmentation = "1"
ureq = { version = "2.9", optional = true }
walkdir = { version = "2.5", optional = true }
wgpu = { version = "23", optional = true }
whatlang = { version = "0.16", optional = true }
xxhash-rust = { version = "0.8", features = ["xxh3"] }
zstd = { version = "0.13", optional = true }
# Dependencies for tests and benches only. `env_logger`, `tempfile` and
# `tree-sitter-rust` are optional in the runtime dependency list; declaring
# them here as well makes them available to dev targets without enabling a
# feature.
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
env_logger = "0.11"
loom = "0.5.6"
proptest = "1.4"
stream-vbyte = "0.4"
tempfile = "3.14"
tree-sitter-rust = "0.23"
varint-simd = "0.3"
vu128 = "1"
wiremock = "0.6"
xxhash-rust = { version = "0.8", features = ["xxh3"] }