# julienne 0.1.0
#
# Range-preserving Rust text chunkers for retrieval and embedding pipelines
# Documentation: https://docs.rs/julienne
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

[package]
edition = "2021"
name = "julienne"
version = "0.1.0"
# Normalized form: no build script, and all target auto-discovery is turned
# off below — every lib/example/test/bench target is declared explicitly.
build = false
# Paths stripped from the published .crate (dev tooling, agent configs,
# benchmark artifacts, CI helpers).
exclude = [
    "/.agents",
    "/.claude",
    "/.pre-commit-config.yaml",
    "/AGENTS.md",
    "/CLAUDE.md",
    "/BENCHMARKS.md",
    "/Cargo.lock",
    "/check.sh",
    "/criterion",
    "/deny.toml",
    "/scripts",
]
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "Range-preserving Rust text chunkers for retrieval and embedding pipelines"
homepage = "https://github.com/CaliLuke/julienne"
documentation = "https://docs.rs/julienne"
readme = "README.md"
keywords = [
    "text-splitting",
    "chunking",
    "embeddings",
    "rag",
    "nlp",
]
categories = [
    "text-processing",
    "algorithms",
]
# SPDX license identifier (the permissive PostgreSQL License).
license = "PostgreSQL"
repository = "https://github.com/CaliLuke/julienne"

[features]
# Pulls in no extra dependencies; presumably gates async chunking APIs in
# the library source — confirm against src/.
async = []
# Tree-sitter-based code chunking with Rust and Python grammars.
code = [
    "dep:tree-sitter",
    "dep:tree-sitter-rust",
    "dep:tree-sitter-python",
]
# No features enabled by default; everything optional is opt-in.
default = []
# Each of the following enables exactly one optional dependency of the same
# name (token-count / segmentation backends).
tiktoken-rs = ["dep:tiktoken-rs"]
tokenizers = ["dep:tokenizers"]
unicode-segmentation = ["dep:unicode-segmentation"]

# Explicit target declarations. Auto-discovery (autolib/autoexamples/
# autotests/autobenches) is disabled in [package], so every target must be
# listed here with its path.
[lib]
name = "julienne"
path = "src/lib.rs"

[[example]]
name = "custom_embeddings"
path = "examples/custom_embeddings.rs"

[[example]]
name = "custom_sizers"
path = "examples/custom_sizers.rs"

[[test]]
name = "api_contracts"
path = "tests/api_contracts.rs"

[[test]]
name = "example_content"
path = "tests/example_content.rs"

[[test]]
name = "offsets"
path = "tests/offsets.rs"

[[test]]
name = "public_output_shapes"
path = "tests/public_output_shapes.rs"

[[test]]
name = "semantic_ollama"
path = "tests/semantic_ollama.rs"

[[test]]
name = "semantic_semchunk"
path = "tests/semantic_semchunk.rs"

[[test]]
name = "send_sync"
path = "tests/send_sync.rs"

[[test]]
name = "sizing"
path = "tests/sizing.rs"

[[test]]
name = "structure_chunkers"
path = "tests/structure_chunkers.rs"

# Criterion benchmark; harness = false hands main() to criterion instead of
# the default libtest bench harness.
[[bench]]
name = "splitters_bench"
path = "benches/splitters_bench.rs"
harness = false

# The only unconditional runtime dependency.
[dependencies.regex]
version = "1"

# Optional backends, each activated by the feature of the same name (or by
# the `code` feature for the tree-sitter crates) in [features].
[dependencies.tiktoken-rs]
version = "0.9"
optional = true

# default-features off; fancy-regex re-enabled so the oniguruma/onig native
# build is avoided — NOTE(review): presumed rationale, confirm.
[dependencies.tokenizers]
version = "0.22"
features = ["fancy-regex"]
optional = true
default-features = false

[dependencies.tree-sitter]
version = "0.25"
optional = true

[dependencies.tree-sitter-python]
version = "0.25"
optional = true

[dependencies.tree-sitter-rust]
version = "0.24"
optional = true

[dependencies.unicode-segmentation]
version = "1.12"
optional = true

# Benchmark harness for benches/splitters_bench.rs.
[dev-dependencies.criterion]
version = "0.5"

# Blocking HTTP client with rustls (no OpenSSL); presumably used by
# tests/semantic_ollama.rs to talk to a local Ollama server — confirm.
[dev-dependencies.reqwest]
version = "0.12"
features = [
    "blocking",
    "json",
    "rustls-tls",
]
default-features = false

[dev-dependencies.serde_json]
version = "1"

# Same version as the optional runtime dependency above, but unconditional
# here so tests can exercise the tiktoken sizing path.
[dev-dependencies.tiktoken-rs]
version = "0.9"