List of all items
Structs
- FingerprintMetadata
- TlshFingerprint
- canonical::Canonicalizer
- canonical::CanonicalizerBuilder
- classical::lsh::LshIndex
- classical::lsh::LshIndexBuilder
- classical::minhash::MinHashFingerprinter
- classical::minhash::MinHashFingerprinterBuilder
- classical::minhash::MinHashSig
- classical::minhash::MinHashStreaming
- classical::simhash::IdfTable
- classical::simhash::SimHash64
- classical::simhash::SimHashFingerprinter
- classical::simhash::SimHashFingerprinterBuilder
- classical::simhash::SimHashStreaming
- classical::tlsh::TlshFingerprinter
- markup::MarkdownOptions
- pdf::PdfOptions
- semantic::ChunkingStrategy
- semantic::Embedding
- semantic::LocalProvider
- semantic::LocalProviderBuilder
- semantic::providers::cohere::CohereProvider
- semantic::providers::openai::OpenAiProvider
- semantic::providers::voyage::VoyageProvider
- tokenize::CjkTokenizer
- tokenize::GraphemeTokenizer
- tokenize::ShingleTokenizer
- tokenize::WordTokenizer
Enums
- Error
- Fingerprint
- canonical::CaseFold
- canonical::Normalization
- classical::HashFamily
- classical::simhash::Weighting
- semantic::ChunkMode
- semantic::Pooling
- tokenize::CjkSegmenter
- tokenize::TokenStream
Traits
- classical::Fingerprinter
- classical::StreamingFingerprinter
- semantic::EmbeddingProvider
- tokenize::Tokenizer
Functions
- canonical::canonicalize
- classical::minhash::jaccard
- classical::simhash::cosine_estimate
- classical::simhash::hamming
- classical::tlsh::tlsh_distance
- config_hash
- markup::html_to_text
- markup::markdown_to_text
- markup::markdown_to_text_with
- pdf::pdf_to_text
- pdf::pdf_to_text_with
- semantic::chunk_for_model
- semantic::semantic_similarity
Type Aliases
Constants
- FORMAT_VERSION
- UNCOMPUTED_CONFIG_HASH
- VERSION
- algo::EMBEDDING
- algo::MINHASH
- algo::MINHASH_128
- algo::SIMHASH_64
- algo::TLSH
- classical::minhash::DEFAULT_MAX_BUFFER_BYTES
- classical::minhash::SCHEMA_VERSION
- classical::simhash::SCHEMA_VERSION
- classical::tlsh::MIN_INPUT_BYTES
- pdf::DEFAULT_MAX_BYTES
- pdf::DEFAULT_TIMEOUT_SECS
- semantic::providers::cohere::DEFAULT_MODEL
- semantic::providers::openai::DEFAULT_MODEL
- semantic::providers::voyage::DEFAULT_MODEL