// Submodules of this module. Everything below re-exports selected items from
// them so callers can import from this module directly.
pub mod chunker;
pub mod extractor;
pub mod types;
// Data types from `types`.
pub use types::{
ChunkInfo, CodeComplexity, CodeLocation, ContentHashedIndex, EmbeddingUnit, SearchResult,
SemanticPattern, UnitKind,
};
// Upper-case value items from `types` (token limits and the semantic pattern table).
pub use types::{
CHUNK_OVERLAP_TOKENS, MAX_CODE_PREVIEW_TOKENS, MAX_EMBEDDING_TOKENS, SEMANTIC_PATTERNS,
};
// `extractor`: unit extraction entry points.
pub use extractor::{extract_units, extract_units_from_file, extract_units_with_callgraph};
// `extractor`: unit cache maintenance.
pub use extractor::{
clear_unit_cache, invalidate_unit_cache, invalidate_unit_cache_matching, unit_cache_stats,
};
// `extractor`: token counting and truncation.
pub use extractor::{count_tokens, truncate_to_tokens};
// `extractor`: complexity and semantic-pattern detection.
pub use extractor::{detect_code_complexity, detect_semantic_patterns};
// `extractor`: chunk splitting.
pub use extractor::{chunk_unit, split_into_chunks};
// `extractor`: in-place unit enrichment (takes `&mut EmbeddingUnit`).
pub use extractor::enrich_unit;
// `extractor`: embedding text construction and identifier-to-words conversion.
pub use extractor::{build_embedding_text, parse_identifier_to_words};
// `chunker`: code chunking functions and the `Chunk` type they return.
pub use chunker::{chunk_code, chunk_code_default, chunk_code_with_overlap, needs_chunking, Chunk};
// `chunker`: tokenizer-related functions plus the `ChunkConfig` and `TokenizerType` items.
pub use chunker::{
compare_tokenizer_counts, count_tokens_batch_tei, count_tokens_tei,
estimate_tokens_unicode_aware, get_tokenizer_type, set_tokenizer_type,
ChunkConfig, TokenizerType,
};
/// Smoke tests for this module's re-exports.
///
/// Most checks are compile-time only: naming a type, or coercing a function
/// item to an explicit `fn` pointer type, fails the build if the re-export is
/// missing or its signature no longer matches what is spelled out here.
#[cfg(test)]
mod tests {
    use super::*;

    /// Compiles only when `T` names a sized type reachable from this scope.
    fn _assert_type_exported<T>() {}

    #[test]
    fn test_public_types_exported() {
        _assert_type_exported::<EmbeddingUnit>();
        _assert_type_exported::<SearchResult>();
        _assert_type_exported::<UnitKind>();
        _assert_type_exported::<CodeComplexity>();
        _assert_type_exported::<ChunkInfo>();
        _assert_type_exported::<SemanticPattern>();
        _assert_type_exported::<ContentHashedIndex>();
        _assert_type_exported::<CodeLocation>();
    }

    #[test]
    fn test_constants_exported() {
        // The three token limits must be positive and the pattern table non-empty.
        assert!(MAX_EMBEDDING_TOKENS > 0);
        assert!(MAX_CODE_PREVIEW_TOKENS > 0);
        assert!(CHUNK_OVERLAP_TOKENS > 0);
        assert!(!SEMANTIC_PATTERNS.is_empty());
    }

    #[test]
    fn test_functions_exported() {
        // Each binding pins the expected signature; none of the functions is called.
        let _: fn(&str, &str) -> crate::error::Result<Vec<EmbeddingUnit>> = extract_units;
        let _: fn(&str) -> crate::error::Result<Vec<EmbeddingUnit>> = extract_units_from_file;
        let _: fn(&str) -> usize = count_tokens;
        let _: fn(&str, usize) -> String = truncate_to_tokens;
        let _: fn(&str) -> Vec<String> = detect_semantic_patterns;
        let _: fn(&str) -> CodeComplexity = detect_code_complexity;
        let _: fn(&str, usize, usize) -> Vec<ChunkInfo> = split_into_chunks;
        let _: fn(&EmbeddingUnit) -> Vec<EmbeddingUnit> = chunk_unit;
        let _: fn(&mut EmbeddingUnit) = enrich_unit;
        let _: fn(&EmbeddingUnit) -> String = build_embedding_text;
        let _: fn(&str) -> String = parse_identifier_to_words;
    }

    #[test]
    fn test_cache_functions_exported() {
        // Signature checks only; the cache itself is left untouched.
        let _: fn() = clear_unit_cache;
        let _: fn() -> usize = unit_cache_stats;
        let _: fn(&std::path::Path) = invalidate_unit_cache;
    }

    #[test]
    fn test_chunker_functions_exported() {
        let _: fn(&str, usize) -> Vec<Chunk> = chunk_code;
        let _: fn(&str) -> Vec<Chunk> = chunk_code_default;
        let _: fn(&str, usize, usize) -> Vec<Chunk> = chunk_code_with_overlap;
        let _: fn(&str, usize) -> bool = needs_chunking;
    }

    #[test]
    fn test_chunk_type_exported() {
        _assert_type_exported::<Chunk>();
    }
}