Skip to main content

graphrag_core/nlp/
mod.rs

1//! Advanced NLP Module
2//!
3//! This module provides advanced natural language processing capabilities:
4//! - Multilingual support with automatic language detection
5//! - Semantic chunking algorithms
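5//! - Custom NER training pipeline
//! - Syntax analysis (see [`syntax_analyzer`]; re-exports include `SyntaxAnalyzer`, `POSTag`, `Dependency` and `NounPhrase`)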
7//!
8//! ## Features
9//!
10//! ### Multilingual Support
11//! - Automatic language detection using n-gram analysis
12//! - Support for 10+ languages, including English, Spanish, French, German, Chinese, Japanese, Korean, Arabic, Russian, and Portuguese
13//! - Language-specific text normalization and tokenization
14//!
15//! ### Semantic Chunking
16//! - Multiple chunking strategies (sentence, paragraph, topic, semantic, hybrid)
17//! - Intelligent boundary detection
18//! - Coherence scoring
19//! - Configurable chunk sizes and overlap
20//!
21//! ### Custom NER
22//! - Pattern-based entity extraction
23//! - Dictionary/gazetteer matching
24//! - Rule-based extraction with priorities
25//! - Training dataset management
26//! - Active learning support
27
28pub mod multilingual;
29pub mod semantic_chunking;
30pub mod custom_ner;
31pub mod syntax_analyzer;
32
33// Re-export main types
34pub use multilingual::{
35    Language, LanguageDetector, DetectionResult,
36    MultilingualProcessor, ProcessedText,
37};
38
39pub use semantic_chunking::{
40    ChunkingStrategy, ChunkingConfig, SemanticChunk,
41    SemanticChunker, ChunkingStats,
42};
43
44pub use custom_ner::{
45    EntityType, ExtractionRule, RuleType, CustomNER,
46    ExtractedEntity, TrainingDataset, AnnotatedExample,
47    DatasetStatistics,
48};
49
50pub use syntax_analyzer::{
51    POSTag, DependencyRelation, Token, Dependency, NounPhrase,
52    SyntaxAnalyzer, SyntaxAnalyzerConfig,
53};