#![allow(clippy::manual_strip)]
#![allow(clippy::needless_range_loop)]
#![allow(clippy::if_same_then_else)]
#![allow(clippy::cloned_ref_to_slice_refs)]
#![allow(dead_code)]
#![warn(missing_docs)]

pub mod classification;
pub mod cleansing;
pub mod distance;
pub mod domain_processors;
pub mod embeddings;
pub mod enhanced_vectorize;
pub mod error;
pub mod huggingface_compat;
pub mod information_extraction;
pub mod ml_integration;
pub mod ml_sentiment;
pub mod model_registry;
pub mod multilingual;
pub mod neural_architectures;
pub mod parallel;
pub mod performance;
pub mod pos_tagging;
pub mod preprocess;
pub mod semantic_similarity;
pub mod sentiment;
pub mod simd_ops;
pub mod sparse;
pub mod sparse_vectorize;
pub mod spelling;
pub mod stemming;
pub mod streaming;
pub mod string_metrics;
pub mod summarization;
pub mod text_coordinator;
pub mod text_statistics;
pub mod token_filter;
pub mod tokenize;
pub mod topic_coherence;
pub mod topic_modeling;
pub mod transformer;
pub mod utils;
pub mod vectorize;
pub mod visualization;
pub mod vocabulary;
pub mod weighted_distance;

pub use classification::{
    TextClassificationMetrics, TextClassificationPipeline, TextDataset, TextFeatureSelector,
};
pub use cleansing::{
    expand_contractions, normalize_currencies, normalize_numbers, normalize_ordinals,
    normalize_percentages, normalize_unicode, normalize_whitespace, remove_accents, replace_emails,
    replace_urls, strip_html_tags, AdvancedTextCleaner,
};
pub use distance::{cosine_similarity, jaccard_similarity, levenshtein_distance};
pub use domain_processors::{
    Domain, DomainProcessorConfig, FinancialTextProcessor, LegalTextProcessor,
    MedicalTextProcessor, NewsTextProcessor, PatentTextProcessor, ProcessedDomainText,
    ScientificTextProcessor, SocialMediaTextProcessor, UnifiedDomainProcessor,
};
pub use embeddings::{Word2Vec, Word2VecAlgorithm, Word2VecConfig};
pub use enhanced_vectorize::{EnhancedCountVectorizer, EnhancedTfidfVectorizer};
pub use error::{Result, TextError};
pub use huggingface_compat::{
    ClassificationResult, FeatureExtractionPipeline, FillMaskPipeline, FillMaskResult,
    FormatConverter, HfConfig, HfEncodedInput, HfHub, HfModelAdapter, HfPipeline, HfTokenizer,
    HfTokenizerConfig, QuestionAnsweringPipeline, QuestionAnsweringResult,
    TextClassificationPipeline as HfTextClassificationPipeline, ZeroShotClassificationPipeline,
};
pub use information_extraction::{
    AdvancedExtractedInformation, AdvancedExtractionPipeline, ConfidenceScorer, CoreferenceChain,
    CoreferenceMention, CoreferenceResolver, DocumentInformationExtractor, DocumentSummary, Entity,
    EntityCluster, EntityLinker, EntityType, Event, ExtractedInformation,
    InformationExtractionPipeline, KeyPhraseExtractor, KnowledgeBaseEntry, LinkedEntity,
    MentionType, PatternExtractor, Relation, RelationExtractor, RuleBasedNER,
    StructuredDocumentInformation, TemporalExtractor, Topic,
};
pub use ml_integration::{
    BatchTextProcessor, FeatureExtractionMode, MLTextPreprocessor, TextFeatures, TextMLPipeline,
};
pub use ml_sentiment::{
    ClassMetrics, EvaluationMetrics, MLSentimentAnalyzer, MLSentimentConfig, TrainingMetrics,
};
pub use model_registry::{
    ModelMetadata, ModelRegistry, ModelType, PrebuiltModels, RegistrableModel,
    SerializableModelData,
};
pub use multilingual::{
    Language, LanguageDetectionResult, LanguageDetector, MultilingualProcessor, ProcessedText,
    StopWords,
};
pub use neural_architectures::{
    ActivationFunction, AdditiveAttention, BiLSTM, CNNLSTMHybrid, Conv1D, CrossAttention, Dropout,
    GRUCell, LSTMCell, LayerNorm as NeuralLayerNorm, MaxPool1D,
    MultiHeadAttention as NeuralMultiHeadAttention, MultiScaleCNN, PositionwiseFeedForward,
    ResidualBlock1D, SelfAttention, TextCNN,
};
pub use parallel::{
    ParallelCorpusProcessor, ParallelTextProcessor, ParallelTokenizer, ParallelVectorizer,
};
pub use performance::{
    AdvancedPerformanceMonitor, DetailedPerformanceReport, OptimizationRecommendation,
    PerformanceSummary, PerformanceThresholds,
};
pub use pos_tagging::{
    PosAwareLemmatizer, PosTagResult, PosTagger, PosTaggerConfig, PosTaggingResult,
};
pub use preprocess::{BasicNormalizer, BasicTextCleaner, TextCleaner, TextNormalizer};
pub use semantic_similarity::{
    LcsSimilarity, SemanticSimilarityEnsemble, SoftCosineSimilarity, WeightedJaccard,
    WordMoversDistance,
};
pub use sentiment::{
    LexiconSentimentAnalyzer, RuleBasedSentimentAnalyzer, Sentiment, SentimentLexicon,
    SentimentResult, SentimentRules, SentimentWordCounts,
};
pub use simd_ops::{
    AdvancedSIMDTextProcessor, SimdEditDistance, SimdStringOps, SimdTextAnalyzer,
    TextProcessingResult,
};
pub use sparse::{CsrMatrix, DokMatrix, SparseMatrixBuilder, SparseVector};
pub use sparse_vectorize::{
    sparse_cosine_similarity, MemoryStats, SparseCountVectorizer, SparseTfidfVectorizer,
};
pub use spelling::{
    DictionaryCorrector, DictionaryCorrectorConfig, EditOp, ErrorModel, NGramModel,
    SpellingCorrector, StatisticalCorrector, StatisticalCorrectorConfig,
};
pub use stemming::{
    LancasterStemmer, LemmatizerConfig, PorterStemmer, PosTag, RuleLemmatizer,
    RuleLemmatizerBuilder, SimpleLemmatizer, SnowballStemmer, Stemmer,
};
pub use streaming::{
    AdvancedStreamingMetrics, AdvancedStreamingProcessor, ChunkedCorpusReader, MemoryMappedCorpus,
    ProgressTracker, StreamingTextProcessor, StreamingVectorizer,
};
pub use string_metrics::{
    AlignmentResult, DamerauLevenshteinMetric, Metaphone, NeedlemanWunsch, Nysiis,
    PhoneticAlgorithm, SmithWaterman, Soundex, StringMetric,
};
pub use summarization::{CentroidSummarizer, KeywordExtractor, TextRank};
pub use text_coordinator::{
    AdvancedBatchClassificationResult, AdvancedSemanticSimilarityResult, AdvancedTextConfig,
    AdvancedTextCoordinator, AdvancedTextResult, AdvancedTopicModelingResult,
};
pub use text_statistics::{ReadabilityMetrics, TextMetrics, TextStatistics};
pub use token_filter::{
    CompositeFilter, CustomFilter, FrequencyFilter, LengthFilter, RegexFilter, StopwordsFilter,
    TokenFilter,
};
pub use tokenize::{
    bpe::{BpeConfig, BpeTokenizer, BpeVocabulary},
    CharacterTokenizer, NgramTokenizer, RegexTokenizer, SentenceTokenizer, Tokenizer,
    WhitespaceTokenizer, WordTokenizer,
};
pub use topic_coherence::{TopicCoherence, TopicDiversity};
pub use topic_modeling::{
    LatentDirichletAllocation, LdaBuilder, LdaConfig, LdaLearningMethod, Topic as LdaTopic,
};
pub use transformer::{
    FeedForward, LayerNorm, MultiHeadAttention, PositionalEncoding, TokenEmbedding,
    TransformerConfig, TransformerDecoder, TransformerDecoderLayer, TransformerEncoder,
    TransformerEncoderLayer, TransformerModel,
};
pub use vectorize::{CountVectorizer, TfidfVectorizer, Vectorizer};
pub use visualization::{
    AttentionVisualizer, Color, ColorScheme, EmbeddingVisualizer, SentimentVisualizer,
    TextAnalyticsDashboard, TopicVisualizer, VisualizationConfig, WordCloud,
};
pub use vocabulary::Vocabulary;
pub use weighted_distance::{
    DamerauLevenshteinWeights, LevenshteinWeights, WeightedDamerauLevenshtein, WeightedLevenshtein,
    WeightedStringMetric,
};
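
// ---------------------------------------------------------------------------
// Illustrative sketch only: a minimal, self-contained reference for what two
// of the re-exported distance functions (`distance::levenshtein_distance` and
// `distance::cosine_similarity`) conceptually compute. This is NOT the crate's
// implementation, and the crate's actual function signatures may differ; the
// helper names below are hypothetical. Compiled only under `cfg(test)`.
// ---------------------------------------------------------------------------
#[cfg(test)]
mod concept_sketches {
    /// Classic single-row dynamic-programming Levenshtein edit distance: the
    /// minimum number of single-character insertions, deletions, and
    /// substitutions needed to turn `a` into `b`.
    fn levenshtein_sketch(a: &str, b: &str) -> usize {
        let a: Vec<char> = a.chars().collect();
        let b: Vec<char> = b.chars().collect();
        // `row[j]` holds the distance between the current prefix of `a` and `b[..j]`.
        let mut row: Vec<usize> = (0..=b.len()).collect();
        for i in 1..=a.len() {
            let mut prev_diag = row[0];
            row[0] = i;
            for j in 1..=b.len() {
                let cost = if a[i - 1] == b[j - 1] { 0 } else { 1 };
                let next = (row[j] + 1) // deletion: dp[i-1][j] + 1
                    .min(row[j - 1] + 1) // insertion: dp[i][j-1] + 1
                    .min(prev_diag + cost); // substitution or match: dp[i-1][j-1] + cost
                prev_diag = row[j];
                row[j] = next;
            }
        }
        row[b.len()]
    }

    /// Cosine similarity between two dense vectors: dot product divided by the
    /// product of the Euclidean norms; returns 0.0 when either norm is zero.
    fn cosine_similarity_sketch(x: &[f64], y: &[f64]) -> f64 {
        let dot: f64 = x.iter().zip(y).map(|(a, b)| a * b).sum();
        let norm_x: f64 = x.iter().map(|a| a * a).sum::<f64>().sqrt();
        let norm_y: f64 = y.iter().map(|b| b * b).sum::<f64>().sqrt();
        if norm_x == 0.0 || norm_y == 0.0 {
            0.0
        } else {
            dot / (norm_x * norm_y)
        }
    }

    #[test]
    fn sketches_match_known_values() {
        // "kitten" -> "sitting" is the textbook example with edit distance 3.
        assert_eq!(levenshtein_sketch("kitten", "sitting"), 3);
        // Identical vectors have cosine similarity 1; orthogonal vectors have 0.
        assert!((cosine_similarity_sketch(&[1.0, 2.0], &[1.0, 2.0]) - 1.0).abs() < 1e-12);
        assert!(cosine_similarity_sketch(&[1.0, 0.0], &[0.0, 1.0]).abs() < 1e-12);
    }
}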