#![allow(deprecated)]
#![allow(clippy::manual_strip)]
#![allow(clippy::needless_range_loop)]
#![allow(clippy::if_same_then_else)]
#![allow(clippy::cloned_ref_to_slice_refs)]
#![allow(dead_code)]
#![warn(missing_docs)]

pub mod classification;
pub mod cleansing;
pub mod distance;
pub mod domain_processors;
pub mod embeddings;
pub mod enhanced_vectorize;
pub mod error;
pub mod huggingface_compat;
pub mod information_extraction;
pub mod ml_integration;
pub mod ml_sentiment;
pub mod model_registry;
pub mod multilingual;
pub mod neural_architectures;
pub mod parallel;
pub mod performance;
pub mod pos_tagging;
pub mod preprocess;
pub mod semantic_similarity;
pub mod sentiment;
pub mod simd_ops;
pub mod sparse;
pub mod sparse_vectorize;
pub mod spelling;
pub mod stemming;
pub mod streaming;
pub mod string_metrics;
pub mod summarization;
pub mod text_coordinator;
pub mod text_statistics;
pub mod token_filter;
pub mod tokenize;
pub mod topic_coherence;
pub mod topic_modeling;
pub mod transformer;
pub mod utils;
pub mod vectorize;
pub mod visualization;
pub mod vocabulary;
pub mod weighted_distance;

pub use classification::{
    TextClassificationMetrics, TextClassificationPipeline, TextDataset, TextFeatureSelector,
};
pub use cleansing::{
    expand_contractions, normalize_currencies, normalize_numbers, normalize_ordinals,
    normalize_percentages, normalize_unicode, normalize_whitespace, remove_accents, replace_emails,
    replace_urls, strip_html_tags, AdvancedTextCleaner,
};
pub use distance::{cosine_similarity, jaccard_similarity, levenshtein_distance};
pub use domain_processors::{
    Domain, DomainProcessorConfig, FinancialTextProcessor, LegalTextProcessor,
    MedicalTextProcessor, NewsTextProcessor, PatentTextProcessor, ProcessedDomainText,
    ScientificTextProcessor, SocialMediaTextProcessor, UnifiedDomainProcessor,
};
pub use embeddings::{Word2Vec, Word2VecAlgorithm, Word2VecConfig};
pub use enhanced_vectorize::{EnhancedCountVectorizer, EnhancedTfidfVectorizer};
pub use error::{Result, TextError};
pub use huggingface_compat::{
    ClassificationResult, FeatureExtractionPipeline, FillMaskPipeline, FillMaskResult,
    FormatConverter, HfConfig, HfEncodedInput, HfHub, HfModelAdapter, HfPipeline, HfTokenizer,
    HfTokenizerConfig, QuestionAnsweringPipeline, QuestionAnsweringResult,
    TextClassificationPipeline as HfTextClassificationPipeline, ZeroShotClassificationPipeline,
};
pub use information_extraction::{
    AdvancedExtractedInformation, AdvancedExtractionPipeline, ConfidenceScorer, CoreferenceChain,
    CoreferenceMention, CoreferenceResolver, DocumentInformationExtractor, DocumentSummary, Entity,
    EntityCluster, EntityLinker, EntityType, Event, ExtractedInformation,
    InformationExtractionPipeline, KeyPhraseExtractor, KnowledgeBaseEntry, LinkedEntity,
    MentionType, PatternExtractor, Relation, RelationExtractor, RuleBasedNER,
    StructuredDocumentInformation, TemporalExtractor, Topic,
};
pub use ml_integration::{
    BatchTextProcessor, FeatureExtractionMode, MLTextPreprocessor, TextFeatures, TextMLPipeline,
};
pub use ml_sentiment::{
    ClassMetrics, EvaluationMetrics, MLSentimentAnalyzer, MLSentimentConfig, TrainingMetrics,
};
pub use model_registry::{
    ModelMetadata, ModelRegistry, ModelType, PrebuiltModels, RegistrableModel,
    SerializableModelData,
};
pub use multilingual::{
    Language, LanguageDetectionResult, LanguageDetector, MultilingualProcessor, ProcessedText,
    StopWords,
};
pub use neural_architectures::{
    ActivationFunction, AdditiveAttention, BiLSTM, CNNLSTMHybrid, Conv1D, CrossAttention, Dropout,
    GRUCell, LSTMCell, LayerNorm as NeuralLayerNorm, MaxPool1D,
    MultiHeadAttention as NeuralMultiHeadAttention, MultiScaleCNN, PositionwiseFeedForward,
    ResidualBlock1D, SelfAttention, TextCNN,
};
pub use parallel::{
    ParallelCorpusProcessor, ParallelTextProcessor, ParallelTokenizer, ParallelVectorizer,
};
pub use performance::{
    AdvancedPerformanceMonitor, DetailedPerformanceReport, OptimizationRecommendation,
    PerformanceSummary, PerformanceThresholds,
};
pub use pos_tagging::{
    PosAwareLemmatizer, PosTagResult, PosTagger, PosTaggerConfig, PosTaggingResult,
};
pub use preprocess::{BasicNormalizer, BasicTextCleaner, TextCleaner, TextNormalizer};
pub use semantic_similarity::{
    LcsSimilarity, SemanticSimilarityEnsemble, SoftCosineSimilarity, WeightedJaccard,
    WordMoversDistance,
};
pub use sentiment::{
    LexiconSentimentAnalyzer, RuleBasedSentimentAnalyzer, Sentiment, SentimentLexicon,
    SentimentResult, SentimentRules, SentimentWordCounts,
};
pub use simd_ops::{
    AdvancedSIMDTextProcessor, SimdEditDistance, SimdStringOps, SimdTextAnalyzer,
    TextProcessingResult,
};
pub use sparse::{CsrMatrix, DokMatrix, SparseMatrixBuilder, SparseVector};
pub use sparse_vectorize::{
    sparse_cosine_similarity, MemoryStats, SparseCountVectorizer, SparseTfidfVectorizer,
};
pub use spelling::{
    DictionaryCorrector, DictionaryCorrectorConfig, EditOp, ErrorModel, NGramModel,
    SpellingCorrector, StatisticalCorrector, StatisticalCorrectorConfig,
};
pub use stemming::{
    LancasterStemmer, LemmatizerConfig, PorterStemmer, PosTag, RuleLemmatizer,
    RuleLemmatizerBuilder, SimpleLemmatizer, SnowballStemmer, Stemmer,
};
pub use streaming::{
    AdvancedStreamingMetrics, AdvancedStreamingProcessor, ChunkedCorpusReader, MemoryMappedCorpus,
    ProgressTracker, StreamingTextProcessor, StreamingVectorizer,
};
pub use string_metrics::{
    AlignmentResult, DamerauLevenshteinMetric, Metaphone, NeedlemanWunsch, Nysiis,
    PhoneticAlgorithm, SmithWaterman, Soundex, StringMetric,
};
pub use summarization::{CentroidSummarizer, KeywordExtractor, TextRank};
pub use text_coordinator::{
    AdvancedBatchClassificationResult, AdvancedSemanticSimilarityResult, AdvancedTextConfig,
    AdvancedTextCoordinator, AdvancedTextResult, AdvancedTopicModelingResult,
};
pub use text_statistics::{ReadabilityMetrics, TextMetrics, TextStatistics};
pub use token_filter::{
    CompositeFilter, CustomFilter, FrequencyFilter, LengthFilter, RegexFilter, StopwordsFilter,
    TokenFilter,
};
pub use tokenize::{
    bpe::{BpeConfig, BpeTokenizer, BpeVocabulary},
    CharacterTokenizer, NgramTokenizer, RegexTokenizer, SentenceTokenizer, Tokenizer,
    WhitespaceTokenizer, WordTokenizer,
};
pub use topic_coherence::{TopicCoherence, TopicDiversity};
pub use topic_modeling::{
    LatentDirichletAllocation, LdaBuilder, LdaConfig, LdaLearningMethod, Topic as LdaTopic,
};
pub use transformer::{
    FeedForward, LayerNorm, MultiHeadAttention, PositionalEncoding, TokenEmbedding,
    TransformerConfig, TransformerDecoder, TransformerDecoderLayer, TransformerEncoder,
    TransformerEncoderLayer, TransformerModel,
};
pub use vectorize::{CountVectorizer, TfidfVectorizer, Vectorizer};
pub use visualization::{
    AttentionVisualizer, Color, ColorScheme, EmbeddingVisualizer, SentimentVisualizer,
    TextAnalyticsDashboard, TopicVisualizer, VisualizationConfig, WordCloud,
};
pub use vocabulary::Vocabulary;
pub use weighted_distance::{
    DamerauLevenshteinWeights, LevenshteinWeights, WeightedDamerauLevenshtein, WeightedLevenshtein,
    WeightedStringMetric,
};