#![allow(clippy::manual_strip)]
#![allow(clippy::needless_range_loop)]
#![allow(clippy::if_same_then_else)]
#![allow(clippy::cloned_ref_to_slice_refs)]
#![allow(dead_code)]
#![warn(missing_docs)]

pub mod classification;
pub mod cleansing;
pub mod distance;
pub mod domain_processors;
pub mod embeddings;
pub mod enhanced_vectorize;
pub mod error;
pub mod huggingface_compat;
pub mod information_extraction;
pub mod ml_integration;
pub mod ml_sentiment;
pub mod model_registry;
pub mod multilingual;
pub mod neural_architectures;
pub mod parallel;
pub mod paraphrasing;
pub mod performance;
pub mod pos_tagging;
pub mod preprocess;
pub mod semantic_similarity;
pub mod sentiment;
pub mod simd_ops;
pub mod sparse;
pub mod sparse_vectorize;
pub mod spelling;
pub mod stemming;
pub mod streaming;
pub mod string_metrics;
pub mod summarization;
pub mod text_coordinator;
pub mod text_statistics;
pub mod token_filter;
pub mod tokenize;
pub mod topic_coherence;
pub mod topic_modeling;
pub mod transformer;
pub mod utils;
pub mod vectorize;
pub mod visualization;
pub mod vocabulary;
pub mod weighted_distance;

pub use classification::{
    TextClassificationMetrics, TextClassificationPipeline, TextDataset, TextFeatureSelector,
};
pub use cleansing::{
    expand_contractions, normalize_currencies, normalize_numbers, normalize_ordinals,
    normalize_percentages, normalize_unicode, normalize_whitespace, remove_accents, replace_emails,
    replace_urls, strip_html_tags, AdvancedTextCleaner,
};
pub use distance::{cosine_similarity, jaccard_similarity, levenshtein_distance};
pub use domain_processors::{
    Domain, DomainProcessorConfig, FinancialTextProcessor, LegalTextProcessor,
    MedicalTextProcessor, NewsTextProcessor, PatentTextProcessor, ProcessedDomainText,
    ScientificTextProcessor, SocialMediaTextProcessor, UnifiedDomainProcessor,
};
pub use embeddings::{Word2Vec, Word2VecAlgorithm, Word2VecConfig};
pub use enhanced_vectorize::{EnhancedCountVectorizer, EnhancedTfidfVectorizer};
pub use error::{Result, TextError};
pub use huggingface_compat::{
    ClassificationResult, FeatureExtractionPipeline, FillMaskPipeline, FillMaskResult,
    FormatConverter, HfConfig, HfEncodedInput, HfHub, HfModelAdapter, HfPipeline, HfTokenizer,
    HfTokenizerConfig, QuestionAnsweringPipeline, QuestionAnsweringResult,
    TextClassificationPipeline as HfTextClassificationPipeline, ZeroShotClassificationPipeline,
};
pub use information_extraction::{
    AdvancedExtractedInformation, AdvancedExtractionPipeline, ConfidenceScorer, CoreferenceChain,
    CoreferenceMention, CoreferenceResolver, DocumentInformationExtractor, DocumentSummary, Entity,
    EntityCluster, EntityLinker, EntityType, Event, ExtractedInformation,
    InformationExtractionPipeline, KeyPhraseExtractor, KnowledgeBaseEntry, LinkedEntity,
    MentionType, PatternExtractor, Relation, RelationExtractor, RuleBasedNER,
    StructuredDocumentInformation, TemporalExtractor, Topic,
};
pub use ml_integration::{
    BatchTextProcessor, FeatureExtractionMode, MLTextPreprocessor, TextFeatures, TextMLPipeline,
};
pub use ml_sentiment::{
    ClassMetrics, EvaluationMetrics, MLSentimentAnalyzer, MLSentimentConfig, TrainingMetrics,
};
pub use model_registry::{
    ModelMetadata, ModelRegistry, ModelType, PrebuiltModels, RegistrableModel,
    SerializableModelData,
};
pub use multilingual::{
    Language, LanguageDetectionResult, LanguageDetector, MultilingualProcessor, ProcessedText,
    StopWords,
};
pub use neural_architectures::{
    ActivationFunction, AdditiveAttention, BiLSTM, CNNLSTMHybrid, Conv1D, CrossAttention, Dropout,
    GRUCell, LSTMCell, LayerNorm as NeuralLayerNorm, MaxPool1D,
    MultiHeadAttention as NeuralMultiHeadAttention, MultiScaleCNN, PositionwiseFeedForward,
    ResidualBlock1D, SelfAttention, TextCNN,
};
pub use parallel::{
    ParallelCorpusProcessor, ParallelTextProcessor, ParallelTokenizer, ParallelVectorizer,
};
pub use paraphrasing::{ParaphraseConfig, ParaphraseResult, ParaphraseStrategy, Paraphraser};
pub use performance::{
    AdvancedPerformanceMonitor, DetailedPerformanceReport, OptimizationRecommendation,
    PerformanceSummary, PerformanceThresholds,
};
pub use pos_tagging::{
    PosAwareLemmatizer, PosTagResult, PosTagger, PosTaggerConfig, PosTaggingResult,
};
pub use preprocess::{BasicNormalizer, BasicTextCleaner, TextCleaner, TextNormalizer};
pub use semantic_similarity::{
    LcsSimilarity, SemanticSimilarityEnsemble, SoftCosineSimilarity, WeightedJaccard,
    WordMoversDistance,
};
pub use sentiment::{
    LexiconSentimentAnalyzer, RuleBasedSentimentAnalyzer, Sentiment, SentimentLexicon,
    SentimentResult, SentimentRules, SentimentWordCounts,
};
pub use simd_ops::{
    AdvancedSIMDTextProcessor, SimdEditDistance, SimdStringOps, SimdTextAnalyzer,
    TextProcessingResult,
};
pub use sparse::{CsrMatrix, DokMatrix, SparseMatrixBuilder, SparseVector};
pub use sparse_vectorize::{
    sparse_cosine_similarity, MemoryStats, SparseCountVectorizer, SparseTfidfVectorizer,
};
pub use spelling::{
    DictionaryCorrector, DictionaryCorrectorConfig, EditOp, ErrorModel, NGramModel,
    SpellingCorrector, StatisticalCorrector, StatisticalCorrectorConfig,
};
pub use stemming::{
    LancasterStemmer, LemmatizerConfig, PorterStemmer, PosTag, RuleLemmatizer,
    RuleLemmatizerBuilder, SimpleLemmatizer, SnowballStemmer, Stemmer,
};
pub use streaming::{
    AdvancedStreamingMetrics, AdvancedStreamingProcessor, ChunkedCorpusReader, MemoryMappedCorpus,
    ProgressTracker, StreamingTextProcessor, StreamingVectorizer,
};
pub use string_metrics::{
    AlignmentResult, DamerauLevenshteinMetric, Metaphone, NeedlemanWunsch, Nysiis,
    PhoneticAlgorithm, SmithWaterman, Soundex, StringMetric,
};
pub use summarization::{CentroidSummarizer, KeywordExtractor, TextRank};
pub use text_coordinator::{
    AdvancedBatchClassificationResult, AdvancedSemanticSimilarityResult, AdvancedTextConfig,
    AdvancedTextCoordinator, AdvancedTextResult, AdvancedTopicModelingResult,
};
pub use text_statistics::{ReadabilityMetrics, TextMetrics, TextStatistics};
pub use token_filter::{
    CompositeFilter, CustomFilter, FrequencyFilter, LengthFilter, RegexFilter, StopwordsFilter,
    TokenFilter,
};
pub use tokenize::{
    bpe::{BpeConfig, BpeTokenizer, BpeVocabulary},
    CharacterTokenizer, NgramTokenizer, RegexTokenizer, SentenceTokenizer, Tokenizer,
    WhitespaceTokenizer, WordTokenizer,
};
pub use topic_coherence::{TopicCoherence, TopicDiversity};
pub use topic_modeling::{
    LatentDirichletAllocation, LdaBuilder, LdaConfig, LdaLearningMethod, Topic as LdaTopic,
};
pub use transformer::{
    FeedForward, LayerNorm, MultiHeadAttention, PositionalEncoding, TokenEmbedding,
    TransformerConfig, TransformerDecoder, TransformerDecoderLayer, TransformerEncoder,
    TransformerEncoderLayer, TransformerModel,
};
pub use vectorize::{CountVectorizer, TfidfVectorizer, Vectorizer};
pub use visualization::{
    AttentionVisualizer, Color, ColorScheme, EmbeddingVisualizer, SentimentVisualizer,
    TextAnalyticsDashboard, TopicVisualizer, VisualizationConfig, WordCloud,
};
pub use vocabulary::Vocabulary;
pub use weighted_distance::{
    DamerauLevenshteinWeights, LevenshteinWeights, WeightedDamerauLevenshtein, WeightedLevenshtein,
    WeightedStringMetric,
};
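// The re-export block above flattens the public API: downstream code can
// import the common types from the crate root, e.g.
// `use <crate_name>::{Tokenizer, WordTokenizer, TfidfVectorizer};`
// (with `<crate_name>` standing in for this crate's actual name), instead of
// the full module paths such as `tokenize::WordTokenizer` or
// `vectorize::TfidfVectorizer`.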