1#![warn(missing_docs)]
7
8pub mod classification;
9pub mod cleansing;
10pub mod distance;
11pub mod embeddings;
12pub mod enhanced_vectorize;
13pub mod error;
14pub mod ml_integration;
15pub mod ml_sentiment;
16pub mod multilingual;
17pub mod parallel;
18pub mod preprocess;
19pub mod sentiment;
20pub mod spelling;
21pub mod stemming;
22pub mod string_metrics;
23pub mod summarization;
24pub mod text_statistics;
25pub mod token_filter;
26pub mod tokenize;
27pub mod topic_coherence;
28pub mod topic_modeling;
29pub mod utils;
30pub mod vectorize;
31pub mod vocabulary;
32pub mod weighted_distance;
33
34pub use classification::{
36 TextClassificationMetrics, TextClassificationPipeline, TextDataset, TextFeatureSelector,
37};
38pub use cleansing::{
39 expand_contractions, normalize_unicode, normalize_whitespace, remove_accents, replace_emails,
40 replace_urls, strip_html_tags, AdvancedTextCleaner,
41};
42pub use distance::{cosine_similarity, jaccard_similarity, levenshtein_distance};
43pub use embeddings::{Word2Vec, Word2VecAlgorithm, Word2VecConfig};
44pub use enhanced_vectorize::{EnhancedCountVectorizer, EnhancedTfidfVectorizer};
45pub use error::{Result, TextError};
46pub use ml_integration::{
47 BatchTextProcessor, FeatureExtractionMode, MLTextPreprocessor, TextFeatures, TextMLPipeline,
48};
49pub use ml_sentiment::{
50 ClassMetrics, EvaluationMetrics, MLSentimentAnalyzer, MLSentimentConfig, TrainingMetrics,
51};
52pub use multilingual::{
53 Language, LanguageDetectionResult, LanguageDetector, MultilingualProcessor, ProcessedText,
54 StopWords,
55};
56pub use parallel::{
57 ParallelCorpusProcessor, ParallelTextProcessor, ParallelTokenizer, ParallelVectorizer,
58};
59pub use preprocess::{BasicNormalizer, BasicTextCleaner, TextCleaner, TextNormalizer};
60pub use sentiment::{
61 LexiconSentimentAnalyzer, RuleBasedSentimentAnalyzer, Sentiment, SentimentLexicon,
62 SentimentResult, SentimentRules, SentimentWordCounts,
63};
64pub use spelling::{
65 DictionaryCorrector, DictionaryCorrectorConfig, EditOp, ErrorModel, NGramModel,
66 SpellingCorrector, StatisticalCorrector, StatisticalCorrectorConfig,
67};
68pub use stemming::{
69 LancasterStemmer, LemmatizerConfig, PorterStemmer, PosTag, RuleLemmatizer,
70 RuleLemmatizerBuilder, SimpleLemmatizer, SnowballStemmer, Stemmer,
71};
72pub use string_metrics::{
73 DamerauLevenshteinMetric, Metaphone, PhoneticAlgorithm, Soundex, StringMetric,
74};
75pub use summarization::{CentroidSummarizer, KeywordExtractor, TextRank};
76pub use text_statistics::{ReadabilityMetrics, TextMetrics, TextStatistics};
77pub use token_filter::{
78 CompositeFilter, CustomFilter, FrequencyFilter, LengthFilter, RegexFilter, StopwordsFilter,
79 TokenFilter,
80};
81pub use tokenize::{
82 bpe::{BpeConfig, BpeTokenizer, BpeVocabulary},
83 CharacterTokenizer, NgramTokenizer, RegexTokenizer, SentenceTokenizer, Tokenizer,
84 WhitespaceTokenizer, WordTokenizer,
85};
86pub use topic_coherence::{TopicCoherence, TopicDiversity};
87pub use topic_modeling::{
88 LatentDirichletAllocation, LdaBuilder, LdaConfig, LdaLearningMethod, Topic,
89};
90pub use vectorize::{CountVectorizer, TfidfVectorizer, Vectorizer};
91pub use vocabulary::Vocabulary;
92pub use weighted_distance::{
93 DamerauLevenshteinWeights, LevenshteinWeights, WeightedDamerauLevenshtein, WeightedLevenshtein,
94 WeightedStringMetric,
95};