1#![allow(clippy::too_many_arguments)]
24#![allow(clippy::module_inception)]
25#![allow(clippy::large_enum_variant)]
26#![allow(dead_code)]
28pub mod analysis;
31pub mod convenience;
32pub mod datasets;
33pub mod embeddings;
34pub mod generation;
35pub mod models;
37pub mod prelude;
38pub mod scirs2_ops;
39pub mod scirs2_text_integration; pub mod tokenization;
41pub mod utils;
42pub mod vocab;
43
44#[cfg(test)]
45mod test_utils;
46
47pub use analysis::*;
48pub use convenience::*;
49pub use datasets::*;
50pub use embeddings::*;
51pub use generation::{
52 BeamHypothesis, BeamSearchDecoder, GenerationConfig as TextGenerationConfig,
53 NGramRepetitionFilter, RepetitionPenalty, TextGenerator, TextSampler,
54};
55pub use models::*;
57pub use scirs2_ops::advanced_analytics::{
58 compute_advanced_stats, AdvancedTextSampler, AdvancedTextStats, ComplexityAnalyzer,
59 ComplexityMetrics,
60};
61pub use scirs2_ops::performance::{PerformanceMetrics, PerformanceMonitor};
62pub use scirs2_ops::*;
63pub use scirs2_text_integration::{
64 advanced_ops::{cluster_documents, extract_topics, paraphrase_text},
65 ClassificationResult, ClusterResult, DeviceType as TextDeviceType, EntityType,
66 LanguageDetection, LanguageModel, NamedEntity, PrecisionLevel, SciRS2TextProcessor,
67 SentimentLabel, SentimentResult, TextConfig, TextEmbeddings, Topic,
68};
69pub use tokenization::*;
70#[allow(deprecated)]
73pub use utils::{
74 clean_text, count_words, label_encode, normalize_text, one_hot_encode,
75 pad_and_truncate_sequences, pad_sequence, split_sentences, truncate_sequence, BatchProcessor,
76 BatchTextStats, CustomStep, MaxLengthTruncateStep, MinLengthFilterStep, OptimizedBatchOps,
77 PaddingStrategy, PreprocessingStats, PreprocessingUtils, RemoveExtraWhitespaceStep,
78 StreamingBatchProcessor, TextAugmenter, TextCleaner, TextNormalizer, TextPreprocessingPipeline,
79 TruncationStrategy,
80};
81pub use vocab::*;
82
83pub const VERSION: &str = env!("CARGO_PKG_VERSION");
85pub const VERSION_MAJOR: u32 = 0;
86pub const VERSION_MINOR: u32 = 1;
87pub const VERSION_PATCH: u32 = 0;
88
/// Unified error type for all text-processing operations in this crate.
///
/// Each variant carries a human-readable message; `#[error(...)]` attributes
/// (via `thiserror`) provide the `Display` implementation. I/O, tensor, and
/// `anyhow` errors convert automatically through the `#[from]` variants, so
/// `?` works directly on those fallible calls.
#[derive(Debug, thiserror::Error)]
pub enum TextError {
    /// Failure while tokenizing input text.
    #[error("Tokenization error: {0}")]
    TokenizationError(String),

    /// Failure inside a model (construction, forward pass, loading, etc.).
    #[error("Model error: {0}")]
    ModelError(String),

    /// Failure in vocabulary construction or lookup.
    #[error("Vocabulary error: {0}")]
    VocabError(String),

    /// Failure while loading or iterating a dataset.
    #[error("Dataset error: {0}")]
    DatasetError(String),

    /// Input failed a validation check.
    #[error("Validation error: {0}")]
    ValidationError(String),

    /// Empty input where at least one element/character is required.
    #[error("Empty input provided where non-empty input is required")]
    EmptyInput,

    /// A parameter was outside its accepted range or form.
    /// `expected` describes what a valid value looks like.
    #[error("Invalid parameter: {parameter} = {value}, expected {expected}")]
    InvalidParameter {
        parameter: String,
        value: String,
        expected: String,
    },

    /// Processing of a specific item failed; `item` identifies which one.
    #[error("Processing failed for {item}: {reason}")]
    ProcessingError { item: String, reason: String },

    /// Invalid or inconsistent configuration.
    #[error("Configuration error: {0}")]
    ConfigurationError(String),

    /// Wrapped `std::io::Error`; converts automatically via `?`.
    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),

    /// Wrapped tensor-layer error from `torsh_core`; converts via `?`.
    #[error("Tensor error: {0}")]
    TensorError(#[from] torsh_core::TorshError),

    /// Catch-all for errors carried through `anyhow`; converts via `?`.
    #[error("Other error: {0}")]
    Other(#[from] anyhow::Error),
}
131
132pub type Result<T> = std::result::Result<T, TextError>;
133
134impl From<TextError> for torsh_core::TorshError {
135 fn from(error: TextError) -> Self {
136 torsh_core::TorshError::Other(error.to_string())
137 }
138}