1pub mod convert;
41pub mod expr_context;
42pub mod format;
43pub mod formats;
44pub mod plugin;
45pub mod types;
46
/// Shortens `s` to at most `max_chars` characters, marking the cut with an ellipsis.
///
/// Length is measured in `char`s (Unicode scalar values), not bytes, so multi-byte
/// text is never split in the middle of a character.
///
/// * If `s` already fits in `max_chars` characters it is returned unchanged.
/// * Otherwise the result is exactly `max_chars` characters long: a prefix of `s`
///   followed by `"..."`. When `max_chars` is smaller than the ellipsis itself, the
///   ellipsis is shortened so the limit is still respected (`max_chars == 0` yields
///   an empty string).
#[must_use]
pub fn truncate_str(s: &str, max_chars: usize) -> String {
    const ELLIPSIS: &str = "...";

    // `nth(max_chars)` looks for the first character past the limit, so scanning stops
    // as soon as the answer is known instead of counting the whole string.
    if s.chars().nth(max_chars).is_none() {
        return s.to_string();
    }

    // The ellipsis is pure ASCII, so slicing it by byte length is safe.
    let marker = &ELLIPSIS[..ELLIPSIS.len().min(max_chars)];
    let kept = max_chars - marker.len();

    let mut shortened: String = s.chars().take(kept).collect();
    shortened.push_str(marker);
    shortened
}
60
61pub use format::{Format, FormatInfo};
62pub use plugin::{PluginError, PluginLoadReport, PluginRegistry};
63pub use types::*;
64
65use formats::{
66 AngleFormat, ArchiveFormat, AreaFormat, AudioFormat, Base64Format, BinaryFormat,
67 BytesToIntFormat, CharFormat, CidrFormat, ColorFormat, ConstantsFormat, CoordsFormat,
68 CuidFormat, CurrencyFormat, DataSizeFormat, DateTimeFormat, DecimalFormat, DurationFormat,
69 EnergyFormat, EpochFormat, EscapeFormat, ExprFormat, FontFormat, GraphFormat, HashFormat,
70 HexFormat, HexdumpFormat, ImageFormat, IpAddrFormat, IsbnFormat, JsonFormat, JwtFormat,
71 LengthFormat, MsgPackFormat, NanoIdFormat, NaturalDateFormat, OctalFormat, OfficeFormat,
72 PdfFormat, PermissionsFormat, PlistFormat, PressureFormat, ProtobufFormat, SpeedFormat,
73 TemperatureFormat, UlidFormat, UrlEncodingFormat, Utf8Format, UuidFormat, VideoFormat,
74 VolumeFormat, WeightFormat,
75};
76
77pub struct Formatorbit {
79 formats: Vec<Box<dyn Format>>,
80 config: Option<ConversionConfig>,
81 plugins: Option<PluginRegistry>,
82}
83
84impl Formatorbit {
85 #[must_use]
97 pub fn new() -> Self {
98 Self {
99 formats: Self::create_format_list(),
100 config: None,
101 plugins: None,
102 }
103 }
104
105 #[must_use]
107 pub fn with_config(config: ConversionConfig) -> Self {
108 Self {
109 formats: Self::create_format_list(),
110 config: Some(config),
111 plugins: None,
112 }
113 }
114
/// Creates an instance whose plugins are loaded via `PluginRegistry::load_default`.
///
/// After loading, the registry is handed to the expression context and every plugin
/// currency that reports a rate is registered; the registry is then stored on the
/// returned instance. No configuration is set.
///
/// Returns the new instance together with the load report.
///
/// # Errors
///
/// Propagates the error from `PluginRegistry::load_default` if loading fails.
#[cfg(feature = "python")]
pub fn with_plugins() -> Result<(Self, PluginLoadReport), PluginError> {
    let mut registry = PluginRegistry::new();
    let report = registry.load_default()?;

    // Both helpers only need shared access to the freshly loaded registry.
    expr_context::set_from_registry(&registry);
    Self::register_plugin_currencies(&registry);

    Ok((
        Self {
            formats: Self::create_format_list(),
            config: None,
            plugins: Some(registry),
        },
        report,
    ))
}
145
/// Registers every plugin currency that reports a rate via `register_plugin_currency`.
///
/// Currencies whose `rate()` is `None` are skipped.
#[cfg(feature = "python")]
fn register_plugin_currencies(registry: &PluginRegistry) {
    use formats::currency_rates::{register_plugin_currency, PluginCurrencyInfo};

    for entry in registry.currencies() {
        let (rate, base_currency) = match entry.rate() {
            Some(pair) => pair,
            None => continue,
        };

        let code = entry.code();
        let info = PluginCurrencyInfo {
            rate,
            base_currency,
            symbol: entry.symbol().to_string(),
            decimals: entry.decimals(),
        };
        register_plugin_currency(code, info);
    }
}
165
166 #[must_use]
168 pub fn set_plugins(mut self, plugins: PluginRegistry) -> Self {
169 self.plugins = Some(plugins);
170 self
171 }
172
173 #[must_use]
175 pub fn plugins(&self) -> Option<&PluginRegistry> {
176 self.plugins.as_ref()
177 }
178
179 #[must_use]
181 pub fn set_config(mut self, config: ConversionConfig) -> Self {
182 self.config = Some(config);
183 self
184 }
185
186 #[must_use]
188 pub fn config(&self) -> Option<&ConversionConfig> {
189 self.config.as_ref()
190 }
191
192 fn create_format_list() -> Vec<Box<dyn Format>> {
194 vec![
195 Box::new(JwtFormat),
197 Box::new(UlidFormat),
198 Box::new(UuidFormat),
199 Box::new(IpAddrFormat),
200 Box::new(CidrFormat),
201 Box::new(CoordsFormat),
202 Box::new(ColorFormat),
203 Box::new(CharFormat),
204 Box::new(NaturalDateFormat),
205 Box::new(ConstantsFormat),
206 Box::new(PermissionsFormat),
207 Box::new(UrlEncodingFormat),
208 Box::new(IsbnFormat),
210 Box::new(CuidFormat),
211 Box::new(NanoIdFormat),
212 Box::new(HashFormat),
214 Box::new(HexFormat),
215 Box::new(BinaryFormat),
216 Box::new(OctalFormat),
217 Box::new(Base64Format),
218 Box::new(EpochFormat),
219 Box::new(DecimalFormat),
220 Box::new(DataSizeFormat),
221 Box::new(TemperatureFormat),
222 Box::new(LengthFormat),
224 Box::new(WeightFormat),
225 Box::new(VolumeFormat),
226 Box::new(SpeedFormat),
227 Box::new(PressureFormat),
228 Box::new(AngleFormat),
229 Box::new(AreaFormat),
230 Box::new(EnergyFormat),
231 Box::new(CurrencyFormat),
232 Box::new(ExprFormat),
233 Box::new(EscapeFormat),
234 Box::new(DurationFormat),
235 Box::new(DateTimeFormat),
236 Box::new(JsonFormat),
237 Box::new(GraphFormat),
238 Box::new(Utf8Format),
239 Box::new(BytesToIntFormat),
241 Box::new(HexdumpFormat),
242 Box::new(ImageFormat),
243 Box::new(MsgPackFormat),
244 Box::new(PlistFormat),
245 Box::new(ProtobufFormat),
246 Box::new(ArchiveFormat),
248 Box::new(AudioFormat),
249 Box::new(FontFormat),
250 Box::new(OfficeFormat),
251 Box::new(PdfFormat),
252 Box::new(VideoFormat),
253 ]
254 }
255
256 #[must_use]
273 pub fn interpret(&self, input: &str) -> Vec<Interpretation> {
274 let mut results = Vec::new();
275
276 for format in &self.formats {
278 if let Some(ref config) = self.config {
280 if config.blocking.is_format_blocked(format.id()) {
281 continue;
282 }
283 }
284 results.extend(format.parse(input));
285 }
286
287 if let Some(ref plugins) = self.plugins {
289 for decoder in plugins.decoders() {
290 if let Some(ref config) = self.config {
292 if config.blocking.is_format_blocked(decoder.id()) {
293 continue;
294 }
295 }
296 results.extend(decoder.parse(input));
297 }
298 }
299
300 results.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
302 results
303 }
304
305 #[must_use]
307 pub fn convert(&self, value: &CoreValue) -> Vec<Conversion> {
308 convert::find_all_conversions(&self.formats, value, None, None, self.config.as_ref())
309 }
310
311 #[must_use]
314 pub fn convert_excluding(&self, value: &CoreValue, source_format: &str) -> Vec<Conversion> {
315 #[allow(unused_mut)]
316 let mut conversions = convert::find_all_conversions(
317 &self.formats,
318 value,
319 Some(source_format),
320 Some(source_format),
321 self.config.as_ref(),
322 );
323
324 #[cfg(feature = "python")]
326 if let Some(ref plugins) = self.plugins {
327 conversions.extend(self.get_plugin_traits(value, source_format, plugins));
328 }
329
330 conversions
331 }
332
/// Collects display-only trait conversions reported by plugins for `value`.
///
/// A trait plugin is consulted when its `value_types()` list is empty or contains
/// the type name of `value`; each plugin whose `check` returns a description
/// contributes one `Conversion` of kind `Trait`.
#[cfg(feature = "python")]
fn get_plugin_traits(
    &self,
    value: &CoreValue,
    source_format: &str,
    plugins: &PluginRegistry,
) -> Vec<Conversion> {
    use types::{ConversionKind, ConversionPriority, ConversionStep};

    // Type name compared against each plugin's declared value types; variants not
    // listed here map to the empty string.
    let kind_name = match value {
        CoreValue::Int { .. } => "int",
        CoreValue::Float(_) => "float",
        CoreValue::String(_) => "string",
        CoreValue::Bytes(_) => "bytes",
        CoreValue::Bool(_) => "bool",
        CoreValue::DateTime(_) => "datetime",
        CoreValue::Json(_) => "json",
        _ => "",
    };

    let mut matched = Vec::new();

    for plugin in plugins.traits() {
        let applicable = plugin.value_types().is_empty()
            || plugin.value_types().iter().any(|t| t == kind_name);

        if applicable {
            let hit = plugin.check(value).map(|description| Conversion {
                value: value.clone(),
                target_format: plugin.id().to_string(),
                display: description.clone(),
                path: vec![source_format.to_string(), plugin.id().to_string()],
                is_lossy: false,
                steps: vec![ConversionStep {
                    format: plugin.id().to_string(),
                    value: value.clone(),
                    display: description,
                }],
                priority: ConversionPriority::Semantic,
                kind: ConversionKind::Trait,
                display_only: true,
                hidden: false,
                rich_display: vec![],
            });
            // `extend` with an `Option` pushes the conversion only when `check` matched.
            matched.extend(hit);
        }
    }

    matched
}
390
391 #[must_use]
416 pub fn convert_all(&self, input: &str) -> Vec<ConversionResult> {
417 self.interpret(input)
418 .into_iter()
419 .map(|interp| {
420 let conversions = self.convert_excluding(&interp.value, &interp.source_format);
422 ConversionResult {
423 input: input.to_string(),
424 interpretation: interp,
425 conversions,
426 }
427 })
428 .collect()
429 }
430
431 #[must_use]
447 pub fn convert_bytes(&self, data: &[u8]) -> Vec<ConversionResult> {
448 self.convert_bytes_internal(data, &[])
449 }
450
451 #[must_use]
453 pub fn convert_bytes_filtered(
454 &self,
455 data: &[u8],
456 format_filter: &[String],
457 ) -> Vec<ConversionResult> {
458 self.convert_bytes_internal(data, format_filter)
459 }
460
461 fn convert_bytes_internal(
467 &self,
468 data: &[u8],
469 format_filter: &[String],
470 ) -> Vec<ConversionResult> {
471 use base64::Engine;
472
473 let base64_input = base64::engine::general_purpose::STANDARD.encode(data);
477
478 let mut interpretations = Vec::new();
479
480 let binary_formats = [
482 "image", "archive", "video", "audio", "font", "pdf", "office",
483 ];
484
485 for format in &self.formats {
486 if !format_filter.is_empty() {
488 let matches = format_filter.iter().any(|name| format.matches_name(name));
489 if !matches {
490 continue;
491 }
492 }
493
494 let is_binary_format = binary_formats
496 .iter()
497 .any(|&bf| format.id() == bf || format.aliases().contains(&bf));
498 if !is_binary_format {
499 continue;
500 }
501
502 if let Some(ref config) = self.config {
504 if config.blocking.is_format_blocked(format.id()) {
505 continue;
506 }
507 }
508
509 interpretations.extend(format.parse(&base64_input));
510 }
511
512 if interpretations.is_empty() {
514 interpretations.push(Interpretation {
515 value: CoreValue::Bytes(data.to_vec()),
516 source_format: "bytes".to_string(),
517 confidence: 1.0,
518 description: format!("{} bytes", data.len()),
519 rich_display: vec![],
520 });
521 }
522
523 interpretations.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
525
526 interpretations
528 .into_iter()
529 .map(|interp| {
530 let conversions = self.convert_excluding(&interp.value, &interp.source_format);
531 ConversionResult {
532 input: base64_input.clone(),
533 interpretation: interp,
534 conversions,
535 }
536 })
537 .collect()
538 }
539
540 #[must_use]
542 pub fn format_infos(&self) -> Vec<FormatInfo> {
543 self.formats.iter().map(|f| f.info()).collect()
544 }
545
546 #[must_use]
551 pub fn formats_with_validation(&self) -> Vec<FormatInfo> {
552 self.formats
553 .iter()
554 .map(|f| f.info())
555 .filter(|info| info.has_validation)
556 .collect()
557 }
558
559 #[must_use]
562 pub fn interpret_filtered(&self, input: &str, format_filter: &[String]) -> Vec<Interpretation> {
563 if format_filter.is_empty() {
564 return self.interpret(input);
565 }
566
567 let mut results = Vec::new();
568 for format in &self.formats {
569 let matches = format_filter.iter().any(|name| format.matches_name(name));
571 if matches {
572 results.extend(format.parse(input));
573 }
574 }
575 results.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
577 results
578 }
579
580 #[must_use]
582 pub fn convert_all_filtered(
583 &self,
584 input: &str,
585 format_filter: &[String],
586 ) -> Vec<ConversionResult> {
587 self.interpret_filtered(input, format_filter)
588 .into_iter()
589 .map(|interp| {
590 let conversions = self.convert_excluding(&interp.value, &interp.source_format);
592 ConversionResult {
593 input: input.to_string(),
594 interpretation: interp,
595 conversions,
596 }
597 })
598 .collect()
599 }
600
601 pub fn validate(&self, input: &str, format_name: &str) -> Option<String> {
608 for format in &self.formats {
609 if format.matches_name(format_name) {
610 return format.validate(input);
611 }
612 }
613 None
614 }
615
616 #[must_use]
618 pub fn is_valid_format(&self, name: &str) -> bool {
619 self.formats.iter().any(|f| f.matches_name(name))
620 }
621
622 #[must_use]
624 pub fn format_ids(&self) -> Vec<&'static str> {
625 self.formats.iter().map(|f| f.id()).collect()
626 }
627}
628
629impl Default for Formatorbit {
630 fn default() -> Self {
631 Self::new()
632 }
633}
634
#[cfg(test)]
mod tests {
    use super::*;

    /// A 40-hex-digit input must produce a `hash` interpretation described as SHA-1.
    #[test]
    fn test_sha1_hash_interpretation() {
        let engine = Formatorbit::new();
        let outcomes = engine.convert_all("da39a3ee5e6b4b0d3255bfef95601890afd80709");

        let hash_hit = outcomes
            .iter()
            .find(|r| r.interpretation.source_format == "hash");

        assert!(
            hash_hit.is_some(),
            "Expected 'hash' interpretation but got: {:?}",
            outcomes
                .iter()
                .map(|r| &r.interpretation.source_format)
                .collect::<Vec<_>>()
        );

        let description = &hash_hit.unwrap().interpretation.description;
        assert!(description.contains("SHA-1"));
    }

    /// An ordinary word must be interpreted as text and never as coordinates.
    #[test]
    fn test_words_not_parsed_as_geohash() {
        let engine = Formatorbit::new();
        let outcomes = engine.convert_all("rustfmt");

        let has_format = |wanted: &str| {
            outcomes
                .iter()
                .any(|r| r.interpretation.source_format == wanted)
        };

        assert!(
            !has_format("coords"),
            "should NOT have coords interpretation (geohash parsing removed)"
        );
        assert!(has_format("text"), "should have text interpretation");
    }
}