1pub mod convert;
41pub mod expr_context;
42pub mod format;
43pub mod formats;
44pub mod plugin;
45pub mod types;
46
/// Shortens `s` to at most `max_chars` characters, marking any cut with `...`.
///
/// Lengths are counted in `char`s (Unicode scalar values), not bytes, so the
/// cut never lands in the middle of a multi-byte character.
///
/// * If `s` already fits, it is returned unchanged.
/// * Otherwise the result is the first `max_chars - 3` characters followed by
///   `...`, which is exactly `max_chars` characters long.
/// * If `max_chars` is smaller than the ellipsis itself (less than 3) there is
///   no room for the marker, so the result is simply the first `max_chars`
///   characters. The returned string therefore never exceeds `max_chars`.
#[must_use]
pub fn truncate_str(s: &str, max_chars: usize) -> String {
    const ELLIPSIS: &str = "...";

    // `nth(max_chars)` only yields a value when at least one character lies
    // beyond the limit, and it stops scanning there instead of walking the
    // whole string.
    if s.chars().nth(max_chars).is_none() {
        return s.to_string();
    }

    // ELLIPSIS is pure ASCII, so its byte length equals its character count.
    match max_chars.checked_sub(ELLIPSIS.len()) {
        Some(keep) => {
            let mut shortened: String = s.chars().take(keep).collect();
            shortened.push_str(ELLIPSIS);
            shortened
        }
        // Not enough room for the ellipsis: hard-cut so the limit still holds.
        None => s.chars().take(max_chars).collect(),
    }
}
60
61pub use format::{Format, FormatInfo};
62pub use plugin::{PluginError, PluginLoadReport, PluginRegistry};
63pub use types::*;
64
65use formats::{
66 AngleFormat, ArchiveFormat, AreaFormat, AudioFormat, Base64Format, BinaryFormat,
67 BytesToIntFormat, CharFormat, CidrFormat, ColorFormat, ConstantsFormat, CoordsFormat,
68 CuidFormat, CurrencyFormat, DataSizeFormat, DateTimeFormat, DecimalFormat, DurationFormat,
69 EnergyFormat, EpochFormat, EscapeFormat, ExprFormat, FontFormat, GraphFormat, HashFormat,
70 HexFormat, HexdumpFormat, ImageFormat, IpAddrFormat, IsbnFormat, JsonFormat, JwtFormat,
71 LengthFormat, MsgPackFormat, NanoIdFormat, NaturalDateFormat, OctalFormat, OfficeFormat,
72 PdfFormat, PermissionsFormat, PlistFormat, PressureFormat, ProtobufFormat, SpeedFormat,
73 TemperatureFormat, UlidFormat, UrlEncodingFormat, Utf8Format, UuidFormat, VideoFormat,
74 VolumeFormat, WeightFormat,
75};
76
77pub struct Formatorbit {
79 formats: Vec<Box<dyn Format>>,
80 config: Option<ConversionConfig>,
81 plugins: Option<PluginRegistry>,
82}
83
84impl Formatorbit {
85 #[must_use]
97 pub fn new() -> Self {
98 Self {
99 formats: Self::create_format_list(),
100 config: None,
101 plugins: None,
102 }
103 }
104
105 #[must_use]
107 pub fn with_config(config: ConversionConfig) -> Self {
108 Self {
109 formats: Self::create_format_list(),
110 config: Some(config),
111 plugins: None,
112 }
113 }
114
/// Creates an instance with every built-in format and the default plugins
/// loaded.
///
/// A fresh [`PluginRegistry`] is populated via `load_default`, then handed to
/// `expr_context::set_from_registry` and used to register plugin-provided
/// currencies before it is stored in the returned instance.
///
/// Returns the new instance together with the [`PluginLoadReport`] produced
/// while loading.
///
/// # Errors
///
/// Propagates the [`PluginError`] returned by `PluginRegistry::load_default`.
#[cfg(feature = "python")]
pub fn with_plugins() -> Result<(Self, PluginLoadReport), PluginError> {
    let mut registry = PluginRegistry::new();
    let report = registry.load_default()?;

    // Both calls only borrow the registry; they must run before it is moved
    // into the instance below.
    expr_context::set_from_registry(&registry);
    Self::register_plugin_currencies(&registry);

    let instance = Self {
        formats: Self::create_format_list(),
        config: None,
        plugins: Some(registry),
    };
    Ok((instance, report))
}
145
/// Registers every plugin currency that reports a rate with the currency
/// rate table.
///
/// Currencies whose `rate()` is `None` are skipped.
#[cfg(feature = "python")]
fn register_plugin_currencies(registry: &PluginRegistry) {
    use formats::currency_rates::{register_plugin_currency, PluginCurrencyInfo};

    for currency in registry.currencies() {
        let (rate, base_currency) = match currency.rate() {
            Some(pair) => pair,
            None => continue,
        };

        let code = currency.code();
        let info = PluginCurrencyInfo {
            rate,
            base_currency,
            symbol: currency.symbol().to_string(),
            decimals: currency.decimals(),
        };
        register_plugin_currency(code, info);
    }
}
165
166 #[must_use]
168 pub fn set_plugins(mut self, plugins: PluginRegistry) -> Self {
169 self.plugins = Some(plugins);
170 self
171 }
172
173 #[must_use]
175 pub fn plugins(&self) -> Option<&PluginRegistry> {
176 self.plugins.as_ref()
177 }
178
179 #[must_use]
181 pub fn set_config(mut self, config: ConversionConfig) -> Self {
182 self.config = Some(config);
183 self
184 }
185
186 #[must_use]
188 pub fn config(&self) -> Option<&ConversionConfig> {
189 self.config.as_ref()
190 }
191
192 fn create_format_list() -> Vec<Box<dyn Format>> {
194 vec![
195 Box::new(JwtFormat),
197 Box::new(UlidFormat),
198 Box::new(UuidFormat),
199 Box::new(IpAddrFormat),
200 Box::new(CidrFormat),
201 Box::new(CoordsFormat),
202 Box::new(ColorFormat),
203 Box::new(CharFormat),
204 Box::new(NaturalDateFormat),
205 Box::new(ConstantsFormat),
206 Box::new(PermissionsFormat),
207 Box::new(UrlEncodingFormat),
208 Box::new(IsbnFormat),
210 Box::new(CuidFormat),
211 Box::new(NanoIdFormat),
212 Box::new(HashFormat),
214 Box::new(HexFormat),
215 Box::new(BinaryFormat),
216 Box::new(OctalFormat),
217 Box::new(Base64Format),
218 Box::new(EpochFormat),
219 Box::new(DecimalFormat),
220 Box::new(DataSizeFormat),
221 Box::new(TemperatureFormat),
222 Box::new(LengthFormat),
224 Box::new(WeightFormat),
225 Box::new(VolumeFormat),
226 Box::new(SpeedFormat),
227 Box::new(PressureFormat),
228 Box::new(AngleFormat),
229 Box::new(AreaFormat),
230 Box::new(EnergyFormat),
231 Box::new(CurrencyFormat),
232 Box::new(ExprFormat),
233 Box::new(EscapeFormat),
234 Box::new(DurationFormat),
235 Box::new(DateTimeFormat),
236 Box::new(JsonFormat),
237 Box::new(GraphFormat),
238 Box::new(Utf8Format),
239 Box::new(BytesToIntFormat),
241 Box::new(HexdumpFormat),
242 Box::new(ImageFormat),
243 Box::new(MsgPackFormat),
244 Box::new(PlistFormat),
245 Box::new(ProtobufFormat),
246 Box::new(ArchiveFormat),
248 Box::new(AudioFormat),
249 Box::new(FontFormat),
250 Box::new(OfficeFormat),
251 Box::new(PdfFormat),
252 Box::new(VideoFormat),
253 ]
254 }
255
256 #[must_use]
273 pub fn interpret(&self, input: &str) -> Vec<Interpretation> {
274 let mut results = Vec::new();
275
276 for format in &self.formats {
278 if let Some(ref config) = self.config {
280 if config.blocking.is_format_blocked(format.id()) {
281 continue;
282 }
283 }
284 results.extend(format.parse(input));
285 }
286
287 if let Some(ref plugins) = self.plugins {
289 for decoder in plugins.decoders() {
290 if let Some(ref config) = self.config {
292 if config.blocking.is_format_blocked(decoder.id()) {
293 continue;
294 }
295 }
296 results.extend(decoder.parse(input));
297 }
298 }
299
300 results.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
302 results
303 }
304
305 #[must_use]
307 pub fn convert(&self, value: &CoreValue) -> Vec<Conversion> {
308 convert::find_all_conversions(&self.formats, value, None, None, self.config.as_ref())
309 }
310
311 #[must_use]
314 pub fn convert_excluding(&self, value: &CoreValue, source_format: &str) -> Vec<Conversion> {
315 #[allow(unused_mut)]
316 let mut conversions = convert::find_all_conversions(
317 &self.formats,
318 value,
319 Some(source_format),
320 Some(source_format),
321 self.config.as_ref(),
322 );
323
324 #[cfg(feature = "python")]
326 if let Some(ref plugins) = self.plugins {
327 conversions.extend(self.get_plugin_traits(value, source_format, plugins));
328 }
329
330 conversions
331 }
332
/// Builds one display-only `Conversion` per plugin trait that applies to
/// `value`.
///
/// A trait plugin is considered when its `value_types()` list is empty or
/// contains the type name of `value`. It contributes a conversion only if its
/// `check` returns a description. Variants of `CoreValue` without a name in
/// the table below map to `""`.
#[cfg(feature = "python")]
fn get_plugin_traits(
    &self,
    value: &CoreValue,
    source_format: &str,
    plugins: &PluginRegistry,
) -> Vec<Conversion> {
    use types::{ConversionKind, ConversionPriority, ConversionStep};

    let value_type = match value {
        CoreValue::Int { .. } => "int",
        CoreValue::Float(_) => "float",
        CoreValue::String(_) => "string",
        CoreValue::Bytes(_) => "bytes",
        CoreValue::Bool(_) => "bool",
        CoreValue::DateTime(_) => "datetime",
        CoreValue::Json(_) => "json",
        _ => "",
    };

    let mut matched = Vec::new();

    for trait_plugin in plugins.traits() {
        // Skip plugins that restrict their value types and do not list ours.
        let not_applicable = !trait_plugin.value_types().is_empty()
            && !trait_plugin.value_types().iter().any(|t| t == value_type);
        if not_applicable {
            continue;
        }

        if let Some(description) = trait_plugin.check(value) {
            let trait_id = trait_plugin.id().to_string();
            let step = ConversionStep {
                format: trait_id.clone(),
                value: value.clone(),
                display: description.clone(),
            };

            matched.push(Conversion {
                value: value.clone(),
                target_format: trait_id.clone(),
                display: description,
                path: vec![source_format.to_string(), trait_id],
                is_lossy: false,
                steps: vec![step],
                priority: ConversionPriority::Semantic,
                kind: ConversionKind::Trait,
                display_only: true,
                hidden: false,
                rich_display: Vec::new(),
            });
        }
    }

    matched
}
390
391 #[must_use]
416 pub fn convert_all(&self, input: &str) -> Vec<ConversionResult> {
417 self.interpret(input)
418 .into_iter()
419 .map(|interp| {
420 let conversions = self.convert_excluding(&interp.value, &interp.source_format);
422 ConversionResult {
423 input: input.to_string(),
424 interpretation: interp,
425 conversions,
426 }
427 })
428 .collect()
429 }
430
431 #[must_use]
447 pub fn convert_bytes(&self, data: &[u8]) -> Vec<ConversionResult> {
448 self.convert_bytes_internal(data, &[])
449 }
450
451 #[must_use]
453 pub fn convert_bytes_filtered(
454 &self,
455 data: &[u8],
456 format_filter: &[String],
457 ) -> Vec<ConversionResult> {
458 self.convert_bytes_internal(data, format_filter)
459 }
460
461 fn convert_bytes_internal(
467 &self,
468 data: &[u8],
469 format_filter: &[String],
470 ) -> Vec<ConversionResult> {
471 use base64::Engine;
472
473 let base64_input = base64::engine::general_purpose::STANDARD.encode(data);
477
478 let mut interpretations = Vec::new();
479
480 let binary_formats = [
482 "image", "archive", "video", "audio", "font", "pdf", "office",
483 ];
484
485 for format in &self.formats {
486 if !format_filter.is_empty() {
488 let matches = format_filter.iter().any(|name| format.matches_name(name));
489 if !matches {
490 continue;
491 }
492 }
493
494 let is_binary_format = binary_formats
496 .iter()
497 .any(|&bf| format.id() == bf || format.aliases().contains(&bf));
498 if !is_binary_format {
499 continue;
500 }
501
502 if let Some(ref config) = self.config {
504 if config.blocking.is_format_blocked(format.id()) {
505 continue;
506 }
507 }
508
509 interpretations.extend(format.parse(&base64_input));
510 }
511
512 if interpretations.is_empty() {
514 interpretations.push(Interpretation {
515 value: CoreValue::Bytes(data.to_vec()),
516 source_format: "bytes".to_string(),
517 confidence: 1.0,
518 description: format!("{} bytes", data.len()),
519 rich_display: vec![],
520 });
521 }
522
523 interpretations.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
525
526 interpretations
528 .into_iter()
529 .map(|interp| {
530 let conversions = self.convert_excluding(&interp.value, &interp.source_format);
531 ConversionResult {
532 input: base64_input.clone(),
533 interpretation: interp,
534 conversions,
535 }
536 })
537 .collect()
538 }
539
540 #[must_use]
542 pub fn format_infos(&self) -> Vec<FormatInfo> {
543 self.formats.iter().map(|f| f.info()).collect()
544 }
545
546 #[must_use]
549 pub fn interpret_filtered(&self, input: &str, format_filter: &[String]) -> Vec<Interpretation> {
550 if format_filter.is_empty() {
551 return self.interpret(input);
552 }
553
554 let mut results = Vec::new();
555 for format in &self.formats {
556 let matches = format_filter.iter().any(|name| format.matches_name(name));
558 if matches {
559 results.extend(format.parse(input));
560 }
561 }
562 results.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
564 results
565 }
566
567 #[must_use]
569 pub fn convert_all_filtered(
570 &self,
571 input: &str,
572 format_filter: &[String],
573 ) -> Vec<ConversionResult> {
574 self.interpret_filtered(input, format_filter)
575 .into_iter()
576 .map(|interp| {
577 let conversions = self.convert_excluding(&interp.value, &interp.source_format);
579 ConversionResult {
580 input: input.to_string(),
581 interpretation: interp,
582 conversions,
583 }
584 })
585 .collect()
586 }
587
588 pub fn validate(&self, input: &str, format_name: &str) -> Option<String> {
595 for format in &self.formats {
596 if format.matches_name(format_name) {
597 return format.validate(input);
598 }
599 }
600 None
601 }
602
603 #[must_use]
605 pub fn is_valid_format(&self, name: &str) -> bool {
606 self.formats.iter().any(|f| f.matches_name(name))
607 }
608
609 #[must_use]
611 pub fn format_ids(&self) -> Vec<&'static str> {
612 self.formats.iter().map(|f| f.id()).collect()
613 }
614}
615
616impl Default for Formatorbit {
617 fn default() -> Self {
618 Self::new()
619 }
620}
621
#[cfg(test)]
mod tests {
    use super::*;

    /// A 40-hex-digit input must yield a `hash` interpretation whose
    /// description mentions SHA-1.
    #[test]
    fn test_sha1_hash_interpretation() {
        let forb = Formatorbit::new();
        let results = forb.convert_all("da39a3ee5e6b4b0d3255bfef95601890afd80709");

        let hash_result = results
            .iter()
            .find(|r| r.interpretation.source_format == "hash");

        assert!(
            hash_result.is_some(),
            "Expected 'hash' interpretation but got: {:?}",
            results
                .iter()
                .map(|r| &r.interpretation.source_format)
                .collect::<Vec<_>>()
        );

        let description = &hash_result.unwrap().interpretation.description;
        assert!(description.contains("SHA-1"));
    }

    /// An ordinary word must be interpreted as text and must not be picked up
    /// as coordinates.
    #[test]
    fn test_words_not_parsed_as_geohash() {
        let forb = Formatorbit::new();
        let results = forb.convert_all("rustfmt");

        let has_format = |wanted: &str| {
            results
                .iter()
                .any(|r| r.interpretation.source_format == wanted)
        };

        assert!(
            !has_format("coords"),
            "should NOT have coords interpretation (geohash parsing removed)"
        );
        assert!(has_format("text"), "should have text interpretation");
    }
}