formatorbit_core/
lib.rs

1//! Formatorbit Core
2//!
//! A cross-platform data format converter. Give it some input (e.g., `691E01B8`)
//! and it returns all plausible interpretations and conversions automatically.
5//!
6//! # Quick Start
7//!
8//! ```
9//! use formatorbit_core::Formatorbit;
10//!
11//! let forb = Formatorbit::new();
12//!
13//! // Get all interpretations and conversions
14//! let results = forb.convert_all("691E01B8");
15//! assert!(!results.is_empty());
16//!
17//! // The highest-confidence interpretation is first
18//! let best = &results[0];
19//! println!("Format: {}", best.interpretation.source_format);
20//! println!("Confidence: {:.0}%", best.interpretation.confidence * 100.0);
21//!
22//! // Each interpretation has conversions to other formats
23//! for conv in &best.conversions[..3.min(best.conversions.len())] {
24//!     println!("  → {}: {}", conv.target_format, conv.display);
25//! }
26//! ```
27//!
28//! # Filtering by Format
29//!
30//! ```
31//! use formatorbit_core::Formatorbit;
32//!
33//! let forb = Formatorbit::new();
34//!
35//! // Force interpretation as a specific format
36//! let results = forb.convert_all_filtered("1703456789", &["epoch".into()]);
37//! assert_eq!(results[0].interpretation.source_format, "epoch-seconds");
38//! ```
39
40pub mod convert;
41pub mod expr_context;
42pub mod format;
43pub mod formats;
44pub mod plugin;
45pub mod types;
46
/// Truncate a string to at most `max_chars` characters, appending "..." if truncated.
///
/// This is UTF-8 safe - it counts characters (`char`s), not bytes.
///
/// * If `s` already fits within `max_chars` characters it is returned unchanged.
/// * Otherwise the result is the first `max_chars - 3` characters followed by
///   `"..."`, so the result is exactly `max_chars` characters long.
/// * If `max_chars` is smaller than the ellipsis itself (less than 3), there is
///   no room for `"..."`; the string is hard-cut to its first `max_chars`
///   characters so the result never exceeds the requested limit.
#[must_use]
pub fn truncate_str(s: &str, max_chars: usize) -> String {
    const ELLIPSIS: &str = "...";

    // `nth(max_chars)` only yields a character when the string is longer than
    // the limit, and it stops scanning as soon as the answer is known.
    if s.chars().nth(max_chars).is_none() {
        return s.to_string();
    }

    // Not enough room for the ellipsis: appending it would overshoot the limit.
    if max_chars < ELLIPSIS.len() {
        return s.chars().take(max_chars).collect();
    }

    let mut truncated: String = s.chars().take(max_chars - ELLIPSIS.len()).collect();
    truncated.push_str(ELLIPSIS);
    truncated
}
60
61pub use format::{Format, FormatInfo};
62pub use plugin::{PluginError, PluginLoadReport, PluginRegistry};
63pub use types::*;
64
65use formats::{
66    AngleFormat, ArchiveFormat, AreaFormat, AudioFormat, Base64Format, BinaryFormat,
67    BytesToIntFormat, CharFormat, CidrFormat, ColorFormat, ConstantsFormat, CoordsFormat,
68    CuidFormat, CurrencyFormat, DataSizeFormat, DateTimeFormat, DecimalFormat, DurationFormat,
69    EnergyFormat, EpochFormat, EscapeFormat, ExprFormat, FontFormat, GraphFormat, HashFormat,
70    HexFormat, HexdumpFormat, ImageFormat, IpAddrFormat, IsbnFormat, JsonFormat, JwtFormat,
71    LengthFormat, MsgPackFormat, NanoIdFormat, NaturalDateFormat, OctalFormat, OfficeFormat,
72    PdfFormat, PermissionsFormat, PlistFormat, PressureFormat, ProtobufFormat, SpeedFormat,
73    TemperatureFormat, UlidFormat, UrlEncodingFormat, Utf8Format, UuidFormat, VideoFormat,
74    VolumeFormat, WeightFormat,
75};
76
/// Main entry point - a configured converter instance.
///
/// Bundles the built-in format list with an optional conversion
/// configuration and an optional plugin registry.
pub struct Formatorbit {
    /// Built-in formats, in the order produced by `create_format_list`.
    formats: Vec<Box<dyn Format>>,
    /// Optional conversion settings. When present, its `blocking` rules are
    /// consulted while interpreting input and it is forwarded to the
    /// conversion search.
    config: Option<ConversionConfig>,
    /// Optional plugin registry supplying extra decoders (and, with the
    /// `python` feature, trait checks).
    plugins: Option<PluginRegistry>,
}
83
84impl Formatorbit {
85    /// Create a new converter with all built-in formats.
86    ///
87    /// # Examples
88    ///
89    /// ```
90    /// use formatorbit_core::Formatorbit;
91    ///
92    /// let forb = Formatorbit::new();
93    /// let results = forb.convert_all("0xDEADBEEF");
94    /// assert!(!results.is_empty());
95    /// ```
96    #[must_use]
97    pub fn new() -> Self {
98        Self {
99            formats: Self::create_format_list(),
100            config: None,
101            plugins: None,
102        }
103    }
104
105    /// Create a new converter with custom configuration.
106    #[must_use]
107    pub fn with_config(config: ConversionConfig) -> Self {
108        Self {
109            formats: Self::create_format_list(),
110            config: Some(config),
111            plugins: None,
112        }
113    }
114
115    /// Create a new converter with plugins enabled.
116    ///
117    /// Loads plugins from `~/.config/forb/plugins/` and any additional
118    /// configured directories.
119    ///
120    /// # Errors
121    ///
122    /// Returns an error if the Python runtime fails to initialize.
123    /// Individual plugin load failures are logged but don't prevent
124    /// the converter from being created.
125    #[cfg(feature = "python")]
126    pub fn with_plugins() -> Result<(Self, PluginLoadReport), PluginError> {
127        let mut registry = PluginRegistry::new();
128        let report = registry.load_default()?;
129
130        // Set the global expression context for plugin variables/functions
131        expr_context::set_from_registry(&registry);
132
133        // Register plugin currencies with the rate cache
134        Self::register_plugin_currencies(&registry);
135
136        Ok((
137            Self {
138                formats: Self::create_format_list(),
139                config: None,
140                plugins: Some(registry),
141            },
142            report,
143        ))
144    }
145
146    /// Register plugin currencies with the rate cache.
147    #[cfg(feature = "python")]
148    fn register_plugin_currencies(registry: &PluginRegistry) {
149        use formats::currency_rates::{register_plugin_currency, PluginCurrencyInfo};
150
151        for currency in registry.currencies() {
152            if let Some((rate, base)) = currency.rate() {
153                register_plugin_currency(
154                    currency.code(),
155                    PluginCurrencyInfo {
156                        rate,
157                        base_currency: base,
158                        symbol: currency.symbol().to_string(),
159                        decimals: currency.decimals(),
160                    },
161                );
162            }
163        }
164    }
165
166    /// Set the plugin registry.
167    #[must_use]
168    pub fn set_plugins(mut self, plugins: PluginRegistry) -> Self {
169        self.plugins = Some(plugins);
170        self
171    }
172
    /// Get the plugin registry (if any).
    ///
    /// Returns `None` when the converter was built without plugins and
    /// [`Formatorbit::set_plugins`] has not been called.
    #[must_use]
    pub fn plugins(&self) -> Option<&PluginRegistry> {
        self.plugins.as_ref()
    }
178
179    /// Set the configuration.
180    #[must_use]
181    pub fn set_config(mut self, config: ConversionConfig) -> Self {
182        self.config = Some(config);
183        self
184    }
185
    /// Get the current configuration (if any).
    ///
    /// Returns `None` when the converter was built without a configuration
    /// and [`Formatorbit::set_config`] has not been called.
    #[must_use]
    pub fn config(&self) -> Option<&ConversionConfig> {
        self.config.as_ref()
    }
191
    /// Create the list of built-in formats.
    ///
    /// The order of this list is significant: `interpret` collects results
    /// by walking the formats in this order and then sorts them by confidence
    /// with a stable sort, so interpretations with equal confidence keep the
    /// relative order given here.
    fn create_format_list() -> Vec<Box<dyn Format>> {
        vec![
            // High-specificity formats first
            Box::new(JwtFormat),
            Box::new(UlidFormat),
            Box::new(UuidFormat),
            Box::new(IpAddrFormat),
            Box::new(CidrFormat),
            Box::new(CoordsFormat),
            Box::new(ColorFormat),
            Box::new(CharFormat),
            Box::new(NaturalDateFormat),
            Box::new(ConstantsFormat),
            Box::new(PermissionsFormat),
            Box::new(UrlEncodingFormat),
            // Identifier formats (lower specificity)
            Box::new(IsbnFormat),
            Box::new(CuidFormat),
            Box::new(NanoIdFormat),
            // Common formats
            Box::new(HashFormat),
            Box::new(HexFormat),
            Box::new(BinaryFormat),
            Box::new(OctalFormat),
            Box::new(Base64Format),
            Box::new(EpochFormat),
            Box::new(DecimalFormat),
            Box::new(DataSizeFormat),
            Box::new(TemperatureFormat),
            // Unit conversions
            Box::new(LengthFormat),
            Box::new(WeightFormat),
            Box::new(VolumeFormat),
            Box::new(SpeedFormat),
            Box::new(PressureFormat),
            Box::new(AngleFormat),
            Box::new(AreaFormat),
            Box::new(EnergyFormat),
            Box::new(CurrencyFormat),
            Box::new(ExprFormat),
            Box::new(EscapeFormat),
            Box::new(DurationFormat),
            Box::new(DateTimeFormat),
            Box::new(JsonFormat),
            Box::new(GraphFormat),
            Box::new(Utf8Format),
            // Conversion-only formats (don't parse strings directly)
            Box::new(BytesToIntFormat),
            Box::new(HexdumpFormat),
            Box::new(ImageFormat),
            Box::new(MsgPackFormat),
            Box::new(PlistFormat),
            Box::new(ProtobufFormat),
            // Binary file metadata formats
            Box::new(ArchiveFormat),
            Box::new(AudioFormat),
            Box::new(FontFormat),
            Box::new(OfficeFormat),
            Box::new(PdfFormat),
            Box::new(VideoFormat),
        ]
    }
255
256    /// Parse input and return all possible interpretations.
257    ///
258    /// Returns interpretations sorted by confidence (highest first).
259    ///
260    /// # Examples
261    ///
262    /// ```
263    /// use formatorbit_core::Formatorbit;
264    ///
265    /// let forb = Formatorbit::new();
266    /// let interps = forb.interpret("550e8400-e29b-41d4-a716-446655440000");
267    ///
268    /// // UUID has high confidence due to its distinctive format
269    /// assert_eq!(interps[0].source_format, "uuid");
270    /// assert!(interps[0].confidence > 0.9);
271    /// ```
272    #[must_use]
273    pub fn interpret(&self, input: &str) -> Vec<Interpretation> {
274        let mut results = Vec::new();
275
276        // Built-in formats
277        for format in &self.formats {
278            // Skip blocked formats
279            if let Some(ref config) = self.config {
280                if config.blocking.is_format_blocked(format.id()) {
281                    continue;
282                }
283            }
284            results.extend(format.parse(input));
285        }
286
287        // Plugin decoders
288        if let Some(ref plugins) = self.plugins {
289            for decoder in plugins.decoders() {
290                // Skip blocked plugins
291                if let Some(ref config) = self.config {
292                    if config.blocking.is_format_blocked(decoder.id()) {
293                        continue;
294                    }
295                }
296                results.extend(decoder.parse(input));
297            }
298        }
299
300        // Sort by confidence, highest first
301        results.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
302        results
303    }
304
305    /// Find all possible conversions from a value.
306    #[must_use]
307    pub fn convert(&self, value: &CoreValue) -> Vec<Conversion> {
308        convert::find_all_conversions(&self.formats, value, None, None, self.config.as_ref())
309    }
310
311    /// Find all possible conversions, excluding the source format (to avoid hex→hex etc.)
312    /// The source_format is also included in the path to show the full conversion chain.
313    #[must_use]
314    pub fn convert_excluding(&self, value: &CoreValue, source_format: &str) -> Vec<Conversion> {
315        #[allow(unused_mut)]
316        let mut conversions = convert::find_all_conversions(
317            &self.formats,
318            value,
319            Some(source_format),
320            Some(source_format),
321            self.config.as_ref(),
322        );
323
324        // Add plugin traits
325        #[cfg(feature = "python")]
326        if let Some(ref plugins) = self.plugins {
327            conversions.extend(self.get_plugin_traits(value, source_format, plugins));
328        }
329
330        conversions
331    }
332
333    /// Get trait conversions from plugins.
334    #[cfg(feature = "python")]
335    fn get_plugin_traits(
336        &self,
337        value: &CoreValue,
338        source_format: &str,
339        plugins: &PluginRegistry,
340    ) -> Vec<Conversion> {
341        use types::{ConversionKind, ConversionPriority, ConversionStep};
342
343        let mut traits = Vec::new();
344
345        // Get the value type name for filtering
346        let value_type = match value {
347            CoreValue::Int { .. } => "int",
348            CoreValue::Float(_) => "float",
349            CoreValue::String(_) => "string",
350            CoreValue::Bytes(_) => "bytes",
351            CoreValue::Bool(_) => "bool",
352            CoreValue::DateTime(_) => "datetime",
353            CoreValue::Json(_) => "json",
354            _ => "",
355        };
356
357        for trait_plugin in plugins.traits() {
358            // Check if this trait applies to this value type
359            let applies = trait_plugin.value_types().is_empty()
360                || trait_plugin.value_types().iter().any(|t| t == value_type);
361
362            if !applies {
363                continue;
364            }
365
366            // Call the trait's check method
367            if let Some(description) = trait_plugin.check(value) {
368                traits.push(Conversion {
369                    value: value.clone(),
370                    target_format: trait_plugin.id().to_string(),
371                    display: description.clone(),
372                    path: vec![source_format.to_string(), trait_plugin.id().to_string()],
373                    is_lossy: false,
374                    steps: vec![ConversionStep {
375                        format: trait_plugin.id().to_string(),
376                        value: value.clone(),
377                        display: description,
378                    }],
379                    priority: ConversionPriority::Semantic,
380                    kind: ConversionKind::Trait,
381                    display_only: true,
382                    hidden: false,
383                    rich_display: vec![],
384                });
385            }
386        }
387
388        traits
389    }
390
391    /// Combined: interpret input and find all conversions.
392    ///
393    /// This is the main entry point for most use cases. It parses the input,
394    /// finds all possible interpretations, and for each interpretation,
395    /// discovers all possible conversions via BFS traversal.
396    ///
397    /// # Examples
398    ///
399    /// ```
400    /// use formatorbit_core::Formatorbit;
401    ///
402    /// let forb = Formatorbit::new();
403    /// let results = forb.convert_all("1703456789");
404    ///
405    /// // Find the epoch timestamp interpretation
406    /// let epoch = results.iter()
407    ///     .find(|r| r.interpretation.source_format == "epoch-seconds")
408    ///     .expect("should find epoch interpretation");
409    ///
410    /// // Check that datetime conversion is available
411    /// let has_datetime = epoch.conversions.iter()
412    ///     .any(|c| c.target_format == "datetime");
413    /// assert!(has_datetime);
414    /// ```
415    #[must_use]
416    pub fn convert_all(&self, input: &str) -> Vec<ConversionResult> {
417        self.interpret(input)
418            .into_iter()
419            .map(|interp| {
420                // Skip self-conversion (e.g., hex→hex)
421                let conversions = self.convert_excluding(&interp.value, &interp.source_format);
422                ConversionResult {
423                    input: input.to_string(),
424                    interpretation: interp,
425                    conversions,
426                }
427            })
428            .collect()
429    }
430
431    /// Convert raw bytes and return all possible interpretations.
432    ///
433    /// This creates a single bytes interpretation and runs the conversion graph.
434    /// Specialized formats (image, archive, etc.) will be detected from bytes.
435    ///
436    /// # Examples
437    ///
438    /// ```
439    /// use formatorbit_core::Formatorbit;
440    ///
441    /// let forb = Formatorbit::new();
442    /// let png_header = vec![0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
443    /// let results = forb.convert_bytes(&png_header);
444    /// assert!(!results.is_empty());
445    /// ```
446    #[must_use]
447    pub fn convert_bytes(&self, data: &[u8]) -> Vec<ConversionResult> {
448        self.convert_bytes_internal(data, &[])
449    }
450
    /// Convert raw bytes with only the specified formats.
    ///
    /// `format_filter` holds format names; a format is considered when
    /// `matches_name` accepts any of them. An empty filter disables
    /// filtering, which makes this equivalent to [`Formatorbit::convert_bytes`].
    /// If no selected binary format recognizes the data, the generic bytes
    /// interpretation is still returned.
    #[must_use]
    pub fn convert_bytes_filtered(
        &self,
        data: &[u8],
        format_filter: &[String],
    ) -> Vec<ConversionResult> {
        self.convert_bytes_internal(data, format_filter)
    }
460
461    /// Internal: Convert raw bytes with optional format filter.
462    ///
463    /// Creates interpretations directly from bytes:
464    /// 1. Try specialized binary formats (image, archive, etc.)
465    /// 2. Fall back to generic "bytes" interpretation
466    fn convert_bytes_internal(
467        &self,
468        data: &[u8],
469        format_filter: &[String],
470    ) -> Vec<ConversionResult> {
471        use base64::Engine;
472
473        // For specialized formats (image, archive, etc.), we need to pass
474        // the data as base64 since they expect string input.
475        // But we only create ONE interpretation to avoid duplicate processing.
476        let base64_input = base64::engine::general_purpose::STANDARD.encode(data);
477
478        let mut interpretations = Vec::new();
479
480        // Try specialized binary formats that can parse base64-encoded data
481        let binary_formats = [
482            "image", "archive", "video", "audio", "font", "pdf", "office",
483        ];
484
485        for format in &self.formats {
486            // If filter is active, check if format matches
487            if !format_filter.is_empty() {
488                let matches = format_filter.iter().any(|name| format.matches_name(name));
489                if !matches {
490                    continue;
491                }
492            }
493
494            // Only try formats that handle binary data
495            let is_binary_format = binary_formats
496                .iter()
497                .any(|&bf| format.id() == bf || format.aliases().contains(&bf));
498            if !is_binary_format {
499                continue;
500            }
501
502            // Skip blocked formats
503            if let Some(ref config) = self.config {
504                if config.blocking.is_format_blocked(format.id()) {
505                    continue;
506                }
507            }
508
509            interpretations.extend(format.parse(&base64_input));
510        }
511
512        // If no specialized format matched, create a generic bytes interpretation
513        if interpretations.is_empty() {
514            interpretations.push(Interpretation {
515                value: CoreValue::Bytes(data.to_vec()),
516                source_format: "bytes".to_string(),
517                confidence: 1.0,
518                description: format!("{} bytes", data.len()),
519                rich_display: vec![],
520            });
521        }
522
523        // Sort by confidence, highest first
524        interpretations.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
525
526        // Convert each interpretation
527        interpretations
528            .into_iter()
529            .map(|interp| {
530                let conversions = self.convert_excluding(&interp.value, &interp.source_format);
531                ConversionResult {
532                    input: base64_input.clone(),
533                    interpretation: interp,
534                    conversions,
535                }
536            })
537            .collect()
538    }
539
    /// Get info about all registered formats (for help/documentation).
    ///
    /// Returns one `FormatInfo` per built-in format, in registration order.
    /// Plugin decoders are not included.
    #[must_use]
    pub fn format_infos(&self) -> Vec<FormatInfo> {
        self.formats.iter().map(|f| f.info()).collect()
    }
545
546    /// Get info about formats that have validation support.
547    ///
548    /// These formats provide detailed error messages when `--only` is used
549    /// and parsing fails.
550    #[must_use]
551    pub fn formats_with_validation(&self) -> Vec<FormatInfo> {
552        self.formats
553            .iter()
554            .map(|f| f.info())
555            .filter(|info| info.has_validation)
556            .collect()
557    }
558
559    /// Parse input with only the specified formats (by id or alias).
560    /// If `format_filter` is empty, all formats are used.
561    #[must_use]
562    pub fn interpret_filtered(&self, input: &str, format_filter: &[String]) -> Vec<Interpretation> {
563        if format_filter.is_empty() {
564            return self.interpret(input);
565        }
566
567        let mut results = Vec::new();
568        for format in &self.formats {
569            // Check if this format matches any of the filter names
570            let matches = format_filter.iter().any(|name| format.matches_name(name));
571            if matches {
572                results.extend(format.parse(input));
573            }
574        }
575        // Sort by confidence, highest first
576        results.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
577        results
578    }
579
580    /// Combined: interpret input (with filter) and find all conversions.
581    #[must_use]
582    pub fn convert_all_filtered(
583        &self,
584        input: &str,
585        format_filter: &[String],
586    ) -> Vec<ConversionResult> {
587        self.interpret_filtered(input, format_filter)
588            .into_iter()
589            .map(|interp| {
590                // Skip self-conversion (e.g., hex→hex)
591                let conversions = self.convert_excluding(&interp.value, &interp.source_format);
592                ConversionResult {
593                    input: input.to_string(),
594                    interpretation: interp,
595                    conversions,
596                }
597            })
598            .collect()
599    }
600
601    /// Validate input for a specific format and return an error message if invalid.
602    ///
603    /// This is useful when a user requests a specific format (e.g., `--only json`)
604    /// and we want to explain why parsing failed.
605    ///
606    /// Returns `None` if the format doesn't provide validation or the input is valid.
607    pub fn validate(&self, input: &str, format_name: &str) -> Option<String> {
608        for format in &self.formats {
609            if format.matches_name(format_name) {
610                return format.validate(input);
611            }
612        }
613        None
614    }
615
    /// Check if a format name (id or alias) is valid.
    ///
    /// Only the built-in formats are consulted; plugin decoders are not.
    #[must_use]
    pub fn is_valid_format(&self, name: &str) -> bool {
        self.formats.iter().any(|f| f.matches_name(name))
    }
621
    /// Get a list of all valid format names (ids only, not aliases).
    ///
    /// Ids are returned in registration order and cover the built-in formats
    /// only; plugin decoders are not included.
    #[must_use]
    pub fn format_ids(&self) -> Vec<&'static str> {
        self.formats.iter().map(|f| f.id()).collect()
    }
627}
628
629impl Default for Formatorbit {
630    fn default() -> Self {
631        Self::new()
632    }
633}
634
635#[cfg(test)]
636mod tests {
637    use super::*;
638
639    /// Regression test: SHA-1 hash detection should appear in results
640    #[test]
641    fn test_sha1_hash_interpretation() {
642        let forb = Formatorbit::new();
643        // SHA-1 of empty string
644        let results = forb.convert_all("da39a3ee5e6b4b0d3255bfef95601890afd80709");
645
646        let has_hash = results
647            .iter()
648            .any(|r| r.interpretation.source_format == "hash");
649
650        assert!(
651            has_hash,
652            "Expected 'hash' interpretation but got: {:?}",
653            results
654                .iter()
655                .map(|r| &r.interpretation.source_format)
656                .collect::<Vec<_>>()
657        );
658
659        // Verify hash description mentions SHA-1
660        let hash_result = results
661            .iter()
662            .find(|r| r.interpretation.source_format == "hash")
663            .unwrap();
664        assert!(hash_result.interpretation.description.contains("SHA-1"));
665    }
666
667    /// Test that words are not parsed as geohash (geohash input parsing removed)
668    #[test]
669    fn test_words_not_parsed_as_geohash() {
670        let forb = Formatorbit::new();
671        // "rustfmt" was previously parsed as geohash, now it should only be text
672        let results = forb.convert_all("rustfmt");
673        let formats: Vec<_> = results
674            .iter()
675            .map(|r| &r.interpretation.source_format)
676            .collect();
677
678        assert!(
679            !formats.contains(&&"coords".to_string()),
680            "should NOT have coords interpretation (geohash parsing removed)"
681        );
682        assert!(
683            formats.contains(&&"text".to_string()),
684            "should have text interpretation"
685        );
686    }
687}