formatorbit_core/
lib.rs

1//! Formatorbit Core
2//!
3//! A cross-platform data format converter. Input data (e.g., `691E01B8`) and
4//! get all possible interpretations and conversions automatically.
5//!
6//! # Quick Start
7//!
8//! ```
9//! use formatorbit_core::Formatorbit;
10//!
11//! let forb = Formatorbit::new();
12//!
13//! // Get all interpretations and conversions
14//! let results = forb.convert_all("691E01B8");
15//! assert!(!results.is_empty());
16//!
17//! // The highest-confidence interpretation is first
18//! let best = &results[0];
19//! println!("Format: {}", best.interpretation.source_format);
20//! println!("Confidence: {:.0}%", best.interpretation.confidence * 100.0);
21//!
22//! // Each interpretation has conversions to other formats
23//! for conv in &best.conversions[..3.min(best.conversions.len())] {
24//!     println!("  → {}: {}", conv.target_format, conv.display);
25//! }
26//! ```
27//!
28//! # Filtering by Format
29//!
30//! ```
31//! use formatorbit_core::Formatorbit;
32//!
33//! let forb = Formatorbit::new();
34//!
35//! // Force interpretation as a specific format
36//! let results = forb.convert_all_filtered("1703456789", &["epoch".into()]);
37//! assert_eq!(results[0].interpretation.source_format, "epoch-seconds");
38//! ```
39
40pub mod convert;
41
/// Truncate a string to at most `max_chars` characters, appending "..." if truncated.
///
/// This is UTF-8 safe - it counts characters (`char`s), not bytes, and never
/// cuts through a multi-byte sequence.
///
/// The ellipsis counts toward the limit, so a truncated result is exactly
/// `max_chars` characters long. When `max_chars` is smaller than the ellipsis
/// itself (fewer than 3), there is no room for it: the string is cut hard
/// instead, so the result never exceeds `max_chars`.
///
/// # Examples
///
/// ```
/// use formatorbit_core::truncate_str;
///
/// assert_eq!(truncate_str("hello", 10), "hello");
/// assert_eq!(truncate_str("hello world", 8), "hello...");
/// assert_eq!(truncate_str("hello", 2), "he");
/// ```
#[must_use]
pub fn truncate_str(s: &str, max_chars: usize) -> String {
    const ELLIPSIS: &str = "...";

    // If there is no character at index `max_chars`, the string already fits.
    // `nth` stops as soon as the limit is passed, so long inputs are not
    // scanned to the end just to learn that they are too long.
    if s.char_indices().nth(max_chars).is_none() {
        return s.to_string();
    }

    // `ELLIPSIS` is ASCII, so its byte length equals its character count.
    match max_chars.checked_sub(ELLIPSIS.len()) {
        Some(keep) => {
            // Byte offset where the `keep`-th character starts; slicing there
            // is always on a character boundary.
            let cut = s.char_indices().nth(keep).map_or(s.len(), |(idx, _)| idx);
            format!("{}{}", &s[..cut], ELLIPSIS)
        }
        // No room for the ellipsis: honour the limit with a plain cut.
        None => s.chars().take(max_chars).collect(),
    }
}
55pub mod format;
56pub mod formats;
57pub mod types;
58
59pub use format::{Format, FormatInfo};
60pub use types::*;
61
62use formats::{
63    AngleFormat, ArchiveFormat, AreaFormat, AudioFormat, Base64Format, BinaryFormat,
64    BytesToIntFormat, CharFormat, CidrFormat, ColorFormat, ConstantsFormat, CoordsFormat,
65    CuidFormat, CurrencyFormat, DataSizeFormat, DateTimeFormat, DecimalFormat, DigestFormat,
66    DurationFormat, EnergyFormat, EpochFormat, EscapeFormat, ExprFormat, FontFormat, GraphFormat,
67    HashFormat, HexFormat, HexdumpFormat, ImageFormat, IpAddrFormat, IsbnFormat, JsonFormat,
68    JwtFormat, LengthFormat, MsgPackFormat, NanoIdFormat, NaturalDateFormat, OctalFormat,
69    OfficeFormat, PdfFormat, PermissionsFormat, PlistFormat, PressureFormat, ProtobufFormat,
70    SpeedFormat, TemperatureFormat, UlidFormat, UrlEncodingFormat, Utf8Format, UuidFormat,
71    VideoFormat, VolumeFormat, WeightFormat,
72};
73
74/// Main entry point - a configured converter instance.
75pub struct Formatorbit {
76    formats: Vec<Box<dyn Format>>,
77    config: Option<ConversionConfig>,
78}
79
80impl Formatorbit {
81    /// Create a new converter with all built-in formats.
82    ///
83    /// # Examples
84    ///
85    /// ```
86    /// use formatorbit_core::Formatorbit;
87    ///
88    /// let forb = Formatorbit::new();
89    /// let results = forb.convert_all("0xDEADBEEF");
90    /// assert!(!results.is_empty());
91    /// ```
92    #[must_use]
93    pub fn new() -> Self {
94        Self {
95            formats: Self::create_format_list(),
96            config: None,
97        }
98    }
99
100    /// Create a new converter with custom configuration.
101    #[must_use]
102    pub fn with_config(config: ConversionConfig) -> Self {
103        Self {
104            formats: Self::create_format_list(),
105            config: Some(config),
106        }
107    }
108
109    /// Set the configuration.
110    #[must_use]
111    pub fn set_config(mut self, config: ConversionConfig) -> Self {
112        self.config = Some(config);
113        self
114    }
115
116    /// Get the current configuration (if any).
117    #[must_use]
118    pub fn config(&self) -> Option<&ConversionConfig> {
119        self.config.as_ref()
120    }
121
122    /// Create the list of built-in formats.
123    fn create_format_list() -> Vec<Box<dyn Format>> {
124        vec![
125            // High-specificity formats first
126            Box::new(JwtFormat),
127            Box::new(UlidFormat),
128            Box::new(UuidFormat),
129            Box::new(IpAddrFormat),
130            Box::new(CidrFormat),
131            Box::new(CoordsFormat),
132            Box::new(ColorFormat),
133            Box::new(CharFormat),
134            Box::new(NaturalDateFormat),
135            Box::new(ConstantsFormat),
136            Box::new(PermissionsFormat),
137            Box::new(UrlEncodingFormat),
138            // Identifier formats (lower specificity)
139            Box::new(IsbnFormat),
140            Box::new(CuidFormat),
141            Box::new(NanoIdFormat),
142            // Common formats
143            Box::new(HashFormat),
144            Box::new(HexFormat),
145            Box::new(BinaryFormat),
146            Box::new(OctalFormat),
147            Box::new(Base64Format),
148            Box::new(EpochFormat),
149            Box::new(DecimalFormat),
150            Box::new(DataSizeFormat),
151            Box::new(TemperatureFormat),
152            // Unit conversions
153            Box::new(LengthFormat),
154            Box::new(WeightFormat),
155            Box::new(VolumeFormat),
156            Box::new(SpeedFormat),
157            Box::new(PressureFormat),
158            Box::new(AngleFormat),
159            Box::new(AreaFormat),
160            Box::new(EnergyFormat),
161            Box::new(CurrencyFormat),
162            Box::new(ExprFormat),
163            Box::new(EscapeFormat),
164            Box::new(DurationFormat),
165            Box::new(DateTimeFormat),
166            Box::new(JsonFormat),
167            Box::new(GraphFormat),
168            Box::new(Utf8Format),
169            // Conversion-only formats (don't parse strings directly)
170            Box::new(BytesToIntFormat),
171            Box::new(DigestFormat),
172            Box::new(HexdumpFormat),
173            Box::new(ImageFormat),
174            Box::new(MsgPackFormat),
175            Box::new(PlistFormat),
176            Box::new(ProtobufFormat),
177            // Binary file metadata formats
178            Box::new(ArchiveFormat),
179            Box::new(AudioFormat),
180            Box::new(FontFormat),
181            Box::new(OfficeFormat),
182            Box::new(PdfFormat),
183            Box::new(VideoFormat),
184        ]
185    }
186
187    /// Parse input and return all possible interpretations.
188    ///
189    /// Returns interpretations sorted by confidence (highest first).
190    ///
191    /// # Examples
192    ///
193    /// ```
194    /// use formatorbit_core::Formatorbit;
195    ///
196    /// let forb = Formatorbit::new();
197    /// let interps = forb.interpret("550e8400-e29b-41d4-a716-446655440000");
198    ///
199    /// // UUID has high confidence due to its distinctive format
200    /// assert_eq!(interps[0].source_format, "uuid");
201    /// assert!(interps[0].confidence > 0.9);
202    /// ```
203    #[must_use]
204    pub fn interpret(&self, input: &str) -> Vec<Interpretation> {
205        let mut results = Vec::new();
206        for format in &self.formats {
207            // Skip blocked formats
208            if let Some(ref config) = self.config {
209                if config.blocking.is_format_blocked(format.id()) {
210                    continue;
211                }
212            }
213            results.extend(format.parse(input));
214        }
215        // Sort by confidence, highest first
216        results.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
217        results
218    }
219
220    /// Find all possible conversions from a value.
221    #[must_use]
222    pub fn convert(&self, value: &CoreValue) -> Vec<Conversion> {
223        convert::find_all_conversions(&self.formats, value, None, None, self.config.as_ref())
224    }
225
226    /// Find all possible conversions, excluding the source format (to avoid hex→hex etc.)
227    /// The source_format is also included in the path to show the full conversion chain.
228    #[must_use]
229    pub fn convert_excluding(&self, value: &CoreValue, source_format: &str) -> Vec<Conversion> {
230        convert::find_all_conversions(
231            &self.formats,
232            value,
233            Some(source_format),
234            Some(source_format),
235            self.config.as_ref(),
236        )
237    }
238
239    /// Combined: interpret input and find all conversions.
240    ///
241    /// This is the main entry point for most use cases. It parses the input,
242    /// finds all possible interpretations, and for each interpretation,
243    /// discovers all possible conversions via BFS traversal.
244    ///
245    /// # Examples
246    ///
247    /// ```
248    /// use formatorbit_core::Formatorbit;
249    ///
250    /// let forb = Formatorbit::new();
251    /// let results = forb.convert_all("1703456789");
252    ///
253    /// // Find the epoch timestamp interpretation
254    /// let epoch = results.iter()
255    ///     .find(|r| r.interpretation.source_format == "epoch-seconds")
256    ///     .expect("should find epoch interpretation");
257    ///
258    /// // Check that datetime conversion is available
259    /// let has_datetime = epoch.conversions.iter()
260    ///     .any(|c| c.target_format == "datetime");
261    /// assert!(has_datetime);
262    /// ```
263    #[must_use]
264    pub fn convert_all(&self, input: &str) -> Vec<ConversionResult> {
265        self.interpret(input)
266            .into_iter()
267            .map(|interp| {
268                // Skip self-conversion (e.g., hex→hex)
269                let conversions = self.convert_excluding(&interp.value, &interp.source_format);
270                ConversionResult {
271                    input: input.to_string(),
272                    interpretation: interp,
273                    conversions,
274                }
275            })
276            .collect()
277    }
278
279    /// Convert raw bytes and return all possible interpretations.
280    ///
281    /// This creates a single bytes interpretation and runs the conversion graph.
282    /// Specialized formats (image, archive, etc.) will be detected from bytes.
283    ///
284    /// # Examples
285    ///
286    /// ```
287    /// use formatorbit_core::Formatorbit;
288    ///
289    /// let forb = Formatorbit::new();
290    /// let png_header = vec![0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
291    /// let results = forb.convert_bytes(&png_header);
292    /// assert!(!results.is_empty());
293    /// ```
294    #[must_use]
295    pub fn convert_bytes(&self, data: &[u8]) -> Vec<ConversionResult> {
296        self.convert_bytes_internal(data, &[])
297    }
298
299    /// Convert raw bytes with only the specified formats.
300    #[must_use]
301    pub fn convert_bytes_filtered(
302        &self,
303        data: &[u8],
304        format_filter: &[String],
305    ) -> Vec<ConversionResult> {
306        self.convert_bytes_internal(data, format_filter)
307    }
308
309    /// Internal: Convert raw bytes with optional format filter.
310    ///
311    /// Creates interpretations directly from bytes:
312    /// 1. Try specialized binary formats (image, archive, etc.)
313    /// 2. Fall back to generic "bytes" interpretation
314    fn convert_bytes_internal(
315        &self,
316        data: &[u8],
317        format_filter: &[String],
318    ) -> Vec<ConversionResult> {
319        use base64::Engine;
320
321        // For specialized formats (image, archive, etc.), we need to pass
322        // the data as base64 since they expect string input.
323        // But we only create ONE interpretation to avoid duplicate processing.
324        let base64_input = base64::engine::general_purpose::STANDARD.encode(data);
325
326        let mut interpretations = Vec::new();
327
328        // Try specialized binary formats that can parse base64-encoded data
329        let binary_formats = [
330            "image", "archive", "video", "audio", "font", "pdf", "office",
331        ];
332
333        for format in &self.formats {
334            // If filter is active, check if format matches
335            if !format_filter.is_empty() {
336                let matches = format_filter.iter().any(|name| format.matches_name(name));
337                if !matches {
338                    continue;
339                }
340            }
341
342            // Only try formats that handle binary data
343            let is_binary_format = binary_formats
344                .iter()
345                .any(|&bf| format.id() == bf || format.aliases().contains(&bf));
346            if !is_binary_format {
347                continue;
348            }
349
350            // Skip blocked formats
351            if let Some(ref config) = self.config {
352                if config.blocking.is_format_blocked(format.id()) {
353                    continue;
354                }
355            }
356
357            interpretations.extend(format.parse(&base64_input));
358        }
359
360        // If no specialized format matched, create a generic bytes interpretation
361        if interpretations.is_empty() {
362            interpretations.push(Interpretation {
363                value: CoreValue::Bytes(data.to_vec()),
364                source_format: "bytes".to_string(),
365                confidence: 1.0,
366                description: format!("{} bytes", data.len()),
367                rich_display: vec![],
368            });
369        }
370
371        // Sort by confidence, highest first
372        interpretations.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
373
374        // Convert each interpretation
375        interpretations
376            .into_iter()
377            .map(|interp| {
378                let conversions = self.convert_excluding(&interp.value, &interp.source_format);
379                ConversionResult {
380                    input: base64_input.clone(),
381                    interpretation: interp,
382                    conversions,
383                }
384            })
385            .collect()
386    }
387
388    /// Get info about all registered formats (for help/documentation).
389    #[must_use]
390    pub fn format_infos(&self) -> Vec<FormatInfo> {
391        self.formats.iter().map(|f| f.info()).collect()
392    }
393
394    /// Parse input with only the specified formats (by id or alias).
395    /// If `format_filter` is empty, all formats are used.
396    #[must_use]
397    pub fn interpret_filtered(&self, input: &str, format_filter: &[String]) -> Vec<Interpretation> {
398        if format_filter.is_empty() {
399            return self.interpret(input);
400        }
401
402        let mut results = Vec::new();
403        for format in &self.formats {
404            // Check if this format matches any of the filter names
405            let matches = format_filter.iter().any(|name| format.matches_name(name));
406            if matches {
407                results.extend(format.parse(input));
408            }
409        }
410        // Sort by confidence, highest first
411        results.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
412        results
413    }
414
415    /// Combined: interpret input (with filter) and find all conversions.
416    #[must_use]
417    pub fn convert_all_filtered(
418        &self,
419        input: &str,
420        format_filter: &[String],
421    ) -> Vec<ConversionResult> {
422        self.interpret_filtered(input, format_filter)
423            .into_iter()
424            .map(|interp| {
425                // Skip self-conversion (e.g., hex→hex)
426                let conversions = self.convert_excluding(&interp.value, &interp.source_format);
427                ConversionResult {
428                    input: input.to_string(),
429                    interpretation: interp,
430                    conversions,
431                }
432            })
433            .collect()
434    }
435
436    /// Validate input for a specific format and return an error message if invalid.
437    ///
438    /// This is useful when a user requests a specific format (e.g., `--only json`)
439    /// and we want to explain why parsing failed.
440    ///
441    /// Returns `None` if the format doesn't provide validation or the input is valid.
442    pub fn validate(&self, input: &str, format_name: &str) -> Option<String> {
443        for format in &self.formats {
444            if format.matches_name(format_name) {
445                return format.validate(input);
446            }
447        }
448        None
449    }
450
451    /// Check if a format name (id or alias) is valid.
452    #[must_use]
453    pub fn is_valid_format(&self, name: &str) -> bool {
454        self.formats.iter().any(|f| f.matches_name(name))
455    }
456
457    /// Get a list of all valid format names (ids only, not aliases).
458    #[must_use]
459    pub fn format_ids(&self) -> Vec<&'static str> {
460        self.formats.iter().map(|f| f.id()).collect()
461    }
462}
463
464impl Default for Formatorbit {
465    fn default() -> Self {
466        Self::new()
467    }
468}
469
#[cfg(test)]
mod tests {
    use super::*;

    /// Regression test: a SHA-1 digest must yield a "hash" interpretation
    /// whose description names SHA-1.
    #[test]
    fn test_sha1_hash_interpretation() {
        // SHA-1 of empty string
        let sha1_of_empty = "da39a3ee5e6b4b0d3255bfef95601890afd80709";
        let results = Formatorbit::new().convert_all(sha1_of_empty);

        // Collected up front so a failure can report what was found instead.
        let seen_formats: Vec<&String> = results
            .iter()
            .map(|r| &r.interpretation.source_format)
            .collect();

        let hash_result = results
            .iter()
            .find(|r| r.interpretation.source_format == "hash")
            .unwrap_or_else(|| {
                panic!(
                    "Expected 'hash' interpretation but got: {:?}",
                    seen_formats
                )
            });

        // Verify hash description mentions SHA-1
        assert!(hash_result.interpretation.description.contains("SHA-1"));
    }

    /// Geohash-like words must surface both the coords and the text
    /// interpretation in core
    /// (CLI may filter low-confidence interpretations for cleaner output).
    #[test]
    fn test_geohash_word_returns_multiple_interpretations() {
        // "rustfmt" is valid geohash but core should return both interpretations
        let results = Formatorbit::new().convert_all("rustfmt");

        let has_format = |wanted: &str| {
            results
                .iter()
                .any(|r| r.interpretation.source_format == wanted)
        };

        assert!(has_format("coords"), "should have coords interpretation");
        assert!(
            has_format("text"),
            "should have text interpretation (low confidence fallback)"
        );
    }
}