formatorbit_core/lib.rs
1//! Formatorbit Core
2//!
3//! A cross-platform data format converter. Input data (e.g., `691E01B8`) and
4//! get all possible interpretations and conversions automatically.
5//!
6//! # Quick Start
7//!
8//! ```
9//! use formatorbit_core::Formatorbit;
10//!
11//! let forb = Formatorbit::new();
12//!
13//! // Get all interpretations and conversions
14//! let results = forb.convert_all("691E01B8");
15//! assert!(!results.is_empty());
16//!
17//! // The highest-confidence interpretation is first
18//! let best = &results[0];
19//! println!("Format: {}", best.interpretation.source_format);
20//! println!("Confidence: {:.0}%", best.interpretation.confidence * 100.0);
21//!
22//! // Each interpretation has conversions to other formats
23//! for conv in &best.conversions[..3.min(best.conversions.len())] {
24//! println!(" → {}: {}", conv.target_format, conv.display);
25//! }
26//! ```
27//!
28//! # Filtering by Format
29//!
30//! ```
31//! use formatorbit_core::Formatorbit;
32//!
33//! let forb = Formatorbit::new();
34//!
35//! // Force interpretation as a specific format
36//! let results = forb.convert_all_filtered("1703456789", &["epoch".into()]);
37//! assert_eq!(results[0].interpretation.source_format, "epoch-seconds");
38//! ```
39
40pub mod convert;
41
/// Truncate a string to at most `max_chars` characters, marking truncation with "...".
///
/// Strings that already fit are returned unchanged. Otherwise the result consists of the
/// first `max_chars - 3` characters followed by `"..."`, so its length never exceeds
/// `max_chars`. When `max_chars` is smaller than 3 the ellipsis itself is shortened to fit
/// (`max_chars == 2` yields `".."`, `max_chars == 0` yields an empty string).
///
/// This is UTF-8 safe - it counts characters, not bytes.
#[must_use]
pub fn truncate_str(s: &str, max_chars: usize) -> String {
    const ELLIPSIS: &str = "...";

    // Probing for the character at index `max_chars` answers "is the string too long?"
    // without walking the part of the string beyond the cut-off point.
    if s.chars().nth(max_chars).is_none() {
        return s.to_string();
    }

    // The ellipsis is pure ASCII, so its byte length equals its character count and
    // slicing it by bytes is safe.
    let ellipsis = &ELLIPSIS[..ELLIPSIS.len().min(max_chars)];
    let keep = max_chars - ellipsis.len();

    let mut truncated: String = s.chars().take(keep).collect();
    truncated.push_str(ellipsis);
    truncated
}
55pub mod format;
56pub mod formats;
57pub mod types;
58
59pub use format::{Format, FormatInfo};
60pub use types::*;
61
62use formats::{
63 AngleFormat, ArchiveFormat, AreaFormat, AudioFormat, Base64Format, BinaryFormat,
64 BytesToIntFormat, CharFormat, CidrFormat, ColorFormat, ConstantsFormat, CoordsFormat,
65 CuidFormat, CurrencyFormat, DataSizeFormat, DateTimeFormat, DecimalFormat, DigestFormat,
66 DurationFormat, EnergyFormat, EpochFormat, EscapeFormat, ExprFormat, FontFormat, GraphFormat,
67 HashFormat, HexFormat, HexdumpFormat, ImageFormat, IpAddrFormat, IsbnFormat, JsonFormat,
68 JwtFormat, LengthFormat, MsgPackFormat, NanoIdFormat, NaturalDateFormat, OctalFormat,
69 OfficeFormat, PdfFormat, PermissionsFormat, PlistFormat, PressureFormat, ProtobufFormat,
70 SpeedFormat, TemperatureFormat, UlidFormat, UrlEncodingFormat, Utf8Format, UuidFormat,
71 VideoFormat, VolumeFormat, WeightFormat,
72};
73
74/// Main entry point - a configured converter instance.
75pub struct Formatorbit {
76 formats: Vec<Box<dyn Format>>,
77 config: Option<ConversionConfig>,
78}
79
80impl Formatorbit {
81 /// Create a new converter with all built-in formats.
82 ///
83 /// # Examples
84 ///
85 /// ```
86 /// use formatorbit_core::Formatorbit;
87 ///
88 /// let forb = Formatorbit::new();
89 /// let results = forb.convert_all("0xDEADBEEF");
90 /// assert!(!results.is_empty());
91 /// ```
92 #[must_use]
93 pub fn new() -> Self {
94 Self {
95 formats: Self::create_format_list(),
96 config: None,
97 }
98 }
99
100 /// Create a new converter with custom configuration.
101 #[must_use]
102 pub fn with_config(config: ConversionConfig) -> Self {
103 Self {
104 formats: Self::create_format_list(),
105 config: Some(config),
106 }
107 }
108
109 /// Set the configuration.
110 #[must_use]
111 pub fn set_config(mut self, config: ConversionConfig) -> Self {
112 self.config = Some(config);
113 self
114 }
115
116 /// Get the current configuration (if any).
117 #[must_use]
118 pub fn config(&self) -> Option<&ConversionConfig> {
119 self.config.as_ref()
120 }
121
122 /// Create the list of built-in formats.
123 fn create_format_list() -> Vec<Box<dyn Format>> {
124 vec![
125 // High-specificity formats first
126 Box::new(JwtFormat),
127 Box::new(UlidFormat),
128 Box::new(UuidFormat),
129 Box::new(IpAddrFormat),
130 Box::new(CidrFormat),
131 Box::new(CoordsFormat),
132 Box::new(ColorFormat),
133 Box::new(CharFormat),
134 Box::new(NaturalDateFormat),
135 Box::new(ConstantsFormat),
136 Box::new(PermissionsFormat),
137 Box::new(UrlEncodingFormat),
138 // Identifier formats (lower specificity)
139 Box::new(IsbnFormat),
140 Box::new(CuidFormat),
141 Box::new(NanoIdFormat),
142 // Common formats
143 Box::new(HashFormat),
144 Box::new(HexFormat),
145 Box::new(BinaryFormat),
146 Box::new(OctalFormat),
147 Box::new(Base64Format),
148 Box::new(EpochFormat),
149 Box::new(DecimalFormat),
150 Box::new(DataSizeFormat),
151 Box::new(TemperatureFormat),
152 // Unit conversions
153 Box::new(LengthFormat),
154 Box::new(WeightFormat),
155 Box::new(VolumeFormat),
156 Box::new(SpeedFormat),
157 Box::new(PressureFormat),
158 Box::new(AngleFormat),
159 Box::new(AreaFormat),
160 Box::new(EnergyFormat),
161 Box::new(CurrencyFormat),
162 Box::new(ExprFormat),
163 Box::new(EscapeFormat),
164 Box::new(DurationFormat),
165 Box::new(DateTimeFormat),
166 Box::new(JsonFormat),
167 Box::new(GraphFormat),
168 Box::new(Utf8Format),
169 // Conversion-only formats (don't parse strings directly)
170 Box::new(BytesToIntFormat),
171 Box::new(DigestFormat),
172 Box::new(HexdumpFormat),
173 Box::new(ImageFormat),
174 Box::new(MsgPackFormat),
175 Box::new(PlistFormat),
176 Box::new(ProtobufFormat),
177 // Binary file metadata formats
178 Box::new(ArchiveFormat),
179 Box::new(AudioFormat),
180 Box::new(FontFormat),
181 Box::new(OfficeFormat),
182 Box::new(PdfFormat),
183 Box::new(VideoFormat),
184 ]
185 }
186
187 /// Parse input and return all possible interpretations.
188 ///
189 /// Returns interpretations sorted by confidence (highest first).
190 ///
191 /// # Examples
192 ///
193 /// ```
194 /// use formatorbit_core::Formatorbit;
195 ///
196 /// let forb = Formatorbit::new();
197 /// let interps = forb.interpret("550e8400-e29b-41d4-a716-446655440000");
198 ///
199 /// // UUID has high confidence due to its distinctive format
200 /// assert_eq!(interps[0].source_format, "uuid");
201 /// assert!(interps[0].confidence > 0.9);
202 /// ```
203 #[must_use]
204 pub fn interpret(&self, input: &str) -> Vec<Interpretation> {
205 let mut results = Vec::new();
206 for format in &self.formats {
207 // Skip blocked formats
208 if let Some(ref config) = self.config {
209 if config.blocking.is_format_blocked(format.id()) {
210 continue;
211 }
212 }
213 results.extend(format.parse(input));
214 }
215 // Sort by confidence, highest first
216 results.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
217 results
218 }
219
220 /// Find all possible conversions from a value.
221 #[must_use]
222 pub fn convert(&self, value: &CoreValue) -> Vec<Conversion> {
223 convert::find_all_conversions(&self.formats, value, None, None, self.config.as_ref())
224 }
225
226 /// Find all possible conversions, excluding the source format (to avoid hex→hex etc.)
227 /// The source_format is also included in the path to show the full conversion chain.
228 #[must_use]
229 pub fn convert_excluding(&self, value: &CoreValue, source_format: &str) -> Vec<Conversion> {
230 convert::find_all_conversions(
231 &self.formats,
232 value,
233 Some(source_format),
234 Some(source_format),
235 self.config.as_ref(),
236 )
237 }
238
239 /// Combined: interpret input and find all conversions.
240 ///
241 /// This is the main entry point for most use cases. It parses the input,
242 /// finds all possible interpretations, and for each interpretation,
243 /// discovers all possible conversions via BFS traversal.
244 ///
245 /// # Examples
246 ///
247 /// ```
248 /// use formatorbit_core::Formatorbit;
249 ///
250 /// let forb = Formatorbit::new();
251 /// let results = forb.convert_all("1703456789");
252 ///
253 /// // Find the epoch timestamp interpretation
254 /// let epoch = results.iter()
255 /// .find(|r| r.interpretation.source_format == "epoch-seconds")
256 /// .expect("should find epoch interpretation");
257 ///
258 /// // Check that datetime conversion is available
259 /// let has_datetime = epoch.conversions.iter()
260 /// .any(|c| c.target_format == "datetime");
261 /// assert!(has_datetime);
262 /// ```
263 #[must_use]
264 pub fn convert_all(&self, input: &str) -> Vec<ConversionResult> {
265 self.interpret(input)
266 .into_iter()
267 .map(|interp| {
268 // Skip self-conversion (e.g., hex→hex)
269 let conversions = self.convert_excluding(&interp.value, &interp.source_format);
270 ConversionResult {
271 input: input.to_string(),
272 interpretation: interp,
273 conversions,
274 }
275 })
276 .collect()
277 }
278
279 /// Convert raw bytes and return all possible interpretations.
280 ///
281 /// This creates a single bytes interpretation and runs the conversion graph.
282 /// Specialized formats (image, archive, etc.) will be detected from bytes.
283 ///
284 /// # Examples
285 ///
286 /// ```
287 /// use formatorbit_core::Formatorbit;
288 ///
289 /// let forb = Formatorbit::new();
290 /// let png_header = vec![0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
291 /// let results = forb.convert_bytes(&png_header);
292 /// assert!(!results.is_empty());
293 /// ```
294 #[must_use]
295 pub fn convert_bytes(&self, data: &[u8]) -> Vec<ConversionResult> {
296 self.convert_bytes_internal(data, &[])
297 }
298
299 /// Convert raw bytes with only the specified formats.
300 #[must_use]
301 pub fn convert_bytes_filtered(
302 &self,
303 data: &[u8],
304 format_filter: &[String],
305 ) -> Vec<ConversionResult> {
306 self.convert_bytes_internal(data, format_filter)
307 }
308
309 /// Internal: Convert raw bytes with optional format filter.
310 ///
311 /// Creates interpretations directly from bytes:
312 /// 1. Try specialized binary formats (image, archive, etc.)
313 /// 2. Fall back to generic "bytes" interpretation
314 fn convert_bytes_internal(
315 &self,
316 data: &[u8],
317 format_filter: &[String],
318 ) -> Vec<ConversionResult> {
319 use base64::Engine;
320
321 // For specialized formats (image, archive, etc.), we need to pass
322 // the data as base64 since they expect string input.
323 // But we only create ONE interpretation to avoid duplicate processing.
324 let base64_input = base64::engine::general_purpose::STANDARD.encode(data);
325
326 let mut interpretations = Vec::new();
327
328 // Try specialized binary formats that can parse base64-encoded data
329 let binary_formats = [
330 "image", "archive", "video", "audio", "font", "pdf", "office",
331 ];
332
333 for format in &self.formats {
334 // If filter is active, check if format matches
335 if !format_filter.is_empty() {
336 let matches = format_filter.iter().any(|name| format.matches_name(name));
337 if !matches {
338 continue;
339 }
340 }
341
342 // Only try formats that handle binary data
343 let is_binary_format = binary_formats
344 .iter()
345 .any(|&bf| format.id() == bf || format.aliases().contains(&bf));
346 if !is_binary_format {
347 continue;
348 }
349
350 // Skip blocked formats
351 if let Some(ref config) = self.config {
352 if config.blocking.is_format_blocked(format.id()) {
353 continue;
354 }
355 }
356
357 interpretations.extend(format.parse(&base64_input));
358 }
359
360 // If no specialized format matched, create a generic bytes interpretation
361 if interpretations.is_empty() {
362 interpretations.push(Interpretation {
363 value: CoreValue::Bytes(data.to_vec()),
364 source_format: "bytes".to_string(),
365 confidence: 1.0,
366 description: format!("{} bytes", data.len()),
367 rich_display: vec![],
368 });
369 }
370
371 // Sort by confidence, highest first
372 interpretations.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
373
374 // Convert each interpretation
375 interpretations
376 .into_iter()
377 .map(|interp| {
378 let conversions = self.convert_excluding(&interp.value, &interp.source_format);
379 ConversionResult {
380 input: base64_input.clone(),
381 interpretation: interp,
382 conversions,
383 }
384 })
385 .collect()
386 }
387
388 /// Get info about all registered formats (for help/documentation).
389 #[must_use]
390 pub fn format_infos(&self) -> Vec<FormatInfo> {
391 self.formats.iter().map(|f| f.info()).collect()
392 }
393
394 /// Parse input with only the specified formats (by id or alias).
395 /// If `format_filter` is empty, all formats are used.
396 #[must_use]
397 pub fn interpret_filtered(&self, input: &str, format_filter: &[String]) -> Vec<Interpretation> {
398 if format_filter.is_empty() {
399 return self.interpret(input);
400 }
401
402 let mut results = Vec::new();
403 for format in &self.formats {
404 // Check if this format matches any of the filter names
405 let matches = format_filter.iter().any(|name| format.matches_name(name));
406 if matches {
407 results.extend(format.parse(input));
408 }
409 }
410 // Sort by confidence, highest first
411 results.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
412 results
413 }
414
415 /// Combined: interpret input (with filter) and find all conversions.
416 #[must_use]
417 pub fn convert_all_filtered(
418 &self,
419 input: &str,
420 format_filter: &[String],
421 ) -> Vec<ConversionResult> {
422 self.interpret_filtered(input, format_filter)
423 .into_iter()
424 .map(|interp| {
425 // Skip self-conversion (e.g., hex→hex)
426 let conversions = self.convert_excluding(&interp.value, &interp.source_format);
427 ConversionResult {
428 input: input.to_string(),
429 interpretation: interp,
430 conversions,
431 }
432 })
433 .collect()
434 }
435
436 /// Validate input for a specific format and return an error message if invalid.
437 ///
438 /// This is useful when a user requests a specific format (e.g., `--only json`)
439 /// and we want to explain why parsing failed.
440 ///
441 /// Returns `None` if the format doesn't provide validation or the input is valid.
442 pub fn validate(&self, input: &str, format_name: &str) -> Option<String> {
443 for format in &self.formats {
444 if format.matches_name(format_name) {
445 return format.validate(input);
446 }
447 }
448 None
449 }
450
451 /// Check if a format name (id or alias) is valid.
452 #[must_use]
453 pub fn is_valid_format(&self, name: &str) -> bool {
454 self.formats.iter().any(|f| f.matches_name(name))
455 }
456
457 /// Get a list of all valid format names (ids only, not aliases).
458 #[must_use]
459 pub fn format_ids(&self) -> Vec<&'static str> {
460 self.formats.iter().map(|f| f.id()).collect()
461 }
462}
463
464impl Default for Formatorbit {
465 fn default() -> Self {
466 Self::new()
467 }
468}
469
#[cfg(test)]
mod tests {
    use super::*;

    /// Regression test: a SHA-1 digest must show up as a `hash` interpretation
    /// whose description names the algorithm.
    #[test]
    fn test_sha1_hash_interpretation() {
        // SHA-1 of empty string
        let sha1_of_empty = "da39a3ee5e6b4b0d3255bfef95601890afd80709";
        let results = Formatorbit::new().convert_all(sha1_of_empty);

        let hash_result = results
            .iter()
            .find(|r| r.interpretation.source_format == "hash")
            .unwrap_or_else(|| {
                panic!(
                    "Expected 'hash' interpretation but got: {:?}",
                    results
                        .iter()
                        .map(|r| &r.interpretation.source_format)
                        .collect::<Vec<_>>()
                )
            });

        // Verify hash description mentions SHA-1
        assert!(hash_result.interpretation.description.contains("SHA-1"));
    }

    /// Geohash-like words must yield both a coords and a text interpretation in core
    /// (the CLI may filter low-confidence interpretations for cleaner output).
    #[test]
    fn test_geohash_word_returns_multiple_interpretations() {
        // "rustfmt" is valid geohash but core should return both interpretations
        let results = Formatorbit::new().convert_all("rustfmt");
        let has_format = |wanted: &str| {
            results
                .iter()
                .any(|r| r.interpretation.source_format == wanted)
        };

        assert!(has_format("coords"), "should have coords interpretation");
        assert!(
            has_format("text"),
            "should have text interpretation (low confidence fallback)"
        );
    }
}