formatorbit_core/
convert.rs

//! Conversion graph traversal using BFS.
//!
//! This module finds all possible conversions from a value by traversing
//! a graph where nodes are values and edges are format conversions.
5
6use std::collections::VecDeque;
7
8use crate::format::Format;
9use crate::types::{
10    BlockingConfig, Conversion, ConversionConfig, ConversionKind, ConversionPriority,
11    ConversionStep, CoreValue, PriorityConfig,
12};
13
/// Maximum BFS depth to prevent infinite loops in conversion graph traversal.
///
/// `find_all_conversions` expands the queue one level at a time and stops once
/// this many levels have been processed, even if the queue is not yet empty.
const MAX_BFS_DEPTH: usize = 5;
16
/// Unit format IDs that shouldn't cross-convert to each other.
///
/// `is_blocked_path_builtin` uses this as the set of *source* formats that are
/// subject to the cross-unit check. Every entry here needs a matching arm in
/// that function's ownership `match`; an entry without one hits the
/// `_ => false` arm there, which blocks every `UNIT_TARGETS` entry for it.
const UNIT_FORMATS: &[&str] = &[
    "length",
    "weight",
    "volume",
    "speed",
    "pressure",
    "angle",
    "area",
    "energy",
    "temperature",
];
29
/// Target format IDs produced by unit conversions.
///
/// `is_blocked_path_builtin` only applies the cross-unit check when the target
/// is listed here. The groups below mirror the per-category target lists in
/// that function's `match`; the two must be kept in sync by hand.
const UNIT_TARGETS: &[&str] = &[
    // Length
    "meters",
    "kilometers",
    "centimeters",
    "millimeters",
    "feet",
    "miles",
    "inches",
    // Weight
    "grams",
    "kilograms",
    "milligrams",
    "pounds",
    "ounces",
    // Volume
    "milliliters",
    "liters",
    "gallons",
    "fluid ounces",
    "cups",
    // Speed
    "m/s",
    "km/h",
    "mph",
    "knots",
    // Pressure
    "pascals",
    "kilopascals",
    "megapascals",
    "bar",
    "psi",
    "atmospheres",
    // Angle
    "degrees",
    "radians",
    "gradians",
    "turns",
    // Area
    "square meters",
    "square kilometers",
    "square centimeters",
    "square feet",
    "acres",
    "hectares",
    // Energy
    "joules",
    "kilojoules",
    "megajoules",
    "calories",
    "kilocalories",
    "kilowatt-hours",
    // Temperature
    "celsius",
    "fahrenheit",
    "kelvin",
];
88
/// Root-based blocking: targets that should never be reached from a given root interpretation.
/// Unlike BLOCKED_PATHS which blocks immediate source→target, this blocks the target
/// regardless of the path taken (e.g., "text" blocks ipv4 via text→bytes→ipv4).
///
/// Each entry is a `(root_format, target_format)` pair, compared by exact string
/// equality in `is_root_blocked_builtin`. The check is skipped when `is_blocked`
/// is called with `root_format == None`, which `find_all_conversions` does for
/// string reinterpretation.
const ROOT_BLOCKED_TARGETS: &[(&str, &str)] = &[
    // Text bytes shouldn't be interpreted as IP addresses
    // (4 bytes of ASCII like "test" aren't an IPv4 address)
    ("text", "ipv4"),
    ("text", "ipv6"),
    // Text bytes shouldn't be interpreted as colors
    ("text", "color-rgb"),
    ("text", "color-hex"),
    ("text", "color-hsl"),
    // Text bytes shouldn't be interpreted as integers or timestamps
    // (already blocked via BLOCKED_PATHS for immediate, but this catches all paths)
    ("text", "int-be"),
    ("text", "int-le"),
    ("text", "epoch-seconds"),
    ("text", "epoch-millis"),
    ("text", "apple-cocoa"),
    ("text", "filetime"),
    ("text", "duration"),
    ("text", "duration-ms"),
    ("text", "datasize"),
    ("text", "datasize-iec"),
    ("text", "datasize-si"),
    // Text bytes shouldn't be interpreted as UUIDs
    // (any 16 bytes can be formatted as UUID, but "🏳️‍🌈oj" isn't a UUID)
    ("text", "uuid"),
    // Hex bytes shouldn't be interpreted as IP addresses
    // (DEADBEEF as bytes isn't an IP like 222.173.190.239)
    ("hex", "ipv4"),
    ("hex", "ipv6"),
    ("hex", "ip"),
    // Hex bytes shouldn't be interpreted as colors
    // (use #DEADBEEF explicitly for color interpretation)
    // NOTE(review): unlike "text", there is no ("hex", "color-hex") entry here —
    // confirm that omission is intentional.
    ("hex", "color-rgb"),
    ("hex", "color-hsl"),
];
127
/// Nonsensical source→target combinations to filter out.
/// These are conversions that technically work but are never useful.
///
/// Each entry is an `(immediate_source, target)` pair, compared by exact string
/// equality in `is_blocked_path_builtin`. During BFS the "immediate source" is
/// the last element of the current path. `find_all_conversions` additionally
/// runs a final pass that uses its `exclude_format` argument as the source, so
/// when that argument is set, pairs whose source equals it are applied to every
/// result regardless of depth.
///
/// Several `"text"` pairs are listed both here and in `ROOT_BLOCKED_TARGETS`;
/// the duplicates are harmless because both tables are only tested for membership.
const BLOCKED_PATHS: &[(&str, &str)] = &[
    // IP addresses aren't msgpack-encoded data
    ("ipv4", "msgpack"),
    ("ipv6", "msgpack"),
    // UUIDs aren't msgpack-encoded data
    ("uuid", "msgpack"),
    // IP addresses aren't timestamps
    ("ipv4", "epoch-seconds"),
    ("ipv4", "epoch-millis"),
    ("ipv4", "apple-cocoa"),
    ("ipv4", "filetime"),
    // UUIDs aren't timestamps (except v1, but that's handled separately)
    ("uuid", "epoch-seconds"),
    ("uuid", "epoch-millis"),
    ("uuid", "apple-cocoa"),
    ("uuid", "filetime"),
    // Expression results - filter noise, keep primary result and hex/binary/octal representations
    // NOTE(review): the comment above says octal is kept, yet ("expr", "octal")
    // is blocked below — confirm which is intended.
    ("expr", "msgpack"),
    ("expr", "octal"),
    ("expr", "datasize"),
    ("expr", "datasize-iec"),
    ("expr", "datasize-si"),
    ("expr", "duration"),
    ("expr", "duration-ms"),
    ("expr", "decimal"),
    // Data sizes aren't durations
    ("datasize", "duration"),
    ("datasize", "duration-ms"),
    // Durations aren't data sizes or re-interpreted as different time scales
    ("duration", "datasize"),
    ("duration", "datasize-iec"),
    ("duration", "datasize-si"),
    ("duration", "duration-ms"),
    // Colors aren't timestamps or data sizes
    ("color-hex", "duration"),
    ("color-hex", "duration-ms"),
    ("color-hex", "datasize"),
    ("color-hex", "datasize-iec"),
    ("color-hex", "datasize-si"),
    ("color-rgb", "duration"),
    ("color-rgb", "duration-ms"),
    ("color-rgb", "datasize"),
    ("color-rgb", "datasize-iec"),
    ("color-rgb", "datasize-si"),
    ("color-hsl", "duration"),
    ("color-hsl", "duration-ms"),
    ("color-hsl", "datasize"),
    ("color-hsl", "datasize-iec"),
    ("color-hsl", "datasize-si"),
    // Hexdump output is for display only - don't re-encode it
    ("hexdump", "bytes"),
    ("hexdump", "url-encoded"),
    ("hexdump", "escape-unicode"),
    ("hexdump", "escape-hex"),
    ("hexdump", "msgpack"),
    // URL-encoded shouldn't chain further (double/triple encoding is noise)
    ("url-encoded", "url-encoded"),
    ("url-encoded", "bytes"),
    ("url-encoded", "escape-unicode"),
    ("url-encoded", "escape-hex"),
    // Plain text shouldn't produce noisy intermediate conversions
    ("text", "url-encoded"),
    ("text", "graph"),
    ("text", "text"),
    ("text", "msgpack"),
    ("text", "escape-unicode"),
    // Escape sequences are terminal display formats
    ("escape-hex", "bytes"),
    ("escape-hex", "url-encoded"),
    ("escape-unicode", "bytes"),
    ("escape-unicode", "url-encoded"),
    // Text bytes shouldn't be interpreted as integers (the bytes represent characters, not numbers)
    ("text", "int-be"),
    ("text", "int-le"),
    // Circular: text → bytes → utf8 just produces the original text again
    ("text", "utf8"),
    // Text bytes shouldn't be interpreted as IP addresses or colors
    // (4 bytes of ASCII text like "test" aren't an IPv4 address or RGBA color)
    ("text", "ipv4"),
    ("text", "ipv6"),
    ("text", "color-rgb"),
    ("text", "color-hex"),
    ("text", "color-hsl"),
];
214
215/// Check if a source→target conversion should be blocked (hardcoded rules only).
216fn is_blocked_path_builtin(source_format: &str, target_format: &str) -> bool {
217    // Check explicit blocked paths
218    if BLOCKED_PATHS
219        .iter()
220        .any(|(src, tgt)| source_format == *src && target_format == *tgt)
221    {
222        return true;
223    }
224
225    // Block unit format cross-conversions
226    // (e.g., length -> temperature targets like "celsius")
227    if UNIT_FORMATS.contains(&source_format) && UNIT_TARGETS.contains(&target_format) {
228        // Check if target belongs to a different unit type
229        // Allow same-type conversions (length -> meters, etc.)
230        let source_owns_target = match source_format {
231            "length" => matches!(
232                target_format,
233                "meters"
234                    | "kilometers"
235                    | "centimeters"
236                    | "millimeters"
237                    | "feet"
238                    | "miles"
239                    | "inches"
240            ),
241            "weight" => matches!(
242                target_format,
243                "grams" | "kilograms" | "milligrams" | "pounds" | "ounces"
244            ),
245            "volume" => matches!(
246                target_format,
247                "milliliters" | "liters" | "gallons" | "fluid ounces" | "cups"
248            ),
249            "speed" => matches!(target_format, "m/s" | "km/h" | "mph" | "knots"),
250            "pressure" => matches!(
251                target_format,
252                "pascals" | "kilopascals" | "megapascals" | "bar" | "psi" | "atmospheres"
253            ),
254            "angle" => matches!(target_format, "degrees" | "radians" | "gradians" | "turns"),
255            "area" => matches!(
256                target_format,
257                "square meters"
258                    | "square kilometers"
259                    | "square centimeters"
260                    | "square feet"
261                    | "acres"
262                    | "hectares"
263            ),
264            "energy" => matches!(
265                target_format,
266                "joules"
267                    | "kilojoules"
268                    | "megajoules"
269                    | "calories"
270                    | "kilocalories"
271                    | "kilowatt-hours"
272            ),
273            "temperature" => matches!(target_format, "celsius" | "fahrenheit" | "kelvin"),
274            _ => false,
275        };
276        if !source_owns_target {
277            return true;
278        }
279    }
280
281    false
282}
283
284/// Check if a target is blocked based on root interpretation (builtin rules).
285fn is_root_blocked_builtin(root_format: &str, target_format: &str) -> bool {
286    ROOT_BLOCKED_TARGETS
287        .iter()
288        .any(|(root, target)| root_format == *root && target_format == *target)
289}
290
291/// Check if a conversion should be blocked (builtin rules + user config).
292fn is_blocked(
293    source_format: &str,
294    target_format: &str,
295    root_format: Option<&str>,
296    path: &[String],
297    blocking: Option<&BlockingConfig>,
298) -> bool {
299    // Check builtin blocked paths (immediate source→target)
300    if is_blocked_path_builtin(source_format, target_format) {
301        return true;
302    }
303
304    // Check builtin root-based blocking (root→...→target)
305    if let Some(root) = root_format {
306        if is_root_blocked_builtin(root, target_format) {
307            return true;
308        }
309    }
310
311    // Check user-configured blocking
312    if let Some(config) = blocking {
313        // Check if target format is blocked
314        if config.is_format_blocked(target_format) {
315            return true;
316        }
317        // Check if this path is blocked
318        if config.is_path_blocked(path) {
319            return true;
320        }
321        // Check root-based blocking from user config
322        if let Some(root) = root_format {
323            if config.is_root_blocked(root, target_format) {
324                return true;
325            }
326        }
327    }
328
329    false
330}
331
332/// Find all possible conversions from a value using BFS.
333///
334/// This traverses the conversion graph, collecting all reachable formats.
335/// The path is tracked to show how we got from the source to each target.
336/// If `exclude_format` is provided, skip conversions to that format (to avoid hex→hex etc.)
337/// If `source_format` is provided, it's included as the first element in the path.
338/// If `config` is provided, user-configured blocking and priority settings are applied.
339pub fn find_all_conversions(
340    formats: &[Box<dyn Format>],
341    initial: &CoreValue,
342    exclude_format: Option<&str>,
343    source_format: Option<&str>,
344    config: Option<&ConversionConfig>,
345) -> Vec<Conversion> {
346    let blocking = config.map(|c| &c.blocking);
347    let priority = config.map(|c| &c.priority);
348    let mut results = Vec::new();
349    // Track seen conversions by (target_format, display) to allow different values
350    // for the same format (e.g., int-be → epoch vs int-le → epoch with different dates)
351    let mut seen_results: std::collections::HashSet<(String, String)> =
352        std::collections::HashSet::new();
353    // Track seen formats for BFS exploration (to prevent infinite loops)
354    // We use a separate set here because we still want to explore from a format only once
355    // per unique value, but we want to show all unique results.
356    let mut seen_for_bfs: std::collections::HashSet<(String, String)> =
357        std::collections::HashSet::new();
358
359    // Pre-exclude the source format if specified
360    if let Some(excluded) = exclude_format {
361        // For the source format, we block all display values
362        seen_results.insert((excluded.to_string(), String::new()));
363    }
364
365    // Queue holds (value, path_so_far, steps_so_far)
366    let mut queue: VecDeque<(CoreValue, Vec<String>, Vec<ConversionStep>)> = VecDeque::new();
367
368    // Initialize with source format if provided, so paths show the full chain
369    let initial_path = source_format
370        .map(|s| vec![s.to_string()])
371        .unwrap_or_default();
372    queue.push_back((initial.clone(), initial_path, vec![]));
373
374    // Call source_conversions() for the source format only.
375    // These are conversions specific to the format that parsed the input,
376    // not applicable to values from other sources during BFS.
377    if let Some(source_fmt) = source_format {
378        if let Some(format) = formats.iter().find(|f| f.id() == source_fmt) {
379            for mut conv in format.source_conversions(initial) {
380                // Build path including source format
381                let mut path = vec![source_fmt.to_string()];
382                path.push(conv.target_format.clone());
383                conv.path = path;
384
385                // Build steps
386                conv.steps = vec![ConversionStep {
387                    format: conv.target_format.clone(),
388                    value: conv.value.clone(),
389                    display: conv.display.clone(),
390                }];
391
392                let result_key = (conv.target_format.clone(), conv.display.clone());
393                if seen_results.insert(result_key) {
394                    results.push(conv);
395                }
396            }
397        }
398    }
399
400    // Also format the initial value with all applicable formats
401    for format in formats {
402        if format.can_format(initial) {
403            if let Some(display) = format.format(initial) {
404                let format_id = format.id().to_string();
405                let result_key = (format_id.clone(), display.clone());
406                if seen_results.insert(result_key) {
407                    // Build path including source format if provided
408                    let mut path = source_format
409                        .map(|s| vec![s.to_string()])
410                        .unwrap_or_default();
411                    path.push(format_id.clone());
412
413                    results.push(Conversion {
414                        value: initial.clone(),
415                        target_format: format_id.clone(),
416                        display: display.clone(),
417                        path,
418                        steps: vec![ConversionStep {
419                            format: format_id,
420                            value: initial.clone(),
421                            display,
422                        }],
423                        is_lossy: false,
424                        priority: ConversionPriority::default(),
425                        display_only: false,
426                        kind: ConversionKind::default(),
427                        hidden: false,
428                        rich_display: vec![],
429                    });
430                }
431            }
432        }
433    }
434
435    // Get reinterpret threshold from config (default 0.7)
436    let reinterpret_threshold = config.map(|c| c.reinterpret_threshold()).unwrap_or(0.7);
437
438    // BFS through conversions
439    let mut depth = 0;
440
441    while !queue.is_empty() && depth < MAX_BFS_DEPTH {
442        let level_size = queue.len();
443
444        for _ in 0..level_size {
445            let Some((current_value, current_path, current_steps)) = queue.pop_front() else {
446                break;
447            };
448
449            // Get the immediate source format (last element of current path, or root)
450            let immediate_source = current_path.last().map(|s| s.as_str()).unwrap_or("");
451
452            // String reinterpretation: when we have a decoded string (not from original input),
453            // try parsing it as other formats (UUID, IP, JSON, datetime, etc.)
454            if let CoreValue::String(s) = &current_value {
455                // Only reinterpret if this came from a conversion (not original input)
456                // and reinterpretation is enabled (threshold < 1.0)
457                if !current_path.is_empty() && reinterpret_threshold < 1.0 {
458                    for format in formats {
459                        // Skip text format to prevent infinite loops
460                        if format.id() == "text" {
461                            continue;
462                        }
463
464                        for interp in format.parse(s) {
465                            // Only consider high-confidence interpretations
466                            if interp.confidence < reinterpret_threshold {
467                                continue;
468                            }
469
470                            let target_format = interp.source_format.clone();
471
472                            // Check blocking - but skip root-based blocking for string reinterpretation
473                            // because we're now in a different semantic domain (text content, not raw bytes).
474                            // E.g., hex bytes→ipv4 is blocked, but hex→utf8("192.168.1.1")→ipv4 should be allowed.
475                            if is_blocked(
476                                immediate_source,
477                                &target_format,
478                                None, // Skip root blocking for string reinterpretation
479                                &current_path,
480                                blocking,
481                            ) {
482                                continue;
483                            }
484
485                            // Format the interpreted value for display
486                            let display = format
487                                .format(&interp.value)
488                                .unwrap_or_else(|| interp.description.clone());
489
490                            let result_key = (target_format.clone(), display.clone());
491                            let bfs_key = (target_format.clone(), display.clone());
492
493                            // Build path
494                            let mut full_path = current_path.clone();
495                            full_path.push(target_format.clone());
496
497                            // Build steps
498                            let mut full_steps = current_steps.clone();
499                            full_steps.push(ConversionStep {
500                                format: target_format.clone(),
501                                value: interp.value.clone(),
502                                display: display.clone(),
503                            });
504
505                            // Add to results
506                            if seen_results.insert(result_key) {
507                                results.push(Conversion {
508                                    value: interp.value.clone(),
509                                    target_format: target_format.clone(),
510                                    display: display.clone(),
511                                    path: full_path.clone(),
512                                    steps: full_steps.clone(),
513                                    is_lossy: false,
514                                    priority: ConversionPriority::Structured,
515                                    kind: ConversionKind::Conversion,
516                                    display_only: false,
517                                    hidden: false,
518                                    rich_display: interp.rich_display.clone(),
519                                });
520                            }
521
522                            // Add to queue for further exploration
523                            if seen_for_bfs.insert(bfs_key) {
524                                queue.push_back((interp.value, full_path, full_steps));
525                            }
526                        }
527                    }
528                }
529            }
530
531            // Get conversions from all formats
532            for format in formats {
533                for conv in format.conversions(&current_value) {
534                    // Check blocking early (before adding to results or queue)
535                    if is_blocked(
536                        immediate_source,
537                        &conv.target_format,
538                        source_format,
539                        &current_path,
540                        blocking,
541                    ) {
542                        continue;
543                    }
544
545                    let result_key = (conv.target_format.clone(), conv.display.clone());
546                    let bfs_key = (conv.target_format.clone(), conv.display.clone());
547
548                    // Build the full path (format IDs only, for backwards compat)
549                    let mut full_path = current_path.clone();
550                    full_path.extend(conv.path.clone());
551
552                    // Build the full steps (with values)
553                    let mut full_steps = current_steps.clone();
554                    // Add any intermediate steps from this conversion
555                    for step in &conv.steps {
556                        full_steps.push(step.clone());
557                    }
558                    // Add the final step if not already included
559                    if full_steps.is_empty()
560                        || full_steps.last().map(|s| &s.format) != Some(&conv.target_format)
561                    {
562                        full_steps.push(ConversionStep {
563                            format: conv.target_format.clone(),
564                            value: conv.value.clone(),
565                            display: conv.display.clone(),
566                        });
567                    }
568
569                    // Add to results if we haven't seen this exact (format, display) pair
570                    if seen_results.insert(result_key) {
571                        results.push(Conversion {
572                            value: conv.value.clone(),
573                            target_format: conv.target_format.clone(),
574                            display: conv.display.clone(),
575                            path: full_path.clone(),
576                            steps: full_steps.clone(),
577                            is_lossy: conv.is_lossy,
578                            priority: conv.priority,
579                            kind: conv.kind,
580                            display_only: conv.display_only,
581                            hidden: conv.hidden,
582                            rich_display: conv.rich_display.clone(),
583                        });
584                    }
585
586                    // Add to queue for further exploration (unless terminal or already explored)
587                    if !conv.display_only && seen_for_bfs.insert(bfs_key) {
588                        queue.push_back((conv.value, full_path, full_steps));
589                    }
590                }
591            }
592        }
593
594        depth += 1;
595    }
596
597    // Filter out blocked source→target combinations (builtin + user config)
598    // This catches any that slipped through (e.g., from initial format() calls)
599    if let Some(source) = exclude_format {
600        results.retain(|conv| {
601            !is_blocked(
602                source,
603                &conv.target_format,
604                source_format,
605                &conv.path,
606                blocking,
607            )
608        });
609    }
610
611    // Sort by priority, respecting user configuration
612    sort_conversions(&mut results, priority);
613
614    results
615}
616
617/// Sort conversions by priority, respecting user configuration.
618fn sort_conversions(results: &mut [Conversion], priority_config: Option<&PriorityConfig>) {
619    results.sort_by(|a, b| {
620        if let Some(config) = priority_config {
621            // User-configured category order
622            let cat_a = config.category_sort_key(a.priority);
623            let cat_b = config.category_sort_key(b.priority);
624
625            // Within same category, apply format offsets
626            if cat_a == cat_b {
627                // Higher offset = shown earlier (so negate for comparison)
628                let off_a = config.format_offset(&a.target_format);
629                let off_b = config.format_offset(&b.target_format);
630                // Higher offset comes first
631                off_b
632                    .cmp(&off_a)
633                    .then_with(|| a.path.len().cmp(&b.path.len()))
634            } else {
635                cat_a.cmp(&cat_b)
636            }
637        } else {
638            // Default: priority enum order, then path length
639            a.priority
640                .cmp(&b.priority)
641                .then_with(|| a.path.len().cmp(&b.path.len()))
642        }
643    });
644}
645
#[cfg(test)]
mod tests {
    use super::*;
    use crate::formats::{Base64Format, BytesToIntFormat, DateTimeFormat, HexFormat};

    /// Big-endian bytes of 1763574200 (0x691E01B8); the tests expect this
    /// epoch value to render as a date containing "2025".
    const EPOCH_BYTES: [u8; 4] = [0x69, 0x1E, 0x01, 0xB8];

    /// First conversion in `conversions` whose target format is `id`.
    fn first_with_target<'a>(conversions: &'a [Conversion], id: &str) -> Option<&'a Conversion> {
        conversions.iter().find(|c| c.target_format == id)
    }

    /// Raw bytes are rendered directly and converted to both integer forms.
    #[test]
    fn test_bytes_to_multiple_formats() {
        let formats: Vec<Box<dyn Format>> = vec![
            Box::new(HexFormat),
            Box::new(Base64Format),
            Box::new(BytesToIntFormat),
        ];

        let input = CoreValue::Bytes(EPOCH_BYTES.to_vec());
        let conversions = find_all_conversions(&formats, &input, None, None, None);

        // Should have hex, base64, int-be, int-le
        for expected in ["hex", "base64", "int-be", "int-le"] {
            assert!(first_with_target(&conversions, expected).is_some());
        }
    }

    /// An integer is offered as an epoch-seconds timestamp.
    #[test]
    fn test_int_to_datetime() {
        let formats: Vec<Box<dyn Format>> = vec![Box::new(DateTimeFormat)];

        let input = CoreValue::Int {
            value: 1763574200,
            original_bytes: None,
        };
        let conversions = find_all_conversions(&formats, &input, None, None, None);

        let epoch = first_with_target(&conversions, "epoch-seconds");
        assert!(epoch.is_some());
        assert!(epoch.unwrap().display.contains("2025"));
    }

    /// The traversal chains bytes -> int-be -> epoch-seconds.
    #[test]
    fn test_chained_conversions() {
        let formats: Vec<Box<dyn Format>> = vec![
            Box::new(HexFormat),
            Box::new(BytesToIntFormat),
            Box::new(DateTimeFormat),
        ];

        // Start with bytes that represent epoch 1763574200
        let input = CoreValue::Bytes(EPOCH_BYTES.to_vec());
        let conversions = find_all_conversions(&formats, &input, None, None, None);

        let epoch = first_with_target(&conversions, "epoch-seconds");
        assert!(epoch.is_some(), "Should find epoch-seconds conversion");

        let epoch = epoch.unwrap();
        assert!(epoch.display.contains("2025"));
        assert!(!epoch.path.is_empty()); // Has a path
    }
}
717}