Skip to main content

iridium_units/
parsing.rs

1//! String parsing for units and quantities.
2//!
3//! This module provides flexible parsing of unit strings from various formats,
4//! including technical documents, academic papers, and user input. It supports
5//! Unicode symbols, LaTeX notation, natural language ("per"), and parenthesized
6//! expressions.
7//!
8//! # Quick Start
9//!
10//! ```
11//! use iridium_units::prelude::*;
12//! use std::str::FromStr;
13//!
14//! // Parse simple units
15//! let meter = Unit::from_str("m").unwrap();
16//! let velocity = Unit::from_str("m/s").unwrap();
17//!
18//! // Parse quantities (value + unit)
19//! let distance = Quantity::from_str("100 km").unwrap();
20//! let speed = Quantity::from_str("9.8 m/s^2").unwrap();
21//!
22//! // Unicode and technical formats work too
23//! let area = parse_unit("m²").unwrap();           // Unicode superscript
24//! let wavelength = parse_unit("µm").unwrap();     // Unicode micro
25//! ```
26//!
27//! # Supported Syntax
28//!
29//! ## Basic Unit Strings
30//!
31//! | Format | Examples |
32//! |--------|----------|
33//! | Simple units | `"m"`, `"meter"`, `"meters"`, `"km"`, `"kg"` |
34//! | Powers | `"m^2"`, `"s^-1"`, `"m^1/2"` (fractional) |
35//! | Multiplication | `"kg m"`, `"kg*m"`, `"kg.m"` |
36//! | Division | `"m/s"`, `"kg/m^3"`, `"erg/cm^2/s"` |
37//! | Combined | `"kg m^2 / s^2"`, `"m/s^2"` |
38//!
39//! ## Unicode Support
40//!
41//! The parser automatically normalizes Unicode characters:
42//!
43//! | Unicode | Normalized | Description |
44//! |---------|------------|-------------|
45//! | `µ`, `μ` | `u` | Micro sign, Greek mu |
46//! | `Ω` | `ohm` | Ohm sign, Greek Omega |
47//! | `Å` | `angstrom` | Angstrom sign |
48//! | `°` | `deg` | Degree symbol |
49//! | `′` | `arcmin` | Prime (arc minute) |
50//! | `″` | `arcsec` | Double prime (arc second) |
51//! | `²`, `³`, etc. | `^2`, `^3` | Superscript digits |
52//! | `⁻¹` | `^-1` | Superscript negative |
53//! | `·`, `×` | `*` | Multiplication signs |
54//! | `÷` | `/` | Division sign |
55//!
56//! ```
57//! # use iridium_units::prelude::*;
58//! // All of these work:
59//! let _ = parse_unit("m²").unwrap();      // Unicode superscript
60//! let _ = parse_unit("s⁻¹").unwrap();     // Negative superscript
61//! let _ = parse_unit("µm").unwrap();      // Micro symbol
62//! let _ = parse_unit("kg·m").unwrap();    // Middle dot multiplication
63//! let _ = parse_unit("Ω").unwrap();       // Ohm symbol
64//! ```
65//!
66//! ## LaTeX Notation
67//!
68//! Common LaTeX patterns are recognized:
69//!
70//! | LaTeX | Normalized | Description |
71//! |-------|------------|-------------|
72//! | `m^{2}` | `m^2` | Braced exponents |
73//! | `\cdot` | `*` | Multiplication |
74//! | `\times` | `*` | Multiplication |
75//! | `\mu` | `u` | Micro prefix |
76//! | `\Omega` | `ohm` | Ohm |
77//!
78//! ```
79//! # use iridium_units::prelude::*;
80//! let energy = parse_unit("kg m^{2} / s^{2}").unwrap();
81//! let product = parse_unit(r"kg \cdot m").unwrap();
82//! ```
83//!
84//! ## Natural Language ("per" notation)
85//!
86//! The word "per" is converted to division:
87//!
88//! ```
89//! # use iridium_units::prelude::*;
90//! let speed = parse_unit("km per hour").unwrap();
91//! let rate = parse_unit("m per s").unwrap();
92//! ```
93//!
94//! ## Astrophysical Subscript Notation
95//!
96//! Common astrophysical subscript patterns are recognized:
97//!
98//! | Input | Recognized as |
99//! |-------|---------------|
100//! | `M_sun`, `M_⊙` | Solar mass |
101//! | `R_sun`, `R_⊙` | Solar radius |
102//! | `L_sun`, `L_⊙` | Solar luminosity |
103//! | `M_jup` | Jupiter mass |
104//! | `R_jup` | Jupiter radius |
105//! | `M_earth`, `M_⊕` | Earth mass |
106//! | `R_earth`, `R_⊕` | Earth radius |
107//!
108//! ```no_run
109//! # #[cfg(feature = "astrophysics")]
110//! # fn main() {
111//! # use iridium_units::prelude::*;
112//! let stellar_mass = parse_unit("M_sun").unwrap();
113//! let planet_radius = parse_unit("R_jup").unwrap();
114//! # }
115//! # #[cfg(not(feature = "astrophysics"))]
116//! # fn main() {}
117//! ```
118//!
119//! ## Parentheses
120//!
121//! Parentheses can be used for grouping:
122//!
123//! ```
124//! # use iridium_units::prelude::*;
125//! let force = parse_unit("(kg m)/s^2").unwrap();
126//! let accel = parse_unit("m/(s^2)").unwrap();
127//! let squared_vel = parse_unit("(m/s)^2").unwrap();
128//! ```
129//!
130//! ## Quantity Strings
131//!
132//! Quantities combine a numeric value with a unit:
133//!
134//! ```
135//! # use iridium_units::prelude::*;
136//! let distance = parse_quantity("100 km").unwrap();
137//! let speed = parse_quantity("9.8 m/s^2").unwrap();
138//! let wavelength = parse_quantity("500 nm").unwrap();
139//! let scientific = parse_quantity("1.5e8 m").unwrap();
140//! let negative = parse_quantity("-3.14 rad").unwrap();
141//! ```
142//!
143//! # Error Handling and Suggestions
144//!
145//! When a unit name is not recognized, the parser suggests similar unit names:
146//!
147//! ```
148//! # use iridium_units::prelude::*;
149//! let result = parse_unit("metrs");
150//! match result {
151//!     Err(UnitError::UnknownUnit { name, suggestions }) => {
152//!         println!("Unknown unit '{}', did you mean: {:?}?", name, suggestions);
153//!         // Output: Unknown unit 'metrs', did you mean: ["meters", "meter"]?
154//!     }
155//!     _ => {}
156//! }
157//! ```
158//!
159//! # Custom Unit Registry
160//!
161//! For applications that need custom units (e.g., from a database), use [`UnitRegistry`]:
162//!
163//! ```
164//! use iridium_units::prelude::*;
165//!
166//! // Create a registry with all built-in units
167//! let registry = UnitRegistry::with_builtins();
168//!
169//! // Parse using the registry
170//! let unit = registry.parse_unit("m/s").unwrap();
171//! let quantity = registry.parse_quantity("100 km").unwrap();
172//!
173//! // Create a custom registry with additional units
174//! let custom_registry = UnitRegistry::with_builtins()
175//!     .with_unit(&["custom_length", "cl"], Unit::from(M));
176//!
177//! let custom = custom_registry.parse_unit("custom_length").unwrap();
178//! ```
179//!
180//! ## Registry API
181//!
182//! | Method | Description |
183//! |--------|-------------|
184//! | `new()` | Create empty registry |
185//! | `with_builtins()` | Create with all standard units |
186//! | `register(&mut self, names, unit)` | Add a unit with aliases |
187//! | `with_unit(self, names, unit)` | Builder: add unit |
188//! | `lookup(name)` | Look up unit by name |
189//! | `parse_unit(s)` | Parse unit string |
190//! | `parse_quantity(s)` | Parse quantity string |
191//! | `merge(&mut self, other)` | Merge another registry |
192//!
193//! # Available Unit Names
194//!
195//! ## SI Units
196//!
197//! - **Base**: `m`, `s`, `kg`, `a` (ampere), `k` (kelvin), `mol`, `cd`, `rad`, `sr`
198//! - **Length**: `km`, `cm`, `mm`, `um`/`µm`, `nm`, `pm`, `fm`
199//! - **Time**: `ms`, `us`/`µs`, `ns`, `ps`, `min`, `h`/`hr`, `d`/`day`, `yr`
200//! - **Mass**: `g`, `mg`, `ug`/`µg`, `t`/`tonne`
201//! - **Frequency**: `hz`, `khz`, `mhz`, `ghz`, `thz`
202//! - **Derived**: `n` (newton), `j` (joule), `w` (watt), `pa`, `c` (coulomb), `v`, `f` (farad), `ohm`/`Ω`
203//! - **Energy**: `ev`, `kev`, `mev`, `gev`
204//! - **Angle**: `deg`/`°`, `arcmin`/`′`, `arcsec`/`″`, `mas`, `uas`
205//!
206//! ## Astrophysical Units
207//!
208//! - **Distance**: `au`, `pc`, `kpc`, `mpc`, `gpc`, `ly`/`lightyear`
209//! - **Solar**: `msun`/`M_sun`/`solar_mass`, `rsun`/`R_sun`, `lsun`/`L_sun`
210//! - **Planetary**: `mjup`/`M_jup`, `rjup`/`R_jup`, `mearth`/`M_earth`, `rearth`/`R_earth`
211//! - **Spectroscopic**: `angstrom`/`Å`, `jy` (jansky), `mjy`, `ujy`, `barn`
212//! - **CGS**: `erg`, `dyn`, `gauss`
213//!
214//! ## Imperial Units
215//!
216//! - **Length**: `in`, `ft`, `yd`, `mi`, `nmi`
217//! - **Mass**: `lb`, `oz`, `ton`
218//! - **Other**: `psi`, `mph`, `hp`, `btu`, `gal`, `pt`, `qt`
219
220use crate::dimension::Rational16;
221use crate::error::{UnitError, UnitResult};
222use crate::quantity::Quantity;
223use crate::unit::Unit;
224use std::collections::HashMap;
225use std::str::FromStr;
226use std::sync::LazyLock;
227use std::sync::RwLock;
228
229// ============================================================================
230// Levenshtein Distance and Suggestions
231// ============================================================================
232
/// Compute the Levenshtein (edit) distance between `a` and `b`.
///
/// The distance is the minimum number of single-character insertions,
/// deletions, and substitutions needed to turn `a` into `b`. Comparison is
/// done per `char`, not per byte.
fn levenshtein_distance(a: &str, b: &str) -> usize {
    let source: Vec<char> = a.chars().collect();
    let target: Vec<char> = b.chars().collect();

    // Against an empty string the distance is the other string's length.
    if source.is_empty() {
        return target.len();
    }
    if target.is_empty() {
        return source.len();
    }

    // `row[j]` is the distance between the processed prefix of `source`
    // and the first `j` characters of `target`.
    let mut row: Vec<usize> = (0..=target.len()).collect();

    for (i, &source_char) in source.iter().enumerate() {
        // `diagonal` carries the previous row's value at column `j`.
        let mut diagonal = row[0];
        row[0] = i + 1;

        for (j, &target_char) in target.iter().enumerate() {
            let above = row[j + 1];
            let substitution = diagonal + usize::from(source_char != target_char);
            let deletion = above + 1;
            let insertion = row[j] + 1;

            row[j + 1] = deletion.min(insertion).min(substitution);
            diagonal = above;
        }
    }

    row[target.len()]
}
267
268/// Find similar unit names for suggestions.
269fn find_similar_units(
270    name: &str,
271    registry: &HashMap<String, UnitEntry>,
272    max_suggestions: usize,
273) -> Vec<String> {
274    let name_lower = name.to_lowercase();
275    let threshold = (name_lower.len() / 2).clamp(2, 3);
276
277    let mut candidates: Vec<(String, usize)> = registry
278        .keys()
279        .filter_map(|key| {
280            let dist = levenshtein_distance(&name_lower, key);
281            if dist <= threshold {
282                Some((key.clone(), dist))
283            } else {
284                None
285            }
286        })
287        .collect();
288
289    candidates.sort_by_key(|(_, dist)| *dist);
290    candidates.truncate(max_suggestions);
291    candidates.into_iter().map(|(name, _)| name).collect()
292}
293
294// ============================================================================
295// Unicode Normalization
296// ============================================================================
297
/// Normalize Unicode characters to ASCII equivalents for parsing.
///
/// Handles Greek letters (`µ`/`μ` -> `u`, `Ω` -> `ohm`, ...), unit symbols
/// (`Å`, `°`, `′`, `″`, `℃`, `℉`), superscript digits and signs (`²` -> `^2`,
/// `⁻¹` -> `^-1`), vulgar fractions (`½` -> `^1/2`), and multiplication and
/// division signs. Every other character is copied through unchanged.
///
/// A run of consecutive superscript digits forms one exponent, so `m¹²`
/// becomes `m^12` rather than `m^1^2`.
fn normalize_unicode(s: &str) -> String {
    let mut result = String::with_capacity(s.len() * 2);
    // True while the previous character was a superscript digit, so that
    // following superscript digits extend the same exponent.
    let mut in_digit_run = false;

    for c in s.chars() {
        if let Some(digit) = superscript_digit(c) {
            if !in_digit_run {
                result.push('^');
            }
            result.push(digit);
            in_digit_run = true;
            continue;
        }
        in_digit_run = false;

        match c {
            // Greek letters
            'µ' | 'μ' => result.push('u'), // micro sign (U+00B5) and Greek mu (U+03BC)
            '\u{2126}' | '\u{03A9}' => result.push_str("ohm"), // Ohm sign (U+2126) and Greek Omega (U+03A9)
            'α' => result.push_str("alpha"),
            'β' => result.push_str("beta"),
            'γ' => result.push_str("gamma"),
            'δ' => result.push_str("delta"),
            'λ' => result.push_str("lambda"),
            'π' => result.push_str("pi"),

            // Symbols
            '\u{212B}' | '\u{00C5}' => result.push_str("angstrom"), // Angstrom sign (U+212B) and A with ring (U+00C5)
            '°' => result.push_str("deg"),
            '′' => result.push_str("arcmin"),
            '″' => result.push_str("arcsec"),
            '℃' => result.push_str("degC"),
            '℉' => result.push_str("degF"),

            // Superscript signs; the digit that follows contributes its own
            // '^', which the post-processing below merges away.
            '⁻' => result.push_str("^-"),
            '⁺' => result.push_str("^+"),

            // Fractions
            '½' => result.push_str("^1/2"),
            '⅓' => result.push_str("^1/3"),
            '¼' => result.push_str("^1/4"),
            '⅔' => result.push_str("^2/3"),
            '¾' => result.push_str("^3/4"),

            // Multiplication signs
            '·' | '×' | '∙' | '⋅' => result.push('*'),

            // Division
            '÷' => result.push('/'),

            _ => result.push(c),
        }
    }

    // Merge consecutive power operators,
    // e.g. "m^-^2" -> "m^-2" and "m^^2" -> "m^2".
    result
        .replace("^^", "^")
        .replace("^-^", "^-")
        .replace("^+^", "^")
}

/// Map a Unicode superscript digit to its ASCII digit, if `c` is one.
fn superscript_digit(c: char) -> Option<char> {
    let digit = match c {
        '⁰' => '0',
        '¹' => '1',
        '²' => '2',
        '³' => '3',
        '⁴' => '4',
        '⁵' => '5',
        '⁶' => '6',
        '⁷' => '7',
        '⁸' => '8',
        '⁹' => '9',
        _ => return None,
    };
    Some(digit)
}
364
365// ============================================================================
366// LaTeX/Technical Format Support
367// ============================================================================
368
/// Normalize LaTeX-style notation to the plain format the parser expects.
///
/// Two rewrites are applied, in order:
///
/// 1. Braced exponents lose their braces: `m^{2}` -> `m^2`. Braces are
///    matched by nesting depth, so an exponent that itself contains braces
///    (e.g. `m^{\frac{1}{2}}`) is unwrapped as a whole. Processing stops at
///    the first `^{` that has no matching `}`.
/// 2. Known LaTeX commands are replaced by their plain equivalents
///    (`\cdot` -> `*`, `\mu` -> `u`, `\Omega` -> `ohm`, ...).
fn normalize_latex(s: &str) -> String {
    // Order matters: `^\frac{1}{2}` must be handled before the bare
    // `\frac{1}{2}` so that an exponent does not end up with a doubled '^'.
    const COMMANDS: &[(&str, &str)] = &[
        (r"\cdot", "*"),
        (r"\times", "*"),
        (r"\mu", "u"),
        (r"\alpha", "alpha"),
        (r"\beta", "beta"),
        (r"\gamma", "gamma"),
        (r"\Omega", "ohm"),
        (r"\AA", "angstrom"),
        (r"\deg", "deg"),
        (r"\prime", "arcmin"),
        (r"\arcsec", "arcsec"),
        (r"\arcmin", "arcmin"),
        (r"^\frac{1}{2}", "^1/2"),
        (r"\frac{1}{2}", "^1/2"),
    ];

    let mut result = s.to_string();

    // Handle LaTeX braces in exponents: m^{2} -> m^2
    while let Some(caret) = result.find("^{") {
        let open = caret + 1;
        let Some(close) = matching_brace(&result, open) else {
            break;
        };
        // Remove the closing brace first so `open` stays a valid index.
        result.remove(close);
        result.remove(open);
    }

    // Handle LaTeX commands
    for &(command, replacement) in COMMANDS {
        if result.contains(command) {
            result = result.replace(command, replacement);
        }
    }

    result
}

/// Return the byte index of the `}` that closes the `{` at byte index `open`.
///
/// Scanning bytes is safe here because `{` and `}` are ASCII and never occur
/// inside a multi-byte UTF-8 sequence.
fn matching_brace(s: &str, open: usize) -> Option<usize> {
    let mut depth = 0usize;
    for (offset, byte) in s.as_bytes().get(open..)?.iter().enumerate() {
        match byte {
            b'{' => depth += 1,
            b'}' => {
                depth = depth.checked_sub(1)?;
                if depth == 0 {
                    return Some(open + offset);
                }
            }
            _ => {}
        }
    }
    None
}
405
/// Rewrite natural-language "per" into a division sign.
///
/// The input is split on whitespace. Each word equal to "per" (ASCII
/// case-insensitive) that is followed by another word is replaced by "/",
/// and the following word is copied through untouched. A trailing "per" with
/// nothing after it is left as is. Words are re-joined with single spaces, so
/// runs of whitespace collapse.
fn normalize_per_notation(s: &str) -> String {
    let mut tokens = s.split_whitespace();
    let mut pieces: Vec<&str> = Vec::new();

    while let Some(token) = tokens.next() {
        if token.eq_ignore_ascii_case("per") {
            // Consume the operand together with "per" so that it is never
            // itself interpreted as a "per".
            if let Some(operand) = tokens.next() {
                pieces.push("/");
                pieces.push(operand);
                continue;
            }
        }
        pieces.push(token);
    }

    pieces.join(" ")
}
430
/// Normalize subscript notation for astrophysical units.
///
/// Rewrites spellings such as `M_sun`, `R_⊙`, or `jupiter_mass` to the compact
/// names `msun`, `rsun`, `mjup`, and so on. Matching is ASCII-case-insensitive
/// and every occurrence is rewritten.
///
/// A pattern only matches as a whole word: it must not be directly preceded
/// or followed by a letter or underscore. This keeps longer names that merely
/// contain a pattern intact, e.g. `solar_radius` is not turned into `rsunius`.
fn normalize_subscripts(s: &str) -> String {
    // Matching ignores ASCII case, so one spelling per pattern is enough.
    const SUBSCRIPT_MAPPINGS: &[(&str, &str)] = &[
        ("M_sun", "msun"),
        ("M_⊙", "msun"),
        ("R_sun", "rsun"),
        ("R_⊙", "rsun"),
        ("L_sun", "lsun"),
        ("L_⊙", "lsun"),
        ("M_jup", "mjup"),
        ("R_jup", "rjup"),
        ("M_earth", "mearth"),
        ("M_⊕", "mearth"),
        ("R_earth", "rearth"),
        ("R_⊕", "rearth"),
        ("sol_mass", "msun"),
        ("solar_mass", "msun"),
        ("sol_rad", "rsun"),
        ("solar_rad", "rsun"),
        ("sol_lum", "lsun"),
        ("solar_lum", "lsun"),
        ("jup_mass", "mjup"),
        ("jupiter_mass", "mjup"),
        ("jup_rad", "rjup"),
        ("jupiter_rad", "rjup"),
        ("earth_mass", "mearth"),
        ("earth_rad", "rearth"),
    ];

    fn is_word_char(c: char) -> bool {
        c.is_alphabetic() || c == '_'
    }

    let mut out = String::with_capacity(s.len());
    let mut prev: Option<char> = None;
    let mut rest = s;

    'scan: while let Some(c) = rest.chars().next() {
        // A pattern may only start where the previous character is not part
        // of a word.
        if !prev.is_some_and(is_word_char) {
            for &(pattern, replacement) in SUBSCRIPT_MAPPINGS {
                // `get` yields `None` when the input is too short or the cut
                // would land inside a multi-byte character.
                let Some(candidate) = rest.get(..pattern.len()) else {
                    continue;
                };
                if !candidate.eq_ignore_ascii_case(pattern) {
                    continue;
                }
                let tail = &rest[pattern.len()..];
                if tail.chars().next().is_some_and(is_word_char) {
                    continue;
                }

                out.push_str(replacement);
                prev = candidate.chars().next_back();
                rest = tail;
                continue 'scan;
            }
        }

        out.push(c);
        prev = Some(c);
        rest = &rest[c.len_utf8()..];
    }

    out
}
483
484// ============================================================================
485// Parentheses Support
486// ============================================================================
487
488/// Check if parentheses in a string are balanced.
489fn check_balanced_parens(s: &str) -> UnitResult<()> {
490    let mut depth = 0i32;
491    for c in s.chars() {
492        match c {
493            '(' => depth += 1,
494            ')' => {
495                depth -= 1;
496                if depth < 0 {
497                    return Err(UnitError::ParseError(
498                        "unbalanced parentheses: unexpected ')'".into(),
499                    ));
500                }
501            }
502            _ => {}
503        }
504    }
505    if depth != 0 {
506        return Err(UnitError::ParseError(
507            "unbalanced parentheses: missing ')'".into(),
508        ));
509    }
510    Ok(())
511}
512
513/// Find the position of a top-level division operator (respecting parentheses).
514fn find_top_level_division(s: &str) -> Option<usize> {
515    let mut depth = 0;
516    let mut in_exponent = false;
517
518    for (i, c) in s.char_indices() {
519        match c {
520            '(' => depth += 1,
521            ')' => depth -= 1,
522            '^' => in_exponent = true,
523            '/' if depth == 0 && !in_exponent => return Some(i),
524            _ if c.is_whitespace() && in_exponent => in_exponent = false,
525            _ if !c.is_ascii_digit() && c != '-' && c != '+' && c != '/' && in_exponent => {
526                in_exponent = false;
527            }
528            _ => {}
529        }
530    }
531    None
532}
533
534/// Parse an expression that may contain parentheses.
535fn parse_with_parens(s: &str, registry: &HashMap<String, UnitEntry>) -> UnitResult<Unit> {
536    let s = s.trim();
537    if s.is_empty() {
538        return Ok(Unit::dimensionless());
539    }
540
541    check_balanced_parens(s)?;
542
543    // Check for top-level division first
544    if let Some(div_pos) = find_top_level_division(s) {
545        let numerator = parse_with_parens(&s[..div_pos], registry)?;
546        let denominator = parse_with_parens(&s[div_pos + 1..], registry)?;
547        return Ok(&numerator / &denominator);
548    }
549
550    // Check for parenthesized expression with optional power
551    if s.starts_with('(') {
552        if let Some(close_pos) = find_matching_paren(s, 0) {
553            let inner = &s[1..close_pos];
554            let after = &s[close_pos + 1..];
555
556            let inner_unit = parse_with_parens(inner, registry)?;
557
558            // Check for power after closing paren
559            if let Some(power_str) = after.strip_prefix('^') {
560                let power = parse_power(power_str)?;
561                let result = inner_unit.pow(power);
562                return Ok(result);
563            } else if after.is_empty() {
564                return Ok(inner_unit);
565            } else {
566                // There's more to parse - multiply
567                let rest = parse_with_parens(after.trim_start_matches(['*', ' ']), registry)?;
568                return Ok(&inner_unit * &rest);
569            }
570        }
571    }
572
573    // No parentheses at this level - parse as product
574    parse_unit_product_with_registry(s, registry)
575}
576
/// Find the closing parenthesis that matches the `(` at `open_pos`.
///
/// `open_pos` and the returned position are byte offsets into `s`, so the
/// result can be used directly for slicing (`&s[open_pos + 1..close]`) even
/// when `s` contains multi-byte characters.
///
/// Returns `None` if `s` has no `(` at `open_pos` or the parenthesis is never
/// closed.
fn find_matching_paren(s: &str, open_pos: usize) -> Option<usize> {
    // Parentheses are ASCII, and ASCII bytes never occur inside a multi-byte
    // UTF-8 sequence, so scanning raw bytes is safe.
    let bytes = s.as_bytes();
    if bytes.get(open_pos) != Some(&b'(') {
        return None;
    }

    let mut depth = 1usize;
    for (offset, &byte) in bytes[open_pos + 1..].iter().enumerate() {
        match byte {
            b'(' => depth += 1,
            b')' => {
                depth -= 1;
                if depth == 0 {
                    return Some(open_pos + 1 + offset);
                }
            }
            _ => {}
        }
    }
    None
}
599
600// ============================================================================
601// UnitRegistry Struct
602// ============================================================================
603
604/// A registry mapping unit names to Unit values.
605///
606/// The registry provides a way to look up units by name, with support for
607/// custom units beyond the built-in set. This is useful for applications
608/// that need to populate unit definitions from external sources (e.g., databases).
609///
610/// # Examples
611///
612/// ```
613/// use iridium_units::prelude::*;
614///
615/// // Create a registry with built-in units
616/// let registry = UnitRegistry::with_builtins();
617/// let meter = registry.lookup("m").unwrap();
618///
619/// // Parse units using the registry
620/// let velocity = registry.parse_unit("m/s").unwrap();
621/// ```
622#[derive(Clone)]
623pub struct UnitRegistry {
624    entries: HashMap<String, UnitEntry>,
625}
626
627impl Default for UnitRegistry {
628    fn default() -> Self {
629        Self::new()
630    }
631}
632
633impl UnitRegistry {
634    /// Create a new empty registry.
635    pub fn new() -> Self {
636        UnitRegistry {
637            entries: HashMap::new(),
638        }
639    }
640
641    /// Create a new registry with all built-in units registered.
642    pub fn with_builtins() -> Self {
643        let mut registry = Self::new();
644        register_builtin_units(&mut registry.entries);
645        register_extended_aliases(&mut registry.entries);
646        registry
647    }
648
649    /// Register a unit with one or more names.
650    pub fn register(&mut self, names: &[&str], unit: Unit) {
651        let entry = UnitEntry { unit };
652        for name in names {
653            self.entries.insert(name.to_lowercase(), entry.clone());
654        }
655    }
656
657    /// Builder method to register a unit.
658    pub fn with_unit(mut self, names: &[&str], unit: Unit) -> Self {
659        self.register(names, unit);
660        self
661    }
662
663    /// Register multiple units at once.
664    pub fn register_many(&mut self, units: Vec<(&[&str], Unit)>) {
665        for (names, unit) in units {
666            self.register(names, unit);
667        }
668    }
669
670    /// Look up a unit by name.
671    pub fn lookup(&self, name: &str) -> Option<Unit> {
672        self.entries
673            .get(&name.to_lowercase())
674            .map(|e| e.unit.clone())
675    }
676
677    /// Parse a unit string using this registry.
678    pub fn parse_unit(&self, s: &str) -> UnitResult<Unit> {
679        parse_unit_with_registry(s, &self.entries)
680    }
681
682    /// Parse a quantity string using this registry.
683    pub fn parse_quantity(&self, s: &str) -> UnitResult<Quantity> {
684        parse_quantity_with_registry(s, &self.entries)
685    }
686
687    /// Merge another registry into this one.
688    ///
689    /// Entries from the other registry will overwrite existing entries
690    /// with the same name.
691    pub fn merge(&mut self, other: &UnitRegistry) {
692        for (name, entry) in &other.entries {
693            self.entries.insert(name.clone(), entry.clone());
694        }
695    }
696
697    /// Get the number of registered unit names.
698    pub fn len(&self) -> usize {
699        self.entries.len()
700    }
701
702    /// Check if the registry is empty.
703    pub fn is_empty(&self) -> bool {
704        self.entries.is_empty()
705    }
706
707    /// Get all registered unit names.
708    pub fn names(&self) -> Vec<&str> {
709        self.entries.keys().map(|s| s.as_str()).collect()
710    }
711}
712
/// A registry entry for a unit.
///
/// Stored as the value type of the name -> unit maps in this module; one
/// clone of the entry is inserted per registered name.
#[derive(Clone)]
struct UnitEntry {
    // The unit this entry resolves to; cloned out on lookup.
    unit: Unit,
}
718
719/// Global unit registry mapping strings to units.
720static UNIT_REGISTRY: LazyLock<RwLock<HashMap<String, UnitEntry>>> = LazyLock::new(|| {
721    let mut map = HashMap::new();
722    register_builtin_units(&mut map);
723    register_extended_aliases(&mut map);
724    RwLock::new(map)
725});
726
727/// Register all built-in units with the registry.
728fn register_builtin_units(map: &mut HashMap<String, UnitEntry>) {
729    use crate::systems::imperial::*;
730    use crate::systems::si::*;
731
732    // Helper to register a unit with multiple names
733    macro_rules! register {
734        ($map:expr, $unit:expr, $($name:expr),+) => {
735            let entry = UnitEntry { unit: Unit::from($unit) };
736            $(
737                $map.insert($name.to_lowercase(), entry.clone());
738            )+
739        };
740    }
741
742    // SI Base Units
743    register!(map, M, "m", "meter", "meters", "metre", "metres");
744    register!(map, S, "s", "sec", "second", "seconds");
745    register!(map, KG, "kg", "kilogram", "kilograms");
746    register!(map, A, "a", "amp", "ampere", "amperes");
747    register!(map, K, "k", "kelvin");
748    register!(map, DEG_C, "°c", "degc", "celsius");
749    register!(map, DEG_F, "°f", "degf", "fahrenheit");
750    register!(map, MOL, "mol", "mole", "moles");
751    register!(map, CD, "cd", "candela");
752    register!(map, RAD, "rad", "radian", "radians");
753    register!(map, SR, "sr", "steradian", "steradians");
754
755    // SI Length
756    register!(
757        map,
758        KM,
759        "km",
760        "kilometer",
761        "kilometers",
762        "kilometre",
763        "kilometres"
764    );
765    register!(
766        map,
767        CM,
768        "cm",
769        "centimeter",
770        "centimeters",
771        "centimetre",
772        "centimetres"
773    );
774    register!(
775        map,
776        MM,
777        "mm",
778        "millimeter",
779        "millimeters",
780        "millimetre",
781        "millimetres"
782    );
783    register!(
784        map,
785        UM,
786        "um",
787        "micrometer",
788        "micrometers",
789        "micron",
790        "microns"
791    );
792    register!(map, NM, "nm", "nanometer", "nanometers");
793    register!(map, PM, "pm", "picometer", "picometers");
794    register!(map, FM, "fm", "femtometer", "femtometers");
795
796    // SI Time
797    register!(map, MS, "ms", "millisecond", "milliseconds");
798    register!(map, US, "us", "microsecond", "microseconds");
799    register!(map, NS, "ns", "nanosecond", "nanoseconds");
800    register!(map, PS, "ps", "picosecond", "picoseconds");
801    register!(map, MIN, "min", "minute", "minutes");
802    register!(map, H, "h", "hr", "hour", "hours");
803    register!(map, DAY, "d", "day", "days");
804    register!(map, YR, "yr", "year", "years", "julian_year");
805
806    // SI Mass
807    register!(map, G, "g", "gram", "grams");
808    register!(map, MG, "mg", "milligram", "milligrams");
809    register!(map, UG, "ug", "microgram", "micrograms");
810    register!(map, TONNE, "t", "tonne", "tonnes", "metric_ton");
811
812    // SI Derived - Frequency
813    register!(map, HZ, "hz", "hertz");
814    register!(map, KHZ, "khz", "kilohertz");
815    register!(map, MHZ, "mhz", "megahertz");
816    register!(map, GHZ, "ghz", "gigahertz");
817    register!(map, THZ, "thz", "terahertz");
818
819    // SI Derived - Mechanics
820    register!(map, N, "n", "newton", "newtons");
821    register!(map, J, "j", "joule", "joules");
822    register!(map, W, "w", "watt", "watts");
823    register!(map, KW, "kw", "kilowatt", "kilowatts");
824    register!(map, MW, "mw", "megawatt", "megawatts");
825    register!(map, PA, "pa", "pascal", "pascals");
826
827    // SI Derived - Electrical
828    register!(map, C, "c", "coulomb", "coulombs");
829    register!(map, V, "v", "volt", "volts");
830    register!(map, F, "f", "farad", "farads");
831    register!(map, OHM, "ohm", "ohms");
832
833    // SI Derived - Energy
834    register!(map, EV, "ev", "electronvolt", "electronvolts");
835    register!(map, KEV, "kev", "kiloelectronvolt");
836    register!(map, MEV, "mev", "megaelectronvolt");
837    register!(map, GEV, "gev", "gigaelectronvolt");
838
839    // SI Angles
840    register!(map, DEG, "deg", "degree", "degrees");
841    register!(map, ARCMIN, "arcmin", "arcminute", "arcminutes");
842    register!(map, ARCSEC, "arcsec", "arcsecond", "arcseconds");
843    register!(map, MAS, "mas", "milliarcsecond", "milliarcseconds");
844    register!(map, UAS, "uas", "microarcsecond", "microarcseconds");
845
846    // Imperial - Length
847    register!(map, INCH, "in", "inch", "inches");
848    register!(map, FOOT, "ft", "foot", "feet");
849    register!(map, YARD, "yd", "yard", "yards");
850    register!(map, MILE, "mi", "mile", "miles");
851    register!(map, NAUTICAL_MILE, "nmi", "nautical_mile");
852
853    // Imperial - Mass
854    register!(map, POUND, "lb", "lbm", "pound", "pounds");
855    register!(map, OUNCE, "oz", "ounce", "ounces");
856    register!(map, TON, "ton", "tons", "short_ton");
857
858    // Imperial - Volume
859    register!(map, GALLON, "gal", "gallon", "gallons");
860    register!(map, PINT, "pt", "pint", "pints");
861    register!(map, QUART, "qt", "quart", "quarts");
862
863    // Imperial - Other
864    register!(map, PSI, "psi");
865    register!(map, MPH, "mph");
866    register!(map, KNOT, "kn", "kt", "knot", "knots");
867    register!(map, HORSEPOWER, "hp", "horsepower");
868    register!(map, BTU, "btu");
869
870    #[cfg(feature = "astrophysics")]
871    register_astrophysical_units(map);
872
873    #[cfg(feature = "cgs")]
874    register_cgs_units(map);
875}
876
/// Populate `map` with the astrophysical units and their aliases.
///
/// Only compiled with the `astrophysics` feature. Names are stored
/// lowercased; a later registration of the same name replaces an earlier one.
#[cfg(feature = "astrophysics")]
fn register_astrophysical_units(map: &mut HashMap<String, UnitEntry>) {
    use crate::systems::astrophysical::{
        ANGSTROM, AU, BARN, DYN, EARTH_MASS, EARTH_RADIUS, ERG, GAUSS, GPC, JANSKY, JUPITER_MASS,
        JUPITER_RADIUS, KPC, LIGHT_YEAR, MJY, MPC, PARSEC, SOLAR_LUMINOSITY, SOLAR_MASS,
        SOLAR_RADIUS, UJY,
    };

    // Insert one unit under each of its (lowercased) names.
    let mut add = |unit: Unit, names: &[&str]| {
        let entry = UnitEntry { unit };
        for name in names {
            map.insert(name.to_lowercase(), entry.clone());
        }
    };

    // Distance
    add(Unit::from(AU), &["au", "astronomical_unit"]);
    add(Unit::from(PARSEC), &["pc", "parsec", "parsecs"]);
    add(Unit::from(KPC), &["kpc", "kiloparsec", "kiloparsecs"]);
    add(Unit::from(MPC), &["mpc", "megaparsec", "megaparsecs"]);
    add(Unit::from(GPC), &["gpc", "gigaparsec", "gigaparsecs"]);
    add(
        Unit::from(LIGHT_YEAR),
        &[
            "ly",
            "lyr",
            "lightyear",
            "lightyears",
            "light_year",
            "light_years",
        ],
    );

    // Solar
    add(
        Unit::from(SOLAR_MASS),
        &["m_sun", "msun", "solmass", "solar_mass"],
    );
    add(
        Unit::from(SOLAR_RADIUS),
        &["r_sun", "rsun", "solrad", "solar_radius"],
    );
    add(
        Unit::from(SOLAR_LUMINOSITY),
        &["l_sun", "lsun", "sollum", "solar_luminosity"],
    );

    // Planetary
    add(Unit::from(JUPITER_MASS), &["m_jup", "mjup", "jupiter_mass"]);
    add(
        Unit::from(JUPITER_RADIUS),
        &["r_jup", "rjup", "jupiter_radius"],
    );
    add(Unit::from(EARTH_MASS), &["m_earth", "mearth", "earth_mass"]);
    add(
        Unit::from(EARTH_RADIUS),
        &["r_earth", "rearth", "earth_radius"],
    );

    // Spectroscopic
    add(Unit::from(ANGSTROM), &["angstrom", "aa"]);
    add(Unit::from(JANSKY), &["jy", "jansky"]);
    add(Unit::from(MJY), &["mjy", "millijansky"]);
    add(Unit::from(UJY), &["ujy", "microjansky"]);
    add(Unit::from(BARN), &["barn", "barns"]);

    // CGS units commonly used in astrophysics
    add(Unit::from(ERG), &["erg", "ergs"]);
    add(Unit::from(DYN), &["dyn", "dyne", "dynes"]);
    add(Unit::from(GAUSS), &["gauss"]);
}
941
/// Adds the CGS-specific unit names to `map`.
///
/// The CGS centimeter and gram are registered under the explicit
/// `"centimeter_cgs"` and `"gram_cgs"` names only.
#[cfg(feature = "cgs")]
fn register_cgs_units(map: &mut HashMap<String, UnitEntry>) {
    use crate::systems::cgs::{CENTIMETER, GRAM};

    /// Inserts one entry for `unit` under each of `names`, lowercased.
    fn alias(map: &mut HashMap<String, UnitEntry>, unit: Unit, names: &[&str]) {
        let entry = UnitEntry { unit };
        for name in names {
            map.insert(name.to_lowercase(), entry.clone());
        }
    }

    alias(map, Unit::from(CENTIMETER), &["centimeter_cgs"]);
    alias(map, Unit::from(GRAM), &["gram_cgs"]);
}
958
959/// Register extended Unicode and academic aliases.
960fn register_extended_aliases(map: &mut HashMap<String, UnitEntry>) {
961    use crate::systems::si::*;
962
963    macro_rules! register {
964        ($map:expr, $unit:expr, $($name:expr),+) => {
965            let entry = UnitEntry { unit: Unit::from($unit) };
966            $(
967                $map.insert($name.to_lowercase(), entry.clone());
968            )+
969        };
970    }
971
972    // Unicode micro symbol aliases
973    register!(map, UM, "µm");
974    register!(map, US, "µs");
975    register!(map, UG, "µg");
976
977    // Ohm with Unicode
978    register!(map, OHM, "ω");
979
980    // Degree symbol
981    register!(map, DEG, "°");
982
983    // Arc minute/second with Unicode
984    register!(map, ARCMIN, "′");
985    register!(map, ARCSEC, "″");
986
987    #[cfg(feature = "astrophysics")]
988    register_astrophysical_aliases(map);
989}
990
/// Adds symbol-based and run-together spellings of astrophysical units to `map`.
///
/// Some names ("solmass", "solrad", "sollum") are also inserted by
/// `register_astrophysical_units`; inserting them again maps them to the same
/// unit constants.
#[cfg(feature = "astrophysics")]
fn register_astrophysical_aliases(map: &mut HashMap<String, UnitEntry>) {
    use crate::systems::astrophysical::{
        ANGSTROM, EARTH_MASS, EARTH_RADIUS, JUPITER_MASS, JUPITER_RADIUS, SOLAR_LUMINOSITY,
        SOLAR_MASS, SOLAR_RADIUS,
    };

    /// Inserts one entry for `unit` under each of `names`, lowercased.
    fn alias(map: &mut HashMap<String, UnitEntry>, unit: Unit, names: &[&str]) {
        let entry = UnitEntry { unit };
        for name in names {
            map.insert(name.to_lowercase(), entry.clone());
        }
    }

    // Angstrom written with its Unicode letter (lowercased key)
    alias(map, Unit::from(ANGSTROM), &["å"]);

    // Solar quantities, including the sun symbol
    alias(map, Unit::from(SOLAR_MASS), &["m⊙", "solmass", "sol_mass"]);
    alias(
        map,
        Unit::from(SOLAR_RADIUS),
        &["r⊙", "solrad", "sol_rad", "solarradius"],
    );
    alias(
        map,
        Unit::from(SOLAR_LUMINOSITY),
        &["l⊙", "sollum", "sol_lum", "solarluminosity"],
    );

    // Planetary quantities, including the earth symbol
    alias(map, Unit::from(JUPITER_MASS), &["m_jupiter", "jupitermass"]);
    alias(map, Unit::from(JUPITER_RADIUS), &["r_jupiter", "jupiterradius"]);
    alias(map, Unit::from(EARTH_MASS), &["m⊕", "earthmass"]);
    alias(map, Unit::from(EARTH_RADIUS), &["r⊕", "earthradius"]);
}
1026
1027/// Look up a simple unit by name.
1028pub fn lookup_unit(name: &str) -> Option<Unit> {
1029    let registry = UNIT_REGISTRY.read().ok()?;
1030    registry.get(&name.to_lowercase()).map(|e| e.unit.clone())
1031}
1032
1033/// Register a custom unit with the registry.
1034///
1035/// This allows adding user-defined units that can be parsed from strings.
1036pub fn register_unit(names: &[&str], unit: Unit) {
1037    if let Ok(mut registry) = UNIT_REGISTRY.write() {
1038        let entry = UnitEntry { unit };
1039        for name in names {
1040            registry.insert(name.to_lowercase(), entry.clone());
1041        }
1042    }
1043}
1044
1045/// Parse a unit string into a Unit.
1046///
1047/// Supports:
1048/// - Simple units: "m", "kg", "s"
1049/// - Powers: "m^2", "s^-1", "m^1/2"
1050/// - Multiplication: "kg m", "kg*m"
1051/// - Division: "m/s", "kg/m^3"
1052/// - Combined: "kg m^2 / s^2"
1053/// - Unicode: "m²", "µm", "Ω", "°", "′", "″"
1054/// - LaTeX: "m^{2}", "\cdot", "\mu"
1055/// - Per notation: "km per hour"
1056/// - Subscripts: "M_sun", "R_jup"
1057/// - Parentheses: "(kg m)/s^2"
1058pub fn parse_unit(s: &str) -> UnitResult<Unit> {
1059    let registry = UNIT_REGISTRY
1060        .read()
1061        .map_err(|_| UnitError::ParseError("failed to acquire registry lock".into()))?;
1062    parse_unit_with_registry(s, &registry)
1063}
1064
1065/// Parse a unit string using a specific registry.
1066fn parse_unit_with_registry(s: &str, registry: &HashMap<String, UnitEntry>) -> UnitResult<Unit> {
1067    let s = s.trim();
1068    if s.is_empty() {
1069        return Ok(Unit::dimensionless());
1070    }
1071
1072    // Apply normalizations in order
1073    let normalized = normalize_unicode(s);
1074    let normalized = normalize_latex(&normalized);
1075    let normalized = normalize_per_notation(&normalized);
1076    let normalized = normalize_subscripts(&normalized);
1077
1078    // Check for parentheses - use the parentheses-aware parser
1079    if normalized.contains('(') || normalized.contains(')') {
1080        return parse_with_parens(&normalized, registry);
1081    }
1082
1083    // Split by division, but be careful not to split inside exponents
1084    // Exponents like "m^1/2" should not be split at the "/"
1085    let parts = split_unit_by_division(&normalized);
1086
1087    match parts.len() {
1088        1 => parse_unit_product_with_registry(&parts[0], registry),
1089        2 => {
1090            let numerator = parse_unit_product_with_registry(&parts[0], registry)?;
1091            let denominator = parse_unit_product_with_registry(&parts[1], registry)?;
1092            Ok(&numerator / &denominator)
1093        }
1094        _ => {
1095            // Multiple divisions: a/b/c = a / (b * c)
1096            let numerator = parse_unit_product_with_registry(&parts[0], registry)?;
1097            let mut denominator = parse_unit_product_with_registry(&parts[1], registry)?;
1098            for part in &parts[2..] {
1099                let next = parse_unit_product_with_registry(part, registry)?;
1100                denominator = &denominator * &next;
1101            }
1102            Ok(&numerator / &denominator)
1103        }
1104    }
1105}
1106
/// Cuts a unit string at its division signs, leaving exponent fractions intact.
///
/// A `/` that follows `^` and an unbroken run of digits/signs is kept as part
/// of the exponent when the next character is a digit (or `-` plus a digit),
/// so "m^1/2" stays one piece. Every other `/` starts a new piece. Pieces are
/// trimmed; the result always holds at least one (possibly empty) string.
fn split_unit_by_division(s: &str) -> Vec<String> {
    let mut segments: Vec<String> = Vec::new();
    let mut buffer = String::new();
    let mut exponent_open = false;

    for (pos, ch) in s.char_indices() {
        match ch {
            '^' => {
                exponent_open = true;
                buffer.push(ch);
            }
            '/' => {
                // Inside an exponent, look ahead to tell "^1/2" from "m^2/s".
                let continues_fraction = exponent_open && {
                    let mut ahead = s[pos + 1..].chars();
                    match ahead.next() {
                        Some(d) if d.is_ascii_digit() => true,
                        Some('-') => ahead.next().map_or(false, |d| d.is_ascii_digit()),
                        _ => false,
                    }
                };

                if continues_fraction {
                    buffer.push(ch);
                } else {
                    exponent_open = false;
                    segments.push(buffer.trim().to_string());
                    buffer.clear();
                }
            }
            _ => {
                // Anything but a digit or sign (whitespace included) closes the exponent.
                if exponent_open && !(ch.is_ascii_digit() || ch == '-' || ch == '+') {
                    exponent_open = false;
                }
                buffer.push(ch);
            }
        }
    }

    // The check is on the untrimmed buffer, so trailing whitespace after a
    // `/` still yields an (empty) final piece.
    if !buffer.is_empty() {
        segments.push(buffer.trim().to_string());
    }

    if segments.is_empty() {
        segments.push(String::new());
    }

    segments
}
1177
1178/// Parse a product of units using a specific registry.
1179fn parse_unit_product_with_registry(
1180    s: &str,
1181    registry: &HashMap<String, UnitEntry>,
1182) -> UnitResult<Unit> {
1183    let s = s.trim();
1184    if s.is_empty() {
1185        return Ok(Unit::dimensionless());
1186    }
1187
1188    // Split by whitespace, *, or . (multiplication separators)
1189    let tokens: Vec<&str> = s
1190        .split(|c: char| c.is_whitespace() || c == '*' || c == '.')
1191        .filter(|t| !t.is_empty())
1192        .collect();
1193
1194    if tokens.is_empty() {
1195        return Ok(Unit::dimensionless());
1196    }
1197
1198    let mut result = parse_unit_with_power_registry(tokens[0], registry)?;
1199    for token in &tokens[1..] {
1200        let next = parse_unit_with_power_registry(token, registry)?;
1201        result = &result * &next;
1202    }
1203
1204    Ok(result)
1205}
1206
1207/// Parse a single unit with optional power using a specific registry.
1208fn parse_unit_with_power_registry(
1209    s: &str,
1210    registry: &HashMap<String, UnitEntry>,
1211) -> UnitResult<Unit> {
1212    let s = s.trim();
1213
1214    // Check for power notation
1215    if let Some(idx) = s.find('^') {
1216        let (name, power_str) = s.split_at(idx);
1217        let power_str = &power_str[1..]; // Skip the '^'
1218
1219        let power = parse_power(power_str)?;
1220        let base_unit = lookup_simple_unit_with_registry(name, registry)?;
1221        Ok(base_unit.pow(power))
1222    } else if let Some(idx) = s.find("**") {
1223        // Python-style power notation
1224        let (name, power_str) = s.split_at(idx);
1225        let power_str = &power_str[2..]; // Skip the '**'
1226
1227        let power = parse_power(power_str)?;
1228        let base_unit = lookup_simple_unit_with_registry(name, registry)?;
1229        Ok(base_unit.pow(power))
1230    } else {
1231        lookup_simple_unit_with_registry(s, registry)
1232    }
1233}
1234
1235/// Parse a power exponent (integer or fraction)
1236fn parse_power(s: &str) -> UnitResult<Rational16> {
1237    let s = s.trim();
1238
1239    // Check for fraction notation (e.g., "1/2")
1240    if let Some(idx) = s.find('/') {
1241        let (num_str, den_str) = s.split_at(idx);
1242        let den_str = &den_str[1..];
1243
1244        let num: i16 = num_str
1245            .trim()
1246            .parse()
1247            .map_err(|_| UnitError::ParseError(format!("invalid power numerator: {}", num_str)))?;
1248        let den: i16 = den_str.trim().parse().map_err(|_| {
1249            UnitError::ParseError(format!("invalid power denominator: {}", den_str))
1250        })?;
1251
1252        if den == 0 {
1253            return Err(UnitError::ParseError(
1254                "power denominator cannot be zero".into(),
1255            ));
1256        }
1257
1258        Ok(Rational16::new(num, den))
1259    } else {
1260        // Simple integer power
1261        let exp: i16 = s
1262            .parse()
1263            .map_err(|_| UnitError::ParseError(format!("invalid power: {}", s)))?;
1264        Ok(Rational16::new(exp, 1))
1265    }
1266}
1267
1268/// Look up a simple unit name using a specific registry.
1269fn lookup_simple_unit_with_registry(
1270    name: &str,
1271    registry: &HashMap<String, UnitEntry>,
1272) -> UnitResult<Unit> {
1273    let name = name.trim();
1274    let name_lower = name.to_lowercase();
1275
1276    if let Some(entry) = registry.get(&name_lower) {
1277        return Ok(entry.unit.clone());
1278    }
1279
1280    // Unit not found - provide helpful suggestions
1281    let suggestions = find_similar_units(name, registry, 3);
1282    Err(UnitError::UnknownUnit {
1283        name: name.to_string(),
1284        suggestions,
1285    })
1286}
1287
1288/// Parse a quantity string (value + unit).
1289///
1290/// Format: `<value> <unit>`
1291///
1292/// Examples:
1293/// - "5.0 km"
1294/// - "100 m/s"
1295/// - "9.8 m/s^2"
1296/// - "1.5e8 m"
1297/// - "-3.14 rad"
1298pub fn parse_quantity(s: &str) -> UnitResult<Quantity> {
1299    let registry = UNIT_REGISTRY
1300        .read()
1301        .map_err(|_| UnitError::ParseError("failed to acquire registry lock".into()))?;
1302    parse_quantity_with_registry(s, &registry)
1303}
1304
1305/// Parse a quantity string using a specific registry.
1306fn parse_quantity_with_registry(
1307    s: &str,
1308    registry: &HashMap<String, UnitEntry>,
1309) -> UnitResult<Quantity> {
1310    let s = s.trim();
1311
1312    // Find where the number ends and the unit begins
1313    // Numbers can contain: digits, '.', 'e', 'E', '+', '-'
1314    let mut unit_start = 0;
1315    let mut in_exponent = false;
1316
1317    for (i, c) in s.char_indices() {
1318        if c == 'e' || c == 'E' {
1319            in_exponent = true;
1320            continue;
1321        }
1322
1323        if in_exponent && (c == '+' || c == '-') {
1324            in_exponent = false;
1325            continue;
1326        }
1327
1328        if c.is_ascii_digit() || c == '.' || c == '-' || c == '+' {
1329            continue;
1330        }
1331
1332        // Found a non-number character
1333        if c.is_whitespace() {
1334            unit_start = i;
1335            break;
1336        } else {
1337            // Unit starts immediately after number (e.g., "5km")
1338            unit_start = i;
1339            break;
1340        }
1341    }
1342
1343    if unit_start == 0 {
1344        // No unit found, try parsing whole string as number
1345        return Err(UnitError::ParseError(format!(
1346            "cannot parse quantity: no unit found in '{}'",
1347            s
1348        )));
1349    }
1350
1351    let (value_str, unit_str) = s.split_at(unit_start);
1352    let value_str = value_str.trim();
1353    let unit_str = unit_str.trim();
1354
1355    let value: f64 = value_str
1356        .parse()
1357        .map_err(|_| UnitError::ParseError(format!("invalid number: '{}'", value_str)))?;
1358
1359    let unit = parse_unit_with_registry(unit_str, registry)?;
1360
1361    Ok(Quantity::new(value, unit))
1362}
1363
1364// Implement FromStr for Unit
1365impl FromStr for Unit {
1366    type Err = UnitError;
1367
1368    fn from_str(s: &str) -> Result<Self, Self::Err> {
1369        parse_unit(s)
1370    }
1371}
1372
1373// Implement FromStr for Quantity
1374impl FromStr for Quantity {
1375    type Err = UnitError;
1376
1377    fn from_str(s: &str) -> Result<Self, Self::Err> {
1378        parse_quantity(s)
1379    }
1380}
1381
1382#[cfg(test)]
1383mod tests {
1384    use super::*;
1385    use crate::systems::si::{H, KG, KM, M, S};
1386
1387    #[test]
1388    fn test_lookup_simple_unit() {
1389        let m = lookup_unit("m").unwrap();
1390        assert_eq!(m.symbol(), "m");
1391
1392        let meter = lookup_unit("meter").unwrap();
1393        assert_eq!(meter.symbol(), "m");
1394
1395        let meters = lookup_unit("meters").unwrap();
1396        assert_eq!(meters.symbol(), "m");
1397    }
1398
1399    #[test]
1400    fn test_lookup_case_insensitive() {
1401        let m1 = lookup_unit("M").unwrap();
1402        let m2 = lookup_unit("m").unwrap();
1403        let m3 = lookup_unit("METER").unwrap();
1404
1405        assert_eq!(m1.dimension(), m2.dimension());
1406        assert_eq!(m2.dimension(), m3.dimension());
1407    }
1408
1409    #[test]
1410    fn test_parse_simple_unit() {
1411        let m = parse_unit("m").unwrap();
1412        assert_eq!(m.dimension(), M.dimension());
1413
1414        let km = parse_unit("km").unwrap();
1415        assert_eq!(km.dimension(), KM.dimension());
1416    }
1417
1418    #[test]
1419    fn test_parse_unit_with_power() {
1420        let m2 = parse_unit("m^2").unwrap();
1421        let dim = m2.dimension();
1422        assert_eq!(dim.length, Rational16::new(2, 1));
1423
1424        let s_inv = parse_unit("s^-1").unwrap();
1425        let dim = s_inv.dimension();
1426        assert_eq!(dim.time, Rational16::new(-1, 1));
1427    }
1428
1429    #[test]
1430    fn test_parse_unit_division() {
1431        let velocity = parse_unit("m/s").unwrap();
1432        let dim = velocity.dimension();
1433        assert_eq!(dim.length, Rational16::ONE);
1434        assert_eq!(dim.time, Rational16::new(-1, 1));
1435    }
1436
1437    #[test]
1438    fn test_parse_unit_product() {
1439        let momentum = parse_unit("kg m").unwrap();
1440        let dim = momentum.dimension();
1441        assert_eq!(dim.mass, Rational16::ONE);
1442        assert_eq!(dim.length, Rational16::ONE);
1443
1444        // With asterisk
1445        let momentum2 = parse_unit("kg*m").unwrap();
1446        assert_eq!(momentum2.dimension(), momentum.dimension());
1447    }
1448
1449    #[test]
1450    fn test_parse_complex_unit() {
1451        // Energy: kg m^2 / s^2
1452        let energy = parse_unit("kg m^2 / s^2").unwrap();
1453        let dim = energy.dimension();
1454        assert_eq!(dim.mass, Rational16::ONE);
1455        assert_eq!(dim.length, Rational16::new(2, 1));
1456        assert_eq!(dim.time, Rational16::new(-2, 1));
1457    }
1458
1459    #[test]
1460    fn test_parse_acceleration() {
1461        let accel = parse_unit("m/s^2").unwrap();
1462        let dim = accel.dimension();
1463        assert_eq!(dim.length, Rational16::ONE);
1464        assert_eq!(dim.time, Rational16::new(-2, 1));
1465    }
1466
1467    #[test]
1468    fn test_parse_quantity_simple() {
1469        let q = parse_quantity("100 km").unwrap();
1470        assert!((q.value() - 100.0).abs() < 1e-10);
1471        assert_eq!(q.unit().dimension(), KM.dimension());
1472    }
1473
1474    #[test]
1475    fn test_parse_quantity_velocity() {
1476        let q = parse_quantity("10 m/s").unwrap();
1477        assert!((q.value() - 10.0).abs() < 1e-10);
1478        let dim = q.unit().dimension();
1479        assert_eq!(dim.length, Rational16::ONE);
1480        assert_eq!(dim.time, Rational16::new(-1, 1));
1481    }
1482
1483    #[test]
1484    fn test_parse_quantity_scientific() {
1485        let q = parse_quantity("1.5e8 m").unwrap();
1486        assert!((q.value() - 1.5e8).abs() < 1.0);
1487    }
1488
1489    #[test]
1490    fn test_parse_quantity_negative() {
1491        let q = parse_quantity("-3.14 rad").unwrap();
1492        assert!((q.value() - (-3.14)).abs() < 1e-10);
1493    }
1494
1495    #[test]
1496    fn test_unit_from_str() {
1497        let m: Unit = "m".parse().unwrap();
1498        assert_eq!(m.dimension(), M.dimension());
1499
1500        let velocity: Unit = "km/h".parse().unwrap();
1501        let expected_dim = (KM / H).dimension();
1502        assert_eq!(velocity.dimension(), expected_dim);
1503    }
1504
1505    #[test]
1506    fn test_quantity_from_str() {
1507        let q: Quantity = "100 km".parse().unwrap();
1508        assert!((q.value() - 100.0).abs() < 1e-10);
1509    }
1510
1511    #[test]
1512    fn test_unknown_unit_error() {
1513        let result = parse_unit("foo");
1514        assert!(matches!(result, Err(UnitError::UnknownUnit { .. })));
1515    }
1516
1517    #[test]
1518    fn test_unknown_unit_with_suggestions() {
1519        let result = parse_unit("metrs");
1520        match result {
1521            Err(UnitError::UnknownUnit { name, suggestions }) => {
1522                assert_eq!(name, "metrs");
1523                // Should suggest "meters" or similar
1524                assert!(!suggestions.is_empty());
1525            }
1526            _ => panic!("Expected UnknownUnit error"),
1527        }
1528    }
1529
1530    #[cfg(feature = "astrophysics")]
1531    #[test]
1532    fn test_astrophysical_units() {
1533        let pc = parse_unit("pc").unwrap();
1534        let au = parse_unit("AU").unwrap();
1535        let ly = parse_unit("ly").unwrap();
1536
1537        // All are length units
1538        assert_eq!(pc.dimension(), M.dimension());
1539        assert_eq!(au.dimension(), M.dimension());
1540        assert_eq!(ly.dimension(), M.dimension());
1541    }
1542
1543    #[test]
1544    fn test_imperial_units() {
1545        let ft = parse_unit("ft").unwrap();
1546        let mi = parse_unit("mi").unwrap();
1547        let lb = parse_unit("lb").unwrap();
1548
1549        assert_eq!(ft.dimension(), M.dimension());
1550        assert_eq!(mi.dimension(), M.dimension());
1551        assert_eq!(lb.dimension(), KG.dimension());
1552    }
1553
1554    #[test]
1555    fn test_dimensionless() {
1556        let d = parse_unit("").unwrap();
1557        assert!(d.is_dimensionless());
1558    }
1559
1560    #[test]
1561    fn test_fractional_power() {
1562        let sqrt_m = parse_unit("m^1/2").unwrap();
1563        let dim = sqrt_m.dimension();
1564        assert_eq!(dim.length, Rational16::new(1, 2));
1565    }
1566
1567    // ========================================================================
1568    // Unicode Parsing Tests
1569    // ========================================================================
1570
1571    #[test]
1572    fn test_unicode_superscript_power() {
1573        let m2 = parse_unit("m²").unwrap();
1574        let dim = m2.dimension();
1575        assert_eq!(dim.length, Rational16::new(2, 1));
1576
1577        let m3 = parse_unit("m³").unwrap();
1578        let dim = m3.dimension();
1579        assert_eq!(dim.length, Rational16::new(3, 1));
1580    }
1581
1582    #[test]
1583    fn test_unicode_negative_power() {
1584        let s_inv = parse_unit("s⁻¹").unwrap();
1585        let dim = s_inv.dimension();
1586        assert_eq!(dim.time, Rational16::new(-1, 1));
1587
1588        let accel = parse_unit("m/s²").unwrap();
1589        let dim = accel.dimension();
1590        assert_eq!(dim.length, Rational16::ONE);
1591        assert_eq!(dim.time, Rational16::new(-2, 1));
1592    }
1593
1594    #[test]
1595    fn test_unicode_micro() {
1596        let um = parse_unit("µm").unwrap();
1597        assert_eq!(um.dimension(), M.dimension());
1598    }
1599
1600    #[test]
1601    fn test_unicode_multiplication() {
1602        let momentum = parse_unit("kg·m").unwrap();
1603        let dim = momentum.dimension();
1604        assert_eq!(dim.mass, Rational16::ONE);
1605        assert_eq!(dim.length, Rational16::ONE);
1606
1607        let momentum2 = parse_unit("kg×m").unwrap();
1608        assert_eq!(momentum2.dimension(), momentum.dimension());
1609    }
1610
1611    #[test]
1612    fn test_unicode_division() {
1613        let velocity = parse_unit("m÷s").unwrap();
1614        let dim = velocity.dimension();
1615        assert_eq!(dim.length, Rational16::ONE);
1616        assert_eq!(dim.time, Rational16::new(-1, 1));
1617    }
1618
1619    // ========================================================================
1620    // LaTeX Format Tests
1621    // ========================================================================
1622
1623    #[test]
1624    fn test_latex_braces() {
1625        let m2 = parse_unit("m^{2}").unwrap();
1626        let dim = m2.dimension();
1627        assert_eq!(dim.length, Rational16::new(2, 1));
1628
1629        let energy = parse_unit("kg m^{2} / s^{2}").unwrap();
1630        let dim = energy.dimension();
1631        assert_eq!(dim.mass, Rational16::ONE);
1632        assert_eq!(dim.length, Rational16::new(2, 1));
1633        assert_eq!(dim.time, Rational16::new(-2, 1));
1634    }
1635
1636    #[test]
1637    fn test_latex_cdot() {
1638        let momentum = parse_unit(r"kg \cdot m").unwrap();
1639        let dim = momentum.dimension();
1640        assert_eq!(dim.mass, Rational16::ONE);
1641        assert_eq!(dim.length, Rational16::ONE);
1642    }
1643
1644    #[test]
1645    fn test_latex_times() {
1646        let area = parse_unit(r"m \times m").unwrap();
1647        let dim = area.dimension();
1648        assert_eq!(dim.length, Rational16::new(2, 1));
1649    }
1650
1651    // ========================================================================
1652    // Per Notation Tests
1653    // ========================================================================
1654
1655    #[test]
1656    fn test_per_notation() {
1657        let velocity = parse_unit("km per hour").unwrap();
1658        let dim = velocity.dimension();
1659        assert_eq!(dim.length, Rational16::ONE);
1660        assert_eq!(dim.time, Rational16::new(-1, 1));
1661
1662        let velocity2 = parse_unit("m per s").unwrap();
1663        assert_eq!(velocity2.dimension(), (M / S).dimension());
1664    }
1665
1666    #[test]
1667    fn test_per_notation_case_insensitive() {
1668        let v1 = parse_unit("km PER hour").unwrap();
1669        let v2 = parse_unit("km Per hour").unwrap();
1670        assert_eq!(v1.dimension(), v2.dimension());
1671    }
1672
1673    // ========================================================================
1674    // Subscript/Astrophysical Notation Tests
1675    // ========================================================================
1676
1677    #[cfg(feature = "astrophysics")]
1678    #[test]
1679    fn test_subscript_solar() {
1680        let msun = parse_unit("M_sun").unwrap();
1681        assert_eq!(msun.dimension(), KG.dimension());
1682
1683        let rsun = parse_unit("R_sun").unwrap();
1684        assert_eq!(rsun.dimension(), M.dimension());
1685    }
1686
1687    #[cfg(feature = "astrophysics")]
1688    #[test]
1689    fn test_subscript_planetary() {
1690        let mjup = parse_unit("M_jup").unwrap();
1691        assert_eq!(mjup.dimension(), KG.dimension());
1692
1693        let mearth = parse_unit("M_earth").unwrap();
1694        assert_eq!(mearth.dimension(), KG.dimension());
1695    }
1696
1697    // ========================================================================
1698    // Parentheses Support Tests
1699    // ========================================================================
1700
1701    #[test]
1702    fn test_parentheses_simple() {
1703        let force = parse_unit("(kg m)/s^2").unwrap();
1704        let dim = force.dimension();
1705        assert_eq!(dim.mass, Rational16::ONE);
1706        assert_eq!(dim.length, Rational16::ONE);
1707        assert_eq!(dim.time, Rational16::new(-2, 1));
1708    }
1709
1710    #[test]
1711    fn test_parentheses_denominator() {
1712        let unit = parse_unit("m/(s^2)").unwrap();
1713        let dim = unit.dimension();
1714        assert_eq!(dim.length, Rational16::ONE);
1715        assert_eq!(dim.time, Rational16::new(-2, 1));
1716    }
1717
1718    #[test]
1719    fn test_parentheses_with_power() {
1720        let unit = parse_unit("(m/s)^2").unwrap();
1721        let dim = unit.dimension();
1722        assert_eq!(dim.length, Rational16::new(2, 1));
1723        assert_eq!(dim.time, Rational16::new(-2, 1));
1724    }
1725
1726    #[test]
1727    fn test_parentheses_complex() {
1728        let unit = parse_unit("(kg m^2)/(s^2)").unwrap();
1729        let dim = unit.dimension();
1730        assert_eq!(dim.mass, Rational16::ONE);
1731        assert_eq!(dim.length, Rational16::new(2, 1));
1732        assert_eq!(dim.time, Rational16::new(-2, 1));
1733    }
1734
1735    #[test]
1736    fn test_unbalanced_parens_error() {
1737        let result = parse_unit("(m/s");
1738        assert!(result.is_err());
1739
1740        let result = parse_unit("m/s)");
1741        assert!(result.is_err());
1742    }
1743
1744    // ========================================================================
1745    // UnitRegistry Tests
1746    // ========================================================================
1747
1748    #[test]
1749    fn test_registry_new() {
1750        let registry = UnitRegistry::new();
1751        assert!(registry.is_empty());
1752        assert_eq!(registry.len(), 0);
1753    }
1754
1755    #[test]
1756    fn test_registry_with_builtins() {
1757        let registry = UnitRegistry::with_builtins();
1758        assert!(!registry.is_empty());
1759
1760        let m = registry.lookup("m").unwrap();
1761        assert_eq!(m.dimension(), M.dimension());
1762
1763        let km = registry.lookup("km").unwrap();
1764        assert_eq!(km.dimension(), KM.dimension());
1765    }
1766
1767    #[test]
1768    fn test_registry_register() {
1769        let mut registry = UnitRegistry::new();
1770        registry.register(&["custom", "cust"], Unit::from(M));
1771
1772        let custom = registry.lookup("custom").unwrap();
1773        assert_eq!(custom.dimension(), M.dimension());
1774
1775        let cust = registry.lookup("cust").unwrap();
1776        assert_eq!(cust.dimension(), M.dimension());
1777    }
1778
1779    #[test]
1780    fn test_registry_builder_pattern() {
1781        let registry = UnitRegistry::new()
1782            .with_unit(&["custom1"], Unit::from(M))
1783            .with_unit(&["custom2", "c2"], Unit::from(KG));
1784
1785        assert!(registry.lookup("custom1").is_some());
1786        assert!(registry.lookup("custom2").is_some());
1787        assert!(registry.lookup("c2").is_some());
1788    }
1789
1790    #[test]
1791    fn test_registry_parse_unit() {
1792        let registry = UnitRegistry::with_builtins();
1793
1794        let velocity = registry.parse_unit("m/s").unwrap();
1795        assert_eq!(velocity.dimension(), (M / S).dimension());
1796
1797        let energy = registry.parse_unit("kg m^2 / s^2").unwrap();
1798        let dim = energy.dimension();
1799        assert_eq!(dim.mass, Rational16::ONE);
1800        assert_eq!(dim.length, Rational16::new(2, 1));
1801        assert_eq!(dim.time, Rational16::new(-2, 1));
1802    }
1803
1804    #[test]
1805    fn test_registry_parse_quantity() {
1806        let registry = UnitRegistry::with_builtins();
1807
1808        let q = registry.parse_quantity("100 km").unwrap();
1809        assert!((q.value() - 100.0).abs() < 1e-10);
1810        assert_eq!(q.unit().dimension(), KM.dimension());
1811    }
1812
1813    #[test]
1814    fn test_registry_merge() {
1815        let mut registry1 = UnitRegistry::new();
1816        registry1.register(&["unit1"], Unit::from(M));
1817
1818        let mut registry2 = UnitRegistry::new();
1819        registry2.register(&["unit2"], Unit::from(KG));
1820
1821        registry1.merge(&registry2);
1822
1823        assert!(registry1.lookup("unit1").is_some());
1824        assert!(registry1.lookup("unit2").is_some());
1825    }
1826
1827    #[test]
1828    fn test_registry_names() {
1829        let mut registry = UnitRegistry::new();
1830        registry.register(&["a", "b", "c"], Unit::from(M));
1831
1832        let names = registry.names();
1833        assert_eq!(names.len(), 3);
1834        assert!(names.contains(&"a"));
1835        assert!(names.contains(&"b"));
1836        assert!(names.contains(&"c"));
1837    }
1838
1839    // ========================================================================
1840    // Levenshtein Distance Tests
1841    // ========================================================================
1842
1843    #[test]
1844    fn test_levenshtein_distance_identical() {
1845        assert_eq!(levenshtein_distance("meter", "meter"), 0);
1846    }
1847
1848    #[test]
1849    fn test_levenshtein_distance_one_char() {
1850        assert_eq!(levenshtein_distance("meter", "meters"), 1);
1851        assert_eq!(levenshtein_distance("metr", "meter"), 1);
1852    }
1853
1854    #[test]
1855    fn test_levenshtein_distance_swap() {
1856        assert_eq!(levenshtein_distance("metrs", "meters"), 1);
1857    }
1858
1859    #[test]
1860    fn test_levenshtein_distance_empty() {
1861        assert_eq!(levenshtein_distance("", "meter"), 5);
1862        assert_eq!(levenshtein_distance("meter", ""), 5);
1863    }
1864
1865    // ========================================================================
1866    // Normalization Function Tests
1867    // ========================================================================
1868
1869    #[test]
1870    fn test_normalize_unicode() {
1871        assert_eq!(normalize_unicode("m²"), "m^2");
1872        assert_eq!(normalize_unicode("s⁻¹"), "s^-1");
1873        assert_eq!(normalize_unicode("kg·m"), "kg*m");
1874        assert_eq!(normalize_unicode("µm"), "um");
1875    }
1876
1877    #[test]
1878    fn test_normalize_latex() {
1879        assert_eq!(normalize_latex("m^{2}"), "m^2");
1880        assert_eq!(normalize_latex(r"kg \cdot m"), "kg * m");
1881        assert_eq!(normalize_latex(r"\mu m"), "u m");
1882    }
1883
1884    #[test]
1885    fn test_normalize_per_notation() {
1886        assert_eq!(normalize_per_notation("km per hour"), "km / hour");
1887        assert_eq!(normalize_per_notation("m per s"), "m / s");
1888    }
1889
1890    #[test]
1891    fn test_normalize_subscripts() {
1892        let result = normalize_subscripts("M_sun");
1893        assert_eq!(result, "msun");
1894    }
1895
1896    // ========================================================================
1897    // Combined/Integration Tests
1898    // ========================================================================
1899
1900    #[cfg(feature = "astrophysics")]
1901    #[test]
1902    fn test_astrophysical_flux_unit() {
1903        let flux = parse_unit("erg/cm^2/s").unwrap();
1904        let dim = flux.dimension();
1905        // Energy / area / time = mass * length^2 / time^2 / length^2 / time
1906        // = mass / time^3
1907        assert_eq!(dim.mass, Rational16::ONE);
1908        assert_eq!(dim.time, Rational16::new(-3, 1));
1909    }
1910
1911    #[cfg(feature = "astrophysics")]
1912    #[test]
1913    fn test_unicode_astrophysical() {
1914        let flux = parse_unit("erg/cm²/s").unwrap();
1915        let dim = flux.dimension();
1916        assert_eq!(dim.mass, Rational16::ONE);
1917        assert_eq!(dim.time, Rational16::new(-3, 1));
1918    }
1919
1920    #[test]
1921    fn test_mixed_formats() {
1922        // Unicode superscript with division
1923        let accel = parse_unit("m·s⁻²").unwrap();
1924        let dim = accel.dimension();
1925        assert_eq!(dim.length, Rational16::ONE);
1926        assert_eq!(dim.time, Rational16::new(-2, 1));
1927    }
1928}