// awk_rs/value.rs

1use std::borrow::Cow;
2use std::cmp::Ordering;
3use std::fmt;
4
/// A dynamically typed AWK value that coerces between text and numbers on demand.
///
/// A value is in one of four states: never assigned, a number, a plain
/// string, or a "numeric string" (text that looks like a number and that
/// carries its parsed numeric value alongside the original spelling).
///
/// # Examples
///
/// ```
/// use awk_rs::Value;
///
/// // A number converts to text without a trailing ".0"
/// let num = Value::Number(42.0);
/// assert_eq!(num.to_number(), 42.0);
/// assert_eq!(num.to_string_val(), "42");
///
/// // Text that is not a number converts to 0
/// let s = Value::from_string("hello".to_string());
/// assert_eq!(s.to_string_val(), "hello");
/// assert_eq!(s.to_number(), 0.0);
///
/// // Text that looks like a number keeps both views
/// let ns = Value::from_string("123".to_string());
/// assert_eq!(ns.to_number(), 123.0);
/// assert_eq!(ns.to_string_val(), "123");
///
/// // Boolean context
/// assert!(Value::Number(1.0).is_truthy());
/// assert!(!Value::Number(0.0).is_truthy());
/// assert!(Value::from_string("hello".to_string()).is_truthy());
/// assert!(!Value::from_string("".to_string()).is_truthy());
/// ```
#[derive(Clone, Debug, Default)]
pub enum Value {
    /// Never-assigned value; reads as `""` or `0` depending on context.
    #[default]
    Uninitialized,
    /// A floating-point number.
    Number(f64),
    /// A plain string.
    String(String),
    /// A string that looks like a number, stored together with its parsed
    /// value (this is what gives such text numeric comparison semantics).
    NumericString(String, f64),
}
50
51impl Value {
52    /// Create a new string value, detecting if it's a numeric string
53    #[inline]
54    pub fn from_string(s: String) -> Self {
55        if let Some(num) = parse_numeric_string(&s) {
56            Value::NumericString(s, num)
57        } else {
58            Value::String(s)
59        }
60    }
61
62    /// Create a numeric value
63    #[inline]
64    pub fn from_number(n: f64) -> Self {
65        Value::Number(n)
66    }
67
68    /// Check if this value is "true" in boolean context
69    /// - Uninitialized is false
70    /// - Number 0 is false
71    /// - Empty string is false
72    /// - Everything else is true
73    #[inline]
74    pub fn is_truthy(&self) -> bool {
75        match self {
76            Value::Uninitialized => false,
77            Value::Number(n) => *n != 0.0,
78            Value::String(s) => !s.is_empty(),
79            Value::NumericString(s, _) => !s.is_empty(),
80        }
81    }
82
83    /// Coerce to numeric value
84    #[inline]
85    pub fn to_number(&self) -> f64 {
86        match self {
87            Value::Uninitialized => 0.0,
88            Value::Number(n) => *n,
89            Value::String(s) => parse_leading_number(s),
90            Value::NumericString(_, n) => *n,
91        }
92    }
93
94    /// Coerce to string value
95    #[inline]
96    pub fn to_string_val(&self) -> String {
97        self.to_string_with_format("%.6g")
98    }
99
100    /// Get string as Cow to avoid allocation when possible
101    #[inline]
102    pub fn as_str(&self) -> Cow<'_, str> {
103        match self {
104            Value::Uninitialized => Cow::Borrowed(""),
105            Value::Number(n) => Cow::Owned(format_number(*n, "%.6g")),
106            Value::String(s) => Cow::Borrowed(s),
107            Value::NumericString(s, _) => Cow::Borrowed(s),
108        }
109    }
110
111    /// Coerce to string with specific format (for OFMT/CONVFMT)
112    pub fn to_string_with_format(&self, format: &str) -> String {
113        match self {
114            Value::Uninitialized => String::new(),
115            Value::Number(n) => format_number(*n, format),
116            Value::String(s) => s.clone(),
117            Value::NumericString(s, _) => s.clone(),
118        }
119    }
120
121    /// Check if this value is definitely numeric
122    #[inline]
123    pub fn is_numeric(&self) -> bool {
124        matches!(self, Value::Number(_))
125    }
126
127    /// Check if this value is a numeric string
128    #[inline]
129    pub fn is_numeric_string(&self) -> bool {
130        matches!(self, Value::NumericString(_, _))
131    }
132
133    /// Check if this value should compare as a number
134    #[inline]
135    pub fn compares_as_number(&self) -> bool {
136        matches!(
137            self,
138            Value::Number(_) | Value::NumericString(_, _) | Value::Uninitialized
139        )
140    }
141}
142
143impl fmt::Display for Value {
144    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
145        match self {
146            Value::Uninitialized => Ok(()),
147            Value::Number(n) => write!(f, "{}", format_number(*n, "%.6g")),
148            Value::String(s) => write!(f, "{}", s),
149            Value::NumericString(s, _) => write!(f, "{}", s),
150        }
151    }
152}
153
154/// Compare two AWK values according to AWK comparison rules
155#[inline]
156pub fn compare_values(left: &Value, right: &Value) -> Ordering {
157    // If both are numeric or numeric strings, compare numerically
158    if left.compares_as_number() && right.compares_as_number() {
159        let l = left.to_number();
160        let r = right.to_number();
161        l.partial_cmp(&r).unwrap_or(Ordering::Equal)
162    } else {
163        // Otherwise compare as strings - use as_str to avoid allocation
164        left.as_str().cmp(&right.as_str())
165    }
166}
167
/// Parses the leading numeric portion of a string, ignoring whatever follows.
///
/// Leading ASCII whitespace is skipped, then the longest prefix of the form
/// `[+-] digits [. digits] [(e|E) [+-] digits]` is converted. At least one
/// digit must appear before the exponent; an exponent marker without digits
/// is not part of the number.
///
/// ```text
/// "42abc"    -> 42.0
/// "  3.14  " -> 3.14
/// "1e"       -> 1.0
/// "abc"      -> 0.0
/// "-0"       -> -0.0
/// ```
///
/// Returns `0.0` when the string has no numeric prefix.
#[inline]
pub fn parse_leading_number(s: &str) -> f64 {
    let bytes = s.as_bytes();

    // Index of the first non-digit byte at or after `at`.
    let digits_from = |mut at: usize| {
        while at < bytes.len() && bytes[at].is_ascii_digit() {
            at += 1;
        }
        at
    };

    let start = bytes
        .iter()
        .position(|b| !b.is_ascii_whitespace())
        .unwrap_or(bytes.len());
    let mut end = start;

    // Optional sign.
    if matches!(bytes.get(end), Some(&b'+') | Some(&b'-')) {
        end += 1;
    }

    // Integer digits.
    let int_end = digits_from(end);
    let mut has_digits = int_end > end;
    let mut is_integer = true;
    end = int_end;

    // Decimal point and fraction digits.
    if bytes.get(end) == Some(&b'.') {
        let frac_start = end + 1;
        let frac_end = digits_from(frac_start);
        has_digits |= frac_end > frac_start;
        is_integer = false;
        end = frac_end;
    }

    if !has_digits {
        return 0.0;
    }

    // Exponent: only consumed when at least one digit follows the marker
    // (and its optional sign); otherwise the marker belongs to the trailing text.
    if matches!(bytes.get(end), Some(&b'e') | Some(&b'E')) {
        let mut exp_start = end + 1;
        if matches!(bytes.get(exp_start), Some(&b'+') | Some(&b'-')) {
            exp_start += 1;
        }
        let exp_end = digits_from(exp_start);
        if exp_end > exp_start {
            is_integer = false;
            end = exp_end;
        }
    }

    // Every consumed byte is ASCII, so both indices are char boundaries.
    let literal = &s[start..end];

    // Fast path for the common plain-integer case. Integers too large for
    // i64 fall through to the float parser below.
    if is_integer {
        if let Ok(n) = literal.parse::<i64>() {
            // i64 has no negative zero; keep the sign so "-0" agrees with
            // "-0.0" and with the float parser.
            return if n == 0 && literal.starts_with('-') {
                -0.0
            } else {
                n as f64
            };
        }
    }

    literal.parse().unwrap_or(0.0)
}
243
/// Decides whether `s`, taken as a whole, looks like a number and, if so,
/// returns its value.
///
/// Surrounding whitespace is ignored. The text must consist of an optional
/// sign followed only by digits, at most one `.` (before any exponent), and
/// at most one `e`/`E` exponent marker that may be followed by signs; the
/// final verdict is left to the standard float parser. Empty or
/// whitespace-only input, and words such as `inf` or `nan`, yield `None`.
#[inline]
fn parse_numeric_string(s: &str) -> Option<f64> {
    let text = s.trim();
    if text.is_empty() {
        return None;
    }

    // Fast path: a run of plain digits needs no further screening.
    if text.bytes().all(|b| b.is_ascii_digit()) {
        return text.parse().ok();
    }

    // Screen the part after an optional leading sign.
    let unsigned = text
        .strip_prefix(|c: char| c == '+' || c == '-')
        .unwrap_or(text);

    let mut seen_dot = false;
    let mut seen_exp = false;
    for (idx, byte) in unsigned.bytes().enumerate() {
        if byte.is_ascii_digit() {
            continue;
        }
        let accepted = match byte {
            // One decimal point, and only in the mantissa.
            b'.' if !(seen_dot || seen_exp) => {
                seen_dot = true;
                true
            }
            // One exponent marker, never as the very first character.
            b'e' | b'E' if idx > 0 && !seen_exp => {
                seen_exp = true;
                true
            }
            // Signs are tolerated only once an exponent has started.
            b'+' | b'-' => seen_exp,
            _ => false,
        };
        if !accepted {
            return None;
        }
    }

    // The screen above only filters characters; the parser validates shape.
    text.parse().ok()
}
288
/// Formats a number according to a printf-style conversion such as the AWK
/// `OFMT`/`CONVFMT` value.
///
/// Rules, in order:
///
/// 1. NaN prints as `nan`, infinities as `inf` / `-inf`.
/// 2. A value with no fractional part prints as a plain integer whatever the
///    format is.
/// 3. Otherwise a bare `%[.precision]conv` spec is honoured for `g`/`G`
///    (significant digits, trailing zeros removed, exponent form when the
///    decimal exponent is below -4 or not below the precision), `f`/`F`,
///    `e`/`E` and `d`/`i` (truncation toward zero). The default precision is 6.
/// 4. Any other format string (flags, widths, surrounding text, unknown
///    conversions) falls back to Rust's shortest round-trip rendering.
pub fn format_number(n: f64, format: &str) -> String {
    if n.is_nan() {
        return "nan".to_string();
    }
    if n.is_infinite() {
        return if n > 0.0 { "inf" } else { "-inf" }.to_string();
    }
    if n.fract() == 0.0 {
        return format_integral(n);
    }

    let (precision, conversion) = match parse_simple_spec(format) {
        Some(spec) => spec,
        None => return n.to_string(),
    };
    let upper = conversion.is_ascii_uppercase();
    match conversion {
        'g' | 'G' => format_general(n, precision.unwrap_or(6), upper),
        'f' | 'F' => format!("{:.*}", precision.unwrap_or(6), n),
        'e' | 'E' => format_exponent(n, precision.unwrap_or(6), upper),
        'd' | 'i' => format_integral(n.trunc()),
        _ => n.to_string(),
    }
}

/// Splits a bare `%[.precision]conv` spec into its precision and conversion
/// character; returns `None` for anything more elaborate.
fn parse_simple_spec(format: &str) -> Option<(Option<usize>, char)> {
    let rest = format.strip_prefix('%')?;
    let mut chars = rest.chars();
    let conversion = chars.next_back()?;
    let middle = chars.as_str();

    let precision = if middle.is_empty() {
        None
    } else {
        let digits = middle.strip_prefix('.')?;
        if digits.is_empty() {
            // As in C, "%.f" means a precision of zero.
            Some(0)
        } else if digits.bytes().all(|b| b.is_ascii_digit()) {
            // u16 keeps the value inside the range `format!` accepts.
            Some(usize::from(digits.parse::<u16>().ok()?))
        } else {
            return None;
        }
    };
    Some((precision, conversion))
}

/// Renders a finite value that has no fractional part as plain digits.
fn format_integral(n: f64) -> String {
    if n.abs() < 1e15 {
        // Well inside i64 range, so the cast is exact.
        itoa_fast(n as i64)
    } else {
        format!("{:.0}", n)
    }
}

/// `%g`-style rendering: `precision` significant digits, trailing zeros removed.
fn format_general(n: f64, precision: usize, upper: bool) -> String {
    let significant = precision.max(1);
    // The exponent must be taken after rounding to the requested digits.
    let sci = format!("{:.*e}", significant - 1, n);
    let (mantissa, exponent) = split_scientific(&sci);

    if exponent < -4 || exponent >= significant as i32 {
        c_style_exponent(trim_fraction_zeros(mantissa), exponent, upper)
    } else {
        let decimals = (significant as i32 - 1 - exponent) as usize;
        trim_fraction_zeros(&format!("{:.*}", decimals, n)).to_string()
    }
}

/// `%e`-style rendering with `precision` digits after the decimal point.
fn format_exponent(n: f64, precision: usize, upper: bool) -> String {
    let sci = format!("{:.*e}", precision, n);
    let (mantissa, exponent) = split_scientific(&sci);
    c_style_exponent(mantissa, exponent, upper)
}

/// Splits Rust's `1.5e-7` notation into mantissa text and decimal exponent.
fn split_scientific(sci: &str) -> (&str, i32) {
    match sci.split_once('e') {
        Some((mantissa, exponent)) => (mantissa, exponent.parse().unwrap_or(0)),
        None => (sci, 0),
    }
}

/// Joins a mantissa and exponent the way C does: explicit sign, two digits minimum.
fn c_style_exponent(mantissa: &str, exponent: i32, upper: bool) -> String {
    let marker = if upper { 'E' } else { 'e' };
    let sign = if exponent < 0 { '-' } else { '+' };
    format!("{}{}{}{:02}", mantissa, marker, sign, exponent.unsigned_abs())
}

/// Drops trailing zeros of a fraction, and the decimal point if nothing is left after it.
fn trim_fraction_zeros(digits: &str) -> &str {
    if digits.contains('.') {
        digits.trim_end_matches('0').trim_end_matches('.')
    } else {
        digits
    }
}

/// Fast integer to string conversion.
///
/// Digits are written back-to-front into a stack buffer, so the result is
/// built with a single allocation. The magnitude is handled as `u64`, which
/// keeps `i64::MIN` from overflowing on negation.
#[inline]
fn itoa_fast(n: i64) -> String {
    if n == 0 {
        return "0".to_string();
    }

    // 19 digits for the largest magnitude plus one byte for the sign.
    let mut buf = [0u8; 20];
    let mut pos = buf.len();
    let mut magnitude = n.unsigned_abs();

    while magnitude > 0 {
        pos -= 1;
        buf[pos] = b'0' + (magnitude % 10) as u8;
        magnitude /= 10;
    }

    if n < 0 {
        pos -= 1;
        buf[pos] = b'-';
    }

    buf[pos..].iter().map(|&b| char::from(b)).collect()
}
346
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a value from a string literal.
    fn text(s: &str) -> Value {
        Value::from_string(s.to_string())
    }

    // --- Coercion of each variant -------------------------------------------

    #[test]
    fn test_uninitialized() {
        let value = Value::Uninitialized;
        assert_eq!(value.to_number(), 0.0);
        assert_eq!(value.to_string_val(), "");
        assert!(!value.is_truthy());
    }

    #[test]
    fn test_number() {
        let answer = Value::Number(42.0);
        assert_eq!(answer.to_number(), 42.0);
        assert_eq!(answer.to_string_val(), "42");
        assert!(answer.is_truthy());

        assert!(!Value::Number(0.0).is_truthy());
    }

    #[test]
    fn test_string() {
        let word = text("hello");
        assert_eq!(word.to_number(), 0.0);
        assert_eq!(word.to_string_val(), "hello");
        assert!(word.is_truthy());

        assert!(!text("").is_truthy());
    }

    #[test]
    fn test_numeric_string() {
        let value = text("42");
        assert!(value.is_numeric_string());
        assert_eq!(value.to_number(), 42.0);
        assert_eq!(value.to_string_val(), "42");
    }

    // --- Leading-number parsing ---------------------------------------------

    #[test]
    fn test_leading_number() {
        let cases = [
            ("42abc", 42.0),
            ("  2.75  ", 2.75),
            ("abc", 0.0),
            ("-5.5", -5.5),
            ("1e10", 1e10),
        ];
        for (input, expected) in cases {
            assert_eq!(parse_leading_number(input), expected);
        }
    }

    // --- Comparison -----------------------------------------------------------

    #[test]
    fn test_comparison() {
        // Two numbers compare numerically.
        let ten = Value::Number(10.0);
        let two = Value::Number(2.0);
        assert_eq!(compare_values(&ten, &two), Ordering::Greater);

        // Two numeric strings also compare numerically ("10" > "2").
        assert_eq!(compare_values(&text("10"), &text("2")), Ordering::Greater);

        // Two plain strings compare lexically.
        assert_eq!(compare_values(&text("abc"), &text("def")), Ordering::Less);
    }

    // --- Number formatting ----------------------------------------------------

    #[test]
    fn test_itoa_fast() {
        let cases = [(0, "0"), (42, "42"), (-123, "-123"), (1000000, "1000000")];
        for (input, expected) in cases {
            assert_eq!(itoa_fast(input), expected);
        }
    }

    #[test]
    fn test_format_number_nan() {
        assert_eq!(format_number(f64::NAN, "%.6g"), "nan");
    }

    #[test]
    fn test_format_number_inf() {
        assert_eq!(format_number(f64::INFINITY, "%.6g"), "inf");
        assert_eq!(format_number(f64::NEG_INFINITY, "%.6g"), "-inf");
    }

    #[test]
    fn test_format_number_integer() {
        assert_eq!(format_number(42.0, "%.6g"), "42");
        assert_eq!(format_number(-100.0, "%.6g"), "-100");
    }

    #[test]
    fn test_format_number_float() {
        assert_eq!(format_number(2.75, "%.6g"), "2.75");
    }

    // --- Constructors and truthiness -----------------------------------------

    #[test]
    fn test_from_number() {
        assert_eq!(Value::from_number(2.75).to_number(), 2.75);
    }

    #[test]
    fn test_is_truthy_numeric_string() {
        assert!(Value::NumericString("42".to_string(), 42.0).is_truthy());
        assert!(!Value::NumericString("".to_string(), 0.0).is_truthy());
    }

    // --- Mixed-type comparison ------------------------------------------------

    #[test]
    fn test_comparison_number_vs_string() {
        // A number against a non-numeric string never compares equal here.
        let number = Value::Number(10.0);
        let word = text("hello");
        assert_ne!(compare_values(&number, &word), Ordering::Equal);
    }

    #[test]
    fn test_comparison_uninitialized() {
        // Uninitialized counts as 0, which is below 1.
        let unset = Value::Uninitialized;
        let one = Value::Number(1.0);
        assert_eq!(compare_values(&unset, &one), Ordering::Less);
    }

    // --- Signs and exponents --------------------------------------------------

    #[test]
    fn test_parse_leading_with_sign() {
        assert_eq!(parse_leading_number("+42"), 42.0);
        assert_eq!(parse_leading_number("  +2.75"), 2.75);
    }

    #[test]
    fn test_parse_leading_exponent() {
        assert_eq!(parse_leading_number("1e-5"), 1e-5);
        assert_eq!(parse_leading_number("2E+3"), 2000.0);
    }

    #[test]
    fn test_numeric_string_with_exponent() {
        let value = text("1e5");
        assert!(value.is_numeric_string());
        assert_eq!(value.to_number(), 1e5);
    }

    #[test]
    fn test_numeric_string_with_sign() {
        let value = text("-42.5");
        assert!(value.is_numeric_string());
        assert_eq!(value.to_number(), -42.5);
    }

    #[test]
    fn test_numeric_string_whitespace() {
        let value = text("  123  ");
        assert!(value.is_numeric_string());
        assert_eq!(value.to_number(), 123.0);
    }

    // --- Uninitialized coercions, checked individually ------------------------

    #[test]
    fn test_to_string_val_uninitialized() {
        assert_eq!(Value::Uninitialized.to_string_val(), "");
    }

    #[test]
    fn test_to_number_uninitialized() {
        assert_eq!(Value::Uninitialized.to_number(), 0.0);
    }
}