netform_ir 0.6.0

Lossless intermediate representation and parser for network configuration text
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
//! Score-based dialect auto-detection from configuration text.
//!
//! Scans lines for dialect-specific patterns, accumulates per-dialect scores,
//! and returns the highest-scoring dialect as a [`DialectHint`].  Falls back to
//! [`DialectHint::Generic`] when the top score is too low or when the margin
//! between the top two candidates is too narrow.
//!
//! # Scoring algorithm
//!
//! Each input line is tested against a set of pattern rules per dialect.  When a
//! rule matches, the corresponding dialect's score is incremented by a weight
//! that reflects how distinctive the pattern is:
//!
//! | Weight | Meaning | Examples |
//! |--------|---------|---------|
//! | `STRONG_SIGNAL` (3) | Highly distinctive, near-unique to one dialect | FortiOS `config <section>`, NX-OS `feature <name>`, Junos stanza names |
//! | `MODERATE_SIGNAL` (2) | Moderately distinctive | FortiOS `end`/`next`, Junos brace blocks, EOS non-slot interfaces |
//! | `WEAK_SIGNAL` (1) | Weakly suggestive, shared across dialects | Junos semicolons, FortiOS plain `set`, IOS XE wildcard masks |
//!
//! After scoring, the highest-scoring dialect is accepted only if:
//! 1. Its score meets `MIN_CONFIDENCE_SCORE` (currently 3 — at least one
//!    strong signal or multiple weaker ones).
//! 2. Its score is at least `MARGIN_FACTOR`× the runner-up's score, ensuring
//!    the winner stands clearly above the noise.
//!
//! Both checks must pass; otherwise the result is [`DialectHint::Generic`].
//!
//! # Example
//!
//! ```rust
//! use netform_ir::detect::detect_dialect;
//! use netform_ir::DialectHint;
//!
//! let junos_cfg = "interfaces {\n    ge-0/0/0 {\n        mtu 9216;\n    }\n}\n";
//! assert_eq!(detect_dialect(junos_cfg), DialectHint::Named("junos".into()));
//!
//! assert_eq!(detect_dialect(""), DialectHint::Generic);
//! ```

use crate::{DialectHint, Document, parse_generic};

// ---------------------------------------------------------------------------
// Scoring constants
// ---------------------------------------------------------------------------

/// Weight added for a highly distinctive, near dialect-unique pattern (e.g.
/// FortiOS `config <section>`, NX-OS `feature <name>`, Junos top-level stanza
/// names).  Equal to `MIN_CONFIDENCE_SCORE`, so one uncontested match is
/// already enough to name a dialect.
const STRONG_SIGNAL: i32 = 3;

/// Weight added for a moderately distinctive pattern (e.g. FortiOS
/// `end`/`next`, Junos brace open/close, EOS non-slot Ethernet interfaces).
/// A single match stays below `MIN_CONFIDENCE_SCORE`; two are needed.
const MODERATE_SIGNAL: i32 = 2;

/// Weight added for a pattern that only weakly suggests a dialect (e.g. Junos
/// trailing semicolons, FortiOS plain `set <field>`, IOS XE wildcard masks in
/// ACLs).  Three matches are needed to reach `MIN_CONFIDENCE_SCORE`.
const WEAK_SIGNAL: i32 = 1;

// ---------------------------------------------------------------------------
// Decision thresholds
// ---------------------------------------------------------------------------

/// Minimum total score the top dialect must reach to be reported.  Below this
/// threshold the input is treated as too short or too ambiguous to identify
/// and detection falls back to the generic hint.
const MIN_CONFIDENCE_SCORE: i32 = 3;

/// The winning dialect must outscore the runner-up by at least this factor.
/// A value of 2 means the winner needs ≥ 2× the runner-up's score.  As a
/// consequence a non-zero tie is always rejected, while a runner-up score of
/// zero never blocks the winner.
const MARGIN_FACTOR: i32 = 2;

/// Detect the likely network-device dialect from configuration text.
///
/// Each line is trimmed; blank lines and lines consisting solely of `!` are
/// skipped.  Every remaining line is tested against all pattern rules, and each
/// matching rule adds its weight (`STRONG_SIGNAL`, `MODERATE_SIGNAL` or
/// `WEAK_SIGNAL`) to the matching dialect's running score.  One line may match
/// several rules, including rules belonging to different dialects.
///
/// The top-scoring dialect is reported only when its score is at least
/// `MIN_CONFIDENCE_SCORE` **and** at least `MARGIN_FACTOR` times the
/// runner-up's score; otherwise the result is [`DialectHint::Generic`].
///
/// Returns a [`DialectHint`] identifying the detected dialect:
/// - `DialectHint::Named("eos")` — Arista EOS
/// - `DialectHint::Named("fortios")` — Fortinet FortiOS
/// - `DialectHint::Named("iosxe")` — Cisco IOS XE
/// - `DialectHint::Named("junos")` — Juniper Junos
/// - `DialectHint::Named("nxos")` — Cisco NX-OS
/// - `DialectHint::Generic` — no dialect detected with sufficient confidence
pub fn detect_dialect(input: &str) -> DialectHint {
    let mut fortios: i32 = 0;
    let mut junos: i32 = 0;
    let mut nxos: i32 = 0;
    let mut eos: i32 = 0;
    let mut iosxe: i32 = 0;

    for line in input.lines().map(str::trim) {
        if line.is_empty() || line == "!" {
            continue;
        }

        // Tokenize once; every positional rule below shares this view.
        let tokens: Vec<&str> = line.split_whitespace().collect();
        let first = tokens.first().copied().unwrap_or("");

        // --- FortiOS signals ---
        // `config <section>` / `end` block structure is unique to FortiOS.
        // A trimmed line that starts with `config ` always carries a section
        // token, so no separate token-count check is needed.
        if line.starts_with("config ") && !line.contains('{') {
            fortios += STRONG_SIGNAL;
        }
        if line.starts_with("edit ") {
            fortios += STRONG_SIGNAL;
        }
        if matches!(line, "end" | "next") {
            fortios += MODERATE_SIGNAL;
        }
        if line.starts_with("set ") || line.starts_with("unset ") {
            let target = tokens.get(1).copied().unwrap_or("");
            if is_junos_stanza_name(target) {
                // `set interfaces ...`, `set protocols ...` etc — Junos set-style.
                junos += STRONG_SIGNAL;
            } else {
                // Plain `set <field> <value>` leans FortiOS (inside config blocks).
                fortios += WEAK_SIGNAL;
            }
        }

        // --- Junos signals ---
        // Brace-and-semicolon syntax is highly distinctive.
        if line.ends_with('{') {
            junos += MODERATE_SIGNAL;
        }
        let closes_block = line == "}" || line.ends_with("};");
        if closes_block {
            junos += MODERATE_SIGNAL;
        }
        // A `};` ending was already counted as a block close above.
        if line.ends_with(';') && !line.ends_with("};") {
            junos += WEAK_SIGNAL;
        }
        // Junos-specific stanza names at top-level (hierarchical style).
        if is_junos_stanza_name(first) {
            junos += STRONG_SIGNAL;
        }

        // --- NX-OS signals ---
        // `feature <name>` is unique to NX-OS among supported dialects.
        if line.starts_with("feature ") {
            nxos += STRONG_SIGNAL;
        }
        // Slot/port interfaces: Ethernet1/1, port-channel1, etc.
        // `strip_prefix` removes the keyword exactly once, so a malformed
        // `interface interface Ethernet1/1` is not mistaken for a real
        // interface name.
        if let Some(iface) = line.strip_prefix("interface ") {
            if iface.starts_with("Ethernet") && iface.contains('/') {
                // NX-OS uses plain Ethernet with slot/port (Ethernet1/1).
                // IOS XE uses GigabitEthernet, TenGigabitEthernet etc. with slashes.
                nxos += STRONG_SIGNAL;
            } else if iface.starts_with("Ethernet") || iface.starts_with("Management") {
                // No slot → could be EOS.
                eos += MODERATE_SIGNAL;
            }
        }
        if line.starts_with("vpc ") {
            nxos += STRONG_SIGNAL;
        }
        if line.starts_with("role name ") {
            nxos += MODERATE_SIGNAL;
        }

        // --- IOS XE signals ---
        // `ip access-list extended` is a strong IOS XE marker.
        if line.starts_with("ip access-list extended ") {
            iosxe += STRONG_SIGNAL;
        }
        // `network ... mask ...` in BGP address-family.
        if line.starts_with("network ") && line.contains(" mask ") {
            iosxe += MODERATE_SIGNAL;
        }
        // Wildcard masks in ACL permit/deny lines.
        if (line.starts_with("permit ") || line.starts_with("deny "))
            && tokens.iter().any(|token| looks_like_dotted_mask(token))
        {
            iosxe += WEAK_SIGNAL;
        }

        // --- `ip address`: dotted mask (IOS XE) versus CIDR (EOS) ---
        if line.starts_with("ip address ") {
            // `ip address <addr> <mask>` — IOS XE uses masks, not CIDR.
            if tokens
                .get(3)
                .is_some_and(|mask| looks_like_dotted_mask(mask))
            {
                iosxe += MODERATE_SIGNAL;
            }
            // `ip address <addr>/<len>` — EOS uses CIDR notation.
            if tokens.get(2).is_some_and(|addr| addr.contains('/')) {
                eos += MODERATE_SIGNAL;
            }
        }

        // --- EOS signals ---
        // Named ACL without the `extended` keyword.  Only the keyword position
        // is inspected, so an ACL whose *name* merely contains "extended"
        // still counts for EOS.
        if line.starts_with("ip access-list ") && tokens.get(2) != Some(&"extended") {
            eos += MODERATE_SIGNAL;
        }
        // Numbered ACL entries (EOS style: `10 permit ...`).
        let has_sequence_number = first.parse::<u32>().is_ok();
        if has_sequence_number && (line.contains(" permit ") || line.contains(" deny ")) {
            eos += WEAK_SIGNAL;
        }
    }

    // Rank candidates by descending score.  The sort is stable, but the
    // tie-break order never leaks out: a non-zero tie fails the margin check
    // and an all-zero tie fails the confidence check.
    let mut ranked = [
        ("fortios", fortios),
        ("junos", junos),
        ("nxos", nxos),
        ("eos", eos),
        ("iosxe", iosxe),
    ];
    ranked.sort_by_key(|&(_, score)| std::cmp::Reverse(score));
    let [(best_name, best_score), (_, runner_up_score), ..] = ranked;

    let confident = best_score >= MIN_CONFIDENCE_SCORE;
    let clear_margin = best_score >= runner_up_score * MARGIN_FACTOR;

    if confident && clear_margin {
        DialectHint::Named(best_name.to_string())
    } else {
        DialectHint::Generic
    }
}

/// Parse `input` and tag the resulting document with an auto-detected dialect.
///
/// The dialect is determined by [`detect_dialect`]; the text itself is parsed
/// by the generic parser, and the detected [`DialectHint`] is then stored in
/// the document's `metadata.dialect_hint` field.
///
/// This does not run any dialect-specific parser.  For full dialect-specific
/// parsing (Junos brace handling, FortiOS block structure, etc.), call
/// [`detect_dialect`] directly and dispatch to the appropriate dialect parser.
pub fn auto_parse(input: &str) -> Document {
    let detected = detect_dialect(input);

    let mut document = parse_generic(input);
    document.metadata.dialect_hint = detected;
    document
}

/// Reports whether `name` is exactly one of the well-known Junos top-level
/// stanza names.
///
/// The comparison is case-sensitive and matches the whole string; surrounding
/// whitespace or trailing punctuation makes the lookup fail.
fn is_junos_stanza_name(name: &str) -> bool {
    const JUNOS_TOP_LEVEL_STANZAS: [&str; 15] = [
        "interfaces",
        "protocols",
        "policy-options",
        "routing-options",
        "forwarding-options",
        "class-of-service",
        "system",
        "security",
        "firewall",
        "vlans",
        "chassis",
        "snmp",
        "applications",
        "groups",
        "routing-instances",
    ];

    JUNOS_TOP_LEVEL_STANZAS
        .iter()
        .any(|&stanza| stanza == name)
}

/// Returns `true` if `s` looks like a dotted-decimal subnet or wildcard mask
/// (e.g. `255.255.255.0` or `0.0.0.255`).
///
/// The check is purely syntactic: `s` must consist of exactly four
/// `.`-separated fields, each made up solely of ASCII digits with a value in
/// `0..=255`.  Any dotted quad therefore qualifies (an ordinary address such
/// as `10.10.1.0` included); the bit pattern is not inspected.
fn looks_like_dotted_mask(s: &str) -> bool {
    // `u8::from_str` tolerates a leading `+`, so insist on digits explicitly;
    // the parse then only enforces the 0..=255 range (and rejects "").
    let is_octet =
        |field: &str| field.bytes().all(|b| b.is_ascii_digit()) && field.parse::<u8>().is_ok();

    let mut octets = 0;
    for field in s.split('.') {
        octets += 1;
        if octets > 4 || !is_octet(field) {
            return false;
        }
    }
    octets == 4
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the hint expected when `dialect` is positively identified.
    fn named(dialect: &str) -> DialectHint {
        DialectHint::Named(dialect.to_string())
    }

    /// Asserts that `config` is detected as `dialect`.
    #[track_caller]
    fn expect_dialect(config: &str, dialect: &str) {
        assert_eq!(detect_dialect(config), named(dialect));
    }

    /// Asserts that detection falls back to the generic hint for `config`.
    #[track_caller]
    fn expect_generic(config: &str) {
        assert_eq!(detect_dialect(config), DialectHint::Generic);
    }

    // -- Representative configurations, one per dialect --

    #[test]
    fn detect_fortios() {
        expect_dialect(
            "\
config system global
    set hostname \"FortiGate-01\"
    set timezone 04
end
config firewall address
    edit \"web-server\"
        set type ipmask
        set subnet 10.0.1.10 255.255.255.255
    next
end
",
            "fortios",
        );
    }

    #[test]
    fn detect_junos_hierarchical() {
        expect_dialect(
            "\
interfaces {
    ge-0/0/0 {
        description \"uplink\";
        mtu 9216;
        unit 0 {
            family inet {
                address 192.0.2.2/30;
            }
        }
    }
}
",
            "junos",
        );
    }

    #[test]
    fn detect_junos_set_style() {
        expect_dialect(
            "\
set interfaces ge-0/0/0 description \"uplink\"
set interfaces ge-0/0/0 mtu 9216
set interfaces ge-0/0/0 unit 0 family inet address 192.0.2.2/30
set protocols bgp group EBGP type external
set routing-options static route 0.0.0.0/0 next-hop 10.0.0.1
",
            "junos",
        );
    }

    #[test]
    fn detect_nxos() {
        expect_dialect(
            "\
hostname n9k-leaf-01
!
feature bgp
feature interface-vlan
feature lacp
!
vlan 10
  name SERVERS
!
interface Ethernet1/1
  description uplink-spine-a
  mtu 9216
  ip address 192.0.2.2/31
  no shutdown
!
router bgp 65001
  router-id 10.255.255.1
",
            "nxos",
        );
    }

    #[test]
    fn detect_eos() {
        expect_dialect(
            "\
hostname leaf-01
interface Ethernet1
   description uplink-spine-a
   mtu 9214
   ip address 192.0.2.2/31
   no shutdown
router bgp 65000
   router-id 10.255.255.1
ip access-list ACL-EDGE-IN
   10 permit tcp 10.10.1.0/24 any eq https
   20 permit tcp 10.10.1.0/24 any eq ssh
   90 deny ip any any log
",
            "eos",
        );
    }

    #[test]
    fn detect_iosxe() {
        expect_dialect(
            "\
interface GigabitEthernet0/0/0
  description uplink-core-a
  mtu 9216
  ip address 192.0.2.2 255.255.255.252
  no shutdown
router bgp 65000
  bgp log-neighbor-changes
  address-family ipv4 unicast
    network 10.10.1.0 mask 255.255.255.0
ip access-list extended ACL-EDGE-IN
  permit tcp 10.10.1.0 0.0.0.255 any eq 443
  deny ip any any log
",
            "iosxe",
        );
    }

    // -- Generic fallbacks --

    #[test]
    fn detect_generic_for_empty() {
        expect_generic("");
    }

    #[test]
    fn detect_generic_for_plain_text() {
        expect_generic(
            "\
hostname router
# a comment
some random config line
",
        );
    }

    #[test]
    fn detect_generic_when_ambiguous() {
        // `set hostname ...` → fortios += 1, `interface Ethernet1` → eos += 2.
        // Nothing reaches MIN_CONFIDENCE_SCORE, so detection falls back.
        expect_generic(
            "\
set hostname myrouter
interface Ethernet1
",
        );
    }

    #[test]
    fn detect_generic_on_exact_tie() {
        // NX-OS and EOS both end on exactly 3:
        //   `feature ospf`           → nxos += 3
        //   `ip access-list ACL-IN`  → eos  += 2
        //   `10 permit tcp any any`  → eos  += 1  (numbered ACL entry)
        expect_generic(
            "\
feature ospf
ip access-list ACL-IN
   10 permit tcp any any
",
        );
    }

    // -- Edge case tests near score margins --

    #[test]
    fn detect_at_minimum_score_single_strong_signal() {
        // A lone STRONG_SIGNAL (3) equals MIN_CONFIDENCE_SCORE and has no rival.
        expect_dialect("feature ospf\n", "nxos");
    }

    #[test]
    fn detect_below_minimum_score_single_moderate_signal() {
        // A lone MODERATE_SIGNAL (2) is under MIN_CONFIDENCE_SCORE → Generic.
        expect_generic("role name admin\n");
    }

    #[test]
    fn detect_margin_exact_boundary_passes() {
        // iosxe = STRONG(3) + WEAK(1) = 4, eos = MODERATE(2).
        // 4 >= 2 * MARGIN_FACTOR(2), so the margin check is met exactly.
        expect_dialect(
            "\
ip access-list extended ACL-IN
  permit tcp any 0.0.0.255 any
interface Ethernet1
",
            "iosxe",
        );
    }

    #[test]
    fn detect_margin_just_below_boundary_fails() {
        // iosxe = STRONG(3), eos = MODERATE(2).
        // 3 < 2 * MARGIN_FACTOR(2) = 4, so the margin check fails → Generic.
        expect_generic(
            "\
ip access-list extended ACL-IN
interface Ethernet1
",
        );
    }

    #[test]
    fn detect_clear_winner_no_runner_up() {
        // Two STRONG_SIGNAL NX-OS features and nothing else:
        // nxos = 6, runner-up = 0 → 6 >= 0, a clear win.
        expect_dialect(
            "\
feature bgp
feature ospf
",
            "nxos",
        );
    }

    #[test]
    fn detect_strong_signal_drowned_by_cross_dialect_noise() {
        // One strong NX-OS signal is outweighed by the Junos syntax around it.
        // nxos  = STRONG(3)
        // junos = STRONG(3) [interfaces stanza] + MODERATE(2) [open brace]
        //       + WEAK(1) [semicolon] + MODERATE(2) [close brace] = 8
        // 8 >= 3 * 2, so junos wins.
        expect_dialect(
            "\
feature ospf
interfaces {
    mtu 9216;
}
",
            "junos",
        );
    }

    #[test]
    fn detect_two_moderate_signals_reach_margin() {
        // FortiOS `end` (2) + `next` (2) = 4.
        // 4 >= MIN_CONFIDENCE_SCORE(3) and the runner-up is 0 → detected.
        expect_dialect(
            "\
end
next
",
            "fortios",
        );
    }

    #[test]
    fn detect_only_weak_signals_below_threshold() {
        // Two Junos semicolon lines: 1 + 1 = 2 < MIN_CONFIDENCE_SCORE → Generic.
        expect_generic(
            "\
mtu 9216;
description uplink;
",
        );
    }

    #[test]
    fn detect_three_weak_signals_reach_threshold() {
        // Three Junos semicolon lines add up to exactly MIN_CONFIDENCE_SCORE,
        // with no competing dialect → detected.
        expect_dialect(
            "\
mtu 9216;
description uplink;
no-readvertise;
",
            "junos",
        );
    }

    // -- auto_parse --

    #[test]
    fn auto_parse_sets_dialect_hint() {
        let config = "\
interfaces {
    ge-0/0/0 {
        mtu 9216;
    }
}
";
        assert_eq!(auto_parse(config).metadata.dialect_hint, named("junos"));
    }

    #[test]
    fn auto_parse_generic_fallback() {
        let parsed = auto_parse("hostname router\n");
        assert_eq!(parsed.metadata.dialect_hint, DialectHint::Generic);
    }

    #[test]
    fn auto_parse_preserves_content() {
        let config = "interface Ethernet1\n  description uplink\n";
        assert_eq!(auto_parse(config).render(), config);
    }
}