Skip to main content

riscfetch_core/
parsing.rs

1//! ISA string parsing functions
2
3use crate::extensions::{
4    STANDARD_EXTENSIONS, S_CATEGORY_NAMES, S_EXTENSIONS, Z_CATEGORY_NAMES, Z_EXTENSIONS,
5};
6
/// A multi-letter extension entry together with its display metadata.
///
/// Values of this type are produced by the `*_with_category` and
/// `get_all_*_with_status` functions in this module. `PartialEq`/`Eq` are
/// derived so results can be compared directly (e.g. with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExtensionInfo {
    /// Display name of the extension (e.g. `"Zicsr"`).
    pub name: String,
    /// Short human-readable description of the extension.
    pub description: String,
    /// Category identifier used for grouping (e.g. `"base"`).
    pub category: String,
    /// Whether the inspected ISA string provides this extension.
    pub supported: bool,
}
15
/// Strip the `rv32`/`rv64`/`rv128` base prefix from the ISA base part, leaving
/// only the single-letter extension characters.
///
/// The match is case-sensitive; callers in this module lowercase the ISA
/// string first. If no known prefix is present, `base` is returned unchanged.
///
/// `rv128` is handled as well: leaving it in place would keep the `v` of `rv`
/// in the result, which callers scanning for single letters would misread as
/// the Vector extension.
#[must_use]
pub fn strip_rv_prefix(base: &str) -> &str {
    ["rv128", "rv64", "rv32"]
        .iter()
        .find_map(|&prefix| base.strip_prefix(prefix))
        .unwrap_or(base)
}
23
/// Report whether `pattern` is one complete underscore-delimited component of `isa`.
///
/// Comparing whole components (rather than searching for a substring) keeps a
/// short name from matching inside a longer one, e.g. "zk" inside "zkn" or
/// "sha" inside "shvstvala".
fn isa_has_extension(isa: &str, pattern: &str) -> bool {
    for component in isa.split('_') {
        if component == pattern {
            return true;
        }
    }
    false
}
30
31/// Parse extensions from ISA string (pure function for testing)
32#[must_use]
33pub fn parse_extensions_compact(isa: &str) -> String {
34    let isa = isa.to_lowercase();
35    let mut exts = Vec::new();
36
37    // Get the base part before any underscore
38    let base = isa.split('_').next().unwrap_or(&isa);
39    let ext_part = strip_rv_prefix(base);
40
41    // G is shorthand for IMAFD (per RISC-V spec)
42    let has_g = ext_part.contains('g');
43
44    // Standard extensions in canonical order
45    // Note: E and I are mutually exclusive
46    let standard = [
47        ('i', "I", false), // (char, name, implied_by_g)
48        ('e', "E", false), // E = embedded (16 registers)
49        ('m', "M", true),
50        ('a', "A", true),
51        ('f', "F", true),
52        ('d', "D", true),
53        ('q', "Q", false),
54        ('c', "C", false),
55        ('b', "B", false),
56        ('v', "V", false),
57        ('h', "H", false),
58    ];
59
60    for (ch, name, implied_by_g) in standard {
61        if ext_part.contains(ch) || (has_g && implied_by_g) {
62            exts.push(name);
63        }
64    }
65
66    // If G is present but I wasn't explicitly added, add I (G implies IMAFD)
67    if has_g && !exts.contains(&"I") && !exts.contains(&"E") {
68        exts.insert(0, "I");
69    }
70
71    exts.join(" ")
72}
73
74/// Parse Z-extensions from ISA string (pure function for testing)
75#[must_use]
76pub fn parse_z_extensions(isa: &str) -> String {
77    let isa = isa.to_lowercase();
78    let mut z_exts = Vec::new();
79
80    // Check if G is present (G implies Zicsr_Zifencei per RISC-V spec)
81    let base = isa.split('_').next().unwrap_or(&isa);
82    let ext_part = strip_rv_prefix(base);
83    let has_g = ext_part.contains('g');
84
85    // Add implied Z-extensions from G
86    if has_g {
87        z_exts.push("zicsr".to_string());
88        z_exts.push("zifencei".to_string());
89    }
90
91    // Add explicit Z-extensions (z prefix only)
92    for part in isa.split('_') {
93        if part.starts_with('z') && !z_exts.contains(&part.to_string()) {
94            z_exts.push(part.to_string());
95        }
96    }
97
98    z_exts.join(" ")
99}
100
/// List the S-extensions of an ISA string, lowercased and space-separated
/// (pure function for testing).
///
/// Every underscore-separated component starting with `s` is kept in order of
/// first appearance; repeated names are listed once.
#[must_use]
pub fn parse_s_extensions(isa: &str) -> String {
    let lowered = isa.to_lowercase();
    let mut seen: Vec<&str> = Vec::new();

    lowered
        .split('_')
        .filter(|component| component.starts_with('s'))
        .for_each(|component| {
            if !seen.contains(&component) {
                seen.push(component);
            }
        });

    seen.join(" ")
}
116
117/// Parse extensions with explanations (pure function for testing)
118#[must_use]
119pub fn parse_extensions_explained(isa: &str) -> Vec<(String, String)> {
120    let isa = isa.to_lowercase();
121    let base = isa.split('_').next().unwrap_or(&isa);
122    let ext_part = strip_rv_prefix(base);
123    let mut exts = Vec::new();
124
125    for &(ch, name, desc) in STANDARD_EXTENSIONS {
126        if ext_part.contains(ch) {
127            exts.push((name.to_string(), desc.to_string()));
128        }
129    }
130
131    exts
132}
133
134/// Parse Z-extensions with explanations (pure function for testing)
135#[must_use]
136pub fn parse_z_extensions_explained(isa: &str) -> Vec<(String, String)> {
137    let isa = isa.to_lowercase();
138    let mut z_exts = Vec::new();
139
140    for &(pattern, name, desc, _category) in Z_EXTENSIONS {
141        if isa_has_extension(&isa, pattern) {
142            z_exts.push((name.to_string(), desc.to_string()));
143        }
144    }
145
146    z_exts
147}
148
149/// Parse S-extensions with explanations (pure function for testing)
150#[must_use]
151pub fn parse_s_extensions_explained(isa: &str) -> Vec<(String, String)> {
152    let isa = isa.to_lowercase();
153    let mut s_exts = Vec::new();
154
155    for &(pattern, name, desc, _category) in S_EXTENSIONS {
156        if isa_has_extension(&isa, pattern) {
157            s_exts.push((name.to_string(), desc.to_string()));
158        }
159    }
160
161    s_exts
162}
163
164/// Parse Z-extensions with category info
165#[must_use]
166pub fn parse_z_extensions_with_category(isa: &str) -> Vec<ExtensionInfo> {
167    let isa = isa.to_lowercase();
168    let mut z_exts = Vec::new();
169
170    // Check if G is present (G implies Zicsr_Zifencei per RISC-V spec)
171    let base = isa.split('_').next().unwrap_or(&isa);
172    let ext_part = strip_rv_prefix(base);
173    let has_g = ext_part.contains('g');
174
175    // Add implied Z-extensions from G
176    if has_g {
177        z_exts.push(ExtensionInfo {
178            name: "Zicsr".to_string(),
179            description: "CSR Instructions".to_string(),
180            category: "base".to_string(),
181            supported: true,
182        });
183        z_exts.push(ExtensionInfo {
184            name: "Zifencei".to_string(),
185            description: "Instruction-Fetch Fence".to_string(),
186            category: "base".to_string(),
187            supported: true,
188        });
189    }
190
191    for &(pattern, name, desc, category) in Z_EXTENSIONS {
192        if isa_has_extension(&isa, pattern) {
193            // Skip if already added (implied by G)
194            if !z_exts.iter().any(|e| e.name.eq_ignore_ascii_case(name)) {
195                z_exts.push(ExtensionInfo {
196                    name: name.to_string(),
197                    description: desc.to_string(),
198                    category: category.to_string(),
199                    supported: true,
200                });
201            }
202        }
203    }
204
205    z_exts
206}
207
208/// Parse S-extensions with category info
209#[must_use]
210pub fn parse_s_extensions_with_category(isa: &str) -> Vec<ExtensionInfo> {
211    let isa = isa.to_lowercase();
212    let mut s_exts = Vec::new();
213
214    for &(pattern, name, desc, category) in S_EXTENSIONS {
215        if isa_has_extension(&isa, pattern) {
216            s_exts.push(ExtensionInfo {
217                name: name.to_string(),
218                description: desc.to_string(),
219                category: category.to_string(),
220                supported: true,
221            });
222        }
223    }
224
225    s_exts
226}
227
228/// Get category display name for Z-extensions
229#[must_use]
230pub fn get_z_category_name(category: &str) -> &'static str {
231    Z_CATEGORY_NAMES
232        .iter()
233        .find(|(id, _)| *id == category)
234        .map_or("Other", |(_, name)| *name)
235}
236
237/// Get category display name for S-extensions
238#[must_use]
239pub fn get_s_category_name(category: &str) -> &'static str {
240    S_CATEGORY_NAMES
241        .iter()
242        .find(|(id, _)| *id == category)
243        .map_or("Other", |(_, name)| *name)
244}
245
246/// Group extensions by category
247#[must_use]
248pub fn group_by_category(extensions: &[ExtensionInfo]) -> Vec<(String, Vec<&ExtensionInfo>)> {
249    use std::collections::BTreeMap;
250    let mut groups: BTreeMap<String, Vec<&ExtensionInfo>> = BTreeMap::new();
251
252    for ext in extensions {
253        groups.entry(ext.category.clone()).or_default().push(ext);
254    }
255
256    groups.into_iter().collect()
257}
258
259/// Get ALL Z-extensions with support status based on ISA string
260#[must_use]
261pub fn get_all_z_extensions_with_status(isa: &str) -> Vec<ExtensionInfo> {
262    let isa = isa.to_lowercase();
263    let base = isa.split('_').next().unwrap_or(&isa);
264    let ext_part = strip_rv_prefix(base);
265    let has_g = ext_part.contains('g');
266
267    Z_EXTENSIONS
268        .iter()
269        .map(|&(pattern, name, desc, category)| {
270            let supported = isa_has_extension(&isa, pattern)
271                || (has_g && (pattern == "zicsr" || pattern == "zifencei"));
272            ExtensionInfo {
273                name: name.to_string(),
274                description: desc.to_string(),
275                category: category.to_string(),
276                supported,
277            }
278        })
279        .collect()
280}
281
282/// Get ALL S-extensions with support status based on ISA string
283#[must_use]
284pub fn get_all_s_extensions_with_status(isa: &str) -> Vec<ExtensionInfo> {
285    let isa = isa.to_lowercase();
286
287    S_EXTENSIONS
288        .iter()
289        .map(|&(pattern, name, desc, category)| {
290            let supported = isa_has_extension(&isa, pattern);
291            ExtensionInfo {
292                name: name.to_string(),
293                description: desc.to_string(),
294                category: category.to_string(),
295                supported,
296            }
297        })
298        .collect()
299}
300
301/// Get ALL standard extensions with support status
302#[must_use]
303pub fn get_all_standard_extensions_with_status(isa: &str) -> Vec<(String, String, bool)> {
304    let isa = isa.to_lowercase();
305    let base = isa.split('_').next().unwrap_or(&isa);
306    let ext_part = strip_rv_prefix(base);
307    let has_g = ext_part.contains('g');
308
309    STANDARD_EXTENSIONS
310        .iter()
311        .map(|&(char, name, desc)| {
312            let supported =
313                ext_part.contains(char) || (has_g && matches!(char, 'i' | 'm' | 'a' | 'f' | 'd'));
314            (name.to_string(), desc.to_string(), supported)
315        })
316        .collect()
317}
318
319/// Parse vector details from ISA string (pure function for testing)
320/// Returns None if no vector extension, Some(details) otherwise
321#[must_use]
322pub fn parse_vector_from_isa(isa: &str) -> Option<String> {
323    let isa = isa.to_lowercase();
324    let base = isa.split('_').next().unwrap_or(&isa);
325    let ext_part = strip_rv_prefix(base);
326
327    // Check for V extension in the extension part, or zve* in Z-extensions
328    let has_zve = isa.split('_').any(|part| part.starts_with("zve"));
329    if !ext_part.contains('v') && !has_zve {
330        return None;
331    }
332
333    let mut details = vec!["Enabled".to_string()];
334
335    // Detect VLEN from zvl* extensions (use largest value)
336    // If no zvl* specified, VLEN is implementation-defined (do not display)
337    if isa_has_extension(&isa, "zvl65536b") {
338        details.push("VLEN>=65536".to_string());
339    } else if isa_has_extension(&isa, "zvl32768b") {
340        details.push("VLEN>=32768".to_string());
341    } else if isa_has_extension(&isa, "zvl16384b") {
342        details.push("VLEN>=16384".to_string());
343    } else if isa_has_extension(&isa, "zvl8192b") {
344        details.push("VLEN>=8192".to_string());
345    } else if isa_has_extension(&isa, "zvl4096b") {
346        details.push("VLEN>=4096".to_string());
347    } else if isa_has_extension(&isa, "zvl2048b") {
348        details.push("VLEN>=2048".to_string());
349    } else if isa_has_extension(&isa, "zvl1024b") {
350        details.push("VLEN>=1024".to_string());
351    } else if isa_has_extension(&isa, "zvl512b") {
352        details.push("VLEN>=512".to_string());
353    } else if isa_has_extension(&isa, "zvl256b") {
354        details.push("VLEN>=256".to_string());
355    } else if isa_has_extension(&isa, "zvl128b") {
356        details.push("VLEN>=128".to_string());
357    } else if isa_has_extension(&isa, "zvl64b") {
358        details.push("VLEN>=64".to_string());
359    } else if isa_has_extension(&isa, "zvl32b") {
360        details.push("VLEN>=32".to_string());
361    }
362    // No default VLEN - it's implementation-defined per RISC-V spec
363
364    Some(details.join(", "))
365}
366
#[cfg(test)]
mod tests {
    use super::*;

    // Real ISA strings from actual RISC-V systems
    const ISA_VISIONFIVE2: &str = "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zba_zbb";
    const ISA_SPACEMIT_K1: &str = "rv64imafdcv_zicbom_zicboz_zicntr_zicsr_zifencei_zihintpause_zihpm_zba_zbb_zbc_zbs_zkt_zvkt_zvl128b_zvl256b_zvl32b_zvl64b";
    const ISA_MINIMAL: &str = "rv64imac";
    const ISA_RV32: &str = "rv32imc";

    /// True when `token` is one whole whitespace-separated entry of `list`.
    ///
    /// The list-returning parsers join names with spaces. A plain substring
    /// check on that output would accept "zk" when only "zkn" is listed, so
    /// the tests compare whole entries instead.
    fn has_token(list: &str, token: &str) -> bool {
        list.split_whitespace().any(|entry| entry == token)
    }

    // === parse_extensions_compact tests ===

    #[test]
    fn test_visionfive2() {
        assert_eq!(parse_extensions_compact(ISA_VISIONFIVE2), "I M A F D C");
    }

    #[test]
    fn test_spacemit() {
        assert_eq!(parse_extensions_compact(ISA_SPACEMIT_K1), "I M A F D C V");
    }

    #[test]
    fn test_minimal() {
        assert_eq!(parse_extensions_compact(ISA_MINIMAL), "I M A C");
    }

    #[test]
    fn test_rv32() {
        assert_eq!(parse_extensions_compact(ISA_RV32), "I M C");
    }

    #[test]
    fn test_unknown() {
        assert_eq!(parse_extensions_compact("unknown"), "");
    }

    #[test]
    fn test_case_insensitive() {
        assert_eq!(
            parse_extensions_compact("RV64IMAFDC"),
            parse_extensions_compact("rv64imafdc")
        );
    }

    #[test]
    fn test_empty() {
        assert_eq!(parse_extensions_compact(""), "");
    }

    // === Specification-based tests (from SPEC.md) ===

    #[test]
    fn spec_g_expansion() {
        assert_eq!(parse_extensions_compact("rv64gc"), "I M A F D C");
    }

    #[test]
    fn spec_g_expansion_uppercase() {
        assert_eq!(parse_extensions_compact("RV64GC"), "I M A F D C");
    }

    #[test]
    fn spec_e_extension() {
        assert_eq!(parse_extensions_compact("rv32e"), "E");
    }

    #[test]
    fn spec_e_with_c() {
        assert_eq!(parse_extensions_compact("rv32ec"), "E C");
    }

    #[test]
    fn spec_with_vector() {
        assert_eq!(parse_extensions_compact("rv64imafdcv"), "I M A F D C V");
    }

    #[test]
    fn spec_rv64_prefix_not_vector() {
        // The 'v' in the "rv64" prefix must not be reported as the V extension
        let result = parse_extensions_compact("rv64imafdc");
        assert!(!result.contains('V'));
    }

    #[test]
    fn spec_z_extensions_ignored() {
        assert_eq!(
            parse_extensions_compact("rv64imafdc_zba_zbb"),
            "I M A F D C"
        );
    }

    #[test]
    fn spec_rv64_only() {
        assert_eq!(parse_extensions_compact("rv64"), "");
    }

    // === parse_z_extensions tests ===

    #[test]
    fn test_z_extensions_visionfive2() {
        let result = parse_z_extensions(ISA_VISIONFIVE2);
        for expected in ["zicntr", "zicsr", "zifencei", "zba", "zbb"] {
            assert!(
                has_token(&result, expected),
                "{} should be listed in {:?}",
                expected,
                result
            );
        }
    }

    #[test]
    fn test_z_extensions_spacemit() {
        let result = parse_z_extensions(ISA_SPACEMIT_K1);
        for expected in ["zicbom", "zicboz", "zbc", "zbs", "zvl256b"] {
            assert!(
                has_token(&result, expected),
                "{} should be listed in {:?}",
                expected,
                result
            );
        }
    }

    #[test]
    fn test_z_extensions_minimal() {
        assert!(parse_z_extensions(ISA_MINIMAL).is_empty());
    }

    #[test]
    fn spec_z_extensions_basic() {
        assert_eq!(parse_z_extensions("rv64i_zicsr_zifencei"), "zicsr zifencei");
    }

    #[test]
    fn spec_z_extensions_order() {
        assert_eq!(parse_z_extensions("rv64i_zba_zbb_zbc"), "zba zbb zbc");
    }

    #[test]
    fn spec_z_extensions_none() {
        assert_eq!(parse_z_extensions("rv64imafdc"), "");
    }

    #[test]
    fn spec_z_extensions_g_implies() {
        assert_eq!(parse_z_extensions("rv64gc"), "zicsr zifencei");
    }

    #[test]
    fn spec_z_extensions_case() {
        assert_eq!(parse_z_extensions("rv64i_Zicsr"), "zicsr");
    }

    // === parse_s_extensions tests ===

    #[test]
    fn spec_s_extensions() {
        let result = parse_s_extensions("rv64i_sstc");
        assert!(has_token(&result, "sstc"));
    }

    // === parse_extensions_explained tests ===

    #[test]
    fn test_explained_visionfive2() {
        let result = parse_extensions_explained(ISA_VISIONFIVE2);
        assert_eq!(result.len(), 6); // I M A F D C
        for expected in ["I", "M", "A", "F", "D", "C"] {
            assert!(
                result.iter().any(|(n, _)| n == expected),
                "{} should be found",
                expected
            );
        }
    }

    #[test]
    fn test_z_explained_spacemit() {
        let result = parse_z_extensions_explained(ISA_SPACEMIT_K1);
        assert!(result
            .iter()
            .any(|(n, d)| n == "Zba" && d == "Address Generation"));
        assert!(result
            .iter()
            .any(|(n, d)| n == "Zbb" && d == "Basic Bit Manipulation"));
        assert!(result
            .iter()
            .any(|(n, d)| n == "Zbc" && d == "Carry-less Multiply"));
    }

    // === parse_vector_from_isa tests ===

    #[test]
    fn test_vector_no_vector() {
        assert!(parse_vector_from_isa(ISA_VISIONFIVE2).is_none());
    }

    #[test]
    fn test_vector_with_v() {
        // zvl128b, zvl256b, zvl32b and zvl64b are all present; the largest wins
        let result = parse_vector_from_isa(ISA_SPACEMIT_K1);
        assert_eq!(result.as_deref(), Some("Enabled, VLEN>=256"));
    }

    #[test]
    fn test_vector_zve_only() {
        let result = parse_vector_from_isa("rv64imac_zve32x");
        assert_eq!(result.as_deref(), Some("Enabled"));
    }

    #[test]
    fn spec_vector_with_v() {
        let result = parse_vector_from_isa("rv64imafdcv");
        assert!(result.is_some());
        assert!(result.unwrap().contains("Enabled"));
    }

    #[test]
    fn spec_vector_none() {
        assert!(parse_vector_from_isa("rv64imafdc").is_none());
    }

    #[test]
    fn spec_vector_vlen_256() {
        let result = parse_vector_from_isa("rv64imafdcv_zvl256b");
        assert_eq!(result.as_deref(), Some("Enabled, VLEN>=256"));
    }

    #[test]
    fn spec_vector_vlen_largest() {
        let result = parse_vector_from_isa("rv64imafdcv_zvl128b_zvl256b");
        assert_eq!(result.as_deref(), Some("Enabled, VLEN>=256"));
    }

    // === False positive prevention tests ===

    #[test]
    fn test_zk_does_not_false_match_zkn() {
        // "zk" (Scalar Crypto All) must not be reported when only "zkn" is present
        let isa = "rv64i_zkn";
        let result = parse_z_extensions_explained(isa);
        assert!(
            result.iter().any(|(n, _)| n == "Zkn"),
            "Zkn should be found"
        );
        assert!(
            !result.iter().any(|(n, _)| n == "Zk"),
            "Zk should NOT be found (false positive)"
        );
    }

    #[test]
    fn test_zks_does_not_false_match_zksed() {
        // "zks" must not be reported when only "zksed" is present
        let isa = "rv64i_zksed";
        let result = parse_z_extensions_explained(isa);
        assert!(
            result.iter().any(|(n, _)| n == "Zksed"),
            "Zksed should be found"
        );
        assert!(
            !result.iter().any(|(n, _)| n == "Zks"),
            "Zks should NOT be found (false positive)"
        );
        assert!(
            !result.iter().any(|(n, _)| n == "Zk"),
            "Zk should NOT be found (false positive)"
        );
    }

    #[test]
    fn test_s_extension_no_collision_with_z() {
        // S-extensions should not appear when only Z-extensions with 's' in name are present
        let isa = "rv64i_zbs_zks";
        let result = parse_s_extensions_explained(isa);
        assert!(
            result.is_empty(),
            "No S-extensions should be found in Z-only ISA: {:?}",
            result
        );
    }

    #[test]
    fn test_s_extension_exact_match() {
        // "sstc" should match exactly, not as substring
        let isa = "rv64i_sstc_svnapot";
        let result = parse_s_extensions_explained(isa);
        assert!(
            result.iter().any(|(n, _)| n == "Sstc"),
            "Sstc should be found"
        );
        assert!(
            result.iter().any(|(n, _)| n == "Svnapot"),
            "Svnapot should be found"
        );
    }

    #[test]
    fn test_zvks_does_not_false_match_zvksc() {
        // "zvks" must not be reported when only "zvksc" is present
        let isa = "rv64iv_zvksc";
        let result = parse_z_extensions_explained(isa);
        assert!(
            result.iter().any(|(n, _)| n == "Zvksc"),
            "Zvksc should be found"
        );
        assert!(
            !result.iter().any(|(n, _)| n == "Zvks"),
            "Zvks should NOT be found (false positive)"
        );
    }

    #[test]
    fn test_zicsr_not_false_positive_for_c() {
        // Having "zicsr" in the ISA should not cause "C" to appear in standard extensions
        // (C should only come from the base part before underscores)
        let isa = "rv64ima_zicsr";
        let result = parse_extensions_compact(isa);
        assert_eq!(result, "I M A", "C should not appear from zicsr");
    }

    #[test]
    fn spec_vector_no_default_vlen() {
        // No zvl* component: VLEN is implementation-defined, so none is shown
        let result = parse_vector_from_isa("rv64imafdcv");
        assert_eq!(result.as_deref(), Some("Enabled"));
    }
}