1use std::{
2 collections::HashMap,
3 env::args,
4 fs,
5 io::Write as _,
6 iter::Peekable,
7 path::PathBuf,
8 str::{FromStr, Lines},
9};
10
11use anyhow::{Result, anyhow};
12use htmlentity::entity::ICodedDataTrait;
13use quick_xml::Reader;
14use quick_xml::escape::unescape;
15use quick_xml::events::attributes::Attribute;
16use quick_xml::events::{BytesStart, Event};
17use quick_xml::name::QName;
18use regex::Regex;
19use reqwest;
20use serde::Deserialize;
21use url_escape::encode_www_form_urlencoded;
22
23use crate::{
24 AvrStatusRegister, AvrTiming, InstructionAlias,
25 types::{
26 Arch, Assembler, Directive, ISA, Instruction, InstructionForm, MMXMode, NameToDirectiveMap,
27 NameToInstructionMap, NameToRegisterMap, Operand, OperandType, Register, RegisterBitInfo,
28 RegisterType, RegisterWidth, XMMMode, Z80Timing, Z80TimingInfo,
29 },
30 ustr,
31};
32
33pub fn populate_riscv_registers(rst_contents: &str) -> Result<Vec<Register>> {
48 enum ParseState {
49 FileStart,
50 SectionStart,
51 TableStart,
52 TableSeparator,
53 TableEntry,
54 TableEnd,
55 FileEnd,
56 }
57 let mut parse_state = ParseState::FileStart;
58 let mut registers = Vec::new();
59 let mut curr_reg_type: Option<RegisterType> = None;
60 let mut lines = rst_contents.lines().peekable();
61
62 loop {
63 match parse_state {
64 ParseState::FileStart => {
65 let file_header = lines.next().unwrap();
66 assert!(file_header.eq("Register Definitions"));
67 let separator = lines.next().unwrap();
68 assert!(separator.starts_with('='));
69 consume_empty_lines(&mut lines);
70 parse_state = ParseState::SectionStart;
71 }
72 ParseState::SectionStart => {
73 let section_header = lines.next().unwrap();
74 if section_header.contains("Integer") {
75 curr_reg_type = Some(RegisterType::GeneralPurpose);
76 } else if section_header.contains("Floating Point") {
77 curr_reg_type = Some(RegisterType::FloatingPoint);
78 } else {
79 panic!("Unexpected section header: {section_header}");
80 }
81 let separator = lines.next().unwrap();
82 assert!(separator.starts_with('-'));
83 consume_empty_lines(&mut lines);
84 parse_state = ParseState::TableStart;
85 }
86 ParseState::TableStart => {
87 let top = lines.next().unwrap();
88 assert!(top.starts_with('+'));
89 let column_headers = lines.next().unwrap();
90 assert!(
91 column_headers
92 .eq("|Register | ABI Name | Description | Saver |")
93 );
94 parse_state = ParseState::TableSeparator;
95 }
96 ParseState::TableSeparator => {
97 let separator = lines.next().unwrap();
98 assert!(separator.starts_with('+'));
99 match lines.peek() {
100 Some(next) => {
101 if next.is_empty() {
102 parse_state = ParseState::TableEnd;
103 } else {
104 parse_state = ParseState::TableEntry;
105 }
106 }
107 None => parse_state = ParseState::TableEnd,
108 }
109 }
110 ParseState::TableEntry => {
111 let entries: Vec<&str> = lines
112 .next()
113 .unwrap()
114 .trim_start_matches('|')
115 .trim_end_matches('|')
116 .split('|')
117 .collect();
118 assert!(entries.len() == 4);
119 let saved_info = if entries[3].trim_ascii().is_empty() {
120 String::new()
121 } else {
122 format!("\n{} saved", entries[3].trim_ascii())
123 };
124 let description = format!("{}{}", entries[2].trim_ascii(), saved_info);
125 let reg_name = entries[0].trim_ascii().to_lowercase();
126 let curr_register = Register {
127 name: reg_name,
128 description: Some(description),
129 reg_type: curr_reg_type,
130 arch: Arch::RISCV,
131 ..Default::default()
132 };
133 registers.push(curr_register);
134 parse_state = ParseState::TableSeparator;
135 }
136 ParseState::TableEnd => {
137 consume_empty_lines(&mut lines);
138 if lines.peek().is_some() {
139 parse_state = ParseState::SectionStart;
140 } else {
141 parse_state = ParseState::FileEnd;
142 }
143 }
144 ParseState::FileEnd => break,
145 }
146 }
147
148 Ok(registers)
149}
150
151pub fn populate_riscv_instructions(docs_path: &PathBuf) -> Result<Vec<Instruction>> {
166 let mut instructions_map = HashMap::<String, Instruction>::new();
167
168 let mut entries: Vec<PathBuf> = std::fs::read_dir(docs_path)?
170 .map(|res| res.map(|e| e.path()))
171 .collect::<Result<Vec<_>, std::io::Error>>()?;
172 entries.sort();
173
174 for path in entries {
176 if let Ok(docs) = std::fs::read_to_string(&path) {
177 for instr in parse_riscv_instructions(&docs) {
178 let instr_name = instr.name.to_ascii_lowercase();
179 assert!(!instructions_map.contains_key(&instr_name));
180 instructions_map.insert(instr_name, instr);
181 }
182 }
183 }
184
185 Ok(instructions_map.into_values().collect())
186}
187
188fn parse_riscv_instructions(rst_contents: &str) -> Vec<Instruction> {
200 enum ParseState {
203 FileStart,
204 InstructionStart,
205 InstructionTableInfo,
206 InstructionFormat,
207 InstructionDescription,
208 InstructionImplementation,
209 InstructionExpansion,
210 FileEnd,
211 }
212 let mut parse_state = ParseState::FileStart;
213 let mut instructions = Vec::new();
214 let mut curr_instruction = Instruction {
215 arch: Arch::RISCV,
216 ..Default::default()
217 };
218 let mut lines = rst_contents.lines().peekable();
219
220 loop {
221 match parse_state {
222 ParseState::FileStart => {
223 let _header = lines.next().unwrap();
224 let separator = lines.next().unwrap();
225 assert!(separator.trim_ascii().starts_with('='));
226 consume_empty_lines(&mut lines);
227 parse_state = ParseState::InstructionStart;
228 }
229 ParseState::InstructionStart => {
230 curr_instruction.name = lines.next().unwrap().trim_ascii().to_ascii_lowercase();
231 let separator = lines.next().unwrap();
232 assert!(separator.trim_ascii().starts_with('-'));
234 consume_empty_lines(&mut lines);
235
236 if !lines.peek().unwrap().starts_with("..") {
238 curr_instruction.summary =
239 format!("{}\n\n", lines.next().unwrap().trim_ascii());
240 consume_empty_lines(&mut lines);
241 }
242 parse_state = ParseState::InstructionTableInfo;
243 }
244 ParseState::InstructionTableInfo => {
245 let table_info_1 = lines.next().unwrap();
247 assert!(table_info_1.trim_ascii().starts_with(".."));
248 let table_info_2 = lines.next().unwrap();
250 assert!(table_info_2.trim_ascii().starts_with(".."));
251
252 consume_empty_lines(&mut lines);
253
254 let top = lines.next().unwrap();
262 assert!(top.trim_ascii().starts_with('+'));
263 let first_row = lines.next().unwrap();
264 assert!(first_row.trim_ascii().starts_with('|'));
265 let middle = lines.next().unwrap();
266 assert!(middle.trim_ascii().starts_with('+'));
267 let second_row = lines.next().unwrap();
268 assert!(second_row.trim_ascii().starts_with('|'));
269 let bottom = lines.next().unwrap();
270 assert!(bottom.trim_ascii().starts_with('+'));
271 consume_empty_lines(&mut lines);
272 parse_state = ParseState::InstructionFormat;
273 }
274 ParseState::InstructionFormat => {
275 let header = lines.next().unwrap();
276 assert!(header.eq(":Format:"));
277 curr_instruction.asm_templates.push(
278 lines
279 .next()
280 .unwrap()
281 .trim_ascii()
282 .trim_start_matches('|')
283 .trim_ascii()
284 .to_string(),
285 );
286 consume_empty_lines(&mut lines);
287 parse_state = ParseState::InstructionDescription;
288 }
289 ParseState::InstructionDescription => {
290 let header = lines.next().unwrap();
291 assert!(header.eq(":Description:"));
292 while let Some(next) = lines.peek() {
293 if next.contains('|') {
294 curr_instruction.summary += lines
295 .next()
296 .unwrap()
297 .trim_ascii()
298 .trim_start_matches('|')
299 .trim_ascii();
300 } else {
301 break;
302 }
303 }
304 consume_empty_lines(&mut lines);
305 parse_state = ParseState::InstructionImplementation;
306 }
307 ParseState::InstructionImplementation => {
308 let header = lines.next().unwrap();
309 assert!(header.eq(":Implementation:"));
310 let _impl_body = lines.next(); consume_empty_lines(&mut lines);
312 parse_state = ParseState::InstructionExpansion;
313 }
314 ParseState::InstructionExpansion => {
316 match lines.peek() {
317 Some(&":Expansion:") => {
318 let header = lines.next().unwrap();
319 assert!(header.eq(":Expansion:"));
320 let _exp_body = lines.next(); consume_empty_lines(&mut lines);
322 if lines.peek().is_some() {
323 parse_state = ParseState::InstructionStart;
324 } else {
325 parse_state = ParseState::FileEnd;
326 }
327 }
328 Some(other) => {
329 if other.eq(&".. [classify table]") {
330 consume_classify_table(&mut lines);
331 }
332 if lines.peek().is_some() {
333 parse_state = ParseState::InstructionStart;
334 } else {
335 parse_state = ParseState::FileEnd;
336 }
337 }
338 None => parse_state = ParseState::FileEnd,
339 }
340
341 instructions.push(curr_instruction);
342 curr_instruction = Instruction {
343 arch: Arch::RISCV,
344 ..Default::default()
345 };
346 }
347 ParseState::FileEnd => break,
348 }
349 }
350
351 instructions
352}
353
/// Advances `line_iter` past any run of empty lines at its current position.
///
/// Only lines that are exactly empty are skipped; a line containing whitespace is
/// left in place. The first non-empty line (if any) remains the next item.
fn consume_empty_lines(line_iter: &mut Peekable<Lines>) {
    while line_iter.next_if(|line| line.is_empty()).is_some() {}
}
363
/// Skips a `.. [classify table]` block at the current position of `line_iter`.
///
/// The block is a fixed four-line preamble (`.. [classify table]`, `.. table::`,
/// a `Classify Table:` caption, and an empty line) followed by the table rows.
/// Rows are consumed up to, but not including, the next empty line or the end of
/// input.
///
/// # Panics
///
/// Panics if the preamble lines are missing or do not match.
fn consume_classify_table(line_iter: &mut Peekable<Lines>) {
    let marker = line_iter.next().unwrap();
    assert!(marker.eq(".. [classify table]"));
    let directive = line_iter.next().unwrap();
    assert!(directive.eq(".. table::"));
    let caption = line_iter.next().unwrap();
    assert!(caption.trim_ascii().eq("Classify Table:"));
    let spacer = line_iter.next().unwrap();
    assert!(spacer.is_empty());

    // Drop table rows; the terminating empty line is left for the caller.
    while line_iter.next_if(|row| !row.is_empty()).is_some() {}
}
380
381pub fn populate_arm_instructions(docs_path: &PathBuf) -> Result<Vec<Instruction>> {
396 let mut instructions_map = HashMap::<String, Instruction>::new();
397 let mut alias_map = HashMap::<String, Vec<InstructionAlias>>::new();
398
399 let mut entries: Vec<PathBuf> = std::fs::read_dir(docs_path)?
401 .map(|res| res.map(|e| e.path()))
402 .collect::<Result<Vec<_>, std::io::Error>>()?;
403 entries.sort();
404
405 for path in entries {
407 if path.extension().unwrap_or_default() != "xml"
408 || path.file_stem().unwrap_or_default() == "notice"
409 || path.file_stem().unwrap_or_default() == "constraint_text_mappings"
410 || path.file_stem().unwrap_or_default() == "shared_pseudocode"
411 {
412 continue;
413 }
414 if let Ok(docs) = std::fs::read_to_string(&path) {
415 if let Some((alias, aliased_instr)) = parse_arm_alias(&docs)? {
416 assert!(!aliased_instr.is_empty());
417 let aliases = alias_map.entry(aliased_instr).or_default();
418 aliases.push(alias);
419 } else if let Some(mut instr) = parse_arm_instruction(&docs) {
420 assert!(!instr.name.is_empty());
421 if let Some(entry) = instructions_map.get_mut(&instr.name) {
422 entry.aliases.append(&mut instr.aliases);
423 entry.asm_templates.append(&mut instr.asm_templates);
424 if entry.summary.is_empty() {
425 entry.summary = instr.summary;
426 }
427 } else {
428 instructions_map.insert(instr.name.clone(), instr);
429 }
430 }
431 } else {
432 println!(
433 "WARNING: Skipping entry, could not read file {}",
434 path.display()
435 );
436 }
437 }
438
439 for (instr_name, aliases) in &mut alias_map {
441 if let Some(entry) = instructions_map.get_mut(instr_name) {
442 entry.aliases.append(aliases);
443 } else {
444 instructions_map.insert(
445 instr_name.to_owned(),
446 Instruction {
447 name: instr_name.to_owned(),
448 arch: Arch::ARM64,
452 aliases: aliases.to_owned(),
453 ..Default::default()
454 },
455 );
456 }
457 }
458
459 Ok(instructions_map.into_values().collect())
460}
461
462fn parse_arm_alias(xml_contents: &str) -> Result<Option<(InstructionAlias, String)>> {
475 let mut reader = Reader::from_str(xml_contents);
477 let mut aliased_instr: Option<String> = None;
478 let mut alias = InstructionAlias::default();
479 let mut curr_template: Option<String> = None;
480 let mut in_desc = false;
481 let mut in_para = false;
482 let mut in_template = false;
483
484 loop {
485 match reader.read_event() {
486 Ok(Event::Start(ref e)) => match e.name() {
487 QName(b"instructionsection") => {
488 for attr in e.attributes() {
489 let Attribute { key, value } = attr.unwrap();
490 if b"title" == key.into_inner() {
491 alias.title = ustr::get_str(&value).to_string();
492 }
493 }
494 }
495 QName(b"desc") => in_desc = true,
496 QName(b"para") => in_para = true,
497 QName(b"asmtemplate") => in_template = true,
498 QName(b"alphaindex" | b"encodingindex") => return Ok(None),
499 _ => {}
500 },
501 Ok(Event::Text(ref txt)) => {
502 if in_template {
503 let cleaned = txt.unescape().unwrap();
504 if let Some(existing) = curr_template {
505 curr_template = Some(format!("{existing}{cleaned}"));
506 } else {
507 let mut new_template = cleaned.into_owned().trim_ascii().to_owned();
508 new_template.push(' ');
509 curr_template = Some(new_template);
510 }
511 } else if in_desc && in_para && alias.summary.is_empty() {
512 ustr::get_str(txt).clone_into(&mut alias.summary);
513 }
514 }
515 Ok(Event::Empty(ref e)) => {
516 if QName(b"docvar") == e.name() {
517 let mut alias_next = false;
518 for attr in e.attributes() {
519 let Attribute { key, value } = attr.unwrap();
520 if alias_next && b"value" == key.into_inner() {
523 aliased_instr = Some(ustr::get_str(&value).to_ascii_lowercase());
524 break;
525 }
526 if b"key" == key.into_inner()
527 && b"alias_mnemonic" == ustr::get_str(&value).as_bytes()
528 {
529 alias_next = true;
530 }
531 }
532 }
533 }
534 Ok(Event::End(ref e)) => match e.name() {
536 QName(b"instructionsection") => break,
537 QName(b"asmtemplate") => {
538 if let Some(template) = curr_template.take() {
539 alias.asm_templates.push(template);
540 }
541 in_template = false;
542 }
543 QName(b"docvars") => {
544 if aliased_instr.is_none() {
545 return Ok(None);
546 }
547 }
548 _ => {}
549 },
550 _ => {}
551 }
552 }
553
554 aliased_instr.map_or_else(|| Ok(None), |aliased_name| Ok(Some((alias, aliased_name))))
555}
556
557fn parse_arm_instruction(xml_contents: &str) -> Option<Instruction> {
569 let mut reader = Reader::from_str(xml_contents);
571
572 let mut instruction = Instruction {
574 arch: Arch::ARM64,
576 ..Default::default()
577 };
578 let mut curr_template: Option<String> = None;
579 let mut in_desc = false;
580 let mut in_para = false;
581 let mut in_template = false;
582
583 loop {
584 match reader.read_event() {
585 Ok(Event::Start(ref e)) => match e.name() {
586 QName(b"desc") => in_desc = true,
587 QName(b"para") => in_para = true,
588 QName(b"asmtemplate") => in_template = true,
589 QName(b"alphaindex" | b"encodingindex") => return None,
590 _ => {}
591 },
592 Ok(Event::Empty(ref e)) => {
593 if QName(b"docvar") == e.name() {
595 if instruction.name.is_empty() {
599 let mut mnemonic_next = false;
600 for attr in e.attributes() {
601 let Attribute { key: _, value } = attr.unwrap();
602 if b"mnemonic" == ustr::get_str(&value).as_bytes() {
603 mnemonic_next = true;
604 } else if mnemonic_next {
605 instruction.name = ustr::get_str(&value).to_ascii_lowercase();
606 break;
607 }
608 }
609 }
610 }
611 }
612 Ok(Event::Text(ref txt)) => {
613 if in_template {
614 let cleaned = txt.unescape().unwrap();
615 if let Some(existing) = curr_template {
616 curr_template = Some(format!("{existing}{cleaned}"));
617 } else {
618 let mut new_template = cleaned.into_owned().trim_ascii().to_owned();
619 new_template.push(' ');
620 curr_template = Some(new_template);
621 }
622 } else if in_desc && in_para && instruction.summary.is_empty() {
623 ustr::get_str(txt).clone_into(&mut instruction.summary);
624 }
625 }
626 Ok(Event::End(ref e)) => {
628 match e.name() {
629 QName(b"instructionsection") => break,
630 QName(b"encoding") => {
631 if let Some(template) = curr_template.take() {
632 instruction.asm_templates.push(template);
633 }
634 }
635 QName(b"desc") => in_desc = false,
636 QName(b"para") => in_para = false,
637 QName(b"asmtemplate") => in_template = false,
638 _ => {} }
640 }
641 Ok(Event::Eof) => break,
642 Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
643 _ => {} }
645 }
646
647 Some(instruction)
648}
649
650pub fn populate_mars_pseudo_instructions(contents: &str) -> Result<Vec<Instruction>> {
663 let mut prev_instr: Option<&mut Instruction> = None;
664 let mut instructions = Vec::new();
665
666 for line in contents
667 .lines()
668 .filter(|l| !l.is_empty() && !l.trim_start().starts_with('#'))
669 .map(str::trim)
670 {
671 let name = line.split_once(' ').unwrap().0;
672 let (_, description) = line.split_once('#').unwrap();
673 let template = line.replace('\t', " ");
674 match prev_instr {
675 Some(ref mut prev) if prev.name == name => {
676 prev.asm_templates.push(template);
677 continue;
678 }
679 _ => {}
680 }
681
682 let mut summary = description.trim().replace('\t', " ");
683 if let Some(colon_idx) = summary.find(':') {
684 summary = summary[..colon_idx].trim().to_string();
686 }
687
688 instructions.push(Instruction {
689 name: name.to_string(),
690 summary: format!("{summary}\n\nPseudo-op provided by the MARS assembler",),
691 asm_templates: vec![template],
692 arch: Arch::Mips,
693 forms: Vec::new(),
694 aliases: Vec::new(),
695 url: None,
696 });
697
698 prev_instr = instructions.last_mut();
699 }
700
701 Ok(instructions)
702}
703
704pub fn populate_mips_instructions(json_contents: &str) -> Result<Vec<Instruction>> {
710 #[derive(Deserialize, Debug)]
711 struct MipsInstruction {
712 pub name: String,
713 pub summary: String,
714 pub asm_templates: Vec<String>,
715 }
716
717 impl From<MipsInstruction> for Instruction {
718 fn from(instr: MipsInstruction) -> Self {
719 Self {
720 name: instr.name.to_ascii_lowercase(),
721 summary: instr.summary,
722 asm_templates: instr.asm_templates,
723 arch: Arch::Mips,
724 forms: Vec::new(),
725 aliases: Vec::new(),
726 url: Some(
727 "https://www.cs.cornell.edu/courses/cs3410/2008fa/MIPS_Vol2.pdf".to_string(),
728 ),
729 }
730 }
731 }
732
733 let raw_instrs: Vec<MipsInstruction> =
734 serde_json::from_str(json_contents).map_err(|e| anyhow!("Failed to parse JSON: {e}"))?;
735 let instructions: Vec<Instruction> = raw_instrs.into_iter().map(Instruction::from).collect();
736
737 Ok(instructions)
738}
739
740pub fn populate_6502_instructions(html_conts: &str) -> Result<Vec<Instruction>> {
756 let name_regex = Regex::new(r#"<dt id="[A-Z]{3}">(?<name>[A-Z]{3})</dt>$"#).unwrap();
757 let summary_regex = Regex::new(r#"<p aria-label="summary">(?<summary>.+)</p>$"#).unwrap();
758 let mut instructions = Vec::new();
759 let start = {
760 let start_marker = r#"<dl class="opcodes">"#;
761 let section_start = html_conts.find(start_marker).unwrap();
762 section_start + start_marker.len() + 1 };
764 let mut lines = html_conts[start..].lines().peekable();
765 loop {
766 let Some(name_line) = lines.next() else {
768 break;
769 };
770 if name_line.is_empty() {
771 continue;
772 }
773 let name = &name_regex.captures(name_line).unwrap()["name"];
774 assert_eq!(lines.next().unwrap(), "<dd>");
775 let mut summary =
777 summary_regex.captures(lines.next().unwrap()).unwrap()["summary"].to_string();
778 let implementation_notes_marker = r#"<p aria-label="notes on the implementation">"#;
779 let synopsis_marker = r#"<p aria-label="synopsis">"#;
780 if lines
781 .peek()
782 .unwrap()
783 .starts_with(implementation_notes_marker)
784 {
785 summary.push('\n');
786 while !lines.peek().unwrap().starts_with(synopsis_marker) {
787 summary += &lines
788 .next()
789 .unwrap()
790 .replace(r#"<p aria-label="notes on the implementation">"#, "")
791 .replace("<br />", "")
792 .replace("</p>", "");
793 }
794 }
795 let synopsis_line = lines.next().unwrap();
797 let mut synopsis = String::new();
798 let mut prev_idx = 0;
799 for (i, c) in synopsis_line.chars().enumerate() {
800 match c {
801 '<' => {
802 if prev_idx != 0 {
803 let bytes: Vec<u8> = synopsis_line.as_bytes()[prev_idx..i].to_vec();
804 let decoded = htmlentity::entity::decode(&bytes).to_string().unwrap();
805 synopsis += &decoded;
806 }
807 }
808 '>' => prev_idx = i + 1,
809 _ => {}
810 }
811 }
812 assert_eq!(
814 r#"<table aria-label="flags">"#,
815 lines.next().unwrap().trim()
816 );
817 assert_eq!(
819 r"<tr><th>N</th><th>Z</th><th>C</th><th>I</th><th>D</th><th>V</th></tr>",
820 lines.next().unwrap().trim()
821 );
822 let flag_line = lines.next().unwrap().trim();
823 let flags: String = if flag_line.contains("from stack") {
824 "from stack".to_string()
825 } else {
826 flag_line
827 .chars()
828 .skip("<tr><td>".len())
829 .step_by("</td><td>".len() + 1)
830 .take(6) .collect()
832 };
833 assert!(
834 flags.len() == 6 || flags.eq("from stack"),
835 "name: {name}, flagline: {flag_line}"
836 );
837 assert_eq!("</table>", lines.next().unwrap().trim());
838 assert_eq!(
840 r#"<table aria-label="details">"#,
841 lines.next().unwrap().trim()
842 );
843 let mut templates = Vec::new();
844 assert_eq!(
845 r"<tr><th>addressing</th><th>assembler</th><th>opc</th><th>bytes</th><th>cycles</th></tr>",
846 lines.next().unwrap().trim()
847 );
848 loop {
849 let next = lines.next().unwrap().trim();
850 if next.eq("</table>") {
851 break;
852 }
853 let template_marker = "</td><td>";
854 let start_idx = next.find(template_marker).unwrap() + template_marker.len();
855 let end_offset = next[start_idx..].find(template_marker).unwrap();
856 templates.push(next[start_idx..start_idx + end_offset].to_string());
857 }
858 assert_eq!("</dd>", lines.next().unwrap().trim());
859 let combined_summary = format!("{summary}\n{synopsis}\nNZCIDV\n`{flags}`");
860 instructions.push(Instruction {
861 name: name.to_lowercase(),
862 summary: combined_summary,
863 forms: Vec::new(),
864 asm_templates: templates,
865 aliases: Vec::new(),
866 arch: Arch::MOS6502,
867 url: Some(format!(
868 "https://www.masswerk.at/6502/6502_instruction_set.html#{}",
869 name.to_uppercase()
870 )),
871 });
872 if name.eq("TYA") {
873 break;
874 }
875 }
876
877 Ok(instructions)
878}
879
880pub fn populate_power_isa_instructions(json_conts: &str) -> Result<Vec<Instruction>> {
898 #[allow(non_camel_case_types, clippy::upper_case_acronyms)]
899 #[derive(Deserialize, Debug, Copy, Clone)]
900 enum PowerReleaseRepr {
901 P1,
902 P2,
903 PPC,
904 #[serde(rename = "v2.00")]
905 v200,
906 #[serde(rename = "v2.01")]
907 v201,
908 #[serde(rename = "v2.02")]
909 v202,
910 #[serde(rename = "v2.03")]
911 v203,
912 #[serde(rename = "v2.04")]
913 v204,
914 #[serde(rename = "v2.05")]
915 v205,
916 #[serde(rename = "v2.06")]
917 v206,
918 #[serde(rename = "v2.07")]
919 v207,
920 #[serde(rename = "v3.0")]
921 v30,
922 #[serde(rename = "v3.0B")]
923 v30B,
924 #[serde(rename = "v3.0C")]
925 v30C,
926 #[serde(rename = "v3.1")]
927 v31,
928 #[serde(rename = "v3.1B")]
929 v31B,
930 }
931
932 impl PowerReleaseRepr {
933 fn release_message(self) -> String {
934 String::from(match self {
935 Self::P1 => "Introduced in POWER Architecture",
936 Self::P2 => "Introduced in POWER2 Architecture",
937 Self::PPC => "Introduced in PowerPC Architecture prior to v2.00",
938 Self::v200 => "Introduced in PowerPC Architecture Version 2.00",
939 Self::v201 => "Introduced in PowerPC Architecture Version 2.01",
940 Self::v202 => "Introduced in PowerPC Architecture Version 2.02",
941 Self::v203 => "Introduced in Power ISA Version 2.03",
942 Self::v204 => "Introduced in Power ISA Version 2.04",
943 Self::v205 => "Introduced in Power ISA Version 2.05",
944 Self::v206 => "Introduced in Power ISA Version 2.06",
945 Self::v207 => "Introduced in Power ISA Version 2.07",
946 Self::v30 => "Introduced in Power ISA Version 3.0",
947 Self::v30B => "Introduced in Power ISA Version 3.0B",
948 Self::v30C => "Introduced in Power ISA Version 3.0C",
949 Self::v31 => "Introduced in Power ISA Version 3.1",
950 Self::v31B => "Introduced in Power ISA Version 3.1B",
951 })
952 }
953 }
954 #[allow(dead_code)]
955 #[derive(Deserialize, Debug)]
956 struct PowerConditionRepr {
957 pub field: String,
958 pub value: String,
959 }
960 #[allow(dead_code)]
961 #[derive(Deserialize, Debug)]
962 struct PowerLayoutRepr {
963 pub name: String,
964 pub size: String,
965 }
966 #[allow(dead_code)]
967 #[derive(Deserialize, Debug)]
968 struct PowerMnemonicRepr {
969 pub name: String,
970 pub form: String,
971 pub mnemonic: String,
972 pub operands: Vec<String>,
973 pub conditions: Vec<PowerConditionRepr>,
974 pub layout: Vec<PowerLayoutRepr>,
975 pub release: PowerReleaseRepr,
976 }
977 #[derive(Deserialize, Debug)]
978 struct PowerJsonRepr {
979 pub mnemonics: Vec<PowerMnemonicRepr>,
980 pub body: Vec<String>,
981 }
982
983 impl From<PowerJsonRepr> for Vec<Instruction> {
984 fn from(value: PowerJsonRepr) -> Self {
985 let mut instructions = Self::new();
986 for op in value.mnemonics {
987 let name = op.mnemonic.trim();
988 let mut instruction = Instruction {
989 arch: Arch::PowerISA,
990 name: name.to_string(),
991 ..Default::default()
992 };
993 instruction.summary = {
994 let operands = op.operands.iter().fold(String::new(), |accum, x| {
995 format!("{} + `{x}`", if accum.is_empty() { "" } else { "\n" })
996 });
997 let description = value.body.join("\n");
998
999 format!(
1000 "\n{} ({})\n\n{operands}\n{description}",
1001 op.name,
1002 op.release.release_message(),
1003 )
1004 };
1005 instructions.push(instruction);
1006 }
1007
1008 instructions
1009 }
1010 }
1011
1012 let json_instrs: Vec<PowerJsonRepr> = serde_json::from_str(json_conts)?;
1013 let mut instructions = Vec::new();
1014 for instr in json_instrs {
1015 instructions.append(&mut instr.into());
1016 }
1017
1018 Ok(instructions)
1019}
1020
1021pub fn populate_instructions(xml_contents: &str) -> Result<Vec<Instruction>> {
1037 let mut instructions_map = HashMap::<String, Instruction>::new();
1039
1040 let mut reader = Reader::from_str(xml_contents);
1042
1043 let mut curr_instruction = Instruction::default();
1045 let mut curr_instruction_form = InstructionForm::default();
1046 let mut arch: Arch = Arch::None;
1047
1048 loop {
1049 match reader.read_event() {
1050 Ok(Event::Start(ref e)) => {
1052 match e.name() {
1053 QName(b"InstructionSet") => {
1054 for attr in e.attributes() {
1055 let Attribute { key, value } = attr.unwrap();
1056 if b"name" == key.into_inner() {
1057 arch = Arch::from_str(ustr::get_str(&value)).unwrap_or_else(|e| {
1058 panic!("Failed parse Arch {} -- {e}", ustr::get_str(&value))
1059 });
1060 } else {
1061 panic!("Failed to parse architecture name -- no name value");
1062 }
1063 }
1064 }
1065 QName(b"Instruction") => {
1066 curr_instruction = Instruction::default();
1068 curr_instruction.arch = arch;
1069
1070 for attr in e.attributes() {
1072 let Attribute { key, value } = attr.unwrap();
1073 match ustr::get_str(key.into_inner()) {
1074 "name" => {
1075 let name = ustr::get_str(&value);
1076 curr_instruction.name = name.to_ascii_lowercase();
1077 }
1078 "summary" => {
1079 ustr::get_str(&value).clone_into(&mut curr_instruction.summary);
1080 }
1081 _ => {}
1082 }
1083 }
1084 }
1085 QName(b"InstructionForm") => {
1086 curr_instruction_form = InstructionForm::default();
1098
1099 for attr in e.attributes() {
1101 let Attribute { key, value } = attr.unwrap();
1102 match ustr::get_str(key.into_inner()) {
1103 "gas-name" => {
1104 curr_instruction_form.gas_name =
1105 Some(ustr::get_str(&value).to_owned());
1106 }
1107 "go-name" => {
1108 curr_instruction_form.go_name =
1109 Some(ustr::get_str(&value).to_owned());
1110 }
1111 "mmx-mode" => {
1112 let value_ = value.as_ref();
1113 curr_instruction_form.mmx_mode =
1114 Some(MMXMode::from_str(ustr::get_str(value_))?);
1115 }
1116 "xmm-mode" => {
1117 let value_ = value.as_ref();
1118 curr_instruction_form.xmm_mode =
1119 Some(XMMMode::from_str(ustr::get_str(value_))?);
1120 }
1121 "cancelling-inputs" => match ustr::get_str(&value) {
1122 "true" => curr_instruction_form.cancelling_inputs = Some(true),
1123 "false" => {
1124 curr_instruction_form.cancelling_inputs = Some(false);
1125 }
1126 val => {
1127 return Err(anyhow!(
1128 "Unknown value '{val}' for XML attribute cancelling inputs"
1129 ));
1130 }
1131 },
1132 "nacl-version" => {
1133 curr_instruction_form.nacl_version =
1134 value.as_ref().first().copied();
1135 }
1136 "nacl-zero-extends-outputs" => match ustr::get_str(&value) {
1137 "true" => {
1138 curr_instruction_form.nacl_zero_extends_outputs =
1139 Some(true);
1140 }
1141 "false" => {
1142 curr_instruction_form.nacl_zero_extends_outputs =
1143 Some(false);
1144 }
1145 val => {
1146 return Err(anyhow!(
1147 "Unknown value '{val}' for XML attribute nacl-zero-extends-outputs",
1148 ));
1149 }
1150 },
1151 "z80name" => {
1152 curr_instruction_form.z80_name =
1153 Some(ustr::get_str(&value).to_owned());
1154 }
1155 "form" => {
1156 let value_ = ustr::get_str(&value);
1157 curr_instruction_form.urls.push(format!(
1158 "https://www.zilog.com/docs/z80/z80cpu_um.pdf#{}",
1159 encode_www_form_urlencoded(value_)
1160 ));
1161 curr_instruction_form.z80_form = Some(value_.to_string());
1162 }
1163 _ => {}
1164 }
1165 }
1166 }
1167 QName(b"Encoding") => {
1169 for attr in e.attributes() {
1170 let Attribute { key, value } = attr.unwrap();
1171 if key.into_inner() == b"byte" {
1172 let disp_code = ustr::get_str(&value);
1173 if let Some(ref mut opcodes) = curr_instruction_form.z80_opcode {
1174 opcodes.push_str(disp_code);
1175 } else {
1176 curr_instruction_form.z80_opcode = Some(disp_code.to_owned());
1177 }
1178 }
1179 }
1180 }
1181 _ => {} }
1183 }
1184 Ok(Event::Empty(ref e)) => {
1185 match e.name() {
1186 QName(b"ISA") => {
1187 for attr in e.attributes() {
1188 let Attribute { key, value } = attr.unwrap();
1189 if key.into_inner() == b"id" {
1190 curr_instruction_form.isa = Some(
1191 ISA::from_str(ustr::get_str(value.as_ref())).unwrap_or_else(
1192 |_| {
1193 panic!(
1194 "Unexpected ISA variant {}",
1195 ustr::get_str(&value)
1196 )
1197 },
1198 ),
1199 );
1200 }
1201 }
1202 }
1203 QName(b"Operand") => {
1204 let mut type_ = OperandType::k; let mut extended_size = None;
1206 let mut input = None;
1207 let mut output = None;
1208
1209 for attr in e.attributes() {
1210 let Attribute { key, value } = attr.unwrap();
1211 match key.into_inner() {
1212 b"type" => {
1213 type_ = match OperandType::from_str(ustr::get_str(&value)) {
1214 Ok(op_type) => op_type,
1215 Err(_) => {
1216 return Err(anyhow!(
1217 "Unknown value for operand type -- Variant: {}",
1218 ustr::get_str(&value)
1219 ));
1220 }
1221 }
1222 }
1223 b"input" => match value.as_ref() {
1224 b"true" => input = Some(true),
1225 b"false" => input = Some(false),
1226 _ => return Err(anyhow!("Unknown value for operand type")),
1227 },
1228 b"output" => match value.as_ref() {
1229 b"true" => output = Some(true),
1230 b"false" => output = Some(false),
1231 _ => return Err(anyhow!("Unknown value for operand type")),
1232 },
1233 b"extended-size" => {
1234 extended_size =
1235 Some(ustr::get_str(value.as_ref()).parse::<usize>()?);
1236 }
1237 _ => {} }
1239 }
1240
1241 curr_instruction_form.operands.push(Operand {
1242 type_,
1243 input,
1244 output,
1245 extended_size,
1246 });
1247 }
1248 QName(b"TimingZ80") => {
1249 for attr in e.attributes() {
1250 let Attribute { key, value } = attr.unwrap();
1251 if key.into_inner() == b"value" {
1252 let z80 = match Z80TimingInfo::from_str(ustr::get_str(&value)) {
1253 Ok(timing) => timing,
1254 Err(e) => return Err(anyhow!(e)),
1255 };
1256 if let Some(ref mut timing_entry) = curr_instruction_form.z80_timing
1257 {
1258 timing_entry.z80 = z80;
1259 } else {
1260 curr_instruction_form.z80_timing = Some(Z80Timing {
1261 z80,
1262 ..Default::default()
1263 });
1264 }
1265 }
1266 }
1267 }
1268 QName(b"TimingZ80M1") => {
1269 for attr in e.attributes() {
1270 let Attribute { key, value } = attr.unwrap();
1271 if key.into_inner() == b"value" {
1272 let z80_plus_m1 =
1273 match Z80TimingInfo::from_str(ustr::get_str(&value)) {
1274 Ok(timing) => timing,
1275 Err(e) => return Err(anyhow!(e)),
1276 };
1277 if let Some(ref mut timing_entry) = curr_instruction_form.z80_timing
1278 {
1279 timing_entry.z80_plus_m1 = z80_plus_m1;
1280 } else {
1281 curr_instruction_form.z80_timing = Some(Z80Timing {
1282 z80_plus_m1,
1283 ..Default::default()
1284 });
1285 }
1286 }
1287 }
1288 }
1289 QName(b"TimingR800") => {
1290 for attr in e.attributes() {
1291 let Attribute { key, value } = attr.unwrap();
1292 if key.into_inner() == b"value" {
1293 let r800 = match Z80TimingInfo::from_str(ustr::get_str(&value)) {
1294 Ok(timing) => timing,
1295 Err(e) => return Err(anyhow!(e)),
1296 };
1297 if let Some(ref mut timing_entry) = curr_instruction_form.z80_timing
1298 {
1299 timing_entry.r800 = r800;
1300 } else {
1301 curr_instruction_form.z80_timing = Some(Z80Timing {
1302 r800,
1303 ..Default::default()
1304 });
1305 }
1306 }
1307 }
1308 }
1309 QName(b"TimingR800Wait") => {
1310 for attr in e.attributes() {
1311 let Attribute { key, value } = attr.unwrap();
1312 if key.into_inner() == b"value" {
1313 let r800_plus_wait =
1314 match Z80TimingInfo::from_str(ustr::get_str(&value)) {
1315 Ok(timing) => timing,
1316 Err(e) => return Err(anyhow!(e)),
1317 };
1318 if let Some(ref mut timing_entry) = curr_instruction_form.z80_timing
1319 {
1320 timing_entry.r800_plus_wait = r800_plus_wait;
1321 } else {
1322 curr_instruction_form.z80_timing = Some(Z80Timing {
1323 r800_plus_wait,
1324 ..Default::default()
1325 });
1326 }
1327 }
1328 }
1329 }
1330 _ => {} }
1332 }
1333 Ok(Event::End(ref e)) => {
1335 match e.name() {
1336 QName(b"Instruction") => {
1337 assert!(curr_instruction.arch != Arch::None);
1339 instructions_map
1340 .insert(curr_instruction.name.clone(), curr_instruction.clone());
1341 }
1342 QName(b"InstructionForm") => {
1343 curr_instruction.push_form(curr_instruction_form.clone());
1344 }
1345 _ => {} }
1347 }
1348 Ok(Event::Eof) => break,
1349 Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
1350 _ => {} }
1352 }
1353
1354 if matches!(arch, Arch::X86 | Arch::X86_64) {
1355 let x86_online_docs = get_x86_docs_url();
1356 let body = get_docs_body(&x86_online_docs).unwrap_or_default();
1357 let body_it = body.split("<td>").skip(1).step_by(2);
1358
1359 let re = Regex::new(r"<a href='\/x86\/(.*?)'>(.*?)<\/a>.*<\/td>")?;
1367 for line in body_it {
1368 let caps = re.captures(line).unwrap();
1370 let url_suffix = caps.get(1).map_or("", |m| m.as_str());
1371 let instruction_name = caps.get(2).map_or("", |m| m.as_str());
1372
1373 if let Some(instruction) = instructions_map.get_mut(instruction_name) {
1375 instruction.url = Some(x86_online_docs.clone() + url_suffix);
1376 }
1377 }
1378 }
1379
1380 Ok(instructions_map.into_values().collect())
1381}
1382
1383pub fn populate_name_to_instruction_map(
1384 arch: Arch,
1385 instructions: &Vec<Instruction>,
1386 names_to_instructions: &mut NameToInstructionMap,
1387) {
1388 for instruction in instructions {
1389 names_to_instructions.insert((arch, instruction.name.clone()), instruction.clone());
1390 for name in &instruction.get_associated_names() {
1393 names_to_instructions
1394 .entry((arch, (*name).to_string()))
1395 .or_insert_with(|| instruction.clone());
1396 }
1397 }
1398}
1399
1400fn process_sreg_value(
1401 e: &BytesStart,
1402 curr_instruction_form: &mut InstructionForm,
1403 field_setter: impl FnOnce(&mut AvrStatusRegister, char),
1404) {
1405 for attr in e.attributes() {
1406 let Attribute { key, value } = attr.unwrap();
1407 if key.into_inner() == b"value" {
1408 let val = ustr::get_str(&value);
1409 let status = if val.eq("–") {
1410 '-'
1411 } else {
1412 ustr::get_str(&value)
1413 .chars()
1414 .next()
1415 .expect("Empty status register value")
1416 };
1417 if let Some(ref mut sreg_entry) = curr_instruction_form.avr_status_register {
1418 field_setter(sreg_entry, status);
1419 } else {
1420 let mut sreg = AvrStatusRegister::default();
1421 field_setter(&mut sreg, status);
1422 curr_instruction_form.avr_status_register = Some(sreg);
1423 }
1424 break;
1425 }
1426 }
1427}
1428
1429fn process_clock_value(
1430 e: &BytesStart,
1431 curr_instruction_form: &mut InstructionForm,
1432 field_setter: impl FnOnce(&mut AvrTiming, Option<String>),
1433) {
1434 for attr in e.attributes() {
1435 let Attribute { key, value } = attr.unwrap();
1436 if key.into_inner() == b"value" {
1437 let cycles = Some(ustr::get_str(&value).to_string());
1438 if let Some(ref mut timing_entry) = curr_instruction_form.avr_timing {
1439 field_setter(timing_entry, cycles);
1440 } else {
1441 let mut timing = AvrTiming::default();
1442 field_setter(&mut timing, cycles);
1443 curr_instruction_form.avr_timing = Some(timing);
1444 }
1445 break;
1446 }
1447 }
1448}
1449
1450pub fn populate_avr_instructions(xml_contents: &str) -> Result<Vec<Instruction>> {
1466 let mut instructions_map = HashMap::<String, Instruction>::new();
1468
1469 let mut reader = Reader::from_str(xml_contents);
1471
1472 let mut curr_instruction = Instruction::default();
1474 let mut curr_instruction_form = InstructionForm::default();
1475 let mut arch: Arch = Arch::None;
1476 let mut curr_version: Option<String> = None;
1477
1478 loop {
1479 match reader.read_event() {
1480 Ok(Event::Start(ref e)) => {
1482 match e.name() {
1483 QName(b"InstructionSet") => {
1484 for attr in e.attributes() {
1485 let Attribute { key, value } = attr.unwrap();
1486 if b"name" == key.into_inner() {
1487 arch = Arch::from_str(ustr::get_str(&value)).unwrap_or_else(|e| {
1488 panic!("Failed parse Arch {} -- {e}", ustr::get_str(&value))
1489 });
1490 assert!(arch == Arch::Avr);
1491 } else {
1492 panic!("Failed parse Arch -- no name value");
1493 }
1494 }
1495 }
1496 QName(b"Instruction") => {
1497 curr_instruction = Instruction::default();
1499 curr_instruction.arch = arch;
1500
1501 for attr in e.attributes() {
1502 let Attribute { key, value } = attr.unwrap();
1503 match ustr::get_str(key.into_inner()) {
1504 "name" => {
1505 let name = ustr::get_str(&value);
1506 curr_instruction.name = name.to_ascii_lowercase();
1507 }
1508 "summary" => {
1509 ustr::get_str(&value).clone_into(&mut curr_instruction.summary);
1510 }
1511 _ => {}
1512 }
1513 }
1514 }
1515 QName(b"Version") => {
1517 for attr in e.attributes() {
1518 let Attribute { key, value } = attr.unwrap();
1519 if "value" == ustr::get_str(key.into_inner()) {
1520 curr_version = Some(ustr::get_str(&value).to_string());
1521 }
1522 }
1523 }
1524 QName(b"InstructionForm") => {
1525 assert!(curr_version.is_some());
1526 curr_instruction_form = InstructionForm::default();
1528 curr_instruction_form.avr_version.clone_from(&curr_version);
1529
1530 for attr in e.attributes() {
1532 let Attribute { key, value } = attr.unwrap();
1533 match ustr::get_str(key.into_inner()) {
1534 "mnemonic" => {
1535 curr_instruction_form.avr_mneumonic =
1536 Some(ustr::get_str(&value).to_owned());
1537 }
1538 "summary" => {
1539 curr_instruction_form.avr_summary =
1540 Some(ustr::get_str(&value).to_owned());
1541 }
1542 _ => {}
1543 }
1544 }
1545 }
1546 _ => {} }
1550 }
1551 Ok(Event::Empty(ref e)) => {
1552 match e.name() {
1553 QName(b"Operand") => {
1554 for attr in e.attributes() {
1555 let Attribute { key, value } = attr.unwrap();
1556 if key.into_inner() == b"type" {
1557 let val = ustr::get_str(&value);
1558 for oper in val.split(',') {
1559 if oper.is_empty() {
1560 continue;
1561 }
1562 let Ok(type_) = OperandType::from_str(oper) else {
1563 return Err(anyhow!(
1564 "Unknown value for operand type -- Variant: {}",
1565 ustr::get_str(&value)
1566 ));
1567 };
1568 curr_instruction_form.operands.push(Operand {
1569 type_,
1570 input: None,
1571 output: None,
1572 extended_size: None,
1573 });
1574 }
1575 }
1576 }
1577 }
1578 QName(b"I") => {
1580 process_sreg_value(e, &mut curr_instruction_form, |sreg, val| sreg.i = val);
1581 }
1582 QName(b"T") => {
1583 process_sreg_value(e, &mut curr_instruction_form, |sreg, val| sreg.t = val);
1584 }
1585 QName(b"H") => {
1586 process_sreg_value(e, &mut curr_instruction_form, |sreg, val| sreg.h = val);
1587 }
1588 QName(b"S") => {
1589 process_sreg_value(e, &mut curr_instruction_form, |sreg, val| sreg.s = val);
1590 }
1591 QName(b"V") => {
1592 process_sreg_value(e, &mut curr_instruction_form, |sreg, val| sreg.v = val);
1593 }
1594 QName(b"Z") => {
1595 process_sreg_value(e, &mut curr_instruction_form, |sreg, val| sreg.z = val);
1596 }
1597 QName(b"C") => {
1598 process_sreg_value(e, &mut curr_instruction_form, |sreg, val| sreg.c = val);
1599 }
1600 QName(b"N") => {
1601 process_sreg_value(e, &mut curr_instruction_form, |sreg, val| sreg.n = val);
1602 }
1603 QName(b"AVRe") => {
1605 process_clock_value(e, &mut curr_instruction_form, |timing, val| {
1606 timing.avre = val;
1607 });
1608 }
1609 QName(b"AVRxm") => {
1610 process_clock_value(e, &mut curr_instruction_form, |timing, val| {
1611 timing.avrxm = val;
1612 });
1613 }
1614 QName(b"AVRxt") => {
1615 process_clock_value(e, &mut curr_instruction_form, |timing, val| {
1616 timing.avrxt = val;
1617 });
1618 }
1619 QName(b"AVRrc") => {
1620 process_clock_value(e, &mut curr_instruction_form, |timing, val| {
1621 timing.avrrc = val;
1622 });
1623 }
1624 _ => {} }
1626 }
1627 Ok(Event::End(ref e)) => {
1629 match e.name() {
1630 QName(b"Instruction") => {
1631 assert!(curr_instruction.arch != Arch::None);
1633 instructions_map
1634 .insert(curr_instruction.name.clone(), curr_instruction.clone());
1635 curr_version = None;
1636 }
1637 QName(b"InstructionForm") => {
1638 curr_instruction.push_form(curr_instruction_form.clone());
1639 }
1640 _ => {} }
1642 }
1643 Ok(Event::Eof) => break,
1644 Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
1645 _ => {} }
1647 }
1648
1649 Ok(instructions_map.into_values().collect())
1650}
1651
1652pub fn populate_registers(xml_contents: &str) -> Result<Vec<Register>> {
1668 let mut registers_map = HashMap::<String, Register>::new();
1669
1670 let mut reader = Reader::from_str(xml_contents);
1672
1673 let mut curr_register = Register::default();
1675 let mut curr_bit_flag = RegisterBitInfo::default();
1676 let mut arch: Arch = Arch::None;
1677
1678 loop {
1679 match reader.read_event() {
1680 Ok(Event::Start(ref e)) => {
1682 match e.name() {
1683 QName(b"InstructionSet") => {
1684 for attr in e.attributes() {
1685 let Attribute { key, value } = attr.unwrap();
1686 if b"name" == key.into_inner() {
1687 arch = Arch::from_str(ustr::get_str(&value)).unwrap_or_else(|e| {
1688 panic!(
1689 "Unexpected Arch variant {} -- {e}",
1690 ustr::get_str(&value)
1691 )
1692 });
1693 }
1694 }
1695 }
1696 QName(b"Register") => {
1697 curr_register = Register::default();
1699 curr_register.arch = arch;
1700
1701 for attr in e.attributes() {
1703 let Attribute { key, value } = attr.unwrap();
1704 match key.into_inner() {
1705 b"name" => {
1706 let name_ = String::from(ustr::get_str(&value));
1707 curr_register.name = name_.to_ascii_lowercase();
1708 }
1709 b"description" => {
1710 curr_register.description =
1711 Some(String::from(ustr::get_str(&value)));
1712 }
1713 b"type" => {
1714 curr_register.reg_type =
1715 RegisterType::from_str(ustr::get_str(&value))
1716 .map_or(None, |reg| Some(reg));
1717 }
1718 b"width" => {
1719 curr_register.width =
1720 RegisterWidth::from_str(ustr::get_str(&value))
1721 .map_or(None, |width| Some(width));
1722 }
1723 _ => {}
1724 }
1725 }
1726 }
1727 QName(b"Flag") => {
1729 curr_bit_flag = RegisterBitInfo::default();
1730
1731 for attr in e.attributes() {
1732 let Attribute { key, value } = attr.unwrap();
1733 match key.into_inner() {
1734 b"bit" => {
1735 curr_bit_flag.bit =
1736 ustr::get_str(&value).parse::<u32>().unwrap();
1737 }
1738 b"label" => {
1739 curr_bit_flag.label = String::from(ustr::get_str(&value));
1740 }
1741 b"description" => {
1742 curr_bit_flag.description = String::from(ustr::get_str(&value));
1743 }
1744 b"pae" => {
1745 curr_bit_flag.pae = String::from(ustr::get_str(&value));
1746 }
1747 b"longmode" => {
1748 curr_bit_flag.long_mode = String::from(ustr::get_str(&value));
1749 }
1750 _ => {}
1751 }
1752 }
1753 }
1754 _ => {} }
1756 }
1757 Ok(Event::End(ref e)) => {
1759 match e.name() {
1760 QName(b"Register") => {
1761 assert!(curr_register.arch != Arch::None);
1763 registers_map.insert(curr_register.name.clone(), curr_register.clone());
1764 }
1765 QName(b"Flag") => {
1766 curr_register.push_flag(curr_bit_flag.clone());
1767 }
1768 _ => {} }
1770 }
1771 Ok(Event::Eof) => break,
1772 Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
1773 _ => {} }
1775 }
1776
1777 Ok(registers_map.into_values().collect())
1782}
1783
1784pub fn populate_name_to_register_map(
1785 arch: Arch,
1786 registers: &Vec<Register>,
1787 names_to_registers: &mut NameToRegisterMap,
1788) {
1789 for register in registers {
1790 for name in ®ister.get_associated_names() {
1791 names_to_registers.insert((arch, (*name).to_string()), register.clone());
1792 }
1793 }
1794}
1795
1796pub fn populate_masm_nasm_fasm_mars_directives(xml_contents: &str) -> Result<Vec<Directive>> {
1812 let mut directives_map = HashMap::<String, Directive>::new();
1813
1814 let mut reader = Reader::from_str(xml_contents);
1816
1817 let mut curr_directive = Directive::default();
1819 let mut in_desc = false;
1820
1821 loop {
1822 match reader.read_event() {
1823 Ok(Event::Start(ref e)) => {
1825 match e.name() {
1826 QName(b"directive") => {
1827 curr_directive = Directive::default();
1829
1830 for attr in e.attributes() {
1832 let Attribute { key, value } = attr.unwrap();
1833 match key.into_inner() {
1834 b"name" => {
1835 let name = ustr::get_str(&value);
1836 curr_directive.name = name.to_ascii_lowercase();
1837 }
1838 b"tool" => {
1839 let assembler = Assembler::from_str(ustr::get_str(&value))?;
1840 curr_directive.assembler = assembler;
1841 }
1842 _ => {}
1843 }
1844 }
1845 }
1846 QName(b"description") => {
1847 in_desc = true;
1848 }
1849 _ => {} }
1851 }
1852 Ok(Event::Text(ref txt)) => {
1853 if in_desc {
1854 ustr::get_str(txt)
1855 .trim_ascii()
1856 .clone_into(&mut curr_directive.description);
1857 }
1858 }
1859 Ok(Event::End(ref e)) => {
1861 if QName(b"directive") == e.name() {
1862 directives_map.insert(curr_directive.name.clone(), curr_directive.clone());
1863 } else if QName(b"description") == e.name() {
1864 in_desc = false;
1865 }
1866 }
1867 Ok(Event::Eof) => break,
1868 Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
1869 _ => {} }
1871 }
1872
1873 for directive in directives_map.values() {
1876 assert_ne!(directive.assembler, Assembler::None);
1877 }
1878
1879 Ok(directives_map.into_values().collect())
1880}
1881
1882pub fn populate_gas_directives(xml_contents: &str) -> Result<Vec<Directive>> {
1898 let mut directives_map = HashMap::<String, Directive>::new();
1899
1900 let mut reader = Reader::from_str(xml_contents);
1902
1903 let mut curr_directive = Directive::default();
1905 let mut assembler = Assembler::None;
1906
1907 loop {
1908 match reader.read_event() {
1909 Ok(Event::Start(ref e)) => {
1911 match e.name() {
1912 QName(b"Assembler") => {
1913 for attr in e.attributes() {
1914 let Attribute { key, value } = attr.unwrap();
1915 if b"name" == key.into_inner() {
1916 assembler = Assembler::from_str(ustr::get_str(&value)).unwrap();
1917 }
1918 }
1919 }
1920 QName(b"Directive") => {
1921 curr_directive = Directive::default();
1923 curr_directive.assembler = assembler;
1924
1925 for attr in e.attributes() {
1927 let Attribute { key, value } = attr.unwrap();
1928 match key.into_inner() {
1929 b"name" => {
1930 let name = ustr::get_str(&value);
1931 curr_directive.name = name.to_ascii_lowercase();
1932 }
1933 b"md_description" => {
1934 let description = ustr::get_str(&value);
1935 curr_directive.description =
1936 unescape(description).unwrap().to_string();
1937 }
1938 b"deprecated" => {
1939 curr_directive.deprecated =
1940 FromStr::from_str(ustr::get_str(&value)).unwrap();
1941 }
1942 b"url_fragment" => {
1943 curr_directive.url = Some(format!(
1944 "https://sourceware.org/binutils/docs-2.41/as/{}.html",
1945 ustr::get_str(&value)
1946 ));
1947 }
1948 _ => {}
1949 }
1950 }
1951 }
1952 QName(b"Signature") => {
1953 for attr in e.attributes() {
1954 let Attribute { key, value } = attr.unwrap();
1955 if b"sig" == key.into_inner() {
1956 let sig = ustr::get_str(&value);
1957 curr_directive
1958 .signatures
1959 .push(unescape(sig).unwrap().to_string());
1960 }
1961 }
1962 }
1963 _ => {} }
1965 }
1966 Ok(Event::End(ref e)) => {
1968 if QName(b"Directive") == e.name() {
1969 directives_map.insert(curr_directive.name.clone(), curr_directive.clone());
1971 }
1972 }
1973 Ok(Event::Eof) => break,
1974 Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
1975 _ => {} }
1977 }
1978
1979 for directive in directives_map.values() {
1982 assert_ne!(directive.assembler, Assembler::None);
1983 }
1984
1985 Ok(directives_map.into_values().collect())
1986}
1987
1988pub fn populate_avr_directives(xml_contents: &str) -> Result<Vec<Directive>> {
2004 let mut directives_map = HashMap::<String, Directive>::new();
2005
2006 let mut reader = Reader::from_str(xml_contents);
2008
2009 let mut curr_directive = Directive::default();
2011 let mut assembler = Assembler::None;
2012
2013 loop {
2014 match reader.read_event() {
2015 Ok(Event::Start(ref e)) => {
2017 match e.name() {
2018 QName(b"Assembler") => {
2019 for attr in e.attributes() {
2020 let Attribute { key, value } = attr.unwrap();
2021 if b"name" == key.into_inner() {
2022 assembler = Assembler::from_str(ustr::get_str(&value)).unwrap();
2023 }
2024 }
2025 }
2026 QName(b"Directive") => {
2027 curr_directive = Directive::default();
2029 curr_directive.assembler = assembler;
2030
2031 for attr in e.attributes() {
2033 let Attribute { key, value } = attr.unwrap();
2034 match key.into_inner() {
2035 b"name" => {
2036 let name = ustr::get_str(&value);
2037 curr_directive.name = name.to_ascii_lowercase();
2038 }
2039 b"description" => {
2040 let description = ustr::get_str(&value);
2041 curr_directive.description =
2042 unescape(description).unwrap().to_string();
2043 }
2044 _ => {}
2045 }
2046 }
2047 }
2048 QName(b"Signature") => {
2049 for attr in e.attributes() {
2050 let Attribute { key, value } = attr.unwrap();
2051 if b"sig" == key.into_inner() {
2052 let sig = ustr::get_str(&value);
2053 curr_directive
2054 .signatures
2055 .push(unescape(sig).unwrap().to_string());
2056 }
2057 }
2058 }
2059 _ => {} }
2061 }
2062 Ok(Event::End(ref e)) => {
2064 if QName(b"Directive") == e.name() {
2065 directives_map.insert(curr_directive.name.clone(), curr_directive.clone());
2067 }
2068 }
2069 Ok(Event::Eof) => break,
2070 Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
2071 _ => {} }
2073 }
2074
2075 for directive in directives_map.values() {
2078 assert_ne!(directive.assembler, Assembler::None);
2079 }
2080
2081 Ok(directives_map.into_values().collect())
2082}
2083
2084pub fn populate_ca65_directives(html_conts: &str) -> Result<Vec<Directive>> {
2099 let eat_lines = |lines: &mut Peekable<Lines<'_>>, empty: bool| {
2100 while let Some(line) = lines.peek() {
2101 if empty != line.is_empty() {
2102 break;
2103 }
2104 _ = lines.next().unwrap();
2105 }
2106 };
2107 let name_regex = Regex::new(r"<CODE>(?<name>.+)</CODE>").unwrap();
2108 let url_regex =
2109 Regex::new(r#"^<H2><A NAME=".+"></A> <A NAME="(?<fragment>[a-z, A-Z, 0-9, .]+)">"#)
2110 .unwrap();
2111 let mut directives = Vec::new();
2112 let start = {
2113 let start_marker = r##"<H2><A NAME="pseudo-variables"></A> <A NAME="s9">9.</A> <A HREF="#toc9">Pseudo variables</A></H2>"##;
2114 let section_start = html_conts.find(start_marker).unwrap();
2115 section_start + start_marker.len() + 1 };
2117 let mut lines = html_conts[start..].lines().peekable();
2118 eat_lines(&mut lines, true);
2119 _ = lines.next().unwrap(); _ = lines.next().unwrap();
2121 eat_lines(&mut lines, true);
2122 'outer: loop {
2123 loop {
2125 let Some(next) = lines.peek() else {
2126 break 'outer;
2127 };
2128 let next = next.trim();
2129 if next.starts_with("<H2><A NAME=\".") || next.starts_with("<H2><A NAME=\"*") {
2130 break;
2131 }
2132 _ = lines.next().unwrap();
2133 }
2134
2135 let name_line = lines.next().unwrap();
2136 let name = {
2137 let Some(caps) = &name_regex.captures(name_line) else {
2138 eat_lines(&mut lines, true);
2142 eat_lines(&mut lines, false);
2143 eat_lines(&mut lines, true);
2144 continue;
2145 };
2146 caps["name"].to_string()
2147 };
2148 let fragment = &url_regex.captures(name_line).unwrap()["fragment"];
2149 let url = format!("https://cc65.github.io/doc/ca65.html#{fragment}");
2150 assert_eq!(lines.next().unwrap().trim(), "</H2>");
2151 eat_lines(&mut lines, true);
2152 let mut description = String::new();
2154 while !lines.peek().unwrap().is_empty() {
2155 let description_line = lines.next().unwrap();
2156 let len_before = description.len();
2157 let mut prev_idx = 0;
2158 for (i, c) in description_line.chars().enumerate() {
2159 match c {
2160 '<' => {
2161 #[allow(
2162 clippy::sliced_string_as_bytes,
2163 clippy::char_indices_as_byte_indices
2164 )]
2165 let bytes: Vec<u8> = description_line[prev_idx..i].as_bytes().to_vec();
2166 let decoded = htmlentity::entity::decode(&bytes).to_string().unwrap();
2167 description += &decoded;
2168 }
2169 '>' => prev_idx = i + 1,
2170 _ => {}
2171 }
2172 }
2173 let line_len = description_line.len();
2174 if prev_idx < line_len - 1 {
2176 #[allow(clippy::sliced_string_as_bytes)]
2177 let bytes = description_line[prev_idx..description_line.len()].as_bytes();
2178 let decoded = htmlentity::entity::decode(bytes).to_string().unwrap();
2179 description += &decoded;
2180 }
2181 if description.len() != len_before {
2182 description.push(' ');
2183 }
2184 }
2185 let description = {
2186 while description.ends_with('\n') {
2187 _ = description.pop();
2188 }
2189 description.push('\n');
2190 description.trim().replace(" ", " ")
2191 };
2192 if name.contains(',') {
2194 for alias in name.split(", ") {
2195 directives.push(Directive {
2196 name: alias.trim().to_lowercase(),
2197 signatures: Vec::new(),
2198 description: description.clone(),
2199 deprecated: false,
2200 url: Some(url.clone()),
2201 assembler: Assembler::Ca65,
2202 });
2203 }
2204 } else {
2205 directives.push(Directive {
2206 name: name.to_lowercase(),
2207 signatures: Vec::new(),
2208 description,
2209 deprecated: false,
2210 url: Some(url),
2211 assembler: Assembler::Ca65,
2212 });
2213 }
2214 }
2215
2216 Ok(directives)
2217}
2218
2219pub fn populate_name_to_directive_map(
2220 assem: Assembler,
2221 directives: &Vec<Directive>,
2222 names_to_directives: &mut NameToDirectiveMap,
2223) {
2224 for directive in directives {
2225 for name in &directive.get_associated_names() {
2226 names_to_directives.insert((assem, (*name).to_string()), directive.clone());
2227 }
2228 }
2229}
2230
2231fn get_docs_body(x86_online_docs: &str) -> Option<String> {
2232 let cache_refresh = args().any(|arg| arg.contains("--cache-refresh"));
2238 let mut x86_cache_path = match get_cache_dir() {
2239 Ok(cache_path) => Some(cache_path),
2240 Err(e) => {
2241 eprintln!("Failed to resolve the cache file path - Error: {e}.");
2242 None
2243 }
2244 };
2245
2246 let cache_exists: bool;
2248 if let Some(mut path) = x86_cache_path {
2249 path.push("x86_instr_docs.html");
2250 cache_exists = matches!(path.try_exists(), Ok(true));
2251 x86_cache_path = Some(path);
2252 } else {
2253 cache_exists = false;
2254 }
2255
2256 let body = if cache_refresh || !cache_exists {
2257 match get_x86_docs_web(x86_online_docs) {
2258 Ok(docs) => {
2259 if let Some(ref path) = x86_cache_path {
2260 set_x86_docs_cache(&docs, path);
2261 }
2262 docs
2263 }
2264 Err(e) => {
2265 eprintln!("Failed to fetch documentation from {x86_online_docs} - Error: {e}.");
2266 return None;
2267 }
2268 }
2269 } else if let Some(ref path) = x86_cache_path {
2270 match get_x86_docs_cache(path) {
2271 Ok(docs) => docs,
2272 Err(e) => {
2273 eprintln!(
2274 "Failed to fetch documentation from the cache: {} - Error: {e}.",
2275 path.display()
2276 );
2277 return None;
2278 }
2279 }
2280 } else {
2281 eprintln!("Failed to fetch documentation from the cache - Invalid path.");
2282 return None;
2283 };
2284
2285 if body.split("<td>").skip(1).step_by(2).next().is_none() {
2288 eprintln!("Invalid docs contents.");
2289 if let Some(ref path) = x86_cache_path {
2290 eprintln!("Attempting to remove the cache file {}...", path.display());
2291 match std::fs::remove_file(path) {
2292 Ok(()) => {
2293 eprintln!("Cache file removed.");
2294 }
2295 Err(e) => {
2296 eprintln!("Failed to remove the cache file - Error: {e}.",);
2297 }
2298 }
2299 } else {
2300 eprintln!("Unable to clear the cache, invalid path.");
2301 }
2302 return None;
2303 }
2304
2305 Some(body)
2306}
2307
2308pub fn get_cache_dir() -> Result<PathBuf> {
2319 if let Ok(path) = std::env::var("ASM_LSP_CACHE_DIR") {
2321 let path = PathBuf::from(path);
2322 if path.is_dir() {
2324 return Ok(path);
2325 }
2326 }
2327
2328 let mut x86_cache_path = home::home_dir().ok_or_else(|| anyhow!("Home directory not found"))?;
2330
2331 x86_cache_path.push(".cache");
2332 x86_cache_path.push("asm-lsp");
2333
2334 fs::create_dir_all(&x86_cache_path)?;
2336
2337 Ok(x86_cache_path)
2338}
2339
/// Base URL of the online x86 instruction reference (non-test builds).
#[cfg(not(test))]
fn get_x86_docs_url() -> String {
    "https://www.felixcloutier.com/x86/".to_owned()
}

/// Base URL used in test builds: a local address on port 8080, matching the
/// mock server started by this file's test module.
#[cfg(test)]
fn get_x86_docs_url() -> String {
    "http://127.0.0.1:8080/x86/".to_owned()
}
2349
2350fn get_x86_docs_web(x86_online_docs: &str) -> Result<String> {
2351 println!("Fetching further documentation from the web -> {x86_online_docs}...");
2352 let contents = reqwest::blocking::get(x86_online_docs)?.text()?;
2354 Ok(contents)
2355}
2356
/// Reads the cached documentation page at `x86_cache_path` into a string,
/// announcing the read on stdout first.
///
/// # Errors
///
/// Returns the underlying I/O error when the file cannot be read as UTF-8 text.
fn get_x86_docs_cache(x86_cache_path: &PathBuf) -> Result<String, std::io::Error> {
    let shown_path = x86_cache_path.display();
    println!(
        "Fetching html page containing further documentation, from the cache -> {}...",
        shown_path
    );
    let contents = fs::read_to_string(x86_cache_path)?;
    Ok(contents)
}
2364
/// Writes `contents` to the cache file at `x86_cache_path`, creating or
/// truncating it.
///
/// This is best effort: progress is reported on stdout, failures to create or
/// write the file are reported on stderr, and nothing is returned to the caller.
fn set_x86_docs_cache(contents: &str, x86_cache_path: &PathBuf) {
    println!("Writing to the cache file {}...", x86_cache_path.display());

    let mut cache_file = match fs::File::create(x86_cache_path) {
        Ok(file) => file,
        Err(e) => {
            eprintln!(
                "Failed to create the cache file {} - Error: {e}.",
                x86_cache_path.display()
            );
            return;
        }
    };
    println!("Created the cache file {} .", x86_cache_path.display());

    if let Err(e) = cache_file.write_all(contents.as_bytes()) {
        eprintln!(
            "Failed to write to the cache file {} - Error: {e}.",
            x86_cache_path.display()
        );
        return;
    }
    println!("Populated the cache.");
}
2390
#[cfg(test)]
mod tests {
    use mockito::ServerOpts;

    use crate::parser::{get_cache_dir, populate_instructions};

    /// Parses the x86 and x86-64 opcode files against a mock documentation
    /// server on port 8080, removing the cache file before each run and once
    /// more at the end so every parse has to hit the server.
    #[test]
    fn test_populate_instructions() {
        let mut server = mockito::Server::new_with_opts(ServerOpts {
            port: 8080,
            ..Default::default()
        });

        let _ = server
            .mock("GET", "/x86/")
            .with_status(200)
            .with_header("content-type", "text/html")
            .with_body(include_str!(
                "../docs_store/instr_info_cache/x86_instr_docs.html"
            ))
            .create();

        let cache_file = get_cache_dir().unwrap().join("x86_instr_docs.html");
        let remove_stale_cache = || {
            if cache_file.is_file() {
                std::fs::remove_file(&cache_file).unwrap();
            }
        };

        let opcode_docs = [
            include_str!("../docs_store/opcodes/x86.xml"),
            include_str!("../docs_store/opcodes/x86_64.xml"),
        ];
        for xml_conts in opcode_docs {
            remove_stale_cache();
            assert!(populate_instructions(xml_conts).is_ok());
        }

        // Leave no cache file behind for other runs.
        remove_stale_cache();
    }
}