a2kit/lang/merlin/
mod.rs

1//! # Merlin analysis and transformations
2//! 
3//! This module is used by both the CLI and the language server.
//! The Merlin parser is provided by `tree_sitter_merlin6502`.  Every file is parsed as Merlin 16+;
//! other Merlin versions are handled via diagnostic filters.
//! The server compiles to a separate executable whose entry point is `src/bin/server-merlin/main.rs`.
7//! 
8//! The analyzer performs functions that begin to resemble assembly, such as resolving
9//! file relationships and identifying symbols.  There is a spot assembler that is used to aid in
10//! disassembly. As of this writing, however, full assembly is not supported.
11//! 
12//! ## Conditional Macro Definitions
13//! 
14//! The Merlin 8/16 manual explicitly recommends wrapping `MAC` in a `DO 0` fold.
15//! Testing shows this is unnecessary and can lead to unexpected behaviors.  For example, something
16//! other than `MAC` within this fold can end up being assembled by legacy Merlin.
17//! This language server does not conform to legacy Merlin in this regard, except to produce a
18//! warning if `MAC` appears inside a conditional.
19//! 
20//! ## Macro Locals
21//! 
22//! Labels that are defined within a macro definition require some discussion.
23//! They are scoped to a macro and all of its dependencies, e.g., a label defined
24//! in a nested macro is visible to the enclosing macro, and vice-versa.
25//! Duplicate labels are legal in this scope, but this does *not* make them variables.
//! In particular, Merlin 32 uses the first assigned value everywhere, whereas legacy Merlin uses the last.
27//! Merlin 8 errors out if an equivalence is used after a reference, but Merlin 16+ does not.
28//! 
29//! ## Testing
30//! 
31//! We test against Merlin 8/16/16+/32.
32//! Test versions are M8(2.58), M16(3.41), M16+(4.08), M32(1.1).
33//! CI test coverage is 100% in terms of instructions and addressing modes available on
34//! all processor variants (barring human error in constructing the tests).
35//! Table of some under-documented distinctions follows.
36//! 
37//! Operation | Merlin 8 | Merlin 16 | Merlin 16+ | Merlin 32
38//! ----------|----------|-----------|------------|-----------
39//! DCI | invert end of dstring | M8 | M32 | invert end of whole argument
40//! INV | wrong lower case | M8 | correct lower case | refuses lower case (1)
41//! FLS | wrong lower case | M8 | M8 | refuses lower case (2)
42//! FLS | flashes specials | M8 | M8 | does not flash specials
43//! REV | hex ignored | M8 | M32 | hex incorporated into string (3)
44//! STR | hex not counted | M8 | M32 | hex is counted
45//! STRL | n/a | n/a | M32 | hex is counted
46//! DS | bit shift not allowed | M8 | M8 | bit shift works
47//! JML ($8000) | n/a | `DC 00 80` | `DC 00 80` | `6C 00 80`
48//! MAC | last duplicate label takes precedence | M8 | M8 | first takes precedence
49//! MAC | unreachable label accepted | M8 | M8 | unreachable label errors out (4)
50//! MX | n/a | no shadowing | no shadowing | equivalence can shadow
51//! LDA #'A' | `A9 41` | `A9 41` | `A9 41` | refuses until v1.1
52//! DO 0 | MAC cancels | M8 | M8 | MAC does not cancel (5)
53//!
54//! 1. lower case is invertible in the alternate character set
55//! 2. refusal is correct, flashing lower case is not possible
56//! 3. a2kit will reject trailing hex as a syntax error, but the REV processor would reverse each dstring separately if the parser allowed it
57//! 4. this can happen when expanding a nested macro definition
58//! 5. "cancels" means code following the macro definition is assembled, even if `DO 0` is not closed
59
60use lsp_types as lsp;
61use std::collections::{HashSet,HashMap};
62use std::fmt;
63use std::str::FromStr;
64
65use super::node_text;
66
67mod context;
68pub mod settings;
69pub mod checkpoint;
70pub mod hovers;
71pub mod completions;
72pub mod tokenizer;
73pub mod formatter;
74pub mod assembly;
75pub mod disassembly;
76pub mod diagnostics;
77pub mod semantic_tokens;
78pub mod handbook;
79
80#[cfg(test)]
81mod tests;
82
/// Message passed to `expect` at sites where a failure would indicate a programming error.
const RCH: &str = "unreachable was reached";
/// NOTE(review): not referenced in the visible part of this file, and it has the
/// same value as `CALL_TOK` — confirm the intended use.
const COLUMN_SEPARATOR: char = '\u{0100}';
/// Parser hint: a line prefixed with this character is to be parsed as a macro call.
const CALL_TOK: char = '\u{0100}';
86
/// Single-bit flags describing the roles of a symbol; combine them with bitwise OR.
pub mod symbol_flags {
    /// entry label (exported)
    pub const ENT: u64 = 1 << 0;
    /// external label (imported)
    pub const EXT: u64 = 1 << 1;
    /// label is a subroutine
    pub const SUB: u64 = 1 << 2;
    /// label is a macro
    pub const MAC: u64 = 1 << 3;
    /// label is a macro argument such as `]1`
    pub const ARG: u64 = 1 << 4;
    /// label is a local
    pub const LOC: u64 = 1 << 5;
    /// label is a macro local
    pub const MLC: u64 = 1 << 6;
    /// label is a variable
    pub const VAR: u64 = 1 << 7;
}
105
/// Stackable source types, e.g., we may have a nesting such as (Master (Put (MacroRef)) (MacroRef))
#[derive(PartialEq,Clone)]
pub enum SourceType {
    /// displayed as "master"
    Master,
    /// displayed as "module"
    Module,
    /// displayed as "USE/PUT include"
    UseAndPut,
    /// displayed as "USE include"
    Use,
    /// displayed as "PUT include"
    Put,
    /// has no dedicated display string, the `Display` impl renders it as "unknown"
    MacroRef,
    /// displayed as "linker"
    Linker
}
117
/// Merlin assembler versions distinguished by this module.
/// The `Display` impl renders them as "Merlin 8", "Merlin 16", "Merlin 16+", and "Merlin 32".
#[derive(Clone,PartialEq)]
pub enum MerlinVersion {
    Merlin8,
    Merlin16,
    Merlin16Plus,
    Merlin32
}
125
/// Processor targets.
/// The `Display` impl renders them as "6502", "65C02", "65802", and "65816".
#[derive(Clone,PartialEq)]
pub enum ProcessorType {
    _6502,
    _65c02,
    _65802,
    _65c816
}
133
/// Categories of label.
/// Among other things these tag the entries of `Symbol::fwd_refs`, i.e., which kinds of
/// label were not yet defined at the point of a reference.
#[derive(Clone,PartialEq)]
pub enum LabelType {
    Local,
    Global,
    Macro,
    MacroLocal,
    Variable
}
142
/// One element of an `Operation`'s `modes` list.
/// NOTE(review): the field notes below are inferred from the names only — confirm against the handbook data.
#[derive(Clone)]
pub struct AddressMode {
    /// presumably the mnemonic identifying the addressing mode
    pub mnemonic: String,
    /// presumably the opcode value
    pub code: i64,
    /// presumably the cycle count
    pub cycles: i64,
    /// processors associated with this mode
    pub processors: Vec<ProcessorType>,
    /// presumably whether the encoding depends on the M status bit — TODO confirm
    pub m_sensitive: bool,
    /// presumably whether the encoding depends on the X status bit — TODO confirm
    pub x_sensitive: bool
}
152
/// Info on processor instructions for handbook
/// NOTE(review): the field notes below are inferred from the names only — confirm against the handbook data.
#[derive(Clone)]
pub struct Operation {
    /// presumably alternate mnemonics
    pub alt: Vec<String>,
    /// presumably a short description
    pub brief: String,
    /// presumably a long description
    pub desc: String,
    /// addressing modes available to this operation
    pub modes: Vec<AddressMode>,
    /// processors associated with this operation
    pub processors: Vec<ProcessorType>,
    /// presumably a summary of affected status bits — TODO confirm
    pub status: String,
    pub m_sensitive: bool,
    pub x_sensitive: bool,
    pub abs_suffixable: bool,
    pub absl_suffixable: bool,
    pub absl_prefixable: bool
}
168
/// Info on a specific instruction and mode, useful for disassembly.
/// NOTE(review): undocumented fields mirror those of `Operation`; their meaning is not shown here.
#[derive(Clone)]
pub struct MachineOperation {
    /// presumably the instruction mnemonic
    pub mnemonic: String,
    /// this is a snippet, such as `(1),y`, where the digit is bytes of data, and can be replaced by a value
    pub operand_snippet: String,
    /// processors associated with this instruction and mode
    pub processors: Vec<ProcessorType>,
    /// presumably true when the operand is a relative branch offset — TODO confirm
    pub relative: bool,
    pub m_sensitive: bool,
    pub x_sensitive: bool,
    pub abs_suffixable: bool,
    pub absl_suffixable: bool,
    pub absl_prefixable: bool
}
183
/// Info on a pseudo-operation.
/// NOTE(review): notes on fields other than `v8x` and `v16x` are inferred from the names only — confirm.
#[derive(Clone)]
pub struct PseudoOperation {
    /// presumably alternate mnemonics
    pub alt: Vec<String>,
    /// presumably a short description
    pub brief: String,
    pub category: String,
    pub caveat: Option<String>,
    /// presumably a long description
    pub desc: String,
    /// presumably usage examples
    pub eg: Vec<String>,
    pub choices: Vec<String>,
    /// if regex matches the argument is not supported in Merlin 8
    pub v8x: Option<regex::Regex>,
    /// if the regex matches the argument is not supported in Merlin 16
    pub v16x: Option<regex::Regex>,
    /// presumably the Merlin versions that support this pseudo-operation — TODO confirm
    pub version: Vec<MerlinVersion>
}
199
/// Everything recorded about one label, macro, or variable.
#[derive(Clone)]
pub struct Symbol {
    /// symbol text, `Symbol::create` takes it from the node text
    name: String,
    /// bitwise OR of the constants in `symbol_flags`
    flags: u64,
    /// locations where the symbol is declared (e.g. as an `EXT` or `ENT` argument)
    decs: Vec<lsp::Location>,
    /// locations where the symbol is defined
    defs: Vec<lsp::Location>,
    /// locations where the symbol is referenced
    refs: Vec<lsp::Location>,
    /// This should record only *bad* forward references.
    /// It is a map from a reference's location to label types that were not defined up to that point.
    /// Multiple label types can occur due to ambiguities during the first pass (e.g. global vs. macro local).
    fwd_refs: HashMap<lsp::Location,Vec<LabelType>>,
    /// Current value of a symbol. When post-analyzing variables, the value at a given location
    /// can be reconstructed using `checkpoints` (see `localize_value`).
    value: Option<i64>,
    /// It is useful to be able to stash values for later restoration during analysis.
    value_stack: Vec<Option<i64>>,
    /// Merlin children are as follows:
    /// * global labels can have local labels as children
    /// * macros can have "global labels" as children (macro locals)
    children: HashMap<String,Symbol>,
    /// heading that precedes a symbol definition is its docstring
    docstring: String,
    /// line(s) of code defining this symbol
    defining_code: Option<String>,
    /// macro call or nested macro within another macro
    dependencies: HashSet<String>,
    /// ordered record of values assumed by the symbol, for a variable there can be many.
    /// when closing an include an entry should be added at the including location.
    /// At present LUP updates are not handled (variables are unset upon exit).
    /// The @ substitution feature is also not handled.
    checkpoints: Vec<(lsp::Location,Option<i64>)>
}
232
/// Extended symbol table applicable to a single module.
/// The symbols are gathered into globals, variables, and macros.
/// Locals and macro-locals appear as children of the former.
/// The symbols themselves contain global information such as all
/// the places where a symbol is referenced, defined, assigned a value, etc.
#[derive(Clone)]
pub struct Symbols {
    /// assembler version, `new` starts with Merlin 8
    assembler: MerlinVersion,
    /// processor target, `new` starts with the 6502
    processor: ProcessorType,
    /// URI of the master document as a string, empty until set
    master_doc_uri: String,
    /// URI of the display document as a string, empty until set
    display_doc_uri: String,
    /// source type of the display document, `new` starts with `Master`
    display_doc_type: SourceType,
    /// global labels keyed by name
    globals: HashMap<String,Symbol>,
    /// variables keyed by name
    vars: HashMap<String,Symbol>,
    /// macros keyed by name
    macros: HashMap<String,Symbol>,
    /// symbol named "MX", it is stashed, restored, and checkpointed along with the variables
    mx: Symbol,
    /// lines in the display document that need the parser hint
    alt_parser_lines: HashSet<isize>
}
252
/// Information gathered across all the documents of a workspace.
#[derive(Clone)]
pub struct Workspace {
    /// workspace folder URIs
    pub ws_folders: Vec<lsp::Uri>,
	/// array of documents in this workspace
    pub docs: Vec<super::Document>,
	/// map from an include file uri to all master uri that `put` it
	pub put_map: HashMap<String, HashSet<String>>,
	/// map from an include file uri to all master uri that `use` it
	pub use_map: HashMap<String, HashSet<String>>,
	/// set of uri that are included by another file
	pub includes: HashSet<String>,
	/// as of v4.1 this includes ENT, EXT, and EXD instances, and the decs, defs, refs lists
    /// are populated differently.
    pub entries: HashMap<String,Symbol>,
    /// fraction of linker operations in a document
    pub linker_frac: HashMap<String,f64>,
    /// did REL appear in the file
    pub rel_modules: HashSet<String>
}
272
/// This wraps the Tree-sitter parser, making use of the symbol
/// information to distinguish implicit macro calls from operations
/// or pseudo-operations with trailing characters.  This is needed
/// to mimic the behavior of Merlin 8/16/16+.
/// Merlin 32 syntax is further checked in diagnostic passes.
pub struct MerlinParser {
    /// underlying Tree-sitter parser, configured for `tree_sitter_merlin6502`
    parser: tree_sitter::Parser,
    /// used to test a candidate mnemonic for a strong or weak match to an operation
    op_book: handbook::operations::OperationHandbook,
    /// used to test a candidate mnemonic for a strong or weak match to a pseudo-operation
    psop_book: handbook::pseudo_ops::PseudoOperationHandbook,
    /// column offset due to the parsing hint on the most recently adjusted line, 0 if there is no hint
    col: isize,
    /// the most recently adjusted line
    adj_line: String,
    /// matches a run of whitespace followed by a run of non-whitespace,
    /// the first match on a line is tested as the candidate mnemonic
    c2_regex: regex::Regex
}
286
287impl fmt::Display for SourceType {
288    fn fmt(&self,f: &mut fmt::Formatter<'_>) -> fmt::Result {
289        match self {
290            Self::Master => write!(f,"master"),
291            Self::Module => write!(f,"module"),
292            Self::UseAndPut => write!(f,"USE/PUT include"),
293            Self::Use => write!(f,"USE include"),
294            Self::Put => write!(f,"PUT include"),
295            Self::Linker => write!(f,"linker"),
296            _ => write!(f,"unknown")
297        }
298    }
299}
300
301impl fmt::Display for MerlinVersion {
302    fn fmt(&self,f: &mut fmt::Formatter<'_>) -> fmt::Result {
303        match self {
304            Self::Merlin8 => write!(f,"Merlin 8"),
305            Self::Merlin16 => write!(f,"Merlin 16"),
306            Self::Merlin16Plus => write!(f,"Merlin 16+"),
307            Self::Merlin32 => write!(f,"Merlin 32")
308        }
309    }
310}
311
312impl fmt::Display for ProcessorType {
313    fn fmt(&self,f: &mut fmt::Formatter<'_>) -> fmt::Result {
314        match self {
315            Self::_6502 => write!(f,"6502"),
316            Self::_65c02 => write!(f,"65C02"),
317            Self::_65802 => write!(f,"65802"),
318            Self::_65c816 => write!(f,"65816")
319        }
320    }
321}
322
323impl Symbol {
324    pub fn new(name: &str) -> Self {
325        Self {
326            name: name.to_owned(),
327            flags: 0,
328            decs: Vec::new(),
329            defs: Vec::new(),
330            refs: Vec::new(),
331            fwd_refs: HashMap::new(),
332            value: None,
333            value_stack: Vec::new(),
334            children: HashMap::new(),
335            docstring: String::new(),
336            defining_code: None,
337            dependencies: HashSet::new(),
338            checkpoints: Vec::new()
339        }
340    }
341    /// create new symbol and add a node in one step
342    pub fn create(loc: lsp::Location, node: &tree_sitter::Node, source: &str) -> Self {
343        let mut ans = Self::new(&node_text(node, source));
344        ans.add_node(loc, node, source);
345        ans
346    }
347    /// Add a node to the symbol.  Do not use for workspace symbols.
348    /// The node can be `label_def`, `label_ref`, `macro_def`, `macro_ref`, or `var_mac`.
349    /// The latter can occur without a wrapper in some pseudo-ops such as `ASC`.
350    /// This will not create any overlaps between `refs`, `defs`, and `decs`.
351    pub fn add_node(&mut self, loc: lsp::Location, node: &tree_sitter::Node, _source: &str) {
352        if node.kind() == "var_mac" {
353            self.refs.push(loc);
354            self.flags |= symbol_flags::ARG | symbol_flags::VAR;
355            return;
356        }
357        let mut pushed_loc = false;
358        if let Some(parent) = node.parent() {
359            if let Some(grandparent) = parent.parent() {
360                if grandparent.kind() == "arg_jsr" {
361                    self.flags |= symbol_flags::SUB;
362                }
363            }
364            if parent.kind() == "arg_ent" {
365                self.flags |= symbol_flags::ENT;
366                self.decs.push(loc.clone());
367                pushed_loc = true;
368            }
369            if parent.kind() == "arg_ext" {
370                self.flags |= symbol_flags::EXT;
371                self.decs.push(loc.clone());
372                pushed_loc = true;
373            }
374            if parent.kind() == "arg_exd" {
375                self.flags |= symbol_flags::EXT;
376                self.decs.push(loc.clone());
377                pushed_loc = true;
378            }
379        }
380        if let Some(next) = node.next_named_sibling() {
381            if next.kind() == "psop_ent" {
382                self.flags |= symbol_flags::ENT;
383                self.defs.push(loc.clone());
384                pushed_loc = true;
385            }
386            if next.kind() == "psop_ext" {
387                self.flags |= symbol_flags::EXT;
388                self.decs.push(loc.clone());
389                pushed_loc = true;
390            }
391            if next.kind() == "psop_exd" {
392                self.flags |= symbol_flags::EXT;
393                self.decs.push(loc.clone());
394                pushed_loc = true;
395            }
396        }
397        if !pushed_loc {
398            match node.kind()  {
399                "label_def" => self.defs.push(loc),
400                "macro_def" => { self.defs.push(loc); self.flags |= symbol_flags::MAC },
401                "label_ref" => self.refs.push(loc),
402                "macro_ref" => { self.refs.push(loc); self.flags |= symbol_flags::MAC},
403                _ => {}
404            };
405        }
406        if let Some(child) = node.named_child(0) {
407            if child.kind() == "local_label" {
408                self.flags |= symbol_flags::LOC;
409            } else if child.kind() == "var_label" {
410                self.flags |= symbol_flags::VAR;
411            }
412        }
413    }
414    /// Add a node to the symbol with workspace rules.
415    /// This differs from add_node because it only expects ENT, EXT, or EXD labels,
416    /// and adds every ENT instance to the decs/defs list, and every EXT/EXD instance to the refs list.
417    /// In the workspace, both EXT and ENT flags can be set for the same symbol.
418    /// By contrast, add_node takes the module's point of view, where these are all decs or defs,
419    /// and the flags are expected to be mutually exclusive.
420    pub fn add_node_ws(&mut self, loc: lsp::Location, node: &tree_sitter::Node, _source: &str) {
421        if let Some(parent) = node.parent() {
422            if parent.kind() == "arg_ent" {
423                self.flags |= symbol_flags::ENT;
424                self.decs.push(loc.clone());
425            }
426            if parent.kind() == "arg_ext" {
427                self.flags |= symbol_flags::EXT;
428                self.refs.push(loc.clone());
429            }
430            if parent.kind() == "arg_exd" {
431                self.flags |= symbol_flags::EXT;
432                self.refs.push(loc.clone());
433            }
434        }
435        if let Some(next) = node.next_named_sibling() {
436            if next.kind() == "psop_ent" {
437                self.flags |= symbol_flags::ENT;
438                self.defs.push(loc.clone());
439            }
440            if next.kind() == "psop_ext" {
441                self.flags |= symbol_flags::EXT;
442                self.refs.push(loc.clone());
443            }
444            if next.kind() == "psop_exd" {
445                self.flags |= symbol_flags::EXT;
446                self.refs.push(loc.clone());
447            }
448        }
449    }
450    fn add_dependency(&mut self,label: &str) {
451        self.dependencies.insert(label.to_string());
452    }
    /// Borrow the set of labels this symbol depends on.
    fn dependencies(&self) -> &HashSet<String> {
        &self.dependencies
    }
456    /// Set symbol to its value just prior to the given line.
457    /// This can be used to rollback a variable for local analysis.
458    fn localize_value(&mut self,loc: &lsp::Location) {
459        let mut latest_val: Option<i64> = None;
460        for (prev_loc,val) in &self.checkpoints {
461            if prev_loc.uri == loc.uri {
462                if prev_loc.range.start.line >= loc.range.start.line {
463                    break;
464                } 
465                latest_val = *val;
466            }
467        }
468        self.value = latest_val;
469    }
470    /// set value of child nodes to None, does not affect duplicates that may be
471    /// defined in dependencies
472    fn unset_children(&mut self) {
473        for child in self.children.values_mut() {
474            child.value = None;
475        }
476    }
477}
478
479impl Symbols {
480    pub fn new() -> Self {
481        Self {
482            assembler: MerlinVersion::Merlin8,
483            processor: ProcessorType::_6502,
484            master_doc_uri: String::new(),
485            display_doc_uri: String::new(),
486            display_doc_type: SourceType::Master,
487            globals: HashMap::new(),
488            vars: HashMap::new(),
489            macros: HashMap::new(),
490            mx: Symbol::new("MX"),
491            alt_parser_lines: HashSet::new()
492        }
493    }
494    /// return strings to be displayed in the client's toolbar,
495    /// currently [master document , display document type]
496    pub fn toolbar_info(&self) -> Vec<String> {
497        let mut ans = Vec::new();
498        let mut master = "unknown".to_string();
499        // parse the display doc URI first so it is used if master does not parse
500        if let Ok(uri) = lsp::Uri::from_str(&self.display_doc_uri) {
501            if let Ok(path) = super::pathbuf_from_uri(&uri) {
502                if let Some(os) = path.file_name() {
503                    if let Some(s) = os.to_str() {
504                        master = s.to_string();
505                    }
506                }
507            }
508        };
509        if let Ok(uri) = lsp::Uri::from_str(&self.master_doc_uri) {
510            if let Ok(path) = super::pathbuf_from_uri(&uri) {
511                if let Some(os) = path.file_name() {
512                    if let Some(s) = os.to_str() {
513                        master = s.to_string();
514                    }
515                }
516            }
517        };
518        ans.push(master);
519        ans.push(self.display_doc_type.to_string());
520        ans
521    }
522    pub fn mac_defined(&self,txt: &str) -> bool {
523        match self.macros.get(txt) { Some(sym) => {
524            sym.defs.len() > 0
525        } _ => {
526            false
527        }}
528    }
529    pub fn mac_forward(&self,txt: &str,loc: &lsp::Location) -> bool {
530        match self.macros.get(txt) { Some(sym) => {
531            match sym.fwd_refs.get(loc) {
532                Some(fwd) => fwd.contains(&LabelType::Macro),
533                None => false
534            }
535        } _ => {
536            false
537        }}
538    }
539    pub fn global_declared_or_defined(&self,txt: &str) -> bool {
540        match self.globals.get(txt) { Some(sym) => {
541            sym.decs.len() + sym.defs.len() > 0
542        } _ => {
543            false
544        }}
545    }
546    pub fn global_declared(&self,txt: &str) -> bool {
547        match self.globals.get(txt) { Some(sym) => {
548            sym.decs.len() > 0
549        } _ => {
550            false
551        }}
552    }
553    pub fn global_defined(&self,txt: &str) -> bool {
554        match self.globals.get(txt) { Some(sym) => {
555            sym.defs.len() > 0
556        } _ => {
557            false
558        }}
559    }
560    pub fn global_forward(&self,txt: &str,loc: &lsp::Location) -> bool {
561        match self.globals.get(txt) { Some(sym) => {
562            match sym.fwd_refs.get(loc) {
563                Some(fwd) => fwd.contains(&LabelType::Global),
564                None => false
565            }
566        } _ => {
567            false
568        }}
569    }
570    pub fn var_defined(&self,txt: &str) -> bool {
571        match self.vars.get(txt) { Some(sym) => {
572            sym.defs.len() > 0
573        } _ => {
574            false
575        }}
576    }
577    pub fn var_forward(&self,txt: &str,loc: &lsp::Location) -> bool {
578        match self.vars.get(txt) { Some(sym) => {
579            match sym.fwd_refs.get(loc) {
580                Some(fwd) => fwd.contains(&LabelType::Variable),
581                None => false
582            }
583        } _ => {
584            false
585        }}
586    }
587    pub fn child_defined(&self,txt: &str,scope: &Symbol) -> bool {
588        if scope.flags & symbol_flags::MAC > 0 {
589            if let Ok(count) = self.count_macro_loc_definitions(scope, txt, 0, 15) {
590                return count > 0;
591            }
592        }
593        match scope.children.get(txt) { Some(sym) => {
594            sym.defs.len() > 0
595        } _ => {
596            false
597        }}
598    }
599    pub fn child_forward(&self,txt: &str,scope: &Symbol, loc: &lsp::Location) -> bool {
600        match scope.children.get(txt) { Some(sym) => {
601            match sym.fwd_refs.get(loc) {
602                Some(fwd) => fwd.contains(&LabelType::Local) || fwd.contains(&LabelType::MacroLocal),
603                None => false
604            }
605        } _ => {
606            false
607        }}
608    }
609    /// should only be used if symbols have been updated
610    pub fn adjust_line(&self,row: isize,line: &str,term: &str) -> String {
611        let prefix = match self.alt_parser_lines.contains(&row) {
612            true => CALL_TOK.to_string(),
613            false => "".to_string()
614        };
615        [&prefix,line,term].concat()
616    }
617    /// use this during analysis to save the rows that need the parser hint
618    pub fn update_row_data(&mut self,doc: &super::Document,row: isize,col: isize) {
619        if col<0 && doc.uri.to_string()==self.display_doc_uri {
620            self.alt_parser_lines.insert(row);
621        }
622    }
623    /// recursively check to see if this label is a dependency of the symbol
624    fn is_dependency(&self,label: &str,sym: &Symbol,curr_depth: usize,max_depth: usize) -> Result<bool,crate::DYNERR> {
625        if curr_depth > max_depth {
626            return Err(Box::new(assembly::Error::Nesting));
627        }
628        for m2 in sym.dependencies() {
629            if m2 == label {
630                log::debug!("indirect reference to {}",label);
631                log::debug!("    from {}",&sym.name);
632                return Ok(true);
633            }
634            if let Some(dep) = self.macros.get(m2) {
635                if self.is_dependency(label, dep,curr_depth+1,max_depth)? {
636                    log::debug!("    from {}",&sym.name);
637                    return Ok(true);
638                }
639            }
640        }
641        Ok(false)
642    }
643    /// test whether any label is referenced or an entry
644    fn is_label_referenced_or_ent(&self,label: &str,scope: Option<&Symbol>) -> bool {
645        let maybe_sym =  match label.get(0..1) {
646            Some(":") => match scope {
647                Some(parent) => parent.children.get(label),
648                None => None
649            },
650            Some("]") => self.vars.get(label),
651            Some(_) => self.globals.get(label),
652            None => None
653        };
654        match maybe_sym {
655            Some(sym) => sym.refs.len() > 0 || sym.flags & symbol_flags::ENT > 0,
656            None => false
657        }
658    }
659    /// test whether a macro is ever referenced, even indirectly
660    fn is_macro_referenced(&self,label: &str, max_depth: usize) -> Result<bool,crate::DYNERR> {
661        // first see if it is directly referenced, if yes we are done
662        if let Some(sym) = self.macros.get(label) {
663            if sym.refs.len() > 0 {
664                return Ok(true);
665            }
666        }
667        for sym in self.macros.values() {
668            if sym.refs.len() > 0 {
669                if self.is_dependency(label, sym,0,max_depth)? {
670                    return Ok(true);
671                }
672            }
673        }
674        Ok(false)
675    }
676    /// how many times is the label defined in this macro and its dependencies
677    fn count_macro_loc_definitions(&self,mac: &Symbol, label: &str, curr_depth: usize, max_depth: usize) -> Result<usize,crate::DYNERR> {
678        let mut count: usize = 0;
679        if curr_depth > max_depth {
680            return Err(Box::new(assembly::Error::Nesting));
681        }
682        if let Some(child) = mac.children.get(label) {
683            count += child.defs.len();
684        }
685        for m2 in mac.dependencies() {
686            if let Some(sym) = self.macros.get(m2) {
687                count += self.count_macro_loc_definitions(sym,label,curr_depth+1,max_depth)?;
688            }
689        }
690        Ok(count)
691    }
692    /// this can be called as a macro definition is closed to get a list of duplicates
693    fn detect_all_duplicates_in_macro(&self,mac: &Symbol) -> Result<Option<String>,crate::DYNERR> {
694        let mut ans = String::new();
695        for label in mac.children.keys() {
696            if self.count_macro_loc_definitions(mac, label, 0, 15)? > 1 {
697                ans += &label;
698                ans += ",";
699            }
700        }
701        if ans.len() > 0 {
702            ans.pop();
703            Ok(Some(ans))
704        } else {
705            Ok(None)
706        }
707    }
708    /// Set variables to value at the given location.
709    /// The analyzer's first pass establishes the values.
710    fn localize_all_variables(&mut self,loc: &lsp::Location) {
711        for var in self.vars.values_mut() {
712            var.localize_value(loc);
713        }
714        self.mx.localize_value(loc);
715    }
716    /// Clear the current value of all variables, the variables themselves remain.
717    fn unset_all_variables(&mut self) {
718        for var in self.vars.values_mut() {
719            var.value = None;
720        }
721        self.mx.value = None;
722    }
723    /// Save the current value of all variables onto a stack.
724    fn stash_all_variables(&mut self) {
725        for var in self.vars.values_mut() {
726            var.value_stack.push(var.value);
727        }
728        self.mx.value_stack.push(self.mx.value);
729    }
730    /// Restore the current values of all variables from a stack.
731    fn restore_all_variables(&mut self) {
732        for var in self.vars.values_mut() {
733            if let Some(v) = var.value_stack.pop() {
734                var.value = v;
735            }
736        }
737        if let Some(v) = self.mx.value_stack.pop() {
738            self.mx.value = v;
739        }
740    }
741    /// Checkpoint variables at the given location.
742    /// Locations must be visited in order, and only during the first pass.
743    /// Typical use is immediately after exiting a source scope.
744    fn checkpoint_all_variables(&mut self,loc: &lsp::Location) {
745        for var in self.vars.values_mut() {
746            var.checkpoints.push((loc.clone(),var.value));
747        }
748        self.mx.checkpoints.push((loc.clone(),self.mx.value));
749    }
750}
751
752impl MerlinParser {
753    pub fn new() -> Self {
754        let mut parser = tree_sitter::Parser::new();
755        parser.set_language(&tree_sitter_merlin6502::LANGUAGE.into()).expect(RCH);
756        Self {
757            parser,
758            op_book: handbook::operations::OperationHandbook::new(),
759            psop_book: handbook::pseudo_ops::PseudoOperationHandbook::new(),
760            col: 0,
761            adj_line: String::new(),
762            c2_regex: regex::Regex::new(r"\s+\S+").expect(RCH)
763        }
764    }
765    /// This adjusts the parsing precedence rules to emulate Merlin's behavior.
766    /// The underlying parser requires a hint to do this because it depends on the document
767    /// symbols, the Merlin version, and the processor target.
768    /// This issue centers around the fact that Merlin 8/16/16+ will accept a "weak match"
769    /// to an operation or psuedo-operation mnemonic.
770    fn adjust_line(&mut self, old_line: &str, symbols: &Symbols) -> String {
771        self.col = 0;
772        if old_line.starts_with(CALL_TOK) {
773            self.col = -(CALL_TOK.len_utf8() as isize); // LSP position encoding assumption
774            return old_line.to_string();
775        }
776        if old_line.starts_with("*") || old_line.starts_with(";") {
777            return old_line.to_string();
778        }
779        let prefix = match self.c2_regex.find(old_line) {
780            Some(sep_c2) => {
781                let c2 = sep_c2.as_str().trim();
782                if c2.starts_with(";") || 
783                    self.op_book.strong_match(c2,&symbols.processor) ||
784                    self.psop_book.strong_match(c2,&symbols.assembler) {
785                    "".to_string() // always take strong match as is
786                } else if symbols.mac_defined(c2) {
787                    CALL_TOK.to_string() // no strong match, so if macro is defined take it
788                } else if self.op_book.weak_match(&c2.to_lowercase(),&symbols.processor) ||
789                    self.psop_book.weak_match(&c2.to_lowercase(),&symbols.assembler) {
790                    "".to_string() // weak match, take it
791                } else {
792                    // At this point the parser could still weak match a disabled mnemonic.
793                    // Our decision is to interpret it as a macro call.
794                    CALL_TOK.to_string()
795                }
796            }
797            None => "".to_string()
798        };
799        self.col = -(prefix.len() as isize); // LSP position encoding assumption
800        prefix + old_line
801    }
    /// column offset to apply to squiggles or highlights, should be 0 or -2 for default position encoding.
    /// This reflects the most recently adjusted line: it is 0 if the line has no parsing hint,
    /// otherwise it is the negative of the hint's length in UTF-8 bytes.
    pub fn col_offset(&self) -> isize {
        self.col
    }
    /// borrow the adjusted line, may have a parsing hint prefix, ends with LF.
    /// This is the line as it was handed to the parser by the most recent call to `parse`,
    /// it is empty if nothing has been parsed yet.
    pub fn line(&self) -> &str {
        &self.adj_line
    }
810    /// adjust the line with possible parsing hints, then parse.
811    pub fn parse(&mut self, line: &str, symbols: &Symbols) -> Result<tree_sitter::Tree,crate::DYNERR> {
812        self.adj_line = self.adjust_line(line,symbols);
813        if !self.adj_line.ends_with("\n") {
814            self.adj_line += "\n";
815        }
816        if let Some(tree) = self.parser.parse(&self.adj_line,None) {
817            return Ok(tree);
818        }
819        Err(Box::new(super::Error::Syntax))
820    }
821}