Skip to main content

kaccy_bitcoin/
output_descriptor.rs

//! BIP 380-386 Output Descriptor parsing, validation, and analysis.
//!
//! Output descriptors describe how to derive Bitcoin scripts and addresses.
//! This module provides a parser, validator, and utilities for working with
//! the full range of output descriptor types defined by BIPs 380-386.
//!
//! # Supported Descriptor Types
//! - `pk(KEY)` — bare pubkey (BIP 381)
//! - `pkh(KEY)` — P2PKH (BIP 381)
//! - `wpkh(KEY)` — P2WPKH (BIP 382)
//! - `sh(SCRIPT)` — P2SH wrapper (BIP 381)
//! - `wsh(SCRIPT)` — P2WSH wrapper (BIP 382)
//! - `combo(KEY)` — multiple script types from one key (BIP 384)
//! - `addr(ADDR)` — raw address (BIP 385)
//! - `raw(HEX)` — raw script (BIP 385)
//! - `tr(KEY)` and `tr(KEY, TREE)` — Taproot (BIP 386)
//! - `multi(K, KEY...)` and `sortedmulti(K, KEY...)` — multisig (BIP 383)
//!
//! # Example
//!
//! ```rust
//! use kaccy_bitcoin::output_descriptor::DescriptorParser;
//!
//! let desc = "pkh(02c6047f9441ed7d6d3045406e95c07cd85c778e4b8cef3ca7abac09b95c709ee5)";
//! let parsed = DescriptorParser::parse(desc).unwrap();
//! assert_eq!(parsed.descriptor_type(), "pkh");
//! assert!(!parsed.is_taproot());
//! ```
29
30/// Error types for output descriptor parsing and validation.
31#[derive(Debug, thiserror::Error)]
32pub enum DescriptorError {
33    /// Descriptor has invalid syntax.
34    #[error("Invalid descriptor syntax: {0}")]
35    InvalidSyntax(String),
36    /// Unknown or unsupported descriptor function type.
37    #[error("Unknown descriptor type: {0}")]
38    UnknownType(String),
39    /// Key expression is malformed or invalid.
40    #[error("Invalid key expression: {0}")]
41    InvalidKey(String),
42    /// Multisig threshold exceeds the number of keys.
43    #[error("Invalid threshold: got {got}, max {max}")]
44    InvalidThreshold {
45        /// The threshold value specified in the descriptor.
46        got: usize,
47        /// The maximum allowed threshold (equal to the number of keys).
48        max: usize,
49    },
50    /// Descriptor is missing its trailing `#checksum`.
51    #[error("Missing checksum")]
52    MissingChecksum,
53    /// Descriptor checksum does not match the computed value.
54    #[error("Invalid checksum: expected {expected}, got {got}")]
55    InvalidChecksum {
56        /// The checksum appended to the descriptor string.
57        expected: String,
58        /// The checksum computed from the descriptor body.
59        got: String,
60    },
61    /// Descriptor nesting depth exceeds the limit (8).
62    #[error("Nested depth exceeded: {0}")]
63    DepthExceeded(usize),
64    /// Address string inside `addr()` is not valid.
65    #[error("Invalid address: {0}")]
66    InvalidAddress(String),
67    /// Empty input string.
68    #[error("Empty descriptor")]
69    Empty,
70}
71
72// ─── BIP 380 checksum ────────────────────────────────────────────────────────
73
/// Input charset used by the BIP 380 descriptor checksum algorithm (95 chars).
///
/// The checksum code looks up each descriptor character's index in this
/// string and uses both `index & 31` and `index >> 5`, so the exact order of
/// the three 32/32/31-character groups matters.  The final group must end
/// with a backtick, `#`, `"`, `\` and a space, in that order, as specified by
/// BIP 380 and implemented in Bitcoin Core.
const INPUT_CHARSET: &str = concat!(
    "0123456789()[],'/*abcdefgh@:$%{}",
    "IJKLMNOPQRSTUVWXYZ&+-.;<=>?!^_|~",
    "ijklmnopqrstuvwxyzABCDEFGH`#\"\\ ",
);

/// Output charset for the 8-character checksum (32 chars, BCH-like).
const CHECKSUM_CHARSET: &str = "qpzry9x8gf2tvdw0s3jn54khce6mua7l";
80
/// One step of the BIP 380 checksum polymod.
///
/// Shifts the low 35 bits of the running state `c` left by five bits, mixes
/// in `val`, and then XORs in one generator constant for every one of the
/// five bits that were shifted out above bit 34.
fn descriptor_polymod(mut c: u64, val: u64) -> u64 {
    // Generator constants, indexed by the bit of the shifted-out part that
    // selects them.
    const GENERATORS: [u64; 5] = [
        0xf5dee51989,
        0xa9fdca3312,
        0x1bab10e32d,
        0x3706b1677a,
        0x644d626ffd,
    ];

    let carried = c >> 35;
    c = ((c & 0x7_ffff_ffff) << 5) ^ val;
    for (bit, generator) in GENERATORS.iter().enumerate() {
        if (carried >> bit) & 1 == 1 {
            c ^= *generator;
        }
    }
    c
}
104
105// ─── Key types ───────────────────────────────────────────────────────────────
106
/// The kind of key material found in a descriptor key expression.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DescriptorKeyType {
    /// A public key written directly as hex.
    RawPubkey,
    /// An extended public key (xpub/tpub/ypub/zpub and friends).
    ExtendedPubkey,
    /// An extended private key (xprv/tprv and friends).
    ExtendedPrivkey,
    /// A private key in Wallet Import Format.
    Wif,
}
119
120/// A single key expression in an output descriptor.
121///
122/// Covers bare keys, xpubs with optional origin and child paths, and
123/// wildcard (ranged) derivation.
124#[derive(Debug, Clone, PartialEq, Eq)]
125pub struct DescriptorKey {
126    /// The raw key token as it appears in the descriptor (after origin prefix).
127    pub key_str: String,
128    /// How to interpret `key_str`.
129    pub key_type: DescriptorKeyType,
130    /// 4-byte master fingerprint in hex (8 chars) when an origin is present.
131    pub fingerprint: Option<String>,
132    /// Derivation path from the master key to the xpub/xprv (origin path).
133    pub origin_path: Option<String>,
134    /// Child derivation path appended after the key (e.g. `/0/*`).
135    pub child_path: Option<String>,
136    /// Whether the child path contains a `*` wildcard.
137    pub is_ranged: bool,
138    /// Whether the wildcard is a hardened range (`*h` / `*'`).
139    pub is_hardened_range: bool,
140}
141
142impl DescriptorKey {
143    /// Parse a key expression string, which may include an origin prefix
144    /// `[fingerprint/path]` and a child path suffix `/0/*`.
145    ///
146    /// # Errors
147    ///
148    /// Returns [`DescriptorError::InvalidKey`] when the string is malformed.
149    pub fn parse(s: &str) -> Result<Self, DescriptorError> {
150        let s = s.trim();
151        if s.is_empty() {
152            return Err(DescriptorError::InvalidKey("empty key expression".into()));
153        }
154
155        let mut fingerprint: Option<String> = None;
156        let mut origin_path: Option<String> = None;
157        let remainder: &str;
158
159        // Consume optional origin: `[fingerprint/path]`
160        if s.starts_with('[') {
161            let close = s
162                .find(']')
163                .ok_or_else(|| DescriptorError::InvalidKey("unclosed '[' in origin".into()))?;
164            let origin_inner = &s[1..close];
165            remainder = &s[close + 1..];
166
167            // origin_inner is "fingerprint" or "fingerprint/path"
168            let slash_pos = origin_inner.find('/');
169            if let Some(pos) = slash_pos {
170                fingerprint = Some(origin_inner[..pos].to_string());
171                origin_path = Some(origin_inner[pos + 1..].to_string());
172            } else {
173                fingerprint = Some(origin_inner.to_string());
174            }
175        } else {
176            remainder = s;
177        }
178
179        // Split remainder into key token and optional child path
180        // The key token is everything up to the first '/' that comes after
181        // the base58-like key body.  We locate the child path boundary.
182        let (key_token, child_path_str) = split_key_and_child_path(remainder);
183
184        let child_path = if child_path_str.is_empty() {
185            None
186        } else {
187            Some(child_path_str.to_string())
188        };
189
190        let is_ranged = child_path
191            .as_deref()
192            .map(|p| p.contains('*'))
193            .unwrap_or(false);
194        let is_hardened_range = child_path
195            .as_deref()
196            .map(|p| p.contains("*h") || p.contains("*'"))
197            .unwrap_or(false);
198
199        let key_type = classify_key(key_token)?;
200
201        Ok(DescriptorKey {
202            key_str: key_token.to_string(),
203            key_type,
204            fingerprint,
205            origin_path,
206            child_path,
207            is_ranged,
208            is_hardened_range,
209        })
210    }
211
212    /// Reconstruct the original string representation of this key expression.
213    pub fn to_string_repr(&self) -> String {
214        let mut out = String::new();
215        if let (Some(fp), Some(op)) = (&self.fingerprint, &self.origin_path) {
216            out.push_str(&format!("[{}/{}]", fp, op));
217        } else if let Some(fp) = &self.fingerprint {
218            out.push_str(&format!("[{}]", fp));
219        }
220        out.push_str(&self.key_str);
221        if let Some(cp) = &self.child_path {
222            out.push('/');
223            out.push_str(cp);
224        }
225        out
226    }
227
228    /// Returns `true` if this is an extended public key (xpub family).
229    pub fn is_xpub(&self) -> bool {
230        matches!(self.key_type, DescriptorKeyType::ExtendedPubkey)
231    }
232}
233
/// Separates a key expression (origin already removed) into
/// `(key_token, child_path)`.
///
/// The split happens at the first `/`; that slash belongs to neither half.
/// When there is no `/`, the whole input is the key token and the child path
/// is the empty string.
fn split_key_and_child_path(s: &str) -> (&str, &str) {
    s.split_once('/').unwrap_or((s, ""))
}
249
250/// Classify a raw key token into a [`DescriptorKeyType`].
251fn classify_key(token: &str) -> Result<DescriptorKeyType, DescriptorError> {
252    if token.is_empty() {
253        return Err(DescriptorError::InvalidKey("empty key token".into()));
254    }
255
256    // Extended private keys
257    if token.starts_with("xprv")
258        || token.starts_with("tprv")
259        || token.starts_with("yprv")
260        || token.starts_with("zprv")
261    {
262        return Ok(DescriptorKeyType::ExtendedPrivkey);
263    }
264
265    // Extended public keys
266    if token.starts_with("xpub")
267        || token.starts_with("tpub")
268        || token.starts_with("ypub")
269        || token.starts_with("zpub")
270        || token.starts_with("Ypub")
271        || token.starts_with("Zpub")
272        || token.starts_with("Xpub")
273        || token.starts_with("Vpub")
274        || token.starts_with("Upub")
275    {
276        return Ok(DescriptorKeyType::ExtendedPubkey);
277    }
278
279    // Raw compressed public key: 66 hex chars starting with 02 or 03
280    if (token.starts_with("02") || token.starts_with("03"))
281        && token.len() == 66
282        && token.chars().all(|c| c.is_ascii_hexdigit())
283    {
284        return Ok(DescriptorKeyType::RawPubkey);
285    }
286
287    // WIF: starts with 5, K, or L (mainnet) or c (testnet compressed)
288    if token.starts_with('5')
289        || token.starts_with('K')
290        || token.starts_with('L')
291        || token.starts_with('c')
292    {
293        // Plausible WIF length range: 51-52 chars
294        if token.len() >= 51 && token.len() <= 52 {
295            return Ok(DescriptorKeyType::Wif);
296        }
297    }
298
299    // Fall back: treat anything that looks like base58 as an extended key
300    if token
301        .chars()
302        .all(|c| c.is_alphanumeric() || c == '+' || c == '/' || c == '=')
303    {
304        return Ok(DescriptorKeyType::ExtendedPubkey);
305    }
306
307    Err(DescriptorError::InvalidKey(format!(
308        "unrecognised key token: '{}'",
309        token
310    )))
311}
312
313// ─── Script tree ─────────────────────────────────────────────────────────────
314
315/// A node in the Taproot script tree.
316///
317/// See BIP 386 for the tree notation.
318#[derive(Debug, Clone)]
319pub enum DescriptorTree {
320    /// A leaf script with a specific script version and body.
321    Leaf {
322        /// Tapscript version byte (0xc0 for normal tapscript).
323        version: u8,
324        /// The script expression at this leaf.
325        script: Box<DescriptorScript>,
326    },
327    /// A branch combining two sub-trees.
328    Branch(Box<DescriptorTree>, Box<DescriptorTree>),
329}
330
331// ─── Descriptor script ───────────────────────────────────────────────────────
332
333/// The parsed inner expression of an output descriptor.
334///
335/// This enum models all descriptor types defined in BIPs 380-386.
336#[derive(Debug, Clone)]
337pub enum DescriptorScript {
338    /// `pk(KEY)` — bare pubkey output (BIP 380).
339    Key(DescriptorKey),
340    /// `pkh(KEY)` — P2PKH (BIP 382).
341    Pkh(DescriptorKey),
342    /// `wpkh(KEY)` — P2WPKH (BIP 381/382).
343    Wpkh(DescriptorKey),
344    /// `sh(SCRIPT)` — P2SH wrapper (BIP 383).
345    Sh(Box<DescriptorScript>),
346    /// `wsh(SCRIPT)` — P2WSH wrapper (BIP 381).
347    Wsh(Box<DescriptorScript>),
348    /// `combo(KEY)` — pk + pkh + if-segwit wpkh + p2sh-wpkh (BIP 384).
349    Combo(DescriptorKey),
350    /// `multi(K, KEY…)` — bare multisig (BIP 383).
351    Multi {
352        /// The required-signature threshold K.
353        threshold: usize,
354        /// The N keys.
355        keys: Vec<DescriptorKey>,
356    },
357    /// `sortedmulti(K, KEY…)` — multisig with sorted keys (BIP 383).
358    SortedMulti {
359        /// The required-signature threshold K.
360        threshold: usize,
361        /// The N keys (sorted at script generation time).
362        keys: Vec<DescriptorKey>,
363    },
364    /// `addr(ADDRESS)` — raw address descriptor (BIP 385).
365    Addr(String),
366    /// `raw(HEX)` — raw script hex (BIP 385).
367    Raw(String),
368    /// `tr(KEY)` or `tr(KEY, TREE)` — Taproot (BIP 386).
369    Tr {
370        /// Taproot internal key.
371        internal_key: DescriptorKey,
372        /// Optional script-path tree.
373        tree: Option<Box<DescriptorTree>>,
374    },
375}
376
377impl DescriptorScript {
378    /// Return a short string naming the top-level function.
379    pub fn type_name(&self) -> &str {
380        match self {
381            DescriptorScript::Key(_) => "pk",
382            DescriptorScript::Pkh(_) => "pkh",
383            DescriptorScript::Wpkh(_) => "wpkh",
384            DescriptorScript::Sh(_) => "sh",
385            DescriptorScript::Wsh(_) => "wsh",
386            DescriptorScript::Combo(_) => "combo",
387            DescriptorScript::Multi { .. } => "multi",
388            DescriptorScript::SortedMulti { .. } => "sortedmulti",
389            DescriptorScript::Addr(_) => "addr",
390            DescriptorScript::Raw(_) => "raw",
391            DescriptorScript::Tr { .. } => "tr",
392        }
393    }
394
395    /// Collect all [`DescriptorKey`] references in this script recursively.
396    pub fn keys(&self) -> Vec<&DescriptorKey> {
397        match self {
398            DescriptorScript::Key(k)
399            | DescriptorScript::Pkh(k)
400            | DescriptorScript::Wpkh(k)
401            | DescriptorScript::Combo(k) => vec![k],
402            DescriptorScript::Sh(inner) | DescriptorScript::Wsh(inner) => inner.keys(),
403            DescriptorScript::Multi { keys, .. } | DescriptorScript::SortedMulti { keys, .. } => {
404                keys.iter().collect()
405            }
406            DescriptorScript::Addr(_) | DescriptorScript::Raw(_) => vec![],
407            DescriptorScript::Tr { internal_key, tree } => {
408                let mut v = vec![internal_key];
409                if let Some(t) = tree {
410                    v.extend(collect_tree_keys(t));
411                }
412                v
413            }
414        }
415    }
416
417    /// Returns `true` when at least one key path in this script contains a
418    /// derivation wildcard (`*`).
419    pub fn is_ranged(&self) -> bool {
420        self.keys().iter().any(|k| k.is_ranged)
421    }
422}
423
424/// Recursively collect all keys from a [`DescriptorTree`].
425fn collect_tree_keys(tree: &DescriptorTree) -> Vec<&DescriptorKey> {
426    match tree {
427        DescriptorTree::Leaf { script, .. } => script.keys(),
428        DescriptorTree::Branch(left, right) => {
429            let mut v = collect_tree_keys(left);
430            v.extend(collect_tree_keys(right));
431            v
432        }
433    }
434}
435
436// ─── Parsed descriptor ───────────────────────────────────────────────────────
437
438/// A fully parsed output descriptor, including optional checksum.
439///
440/// Obtain one via [`DescriptorParser::parse`] or the convenience
441/// `ParsedDescriptor::parse` method.
442#[derive(Debug, Clone)]
443pub struct ParsedDescriptor {
444    /// The original descriptor string (with checksum if provided).
445    pub raw: String,
446    /// The parsed script expression.
447    pub script: DescriptorScript,
448    /// The 8-character checksum following `#`, if present.
449    pub checksum: Option<String>,
450    /// Whether any key in the descriptor is ranged (contains `*`).
451    pub is_ranged: bool,
452}
453
454impl ParsedDescriptor {
455    /// Parse a descriptor string.
456    ///
457    /// # Errors
458    ///
459    /// Returns a [`DescriptorError`] when the input is malformed.
460    pub fn parse(s: &str) -> Result<Self, DescriptorError> {
461        DescriptorParser::parse(s)
462    }
463
464    /// Return the top-level descriptor function name.
465    ///
466    /// Examples: `"pkh"`, `"wpkh"`, `"tr"`, `"multi"`.
467    pub fn descriptor_type(&self) -> &str {
468        self.script.type_name()
469    }
470
471    /// Count the total number of keys referenced in this descriptor.
472    pub fn key_count(&self) -> usize {
473        self.extract_keys().len()
474    }
475
476    /// Returns `true` if this is a Taproot (`tr(…)`) descriptor.
477    pub fn is_taproot(&self) -> bool {
478        matches!(self.script, DescriptorScript::Tr { .. })
479    }
480
481    /// Returns `true` if this descriptor produces a SegWit output
482    /// (wpkh, wsh, or tr).
483    pub fn is_segwit(&self) -> bool {
484        matches!(
485            self.script,
486            DescriptorScript::Wpkh(_) | DescriptorScript::Wsh(_) | DescriptorScript::Tr { .. }
487        )
488    }
489
490    /// Returns `true` if this is a multisig descriptor (multi or sortedmulti).
491    pub fn is_multisig(&self) -> bool {
492        matches!(
493            self.script,
494            DescriptorScript::Multi { .. } | DescriptorScript::SortedMulti { .. }
495        )
496    }
497
498    /// Return the threshold K for multisig descriptors, or `None` otherwise.
499    pub fn threshold(&self) -> Option<usize> {
500        match &self.script {
501            DescriptorScript::Multi { threshold, .. }
502            | DescriptorScript::SortedMulti { threshold, .. } => Some(*threshold),
503            _ => None,
504        }
505    }
506
507    /// Whether this descriptor uses derivation wildcards.
508    pub fn is_ranged(&self) -> bool {
509        self.is_ranged
510    }
511
512    /// Collect references to all [`DescriptorKey`]s in this descriptor.
513    pub fn extract_keys(&self) -> Vec<&DescriptorKey> {
514        self.script.keys()
515    }
516}
517
518// ─── Parser ──────────────────────────────────────────────────────────────────
519
520/// A stateless parser for Bitcoin output descriptors (BIPs 380-386).
521///
522/// Entry points:
523/// - [`DescriptorParser::parse`] — parse a descriptor string.
524/// - [`DescriptorParser::validate_checksum`] — verify the `#checksum` suffix.
525/// - [`DescriptorParser::compute_checksum`] — compute the BIP 380 checksum.
526/// - [`DescriptorParser::strip_checksum`] — remove `#checksum` from a string.
527pub struct DescriptorParser;
528
529impl DescriptorParser {
530    /// Parse a descriptor string, with or without a trailing `#checksum`.
531    ///
532    /// # Errors
533    ///
534    /// Returns [`DescriptorError::Empty`] for an empty input, or various
535    /// parse errors for malformed input.
536    pub fn parse(descriptor: &str) -> Result<ParsedDescriptor, DescriptorError> {
537        let descriptor = descriptor.trim();
538        if descriptor.is_empty() {
539            return Err(DescriptorError::Empty);
540        }
541
542        let (desc_part, checksum) = Self::split_checksum(descriptor);
543
544        let script = parse_script(desc_part, 0)?;
545        let is_ranged = script.is_ranged();
546
547        Ok(ParsedDescriptor {
548            raw: descriptor.to_string(),
549            script,
550            checksum: checksum.map(|s| s.to_string()),
551            is_ranged,
552        })
553    }
554
555    /// Validate the 8-character `#checksum` appended to a descriptor.
556    ///
557    /// # Errors
558    ///
559    /// Returns [`DescriptorError::MissingChecksum`] when no `#` is present,
560    /// or [`DescriptorError::InvalidChecksum`] when the checksum is wrong.
561    pub fn validate_checksum(descriptor: &str) -> Result<(), DescriptorError> {
562        let (desc_part, checksum) = Self::split_checksum(descriptor);
563        let checksum = checksum.ok_or(DescriptorError::MissingChecksum)?;
564        let computed = Self::compute_checksum(desc_part);
565        if computed == checksum {
566            Ok(())
567        } else {
568            Err(DescriptorError::InvalidChecksum {
569                expected: checksum.to_string(),
570                got: computed,
571            })
572        }
573    }
574
575    /// Return the descriptor string with any `#checksum` suffix removed.
576    pub fn strip_checksum(descriptor: &str) -> &str {
577        let (desc_part, _) = Self::split_checksum(descriptor);
578        desc_part
579    }
580
581    /// Compute the 8-character BIP 380 descriptor checksum.
582    ///
583    /// The algorithm is a BCH code over GF(32) using the
584    /// `INPUT_CHARSET` / `CHECKSUM_CHARSET` defined by BIP 380.
585    pub fn compute_checksum(descriptor: &str) -> String {
586        let input_charset_chars: Vec<char> = INPUT_CHARSET.chars().collect();
587        let checksum_chars: Vec<char> = CHECKSUM_CHARSET.chars().collect();
588
589        let mut c: u64 = 1;
590        let mut cls: u64 = 0;
591        let mut clscount: u32 = 0;
592
593        for ch in descriptor.chars() {
594            let pos = input_charset_chars.iter().position(|&x| x == ch);
595            let pos = match pos {
596                Some(p) => p as u64,
597                // Characters not in INPUT_CHARSET produce a non-matching
598                // checksum; feed a sentinel value that pollutes the polymod.
599                None => {
600                    c = descriptor_polymod(c, 0x7f);
601                    continue;
602                }
603            };
604            // Feed low 5 bits of the class-3 group
605            c = descriptor_polymod(c, pos & 31);
606            cls = cls * 3 + (pos >> 5);
607            clscount += 1;
608            if clscount == 3 {
609                c = descriptor_polymod(c, cls);
610                cls = 0;
611                clscount = 0;
612            }
613        }
614        // Flush any remaining class bits
615        if clscount > 0 {
616            c = descriptor_polymod(c, cls);
617        }
618        // 8 rounds of polymod(0) to "finalise"
619        for _ in 0..8 {
620            c = descriptor_polymod(c, 0);
621        }
622        c ^= 1;
623
624        // Extract 8 × 5-bit groups from the 40-bit result
625        let mut result = String::with_capacity(8);
626        for i in (0..8).rev() {
627            let idx = ((c >> (5 * i)) & 31) as usize;
628            result.push(checksum_chars[idx]);
629        }
630        result
631    }
632
633    // ── helpers ──────────────────────────────────────────────────────────────
634
635    /// Split a descriptor into `(body, Option<checksum>)`.
636    fn split_checksum(s: &str) -> (&str, Option<&str>) {
637        if let Some(pos) = s.rfind('#') {
638            (&s[..pos], Some(&s[pos + 1..]))
639        } else {
640            (s, None)
641        }
642    }
643}
644
645// ─── Recursive-descent parser internals ──────────────────────────────────────
646
647/// Maximum nesting depth (prevents stack overflow on adversarial input).
648const MAX_DEPTH: usize = 8;
649
650/// Parse a descriptor expression starting at depth `depth`.
651///
652/// `s` should be the part of the descriptor *without* the `#checksum` suffix.
653fn parse_script(s: &str, depth: usize) -> Result<DescriptorScript, DescriptorError> {
654    if depth > MAX_DEPTH {
655        return Err(DescriptorError::DepthExceeded(depth));
656    }
657    let s = s.trim();
658    if s.is_empty() {
659        return Err(DescriptorError::InvalidSyntax("empty expression".into()));
660    }
661
662    // Find the opening parenthesis to separate function name from argument.
663    let paren_open = s.find('(').ok_or_else(|| {
664        // Could be just a key expression at the top level — but the BIP
665        // requires a function wrapper at the top level.
666        DescriptorError::InvalidSyntax(format!("expected '(' in '{}'", s))
667    })?;
668
669    let func_name = &s[..paren_open];
670
671    // Verify the string ends with ')'
672    if !s.ends_with(')') {
673        return Err(DescriptorError::InvalidSyntax(format!(
674            "missing closing ')' in '{}'",
675            s
676        )));
677    }
678    let inner = &s[paren_open + 1..s.len() - 1];
679
680    match func_name {
681        "pk" => {
682            let key = DescriptorKey::parse(inner)?;
683            Ok(DescriptorScript::Key(key))
684        }
685        "pkh" => {
686            let key = DescriptorKey::parse(inner)?;
687            Ok(DescriptorScript::Pkh(key))
688        }
689        "wpkh" => {
690            let key = DescriptorKey::parse(inner)?;
691            Ok(DescriptorScript::Wpkh(key))
692        }
693        "combo" => {
694            let key = DescriptorKey::parse(inner)?;
695            Ok(DescriptorScript::Combo(key))
696        }
697        "sh" => {
698            let inner_script = parse_script(inner, depth + 1)?;
699            Ok(DescriptorScript::Sh(Box::new(inner_script)))
700        }
701        "wsh" => {
702            let inner_script = parse_script(inner, depth + 1)?;
703            Ok(DescriptorScript::Wsh(Box::new(inner_script)))
704        }
705        "addr" => {
706            if inner.is_empty() {
707                return Err(DescriptorError::InvalidAddress("empty address".into()));
708            }
709            Ok(DescriptorScript::Addr(inner.to_string()))
710        }
711        "raw" => Ok(DescriptorScript::Raw(inner.to_string())),
712        "multi" | "sortedmulti" => parse_multisig(inner, func_name == "sortedmulti"),
713        "tr" => parse_taproot(inner, depth),
714        other => Err(DescriptorError::UnknownType(other.to_string())),
715    }
716}
717
718/// Parse `K,KEY1,KEY2,...` from the body of a multi/sortedmulti expression.
719fn parse_multisig(inner: &str, sorted: bool) -> Result<DescriptorScript, DescriptorError> {
720    // Split on top-level commas (no nesting inside multisig args)
721    let parts: Vec<&str> = split_top_level_commas(inner);
722    if parts.is_empty() {
723        return Err(DescriptorError::InvalidSyntax(
724            "empty multisig arguments".into(),
725        ));
726    }
727
728    let threshold: usize = parts[0]
729        .trim()
730        .parse()
731        .map_err(|_| DescriptorError::InvalidSyntax(format!("invalid threshold '{}'", parts[0])))?;
732
733    let keys_raw = &parts[1..];
734    if threshold > keys_raw.len() {
735        return Err(DescriptorError::InvalidThreshold {
736            got: threshold,
737            max: keys_raw.len(),
738        });
739    }
740
741    let mut keys = Vec::with_capacity(keys_raw.len());
742    for k in keys_raw {
743        keys.push(DescriptorKey::parse(k.trim())?);
744    }
745
746    if sorted {
747        Ok(DescriptorScript::SortedMulti { threshold, keys })
748    } else {
749        Ok(DescriptorScript::Multi { threshold, keys })
750    }
751}
752
753/// Parse `KEY` or `KEY,TREE` from the body of a `tr(…)` expression.
754fn parse_taproot(inner: &str, depth: usize) -> Result<DescriptorScript, DescriptorError> {
755    // Find the first top-level comma that separates internal key from tree.
756    let comma_pos = find_top_level_comma(inner);
757    let (key_str, tree_str) = if let Some(pos) = comma_pos {
758        (&inner[..pos], Some(&inner[pos + 1..]))
759    } else {
760        (inner, None)
761    };
762
763    let internal_key = DescriptorKey::parse(key_str.trim())?;
764    let tree = if let Some(tree_s) = tree_str {
765        Some(Box::new(parse_tree(tree_s.trim(), depth + 1)?))
766    } else {
767        None
768    };
769
770    Ok(DescriptorScript::Tr { internal_key, tree })
771}
772
773/// Parse a Taproot script tree node: either `{LEFT,RIGHT}` or a script.
774fn parse_tree(s: &str, depth: usize) -> Result<DescriptorTree, DescriptorError> {
775    if depth > MAX_DEPTH {
776        return Err(DescriptorError::DepthExceeded(depth));
777    }
778    let s = s.trim();
779    if s.starts_with('{') && s.ends_with('}') {
780        let inner = &s[1..s.len() - 1];
781        let mid = find_top_level_comma(inner).ok_or_else(|| {
782            DescriptorError::InvalidSyntax("tree branch requires two children".into())
783        })?;
784        let left = parse_tree(inner[..mid].trim(), depth + 1)?;
785        let right = parse_tree(inner[mid + 1..].trim(), depth + 1)?;
786        Ok(DescriptorTree::Branch(Box::new(left), Box::new(right)))
787    } else {
788        // Treat as a leaf script expression (default version 0xc0)
789        let script = parse_script(s, depth + 1)?;
790        Ok(DescriptorTree::Leaf {
791            version: 0xc0,
792            script: Box::new(script),
793        })
794    }
795}
796
797// ─── Comma-splitting helpers ──────────────────────────────────────────────────
798
/// Split `s` at every comma that sits at nesting level 0, i.e. outside any
/// `(…)`, `{…}` or `[…]` group.
///
/// The commas themselves are dropped.  The result always has at least one
/// element; an input without top-level commas comes back as a single piece.
fn split_top_level_commas(s: &str) -> Vec<&str> {
    let mut pieces = Vec::new();
    let mut nesting: i32 = 0;
    let mut piece_start = 0usize;

    // All delimiters are ASCII, so scanning bytes yields valid slice
    // boundaries even when `s` contains multi-byte characters.
    for (idx, byte) in s.bytes().enumerate() {
        if matches!(byte, b'(' | b'{' | b'[') {
            nesting += 1;
        } else if matches!(byte, b')' | b'}' | b']') {
            nesting -= 1;
        } else if byte == b',' && nesting == 0 {
            pieces.push(&s[piece_start..idx]);
            piece_start = idx + 1;
        }
    }

    pieces.push(&s[piece_start..]);
    pieces
}
819
/// Byte offset of the first comma in `s` that is outside every `(…)`, `{…}`
/// and `[…]` group, or `None` when there is no such comma.
fn find_top_level_comma(s: &str) -> Option<usize> {
    let mut nesting: i32 = 0;
    // Delimiters are ASCII, so a byte position is also a valid str offset.
    s.bytes().position(|byte| match byte {
        b'(' | b'{' | b'[' => {
            nesting += 1;
            false
        }
        b')' | b'}' | b']' => {
            nesting -= 1;
            false
        }
        b',' => nesting == 0,
        _ => false,
    })
}
833
834// ─── Tests ───────────────────────────────────────────────────────────────────
835
#[cfg(test)]
mod tests {
    use super::*;

    // Hex-encoded compressed public keys shared by the tests below.
    const PK1: &str = "02c6047f9441ed7d6d3045406e95c07cd85c778e4b8cef3ca7abac09b95c709ee5";
    const PK2: &str = "02f9308a019258c31049344f85f89d5229b531c845836f99b08601f113bce036f9";
    const PK3: &str = "03a34b99f22c790c4e36b2b3c2c35a36db06226e41c692fc82b8b56ac1c540c5bd";

    /// `pkh(KEY)` is a single-key descriptor that is not taproot, multisig or segwit.
    #[test]
    fn test_parse_pkh() {
        let input = format!("pkh({PK1})");
        let descriptor = ParsedDescriptor::parse(&input).expect("should parse pkh");
        assert_eq!(descriptor.descriptor_type(), "pkh");
        assert_eq!(descriptor.key_count(), 1);
        assert!(!descriptor.is_taproot());
        assert!(!descriptor.is_multisig());
        assert!(!descriptor.is_segwit());
    }

    /// `wpkh(KEY)` is a single-key segwit descriptor that is not taproot.
    #[test]
    fn test_parse_wpkh() {
        let input = format!("wpkh({PK2})");
        let descriptor = ParsedDescriptor::parse(&input).expect("should parse wpkh");
        assert_eq!(descriptor.descriptor_type(), "wpkh");
        assert!(descriptor.is_segwit());
        assert!(!descriptor.is_taproot());
        assert_eq!(descriptor.key_count(), 1);
    }

    /// `sh(wpkh(KEY))` reports the outer `sh` type, is not flagged as segwit,
    /// and still counts the single wrapped key.
    #[test]
    fn test_parse_sh_wpkh() {
        let input = format!("sh(wpkh({PK2}))");
        let descriptor = ParsedDescriptor::parse(&input).expect("should parse sh(wpkh)");
        assert_eq!(descriptor.descriptor_type(), "sh");
        assert!(!descriptor.is_segwit());
        assert_eq!(descriptor.key_count(), 1);
    }

    /// A 2-of-3 `multi` exposes its threshold and key count.
    #[test]
    fn test_parse_multisig() {
        let input = format!("multi(2,{PK1},{PK2},{PK3})");
        let descriptor = ParsedDescriptor::parse(&input).expect("should parse multi");
        assert_eq!(descriptor.descriptor_type(), "multi");
        assert!(descriptor.is_multisig());
        assert_eq!(descriptor.threshold(), Some(2));
        assert_eq!(descriptor.key_count(), 3);
    }

    /// A 1-of-2 `sortedmulti` exposes its threshold and key count.
    #[test]
    fn test_parse_sortedmulti() {
        let input = format!("sortedmulti(1,{PK1},{PK2})");
        let descriptor = ParsedDescriptor::parse(&input).expect("should parse sortedmulti");
        assert_eq!(descriptor.descriptor_type(), "sortedmulti");
        assert!(descriptor.is_multisig());
        assert_eq!(descriptor.threshold(), Some(1));
        assert_eq!(descriptor.key_count(), 2);
    }

    /// `addr(ADDR)` carries no keys and is neither taproot nor multisig.
    #[test]
    fn test_parse_addr() {
        let input = "addr(bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t4)";
        let descriptor = ParsedDescriptor::parse(input).expect("should parse addr");
        assert_eq!(descriptor.descriptor_type(), "addr");
        assert_eq!(descriptor.key_count(), 0);
        assert!(!descriptor.is_taproot());
        assert!(!descriptor.is_multisig());
    }

    /// Key-only `tr(KEY)` is taproot, counts as segwit, and has one key.
    #[test]
    fn test_parse_taproot_simple() {
        let input = format!("tr({PK1})");
        let descriptor = ParsedDescriptor::parse(&input).expect("should parse tr");
        assert_eq!(descriptor.descriptor_type(), "tr");
        assert!(descriptor.is_taproot());
        assert!(descriptor.is_segwit());
        assert_eq!(descriptor.key_count(), 1);
    }

    /// `descriptor_type()` echoes the outer function name for several kinds.
    #[test]
    fn test_descriptor_type_name() {
        let table = [
            (format!("pk({PK1})"), "pk"),
            (format!("wpkh({PK1})"), "wpkh"),
            ("raw(deadbeef)".to_string(), "raw"),
            (
                "addr(1BitcoinEaterAddressDontSendf59kuE)".to_string(),
                "addr",
            ),
        ];
        for (input, want) in table {
            let descriptor = ParsedDescriptor::parse(&input)
                .unwrap_or_else(|e| panic!("failed to parse '{}': {:?}", input, e));
            assert_eq!(descriptor.descriptor_type(), want);
        }
    }

    /// `is_taproot()` is true for `tr` and false for `pkh`.
    #[test]
    fn test_is_taproot() {
        let taproot_input = format!("tr({PK1})");
        let legacy_input = format!("pkh({PK1})");
        assert!(ParsedDescriptor::parse(&taproot_input).unwrap().is_taproot());
        assert!(!ParsedDescriptor::parse(&legacy_input).unwrap().is_taproot());
    }

    /// `is_multisig()` is true for `multi`/`sortedmulti` and false for `pkh`.
    #[test]
    fn test_is_multisig() {
        let plain_multi = format!("multi(2,{PK1},{PK2})");
        let sorted_multi = format!("sortedmulti(1,{PK1},{PK2})");
        let single_key = format!("pkh({PK1})");
        assert!(ParsedDescriptor::parse(&plain_multi).unwrap().is_multisig());
        assert!(ParsedDescriptor::parse(&sorted_multi).unwrap().is_multisig());
        assert!(!ParsedDescriptor::parse(&single_key).unwrap().is_multisig());
    }

    /// `key_count()` reports every key listed in a `multi`.
    #[test]
    fn test_key_count_multi() {
        let input = format!("multi(2,{PK1},{PK2},{PK3})");
        let descriptor = ParsedDescriptor::parse(&input).unwrap();
        assert_eq!(descriptor.key_count(), 3);
    }

    /// `strip_checksum` drops a trailing `#checksum` and leaves other input untouched.
    #[test]
    fn test_strip_checksum() {
        let suffixed = "wpkh(xpub6...)#12345678";
        assert_eq!(DescriptorParser::strip_checksum(suffixed), "wpkh(xpub6...)");

        let bare = "wpkh(xpub6...)";
        assert_eq!(DescriptorParser::strip_checksum(bare), "wpkh(xpub6...)");
    }

    /// The computed checksum is 8 characters, all drawn from `CHECKSUM_CHARSET`.
    #[test]
    fn test_compute_checksum_length() {
        let input = format!("pkh({PK1})");
        let checksum = DescriptorParser::compute_checksum(&input);
        assert_eq!(
            checksum.len(),
            8,
            "BIP 380 checksum must be exactly 8 chars"
        );
        // Every emitted character has to come from the checksum alphabet.
        checksum.chars().for_each(|ch| {
            assert!(
                CHECKSUM_CHARSET.contains(ch),
                "checksum char '{}' not in CHECKSUM_CHARSET",
                ch
            );
        });
    }

    /// A key path ending in the `/*` wildcard marks the descriptor as ranged.
    #[test]
    fn test_parse_ranged_descriptor() {
        let input = "wpkh(xpub661MyMwAqRbcGHoJePhy7S4JdFEFXwg/0/*)";
        let descriptor = ParsedDescriptor::parse(input).expect("should parse ranged");
        assert!(descriptor.is_ranged(), "descriptor should be ranged");
    }

    /// The empty string is rejected with `DescriptorError::Empty`.
    #[test]
    fn test_parse_empty_fails() {
        let outcome = ParsedDescriptor::parse("");
        assert!(matches!(outcome, Err(DescriptorError::Empty)));
    }

    /// An unrecognised function name is rejected with `UnknownType`.
    #[test]
    fn test_parse_unknown_type_fails() {
        let outcome = ParsedDescriptor::parse("foo(bar)");
        assert!(matches!(outcome, Err(DescriptorError::UnknownType(_))));
    }

    /// A threshold larger than the number of keys is rejected.
    #[test]
    fn test_multisig_threshold_too_high() {
        // Asking for 3 signatures out of only 2 keys can never be satisfied.
        let input = format!("multi(3,{PK1},{PK2})");
        let outcome = ParsedDescriptor::parse(&input);
        assert!(matches!(
            outcome,
            Err(DescriptorError::InvalidThreshold { .. })
        ));
    }

    /// `wsh(multi(...))` reports the outer `wsh` type, is segwit, and counts
    /// the keys of the wrapped `multi`.
    #[test]
    fn test_parse_wsh_multi() {
        let input = format!("wsh(multi(2,{PK1},{PK2}))");
        let descriptor = ParsedDescriptor::parse(&input).expect("should parse wsh(multi)");
        assert_eq!(descriptor.descriptor_type(), "wsh");
        assert!(descriptor.is_segwit());
        // The count is expected to include the keys of the inner multi.
        assert_eq!(descriptor.key_count(), 2);
    }

    /// `raw(HEX)` parses and carries no keys.
    #[test]
    fn test_parse_raw() {
        let input = "raw(76a91489abcdefabbaabbaabbaabbaabbaabbaabbaabba88ac)";
        let descriptor = ParsedDescriptor::parse(input).expect("should parse raw");
        assert_eq!(descriptor.descriptor_type(), "raw");
        assert_eq!(descriptor.key_count(), 0);
    }

    /// A key with a `[fingerprint/path]` origin and a `/*` suffix exposes the
    /// fingerprint and is ranged, but not hardened-ranged.
    #[test]
    fn test_descriptor_key_parse_with_origin() {
        let expression = format!("[deadbeef/84'/0'/0']{PK1}/0/*");
        let parsed_key =
            DescriptorKey::parse(&expression).expect("should parse key with origin");
        assert_eq!(parsed_key.fingerprint.as_deref(), Some("deadbeef"));
        assert!(parsed_key.is_ranged);
        assert!(!parsed_key.is_hardened_range);
    }

    /// `is_xpub()` is false for a raw hex key and true for an `xpub...` key.
    #[test]
    fn test_descriptor_key_is_xpub() {
        let hex_key = DescriptorKey::parse(PK1).unwrap();
        assert!(!hex_key.is_xpub());

        let extended = "xpub661MyMwAqRbcGHoJePhy7S4JdFEFXwg";
        let extended_key = DescriptorKey::parse(extended).unwrap();
        assert!(extended_key.is_xpub());
    }

    /// Validating a descriptor without a `#checksum` suffix reports `MissingChecksum`.
    #[test]
    fn test_validate_checksum_missing() {
        let input = format!("pkh({PK1})");
        let outcome = DescriptorParser::validate_checksum(&input);
        assert!(matches!(outcome, Err(DescriptorError::MissingChecksum)));
    }

    /// A checksum produced by `compute_checksum` is accepted by `validate_checksum`.
    #[test]
    fn test_validate_checksum_correct() {
        // Round-trip: append the freshly computed checksum, then validate it.
        let body = format!("pkh({PK1})");
        let checksum = DescriptorParser::compute_checksum(&body);
        let with_checksum = format!("{body}#{checksum}");
        DescriptorParser::validate_checksum(&with_checksum).expect("checksum should validate");
    }

    /// A descriptor carrying a bogus checksum is rejected with `InvalidChecksum`.
    #[test]
    fn test_validate_checksum_wrong() {
        let body = format!("pkh({PK1})");
        let with_checksum = format!("{body}#xxxxxxxx");
        let outcome = DescriptorParser::validate_checksum(&with_checksum);
        assert!(matches!(
            outcome,
            Err(DescriptorError::InvalidChecksum { .. })
        ));
    }
}