// bcloop 0.1.0
//
// A tool for processing Bitcoin-like blockchain data.
// Documentation
use std::sync::Arc;

use bcloop::{AddrType, BcWalker};
use fxhash::{FxHashMap as HashMap, FxHashSet as HashSet};
use serde::{Deserialize, Serialize};
use thousands::Separable;

/// Crate-wide result alias: the success type defaults to `()` and the error is any boxed,
/// thread-safe error, so `?` works on every error type that converts into it.
pub type WithError<T = ()> = std::result::Result<T, Box<dyn std::error::Error + Send + Sync>>;

/// Creates a progress bar of total length `size`, rendered with the byte-oriented template
/// below (message, bytes done / total, elapsed time, ETA and throughput).
///
/// Panics if the template is rejected by `indicatif`.
fn get_pbar(size: u64) -> indicatif::ProgressBar {
  const TEMPLATE: &str = "{wide_msg} {decimal_bytes} / {decimal_total_bytes} ~ {elapsed} < {eta} ~ {decimal_bytes_per_sec}";

  let style = indicatif::ProgressStyle::with_template(TEMPLATE).unwrap();
  let bar = indicatif::ProgressBar::new(size);
  bar.set_style(style);
  bar
}

/// Identifies one transaction output: the id of the transaction plus the output's index.
///
/// Serves as the key of the in-memory output map and of the per-block set of spent outputs,
/// which is why it implements `Eq` and `Hash`.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct TxoKey {
  /// Transaction id as 32 raw bytes.
  pub txid: [u8; 32],
  /// Index of the output within the transaction's output list.
  pub vout: u32,
}

/// Data stored for every tracked transaction output (the value side of the output map).
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct TxoVal {
  /// Height of the block in which the output was created.
  pub height: u32,
  /// Amount carried by the output, as reported by the block parser.
  pub amount: u64,
  /// Raw bytes of the output script.
  pub script: Box<[u8]>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CsvRow {
  pub height: u32,
  pub txid: String,
  pub vout: u32,
  pub amount: u64,
  pub addr: String,
  pub script: String,
}

/// Appends every entry of `data` to the CSV file `outfile`, creating the file if needed.
///
/// The header row is written only when the file is empty, so repeated calls keep extending
/// the same table. Columns: `height, txid, idx, amount, addr, script`. The `script` column is
/// filled (hex-encoded) only for outputs whose script type is `AddrType::Unknown`.
///
/// # Errors
///
/// Returns any I/O or CSV serialization error raised while opening, writing or flushing.
fn csv_append(outfile: &str, data: &HashMap<TxoKey, TxoVal>) -> WithError {
  let file = std::fs::OpenOptions::new().append(true).create(true).open(outfile)?;

  // Decide on the header from the opened file itself: a file that already exists but is still
  // empty needs the header just like a freshly created one, and checking after the open avoids
  // racing with the file's creation.
  let needs_header = file.metadata()?.len() == 0;

  let mut wtr = csv::Writer::from_writer(file);

  if needs_header {
    wtr.serialize(("height", "txid", "idx", "amount", "addr", "script"))?;
  }

  for (key, val) in data {
    let txid = hex::encode(&key.txid);

    let addr = AddrType::load(&val.script);
    let script = match addr {
      AddrType::Unknown => hex::encode(&val.script),
      _ => String::new(), // skip script for known types to reduce csv size
    };
    let addr = addr.hex();

    wtr.serialize((val.height, txid, key.vout, val.amount, addr, script))?;
  }

  // Flush explicitly so write errors surface here instead of being swallowed on drop.
  wtr.flush()?;
  Ok(())
}

/// Scans the block files in `blocks_dir` and appends every unspent output to the CSV `outfile`.
///
/// The chain is processed in one or more cycles so that memory use stays bounded. A cycle
/// collects outputs starting at the current checkpoint until the output map would have to grow
/// beyond the capacity limit; from that block on it only removes outputs that get spent, up to
/// the last block. Whatever is left is unspent and is appended to `outfile`; the next cycle
/// resumes at the block where collecting stopped.
///
/// Outputs with an amount below 546, an empty script, or a script starting with `0x6a`
/// (OP_RETURN) are not tracked.
///
/// # Errors
///
/// Returns an error when `blocks_dir` cannot be resolved, a path is not valid UTF-8, no blocks
/// are found, the block walker cannot be created, or writing the CSV fails.
pub fn utxodump(blocks_dir: &str, outfile: &str) -> WithError {
  // Upper bound for the capacity of the in-memory output map.
  const MAX_CAP: usize = 1_000_000 * 64;

  // Bad user input is reported through the returned error instead of a panic.
  let blocks_dir = std::fs::canonicalize(blocks_dir).map_err(|e| format!("Invalid blocks_dir: {e}"))?;
  let blocks_dir = blocks_dir.to_str().ok_or("blocks_dir is not valid UTF-8")?.to_string();
  let outfile = std::env::current_dir()?.join(outfile);
  let outfile = outfile.to_str().ok_or("outfile path is not valid UTF-8")?.to_string();

  println!("Loading blocks from: {}", blocks_dir);
  println!("Output file: {}", outfile);

  let stime = std::time::Instant::now();
  print!("Loading blocks from {} ~ ", blocks_dir);
  let walker = BcWalker::from_dir(&blocks_dir)?;
  let blocks_len = walker.blocks_count();
  println!("Blocks: {} ({:.1}s)", blocks_len.separate_with_commas(), stime.elapsed().as_secs_f64());

  // Without this guard `blocks_len - 1` below would underflow.
  if blocks_len == 0 {
    return Err(format!("No blocks found in {blocks_dir}").into());
  }

  let pbar = get_pbar(walker.bc_size());

  let mut min_block: usize = 0;
  let max_block = blocks_len - 1;
  let mut cycle = 0;

  // `<=` (not `<`): the final block still has to be collected when the previous cycle hit the
  // capacity limit exactly on it, and a chain consisting of a single block has to be processed
  // at all. Assumes the walker's block ranges include `max_block`, as the `blocks_len - 1`
  // upper bound suggests.
  while min_block <= max_block {
    // Size of everything before the checkpoint; nothing precedes block 0.
    let lsize = if min_block > 0 { walker.bc_size_at(0, min_block - 1) } else { 0 };
    let rsize = walker.bc_size_at(min_block, max_block);
    pbar.set_position(lsize);
    pbar.set_length(lsize + rsize);

    // `false` while outputs are still being collected ("C"); `true` once the map is full and
    // the rest of the cycle only removes spent outputs ("D").
    let mut flag = false;
    let mut tmp_txo: HashMap<TxoKey, TxoVal> = HashMap::default();
    tmp_txo.reserve(MAX_CAP / 2);

    let itime = std::time::Instant::now();
    let chan = walker.get_chan_at(min_block, max_block);
    cycle += 1;

    let mut cur_block = min_block;
    while let Ok((blk, blk_size)) = chan.recv() {
      cur_block = blk.height as usize;
      let msg = format!(
        "{} / {} | Chkpt: {} | {}: {}",
        cur_block.separate_with_commas(),
        max_block.separate_with_commas(),
        min_block.separate_with_commas(),
        if flag { "D" } else { "C" },
        (tmp_txo.len()).separate_with_commas(),
      );
      pbar.set_message(msg);
      pbar.inc(blk_size as u64);

      if !flag {
        // Stop collecting as soon as adding this block's outputs could force the map to double
        // its capacity past the limit; this block becomes the next cycle's starting point.
        let new_len = blk.txs.iter().map(|tx| tx.outputs.len()).sum::<usize>() + tmp_txo.len();
        let cur_cap = tmp_txo.capacity();
        let new_cap = if new_len > cur_cap { cur_cap * 2 } else { cur_cap };
        flag = new_cap > MAX_CAP;

        if flag {
          min_block = cur_block;
        }
      }

      // outputs can be spent in the same block and transactions can be in any order
      // so keep all spents in a temporary set and remove them after block processing
      let mut tmp_txi: HashSet<TxoKey> = HashSet::default();

      for (i, tx) in blk.txs.iter().enumerate() {
        // process outputs only when no flag
        if !flag {
          for (idx, txo) in tx.outputs.iter().enumerate() {
            // skip if dust, empty script, or OP_RETURN
            if txo.amount < 546 || txo.script_pub.is_empty() || txo.script_pub[0] == 0x6a {
              continue;
            }

            tmp_txo.insert(
              TxoKey { txid: tx.txid, vout: idx as u32 },
              TxoVal { height: blk.height, amount: txo.amount, script: txo.script_pub.clone() },
            );
          }
        }

        // inputs; skip coinbase tx (always first tx)
        if i > 0 {
          for txi in tx.inputs.iter() {
            let key = TxoKey { txid: txi.prev_tx, vout: txi.prev_idx };
            tmp_txi.insert(key);
          }
        }
      }

      // remove spent outputs
      for key in &tmp_txi {
        tmp_txo.remove(key);
      }
    }

    if !flag {
      min_block = cur_block + 1; // all blocks processed, for printing
    }

    pbar.println(format!(
      ">> {cycle:>3} {:>6.1}s {:>8} {:>12}",
      itime.elapsed().as_secs_f64(),
      min_block.separate_with_commas(),
      tmp_txo.len().separate_with_commas(),
    ));

    csv_append(&outfile, &tmp_txo)?;

    if !flag {
      break; // every block up to max_block was collected; nothing left for another cycle
    }
  }

  pbar.finish();

  Ok(())
}

/// Reads the UTXO CSV file `utxofile` and prints, one per line, the `addr` value of every row
/// whose `addr` is exactly 40 characters long; all other rows are skipped.
///
/// # Errors
///
/// Returns an error when the path cannot be resolved, the file cannot be opened, or a row
/// cannot be deserialized into a `CsvRow`.
pub fn utxohex160(utxofile: &str) -> WithError {
  // The resolved path is handed to the reader as a path; converting it to a string first would
  // only add a panic for paths that are not valid UTF-8.
  let utxofile = std::fs::canonicalize(utxofile)?;

  let mut rdr = csv::Reader::from_path(&utxofile)?;
  for row in rdr.deserialize() {
    let row: CsvRow = row?;
    if row.addr.len() == 40 {
      println!("{}", row.addr);
    }
  }

  Ok(())
}