ubt 0.4.2

Unified Binary Tree implementation based on EIP-7864
Documentation
{
  "eip": 7864,
  "title": "Ethereum state using a unified binary tree",
  "description": "Switch Ethereum state tree to a unified binary tree",
  "status": "Draft",
  "type": "Standards Track",
  "category": "Core",
  "created": "2025-01-20",
  "discussion_url": "https://ethereum-magicians.org/t/eip-7864-ethereum-state-using-a-unified-binary-tree/22611",
  "authors": [
    {
      "name": "Vitalik Buterin",
      "github": "vbuterin"
    },
    {
      "name": "Guillaume Ballet",
      "github": "gballet"
    },
    {
      "name": "Dankrad Feist",
      "github": "dankrad"
    },
    {
      "name": "Ignacio Hagopian",
      "github": "jsign"
    },
    {
      "name": "Kevaundray Wedderburn",
      "github": "kevaundray"
    },
    {
      "name": "Tanishq Jasoria",
      "github": "tanishqjasoria"
    },
    {
      "name": "Gajinder Singh",
      "github": "g11tech"
    },
    {
      "name": "Danno Ferrin",
      "github": "shemnon"
    },
    {
      "name": "Piper Merriam",
      "github": "pipermerriam"
    },
    {
      "name": "Gottfried Herold",
      "github": "GottfriedHerold"
    }
  ],
  "requires": [],
  "abstract": "Introduce a new binary state tree, intended to replace the hexary patricia trees. Account and storage tries are merged into a single tree with 32-byte keys, which also contains contract code. Account data is broken into independent leaves which are grouped by 256 in order to provide some locality.",
  "motivation": {
    "summary": "Ethereum's long-term goal is to allow blocks to be proved with validity proofs so that chain verification is as simple and fast as possible.",
    "problems_with_current_mpt": [
      "Uses RLP for node encoding",
      "Uses Keccak as a hashing function (not SNARK-friendly)",
      "Is a tree of trees structure",
      "Does not include account code as part of the state",
      "Large Merkle proof sizes due to hexary structure"
    ],
    "proof_size_comparison": {
      "hexary_tree_expected_proof_size": "3840 bytes for single branch in 2^32 size tree",
      "worst_case_block_proof": "330MB for 30M gas accessing single bytes of different account codes"
    }
  },
  "specification": {
    "hash_function": {
      "note": "Hash function not final",
      "current_reference_implementation": "BLAKE3",
      "candidates": ["BLAKE3", "Keccak", "Poseidon2"],
      "poseidon2_considerations": "Ongoing EF cryptography initiative assessing security properties"
    },
    "notable_changes": [
      "Account and storage tries merged into single trie",
      "RLP no longer used",
      "Account code is chunked and included in the tree",
      "Account data is co-located in the tree to reduce branch openings"
    ],
    "tree_structure": {
      "key_size": 32,
      "value_size": 32,
      "stem_size": 31,
      "subindex_size": 1,
      "stem_subtree_size": 256,
      "node_types": [
        {
          "name": "InternalNode",
          "fields": ["left_hash", "right_hash"],
          "description": "Internal branching node"
        },
        {
          "name": "StemNode",
          "fields": ["stem", "left_hash", "right_hash"],
          "description": "Node at the end of a stem path containing 256 leaf values"
        },
        {
          "name": "LeafNode",
          "fields": ["value"],
          "description": "32-byte value or empty"
        },
        {
          "name": "EmptyNode",
          "fields": [],
          "description": "Represents empty node/sub-tree"
        }
      ]
    },
    "node_merkelization": {
      "hash_function_rules": {
        "empty_input": "hash([0x00] * 64) = [0x00] * 32",
        "normal_input": "hash(value) = H(value)"
      },
      "valid_input_lengths": [32, 64],
      "node_hash_formulas": {
        "internal_node": "hash(left_hash || right_hash)",
        "stem_node": "hash(stem || 0x00 || hash(left_hash || right_hash))",
        "leaf_node": "hash(value)",
        "empty_node": "[0x00] * 32"
      }
    },
    "tree_embedding": {
      "parameters": {
        "BASIC_DATA_LEAF_KEY": 0,
        "CODE_HASH_LEAF_KEY": 1,
        "HEADER_STORAGE_OFFSET": 64,
        "CODE_OFFSET": 128,
        "STEM_SUBTREE_WIDTH": 256,
        "MAIN_STORAGE_OFFSET": "256^31"
      },
      "invariants": [
        "STEM_SUBTREE_WIDTH > CODE_OFFSET > HEADER_STORAGE_OFFSET",
        "HEADER_STORAGE_OFFSET > leaf keys",
        "MAIN_STORAGE_OFFSET must be power of STEM_SUBTREE_WIDTH"
      ],
      "address_conversion": "Prepend 12 zero bytes to convert Address to Address32"
    },
    "header_values": {
      "basic_data_layout": [
        {
          "name": "version",
          "offset": 0,
          "size": 1
        },
        {
          "name": "reserved",
          "offset": 1,
          "size": 4
        },
        {
          "name": "code_size",
          "offset": 5,
          "size": 3
        },
        {
          "name": "nonce",
          "offset": 8,
          "size": 8
        },
        {
          "name": "balance",
          "offset": 16,
          "size": 16
        }
      ],
      "encoding": "big-endian",
      "notes": [
        "code_size can be extended to 4 bytes without version change",
        "Packing reduces gas costs by requiring only one branch opening"
      ]
    },
    "code": {
      "chunk_size": 32,
      "code_slice_size": 31,
      "chunk_format": {
        "byte_0": "Number of leading bytes that are part of PUSHDATA",
        "bytes_1_to_31": "31 bytes of code"
      },
      "push_constants": {
        "PUSH_OFFSET": 95,
        "PUSH1": 96,
        "PUSH32": 127
      }
    },
    "storage": {
      "first_slots_location": "In account stem at HEADER_STORAGE_OFFSET",
      "first_slots_count": 64,
      "main_storage_location": "MAIN_STORAGE_OFFSET + storage_key",
      "note": "Slots in same STEM_SUBTREE_WIDTH range share single stem"
    },
    "related_eips": {
      "fork": "EIP-7612",
      "access_events": "EIP-4762"
    }
  },
  "rationale": {
    "single_tree_design": {
      "reasons": [
        "Simplicity: easier to work with key/value store abstraction",
        "Uniformity: state uniformly spread throughout tree",
        "Extensibility: account headers and code in same structure as storage"
      ],
      "benefits": [
        "Simpler witness gas rules",
        "Better state-syncing algorithms",
        "Reduced effectiveness of unbalanced tree-filling attacks"
      ]
    },
    "snark_friendliness_and_post_quantum": {
      "design_goals": [
        "Simple tree structure without complex branching rules",
        "No extension nodes in middle of branches",
        "No RLP encoding",
        "Efficient implementations both out and in circuits"
      ],
      "hash_function_candidates": {
        "BLAKE3": {
          "pros": [
            "Good out-of-circuit performance",
            "Reasonable in-circuit performance",
            "Well-established security"
          ],
          "status": "Current reference implementation"
        },
        "Keccak": {
          "pros": [
            "Already used in Ethereum",
            "Well-studied security"
          ],
          "cons": ["Less efficient for circuit proving"]
        },
        "Poseidon2": {
          "pros": [
            "Excellent in-circuit performance",
            "Better proving throughput"
          ],
          "cons": ["Security analysis ongoing"]
        }
      },
      "post_quantum_considerations": {
        "timeline": "Quantum computers could become a practical reality in the 2030s",
        "nist_recommendation": "Stop using ECC by 2030",
        "advantage_over_verkle": "Only depends on hash functions, which are post-quantum safe"
      }
    },
    "arity_2": {
      "reason": "Minimizes witness size",
      "formula": "branch_size ≈ 32 * (k-1) * log(N) / log(k)",
      "comparison_table": {
        "description": "Branch lengths for N = 2^24",
        "data": [
          {"arity": 2, "chunks": 24, "bytes": 768},
          {"arity": 4, "chunks": 36, "bytes": 1152},
          {"arity": 8, "chunks": 56, "bytes": 1792},
          {"arity": 16, "chunks": 90, "bytes": 2880}
        ]
      }
    },
    "tree_depth": {
      "approach": "Avoid full 248-bit depth of Sparse Merkle Tree",
      "reason": "Reduces hashing load in proving systems",
      "trade_off": "Extension nodes are avoided for simplicity, even though they could be a potential optimization"
    },
    "state_expiry": {
      "compatibility": "Could apply strategies like EIP-7736",
      "potential_solutions": [
        "Add epoch field to StemNode",
        "Use 247 bits for the stem, with two subtrees: StemValuesNode and StemMetaNode"
      ]
    }
  },
  "backwards_compatibility": {
    "breaking_changes": [
      {
        "issue": "Gas costs for code chunk access",
        "impact": "May affect applications' economic viability",
        "mitigation": "Increase the gas limit at the same time as implementing this change"
      },
      {
        "issue": "Tree structure change",
        "impact": "In-EVM proofs of historical state no longer work",
        "mitigation": null
      }
    ]
  },
  "security_considerations": {
    "status": "Needs discussion"
  },
  "implementation": {
    "related_eips": ["EIP-7612", "EIP-4762", "EIP-7748", "EIP-7736"]
  },
  "copyright": "CC0"
}