pub const LIBRARY_SOURCE: &str = "#!lua name=flowfabric\n\n-- source: lua/helpers.lua\n-- FlowFabric shared library-local helpers\n-- These are local functions available to all registered functions in the\n-- flowfabric library. They are NOT independently FCALL-able.\n-- Reference: RFC-010 \u{a7}4.8, RFC-004 \u{a7}Waitpoint Security (HMAC tokens)\n\n---------------------------------------------------------------------------\n-- Capability CSV bounds (RFC-009 \u{a7}7.5)\n---------------------------------------------------------------------------\n-- Shared ceiling for BOTH the worker-side CSV (ff_issue_claim_grant ARGV[9])\n-- AND the execution-side CSV (exec_core.required_capabilities). Defense in\n-- depth against runaway field sizes: a 10k-token list turns into a multi-MB\n-- HSET value and a per-candidate O(N) atomic scan that blocks the shard.\n--\n-- Inclusivity: these are MAXIMUM accepted values. `#csv == CAPS_MAX_BYTES`\n-- and `n == CAPS_MAX_TOKENS` are accepted; one more rejects. Rust-side\n-- ingress (ff-sdk::FlowFabricWorker::connect, ff-scheduler::Scheduler::\n-- claim_for_worker, ff-core::policy::RoutingRequirements deserialization\n-- via lua/execution.lua) enforces the same ceilings so the Lua check is a\n-- defense-in-depth backstop, not the primary validator.\nlocal CAPS_MAX_BYTES = 4096\nlocal CAPS_MAX_TOKENS = 256\n\n---------------------------------------------------------------------------\n-- Hex / binary helpers (for HMAC-SHA1 token derivation)\n---------------------------------------------------------------------------\n\n-- Convert a hex string to a binary (byte) string. Accepts mixed case.\n-- Returns nil on ANY malformed input: non-string, odd length, OR any\n-- non-hex char (including whitespace, unicode, control chars). Callers\n-- treat nil as invalid_secret.\n--\n-- Rust side (ServerConfig) already validates the env secret as even-length\n-- 0-9a-fA-F, but an operator writing directly to Valkey (or a torn write\n-- during rotation) could bypass that validator. We refuse the conversion\n-- here instead of silently coercing bad pairs to 0 bytes (which would\n-- produce a bogus but valid-looking MAC).\nlocal function hex_to_bytes(hex)\n if type(hex) ~= \"string\" or #hex % 2 ~= 0 then\n return nil\n end\n local out = {}\n for i = 1, #hex - 1, 2 do\n local byte = tonumber(hex:sub(i, i + 1), 16)\n if not byte then\n return nil\n end\n out[#out + 1] = string.char(byte)\n end\n return table.concat(out)\nend\n\n-- XOR two equal-length byte strings. Used for HMAC key-pad construction.\nlocal function xor_bytes(a, b)\n local out = {}\n for i = 1, #a do\n out[i] = string.char(bit.bxor(a:byte(i), b:byte(i)))\n end\n return table.concat(out)\nend\n\n-- HMAC-SHA1(key_hex, message) \u{2192} lowercase hex digest (40 chars), or nil on\n-- malformed key_hex (odd-length / non-string). Callers must treat nil as\n-- an invalid-secret error \u{2014} never pass it to HSET / concat / return.\n-- Reference: RFC 2104. SHA1 block size = 64 bytes.\nlocal function hmac_sha1_hex(key_hex, message)\n local key = hex_to_bytes(key_hex)\n if not key then\n return nil\n end\n local block_size = 64\n if #key > block_size then\n -- Reduce oversized key via SHA1 (per RFC 2104). sha1hex output is 40\n -- lowercase hex chars, so the inner hex_to_bytes cannot fail.\n key = hex_to_bytes(redis.sha1hex(key))\n end\n if #key < block_size then\n key = key .. 
string.rep(\"\\0\", block_size - #key)\n end\n local ipad = string.rep(string.char(0x36), block_size)\n local opad = string.rep(string.char(0x5c), block_size)\n local inner = redis.sha1hex(xor_bytes(key, ipad) .. message)\n return redis.sha1hex(xor_bytes(key, opad) .. hex_to_bytes(inner))\nend\n\n-- Constant-time string equality. Returns true iff strings are equal in\n-- both length and content. Uses XOR-accumulation to avoid early-exit\n-- timing leaks on byte mismatches during HMAC token validation.\n-- Reference: Remote timing attacks on authentication (e.g., CVE-2011-3389 class).\n--\n-- Safety note on the length check: a length-mismatch early return reveals\n-- whether the presented string matches the expected length, which is a\n-- timing side channel IF attacker-controlled length is used to probe the\n-- expected length. In this codebase the caller normalizes to a fixed shape\n-- BEFORE reaching here \u{2014} validate_waitpoint_token already requires\n-- #presented == 40 (SHA1 hex digest length) at the parsing boundary, so\n-- any input reaching constant_time_eq has a length already known to be 40\n-- by the attacker. The only length variation here is on `expected`, which\n-- is server-computed and constant. Hence this early return does not leak\n-- secret-dependent timing.\nlocal function constant_time_eq(a, b)\n if type(a) ~= \"string\" or type(b) ~= \"string\" then\n return false\n end\n if #a ~= #b then\n return false\n end\n local acc = 0\n for i = 1, #a do\n acc = bit.bor(acc, bit.bxor(a:byte(i), b:byte(i)))\n end\n return acc == 0\nend\n\n---------------------------------------------------------------------------\n-- Waitpoint HMAC tokens (RFC-004 \u{a7}Waitpoint Security)\n---------------------------------------------------------------------------\n--\n-- Token format: \"kid:40hex\" \u{2014} kid identifies which key signed the token,\n-- enabling zero-downtime rotation. ANY kid present in the secrets hash\n-- with a future `expires_at:<kid>` (or the current kid, which has no\n-- expiry) accepts tokens. This supports rapid rotation: rotating A\u{2192}B\u{2192}C\n-- within a grace window keeps A\'s secret validatable as long as\n-- expires_at:A is still future.\n--\n-- HMAC input binds (waitpoint_id | waitpoint_key | created_at_ms) with a\n-- pipe delimiter so field-boundary confusion cannot produce collisions\n-- across waitpoints.\n--\n-- Secret storage: per-partition replicated hash at\n-- ff:sec:{p:N}:waitpoint_hmac\n-- Fields:\n-- current_kid \u{2014} the kid minting new tokens (no expiry)\n-- secret:<kid> \u{2014} hex-encoded HMAC key for each kid ever installed\n-- expires_at:<kid> \u{2014} unix ms; accept tokens under <kid> iff exp > now_ms\n-- INVARIANT: expires_at:<current_kid> is NEVER written\n-- previous_kid \u{2014} observability/audit only: the kid immediately\n-- preceding current_kid (NOT the only acceptable one)\n-- previous_expires_at \u{2014} observability/audit only: matches\n-- expires_at:<previous_kid>\n--\n-- Replication is required for Valkey cluster mode (all FCALL KEYS must\n-- hash to the same slot); rotation fans out across partitions.\n---------------------------------------------------------------------------\n\n-- Read the hmac_secrets hash. 
Returns a table with:\n-- current_kid, current_secret \u{2014} the minting kid (nil if not initialized)\n-- kid_secrets = { [kid] = { secret = <hex>, expires_at = <ms or nil> } }\n-- includes current_kid (expires_at = nil \u{2192} no expiry)\n-- includes every secret:<kid> present in the hash\n-- previous_kid, previous_secret, previous_expires_at \u{2014} kept for back-compat\n-- (audit log / observability); validate path does NOT depend on them.\n-- Returns nil if the hash is absent.\nlocal function load_waitpoint_secrets(secrets_key)\n local raw = redis.call(\"HGETALL\", secrets_key)\n if #raw == 0 then\n return nil\n end\n local t = {}\n for i = 1, #raw, 2 do\n t[raw[i]] = raw[i + 1]\n end\n local out = {\n current_kid = t.current_kid,\n previous_kid = t.previous_kid,\n previous_expires_at = t.previous_expires_at,\n kid_secrets = {},\n }\n if out.current_kid then\n out.current_secret = t[\"secret:\" .. out.current_kid]\n end\n if out.previous_kid then\n out.previous_secret = t[\"secret:\" .. out.previous_kid]\n end\n -- Multi-kid scan: every secret:<kid> becomes a validation candidate.\n -- current_kid has no expiry entry (intentional \u{2014} it\'s always valid).\n -- Other kids are accepted iff expires_at:<kid> is set AND > now_ms; the\n -- expiry check runs in validate_waitpoint_token so we simply carry the\n -- raw expires_at string here.\n for k, v in pairs(t) do\n if k:sub(1, 7) == \"secret:\" then\n local kid = k:sub(8)\n if kid ~= \"\" then\n out.kid_secrets[kid] = {\n secret = v,\n expires_at = t[\"expires_at:\" .. kid],\n }\n end\n end\n end\n return out\nend\n\n-- Build the HMAC input string. Pipe delimiter prevents concatenation\n-- collisions across distinct (waitpoint_id, waitpoint_key) pairs.\nlocal function waitpoint_hmac_input(waitpoint_id, waitpoint_key, created_at_ms)\n return waitpoint_id .. \"|\" .. waitpoint_key .. \"|\" .. tostring(created_at_ms)\nend\n\n-- Mint a waitpoint token using the current kid.\n-- Returns (token, kid) on success or (nil, error_code) on failure.\n-- Defense-in-depth: returns a typed error for missing secrets_key / missing\n-- secrets hash so external callers that construct FCALL KEYS by hand cannot\n-- produce the \"arguments must be strings or integers\" Lua panic via nil.\nlocal function mint_waitpoint_token(secrets_key, waitpoint_id, waitpoint_key, created_at_ms)\n if type(secrets_key) ~= \"string\" or secrets_key == \"\" then\n return nil, \"invalid_keys_missing_hmac\"\n end\n local secrets = load_waitpoint_secrets(secrets_key)\n if not secrets or not secrets.current_kid or not secrets.current_secret then\n return nil, \"hmac_secret_not_initialized\"\n end\n local input = waitpoint_hmac_input(waitpoint_id, waitpoint_key, created_at_ms)\n local digest = hmac_sha1_hex(secrets.current_secret, input)\n if not digest then\n return nil, \"invalid_secret\"\n end\n return secrets.current_kid .. \":\" .. digest, secrets.current_kid\nend\n\n-- Validate a waitpoint token against the (waitpoint_id, waitpoint_key,\n-- created_at_ms) that were bound at mint time. 
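Tokens must look like \"kid:40hex\";\n-- a malformed shape is rejected as invalid_token before any HMAC runs. 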
Accepts tokens signed with\n-- current_kid, or previous_kid if previous_expires_at has not passed.\n-- Returns nil on success or an error code string on failure.\nlocal function validate_waitpoint_token(\n secrets_key, token, waitpoint_id, waitpoint_key, created_at_ms, now_ms\n)\n if type(secrets_key) ~= \"string\" or secrets_key == \"\" then\n return \"invalid_keys_missing_hmac\"\n end\n if type(token) ~= \"string\" or token == \"\" then\n return \"missing_token\"\n end\n local sep = token:find(\":\", 1, true)\n if not sep or sep < 2 or sep >= #token then\n return \"invalid_token\"\n end\n local kid = token:sub(1, sep - 1)\n local presented = token:sub(sep + 1)\n if #presented ~= 40 then\n -- SHA1 hex digest is always 40 chars.\n return \"invalid_token\"\n end\n\n local secrets = load_waitpoint_secrets(secrets_key)\n if not secrets or not secrets.current_kid then\n return \"hmac_secret_not_initialized\"\n end\n\n -- Multi-kid validation. ANY secret:<kid> present in the hash is a\n -- candidate IF:\n -- - kid == current_kid (no expiry, always valid), OR\n -- - expires_at:<kid> is a positive integer AND > now_ms.\n --\n -- Rationale: rapid rotation (A\u{2192}B\u{2192}C inside a grace window) must keep\n -- in-flight A-signed tokens valid. The previous 2-slot model\n -- (current + previous) evicted A as soon as B became previous, even\n -- though expires_at:A was still future. RFC-004 \u{a7}Waitpoint Security\n -- promises grace duration, not \"grace until next rotation\".\n --\n -- Fail-CLOSED on malformed expires_at: a corrupted/non-numeric value\n -- means \"no affirmative unexpired proof\" \u{2014} reject.\n local secret = nil\n local expiry_state = nil -- \"known_kid_expired\" | \"unknown_kid\"\n if kid == secrets.current_kid then\n secret = secrets.current_secret\n else\n local entry = secrets.kid_secrets and secrets.kid_secrets[kid]\n if entry then\n local exp = tonumber(entry.expires_at)\n if not exp or exp <= 0 or exp < now_ms then\n -- secret:<kid> is present but its grace has elapsed (or was\n -- never recorded). Distinguishable from unknown_kid so the\n -- caller can log the more-actionable \"token_expired\".\n expiry_state = \"known_kid_expired\"\n else\n secret = entry.secret\n end\n else\n expiry_state = \"unknown_kid\"\n end\n end\n\n if not secret then\n if expiry_state == \"known_kid_expired\" then\n return \"token_expired\"\n end\n return \"invalid_token\"\n end\n\n local input = waitpoint_hmac_input(waitpoint_id, waitpoint_key, created_at_ms)\n local expected = hmac_sha1_hex(secret, input)\n if not expected then\n return \"invalid_secret\"\n end\n if not constant_time_eq(expected, presented) then\n return \"invalid_token\"\n end\n return nil\nend\n\n---------------------------------------------------------------------------\n-- Time\n---------------------------------------------------------------------------\n\n-- Returns the Valkey server time as milliseconds. Always prefer this over\n-- a caller-supplied now_ms for fields that are used in retention windows,\n-- eligibility scoring, lease expiry, or any cross-execution causal\n-- comparison. 
Client-supplied timestamps are trivially skewable and\n-- produce observability drift when compared against fields written by\n-- other Lua functions (which already use redis.call(\"TIME\")).\nlocal function server_time_ms()\n local t = redis.call(\"TIME\")\n return tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\nend\n\n---------------------------------------------------------------------------\n-- Return wrappers (\u{a7}4.9)\n---------------------------------------------------------------------------\n\nlocal function ok(...)\n return {1, \"OK\", ...}\nend\n\nlocal function err(...)\n return {0, ...}\nend\n\n-- Require a numeric value from ARGV. Returns the number on success or\n-- an err() tuple on failure. Callers must check: if type(n) == \"table\"\n-- then return n end (the table IS the err tuple).\nlocal function require_number(val, name)\n local n = tonumber(val)\n if n == nil then\n return err(\"invalid_input\", name .. \" must be a number, got: \" .. tostring(val))\n end\n return n\nend\n\nlocal function ok_already_satisfied(...)\n return {1, \"ALREADY_SATISFIED\", ...}\nend\n\nlocal function ok_duplicate(...)\n return {1, \"DUPLICATE\", ...}\nend\n\n-- RFC-014 Pattern 3 \u{2014} expand {suspension_id, wp_id, wp_key, wp_tok,\n-- extras_table} into the 4 primary fields + N_extra count + N_extra \u{d7}\n-- (id, key, tok) response tail. `extras` is an array of {waitpoint_id,\n-- waitpoint_key, waitpoint_token} tables. Empty array \u{2192} N_extra=0.\nlocal function ok_extras(susp_id, wp_id, wp_key, wp_tok, extras)\n extras = extras or {}\n local out = {1, \"OK\", susp_id, wp_id, wp_key, wp_tok, tostring(#extras)}\n for _, e in ipairs(extras) do\n out[#out + 1] = e.waitpoint_id or \"\"\n out[#out + 1] = e.waitpoint_key or \"\"\n out[#out + 1] = e.waitpoint_token or \"\"\n end\n return out\nend\n\nlocal function ok_already_satisfied_extras(susp_id, wp_id, wp_key, wp_tok, extras)\n extras = extras or {}\n local out = {1, \"ALREADY_SATISFIED\", susp_id, wp_id, wp_key, wp_tok, tostring(#extras)}\n for _, e in ipairs(extras) do\n out[#out + 1] = e.waitpoint_id or \"\"\n out[#out + 1] = e.waitpoint_key or \"\"\n out[#out + 1] = e.waitpoint_token or \"\"\n end\n return out\nend\n\n---------------------------------------------------------------------------\n-- Data access\n---------------------------------------------------------------------------\n\n-- Converts HGETALL flat array {k1, v1, k2, v2, ...} to a Lua dict table.\n-- All RFC pseudocode uses core.field syntax which requires this conversion.\nlocal function hgetall_to_table(flat)\n local t = {}\n for i = 1, #flat, 2 do\n t[flat[i]] = flat[i + 1]\n end\n return t\nend\n\n-- Safe nil/empty check. Valkey hashes cannot store nil: HGET on a missing\n-- field returns false (via Lua), and cleared fields store \"\". 
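For example,\n-- is_set(false) and is_set(\"\") are both false, while is_set(\"0\") is true. 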
This helper\n-- handles both cases plus actual nil for fields absent from hgetall_to_table.\nlocal function is_set(v)\n return v ~= nil and v ~= false and v ~= \"\"\nend\n\n---------------------------------------------------------------------------\n-- Lease validation (most widely shared \u{2014} prevents copy-paste drift)\n-- RFC-010 \u{a7}4.8: 7+ functions use this: complete, fail, suspend, delay,\n-- move_to_waiting_children, append_frame, report_usage.\n---------------------------------------------------------------------------\n\n-- Validates that the caller holds a valid, non-expired, non-revoked lease.\n-- Returns an error tuple on failure, or nil on success.\n-- @param core table from hgetall_to_table(HGETALL exec_core)\n-- @param argv table with lease_id, lease_epoch, attempt_id\n-- @param now_ms current timestamp in milliseconds\nlocal function validate_lease(core, argv, now_ms)\n if core.lifecycle_phase ~= \"active\" then\n -- See validate_lease_and_mark_expired for the full detail layout.\n return err(\"execution_not_active\",\n core.terminal_outcome or \"\",\n core.current_lease_epoch or \"\",\n core.lifecycle_phase or \"\",\n core.current_attempt_id or \"\")\n end\n if core.ownership_state == \"lease_revoked\" then\n return err(\"lease_revoked\")\n end\n if tonumber(core.lease_expires_at or \"0\") <= now_ms then\n return err(\"lease_expired\")\n end\n if core.current_lease_id ~= argv.lease_id then\n return err(\"stale_lease\")\n end\n if core.current_lease_epoch ~= argv.lease_epoch then\n return err(\"stale_lease\")\n end\n if core.current_attempt_id ~= argv.attempt_id then\n return err(\"stale_lease\")\n end\n return nil\nend\n\n-- Sets ownership_state to lease_expired_reclaimable. Idempotent.\n--\n-- Also writes `closed_at`/`closed_reason=\"lease_expired\"` on the attempt\'s\n-- `stream_meta` hash when that stream exists, so `tail_stream` consumers\n-- observe the terminal signal without having to fall back to polling\n-- `execution_state`. This matters for the permanent-failure case (worker\n-- OOM or node dead, no replacement reclaims): the reclaim path that\n-- normally writes `closed_reason=\"reclaimed\"` may never run, and without\n-- this signal the tail poll loop waits forever.\n--\n-- Write order: we only write stream_meta if its existing `closed_at` is\n-- empty. A later `ff_reclaim_execution` that overwrites `closed_reason`\n-- to \"reclaimed\" still wins because it unconditionally HSETs the field;\n-- this function intentionally does NOT overwrite a pre-existing close.\n--\n-- Key construction: the stream_meta key is derived from core_key\'s\n-- `{p:N}` hash tag + current_attempt_index. 
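For example (hypothetical ids),\n-- core_key \"ff:exec:{p:7}:E-123:core\" with current_attempt_index 2 yields\n-- \"ff:stream:{p:7}:E-123:2:meta\". 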
All three keys share the\n-- same hash tag, so this stays single-slot in cluster mode despite not\n-- being declared in KEYS upfront \u{2014} mirrors the dynamic attempt/lane key\n-- construction in `ff_create_execution`.\n--\n-- @param keys table with core_key, lease_history_key\n-- @param core table from hgetall_to_table\n-- @param now_ms current timestamp in milliseconds\n-- @param maxlen MAXLEN for lease_history stream\nlocal function mark_expired(keys, core, now_ms, maxlen)\n if core.ownership_state == \"lease_expired_reclaimable\" then\n return -- idempotent\n end\n -- ALL 7 dims (preserve lifecycle_phase=active, eligibility_state, terminal_outcome=none)\n redis.call(\"HSET\", keys.core_key,\n \"lifecycle_phase\", core.lifecycle_phase or \"active\", -- preserve\n \"ownership_state\", \"lease_expired_reclaimable\",\n \"eligibility_state\", core.eligibility_state or \"not_applicable\", -- preserve\n \"blocking_reason\", \"waiting_for_worker\",\n \"blocking_detail\", \"lease expired, awaiting reclaim\",\n \"terminal_outcome\", core.terminal_outcome or \"none\", -- preserve\n \"attempt_state\", \"attempt_interrupted\",\n \"public_state\", \"active\",\n \"lease_expired_at\", now_ms,\n \"last_mutation_at\", now_ms)\n redis.call(\"XADD\", keys.lease_history_key, \"MAXLEN\", \"~\", maxlen, \"*\",\n \"event\", \"expired\",\n \"lease_id\", core.current_lease_id or \"\",\n \"lease_epoch\", core.current_lease_epoch or \"\",\n \"attempt_index\", core.current_attempt_index or \"\",\n \"attempt_id\", core.current_attempt_id or \"\",\n \"worker_id\", core.current_worker_id or \"\",\n \"worker_instance_id\", core.current_worker_instance_id or \"\",\n \"ts\", now_ms)\n\n -- Close stream_meta (if the stream was lazily created) so tail_stream\n -- consumers receive the terminal signal. Core key format:\n -- ff:exec:{p:N}:<eid>:core\n -- Stream meta key format:\n -- ff:stream:{p:N}:<eid>:<attempt_index>:meta\n local att_idx = core.current_attempt_index\n if att_idx ~= nil and att_idx ~= \"\" then\n local tag_open = string.find(keys.core_key, \"{\", 1, true)\n local tag_close = tag_open and string.find(keys.core_key, \"}\", tag_open, true)\n if tag_open and tag_close then\n local tag = string.sub(keys.core_key, tag_open, tag_close)\n -- After `}:` comes `<eid>:core`. Walk past the `}:` delimiter.\n local after_tag = string.sub(keys.core_key, tag_close + 2)\n local eid_end = string.find(after_tag, \":core\", 1, true)\n if eid_end then\n local eid = string.sub(after_tag, 1, eid_end - 1)\n local stream_meta_key = \"ff:stream:\" .. tag .. \":\" .. eid\n .. \":\" .. tostring(att_idx) .. 
\":meta\"\n if redis.call(\"EXISTS\", stream_meta_key) == 1 then\n local existing_closed_at = redis.call(\"HGET\", stream_meta_key, \"closed_at\")\n if not is_set(existing_closed_at) then\n redis.call(\"HSET\", stream_meta_key,\n \"closed_at\", tostring(now_ms),\n \"closed_reason\", \"lease_expired\")\n end\n end\n end\n end\n end\nend\n\n-- Validates lease AND atomically marks expired if lease has lapsed.\n-- Use this variant for write-path callers (complete, fail, suspend, delay,\n-- move_to_waiting_children) that have the lease_history key available.\n-- For read-only callers (append_frame, report_usage) use validate_lease.\n-- @param core table from hgetall_to_table\n-- @param argv table with lease_id, lease_epoch, attempt_id\n-- @param now_ms current timestamp in milliseconds\n-- @param keys table with core_key, lease_history_key\n-- @param maxlen MAXLEN for lease_history stream\nlocal function validate_lease_and_mark_expired(core, argv, now_ms, keys, maxlen)\n if core.lifecycle_phase ~= \"active\" then\n -- Enriched error detail lets the SDK reconcile a replay of a terminal\n -- operation after a network drop: if the caller\'s (lease_epoch,\n -- attempt_id) match what\'s stored and the outcome matches what they\n -- asked for, treat the \"error\" as a successful replay. See\n -- parse_terminal_replay() on the Rust side. Detail slots:\n -- idx 2: terminal_outcome (e.g. \"success\", \"failed\", \"cancelled\", \"none\")\n -- idx 3: current_lease_epoch (persists across terminal; cleared for retry-scheduled)\n -- idx 4: lifecycle_phase (\"terminal\" vs \"runnable\" disambiguates\n -- terminal_failed from retry_scheduled replay)\n -- idx 5: current_attempt_id (preserved on terminal, cleared on retry;\n -- per-attempt replay guard)\n return err(\"execution_not_active\",\n core.terminal_outcome or \"\",\n core.current_lease_epoch or \"\",\n core.lifecycle_phase or \"\",\n core.current_attempt_id or \"\")\n end\n if core.ownership_state == \"lease_revoked\" then\n return err(\"lease_revoked\")\n end\n if tonumber(core.lease_expires_at or \"0\") <= now_ms then\n mark_expired(keys, core, now_ms, maxlen)\n return err(\"lease_expired\")\n end\n if core.current_lease_id ~= argv.lease_id then\n return err(\"stale_lease\")\n end\n if core.current_lease_epoch ~= argv.lease_epoch then\n return err(\"stale_lease\")\n end\n if core.current_attempt_id ~= argv.attempt_id then\n return err(\"stale_lease\")\n end\n return nil\nend\n\n-- RFC #58.5 \u{2014} resolve the (lease_id, lease_epoch, attempt_id) fence triple.\n-- Returns (fence_table, must_check) on success, or (nil, err_table) on a\n-- partial triple (programming error \u{2014} caller passed some but not all three).\n--\n-- Semantics:\n-- * All three present (non-empty) \u{2192} fence triple, must_check=true.\n-- Caller is expected to run validate_lease_and_mark_expired next.\n-- * All three empty \u{2192} server-resolved from exec_core,\n-- must_check=false. 
Caller decides\n-- whether unfenced mode is allowed\n-- (terminal ops gate on `source`;\n-- renew/suspend hard-reject).\n-- * Any mix of set/empty \u{2192} err(\"partial_fence_triple\").\n--\n-- @param core hgetall_to_table(exec_core)\n-- @param argv table with .lease_id, .lease_epoch, .attempt_id (strings)\nlocal function resolve_lease_fence(core, argv)\n local has_id = is_set(argv.lease_id)\n local has_ep = is_set(argv.lease_epoch)\n local has_at = is_set(argv.attempt_id)\n if has_id or has_ep or has_at then\n if not (has_id and has_ep and has_at) then\n return nil, err(\"partial_fence_triple\")\n end\n return {\n lease_id = argv.lease_id,\n lease_epoch = argv.lease_epoch,\n attempt_id = argv.attempt_id,\n }, true\n end\n return {\n lease_id = core.current_lease_id or \"\",\n lease_epoch = core.current_lease_epoch or \"\",\n attempt_id = core.current_attempt_id or \"\",\n }, false\nend\n\n-- Consolidates the ~15-line lease release block shared by 7 functions.\n-- DEL lease_current, ZREM lease_expiry + worker_leases + active_index,\n-- clear lease fields on exec_core, XADD lease_history \"released\".\n-- @param keys table with lease_current_key, lease_expiry_key,\n-- worker_leases_key, active_index_key, lease_history_key,\n-- attempt_timeout_key, core_key\n-- @param core table from hgetall_to_table\n-- @param reason string reason for release (e.g. \"completed\", \"suspend\")\n-- @param now_ms current timestamp in milliseconds\n-- @param maxlen MAXLEN for lease_history stream\nlocal function clear_lease_and_indexes(keys, core, reason, now_ms, maxlen)\n local eid = core.execution_id or \"\"\n\n -- DEL lease record\n redis.call(\"DEL\", keys.lease_current_key)\n\n -- ZREM/SREM from scheduling indexes\n redis.call(\"ZREM\", keys.lease_expiry_key, eid)\n redis.call(\"SREM\", keys.worker_leases_key, eid)\n redis.call(\"ZREM\", keys.active_index_key, eid)\n redis.call(\"ZREM\", keys.attempt_timeout_key, eid)\n\n -- Clear lease fields on exec_core (including stale expiry/revocation markers)\n redis.call(\"HSET\", keys.core_key,\n \"current_lease_id\", \"\",\n \"current_worker_id\", \"\",\n \"current_worker_instance_id\", \"\",\n \"lease_expires_at\", \"\",\n \"lease_last_renewed_at\", \"\",\n \"lease_renewal_deadline\", \"\",\n \"lease_expired_at\", \"\",\n \"lease_revoked_at\", \"\",\n \"lease_revoke_reason\", \"\",\n \"last_mutation_at\", now_ms)\n\n -- Lease history event\n redis.call(\"XADD\", keys.lease_history_key, \"MAXLEN\", \"~\", maxlen, \"*\",\n \"event\", \"released\",\n \"lease_id\", core.current_lease_id or \"\",\n \"lease_epoch\", core.current_lease_epoch or \"\",\n \"attempt_index\", core.current_attempt_index or \"\",\n \"attempt_id\", core.current_attempt_id or \"\",\n \"reason\", reason,\n \"ts\", now_ms)\nend\n\n---------------------------------------------------------------------------\n-- Defensive index cleanup\n-- RFC-010 \u{a7}4.8: ZREM execution_id from all scheduling + timeout indexes\n-- except except_key. 
~14 ZREM/SREM calls.\n---------------------------------------------------------------------------\n\n-- @param keys table with all index key names\n-- @param eid execution_id string\n-- @param except_key optional key to skip (the target index for this transition)\nlocal function defensive_zrem_all_indexes(keys, eid, except_key)\n -- Each index key and whether it uses ZREM or SREM\n local zrem_keys = {\n keys.eligible_key,\n keys.delayed_key,\n keys.active_index_key,\n keys.suspended_key,\n keys.terminal_key,\n keys.blocked_deps_key,\n keys.blocked_budget_key,\n keys.blocked_quota_key,\n keys.blocked_route_key,\n keys.blocked_operator_key,\n keys.lease_expiry_key,\n keys.suspension_timeout_key,\n keys.attempt_timeout_key,\n keys.execution_deadline_key,\n }\n for _, k in ipairs(zrem_keys) do\n if k and k ~= except_key then\n redis.call(\"ZREM\", k, eid)\n end\n end\n -- worker_leases is a SET, not ZSET\n if keys.worker_leases_key and keys.worker_leases_key ~= except_key then\n redis.call(\"SREM\", keys.worker_leases_key, eid)\n end\nend\n\n---------------------------------------------------------------------------\n-- Suspension reason \u{2192} blocking_reason mapping\n-- RFC-004 \u{a7}Suspension Reason Categories\n---------------------------------------------------------------------------\n\nlocal REASON_TO_BLOCKING = {\n waiting_for_signal = \"waiting_for_signal\",\n waiting_for_approval = \"waiting_for_approval\",\n waiting_for_callback = \"waiting_for_callback\",\n waiting_for_tool_result = \"waiting_for_tool_result\",\n waiting_for_operator_review = \"paused_by_operator\",\n paused_by_policy = \"paused_by_policy\",\n paused_by_budget = \"waiting_for_budget\",\n step_boundary = \"waiting_for_signal\",\n manual_pause = \"paused_by_operator\",\n}\n\nlocal function map_reason_to_blocking(reason_code)\n return REASON_TO_BLOCKING[reason_code] or \"waiting_for_signal\"\nend\n\n---------------------------------------------------------------------------\n-- Resume condition evaluation (shared module)\n-- RFC-004 \u{a7}Resume Condition Model, RFC-005 \u{a7}8.3\n---------------------------------------------------------------------------\n\n-- Parse resume_condition_json into a matcher table used by\n-- evaluate_signal_against_condition and is_condition_satisfied.\n-- @param json JSON string of the resume condition\n-- @return table with matchers array, match_mode, minimum_signal_count, etc.\n-- For composite conditions (RFC-014) the returned table also carries\n-- `composite = true` and a parsed `tree` spec; the matcher fields\n-- remain populated (empty) so legacy diagnostics that read\n-- `satisfied_count` / `total_matchers` on the wp_condition hash still\n-- work without crashing.\nlocal function initialize_condition(json)\n local spec = cjson.decode(json)\n -- RFC-014: composite path. The Rust-side serializer sets\n -- `composite = true` on multi-signal conditions. Legacy single/\n -- operator/timeout paths continue to use the matcher array below.\n if spec.composite then\n if spec.v and tonumber(spec.v) ~= 1 then\n -- Future-proofing: unknown version. 
RFC-014 \u{a7}7.2 rejects v>1.\n return {\n condition_type = \"composite\",\n composite = true,\n invalid = \"invalid_resume_condition\",\n matchers = {},\n total_matchers = 0,\n satisfied_count = 0,\n closed = false,\n }\n end\n return {\n condition_type = \"composite\",\n composite = true,\n tree = spec.tree,\n match_mode = \"composite\",\n minimum_signal_count = 1,\n total_matchers = 0,\n satisfied_count = 0,\n matchers = {},\n closed = false,\n }\n end\n local matchers = {}\n local names = spec.required_signal_names or {}\n if #names == 0 then\n -- Empty required_signal_names acts as wildcard: ANY signal satisfies the condition.\n -- To require explicit operator resume (no signal match), pass a sentinel name\n -- that no real signal will match, or use a different resume mechanism.\n matchers[1] = { name = \"\", satisfied = false, signal_id = \"\" }\n else\n for i, name in ipairs(names) do\n matchers[i] = { name = name, satisfied = false, signal_id = \"\" }\n end\n end\n return {\n condition_type = spec.condition_type or \"signal_set\",\n match_mode = spec.signal_match_mode or \"any\",\n minimum_signal_count = tonumber(spec.minimum_signal_count or \"1\"),\n total_matchers = #names > 0 and #names or 1,\n satisfied_count = 0,\n matchers = matchers,\n closed = false,\n }\nend\n\n-- Write condition state to a dedicated condition hash key.\n-- @param key Valkey key for the condition hash\n-- @param cond condition table from initialize_condition\n-- @param now_ms current timestamp\nlocal function write_condition_hash(key, cond, now_ms)\n -- RFC-014 composite path: write a minimal marker + the parsed tree.\n -- The composite evaluator reads `composite` + `tree_json` at signal\n -- delivery time; legacy `satisfied_count` / `total_matchers` fields\n -- stay zero so operator diagnostics that read them don\'t blow up.\n if cond.composite then\n redis.call(\"HSET\", key,\n \"condition_type\", \"composite\",\n \"composite\", \"1\",\n \"match_mode\", \"composite\",\n \"minimum_signal_count\", \"1\",\n \"total_matchers\", \"0\",\n \"satisfied_count\", \"0\",\n \"closed\", cond.closed and \"1\" or \"0\",\n \"updated_at\", tostring(now_ms),\n \"tree_json\", cond.tree and cjson.encode(cond.tree) or \"\")\n return\n end\n local fields = {\n \"condition_type\", cond.condition_type,\n \"match_mode\", cond.match_mode,\n \"minimum_signal_count\", tostring(cond.minimum_signal_count),\n \"total_matchers\", tostring(cond.total_matchers),\n \"satisfied_count\", tostring(cond.satisfied_count),\n \"closed\", cond.closed and \"1\" or \"0\",\n \"updated_at\", tostring(now_ms),\n }\n for i = 1, cond.total_matchers do\n local m = cond.matchers[i]\n local idx = i - 1 -- external field names remain 0-based for wire compat\n fields[#fields + 1] = \"matcher:\" .. idx .. \":name\"\n fields[#fields + 1] = m.name\n fields[#fields + 1] = \"matcher:\" .. idx .. \":satisfied\"\n fields[#fields + 1] = m.satisfied and \"1\" or \"0\"\n fields[#fields + 1] = \"matcher:\" .. idx .. \":signal_id\"\n fields[#fields + 1] = m.signal_id\n end\n redis.call(\"HSET\", key, unpack(fields))\nend\n\n-- Match a signal against the condition\'s matchers. 
Mutates cond in-place.\n-- @param cond condition table from initialize_condition\n-- @param signal_name signal name string\n-- @param signal_id signal ID string\n-- @return true if this signal matched a matcher, false otherwise\nlocal function evaluate_signal_against_condition(cond, signal_name, signal_id)\n for i = 1, cond.total_matchers do\n local m = cond.matchers[i]\n if not m.satisfied then\n -- Empty name = wildcard matcher (matches any signal)\n if m.name == \"\" or m.name == signal_name then\n m.satisfied = true\n m.signal_id = signal_id or \"\"\n cond.satisfied_count = cond.satisfied_count + 1\n return true\n end\n end\n end\n return false\nend\n\n-- Check if the overall condition is satisfied based on mode.\n-- @param cond condition table\n-- @return true if condition is satisfied\nlocal function is_condition_satisfied(cond)\n local mode = cond.match_mode\n local min_count = cond.minimum_signal_count\n if mode == \"any\" then\n return cond.satisfied_count >= min_count\n elseif mode == \"all\" then\n return cond.satisfied_count >= cond.total_matchers\n end\n -- count(n) mode \u{2014} same as any with minimum_signal_count = n\n return cond.satisfied_count >= min_count\nend\n\n---------------------------------------------------------------------------\n-- RFC-014 composite condition evaluator\n--\n-- Storage model (\u{a7}3.1):\n-- satisfied_set_key: SET of satisfier tokens (\"wp:<id>\" | \"sig:<id>\" |\n-- \"src:<type>:<identity>\" | \"node:<path>\").\n-- member_map_key: HASH of waitpoint_key \u{2192} node_path (write-once at\n-- suspend-time, informational only under single-\n-- waitpoint scoping \u{2014} see \u{a7}3.1).\n--\n-- Per-signal algorithm (\u{a7}3.3):\n-- 1. Walk the tree, identifying nodes this signal may contribute to.\n-- 2. For each such node, apply the local matcher (step 2.5); reject on\n-- mismatch with `signal_ignored_matcher_failed`.\n-- 3. SADD the per-kind satisfier token to satisfied_set. Dedup returns\n-- `appended_to_waitpoint_duplicate`.\n-- 4. Re-evaluate the tree: if the root is satisfied, emit\n-- `resume_condition_satisfied`.\n---------------------------------------------------------------------------\n\n-- Does a matcher accept this signal? Matches RFC-013 SignalMatcher shape.\n-- ByName compares by signal_name; Wildcard accepts any.\nlocal function matcher_accepts(matcher, signal_name)\n if matcher == nil or type(matcher) ~= \"table\" then\n return true\n end\n if matcher.kind == \"Wildcard\" or matcher.kind == nil then\n return true\n end\n if matcher.kind == \"ByName\" then\n return matcher.name == signal_name\n end\n return false\nend\n\n-- Compute the satisfier token for a Count node given CountKind + signal.\nlocal function count_satisfier_token(kind, signal, waitpoint_id)\n if kind == \"DistinctSignals\" then\n return \"sig:\" .. (signal.signal_id or \"\")\n elseif kind == \"DistinctSources\" then\n return \"src:\" .. (signal.source_type or \"\") .. \":\" .. (signal.source_identity or \"\")\n end\n -- DistinctWaitpoints (default)\n return \"wp:\" .. (waitpoint_id or \"\")\nend\n\n-- Set-membership helper: tokens table keyed-by-name (in-memory snapshot).\nlocal function set_has(tokens, token)\n return tokens[token] == true\nend\n\n-- Count how many satisfier tokens a Count node currently has in the set.\n-- For DistinctWaitpoints we count tokens of the form `wp:<id>` where <id>\n-- corresponds to any waitpoint_key in node.waitpoints. 
For\n-- DistinctSignals / DistinctSources we count tokens of the right prefix\n-- that were SADDed while delivering a signal matching this node (scoped\n-- correctness relies on the per-node matcher filter at step 2.5).\nlocal function count_node_satisfiers(node, tokens)\n local kind = node.count_kind or \"DistinctWaitpoints\"\n local count = 0\n -- For single-waitpoint composites (current scope), `tokens` contains\n -- the satisfier tokens of signals that landed on THIS node after\n -- passing the matcher filter. The tokens are already namespaced by\n -- the delivery routine so counting by prefix is sufficient.\n local prefix\n if kind == \"DistinctWaitpoints\" then\n prefix = \"wp:\"\n elseif kind == \"DistinctSignals\" then\n prefix = \"sig:\"\n else\n prefix = \"src:\"\n end\n local plen = #prefix\n for token, _ in pairs(tokens) do\n if string.sub(token, 1, plen) == prefix then\n count = count + 1\n end\n end\n return count\nend\n\n-- Evaluate a node against the current satisfied-set snapshot.\n-- @param node parsed tree node\n-- @param tokens table<string, true> snapshot of satisfied_set\n-- @return true if satisfied\nlocal function composite_evaluate_node(node, tokens)\n if node == nil then return false end\n local kind = node.kind\n if kind == \"Single\" then\n -- Leaf: the wp:<id> token is the satisfaction marker (RFC \u{a7}3.2).\n -- We look up by path instead so a same-waitpoint AllOf can\n -- disambiguate by matcher; the delivery routine writes\n -- `leaf:<path>` tokens for satisfied Single-under-AllOf leaves.\n return set_has(tokens, \"leaf:\" .. (node.path or \"\"))\n elseif kind == \"AllOf\" then\n local members = node.members or {}\n for _, child in ipairs(members) do\n if child.kind == \"Single\" then\n if not set_has(tokens, \"leaf:\" .. (child.path or \"\")) then\n return false\n end\n else\n if not set_has(tokens, \"node:\" .. (child.path or \"\")) then\n return false\n end\n end\n end\n return true\n elseif kind == \"Count\" then\n -- Timeout token short-circuits a Count (RFC \u{a7}6.1).\n if tokens[\"timeout:*\"] == true then return true end\n return count_node_satisfiers(node, tokens) >= (tonumber(node.n) or 1)\n end\n -- NeverBySignal / unknown\n return false\nend\n\n-- Load satisfied_set contents into an in-memory table<string, true>.\nlocal function load_satisfied_set(satisfied_set_key)\n local members = redis.call(\"SMEMBERS\", satisfied_set_key)\n local t = {}\n for _, m in ipairs(members) do\n t[m] = true\n end\n return t\nend\n\n-- Find nodes in the tree that this signal may contribute to. Returns a\n-- list of {node, ancestors} where ancestors is the path from the node\n-- upward to the root (exclusive of the node itself).\nlocal function composite_collect_candidate_nodes(tree, waitpoint_key, signal_name)\n -- For single-waitpoint composites, every Single/Count node whose\n -- waitpoint_key (or waitpoints list) contains `waitpoint_key` is a\n -- candidate. 
Walk the tree DFS, tracking ancestors.\n local results = {}\n local function walk(node, ancestors)\n if node == nil then return end\n local kind = node.kind\n if kind == \"Single\" then\n if node.waitpoint_key == waitpoint_key then\n results[#results + 1] = { node = node, ancestors = ancestors }\n end\n elseif kind == \"Count\" then\n local matches_wp = false\n for _, wk in ipairs(node.waitpoints or {}) do\n if wk == waitpoint_key then matches_wp = true; break end\n end\n if matches_wp then\n results[#results + 1] = { node = node, ancestors = ancestors }\n end\n elseif kind == \"AllOf\" then\n local nested = {}\n for i = 1, #ancestors do nested[i] = ancestors[i] end\n nested[#nested + 1] = node\n for _, child in ipairs(node.members or {}) do\n walk(child, nested)\n end\n end\n end\n walk(tree, {})\n return results\nend\n\n-- Emit initial member_map for operator diagnostics (RFC \u{a7}4.4).\n-- Accepts a single waitpoint_key (patterns 1 + 2) or a list of\n-- waitpoint_keys (RFC-014 Pattern 3). Every candidate node path for\n-- each key maps back to that key. Write-once at suspend-time.\nlocal function seed_composite_member_map(member_map_key, tree, waitpoint_keys)\n if type(waitpoint_keys) == \"string\" then\n waitpoint_keys = { waitpoint_keys }\n end\n local fields = {}\n for _, wk in ipairs(waitpoint_keys or {}) do\n local cands = composite_collect_candidate_nodes(tree, wk, nil)\n for _, c in ipairs(cands) do\n fields[#fields + 1] = \"wp:\" .. wk .. \":\" .. (c.node.path or \"\")\n fields[#fields + 1] = c.node.path or \"\"\n end\n end\n if #fields > 0 then\n redis.call(\"HSET\", member_map_key, unpack(fields))\n end\nend\n\n-- Deliver a signal against a composite condition. Returns a table:\n-- { effect = <string>, resume = <bool>, closer = <signal_id>,\n-- all_satisfiers_json = <string> }\n-- satisfied_set_key / member_map_key are RFC-014 \u{a7}3.1 keys.\nlocal function composite_deliver_signal(\n tree, satisfied_set_key, member_map_key, waitpoint_id, waitpoint_key,\n signal)\n local tokens = load_satisfied_set(satisfied_set_key)\n local candidates = composite_collect_candidate_nodes(tree, waitpoint_key, signal.signal_name)\n if #candidates == 0 then\n return { effect = \"signal_ignored_not_in_condition\", resume = false }\n end\n\n local added_any = false\n local matcher_failed_all = true\n -- RFC \u{a7}3.3 step 2.5: per-node matcher filter. If ANY candidate node\'s\n -- matcher accepts this signal, we proceed; if all reject, bail with\n -- `signal_ignored_matcher_failed`.\n for _, c in ipairs(candidates) do\n local node = c.node\n local matcher = node.matcher\n if matcher_accepts(matcher, signal.signal_name) then\n matcher_failed_all = false\n local token\n if node.kind == \"Single\" then\n token = \"leaf:\" .. (node.path or \"\")\n else -- Count\n token = count_satisfier_token(node.count_kind, signal, waitpoint_id)\n end\n local added = redis.call(\"SADD\", satisfied_set_key, token)\n if added == 1 then\n added_any = true\n -- Track the signal id for `all_satisfier_signals` emission.\n redis.call(\"SADD\", satisfied_set_key .. 
\":signals\", signal.signal_id)\n end\n end\n end\n\n if matcher_failed_all then\n return { effect = \"signal_ignored_matcher_failed\", resume = false }\n end\n if not added_any then\n return { effect = \"appended_to_waitpoint_duplicate\", resume = false }\n end\n\n -- Re-load + re-evaluate (cheap: depth \u{2264} 4).\n tokens = load_satisfied_set(satisfied_set_key)\n\n -- Propagate non-leaf child satisfaction upward: for each AllOf\n -- ancestor whose child is now satisfied, SADD `node:<child_path>`.\n -- Bounded by depth.\n local function propagate(node)\n if node == nil or node.kind ~= \"AllOf\" then return end\n for _, child in ipairs(node.members or {}) do\n if child.kind ~= \"Single\" then\n if composite_evaluate_node(child, tokens) then\n local added2 = redis.call(\"SADD\", satisfied_set_key, \"node:\" .. (child.path or \"\"))\n if added2 == 1 then\n tokens[\"node:\" .. (child.path or \"\")] = true\n end\n propagate(child)\n end\n end\n end\n end\n propagate(tree)\n\n local root_sat = composite_evaluate_node(tree, tokens)\n if root_sat then\n -- Gather satisfier signals (the ids we SADDed during this and prior\n -- deliveries).\n local sig_ids = redis.call(\"SMEMBERS\", satisfied_set_key .. \":signals\")\n return {\n effect = \"resume_condition_satisfied\",\n resume = true,\n closer = signal.signal_id,\n all_satisfiers_json = cjson.encode(sig_ids),\n }\n end\n return { effect = \"appended_to_waitpoint\", resume = false }\nend\n\n-- Clear composite keys on suspension termination (\u{a7}3.1.1).\nlocal function composite_cleanup(satisfied_set_key, member_map_key)\n redis.call(\"DEL\", satisfied_set_key)\n redis.call(\"DEL\", satisfied_set_key .. \":signals\")\n redis.call(\"DEL\", member_map_key)\nend\n\n-- RFC-014 Pattern 3 \u{2014} close per-extra waitpoint storage on\n-- cancel/expire/resume. The extras list is stored in\n-- `suspension_current.additional_waitpoints_json` as a JSON array of\n-- {waitpoint_id, waitpoint_key} pairs at suspend-time. Cleanup owners\n-- reconstruct the wp_hash + wp_condition keys dynamically from the\n-- suspension\'s hash tag (same trick ff_cancel_execution uses for lane\n-- keys) so no KEYS arity growth is needed.\n--\n-- @param suspension_current_key e.g. \"ff:exec:{p:12}:E-...:suspension:current\"\n-- @param additional_json value of HGET suspension_current additional_waitpoints_json\n-- @param close_state_fields table of HSET fields to apply to each extra waitpoint hash\n-- (e.g. {\"state\",\"closed\",\"close_reason\",\"cancelled\",\"closed_at\",\"<now>\"})\n-- @param close_cond_fields table of HSET fields to apply to each extra wp_condition hash\nlocal function close_additional_waitpoints(suspension_current_key, additional_json,\n close_state_fields, close_cond_fields)\n if not additional_json or additional_json == \"\" or additional_json == \"[]\" then\n return\n end\n local ok_dec, pairs_list = pcall(cjson.decode, additional_json)\n if not ok_dec or type(pairs_list) ~= \"table\" then return end\n local tag = string.match(suspension_current_key, \"(%b{})\")\n if not tag then return end\n for _, entry in ipairs(pairs_list) do\n local wp_id = entry.waitpoint_id\n if type(wp_id) == \"string\" and wp_id ~= \"\" then\n local wp_hash_key = \"ff:wp:\" .. tag .. \":\" .. wp_id\n local wp_cond_key = \"ff:wp:\" .. tag .. \":\" .. wp_id .. 
\":condition\"\n if redis.call(\"EXISTS\", wp_hash_key) == 1 and close_state_fields and #close_state_fields > 0 then\n redis.call(\"HSET\", wp_hash_key, unpack(close_state_fields))\n end\n if redis.call(\"EXISTS\", wp_cond_key) == 1 and close_cond_fields and #close_cond_fields > 0 then\n redis.call(\"HSET\", wp_cond_key, unpack(close_cond_fields))\n end\n end\n end\nend\n\n-- Extract a named field from a Valkey Stream entry\'s flat field array.\n-- Stream entries return {id, {field1, val1, field2, val2, ...}}.\n-- This operates on the inner flat array.\n-- @param fields flat array from stream entry[2]\n-- @param name field name to extract\n-- @return value string or nil\nlocal function extract_field(fields, name)\n for i = 1, #fields, 2 do\n if fields[i] == name then\n return fields[i + 1]\n end\n end\n return nil\nend\n\n---------------------------------------------------------------------------\n-- Suspension helpers (RFC-004)\n---------------------------------------------------------------------------\n\n-- Returns an empty signal summary JSON string for initial suspension record.\nlocal function initial_signal_summary_json()\n return \'{\"total_count\":0,\"matched_count\":0,\"signal_names\":[]}\'\nend\n\n-- Validates that a pending waitpoint can be activated by a suspension.\n-- Returns error tuple on failure, nil on success.\n-- @param wp_raw flat array from HGETALL on waitpoint hash\n-- @param eid expected execution_id\n-- @param att_idx expected attempt_index (string)\n-- @param now_ms current timestamp\nlocal function validate_pending_waitpoint(wp_raw, eid, att_idx, now_ms)\n if #wp_raw == 0 then\n return err(\"waitpoint_not_found\")\n end\n local wp = hgetall_to_table(wp_raw)\n if wp.state ~= \"pending\" then\n return err(\"waitpoint_not_pending\")\n end\n if wp.execution_id ~= eid then\n return err(\"invalid_waitpoint_for_execution\")\n end\n if tostring(wp.attempt_index) ~= tostring(att_idx) then\n return err(\"invalid_waitpoint_for_execution\")\n end\n -- Check if pending waitpoint has expired\n if is_set(wp.expires_at) and tonumber(wp.expires_at) <= now_ms then\n return err(\"pending_waitpoint_expired\")\n end\n return nil\nend\n\n-- Validates that a suspension record exists and is open (not closed).\n-- Returns error tuple on failure, nil on success. 
Also returns the parsed table.\n-- @param susp_raw flat array from HGETALL on suspension:current\nlocal function assert_active_suspension(susp_raw)\n if #susp_raw == 0 then\n return err(\"execution_not_suspended\")\n end\n local susp = hgetall_to_table(susp_raw)\n if not is_set(susp.suspension_id) then\n return err(\"execution_not_suspended\")\n end\n if is_set(susp.closed_at) then\n return err(\"execution_not_suspended\")\n end\n return nil, susp\nend\n\n-- Validates that a waitpoint belongs to the expected execution + suspension.\n-- Returns error tuple on failure, nil on success.\n-- @param wp_raw flat array from HGETALL on waitpoint hash\n-- @param eid expected execution_id\n-- @param sid expected suspension_id\n-- @param wid expected waitpoint_id\nlocal function assert_waitpoint_belongs(wp_raw, eid, sid, wid)\n if #wp_raw == 0 then\n return err(\"waitpoint_not_found\")\n end\n local wp = hgetall_to_table(wp_raw)\n if wp.execution_id ~= eid then\n return err(\"invalid_waitpoint_for_execution\")\n end\n if is_set(sid) and wp.suspension_id ~= sid then\n return err(\"invalid_waitpoint_for_execution\")\n end\n if is_set(wid) and wp.waitpoint_id ~= wid then\n return err(\"invalid_waitpoint_for_execution\")\n end\n return nil\nend\n\n---------------------------------------------------------------------------\n-- Policy\n---------------------------------------------------------------------------\n\n-- Decode a JSON policy string into flat key-value pairs suitable for HSET.\n-- @param json JSON string of the policy object\n-- @return flat array {k1, v1, k2, v2, ...} for use with redis.call(\"HSET\", key, unpack(...))\nlocal function unpack_policy(json)\n local policy = cjson.decode(json)\n local flat = {}\n for k, v in pairs(policy) do\n flat[#flat + 1] = k\n if type(v) == \"table\" then\n flat[#flat + 1] = cjson.encode(v)\n else\n flat[#flat + 1] = tostring(v)\n end\n end\n return flat\nend\n\n\n-- source: lua/version.lua\n-- FlowFabric library version check\n-- Returns the library version string. Used by the loader to detect\n-- whether the library is loaded and at the expected version.\n--\n-- Bump this string whenever any registered function\'s KEYS or ARGV arity\n-- changes, or a new function is added. Mismatched versions force\n-- `FUNCTION LOAD REPLACE` so old binaries cannot FCALL a library whose key\n-- signatures they expect a different shape for.\n--\n-- SINGLE SOURCE OF TRUTH: Rust\'s `LIBRARY_VERSION` is extracted from the\n-- `return \'X\'` literal below by `scripts/gen-ff-script-lua.sh`, which\n-- writes `crates/ff-script/src/flowfabric_lua_version`. Rust reads that\n-- file via `include_str!`. Do NOT maintain a separate Rust literal.\n-- Extract contract: the body MUST contain exactly one `return \'X\'` literal\n-- with single quotes (not double). CI runs the gen script and diffs; any\n-- drift fails the build.\n\nredis.register_function(\'ff_version\', function(keys, args)\n return \'27\'\nend)\n\n\n-- source: lua/lease.lua\n-- FlowFabric lease management functions\n-- Reference: RFC-003 (Lease and Fencing), RFC-010 \u{a7}4 (function inventory)\n--\n-- Depends on helpers: ok, err, ok_already_satisfied, hgetall_to_table,\n-- is_set, validate_lease, mark_expired\n\n---------------------------------------------------------------------------\n-- #6 ff_renew_lease\n--\n-- Extends a still-valid lease. 
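On success the reply is\n-- {1, \"OK\", \"<new_lease_expires_at_ms>\"}; failures return err() tuples such\n-- as stale_lease, lease_expired, or fence_required. 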
Preserves lease_id and lease_epoch.\n-- KEYS (4): exec_core, lease_current, lease_history, lease_expiry_zset\n-- ARGV (7): execution_id, attempt_index, attempt_id,\n-- lease_id, lease_epoch,\n-- lease_ttl_ms, lease_history_grace_ms\n---------------------------------------------------------------------------\nredis.register_function(\'ff_renew_lease\', function(keys, args)\n -- Positional KEYS\n local K = {\n core_key = keys[1],\n lease_current = keys[2],\n lease_history = keys[3],\n lease_expiry_key = keys[4],\n }\n\n -- Positional ARGV\n local lease_ttl_n = require_number(args[6], \"lease_ttl_ms\")\n if type(lease_ttl_n) == \"table\" then return lease_ttl_n end\n local grace_n = require_number(args[7], \"lease_history_grace_ms\")\n if type(grace_n) == \"table\" then return grace_n end\n\n local A = {\n execution_id = args[1],\n attempt_index = args[2],\n attempt_id = args[3] or \"\",\n lease_id = args[4] or \"\",\n lease_epoch = args[5] or \"\",\n lease_ttl_ms = lease_ttl_n,\n lease_history_grace_ms = grace_n,\n }\n\n -- Server time\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n -- Read execution core\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then\n return err(\"execution_not_found\")\n end\n local core = hgetall_to_table(raw)\n\n -- RFC #58.5 \u{2014} renew_lease is only meaningful when the caller holds the\n -- current lease. Hard-reject empty or partial fence triples; there is\n -- no operator-override escape hatch (owner\'s directive \u{2014} cleaner\n -- behavior than resolving-then-passing-self-match).\n local fence, must_check_or_err = resolve_lease_fence(core, A)\n if not fence then return must_check_or_err end\n if not must_check_or_err then return err(\"fence_required\") end\n\n -- Validate lifecycle\n if core.lifecycle_phase ~= \"active\" then\n return err(\"execution_not_active\",\n core.terminal_outcome or \"\",\n core.current_lease_epoch or \"\",\n core.lifecycle_phase or \"\",\n core.current_attempt_id or \"\")\n end\n\n -- Check revocation\n if core.ownership_state == \"lease_revoked\" or is_set(core.lease_revoked_at) then\n return err(\"lease_revoked\")\n end\n\n -- Check expiry (if expired, mark and reject)\n if tonumber(core.lease_expires_at or \"0\") <= now_ms then\n mark_expired(\n { core_key = K.core_key, lease_history_key = K.lease_history },\n core, now_ms, 1000)\n return err(\"lease_expired\")\n end\n\n -- Validate caller identity\n if tostring(core.current_attempt_index) ~= A.attempt_index then\n return err(\"stale_lease\")\n end\n if core.current_attempt_id ~= A.attempt_id then\n return err(\"stale_lease\")\n end\n if core.current_lease_id ~= A.lease_id then\n return err(\"stale_lease\")\n end\n if tostring(core.current_lease_epoch) ~= A.lease_epoch then\n return err(\"stale_lease\")\n end\n\n -- Compute new deadlines\n local new_expires_at = now_ms + A.lease_ttl_ms\n local new_renewal_deadline = now_ms + math.floor(A.lease_ttl_ms * 2 / 3)\n\n -- Update exec_core\n redis.call(\"HSET\", K.core_key,\n \"lease_last_renewed_at\", tostring(now_ms),\n \"lease_renewal_deadline\", tostring(new_renewal_deadline),\n \"lease_expires_at\", tostring(new_expires_at),\n \"last_mutation_at\", tostring(now_ms))\n\n -- Update lease_current\n redis.call(\"HSET\", K.lease_current,\n \"last_renewed_at\", tostring(now_ms),\n \"renewal_deadline\", tostring(new_renewal_deadline),\n \"expires_at\", tostring(new_expires_at))\n redis.call(\"PEXPIREAT\", K.lease_current,\n new_expires_at + 
A.lease_history_grace_ms)\n\n -- Update lease_expiry index\n redis.call(\"ZADD\", K.lease_expiry_key, new_expires_at, A.execution_id)\n\n -- Renewal history event OFF for v1 (RFC-010 \u{a7}4.8g).\n -- The exec_core field lease_last_renewed_at provides the latest renewal\n -- timestamp without stream overhead. Enable per-lane when detailed\n -- ownership audit trails are needed.\n\n return ok(tostring(new_expires_at))\nend)\n\n---------------------------------------------------------------------------\n-- #28 ff_mark_lease_expired_if_due\n--\n-- Called by the lease expiry scanner. Re-validates that the lease is\n-- actually expired (guards against renewal since the ZRANGEBYSCORE read).\n-- Idempotent: no-op if already expired/reclaimed/revoked or not yet due.\n--\n-- KEYS (4): exec_core, lease_current, lease_expiry_zset, lease_history\n-- ARGV (1): execution_id\n---------------------------------------------------------------------------\nredis.register_function(\'ff_mark_lease_expired_if_due\', function(keys, args)\n -- Positional KEYS\n local K = {\n core_key = keys[1],\n lease_current = keys[2],\n lease_expiry_key = keys[3],\n lease_history = keys[4],\n }\n\n local execution_id = args[1]\n\n -- Server time\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n -- Read execution core\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then\n -- Execution gone \u{2014} clean up stale index entry\n redis.call(\"ZREM\", K.lease_expiry_key, execution_id)\n return ok_already_satisfied(\"execution_not_found\")\n end\n local core = hgetall_to_table(raw)\n\n -- Guard: not active \u{2192} nothing to expire\n if core.lifecycle_phase ~= \"active\" then\n -- Stale index entry: execution already left active phase.\n -- Clean up and return.\n redis.call(\"ZREM\", K.lease_expiry_key, execution_id)\n return ok_already_satisfied(\"not_active\")\n end\n\n -- Guard: already marked expired or ownership already cleared\n if core.ownership_state == \"lease_expired_reclaimable\" then\n return ok_already_satisfied(\"already_expired\")\n end\n if core.ownership_state == \"lease_revoked\" then\n return ok_already_satisfied(\"already_revoked\")\n end\n if core.ownership_state == \"unowned\" then\n -- Shouldn\'t happen for active phase, but defensive\n redis.call(\"ZREM\", K.lease_expiry_key, execution_id)\n return ok_already_satisfied(\"unowned\")\n end\n\n -- Check if lease is actually expired\n local expires_at = tonumber(core.lease_expires_at or \"0\")\n if expires_at > now_ms then\n -- Lease was renewed since the scanner read the index.\n -- The ZADD in renew_lease already updated the score.\n return ok_already_satisfied(\"not_yet_expired\")\n end\n\n -- Mark expired using shared helper\n -- Sets ownership_state=lease_expired_reclaimable, blocking_reason,\n -- blocking_detail, lease_expired_at, XADD lease_history \"expired\" event.\n mark_expired(\n { core_key = K.core_key, lease_history_key = K.lease_history },\n core, now_ms, 1000)\n\n -- NOTE: Do NOT ZREM from lease_expiry_key here.\n -- The entry stays so the scheduler (or a subsequent scanner cycle)\n -- can discover reclaimable executions from the same index.\n -- reclaim_execution or cancel_execution removes it.\n\n return ok(\"marked_expired\")\nend)\n\n---------------------------------------------------------------------------\n-- #8 ff_revoke_lease\n--\n-- Operator-initiated lease revocation. Immediate \u{2014} no grace period.\n-- Does NOT terminal-transition the execution. 
It clears the current\n-- owner and places the execution into lease_revoked ownership condition.\n-- The scheduler or operator must subsequently reclaim or cancel.\n--\n-- KEYS (5): exec_core, lease_current, lease_history,\n-- lease_expiry_zset, worker_leases\n-- ARGV (3): execution_id, expected_lease_id (or \"\" to skip check),\n-- revoke_reason\n---------------------------------------------------------------------------\nredis.register_function(\'ff_revoke_lease\', function(keys, args)\n -- Positional KEYS\n local K = {\n core_key = keys[1],\n lease_current = keys[2],\n lease_history = keys[3],\n lease_expiry_key = keys[4],\n worker_leases = keys[5],\n }\n\n -- Positional ARGV\n local A = {\n execution_id = args[1],\n expected_lease_id = args[2],\n revoke_reason = args[3],\n }\n\n -- Server time\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n -- Read execution core\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then\n return err(\"execution_not_found\")\n end\n local core = hgetall_to_table(raw)\n\n -- Must be active + leased\n if core.lifecycle_phase ~= \"active\" then\n return err(\"execution_not_active\",\n core.terminal_outcome or \"\",\n core.current_lease_epoch or \"\",\n core.lifecycle_phase or \"\",\n core.current_attempt_id or \"\")\n end\n if core.ownership_state ~= \"leased\" then\n if core.ownership_state == \"lease_revoked\" then\n return ok_already_satisfied(\"already_revoked\")\n end\n if core.ownership_state == \"lease_expired_reclaimable\" then\n return ok_already_satisfied(\"already_expired\")\n end\n return err(\"no_active_lease\")\n end\n\n -- Optional lease_id check (for targeted revocation)\n if is_set(A.expected_lease_id) and core.current_lease_id ~= A.expected_lease_id then\n return err(\"stale_lease\",\n \"expected \" .. A.expected_lease_id .. \" but current is \" .. (core.current_lease_id or \"\"))\n end\n\n -- Capture identity for history before clearing\n local lease_id = core.current_lease_id or \"\"\n local lease_epoch = core.current_lease_epoch or \"\"\n local attempt_index = core.current_attempt_index or \"\"\n local attempt_id = core.current_attempt_id or \"\"\n local worker_id = core.current_worker_id or \"\"\n local worker_instance_id = core.current_worker_instance_id or \"\"\n\n -- Update exec_core: all 7 state vector dimensions + public_state\n redis.call(\"HSET\", K.core_key,\n -- 7 state vector dimensions\n \"lifecycle_phase\", \"active\",\n \"ownership_state\", \"lease_revoked\",\n \"eligibility_state\", core.eligibility_state or \"not_applicable\",\n \"blocking_reason\", \"waiting_for_worker\",\n \"blocking_detail\", \"lease revoked: \" .. 
(A.revoke_reason or \"operator\"),\n \"terminal_outcome\", \"none\",\n \"attempt_state\", \"attempt_interrupted\",\n -- Derived public state (RFC-001 \u{a7}2.4 D3: active \u{2192} public_state=active)\n \"public_state\", \"active\",\n -- Revocation fields\n \"lease_revoked_at\", tostring(now_ms),\n \"lease_revoke_reason\", A.revoke_reason or \"operator\",\n \"last_mutation_at\", tostring(now_ms))\n\n -- DEL lease_current\n redis.call(\"DEL\", K.lease_current)\n\n -- ZREM from lease expiry index\n redis.call(\"ZREM\", K.lease_expiry_key, A.execution_id)\n\n -- SREM from worker leases set\n redis.call(\"SREM\", K.worker_leases, A.execution_id)\n\n -- Append revoked event to lease history\n redis.call(\"XADD\", K.lease_history, \"MAXLEN\", \"~\", 1000, \"*\",\n \"event\", \"revoked\",\n \"lease_id\", lease_id,\n \"lease_epoch\", lease_epoch,\n \"attempt_index\", attempt_index,\n \"attempt_id\", attempt_id,\n \"worker_id\", worker_id,\n \"worker_instance_id\", worker_instance_id,\n \"reason\", A.revoke_reason or \"operator\",\n \"ts\", tostring(now_ms))\n\n return ok(\"revoked\", lease_id, lease_epoch)\nend)\n-- NOTE: ff_issue_reclaim_grant is in scheduling.lua\n-- NOTE: ff_reclaim_execution is in execution.lua\n\n\n-- source: lua/execution.lua\n-- FlowFabric execution lifecycle functions\n-- Reference: RFC-001 (Execution), RFC-010 \u{a7}4 (function inventory)\n--\n-- Depends on helpers: ok, err, hgetall_to_table, is_set,\n-- validate_lease, validate_lease_and_mark_expired,\n-- clear_lease_and_indexes, defensive_zrem_all_indexes, unpack_policy\n\n---------------------------------------------------------------------------\n-- #0 ff_create_execution\n--\n-- Creates a new execution: core hash, payload, policy, tags, indexes.\n-- Idempotent via idempotency key (SET NX with TTL).\n--\n-- KEYS (8): exec_core, payload_key, policy_key, tags_key,\n-- eligible_or_delayed_zset, idem_key,\n-- execution_deadline_zset, all_executions_set\n-- ARGV (13): execution_id, namespace, lane_id, execution_kind,\n-- priority, creator_identity, policy_json,\n-- input_payload, delay_until, dedup_ttl_ms,\n-- tags_json, execution_deadline_at, partition_id\n---------------------------------------------------------------------------\nredis.register_function(\'ff_create_execution\', function(keys, args)\n local K = {\n core_key = keys[1],\n payload_key = keys[2],\n policy_key = keys[3],\n tags_key = keys[4],\n scheduling_zset = keys[5], -- eligible or delayed\n idem_key = keys[6],\n deadline_zset = keys[7],\n all_executions_set = keys[8],\n }\n\n local priority_n = require_number(args[5], \"priority\")\n if type(priority_n) == \"table\" then return priority_n end\n\n local A = {\n execution_id = args[1],\n namespace = args[2],\n lane_id = args[3],\n execution_kind = args[4],\n priority = priority_n,\n creator_identity = args[6],\n policy_json = args[7],\n input_payload = args[8],\n delay_until = args[9], -- \"\" or ms timestamp\n dedup_ttl_ms = args[10], -- \"\" or ms\n tags_json = args[11], -- \"\" or JSON object\n execution_deadline_at = args[12], -- \"\" or ms timestamp\n partition_id = args[13],\n }\n\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n -- Clamp priority to [0, 9000]. The composite eligible-ZSET score formula\n -- -(priority * 1_000_000_000_000) + created_at uses Lua doubles (IEEE 754,\n -- 53-bit mantissa). 
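-- Illustrative arithmetic (hypothetical values, not from any fixture): with\n -- priority = 5 and created_at_ms = 1.7e12, the score is\n -- -(5 * 1e12) + 1.7e12 = -3.3e12, comfortably inside the 2^53 (~9.007e15)\n -- exact-integer range of a double.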
priority > 9007 overflows the multiplication step;\n -- priority > ~8300 overflows the combined score when created_at is added.\n -- Clamping to 9000 keeps a safe margin while supporting 4+ orders of magnitude.\n if A.priority < 0 then A.priority = 0 end\n if A.priority > 9000 then A.priority = 9000 end\n\n -- 1. Idempotency check (only when idem_key is a real key, not the noop placeholder)\n if K.idem_key ~= \"\" and not string.find(K.idem_key, \"ff:noop:\") then\n local existing = redis.call(\"GET\", K.idem_key)\n if existing then\n return ok_duplicate(existing)\n end\n end\n\n -- 2. Guard: execution already exists\n if redis.call(\"EXISTS\", K.core_key) == 1 then\n return {1, \"DUPLICATE\", A.execution_id}\n end\n\n -- 2b. Policy JSON validation \u{2014} extract required_capabilities CSV now, BEFORE\n -- any write, so an error aborts without leaving a half-written exec_core.\n -- Fail-CLOSED on any malformed/typed input rather than silently defaulting\n -- to the empty-required wildcard \u{2014} required_capabilities is security-\n -- sensitive (an empty set matches any worker).\n --\n -- * An empty / \"{}\" policy_json \u{2192} no required_capabilities field written\n -- \u{2192} wildcard match, intentional.\n -- * A non-empty policy_json that fails cjson.decode \u{2192} fail with\n -- invalid_policy_json. An operator who MEANT \"no policy\" passes \"\".\n -- * routing_requirements.required_capabilities present but not an array\n -- \u{2192} fail with invalid_capabilities:required_not_array.\n -- * Any element that is not a non-empty string \u{2192} fail with\n -- invalid_capabilities:required:non_string_token. Silent-drop on\n -- `[\"gpu\", null, 42]` would erase real requirements.\n -- * Comma in a token \u{2192} fail (reserved delimiter, see ff_issue_claim_grant).\n -- * ASCII control byte (0x00-0x1F, 0x7F) or space (0x20) \u{2192} fail\n -- (invalid_capabilities:required:control_or_whitespace). Mirrors the\n -- Rust ingress in ff-sdk::FlowFabricWorker::connect and\n -- ff-scheduler::Scheduler::claim_for_worker (R3 relaxed printable-ASCII\n -- check to allow UTF-8 printable above 0x7F while still rejecting\n -- whitespace/control). This is the \"last line of defense\" for admin\n -- direct-HSET bypass: a required cap containing \"\\n\" or \"\\0\" is\n -- impossible to type, impossible to debug, and would silently pin an\n -- execution as unclaimable forever.\n -- * Bounds: same 4096 bytes / 256 tokens ceiling as the worker CSV.\n --\n -- Note on UTF-8: the byte-range test rejects ASCII control + space but\n -- accepts every byte \u{2265} 0x21 except 0x7F (DEL). UTF-8 multibyte sequences\n -- use only bytes 0x80-0xBF (continuation) or 0xC0-0xFD (lead), all above\n -- 0x7F, so i18n caps like \"\u{4e1c}\u{4eac}-gpu\" pass through intact. 
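-- Worked example (illustrative policy_json values only, not from any\n -- fixture or test):\n -- {\"routing_requirements\":{\"required_capabilities\":[\"zone-east\",\"gpu\"]}}\n -- -> tokens sorted and joined: required_capabilities = \"gpu,zone-east\"\n -- {\"routing_requirements\":{\"required_capabilities\":[\"gpu\",42]}}\n -- -> err(\"invalid_capabilities\", \"required:non_string_token\")\n -- {\"routing_requirements\":{\"required_capabilities\":[\"gpu,fast\"]}}\n -- -> err(\"invalid_capabilities\", \"required:comma_in_token\")\n -- \"\" or \"{}\" -> no required_capabilities field written (wildcard).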
See RFC-009 \u{a7}7.5.\n local required_caps_csv = nil\n if is_set(A.policy_json) and A.policy_json ~= \"{}\" then\n local ok_decode, policy = pcall(cjson.decode, A.policy_json)\n if not ok_decode then\n return err(\"invalid_policy_json\", \"malformed\")\n end\n if type(policy) == \"table\" and policy.routing_requirements ~= nil then\n if type(policy.routing_requirements) ~= \"table\" then\n return err(\"invalid_policy_json\", \"routing_requirements:not_object\")\n end\n local caps = policy.routing_requirements.required_capabilities\n if caps ~= nil then\n if type(caps) ~= \"table\" then\n return err(\"invalid_capabilities\", \"required:not_array\")\n end\n local list = {}\n for _, cap in ipairs(caps) do\n if type(cap) ~= \"string\" then\n return err(\"invalid_capabilities\", \"required:non_string_token\")\n end\n if #cap == 0 then\n return err(\"invalid_capabilities\", \"required:empty_token\")\n end\n if string.find(cap, \",\", 1, true) then\n return err(\"invalid_capabilities\", \"required:comma_in_token\")\n end\n -- Reject ASCII control (0x00-0x1F), DEL (0x7F), and space (0x20).\n -- Iterating byte-by-byte: any byte in 0x00..=0x20 or == 0x7F fails.\n for i = 1, #cap do\n local b = cap:byte(i)\n if b <= 0x20 or b == 0x7F then\n return err(\"invalid_capabilities\", \"required:control_or_whitespace\")\n end\n end\n list[#list + 1] = cap\n end\n if #list > CAPS_MAX_TOKENS then\n return err(\"invalid_capabilities\", \"required:too_many_tokens\")\n end\n table.sort(list)\n if #list > 0 then\n local csv = table.concat(list, \",\")\n if #csv > CAPS_MAX_BYTES then\n return err(\"invalid_capabilities\", \"required:too_many_bytes\")\n end\n required_caps_csv = csv\n end\n end\n end\n end\n\n -- 3. Determine initial eligibility\n local lifecycle_phase = \"runnable\"\n local eligibility_state, blocking_reason, blocking_detail, public_state\n local is_delayed = is_set(A.delay_until) and tonumber(A.delay_until) > now_ms\n\n if is_delayed then\n eligibility_state = \"not_eligible_until_time\"\n blocking_reason = \"waiting_for_delay\"\n blocking_detail = \"delayed until \" .. A.delay_until\n public_state = \"delayed\"\n else\n eligibility_state = \"eligible_now\"\n blocking_reason = \"waiting_for_worker\"\n blocking_detail = \"\"\n public_state = \"waiting\"\n end\n\n -- 4. 
Create exec_core \u{2014} ALL 7 state vector dimensions\n redis.call(\"HSET\", K.core_key,\n \"execution_id\", A.execution_id,\n \"namespace\", A.namespace,\n \"lane_id\", A.lane_id,\n \"execution_kind\", A.execution_kind,\n \"partition_id\", A.partition_id,\n \"priority\", A.priority,\n \"creator_identity\", A.creator_identity,\n -- 7 state vector dimensions\n \"lifecycle_phase\", lifecycle_phase,\n \"ownership_state\", \"unowned\",\n \"eligibility_state\", eligibility_state,\n \"blocking_reason\", blocking_reason,\n \"blocking_detail\", blocking_detail,\n \"terminal_outcome\", \"none\",\n \"attempt_state\", \"pending_first_attempt\",\n \"public_state\", public_state,\n -- accounting\n \"total_attempt_count\", \"0\",\n \"current_attempt_index\", \"\",\n \"current_attempt_id\", \"\",\n \"current_lease_id\", \"\",\n \"current_lease_epoch\", \"0\",\n \"current_worker_id\", \"\",\n \"current_worker_instance_id\", \"\",\n \"current_lane\", A.lane_id,\n \"retry_count\", \"0\",\n \"replay_count\", \"0\",\n \"lease_reclaim_count\", \"0\",\n -- timestamps\n \"created_at\", now_ms,\n \"started_at\", \"\",\n \"completed_at\", \"\",\n \"last_transition_at\", now_ms,\n \"last_mutation_at\", now_ms,\n -- lease fields (cleared)\n \"lease_acquired_at\", \"\",\n \"lease_expires_at\", \"\",\n \"lease_last_renewed_at\", \"\",\n \"lease_renewal_deadline\", \"\",\n \"lease_expired_at\", \"\",\n \"lease_revoked_at\", \"\",\n \"lease_revoke_reason\", \"\",\n -- delay\n \"delay_until\", is_delayed and A.delay_until or \"\",\n -- suspension (cleared)\n \"current_suspension_id\", \"\",\n \"current_waitpoint_id\", \"\",\n -- pending lineage (cleared)\n \"pending_retry_reason\", \"\",\n \"pending_replay_reason\", \"\",\n \"pending_replay_requested_by\", \"\",\n \"pending_previous_attempt_index\", \"\",\n -- progress\n \"progress_pct\", \"\",\n \"progress_message\", \"\",\n \"progress_updated_at\", \"\",\n -- flow\n \"flow_id\", \"\")\n\n -- 5. Store payload\n if is_set(A.input_payload) then\n redis.call(\"SET\", K.payload_key, A.input_payload)\n end\n\n -- 6. Store policy + required_capabilities. Validation already ran before\n -- any write (step 2b); here we only persist the pre-validated CSV. See\n -- step 2b for the fail-closed rationale on malformed / typed inputs.\n if is_set(A.policy_json) then\n redis.call(\"SET\", K.policy_key, A.policy_json)\n end\n if required_caps_csv then\n redis.call(\"HSET\", K.core_key, \"required_capabilities\", required_caps_csv)\n end\n\n -- 7. Store tags\n if is_set(A.tags_json) and A.tags_json ~= \"{}\" then\n local ok_decode, tags = pcall(cjson.decode, A.tags_json)\n if ok_decode and type(tags) == \"table\" then\n local flat = {}\n for k, v in pairs(tags) do\n flat[#flat + 1] = k\n flat[#flat + 1] = tostring(v)\n end\n if #flat > 0 then\n redis.call(\"HSET\", K.tags_key, unpack(flat))\n end\n end\n end\n\n -- 8. Add to scheduling index\n if is_delayed then\n redis.call(\"ZADD\", K.scheduling_zset, tonumber(A.delay_until), A.execution_id)\n else\n -- Composite score: -(priority * 1_000_000_000_000) + created_at_ms\n local score = 0 - (A.priority * 1000000000000) + now_ms\n redis.call(\"ZADD\", K.scheduling_zset, score, A.execution_id)\n end\n\n -- 9. SADD to all_executions partition index\n redis.call(\"SADD\", K.all_executions_set, A.execution_id)\n\n -- 10. Execution deadline index\n if is_set(A.execution_deadline_at) then\n redis.call(\"ZADD\", K.deadline_zset, tonumber(A.execution_deadline_at), A.execution_id)\n end\n\n -- 11. 
Set idempotency key with TTL\n -- Guard: PX 0 or PX <0 causes Valkey error (\"invalid expire time\"),\n -- which would abort the FCALL after exec_core was already written (step 4).\n local dedup_ms = tonumber(A.dedup_ttl_ms) or 0\n if dedup_ms > 0 and K.idem_key ~= \"\" and not string.find(K.idem_key, \"ff:noop:\") then\n redis.call(\"SET\", K.idem_key, A.execution_id,\n \"PX\", dedup_ms)\n end\n\n return ok(A.execution_id, public_state)\nend)\n\n---------------------------------------------------------------------------\n-- #1 ff_claim_execution (new attempt)\n--\n-- Consumes a claim grant, creates a new attempt + lease, transitions\n-- runnable \u{2192} active. Attempt type derived from exec_core attempt_state.\n--\n-- KEYS (14): exec_core, claim_grant, eligible_zset, lease_expiry_zset,\n-- worker_leases, attempt_hash, attempt_usage, attempt_policy,\n-- attempts_zset, lease_current, lease_history, active_index,\n-- attempt_timeout_zset, execution_deadline_zset\n-- ARGV (12): execution_id, worker_id, worker_instance_id, lane,\n-- capability_snapshot_hash, lease_id, lease_ttl_ms,\n-- renew_before_ms, attempt_id, attempt_policy_json,\n-- attempt_timeout_ms, execution_deadline_at\n--\n-- KNOWN LIMITATION (flow-cancel race): ff_claim_execution reads\n-- exec_core on {p:N} but cannot atomically read flow_core on {fp:N}\n-- (cross-slot). If ff_cancel_flow fired and its async member dispatch\n-- was dropped by a transient Valkey error, the member\'s exec_core has\n-- NOT yet been flipped to terminal \u{2014} so a worker may still claim it and\n-- run it to completion inside a cancelled flow. The flow\'s own\n-- public_flow_state is correctly terminal; only this one member escapes.\n-- Mitigations already in place:\n-- * cancel_flow(wait=true) avoids the bg dispatch entirely.\n-- * ff_apply_dependency_to_child rejects additions to terminal flows,\n-- so children cannot be added mid-cancel.\n-- * retention eventually trims the stale member.\n-- Full fix (flag on exec_core maintained by a broadcast loop) is a\n-- deliberate design change deferred past Batch A.\n---------------------------------------------------------------------------\nredis.register_function(\'ff_claim_execution\', function(keys, args)\n local K = {\n core_key = keys[1],\n claim_grant = keys[2],\n eligible_zset = keys[3],\n lease_expiry_key = keys[4],\n worker_leases_key = keys[5],\n attempt_hash = keys[6],\n attempt_usage = keys[7],\n attempt_policy = keys[8],\n attempts_zset = keys[9],\n lease_current_key = keys[10],\n lease_history_key = keys[11],\n active_index_key = keys[12],\n attempt_timeout_key = keys[13],\n execution_deadline_key = keys[14],\n }\n\n local lease_ttl_n = require_number(args[7], \"lease_ttl_ms\")\n if type(lease_ttl_n) == \"table\" then return lease_ttl_n end\n local renew_before_n = require_number(args[8], \"renew_before_ms\")\n if type(renew_before_n) == \"table\" then return renew_before_n end\n\n local A = {\n execution_id = args[1],\n worker_id = args[2],\n worker_instance_id = args[3],\n lane = args[4],\n capability_hash = args[5],\n lease_id = args[6],\n lease_ttl_ms = lease_ttl_n,\n renew_before_ms = renew_before_n,\n attempt_id = args[9],\n attempt_policy_json = args[10],\n attempt_timeout_ms = args[11], -- \"\" or ms\n execution_deadline_at = args[12], -- \"\" or ms\n }\n\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n -- 1. 
Validate execution state\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then return err(\"execution_not_found\") end\n local core = hgetall_to_table(raw)\n\n if core.lifecycle_phase ~= \"runnable\" then return err(\"execution_not_leaseable\") end\n if core.ownership_state ~= \"unowned\" then return err(\"lease_conflict\") end\n if core.eligibility_state ~= \"eligible_now\" then return err(\"execution_not_leaseable\") end\n if core.terminal_outcome ~= \"none\" then return err(\"execution_not_leaseable\") end\n\n -- Defense-in-depth: invariant A3\n if core.attempt_state == \"running_attempt\" then\n return err(\"active_attempt_exists\")\n end\n -- Dispatch: resume-from-suspension must use claim_resumed_execution\n if core.attempt_state == \"attempt_interrupted\" then\n return err(\"use_claim_resumed_execution\")\n end\n\n -- 2. Validate and consume claim grant\n local grant_raw = redis.call(\"HGETALL\", K.claim_grant)\n if #grant_raw == 0 then return err(\"invalid_claim_grant\") end\n local grant = hgetall_to_table(grant_raw)\n\n if grant.worker_id ~= A.worker_id then\n return err(\"invalid_claim_grant\")\n end\n if is_set(grant.grant_expires_at) and tonumber(grant.grant_expires_at) < now_ms then\n return err(\"claim_grant_expired\")\n end\n redis.call(\"DEL\", K.claim_grant)\n\n -- 3. Compute lease fields\n local next_epoch = tonumber(core.current_lease_epoch or \"0\") + 1\n local expires_at = now_ms + A.lease_ttl_ms\n local renewal_deadline = now_ms + A.renew_before_ms\n local next_att_idx = tonumber(core.total_attempt_count or \"0\")\n\n -- 4. Derive attempt type from exec core attempt_state\n local attempt_type = \"initial\"\n local lineage_fields = {}\n if core.attempt_state == \"pending_retry_attempt\" then\n attempt_type = \"retry\"\n lineage_fields = {\n \"retry_reason\", core.pending_retry_reason or \"\",\n \"previous_attempt_index\", core.pending_previous_attempt_index or \"\"\n }\n elseif core.attempt_state == \"pending_replay_attempt\" then\n attempt_type = \"replay\"\n lineage_fields = {\n \"replay_reason\", core.pending_replay_reason or \"\",\n \"replay_requested_by\", core.pending_replay_requested_by or \"\",\n \"replayed_from_attempt_index\", core.pending_previous_attempt_index or \"\"\n }\n end\n\n -- 5. Create attempt record\n -- Construct actual attempt key from hash tag + computed index.\n -- KEYS[6] is a placeholder (always index 0); on retry/replay the\n -- actual index is > 0, so we build the real key dynamically.\n -- All attempt keys share the same {p:N} hash tag \u{2192} same Cluster slot.\n local tag = string.match(K.core_key, \"(%b{})\")\n local att_key = \"ff:attempt:\" .. tag .. \":\" .. A.execution_id .. \":\" .. tostring(next_att_idx)\n local att_usage_key = att_key .. \":usage\"\n local att_policy_key = att_key .. \":policy\"\n\n local attempt_fields = {\n \"attempt_id\", A.attempt_id,\n \"execution_id\", A.execution_id,\n \"attempt_index\", tostring(next_att_idx),\n \"attempt_type\", attempt_type,\n \"attempt_state\", \"started\",\n \"created_at\", tostring(now_ms),\n \"started_at\", tostring(now_ms),\n \"lease_id\", A.lease_id,\n \"lease_epoch\", tostring(next_epoch),\n \"worker_id\", A.worker_id,\n \"worker_instance_id\", A.worker_instance_id\n }\n for _, v in ipairs(lineage_fields) do\n attempt_fields[#attempt_fields + 1] = v\n end\n redis.call(\"HSET\", att_key, unpack(attempt_fields))\n redis.call(\"ZADD\", K.attempts_zset, now_ms, tostring(next_att_idx))\n\n -- 5b. 
Initialize attempt usage counters\n redis.call(\"HSET\", att_usage_key,\n \"last_usage_report_seq\", \"0\")\n\n -- 5c. Store attempt policy snapshot\n if is_set(A.attempt_policy_json) then\n local policy_flat = unpack_policy(A.attempt_policy_json)\n if #policy_flat > 0 then\n redis.call(\"HSET\", att_policy_key, unpack(policy_flat))\n end\n end\n\n -- 6. Create lease record\n redis.call(\"DEL\", K.lease_current_key)\n redis.call(\"HSET\", K.lease_current_key,\n \"lease_id\", A.lease_id,\n \"lease_epoch\", tostring(next_epoch),\n \"execution_id\", A.execution_id,\n \"attempt_id\", A.attempt_id,\n \"worker_id\", A.worker_id,\n \"worker_instance_id\", A.worker_instance_id,\n \"acquired_at\", tostring(now_ms),\n \"expires_at\", tostring(expires_at),\n \"last_renewed_at\", tostring(now_ms),\n \"renewal_deadline\", tostring(renewal_deadline))\n\n -- 7. Update execution core \u{2014} ALL 7 state vector dimensions\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"active\",\n \"ownership_state\", \"leased\",\n \"eligibility_state\", \"not_applicable\",\n \"blocking_reason\", \"none\",\n \"blocking_detail\", \"\",\n \"terminal_outcome\", \"none\",\n \"attempt_state\", \"running_attempt\",\n \"public_state\", \"active\",\n \"current_attempt_index\", tostring(next_att_idx),\n \"total_attempt_count\", tostring(next_att_idx + 1),\n \"current_attempt_id\", A.attempt_id,\n \"current_lease_id\", A.lease_id,\n \"current_lease_epoch\", tostring(next_epoch),\n \"current_worker_id\", A.worker_id,\n \"current_worker_instance_id\", A.worker_instance_id,\n \"current_lane\", A.lane,\n \"lease_acquired_at\", tostring(now_ms),\n \"lease_expires_at\", tostring(expires_at),\n \"lease_last_renewed_at\", tostring(now_ms),\n \"lease_renewal_deadline\", tostring(renewal_deadline),\n -- Preserve the first-claim timestamp across retries; only fall\n -- back to `now_ms` when the stored value is empty (initial state\n -- written at HSET, line 208, or after a reset). In Lua the empty\n -- string is truthy, so `core.started_at or now_ms` would wedge at\n -- \"\" forever on the first claim \u{2014} explicit emptiness check fixes\n -- the scenario 4 stage_latency bench (exec_core surfaced this via\n -- the new ExecutionInfo.started_at REST field).\n \"started_at\", (core.started_at ~= nil and core.started_at ~= \"\") and core.started_at or tostring(now_ms),\n -- Clear pending lineage fields (consumed above)\n \"pending_retry_reason\", \"\",\n \"pending_replay_reason\", \"\",\n \"pending_replay_requested_by\", \"\",\n \"pending_previous_attempt_index\", \"\",\n \"last_transition_at\", tostring(now_ms),\n \"last_mutation_at\", tostring(now_ms))\n\n -- 8. Update indexes\n redis.call(\"ZREM\", K.eligible_zset, A.execution_id)\n redis.call(\"ZADD\", K.lease_expiry_key, expires_at, A.execution_id)\n redis.call(\"SADD\", K.worker_leases_key, A.execution_id)\n redis.call(\"ZADD\", K.active_index_key, expires_at, A.execution_id)\n\n -- 8a. Timeout indexes\n if is_set(A.attempt_timeout_ms) and A.attempt_timeout_ms ~= \"0\" then\n redis.call(\"ZADD\", K.attempt_timeout_key,\n now_ms + tonumber(A.attempt_timeout_ms), A.execution_id)\n end\n if is_set(A.execution_deadline_at) and A.execution_deadline_at ~= \"0\" then\n redis.call(\"ZADD\", K.execution_deadline_key,\n tonumber(A.execution_deadline_at), A.execution_id)\n end\n\n -- 9. 
Lease history event\n redis.call(\"XADD\", K.lease_history_key, \"MAXLEN\", \"~\", 1000, \"*\",\n \"event\", \"acquired\",\n \"lease_id\", A.lease_id,\n \"lease_epoch\", tostring(next_epoch),\n \"attempt_id\", A.attempt_id,\n \"attempt_index\", tostring(next_att_idx),\n \"worker_id\", A.worker_id,\n \"ts\", tostring(now_ms))\n\n return ok(A.lease_id, tostring(next_epoch), tostring(expires_at),\n A.attempt_id, tostring(next_att_idx), attempt_type)\nend)\n\n---------------------------------------------------------------------------\n-- #3 ff_complete_execution\n--\n-- Validate lease, end attempt, store result, transition active\u{2192}terminal.\n--\n-- KEYS (12): exec_core, attempt_hash, lease_expiry_zset, worker_leases,\n-- terminal_zset, lease_current, lease_history, active_index,\n-- stream_meta, result_key, attempt_timeout_zset,\n-- execution_deadline_zset\n-- ARGV (6): execution_id, lease_id, lease_epoch, attempt_id, result_payload,\n-- source\n-- source (args[6]) \u{2014} RFC #58.5: empty fence triple requires\n-- source == \"operator_override\" to proceed unfenced; otherwise the\n-- FCALL returns err(\"fence_required\"). Terminal op, so gate is strict.\n---------------------------------------------------------------------------\nredis.register_function(\'ff_complete_execution\', function(keys, args)\n local K = {\n core_key = keys[1],\n attempt_hash = keys[2],\n lease_expiry_key = keys[3],\n worker_leases_key = keys[4],\n terminal_zset = keys[5],\n lease_current_key = keys[6],\n lease_history_key = keys[7],\n active_index_key = keys[8],\n stream_meta = keys[9],\n result_key = keys[10],\n attempt_timeout_key = keys[11],\n execution_deadline_key = keys[12],\n }\n\n local A = {\n execution_id = args[1],\n lease_id = args[2] or \"\",\n lease_epoch = args[3] or \"\",\n attempt_id = args[4] or \"\",\n result_payload = args[5] or \"\",\n source = args[6] or \"\",\n }\n\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n -- Read + validate lease\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then return err(\"execution_not_found\") end\n local core = hgetall_to_table(raw)\n\n -- RFC #58.5 \u{2014} fence resolution. 
Terminal ops require either a\n -- non-empty fence triple OR source==\"operator_override\".\n -- In both branches validate_lease_and_mark_expired runs afterwards,\n -- which enforces lifecycle_phase==\"active\", ownership!=revoked, and\n -- expiry \u{2014} so operator overrides cannot bypass those preconditions.\n local fence, must_check_or_err = resolve_lease_fence(core, A)\n if not fence then return must_check_or_err end\n if not must_check_or_err then\n if A.source ~= \"operator_override\" then return err(\"fence_required\") end\n -- Bind server-resolved fence so validate_lease_and_mark_expired\'s\n -- identity check trivially matches; the lifecycle/expiry gates\n -- still fire and are the real safety net for the override path.\n A.lease_id = fence.lease_id\n A.lease_epoch = fence.lease_epoch\n A.attempt_id = fence.attempt_id\n end\n local lease_err = validate_lease_and_mark_expired(\n core, A, now_ms, K, 1000)\n if lease_err then return lease_err end\n\n -- End attempt\n redis.call(\"HSET\", K.attempt_hash,\n \"attempt_state\", \"ended_success\",\n \"ended_at\", tostring(now_ms))\n\n -- Close stream if exists\n if redis.call(\"EXISTS\", K.stream_meta) == 1 then\n redis.call(\"HSET\", K.stream_meta,\n \"closed_at\", tostring(now_ms),\n \"closed_reason\", \"attempt_success\")\n end\n\n -- Store result\n if A.result_payload ~= \"\" then\n redis.call(\"SET\", K.result_key, A.result_payload)\n end\n\n -- Update execution core \u{2014} ALL 7 state vector dimensions\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"terminal\",\n \"ownership_state\", \"unowned\",\n \"eligibility_state\", \"not_applicable\",\n \"blocking_reason\", \"none\",\n \"blocking_detail\", \"\",\n \"terminal_outcome\", \"success\",\n \"attempt_state\", \"attempt_terminal\",\n \"public_state\", \"completed\",\n \"completed_at\", tostring(now_ms),\n \"current_lease_id\", \"\",\n \"current_worker_id\", \"\",\n \"current_worker_instance_id\", \"\",\n \"lease_expires_at\", \"\",\n \"lease_last_renewed_at\", \"\",\n \"lease_renewal_deadline\", \"\",\n \"lease_expired_at\", \"\",\n \"lease_revoked_at\", \"\",\n \"lease_revoke_reason\", \"\",\n \"last_transition_at\", tostring(now_ms),\n \"last_mutation_at\", tostring(now_ms))\n\n -- Update indexes\n redis.call(\"ZREM\", K.lease_expiry_key, A.execution_id)\n redis.call(\"SREM\", K.worker_leases_key, A.execution_id)\n redis.call(\"ZADD\", K.terminal_zset, now_ms, A.execution_id)\n redis.call(\"ZREM\", K.active_index_key, A.execution_id)\n redis.call(\"ZREM\", K.attempt_timeout_key, A.execution_id)\n redis.call(\"ZREM\", K.execution_deadline_key, A.execution_id)\n\n -- Clean up lease\n redis.call(\"DEL\", K.lease_current_key)\n redis.call(\"XADD\", K.lease_history_key, \"MAXLEN\", \"~\", 1000, \"*\",\n \"event\", \"released\",\n \"lease_id\", A.lease_id,\n \"lease_epoch\", A.lease_epoch,\n \"attempt_index\", core.current_attempt_index or \"\",\n \"reason\", \"completed\",\n \"ts\", tostring(now_ms))\n\n -- Push-based DAG promotion (Batch C item 6). Only publish when the\n -- execution actually belongs to a flow \u{2014} standalone executions never\n -- have downstream edges for the engine to resolve. The engine\'s\n -- CompletionListener scanner SUBSCRIBEs to ff:dag:completions and\n -- calls dispatch_dependency_resolution per message. 
The\n -- dependency_reconciler interval scan is retained as a safety net\n -- for: missed messages during listener restart, ungated state\n -- (flow_id empty on older executions), and cluster broadcast gaps.\n if is_set(core.flow_id) then\n local payload = cjson.encode({\n execution_id = A.execution_id,\n flow_id = core.flow_id,\n outcome = \"success\",\n })\n redis.call(\"PUBLISH\", \"ff:dag:completions\", payload)\n end\n\n return ok(\"completed\")\nend)\n\n---------------------------------------------------------------------------\n-- #12a ff_cancel_execution\n--\n-- Cancel from any non-terminal state. Multi-path:\n-- active \u{2192} validate lease or operator override, end attempt, clear lease\n-- runnable \u{2192} defensive ZREM all indexes\n-- suspended \u{2192} close suspension/waitpoint, end attempt\n-- All paths: terminal(cancelled), defensive ZREM, ZADD terminal.\n--\n-- KEYS (21): exec_core, attempt_hash, stream_meta, lease_current,\n-- lease_history, lease_expiry_zset, worker_leases,\n-- suspension_current, waitpoint_hash, wp_condition,\n-- suspension_timeout_zset, terminal_zset,\n-- attempt_timeout_zset, execution_deadline_zset,\n-- eligible_zset, delayed_zset, blocked_deps_zset,\n-- blocked_budget_zset, blocked_quota_zset,\n-- blocked_route_zset, blocked_operator_zset\n-- ARGV (5): execution_id, reason, source, lease_id, lease_epoch\n---------------------------------------------------------------------------\nredis.register_function(\'ff_cancel_execution\', function(keys, args)\n local K = {\n core_key = keys[1],\n attempt_hash = keys[2],\n stream_meta = keys[3],\n lease_current_key = keys[4],\n lease_history_key = keys[5],\n lease_expiry_key = keys[6],\n worker_leases_key = keys[7],\n suspension_current = keys[8],\n waitpoint_hash = keys[9],\n wp_condition = keys[10],\n suspension_timeout_key = keys[11],\n terminal_key = keys[12],\n attempt_timeout_key = keys[13],\n execution_deadline_key = keys[14],\n eligible_key = keys[15],\n delayed_key = keys[16],\n blocked_deps_key = keys[17],\n blocked_budget_key = keys[18],\n blocked_quota_key = keys[19],\n blocked_route_key = keys[20],\n blocked_operator_key = keys[21],\n -- active_index_key and suspended_key are constructed dynamically below\n -- from the hash tag + lane_id (avoids changing KEYS count).\n active_index_key = nil,\n suspended_key = nil,\n }\n\n local A = {\n execution_id = args[1],\n reason = args[2],\n source = args[3] or \"\", -- \"operator_override\" or \"\"\n lease_id = args[4] or \"\",\n lease_epoch = args[5] or \"\",\n }\n\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then return err(\"execution_not_found\") end\n local core = hgetall_to_table(raw)\n\n -- Construct lane:active and lane:suspended keys dynamically from hash tag.\n -- These are not in the 21 KEYS array but defensive_zrem_all_indexes needs\n -- them to clean stale entries when cancelling active or suspended executions.\n local tag = string.match(K.core_key, \"(%b{})\")\n local lane = core.lane_id or \"default\"\n K.active_index_key = \"ff:idx:\" .. tag .. \":lane:\" .. lane .. \":active\"\n K.suspended_key = \"ff:idx:\" .. tag .. \":lane:\" .. lane .. \":suspended\"\n\n -- Already terminal. 
Enriched error detail mirrors validate_lease_and_mark_expired\n -- so SDK-side replay reconciliation works for cancel the same as for\n -- complete/fail.\n if core.lifecycle_phase == \"terminal\" then\n return err(\"execution_not_active\",\n core.terminal_outcome or \"\",\n core.current_lease_epoch or \"\",\n \"terminal\",\n core.current_attempt_id or \"\")\n end\n\n local cancelled_from = core.lifecycle_phase\n\n -- PATH: Active\n if core.lifecycle_phase == \"active\" then\n -- Require lease validation or operator override\n if A.source ~= \"operator_override\" then\n if core.ownership_state == \"lease_revoked\" then\n return err(\"lease_revoked\")\n end\n if is_set(A.lease_id) then\n if core.current_lease_id ~= A.lease_id then return err(\"stale_lease\") end\n if core.current_lease_epoch ~= A.lease_epoch then return err(\"stale_lease\") end\n end\n end\n\n -- End attempt\n if is_set(core.current_attempt_index) then\n redis.call(\"HSET\", K.attempt_hash,\n \"attempt_state\", \"ended_cancelled\",\n \"ended_at\", tostring(now_ms),\n \"failure_reason\", \"cancelled: \" .. A.reason)\n end\n\n -- Close stream if exists\n if redis.call(\"EXISTS\", K.stream_meta) == 1 then\n redis.call(\"HSET\", K.stream_meta,\n \"closed_at\", tostring(now_ms),\n \"closed_reason\", \"attempt_cancelled\")\n end\n\n -- Release lease\n redis.call(\"DEL\", K.lease_current_key)\n redis.call(\"XADD\", K.lease_history_key, \"MAXLEN\", \"~\", \"1000\", \"*\",\n \"event\", \"released\",\n \"lease_id\", core.current_lease_id or \"\",\n \"lease_epoch\", core.current_lease_epoch or \"\",\n \"attempt_index\", core.current_attempt_index or \"\",\n \"reason\", \"cancelled\",\n \"ts\", tostring(now_ms))\n end\n\n -- PATH: Suspended\n if core.lifecycle_phase == \"suspended\" then\n -- End attempt (interrupted \u{2192} ended_cancelled)\n if is_set(core.current_attempt_index) then\n redis.call(\"HSET\", K.attempt_hash,\n \"attempt_state\", \"ended_cancelled\",\n \"ended_at\", tostring(now_ms),\n \"failure_reason\", \"cancelled: \" .. A.reason)\n end\n\n -- Close stream if exists\n if redis.call(\"EXISTS\", K.stream_meta) == 1 then\n redis.call(\"HSET\", K.stream_meta,\n \"closed_at\", tostring(now_ms),\n \"closed_reason\", \"attempt_cancelled\")\n end\n\n -- Close suspension + waitpoint + wp_condition\n if redis.call(\"EXISTS\", K.suspension_current) == 1 then\n redis.call(\"HSET\", K.suspension_current,\n \"closed_at\", tostring(now_ms),\n \"close_reason\", \"cancelled\")\n end\n if redis.call(\"EXISTS\", K.waitpoint_hash) == 1 then\n redis.call(\"HSET\", K.waitpoint_hash,\n \"state\", \"closed\",\n \"closed_at\", tostring(now_ms),\n \"close_reason\", \"cancelled\")\n end\n if redis.call(\"EXISTS\", K.wp_condition) == 1 then\n redis.call(\"HSET\", K.wp_condition,\n \"closed\", \"1\",\n \"closed_at\", tostring(now_ms),\n \"closed_reason\", \"cancelled\")\n end\n -- RFC-014 \u{a7}3.1.1 composite cleanup owner: cancel path.\n composite_cleanup(\n K.suspension_current .. \":satisfied_set\",\n K.suspension_current .. \":member_map\")\n -- RFC-014 Pattern 3: close additional waitpoints co-owned by the\n -- suspension before it\'s discarded. 
Reread suspension to pick up\n -- the additional_waitpoints_json field.\n local susp_raw = redis.call(\"HGETALL\", K.suspension_current)\n if #susp_raw > 0 then\n local susp = hgetall_to_table(susp_raw)\n close_additional_waitpoints(\n K.suspension_current,\n susp.additional_waitpoints_json or \"\",\n { \"state\", \"closed\",\n \"closed_at\", tostring(now_ms),\n \"close_reason\", \"cancelled\" },\n { \"closed\", \"1\",\n \"closed_at\", tostring(now_ms),\n \"closed_reason\", \"cancelled\" })\n end\n end\n\n -- ALL PATHS: exec_core FIRST for terminal transition (\u{a7}4.8b Rule 2)\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"terminal\",\n \"ownership_state\", \"unowned\",\n \"eligibility_state\", \"not_applicable\",\n \"blocking_reason\", \"none\",\n \"blocking_detail\", \"\",\n \"terminal_outcome\", \"cancelled\",\n \"attempt_state\", is_set(core.current_attempt_index) and \"attempt_terminal\" or (core.attempt_state or \"none\"),\n \"public_state\", \"cancelled\",\n \"cancellation_reason\", A.reason,\n \"cancelled_by\", A.source ~= \"\" and A.source or A.execution_id,\n \"completed_at\", tostring(now_ms),\n \"current_lease_id\", \"\",\n \"current_worker_id\", \"\",\n \"current_worker_instance_id\", \"\",\n \"lease_expires_at\", \"\",\n \"lease_last_renewed_at\", \"\",\n \"lease_renewal_deadline\", \"\",\n \"current_suspension_id\", \"\",\n \"current_waitpoint_id\", \"\",\n \"last_transition_at\", tostring(now_ms),\n \"last_mutation_at\", tostring(now_ms))\n\n -- Defensive ZREM from ALL scheduling + timeout indexes\n defensive_zrem_all_indexes(K, A.execution_id, K.terminal_key)\n\n -- ZADD terminal (unconditional)\n redis.call(\"ZADD\", K.terminal_key, now_ms, A.execution_id)\n\n -- Push-based DAG promotion (Batch C item 6). Skip the publish when\n -- the cancel came from `flow_cascade` \u{2014} that source means a parent\n -- already propagated and the engine is walking the edge graph; an\n -- additional publish here would trigger redundant dispatch work.\n -- (Not a correctness issue since dispatch is idempotent, but it\n -- avoids unnecessary load under wide fan-out cancellations.)\n if is_set(core.flow_id) and A.source ~= \"flow_cascade\" then\n local payload = cjson.encode({\n execution_id = A.execution_id,\n flow_id = core.flow_id,\n outcome = \"cancelled\",\n })\n redis.call(\"PUBLISH\", \"ff:dag:completions\", payload)\n end\n\n return ok(\"cancelled\", cancelled_from)\nend)\n\n---------------------------------------------------------------------------\n-- #30 ff_delay_execution\n--\n-- Worker delays its own active execution. Releases lease, transitions\n-- to runnable + not_eligible_until_time. Same attempt continues (paused).\n--\n-- KEYS (9): exec_core, attempt_hash, lease_current, lease_history,\n-- lease_expiry_zset, worker_leases, active_index,\n-- delayed_zset, attempt_timeout_zset\n-- ARGV (6): execution_id, lease_id, lease_epoch, attempt_id, delay_until,\n-- source\n-- source (args[6]) \u{2014} RFC #58.5: terminal-op-style fence gate. 
Empty\n-- fence triple requires source == \"operator_override\" to proceed unfenced.\n---------------------------------------------------------------------------\nredis.register_function(\'ff_delay_execution\', function(keys, args)\n local K = {\n core_key = keys[1],\n attempt_hash = keys[2],\n lease_current_key = keys[3],\n lease_history_key = keys[4],\n lease_expiry_key = keys[5],\n worker_leases_key = keys[6],\n active_index_key = keys[7],\n delayed_zset = keys[8],\n attempt_timeout_key = keys[9],\n }\n\n local A = {\n execution_id = args[1],\n lease_id = args[2] or \"\",\n lease_epoch = args[3] or \"\",\n attempt_id = args[4] or \"\",\n delay_until = args[5] or \"\",\n source = args[6] or \"\",\n }\n\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then return err(\"execution_not_found\") end\n local core = hgetall_to_table(raw)\n\n -- RFC #58.5 \u{2014} fence resolution with operator override for terminal-style ops.\n -- In both branches validate_lease_and_mark_expired runs afterwards, which\n -- enforces lifecycle_phase==\"active\", ownership!=revoked, and expiry \u{2014}\n -- operator overrides cannot bypass those preconditions.\n local fence, must_check_or_err = resolve_lease_fence(core, A)\n if not fence then return must_check_or_err end\n if not must_check_or_err then\n if A.source ~= \"operator_override\" then return err(\"fence_required\") end\n A.lease_id = fence.lease_id\n A.lease_epoch = fence.lease_epoch\n A.attempt_id = fence.attempt_id\n end\n local lease_err = validate_lease_and_mark_expired(\n core, A, now_ms, K, 1000)\n if lease_err then return lease_err end\n\n -- OOM-SAFE WRITE ORDERING: exec_core FIRST (point of no return)\n -- ALL 7 state vector dimensions\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"runnable\",\n \"ownership_state\", \"unowned\",\n \"eligibility_state\", \"not_eligible_until_time\",\n \"blocking_reason\", \"waiting_for_delay\",\n \"blocking_detail\", \"delayed until \" .. 
A.delay_until,\n \"terminal_outcome\", \"none\",\n \"attempt_state\", \"attempt_interrupted\",\n \"public_state\", \"delayed\",\n \"delay_until\", A.delay_until,\n \"current_lease_id\", \"\",\n \"current_worker_id\", \"\",\n \"current_worker_instance_id\", \"\",\n \"lease_expires_at\", \"\",\n \"lease_last_renewed_at\", \"\",\n \"lease_renewal_deadline\", \"\",\n \"last_transition_at\", tostring(now_ms),\n \"last_mutation_at\", tostring(now_ms))\n\n -- Pause attempt: started \u{2192} suspended\n redis.call(\"HSET\", K.attempt_hash,\n \"attempt_state\", \"suspended\",\n \"suspended_at\", tostring(now_ms),\n \"suspension_id\", \"worker_delay\")\n\n -- Release lease + update indexes\n redis.call(\"DEL\", K.lease_current_key)\n redis.call(\"ZREM\", K.lease_expiry_key, A.execution_id)\n redis.call(\"SREM\", K.worker_leases_key, A.execution_id)\n redis.call(\"ZREM\", K.active_index_key, A.execution_id)\n redis.call(\"ZREM\", K.attempt_timeout_key, A.execution_id)\n\n -- Add to delayed set\n redis.call(\"ZADD\", K.delayed_zset, tonumber(A.delay_until), A.execution_id)\n\n -- Lease history event\n redis.call(\"XADD\", K.lease_history_key, \"MAXLEN\", \"~\", \"1000\", \"*\",\n \"event\", \"released\",\n \"lease_id\", A.lease_id,\n \"lease_epoch\", A.lease_epoch,\n \"attempt_index\", core.current_attempt_index or \"\",\n \"attempt_id\", A.attempt_id,\n \"reason\", \"worker_delay\",\n \"ts\", tostring(now_ms))\n\n return ok(A.delay_until)\nend)\n\n---------------------------------------------------------------------------\n-- #31 ff_move_to_waiting_children\n--\n-- Worker blocks on child dependencies. Releases lease, transitions to\n-- runnable + blocked_by_dependencies. Same attempt continues (paused).\n--\n-- KEYS (9): exec_core, attempt_hash, lease_current, lease_history,\n-- lease_expiry_zset, worker_leases, active_index,\n-- blocked_deps_zset, attempt_timeout_zset\n-- ARGV (5): execution_id, lease_id, lease_epoch, attempt_id, source\n-- source (args[5]) \u{2014} RFC #58.5: terminal-op-style fence gate. 
Empty\n-- fence triple requires source == \"operator_override\" to proceed unfenced.\n---------------------------------------------------------------------------\nredis.register_function(\'ff_move_to_waiting_children\', function(keys, args)\n local K = {\n core_key = keys[1],\n attempt_hash = keys[2],\n lease_current_key = keys[3],\n lease_history_key = keys[4],\n lease_expiry_key = keys[5],\n worker_leases_key = keys[6],\n active_index_key = keys[7],\n blocked_deps_zset = keys[8],\n attempt_timeout_key = keys[9],\n }\n\n local A = {\n execution_id = args[1],\n lease_id = args[2] or \"\",\n lease_epoch = args[3] or \"\",\n attempt_id = args[4] or \"\",\n source = args[5] or \"\",\n }\n\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then return err(\"execution_not_found\") end\n local core = hgetall_to_table(raw)\n\n -- RFC #58.5 \u{2014} fence resolution with operator override for terminal-style ops.\n -- In both branches validate_lease_and_mark_expired runs afterwards, which\n -- enforces lifecycle_phase==\"active\", ownership!=revoked, and expiry \u{2014}\n -- operator overrides cannot bypass those preconditions.\n local fence, must_check_or_err = resolve_lease_fence(core, A)\n if not fence then return must_check_or_err end\n if not must_check_or_err then\n if A.source ~= \"operator_override\" then return err(\"fence_required\") end\n A.lease_id = fence.lease_id\n A.lease_epoch = fence.lease_epoch\n A.attempt_id = fence.attempt_id\n end\n local lease_err = validate_lease_and_mark_expired(\n core, A, now_ms, K, 1000)\n if lease_err then return lease_err end\n\n -- OOM-SAFE WRITE ORDERING: exec_core FIRST (point of no return, \u{a7}4.8b Rule 2)\n -- ALL 7 state vector dimensions\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"runnable\",\n \"ownership_state\", \"unowned\",\n \"eligibility_state\", \"blocked_by_dependencies\",\n \"blocking_reason\", \"waiting_for_children\",\n \"blocking_detail\", \"waiting for child executions to complete\",\n \"terminal_outcome\", \"none\",\n \"attempt_state\", \"attempt_interrupted\",\n \"public_state\", \"waiting_children\",\n \"current_lease_id\", \"\",\n \"current_worker_id\", \"\",\n \"current_worker_instance_id\", \"\",\n \"lease_expires_at\", \"\",\n \"lease_last_renewed_at\", \"\",\n \"lease_renewal_deadline\", \"\",\n \"last_transition_at\", tostring(now_ms),\n \"last_mutation_at\", tostring(now_ms))\n\n -- Pause attempt: started \u{2192} suspended (waiting children, not ended)\n redis.call(\"HSET\", K.attempt_hash,\n \"attempt_state\", \"suspended\",\n \"suspended_at\", tostring(now_ms),\n \"suspension_id\", \"waiting_children\")\n\n -- Release lease + update indexes\n redis.call(\"DEL\", K.lease_current_key)\n redis.call(\"ZREM\", K.lease_expiry_key, A.execution_id)\n redis.call(\"SREM\", K.worker_leases_key, A.execution_id)\n redis.call(\"ZREM\", K.active_index_key, A.execution_id)\n redis.call(\"ZREM\", K.attempt_timeout_key, A.execution_id)\n\n -- Lease history event\n redis.call(\"XADD\", K.lease_history_key, \"MAXLEN\", \"~\", \"1000\", \"*\",\n \"event\", \"released\",\n \"lease_id\", A.lease_id,\n \"lease_epoch\", A.lease_epoch,\n \"attempt_index\", core.current_attempt_index or \"\",\n \"attempt_id\", A.attempt_id,\n \"reason\", \"waiting_children\",\n \"ts\", tostring(now_ms))\n\n -- Add to blocked:dependencies set\n redis.call(\"ZADD\", K.blocked_deps_zset,\n tonumber(core.created_at or \"0\"), 
A.execution_id)\n\n return ok()\nend)\n\n---------------------------------------------------------------------------\n-- #4 ff_fail_execution\n--\n-- Fail an active execution. If retries remain: set pending_retry_attempt\n-- + lineage on exec_core (deferred creation \u{2014} claim_execution creates\n-- the attempt, per R21 fix). If max retries reached: terminal failure.\n--\n-- KEYS (12): exec_core, attempt_hash, lease_expiry_zset, worker_leases,\n-- terminal_zset, delayed_zset, lease_current, lease_history,\n-- active_index, stream_meta, attempt_timeout_zset,\n-- execution_deadline_zset\n-- ARGV (8): execution_id, lease_id, lease_epoch, attempt_id,\n-- failure_reason, failure_category, retry_policy_json, source\n-- source (args[8]) \u{2014} RFC #58.5: terminal op, so empty fence triple\n-- requires source == \"operator_override\" to proceed unfenced.\n---------------------------------------------------------------------------\nredis.register_function(\'ff_fail_execution\', function(keys, args)\n local K = {\n core_key = keys[1],\n attempt_hash = keys[2],\n lease_expiry_key = keys[3],\n worker_leases_key = keys[4],\n terminal_key = keys[5],\n delayed_zset = keys[6],\n lease_current_key = keys[7],\n lease_history_key = keys[8],\n active_index_key = keys[9],\n stream_meta = keys[10],\n attempt_timeout_key = keys[11],\n execution_deadline_key = keys[12],\n }\n\n local A = {\n execution_id = args[1],\n lease_id = args[2] or \"\",\n lease_epoch = args[3] or \"\",\n attempt_id = args[4] or \"\",\n failure_reason = args[5] or \"\",\n failure_category = args[6] or \"\",\n retry_policy_json = args[7] or \"\",\n source = args[8] or \"\",\n }\n\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n -- 1. Read + validate lease\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then return err(\"execution_not_found\") end\n local core = hgetall_to_table(raw)\n\n -- RFC #58.5 \u{2014} fence resolution with operator override for terminal ops.\n -- In both branches validate_lease_and_mark_expired runs afterwards, which\n -- enforces lifecycle_phase==\"active\", ownership!=revoked, and expiry \u{2014}\n -- operator overrides cannot bypass those preconditions.\n local fence, must_check_or_err = resolve_lease_fence(core, A)\n if not fence then return must_check_or_err end\n if not must_check_or_err then\n if A.source ~= \"operator_override\" then return err(\"fence_required\") end\n A.lease_id = fence.lease_id\n A.lease_epoch = fence.lease_epoch\n A.attempt_id = fence.attempt_id\n end\n local lease_err = validate_lease_and_mark_expired(\n core, A, now_ms, K, 1000)\n if lease_err then return lease_err end\n\n -- 2. End current attempt\n redis.call(\"HSET\", K.attempt_hash,\n \"attempt_state\", \"ended_failure\",\n \"ended_at\", tostring(now_ms),\n \"failure_reason\", A.failure_reason,\n \"failure_category\", A.failure_category)\n\n -- 3. Close stream if exists\n if redis.call(\"EXISTS\", K.stream_meta) == 1 then\n redis.call(\"HSET\", K.stream_meta,\n \"closed_at\", tostring(now_ms),\n \"closed_reason\", \"attempt_failure\")\n end\n\n -- 4. 
Determine retry eligibility\n local retry_count = tonumber(core.retry_count or \"0\")\n local max_retries = 0\n local backoff_ms = 1000\n local can_retry = false\n\n if is_set(A.retry_policy_json) then\n local ok_decode, policy = pcall(cjson.decode, A.retry_policy_json)\n if ok_decode and type(policy) == \"table\" then\n max_retries = tonumber(policy.max_retries or \"0\")\n if retry_count < max_retries then\n can_retry = true\n local bt = policy.backoff or {}\n if bt.type == \"exponential\" then\n local initial = (tonumber(bt.initial_delay_ms) or 1000)\n local max_d = (tonumber(bt.max_delay_ms) or 60000)\n local mult = (tonumber(bt.multiplier) or 2)\n backoff_ms = math.min(initial * (mult ^ retry_count), max_d)\n elseif bt.type == \"fixed\" then\n backoff_ms = (tonumber(bt.delay_ms) or 1000)\n end\n end\n end\n end\n\n if can_retry then\n -- RETRY PATH: deferred attempt creation (R21 fix)\n -- Do NOT create attempt record here. claim_execution creates the\n -- attempt with correct type (retry) by reading pending_retry_attempt.\n local delay_until = now_ms + backoff_ms\n\n -- ALL 7 state vector dimensions + pending lineage\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"runnable\",\n \"ownership_state\", \"unowned\",\n \"eligibility_state\", \"not_eligible_until_time\",\n \"blocking_reason\", \"waiting_for_retry_backoff\",\n \"blocking_detail\", \"retry backoff until \" .. tostring(delay_until) ..\n \" (attempt \" .. (core.current_attempt_index or \"0\") ..\n \" failed: \" .. A.failure_reason .. \")\",\n \"terminal_outcome\", \"none\",\n \"attempt_state\", \"pending_retry_attempt\",\n \"public_state\", \"delayed\",\n -- Pending lineage for claim_execution to consume\n \"pending_retry_reason\", A.failure_reason,\n \"pending_previous_attempt_index\", core.current_attempt_index or \"0\",\n \"retry_count\", tostring(retry_count + 1),\n \"current_attempt_id\", \"\",\n \"current_lease_id\", \"\",\n \"current_worker_id\", \"\",\n \"current_worker_instance_id\", \"\",\n \"lease_expires_at\", \"\",\n \"lease_last_renewed_at\", \"\",\n \"lease_renewal_deadline\", \"\",\n \"delay_until\", tostring(delay_until),\n \"failure_reason\", A.failure_reason,\n \"last_transition_at\", tostring(now_ms),\n \"last_mutation_at\", tostring(now_ms))\n\n -- Release lease + update indexes\n redis.call(\"DEL\", K.lease_current_key)\n redis.call(\"ZREM\", K.lease_expiry_key, A.execution_id)\n redis.call(\"SREM\", K.worker_leases_key, A.execution_id)\n redis.call(\"ZREM\", K.active_index_key, A.execution_id)\n redis.call(\"ZREM\", K.attempt_timeout_key, A.execution_id)\n\n -- ZADD to delayed index\n redis.call(\"ZADD\", K.delayed_zset, delay_until, A.execution_id)\n\n -- Lease history\n redis.call(\"XADD\", K.lease_history_key, \"MAXLEN\", \"~\", \"1000\", \"*\",\n \"event\", \"released\",\n \"lease_id\", A.lease_id,\n \"lease_epoch\", A.lease_epoch,\n \"attempt_index\", core.current_attempt_index or \"\",\n \"reason\", \"failed_retry_scheduled\",\n \"ts\", tostring(now_ms))\n\n return ok(\"retry_scheduled\", tostring(delay_until))\n else\n -- TERMINAL PATH\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"terminal\",\n \"ownership_state\", \"unowned\",\n \"eligibility_state\", \"not_applicable\",\n \"blocking_reason\", \"none\",\n \"blocking_detail\", \"\",\n \"terminal_outcome\", \"failed\",\n \"attempt_state\", \"attempt_terminal\",\n \"public_state\", \"failed\",\n \"failure_reason\", A.failure_reason,\n \"completed_at\", tostring(now_ms),\n \"current_lease_id\", \"\",\n \"current_worker_id\", 
\"\",\n \"current_worker_instance_id\", \"\",\n \"lease_expires_at\", \"\",\n \"lease_last_renewed_at\", \"\",\n \"lease_renewal_deadline\", \"\",\n \"last_transition_at\", tostring(now_ms),\n \"last_mutation_at\", tostring(now_ms))\n\n -- Release lease + cleanup\n redis.call(\"DEL\", K.lease_current_key)\n redis.call(\"ZREM\", K.lease_expiry_key, A.execution_id)\n redis.call(\"SREM\", K.worker_leases_key, A.execution_id)\n redis.call(\"ZREM\", K.active_index_key, A.execution_id)\n redis.call(\"ZREM\", K.attempt_timeout_key, A.execution_id)\n redis.call(\"ZREM\", K.execution_deadline_key, A.execution_id)\n redis.call(\"ZADD\", K.terminal_key, now_ms, A.execution_id)\n\n redis.call(\"XADD\", K.lease_history_key, \"MAXLEN\", \"~\", \"1000\", \"*\",\n \"event\", \"released\",\n \"lease_id\", A.lease_id,\n \"lease_epoch\", A.lease_epoch,\n \"attempt_index\", core.current_attempt_index or \"\",\n \"reason\", \"failed_terminal\",\n \"ts\", tostring(now_ms))\n\n -- Push-based DAG promotion (Batch C item 6). See ff_complete_execution\n -- for rationale. A terminal-failed upstream triggers\n -- child-skip cascades via ff_resolve_dependency on the receiver side.\n if is_set(core.flow_id) then\n local payload = cjson.encode({\n execution_id = A.execution_id,\n flow_id = core.flow_id,\n outcome = \"failed\",\n })\n redis.call(\"PUBLISH\", \"ff:dag:completions\", payload)\n end\n\n return ok(\"terminal_failed\")\n end\nend)\n\n---------------------------------------------------------------------------\n-- #26 ff_reclaim_execution\n--\n-- Atomically reclaim an expired/revoked execution: interrupt old attempt,\n-- create new attempt + new lease.\n--\n-- KEYS (14): exec_core, claim_grant, old_attempt_hash, old_stream_meta,\n-- new_attempt_hash, new_attempt_usage, attempts_zset,\n-- lease_current, lease_history, lease_expiry_zset,\n-- worker_leases, active_index, attempt_timeout_zset,\n-- execution_deadline_zset\n-- ARGV (8): execution_id, worker_id, worker_instance_id, lane,\n-- lease_id, lease_ttl_ms, attempt_id, attempt_policy_json\n---------------------------------------------------------------------------\nredis.register_function(\'ff_reclaim_execution\', function(keys, args)\n local K = {\n core_key = keys[1],\n claim_grant = keys[2],\n old_attempt_hash = keys[3],\n old_stream_meta = keys[4],\n new_attempt_hash = keys[5],\n new_attempt_usage = keys[6],\n attempts_zset = keys[7],\n lease_current_key = keys[8],\n lease_history_key = keys[9],\n lease_expiry_key = keys[10],\n worker_leases_key = keys[11],\n active_index_key = keys[12],\n attempt_timeout_key = keys[13],\n execution_deadline_key = keys[14],\n }\n\n local reclaim_ttl_n = require_number(args[6], \"lease_ttl_ms\")\n if type(reclaim_ttl_n) == \"table\" then return reclaim_ttl_n end\n\n local A = {\n execution_id = args[1],\n worker_id = args[2],\n worker_instance_id = args[3],\n lane = args[4],\n lease_id = args[5],\n lease_ttl_ms = reclaim_ttl_n,\n attempt_id = args[7],\n attempt_policy_json = args[8] or \"\",\n }\n\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n -- 1. 
Validate execution state\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then return err(\"execution_not_found\") end\n local core = hgetall_to_table(raw)\n\n -- Must be active + reclaimable\n if core.lifecycle_phase ~= \"active\" then\n return err(\"execution_not_reclaimable\")\n end\n if core.ownership_state ~= \"lease_expired_reclaimable\"\n and core.ownership_state ~= \"lease_revoked\" then\n return err(\"execution_not_reclaimable\")\n end\n\n -- Check max_reclaim_count\n local reclaim_count = tonumber(core.lease_reclaim_count or \"0\")\n local max_reclaim = 100 -- default\n -- Read from policy if available\n local policy_key = string.gsub(K.core_key, \":core$\", \":policy\")\n local policy_raw = redis.call(\"GET\", policy_key)\n if policy_raw then\n local ok_p, policy = pcall(cjson.decode, policy_raw)\n if ok_p and type(policy) == \"table\" then\n max_reclaim = tonumber(policy.max_reclaim_count or \"100\")\n end\n end\n\n if reclaim_count >= max_reclaim then\n -- Terminal: max reclaims exceeded\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"terminal\",\n \"ownership_state\", \"unowned\",\n \"eligibility_state\", \"not_applicable\",\n \"blocking_reason\", \"none\",\n \"blocking_detail\", \"\",\n \"terminal_outcome\", \"failed\",\n \"attempt_state\", \"attempt_terminal\",\n \"public_state\", \"failed\",\n \"failure_reason\", \"max_reclaims_exceeded\",\n \"completed_at\", tostring(now_ms),\n \"current_lease_id\", \"\",\n \"current_worker_id\", \"\",\n \"current_worker_instance_id\", \"\",\n \"last_transition_at\", tostring(now_ms),\n \"last_mutation_at\", tostring(now_ms))\n\n redis.call(\"DEL\", K.lease_current_key)\n redis.call(\"ZREM\", K.lease_expiry_key, A.execution_id)\n redis.call(\"SREM\", K.worker_leases_key, A.execution_id)\n -- Dynamic worker_leases SREM: K.worker_leases_key targets the NEW (reclaiming)\n -- worker\'s set, but the entry is in the OLD (expired) worker\'s set. Use\n -- current_worker_instance_id from exec_core to SREM from the correct set.\n local wiid = core.current_worker_instance_id or \"\"\n if wiid ~= \"\" then\n local tag_wl = string.match(K.core_key, \"(%b{})\")\n redis.call(\"SREM\", \"ff:idx:\" .. tag_wl .. \":worker:\" .. wiid .. \":leases\", A.execution_id)\n end\n redis.call(\"ZREM\", K.active_index_key, A.execution_id)\n redis.call(\"ZREM\", K.attempt_timeout_key, A.execution_id)\n redis.call(\"ZREM\", K.execution_deadline_key, A.execution_id)\n\n -- ZADD terminal (construct key from hash tag + lane)\n local tag = string.match(K.core_key, \"(%b{})\")\n local lane = core.lane_id or core.current_lane or \"default\"\n local terminal_key = \"ff:idx:\" .. tag .. \":lane:\" .. lane .. \":terminal\"\n redis.call(\"ZADD\", terminal_key, now_ms, A.execution_id)\n\n return err(\"max_retries_exhausted\")\n end\n\n -- 2. Validate and consume claim grant\n local grant_raw = redis.call(\"HGETALL\", K.claim_grant)\n if #grant_raw == 0 then return err(\"invalid_claim_grant\") end\n local grant = hgetall_to_table(grant_raw)\n if grant.worker_id ~= A.worker_id then return err(\"invalid_claim_grant\") end\n redis.call(\"DEL\", K.claim_grant)\n\n -- 3. Interrupt old attempt\n local old_att_idx = core.current_attempt_index or \"0\"\n redis.call(\"HSET\", K.old_attempt_hash,\n \"attempt_state\", \"interrupted_reclaimed\",\n \"ended_at\", tostring(now_ms),\n \"failure_reason\", \"lease_\" .. 
(core.ownership_state or \"expired\"))\n\n -- Close old stream if exists\n if redis.call(\"EXISTS\", K.old_stream_meta) == 1 then\n redis.call(\"HSET\", K.old_stream_meta,\n \"closed_at\", tostring(now_ms),\n \"closed_reason\", \"reclaimed\")\n end\n\n -- 4. Create new attempt\n -- Construct actual attempt key from hash tag + computed index\n -- (same dynamic-key pattern as ff_claim_execution).\n local next_epoch = tonumber(core.current_lease_epoch or \"0\") + 1\n local next_att_idx = tonumber(core.total_attempt_count or \"0\")\n local expires_at = now_ms + A.lease_ttl_ms\n local renewal_deadline = now_ms + math.floor(A.lease_ttl_ms * 2 / 3)\n\n local tag = string.match(K.core_key, \"(%b{})\")\n local att_key = \"ff:attempt:\" .. tag .. \":\" .. A.execution_id .. \":\" .. tostring(next_att_idx)\n local att_usage_key = att_key .. \":usage\"\n\n redis.call(\"HSET\", att_key,\n \"attempt_id\", A.attempt_id,\n \"execution_id\", A.execution_id,\n \"attempt_index\", tostring(next_att_idx),\n \"attempt_type\", \"reclaim\",\n \"attempt_state\", \"started\",\n \"created_at\", tostring(now_ms),\n \"started_at\", tostring(now_ms),\n \"lease_id\", A.lease_id,\n \"lease_epoch\", tostring(next_epoch),\n \"worker_id\", A.worker_id,\n \"worker_instance_id\", A.worker_instance_id,\n \"reclaim_reason\", \"lease_\" .. (core.ownership_state or \"expired\"),\n \"previous_attempt_index\", old_att_idx)\n redis.call(\"ZADD\", K.attempts_zset, now_ms, tostring(next_att_idx))\n\n -- Initialize usage counters\n redis.call(\"HSET\", att_usage_key, \"last_usage_report_seq\", \"0\")\n\n -- 5. Create new lease\n redis.call(\"DEL\", K.lease_current_key)\n redis.call(\"HSET\", K.lease_current_key,\n \"lease_id\", A.lease_id,\n \"lease_epoch\", tostring(next_epoch),\n \"execution_id\", A.execution_id,\n \"attempt_id\", A.attempt_id,\n \"worker_id\", A.worker_id,\n \"worker_instance_id\", A.worker_instance_id,\n \"acquired_at\", tostring(now_ms),\n \"expires_at\", tostring(expires_at),\n \"last_renewed_at\", tostring(now_ms),\n \"renewal_deadline\", tostring(renewal_deadline))\n\n -- 6. Update exec_core \u{2014} ALL 7 state vector dimensions\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"active\",\n \"ownership_state\", \"leased\",\n \"eligibility_state\", \"not_applicable\",\n \"blocking_reason\", \"none\",\n \"blocking_detail\", \"\",\n \"terminal_outcome\", \"none\",\n \"attempt_state\", \"running_attempt\",\n \"public_state\", \"active\",\n \"current_attempt_index\", tostring(next_att_idx),\n \"total_attempt_count\", tostring(next_att_idx + 1),\n \"current_attempt_id\", A.attempt_id,\n \"current_lease_id\", A.lease_id,\n \"current_lease_epoch\", tostring(next_epoch),\n \"current_worker_id\", A.worker_id,\n \"current_worker_instance_id\", A.worker_instance_id,\n \"current_lane\", A.lane,\n \"lease_acquired_at\", tostring(now_ms),\n \"lease_expires_at\", tostring(expires_at),\n \"lease_last_renewed_at\", tostring(now_ms),\n \"lease_renewal_deadline\", tostring(renewal_deadline),\n \"lease_expired_at\", \"\",\n \"lease_revoked_at\", \"\",\n \"lease_revoke_reason\", \"\",\n \"lease_reclaim_count\", tostring(reclaim_count + 1),\n \"last_transition_at\", tostring(now_ms),\n \"last_mutation_at\", tostring(now_ms))\n\n -- 7. 
Update indexes\n redis.call(\"ZADD\", K.lease_expiry_key, expires_at, A.execution_id)\n -- SREM from the OLD worker\'s leases set before SADD to the new one.\n -- K.worker_leases_key targets the NEW (reclaiming) worker, but the\n -- execution is currently in the OLD worker\'s set via the original\n -- claim\'s SADD. Without this SREM the execution stays indexed under\n -- the old worker even though exec_core now reports a new owner,\n -- breaking operator drain visibility and worker\u{2192}execution lookup.\n -- Same dynamic-key pattern as the max-reclaim branch above and as\n -- ff_expire_execution\'s cleanup path. Skipped when the same worker\n -- reclaims its own execution (idempotent no-op anyway).\n local old_wiid = core.current_worker_instance_id or \"\"\n if old_wiid ~= \"\" and old_wiid ~= A.worker_instance_id then\n local tag_wl = string.match(K.core_key, \"(%b{})\")\n redis.call(\"SREM\",\n \"ff:idx:\" .. tag_wl .. \":worker:\" .. old_wiid .. \":leases\",\n A.execution_id)\n end\n redis.call(\"SADD\", K.worker_leases_key, A.execution_id)\n redis.call(\"ZADD\", K.active_index_key, expires_at, A.execution_id)\n\n -- 8. Lease history events\n redis.call(\"XADD\", K.lease_history_key, \"MAXLEN\", \"~\", 1000, \"*\",\n \"event\", \"reclaimed\",\n \"old_lease_epoch\", core.current_lease_epoch or \"\",\n \"new_lease_id\", A.lease_id,\n \"new_lease_epoch\", tostring(next_epoch),\n \"new_attempt_id\", A.attempt_id,\n \"new_attempt_index\", tostring(next_att_idx),\n \"worker_id\", A.worker_id,\n \"ts\", tostring(now_ms))\n\n return ok(A.lease_id, tostring(next_epoch), tostring(expires_at),\n A.attempt_id, tostring(next_att_idx), \"reclaim\")\nend)\n\n---------------------------------------------------------------------------\n-- #29 ff_expire_execution\n--\n-- Timeout-based expiration. 
Handles active, runnable, and suspended phases.\n-- Called by the attempt_timeout and execution_deadline scanners.\n--\n-- KEYS (14): exec_core, attempt_hash, stream_meta, lease_current,\n-- lease_history, lease_expiry_zset, worker_leases,\n-- active_index, terminal_zset, attempt_timeout_zset,\n-- execution_deadline_zset, suspended_zset,\n-- suspension_timeout_zset, suspension_current\n-- ARGV (2): execution_id, expire_reason\n---------------------------------------------------------------------------\nredis.register_function(\'ff_expire_execution\', function(keys, args)\n local K = {\n core_key = keys[1],\n attempt_hash = keys[2],\n stream_meta = keys[3],\n lease_current_key = keys[4],\n lease_history_key = keys[5],\n lease_expiry_key = keys[6],\n worker_leases_key = keys[7],\n active_index_key = keys[8],\n terminal_key = keys[9],\n attempt_timeout_key = keys[10],\n execution_deadline_key = keys[11],\n suspended_zset = keys[12],\n suspension_timeout_key = keys[13],\n suspension_current = keys[14],\n }\n\n local A = {\n execution_id = args[1],\n expire_reason = args[2] or \"attempt_timeout\",\n }\n\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then\n redis.call(\"ZREM\", K.attempt_timeout_key, A.execution_id)\n redis.call(\"ZREM\", K.execution_deadline_key, A.execution_id)\n return ok(\"not_found_cleaned\")\n end\n local core = hgetall_to_table(raw)\n\n -- Already terminal \u{2014} no-op\n if core.lifecycle_phase == \"terminal\" then\n redis.call(\"ZREM\", K.attempt_timeout_key, A.execution_id)\n redis.call(\"ZREM\", K.execution_deadline_key, A.execution_id)\n return ok(\"already_terminal\")\n end\n\n -- PATH: Active\n if core.lifecycle_phase == \"active\" then\n -- End attempt\n if is_set(core.current_attempt_index) then\n redis.call(\"HSET\", K.attempt_hash,\n \"attempt_state\", \"ended_failure\",\n \"ended_at\", tostring(now_ms),\n \"failure_reason\", A.expire_reason)\n end\n -- Close stream\n if redis.call(\"EXISTS\", K.stream_meta) == 1 then\n redis.call(\"HSET\", K.stream_meta,\n \"closed_at\", tostring(now_ms),\n \"closed_reason\", \"expired\")\n end\n -- Release lease\n redis.call(\"DEL\", K.lease_current_key)\n redis.call(\"ZREM\", K.lease_expiry_key, A.execution_id)\n redis.call(\"SREM\", K.worker_leases_key, A.execution_id)\n -- Dynamic worker_leases SREM: the scanner passes empty WorkerInstanceId\n -- in KEYS[7] (it can\'t know which worker holds the lease). Use the actual\n -- worker_instance_id from exec_core to SREM from the correct set.\n local wiid = core.current_worker_instance_id or \"\"\n if wiid ~= \"\" then\n local tag = string.match(K.core_key, \"(%b{})\")\n redis.call(\"SREM\", \"ff:idx:\" .. tag .. \":worker:\" .. wiid .. \":leases\", A.execution_id)\n end\n redis.call(\"ZREM\", K.active_index_key, A.execution_id)\n -- Lease history\n redis.call(\"XADD\", K.lease_history_key, \"MAXLEN\", \"~\", \"1000\", \"*\",\n \"event\", \"released\",\n \"lease_id\", core.current_lease_id or \"\",\n \"lease_epoch\", core.current_lease_epoch or \"\",\n \"attempt_index\", core.current_attempt_index or \"\",\n \"reason\", \"expired:\" .. 
A.expire_reason,\n \"ts\", tostring(now_ms))\n end\n\n -- PATH: Suspended\n if core.lifecycle_phase == \"suspended\" then\n -- End attempt\n if is_set(core.current_attempt_index) then\n redis.call(\"HSET\", K.attempt_hash,\n \"attempt_state\", \"ended_failure\",\n \"ended_at\", tostring(now_ms),\n \"failure_reason\", A.expire_reason)\n end\n -- Close suspension\n if redis.call(\"EXISTS\", K.suspension_current) == 1 then\n redis.call(\"HSET\", K.suspension_current,\n \"closed_at\", tostring(now_ms),\n \"close_reason\", \"expired:\" .. A.expire_reason)\n end\n -- Close stream\n if redis.call(\"EXISTS\", K.stream_meta) == 1 then\n redis.call(\"HSET\", K.stream_meta,\n \"closed_at\", tostring(now_ms),\n \"closed_reason\", \"expired\")\n end\n -- ZREM from suspended indexes\n redis.call(\"ZREM\", K.suspended_zset, A.execution_id)\n redis.call(\"ZREM\", K.suspension_timeout_key, A.execution_id)\n end\n\n -- PATH: Runnable (execution_deadline fires while execution is waiting/delayed/blocked)\n -- These indexes are not in the 14 KEYS array, so construct dynamically from\n -- hash tag + lane_id (same C2 pattern as ff_cancel_execution).\n if core.lifecycle_phase == \"runnable\" then\n local tag = string.match(K.core_key, \"(%b{})\")\n local lane = core.lane_id or core.current_lane or \"default\"\n local es = core.eligibility_state or \"\"\n\n if es == \"eligible_now\" then\n redis.call(\"ZREM\", \"ff:idx:\" .. tag .. \":lane:\" .. lane .. \":eligible\", A.execution_id)\n elseif es == \"not_eligible_until_time\" then\n redis.call(\"ZREM\", \"ff:idx:\" .. tag .. \":lane:\" .. lane .. \":delayed\", A.execution_id)\n elseif es == \"blocked_by_dependencies\" then\n redis.call(\"ZREM\", \"ff:idx:\" .. tag .. \":lane:\" .. lane .. \":blocked:dependencies\", A.execution_id)\n elseif es == \"blocked_by_budget\" then\n redis.call(\"ZREM\", \"ff:idx:\" .. tag .. \":lane:\" .. lane .. \":blocked:budget\", A.execution_id)\n elseif es == \"blocked_by_quota\" then\n redis.call(\"ZREM\", \"ff:idx:\" .. tag .. \":lane:\" .. lane .. \":blocked:quota\", A.execution_id)\n elseif es == \"blocked_by_route\" then\n redis.call(\"ZREM\", \"ff:idx:\" .. tag .. \":lane:\" .. lane .. \":blocked:route\", A.execution_id)\n elseif es == \"blocked_by_operator\" then\n redis.call(\"ZREM\", \"ff:idx:\" .. tag .. \":lane:\" .. lane .. \":blocked:operator\", A.execution_id)\n else\n -- Defensive catch-all: handles blocked_by_lane_state and any future\n -- eligibility states. ZREM from ALL runnable-state indexes \u{2014} idempotent.\n local lp = \"ff:idx:\" .. tag .. \":lane:\" .. lane\n redis.call(\"ZREM\", lp .. \":eligible\", A.execution_id)\n redis.call(\"ZREM\", lp .. \":delayed\", A.execution_id)\n redis.call(\"ZREM\", lp .. \":blocked:dependencies\", A.execution_id)\n redis.call(\"ZREM\", lp .. \":blocked:budget\", A.execution_id)\n redis.call(\"ZREM\", lp .. \":blocked:quota\", A.execution_id)\n redis.call(\"ZREM\", lp .. \":blocked:route\", A.execution_id)\n redis.call(\"ZREM\", lp .. 
\":blocked:operator\", A.execution_id)\n end\n end\n\n -- ALL PATHS: terminal transition\n local att_state = \"attempt_terminal\"\n if not is_set(core.current_attempt_index) then\n att_state = core.attempt_state or \"none\"\n end\n\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"terminal\",\n \"ownership_state\", \"unowned\",\n \"eligibility_state\", \"not_applicable\",\n \"blocking_reason\", \"none\",\n \"blocking_detail\", \"\",\n \"terminal_outcome\", \"expired\",\n \"attempt_state\", att_state,\n \"public_state\", \"expired\",\n \"failure_reason\", A.expire_reason,\n \"completed_at\", tostring(now_ms),\n \"current_lease_id\", \"\",\n \"current_worker_id\", \"\",\n \"current_worker_instance_id\", \"\",\n \"lease_expires_at\", \"\",\n \"current_suspension_id\", \"\",\n \"current_waitpoint_id\", \"\",\n \"last_transition_at\", tostring(now_ms),\n \"last_mutation_at\", tostring(now_ms))\n\n -- Cleanup timeout indexes + ZADD terminal\n redis.call(\"ZREM\", K.attempt_timeout_key, A.execution_id)\n redis.call(\"ZREM\", K.execution_deadline_key, A.execution_id)\n redis.call(\"ZADD\", K.terminal_key, now_ms, A.execution_id)\n\n -- Push-based DAG promotion (Batch C item 6 follow-up, issue #44).\n -- Same shape as ff_complete_execution / ff_fail_execution /\n -- ff_cancel_execution \u{2014} emit only for flow-bound executions. Covers\n -- all three phases (active, suspended, runnable) because the terminal\n -- HSET + ZADD above is shared across them. Without this, a never-\n -- claimed flow-bound execution that hits execution_deadline relies on\n -- the dependency_reconciler safety net (15s default) to unblock\n -- children, spiking DAG latency on the timeout path.\n if is_set(core.flow_id) then\n local payload = cjson.encode({\n execution_id = A.execution_id,\n flow_id = core.flow_id,\n outcome = \"expired\",\n })\n redis.call(\"PUBLISH\", \"ff:dag:completions\", payload)\n end\n\n return ok(\"expired\", core.lifecycle_phase)\nend)\n\n---------------------------------------------------------------------------\n-- ff_set_execution_tags (issue #58.4)\n--\n-- Write caller-supplied tag fields to the execution\'s separate tags key\n-- (`ff:exec:{fp:N}:<eid>:tags`). Tag keys MUST match the reserved\n-- namespace pattern `^[a-z][a-z0-9_]*%.` \u{2014} i.e. `<caller>.<field>` \u{2014} so\n-- callers get `$caller.*` for metadata and FF reserves dot-free\n-- snake_case for its own fields. Keys failing validation fail-closed\n-- with `invalid_tag_key` + the offending key; no writes happen.\n--\n-- Atomically HSETs all validated pairs in one call, then bumps\n-- `last_mutation_at` on `exec_core` so observers see the mutation.\n--\n-- KEYS (2): exec_core, tags_key\n-- ARGV (>=2, even): k1, v1, k2, v2, ...\n---------------------------------------------------------------------------\nredis.register_function(\'ff_set_execution_tags\', function(keys, args)\n local K = {\n core_key = keys[1],\n tags_key = keys[2],\n }\n\n local n = #args\n if n == 0 or (n % 2) ~= 0 then\n return err(\"invalid_input\", \"tags must be non-empty even-length key/value pairs\")\n end\n\n if redis.call(\"EXISTS\", K.core_key) == 0 then\n return err(\"execution_not_found\")\n end\n\n -- Require `<caller>.<field>` with at least one non-dot char after the\n -- first dot, so `cairn.` and `cairn..x` are rejected. 
The suffix may\n -- contain further dots (`app.sub.field` is legal).\n for i = 1, n, 2 do\n local k = args[i]\n if type(k) ~= \"string\" or not string.find(k, \"^[a-z][a-z0-9_]*%.[^.]\") then\n return err(\"invalid_tag_key\", tostring(k))\n end\n end\n\n redis.call(\"HSET\", K.tags_key, unpack(args))\n\n local now_ms = server_time_ms()\n redis.call(\"HSET\", K.core_key, \"last_mutation_at\", tostring(now_ms))\n\n return ok(tostring(n / 2))\nend)\n\n\n-- source: lua/scheduling.lua\n-- FlowFabric scheduling functions\n-- Reference: RFC-009 (Scheduling), RFC-010 \u{a7}4 (function inventory)\n--\n-- Depends on helpers: ok, err, hgetall_to_table, is_set, validate_lease\n\n---------------------------------------------------------------------------\n-- Capability matching helpers (local to scheduling.lua)\n-- Bounds CAPS_MAX_BYTES / CAPS_MAX_TOKENS live in helpers.lua and are\n-- enforced symmetrically on worker caps here and on required caps in\n-- ff_create_execution so neither side can smuggle in an oversized list.\n---------------------------------------------------------------------------\n\n-- Parse a capability CSV into a {token=true} set. Empty/nil \u{2192} empty set.\n-- Returns (set, nil) on success or (nil, err_tuple) on bound violation.\n--\n-- Empty tokens (from stray separators like \"a,,b\") are skipped BEFORE the\n-- count check so a legitimate list punctuated by noise isn\'t rejected.\n-- Real oversize input still fails because #csv > CAPS_MAX_BYTES catches it\n-- before this loop runs.\nlocal function parse_capability_csv(csv, kind)\n if csv == nil or csv == \"\" then\n return {}, nil\n end\n if #csv > CAPS_MAX_BYTES then\n return nil, err(\"invalid_capabilities\", kind .. \":too_many_bytes\")\n end\n local set = {}\n local n = 0\n for token in string.gmatch(csv, \"([^,]+)\") do\n if #token > 0 then\n n = n + 1\n if n > CAPS_MAX_TOKENS then\n return nil, err(\"invalid_capabilities\", kind .. \":too_many_tokens\")\n end\n set[token] = true\n end\n end\n return set, nil\nend\n\n-- Return sorted CSV of tokens present in `required` but missing from\n-- `worker_caps`. Empty result means worker satisfies all requirements.\nlocal function missing_capabilities(required, worker_caps)\n local missing = {}\n for cap, _ in pairs(required) do\n if not worker_caps[cap] then\n missing[#missing + 1] = cap\n end\n end\n table.sort(missing)\n return table.concat(missing, \",\")\nend\n\n---------------------------------------------------------------------------\n-- #25 ff_issue_claim_grant\n--\n-- Scheduler issues a claim grant for an eligible execution.\n-- Validates execution is eligible, writes grant hash with TTL,\n-- removes from eligible set.\n--\n-- KEYS (3): exec_core, claim_grant_key, eligible_zset\n-- ARGV (9): execution_id, worker_id, worker_instance_id,\n-- lane_id, capability_hash, grant_ttl_ms,\n-- route_snapshot_json, admission_summary,\n-- worker_capabilities_csv -- sorted CSV of worker caps (option a)\n--\n-- Capability matching (RFC-009):\n-- If exec_core.required_capabilities (sorted CSV on exec_core) is empty,\n-- any worker matches (backwards compat). Otherwise the worker\'s sorted\n-- CSV must be a superset.\n-- On mismatch: Lua stamps `last_capability_mismatch_at` (single scalar\n-- field, idempotent write \u{2014} no unbounded counter) and returns\n-- err(\"capability_mismatch\", missing_csv). 
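Illustration with hypothetical capability names: required_capabilities \"gpu,large_mem\" checked against a worker advertising only \"gpu\" fails with missing_csv \"large_mem\". 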
The scheduler side MUST\n-- then block the execution off the eligible ZSET (see\n-- ff_block_execution_for_admission with reason `waiting_for_capability`),\n-- otherwise ZRANGEBYSCORE keeps returning the same top-of-zset every\n-- tick and 100 workers \u{d7} 1 tick/s = hot-loop starvation. RFC-009 \u{a7}564.\n---------------------------------------------------------------------------\nredis.register_function(\'ff_issue_claim_grant\', function(keys, args)\n local K = {\n core_key = keys[1],\n claim_grant = keys[2],\n eligible_zset = keys[3],\n }\n\n local A = {\n execution_id = args[1],\n worker_id = args[2],\n worker_instance_id = args[3],\n lane_id = args[4],\n capability_hash = args[5] or \"\",\n route_snapshot_json = args[7] or \"\",\n admission_summary = args[8] or \"\",\n worker_capabilities_csv = args[9] or \"\",\n }\n\n local grant_ttl_n = require_number(args[6], \"grant_ttl_ms\")\n if type(grant_ttl_n) == \"table\" then return grant_ttl_n end\n A.grant_ttl_ms = grant_ttl_n\n\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n -- 1. Validate execution exists and is eligible\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then return err(\"execution_not_found\") end\n local core = hgetall_to_table(raw)\n\n if core.lifecycle_phase ~= \"runnable\" then\n return err(\"execution_not_eligible\")\n end\n if core.eligibility_state ~= \"eligible_now\" then\n return err(\"execution_not_eligible\")\n end\n\n -- 2. Check no existing grant (prevent double-grant)\n if redis.call(\"EXISTS\", K.claim_grant) == 1 then\n return err(\"grant_already_exists\")\n end\n\n -- 3. Verify execution is in eligible set (TOCTOU guard)\n local score = redis.call(\"ZSCORE\", K.eligible_zset, A.execution_id)\n if not score then\n return err(\"execution_not_in_eligible_set\")\n end\n\n -- 4. Capability matching. On miss we stamp a SINGLE bounded field \u{2014}\n -- `last_capability_mismatch_at` \u{2014} so operators can SCAN for stuck\n -- executions via `HGET last_capability_mismatch_at < now - 1h` without\n -- needing a counter. An earlier version HINCRBY\'d a counter; that was\n -- dropped because combined with the hot-loop bug (executions staying in\n -- the eligible ZSET after mismatch) the counter grew unboundedly (2.4M\n -- increments/day on one stuck exec_core under 100 workers). An HSET of\n -- a fixed field is idempotent w.r.t. size.\n --\n -- The scheduler MUST block the execution off the eligible ZSET after\n -- this err returns; otherwise the next tick picks the same top-of-zset\n -- and we wasted this validation. See Scheduler::claim_for_worker.\n local required_set, req_err = parse_capability_csv(\n core.required_capabilities or \"\", \"required\")\n if req_err then return req_err end\n local worker_set, wrk_err = parse_capability_csv(\n A.worker_capabilities_csv, \"worker\")\n if wrk_err then return wrk_err end\n if next(required_set) ~= nil then\n local missing = missing_capabilities(required_set, worker_set)\n if missing ~= \"\" then\n redis.call(\"HSET\", K.core_key,\n \"last_capability_mismatch_at\", tostring(now_ms))\n return err(\"capability_mismatch\", missing)\n end\n end\n\n -- 5. 
Write grant hash with TTL\n local grant_expires_at = now_ms + A.grant_ttl_ms\n redis.call(\"HSET\", K.claim_grant,\n \"worker_id\", A.worker_id,\n \"worker_instance_id\", A.worker_instance_id,\n \"lane_id\", A.lane_id,\n \"capability_hash\", A.capability_hash,\n \"route_snapshot_json\", A.route_snapshot_json,\n \"admission_summary\", A.admission_summary,\n \"created_at\", tostring(now_ms),\n \"grant_expires_at\", tostring(grant_expires_at))\n redis.call(\"PEXPIREAT\", K.claim_grant, grant_expires_at)\n\n -- 6. Do NOT ZREM from eligible here. ff_claim_execution does the ZREM\n -- when consuming the grant. If the grant expires unconsumed, the execution\n -- remains in the eligible set and is re-discovered by the next scheduler\n -- cycle. This prevents the \"orphaned grant\" stuck state where an execution\n -- is in no scheduling index after grant expiry.\n\n return ok(A.execution_id)\nend)\n\n---------------------------------------------------------------------------\n-- #32 ff_change_priority\n--\n-- Update priority and re-score in eligible ZSET.\n-- Only works for runnable + eligible_now executions.\n--\n-- KEYS (2): exec_core, eligible_zset\n-- ARGV (2): execution_id, new_priority\n---------------------------------------------------------------------------\nredis.register_function(\'ff_change_priority\', function(keys, args)\n local K = {\n core_key = keys[1],\n eligible_zset = keys[2],\n }\n\n local new_priority_n = require_number(args[2], \"new_priority\")\n if type(new_priority_n) == \"table\" then return new_priority_n end\n\n local A = {\n execution_id = args[1],\n new_priority = new_priority_n,\n }\n\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n -- 1. Read and validate\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then return err(\"execution_not_found\") end\n local core = hgetall_to_table(raw)\n\n if core.lifecycle_phase ~= \"runnable\" then\n return err(\"execution_not_eligible\")\n end\n if core.eligibility_state ~= \"eligible_now\" then\n return err(\"execution_not_eligible\")\n end\n\n local old_priority = tonumber(core.priority or \"0\")\n\n -- Clamp to safe range (same as ff_create_execution)\n if A.new_priority < 0 then A.new_priority = 0 end\n if A.new_priority > 9000 then A.new_priority = 9000 end\n\n -- 2. Update exec_core priority\n redis.call(\"HSET\", K.core_key,\n \"priority\", tostring(A.new_priority),\n \"last_mutation_at\", tostring(now_ms))\n\n -- 3. Re-score in eligible ZSET\n -- Composite score: -(priority * 1_000_000_000_000) + created_at_ms\n local created_at = tonumber(core.created_at or \"0\")\n local new_score = 0 - (A.new_priority * 1000000000000) + created_at\n redis.call(\"ZADD\", K.eligible_zset, new_score, A.execution_id)\n\n return ok(tostring(old_priority), tostring(A.new_priority))\nend)\n\n---------------------------------------------------------------------------\n-- #33 ff_update_progress\n--\n-- Update progress fields on exec_core. 
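Writes progress_pct / progress_message plus progress_updated_at and last_mutation_at only; the 7-dimension state vector is untouched. 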
Validate lease (lite check:\n-- lease_id + epoch only \u{2014} attempt_id not required per \u{a7}4 Class B).\n--\n-- KEYS (1): exec_core\n-- ARGV (5): execution_id, lease_id, lease_epoch,\n-- progress_pct, progress_message\n---------------------------------------------------------------------------\nredis.register_function(\'ff_update_progress\', function(keys, args)\n local K = {\n core_key = keys[1],\n }\n\n local A = {\n execution_id = args[1],\n lease_id = args[2],\n lease_epoch = args[3],\n progress_pct = args[4] or \"\",\n progress_message = args[5] or \"\",\n }\n\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n -- Read and validate\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then return err(\"execution_not_found\") end\n local core = hgetall_to_table(raw)\n\n if core.lifecycle_phase ~= \"active\" then\n return err(\"execution_not_active\",\n core.terminal_outcome or \"\",\n core.current_lease_epoch or \"\",\n core.lifecycle_phase or \"\",\n core.current_attempt_id or \"\")\n end\n if core.ownership_state == \"lease_revoked\" then\n return err(\"lease_revoked\")\n end\n if tonumber(core.lease_expires_at or \"0\") <= now_ms then\n return err(\"lease_expired\")\n end\n if core.current_lease_id ~= A.lease_id then\n return err(\"stale_lease\")\n end\n if core.current_lease_epoch ~= A.lease_epoch then\n return err(\"stale_lease\")\n end\n\n -- Update progress fields\n local fields = { \"last_mutation_at\", tostring(now_ms), \"progress_updated_at\", tostring(now_ms) }\n if is_set(A.progress_pct) then\n fields[#fields + 1] = \"progress_pct\"\n fields[#fields + 1] = A.progress_pct\n end\n if is_set(A.progress_message) then\n fields[#fields + 1] = \"progress_message\"\n fields[#fields + 1] = A.progress_message\n end\n redis.call(\"HSET\", K.core_key, unpack(fields))\n\n return ok()\nend)\n\n---------------------------------------------------------------------------\n-- #27 ff_promote_delayed\n--\n-- Promote a delayed execution to eligible when its delay_until has passed.\n-- Called by the delayed promoter scanner.\n-- Preserves attempt_state (may be pending_retry, pending_first, or\n-- attempt_interrupted from delay_execution).\n--\n-- KEYS (3): exec_core, delayed_zset, eligible_zset\n-- ARGV (2): execution_id, now_ms\n---------------------------------------------------------------------------\nredis.register_function(\'ff_promote_delayed\', function(keys, args)\n local K = {\n core_key = keys[1],\n delayed_zset = keys[2],\n eligible_zset = keys[3],\n }\n\n local now_ms_n = require_number(args[2], \"now_ms\")\n if type(now_ms_n) == \"table\" then return now_ms_n end\n\n local A = {\n execution_id = args[1],\n now_ms = now_ms_n,\n }\n\n -- Read and validate\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then\n -- Execution gone \u{2014} clean up stale index entry\n redis.call(\"ZREM\", K.delayed_zset, A.execution_id)\n return ok(\"not_found_cleaned\")\n end\n local core = hgetall_to_table(raw)\n\n -- Must be runnable + not_eligible_until_time\n if core.lifecycle_phase ~= \"runnable\" then\n redis.call(\"ZREM\", K.delayed_zset, A.execution_id)\n return ok(\"not_runnable_cleaned\")\n end\n if core.eligibility_state ~= \"not_eligible_until_time\" then\n redis.call(\"ZREM\", K.delayed_zset, A.execution_id)\n return ok(\"not_delayed_cleaned\")\n end\n\n -- Check delay_until has actually passed\n local delay_until = tonumber(core.delay_until or \"0\")\n if delay_until > A.now_ms then\n return 
ok(\"not_yet_due\")\n end\n\n -- Promote: update 6 of 7 state vector dimensions.\n -- attempt_state is DELIBERATELY PRESERVED (not written). This is the 7th dim.\n --\n -- WHY: The caller that put this execution into the delayed set already set\n -- the attempt_state to reflect what should happen on next claim:\n -- * pending_retry_attempt \u{2014} from ff_fail_execution (retry backoff expired)\n -- * pending_replay_attempt \u{2014} from ff_replay_execution (replay delay expired)\n -- * attempt_interrupted \u{2014} from ff_delay_execution (worker self-delay)\n -- * pending_first_attempt \u{2014} from ff_create_execution (initial delay_until)\n -- Overwriting it here would lose this routing information and break\n -- claim_execution\'s attempt_type derivation (initial vs retry vs replay)\n -- and the claim dispatch routing (claim_execution vs claim_resumed_execution).\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"runnable\",\n \"ownership_state\", \"unowned\",\n \"eligibility_state\", \"eligible_now\",\n \"blocking_reason\", \"waiting_for_worker\",\n \"blocking_detail\", \"\",\n \"terminal_outcome\", \"none\",\n -- attempt_state: NOT WRITTEN \u{2014} see comment above\n \"public_state\", \"waiting\",\n \"delay_until\", \"\",\n \"last_transition_at\", tostring(A.now_ms),\n \"last_mutation_at\", tostring(A.now_ms))\n\n -- ZREM from delayed, ZADD to eligible with composite priority score\n redis.call(\"ZREM\", K.delayed_zset, A.execution_id)\n local priority = tonumber(core.priority or \"0\")\n local created_at = tonumber(core.created_at or \"0\")\n local score = 0 - (priority * 1000000000000) + created_at\n redis.call(\"ZADD\", K.eligible_zset, score, A.execution_id)\n\n return ok(\"promoted\")\nend)\n\n---------------------------------------------------------------------------\n-- #26 ff_issue_reclaim_grant\n--\n-- TODO(batch-c): This function has NO production Rust caller as of Batch B.\n-- The reclaim scanner that would invoke it (to recover leases from crashed\n-- workers) is scheduled for cairn Batch C. A worker dying mid-execution today\n-- leaves its execution stuck in `lease_expired_reclaimable` until operator\n-- intervention. Test-only callers exist in crates/ff-test/tests to exercise\n-- the Lua side. When the scheduler reclaim integration lands, the caller\n-- must apply the same block-on-capability-mismatch pattern used by\n-- `ff-scheduler::Scheduler::claim_for_worker` (see the IMPORTANT note\n-- below) \u{2014} otherwise an unmatchable reclaim recycles every scanner tick.\n--\n-- Scheduler issues a reclaim grant for an expired/revoked execution.\n-- Similar to ff_issue_claim_grant but validates reclaimable state.\n--\n-- KEYS (3): exec_core, claim_grant_key, lease_expiry_zset\n-- ARGV (9): execution_id, worker_id, worker_instance_id,\n-- lane_id, capability_hash, grant_ttl_ms,\n-- route_snapshot_json, admission_summary,\n-- worker_capabilities_csv\n--\n-- Capability matching identical to ff_issue_claim_grant: reclaiming a lease\n-- must respect the execution\'s required_capabilities just like an initial\n-- claim, so a re-issuance to a non-matching worker is blocked here too.\n--\n-- IMPORTANT: on capability_mismatch this function does NOT remove the exec\n-- from the lease_expiry pool. The reclaim SCANNER (to be added in Rust) MUST\n-- detect capability_mismatch and move the execution into blocked_route with\n-- reason `waiting_for_capable_worker` (mirroring the claim-grant path). 
If\n-- the scanner instead re-attempts the same execution every tick, a reclaim\n-- hot-loop develops that is analogous to the claim-path hot-loop and\n-- identical in cost (wasted FCALLs + log volume). Lease_expiry as an index\n-- has no natural sweeping mechanism for post-mismatch promotion \u{2014} the\n-- scheduler-side block + periodic sweep owns the lifecycle.\n---------------------------------------------------------------------------\nredis.register_function(\'ff_issue_reclaim_grant\', function(keys, args)\n local K = {\n core_key = keys[1],\n claim_grant = keys[2],\n lease_expiry = keys[3],\n }\n\n local A = {\n execution_id = args[1],\n worker_id = args[2],\n worker_instance_id = args[3],\n lane_id = args[4],\n capability_hash = args[5] or \"\",\n route_snapshot_json = args[7] or \"\",\n admission_summary = args[8] or \"\",\n worker_capabilities_csv = args[9] or \"\",\n }\n\n local grant_ttl_n = require_number(args[6], \"grant_ttl_ms\")\n if type(grant_ttl_n) == \"table\" then return grant_ttl_n end\n A.grant_ttl_ms = grant_ttl_n\n\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n -- Validate execution exists and is reclaimable\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then return err(\"execution_not_found\") end\n local core = hgetall_to_table(raw)\n\n if core.lifecycle_phase ~= \"active\" then\n return err(\"execution_not_reclaimable\")\n end\n if core.ownership_state ~= \"lease_expired_reclaimable\"\n and core.ownership_state ~= \"lease_revoked\" then\n return err(\"execution_not_reclaimable\")\n end\n\n -- Check no existing grant\n if redis.call(\"EXISTS\", K.claim_grant) == 1 then\n return err(\"grant_already_exists\")\n end\n\n -- Capability matching \u{2014} same policy as issue_claim_grant: stamp\n -- last_capability_mismatch_at (single scalar) on miss so ops can surface\n -- stuck reclaims via SCAN. Scheduler MUST also block-out the exec from\n -- the lease_expiry reclaim pool; otherwise the reclaim scanner hits the\n -- same mismatch every cycle. 
See Scheduler::reclaim_for_worker.\n local required_set, req_err = parse_capability_csv(\n core.required_capabilities or \"\", \"required\")\n if req_err then return req_err end\n local worker_set, wrk_err = parse_capability_csv(\n A.worker_capabilities_csv, \"worker\")\n if wrk_err then return wrk_err end\n if next(required_set) ~= nil then\n local missing = missing_capabilities(required_set, worker_set)\n if missing ~= \"\" then\n redis.call(\"HSET\", K.core_key,\n \"last_capability_mismatch_at\", tostring(now_ms))\n return err(\"capability_mismatch\", missing)\n end\n end\n\n -- Write grant hash with TTL\n local grant_expires_at = now_ms + A.grant_ttl_ms\n redis.call(\"HSET\", K.claim_grant,\n \"worker_id\", A.worker_id,\n \"worker_instance_id\", A.worker_instance_id,\n \"lane_id\", A.lane_id,\n \"capability_hash\", A.capability_hash,\n \"route_snapshot_json\", A.route_snapshot_json,\n \"admission_summary\", A.admission_summary,\n \"created_at\", tostring(now_ms),\n \"grant_expires_at\", tostring(grant_expires_at))\n redis.call(\"PEXPIREAT\", K.claim_grant, grant_expires_at)\n\n -- Do NOT ZREM from lease_expiry \u{2014} stays for scheduler discovery\n\n return ok(A.execution_id)\nend)\n\n\n-- source: lua/suspension.lua\n-- FlowFabric suspension and waitpoint functions\n-- Reference: RFC-004 (Suspension), RFC-005 (Signal), RFC-010 \u{a7}4\n--\n-- Depends on helpers: ok, err, ok_already_satisfied, hgetall_to_table,\n-- is_set, validate_lease_and_mark_expired, clear_lease_and_indexes,\n-- map_reason_to_blocking, initialize_condition, write_condition_hash,\n-- evaluate_signal_against_condition, is_condition_satisfied,\n-- extract_field, initial_signal_summary_json, validate_pending_waitpoint,\n-- assert_active_suspension, assert_waitpoint_belongs\n\n---------------------------------------------------------------------------\n-- #13 ff_suspend_execution\n--\n-- Validate lease, release ownership, create suspension + waitpoint(s)\n-- (or activate pending), init condition, transition active \u{2192} suspended.\n-- Mints the waitpoint HMAC token(s) (RFC-004 \u{a7}Waitpoint Security)\n-- returned alongside the waitpoint_id(s) for external signal delivery.\n--\n-- KEYS (18 + 3*N_extra): exec_core, attempt_record, lease_current,\n-- lease_history, lease_expiry_zset, worker_leases,\n-- suspension_current, waitpoint_hash, waitpoint_signals,\n-- suspension_timeout_zset, pending_wp_expiry_zset,\n-- active_index, suspended_zset, waitpoint_history,\n-- wp_condition, attempt_timeout_zset, hmac_secrets,\n-- dedup_hash; then for each RFC-014 Pattern 3 additional\n-- waitpoint (wp_hash_extra, wp_signals_extra,\n-- wp_condition_extra).\n-- ARGV (19 + 1 + 2*N_extra): execution_id, attempt_index, attempt_id,\n-- lease_id, lease_epoch, suspension_id, waitpoint_id,\n-- waitpoint_key, reason_code, requested_by, timeout_at,\n-- resume_condition_json, resume_policy_json,\n-- continuation_metadata_pointer, use_pending_waitpoint,\n-- timeout_behavior, lease_history_maxlen, idempotency_key,\n-- dedup_ttl_ms, num_extra_waitpoints, then for each extra\n-- (waitpoint_id, waitpoint_key).\n---------------------------------------------------------------------------\nredis.register_function(\'ff_suspend_execution\', function(keys, args)\n local K = {\n core_key = keys[1],\n attempt_record = keys[2],\n lease_current_key = keys[3],\n lease_history_key = keys[4],\n lease_expiry_key = keys[5],\n worker_leases_key = keys[6],\n suspension_current = keys[7],\n waitpoint_hash = keys[8],\n waitpoint_signals = keys[9],\n 
suspension_timeout_key = keys[10],\n pending_wp_expiry_key = keys[11],\n active_index_key = keys[12],\n suspended_zset = keys[13],\n waitpoint_history = keys[14],\n wp_condition = keys[15],\n attempt_timeout_key = keys[16],\n hmac_secrets = keys[17],\n dedup_hash = keys[18],\n }\n\n local A = {\n execution_id = args[1],\n attempt_index = args[2],\n attempt_id = args[3] or \"\",\n lease_id = args[4] or \"\",\n lease_epoch = args[5] or \"\",\n suspension_id = args[6],\n waitpoint_id = args[7],\n waitpoint_key = args[8],\n reason_code = args[9],\n requested_by = args[10],\n timeout_at = args[11] or \"\",\n resume_condition_json = args[12],\n resume_policy_json = args[13],\n continuation_metadata_ptr = args[14] or \"\",\n use_pending_waitpoint = args[15] or \"\",\n timeout_behavior = args[16] or \"fail\",\n lease_history_maxlen = tonumber(args[17] or \"1000\"),\n idempotency_key = args[18] or \"\",\n dedup_ttl_ms = tonumber(args[19] or \"0\"),\n num_extra_waitpoints = tonumber(args[20] or \"0\"),\n }\n\n -- RFC-014 Pattern 3: additional-waitpoint bindings. Parsed up front\n -- so dedup hashing covers the full set (RFC-013 idempotency_key +\n -- RFC-014 multi-waitpoint widening).\n local extras = {}\n for i = 1, A.num_extra_waitpoints do\n local base_arg = 20 + (i - 1) * 2\n local ex_id = args[base_arg + 1]\n local ex_key = args[base_arg + 2]\n if not ex_id or ex_id == \"\" or not ex_key or ex_key == \"\" then\n return err(\"additional_waitpoint_binding_malformed\")\n end\n local base_key = 18 + (i - 1) * 3\n extras[#extras + 1] = {\n waitpoint_id = ex_id,\n waitpoint_key = ex_key,\n wp_hash_key = keys[base_key + 1],\n wp_signals_key = keys[base_key + 2],\n wp_condition_key = keys[base_key + 3],\n }\n end\n\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n -- RFC-013 \u{a7}9.2 \u{2014} idempotency_key dedup branch. When the caller set an\n -- idempotency_key, check the dedup hash first: a hit short-circuits with\n -- the previously-serialized outcome verbatim, performing no state\n -- mutation. On miss, fall through to the canonical path and write the\n -- outcome into the dedup hash with TTL after commit.\n --\n -- RFC-014 widens the format to carry additional waitpoint bindings:\n -- \"<status>\\t<suspension_id>\\t<waitpoint_id>\\t<waitpoint_key>\\t\n -- <waitpoint_token>\\t<N_extra>[\\t<ex_id>\\t<ex_key>\\t<ex_tok>]*\".\n -- Pattern-1/2 payloads with no extras have N_extra = 0.\n local _dedup_active = (A.idempotency_key ~= \"\" and K.dedup_hash and K.dedup_hash ~= \"\")\n if _dedup_active then\n local stored = redis.call(\"HGET\", K.dedup_hash, \"outcome\")\n if stored == false then stored = nil end\n if stored then\n -- Split on single \"\\t\". `string.gmatch(\"[^\\t]+\")` would drop\n -- empty fields (legal for e.g. 
an empty extras block).\n local parts = {}\n local pos = 1\n while pos <= #stored + 1 do\n local nxt = string.find(stored, \"\\t\", pos, true)\n if nxt then\n parts[#parts + 1] = string.sub(stored, pos, nxt - 1)\n pos = nxt + 1\n else\n parts[#parts + 1] = string.sub(stored, pos)\n break\n end\n end\n if #parts >= 5 then\n local status = parts[1]\n local susp_id = parts[2]\n local wp_id_out = parts[3]\n local wp_key_out = parts[4]\n local wp_tok_out = parts[5]\n local n_extra = tonumber(parts[6] or \"0\") or 0\n local extras_out = {}\n for i = 1, n_extra do\n local o = 6 + (i - 1) * 3\n extras_out[#extras_out + 1] = {\n waitpoint_id = parts[o + 1] or \"\",\n waitpoint_key = parts[o + 2] or \"\",\n waitpoint_token = parts[o + 3] or \"\",\n }\n end\n if status == \"ALREADY_SATISFIED\" then\n return ok_already_satisfied_extras(\n susp_id, wp_id_out, wp_key_out, wp_tok_out, extras_out)\n else\n return ok_extras(\n susp_id, wp_id_out, wp_key_out, wp_tok_out, extras_out)\n end\n end\n -- Malformed entry: fall through and treat as miss.\n end\n end\n\n -- 1. Read execution core\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then return err(\"execution_not_found\") end\n local core = hgetall_to_table(raw)\n\n -- RFC #58.5 \u{2014} suspend_execution is lease-bound. Hard-reject empty or\n -- partial fence triples; no operator-override escape hatch.\n local fence, must_check_or_err = resolve_lease_fence(core, A)\n if not fence then return must_check_or_err end\n if not must_check_or_err then return err(\"fence_required\") end\n\n -- 2. Validate lease (full check incl. expiry + revocation + identity)\n local lease_err = validate_lease_and_mark_expired(\n core, A, now_ms, K, A.lease_history_maxlen)\n if lease_err then return lease_err end\n\n -- 3. Validate attempt binding\n if tostring(core.current_attempt_index) ~= A.attempt_index then\n return err(\"invalid_lease_for_suspend\")\n end\n\n -- 4. Check for existing suspension: reject if open, archive if closed\n if redis.call(\"EXISTS\", K.suspension_current) == 1 then\n local closed = redis.call(\"HGET\", K.suspension_current, \"closed_at\")\n if not is_set(closed) then\n return err(\"already_suspended\")\n end\n -- Previous suspension is closed. Archive old waitpoint_id for cleanup,\n -- then DEL the stale record before creating a new one.\n local old_wp = redis.call(\"HGET\", K.suspension_current, \"waitpoint_id\")\n if is_set(old_wp) then\n redis.call(\"SADD\", K.waitpoint_history, old_wp)\n end\n redis.call(\"DEL\", K.suspension_current)\n end\n\n -- 5. Create or activate waitpoint\n local waitpoint_id = A.waitpoint_id\n local waitpoint_key = A.waitpoint_key\n local waitpoint_token = \"\"\n\n if A.use_pending_waitpoint == \"1\" then\n -- Activate existing pending waitpoint\n local wp_raw = redis.call(\"HGETALL\", K.waitpoint_hash)\n local wp_err = validate_pending_waitpoint(wp_raw, A.execution_id, A.attempt_index, now_ms)\n if wp_err then return wp_err end\n\n -- Read waitpoint_id, waitpoint_key, and existing token from pending record.\n -- Token was minted at ff_create_pending_waitpoint time with that record\'s\n -- created_at; we MUST keep using it so signals buffered before activation\n -- validate against the same binding.\n local wp = hgetall_to_table(wp_raw)\n waitpoint_id = wp.waitpoint_id\n waitpoint_key = wp.waitpoint_key\n -- A pending waitpoint without a minted token is either a pre-HMAC-upgrade\n -- record or a corrupted write. 
Activating with an empty token would return\n -- \"\" to the SDK, and every subsequent signal delivery would reject with\n -- missing_token \u{2014} fail-closed at the security boundary but silent about\n -- the real degraded state. Surface the degradation AT the activation\n -- point so operators see it immediately.\n if not is_set(wp.waitpoint_token) then\n return err(\"waitpoint_not_token_bound\")\n end\n waitpoint_token = wp.waitpoint_token\n\n -- Activate the pending waitpoint\n redis.call(\"HSET\", K.waitpoint_hash,\n \"suspension_id\", A.suspension_id,\n \"state\", \"active\",\n \"activated_at\", tostring(now_ms),\n \"expires_at\", is_set(A.timeout_at) and A.timeout_at or \"\")\n redis.call(\"ZREM\", K.pending_wp_expiry_key, waitpoint_id)\n\n -- CRITICAL: Evaluate buffered signals that arrived while waitpoint was pending.\n -- If early signals already satisfy the resume condition, skip suspension entirely.\n local buffered = redis.call(\"XRANGE\", K.waitpoint_signals, \"-\", \"+\")\n if #buffered > 0 then\n local wp_cond = initialize_condition(A.resume_condition_json)\n for _, entry in ipairs(buffered) do\n local fields = entry[2]\n local sig_name = extract_field(fields, \"signal_name\")\n local sig_id = extract_field(fields, \"signal_id\")\n evaluate_signal_against_condition(wp_cond, sig_name, sig_id)\n end\n if is_condition_satisfied(wp_cond) then\n -- Resume condition already met by buffered signals \u{2014} skip suspension.\n redis.call(\"HSET\", K.waitpoint_hash,\n \"state\", \"closed\", \"satisfied_at\", tostring(now_ms),\n \"closed_at\", tostring(now_ms), \"close_reason\", \"resumed\")\n write_condition_hash(K.wp_condition, wp_cond, now_ms)\n -- Do NOT release lease, do NOT change execution state.\n -- RFC-013 \u{a7}9.2 \u{2014} write dedup outcome. UsePending+extras is\n -- rejected upstream, so N_extra is always 0 here.\n if _dedup_active then\n local payload = \"ALREADY_SATISFIED\\t\" .. A.suspension_id .. \"\\t\" .. waitpoint_id ..\n \"\\t\" .. waitpoint_key .. \"\\t\" .. waitpoint_token .. \"\\t0\"\n redis.call(\"HSET\", K.dedup_hash, \"outcome\", payload)\n if A.dedup_ttl_ms > 0 then\n redis.call(\"PEXPIRE\", K.dedup_hash, A.dedup_ttl_ms)\n end\n end\n return ok_already_satisfied_extras(A.suspension_id, waitpoint_id, waitpoint_key, waitpoint_token, {})\n end\n -- Condition not yet satisfied \u{2014} proceed with suspension.\n -- Write partial condition state (some matchers may be satisfied).\n write_condition_hash(K.wp_condition, wp_cond, now_ms)\n else\n -- No buffered signals \u{2014} init condition from scratch\n local wp_cond = initialize_condition(A.resume_condition_json)\n write_condition_hash(K.wp_condition, wp_cond, now_ms)\n end\n else\n -- Create new waitpoint \u{2014} mint HMAC token bound to (waitpoint_id,\n -- waitpoint_key, created_at). 
The created_at written here is what the\n -- signal-delivery path reads back for HMAC validation.\n local token, token_err = mint_waitpoint_token(\n K.hmac_secrets, waitpoint_id, waitpoint_key, now_ms)\n if not token then return err(token_err) end\n waitpoint_token = token\n\n redis.call(\"HSET\", K.waitpoint_hash,\n \"waitpoint_id\", waitpoint_id,\n \"execution_id\", A.execution_id,\n \"attempt_index\", A.attempt_index,\n \"suspension_id\", A.suspension_id,\n \"waitpoint_key\", waitpoint_key,\n \"waitpoint_token\", waitpoint_token,\n \"state\", \"active\",\n \"created_at\", tostring(now_ms),\n \"activated_at\", tostring(now_ms),\n \"expires_at\", is_set(A.timeout_at) and A.timeout_at or \"\",\n \"signal_count\", \"0\",\n \"matched_signal_count\", \"0\",\n \"last_signal_at\", \"\")\n\n -- Initialize condition hash from resume condition spec\n local wp_cond = initialize_condition(A.resume_condition_json)\n write_condition_hash(K.wp_condition, wp_cond, now_ms)\n end\n\n -- 5b. RFC-014 Pattern 3: mint extras\' HMAC tokens + waitpoint hashes\n -- + wp_condition hashes. Each additional binding gets its own\n -- storage so the composite evaluator can SADD per-waitpoint\n -- satisfier tokens and the HMAC enforcement works uniformly for\n -- external signal delivery to ANY of the N waitpoints.\n -- UsePending with extras is rejected by the Rust validator.\n local extras_out = {}\n if A.use_pending_waitpoint ~= \"1\" then\n for _, ex in ipairs(extras) do\n local ex_token, ex_err = mint_waitpoint_token(\n K.hmac_secrets, ex.waitpoint_id, ex.waitpoint_key, now_ms)\n if not ex_token then return err(ex_err) end\n\n redis.call(\"HSET\", ex.wp_hash_key,\n \"waitpoint_id\", ex.waitpoint_id,\n \"execution_id\", A.execution_id,\n \"attempt_index\", A.attempt_index,\n \"suspension_id\", A.suspension_id,\n \"waitpoint_key\", ex.waitpoint_key,\n \"waitpoint_token\", ex_token,\n \"state\", \"active\",\n \"created_at\", tostring(now_ms),\n \"activated_at\", tostring(now_ms),\n \"expires_at\", is_set(A.timeout_at) and A.timeout_at or \"\",\n \"signal_count\", \"0\",\n \"matched_signal_count\", \"0\",\n \"last_signal_at\", \"\")\n\n -- Mirror the primary wp_condition hash shape so ff_deliver_signal\'s\n -- `wp_cond.composite == \"1\"` branch routes correctly when a signal\n -- lands on this extra waitpoint.\n local ex_wp_cond = initialize_condition(A.resume_condition_json)\n write_condition_hash(ex.wp_condition_key, ex_wp_cond, now_ms)\n\n redis.call(\"SADD\", K.waitpoint_history, ex.waitpoint_id)\n\n extras_out[#extras_out + 1] = {\n waitpoint_id = ex.waitpoint_id,\n waitpoint_key = ex.waitpoint_key,\n waitpoint_token = ex_token,\n }\n end\n elseif #extras > 0 then\n -- Defensive: Rust validator already rejects UsePending+extras, but\n -- fail loudly if a client bypasses Rust entirely.\n return err(\"use_pending_with_extras_unsupported\")\n end\n\n -- 6. Record waitpoint_id in mandatory history set (required for cleanup cascade)\n redis.call(\"SADD\", K.waitpoint_history, waitpoint_id)\n\n -- OOM-SAFE WRITE ORDERING (per RFC-010 \u{a7}4.8b):\n -- exec_core HSET is the \"point of no return\" \u{2014} write it FIRST.\n\n -- 7. Transition exec_core (FIRST \u{2014} point of no return, all 7 dims)\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"suspended\",\n \"ownership_state\", \"unowned\",\n \"eligibility_state\", \"not_applicable\",\n \"blocking_reason\", map_reason_to_blocking(A.reason_code),\n \"blocking_detail\", \"suspended: waitpoint \" .. waitpoint_id .. \" awaiting \" .. 
A.reason_code,\n \"terminal_outcome\", \"none\",\n \"attempt_state\", \"attempt_interrupted\",\n \"public_state\", \"suspended\",\n \"current_lease_id\", \"\",\n \"current_worker_id\", \"\",\n \"current_worker_instance_id\", \"\",\n \"lease_expires_at\", \"\",\n \"lease_last_renewed_at\", \"\",\n \"lease_renewal_deadline\", \"\",\n \"current_suspension_id\", A.suspension_id,\n \"current_waitpoint_id\", waitpoint_id,\n \"last_transition_at\", tostring(now_ms),\n \"last_mutation_at\", tostring(now_ms))\n\n -- 8. Pause the attempt: started -> suspended (RFC-002 suspend_attempt)\n redis.call(\"HSET\", K.attempt_record,\n \"attempt_state\", \"suspended\",\n \"suspended_at\", tostring(now_ms),\n \"suspension_id\", A.suspension_id)\n\n -- 9. Release lease + update indexes\n redis.call(\"DEL\", K.lease_current_key)\n redis.call(\"ZREM\", K.lease_expiry_key, A.execution_id)\n redis.call(\"SREM\", K.worker_leases_key, A.execution_id)\n redis.call(\"ZREM\", K.active_index_key, A.execution_id)\n redis.call(\"ZREM\", K.attempt_timeout_key, A.execution_id)\n redis.call(\"XADD\", K.lease_history_key, \"MAXLEN\", \"~\", A.lease_history_maxlen, \"*\",\n \"event\", \"released\",\n \"lease_id\", A.lease_id,\n \"lease_epoch\", A.lease_epoch,\n \"attempt_index\", A.attempt_index,\n \"attempt_id\", core.current_attempt_id or \"\",\n \"reason\", \"suspend\",\n \"ts\", tostring(now_ms))\n\n -- 10. Create suspension record. RFC-014: record the full list of\n -- additional waitpoint_ids (JSON array of \"<id>|<key>\" pairs) so\n -- cleanup owners (cancel / expire / resume) can iterate all\n -- per-waitpoint storage on terminal transitions without needing\n -- the caller to re-supply them.\n local add_json = \"[]\"\n if #extras_out > 0 then\n local pairs_list = {}\n for _, e in ipairs(extras_out) do\n pairs_list[#pairs_list + 1] = {\n waitpoint_id = e.waitpoint_id,\n waitpoint_key = e.waitpoint_key,\n }\n end\n add_json = cjson.encode(pairs_list)\n end\n redis.call(\"HSET\", K.suspension_current,\n \"suspension_id\", A.suspension_id,\n \"execution_id\", A.execution_id,\n \"attempt_index\", A.attempt_index,\n \"waitpoint_id\", waitpoint_id,\n \"waitpoint_key\", waitpoint_key,\n \"additional_waitpoints_json\", add_json,\n \"reason_code\", A.reason_code,\n \"requested_by\", A.requested_by,\n \"created_at\", tostring(now_ms),\n \"timeout_at\", A.timeout_at,\n \"timeout_behavior\", A.timeout_behavior,\n \"resume_condition_json\", A.resume_condition_json,\n \"resume_policy_json\", A.resume_policy_json,\n \"continuation_metadata_pointer\", A.continuation_metadata_ptr,\n \"buffered_signal_summary_json\", initial_signal_summary_json(),\n \"last_signal_at\", \"\",\n \"satisfied_at\", \"\",\n \"closed_at\", \"\",\n \"close_reason\", \"\")\n\n -- 10b. RFC-014 \u{a7}3.1: seed composite member_map (write-once) when the\n -- resume condition carries a composite tree. No-op for single-matcher\n -- / operator / timeout conditions. Pattern 3 \u{2014} pass every binding\'s\n -- waitpoint_key so candidate-node lookup covers all N leaves.\n do\n local spec_ok, spec = pcall(cjson.decode, A.resume_condition_json)\n if spec_ok and type(spec) == \"table\" and spec.composite then\n local all_keys = { waitpoint_key }\n for _, e in ipairs(extras_out) do\n all_keys[#all_keys + 1] = e.waitpoint_key\n end\n seed_composite_member_map(\n K.suspension_current .. \":member_map\",\n spec.tree,\n all_keys)\n end\n end\n\n -- 11. 
Add to per-lane suspended index + suspension timeout\n -- Score: timeout_at if set, otherwise MAX for \"no timeout\" ordering\n redis.call(\"ZADD\", K.suspended_zset,\n is_set(A.timeout_at) and tonumber(A.timeout_at) or 9999999999999,\n A.execution_id)\n\n if is_set(A.timeout_at) then\n redis.call(\"ZADD\", K.suspension_timeout_key, tonumber(A.timeout_at), A.execution_id)\n end\n\n -- RFC-013 \u{a7}9.2 + RFC-014 \u{2014} write dedup outcome. Includes extras tail.\n if _dedup_active then\n local pieces = {\n \"OK\", A.suspension_id, waitpoint_id, waitpoint_key, waitpoint_token,\n tostring(#extras_out),\n }\n for _, e in ipairs(extras_out) do\n pieces[#pieces + 1] = e.waitpoint_id\n pieces[#pieces + 1] = e.waitpoint_key\n pieces[#pieces + 1] = e.waitpoint_token\n end\n redis.call(\"HSET\", K.dedup_hash, \"outcome\", table.concat(pieces, \"\\t\"))\n if A.dedup_ttl_ms > 0 then\n redis.call(\"PEXPIRE\", K.dedup_hash, A.dedup_ttl_ms)\n end\n end\n\n return ok_extras(A.suspension_id, waitpoint_id, waitpoint_key, waitpoint_token, extras_out)\nend)\n\n---------------------------------------------------------------------------\n-- #14 ff_resume_execution\n--\n-- Transition suspended \u{2192} runnable. Called after signal satisfies condition\n-- or by operator override. Closes suspension + waitpoint, updates indexes.\n--\n-- KEYS (8): exec_core, suspension_current, waitpoint_hash,\n-- waitpoint_signals, suspension_timeout_zset,\n-- eligible_zset, delayed_zset, suspended_zset\n-- ARGV (3): execution_id, trigger_type, resume_delay_ms\n---------------------------------------------------------------------------\nredis.register_function(\'ff_resume_execution\', function(keys, args)\n local K = {\n core_key = keys[1],\n suspension_current = keys[2],\n waitpoint_hash = keys[3],\n waitpoint_signals = keys[4],\n suspension_timeout_key = keys[5],\n eligible_zset = keys[6],\n delayed_zset = keys[7],\n suspended_zset = keys[8],\n }\n\n local A = {\n execution_id = args[1],\n trigger_type = args[2] or \"signal\", -- \"signal\", \"operator\", \"auto_resume\"\n resume_delay_ms = tonumber(args[3] or \"0\"),\n }\n\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n -- 1. Read + validate execution is suspended\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then return err(\"execution_not_found\") end\n local core = hgetall_to_table(raw)\n\n if core.lifecycle_phase ~= \"suspended\" then\n return err(\"execution_not_suspended\")\n end\n\n -- 2. Validate active suspension\n local susp_raw = redis.call(\"HGETALL\", K.suspension_current)\n local susp_err, susp = assert_active_suspension(susp_raw)\n if susp_err then return susp_err end\n\n -- 3. Compute eligibility based on resume_delay_ms\n local eligibility_state = \"eligible_now\"\n local blocking_reason = \"waiting_for_worker\"\n local blocking_detail = \"\"\n local public_state = \"waiting\"\n\n if A.resume_delay_ms > 0 then\n eligibility_state = \"not_eligible_until_time\"\n blocking_reason = \"waiting_for_resume_delay\"\n blocking_detail = \"resume delay \" .. tostring(A.resume_delay_ms) .. \"ms\"\n public_state = \"delayed\"\n end\n\n -- OOM-SAFE WRITE ORDERING: exec_core FIRST (point of no return)\n\n -- 4. 
Transition exec_core (FIRST \u{2014} all 7 dims)\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"runnable\",\n \"ownership_state\", \"unowned\",\n \"eligibility_state\", eligibility_state,\n \"blocking_reason\", blocking_reason,\n \"blocking_detail\", blocking_detail,\n \"terminal_outcome\", \"none\",\n \"attempt_state\", \"attempt_interrupted\",\n \"public_state\", public_state,\n \"current_suspension_id\", \"\",\n \"current_waitpoint_id\", \"\",\n \"last_transition_at\", tostring(now_ms),\n \"last_mutation_at\", tostring(now_ms))\n\n -- 5. Update scheduling indexes\n redis.call(\"ZREM\", K.suspended_zset, A.execution_id)\n if A.resume_delay_ms > 0 then\n redis.call(\"ZADD\", K.delayed_zset,\n now_ms + A.resume_delay_ms, A.execution_id)\n else\n -- ZADD eligible with composite priority score\n local priority = tonumber(core.priority or \"0\")\n local created_at = tonumber(core.created_at or \"0\")\n local score = 0 - (priority * 1000000000000) + created_at\n redis.call(\"ZADD\", K.eligible_zset, score, A.execution_id)\n end\n\n -- 6. Close sub-objects (safe to lose on OOM \u{2014} stale but not zombie)\n -- Close waitpoint\n redis.call(\"HSET\", K.waitpoint_hash,\n \"state\", \"closed\",\n \"satisfied_at\", tostring(now_ms),\n \"closed_at\", tostring(now_ms),\n \"close_reason\", \"resumed\")\n\n -- Close suspension\n redis.call(\"HSET\", K.suspension_current,\n \"satisfied_at\", tostring(now_ms),\n \"closed_at\", tostring(now_ms),\n \"close_reason\", \"resumed\")\n\n -- Remove from suspension timeout index\n redis.call(\"ZREM\", K.suspension_timeout_key, A.execution_id)\n\n -- RFC-014 \u{a7}3.1.1 composite cleanup owner: operator-driven resume path.\n composite_cleanup(\n K.suspension_current .. \":satisfied_set\",\n K.suspension_current .. \":member_map\")\n\n -- RFC-014 Pattern 3: close any additional waitpoints co-owned by\n -- this suspension so their HMAC tokens can no longer authenticate\n -- signal delivery once the suspension has resumed.\n close_additional_waitpoints(\n K.suspension_current,\n susp.additional_waitpoints_json or \"\",\n { \"state\", \"closed\",\n \"satisfied_at\", tostring(now_ms),\n \"closed_at\", tostring(now_ms),\n \"close_reason\", \"resumed\" },\n { \"closed\", \"1\",\n \"closed_at\", tostring(now_ms),\n \"closed_reason\", \"resumed\" })\n\n return ok(public_state)\nend)\n\n---------------------------------------------------------------------------\n-- #15 ff_create_pending_waitpoint\n--\n-- Pre-create a waitpoint before suspension commits. 
The waitpoint is\n-- externally addressable by waitpoint_key so early signals can be buffered.\n-- Requires the caller to still hold the active lease.\n-- Mints the waitpoint HMAC token up front so early signals targeting the\n-- pending waitpoint can be authenticated via ff_buffer_signal_for_pending_waitpoint.\n--\n-- KEYS (4): exec_core, waitpoint_hash, pending_wp_expiry_zset, hmac_secrets\n-- ARGV (5): execution_id, attempt_index, waitpoint_id, waitpoint_key,\n-- expires_at\n---------------------------------------------------------------------------\nredis.register_function(\'ff_create_pending_waitpoint\', function(keys, args)\n local K = {\n core_key = keys[1],\n waitpoint_hash = keys[2],\n pending_wp_expiry_key = keys[3],\n hmac_secrets = keys[4],\n }\n\n local A = {\n execution_id = args[1],\n attempt_index = args[2],\n waitpoint_id = args[3],\n waitpoint_key = args[4],\n expires_at = args[5],\n }\n\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n -- 1. Validate execution is active with a lease\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then return err(\"execution_not_found\") end\n local core = hgetall_to_table(raw)\n\n if core.lifecycle_phase ~= \"active\" then\n return err(\"execution_not_active\",\n core.terminal_outcome or \"\",\n core.current_lease_epoch or \"\",\n core.lifecycle_phase or \"\",\n core.current_attempt_id or \"\")\n end\n if core.ownership_state ~= \"leased\" then\n return err(\"no_active_lease\")\n end\n -- Validate attempt binding\n if tostring(core.current_attempt_index) ~= A.attempt_index then\n return err(\"stale_lease\")\n end\n\n -- 2. Guard: waitpoint already exists\n if redis.call(\"EXISTS\", K.waitpoint_hash) == 1 then\n local existing_state = redis.call(\"HGET\", K.waitpoint_hash, \"state\")\n if existing_state == \"pending\" or existing_state == \"active\" then\n return err(\"waitpoint_already_exists\")\n end\n -- Old closed/expired waitpoint \u{2014} safe to overwrite\n end\n\n -- 3. Mint HMAC token bound to (waitpoint_id, waitpoint_key, now_ms).\n -- The suspension activation path will reuse this token unchanged.\n local waitpoint_token, token_err = mint_waitpoint_token(\n K.hmac_secrets, A.waitpoint_id, A.waitpoint_key, now_ms)\n if not waitpoint_token then return err(token_err) end\n\n -- 4. Create pending waitpoint\n redis.call(\"HSET\", K.waitpoint_hash,\n \"waitpoint_id\", A.waitpoint_id,\n \"execution_id\", A.execution_id,\n \"attempt_index\", A.attempt_index,\n \"suspension_id\", \"\",\n \"waitpoint_key\", A.waitpoint_key,\n \"waitpoint_token\", waitpoint_token,\n \"state\", \"pending\",\n \"created_at\", tostring(now_ms),\n \"activated_at\", \"\",\n \"satisfied_at\", \"\",\n \"closed_at\", \"\",\n \"expires_at\", A.expires_at,\n \"close_reason\", \"\",\n \"signal_count\", \"0\",\n \"matched_signal_count\", \"0\",\n \"last_signal_at\", \"\")\n\n -- 5. 
Add to pending waitpoint expiry index\n redis.call(\"ZADD\", K.pending_wp_expiry_key,\n tonumber(A.expires_at), A.waitpoint_id)\n\n return ok(A.waitpoint_id, A.waitpoint_key, waitpoint_token)\nend)\n\n---------------------------------------------------------------------------\n-- #16/#19 ff_expire_suspension (Overlap group D \u{2014} one script)\n--\n-- Apply timeout behavior when suspension timeout fires.\n-- Re-validates that execution is still suspended and timeout is due.\n-- Handles all 5 timeout behaviors:\n-- fail \u{2192} terminal(failed)\n-- cancel \u{2192} terminal(cancelled)\n-- expire \u{2192} terminal(expired)\n-- auto_resume \u{2192} close + resume to runnable\n-- escalate \u{2192} mutate suspension to operator-review\n--\n-- KEYS (12): exec_core, suspension_current, waitpoint_hash, wp_condition,\n-- attempt_hash, stream_meta, suspension_timeout_zset,\n-- suspended_zset, terminal_zset, eligible_zset, delayed_zset,\n-- lease_history\n-- ARGV (1): execution_id\n---------------------------------------------------------------------------\nredis.register_function(\'ff_expire_suspension\', function(keys, args)\n local K = {\n core_key = keys[1],\n suspension_current = keys[2],\n waitpoint_hash = keys[3],\n wp_condition = keys[4],\n attempt_hash = keys[5],\n stream_meta = keys[6],\n suspension_timeout_key = keys[7],\n suspended_zset = keys[8],\n terminal_key = keys[9],\n eligible_zset = keys[10],\n delayed_zset = keys[11],\n lease_history_key = keys[12],\n }\n\n local A = {\n execution_id = args[1],\n }\n\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n -- 1. Read + validate execution is still suspended\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then\n redis.call(\"ZREM\", K.suspension_timeout_key, A.execution_id)\n return ok(\"not_found_cleaned\")\n end\n local core = hgetall_to_table(raw)\n\n if core.lifecycle_phase ~= \"suspended\" then\n redis.call(\"ZREM\", K.suspension_timeout_key, A.execution_id)\n return ok(\"not_suspended_cleaned\")\n end\n\n -- 2. Read suspension and validate it\'s still open\n local susp_raw = redis.call(\"HGETALL\", K.suspension_current)\n local susp_err, susp = assert_active_suspension(susp_raw)\n if susp_err then\n redis.call(\"ZREM\", K.suspension_timeout_key, A.execution_id)\n return ok(\"no_active_suspension_cleaned\")\n end\n\n -- 3. Check timeout is actually due\n local timeout_at = tonumber(susp.timeout_at or \"0\")\n if timeout_at == 0 or timeout_at > now_ms then\n return ok(\"not_yet_due\")\n end\n\n -- 4. Read timeout behavior\n local behavior = susp.timeout_behavior or \"fail\"\n\n -- 5. 
Apply behavior\n if behavior == \"auto_resume\" or behavior == \"auto_resume_with_timeout_signal\" then\n -- auto_resume: close suspension + resume to runnable (like ff_resume_execution)\n\n -- OOM-SAFE: exec_core FIRST\n local priority = tonumber(core.priority or \"0\")\n local created_at = tonumber(core.created_at or \"0\")\n local score = 0 - (priority * 1000000000000) + created_at\n\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"runnable\",\n \"ownership_state\", \"unowned\",\n \"eligibility_state\", \"eligible_now\",\n \"blocking_reason\", \"waiting_for_worker\",\n \"blocking_detail\", \"\",\n \"terminal_outcome\", \"none\",\n \"attempt_state\", \"attempt_interrupted\",\n \"public_state\", \"waiting\",\n \"current_suspension_id\", \"\",\n \"current_waitpoint_id\", \"\",\n \"last_transition_at\", tostring(now_ms),\n \"last_mutation_at\", tostring(now_ms))\n\n -- Update indexes\n redis.call(\"ZREM\", K.suspended_zset, A.execution_id)\n redis.call(\"ZADD\", K.eligible_zset, score, A.execution_id)\n\n -- Close sub-objects\n redis.call(\"HSET\", K.waitpoint_hash,\n \"state\", \"closed\",\n \"closed_at\", tostring(now_ms),\n \"close_reason\", \"timed_out_auto_resume\")\n redis.call(\"HSET\", K.wp_condition,\n \"closed\", \"1\",\n \"closed_at\", tostring(now_ms),\n \"closed_reason\", \"timed_out_auto_resume\")\n redis.call(\"HSET\", K.suspension_current,\n \"closed_at\", tostring(now_ms),\n \"close_reason\", \"timed_out_auto_resume\")\n\n redis.call(\"ZREM\", K.suspension_timeout_key, A.execution_id)\n\n -- RFC-014 \u{a7}3.1.1 composite cleanup owner: expire (auto_resume) path.\n composite_cleanup(\n K.suspension_current .. \":satisfied_set\",\n K.suspension_current .. \":member_map\")\n close_additional_waitpoints(\n K.suspension_current,\n susp.additional_waitpoints_json or \"\",\n { \"state\", \"closed\",\n \"closed_at\", tostring(now_ms),\n \"close_reason\", \"timed_out_auto_resume\" },\n { \"closed\", \"1\",\n \"closed_at\", tostring(now_ms),\n \"closed_reason\", \"timed_out_auto_resume\" })\n\n return ok(\"auto_resume\", \"waiting\")\n\n elseif behavior == \"escalate\" then\n -- escalate: mutate suspension to operator-review, keep suspended (ALL 7 dims)\n\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"suspended\", -- preserve\n \"ownership_state\", \"unowned\", -- preserve\n \"eligibility_state\", \"not_applicable\", -- preserve\n \"blocking_reason\", \"paused_by_operator\",\n \"blocking_detail\", \"suspension escalated: timeout at \" .. 
tostring(timeout_at),\n \"terminal_outcome\", \"none\", -- preserve\n \"attempt_state\", core.attempt_state or \"attempt_interrupted\", -- preserve\n \"public_state\", \"suspended\", -- preserve\n \"last_mutation_at\", tostring(now_ms))\n\n redis.call(\"HSET\", K.suspension_current,\n \"reason_code\", \"waiting_for_operator_review\",\n \"timeout_at\", \"\",\n \"timeout_behavior\", \"\")\n\n -- Remove from timeout index (no longer has a timeout)\n redis.call(\"ZREM\", K.suspension_timeout_key, A.execution_id)\n\n return ok(\"escalate\", \"suspended\")\n\n else\n -- Terminal paths: fail, cancel, expire\n\n local terminal_outcome\n local public_state_val\n local close_reason\n\n if behavior == \"cancel\" then\n terminal_outcome = \"cancelled\"\n public_state_val = \"cancelled\"\n close_reason = \"timed_out_cancel\"\n elseif behavior == \"expire\" then\n terminal_outcome = \"expired\"\n public_state_val = \"expired\"\n close_reason = \"timed_out_expire\"\n else\n -- Default: fail\n terminal_outcome = \"failed\"\n public_state_val = \"failed\"\n close_reason = \"timed_out_fail\"\n end\n\n -- OOM-SAFE: exec_core FIRST (\u{a7}4.8b Rule 2)\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"terminal\",\n \"ownership_state\", \"unowned\",\n \"eligibility_state\", \"not_applicable\",\n \"blocking_reason\", \"none\",\n \"blocking_detail\", \"\",\n \"terminal_outcome\", terminal_outcome,\n \"attempt_state\", \"attempt_terminal\",\n \"public_state\", public_state_val,\n \"failure_reason\", \"suspension_timeout:\" .. behavior,\n \"completed_at\", tostring(now_ms),\n \"current_suspension_id\", \"\",\n \"current_waitpoint_id\", \"\",\n \"last_transition_at\", tostring(now_ms),\n \"last_mutation_at\", tostring(now_ms))\n\n -- End attempt: suspended \u{2192} ended_failure/ended_cancelled\n if is_set(core.current_attempt_index) then\n local att_end_state = \"ended_failure\"\n if behavior == \"cancel\" then\n att_end_state = \"ended_cancelled\"\n end\n redis.call(\"HSET\", K.attempt_hash,\n \"attempt_state\", att_end_state,\n \"ended_at\", tostring(now_ms),\n \"failure_reason\", \"suspension_timeout:\" .. behavior,\n \"suspended_at\", \"\",\n \"suspension_id\", \"\")\n end\n\n -- Close stream if exists\n if redis.call(\"EXISTS\", K.stream_meta) == 1 then\n redis.call(\"HSET\", K.stream_meta,\n \"closed_at\", tostring(now_ms),\n \"closed_reason\", \"suspension_timeout\")\n end\n\n -- Close sub-objects\n redis.call(\"HSET\", K.waitpoint_hash,\n \"state\", \"closed\",\n \"closed_at\", tostring(now_ms),\n \"close_reason\", close_reason)\n redis.call(\"HSET\", K.wp_condition,\n \"closed\", \"1\",\n \"closed_at\", tostring(now_ms),\n \"closed_reason\", close_reason)\n redis.call(\"HSET\", K.suspension_current,\n \"closed_at\", tostring(now_ms),\n \"close_reason\", close_reason)\n\n -- RFC-014 \u{a7}3.1.1 composite cleanup owner: expire (terminal) paths.\n composite_cleanup(\n K.suspension_current .. \":satisfied_set\",\n K.suspension_current .. 
\":member_map\")\n close_additional_waitpoints(\n K.suspension_current,\n susp.additional_waitpoints_json or \"\",\n { \"state\", \"closed\",\n \"closed_at\", tostring(now_ms),\n \"close_reason\", close_reason },\n { \"closed\", \"1\",\n \"closed_at\", tostring(now_ms),\n \"closed_reason\", close_reason })\n\n -- Remove from suspension indexes, add to terminal\n redis.call(\"ZREM\", K.suspension_timeout_key, A.execution_id)\n redis.call(\"ZREM\", K.suspended_zset, A.execution_id)\n redis.call(\"ZADD\", K.terminal_key, now_ms, A.execution_id)\n\n -- Push-based DAG promotion (bridge-event gap report \u{a7}1.3 analogue).\n -- Suspension-timeout terminals (fail / cancel / expire) are FF-\n -- initiated transitions that cairn cannot observe via its\n -- call-then-emit pattern. Without a PUBLISH, flow-bound children\n -- only unblock via the dependency_reconciler safety net (15s).\n -- Gated on `is_set(core.flow_id)` \u{2014} standalone executions never\n -- have downstream edges. Outcome matches terminal_outcome\n -- (\"failed\" / \"cancelled\" / \"expired\").\n if is_set(core.flow_id) then\n local payload = cjson.encode({\n execution_id = A.execution_id,\n flow_id = core.flow_id,\n outcome = terminal_outcome,\n })\n redis.call(\"PUBLISH\", \"ff:dag:completions\", payload)\n end\n\n return ok(behavior, public_state_val)\n end\nend)\n\n---------------------------------------------------------------------------\n-- #36 ff_close_waitpoint\n--\n-- Proactive close of pending or active waitpoint. Used by workers that\n-- created a pending waitpoint but decided not to suspend.\n--\n-- KEYS (3): exec_core, waitpoint_hash, pending_wp_expiry_zset\n-- ARGV (2): waitpoint_id, reason\n---------------------------------------------------------------------------\nredis.register_function(\'ff_close_waitpoint\', function(keys, args)\n local K = {\n core_key = keys[1],\n waitpoint_hash = keys[2],\n pending_wp_expiry_key = keys[3],\n }\n\n local A = {\n waitpoint_id = args[1],\n reason = args[2] or \"proactive_close\",\n }\n\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n -- 1. Read waitpoint\n local wp_raw = redis.call(\"HGETALL\", K.waitpoint_hash)\n if #wp_raw == 0 then\n return err(\"waitpoint_not_found\")\n end\n local wp = hgetall_to_table(wp_raw)\n\n -- 2. Validate state is pending or active\n if wp.state ~= \"pending\" and wp.state ~= \"active\" then\n if wp.state == \"closed\" or wp.state == \"expired\" then\n return ok(\"already_closed\")\n end\n return err(\"waitpoint_not_open\")\n end\n\n -- 3. Close the waitpoint\n redis.call(\"HSET\", K.waitpoint_hash,\n \"state\", \"closed\",\n \"closed_at\", tostring(now_ms),\n \"close_reason\", A.reason)\n\n -- 4. Remove from pending expiry index (no-op if not pending)\n redis.call(\"ZREM\", K.pending_wp_expiry_key, A.waitpoint_id)\n\n return ok()\nend)\n\n---------------------------------------------------------------------------\n-- ff_rotate_waitpoint_hmac_secret\n--\n-- Install a new waitpoint HMAC signing kid on a single partition. The\n-- server-side admin endpoint (ff-server POST /v1/admin/rotate-waitpoint-secret)\n-- delegates to this FCALL per partition. Direct-Valkey consumers (e.g.\n-- cairn-rs, issue #49) invoke it themselves across every partition.\n--\n-- FCALL atomicity is per-shard and per-call; previous implementations used\n-- a SETNX lock + read-modify-write from Rust. 
Here the script IS the\n-- atomicity boundary, so no lock is needed.\n--\n-- KEYS (1): hmac_secrets (ff:sec:{p:N}:waitpoint_hmac)\n-- ARGV (3): new_kid, new_secret_hex, grace_ms\n--\n-- `now_ms` is derived server-side from `redis.call(\"TIME\")` to match\n-- the rest of the library (lua/flow.lua, validate_waitpoint_token), so\n-- GC and validation agree on \"now\". `grace_ms` is a duration, not a\n-- clock value, so taking it from ARGV is safe \u{2014} operators set it via\n-- FF_WAITPOINT_HMAC_GRACE_MS (ff-server) or pass their own value.\n--\n-- Outcomes:\n-- ok(\"rotated\", previous_kid_or_empty, new_kid, gc_count)\n-- ok(\"noop\", kid) -- exact replay (same kid + secret)\n-- err(\"rotation_conflict\", kid) -- same kid, different secret\n-- err(\"invalid_kid\") -- empty or contains \':\'\n-- err(\"invalid_secret_hex\") -- empty / odd length / non-hex\n-- err(\"invalid_grace_ms\") -- not a non-negative integer\n--\n-- Authoritative implementation of waitpoint HMAC rotation: idempotent\n-- replay, torn-write repair, orphan GC across expired kids. INVARIANT:\n-- expires_at:<new_kid> is never written (current_kid has no expiry).\n-- ff-server\'s admin endpoint delegates to this FCALL \u{2014} single source\n-- of truth lives here, not in Rust.\n---------------------------------------------------------------------------\nredis.register_function(\'ff_rotate_waitpoint_hmac_secret\', function(keys, args)\n local hmac_key = keys[1]\n local new_kid = args[1]\n local new_secret_hex = args[2]\n local grace_ms_s = args[3]\n\n -- Combined validation (feedback: single condition is more readable).\n if type(new_kid) ~= \"string\" or new_kid == \"\" or new_kid:find(\":\", 1, true) then\n return err(\"invalid_kid\")\n end\n if type(new_secret_hex) ~= \"string\"\n or new_secret_hex == \"\"\n or #new_secret_hex % 2 ~= 0\n or new_secret_hex:find(\"[^0-9a-fA-F]\") then\n return err(\"invalid_secret_hex\")\n end\n\n -- grace_ms must be a finite non-negative integer. The math.floor check\n -- rejects decimals but NOT infinities (math.floor(math.huge) == math.huge\n -- which would stamp \"inf\" into expires_at:*). Cap at 2^53-1 so the\n -- stored value stays within the IEEE-754 double-precision integer range\n -- AND within i64, keeping the Rust parser happy. 2^53-1 ms is ~285 years,\n -- far beyond any operational grace window.\n local grace_ms = tonumber(grace_ms_s)\n if not grace_ms\n or grace_ms ~= grace_ms -- NaN\n or grace_ms < 0\n or grace_ms > 9007199254740991 -- 2^53 - 1\n or grace_ms ~= math.floor(grace_ms) then\n return err(\"invalid_grace_ms\")\n end\n\n -- Server-side time via redis.call(\"TIME\"); never trust caller-supplied\n -- timestamps for expiry decisions (consistency with flow.lua + helpers.lua\n -- and with validate_waitpoint_token).\n local now_ms = server_time_ms()\n local prev_expires_at = now_ms + grace_ms\n\n local current_kid = redis.call(\"HGET\", hmac_key, \"current_kid\")\n if current_kid == false then current_kid = nil end\n\n -- Idempotency branch: same kid already installed.\n if current_kid == new_kid then\n local stored = redis.call(\"HGET\", hmac_key, \"secret:\" .. new_kid)\n if stored == false then stored = nil end\n if stored == new_secret_hex then\n return ok(\"noop\", new_kid)\n elseif stored then\n return err(\"rotation_conflict\", new_kid)\n else\n -- Torn-write repair: current_kid=new_kid but secret:<new_kid> missing.\n redis.call(\"HSET\", hmac_key, \"secret:\" .. 
new_kid, new_secret_hex)\n return ok(\"noop\", new_kid)\n end\n end\n\n -- Orphan GC: HGETALL once, collect kids whose expires_at has passed,\n -- then a single HDEL. One entry per distinct kid ever installed plus\n -- a handful of scalars; bounded in practice.\n local raw = redis.call(\"HGETALL\", hmac_key)\n local expired_fields = {}\n local gc_count = 0\n for i = 1, #raw, 2 do\n local field = raw[i]\n local value = raw[i + 1]\n if field:sub(1, 11) == \"expires_at:\" then\n local kid = field:sub(12)\n local exp = tonumber(value)\n -- GC strictness MUST match validate_waitpoint_token\'s `exp < now_ms`\n -- (lua/helpers.lua). Reaping on `exp <= now_ms` would delete a kid\n -- that the validator still considers in-grace at the boundary\n -- exp == now_ms, causing tokens that should validate to fail.\n if not exp or exp <= 0 or exp < now_ms then\n expired_fields[#expired_fields + 1] = \"expires_at:\" .. kid\n expired_fields[#expired_fields + 1] = \"secret:\" .. kid\n gc_count = gc_count + 1\n end\n end\n end\n if #expired_fields > 0 then\n redis.call(\"HDEL\", hmac_key, unpack(expired_fields))\n end\n\n -- Promote current \u{2192} previous. INVARIANT: expires_at:<new_kid> is NEVER\n -- written \u{2014} current_kid has no expiry entry.\n local prev_expires_str = tostring(prev_expires_at)\n if current_kid then\n redis.call(\"HSET\", hmac_key,\n \"previous_kid\", current_kid,\n \"previous_expires_at\", prev_expires_str,\n \"expires_at:\" .. current_kid, prev_expires_str,\n \"current_kid\", new_kid,\n \"secret:\" .. new_kid, new_secret_hex)\n else\n redis.call(\"HSET\", hmac_key,\n \"current_kid\", new_kid,\n \"secret:\" .. new_kid, new_secret_hex)\n end\n\n return ok(\"rotated\", current_kid or \"\", new_kid, tostring(gc_count))\nend)\n\n---------------------------------------------------------------------------\n-- ff_list_waitpoint_hmac_kids\n--\n-- Read-back for the waitpoint HMAC keystore. Insulates consumers (cairn\n-- admin UI, ff-server audit surface) from the hash-field naming so future\n-- layout changes don\'t break them.\n--\n-- KEYS (1): hmac_secrets (ff:sec:{p:N}:waitpoint_hmac)\n-- ARGV: none\n--\n-- Returns ok(current_kid_or_empty, n, kid1, exp1_ms, kid2, exp2_ms, ...)\n-- \"verifying\" kids = those with a FUTURE expires_at:<kid> entry. Kids\n-- whose grace has already elapsed are NOT reported here \u{2014} the contract\n-- promises kids that still validate tokens, so listing expired kids\n-- would mislead operators. Expired entries are swept by orphan GC on\n-- the next rotation. 
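--\n-- Illustrative reply (kids and expiry timestamps are hypothetical): one\n-- rotated-out kid still inside its grace window would read back as\n--   ok(\"kid-2025-01\", \"1\", \"kid-2024-12\", \"1767225600000\")\n--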
current_kid is excluded (it never has an expiry).\n-- Uninitialized \u{2192} ok(\"\", 0).\n---------------------------------------------------------------------------\nredis.register_function(\'ff_list_waitpoint_hmac_kids\', function(keys, args)\n local hmac_key = keys[1]\n local raw = redis.call(\"HGETALL\", hmac_key)\n local now_ms = server_time_ms()\n\n local current_kid = \"\"\n local pairs_out = {}\n local n = 0\n for i = 1, #raw, 2 do\n local field = raw[i]\n local value = raw[i + 1]\n if field == \"current_kid\" then\n current_kid = value\n elseif field:sub(1, 11) == \"expires_at:\" then\n local kid = field:sub(12)\n local exp = tonumber(value)\n -- Match validator\'s `exp < now_ms` rejection rule so a kid listed\n -- as verifying really does still validate tokens at call time.\n if exp and exp > 0 and exp >= now_ms then\n pairs_out[#pairs_out + 1] = kid\n pairs_out[#pairs_out + 1] = value\n n = n + 1\n end\n end\n end\n\n return ok(current_kid, tostring(n), unpack(pairs_out))\nend)\n\n\n-- source: lua/signal.lua\n-- FlowFabric signal delivery and resume-claim functions\n-- Reference: RFC-005 (Signal), RFC-001 (Execution), RFC-010 \u{a7}4.1 (#17, #18, #2)\n--\n-- Depends on helpers: ok, err, ok_duplicate, hgetall_to_table, is_set,\n-- initialize_condition, write_condition_hash, evaluate_signal_against_condition,\n-- is_condition_satisfied, extract_field\n\n---------------------------------------------------------------------------\n-- #17 ff_deliver_signal\n--\n-- Atomic signal delivery: validate target, check idempotency, record\n-- signal, evaluate resume condition, optionally close waitpoint +\n-- suspension + transition suspended -> runnable.\n--\n-- KEYS (15): exec_core, wp_condition, wp_signals_stream,\n-- exec_signals_zset, signal_hash, signal_payload,\n-- idem_key, waitpoint_hash, suspension_current,\n-- eligible_zset, suspended_zset, delayed_zset,\n-- suspension_timeout_zset, hmac_secrets,\n-- partition_signal_delivery_stream (RFC-019 Stage B / #310)\n-- ARGV (18): signal_id, execution_id, waitpoint_id, signal_name,\n-- signal_category, source_type, source_identity,\n-- payload, payload_encoding, idempotency_key,\n-- correlation_id, target_scope, created_at,\n-- dedup_ttl_ms, resume_delay_ms, signal_maxlen,\n-- max_signals_per_execution, waitpoint_token\n---------------------------------------------------------------------------\nredis.register_function(\'ff_deliver_signal\', function(keys, args)\n local K = {\n core_key = keys[1],\n wp_condition = keys[2],\n wp_signals_stream = keys[3],\n exec_signals_zset = keys[4],\n signal_hash = keys[5],\n signal_payload = keys[6],\n idem_key = keys[7],\n waitpoint_hash = keys[8],\n suspension_current = keys[9],\n eligible_zset = keys[10],\n suspended_zset = keys[11],\n delayed_zset = keys[12],\n suspension_timeout_zset = keys[13],\n hmac_secrets = keys[14],\n partition_signal_delivery_stream = keys[15],\n }\n\n local A = {\n signal_id = args[1],\n execution_id = args[2],\n waitpoint_id = args[3],\n signal_name = args[4],\n signal_category = args[5],\n source_type = args[6],\n source_identity = args[7],\n payload = args[8] or \"\",\n payload_encoding = args[9] or \"json\",\n idempotency_key = args[10] or \"\",\n correlation_id = args[11] or \"\",\n target_scope = args[12] or \"waitpoint\",\n created_at = args[13] or \"\",\n dedup_ttl_ms = tonumber(args[14] or \"86400000\"),\n resume_delay_ms = tonumber(args[15] or \"0\"),\n signal_maxlen = tonumber(args[16] or \"1000\"),\n max_signals = tonumber(args[17] or \"10000\"),\n 
waitpoint_token = args[18] or \"\",\n }\n\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n -- 1. Validate execution exists\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then\n return err(\"execution_not_found\")\n end\n local core = hgetall_to_table(raw)\n\n -- 2. Validate HMAC token FIRST (RFC-004 \u{a7}Waitpoint Security).\n --\n -- Order matters: lifecycle / waitpoint-state checks below would otherwise\n -- form a state oracle \u{2014} an attacker presenting ANY token (including an\n -- invalid one) for an arbitrary (execution_id, waitpoint_id) pair could\n -- distinguish \"execution is terminal\" vs \"waitpoint is pending\" vs\n -- \"waitpoint is closed\" by the specific error code returned, without\n -- having to produce a valid HMAC. Auth-first closes that oracle.\n --\n -- Missing-waitpoint is collapsed into `invalid_token` for the same\n -- reason: an unauthenticated caller must not be able to probe which\n -- (execution, waitpoint) tuples exist.\n local wp_for_auth_raw = redis.call(\"HGETALL\", K.waitpoint_hash)\n if #wp_for_auth_raw == 0 then\n return err(\"invalid_token\")\n end\n local wp_for_auth = hgetall_to_table(wp_for_auth_raw)\n if not wp_for_auth.created_at then\n return err(\"invalid_token\")\n end\n local token_err = validate_waitpoint_token(\n K.hmac_secrets, A.waitpoint_token,\n A.waitpoint_id, wp_for_auth.waitpoint_key or \"\",\n tonumber(wp_for_auth.created_at) or 0, now_ms)\n if token_err then\n -- Operator-visible counter (RFC-004 \u{a7}Waitpoint Security observability).\n -- Single scalar HSET on exec_core \u{2014} bounded, amortized-free. Gives\n -- operators a \"last time this execution saw an auth failure\" field to\n -- correlate with key-rotation drift, client bugs, or attack traffic\n -- without needing to tail Lua slowlog or FCALL error logs.\n redis.call(\"HSET\", K.core_key, \"last_hmac_validation_failed_at\", tostring(now_ms))\n return err(token_err)\n end\n\n -- 3. Validate execution is in a signalable state (post-auth).\n local lp = core.lifecycle_phase\n if lp == \"terminal\" then\n return err(\"target_not_signalable\")\n end\n\n if lp == \"active\" or lp == \"runnable\" or lp == \"submitted\" then\n -- Not suspended. wp_for_auth was just loaded above; reuse it.\n if wp_for_auth.state == \"pending\" then\n return err(\"waitpoint_pending_use_buffer_script\")\n end\n if wp_for_auth.state ~= \"active\" then\n return err(\"target_not_signalable\")\n end\n -- Active waitpoint on non-suspended execution \u{2014} unusual but valid (race window)\n end\n\n -- 4. Validate waitpoint condition is open (post-auth).\n local cond_raw = redis.call(\"HGETALL\", K.wp_condition)\n if #cond_raw == 0 then\n return err(\"waitpoint_not_found\")\n end\n local wp_cond = hgetall_to_table(cond_raw)\n if wp_cond.closed == \"1\" then\n return err(\"waitpoint_closed\")\n end\n\n -- 4. Signal count limit (prevents unbounded ZSET growth from webhook storms)\n if A.max_signals > 0 then\n local current_count = redis.call(\"ZCARD\", K.exec_signals_zset)\n if current_count >= A.max_signals then\n return err(\"signal_limit_exceeded\")\n end\n end\n\n -- 5. 
Idempotency check\n -- Guard: (A.dedup_ttl_ms or 0) handles nil from tonumber(\"\") safely.\n local dedup_ms = A.dedup_ttl_ms or 0\n if A.idempotency_key ~= \"\" and dedup_ms > 0 then\n local existing = redis.call(\"GET\", K.idem_key)\n if existing then\n return ok_duplicate(existing)\n end\n redis.call(\"SET\", K.idem_key, A.signal_id,\n \"PX\", dedup_ms, \"NX\")\n end\n\n -- 6. Record signal hash\n local created_at = A.created_at ~= \"\" and A.created_at or tostring(now_ms)\n redis.call(\"HSET\", K.signal_hash,\n \"signal_id\", A.signal_id,\n \"target_execution_id\", A.execution_id,\n \"target_waitpoint_id\", A.waitpoint_id,\n \"target_scope\", A.target_scope,\n \"signal_name\", A.signal_name,\n \"signal_category\", A.signal_category,\n \"source_type\", A.source_type,\n \"source_identity\", A.source_identity,\n \"correlation_id\", A.correlation_id,\n \"idempotency_key\", A.idempotency_key,\n \"created_at\", created_at,\n \"accepted_at\", tostring(now_ms),\n \"matched_waitpoint_id\", A.waitpoint_id,\n \"payload_encoding\", A.payload_encoding)\n\n -- 6b. Store payload separately if present\n if A.payload ~= \"\" then\n redis.call(\"SET\", K.signal_payload, A.payload)\n end\n\n -- 7. Append to per-waitpoint signal stream + per-execution signal index\n redis.call(\"XADD\", K.wp_signals_stream, \"MAXLEN\", \"~\",\n tostring(A.signal_maxlen), \"*\",\n \"signal_id\", A.signal_id,\n \"signal_name\", A.signal_name,\n \"signal_category\", A.signal_category,\n \"source_type\", A.source_type,\n \"source_identity\", A.source_identity,\n \"matched\", \"0\",\n \"accepted_at\", tostring(now_ms))\n redis.call(\"ZADD\", K.exec_signals_zset, now_ms, A.signal_id)\n\n -- 8. Evaluate resume condition\n local effect = \"appended_to_waitpoint\"\n local matched = false\n\n -- RFC-014 \u{a7}3.3: composite branch. `wp_condition` stores a `composite=1`\n -- marker when suspension serialized a multi-signal tree. We short-\n -- circuit here and run the composite evaluator (depth-bounded).\n local composite_mode = (wp_cond.composite == \"1\")\n if composite_mode then\n -- Load tree + waitpoint_key for candidate lookup.\n local susp_raw = redis.call(\"HGETALL\", K.suspension_current)\n local susp = hgetall_to_table(susp_raw)\n local tree_json = wp_cond.tree_json or \"\"\n local tree = tree_json ~= \"\" and cjson.decode(tree_json) or nil\n local satisfied_set_key = K.suspension_current .. \":satisfied_set\"\n local member_map_key = K.suspension_current .. \":member_map\"\n\n local signal_for_eval = {\n signal_id = A.signal_id,\n signal_name = A.signal_name,\n source_type = A.source_type,\n source_identity = A.source_identity,\n }\n -- RFC-014 Pattern 3: use THIS waitpoint\'s own key (loaded into\n -- `wp_for_auth` above, keyed by A.waitpoint_id) so multi-waitpoint\n -- AllOf resolves each leaf against its own wp_key, not the\n -- suspension\'s primary key.\n local this_wp_key = wp_for_auth.waitpoint_key or susp.waitpoint_key or \"\"\n local outcome = composite_deliver_signal(\n tree, satisfied_set_key, member_map_key,\n A.waitpoint_id, this_wp_key, signal_for_eval)\n\n effect = outcome.effect or \"appended_to_waitpoint\"\n matched = (effect ~= \"signal_ignored_not_in_condition\"\n and effect ~= \"signal_ignored_matcher_failed\")\n\n if outcome.resume then\n -- Close suspension via the standard path (same as single-matcher\n -- resume below). 
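 -- (The re-queue below uses the library-wide composite score\n -- score = created_at - priority * 1e12. Illustrative numbers: priority 2\n -- with created_at 1700000000000 scores -300000000000; a one-step priority\n -- bump outweighs any realistic created_at spread, since 1e12 ms is about\n -- 31 years, and earlier created_at breaks ties within a priority band.)\n --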
Composite-scoped cleanup follows \u{a7}3.1.1.\n local lp2 = core.lifecycle_phase\n if lp2 == \"suspended\" then\n local es, br, bd, ps\n if A.resume_delay_ms > 0 then\n es = \"not_eligible_until_time\"\n br = \"waiting_for_resume_delay\"\n bd = \"resume delay \" .. A.resume_delay_ms .. \"ms after signal \" .. A.signal_name\n ps = \"delayed\"\n else\n es = \"eligible_now\"\n br = \"waiting_for_worker\"\n bd = \"\"\n ps = \"waiting\"\n end\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"runnable\",\n \"ownership_state\", \"unowned\",\n \"eligibility_state\", es,\n \"blocking_reason\", br,\n \"blocking_detail\", bd,\n \"terminal_outcome\", \"none\",\n \"attempt_state\", \"attempt_interrupted\",\n \"public_state\", ps,\n \"current_suspension_id\", \"\",\n \"current_waitpoint_id\", \"\",\n \"last_transition_at\", tostring(now_ms),\n \"last_mutation_at\", tostring(now_ms))\n\n local priority = tonumber(core.priority or \"0\")\n local created_at_exec = tonumber(core.created_at or \"0\")\n redis.call(\"ZREM\", K.suspended_zset, A.execution_id)\n if A.resume_delay_ms > 0 then\n redis.call(\"ZADD\", K.delayed_zset,\n now_ms + A.resume_delay_ms, A.execution_id)\n else\n redis.call(\"ZADD\", K.eligible_zset,\n 0 - (priority * 1000000000000) + created_at_exec,\n A.execution_id)\n end\n end\n\n redis.call(\"HSET\", K.wp_condition,\n \"closed\", \"1\",\n \"closed_at\", tostring(now_ms),\n \"closed_reason\", \"satisfied\")\n redis.call(\"HSET\", K.waitpoint_hash,\n \"state\", \"closed\",\n \"satisfied_at\", tostring(now_ms),\n \"closed_at\", tostring(now_ms),\n \"close_reason\", \"resumed\")\n if redis.call(\"EXISTS\", K.suspension_current) == 1 then\n redis.call(\"HSET\", K.suspension_current,\n \"satisfied_at\", tostring(now_ms),\n \"closed_at\", tostring(now_ms),\n \"close_reason\", \"resumed\",\n \"closer_signal_id\", outcome.closer or A.signal_id,\n \"all_satisfier_signals\", outcome.all_satisfiers_json or \"[]\")\n end\n redis.call(\"ZREM\", K.suspension_timeout_zset, A.execution_id)\n -- RFC-014 \u{a7}3.1.1 cleanup owner: deliver_signal close path.\n composite_cleanup(satisfied_set_key, member_map_key)\n -- RFC-014 Pattern 3: close any OTHER waitpoints owned by this\n -- suspension (the one the signal arrived on is already closed\n -- via K.waitpoint_hash above). Reread susp to pick up the\n -- additional_waitpoints_json that was seeded at suspend-time.\n local susp_after = redis.call(\"HGETALL\", K.suspension_current)\n if #susp_after > 0 then\n local susp2 = hgetall_to_table(susp_after)\n close_additional_waitpoints(\n K.suspension_current,\n susp2.additional_waitpoints_json or \"\",\n { \"state\", \"closed\",\n \"satisfied_at\", tostring(now_ms),\n \"closed_at\", tostring(now_ms),\n \"close_reason\", \"resumed\" },\n { \"closed\", \"1\",\n \"closed_at\", tostring(now_ms),\n \"closed_reason\", \"satisfied\" })\n end\n end\n\n -- Record signal hash observed_effect + waitpoint counters + return.\n redis.call(\"HSET\", K.signal_hash, \"observed_effect\", effect)\n redis.call(\"HINCRBY\", K.waitpoint_hash, \"signal_count\", 1)\n if matched then\n redis.call(\"HINCRBY\", K.waitpoint_hash, \"matched_signal_count\", 1)\n end\n redis.call(\"HSET\", K.waitpoint_hash, \"last_signal_at\", tostring(now_ms))\n if redis.call(\"EXISTS\", K.suspension_current) == 1 and not outcome.resume then\n redis.call(\"HSET\", K.suspension_current, \"last_signal_at\", tostring(now_ms))\n end\n -- RFC-019 Stage B / #310: partition-level signal-delivery aggregate\n -- stream. 
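 -- Consumer sketch (COUNT / BLOCK values are illustrative; the stream key\n -- is whatever the caller bound to KEYS[15]):\n --   XREAD COUNT 64 BLOCK 5000 STREAMS <partition_signal_delivery_stream> $\n --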
`subscribe_signal_delivery` XREAD BLOCKs this key.\n redis.call(\"XADD\", K.partition_signal_delivery_stream, \"MAXLEN\", \"~\", \"1000\", \"*\",\n \"signal_id\", A.signal_id,\n \"execution_id\", A.execution_id,\n \"waitpoint_id\", A.waitpoint_id,\n \"source_identity\", A.source_identity,\n \"effect\", effect,\n \"delivered_at_ms\", tostring(now_ms))\n return ok(A.signal_id, effect)\n end\n\n local total = tonumber(wp_cond.total_matchers or \"0\")\n for i = 0, total - 1 do\n local sat_key = \"matcher:\" .. i .. \":satisfied\"\n local name_key = \"matcher:\" .. i .. \":name\"\n if wp_cond[sat_key] == \"0\" then\n local matcher_name = wp_cond[name_key] or \"\"\n if matcher_name == \"\" or matcher_name == A.signal_name then\n -- Mark matcher as satisfied\n redis.call(\"HSET\", K.wp_condition,\n sat_key, \"1\",\n \"matcher:\" .. i .. \":signal_id\", A.signal_id)\n matched = true\n local new_sat = tonumber(wp_cond.satisfied_count or \"0\") + 1\n redis.call(\"HSET\", K.wp_condition, \"satisfied_count\", tostring(new_sat))\n\n -- Check if overall condition is satisfied\n local mode = wp_cond.match_mode or \"any\"\n local min_count = tonumber(wp_cond.minimum_signal_count or \"1\")\n local resume = false\n if mode == \"any\" then\n resume = (new_sat >= min_count)\n elseif mode == \"all\" then\n resume = (new_sat >= total)\n else\n -- count(n) mode\n resume = (new_sat >= min_count)\n end\n\n if resume then\n effect = \"resume_condition_satisfied\"\n\n -- OOM-SAFE WRITE ORDERING (per RFC-010 \u{a7}4.8b):\n -- exec_core HSET is the \"point of no return\" \u{2014} write it FIRST.\n -- If OOM kills after exec_core but before closing sub-objects,\n -- execution is runnable (correct) with stale suspension/waitpoint\n -- records (generalized index reconciler catches this).\n\n -- 9a. Transition execution: suspended -> runnable (WRITE FIRST)\n -- Resume continues the SAME attempt (no new attempt created).\n if lp == \"suspended\" then\n local es, br, bd, ps\n if A.resume_delay_ms > 0 then\n es = \"not_eligible_until_time\"\n br = \"waiting_for_resume_delay\"\n bd = \"resume delay \" .. A.resume_delay_ms .. \"ms after signal \" .. A.signal_name\n ps = \"delayed\"\n else\n es = \"eligible_now\"\n br = \"waiting_for_worker\"\n bd = \"\"\n ps = \"waiting\"\n end\n\n -- ALL 7 state vector dimensions\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"runnable\",\n \"ownership_state\", \"unowned\",\n \"eligibility_state\", es,\n \"blocking_reason\", br,\n \"blocking_detail\", bd,\n \"terminal_outcome\", \"none\",\n \"attempt_state\", \"attempt_interrupted\",\n \"public_state\", ps,\n \"current_suspension_id\", \"\",\n \"current_waitpoint_id\", \"\",\n \"last_transition_at\", tostring(now_ms),\n \"last_mutation_at\", tostring(now_ms))\n\n -- 9b. Update scheduling indexes\n local priority = tonumber(core.priority or \"0\")\n local created_at_exec = tonumber(core.created_at or \"0\")\n redis.call(\"ZREM\", K.suspended_zset, A.execution_id)\n if A.resume_delay_ms > 0 then\n redis.call(\"ZADD\", K.delayed_zset,\n now_ms + A.resume_delay_ms, A.execution_id)\n else\n redis.call(\"ZADD\", K.eligible_zset,\n 0 - (priority * 1000000000000) + created_at_exec,\n A.execution_id)\n end\n end\n\n -- 9c. Close waitpoint condition (after exec_core is safe)\n redis.call(\"HSET\", K.wp_condition,\n \"closed\", \"1\",\n \"closed_at\", tostring(now_ms),\n \"closed_reason\", \"satisfied\")\n\n -- 9d. 
Close waitpoint record\n redis.call(\"HSET\", K.waitpoint_hash,\n \"state\", \"closed\",\n \"satisfied_at\", tostring(now_ms),\n \"closed_at\", tostring(now_ms),\n \"close_reason\", \"resumed\")\n\n -- 9e. Close suspension record\n if redis.call(\"EXISTS\", K.suspension_current) == 1 then\n redis.call(\"HSET\", K.suspension_current,\n \"satisfied_at\", tostring(now_ms),\n \"closed_at\", tostring(now_ms),\n \"close_reason\", \"resumed\")\n end\n\n -- 9f. Remove from suspension timeout index\n redis.call(\"ZREM\", K.suspension_timeout_zset, A.execution_id)\n end\n break\n end\n end\n end\n\n if not matched then\n effect = \"no_op\"\n end\n\n -- 10. Record observed effect on signal\n redis.call(\"HSET\", K.signal_hash, \"observed_effect\", effect)\n\n -- 11. Update waitpoint signal counts\n redis.call(\"HINCRBY\", K.waitpoint_hash, \"signal_count\", 1)\n if matched then\n redis.call(\"HINCRBY\", K.waitpoint_hash, \"matched_signal_count\", 1)\n end\n redis.call(\"HSET\", K.waitpoint_hash, \"last_signal_at\", tostring(now_ms))\n\n -- 12. Update suspension signal summary\n if redis.call(\"EXISTS\", K.suspension_current) == 1 then\n redis.call(\"HSET\", K.suspension_current, \"last_signal_at\", tostring(now_ms))\n end\n\n -- RFC-019 Stage B / #310: partition-level signal-delivery aggregate\n -- stream. `subscribe_signal_delivery` XREAD BLOCKs this key.\n redis.call(\"XADD\", K.partition_signal_delivery_stream, \"MAXLEN\", \"~\", \"1000\", \"*\",\n \"signal_id\", A.signal_id,\n \"execution_id\", A.execution_id,\n \"waitpoint_id\", A.waitpoint_id,\n \"source_identity\", A.source_identity,\n \"effect\", effect,\n \"delivered_at_ms\", tostring(now_ms))\n\n return ok(A.signal_id, effect)\nend)\n\n---------------------------------------------------------------------------\n-- #18 ff_buffer_signal_for_pending_waitpoint\n--\n-- Accept signal for a pending (not yet committed) waitpoint.\n-- Records the signal but does NOT evaluate resume conditions.\n-- When suspend_execution activates the waitpoint, buffered signals\n-- are replayed through the full evaluation path.\n--\n-- KEYS (9): exec_core, wp_condition, wp_signals_stream,\n-- exec_signals_zset, signal_hash, signal_payload,\n-- idem_key, waitpoint_hash, hmac_secrets\n-- ARGV (18): same as ff_deliver_signal (17 + waitpoint_token)\n---------------------------------------------------------------------------\nredis.register_function(\'ff_buffer_signal_for_pending_waitpoint\', function(keys, args)\n local K = {\n core_key = keys[1],\n wp_condition = keys[2],\n wp_signals_stream = keys[3],\n exec_signals_zset = keys[4],\n signal_hash = keys[5],\n signal_payload = keys[6],\n idem_key = keys[7],\n waitpoint_hash = keys[8],\n hmac_secrets = keys[9],\n }\n\n local A = {\n signal_id = args[1],\n execution_id = args[2],\n waitpoint_id = args[3],\n signal_name = args[4],\n signal_category = args[5],\n source_type = args[6],\n source_identity = args[7],\n payload = args[8] or \"\",\n payload_encoding = args[9] or \"json\",\n idempotency_key = args[10] or \"\",\n correlation_id = args[11] or \"\",\n target_scope = args[12] or \"waitpoint\",\n created_at = args[13] or \"\",\n dedup_ttl_ms = tonumber(args[14] or \"86400000\"),\n signal_maxlen = tonumber(args[16] or \"1000\"),\n max_signals = tonumber(args[17] or \"10000\"),\n waitpoint_token = args[18] or \"\",\n }\n\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n -- 1. 
Validate execution exists\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then\n return err(\"execution_not_found\")\n end\n\n -- 1a. Validate HMAC token against the pending waitpoint\'s mint-time binding.\n local wp_for_auth = hgetall_to_table(redis.call(\"HGETALL\", K.waitpoint_hash))\n if not wp_for_auth.created_at then\n return err(\"waitpoint_not_found\")\n end\n local token_err = validate_waitpoint_token(\n K.hmac_secrets, A.waitpoint_token,\n A.waitpoint_id, wp_for_auth.waitpoint_key or \"\",\n tonumber(wp_for_auth.created_at) or 0, now_ms)\n if token_err then\n -- Operator-visible counter mirroring ff_deliver_signal. See comment\n -- there for rationale.\n redis.call(\"HSET\", K.core_key, \"last_hmac_validation_failed_at\", tostring(now_ms))\n return err(token_err)\n end\n\n -- 1b. Gate on waitpoint state. ff_deliver_signal blocks replay-after-close\n -- via wp_condition.closed, but wp_condition is not initialized for pending\n -- waitpoints \u{2014} we must check wp.state directly. Without this, a caller\n -- holding a valid token for a pending waitpoint that has since been\n -- closed/expired can keep appending buffered signals that will replay\n -- when suspend_execution(use_pending=1) later activates the waitpoint.\n if wp_for_auth.state == \"closed\" or wp_for_auth.state == \"expired\" then\n return err(\"waitpoint_closed\")\n end\n\n -- 2. Signal count limit\n if A.max_signals > 0 then\n local current_count = redis.call(\"ZCARD\", K.exec_signals_zset)\n if current_count >= A.max_signals then\n return err(\"signal_limit_exceeded\")\n end\n end\n\n -- 3. Idempotency check\n -- Guard: (A.dedup_ttl_ms or 0) handles nil from tonumber(\"\") safely.\n local dedup_ms = A.dedup_ttl_ms or 0\n if A.idempotency_key ~= \"\" and dedup_ms > 0 then\n local existing = redis.call(\"GET\", K.idem_key)\n if existing then\n return ok_duplicate(existing)\n end\n redis.call(\"SET\", K.idem_key, A.signal_id,\n \"PX\", dedup_ms, \"NX\")\n end\n\n -- 4. Record signal hash with tentative effect\n local created_at = A.created_at ~= \"\" and A.created_at or tostring(now_ms)\n redis.call(\"HSET\", K.signal_hash,\n \"signal_id\", A.signal_id,\n \"target_execution_id\", A.execution_id,\n \"target_waitpoint_id\", A.waitpoint_id,\n \"target_scope\", A.target_scope,\n \"signal_name\", A.signal_name,\n \"signal_category\", A.signal_category,\n \"source_type\", A.source_type,\n \"source_identity\", A.source_identity,\n \"correlation_id\", A.correlation_id,\n \"idempotency_key\", A.idempotency_key,\n \"created_at\", created_at,\n \"accepted_at\", tostring(now_ms),\n \"matched_waitpoint_id\", A.waitpoint_id,\n \"payload_encoding\", A.payload_encoding,\n \"observed_effect\", \"buffered_for_pending_waitpoint\")\n\n -- 4b. Store payload separately if present\n if A.payload ~= \"\" then\n redis.call(\"SET\", K.signal_payload, A.payload)\n end\n\n -- 5. 
Append to per-waitpoint signal stream + per-execution signal index\n -- These are recorded so suspend_execution can XRANGE and replay them.\n redis.call(\"XADD\", K.wp_signals_stream, \"MAXLEN\", \"~\",\n tostring(A.signal_maxlen), \"*\",\n \"signal_id\", A.signal_id,\n \"signal_name\", A.signal_name,\n \"signal_category\", A.signal_category,\n \"source_type\", A.source_type,\n \"source_identity\", A.source_identity,\n \"matched\", \"0\",\n \"accepted_at\", tostring(now_ms))\n redis.call(\"ZADD\", K.exec_signals_zset, now_ms, A.signal_id)\n\n -- No resume condition evaluation \u{2014} waitpoint is pending, not active.\n\n return ok(A.signal_id, \"buffered_for_pending_waitpoint\")\nend)\n\n---------------------------------------------------------------------------\n-- #2 ff_claim_resumed_execution\n--\n-- Consume claim-grant, resume existing attempt (interrupted -> started),\n-- create new lease bound to SAME attempt. Does NOT create a new attempt.\n--\n-- KEYS (11): exec_core, claim_grant, eligible_zset, lease_expiry_zset,\n-- worker_leases, existing_attempt_hash, lease_current,\n-- lease_history, active_index, attempt_timeout_zset,\n-- execution_deadline_zset\n-- ARGV (8): execution_id, worker_id, worker_instance_id, lane,\n-- capability_snapshot_hash, lease_id, lease_ttl_ms,\n-- remaining_attempt_timeout_ms\n---------------------------------------------------------------------------\nredis.register_function(\'ff_claim_resumed_execution\', function(keys, args)\n local K = {\n core_key = keys[1],\n claim_grant_key = keys[2],\n eligible_zset = keys[3],\n lease_expiry_key = keys[4],\n worker_leases_key = keys[5],\n attempt_hash = keys[6],\n lease_current_key = keys[7],\n lease_history_key = keys[8],\n active_index_key = keys[9],\n attempt_timeout_key = keys[10],\n execution_deadline_key = keys[11],\n }\n\n local lease_ttl_n = require_number(args[7], \"lease_ttl_ms\")\n if type(lease_ttl_n) == \"table\" then return lease_ttl_n end\n\n local A = {\n execution_id = args[1],\n worker_id = args[2],\n worker_instance_id = args[3],\n lane = args[4],\n capability_snapshot_hash = args[5] or \"\",\n lease_id = args[6],\n lease_ttl_ms = lease_ttl_n,\n remaining_attempt_timeout_ms = args[8] or \"\",\n }\n\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n -- 1. Validate execution exists\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then return err(\"execution_not_found\") end\n local core = hgetall_to_table(raw)\n\n -- 2. Must be runnable\n if core.lifecycle_phase ~= \"runnable\" then\n return err(\"execution_not_leaseable\")\n end\n\n -- 3. Must be attempt_interrupted (resumed after suspension/delay)\n if core.attempt_state ~= \"attempt_interrupted\" then\n return err(\"not_a_resumed_execution\")\n end\n\n -- 4. Validate claim grant\n local grant_raw = redis.call(\"HGETALL\", K.claim_grant_key)\n if #grant_raw == 0 then\n return err(\"invalid_claim_grant\")\n end\n local grant = hgetall_to_table(grant_raw)\n\n -- Validate grant matches (grant key is execution-scoped, so only check worker_id)\n if grant.worker_id ~= A.worker_id then\n return err(\"invalid_claim_grant\")\n end\n\n -- Check grant expiry\n if is_set(grant.grant_expires_at) and tonumber(grant.grant_expires_at) < now_ms then\n redis.call(\"DEL\", K.claim_grant_key)\n return err(\"claim_grant_expired\")\n end\n\n -- Consume grant (DEL)\n redis.call(\"DEL\", K.claim_grant_key)\n\n -- 5. 
Resume existing attempt: attempt_interrupted -> started\n -- Same attempt continues \u{2014} no new attempt_index.\n local att_idx = core.current_attempt_index\n local att_id = core.current_attempt_id\n local epoch = tonumber(core.current_lease_epoch or \"0\") + 1\n local expires_at = now_ms + A.lease_ttl_ms\n local renewal_deadline = now_ms + math.floor(A.lease_ttl_ms * 2 / 3)\n\n redis.call(\"HSET\", K.attempt_hash,\n \"attempt_state\", \"started\",\n \"resumed_at\", tostring(now_ms),\n \"lease_id\", A.lease_id,\n \"lease_epoch\", tostring(epoch),\n \"worker_id\", A.worker_id,\n \"worker_instance_id\", A.worker_instance_id,\n \"suspended_at\", \"\",\n \"suspension_id\", \"\")\n\n -- 6. Create new lease bound to same attempt\n redis.call(\"DEL\", K.lease_current_key)\n redis.call(\"HSET\", K.lease_current_key,\n \"lease_id\", A.lease_id,\n \"lease_epoch\", tostring(epoch),\n \"execution_id\", A.execution_id,\n \"attempt_id\", att_id,\n \"worker_id\", A.worker_id,\n \"worker_instance_id\", A.worker_instance_id,\n \"acquired_at\", tostring(now_ms),\n \"expires_at\", tostring(expires_at),\n \"last_renewed_at\", tostring(now_ms),\n \"renewal_deadline\", tostring(renewal_deadline))\n\n -- 7. Update exec_core \u{2014} ALL 7 state vector dimensions\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"active\",\n \"ownership_state\", \"leased\",\n \"eligibility_state\", \"not_applicable\",\n \"blocking_reason\", \"none\",\n \"blocking_detail\", \"\",\n \"terminal_outcome\", \"none\",\n \"attempt_state\", \"running_attempt\",\n \"public_state\", \"active\",\n \"current_lease_id\", A.lease_id,\n \"current_lease_epoch\", tostring(epoch),\n \"current_worker_id\", A.worker_id,\n \"current_worker_instance_id\", A.worker_instance_id,\n \"current_lane\", A.lane,\n \"lease_acquired_at\", tostring(now_ms),\n \"lease_expires_at\", tostring(expires_at),\n \"lease_last_renewed_at\", tostring(now_ms),\n \"lease_renewal_deadline\", tostring(renewal_deadline),\n \"lease_expired_at\", \"\",\n \"lease_revoked_at\", \"\",\n \"lease_revoke_reason\", \"\",\n \"last_transition_at\", tostring(now_ms),\n \"last_mutation_at\", tostring(now_ms))\n\n -- 8. Update indexes\n redis.call(\"ZREM\", K.eligible_zset, A.execution_id)\n redis.call(\"ZADD\", K.lease_expiry_key, expires_at, A.execution_id)\n redis.call(\"SADD\", K.worker_leases_key, A.execution_id)\n redis.call(\"ZADD\", K.active_index_key, expires_at, A.execution_id)\n\n -- 9. ZADD attempt_timeout with remaining timeout\n if is_set(A.remaining_attempt_timeout_ms) then\n local remaining = tonumber(A.remaining_attempt_timeout_ms)\n if remaining > 0 then\n redis.call(\"ZADD\", K.attempt_timeout_key,\n now_ms + remaining, A.execution_id)\n end\n end\n\n -- 10. 
Lease history event\n redis.call(\"XADD\", K.lease_history_key, \"MAXLEN\", \"~\", 1000, \"*\",\n \"event\", \"acquired\",\n \"lease_id\", A.lease_id,\n \"lease_epoch\", tostring(epoch),\n \"attempt_index\", att_idx,\n \"attempt_id\", att_id,\n \"worker_id\", A.worker_id,\n \"reason\", \"claim_resumed\",\n \"ts\", tostring(now_ms))\n\n return ok(A.lease_id, tostring(epoch), tostring(expires_at),\n att_id, att_idx, \"resumed\")\nend)\n\n\n-- source: lua/stream.lua\n-- FlowFabric stream append function\n-- Reference: RFC-006 (Stream), RFC-010 \u{a7}4.1 (#20), RFC-015 (durability modes)\n--\n-- Depends on helpers: ok, err, is_set\n\n---------------------------------------------------------------------------\n-- RFC-015 JSON Merge Patch (RFC 7396) applier.\n--\n-- `target` and `patch` are Lua tables produced by `cjson.decode`. The\n-- applier mutates `target` in place per RFC 7396:\n-- * `patch[k] == cjson.null` \u{2192} delete `target[k]`\n-- * `patch[k]` is a table \u{2192} recurse\n-- * otherwise \u{2192} `target[k] = patch[k]`\n--\n-- `depth` is a recursion guard (RFC-015 \u{a7}3.3 step 4: bounded depth 16).\n-- A patch whose root is not a JSON object (array / scalar / null) is\n-- rejected by the caller per RFC 7396 \u{2014} only object-rooted patches are\n-- valid merge-patch documents.\n---------------------------------------------------------------------------\nlocal function apply_merge_patch(target, patch, depth)\n if depth > 16 then\n return nil, \"patch_depth_exceeded\"\n end\n for k, v in pairs(patch) do\n if v == cjson.null then\n target[k] = nil\n elseif type(v) == \"table\" and not v[1] and next(v) ~= nil then\n -- Object-valued \u{2192} recurse. `v[1]` heuristic distinguishes arrays\n -- (numeric-indexed from 1) from objects. RFC 7396 replaces\n -- arrays wholesale (no recursion into array members).\n if type(target[k]) ~= \"table\" or (target[k][1] ~= nil) then\n target[k] = {}\n end\n local _, ferr = apply_merge_patch(target[k], v, depth + 1)\n if ferr ~= nil then\n return nil, ferr\n end\n else\n -- Scalar, array, or empty-table leaf \u{2192} replace wholesale.\n target[k] = v\n end\n end\n return target, nil\nend\n\n---------------------------------------------------------------------------\n-- RFC-015 null-sentinel rewrite. Walks `doc` and replaces scalar string\n-- leaves equal to \"__ff_null__\" with cjson.null. Applied post-merge so\n-- callers can encode \"set this leaf to JSON null\" inside an RFC 7396\n-- merge patch (where `null` would otherwise mean \"delete key\").\n---------------------------------------------------------------------------\nlocal FF_NULL_SENTINEL = \"__ff_null__\"\nlocal function rewrite_null_sentinel(doc, depth)\n if depth > 32 then return end\n if type(doc) ~= \"table\" then return end\n for k, v in pairs(doc) do\n if type(v) == \"string\" and v == FF_NULL_SENTINEL then\n doc[k] = cjson.null\n elseif type(v) == \"table\" then\n rewrite_null_sentinel(v, depth + 1)\n end\n end\nend\n\n-- Reject non-object root documents / patches (RFC 7396 requires an\n-- object at the root). 
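--\n-- Worked example of the merge-patch + null-sentinel passes above (document\n-- values are hypothetical):\n--   target        {\"title\":\"draft\",\"meta\":{\"owner\":\"a\",\"tag\":\"x\"}}\n--   patch         {\"meta\":{\"tag\":null},\"title\":\"__ff_null__\"}\n--   after merge   {\"title\":\"__ff_null__\",\"meta\":{\"owner\":\"a\"}}\n--   after rewrite {\"title\":null,\"meta\":{\"owner\":\"a\"}}\n--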
A bare scalar / array / null patch means\n-- \"replace the target wholesale\" under RFC 7396, but we scope v0.6\n-- to the object-root subset per RFC-015 \u{a7}3.3 step 2.\nlocal function is_json_object(tbl)\n if type(tbl) ~= \"table\" then return false end\n -- Empty table \u{2014} treat as an object by convention (cjson would have\n -- decoded `{}` to `cjson.empty_array` in encode-array mode, but\n -- stream.lua uses decode defaults).\n if next(tbl) == nil then return true end\n -- Any numeric-indexed [1] entry \u{2192} array, reject.\n return tbl[1] == nil\nend\n\n---------------------------------------------------------------------------\n-- #20 ff_append_frame\n--\n-- Append a frame to the attempt-scoped output stream. Highest-throughput\n-- function \u{2014} called once per token during LLM streaming. Uses lite lease\n-- validation (HMGET, not HGETALL) for minimal overhead. Class B operation.\n--\n-- KEYS (4): exec_core, stream_data, stream_meta, stream_summary\n-- ARGV (19): execution_id, attempt_index, lease_id, lease_epoch,\n-- frame_type, ts, payload, encoding, correlation_id,\n-- source, retention_maxlen, attempt_id, max_payload_bytes,\n-- stream_mode, patch_kind, ttl_ms,\n-- maxlen_floor, maxlen_ceiling, ema_alpha\n--\n-- `stream_mode` (ARGV 14, RFC-015): \"\" / \"durable\" \u{2192} StreamMode::Durable\n-- (default). \"summary\" \u{2192} DurableSummary (requires `patch_kind` ARGV 15).\n-- \"best_effort\" \u{2192} BestEffortLive (uses `ttl_ms` ARGV 16 for PEXPIRE and\n-- ARGV 17-19 for dynamic MAXLEN sizing per RFC-015 \u{a7}4.2).\n---------------------------------------------------------------------------\nredis.register_function(\'ff_append_frame\', function(keys, args)\n local K = {\n core_key = keys[1],\n stream_key = keys[2],\n stream_meta = keys[3],\n stream_summary = keys[4] or \"\",\n }\n\n local A = {\n execution_id = args[1],\n attempt_index = args[2],\n lease_id = args[3],\n lease_epoch = args[4],\n frame_type = args[5],\n ts = args[6] or \"\",\n payload = args[7] or \"\",\n encoding = args[8] or \"utf8\",\n correlation_id = args[9] or \"\",\n source = args[10] or \"worker\",\n retention_maxlen = tonumber(args[11] or \"0\"),\n attempt_id = args[12] or \"\",\n max_payload_bytes = tonumber(args[13] or \"65536\"),\n stream_mode = args[14] or \"\", -- RFC-015\n patch_kind = args[15] or \"\", -- RFC-015\n ttl_ms = tonumber(args[16] or \"0\"),\n -- RFC-015 \u{a7}4.2 dynamic MAXLEN knobs. Zero / missing \u{2192} fall back to\n -- the RFC-final defaults.\n maxlen_floor = tonumber(args[17] or \"0\"),\n maxlen_ceiling = tonumber(args[18] or \"0\"),\n ema_alpha = tonumber(args[19] or \"0\"),\n }\n if A.maxlen_floor == nil or A.maxlen_floor <= 0 then A.maxlen_floor = 64 end\n if A.maxlen_ceiling == nil or A.maxlen_ceiling <= 0 then A.maxlen_ceiling = 16384 end\n if A.ema_alpha == nil or A.ema_alpha <= 0 or A.ema_alpha > 1.0 then\n A.ema_alpha = 0.2\n end\n if A.maxlen_ceiling < A.maxlen_floor then\n A.maxlen_ceiling = A.maxlen_floor\n end\n\n -- Normalise empty / unknown mode to \"durable\" for pre-RFC-015 parity.\n if A.stream_mode == \"\" then A.stream_mode = \"durable\" end\n\n -- 1. Payload size guard (v1 default: 64KB)\n if #A.payload > A.max_payload_bytes then\n return err(\"retention_limit_exceeded\")\n end\n\n -- 2. 
Lite lease validation via HMGET (Class B \u{2014} no full HGETALL)\n local core = redis.call(\"HMGET\", K.core_key,\n \"current_attempt_index\", -- [1]\n \"current_lease_id\", -- [2]\n \"current_lease_epoch\", -- [3]\n \"lease_expires_at\", -- [4]\n \"lifecycle_phase\", -- [5]\n \"ownership_state\") -- [6]\n\n if core[5] ~= \"active\" then\n return err(\"stream_closed\")\n end\n\n if core[6] == \"lease_expired_reclaimable\" or core[6] == \"lease_revoked\" then\n return err(\"stale_owner_cannot_append\")\n end\n\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n if tonumber(core[4] or \"0\") <= now_ms then\n return err(\"stale_owner_cannot_append\")\n end\n\n if tostring(core[1]) ~= A.attempt_index then\n return err(\"stale_owner_cannot_append\")\n end\n\n if core[2] ~= A.lease_id or tostring(core[3]) ~= A.lease_epoch then\n return err(\"stale_owner_cannot_append\")\n end\n\n -- 3. Lazy-create stream metadata on first append\n if redis.call(\"EXISTS\", K.stream_meta) == 0 then\n redis.call(\"HSET\", K.stream_meta,\n \"stream_id\", A.execution_id .. \":\" .. A.attempt_index,\n \"execution_id\", A.execution_id,\n \"attempt_id\", A.attempt_id,\n \"attempt_index\", A.attempt_index,\n \"created_at\", tostring(now_ms),\n \"closed_at\", \"\",\n \"closed_reason\", \"\",\n \"durability_mode\", \"durable_full\",\n \"retention_maxlen\", tostring(A.retention_maxlen),\n \"last_sequence\", \"\",\n \"frame_count\", \"0\",\n \"total_bytes\", \"0\",\n \"last_frame_at\", \"\",\n \"has_durable_frame\", \"0\") -- RFC-015 \u{a7}4.1 \u{2014} PEXPIRE gate\n end\n\n -- 4. Check stream not closed\n local closed = redis.call(\"HGET\", K.stream_meta, \"closed_at\")\n if is_set(closed) then\n return err(\"stream_closed\")\n end\n\n -- 5. RFC-015 \u{a7}3.3 \u{2014} DurableSummary delta apply. Done BEFORE XADD so\n -- the stream entry carries the post-merge `summary_version`.\n local summary_version = \"\"\n if A.stream_mode == \"summary\" then\n if K.stream_summary == \"\" then\n return err(\"invalid_input\", \"summary mode requires stream_summary key\")\n end\n if A.patch_kind ~= \"json-merge-patch\" and A.patch_kind ~= \"\" then\n return err(\"invalid_input\", \"unsupported patch_kind\")\n end\n\n -- Parse patch payload. RFC 7396 requires an object at the root.\n local ok_decode, patch = pcall(cjson.decode, A.payload)\n if not ok_decode then\n return err(\"invalid_input\", \"patch payload is not valid JSON\")\n end\n if not is_json_object(patch) then\n return err(\"invalid_input\", \"patch must be a JSON object\")\n end\n\n -- Load current summary document (or start from {}).\n local cur_doc_raw = redis.call(\"HGET\", K.stream_summary, \"document\")\n local doc\n if cur_doc_raw and cur_doc_raw ~= \"\" then\n local ok_cur, parsed = pcall(cjson.decode, cur_doc_raw)\n if not ok_cur then\n return err(\"corruption\", \"stored summary document not decodeable\")\n end\n doc = parsed\n else\n doc = {}\n end\n\n -- Apply merge patch, then rewrite the null sentinel.\n local _, apply_err = apply_merge_patch(doc, patch, 0)\n if apply_err ~= nil then\n return err(\"invalid_input\", apply_err)\n end\n rewrite_null_sentinel(doc, 0)\n\n -- Encode + persist. 
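 -- Worked example (informative): with a stored document of\n -- {\"status\": \"running\", \"tmp\": \"x\"} and a patch of\n -- {\"status\": \"done\", \"tmp\": null, \"note\": \"__ff_null__\"}, the merge sets\n -- status, deletes tmp, and the sentinel rewrite then persists\n -- {\"status\": \"done\", \"note\": null}.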
Use cjson.encode (not cjson_safe) \u{2014} input\n -- validation above means we should never encode unencodable.\n local encoded = cjson.encode(doc)\n local new_version = redis.call(\"HINCRBY\", K.stream_summary, \"version\", 1)\n redis.call(\"HSET\", K.stream_summary,\n \"document\", encoded,\n \"patch_kind\", \"json-merge-patch\",\n \"last_updated_ms\", tostring(now_ms))\n -- Set first_applied_ms on the very first delta only.\n if new_version == 1 then\n redis.call(\"HSET\", K.stream_summary, \"first_applied_ms\", tostring(now_ms))\n end\n summary_version = tostring(new_version)\n end\n\n -- 6. Append frame via XADD with RFC-015 mode fields.\n local ts = A.ts ~= \"\" and A.ts or tostring(now_ms)\n local xadd_args = {\n K.stream_key, \"*\",\n \"frame_type\", A.frame_type,\n \"ts\", ts,\n \"payload\", A.payload,\n \"encoding\", A.encoding,\n \"source\", A.source,\n \"mode\", A.stream_mode,\n }\n if A.correlation_id ~= \"\" then\n xadd_args[#xadd_args + 1] = \"correlation_id\"\n xadd_args[#xadd_args + 1] = A.correlation_id\n end\n if summary_version ~= \"\" then\n xadd_args[#xadd_args + 1] = \"summary_version\"\n xadd_args[#xadd_args + 1] = summary_version\n end\n if A.stream_mode == \"best_effort\" and A.ttl_ms > 0 then\n xadd_args[#xadd_args + 1] = \"ttl_ms\"\n xadd_args[#xadd_args + 1] = tostring(A.ttl_ms)\n end\n\n local entry_id = redis.call(\"XADD\", unpack(xadd_args))\n\n -- 7. Update stream metadata.\n --\n -- `frame_count` is the LIFETIME append counter \u{2014} NOT the retained\n -- entry count (XTRIM below can prune without decrementing).\n local frame_count = redis.call(\"HINCRBY\", K.stream_meta, \"frame_count\", 1)\n redis.call(\"HINCRBY\", K.stream_meta, \"total_bytes\", #A.payload)\n redis.call(\"HSET\", K.stream_meta,\n \"last_sequence\", entry_id,\n \"last_frame_at\", tostring(now_ms))\n\n -- RFC-015 \u{a7}4.1: track `has_durable_frame` for the PEXPIRE gate. Any\n -- Durable or DurableSummary append flips the flag (and PERSISTs the\n -- stream key if a prior BestEffortLive had set a TTL).\n if A.stream_mode == \"durable\" or A.stream_mode == \"summary\" then\n local prior = redis.call(\"HGET\", K.stream_meta, \"has_durable_frame\")\n if prior ~= \"1\" then\n redis.call(\"HSET\", K.stream_meta, \"has_durable_frame\", \"1\")\n -- PERSIST any previously-set best-effort TTL on the stream key.\n redis.call(\"PERSIST\", K.stream_key)\n end\n end\n\n -- 8. Apply retention / TTL policy.\n --\n -- RFC-015 \u{a7}3.5 \u{2014} DurableSummary compacts to a ~64-entry live-tail\n -- window. \u{a7}4.1/\u{a7}4.2 \u{2014} BestEffortLive trims to a derived K and may\n -- PEXPIRE the stream key.\n -- Plain Durable keeps the pre-RFC-015 default (10_000 cap w/ `~`).\n if A.stream_mode == \"summary\" then\n redis.call(\"XTRIM\", K.stream_key, \"MAXLEN\", \"~\", 64)\n elseif A.stream_mode == \"best_effort\" then\n -- RFC-015 \u{a7}4.2: dynamic MAXLEN sizing from an EMA of append rate.\n --\n -- K = clamp(ceil(ema_rate_hz * ttl_ms / 1000) * 2, floor, ceiling)\n --\n -- EMA state persists on the per-attempt stream_meta Hash so the\n -- estimator survives across Lua invocations. 
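 -- Numeric example (informative): with the defaults above (floor 64,\n -- ceiling 16384), ttl_ms = 30000 and a smoothed rate of 20 appends/s,\n -- K = ceil(20 * 30 * 2) = 1200; a 0.5/s trickle clamps up to the\n -- 64-entry floor and a 1 kHz burst clamps down to the 16384 ceiling.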
`ema_rate_hz` is Hz;\n -- `last_append_ts_ms` is the wall-clock of the prior XADD.\n local meta = redis.call(\"HMGET\", K.stream_meta,\n \"ema_rate_hz\", \"last_append_ts_ms\")\n local prev_rate_hz = tonumber(meta[1] or \"\")\n local prev_ts_ms = tonumber(meta[2] or \"\")\n\n local rate_hz\n if prev_rate_hz == nil or prev_ts_ms == nil then\n -- First best-effort append: seed rate so the initial K == floor.\n -- floor = ceil(seed * ttl_s * 2) \u{2192} seed = floor / (ttl_s * 2).\n -- Guard against ttl_ms == 0 to avoid /0.\n local ttl_s = math.max(A.ttl_ms, 1) / 1000.0\n rate_hz = A.maxlen_floor / (ttl_s * 2.0)\n else\n local dt_ms = now_ms - prev_ts_ms\n if dt_ms <= 0 then dt_ms = 1 end\n local inst_hz = 1000.0 / dt_ms\n rate_hz = A.ema_alpha * inst_hz + (1.0 - A.ema_alpha) * prev_rate_hz\n end\n\n local ttl_s_for_k = math.max(A.ttl_ms, 1) / 1000.0\n local raw_k = math.ceil(rate_hz * ttl_s_for_k * 2.0)\n if raw_k < A.maxlen_floor then raw_k = A.maxlen_floor end\n if raw_k > A.maxlen_ceiling then raw_k = A.maxlen_ceiling end\n local target_maxlen = raw_k\n\n redis.call(\"XTRIM\", K.stream_key, \"MAXLEN\", \"~\", target_maxlen)\n\n redis.call(\"HSET\", K.stream_meta,\n \"ema_rate_hz\", tostring(rate_hz),\n \"last_append_ts_ms\", tostring(now_ms),\n \"maxlen_applied_last\", tostring(target_maxlen))\n\n -- PEXPIRE gate: only set a TTL if the stream has NEVER received a\n -- durable frame. Durable content must not be destroyed by a\n -- best-effort TTL refresh.\n local has_dur = redis.call(\"HGET\", K.stream_meta, \"has_durable_frame\")\n if (has_dur ~= \"1\") and A.ttl_ms > 0 then\n redis.call(\"PEXPIRE\", K.stream_key, A.ttl_ms * 2)\n end\n else\n -- Durable (pre-RFC-015 path).\n local maxlen = A.retention_maxlen\n local trim_op\n if maxlen == 0 then\n maxlen = 10000\n trim_op = \"~\"\n else\n trim_op = \"=\"\n end\n redis.call(\"XTRIM\", K.stream_key, \"MAXLEN\", trim_op, maxlen)\n end\n\n -- Success shape: ok(entry_id, frame_count, summary_version). The\n -- third field is empty string for Durable / BestEffortLive and the\n -- post-merge version string for DurableSummary. Pre-RFC-015 callers\n -- that only look at fields [0] / [1] are unaffected.\n return ok(entry_id, tostring(frame_count), summary_version)\nend)\n\n---------------------------------------------------------------------------\n-- ff_read_attempt_stream\n--\n-- Read frames from an attempt-scoped output stream via XRANGE. Non-blocking\n-- (safe in Lua Functions). Cluster-safe: stream_key and stream_meta share\n-- the {p:N} hash tag.\n--\n-- KEYS (2): stream_data, stream_meta\n-- ARGV (4): from_id, to_id, count_limit, visibility\n--\n-- `visibility` (ARGV 4, RFC-015 \u{a7}6.1): \"\" / \"all\" \u{2192} no filter.\n-- \"exclude_best_effort\" \u{2192} drop entries whose XADD `mode` field equals\n-- \"best_effort\". 
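-- Example call shape (keys shown as placeholders; both must share the\n-- same {p:N} hash tag):\n-- FCALL ff_read_attempt_stream 2 <stream_data_key> <stream_meta_key> - + 100 exclude_best_effort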
Pre-RFC-015 entries (no `mode` field) count as\n-- `durable` per the RFC-015 \u{a7}8.1 reader fallback.\n---------------------------------------------------------------------------\nredis.register_function(\'ff_read_attempt_stream\', function(keys, args)\n local stream_key = keys[1]\n local stream_meta = keys[2]\n\n local from_id = args[1] or \"-\"\n local to_id = args[2] or \"+\"\n local count_limit = tonumber(args[3] or \"0\")\n local visibility = args[4] or \"\"\n\n local HARD_CAP = 10000\n if count_limit == nil or count_limit < 1 then\n return err(\"invalid_input\", \"count_limit must be >= 1\")\n end\n if count_limit > HARD_CAP then\n return err(\"invalid_input\", \"count_limit_exceeds_hard_cap\")\n end\n\n local entries = redis.call(\"XRANGE\", stream_key, from_id, to_id,\n \"COUNT\", count_limit)\n\n -- RFC-015 \u{a7}6.1 server-side mode filter.\n if visibility == \"exclude_best_effort\" then\n local filtered = {}\n for i = 1, #entries do\n local entry = entries[i]\n local fields = entry[2]\n -- fields is a flat array of alternating key/value.\n local mode = \"durable\"\n for j = 1, #fields - 1, 2 do\n if fields[j] == \"mode\" then\n mode = fields[j + 1]\n break\n end\n end\n if mode ~= \"best_effort\" then\n filtered[#filtered + 1] = entry\n end\n end\n entries = filtered\n end\n\n local meta = redis.call(\"HMGET\", stream_meta, \"closed_at\", \"closed_reason\")\n local closed_at = meta[1] or \"\"\n local closed_reason = meta[2] or \"\"\n\n return ok(entries, closed_at, closed_reason)\nend)\n\n---------------------------------------------------------------------------\n-- ff_read_summary (RFC-015 \u{a7}6.3)\n--\n-- Read the rolling summary document for an attempt. Non-blocking HGETALL\n-- wrapper. Returns empty fields list when no DurableSummary frame has\n-- ever been appended (the summary Hash is absent).\n--\n-- KEYS (1): stream_summary\n-- ARGV (0)\n---------------------------------------------------------------------------\nredis.register_function(\'ff_read_summary\', function(keys, args)\n local summary_key = keys[1]\n if redis.call(\"EXISTS\", summary_key) == 0 then\n return ok(\"\", \"0\", \"\", \"0\", \"0\")\n end\n local h = redis.call(\"HMGET\", summary_key,\n \"document\", \"version\", \"patch_kind\", \"last_updated_ms\", \"first_applied_ms\")\n return ok(h[1] or \"\", h[2] or \"0\", h[3] or \"\",\n h[4] or \"0\", h[5] or \"0\")\nend)\n\n\n-- source: lua/budget.lua\n-- FlowFabric budget functions\n-- Reference: RFC-008 (Budget), RFC-010 \u{a7}4.3 (#30, #31), \u{a7}4.1 (#29a, #29b)\n--\n-- Depends on helpers: ok, err, is_set, hgetall_to_table\n\n---------------------------------------------------------------------------\n-- ff_create_budget (on {b:M})\n--\n-- Create a new budget policy with hard/soft limits on N dimensions.\n-- Idempotent: if EXISTS budget_def \u{2192} return ok_already_satisfied.\n--\n-- KEYS (5): budget_def, budget_limits, budget_usage, budget_resets_zset,\n-- budget_policies_index\n-- ARGV (variable): budget_id, scope_type, scope_id, enforcement_mode,\n-- on_hard_limit, on_soft_limit, reset_interval_ms, now_ms,\n-- dimension_count, dim_1..dim_N, hard_1..hard_N, soft_1..soft_N\n---------------------------------------------------------------------------\nredis.register_function(\'ff_create_budget\', function(keys, args)\n local K = {\n def_key = keys[1],\n limits_key = keys[2],\n usage_key = keys[3],\n resets_zset = keys[4],\n policies_index = keys[5],\n }\n\n local A = {\n budget_id = args[1],\n scope_type = args[2],\n scope_id = args[3],\n 
enforcement_mode = args[4],\n on_hard_limit = args[5],\n on_soft_limit = args[6],\n reset_interval_ms = args[7],\n now_ms = args[8],\n }\n\n -- Maintain budget_policies_index BEFORE the idempotency guard. SADD is\n -- itself idempotent (no-op on existing members), and hoisting it heals\n -- any pre-existing budget_def that was created before this index was\n -- introduced \u{2014} no migration script required.\n redis.call(\"SADD\", K.policies_index, A.budget_id)\n\n -- Idempotency: already exists \u{2192} return immediately\n if redis.call(\"EXISTS\", K.def_key) == 1 then\n return ok_already_satisfied(A.budget_id)\n end\n\n local dim_count = require_number(args[9], \"dim_count\")\n if type(dim_count) == \"table\" then return dim_count end\n\n -- HSET budget definition\n redis.call(\"HSET\", K.def_key,\n \"budget_id\", A.budget_id,\n \"scope_type\", A.scope_type,\n \"scope_id\", A.scope_id,\n \"enforcement_mode\", A.enforcement_mode,\n \"on_hard_limit\", A.on_hard_limit,\n \"on_soft_limit\", A.on_soft_limit,\n \"reset_interval_ms\", A.reset_interval_ms,\n \"breach_count\", \"0\",\n \"soft_breach_count\", \"0\",\n \"created_at\", A.now_ms,\n \"last_updated_at\", A.now_ms)\n\n -- HSET per-dimension hard and soft limits\n for i = 1, dim_count do\n local dim = args[9 + i]\n local hard = args[9 + dim_count + i]\n local soft = args[9 + 2 * dim_count + i]\n redis.call(\"HSET\", K.limits_key, \"hard:\" .. dim, hard, \"soft:\" .. dim, soft)\n end\n\n -- budget_usage left empty \u{2014} first report_usage will create fields\n\n -- Schedule periodic reset if reset_interval_ms > 0\n local interval_ms = tonumber(A.reset_interval_ms)\n if interval_ms > 0 then\n local next_reset_at = tostring(tonumber(A.now_ms) + interval_ms)\n redis.call(\"HSET\", K.def_key, \"next_reset_at\", next_reset_at)\n redis.call(\"ZADD\", K.resets_zset, tonumber(next_reset_at), A.budget_id)\n end\n\n return ok(A.budget_id)\nend)\n\n---------------------------------------------------------------------------\n-- #30 ff_report_usage_and_check (on {b:M})\n--\n-- Check-before-increment: read current usage, check hard limits. If any\n-- dimension would breach, return HARD_BREACH without incrementing. If safe,\n-- HINCRBY all dimensions, then check soft limits.\n--\n-- Atomic Lua serialization on {b:M} guarantees zero overshoot.\n--\n-- KEYS (3): budget_usage, budget_limits, budget_def\n-- ARGV (variable): dimension_count, dim_1..dim_N, delta_1..delta_N, now_ms, [dedup_key]\n---------------------------------------------------------------------------\nredis.register_function(\'ff_report_usage_and_check\', function(keys, args)\n local K = {\n usage_key = keys[1],\n limits_key = keys[2],\n def_key = keys[3],\n }\n\n local dim_count = require_number(args[1], \"dim_count\")\n if type(dim_count) == \"table\" then return dim_count end\n local now_ms = args[2 * dim_count + 2]\n local dedup_key = args[2 * dim_count + 3] or \"\"\n\n -- Idempotency: if dedup_key provided, check for prior application\n if dedup_key ~= \"\" then\n local existing = redis.call(\"GET\", dedup_key)\n if existing then\n return {1, \"ALREADY_APPLIED\"}\n end\n end\n\n -- Phase 1: CHECK all dimensions BEFORE any increment.\n -- If any hard limit would be breached, reject the entire report.\n for i = 1, dim_count do\n local dim = args[1 + i]\n local delta = tonumber(args[1 + dim_count + i])\n local current = tonumber(redis.call(\"HGET\", K.usage_key, dim) or \"0\")\n local new_total = current + delta\n\n local hard_limit = redis.call(\"HGET\", K.limits_key, \"hard:\" .. 
dim)\n if hard_limit and hard_limit ~= \"\" and hard_limit ~= false then\n local limit_val = tonumber(hard_limit)\n if limit_val > 0 and new_total > limit_val then\n -- Record breach metadata but DO NOT increment\n redis.call(\"HINCRBY\", K.def_key, \"breach_count\", 1)\n redis.call(\"HSET\", K.def_key,\n \"last_breach_at\", now_ms,\n \"last_breach_dim\", dim,\n \"last_updated_at\", now_ms)\n return {1, \"HARD_BREACH\", dim, tostring(current), tostring(hard_limit)}\n end\n end\n end\n\n -- Phase 2: No hard breach detected \u{2014} safe to increment all dimensions.\n local breached_soft = nil\n for i = 1, dim_count do\n local dim = args[1 + i]\n local delta = tonumber(args[1 + dim_count + i])\n local new_val = redis.call(\"HINCRBY\", K.usage_key, dim, delta)\n\n -- Check soft limit (advisory \u{2014} increment still happens)\n local soft_limit = redis.call(\"HGET\", K.limits_key, \"soft:\" .. dim)\n if soft_limit and soft_limit ~= \"\" and soft_limit ~= false then\n local limit_val = tonumber(soft_limit)\n if limit_val > 0 and new_val > limit_val then\n if not breached_soft then\n breached_soft = dim\n end\n end\n end\n end\n\n -- Update metadata\n redis.call(\"HSET\", K.def_key, \"last_updated_at\", now_ms)\n\n -- Mark dedup key after successful increment (24h TTL)\n if dedup_key ~= \"\" then\n redis.call(\"SET\", dedup_key, \"1\", \"PX\", 86400000)\n end\n\n if breached_soft then\n redis.call(\"HINCRBY\", K.def_key, \"soft_breach_count\", 1)\n local soft_val = tonumber(redis.call(\"HGET\", K.limits_key, \"soft:\" .. breached_soft) or \"0\")\n local cur_val = tonumber(redis.call(\"HGET\", K.usage_key, breached_soft) or \"0\")\n return {1, \"SOFT_BREACH\", breached_soft, tostring(cur_val), tostring(soft_val)}\n end\n\n return {1, \"OK\"}\nend)\n\n---------------------------------------------------------------------------\n-- #31 ff_reset_budget (on {b:M})\n--\n-- Scanner-called periodic reset. Zero all usage fields, record reset,\n-- compute next_reset_at, re-score in reset index.\n--\n-- KEYS (3): budget_def, budget_usage, budget_resets_zset\n-- ARGV (2): budget_id, now_ms\n---------------------------------------------------------------------------\nredis.register_function(\'ff_reset_budget\', function(keys, args)\n local K = {\n def_key = keys[1],\n usage_key = keys[2],\n resets_zset = keys[3],\n }\n\n local A = {\n budget_id = args[1],\n now_ms = args[2],\n }\n\n -- 1. Read usage fields and zero them\n local usage_fields = redis.call(\"HKEYS\", K.usage_key)\n if #usage_fields > 0 then\n local zero_args = {}\n for _, field in ipairs(usage_fields) do\n zero_args[#zero_args + 1] = field\n zero_args[#zero_args + 1] = \"0\"\n end\n redis.call(\"HSET\", K.usage_key, unpack(zero_args))\n end\n\n -- 2. Update budget_def: last_reset_at, reset_count\n redis.call(\"HINCRBY\", K.def_key, \"reset_count\", 1)\n redis.call(\"HSET\", K.def_key,\n \"last_reset_at\", A.now_ms,\n \"last_updated_at\", A.now_ms,\n \"last_breach_at\", \"\",\n \"last_breach_dim\", \"\")\n\n -- 3. 
Compute next_reset_at from reset_interval_ms\n local interval_ms = tonumber(redis.call(\"HGET\", K.def_key, \"reset_interval_ms\") or \"0\")\n local next_reset_at = \"0\"\n if interval_ms > 0 then\n next_reset_at = tostring(tonumber(A.now_ms) + interval_ms)\n redis.call(\"HSET\", K.def_key, \"next_reset_at\", next_reset_at)\n redis.call(\"ZADD\", K.resets_zset, tonumber(next_reset_at), A.budget_id)\n else\n -- No recurring reset \u{2014} remove from schedule\n redis.call(\"ZREM\", K.resets_zset, A.budget_id)\n end\n\n return ok(next_reset_at)\nend)\n\n---------------------------------------------------------------------------\n-- #29b ff_unblock_execution (on {p:N})\n--\n-- Re-evaluate blocked execution, set eligible_now, ZREM blocked set,\n-- ZADD eligible. All 7 dims.\n--\n-- KEYS (3): exec_core, blocked_zset, eligible_zset\n-- ARGV (3): execution_id, now_ms, expected_blocking_reason\n---------------------------------------------------------------------------\nredis.register_function(\'ff_unblock_execution\', function(keys, args)\n local K = {\n core_key = keys[1],\n blocked_zset = keys[2],\n eligible_zset = keys[3],\n }\n\n local A = {\n execution_id = args[1],\n now_ms = args[2],\n expected_blocking_reason = args[3] or \"\",\n }\n\n -- 1. Read execution core\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then return err(\"execution_not_found\") end\n local core = hgetall_to_table(raw)\n\n -- 2. Must be runnable\n if core.lifecycle_phase ~= \"runnable\" then\n return err(\"execution_not_eligible\")\n end\n\n -- 3. Must be blocked\n local es = core.eligibility_state\n if es ~= \"blocked_by_budget\" and es ~= \"blocked_by_quota\"\n and es ~= \"blocked_by_route\" and es ~= \"blocked_by_operator\" then\n return err(\"execution_not_eligible\")\n end\n\n -- 4. Validate expected blocking reason (prevent stale unblock)\n if A.expected_blocking_reason ~= \"\" then\n if core.blocking_reason ~= A.expected_blocking_reason then\n return err(\"execution_not_eligible\")\n end\n end\n\n -- 5. Transition: all 7 dims\n local priority = tonumber(core.priority or \"0\")\n local created_at = tonumber(core.created_at or \"0\")\n local score = 0 - (priority * 1000000000000) + created_at\n\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"runnable\",\n \"ownership_state\", \"unowned\",\n \"eligibility_state\", \"eligible_now\",\n \"blocking_reason\", \"waiting_for_worker\",\n \"blocking_detail\", \"\",\n \"terminal_outcome\", \"none\",\n \"attempt_state\", core.attempt_state or \"pending_first_attempt\",\n \"public_state\", \"waiting\",\n \"last_transition_at\", A.now_ms,\n \"last_mutation_at\", A.now_ms)\n\n -- 6. Move from blocked to eligible\n redis.call(\"ZREM\", K.blocked_zset, A.execution_id)\n redis.call(\"ZADD\", K.eligible_zset, score, A.execution_id)\n\n return ok(\"unblocked\")\nend)\n\n---------------------------------------------------------------------------\n-- #29a ff_block_execution_for_admission (on {p:N})\n--\n-- Parameterized block: set eligibility/blocking for budget/quota/route/lane\n-- denial, ZREM eligible, ZADD target blocked set. 
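-- Example (informative): a budget denial calls this with blocking_reason\n-- \"waiting_for_budget\", which maps to eligibility_state \"blocked_by_budget\";\n-- the matching ff_unblock_execution call should pass the same reason as\n-- expected_blocking_reason so a stale unblock is refused if the reason has\n-- changed in the meantime.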
All 7 dims.\n--\n-- KEYS (3): exec_core, eligible_zset, target_blocked_zset\n-- ARGV (4): execution_id, blocking_reason, blocking_detail, now_ms\n---------------------------------------------------------------------------\nredis.register_function(\'ff_block_execution_for_admission\', function(keys, args)\n local K = {\n core_key = keys[1],\n eligible_zset = keys[2],\n blocked_zset = keys[3],\n }\n\n local A = {\n execution_id = args[1],\n blocking_reason = args[2],\n blocking_detail = args[3] or \"\",\n now_ms = args[4],\n }\n\n -- 1. Read execution core\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then return err(\"execution_not_found\") end\n local core = hgetall_to_table(raw)\n\n -- 2. Must be runnable\n if core.lifecycle_phase ~= \"runnable\" then\n if core.lifecycle_phase == \"terminal\" then\n return err(\"execution_not_active\",\n core.terminal_outcome or \"\",\n core.current_lease_epoch or \"\",\n \"terminal\",\n core.current_attempt_id or \"\")\n end\n return err(\"execution_not_eligible\")\n end\n\n -- 3. Map blocking_reason to eligibility_state\n local REASON_TO_ELIGIBILITY = {\n waiting_for_budget = \"blocked_by_budget\",\n waiting_for_quota = \"blocked_by_quota\",\n waiting_for_capable_worker = \"blocked_by_route\",\n paused_by_operator = \"blocked_by_operator\",\n paused_by_policy = \"blocked_by_lane_state\",\n }\n local eligibility = REASON_TO_ELIGIBILITY[A.blocking_reason]\n if not eligibility then\n return err(\"invalid_blocking_reason\")\n end\n\n -- 4. Transition: all 7 dims\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"runnable\",\n \"ownership_state\", \"unowned\",\n \"eligibility_state\", eligibility,\n \"blocking_reason\", A.blocking_reason,\n \"blocking_detail\", A.blocking_detail,\n \"terminal_outcome\", \"none\",\n \"attempt_state\", core.attempt_state or \"pending_first_attempt\",\n \"public_state\", \"rate_limited\",\n \"last_transition_at\", A.now_ms,\n \"last_mutation_at\", A.now_ms)\n\n -- 5. 
Move from eligible to blocked\n redis.call(\"ZREM\", K.eligible_zset, A.execution_id)\n redis.call(\"ZADD\", K.blocked_zset, tonumber(A.now_ms), A.execution_id)\n\n return ok(\"blocked\")\nend)\n\n\n-- source: lua/quota.lua\n-- FlowFabric quota and rate-limit functions\n-- Reference: RFC-008 (Quota), RFC-010 \u{a7}4.4 (#32)\n--\n-- Depends on helpers: ok, err, is_set\n\n---------------------------------------------------------------------------\n-- ff_create_quota_policy (on {q:K})\n--\n-- Create a new quota/rate-limit policy.\n-- Idempotent: if EXISTS quota_def \u{2192} return ok_already_satisfied.\n--\n-- KEYS (5): quota_def, quota_window_zset, quota_concurrency_counter,\n-- admitted_set, quota_policies_index\n-- ARGV (5): quota_policy_id, window_seconds, max_requests_per_window,\n-- max_concurrent, now_ms\n---------------------------------------------------------------------------\nredis.register_function(\'ff_create_quota_policy\', function(keys, args)\n local K = {\n def_key = keys[1],\n window_zset = keys[2],\n concurrency_key = keys[3],\n admitted_set = keys[4],\n policies_index = keys[5],\n }\n\n local A = {\n quota_policy_id = args[1],\n window_seconds = args[2],\n max_requests_per_window = args[3],\n max_concurrent = args[4],\n now_ms = args[5],\n }\n\n -- Idempotency: already exists \u{2192} return immediately\n if redis.call(\"EXISTS\", K.def_key) == 1 then\n return ok_already_satisfied(A.quota_policy_id)\n end\n\n -- HSET quota definition\n redis.call(\"HSET\", K.def_key,\n \"quota_policy_id\", A.quota_policy_id,\n \"requests_per_window_seconds\", A.window_seconds,\n \"max_requests_per_window\", A.max_requests_per_window,\n \"active_concurrency_cap\", A.max_concurrent,\n \"created_at\", A.now_ms)\n\n -- Init concurrency counter to 0\n redis.call(\"SET\", K.concurrency_key, \"0\")\n\n -- Register in partition-level policy index (for cluster-safe discovery)\n redis.call(\"SADD\", K.policies_index, A.quota_policy_id)\n\n -- admitted_set + quota_window_zset left empty (populated on admission)\n\n return ok(A.quota_policy_id)\nend)\n\n---------------------------------------------------------------------------\n-- #32 ff_check_admission_and_record (on {q:K})\n--\n-- Idempotent sliding-window rate check + concurrency check.\n-- If admitted: ZADD window, SET NX guard, optional INCR concurrency,\n-- SADD to admitted_set (for cluster-safe reconciler discovery).\n--\n-- KEYS (5): window_zset, concurrency_counter, quota_def, admitted_guard_key,\n-- admitted_set\n-- ARGV (6): now_ms, window_seconds, rate_limit, concurrency_cap,\n-- execution_id, jitter_ms\n---------------------------------------------------------------------------\nredis.register_function(\'ff_check_admission_and_record\', function(keys, args)\n local K = {\n window_zset = keys[1],\n concurrency_key = keys[2],\n quota_def = keys[3],\n admitted_guard_key = keys[4],\n admitted_set = keys[5],\n }\n\n local now_ms_n = require_number(args[1], \"now_ms\")\n if type(now_ms_n) == \"table\" then return now_ms_n end\n local window_seconds_n = require_number(args[2], \"window_seconds\")\n if type(window_seconds_n) == \"table\" then return window_seconds_n end\n local rate_limit_n = require_number(args[3], \"rate_limit\")\n if type(rate_limit_n) == \"table\" then return rate_limit_n end\n local concurrency_cap_n = require_number(args[4], \"concurrency_cap\")\n if type(concurrency_cap_n) == \"table\" then return concurrency_cap_n end\n\n local A = {\n now_ms = now_ms_n,\n window_seconds = window_seconds_n,\n rate_limit = 
rate_limit_n,\n concurrency_cap = concurrency_cap_n,\n execution_id = args[5],\n jitter_ms = tonumber(args[6] or \"0\"),\n }\n\n local window_ms = A.window_seconds * 1000\n\n -- 1. Idempotency guard: already admitted in this window?\n if redis.call(\"EXISTS\", K.admitted_guard_key) == 1 then\n return { \"ALREADY_ADMITTED\" }\n end\n\n -- 2. Sliding window: remove expired entries\n redis.call(\"ZREMRANGEBYSCORE\", K.window_zset, \"-inf\", A.now_ms - window_ms)\n\n -- 3. Check rate limit\n if A.rate_limit > 0 then\n local current_count = redis.call(\"ZCARD\", K.window_zset)\n if current_count >= A.rate_limit then\n -- Compute retry_after from oldest entry\n local oldest = redis.call(\"ZRANGE\", K.window_zset, 0, 0, \"WITHSCORES\")\n local retry_after_ms = 0\n if #oldest >= 2 then\n retry_after_ms = tonumber(oldest[2]) + window_ms - A.now_ms\n if retry_after_ms < 0 then retry_after_ms = 0 end\n end\n local jitter = 0\n if A.jitter_ms > 0 then\n jitter = math.random(0, A.jitter_ms)\n end\n return { \"RATE_EXCEEDED\", tostring(retry_after_ms + jitter) }\n end\n end\n\n -- 4. Check concurrency cap\n if A.concurrency_cap > 0 then\n local active = tonumber(redis.call(\"GET\", K.concurrency_key) or \"0\")\n if active >= A.concurrency_cap then\n return { \"CONCURRENCY_EXCEEDED\" }\n end\n end\n\n -- 5. Admit: record in sliding window (execution_id as member \u{2014} idempotent)\n redis.call(\"ZADD\", K.window_zset, A.now_ms, A.execution_id)\n\n -- 6. Set admitted guard key with TTL = window size\n -- Guard: PX 0 or PX <0 causes Valkey error inside Lua (after ZADD committed).\n if window_ms > 0 then\n redis.call(\"SET\", K.admitted_guard_key, \"1\", \"PX\", window_ms, \"NX\")\n end\n\n -- 7. Increment concurrency counter if cap is set\n if A.concurrency_cap > 0 then\n redis.call(\"INCR\", K.concurrency_key)\n end\n\n -- 8. Track in admitted set (for cluster-safe reconciler \u{2014} replaces SCAN)\n redis.call(\"SADD\", K.admitted_set, A.execution_id)\n\n return { \"ADMITTED\" }\nend)\n\n---------------------------------------------------------------------------\n-- ff_release_admission (on {q:K})\n--\n-- Release a previously-recorded admission slot. Called when a claim grant\n-- fails after admission was recorded, preventing leaked concurrency slots.\n--\n-- KEYS (3): admitted_guard_key, admitted_set, concurrency_counter\n-- ARGV (1): execution_id\n---------------------------------------------------------------------------\nredis.register_function(\'ff_release_admission\', function(keys, args)\n local K = {\n admitted_guard_key = keys[1],\n admitted_set = keys[2],\n concurrency_key = keys[3],\n }\n\n local execution_id = args[1]\n\n -- 1. Delete the guard key (idempotent \u{2014} DEL returns 0 if absent)\n redis.call(\"DEL\", K.admitted_guard_key)\n\n -- 2. Remove from admitted set\n redis.call(\"SREM\", K.admitted_set, execution_id)\n\n -- 3. 
Decrement concurrency counter (floor at 0)\n local current = tonumber(redis.call(\"GET\", K.concurrency_key) or \"0\")\n if current > 0 then\n redis.call(\"DECR\", K.concurrency_key)\n end\n\n return {1, \"OK\", \"released\"}\nend)\n\n\n-- source: lua/flow.lua\n-- FlowFabric flow coordination and dependency functions\n-- Reference: RFC-007 (Flow), RFC-010 \u{a7}4.1 (#22-24, #35), \u{a7}4.2 (#29)\n--\n-- Depends on helpers: ok, err, is_set, hgetall_to_table\n\n---------------------------------------------------------------------------\n-- Cycle detection helper\n---------------------------------------------------------------------------\n\n-- Max nodes to visit during cycle detection BFS.\nlocal MAX_CYCLE_CHECK_NODES = 1000\n\n-- Detect if adding an edge upstream\u{2192}downstream would create a cycle.\n-- BFS from downstream through existing outgoing edges: if upstream is\n-- reachable, the new edge closes a loop (A\u{2192}B\u{2192}C\u{2192}A deadlock).\n-- All keys share the same {fp:N} slot (flow-partition co-location).\n-- @param flow_prefix e.g. \"ff:flow:{fp:0}:<flow_id>\"\n-- @param start_eid downstream of proposed edge (BFS start)\n-- @param target_eid upstream of proposed edge (looking for this)\n-- @return true if a cycle would be created\nlocal function detect_cycle(flow_prefix, start_eid, target_eid)\n local visited = {}\n local queue = {start_eid}\n local count = 0\n\n while #queue > 0 do\n local next_queue = {}\n for _, eid in ipairs(queue) do\n if eid == target_eid then\n return true\n end\n if not visited[eid] then\n visited[eid] = true\n count = count + 1\n if count > MAX_CYCLE_CHECK_NODES then\n return true -- graph too large to verify; reject conservatively\n end\n local out_key = flow_prefix .. \":out:\" .. eid\n local edges = redis.call(\"SMEMBERS\", out_key)\n for _, edge_id in ipairs(edges) do\n local edge_key = flow_prefix .. \":edge:\" .. edge_id\n local next_eid = redis.call(\"HGET\", edge_key, \"downstream_execution_id\")\n if next_eid and next_eid ~= \"\" and not visited[next_eid] then\n next_queue[#next_queue + 1] = next_eid\n end\n end\n end\n end\n queue = next_queue\n end\n\n return false\nend\n\n---------------------------------------------------------------------------\n-- ff_create_flow (on {fp:N})\n--\n-- Create a new flow container. Idempotent: if flow_core already exists,\n-- returns ok_already_satisfied.\n--\n-- KEYS (3): flow_core, members_set, flow_index\n-- ARGV (4): flow_id, flow_kind, namespace, now_ms (IGNORED \u{2014} see note below)\n--\n-- NOTE: ARGV[4] (`now_ms`) is accepted for caller compatibility but NOT\n-- used for stored timestamps. We read server time via redis.call(\"TIME\")\n-- so created_at / last_mutation_at agree with fields written by other\n-- Lua functions (ff_complete_execution etc.) under client clock skew.\n---------------------------------------------------------------------------\nredis.register_function(\'ff_create_flow\', function(keys, args)\n local K = {\n flow_core = keys[1],\n members_set = keys[2],\n flow_index = keys[3],\n }\n\n local A = {\n flow_id = args[1],\n flow_kind = args[2],\n namespace = args[3],\n -- args[4] is client-provided now_ms; intentionally ignored.\n }\n\n -- Server time (not client-provided) so created_at / last_mutation_at\n -- agree with timestamps written by ff_complete_execution and peers.\n local now_ms = server_time_ms()\n\n -- Maintain flow_index BEFORE the idempotency guard. 
SADD is itself\n -- idempotent (no-op on existing members), and hoisting it heals any\n -- pre-existing flow_core that was created before this index was\n -- introduced \u{2014} no migration script required.\n redis.call(\"SADD\", K.flow_index, A.flow_id)\n\n -- Idempotency: if flow already exists, return already_satisfied\n if redis.call(\"EXISTS\", K.flow_core) == 1 then\n return ok_already_satisfied(A.flow_id)\n end\n\n -- Create flow core record\n redis.call(\"HSET\", K.flow_core,\n \"flow_id\", A.flow_id,\n \"flow_kind\", A.flow_kind,\n \"namespace\", A.namespace,\n \"graph_revision\", 0,\n \"node_count\", 0,\n \"edge_count\", 0,\n \"public_flow_state\", \"open\",\n \"created_at\", now_ms,\n \"last_mutation_at\", now_ms)\n\n return ok(A.flow_id)\nend)\n\n---------------------------------------------------------------------------\n-- ff_add_execution_to_flow (on {fp:N} \u{2014} single atomic FCALL)\n--\n-- Add a member execution to a flow AND stamp the flow_id back-pointer\n-- on exec_core in one atomic commit. Per RFC-011 \u{a7}7.3, exec keys\n-- co-locate with their parent flow\'s partition under hash-tag routing,\n-- so exec_core shares the `{fp:N}` hash-tag with flow_core / members_set\n-- / flow_index. All four KEYS hash to the same slot; no CROSSSLOT.\n--\n-- KEYS (4): flow_core, members_set, flow_index, exec_core\n-- ARGV (3): flow_id, execution_id, now_ms (IGNORED \u{2014} server time used)\n--\n-- Validates-before-writing: flow_not_found / flow_already_terminal\n-- early-returns fire BEFORE any write (step 1 below). On those error\n-- paths, zero state mutates \u{2014} atomicity by construction at the Lua\n-- level (Valkey scripting contract: no redis.call() before error_reply\n-- means nothing to roll back). See RFC-011 \u{a7}7.3.1 tests for the\n-- structural pin.\n--\n-- Invariant (post-RFC-011): a successful call commits BOTH the flow-\n-- index updates AND the exec_core.flow_id stamp in one atomic unit.\n-- Readers can assume exec_core.flow_id == flow_id iff the exec is in\n-- members_set. The pre-RFC-011 two-phase contract + \u{a7}5.5 orphan-window\n-- + issue #21 reconciliation-scanner plan are all superseded.\n---------------------------------------------------------------------------\nredis.register_function(\'ff_add_execution_to_flow\', function(keys, args)\n local K = {\n flow_core = keys[1],\n members_set = keys[2],\n flow_index = keys[3],\n exec_core = keys[4],\n }\n\n local A = {\n flow_id = args[1],\n execution_id = args[2],\n -- args[3] is client-provided now_ms; intentionally ignored in favour\n -- of redis.call(\"TIME\") to keep last_mutation_at consistent with\n -- timestamps stamped by ff_complete_execution and peers.\n }\n\n local now_ms = server_time_ms()\n\n -- 1. Validate flow exists and is not terminal, and execution exists.\n -- Validates-before-writing: no redis.call() writes happen before\n -- these guards, so the error paths commit zero state (symmetric\n -- with step 2\'s cross-flow guard on AlreadyMember).\n local raw = redis.call(\"HGETALL\", K.flow_core)\n if #raw == 0 then return err(\"flow_not_found\") end\n local flow = hgetall_to_table(raw)\n local pfs = flow.public_flow_state or \"\"\n if pfs == \"cancelled\" or pfs == \"completed\" or pfs == \"failed\" then\n return err(\"flow_already_terminal\")\n end\n -- Execution must exist \u{2014} otherwise step 5\'s HSET exec_core would\n -- silently create a hash for a non-existent exec, leading to an\n -- inconsistent members_set \u{2194} exec_core state. 
Symmetric with the\n -- flow_not_found guard above.\n if redis.call(\"EXISTS\", K.exec_core) == 0 then\n return err(\"execution_not_found\")\n end\n\n -- Self-heal flow_index for LIVE flows only. The projector may have\n -- SREMd this flow after observing an all-terminal sample, yet the\n -- flow is still \"open\" per flow_core and can accept new members.\n -- Re-add idempotently so the next projector cycle picks the flow\n -- back up. Runs only after the terminal-state guard above so we do\n -- not resurrect cancelled/completed/failed flows into the active\n -- index. Same {fp:N} slot as the other KEYS, so atomic with the\n -- membership mutation below.\n redis.call(\"SADD\", K.flow_index, A.flow_id)\n\n -- 2. Idempotency: already a member of THIS flow\'s members_set.\n -- Still stamp exec_core.flow_id defensively \u{2014} an earlier call\n -- may have committed members_set but crashed before exec_core\n -- HSET under the legacy two-phase shape. Stamping here is a\n -- no-op if flow_id already matches; heals any pre-RFC-011\n -- orphans encountered on a rolling upgrade.\n --\n -- Cross-flow guard on the orphan case: if exec_core.flow_id is\n -- already set to a DIFFERENT flow (corrupted-state orphan that\n -- IS in this flow\'s members_set but stamped wrong), refuse\n -- instead of silently re-stamping. Symmetric with step 3\'s\n -- guard on the not-yet-a-member branch \u{2014} catches the same\n -- invariant violation earlier in the path. Empty existing\n -- flow_id goes through the heal path normally.\n if redis.call(\"SISMEMBER\", K.members_set, A.execution_id) == 1 then\n local existing = redis.call(\"HGET\", K.exec_core, \"flow_id\")\n if existing and existing ~= \"\" and existing ~= A.flow_id then\n return err(\"already_member_of_different_flow:\" .. existing)\n end\n redis.call(\"HSET\", K.exec_core, \"flow_id\", A.flow_id)\n local nc = redis.call(\"HGET\", K.flow_core, \"node_count\") or \"0\"\n return ok_already_satisfied(A.execution_id, nc)\n end\n\n -- 3. Cross-flow guard: if exec_core.flow_id is already set to a\n -- DIFFERENT flow, refuse \u{2014} silently re-stamping would orphan\n -- the other flow\'s accounting. An exec belongs to at most one\n -- flow at a time per RFC-007.\n local existing_flow_id = redis.call(\"HGET\", K.exec_core, \"flow_id\")\n if existing_flow_id and existing_flow_id ~= \"\" and existing_flow_id ~= A.flow_id then\n return err(\"already_member_of_different_flow:\" .. existing_flow_id)\n end\n\n -- 4. Add to membership set\n redis.call(\"SADD\", K.members_set, A.execution_id)\n\n -- 5. Stamp the flow_id back-pointer on exec_core. Co-located with\n -- the flow\'s partition under RFC-011 \u{a7}7.3 hash-tag routing; this\n -- HSET is part of the same atomic FCALL as the SADD above.\n redis.call(\"HSET\", K.exec_core, \"flow_id\", A.flow_id)\n\n -- 6. Increment node_count and graph_revision\n local new_nc = redis.call(\"HINCRBY\", K.flow_core, \"node_count\", 1)\n local new_rev = redis.call(\"HINCRBY\", K.flow_core, \"graph_revision\", 1)\n redis.call(\"HSET\", K.flow_core, \"last_mutation_at\", now_ms)\n\n return ok(A.execution_id, tostring(new_nc))\nend)\n\n---------------------------------------------------------------------------\n-- ff_cancel_flow (on {fp:N})\n--\n-- Cancel a flow. 
Returns the member list for the caller to dispatch\n-- individual cancellations cross-partition.\n--\n-- KEYS (5): flow_core, members_set, flow_index (RESERVED \u{2014} see below),\n-- pending_cancels, cancel_backlog\n-- ARGV (5): flow_id, reason, cancellation_policy, now_ms (IGNORED \u{2014}\n-- server time used so `cancelled_at` agrees with peer Lua fields),\n-- grace_ms (optional; defaults to 30000 when omitted or empty)\n--\n-- KEYS[3] (flow_index) is accepted for caller-compatibility with the\n-- shared FlowStructOpKeys wrapper, but this function does NOT mutate\n-- flow_index. The projector is the sole SREM writer (see the \"4b\" note\n-- in the body below).\n---------------------------------------------------------------------------\nredis.register_function(\'ff_cancel_flow\', function(keys, args)\n local K = {\n flow_core = keys[1],\n members_set = keys[2],\n -- keys[3] is flow_index; present in KEYS for wrapper symmetry but\n -- unused in this function (see rationale near the end of the body).\n pending_cancels = keys[4], -- SET populated only on first terminalization\n cancel_backlog = keys[5], -- per-fp ZSET tracking flows owing members\n }\n\n local A = {\n flow_id = args[1],\n reason = args[2],\n cancellation_policy = args[3],\n -- args[4] is client-provided now_ms; intentionally ignored.\n }\n\n -- grace_ms must be a finite non-negative integer. Same guard as\n -- ff_rotate_waitpoint_hmac_secret: reject NaN, \u{b1}inf, negative,\n -- non-integer, or >2^53-1. math.floor alone doesn\'t catch\n -- infinities (math.floor(math.huge) == math.huge) which would\n -- stamp \"inf\" into cancel_backlog and permanently poison the entry.\n -- Default 30000 (30s) if the arg is omitted or empty string.\n local grace_ms\n if args[5] == nil or args[5] == \"\" then\n grace_ms = 30000\n else\n local g = tonumber(args[5])\n if not g\n or g ~= g -- NaN\n or g < 0\n or g > 9007199254740991 -- 2^53 - 1\n or g ~= math.floor(g) then\n return err(\"invalid_grace_ms\")\n end\n grace_ms = g\n end\n A.grace_ms = grace_ms\n\n local now_ms = server_time_ms()\n\n -- 1. Validate flow exists\n local raw = redis.call(\"HGETALL\", K.flow_core)\n if #raw == 0 then return err(\"flow_not_found\") end\n local flow = hgetall_to_table(raw)\n\n -- 2. Check not already terminal\n local pfs = flow.public_flow_state or \"\"\n if pfs == \"cancelled\" or pfs == \"completed\" or pfs == \"failed\" then\n return err(\"flow_already_terminal\")\n end\n\n -- 3. Get all member execution IDs\n local members = redis.call(\"SMEMBERS\", K.members_set)\n\n -- 4. Update flow state\n -- cancellation_policy is persisted so an AlreadyTerminal retry can\n -- return the authoritative stored policy instead of echoing the\n -- caller\'s retry intent.\n --\n -- NOTE: this field is persisted from this library version onward.\n -- Flows cancelled before this deploy reach public_flow_state=\'cancelled\'\n -- without a cancellation_policy value. The Rust caller detects the\n -- empty field on HMGET and falls back to args.cancellation_policy, so\n -- no backfill migration is needed.\n redis.call(\"HSET\", K.flow_core,\n \"public_flow_state\", \"cancelled\",\n \"cancelled_at\", now_ms,\n \"cancel_reason\", A.reason,\n \"cancellation_policy\", A.cancellation_policy,\n \"last_mutation_at\", now_ms)\n\n -- Do NOT SREM flow_index here. Member cancellations dispatch\n -- asynchronously from ff-server; flow_projector needs to keep\n -- projecting the flow while those cancels land so the summary\n -- reflects the real progression (running/blocked \u{2192} cancelled). 
The\n -- projector owns the SREM once it observes sampled==true_total\n -- all-terminal (see crates/ff-engine/src/scanner/flow_projector.rs).\n -- A projector-owned SREM is also the right place because it is\n -- the only writer that can prove every member has actually reached\n -- terminal state. Removing the entry here would freeze the summary\n -- at whatever snapshot was current when cancel_flow fired.\n\n -- 5. Durable backlog for async member-cancel dispatch.\n --\n -- Only cancel_all policy dispatches per-member cancels; other policies\n -- mark the flow terminal and leave members to the flow_projector /\n -- retention. Skip the backlog writes outside cancel_all.\n --\n -- If a process crashes between `CancellationScheduled` returning and\n -- the in-process dispatch finishing, OR if one member\'s cancel hits a\n -- permanent error that the bounded retry can\'t recover, the member\n -- would otherwise escape cancellation. Tracking the owed members in\n -- a persistent SET + partition-level ZSET lets the cancel_reconciler\n -- scanner drain the remainder on its interval.\n --\n -- Score = now + grace_ms so the reconciler doesn\'t race the live\n -- dispatch that\'s about to start \u{2014} live dispatch SREMs as it\n -- succeeds; reconciler only picks up flows whose grace has elapsed.\n if A.cancellation_policy == \"cancel_all\" and #members > 0 then\n -- SADD chunked: Lua\'s unpack() arg limit is ~8000 on some builds\n -- and some Valkey deployments enforce max-args lower. Chunk at\n -- 1000 to stay well under both without a noticeable cost.\n local i = 1\n while i <= #members do\n local chunk_end = math.min(i + 999, #members)\n local sadd_args = {}\n for j = i, chunk_end do\n sadd_args[#sadd_args + 1] = members[j]\n end\n redis.call(\"SADD\", K.pending_cancels, unpack(sadd_args))\n i = chunk_end + 1\n end\n redis.call(\"ZADD\", K.cancel_backlog, now_ms + A.grace_ms, A.flow_id)\n end\n\n -- 6. Return: ok(cancellation_policy, member1, member2, ...)\n -- Build array manually to include variable member list.\n local result = {1, \"OK\", A.cancellation_policy}\n for _, eid in ipairs(members) do\n result[#result + 1] = eid\n end\n\n return result\nend)\n\n---------------------------------------------------------------------------\n-- ff_ack_cancel_member (on {fp:N})\n--\n-- Record that one flow member\'s cancel has been committed. Called by\n-- the live dispatch after each successful cancel_member_execution AND\n-- by the cancel_reconciler scanner after it catches up on crash-\n-- orphaned members. 
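-- Example (informative): with the default grace_ms of 30000, a cancel_all\n-- flow is scored into cancel_backlog at now + 30s; acks that drain\n-- pending_cancels before that deadline remove the flow from the backlog\n-- before the cancel_reconciler ever considers it.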
Atomically SREMs from pending_cancels and ZREMs\n-- the flow from the partition backlog when the set is empty.\n--\n-- KEYS (2): pending_cancels, cancel_backlog\n-- ARGV (2): eid, flow_id\n---------------------------------------------------------------------------\nredis.register_function(\'ff_ack_cancel_member\', function(keys, args)\n local pending = keys[1]\n local backlog = keys[2]\n local eid = args[1]\n local flow_id = args[2]\n\n redis.call(\"SREM\", pending, eid)\n if redis.call(\"EXISTS\", pending) == 0 then\n redis.call(\"ZREM\", backlog, flow_id)\n end\n\n return ok()\nend)\n\n---------------------------------------------------------------------------\n-- #29 ff_stage_dependency_edge (on {fp:N})\n--\n-- Validate membership + topology, check graph_revision, create edge,\n-- increment graph_revision.\n--\n-- KEYS (6): flow_core, members_set, edge_hash, out_adj_set, in_adj_set,\n-- grant_hash\n-- ARGV (8): flow_id, edge_id, upstream_eid, downstream_eid,\n-- dependency_kind, data_passing_ref, expected_graph_revision,\n-- now_ms\n---------------------------------------------------------------------------\nredis.register_function(\'ff_stage_dependency_edge\', function(keys, args)\n local K = {\n flow_core = keys[1],\n members_set = keys[2],\n edge_hash = keys[3],\n out_adj_set = keys[4],\n in_adj_set = keys[5],\n grant_hash = keys[6],\n }\n\n local A = {\n flow_id = args[1],\n edge_id = args[2],\n upstream_eid = args[3],\n downstream_eid = args[4],\n dependency_kind = args[5] or \"success_only\",\n data_passing_ref = args[6] or \"\",\n expected_graph_revision = args[7],\n now_ms = args[8],\n }\n\n -- 1. Reject self-referencing edges\n if A.upstream_eid == A.downstream_eid then\n return err(\"self_referencing_edge\")\n end\n\n -- 2. Read flow core\n local raw = redis.call(\"HGETALL\", K.flow_core)\n if #raw == 0 then return err(\"flow_not_found\") end\n local flow = hgetall_to_table(raw)\n\n -- 2b. Reject mutations on terminal flows\n local pfs = flow.public_flow_state or \"\"\n if pfs == \"cancelled\" or pfs == \"completed\" or pfs == \"failed\" then\n return err(\"flow_already_terminal\")\n end\n\n -- 3. Check graph_revision\n if tostring(flow.graph_revision or \"0\") ~= A.expected_graph_revision then\n return err(\"stale_graph_revision\")\n end\n\n -- 4. Verify both executions are members\n if redis.call(\"SISMEMBER\", K.members_set, A.upstream_eid) == 0 then\n return err(\"execution_not_in_flow\")\n end\n if redis.call(\"SISMEMBER\", K.members_set, A.downstream_eid) == 0 then\n return err(\"execution_not_in_flow\")\n end\n\n -- 4b. Transitive cycle detection: walk from downstream through outgoing\n -- edges to check if upstream is reachable (A\u{2192}B\u{2192}C\u{2192}A deadlock prevention).\n local flow_prefix = string.sub(K.flow_core, 1, -6) -- strip \":core\"\n if detect_cycle(flow_prefix, A.downstream_eid, A.upstream_eid) then\n return err(\"cycle_detected\")\n end\n\n -- 5. Check edge doesn\'t already exist\n if redis.call(\"EXISTS\", K.edge_hash) == 1 then\n return err(\"dependency_already_exists\")\n end\n\n -- 6. Create edge record\n redis.call(\"HSET\", K.edge_hash,\n \"edge_id\", A.edge_id,\n \"flow_id\", A.flow_id,\n \"upstream_execution_id\", A.upstream_eid,\n \"downstream_execution_id\", A.downstream_eid,\n \"dependency_kind\", A.dependency_kind,\n \"satisfaction_condition\", \"all_required\",\n \"data_passing_ref\", A.data_passing_ref,\n \"edge_state\", \"pending\",\n \"created_at\", A.now_ms,\n \"created_by\", \"engine\")\n\n -- 7. 
Update adjacency sets\n redis.call(\"SADD\", K.out_adj_set, A.edge_id)\n redis.call(\"SADD\", K.in_adj_set, A.edge_id)\n\n -- 8. Increment graph_revision and edge_count\n local new_rev = redis.call(\"HINCRBY\", K.flow_core, \"graph_revision\", 1)\n redis.call(\"HINCRBY\", K.flow_core, \"edge_count\", 1)\n redis.call(\"HSET\", K.flow_core, \"last_mutation_at\", A.now_ms)\n\n return ok(A.edge_id, tostring(new_rev))\nend)\n\n---------------------------------------------------------------------------\n-- RFC-016 #set_edge_group_policy (on {fp:N})\n--\n-- Declare the inbound-edge-group policy for a downstream execution.\n-- Stage B accepts `all_of`, `any_of`, and `quorum`. Must be called\n-- BEFORE the first `add_dependency` for this downstream (enforced via\n-- the `n > 0` guard below).\n--\n-- KEYS (2): flow_core, edgegroup\n-- ARGV (4): policy_variant, on_satisfied, k, now_ms\n---------------------------------------------------------------------------\nredis.register_function(\'ff_set_edge_group_policy\', function(keys, args)\n local flow_core = keys[1]\n local edgegroup = keys[2]\n local policy_variant = args[1]\n local on_satisfied = args[2] or \"\"\n local k_raw = args[3] or \"0\"\n local now_ms = args[4]\n\n if redis.call(\"EXISTS\", flow_core) == 0 then\n return err(\"flow_not_found\")\n end\n\n if policy_variant ~= \"all_of\"\n and policy_variant ~= \"any_of\"\n and policy_variant ~= \"quorum\" then\n return err(\"invalid_input\",\n \"unsupported_policy_variant: \" .. tostring(policy_variant))\n end\n\n local k_num = tonumber(k_raw) or 0\n if policy_variant == \"quorum\" then\n if k_num < 1 then\n return err(\"invalid_input\", \"quorum k must be >= 1\")\n end\n end\n\n if (policy_variant == \"any_of\" or policy_variant == \"quorum\")\n and on_satisfied ~= \"cancel_remaining\"\n and on_satisfied ~= \"let_run\" then\n return err(\"invalid_input\",\n \"on_satisfied required for any_of/quorum: cancel_remaining | let_run\")\n end\n\n -- Ordering: reject if edges have already been staged for this\n -- downstream (the group hash is populated either by this call or by\n -- ff_apply_dependency_to_child\'s first-edge write).\n local existing_n = tonumber(redis.call(\"HGET\", edgegroup, \"n\") or \"0\")\n if existing_n > 0 then\n return err(\"invalid_input\",\n \"edge_group_policy_already_fixed: dependencies already staged\")\n end\n\n local existing_variant = redis.call(\"HGET\", edgegroup, \"policy_variant\")\n if existing_variant == policy_variant then\n -- Idempotent on identical restate (ignoring on_satisfied / k\n -- drift; Stage B trusts the caller not to flip those once set).\n return ok(\"already_set\")\n end\n\n redis.call(\"HSET\", edgegroup,\n \"policy_variant\", policy_variant,\n \"on_satisfied\", on_satisfied,\n \"k\", tostring(k_num),\n \"n\", \"0\",\n \"succeeded\", \"0\",\n \"failed\", \"0\",\n \"skipped\", \"0\",\n \"group_state\", \"pending\",\n \"created_at\", now_ms)\n\n return ok(\"set\")\nend)\n\n---------------------------------------------------------------------------\n-- #22 ff_apply_dependency_to_child (on {p:N})\n--\n-- Create dep record on child execution partition, increment unsatisfied\n-- count. 
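-- Example (informative): applying a third inbound edge to a still-runnable\n-- child whose earlier edges are unresolved bumps unsatisfied_required_count\n-- to 3 and rewrites blocking_detail as \"3 dep(s) unresolved incl <edge_id>\".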
If child is runnable: set blocked_by_dependencies.\n--\n-- KEYS (8): exec_core, deps_meta, unresolved_set, dep_hash,\n-- eligible_zset, blocked_deps_zset, deps_all_edges, edgegroup\n-- ARGV (7): flow_id, edge_id, upstream_eid, graph_revision,\n-- dependency_kind, data_passing_ref, now_ms\n---------------------------------------------------------------------------\nredis.register_function(\'ff_apply_dependency_to_child\', function(keys, args)\n local K = {\n core_key = keys[1],\n deps_meta = keys[2],\n unresolved_set = keys[3],\n dep_hash = keys[4],\n eligible_zset = keys[5],\n blocked_deps_zset = keys[6],\n deps_all_edges = keys[7],\n edgegroup = keys[8],\n }\n\n local A = {\n flow_id = args[1],\n edge_id = args[2],\n upstream_eid = args[3],\n graph_revision = args[4],\n dependency_kind = args[5] or \"success_only\",\n data_passing_ref = args[6] or \"\",\n now_ms = args[7],\n }\n\n -- 1. Read execution core\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then return err(\"execution_not_found\") end\n local core = hgetall_to_table(raw)\n\n -- 2. Validate flow membership (RFC-007 assert_flow_membership)\n if is_set(core.flow_id) and core.flow_id ~= A.flow_id then\n return err(\"execution_already_in_flow\")\n end\n\n -- 3. Idempotency: dep already applied\n if redis.call(\"EXISTS\", K.dep_hash) == 1 then\n return ok(\"already_applied\")\n end\n\n -- 4. Create dep record\n redis.call(\"HSET\", K.dep_hash,\n \"edge_id\", A.edge_id,\n \"flow_id\", A.flow_id,\n \"upstream_execution_id\", A.upstream_eid,\n \"downstream_execution_id\", core.execution_id or \"\",\n \"dependency_kind\", A.dependency_kind,\n \"state\", \"unsatisfied\",\n \"data_passing_ref\", A.data_passing_ref,\n \"last_resolved_at\", \"\")\n\n -- 5. Update deps:meta\n redis.call(\"SADD\", K.unresolved_set, A.edge_id)\n -- Register edge in the per-execution all-edges index (cluster-safe\n -- retention discovery; retained across resolve, purged wholesale on\n -- retention trim).\n redis.call(\"SADD\", K.deps_all_edges, A.edge_id)\n local unresolved = redis.call(\"HINCRBY\", K.deps_meta, \"unsatisfied_required_count\", 1)\n redis.call(\"HSET\", K.deps_meta,\n \"flow_id\", A.flow_id,\n \"last_flow_graph_revision\", A.graph_revision,\n \"last_dependency_update_at\", A.now_ms)\n\n -- RFC-016 Stage A: dual-write the inbound edge-group hash. Default\n -- policy is `all_of` when no explicit policy has been set (the\n -- expected Stage A case for all existing flows). The edgegroup hash\n -- becomes the source of truth for the AllOf counters in snapshots;\n -- existing flows without a hash fall back to deps_meta on read.\n if is_set(K.edgegroup) then\n local existing_policy = redis.call(\"HGET\", K.edgegroup, \"policy_variant\")\n if not existing_policy then\n redis.call(\"HSET\", K.edgegroup,\n \"policy_variant\", \"all_of\",\n \"n\", \"1\",\n \"succeeded\", \"0\",\n \"group_state\", \"pending\")\n else\n redis.call(\"HINCRBY\", K.edgegroup, \"n\", 1)\n end\n end\n\n -- 6. If runnable: block by dependencies (ALL 7 dims)\n if core.lifecycle_phase == \"runnable\" and core.terminal_outcome == \"none\" then\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", core.lifecycle_phase, -- preserve\n \"ownership_state\", core.ownership_state or \"unowned\", -- preserve\n \"eligibility_state\", \"blocked_by_dependencies\",\n \"blocking_reason\", \"waiting_for_children\",\n \"blocking_detail\", unresolved .. \" dep(s) unresolved incl \" .. 
A.edge_id,\n \"terminal_outcome\", \"none\", -- preserve\n \"attempt_state\", core.attempt_state or \"pending_first_attempt\", -- preserve\n \"public_state\", \"waiting_children\",\n \"last_transition_at\", A.now_ms,\n \"last_mutation_at\", A.now_ms)\n redis.call(\"ZREM\", K.eligible_zset, core.execution_id or \"\")\n redis.call(\"ZADD\", K.blocked_deps_zset,\n tonumber(core.created_at or \"0\"), core.execution_id or \"\")\n end\n\n return ok(tostring(unresolved))\nend)\n\n---------------------------------------------------------------------------\n-- RFC-016 Stage B quorum helper (AnyOf / Quorum edge-group evaluator).\n--\n-- Owns the downstream transition for non-AllOf edge groups. Called\n-- from `ff_resolve_dependency` when the edgegroup hash\'s\n-- `policy_variant` is `any_of` or `quorum`.\n--\n-- Four-counter model (\u{a7}3):\n-- succeeded \u{2014} upstream terminal = success\n-- failed \u{2014} upstream terminal = failed | cancelled | expired\n-- skipped \u{2014} upstream terminal = skipped\n-- n \u{2014} frozen total inbound edges in the group\n--\n-- Eligibility evaluation:\n-- AnyOf (k=1) satisfied iff succeeded >= 1\n-- Quorum(k) satisfied iff succeeded >= k\n-- impossible iff failed + skipped > n - k\n--\n-- Once-fired semantics (Invariant Q1): after `group_state != pending`,\n-- counters still update (telemetry) but the downstream is NOT\n-- retriggered and siblings are NOT re-flagged.\n--\n-- CancelRemaining: when the terminal transition happens (satisfied or\n-- short-circuit impossible), the still-running sibling list is\n-- snapshotted into `cancel_siblings_pending` and `cancel_siblings_pending_flag`\n-- is set to \"true\". The dispatcher (Stage C) drains these; Stage B\n-- only WRITES the flag / list.\n--\n-- LetRun: siblings are never flagged, regardless of terminal\n-- `group_state` branch (RFC-016 \u{a7}5, adjudication 2026-04-23). Pure.\n---------------------------------------------------------------------------\nlocal function resolve_dependency_quorum(K, A, dep, policy_variant)\n local outcome_bucket -- \"succeeded\" | \"failed\" | \"skipped\"\n if A.upstream_outcome == \"success\" then\n outcome_bucket = \"succeeded\"\n elseif A.upstream_outcome == \"skipped\" then\n outcome_bucket = \"skipped\"\n else\n outcome_bucket = \"failed\"\n end\n\n -- Idempotency on dep_hash: mark the edge resolved + maintain\n -- unresolved_set so describe_flow\'s legacy counters don\'t drift out\n -- of sync with edge-level state. 
The edgegroup hash is the source of\n -- truth for eligibility under AnyOf/Quorum; deps_meta counters are\n -- not consulted on this path.\n if outcome_bucket == \"succeeded\" then\n redis.call(\"HSET\", K.dep_hash,\n \"state\", \"satisfied\", \"last_resolved_at\", A.now_ms)\n else\n redis.call(\"HSET\", K.dep_hash,\n \"state\", \"impossible\", \"last_resolved_at\", A.now_ms)\n end\n redis.call(\"SREM\", K.unresolved_set, A.edge_id)\n\n -- Atomically bump the matching counter on the edgegroup hash.\n local new_count = redis.call(\"HINCRBY\", K.edgegroup, outcome_bucket, 1)\n\n -- Load the group snapshot for decision-making.\n local group_raw = redis.call(\"HGETALL\", K.edgegroup)\n local group = hgetall_to_table(group_raw)\n local n = tonumber(group.n or \"0\")\n local succeeded = tonumber(group.succeeded or \"0\")\n local failed = tonumber(group.failed or \"0\")\n local skipped = tonumber(group.skipped or \"0\")\n local prior_state = group.group_state or \"pending\"\n local on_satisfied = group.on_satisfied or \"\"\n local k\n if policy_variant == \"any_of\" then\n k = 1\n else\n k = tonumber(group.k or \"1\")\n if k < 1 then k = 1 end\n end\n\n -- Invariant Q2 defence: counters must not exceed n.\n if (succeeded + failed + skipped) > n then\n -- Counter overflow (edge applied twice?) \u{2014} flag corruption and\n -- abort without transitioning the downstream.\n return err(\"invariant_violation\",\n \"edgegroup counters exceed n: \" .. tostring(succeeded + failed + skipped) .. \"/\" .. tostring(n))\n end\n\n -- Once-fired (Q1): if group already terminal, only update counters.\n -- The HINCRBY above already did that. No downstream transition.\n if prior_state == \"satisfied\" or prior_state == \"impossible\"\n or prior_state == \"cancelled\" then\n return ok(\"already_fired\", \"\")\n end\n\n -- Evaluate current state.\n local new_state = \"pending\"\n if succeeded >= k then\n new_state = \"satisfied\"\n elseif (failed + skipped) > (n - k) then\n new_state = \"impossible\"\n end\n\n if new_state == \"pending\" then\n return ok(\"pending\", \"\")\n end\n\n -- Terminal transition: write group_state + satisfied_at, decide\n -- cancel_siblings_pending, and flip the downstream execution.\n redis.call(\"HSET\", K.edgegroup, \"group_state\", new_state)\n if new_state == \"satisfied\" then\n redis.call(\"HSET\", K.edgegroup, \"satisfied_at\", A.now_ms)\n end\n\n -- Sibling-cancel flagging + membership snapshot (RFC-016 Stage C).\n -- Stage B set the flag; Stage C additionally:\n -- (a) writes the list of still-running-sibling execution ids to the\n -- edgegroup hash field `cancel_siblings_pending_members`\n -- (pipe-delimited string; empty \u{21d2} no sibling needs cancellation),\n -- (b) SADDs the tuple `<flow_id>|<downstream_eid>` to the\n -- per-flow-partition index SET `ff:pending_cancel_groups:{fp:N}`\n -- so the dispatcher scanner can iterate without a full scan.\n -- All writes stay on the downstream\'s {fp:N} slot; siblings are\n -- guaranteed co-located with the flow per RFC-011 \u{a7}11.\n -- LetRun is pure: never flag, never enumerate siblings, regardless of\n -- satisfied vs impossible (RFC-016 \u{a7}5, adjudication 2026-04-23).\n if on_satisfied == \"cancel_remaining\" then\n redis.call(\"HSET\", K.edgegroup,\n \"cancel_siblings_pending_flag\", \"true\",\n \"cancel_siblings_reason\",\n (new_state == \"satisfied\") and \"sibling_quorum_satisfied\"\n or \"sibling_quorum_impossible\")\n\n -- Enumerate still-running siblings. 
The incoming_set lists all\n -- inbound edge ids for the downstream; each edge hash stores the\n -- upstream_execution_id. A sibling is \"still running\" iff its\n -- exec_core\'s lifecycle_phase is NOT \"terminal\". The just-resolved\n -- upstream\'s dep_hash is flipped above \u{2014} but at this moment its\n -- exec_core may or may not have reached terminal_outcome; include\n -- only genuinely-non-terminal executions.\n local members = {}\n if is_set(K.incoming_set) and is_set(K.flow_edge_prefix) then\n local edge_ids = redis.call(\"SMEMBERS\", K.incoming_set)\n for i = 1, #edge_ids do\n local e_key = K.flow_edge_prefix .. edge_ids[i]\n local up_eid = redis.call(\"HGET\", e_key, \"upstream_execution_id\")\n if is_set(up_eid) then\n -- Build sibling exec_core key on the same slot (all members of\n -- a flow share the {fp:N} hash-tag baked into the eid string).\n local sib_core_key = \"ff:exec:\" .. A.exec_tag .. \":\" .. up_eid .. \":core\"\n local sib_phase = redis.call(\"HGET\", sib_core_key, \"lifecycle_phase\")\n if is_set(sib_phase) and sib_phase ~= \"terminal\" then\n members[#members + 1] = up_eid\n end\n end\n end\n end\n\n local members_str = table.concat(members, \"|\")\n redis.call(\"HSET\", K.edgegroup,\n \"cancel_siblings_pending_members\", members_str)\n\n -- SADD the tuple even when members is empty \u{2014} the dispatcher SREMs\n -- the tuple atomically after it observes the empty list + clears\n -- the flag (drain-done). This keeps crash-in-flight observable\n -- via the SET rather than via a silent edgegroup-hash field.\n if is_set(K.pending_cancel_groups_set)\n and is_set(A.flow_id) and is_set(A.downstream_eid) then\n redis.call(\"SADD\", K.pending_cancel_groups_set,\n A.flow_id .. \"|\" .. A.downstream_eid)\n end\n end\n\n -- Downstream transition.\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then return ok(new_state, \"\") end\n local core = hgetall_to_table(raw)\n\n if new_state == \"satisfied\" then\n -- Satisfied: flip eligible, optionally COPY data_passing_ref.\n local data_injected = \"\"\n if is_set(dep.data_passing_ref)\n and outcome_bucket == \"succeeded\"\n and core.terminal_outcome == \"none\" then\n local copied = redis.call(\n \"COPY\", K.upstream_result, K.downstream_payload, \"REPLACE\")\n if copied == 1 then\n data_injected = \"data_injected\"\n end\n end\n\n if core.lifecycle_phase == \"runnable\"\n and core.ownership_state == \"unowned\"\n and core.terminal_outcome == \"none\"\n and core.eligibility_state == \"blocked_by_dependencies\" then\n local new_attempt_state = core.attempt_state\n if not is_set(new_attempt_state) or new_attempt_state == \"none\" then\n new_attempt_state = \"pending_first_attempt\"\n end\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", core.lifecycle_phase,\n \"ownership_state\", core.ownership_state or \"unowned\",\n \"eligibility_state\", \"eligible_now\",\n \"blocking_reason\", \"waiting_for_worker\",\n \"blocking_detail\", \"\",\n \"terminal_outcome\", \"none\",\n \"attempt_state\", new_attempt_state,\n \"public_state\", \"waiting\",\n \"last_transition_at\", A.now_ms,\n \"last_mutation_at\", A.now_ms)\n redis.call(\"ZREM\", K.blocked_deps_zset, core.execution_id or \"\")\n local priority = tonumber(core.priority or \"0\")\n local created_at_ms = tonumber(core.created_at or \"0\")\n local score = 0 - (priority * 1000000000000) + created_at_ms\n redis.call(\"ZADD\", K.eligible_zset, score, core.execution_id or \"\")\n end\n return ok(\"satisfied\", data_injected)\n end\n\n -- Impossible: 
short-circuit skip the downstream.\n local child_skipped = false\n if core.terminal_outcome == \"none\" then\n local skip_attempt_state = core.attempt_state or \"none\"\n if skip_attempt_state == \"running_attempt\"\n or skip_attempt_state == \"attempt_interrupted\" then\n skip_attempt_state = \"attempt_terminal\"\n redis.call(\"HSET\", K.attempt_hash,\n \"attempt_state\", \"ended_cancelled\",\n \"ended_at\", A.now_ms,\n \"failure_reason\", \"dependency_impossible\")\n if redis.call(\"EXISTS\", K.stream_meta) == 1 then\n redis.call(\"HSET\", K.stream_meta,\n \"closed_at\", A.now_ms,\n \"closed_reason\", \"dependency_impossible\")\n end\n elseif is_set(skip_attempt_state) and skip_attempt_state ~= \"none\" then\n skip_attempt_state = \"none\"\n end\n\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"terminal\",\n \"ownership_state\", \"unowned\",\n \"eligibility_state\", \"not_applicable\",\n \"blocking_reason\", \"none\",\n \"blocking_detail\", \"\",\n \"terminal_outcome\", \"skipped\",\n \"attempt_state\", skip_attempt_state,\n \"public_state\", \"skipped\",\n \"completed_at\", A.now_ms,\n \"last_transition_at\", A.now_ms,\n \"last_mutation_at\", A.now_ms)\n redis.call(\"ZREM\", K.blocked_deps_zset, core.execution_id or \"\")\n redis.call(\"ZADD\", K.terminal_zset, tonumber(A.now_ms), core.execution_id or \"\")\n child_skipped = true\n\n if is_set(core.flow_id) and is_set(core.execution_id) then\n local payload = cjson.encode({\n execution_id = core.execution_id,\n flow_id = core.flow_id,\n outcome = \"skipped\",\n })\n redis.call(\"PUBLISH\", \"ff:dag:completions\", payload)\n end\n end\n\n return ok(\"impossible\", child_skipped and \"child_skipped\" or \"\")\nend\n\n---------------------------------------------------------------------------\n-- #23 ff_resolve_dependency (on {p:N})\n--\n-- Resolve one dependency edge: satisfied (upstream success) or impossible\n-- (upstream failed/cancelled/expired). Updates child eligibility.\n--\n-- On satisfaction, if the edge was staged with a non-empty\n-- `data_passing_ref`, atomically COPYs the upstream\'s result key into\n-- the downstream\'s input_payload key before flipping the child to\n-- eligible. 
Upstream + downstream are guaranteed co-located on the\n-- same {fp:N} slot by flow membership (RFC-011 \u{a7}7.3).\n--\n-- KEYS (14): exec_core, deps_meta, unresolved_set, dep_hash,\n-- eligible_zset, terminal_zset, blocked_deps_zset,\n-- attempt_hash, stream_meta, downstream_payload,\n-- upstream_result, edgegroup,\n-- -- RFC-016 Stage C additions (both required for the\n-- -- AnyOf/Quorum+CancelRemaining path; may be passed as\n-- -- empty strings when the caller knows the resolution\n-- -- cannot flip to a CancelRemaining terminal state):\n-- incoming_set, pending_cancel_groups_set\n-- ARGV (5): edge_id, upstream_outcome, now_ms,\n-- -- RFC-016 Stage C additions (required when the edgegroup\n-- -- uses CancelRemaining; may be empty for AllOf):\n-- flow_id, downstream_eid\n---------------------------------------------------------------------------\nredis.register_function(\'ff_resolve_dependency\', function(keys, args)\n local K = {\n core_key = keys[1],\n deps_meta = keys[2],\n unresolved_set = keys[3],\n dep_hash = keys[4],\n eligible_zset = keys[5],\n terminal_zset = keys[6],\n blocked_deps_zset = keys[7],\n attempt_hash = keys[8],\n stream_meta = keys[9],\n downstream_payload = keys[10],\n upstream_result = keys[11],\n edgegroup = keys[12],\n -- RFC-016 Stage C: sibling enumeration + pending-cancel-groups index.\n incoming_set = keys[13] or \"\",\n pending_cancel_groups_set = keys[14] or \"\",\n }\n\n local A = {\n edge_id = args[1],\n upstream_outcome = args[2],\n now_ms = args[3],\n flow_id = args[4] or \"\",\n downstream_eid = args[5] or \"\",\n }\n\n -- Derive the flow-partition hash-tag + per-flow edge-hash prefix from\n -- the edgegroup key (format: `ff:flow:{fp:N}:<flow_id>:edgegroup:<eid>`).\n -- These are used by the Stage C sibling-enumeration loop to read edge\n -- hashes and sibling exec_cores on the same slot as the edgegroup.\n local exec_tag = \"\"\n local flow_edge_prefix = \"\"\n if is_set(K.edgegroup) then\n exec_tag = string.match(K.edgegroup, \"(%b{})\") or \"\"\n if is_set(A.flow_id) then\n flow_edge_prefix = \"ff:flow:\" .. exec_tag .. \":\" .. A.flow_id .. \":edge:\"\n end\n end\n A.exec_tag = exec_tag\n K.flow_edge_prefix = flow_edge_prefix\n\n -- 1. Read dep record\n local dep_raw = redis.call(\"HGETALL\", K.dep_hash)\n if #dep_raw == 0 then return err(\"invalid_dependency\") end\n local dep = hgetall_to_table(dep_raw)\n\n -- 2. Already resolved?\n if dep.state == \"satisfied\" or dep.state == \"impossible\" then\n return ok(\"already_resolved\")\n end\n\n -- RFC-016 Stage B: branch on edge-group policy. AnyOf / Quorum run\n -- the four-counter state machine and own the downstream transition;\n -- AllOf (and legacy flows without an edgegroup hash) keep the\n -- original path unchanged.\n local policy_variant_b = nil\n if is_set(K.edgegroup) and redis.call(\"EXISTS\", K.edgegroup) == 1 then\n policy_variant_b = redis.call(\"HGET\", K.edgegroup, \"policy_variant\")\n end\n\n if policy_variant_b == \"any_of\" or policy_variant_b == \"quorum\" then\n return resolve_dependency_quorum(K, A, dep, policy_variant_b)\n end\n\n -- 3. 
Satisfaction path (upstream completed successfully)\n if A.upstream_outcome == \"success\" then\n redis.call(\"HSET\", K.dep_hash,\n \"state\", \"satisfied\", \"last_resolved_at\", A.now_ms)\n redis.call(\"SREM\", K.unresolved_set, A.edge_id)\n local remaining = redis.call(\"HINCRBY\", K.deps_meta,\n \"unsatisfied_required_count\", -1)\n redis.call(\"HSET\", K.deps_meta, \"last_dependency_update_at\", A.now_ms)\n\n -- RFC-016 Stage A: dual-write the edgegroup hash counters. The\n -- eligibility decision below still keys off `remaining == 0` so\n -- Stage A is behaviour-identical for existing flows; the\n -- edgegroup hash provides the snapshot source of truth going\n -- forward and is the foundation the Stage B resolver extends.\n if is_set(K.edgegroup) and redis.call(\"EXISTS\", K.edgegroup) == 1 then\n redis.call(\"HINCRBY\", K.edgegroup, \"succeeded\", 1)\n if remaining == 0 then\n redis.call(\"HSET\", K.edgegroup, \"group_state\", \"satisfied\")\n end\n end\n\n -- Check if all deps now satisfied\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then return ok(\"satisfied\", \"\") end\n local core = hgetall_to_table(raw)\n\n -- Server-side data_passing_ref resolution (Batch C item 3). When\n -- the edge was staged with a non-empty `data_passing_ref`, replace\n -- the downstream\'s input_payload with the upstream\'s result. COPY\n -- is a single-slot server-internal op (no round-trip to Lua\n -- memory) so large result payloads don\'t inflate the FCALL\'s\n -- working set.\n --\n -- Terminal-child guard: a late satisfaction can race with the\n -- child being cancelled or skipped. Don\'t overwrite the payload\n -- of a child that has already reached a terminal state \u{2014} it\'s at\n -- best pointless (the worker will never read it) and at worst\n -- noisy for post-mortem debugging.\n --\n -- Write-ordering note (RFC-010 \u{a7}4.8b): COPY runs BEFORE the\n -- eligibility transition below so a crash between the two leaves\n -- the child blocked (or late-satisfied on reconciler retry) with\n -- the correct payload rather than eligible with a stale one.\n --\n -- Void-completion path: if the upstream called complete(None), the\n -- result key does not exist \u{2014} COPY returns 0 and data_injected\n -- stays empty, leaving the child\'s original input_payload intact.\n local data_injected = \"\"\n if is_set(dep.data_passing_ref)\n and core.terminal_outcome == \"none\" then\n local copied = redis.call(\n \"COPY\", K.upstream_result, K.downstream_payload, \"REPLACE\")\n if copied == 1 then\n data_injected = \"data_injected\"\n end\n end\n\n if remaining == 0\n and core.lifecycle_phase == \"runnable\"\n and core.ownership_state == \"unowned\"\n and core.terminal_outcome == \"none\"\n and core.eligibility_state == \"blocked_by_dependencies\" then\n -- Preserve attempt_state\n local new_attempt_state = core.attempt_state\n if not is_set(new_attempt_state) or new_attempt_state == \"none\" then\n new_attempt_state = \"pending_first_attempt\"\n end\n -- ALL 7 dims\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", core.lifecycle_phase, -- preserve (runnable)\n \"ownership_state\", core.ownership_state or \"unowned\", -- preserve\n \"eligibility_state\", \"eligible_now\",\n \"blocking_reason\", \"waiting_for_worker\",\n \"blocking_detail\", \"\",\n \"terminal_outcome\", \"none\", -- preserve\n \"attempt_state\", new_attempt_state,\n \"public_state\", \"waiting\",\n \"last_transition_at\", A.now_ms,\n \"last_mutation_at\", A.now_ms)\n redis.call(\"ZREM\", K.blocked_deps_zset, 
core.execution_id or \"\")\n local priority = tonumber(core.priority or \"0\")\n local created_at_ms = tonumber(core.created_at or \"0\")\n local score = 0 - (priority * 1000000000000) + created_at_ms\n redis.call(\"ZADD\", K.eligible_zset, score, core.execution_id or \"\")\n end\n\n return ok(\"satisfied\", data_injected)\n end\n\n -- 4. Impossible path (upstream failed/cancelled/expired/skipped)\n redis.call(\"HSET\", K.dep_hash,\n \"state\", \"impossible\", \"last_resolved_at\", A.now_ms)\n redis.call(\"SREM\", K.unresolved_set, A.edge_id)\n redis.call(\"HINCRBY\", K.deps_meta, \"unsatisfied_required_count\", -1)\n redis.call(\"HINCRBY\", K.deps_meta, \"impossible_required_count\", 1)\n redis.call(\"HSET\", K.deps_meta, \"last_dependency_update_at\", A.now_ms)\n\n -- RFC-016 Stage A: dual-write the edgegroup hash. AllOf short-\n -- circuits to impossible on the first non-success terminal\n -- (Invariant Q4). Failed/skipped bucket is lumped here as\n -- `failed_count` for Stage A \u{2014} Stage B will split them per the\n -- four-counter model (\u{a7}3).\n if is_set(K.edgegroup) and redis.call(\"EXISTS\", K.edgegroup) == 1 then\n redis.call(\"HINCRBY\", K.edgegroup, \"failed\", 1)\n redis.call(\"HSET\", K.edgegroup, \"group_state\", \"impossible\")\n end\n\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then return ok(\"impossible\", \"\") end\n local core = hgetall_to_table(raw)\n\n local child_skipped = false\n\n if core.terminal_outcome == \"none\" then\n -- Determine attempt_state for skip\n local skip_attempt_state = core.attempt_state or \"none\"\n if skip_attempt_state == \"running_attempt\"\n or skip_attempt_state == \"attempt_interrupted\" then\n -- NOTE: If the child is active (worker holding lease), this FCALL runs\n -- on {p:N} (exec partition) so we CAN write exec_core and attempt_hash.\n -- However, lease_current, lease_expiry_zset, worker_leases, and\n -- active_index also live on {p:N} \u{2014} but the KEYS array for this\n -- function does not include them (none of the 14 KEYS above). Cleaning them here\n -- would require adding more KEYS slots and pre-reading worker_instance_id\n -- to construct the worker_leases key. Instead, lease cleanup is\n -- delegated to the lease_expiry scanner (1.5s default interval):\n -- 1. lease_expiry_scanner sees expired lease \u{2192} ff_mark_lease_expired_if_due\n -- 2. Worker\'s renewal sees terminal \u{2192} stops with terminal error\n -- 3. ff_expire_execution (attempt_timeout/deadline scanner) does full cleanup\n -- Race window: between this skip and scanner cleanup, exec_core is\n -- terminal(skipped) but stale entries remain in active/lease indexes.\n -- Bounded by lease_expiry_interval (default 1.5s). 
Index reconciler\n -- detects and logs any residual inconsistency at 45s intervals.\n skip_attempt_state = \"attempt_terminal\"\n -- End real attempt + close stream\n redis.call(\"HSET\", K.attempt_hash,\n \"attempt_state\", \"ended_cancelled\",\n \"ended_at\", A.now_ms,\n \"failure_reason\", \"dependency_impossible\")\n if redis.call(\"EXISTS\", K.stream_meta) == 1 then\n redis.call(\"HSET\", K.stream_meta,\n \"closed_at\", A.now_ms,\n \"closed_reason\", \"dependency_impossible\")\n end\n elseif is_set(skip_attempt_state) and skip_attempt_state ~= \"none\" then\n skip_attempt_state = \"none\"\n end\n\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"terminal\",\n \"ownership_state\", \"unowned\",\n \"eligibility_state\", \"not_applicable\",\n \"blocking_reason\", \"none\",\n \"blocking_detail\", \"\",\n \"terminal_outcome\", \"skipped\",\n \"attempt_state\", skip_attempt_state,\n \"public_state\", \"skipped\",\n \"completed_at\", A.now_ms,\n \"last_transition_at\", A.now_ms,\n \"last_mutation_at\", A.now_ms)\n redis.call(\"ZREM\", K.blocked_deps_zset, core.execution_id or \"\")\n redis.call(\"ZADD\", K.terminal_zset, tonumber(A.now_ms), core.execution_id or \"\")\n child_skipped = true\n\n -- Push-based DAG promotion (bridge-event gap report \u{a7}1.3 analogue).\n -- A child skipped due to an impossible upstream is an FF-initiated\n -- terminal transition: cairn never calls anything for the skip, so\n -- without a PUBLISH the skip\'s own downstream edges only resolve\n -- via the 15s dependency_reconciler safety net. Symmetric with the\n -- other terminal sites (ff_complete_execution et al.). Gated on\n -- `is_set(core.flow_id)` \u{2014} a skip on a standalone exec would be a\n -- bug upstream (standalones have no edges), but the gate keeps the\n -- invariant consistent with the other emit sites. Also gated on\n -- `is_set(core.execution_id)`: the ff-backend-valkey subscriber\n -- fails to parse an empty execution_id and silently drops the\n -- message, reintroducing reconciler-latency for that exec.\n if is_set(core.flow_id) and is_set(core.execution_id) then\n local payload = cjson.encode({\n execution_id = core.execution_id,\n flow_id = core.flow_id,\n outcome = \"skipped\",\n })\n redis.call(\"PUBLISH\", \"ff:dag:completions\", payload)\n end\n end\n\n return ok(\"impossible\", child_skipped and \"child_skipped\" or \"\")\nend)\n\n---------------------------------------------------------------------------\n-- RFC-016 Stage C: ff_drain_sibling_cancel_group (on {fp:N})\n--\n-- Atomic drain call issued by the ff-engine sibling-cancel dispatcher\n-- after it has fired `ff_cancel_execution` against every member of\n-- `cancel_siblings_pending_members`. 
In one Lua unit:\n-- (1) SREM `<flow_id>|<downstream_eid>` from the per-partition\n-- `ff:pending_cancel_groups:{fp:N}` index SET,\n-- (2) HDEL `cancel_siblings_pending_members` +\n-- `cancel_siblings_pending_flag` from the edgegroup hash,\n-- leaving `cancel_siblings_reason` + `satisfied_at` as\n-- observability breadcrumbs.\n--\n-- Returns \"drained\" on success (tuple was present in the SET) or\n-- \"already_drained\" when the dispatcher double-fires (idempotent).\n-- If the edgegroup hash is gone (retention / manual delete) the SET\n-- entry is still removed \u{2014} nothing authoritative to clear on the\n-- hash \u{2014} and the call returns \"drained_sans_group\".\n--\n-- KEYS (2): pending_cancel_groups_set, edgegroup\n-- ARGV (2): flow_id, downstream_eid\n---------------------------------------------------------------------------\nredis.register_function(\'ff_drain_sibling_cancel_group\', function(keys, args)\n local pending_set = keys[1]\n local edgegroup = keys[2]\n local flow_id = args[1] or \"\"\n local downstream_eid = args[2] or \"\"\n\n if not is_set(flow_id) or not is_set(downstream_eid) then\n return err(\"invalid_args\", \"flow_id and downstream_eid required\")\n end\n\n local member = flow_id .. \"|\" .. downstream_eid\n local removed = redis.call(\"SREM\", pending_set, member)\n\n local group_exists = redis.call(\"EXISTS\", edgegroup) == 1\n if group_exists then\n redis.call(\"HDEL\", edgegroup,\n \"cancel_siblings_pending_flag\",\n \"cancel_siblings_pending_members\")\n end\n\n if removed == 0 then\n return ok(\"already_drained\", \"\")\n end\n if not group_exists then\n return ok(\"drained_sans_group\", \"\")\n end\n return ok(\"drained\", \"\")\nend)\n\n---------------------------------------------------------------------------\n-- RFC-016 Stage D: ff_reconcile_sibling_cancel_group (on {fp:N})\n--\n-- Crash-recovery reconciler for Invariant Q6. Stage C\'s dispatcher\n-- populates `pending_cancel_groups` under ff_resolve_dependency\'s\n-- atomic satisfied/impossible flip and drains it via\n-- ff_drain_sibling_cancel_group after per-sibling cancels land. An\n-- engine crash between the SADD + the drain leaves stale tuples in\n-- the SET. This function reconciles one tuple atomically:\n--\n-- (1) flag false / absent AND SET still carries the tuple:\n-- stale marker \u{2014} SREM + return \"sremmed_stale\".\n-- (2) flag true AND every sibling in `cancel_siblings_pending_members`\n-- is already terminal (dispatcher fired cancels but crashed\n-- before drain): HDEL flag/members + SREM + return\n-- \"completed_drain\".\n-- (3) flag true AND at least one sibling non-terminal: leave alone\n-- \u{2014} the dispatcher owns this tuple on its next tick. 
Return\n-- \"no_op\".\n-- (4) edgegroup missing (retention / manual delete) with the tuple\n-- still present: SREM + return \"sremmed_stale\" \u{2014} nothing to\n-- reconcile; drop the orphan index entry.\n--\n-- Reconciler MUST NOT fight the dispatcher: case (3) leaves state\n-- unchanged.\n--\n-- KEYS (2): pending_cancel_groups_set, edgegroup\n-- ARGV (2): flow_id, downstream_eid\n---------------------------------------------------------------------------\nredis.register_function(\'ff_reconcile_sibling_cancel_group\', function(keys, args)\n local pending_set = keys[1]\n local edgegroup = keys[2]\n local flow_id = args[1] or \"\"\n local downstream_eid = args[2] or \"\"\n\n if not is_set(flow_id) or not is_set(downstream_eid) then\n return err(\"invalid_args\", \"flow_id and downstream_eid required\")\n end\n\n local member = flow_id .. \"|\" .. downstream_eid\n local in_set = redis.call(\"SISMEMBER\", pending_set, member) == 1\n if not in_set then\n -- Raced with the dispatcher or a prior reconcile tick; nothing to do.\n return ok(\"no_op\", \"not_in_set\")\n end\n\n local group_exists = redis.call(\"EXISTS\", edgegroup) == 1\n if not group_exists then\n -- Orphan tuple \u{2014} group is gone, no sibling state to check.\n redis.call(\"SREM\", pending_set, member)\n return ok(\"sremmed_stale\", \"missing_edgegroup\")\n end\n\n local flag = redis.call(\"HGET\", edgegroup, \"cancel_siblings_pending_flag\")\n if not is_set(flag) or flag ~= \"true\" then\n -- Dispatcher HDEL\'d the flag but crashed before SREM.\n redis.call(\"SREM\", pending_set, member)\n return ok(\"sremmed_stale\", \"flag_cleared\")\n end\n\n -- Flag true: check members. Empty members_str means the dispatcher\n -- observed no live siblings at flip-time \u{2014} still an interrupted\n -- drain case since the flag is set; clear and SREM.\n local members_str = redis.call(\"HGET\", edgegroup, \"cancel_siblings_pending_members\")\n if not is_set(members_str) then\n members_str = \"\"\n end\n\n -- Derive exec_tag from edgegroup key (`ff:flow:{fp:N}:<flow_id>:edgegroup:<eid>`).\n -- All flow members share this slot per RFC-011 \u{a7}11.\n local exec_tag = string.match(edgegroup, \"(%b{})\") or \"\"\n\n local all_terminal = true\n if is_set(members_str) and is_set(exec_tag) then\n for sib_eid in string.gmatch(members_str, \"([^|]+)\") do\n local sib_core_key = \"ff:exec:\" .. exec_tag .. \":\" .. sib_eid .. \":core\"\n local phase = redis.call(\"HGET\", sib_core_key, \"lifecycle_phase\")\n if is_set(phase) and phase ~= \"terminal\" then\n all_terminal = false\n break\n end\n -- Missing sibling core (retention): treat as terminal for\n -- reconcile purposes \u{2014} nothing to cancel, drain is effectively\n -- complete.\n end\n end\n\n if not all_terminal then\n -- Dispatcher will handle on its next tick.\n return ok(\"no_op\", \"siblings_running\")\n end\n\n redis.call(\"HDEL\", edgegroup,\n \"cancel_siblings_pending_flag\",\n \"cancel_siblings_pending_members\")\n redis.call(\"SREM\", pending_set, member)\n return ok(\"completed_drain\", \"\")\nend)\n\n---------------------------------------------------------------------------\n-- #24 ff_evaluate_flow_eligibility (on {p:N})\n--\n-- Read-only check of execution + dependency state. 
Class C.\n--\n-- KEYS (2): exec_core, deps_meta\n-- ARGV (0)\n---------------------------------------------------------------------------\nredis.register_function(\'ff_evaluate_flow_eligibility\', function(keys, args)\n local raw = redis.call(\"HGETALL\", keys[1])\n if #raw == 0 then return ok(\"not_found\") end\n local core = hgetall_to_table(raw)\n\n if core.lifecycle_phase ~= \"runnable\" then\n return ok(\"not_runnable\")\n end\n if core.ownership_state ~= \"unowned\" then\n return ok(\"owned\")\n end\n if core.terminal_outcome ~= \"none\" then\n return ok(\"terminal\")\n end\n\n local deps_raw = redis.call(\"HGETALL\", keys[2])\n if #deps_raw == 0 then\n return ok(\"eligible\")\n end\n local deps = hgetall_to_table(deps_raw)\n\n local impossible = tonumber(deps.impossible_required_count or \"0\")\n if impossible > 0 then\n return ok(\"impossible\")\n end\n\n local unresolved = tonumber(deps.unsatisfied_required_count or \"0\")\n if unresolved > 0 then\n return ok(\"blocked_by_dependencies\")\n end\n\n return ok(\"eligible\")\nend)\n\n---------------------------------------------------------------------------\n-- #35 ff_promote_blocked_to_eligible (on {p:N})\n--\n-- Promote zero-dep flow member from blocked:dependencies to eligible.\n--\n-- KEYS (5): exec_core, blocked_deps_zset, eligible_zset, deps_meta,\n-- deps_unresolved\n-- ARGV (2): execution_id, now_ms\n---------------------------------------------------------------------------\nredis.register_function(\'ff_promote_blocked_to_eligible\', function(keys, args)\n local K = {\n core_key = keys[1],\n blocked_deps_zset = keys[2],\n eligible_zset = keys[3],\n deps_meta = keys[4],\n deps_unresolved = keys[5],\n }\n\n local A = {\n execution_id = args[1],\n now_ms = args[2],\n }\n\n -- 1. Read execution core\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then return err(\"execution_not_found\") end\n local core = hgetall_to_table(raw)\n\n if core.lifecycle_phase ~= \"runnable\" then\n return err(\"not_runnable\")\n end\n if core.eligibility_state ~= \"blocked_by_dependencies\" then\n return err(\"not_blocked_by_deps\")\n end\n if core.terminal_outcome ~= \"none\" then\n return err(\"terminal\")\n end\n\n -- 2. Verify zero deps\n local unsatisfied = tonumber(\n redis.call(\"HGET\", K.deps_meta, \"unsatisfied_required_count\") or \"0\")\n local unresolved_count = redis.call(\"SCARD\", K.deps_unresolved)\n if unsatisfied > 0 or unresolved_count > 0 then\n return err(\"deps_not_satisfied\", tostring(unsatisfied), tostring(unresolved_count))\n end\n\n -- 3. Preserve attempt_state\n local new_attempt_state = core.attempt_state\n if not is_set(new_attempt_state) or new_attempt_state == \"none\" then\n new_attempt_state = \"pending_first_attempt\"\n end\n\n -- 4. 
Transition (ALL 7 dims)\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", core.lifecycle_phase, -- preserve (runnable)\n \"ownership_state\", core.ownership_state or \"unowned\", -- preserve\n \"eligibility_state\", \"eligible_now\",\n \"blocking_reason\", \"waiting_for_worker\",\n \"blocking_detail\", \"\",\n \"terminal_outcome\", \"none\", -- preserve\n \"attempt_state\", new_attempt_state,\n \"public_state\", \"waiting\",\n \"last_transition_at\", A.now_ms,\n \"last_mutation_at\", A.now_ms)\n\n redis.call(\"ZREM\", K.blocked_deps_zset, A.execution_id)\n local priority = tonumber(core.priority or \"0\")\n local created_at_ms = tonumber(core.created_at or \"0\")\n local score = 0 - (priority * 1000000000000) + created_at_ms\n redis.call(\"ZADD\", K.eligible_zset, score, A.execution_id)\n\n return ok()\nend)\n\n---------------------------------------------------------------------------\n-- #12b ff_replay_execution (on {p:N})\n--\n-- Reset a terminal execution for replay. If skipped flow member: reset\n-- impossible deps back to unsatisfied, recompute counts, set\n-- blocked_by_dependencies instead of eligible_now.\n--\n-- KEYS (4+N): exec_core, terminal_zset, eligible_zset, lease_history,\n-- [blocked_deps_zset, deps_meta, deps_unresolved, dep_edge_0..N]\n-- ARGV (2+N): execution_id, now_ms, [edge_id_0..N]\n---------------------------------------------------------------------------\nredis.register_function(\'ff_replay_execution\', function(keys, args)\n local K = {\n core_key = keys[1],\n terminal_zset = keys[2],\n eligible_zset = keys[3],\n lease_history = keys[4],\n }\n\n local A = {\n execution_id = args[1],\n }\n\n local t = redis.call(\"TIME\")\n local now_ms = tonumber(t[1]) * 1000 + math.floor(tonumber(t[2]) / 1000)\n\n -- 1. Read execution core\n local raw = redis.call(\"HGETALL\", K.core_key)\n if #raw == 0 then return err(\"execution_not_found\") end\n local core = hgetall_to_table(raw)\n\n -- 2. Must be terminal\n if core.lifecycle_phase ~= \"terminal\" then\n return err(\"execution_not_terminal\")\n end\n\n -- 3. Check replay limit (read from policy, same pattern as ff_reclaim_execution)\n local replay_count = tonumber(core.replay_count or \"0\")\n local max_replays = 10 -- default\n local policy_key = string.gsub(K.core_key, \":core$\", \":policy\")\n local policy_raw = redis.call(\"GET\", policy_key)\n if policy_raw then\n local ok_p, pol = pcall(cjson.decode, policy_raw)\n if ok_p and type(pol) == \"table\" then\n max_replays = tonumber(pol.max_replay_count) or 10\n end\n end\n if replay_count >= max_replays then\n return err(\"max_replays_exhausted\")\n end\n\n -- 4. 
Determine replay path\n local is_skipped_flow_member = (core.terminal_outcome == \"skipped\") and is_set(core.flow_id)\n\n if is_skipped_flow_member then\n -- SKIPPED FLOW MEMBER PATH: reset impossible deps \u{2192} blocked on deps\n local blocked_deps_zset = keys[5]\n local deps_meta = keys[6]\n local deps_unresolved = keys[7]\n\n -- Reset impossible dep edges back to unsatisfied\n local num_edges = #args - 2\n local new_unsatisfied = 0\n for i = 1, num_edges do\n local edge_id = args[2 + i]\n local dep_key = keys[7 + i] -- dep_edge keys start at KEYS[8]\n\n local dep_state = redis.call(\"HGET\", dep_key, \"state\")\n if dep_state == \"impossible\" then\n redis.call(\"HSET\", dep_key,\n \"state\", \"unsatisfied\",\n \"last_resolved_at\", \"\")\n redis.call(\"SADD\", deps_unresolved, edge_id)\n new_unsatisfied = new_unsatisfied + 1\n elseif dep_state == \"unsatisfied\" then\n new_unsatisfied = new_unsatisfied + 1\n end\n -- satisfied edges remain satisfied (upstream already succeeded)\n end\n\n -- Recompute deps:meta counts\n redis.call(\"HSET\", deps_meta,\n \"unsatisfied_required_count\", tostring(new_unsatisfied),\n \"impossible_required_count\", \"0\",\n \"last_dependency_update_at\", tostring(now_ms))\n\n -- Transition: terminal \u{2192} runnable/blocked_by_dependencies\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"runnable\",\n \"ownership_state\", \"unowned\",\n \"eligibility_state\", \"blocked_by_dependencies\",\n \"blocking_reason\", \"waiting_for_children\",\n \"blocking_detail\", tostring(new_unsatisfied) .. \" dep(s) unsatisfied after replay\",\n \"terminal_outcome\", \"none\",\n \"attempt_state\", \"pending_replay_attempt\",\n \"public_state\", \"waiting_children\",\n \"pending_replay_attempt\", \"1\",\n \"replay_count\", tostring(replay_count + 1),\n \"completed_at\", \"\",\n \"last_transition_at\", tostring(now_ms),\n \"last_mutation_at\", tostring(now_ms))\n\n -- Move from terminal \u{2192} blocked:deps\n redis.call(\"ZREM\", K.terminal_zset, A.execution_id)\n redis.call(\"ZADD\", blocked_deps_zset,\n tonumber(core.created_at or \"0\"), A.execution_id)\n\n -- Lease history\n redis.call(\"XADD\", K.lease_history, \"MAXLEN\", \"~\", 1000, \"*\",\n \"event\", \"replay_initiated\",\n \"replay_count\", tostring(replay_count + 1),\n \"replay_type\", \"skipped_flow_member\",\n \"ts\", tostring(now_ms))\n\n return ok(tostring(new_unsatisfied))\n else\n -- NORMAL REPLAY PATH: terminal \u{2192} runnable/eligible\n local priority = tonumber(core.priority or \"0\")\n local created_at = tonumber(core.created_at or \"0\")\n local score = 0 - (priority * 1000000000000) + created_at\n\n redis.call(\"HSET\", K.core_key,\n \"lifecycle_phase\", \"runnable\",\n \"ownership_state\", \"unowned\",\n \"eligibility_state\", \"eligible_now\",\n \"blocking_reason\", \"waiting_for_worker\",\n \"blocking_detail\", \"\",\n \"terminal_outcome\", \"none\",\n \"attempt_state\", \"pending_replay_attempt\",\n \"public_state\", \"waiting\",\n \"pending_replay_attempt\", \"1\",\n \"replay_count\", tostring(replay_count + 1),\n \"completed_at\", \"\",\n \"last_transition_at\", tostring(now_ms),\n \"last_mutation_at\", tostring(now_ms))\n\n -- Move from terminal \u{2192} eligible\n redis.call(\"ZREM\", K.terminal_zset, A.execution_id)\n redis.call(\"ZADD\", K.eligible_zset, score, A.execution_id)\n\n -- Lease history\n redis.call(\"XADD\", K.lease_history, \"MAXLEN\", \"~\", 1000, \"*\",\n \"event\", \"replay_initiated\",\n \"replay_count\", tostring(replay_count + 1),\n \"replay_type\", 
\"normal\",\n \"ts\", tostring(now_ms))\n\n return ok(\"0\")\n end\nend)\n\n---------------------------------------------------------------------------\n-- ff_set_flow_tags (issue #58.4)\n--\n-- Write caller-supplied tag fields to the flow\'s separate tags key\n-- (`ff:flow:{fp:N}:<flow_id>:tags`). Mirrors `ff_set_execution_tags`.\n--\n-- Lazy migration (Option 1(a)): BEFORE writing, any existing fields on\n-- `flow_core` whose name matches the reserved namespace\n-- `^[a-z][a-z0-9_]*%.` are moved to `tags_key` and HDEL\'d from\n-- `flow_core`. Heals pre-58.4 flows that stored `<caller>.<field>` tags\n-- inline on `flow_core`. Idempotent: after first call, no fields match.\n--\n-- Tag keys MUST match `^[a-z][a-z0-9_]*%.`; violations fail-closed with\n-- `invalid_tag_key` (no migration, no write).\n--\n-- KEYS (2): flow_core, tags_key\n-- ARGV (>=2, even): k1, v1, k2, v2, ...\n---------------------------------------------------------------------------\nredis.register_function(\'ff_set_flow_tags\', function(keys, args)\n local K = {\n flow_core = keys[1],\n tags_key = keys[2],\n }\n\n local n = #args\n if n == 0 or (n % 2) ~= 0 then\n return err(\"invalid_input\", \"tags must be non-empty even-length key/value pairs\")\n end\n\n if redis.call(\"EXISTS\", K.flow_core) == 0 then\n return err(\"flow_not_found\")\n end\n\n -- Require `<caller>.<field>` with at least one non-dot char after the\n -- first dot (same rule as `ff_set_execution_tags`). Suffix may contain\n -- further dots.\n for i = 1, n, 2 do\n local k = args[i]\n if type(k) ~= \"string\" or not string.find(k, \"^[a-z][a-z0-9_]*%.[^.]\") then\n return err(\"invalid_tag_key\", tostring(k))\n end\n end\n\n -- Lazy migration: only HGETALL the core hash once per flow. A sentinel\n -- `tags_migrated=1` field on `flow_core` short-circuits subsequent\n -- calls so tag writes on well-formed flows stay O(1) instead of paying\n -- an O(n) scan of every flow_core field. The sentinel itself is\n -- dot-free snake_case \u{2014} it matches FF\'s own-field rule, not the\n -- reserved caller namespace, so it can\'t be confused with a tag.\n local migrated = redis.call(\"HGET\", K.flow_core, \"tags_migrated\")\n if migrated ~= \"1\" then\n local flat = redis.call(\"HGETALL\", K.flow_core)\n local to_migrate = {}\n local to_delete = {}\n for i = 1, #flat, 2 do\n local fname = flat[i]\n if type(fname) == \"string\" and string.find(fname, \"^[a-z][a-z0-9_]*%.[^.]\") then\n to_migrate[#to_migrate + 1] = fname\n to_migrate[#to_migrate + 1] = flat[i + 1]\n to_delete[#to_delete + 1] = fname\n end\n end\n if #to_migrate > 0 then\n redis.call(\"HSET\", K.tags_key, unpack(to_migrate))\n redis.call(\"HDEL\", K.flow_core, unpack(to_delete))\n end\n redis.call(\"HSET\", K.flow_core, \"tags_migrated\", \"1\")\n end\n\n redis.call(\"HSET\", K.tags_key, unpack(args))\n\n local now_ms = server_time_ms()\n redis.call(\"HSET\", K.flow_core, \"last_mutation_at\", tostring(now_ms))\n\n return ok(tostring(n / 2))\nend)\n\n";
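// Illustrative smoke test, not part of the generated source: the module and
// test names below are invented for this sketch. It only asserts that a
// couple of the registered function names are still present in the embedded
// source; the authoritative drift guard is the CI comparison against
// scripts/gen-ff-script-lua.sh described below.
#[cfg(test)]
mod library_source_smoke {
    use super::LIBRARY_SOURCE;

    #[test]
    fn embeds_expected_function_registrations() {
        for name in ["ff_resolve_dependency", "ff_set_flow_tags"] {
            assert!(
                LIBRARY_SOURCE.contains(name),
                "missing registration for {name}"
            );
        }
    }
}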
The compiled FlowFabric Lua library source.
Generated from lua/*.lua by scripts/gen-ff-script-lua.sh and checked
into the crate as flowfabric.lua so it ships inside the published
tarball. CI (matrix.yml) fails if this file drifts from what the
script would produce.
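As a usage illustration (an assumption about the surrounding crate, not something this file defines): the library is installed on the server with FUNCTION LOAD REPLACE, and its registered functions are then invoked with FCALL. The sketch below assumes the redis crate and an already-open connection; the key names and execution id are invented for the example, and the reply is kept as a raw redis::Value because the ok()/err() reply envelope is defined elsewhere in the library.

fn load_and_probe(con: &mut redis::Connection) -> redis::RedisResult<()> {
    // Install (or replace) the flowfabric library; returns the library name.
    redis::cmd("FUNCTION")
        .arg("LOAD")
        .arg("REPLACE")
        .arg(LIBRARY_SOURCE)
        .query::<String>(con)?;

    // Read-only eligibility probe (#24 ff_evaluate_flow_eligibility):
    // 2 KEYS (exec_core, deps_meta), 0 ARGV. Key names are illustrative only.
    let verdict: redis::Value = redis::cmd("FCALL")
        .arg("ff_evaluate_flow_eligibility")
        .arg(2)
        .arg("ff:exec:{p:1}:exec-123:core")
        .arg("ff:exec:{p:1}:exec-123:deps:meta")
        .query(con)?;
    println!("eligibility: {:?}", verdict);
    Ok(())
}

In cluster mode every KEYS entry of a single FCALL must hash to the same slot, which is why the functions above lean on the {p:N} and {fp:N} hash tags when building key names.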