{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://w3id.org/tilezz/schemas/block_index.schema.json",
"title": "tilezz-rat-dafsa-blocks manifest",
"description": "Block-index manifest for a tilezz-rat-dafsa-blocks asset directory. See blocks_schema.txt (alongside this file) for the full prose specification including the binary block file (`blocks/<sha256>.bin`) wire format -- the JSON Schema here only covers the manifest JSON itself.",
"type": "object",
"required": [
"format",
"version",
"scalar",
"block_format",
"block_version",
"target_block_bytes",
"n_states",
"n_edges",
"n_sequences",
"max_indexed_length",
"root",
"blocks"
],
"additionalProperties": false,
"properties": {
"format": {
"type": "string",
"const": "tilezz-rat-dafsa-blocks",
"description": "Wire-format discriminator; must be exactly this literal."
},
"version": {
"type": "integer",
"minimum": 1,
"const": 1,
"description": "Manifest schema version. v1 = this schema."
},
"scalar": {
"type": "string",
"const": "i8",
"description": "Edge-label scalar type. Currently always 'i8'."
},
"block_format": {
"type": "string",
"const": "tilezz-rat-block",
"description": "Wire-format tag of each block file."
},
"block_version": {
"type": "integer",
"minimum": 1,
"const": 1,
"description": "Block file format version, bumped independently of the manifest 'version'."
},
"target_block_bytes": {
"type": "integer",
"minimum": 1,
"description": "Writer's target uncompressed bytes per block file. Hint only -- the writer closes a block once its serialised size crosses this threshold, so real blocks may be larger or (for the last block) smaller. Recorded so the build pipeline is reproducible."
},
"n_states": {
"type": "integer",
"minimum": 1,
"description": "Total DAFSA states including the root. State 0 lives in 'root'; states 1 through n_states-1 (the half-open range [1, n_states)) are covered by the entries in 'blocks'."
},
"n_edges": {
"type": "integer",
"minimum": 0,
"description": "Total DAFSA edges across the root and every block."
},
"n_sequences": {
"type": "integer",
"minimum": 0,
"description": "Number of accepted sequences (rats). Equal to root.count."
},
"max_indexed_length": {
"type": "integer",
"minimum": 0,
"description": "Maximum length of any accepted rat (= maximum label value of any root edge). Lets clients short-circuit queries for longer inputs without walking the automaton."
},
"root": {
"type": "object",
"description": "DAFSA root state (state 0), inlined into the manifest rather than in a block file. Lifting the root out keeps every block file's bytes determined purely by inner-DAFSA structure, which is stable across forward extensions (adding longer rats only touches the root's edges).",
"required": ["count", "is_accept", "edges"],
"additionalProperties": false,
"properties": {
"count": {
"type": "integer",
"minimum": 0,
"description": "Number of accepted sequences reachable from the root. Equal to n_sequences."
},
"is_accept": {
"type": "boolean",
"description": "Whether the root itself accepts. For a tilezz-rat-dafsa (length-prefixed) this is always false."
},
"edges": {
"type": "array",
"description": "Root outgoing edges. For a tilezz-rat-dafsa each edge.label is a length byte; edge.target is the entry state of that length class.",
"items": {
"type": "object",
"required": ["label", "target"],
"additionalProperties": false,
"properties": {
"label": {
"type": "integer",
"minimum": -128,
"maximum": 127,
"description": "Edge label as a signed 8-bit integer."
},
"target": {
"type": "integer",
"minimum": 1,
"description": "Target state id (>= 1; the root never points at itself)."
}
}
}
}
}
},
"blocks": {
"type": "array",
"description": "Content-addressed block index. Sorted strictly ascending by 'first_state' (an invariant of the writer; this schema does not enforce the ordering). Block N covers the half-open state range [blocks[N].first_state, blocks[N+1].first_state); the last block covers [first_state, n_states), i.e. up to but not including n_states. Filenames are SHA-256 hashes of the gzipped block bytes (= cache key), so any two assets that contain byte-identical blocks share URLs and CDN cache entries.",
"minItems": 0,
"items": {
"type": "object",
"required": ["first_state", "sha256", "size"],
"additionalProperties": false,
"properties": {
"first_state": {
"type": "integer",
"minimum": 1,
"description": "Smallest state id this block contains. blocks[0].first_state MUST be 1 (the root is in 'root', not in a block)."
},
"sha256": {
"type": "string",
"pattern": "^[0-9a-f]{64}$",
"description": "Lowercase hex SHA-256 of the gzipped block file. Doubles as the filename stem: the file lives at blocks/<sha256>.bin."
},
"size": {
"type": "integer",
"minimum": 0,
"description": "Gzipped block file size in bytes. Lets clients show progress / preallocate buffers before fetching."
}
}
}
},
"block_base_url": {
"type": "string",
"format": "uri",
"description": "Optional URL prefix for fetching block files. When present, clients resolve each block as `{block_base_url}{sha256}.bin` (plain string concatenation, so the prefix must already end with any required separator such as '/') instead of `{asset_dir}/blocks/{sha256}.bin`. Lets the manifest live on one host (e.g. Pages) while block files live on another (e.g. a GitHub Release, S3, IPFS) -- the sha256 integrity check is unchanged, so the trust anchor remains the manifest URL. Absent in single-host assets; consumers MUST tolerate absence."
}
}
}