epub-stream 0.1.0

Memory-efficient EPUB parser for embedded systems
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
# epub-stream justfile

# Target triple passed as `--target` for host-side binaries: a non-empty
# HOST_TEST_TARGET wins, otherwise the `host:` field of `rustc -vV` is used.
host_target := `if [ -z "${HOST_TEST_TARGET:-}" ]; then \
    rustc -vV | awk '/^host: / { print $2 }'; \
else \
    echo "$HOST_TEST_TARGET"; \
fi`

# Format every package in the workspace in place (`cargo fmt --all`).
fmt:
    cargo fmt --all

# Check formatting without rewriting files (rustfmt `--check` fails on any diff).
fmt-check:
    cargo fmt --all -- --check

# Type-check the whole workspace with all features enabled (default dev target matrix).
check:
    cargo check --workspace --all-features

# Lint the whole workspace with clippy in a single strict pass (`-D warnings`: warnings are errors).
lint:
    cargo clippy --workspace --all-features -- -D warnings

# Unit tests only (`--lib --bins`) across the workspace; the fast default loop.
test:
    cargo test --workspace --all-features --lib --bins

# Default developer loop: auto-format, check, lint, unit tests (run in order as dependencies).
all: fmt check lint test

# CI add-on: the baseline `all` loop followed by the integration tests.
ci: all test-integration

# Backward-compatible alias: runs exactly the `all` loop.
strict: all

# Backward-compatible alias: runs exactly the `all` loop.
harden: all

# NOTE(review): cargo's `--tests` selects every target with `test = true`, which
# by default includes lib/bin unit-test targets as well as tests/*.rs.
# Integration tests (slower / broader; useful in CI).
test-integration:
    cargo test --workspace --all-features --tests

# Strict memory-focused linting for constrained targets; both passes run in
# order as dependencies of this recipe.
#
# - no_std pass: enforce core/alloc import discipline.
# - render pass: ban convenience constructors that hide allocation intent.
lint-memory: lint-memory-no-std lint-memory-render

# no_std/alloc import-discipline lints on the library with default features off (core path only).
lint-memory-no-std:
    cargo clippy --no-default-features --lib -- \
      -D warnings \
      -W clippy::alloc_instead_of_core \
      -W clippy::std_instead_of_alloc \
      -W clippy::std_instead_of_core

# Allocation-intent lints for the render crate library (enables clippy::disallowed_methods).
lint-memory-render:
    cargo clippy -p epub-stream-render --lib --no-deps -- \
      -D warnings \
      -W clippy::disallowed_methods

# Type-check the three split render crates.
render-check:
    cargo check \
      -p epub-stream-render \
      -p epub-stream-embedded-graphics \
      -p epub-stream-render-web

# Clippy over all targets of the split render crates (warnings are errors; disallowed_methods allowed here).
render-lint:
    cargo clippy \
      -p epub-stream-render \
      -p epub-stream-embedded-graphics \
      -p epub-stream-render-web \
      --all-targets -- \
      -D warnings \
      -A clippy::disallowed_methods

# Run the test suites of the three split render crates.
render-test:
    cargo test \
      -p epub-stream-render \
      -p epub-stream-embedded-graphics \
      -p epub-stream-render-web

# Run all split render crate checks: check, then lint, then test (as dependencies).
render-all: render-check render-lint render-test

# Type-check the no_std configuration (default features disabled).
check-no-std:
    cargo check --no-default-features

# Run only the tests marked `#[ignore]` (all features enabled).
test-ignored:
    cargo test --all-features -- --ignored

# Run tests with their stdout/stderr shown (`--nocapture`).
test-verbose:
    cargo test --all-features -- --nocapture

# Run the ignored allocation-count tests on a single test thread, with output shown.
test-alloc:
    cargo test --all-features --test allocation_tests -- \
      --ignored \
      --nocapture \
      --test-threads=1

# Embedded-focused suites: ignored embedded_mode_tests (tiny budgets), then the reflow regression matrix.
test-embedded:
    cargo test --all-features --test embedded_mode_tests -- \
      --ignored --nocapture
    cargo test -p epub-stream-embedded-graphics --test embedded_reflow_regression -- \
      --nocapture

# Verify benchmark fixture corpus integrity against tests/fixtures/bench/SHA256SUMS (needs `sha256sum`).
bench-fixtures-check:
    sha256sum -c tests/fixtures/bench/SHA256SUMS

# Build API docs with all features, skipping dependency docs (`--no-deps`).
doc:
    cargo doc --all-features --no-deps

# Build docs and fail on any rustdoc warning (RUSTDOCFLAGS="-D warnings").
doc-check:
    RUSTDOCFLAGS="-D warnings" cargo doc --all-features --no-deps

# Build docs and open the result locally (`--open`).
doc-open:
    cargo doc --all-features --no-deps --open

# Build in release mode with all features enabled.
build:
    cargo build --release --all-features

# Type-check the `epub-stream` CLI binary (requires the `cli` feature).
cli-check:
    cargo check --features cli --bin epub-stream

# Run the `epub-stream` CLI; every recipe argument is forwarded to the binary after `--`.
cli *args:
    cargo run --features cli --bin epub-stream -- {{args}}

# Render EPUB pages to PNG snapshots for local visual layout debugging.
# Builds and runs the `visualize` binary for the host target with
# RUSTC_WRAPPER cleared for this invocation.
#
# Usage:
#   just visualize
#   just visualize tests/fixtures/bench/pg84-frankenstein.epub 5 0 12 target/visualize-default
visualize epub="tests/fixtures/bench/pg84-frankenstein.epub" chapter="5" start="0" pages="12" out="target/visualize-default" cover_page_mode="contain":
    RUSTC_WRAPPER= cargo run -p epub-stream-embedded-graphics --bin visualize \
      --target {{host_target}} -- {{epub}} \
      --chapter {{chapter}} --start-page {{start}} --pages {{pages}} \
      --out {{out}} --cover-page-mode {{cover_page_mode}}

# Launch interactive web preview with live re-render API (`--serve --open` on the given port).
# Exposes primary typography + cover policy controls for quick e-reader tuning.
web-preview epub="tests/fixtures/bench/pg84-frankenstein.epub" port="42817" justify_mode="adaptive-inter-word" justify_max_space_stretch="0.45" cover_page_mode="contain":
    RUSTC_WRAPPER= cargo run -p epub-stream-render-web --bin web-preview -- {{epub}} \
      --serve --open --port {{port}} \
      --justify-mode {{justify_mode}} --justify-max-space-stretch {{justify_max_space_stretch}} \
      --cover-page-mode {{cover_page_mode}}

# Export standalone HTML preview snapshot to `out` (non-interactive reflow).
web-preview-export epub="tests/fixtures/bench/pg84-frankenstein.epub" out="target/web-preview/index.html" justify_mode="adaptive-inter-word" justify_max_space_stretch="0.45" cover_page_mode="contain":
    RUSTC_WRAPPER= cargo run -p epub-stream-render-web --bin web-preview -- {{epub}} \
      --out {{out}} \
      --justify-mode {{justify_mode}} --justify-max-space-stretch {{justify_max_space_stretch}} \
      --cover-page-mode {{cover_page_mode}}

# Chapter-scoped web preview export: same as the HTML export, restricted via `--chapter`.
web-preview-chapter epub="tests/fixtures/bench/pg84-frankenstein.epub" chapter="5" out="target/web-preview/chapter.html" justify_mode="adaptive-inter-word" justify_max_space_stretch="0.45" cover_page_mode="contain":
    RUSTC_WRAPPER= cargo run -p epub-stream-render-web --bin web-preview -- {{epub}} \
      --chapter {{chapter}} --out {{out}} \
      --justify-mode {{justify_mode}} --justify-max-space-stretch {{justify_max_space_stretch}} \
      --cover-page-mode {{cover_page_mode}}

# One-command sane default render pass for local layout iteration (`visualize` with its defaults).
visualize-default: visualize

# Render with a constrained virtual-memory budget to catch large transient
# allocations locally before flashing firmware.
#
# `vm_kib` is handed to `ulimit -Sv` (soft virtual-memory limit). The binary is
# chained with `&&` so it never runs unconstrained: if the shell cannot apply
# the limit, the recipe fails instead of reporting a false low-memory pass.
# The binary is built first, outside the limited shell.
visualize-lowmem epub="tests/fixtures/bench/pg84-frankenstein.epub" chapter="5" start="0" pages="12" out="target/visualize-lowmem" vm_kib="180000" cover_page_mode="contain":
    RUSTC_WRAPPER= cargo build -p epub-stream-embedded-graphics --bin visualize --target {{ host_target }}
    bash -lc "ulimit -Sv {{vm_kib}} && target/{{host_target}}/debug/visualize {{epub}} --chapter {{chapter}} --start-page {{start}} --pages {{pages}} --out {{out}} --cover-page-mode {{cover_page_mode}}"

# Runs `visualize-lowmem` over five fixtures (four Gutenberg books plus the
# accessibility test EPUB), each with the same `vm_kib` limit; the default here
# (150000) is tighter than visualize-lowmem's own default (180000).
# Low-memory smoke suite for EPUB stability validation prior to flashing.
lowmem-confidence vm_kib="150000":
    just visualize-lowmem tests/fixtures/bench/pg84-frankenstein.epub 5 0 16 target/visualize-lowmem/frankenstein {{vm_kib}}
    just visualize-lowmem tests/fixtures/bench/pg1342-pride-and-prejudice.epub 7 0 16 target/visualize-lowmem/pride {{vm_kib}}
    just visualize-lowmem tests/fixtures/bench/pg1661-sherlock-holmes.epub 3 0 16 target/visualize-lowmem/sherlock {{vm_kib}}
    just visualize-lowmem tests/fixtures/bench/pg2701-moby-dick.epub 10 0 16 target/visualize-lowmem/moby {{vm_kib}}
    just visualize-lowmem tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub 1 0 10 target/visualize-lowmem/fundamental {{vm_kib}}

# Same as visualize, but with inter-word justification enabled (`--justify`); no cover-page-mode parameter.
visualize-justify epub="tests/fixtures/bench/pg84-frankenstein.epub" chapter="5" start="0" pages="12" out="target/visualize-justify":
    RUSTC_WRAPPER= cargo run -p epub-stream-embedded-graphics --bin visualize \
      --target {{host_target}} -- {{epub}} \
      --chapter {{chapter}} --start-page {{start}} --pages {{pages}} \
      --out {{out}} --justify

# Larger type profile for wrap/spacing validation (font size 28, line gap 5, paragraph gap 10).
visualize-large epub="tests/fixtures/bench/pg84-frankenstein.epub" chapter="5" start="0" pages="8" out="target/visualize-large":
    RUSTC_WRAPPER= cargo run -p epub-stream-embedded-graphics --bin visualize \
      --target {{host_target}} -- {{epub}} \
      --chapter {{chapter}} --start-page {{start}} --pages {{pages}} \
      --out {{out}} \
      --font-size 28 --line-gap 5 --paragraph-gap 10

# Renders the same EPUB/chapter/page window three ways (default, justified,
# large type) into separate target/visualize-matrix-* directories.
# Deterministic typography sweep for local golden-like visual review.
visualize-matrix epub="tests/fixtures/bench/pg84-frankenstein.epub" chapter="5" start="0" pages="6":
    just visualize {{epub}} {{chapter}} {{start}} {{pages}} target/visualize-matrix-default
    just visualize-justify {{epub}} {{chapter}} {{start}} {{pages}} target/visualize-matrix-justify
    just visualize-large {{epub}} {{chapter}} {{start}} {{pages}} target/visualize-matrix-large

# High-confidence typography gate:
# - run the epub-stream-render test targets, then the render_layout unit tests
#   with output shown
# - render default / justified / large-type snapshots for three Gutenberg
#   fixtures (Frankenstein, Pride and Prejudice, Sherlock Holmes) under
#   target/visualize-confidence/
typography-confidence:
    cargo test -p epub-stream-render --tests
    cargo test -p epub-stream-render render_layout::tests:: -- --nocapture
    just visualize tests/fixtures/bench/pg84-frankenstein.epub 5 0 8 target/visualize-confidence/frankenstein/default
    just visualize-justify tests/fixtures/bench/pg84-frankenstein.epub 5 0 8 target/visualize-confidence/frankenstein/justify
    just visualize-large tests/fixtures/bench/pg84-frankenstein.epub 5 0 6 target/visualize-confidence/frankenstein/large
    just visualize tests/fixtures/bench/pg1342-pride-and-prejudice.epub 7 0 8 target/visualize-confidence/pride/default
    just visualize-justify tests/fixtures/bench/pg1342-pride-and-prejudice.epub 7 0 8 target/visualize-confidence/pride/justify
    just visualize-large tests/fixtures/bench/pg1342-pride-and-prejudice.epub 7 0 6 target/visualize-confidence/pride/large
    just visualize tests/fixtures/bench/pg1661-sherlock-holmes.epub 3 0 8 target/visualize-confidence/sherlock/default
    just visualize-justify tests/fixtures/bench/pg1661-sherlock-holmes.epub 3 0 8 target/visualize-confidence/sherlock/justify
    just visualize-large tests/fixtures/bench/pg1661-sherlock-holmes.epub 3 0 6 target/visualize-confidence/sherlock/large

# Runs five named test targets in sequence across the render, embedded-graphics
# and render-web crates; the first failing command stops the recipe.
# Deterministic reflow/config regression harness for reader controls.
render-regression:
    cargo test -p epub-stream-render --test corpus_regression_harness
    cargo test -p epub-stream-render --test typography_regression
    cargo test -p epub-stream-embedded-graphics --test embedded_reflow_regression
    cargo test -p epub-stream-render --test docs
    cargo test -p epub-stream-render-web --bin web-preview

# Focused embedded reflow regression harness (single test target, output shown).
embedded-reflow-regression:
    cargo test -p epub-stream-embedded-graphics --test embedded_reflow_regression -- --nocapture

# Focused low-RAM loop verification: one filtered test from the embedded regression harness.
embedded-low-ram-matrix:
    cargo test -p epub-stream-embedded-graphics --test embedded_reflow_regression \
      embedded_low_ram_reflow_and_page_turn_loops_are_stable -- --nocapture

# Focused budget/telemetry coverage: one filtered test from the embedded regression harness.
embedded-budget-telemetry:
    cargo test -p epub-stream-embedded-graphics --test embedded_reflow_regression \
      embedded_renderer_budget_diagnostics_cover_limit_and_fallback_paths -- --nocapture

# Bootstrap external test datasets (not committed) via scripts/datasets/bootstrap.sh.
dataset-bootstrap:
    ./scripts/datasets/bootstrap.sh

# Bootstrap with explicit Gutenberg IDs (space-separated), forwarded to bootstrap.sh as arguments.
dataset-bootstrap-gutenberg *ids:
    ./scripts/datasets/bootstrap.sh {{ids}}

# List all discovered dataset EPUB files (scripts/datasets/list_epubs.sh).
dataset-list:
    ./scripts/datasets/list_epubs.sh

# Validate all dataset EPUB files against the expectation manifest (same commands as dataset-validate-expected).
dataset-validate:
    @cargo build --features cli --bin epub-stream
    ./scripts/datasets/validate.sh --expectations scripts/datasets/expectations.tsv

# Validate only the Gutenberg corpus at <root>/wild/gutenberg; root is $EPUB_STREAM_DATASET_DIR, else tests/datasets.
dataset-validate-gutenberg:
    @cargo build --features cli --bin epub-stream
    DATASET_ROOT="${EPUB_STREAM_DATASET_DIR:-tests/datasets}" && \
    ./scripts/datasets/validate.sh --dataset-dir "$DATASET_ROOT/wild/gutenberg" --expectations scripts/datasets/expectations.tsv

# Validate only the Gutenberg corpus in strict mode (`--strict`); same dataset-root lookup as the non-strict recipe.
dataset-validate-gutenberg-strict:
    @cargo build --features cli --bin epub-stream
    DATASET_ROOT="${EPUB_STREAM_DATASET_DIR:-tests/datasets}" && \
    ./scripts/datasets/validate.sh --strict --dataset-dir "$DATASET_ROOT/wild/gutenberg" --expectations scripts/datasets/expectations.tsv

# Builds the release CLI quietly, then points the smoke script at it via EPUB_STREAM_CLI_BIN.
# Time Gutenberg corpus smoke path (validate + chapters + first chapter text).
dataset-profile-gutenberg:
    @cargo build --release --features cli --bin epub-stream
    EPUB_STREAM_CLI_BIN=target/release/epub-stream ./scripts/datasets/gutenberg_smoke.sh

# Time Gutenberg corpus smoke path in strict validation mode (release CLI, `--strict`).
dataset-profile-gutenberg-strict:
    @cargo build --release --features cli --bin epub-stream
    EPUB_STREAM_CLI_BIN=target/release/epub-stream ./scripts/datasets/gutenberg_smoke.sh --strict

# Full pre-flash gate: `all`, then Gutenberg corpus validation and profiling (if bootstrapped).
harden-gutenberg: all dataset-validate-gutenberg dataset-profile-gutenberg

# Validate all dataset EPUB files in strict mode (warnings fail too), using the expectation manifest.
dataset-validate-strict:
    @cargo build --features cli --bin epub-stream
    ./scripts/datasets/validate.sh --strict --expectations scripts/datasets/expectations.tsv

# Validate against the expectation manifest scripts/datasets/expectations.tsv (default mode).
dataset-validate-expected:
    @cargo build --features cli --bin epub-stream
    ./scripts/datasets/validate.sh --expectations scripts/datasets/expectations.tsv

# Validate against the expectation manifest scripts/datasets/expectations.tsv in strict mode.
dataset-validate-expected-strict:
    @cargo build --features cli --bin epub-stream
    ./scripts/datasets/validate.sh --strict --expectations scripts/datasets/expectations.tsv

# Raw validate mode: no expectation manifest is passed (every file must pass validation).
dataset-validate-raw:
    @cargo build --features cli --bin epub-stream
    ./scripts/datasets/validate.sh

# Raw strict validate mode: no expectation manifest, `--strict` (warnings fail too).
dataset-validate-raw-strict:
    @cargo build --features cli --bin epub-stream
    ./scripts/datasets/validate.sh --strict

# Validate a small, CI-ready mini corpus selected by tests/datasets/manifest-mini.tsv.
dataset-validate-mini:
    @cargo build --features cli --bin epub-stream
    ./scripts/datasets/validate.sh --manifest tests/datasets/manifest-mini.tsv

# The pipeline runs under bash with `pipefail` so a failing `cargo bench` fails
# the recipe; with a plain `sh` pipeline only `tee`'s exit status would count.
# Run benchmarks and save latest CSV report (target/bench/latest.csv).
bench:
    @mkdir -p target/bench
    @bash -eu -o pipefail -c 'cargo bench --bench epub_bench --all-features | tee target/bench/latest.csv'

# Type-check no_std (default features off) with only the `layout` feature enabled.
check-no-std-layout:
    cargo check --no-default-features --features layout

# MSRV check on the pinned 1.85.0 toolchain (meant to match Cargo.toml rust-version; keep in sync).
check-msrv:
    cargo +1.85.0 check --all-features

# Remove build artifacts (`cargo clean`).
clean:
    cargo clean

# Print the crates.io publish order (dependency-aware) as one space-separated line.
publish-order:
    @echo "epub-stream epub-stream-render epub-stream-embedded-graphics epub-stream-render-web"

# Local package sanity check for one crate (`cargo package -p <crate>`, RUSTC_WRAPPER cleared).
package crate:
    RUSTC_WRAPPER= cargo package -p {{crate}}

# Local package sanity check with `--no-verify`, which skips the verification build (for unpublished local deps).
package-no-verify crate:
    RUSTC_WRAPPER= cargo package -p {{crate}} --no-verify

# Only the first crate (epub-stream) is packaged with verification; the other
# three are packaged with --no-verify.
# Local package sanity check for all crates in publish order.
package-all:
    just package epub-stream
    just package-no-verify epub-stream-render
    just package-no-verify epub-stream-embedded-graphics
    just package-no-verify epub-stream-render-web

# Dry-run publish for one crate (`cargo publish --dry-run`, RUSTC_WRAPPER cleared).
publish-dry-run crate:
    RUSTC_WRAPPER= cargo publish -p {{crate}} --dry-run

# Dry-run publish with `--no-verify`, which skips the verification build (for unpublished local deps).
publish-dry-run-no-verify crate:
    RUSTC_WRAPPER= cargo publish -p {{crate}} --dry-run --no-verify

# Only the first crate (epub-stream) is dry-run with verification; the other
# three use the --no-verify variant.
# Dry-run publish for all crates in dependency order.
publish-dry-run-all:
    just publish-dry-run epub-stream
    just publish-dry-run-no-verify epub-stream-render
    just publish-dry-run-no-verify epub-stream-embedded-graphics
    just publish-dry-run-no-verify epub-stream-render-web

# Full release preflight before publishing: ci, then package-all, then publish-dry-run-all.
release-preflight: ci package-all publish-dry-run-all

# Publish all crates to crates.io in dependency order.
# The loop runs in one bash process (`-eu -o pipefail`), so the first failed
# `cargo publish` aborts the remaining crates; it sleeps 30s after each publish.
# The loop variable is written `$c`: just passes `$` through untouched, so the
# Make-style `$$c` would reach bash as `$$` (the shell PID) followed by `c`.
# Requires CARGO_REGISTRY_TOKEN to be configured.
publish-all:
    @bash -eu -o pipefail -c '\
      crates="epub-stream epub-stream-render epub-stream-embedded-graphics epub-stream-render-web"; \
      for c in $crates; do \
        echo "Publishing $c..."; \
        cargo publish -p "$c"; \
        sleep 30; \
      done'