# Run every recipe with bash; a bare `make` prints the help screen.
SHELL := /bin/bash
.DEFAULT_GOAL := help

# Absolute directory containing this Makefile. The value ends with a slash
# (that is what the dir function returns), so paths below are appended to it
# without an extra separator.
ROOT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
PDFIUM_LIB := $(ROOT_DIR)libpdfium.dylib
RELEASE_BIN := $(ROOT_DIR)target/release/pdf2md
DEBUG_BIN := $(ROOT_DIR)target/debug/pdf2md
TEST_DIR := $(ROOT_DIR)test_cases
OUT_DIR := $(ROOT_DIR)test_cases/output

# Binary used by the run targets; defaults to the release build and can be
# overridden from the command line or the environment.
BIN ?= $(RELEASE_BIN)

# Sample documents used by the demo, convert and bench targets.
PDF_ARXIV := $(TEST_DIR)/attention_is_all_you_need.pdf
PDF_IRS := $(TEST_DIR)/irs_form_1040.pdf
PDF_NEURO := $(TEST_DIR)/neuroscience_textbook.pdf
PDF_TEXT := $(TEST_DIR)/sample_text.pdf

# ANSI escape sequences interpolated into printf format strings.
BOLD := \033[1m
GREEN := \033[0;32m
CYAN := \033[0;36m
YELLOW := \033[0;33m
RED := \033[0;31m
RESET := \033[0m

# Command prefix for running the converter: puts ROOT_DIR on DYLD_LIBRARY_PATH
# and selects the LLM provider and model.
# The provider and model are taken from the caller's environment when set and
# fall back to openai / gpt-4.1-nano otherwise. Because this is a simply
# expanded (:=) variable, "$$" is reduced to "$" once here and the resulting
# ${VAR:-default} text reaches the shell untouched when a recipe uses it.
PDF2MD := DYLD_LIBRARY_PATH=$(ROOT_DIR) EDGEQUAKE_LLM_PROVIDER=$${EDGEQUAKE_LLM_PROVIDER:-openai} EDGEQUAKE_MODEL=$${EDGEQUAKE_MODEL:-gpt-4.1-nano} $(BIN)
# help: print a banner, every target whose rule line carries a trailing
# "## description" annotation (sorted), the documented environment variables
# and a short quick-start list.
# NOTE(review): the provider variable is shown as EDGEQUAKE_PROVIDER while the
# recipes in this file export EDGEQUAKE_LLM_PROVIDER; confirm which name the
# binary actually reads.
.PHONY: help
help:
	@printf "$(BOLD)edgequake-pdf2md — Developer Makefile$(RESET)\n\n"
	@printf "$(CYAN)Build targets:$(RESET)\n"
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \
		awk 'BEGIN {FS = ":.*?## "}; /^[a-zA-Z]/ {printf " $(GREEN)%-22s$(RESET) %s\n", $$1, $$2}' | \
		sort
	@printf "\n$(CYAN)Environment variables:$(RESET)\n"
	@printf " $(YELLOW)EDGEQUAKE_PROVIDER$(RESET) LLM provider (e.g. openai, anthropic, gemini)\n"
	@printf " $(YELLOW)EDGEQUAKE_MODEL$(RESET) LLM model ID (default: gpt-4.1-nano)\n"
	@printf " $(YELLOW)PDF2MD_PAGES$(RESET) Pages to convert (all|N|M-N|N,M,...)\n"
	@printf " $(YELLOW)BIN$(RESET) Path to pdf2md binary (default: release)\n"
	@printf "\n$(CYAN)Quick start:$(RESET)\n"
	@printf " make %-12s %s\n" "setup" "Check the pdfium library and the LLM API key"
	@printf " make %-12s %s\n" "build" "Build the release binary"
	@printf " make %-12s %s\n" "ci" "Run fmt-check, lint, test and doc-test"
	@printf " make %-12s %s\n" "pre-publish" "Run scripts/pre-publish-check.sh"
	@printf " make %-12s %s\n" "demo" "Convert page 1 of the Attention paper"
	@printf " make %-12s %s\n" "test-e2e" "Run the e2e tests"
# setup: umbrella target with no recipe of its own; it only pulls in the
# pdfium check and the API-key check.
.PHONY: setup
setup: check-pdfium
setup: check-api-key
# check-pdfium: make sure $(PDFIUM_LIB) exists. When it is missing,
# scripts/setup-pdfium.sh is run with ROOT_DIR as the install directory;
# otherwise the size column of `ls -lh` is reported.
# The size is computed by the shell inside the else branch rather than with
# make's shell function: make expands the whole recipe before running it, so
# a shell-function call would run `ls` on the missing file and print a
# spurious error ahead of the "not found" message.
.PHONY: check-pdfium
check-pdfium:
	@if [ ! -f "$(PDFIUM_LIB)" ]; then \
		printf "$(YELLOW)pdfium library not found — running setup-pdfium.sh...$(RESET)\n"; \
		bash $(ROOT_DIR)scripts/setup-pdfium.sh --install-dir $(ROOT_DIR); \
	else \
		size=$$(ls -lh "$(PDFIUM_LIB)" | awk '{print $$5}'); \
		printf "$(GREEN)✓ pdfium library present (%s)$(RESET)\n" "$$size"; \
	fi
# check-api-key: succeed when at least one of OPENAI_API_KEY,
# ANTHROPIC_AUTH_TOKEN or GEMINI_API_KEY is non-empty, naming each one that is
# set; otherwise print a hint and fail the recipe with exit status 1.
.PHONY: check-api-key
check-api-key:
	@if [ -n "$$OPENAI_API_KEY" ] || [ -n "$$ANTHROPIC_AUTH_TOKEN" ] || [ -n "$$GEMINI_API_KEY" ]; then \
		printf "$(GREEN)✓ LLM API key present"; \
		if [ -n "$$OPENAI_API_KEY" ]; then printf " (OpenAI)"; fi; \
		if [ -n "$$ANTHROPIC_AUTH_TOKEN" ]; then printf " (Anthropic)"; fi; \
		if [ -n "$$GEMINI_API_KEY" ]; then printf " (Gemini)"; fi; \
		printf "$(RESET)\n"; \
	else \
		printf "$(RED)✗ No API key found$(RESET)\n"; \
		printf " Set OPENAI_API_KEY, ANTHROPIC_AUTH_TOKEN, or GEMINI_API_KEY\n"; \
		exit 1; \
	fi
# build: compile the release binary with the "cli" feature, then copy the
# pdfium library into target/release. The copy is best-effort: its errors are
# discarded and never fail the build.
.PHONY: build
build:
	@printf "$(BOLD)Building release binary...$(RESET)\n"
	cargo build --release --features cli
	@cp $(PDFIUM_LIB) target/release/$(notdir $(PDFIUM_LIB)) 2>/dev/null || true
	@printf "$(GREEN)✓ Built: $(RELEASE_BIN)$(RESET)\n"
# build-dev: compile the debug binary with the "cli" feature, then copy the
# pdfium library into target/debug. As in the release build, a failed copy is
# silently ignored.
.PHONY: build-dev
build-dev:
	@printf "$(BOLD)Building debug binary...$(RESET)\n"
	cargo build --features cli
	@cp $(PDFIUM_LIB) target/debug/$(notdir $(PDFIUM_LIB)) 2>/dev/null || true
	@printf "$(GREEN)✓ Built: $(DEBUG_BIN)$(RESET)\n"
# clean: run `cargo clean`, then remove the pdfium copies that build and
# build-dev place under target/release and target/debug.
.PHONY: clean
clean:
	cargo clean
	@rm -f $(foreach profile,release debug,target/$(profile)/$(notdir $(PDFIUM_LIB)))
	@printf "$(GREEN)✓ Cleaned$(RESET)\n"
# inspect-all: run the converter with --inspect-only on every *.pdf in
# $(TEST_DIR).
# - An unmatched glob stays a literal "*.pdf" in the shell, so entries that do
#   not exist are skipped and a hint is printed when nothing was found.
# - The file name is passed to printf as an argument, not spliced into the
#   format string, so a "%" in a name cannot be read as a format directive.
# - Every file is inspected even after a failure; the recipe then exits
#   non-zero if any inspection failed, not only when the last one did.
.PHONY: inspect-all
inspect-all: check-pdfium
	@printf "$(BOLD)Inspecting test PDFs...$(RESET)\n"
	@status=0; found=0; \
	for f in $(TEST_DIR)/*.pdf; do \
		[ -e "$$f" ] || continue; \
		found=1; \
		printf "\n$(CYAN)── %s $(RESET)\n" "$$f"; \
		$(PDF2MD) --inspect-only "$$f" 2>&1 || status=1; \
	done; \
	[ "$$found" -eq 1 ] || printf "$(YELLOW)No PDFs found in $(TEST_DIR) — run: make download-test-pdfs$(RESET)\n"; \
	exit $$status
# inspect: requires PDF to be set (make inspect PDF=...). With an empty PDF
# the usage line is printed and the recipe stops with exit status 1.
.PHONY: inspect
inspect: ## Inspect a specific PDF: make inspect PDF=path/to/file.pdf
	@if [ -z "$(PDF)" ]; then \
		printf "$(RED)Usage: make inspect PDF=path/to/file.pdf$(RESET)\n"; \
		exit 1; \
	fi
	$(PDF2MD) --inspect-only "$(PDF)" 2>&1
# Demo targets: each converts page 1 of one input after check-pdfium.
# The four targets share a single recipe; the banner wording and the input
# (a sample PDF path or a URL) come from target-specific variables.
.PHONY: demo demo-irs demo-neuro demo-url

demo: demo_what := of Attention Is All You Need
demo: demo_src := $(PDF_ARXIV)

demo-irs: demo_what := of IRS Form 1040
demo-irs: demo_src := $(PDF_IRS)

demo-neuro: demo_what := of neuroscience textbook
demo-neuro: demo_src := $(PDF_NEURO)

demo-url: demo_what := from a URL
demo-url: demo_src := https://arxiv.org/pdf/1706.03762

demo demo-irs demo-neuro demo-url: check-pdfium
	@printf "$(BOLD)Converting page 1 $(demo_what)...$(RESET)\n"
	$(PDF2MD) --pages 1 "$(demo_src)" 2>&1
# Conversion targets: write Markdown files into $(OUT_DIR).
# The output directory is listed as an order-only prerequisite (after "|"):
# it only has to exist before the recipe runs.
.PHONY: convert-all convert-paper convert-form

# convert-all: convert the leading pages of each of the four sample PDFs.
convert-all: check-pdfium | $(OUT_DIR)
	@printf "$(BOLD)Converting all test PDFs (pages 1-3)...$(RESET)\n"
	$(PDF2MD) --pages 1-3 --output $(OUT_DIR)/attention_is_all_you_need.md "$(PDF_ARXIV)" 2>&1
	$(PDF2MD) --pages 1-2 --output $(OUT_DIR)/irs_form_1040.md "$(PDF_IRS)" 2>&1
	$(PDF2MD) --pages 1-3 --output $(OUT_DIR)/neuroscience_textbook.md "$(PDF_NEURO)" 2>&1
	$(PDF2MD) --pages 1-2 --output $(OUT_DIR)/sample_text.md "$(PDF_TEXT)" 2>&1
	@printf "$(GREEN)✓ Outputs written to $(OUT_DIR)/$(RESET)\n"

# convert-paper: convert every page of the Attention paper, passing
# --separator hr and --metadata.
convert-paper: check-pdfium | $(OUT_DIR)
	@printf "$(BOLD)Converting Attention Is All You Need (all 15 pages)...$(RESET)\n"
	$(PDF2MD) --pages all \
		--output $(OUT_DIR)/attention_full.md \
		--separator hr --metadata "$(PDF_ARXIV)" 2>&1

# convert-form: convert every page of the IRS form, passing --separator hr.
convert-form: check-pdfium | $(OUT_DIR)
	@printf "$(BOLD)Converting IRS Form 1040...$(RESET)\n"
	$(PDF2MD) --pages all \
		--output $(OUT_DIR)/irs_form_1040_full.md \
		--separator hr "$(PDF_IRS)" 2>&1
# Create the conversion output directory (and any missing parents) on demand.
$(OUT_DIR):
	@mkdir -p $@
# test: run `cargo test` with stderr merged into stdout.
.PHONY: test
test:
	cargo test 2>&1
# End-to-end tests: run the "e2e" cargo test target with E2E_ENABLED=1 and
# ROOT_DIR on DYLD_LIBRARY_PATH, after the pdfium check, the API-key check
# and a release build.
# The provider and model are taken from the caller's environment when set and
# default to openai / gpt-4.1-nano otherwise. check-api-key also accepts
# Anthropic or Gemini credentials, so this lets the run be pointed at them.
.PHONY: test-e2e test-e2e-verbose

test-e2e: check-pdfium check-api-key build
	@printf "$(BOLD)Running e2e tests...$(RESET)\n"
	DYLD_LIBRARY_PATH=$(ROOT_DIR) \
		EDGEQUAKE_LLM_PROVIDER=$${EDGEQUAKE_LLM_PROVIDER:-openai} \
		EDGEQUAKE_MODEL=$${EDGEQUAKE_MODEL:-gpt-4.1-nano} \
		E2E_ENABLED=1 \
		cargo test --test e2e -- --nocapture 2>&1

# test-e2e-verbose: same run with RUST_LOG=debug added to the environment.
test-e2e-verbose: check-pdfium check-api-key build
	@printf "$(BOLD)Running e2e tests (verbose)...$(RESET)\n"
	DYLD_LIBRARY_PATH=$(ROOT_DIR) \
		EDGEQUAKE_LLM_PROVIDER=$${EDGEQUAKE_LLM_PROVIDER:-openai} \
		EDGEQUAKE_MODEL=$${EDGEQUAKE_MODEL:-gpt-4.1-nano} \
		E2E_ENABLED=1 RUST_LOG=debug \
		cargo test --test e2e -- --nocapture 2>&1
# test-all: umbrella target with no recipe; runs test and test-e2e.
.PHONY: test-all
test-all: test
test-all: test-e2e
# Code-quality helpers. Each target wraps one cargo command and merges stderr
# into stdout.
.PHONY: lint fmt fmt-check doc doc-test

# lint: clippy over all features, with warnings denied (-D warnings).
lint:
	cargo clippy --all-features -- -D warnings 2>&1

# fmt: run cargo fmt.
fmt:
	cargo fmt 2>&1

# fmt-check: run cargo fmt with --check.
fmt-check:
	cargo fmt --check 2>&1

# doc: build the docs without dependencies and open them (--open).
doc:
	cargo doc --no-deps --open 2>&1

# doc-test: run documentation tests only.
doc-test:
	cargo test --doc 2>&1
.PHONY: audit ci ci-all

# audit: install cargo-audit first when it is not on PATH, then run
# `cargo audit`.
audit:
	@if ! command -v cargo-audit >/dev/null 2>&1; then \
		printf "$(YELLOW)Installing cargo-audit...$(RESET)\n" && cargo install cargo-audit; \
	fi
	cargo audit 2>&1

# ci: formatting check, lint, unit tests and doc tests.
ci: fmt-check lint test doc-test

# ci-all: everything in ci, followed by the dependency audit and a release
# build.
ci-all: ci audit build
.PHONY: pre-publish pre-publish-check-version

# pre-publish: run scripts/pre-publish-check.sh with no arguments.
pre-publish:
	@bash scripts/pre-publish-check.sh

# pre-publish-check-version: run the same script with --version v<version>,
# where <version> is the text between the quotes of the first line of
# Cargo.toml that starts with `version = `.
pre-publish-check-version:
	@bash scripts/pre-publish-check.sh --version v$(shell grep '^version = ' Cargo.toml | head -1 | sed -e 's/version = "//' -e 's/".*//')
# msrv: check that all features build with the toolchain named below.
# Installing the toolchain is best-effort (output discarded, failure ignored);
# the subsequent `cargo +<version> check` is what decides success.
.PHONY: msrv
msrv: msrv_version := 1.80
msrv:
	@printf "$(BOLD)Checking MSRV ($(msrv_version))...$(RESET)\n"
	rustup toolchain install $(msrv_version) --profile minimal 2>/dev/null || true
	cargo +$(msrv_version) check --all-features 2>&1
	@printf "$(GREEN)✓ MSRV check passed$(RESET)\n"
# install: after a release build, run `cargo install` on the current
# directory with the "cli" feature.
.PHONY: install
install: build
	cargo install --path . --features cli 2>&1
	@printf "$(GREEN)✓ pdf2md installed to ~/.cargo/bin$(RESET)\n"
# bench-page: time a conversion of page 1 of the Attention paper. The
# converter's stdout and stderr are both sent to /dev/null.
.PHONY: bench-page
bench-page: check-pdfium
	@printf "$(BOLD)Benchmarking single-page conversion...$(RESET)\n"
	time $(PDF2MD) --pages 1 "$(PDF_ARXIV)" >/dev/null 2>&1
# view-output: list $(OUT_DIR) and hand it to `open`; when the directory does
# not exist yet, point the user at convert-all instead.
# Written as an explicit if/else rather than `ls && open || printf`: with the
# chained form a failing `open` also triggered the "No output yet" message
# and the recipe still reported success.
.PHONY: view-output
view-output:
	@if [ -d "$(OUT_DIR)" ]; then \
		ls $(OUT_DIR) && open $(OUT_DIR); \
	else \
		printf "$(YELLOW)No output yet — run: make convert-all$(RESET)\n"; \
	fi
# download-test-pdfs: fetch the four sample PDFs into $(TEST_DIR), creating
# the directory first. Each curl uses -f, so an HTTP error fails that recipe
# line and the success message after "&&" is not printed.
.PHONY: download-test-pdfs
download-test-pdfs:
	@printf "$(BOLD)Downloading test PDFs...$(RESET)\n"
	@mkdir -p $(TEST_DIR)
	curl -fSL "https://arxiv.org/pdf/1706.03762" -o $(PDF_ARXIV) && printf "$(GREEN)✓ Attention paper$(RESET)\n"
	curl -fSL "https://www.irs.gov/pub/irs-pdf/f1040.pdf" -o $(PDF_IRS) && printf "$(GREEN)✓ IRS Form 1040$(RESET)\n"
	curl -fSL "https://css4.pub/2015/textbook/somatosensory.pdf" -o $(PDF_NEURO) && printf "$(GREEN)✓ Neuroscience textbook$(RESET)\n"
	curl -fSL "https://freetestdata.com/wp-content/uploads/2021/09/Free_Test_Data_1MB_PDF.pdf" -o $(PDF_TEXT) && printf "$(GREEN)✓ Sample text PDF$(RESET)\n"
# info: print the project name, root directory, selected binary, whether the
# pdfium library is present (with its size), the number of sample PDFs, and
# the provider/model taken from the environment.
# The library size is computed by the shell only in the "present" branch.
# Make expands the whole recipe before running it, so a make shell-function
# call here would run `ls` on a missing library and print a spurious error
# next to "MISSING".
# NOTE(review): this reads EDGEQUAKE_PROVIDER, while the run recipes in this
# file export EDGEQUAKE_LLM_PROVIDER; confirm which name the binary reads.
.PHONY: info
info:
	@printf "$(BOLD)Project:$(RESET) edgequake-pdf2md\n"
	@printf "$(BOLD)Root:$(RESET) $(ROOT_DIR)\n"
	@printf "$(BOLD)Binary:$(RESET) $(BIN)\n"
	@printf "$(BOLD)pdfium:$(RESET) "
	@if [ -f "$(PDFIUM_LIB)" ]; then \
		printf "$(GREEN)present$(RESET) %s" "$$(ls -lh "$(PDFIUM_LIB)" | awk '{print $$5}')"; \
	else \
		printf "$(RED)MISSING$(RESET)"; \
	fi
	@printf "\n$(BOLD)Test PDFs:$(RESET) $(shell ls -1 $(TEST_DIR)/*.pdf 2>/dev/null | wc -l | tr -d ' ') files\n"
	@printf "$(BOLD)Provider:$(RESET) $${EDGEQUAKE_PROVIDER:-auto-detect}\n"
	@printf "$(BOLD)Model:$(RESET) $${EDGEQUAKE_MODEL:-provider default}\n"