anno 0.3.1

Information extraction for Rust: Named Entity Recognition (NER), coreference resolution, and structured extraction
Documentation
# Anno environment configuration
# Copy to .env and fill in values

# Hugging Face access token (for gated datasets like MultiCoNER v2)
# Create one at: https://huggingface.co/settings/tokens
HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

# S3 cache configuration
# Bucket naming: arc (global namespace) -> anno (project) -> data
ANNO_S3_CACHE=1
ANNO_S3_BUCKET=arc-anno-data

# Cache directory (optional, defaults to platform cache: ~/Library/Caches/anno on macOS,
# ~/.cache/anno on Linux). Used by: dataset loader, muxer state, and eval history (as fallback).
# Set to ~/.anno_cache to match CI (GitHub Actions caches this directory).
# ANNO_CACHE_DIR=~/.anno_cache

# Eval history JSONL path (optional; defaults to ANNO_CACHE_DIR/eval-results.jsonl, falling back
# to the platform cache directory when ANNO_CACHE_DIR is unset). Used by: TaskEvaluator (CLI
# benchmark), matrix harness Estimate/WorstFirst strategies, and regression detection
# (ANNO_CHECK_REGRESSIONS).
# ANNO_EVAL_HISTORY=~/.anno_cache/eval-results.jsonl

# Muxer history file override (optional; defaults to ANNO_CACHE_DIR/muxer_history.<slice>.json).
# Useful for isolating local runs from CI state.
# ANNO_HISTORY_FILE=/tmp/my-muxer-history.json

# LinUCB global state override (optional; defaults to ANNO_CACHE_DIR/linucb_global_state.json).
# ANNO_LINUCB_STATE_FILE=/tmp/my-linucb-state.json

# Quality weight for multi-armed bandit (MAB) selection (default: 0.0 = disabled).
# When > 0, blends the continuous F1 quality signal (Outcome.quality_score) into the
# MAB objective alongside the junk/ok/cost signals. Requires quality scores to be populated
# (the matrix harness sets them automatically from primary_f1).
# ANNO_MUXER_QUALITY_WEIGHT=0.1

# Logging (optional)
# RUST_LOG=anno=debug

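# ONNX Runtime (optional): path to the ONNX Runtime shared library to load at runtime
# (e.g. libonnxruntime.so on Linux, libonnxruntime.dylib on macOS, onnxruntime.dll on Windows).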
# ORT_DYLIB_PATH=/path/to/libonnxruntime.so