llm-git 3.4.0

AI-powered git commit message generator using Claude and other LLMs via OpenAI-compatible APIs
Documentation
//! Build script: codegen static lookup tables for `src/validation.rs` from the
//! single-source-of-truth JSON (`src/validation_data.json`).
//!
//! Emitting `&'static` slices at compile time keeps `present_to_past`/
//! `is_past_tense_verb` allocation-free (no runtime parse) and lets the JSON
//! stay the only place the verb data lives.

use std::{collections::HashSet, env, fmt::Write as _, fs, path::Path};

/// Path of the JSON data file that this script reads and registers with
/// Cargo's `rerun-if-changed`. It is relative, so it resolves against the
/// working directory the build script is run from.
const DATA: &str = "src/validation_data.json";

/// Build-script entry point: reads `DATA`, validates its contents, and writes
/// the generated lookup tables to `$OUT_DIR/validation_data.rs`.
///
/// # Panics
///
/// Panics (and thereby fails the build) when the data file cannot be read or
/// parsed, when an expected key is not an array of strings, when a
/// `past_tense` entry is malformed or duplicated, when any validated string
/// is not lowercase and trimmed, when an irregular past form also appears in
/// a morphology blocklist, or when the output file cannot be written.
fn main() {
   // Ask Cargo to re-run this script when the data or the script itself changes.
   for watched in [DATA, "build.rs"] {
      println!("cargo:rerun-if-changed={watched}");
   }

   let raw = match fs::read_to_string(DATA) {
      Ok(text) => text,
      Err(e) => panic!("read {DATA}: {e}"),
   };
   let data: serde_json::Value = match serde_json::from_str(&raw) {
      Ok(value) => value,
      Err(e) => panic!("parse {DATA}: {e}"),
   };

   let mut out =
      String::from("// @generated by build.rs from src/validation_data.json - do not edit.\n");

   // `past_tense` is an array of [present, past] pairs, emitted as
   // `&[(&str, &str)]`. Pairs whose two forms are identical are collected as
   // well: `is_past_tense_verb` deliberately skips those in its map check, so
   // they have to be listed in IRREGULAR_PAST to remain accepted.
   let pairs = data["past_tense"]
      .as_array()
      .expect("past_tense must be an array");
   let mut irregular: Vec<&str> = Vec::new();
   let mut present_keys: HashSet<&str> = HashSet::new();
   out.push_str("pub const PAST_TENSE_MAP: &[(&str, &str)] = &[\n");
   for entry in pairs {
      let forms = entry
         .as_array()
         .expect("past_tense entry must be [present, past]");
      assert_eq!(forms.len(), 2, "past_tense entry must have exactly 2 elements: {entry}");

      let present = lc(
         forms[0].as_str().expect("present must be a string"),
         "past_tense present",
      );
      // `insert` returns false when the key was already recorded.
      if !present_keys.insert(present) {
         panic!("duplicate present key in past_tense: {present:?}");
      }

      let past = lc(
         forms[1].as_str().expect("past must be a string"),
         "past_tense past",
      );
      if past.contains(' ') {
         panic!("past form must be a single token, got {past:?}");
      }

      writeln!(out, "   ({:?}, {:?}),", present, past).unwrap();
      if past == present {
         irregular.push(past);
      }
   }
   out.push_str("];\n");

   // Append the standalone irregular pasts (those without a present key) to
   // the derived ones, then normalise the combined list.
   let standalone = data["irregular_past"]
      .as_array()
      .expect("irregular_past must be an array");
   irregular.extend(standalone.iter().map(|v| {
      lc(
         v.as_str().expect("irregular_past entry must be a string"),
         "irregular_past",
      )
   }));
   irregular.sort_unstable();
   irregular.dedup();

   // Invariant: no accepted irregular (derived or explicit) may also be in a
   // morphology blocklist, because `is_past_tense_verb` consults the
   // blocklists before IRREGULAR_PAST and would reject the word.
   let mut blocklisted: HashSet<&str> = HashSet::new();
   for key in ["ed_blocklist", "d_blocklist"] {
      let entries = data[key]
         .as_array()
         .unwrap_or_else(|| panic!("{key} must be an array"));
      for v in entries {
         blocklisted.insert(v.as_str().expect("blocklist entry must be a string"));
      }
   }
   if let Some(clash) = irregular.iter().copied().find(|irr| blocklisted.contains(irr)) {
      panic!("irregular {clash:?} must not appear in a morphology blocklist");
   }
   emit_slice(&mut out, "IRREGULAR_PAST", irregular.into_iter());

   // Every remaining table is a plain array of strings: (constant name, JSON key).
   let tables = [
      ("ED_BLOCKLIST", "ed_blocklist"),
      ("D_BLOCKLIST", "d_blocklist"),
      ("CODE_EXTENSIONS", "code_extensions"),
      ("DOC_EXTENSIONS", "doc_extensions"),
      ("FILLER_WORDS", "filler_words"),
      ("META_PHRASES", "meta_phrases"),
      ("BODY_PRESENT_TENSE", "body_present_tense"),
   ];
   for (name, key) in tables {
      let entries = data[key]
         .as_array()
         .unwrap_or_else(|| panic!("{key} must be an array"));
      let validated = entries.iter().map(|v| {
         let text = v
            .as_str()
            .unwrap_or_else(|| panic!("{key} entries must be strings"));
         lc(text, key)
      });
      emit_slice(&mut out, name, validated);
   }

   let dest = Path::new(&env::var("OUT_DIR").expect("OUT_DIR not set")).join("validation_data.rs");
   if let Err(e) = fs::write(&dest, out) {
      panic!("write {}: {e}", dest.display());
   }
}

/// Append a `const {name}: &[&str] = &[ ... ];` definition to `out`.
///
/// Each item goes on its own line, rendered with `{:?}` so it appears as a
/// quoted, escaped string literal in the generated source.
fn emit_slice<'a>(out: &mut String, name: &str, items: impl Iterator<Item = &'a str>) {
   let header = format!("const {name}: &[&str] = &[\n");
   out.push_str(&header);
   items.for_each(|literal| {
      out.push_str("   ");
      // Formatting into a `String` does not fail, so the result is unwrapped.
      write!(out, "{literal:?}").unwrap();
      out.push_str(",\n");
   });
   out.push_str("];\n");
}

/// Check that a JSON string is already lowercase with no surrounding
/// whitespace, and hand it back unchanged.
///
/// Most lookups compare against lowercased input, so an entry such as `Load`
/// or one with a trailing space would silently never match.
///
/// # Panics
///
/// Panics when `s` differs from its trimmed or its lowercased form; `ctx`
/// names the offending field in the panic message.
fn lc<'a>(s: &'a str, ctx: &str) -> &'a str {
   let is_trimmed = s.trim() == s;
   let is_clean = is_trimmed && s.to_lowercase() == s;
   if !is_clean {
      panic!("{ctx} value must be lowercase and trimmed: {s:?}");
   }
   s
}