use std::collections::HashMap;
use std::env;
use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::Path;
#[path = "src/types.rs"]
mod types;
use types::{Journal, Law, Reporter};
/// Turns an arbitrary key into a fragment that is safe to embed in a Rust identifier.
///
/// Every character that is not an ASCII letter or digit becomes `_`; leading and
/// trailing underscores are then stripped. The result may be empty, and distinct
/// inputs can produce the same output.
fn sanitize_identifier(s: &str) -> String {
    // Each input char yields at most one output byte per input byte, so this never regrows.
    let mut replaced = String::with_capacity(s.len());
    for ch in s.chars() {
        replaced.push(if ch.is_ascii_alphanumeric() { ch } else { '_' });
    }
    replaced.trim_matches('_').to_owned()
}
/// Escapes `s` so it can be placed between double quotes in generated Rust source.
///
/// Backslash, double quote, newline, carriage return and tab are replaced by their
/// two-character escape sequences; every other character is copied unchanged.
fn escape_string(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '"' => escaped.push_str("\\\""),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Build-script entry point: writes `generated.rs` into Cargo's `OUT_DIR`.
///
/// Emits a short header comment and the `CiteType` import, then delegates to each
/// `generate_*` function in turn, and finally tells Cargo to rerun the script when
/// `reporters_db/data/` changes.
///
/// # Panics
///
/// Panics if `OUT_DIR` is unset, the output file cannot be created, or any write
/// (including the final flush) fails.
fn main() {
    let out_dir = env::var_os("OUT_DIR").expect("OUT_DIR is set by Cargo for build scripts");
    let dest_path = Path::new(&out_dir).join("generated.rs");
    let mut file = BufWriter::new(
        File::create(&dest_path).expect("failed to create generated.rs in OUT_DIR"),
    );

    writeln!(file, "// This file is auto-generated by build.rs").unwrap();
    writeln!(
        file,
        "// The types generated here use static references for zero-copy access"
    )
    .unwrap();
    writeln!(
        file,
        "// They are distinct from the owned types in types.rs used for JSON parsing"
    )
    .unwrap();
    writeln!(file).unwrap();
    writeln!(file, "use crate::types::CiteType;").unwrap();
    writeln!(file).unwrap();

    generate_reporters(&mut file);
    generate_journals(&mut file);
    generate_laws(&mut file);
    generate_state_abbreviations(&mut file);
    generate_case_name_abbreviations(&mut file);
    generate_regexes(&mut file);

    // `BufWriter` ignores errors when it flushes on drop, so flush explicitly; otherwise a
    // failed final write would leave a truncated generated.rs without failing the build.
    file.flush().expect("failed to flush generated.rs");

    println!("cargo:rerun-if-changed=reporters_db/data/");
}
/// Generates the reporter tables from `reporters_db/data/reporters.json`.
///
/// Output is written in this order:
/// 1. one `EDITIONS_*` phf map per reporter, plus a `VARIATIONS_*` phf map for each
///    reporter whose variations are non-empty;
/// 2. the generated `Reporter` and `Edition` struct definitions;
/// 3. one `REPORTERS_*` array per JSON key;
/// 4. the public `REPORTERS` phf map from key to array.
///
/// Every value interpolated into a generated string literal goes through
/// `escape_string`, so quotes or backslashes in the data cannot break `generated.rs`.
///
/// # Panics
///
/// Panics if the JSON cannot be deserialized or a write to `file` fails.
fn generate_reporters(file: &mut BufWriter<File>) {
    /// Renders `value` as a quoted, escaped Rust string literal.
    fn str_literal<T: std::fmt::Display>(value: T) -> String {
        format!("\"{}\"", escape_string(&value.to_string()))
    }

    /// Renders an optional value as the source text `Some("...")` or `None`.
    fn opt_literal<T: std::fmt::Display>(value: Option<&T>) -> String {
        match value {
            Some(inner) => format!("Some({})", str_literal(inner)),
            None => "None".to_string(),
        }
    }

    let json = include_str!("reporters_db/data/reporters.json");
    let reporters: HashMap<String, Vec<Reporter>> = serde_json::from_str(json).unwrap();

    // One `(editions map, optional variations map)` pair per reporter, recorded in
    // iteration order so the second pass can consume them in the same order.
    let mut map_names: Vec<(String, Option<String>)> = Vec::new();
    // Different keys can sanitize to the same text; a per-name counter keeps statics unique.
    let mut per_key_counters: HashMap<String, usize> = HashMap::new();

    for (key, reporter_list) in &reporters {
        let sanitized_key = sanitize_identifier(key);
        for reporter in reporter_list.iter() {
            let counter = per_key_counters
                .entry(sanitized_key.clone())
                .or_insert(0);
            let editions_map_name = format!("EDITIONS_{}_{}", sanitized_key, counter);
            let variations_map_name = format!("VARIATIONS_{}_{}", sanitized_key, counter);
            *counter += 1;

            writeln!(file, "#[allow(non_upper_case_globals)]").unwrap();
            writeln!(
                file,
                "static {}: phf::Map<&'static str, Edition> = ",
                editions_map_name
            )
            .unwrap();
            let mut map = phf_codegen::Map::new();
            for (edition_key, edition) in &reporter.editions {
                let regexes = match edition.regexes.as_ref() {
                    Some(patterns) => format!(
                        "Some(&[{}])",
                        patterns
                            .iter()
                            .map(str_literal)
                            .collect::<Vec<_>>()
                            .join(", ")
                    ),
                    None => "None".to_string(),
                };
                let edition_str = format!(
                    "Edition {{ end: {}, start: {}, regexes: {} }}",
                    opt_literal(edition.end.as_ref()),
                    opt_literal(edition.start.as_ref()),
                    regexes
                );
                map.entry(edition_key.as_str(), edition_str);
            }
            writeln!(file, "{};", map.build()).unwrap();
            writeln!(file).unwrap();

            let variations_name = if reporter.variations.is_empty() {
                None
            } else {
                writeln!(file, "#[allow(non_upper_case_globals)]").unwrap();
                writeln!(
                    file,
                    "static {}: phf::Map<&'static str, &'static str> = ",
                    variations_map_name
                )
                .unwrap();
                let mut map = phf_codegen::Map::new();
                for (var_key, var_val) in &reporter.variations {
                    map.entry(var_key.as_str(), str_literal(var_val));
                }
                writeln!(file, "{};", map.build()).unwrap();
                writeln!(file).unwrap();
                Some(variations_map_name)
            };
            map_names.push((editions_map_name, variations_name));
        }
    }

    write_reporter_type_definitions(file);

    let mut reporter_arrays = Vec::new();
    let mut name_counters = HashMap::new();
    // An unmodified `HashMap` iterates in the same order each time, so the names recorded
    // by the first pass line up one-to-one with the reporters visited here.
    let mut pending_names = map_names.iter();
    for (key, reporter_list) in &reporters {
        let base_name = sanitize_identifier(key);
        let counter = name_counters.entry(base_name.clone()).or_insert(0);
        let array_name = if *counter == 0 {
            format!("REPORTERS_{}", base_name)
        } else {
            format!("REPORTERS_{}_{}", base_name, counter)
        };
        *counter += 1;

        writeln!(file, "#[allow(non_upper_case_globals)]").unwrap();
        writeln!(file, "static {}: &[Reporter] = &[", array_name).unwrap();
        for reporter in reporter_list {
            let (editions_name, variations_name) = pending_names
                .next()
                .expect("first pass recorded one name pair per reporter");
            let cite_type = format!("CiteType::{:?}", reporter.cite_type);
            let mlz = reporter
                .mlz_jurisdiction
                .iter()
                .map(|s| format!("\"{}\"", escape_string(s)))
                .collect::<Vec<_>>()
                .join(", ");
            let variations = match variations_name {
                Some(name) => format!("Some(&{})", name),
                None => "None".to_string(),
            };
            writeln!(file, " Reporter {{").unwrap();
            writeln!(file, " cite_type: {},", cite_type).unwrap();
            writeln!(file, " editions: &{},", editions_name).unwrap();
            writeln!(file, " mlz_jurisdiction: &[{}],", mlz).unwrap();
            writeln!(file, " name: \"{}\",", escape_string(&reporter.name)).unwrap();
            writeln!(file, " variations: {},", variations).unwrap();
            writeln!(file, " href: {},", opt_literal(reporter.href.as_ref())).unwrap();
            writeln!(file, " }},").unwrap();
        }
        writeln!(file, "];").unwrap();
        writeln!(file).unwrap();
        reporter_arrays.push((key.clone(), array_name));
    }

    writeln!(
        file,
        "pub static REPORTERS: phf::Map<&'static str, &'static [Reporter]> = "
    )
    .unwrap();
    let mut map = phf_codegen::Map::new();
    for (key, array_name) in &reporter_arrays {
        map.entry(key.as_str(), array_name);
    }
    writeln!(file, "{};", map.build()).unwrap();
    writeln!(file).unwrap();
}

/// Writes the documented `Reporter` and `Edition` struct definitions into `file`.
fn write_reporter_type_definitions(file: &mut BufWriter<File>) {
    writeln!(
        file,
        "/// A legal court reporter containing metadata about editions, variations, and citations."
    )
    .unwrap();
    writeln!(file, "///").unwrap();
    writeln!(
        file,
        "/// Reporters are collections of legal publications that contain court decisions."
    )
    .unwrap();
    writeln!(
        file,
        "/// Each reporter has multiple editions (like A., A.2d, A.3d) that represent different"
    )
    .unwrap();
    writeln!(file, "/// time periods or series of the same publication.").unwrap();
    writeln!(file, "///").unwrap();
    writeln!(file, "/// # Examples").unwrap();
    writeln!(file, "///").unwrap();
    writeln!(file, "/// ```rust").unwrap();
    writeln!(file, "/// use reporters_db::get_reporters;").unwrap();
    writeln!(file, "///").unwrap();
    writeln!(file, "/// let reporters = get_reporters();").unwrap();
    writeln!(
        file,
        "/// if let Some(reporter_list) = reporters.get(\"A.2d\") {{"
    )
    .unwrap();
    writeln!(file, "/// for reporter in reporter_list.iter() {{").unwrap();
    writeln!(
        file,
        "/// println!(\"Reporter: {{}}\", reporter.name);"
    )
    .unwrap();
    writeln!(
        file,
        "/// println!(\"Type: {{:?}}\", reporter.cite_type);"
    )
    .unwrap();
    writeln!(file, "/// }}").unwrap();
    writeln!(file, "/// }}").unwrap();
    writeln!(file, "/// ```").unwrap();
    writeln!(file, "#[derive(Debug, Clone)]").unwrap();
    writeln!(file, "pub struct Reporter {{").unwrap();
    writeln!(
        file,
        " /// The type of legal citation this reporter represents."
    )
    .unwrap();
    writeln!(file, " pub cite_type: CiteType,").unwrap();
    writeln!(
        file,
        " /// Map of edition abbreviations to their metadata (dates, regexes, etc.)."
    )
    .unwrap();
    writeln!(
        file,
        " pub editions: &'static phf::Map<&'static str, Edition>,"
    )
    .unwrap();
    writeln!(
        file,
        " /// Jurisdictions covered by this reporter (for citation processing)."
    )
    .unwrap();
    writeln!(file, " pub mlz_jurisdiction: &'static [&'static str],").unwrap();
    writeln!(
        file,
        " /// Full name of the reporter (e.g., \"Atlantic Reporter\")."
    )
    .unwrap();
    writeln!(file, " pub name: &'static str,").unwrap();
    writeln!(
        file,
        " /// Optional map of variation names to canonical abbreviations."
    )
    .unwrap();
    writeln!(
        file,
        " pub variations: Option<&'static phf::Map<&'static str, &'static str>>,"
    )
    .unwrap();
    writeln!(
        file,
        " /// Optional URL reference for more information about this reporter."
    )
    .unwrap();
    writeln!(file, " pub href: Option<&'static str>,").unwrap();
    writeln!(file, "}}").unwrap();
    writeln!(file).unwrap();
    writeln!(
        file,
        "/// A specific edition or series of a legal reporter."
    )
    .unwrap();
    writeln!(file, "///").unwrap();
    writeln!(
        file,
        "/// Each reporter can have multiple editions representing different time periods"
    )
    .unwrap();
    writeln!(
        file,
        "/// or series. For example, the Atlantic Reporter has A. (1885-1938), A.2d (1938-2009),"
    )
    .unwrap();
    writeln!(file, "/// and A.3d (2009-present) editions.").unwrap();
    writeln!(file, "///").unwrap();
    writeln!(file, "/// # Examples").unwrap();
    writeln!(file, "///").unwrap();
    writeln!(file, "/// ```rust").unwrap();
    writeln!(file, "/// use reporters_db::get_reporters;").unwrap();
    writeln!(file, "///").unwrap();
    writeln!(file, "/// let reporters = get_reporters();").unwrap();
    writeln!(
        file,
        "/// if let Some(reporter_list) = reporters.get(\"A.2d\") {{"
    )
    .unwrap();
    writeln!(file, "/// for reporter in reporter_list.iter() {{").unwrap();
    writeln!(
        file,
        "/// for (abbrev, edition) in reporter.editions {{"
    )
    .unwrap();
    writeln!(
        file,
        "/// if let (Some(start), Some(end)) = (edition.start, edition.end) {{"
    )
    .unwrap();
    writeln!(
        file,
        "/// println!(\"{{}} ran from {{}} to {{}}\", abbrev, start, end);"
    )
    .unwrap();
    writeln!(file, "/// }}").unwrap();
    writeln!(file, "/// }}").unwrap();
    writeln!(file, "/// }}").unwrap();
    writeln!(file, "/// }}").unwrap();
    writeln!(file, "/// ```").unwrap();
    writeln!(file, "#[derive(Debug, Clone)]").unwrap();
    writeln!(file, "pub struct Edition {{").unwrap();
    writeln!(
        file,
        " /// End date of this edition (ISO 8601 format), if known."
    )
    .unwrap();
    writeln!(file, " pub end: Option<&'static str>,").unwrap();
    writeln!(
        file,
        " /// Start date of this edition (ISO 8601 format), if known."
    )
    .unwrap();
    writeln!(file, " pub start: Option<&'static str>,").unwrap();
    writeln!(
        file,
        " /// Regular expressions for parsing citations in this edition."
    )
    .unwrap();
    writeln!(file, " pub regexes: Option<&'static [&'static str]>,").unwrap();
    writeln!(file, "}}").unwrap();
    writeln!(file).unwrap();
}
/// Generates the journal tables from `reporters_db/data/journals.json`.
///
/// Writes the generated `Journal` struct definition, one `JOURNALS_*` array per JSON
/// key, and the public `JOURNALS` phf map from key to array.
///
/// Array names are derived from `sanitize_identifier(key)`. Because distinct keys can
/// sanitize to the same text, a numeric suffix is appended whenever a name is already
/// taken, so the generated statics never collide.
///
/// # Panics
///
/// Panics if the JSON cannot be deserialized or a write to `file` fails.
fn generate_journals(file: &mut BufWriter<File>) {
    /// Renders an optional string as the source text `Some("...")` or `None`.
    fn opt_literal<T: AsRef<str>>(value: Option<&T>) -> String {
        match value {
            Some(text) => format!("Some(\"{}\")", escape_string(text.as_ref())),
            None => "None".to_string(),
        }
    }

    /// Renders strings as comma-separated, escaped string literals.
    fn list_literal<I>(items: I) -> String
    where
        I: IntoIterator,
        I::Item: AsRef<str>,
    {
        items
            .into_iter()
            .map(|item| format!("\"{}\"", escape_string(item.as_ref())))
            .collect::<Vec<_>>()
            .join(", ")
    }

    let json = include_str!("reporters_db/data/journals.json");
    let journals: HashMap<String, Vec<Journal>> = serde_json::from_str(json).unwrap();

    write_journal_type_definition(file);

    let mut journal_arrays = Vec::new();
    let mut used_names = std::collections::HashSet::new();
    for (key, journal_list) in &journals {
        // Keep the plain name when it is free; otherwise try `_1`, `_2`, ... until unique.
        let base_name = format!("JOURNALS_{}", sanitize_identifier(key));
        let mut array_name = base_name.clone();
        let mut suffix = 0u32;
        while !used_names.insert(array_name.clone()) {
            suffix += 1;
            array_name = format!("{}_{}", base_name, suffix);
        }

        writeln!(file, "#[allow(non_upper_case_globals)]").unwrap();
        writeln!(file, "static {}: &[Journal] = &[", array_name).unwrap();
        for journal in journal_list {
            let examples = list_literal(journal.examples.iter());
            let regexes = list_literal(journal.regexes.iter());
            writeln!(file, " Journal {{").unwrap();
            writeln!(
                file,
                " cite_type: \"{}\",",
                escape_string(&journal.cite_type)
            )
            .unwrap();
            writeln!(file, " name: \"{}\",", escape_string(&journal.name)).unwrap();
            writeln!(file, " start: {},", opt_literal(journal.start.as_ref())).unwrap();
            writeln!(file, " end: {},", opt_literal(journal.end.as_ref())).unwrap();
            writeln!(file, " examples: &[{}],", examples).unwrap();
            writeln!(file, " regexes: &[{}],", regexes).unwrap();
            writeln!(file, " notes: {},", opt_literal(journal.notes.as_ref())).unwrap();
            writeln!(file, " href: {},", opt_literal(journal.href.as_ref())).unwrap();
            writeln!(file, " }},").unwrap();
        }
        writeln!(file, "];").unwrap();
        writeln!(file).unwrap();
        journal_arrays.push((key.clone(), array_name));
    }

    writeln!(
        file,
        "pub static JOURNALS: phf::Map<&'static str, &'static [Journal]> = "
    )
    .unwrap();
    let mut map = phf_codegen::Map::new();
    for (key, array_name) in &journal_arrays {
        map.entry(key.as_str(), array_name);
    }
    writeln!(file, "{};", map.build()).unwrap();
    writeln!(file).unwrap();
}

/// Writes the documented `Journal` struct definition into `file`.
fn write_journal_type_definition(file: &mut BufWriter<File>) {
    writeln!(file, "/// A legal journal or law review publication.").unwrap();
    writeln!(file, "///").unwrap();
    writeln!(
        file,
        "/// Journals are academic or professional legal publications that contain"
    )
    .unwrap();
    writeln!(
        file,
        "/// articles, notes, and commentary on legal topics. They have different"
    )
    .unwrap();
    writeln!(file, "/// citation formats than court reporters.").unwrap();
    writeln!(file, "///").unwrap();
    writeln!(file, "/// # Examples").unwrap();
    writeln!(file, "///").unwrap();
    writeln!(file, "/// ```rust").unwrap();
    writeln!(file, "/// use reporters_db::get_journals;").unwrap();
    writeln!(file, "///").unwrap();
    writeln!(file, "/// let journals = get_journals();").unwrap();
    writeln!(
        file,
        "/// for (abbrev, journal_list) in journals.entries().take(3) {{"
    )
    .unwrap();
    writeln!(file, "/// for journal in journal_list.iter() {{").unwrap();
    writeln!(
        file,
        "/// println!(\"{{}} - {{}}\", abbrev, journal.name);"
    )
    .unwrap();
    writeln!(file, "/// }}").unwrap();
    writeln!(file, "/// }}").unwrap();
    writeln!(file, "/// ```").unwrap();
    writeln!(file, "#[derive(Debug, Clone)]").unwrap();
    writeln!(file, "pub struct Journal {{").unwrap();
    writeln!(file, " /// Type of citation this journal uses.").unwrap();
    writeln!(file, " pub cite_type: &'static str,").unwrap();
    writeln!(file, " /// Full name of the journal.").unwrap();
    writeln!(file, " pub name: &'static str,").unwrap();
    writeln!(file, " /// Publication start date, if known.").unwrap();
    writeln!(file, " pub start: Option<&'static str>,").unwrap();
    writeln!(file, " /// Publication end date, if known.").unwrap();
    writeln!(file, " pub end: Option<&'static str>,").unwrap();
    writeln!(file, " /// Example citations showing proper format.").unwrap();
    writeln!(file, " pub examples: &'static [&'static str],").unwrap();
    writeln!(
        file,
        " /// Regular expressions for parsing citations of this journal."
    )
    .unwrap();
    writeln!(file, " pub regexes: &'static [&'static str],").unwrap();
    writeln!(file, " /// Additional notes about this journal.").unwrap();
    writeln!(file, " pub notes: Option<&'static str>,").unwrap();
    writeln!(file, " /// URL reference for more information.").unwrap();
    writeln!(file, " pub href: Option<&'static str>,").unwrap();
    writeln!(file, "}}").unwrap();
    writeln!(file).unwrap();
}
/// Generates the law tables from `reporters_db/data/laws.json`.
///
/// Writes the generated `Law` struct definition, one `LAWS_*` array per JSON key, and
/// the public `LAWS` phf map from key to array.
///
/// Array names are derived from `sanitize_identifier(key)`. Because distinct keys can
/// sanitize to the same text, a numeric suffix is appended whenever a name is already
/// taken, so the generated statics never collide.
///
/// # Panics
///
/// Panics if the JSON cannot be deserialized or a write to `file` fails.
fn generate_laws(file: &mut BufWriter<File>) {
    /// Renders an optional string as the source text `Some("...")` or `None`.
    fn opt_literal<T: AsRef<str>>(value: Option<&T>) -> String {
        match value {
            Some(text) => format!("Some(\"{}\")", escape_string(text.as_ref())),
            None => "None".to_string(),
        }
    }

    /// Renders strings as comma-separated, escaped string literals.
    fn list_literal<I>(items: I) -> String
    where
        I: IntoIterator,
        I::Item: AsRef<str>,
    {
        items
            .into_iter()
            .map(|item| format!("\"{}\"", escape_string(item.as_ref())))
            .collect::<Vec<_>>()
            .join(", ")
    }

    let json = include_str!("reporters_db/data/laws.json");
    let laws: HashMap<String, Vec<Law>> = serde_json::from_str(json).unwrap();

    write_law_type_definition(file);

    let mut law_arrays = Vec::new();
    let mut used_names = std::collections::HashSet::new();
    for (key, law_list) in &laws {
        // Keep the plain name when it is free; otherwise try `_1`, `_2`, ... until unique.
        let base_name = format!("LAWS_{}", sanitize_identifier(key));
        let mut array_name = base_name.clone();
        let mut suffix = 0u32;
        while !used_names.insert(array_name.clone()) {
            suffix += 1;
            array_name = format!("{}_{}", base_name, suffix);
        }

        writeln!(file, "#[allow(non_upper_case_globals)]").unwrap();
        writeln!(file, "static {}: &[Law] = &[", array_name).unwrap();
        for law in law_list {
            let examples = list_literal(law.examples.iter());
            let regexes = list_literal(law.regexes.iter());
            writeln!(file, " Law {{").unwrap();
            writeln!(
                file,
                " cite_type: \"{}\",",
                escape_string(&law.cite_type)
            )
            .unwrap();
            writeln!(file, " name: \"{}\",", escape_string(&law.name)).unwrap();
            writeln!(
                file,
                " jurisdiction: \"{}\",",
                escape_string(&law.jurisdiction)
            )
            .unwrap();
            writeln!(file, " start: {},", opt_literal(law.start.as_ref())).unwrap();
            writeln!(file, " end: {},", opt_literal(law.end.as_ref())).unwrap();
            writeln!(file, " examples: &[{}],", examples).unwrap();
            writeln!(file, " regexes: &[{}],", regexes).unwrap();
            writeln!(file, " notes: {},", opt_literal(law.notes.as_ref())).unwrap();
            writeln!(file, " href: {},", opt_literal(law.href.as_ref())).unwrap();
            writeln!(file, " }},").unwrap();
        }
        writeln!(file, "];").unwrap();
        writeln!(file).unwrap();
        law_arrays.push((key.clone(), array_name));
    }

    writeln!(
        file,
        "pub static LAWS: phf::Map<&'static str, &'static [Law]> = "
    )
    .unwrap();
    let mut map = phf_codegen::Map::new();
    for (key, array_name) in &law_arrays {
        map.entry(key.as_str(), array_name);
    }
    writeln!(file, "{};", map.build()).unwrap();
    writeln!(file).unwrap();
}

/// Writes the documented `Law` struct definition into `file`.
fn write_law_type_definition(file: &mut BufWriter<File>) {
    writeln!(file, "/// A statutory law or code publication.").unwrap();
    writeln!(file, "///").unwrap();
    writeln!(
        file,
        "/// Laws represent statutory publications such as codes, statutes, and regulations"
    )
    .unwrap();
    writeln!(
        file,
        "/// that can be cited in legal documents. Each law entry contains information"
    )
    .unwrap();
    writeln!(
        file,
        "/// about its jurisdiction, publication dates, and citation patterns."
    )
    .unwrap();
    writeln!(file, "///").unwrap();
    writeln!(file, "/// # Examples").unwrap();
    writeln!(file, "///").unwrap();
    writeln!(file, "/// ```rust").unwrap();
    writeln!(file, "/// use reporters_db::get_laws;").unwrap();
    writeln!(file, "///").unwrap();
    writeln!(file, "/// let laws = get_laws();").unwrap();
    writeln!(
        file,
        "/// for (abbrev, law_list) in laws.entries().take(3) {{"
    )
    .unwrap();
    writeln!(file, "/// for law in law_list.iter() {{").unwrap();
    writeln!(
        file,
        "/// println!(\"{{}} ({{}}): {{}}\", abbrev, law.jurisdiction, law.name);"
    )
    .unwrap();
    writeln!(file, "/// }}").unwrap();
    writeln!(file, "/// }}").unwrap();
    writeln!(file, "/// ```").unwrap();
    writeln!(file, "#[derive(Debug, Clone)]").unwrap();
    writeln!(file, "pub struct Law {{").unwrap();
    writeln!(file, " /// Type of citation this law uses.").unwrap();
    writeln!(file, " pub cite_type: &'static str,").unwrap();
    writeln!(file, " /// Full name of the law or code.").unwrap();
    writeln!(file, " pub name: &'static str,").unwrap();
    writeln!(file, " /// Jurisdiction where this law applies.").unwrap();
    writeln!(file, " pub jurisdiction: &'static str,").unwrap();
    writeln!(file, " /// Effective start date, if known.").unwrap();
    writeln!(file, " pub start: Option<&'static str>,").unwrap();
    writeln!(file, " /// End of effectiveness date, if applicable.").unwrap();
    writeln!(file, " pub end: Option<&'static str>,").unwrap();
    writeln!(file, " /// Example citations showing proper format.").unwrap();
    writeln!(file, " pub examples: &'static [&'static str],").unwrap();
    writeln!(
        file,
        " /// Regular expressions for parsing citations of this law."
    )
    .unwrap();
    writeln!(file, " pub regexes: &'static [&'static str],").unwrap();
    writeln!(file, " /// Additional notes about this law.").unwrap();
    writeln!(file, " pub notes: Option<&'static str>,").unwrap();
    writeln!(file, " /// URL reference for more information.").unwrap();
    writeln!(file, " pub href: Option<&'static str>,").unwrap();
    writeln!(file, "}}").unwrap();
    writeln!(file).unwrap();
}
/// Generates the public `STATE_ABBREVIATIONS` phf map.
///
/// Reads `reporters_db/data/state_abbreviations.json` as a string-to-string map and
/// emits each value as an escaped string literal keyed by its JSON key.
///
/// # Panics
///
/// Panics if the JSON cannot be deserialized or a write to `file` fails.
fn generate_state_abbreviations(file: &mut BufWriter<File>) {
    let raw = include_str!("reporters_db/data/state_abbreviations.json");
    let table: HashMap<String, String> = serde_json::from_str(raw).unwrap();

    writeln!(
        file,
        "pub static STATE_ABBREVIATIONS: phf::Map<&'static str, &'static str> = "
    )
    .unwrap();

    let mut builder = phf_codegen::Map::new();
    for (entry_key, entry_value) in table.iter() {
        let literal = format!("\"{}\"", escape_string(entry_value));
        builder.entry(entry_key.as_str(), literal);
    }

    writeln!(file, "{};", builder.build()).unwrap();
    writeln!(file).unwrap();
}
/// Generates the public `CASE_NAME_ABBREVIATIONS` phf map.
///
/// Reads `reporters_db/data/case_name_abbreviations.json` as a map from string to list
/// of strings and emits each list as a `&[...]` slice of escaped string literals.
///
/// # Panics
///
/// Panics if the JSON cannot be deserialized or a write to `file` fails.
fn generate_case_name_abbreviations(file: &mut BufWriter<File>) {
    let raw = include_str!("reporters_db/data/case_name_abbreviations.json");
    let table: HashMap<String, Vec<String>> = serde_json::from_str(raw).unwrap();

    writeln!(
        file,
        "pub static CASE_NAME_ABBREVIATIONS: phf::Map<&'static str, &'static [&'static str]> = "
    )
    .unwrap();

    let mut builder = phf_codegen::Map::new();
    for (entry_key, entry_values) in table.iter() {
        let mut quoted = Vec::with_capacity(entry_values.len());
        for item in entry_values {
            quoted.push(format!("\"{}\"", escape_string(item)));
        }
        let slice_literal = format!("&[{}]", quoted.join(", "));
        builder.entry(entry_key.as_str(), slice_literal);
    }

    writeln!(file, "{};", builder.build()).unwrap();
    writeln!(file).unwrap();
}
/// Generates the public `RAW_REGEXES` phf map.
///
/// Parses `reporters_db/data/regexes.json` as an arbitrary JSON value, flattens it
/// with `flatten_json` into a path-to-string map, and emits each string as an escaped
/// literal keyed by its flattened path.
///
/// # Panics
///
/// Panics if the JSON cannot be parsed or a write to `file` fails.
fn generate_regexes(file: &mut BufWriter<File>) {
    let json = include_str!("reporters_db/data/regexes.json");
    let regexes: serde_json::Value = serde_json::from_str(json).unwrap();
    let mut flattened = std::collections::HashMap::new();
    // Start with an empty prefix so top-level keys are used as-is.
    flatten_json(&regexes, String::new(), &mut flattened);
    writeln!(
        file,
        "pub static RAW_REGEXES: phf::Map<&'static str, &'static str> = "
    )
    .unwrap();
    let mut map = phf_codegen::Map::new();
    for (key, value) in &flattened {
        map.entry(key.as_str(), format!("\"{}\"", escape_string(value)));
    }
    writeln!(file, "{};", map.build()).unwrap();
    writeln!(file).unwrap();
}
fn flatten_json(
value: &serde_json::Value,
prefix: String,
flattened: &mut std::collections::HashMap<String, String>,
) {
match value {
serde_json::Value::Object(map) => {
for (key, val) in map {
if key.ends_with('#') {
continue;
}
let new_prefix = if prefix.is_empty() {
key.clone()
} else {
format!("{}.{}", prefix, key)
};
flatten_json(val, new_prefix, flattened);
}
}
serde_json::Value::String(s) => {
flattened.insert(prefix, s.clone());
}
_ => {
}
}
}