use serde::{Deserialize, Serialize};
use std::path::Path;
use super::SCHEMA_VERSION;
/// Support tier assigned to a document by [`classify_input`].
///
/// Serialised with upper-case variant names (for example `"B"`), as requested
/// by the `rename_all = "UPPERCASE"` serde attribute.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "UPPERCASE")]
#[non_exhaustive]
pub enum XfaTier {
/// No `/XFA` marker was found, or one was found without a dynamic layout token.
A,
/// Dynamic XFA with neither a FormCalc nor a JavaScript marker.
B,
/// Dynamic XFA with a FormCalc marker and no JavaScript marker.
C,
/// Dynamic XFA with a JavaScript marker (takes precedence over FormCalc).
D,
/// Never assigned by `classify_input` in this file; it is mapped to
/// [`SupportStatus::OutOfScope`] there.
E,
}
impl XfaTier {
pub const fn as_str(self) -> &'static str {
match self {
Self::A => "A",
Self::B => "B",
Self::C => "C",
Self::D => "D",
Self::E => "E",
}
}
}
/// Whether a FormCalc marker was seen in the scanned bytes.
///
/// `classify_input` sets this from a search for the byte string `x-formcalc`.
/// Variants are serialised in snake_case per the serde attribute.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum FormCalcSignal {
/// The `x-formcalc` marker was not found.
None,
/// The `x-formcalc` marker was found at least once.
Present,
}
/// Whether a JavaScript marker was seen in the scanned bytes.
///
/// `classify_input` sets this from a search for the byte string `x-javascript`.
/// Variants are serialised in snake_case per the serde attribute.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum JsSignal {
/// The `x-javascript` marker was not found.
None,
/// The `x-javascript` marker was found at least once.
Present,
}
/// Support level reported for a classified document.
///
/// `classify_input` derives this purely from the [`XfaTier`]. Variants are
/// serialised in snake_case per the serde attribute.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum SupportStatus {
/// Reported for tiers `A` and `B`.
FullySupported,
/// Reported for tier `C`; the report carries a caveat in that case.
SupportedWithCaveats,
/// Reported for tier `D`; the report carries a caveat in that case.
BestEffort,
/// Reported for tier `E`.
OutOfScope,
}
/// Result of classifying a document, as produced by [`classify_input`].
///
/// The struct is serialisable and deserialisable through serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClassificationReport {
/// Version of the report schema; filled from `SCHEMA_VERSION`.
pub schema_version: u32,
/// SDK version string; filled from `crate::api_version()`.
pub sdk_version: String,
/// Tier derived from the marker scan.
pub tier: XfaTier,
/// `true` when the byte string `/XFA` was found in the input.
pub has_xfa: bool,
/// `true` when `/XFA` was found together with a dynamic layout token.
pub is_dynamic_xfa: bool,
/// Whether the `x-formcalc` marker was found.
pub formcalc: FormCalcSignal,
/// Whether the `x-javascript` marker was found.
pub javascript: JsSignal,
/// Support level that corresponds to `tier`.
pub support_status: SupportStatus,
/// Human-readable notes; `classify_input` adds one for tier `C` and one for tier `D`.
pub caveats: Vec<String>,
/// `true` when `/XFA` was found but no dynamic layout token was.
pub low_confidence: bool,
}
/// Returns `true` when `needle` occurs as a contiguous run inside `haystack`.
///
/// The window width is derived from `needle` itself, so it can never drift out
/// of sync with the literal being searched for.
fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
    // `slice::windows` panics on a zero width; an empty needle trivially matches.
    needle.is_empty() || haystack.windows(needle.len()).any(|window| window == needle)
}

/// Classifies a document from its raw bytes using marker-based heuristics.
///
/// The input is scanned for fixed byte strings only; no parsing or stream
/// decoding is performed. The markers are:
///
/// * `/XFA` — the document carries XFA content (`has_xfa`);
/// * `layout="tb"`, `layout="lr-tb"`, `layout="row"` — dynamic layout tokens;
/// * `x-formcalc` / `x-javascript` — script content types.
///
/// Tier assignment:
///
/// * `A` — no `/XFA`, or `/XFA` without any dynamic layout token;
/// * `D` — dynamic XFA with JavaScript (JavaScript wins over FormCalc);
/// * `C` — dynamic XFA with FormCalc only;
/// * `B` — dynamic XFA without scripts.
///
/// Tier `E` is never produced here. `low_confidence` is set when `/XFA` is
/// present but no dynamic layout token was found.
///
/// Note: the `layout="row"` token was previously searched with a 13-byte
/// window although the token is 12 bytes long, so it could never match. All
/// searches now take their width from the needle.
pub fn classify_input(bytes: &[u8]) -> ClassificationReport {
    // Layout attribute values treated as evidence of a dynamic (flowed) form.
    const DYNAMIC_LAYOUT_TOKENS: [&[u8]; 3] = [
        b"layout=\"tb\"",
        b"layout=\"lr-tb\"",
        b"layout=\"row\"",
    ];

    let has_xfa = contains_bytes(bytes, b"/XFA");
    let has_formcalc = contains_bytes(bytes, b"x-formcalc");
    let has_javascript = contains_bytes(bytes, b"x-javascript");
    let has_dynamic_layout_token = DYNAMIC_LAYOUT_TOKENS
        .iter()
        .any(|&token| contains_bytes(bytes, token));
    let is_dynamic_xfa = has_xfa && has_dynamic_layout_token;

    // `!is_dynamic_xfa` already covers the "no XFA at all" case.
    let tier = if !is_dynamic_xfa {
        XfaTier::A
    } else if has_javascript {
        XfaTier::D
    } else if has_formcalc {
        XfaTier::C
    } else {
        XfaTier::B
    };

    let support_status = match tier {
        XfaTier::A | XfaTier::B => SupportStatus::FullySupported,
        XfaTier::C => SupportStatus::SupportedWithCaveats,
        XfaTier::D => SupportStatus::BestEffort,
        XfaTier::E => SupportStatus::OutOfScope,
    };

    let mut caveats = Vec::new();
    if matches!(tier, XfaTier::C) {
        caveats.push("FormCalc evaluation is supported but interaction with JS is not.".into());
    }
    if matches!(tier, XfaTier::D) {
        caveats.push("JavaScript-driven XFA layout is best-effort; fidelity is not guaranteed for layout-critical scripts.".into());
    }

    // XFA without a recognised dynamic token is assumed static, but only heuristically.
    let low_confidence = has_xfa && !has_dynamic_layout_token;

    ClassificationReport {
        schema_version: SCHEMA_VERSION,
        sdk_version: crate::api_version().to_string(),
        tier,
        has_xfa,
        is_dynamic_xfa,
        formcalc: if has_formcalc {
            FormCalcSignal::Present
        } else {
            FormCalcSignal::None
        },
        javascript: if has_javascript {
            JsSignal::Present
        } else {
            JsSignal::None
        },
        support_status,
        caveats,
        low_confidence,
    }
}
/// Reads the file at `path` fully into memory and classifies its bytes with
/// [`classify_input`].
///
/// # Errors
///
/// Returns the `std::io::Error` reported by `std::fs::read` when the file
/// cannot be read; classification itself does not fail.
pub fn classify_path(path: &Path) -> std::io::Result<ClassificationReport> {
    std::fs::read(path).map(|contents| classify_input(&contents))
}
// Unit tests for the marker-based classifier. Inputs are tiny synthetic byte
// strings, not real PDFs: only the markers that `classify_input` scans for matter.
#[cfg(test)]
mod tests {
use super::*;
// Wraps `extra` between a `%PDF-1.7` header line and a `%%EOF` trailer line.
fn fake_pdf(extra: &[u8]) -> Vec<u8> {
let mut v = b"%PDF-1.7\n".to_vec();
v.extend_from_slice(extra);
v.extend_from_slice(b"\n%%EOF\n");
v
}
// No `/XFA` marker at all: tier A, fully supported, and not low-confidence.
#[test]
fn no_xfa_is_tier_a() {
let pdf = fake_pdf(b"/Catalog\n/AcroForm null");
let r = classify_input(&pdf);
assert_eq!(r.tier, XfaTier::A);
assert!(!r.has_xfa);
assert!(matches!(r.support_status, SupportStatus::FullySupported));
assert!(!r.low_confidence);
}
// `/XFA` with a layout value that is not a dynamic token: still tier A, but
// flagged low-confidence because "static" is only inferred from absence.
#[test]
fn xfa_static_is_tier_a() {
let pdf = fake_pdf(b"/XFA [(template) (data)] layout=\"position\"");
let r = classify_input(&pdf);
assert_eq!(r.tier, XfaTier::A);
assert!(r.has_xfa);
assert!(!r.is_dynamic_xfa);
assert!(
r.low_confidence,
"static-xfa heuristic should be low-confidence"
);
}
// `/XFA` plus a dynamic layout token and no script markers: tier B.
#[test]
fn dynamic_xfa_no_scripts_is_tier_b() {
let pdf = fake_pdf(b"/XFA <subform layout=\"tb\"></subform>");
let r = classify_input(&pdf);
assert_eq!(r.tier, XfaTier::B);
assert!(r.is_dynamic_xfa);
assert!(matches!(r.support_status, SupportStatus::FullySupported));
}
// Dynamic XFA with a FormCalc script marker only: tier C with a caveat.
#[test]
fn dynamic_xfa_formcalc_is_tier_c() {
let pdf = fake_pdf(
b"/XFA <subform layout=\"tb\"><script contentType=\"application/x-formcalc\">a=1</script></subform>",
);
let r = classify_input(&pdf);
assert_eq!(r.tier, XfaTier::C);
assert!(matches!(r.formcalc, FormCalcSignal::Present));
assert!(matches!(r.javascript, JsSignal::None));
assert!(matches!(
r.support_status,
SupportStatus::SupportedWithCaveats
));
assert!(!r.caveats.is_empty());
}
// Dynamic XFA with a JavaScript script marker: tier D, best-effort support.
#[test]
fn dynamic_xfa_javascript_is_tier_d() {
let pdf = fake_pdf(
b"/XFA <subform layout=\"tb\"><script contentType=\"application/x-javascript\">x=1</script></subform>",
);
let r = classify_input(&pdf);
assert_eq!(r.tier, XfaTier::D);
assert!(matches!(r.javascript, JsSignal::Present));
assert!(matches!(r.support_status, SupportStatus::BestEffort));
}
// When both script markers are present, JavaScript decides the tier.
#[test]
fn javascript_dominates_formcalc() {
let pdf = fake_pdf(b"/XFA layout=\"tb\" x-formcalc x-javascript");
let r = classify_input(&pdf);
assert_eq!(r.tier, XfaTier::D);
}
// The report survives a JSON serialise/deserialise round trip.
#[test]
fn report_round_trips_json() {
let pdf = fake_pdf(b"/XFA layout=\"tb\" x-formcalc");
let r = classify_input(&pdf);
let json = serde_json::to_string(&r).unwrap();
let r2: ClassificationReport = serde_json::from_str(&json).unwrap();
assert_eq!(r2.tier, r.tier);
assert_eq!(r2.schema_version, SCHEMA_VERSION);
}
// Pins the schema version so that a bump has to be a deliberate change.
#[test]
fn schema_version_is_one() {
let r = classify_input(b"%PDF-1.7\n%%EOF");
assert_eq!(r.schema_version, 1);
}
// The tier appears in JSON as the bare upper-case letter.
#[test]
fn tier_serialises_as_single_letter() {
let pdf = fake_pdf(b"/XFA layout=\"tb\"");
let r = classify_input(&pdf);
let json = serde_json::to_value(&r).unwrap();
assert_eq!(json["tier"], "B");
}
}