use super::PatternMatcher;
use once_cell::sync::Lazy;
use regex::Regex;
/// Matcher that locates Verizon account numbers in free text.
///
/// This is a field-less unit type: the regular expressions it relies on live in
/// function-local statics inside its inherent `impl`, so constructing or cloning
/// a value carries no state.
#[derive(Debug, Clone)]
pub struct VerizonAccountMatcher;
impl VerizonAccountMatcher {
    /// Creates a matcher. The type holds no state, so this is free.
    pub fn new() -> Self {
        Self
    }

    /// Finds the most plausible account number in `text`.
    ///
    /// Candidates are gathered from the most to the least trusted pattern:
    ///
    /// 1. `#########-#####` preceded by an "account"/"acct" label,
    /// 2. any bare `#########-#####`,
    /// 3. 14 contiguous digits preceded by a label,
    /// 4. any bare 14-digit run,
    /// 5. 10 to 15 digits (spaces and dashes stripped) following a label.
    ///
    /// The first candidate that is exactly 14 digits long wins; when there is
    /// none, the first candidate of any length is returned. The result contains
    /// ASCII digits only (the dash of the 9-5 form is removed). Returns `None`
    /// when no pattern matches.
    pub fn find_account_number(text: &str) -> Option<String> {
        // Pushed in priority order, so position in the vector *is* the priority.
        let mut candidates: Vec<String> = Vec::new();

        // Groups 1 and 2 are mandatory in both 9-5 patterns, so indexing the
        // captures cannot panic once a match exists.
        candidates.extend(
            Self::pattern_9_5_with_context()
                .captures(text)
                .map(|caps| format!("{}{}", &caps[1], &caps[2])),
        );
        candidates.extend(
            Self::pattern_9_5()
                .captures_iter(text)
                .map(|caps| format!("{}{}", &caps[1], &caps[2])),
        );
        candidates.extend(
            Self::pattern_14_with_context()
                .captures(text)
                .map(|caps| caps[1].to_string()),
        );
        candidates.extend(
            Self::pattern_14()
                .captures_iter(text)
                .map(|caps| caps[1].to_string()),
        );
        candidates.extend(
            Self::pattern_generic()
                .captures_iter(text)
                .map(|caps| {
                    caps[1]
                        .chars()
                        .filter(char::is_ascii_digit)
                        .collect::<String>()
                })
                .filter(|digits| (10..=15).contains(&digits.len())),
        );

        // Every candidate is pure ASCII, so byte length equals digit count.
        let canonical = candidates.iter().position(|num| num.len() == 14);
        match canonical {
            Some(idx) => Some(candidates.swap_remove(idx)),
            None => candidates.into_iter().next(),
        }
    }

    /// Labelled `#########-#####` form.
    ///
    /// `[0-9]` is used instead of `\d` because `\d` also matches non-ASCII
    /// decimal digits in the `regex` crate. The trailing `\b` stops the match
    /// from being cut out of a longer digit run.
    fn pattern_9_5_with_context() -> &'static Regex {
        static PATTERN: Lazy<Regex> = Lazy::new(|| {
            Regex::new(
                r"(?i)(?:account|acct)(?:\s*(?:number|num|no|#))?\s*:?\s*([0-9]{9})-([0-9]{5})\b",
            )
            .expect("Valid regex")
        });
        &PATTERN
    }

    /// Bare `#########-#####` form, delimited by word boundaries on both sides
    /// so it cannot match the tail/head of longer digit runs.
    fn pattern_9_5() -> &'static Regex {
        static PATTERN: Lazy<Regex> =
            Lazy::new(|| Regex::new(r"\b([0-9]{9})-([0-9]{5})\b").expect("Valid regex"));
        &PATTERN
    }

    /// Labelled run of exactly 14 ASCII digits; the trailing `\b` rejects
    /// longer numbers instead of silently truncating them to 14 digits.
    fn pattern_14_with_context() -> &'static Regex {
        static PATTERN: Lazy<Regex> = Lazy::new(|| {
            Regex::new(r"(?i)(?:account|acct)(?:\s*(?:number|num|no|#))?\s*:?\s*([0-9]{14})\b")
                .expect("Valid regex")
        });
        &PATTERN
    }

    /// Bare run of exactly 14 ASCII digits delimited by word boundaries.
    fn pattern_14() -> &'static Regex {
        static PATTERN: Lazy<Regex> =
            Lazy::new(|| Regex::new(r"\b([0-9]{14})\b").expect("Valid regex"));
        &PATTERN
    }

    /// Labelled fallback: 10 to 20 characters made of ASCII digits, whitespace
    /// and dashes. The caller strips the separators and checks the digit count.
    fn pattern_generic() -> &'static Regex {
        static PATTERN: Lazy<Regex> = Lazy::new(|| {
            Regex::new(
                r"(?i)(?:account|acct)(?:\s*(?:number|num|no|#))?\s*:?\s*([0-9\s\-]{10,20})",
            )
            .expect("Valid regex")
        });
        &PATTERN
    }
}
impl Default for VerizonAccountMatcher {
fn default() -> Self {
Self::new()
}
}
impl PatternMatcher for VerizonAccountMatcher {
    /// Returns the regex for a bare 14-digit account number.
    ///
    /// NOTE(review): `extract_all` scans with the 9-5 dashed regex rather than
    /// this one — confirm the asymmetry is intended by the trait's contract.
    fn pattern(&self) -> &Regex {
        Self::pattern_14()
    }

    /// Returns every `#########-#####` occurrence in `text`, borrowed from the
    /// input and still containing the dash.
    fn extract_all<'a>(&self, text: &'a str) -> Vec<&'a str> {
        Self::pattern_9_5()
            .find_iter(text)
            .map(|m| m.as_str())
            .collect()
    }

    /// Extracts the best account-number candidate from `text`; see
    /// `VerizonAccountMatcher::find_account_number`.
    fn normalize(&self, text: &str) -> Option<String> {
        Self::find_account_number(text)
    }

    /// Produces formatting variants of an already-normalized account number.
    ///
    /// The first element is always `normalized` itself. Further variants depend
    /// on its byte length:
    ///
    /// * 14: `9-5` and `9 5` groupings,
    /// * 12: `4-4-4` and `6-6` groupings,
    /// * any other length of 10 or more: split in half with `-` and with a space,
    /// * shorter: no extra variants.
    ///
    /// Split points are byte offsets. If an offset does not fall on a UTF-8
    /// character boundary (only possible for non-ASCII input) that variant is
    /// skipped instead of panicking.
    fn generate_variants(&self, normalized: &str) -> Vec<String> {
        /// Splits `s` at byte offset `mid`, or returns `None` when `mid` is out
        /// of range or not on a character boundary.
        fn split_checked(s: &str, mid: usize) -> Option<(&str, &str)> {
            Some((s.get(..mid)?, s.get(mid..)?))
        }

        let mut variants = vec![normalized.to_string()];
        match normalized.len() {
            14 => {
                if let Some((prefix, suffix)) = split_checked(normalized, 9) {
                    variants.push(format!("{}-{}", prefix, suffix));
                    variants.push(format!("{} {}", prefix, suffix));
                }
            }
            12 => {
                if let (Some(first), Some(second), Some(third)) = (
                    normalized.get(0..4),
                    normalized.get(4..8),
                    normalized.get(8..12),
                ) {
                    variants.push(format!("{}-{}-{}", first, second, third));
                }
                if let Some((head, tail)) = split_checked(normalized, 6) {
                    variants.push(format!("{}-{}", head, tail));
                }
            }
            len if len >= 10 => {
                if let Some((head, tail)) = split_checked(normalized, len / 2) {
                    variants.push(format!("{}-{}", head, tail));
                    variants.push(format!("{} {}", head, tail));
                }
            }
            _ => {}
        }
        variants
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A labelled 9-5 number comes back with the dash removed.
    #[test]
    fn test_verizon_account_extraction() {
        let found = VerizonAccountMatcher::find_account_number("Account Number: 123456789-00001");
        assert_eq!(found.as_deref(), Some("12345678900001"));
    }

    /// A labelled number beats an unlabelled one that appears earlier in the text.
    #[test]
    fn test_account_priority() {
        let input = "Random: 999999999-99999 Account: 123456789-00001";
        let found = VerizonAccountMatcher::find_account_number(input);
        assert_eq!(found.as_deref(), Some("12345678900001"));
    }

    /// A 14-digit number yields itself plus the dashed and spaced 9-5 forms.
    #[test]
    fn test_account_variants() {
        let variants = VerizonAccountMatcher::new().generate_variants("12345678900001");
        for expected in &["12345678900001", "123456789-00001", "123456789 00001"] {
            assert!(variants.iter().any(|v| v == expected));
        }
        assert_eq!(variants.len(), 3);
    }

    /// Text that mentions an account but carries no digits produces nothing.
    #[test]
    fn test_no_account_found() {
        let found =
            VerizonAccountMatcher::find_account_number("This document has no account number");
        assert_eq!(found, None);
    }
}