use super::html_text::strip_html;
/// Headline figures pulled from the text of an earnings press release.
///
/// Produced by `extract_earnings_release`. Numeric fields are `None` when no
/// matching figure was found; text fields are empty strings in that case.
#[derive(Debug, Clone, PartialEq)]
pub struct EarningsRelease {
/// Date text taken from the release, starting at a capitalized month name
/// (for example `September 30, 2024`). Empty when no such text was found.
pub period_end_date: String,
/// Lowercase period phrase such as `third quarter` or `full year`.
/// Empty when none of the known phrases occurs in the release.
pub fiscal_period: String,
/// Dollar amount found after a revenue / net sales label, scaled by a
/// trailing "million" or "billion".
pub revenue: Option<f64>,
/// Dollar amount found after a "net income" or "net loss" label, scaled the
/// same way. The extractor applies no sign, so a loss is stored as the
/// positive figure.
pub net_income: Option<f64>,
/// Per-share dollar amount nearest a basic-EPS label; negative when the
/// figure is written in parentheses.
pub eps_basic: Option<f64>,
/// Per-share dollar amount nearest a diluted-EPS label; negative when the
/// figure is written in parentheses.
pub eps_diluted: Option<f64>,
/// Lower bound of revenue guidance. `extract_earnings_release` always sets
/// this to `None`.
pub guidance_revenue_low: Option<f64>,
/// Upper bound of revenue guidance. `extract_earnings_release` always sets
/// this to `None`.
pub guidance_revenue_high: Option<f64>,
/// Lower bound of EPS guidance. `extract_earnings_release` always sets this
/// to `None`.
pub guidance_eps_low: Option<f64>,
/// Upper bound of EPS guidance. `extract_earnings_release` always sets this
/// to `None`.
pub guidance_eps_high: Option<f64>,
}
/// Capitalized English month names in calendar order.
///
/// `period_end` searches the original-case text for these, so the match is
/// case-sensitive ("september" or "SEPTEMBER" will not be found).
const MONTHS: &[&str] = &[
"January",
"February",
"March",
"April",
"May",
"June",
"July",
"August",
"September",
"October",
"November",
"December",
];
/// Pulls headline earnings figures out of an HTML press release.
///
/// The markup is reduced to text with `strip_html`, then searched with simple
/// label heuristics.
///
/// Returns `None` when the text does not look like an earnings release (it
/// must mention a top-line term and a bottom-line term), or when none of
/// revenue, net income and diluted EPS could be found. Basic EPS alone is not
/// enough to produce a result. The guidance fields are always `None`.
pub fn extract_earnings_release(html: &str) -> Option<EarningsRelease> {
    let text = strip_html(html);
    let lc = text.to_ascii_lowercase();
    let mentions = |needle: &str| lc.contains(needle);

    // Cheap plausibility gate: a release talks about sales *and* profit.
    if !(mentions("revenue") || mentions("net sales")) {
        return None;
    }
    if !(mentions("net income") || mentions("net loss") || mentions("per share")) {
        return None;
    }

    const REVENUE_LABELS: &[&str] = &["total revenue", "net revenue", "net sales", "revenue"];
    const NET_INCOME_LABELS: &[&str] = &["net income", "net loss"];
    const DILUTED_LABELS: &[&str] = &["per diluted share", "diluted earnings per share"];
    const BASIC_LABELS: &[&str] = &["per basic share", "basic earnings per share"];

    let revenue = money_near(&text, &lc, REVENUE_LABELS);
    let net_income = money_near(&text, &lc, NET_INCOME_LABELS);
    let eps_diluted = eps_near(&text, &lc, DILUTED_LABELS);
    let eps_basic = eps_near(&text, &lc, BASIC_LABELS);

    // At least one of the three headline numbers has to be present.
    if revenue.or(net_income).or(eps_diluted).is_none() {
        return None;
    }

    let period_end_date = period_end(&text, &lc);
    let fiscal_period = fiscal_period(&lc);

    Some(EarningsRelease {
        period_end_date,
        fiscal_period,
        revenue,
        net_income,
        eps_basic,
        eps_diluted,
        guidance_revenue_low: None,
        guidance_revenue_high: None,
        guidance_eps_low: None,
        guidance_eps_high: None,
    })
}
/// Returns the first dollar amount that follows any of `labels`.
///
/// `lc` must be the ASCII-lowercased copy of `text`; ASCII lowercasing keeps
/// byte offsets identical, so a position found in `lc` can index `text`.
/// Labels are tried in order, and for each label only its first occurrence is
/// examined. The search window is the label plus roughly the next 140 bytes.
///
/// Returns `None` when no label is followed by a parsable `$` amount.
fn money_near(text: &str, lc: &str, labels: &[&str]) -> Option<f64> {
    // How far past the label to look for the amount, in bytes.
    const REACH: usize = 140;

    labels.iter().find_map(|label| {
        let idx = lc.find(*label)?;

        // A raw byte offset can fall inside a multi-byte character (dashes,
        // curly quotes, NBSP ...) and slicing there panics, so move the end
        // forward to the next character boundary, as `eps_near` does.
        let mut end = (idx + label.len() + REACH).min(text.len());
        while end < text.len() && !text.is_char_boundary(end) {
            end += 1;
        }

        // `get` rather than indexing: a mismatched `text`/`lc` pair yields
        // "no value" instead of a panic.
        first_dollar(text.get(idx..end)?)
    })
}
/// Finds the per-share dollar figure closest to one of `labels`.
///
/// `lc` is expected to be the ASCII-lowercased copy of `text`, so offsets
/// found in `lc` index `text` directly. Labels are tried in order; for each,
/// only the first occurrence is used and a window of about 60 bytes on either
/// side (widened to character boundaries) is scanned for `$` amounts.
///
/// A candidate must contain a decimal point and be below 1,000, which filters
/// out whole-dollar totals such as "$512 million". Among the candidates the
/// one whose `$` sits nearest the start of the label wins, the earlier one on
/// a tie. Amounts written as `$(0.30)` or `($0.30)` are returned negative.
fn eps_near(text: &str, lc: &str, labels: &[&str]) -> Option<f64> {
    // Bytes of context examined on each side of the label.
    const REACH: usize = 60;

    labels.iter().find_map(|label| {
        let label_at = lc.find(*label)?;

        // Widen the window outwards until both ends sit on char boundaries.
        let lo = (0..=label_at.saturating_sub(REACH))
            .rev()
            .find(|&i| text.is_char_boundary(i))
            .unwrap_or(0);
        let hi_floor = (label_at + label.len() + REACH).min(text.len());
        let hi = (hi_floor..=text.len())
            .find(|&i| text.is_char_boundary(i))
            .unwrap_or(text.len());

        let window = &text[lo..hi];
        let anchor = label_at - lo;

        window
            .match_indices('$')
            .filter_map(|(at, _)| {
                let lead = window[..at].trim_end();
                let rest = window[at + 1..].trim_start();
                // Accounting-style negatives: "($0.30)" or "$(0.30)".
                let negative = lead.ends_with('(') || rest.starts_with('(');

                // Leading number: digits, thousands commas (dropped) and at
                // most one decimal point; anything else ends the number.
                let mut dotted = false;
                let number: String = rest
                    .trim_start_matches('(')
                    .chars()
                    .take_while(|&c| match c {
                        '0'..='9' | ',' => true,
                        '.' => !std::mem::replace(&mut dotted, true),
                        _ => false,
                    })
                    .filter(|&c| c != ',')
                    .collect();

                // Per-share values carry cents; whole numbers are totals.
                if !number.contains('.') {
                    return None;
                }
                let magnitude = number.parse::<f64>().ok()?;
                if magnitude >= 1_000.0 {
                    return None;
                }

                let signed = if negative { -magnitude } else { magnitude };
                Some((at.abs_diff(anchor), signed))
            })
            // `min_by_key` keeps the first of equally near candidates.
            .min_by_key(|&(distance, _)| distance)
            .map(|(_, value)| value)
    })
}
/// Parses the first `$` amount in `s` and applies a "million" / "billion"
/// multiplier when one follows within the next 12 characters.
///
/// Thousands separators are ignored and at most one decimal point is accepted,
/// so a sentence-ending period after a decimal amount ("$1.85.") ends the
/// number instead of corrupting it.
///
/// Returns `None` when `s` has no `$`, or when the first `$` is not followed
/// by at least one digit. Later `$` signs are not tried.
fn first_dollar(s: &str) -> Option<f64> {
    let dollar = s.find('$')?;
    let after = s[dollar + 1..].trim_start();

    let mut digits = String::new();
    // Number of characters that belong to the number. They are all ASCII, so
    // this is also the byte offset of the text that follows it.
    let mut consumed = 0;
    let mut seen_dot = false;
    for c in after.chars() {
        match c {
            '0'..='9' => digits.push(c),
            ',' => {}
            '.' if !seen_dot => {
                seen_dot = true;
                digits.push('.');
            }
            // Includes a second '.', which is punctuation, not part of the number.
            _ => break,
        }
        consumed += 1;
    }

    if !digits.chars().any(|c| c.is_ascii_digit()) {
        return None;
    }
    let value: f64 = digits.parse().ok()?;

    let tail = after[consumed..]
        .chars()
        .take(12)
        .collect::<String>()
        .to_ascii_lowercase();
    let multiplier = if tail.contains("billion") {
        1_000_000_000.0
    } else if tail.contains("million") {
        1_000_000.0
    } else {
        1.0
    };

    Some(value * multiplier)
}
/// Extracts the period-end date text, e.g. `September 30, 2024`.
///
/// `lc` must be the ASCII-lowercased copy of `text` so that offsets found in
/// `lc` index `text`. The search starts right after the first "ended" (or at
/// the beginning when that word is absent) and looks at the first occurrence
/// of each capitalized month name, in the order they appear in the text, so
/// the date closest to "ended" wins over a later comparison date.
///
/// The result runs from the month name to the end of the year (the first run
/// of four digits within 18 characters); when no year is present it ends at
/// the last digit in that window. Returns an empty string when no month name
/// is followed by a digit.
fn period_end(text: &str, lc: &str) -> String {
    // "September 30, 2024" is the longest date expected: 18 characters.
    const WINDOW_CHARS: usize = 18;

    let from = lc.find("ended").map_or(0, |p| p + "ended".len());
    let hay = text.get(from..).unwrap_or("");

    // Candidate positions in document order, not calendar order.
    let mut starts: Vec<usize> = MONTHS
        .iter()
        .filter_map(|month| hay.find(*month))
        .collect();
    starts.sort_unstable();

    for start in starts {
        let window: String = hay[start..].chars().take(WINDOW_CHARS).collect();

        // Cut after the year when there is one, otherwise after the last
        // digit seen. `cut` always follows an ASCII digit, so it is a valid
        // character boundary.
        let mut cut = None;
        let mut run = 0;
        for (i, b) in window.bytes().enumerate() {
            if b.is_ascii_digit() {
                run += 1;
                cut = Some(i + 1);
                if run == 4 {
                    break;
                }
            } else {
                run = 0;
            }
        }

        // No digit at all: this month name is not part of a date.
        if let Some(cut) = cut {
            return window[..cut].to_string();
        }
    }
    String::new()
}
/// Names the fiscal period a release reports on, e.g. `third quarter`.
///
/// `lc` is the lowercased release text. Quarter phrases take priority over
/// year phrases. Within a group the phrase that occurs earliest in the text
/// wins, so the headline period beats a later comparison such as
/// "up from the second quarter". Because "fiscal fourth quarter" starts
/// before the "fourth quarter" it contains, the more specific phrase is
/// returned when it is the earliest match.
///
/// Returns an empty string when no known phrase is present.
fn fiscal_period(lc: &str) -> String {
    const QUARTERS: &[&str] = &[
        "first quarter",
        "second quarter",
        "third quarter",
        "fourth quarter",
        "fiscal fourth quarter",
    ];
    const YEARS: &[&str] = &["full year", "fiscal year"];

    /// Phrase from `phrases` with the smallest byte offset in `lc`.
    fn earliest(lc: &str, phrases: &[&'static str]) -> Option<&'static str> {
        phrases
            .iter()
            .filter_map(|&phrase| lc.find(phrase).map(|at| (at, phrase)))
            .min_by_key(|&(at, _)| at)
            .map(|(_, phrase)| phrase)
    }

    earliest(lc, QUARTERS)
        .or_else(|| earliest(lc, YEARS))
        .unwrap_or("")
        .to_string()
}
// Unit tests for the earnings-release extractor and its amount parser.
#[cfg(test)]
mod tests {
use super::*;
// Minimal press release: headline with the fiscal period, then one paragraph
// carrying the period-end date, revenue, net income and diluted EPS.
const SAMPLE: &str = r#"<html><body>
<h1>Acme Corp Reports Third Quarter 2024 Results</h1>
<p>Acme Corp today announced financial results for the third
quarter ended September 30, 2024. Total revenue was $4.2 billion,
an increase of 12%. Net income was $512 million. Diluted earnings
per diluted share were $1.85.</p>
</body></html>"#;
// End-to-end check: every headline field is extracted from SAMPLE, with the
// "billion" / "million" multipliers applied.
#[test]
fn extracts_headline_figures() {
let e = extract_earnings_release(SAMPLE).expect("earnings release");
assert_eq!(e.revenue, Some(4_200_000_000.0));
assert_eq!(e.net_income, Some(512_000_000.0));
assert_eq!(e.eps_diluted, Some(1.85));
assert_eq!(e.fiscal_period, "third quarter");
// `contains` rather than equality: the extracted text may carry extra
// characters around the date.
assert!(e.period_end_date.contains("September 30, 2024"));
}
// A document with no revenue or income vocabulary is rejected outright.
#[test]
fn non_earnings_document_returns_none() {
let html = "<html><body><p>Item 5.02 The director resigned.</p></body></html>";
assert!(extract_earnings_release(html).is_none());
}
// A per-share figure written as "$(0.30)" is reported as a negative value.
#[test]
fn loss_per_share_is_negative() {
let html = "<html><body><p>Net sales were $90 million. Net loss \
was $5 million, or $(0.30) per diluted share.</p></body></html>";
let e = extract_earnings_release(html).expect("release");
assert_eq!(e.eps_diluted, Some(-0.30));
}
// `first_dollar` scales by the word after the number and yields `None` when
// the input has no dollar sign.
#[test]
fn first_dollar_handles_multiplier() {
assert_eq!(first_dollar("was $4.2 billion, up"), Some(4_200_000_000.0));
assert_eq!(first_dollar("$512 million"), Some(512_000_000.0));
assert_eq!(first_dollar("no figure"), None);
}
}