use super::html_text::strip_html;
/// Headline terms of a securities offering, as assembled by `extract_offering`.
///
/// Every numeric field is optional because each one is recovered by an
/// independent text heuristic that may find nothing.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct OfferingSummary {
    /// Classification tag. `extract_offering` writes `"ipo"`, `"shelf"`,
    /// `"secondary"`, or an empty string when no classifying phrase is present.
    pub offering_type: String,
    /// Number of shares being offered, when one was found.
    pub shares_offered: Option<f64>,
    /// Dollar price per share, when one was found.
    pub price_per_share: Option<f64>,
    /// Dollar amount found after the phrase "gross proceeds", if any.
    pub gross_proceeds: Option<f64>,
    /// Dollar amount found after the phrase "net proceeds", if any.
    pub net_proceeds: Option<f64>,
}
/// One row recovered from a selling-stockholder table.
///
/// `parse_ss_row` fills the three share fields positionally from the first,
/// second and third cells of the row that parse as share counts; the
/// before/offered/after meaning is assumed from the field names.
#[derive(Debug, Clone, PartialEq)]
pub struct SellingStockholder {
    /// Trimmed text of the first cell in the row that looks like a name.
    pub holder_name: String,
    /// First share-count cell in the row.
    pub shares_before: Option<f64>,
    /// Second share-count cell in the row.
    pub shares_offered: Option<f64>,
    /// Third share-count cell in the row, if the row has one.
    pub shares_after: Option<f64>,
}
/// An underwriter named in a filing.
#[derive(Debug, Clone, PartialEq)]
pub struct Underwriter {
    /// Underwriter name; `extract_underwriters` copies it verbatim from the
    /// `UNDERWRITERS` list.
    pub underwriter_name: String,
    /// Shares allocated to this underwriter. `extract_underwriters` currently
    /// always leaves this as `None`.
    pub shares_underwritten: Option<f64>,
}
/// Summary of a filing's "Use of Proceeds" section.
#[derive(Debug, Clone, PartialEq)]
pub struct UseOfProceeds {
    /// Category label. `extract_use_of_proceeds` currently always leaves this
    /// empty.
    pub category: String,
    /// First dollar amount mentioned in the extracted narrative, if any.
    pub amount_usd: Option<f64>,
    /// Whitespace-normalised excerpt of the text following the section heading.
    pub narrative: String,
}
/// Underwriter names that `extract_underwriters` searches for.
///
/// Entries are matched as case-sensitive substrings, and results are reported
/// in the order listed here, so the order is significant.
const UNDERWRITERS: &[&str] = &[
    "Goldman Sachs",
    "Morgan Stanley",
    "J.P. Morgan",
    "JPMorgan",
    "BofA Securities",
    "Merrill Lynch",
    "Citigroup",
    "Barclays",
    "Credit Suisse",
    "Deutsche Bank",
    "Wells Fargo Securities",
    "Jefferies",
    "Cowen",
    "Evercore",
    "RBC Capital Markets",
    "UBS Securities",
    "Piper Sandler",
    "Raymond James",
    "Allen & Company",
];
/// Builds an [`OfferingSummary`] from a prospectus-like HTML document.
///
/// The markup is reduced to text with `strip_html` and all phrase matching is
/// done on an ASCII-lowercased copy.
///
/// Returns `None` when the text mentions neither "prospectus" nor
/// "registration statement", or when no share count, gross proceeds or net
/// proceeds figure could be found.
pub fn extract_offering(html: &str) -> Option<OfferingSummary> {
    let text = strip_html(html);
    let lc = text.to_ascii_lowercase();
    let mentions = |phrase: &str| lc.contains(phrase);

    // Gate: only documents that identify themselves as offering documents.
    if !(mentions("prospectus") || mentions("registration statement")) {
        return None;
    }

    // First matching rule wins, so an IPO that also says "from time to time"
    // is still tagged "ipo".
    let offering_type = match () {
        _ if mentions("initial public offering") => "ipo",
        _ if mentions("from time to time") || mentions("shelf") => "shelf",
        _ if mentions("selling stockholder") || mentions("selling securityholder") => "secondary",
        _ => "",
    };

    let shares_offered = shares_near(&text, &lc, &["we are offering", "shares of common stock"]);
    let price_per_share = match dollar_before(&text, &lc, "per share") {
        Some(price) => Some(price),
        None => dollar_after(&text, &lc, "offering price"),
    };
    let gross_proceeds = dollar_after(&text, &lc, "gross proceeds");
    let net_proceeds = dollar_after(&text, &lc, "net proceeds");

    // A price alone is not enough to report an offering.
    let nothing_found = [shares_offered, gross_proceeds, net_proceeds]
        .iter()
        .all(Option::is_none);
    if nothing_found {
        return None;
    }

    Some(OfferingSummary {
        offering_type: offering_type.to_string(),
        shares_offered,
        price_per_share,
        gross_proceeds,
        net_proceeds,
    })
}
/// Extracts selling-stockholder rows from the tables that follow the first
/// "selling stockholder" (or, failing that, "selling securityholder") mention.
///
/// The search runs on the raw HTML, case-insensitively. Only about 30,000 bytes
/// after the match are handed to `table_rows`, and at most 60 rows are
/// returned. Returns an empty vector when neither phrase occurs.
pub fn extract_selling_stockholders(html: &str) -> Vec<SellingStockholder> {
    let lc = html.to_ascii_lowercase();
    let section_start = ["selling stockholder", "selling securityholder"]
        .iter()
        .find_map(|phrase| lc.find(*phrase));
    let h = match section_start {
        Some(pos) => pos,
        None => return Vec::new(),
    };

    // Advance the window end to the next char boundary so the slice below
    // cannot split a multi-byte character. `html.len()` is always a boundary,
    // so the search cannot come up empty.
    let capped = (h + 30_000).min(html.len());
    let end = (capped..=html.len())
        .find(|&pos| html.is_char_boundary(pos))
        .unwrap_or(html.len());

    super::summary_compensation::table_rows(&html[h..end])
        .into_iter()
        .filter_map(|cells| parse_ss_row(&cells))
        .take(60)
        .collect()
}
/// Interprets one table row as a selling-stockholder entry.
///
/// The holder name is the first cell accepted by `looks_like_name`; the share
/// figures are the cells accepted by `share_count`, taken in row order.
/// Returns `None` when there is no name-like cell or fewer than two share
/// counts.
fn parse_ss_row(cells: &[String]) -> Option<SellingStockholder> {
    let name_cell = cells.iter().find(|cell| looks_like_name(cell.as_str()))?;

    let mut counts = cells.iter().filter_map(|cell| share_count(cell.as_str()));
    let before = counts.next()?;
    let offered = counts.next()?;
    let after = counts.next();

    Some(SellingStockholder {
        holder_name: name_cell.trim().to_string(),
        shares_before: Some(before),
        shares_offered: Some(offered),
        shares_after: after,
    })
}
/// Heuristic: does this table cell read like a holder name?
///
/// Accepts cells of 2 to 8 whitespace-separated words and at most 70 bytes that
/// contain at least five ASCII letters, no ASCII digits, and neither "shares"
/// nor "name of" (case-insensitive), which filters out column headers.
fn looks_like_name(cell: &str) -> bool {
    let word_count = cell.split_whitespace().count();
    if word_count < 2 || word_count > 8 {
        return false;
    }
    if cell.len() > 70 {
        return false;
    }

    let mut letters = 0usize;
    let mut has_digit = false;
    for ch in cell.chars() {
        if ch.is_ascii_alphabetic() {
            letters += 1;
        } else if ch.is_ascii_digit() {
            has_digit = true;
        }
    }
    if has_digit || letters < 5 {
        return false;
    }

    let lowered = cell.to_ascii_lowercase();
    !["shares", "name of"]
        .iter()
        .any(|header_word| lowered.contains(*header_word))
}
/// Parses a table cell as a whole-number share count.
///
/// After trimming, the cell may contain only ASCII digits, commas and spaces.
/// Cells containing `%` are rejected, as is anything that parses to less than
/// 100.
fn share_count(cell: &str) -> Option<f64> {
    let trimmed = cell.trim();
    if trimmed.contains('%') {
        return None;
    }

    let mut digits = String::with_capacity(trimmed.len());
    for ch in trimmed.chars() {
        match ch {
            '0'..='9' => digits.push(ch),
            // Thousands separators and inner spaces are ignored.
            ',' | ' ' => {}
            // Anything else (decimal point, footnote marker, dash) disqualifies the cell.
            _ => return None,
        }
    }

    let value = digits.parse::<f64>().ok()?;
    if value >= 100.0 {
        Some(value)
    } else {
        None
    }
}
/// Lists the known underwriters named near the filing's underwriting section.
///
/// The HTML is reduced to text with `strip_html`. The first occurrence of
/// "underwriting" (or, failing that, "underwriter") is located
/// case-insensitively, and roughly the next 6,000 bytes are searched for each
/// entry of `UNDERWRITERS`. Name matching is case-sensitive and results follow
/// the order of that list. `shares_underwritten` is always `None`.
///
/// Returns an empty vector when neither keyword occurs.
pub fn extract_underwriters(html: &str) -> Vec<Underwriter> {
    let text = strip_html(html);
    let lc = text.to_ascii_lowercase();
    let Some(idx) = lc.find("underwriting").or_else(|| lc.find("underwriter")) else {
        return Vec::new();
    };

    // Slicing a `str` in the middle of a multi-byte character panics, so move
    // the window end forward to the next character boundary.
    let mut end = (idx + 6_000).min(text.len());
    while end < text.len() && !text.is_char_boundary(end) {
        end += 1;
    }
    let section = &text[idx..end];

    let mut out: Vec<Underwriter> = Vec::new();
    for name in UNDERWRITERS {
        if section.contains(name) && !out.iter().any(|u| u.underwriter_name == *name) {
            out.push(Underwriter {
                underwriter_name: name.to_string(),
                shares_underwritten: None,
            });
        }
    }
    out
}
/// Extracts a short narrative and headline dollar figure from the
/// "Use of Proceeds" section.
///
/// The HTML is reduced to text with `strip_html`. Roughly 600 bytes following
/// the first case-insensitive occurrence of "use of proceeds" are collapsed to
/// single-spaced text. `amount_usd` is the first dollar amount in that excerpt,
/// and `narrative` is the excerpt cut to 400 characters by `truncate`.
/// `category` is always empty.
///
/// Returns `None` when the heading is absent or the excerpt is shorter than
/// 20 bytes.
pub fn extract_use_of_proceeds(html: &str) -> Option<UseOfProceeds> {
    let text = strip_html(html);
    let lc = text.to_ascii_lowercase();
    let idx = lc.find("use of proceeds")?;
    let start = idx + "use of proceeds".len();

    // Slicing a `str` in the middle of a multi-byte character panics, so move
    // the window end forward to the next character boundary.
    let mut end = (start + 600).min(text.len());
    while end < text.len() && !text.is_char_boundary(end) {
        end += 1;
    }

    let narrative = text[start..end]
        .split_whitespace()
        .collect::<Vec<_>>()
        .join(" ");
    if narrative.len() < 20 {
        return None;
    }
    Some(UseOfProceeds {
        category: String::new(),
        amount_usd: first_dollar(&narrative),
        narrative: truncate(&narrative, 400),
    })
}
/// Finds a share count written immediately before the word "shares" near one
/// of `labels`.
///
/// * `text` - the document text; only its length is used, to bound the window.
/// * `lc` - lowercased copy of `text` with identical byte offsets; all
///   searching happens here.
/// * `labels` - lowercase phrases tried in order.
///
/// For each label found in `lc`, a window covering the label plus about 120
/// bytes is scanned for the first "shares". The run of digits and commas
/// directly before it (ignoring whitespace) is parsed. The first value of at
/// least 1,000 is returned; `None` if no label yields one.
///
/// NOTE(review): a label that itself begins with "shares" (for example
/// "shares of common stock") matches that word at offset 0 of its own window,
/// leaving no preceding text to read a number from, so such a label never
/// produces a value. That behavior is left unchanged here.
fn shares_near(text: &str, lc: &str, labels: &[&str]) -> Option<f64> {
    // Bound by both inputs so a length mismatch cannot index past `lc`.
    let limit = text.len().min(lc.len());
    for label in labels {
        let Some(idx) = lc.find(label) else {
            continue;
        };

        // Slicing a `str` in the middle of a multi-byte character panics, so
        // move the window end forward to the next character boundary.
        let mut end = (idx + label.len() + 120).min(limit);
        while end < lc.len() && !lc.is_char_boundary(end) {
            end += 1;
        }
        let hay = &lc[idx..end];

        if let Some(s) = hay.find("shares") {
            // Walk backwards from "shares" over whitespace, then over the
            // digits and commas, and restore left-to-right order.
            let num: String = hay[..s]
                .chars()
                .rev()
                .skip_while(|c| c.is_whitespace())
                .take_while(|c| c.is_ascii_digit() || *c == ',')
                .filter(|c| c.is_ascii_digit())
                .collect::<Vec<char>>()
                .into_iter()
                .rev()
                .collect();
            if let Ok(v) = num.parse::<f64>() {
                if v >= 1_000.0 {
                    return Some(v);
                }
            }
        }
    }
    None
}
/// Returns the first dollar amount within about 120 bytes after `label`.
///
/// * `text` - the document text the amount is read from.
/// * `lc` - lowercased copy of `text` with identical byte offsets; `label` is
///   searched here, so `label` should be lowercase.
///
/// Returns `None` when the label is absent or no parsable `$` amount follows it
/// inside the window.
fn dollar_after(text: &str, lc: &str, label: &str) -> Option<f64> {
    let idx = lc.find(label)?;
    let from = idx + label.len();

    // Slicing a `str` in the middle of a multi-byte character panics, so move
    // the window end forward to the next character boundary.
    let mut end = (from + 120).min(text.len());
    while end < text.len() && !text.is_char_boundary(end) {
        end += 1;
    }

    // `get` rather than indexing: if `lc` and `text` ever disagree on offsets,
    // report "not found" instead of panicking.
    first_dollar(text.get(from..end)?)
}
/// Returns the dollar amount that most closely precedes `label`.
///
/// * `text` - the document text the amount is read from.
/// * `lc` - lowercased copy of `text` with identical byte offsets; `label` is
///   searched here.
///
/// Looks back about 40 bytes from the first occurrence of `label` and parses
/// the amount starting at the last `$` in that window. Returns `None` when the
/// label is absent, the window has no `$`, or no number follows it.
fn dollar_before(text: &str, lc: &str, label: &str) -> Option<f64> {
    let label_at = lc.find(label)?;

    // Step the window start left until it sits on a char boundary; offset 0
    // always is one, so the search cannot come up empty.
    let earliest = label_at.saturating_sub(40);
    let window_start = (0..=earliest)
        .rev()
        .find(|&pos| text.is_char_boundary(pos))
        .unwrap_or(0);

    let window = &text[window_start..label_at];
    window
        .rfind('$')
        .and_then(|dollar_at| first_dollar(&window[dollar_at..]))
}
/// Parses the amount that follows the first `$` in `s`.
///
/// Whitespace after the `$` is skipped, commas are ignored, and a single
/// decimal point is accepted. If "billion" or "million" (any case) appears
/// within the 12 characters after the number, the value is scaled accordingly,
/// with "billion" checked first.
///
/// Returns `None` when there is no `$` or no digit follows the first one; later
/// `$` signs are not tried.
fn first_dollar(s: &str) -> Option<f64> {
    let (_, rest) = s.split_once('$')?;
    let rest = rest.trim_start();

    // Length of the leading run of digits, commas and at most one '.'.
    // Every accepted character is ASCII, so the char count is a byte offset.
    let mut dot_used = false;
    let numeric_len = rest
        .chars()
        .take_while(|&ch| match ch {
            '0'..='9' | ',' => true,
            '.' if !dot_used => {
                dot_used = true;
                true
            }
            _ => false,
        })
        .count();
    let (numeric, remainder) = rest.split_at(numeric_len);

    let digits: String = numeric.chars().filter(|&ch| ch != ',').collect();
    if !digits.bytes().any(|b| b.is_ascii_digit()) {
        return None;
    }
    let base: f64 = digits.parse().ok()?;

    let suffix = remainder
        .chars()
        .take(12)
        .collect::<String>()
        .to_ascii_lowercase();
    let scale = if suffix.contains("billion") {
        1_000_000_000.0
    } else if suffix.contains("million") {
        1_000_000.0
    } else {
        1.0
    };
    Some(base * scale)
}
/// Shortens `s` to at most `max_chars` characters, appending "…" when anything
/// was cut off. Counts `char`s, not bytes, so multi-byte text is never split.
fn truncate(s: &str, max_chars: usize) -> String {
    // The character at index `max_chars` exists only when `s` is too long; its
    // byte offset is where the kept prefix ends.
    match s.char_indices().nth(max_chars) {
        None => s.to_string(),
        Some((cut, _)) => {
            let mut shortened = String::with_capacity(cut + '…'.len_utf8());
            shortened.push_str(&s[..cut]);
            shortened.push('…');
            shortened
        }
    }
}
// Unit tests that drive the public extractors with small inline HTML fixtures.
#[cfg(test)]
mod tests {
use super::*;
// Minimal IPO prospectus fixture: states the share count, the per-share price
// and the net proceeds, and has "Use of Proceeds" and "Underwriting" sections.
const IPO: &str = r#"<html><body>
<p>PROSPECTUS</p>
<p>Acme Robotics, Inc. — Initial Public Offering</p>
<p>We are offering 10,000,000 shares of common stock. The initial
public offering price is $18.00 per share. We estimate that the
net proceeds from this offering will be approximately $168 million.</p>
<h2>Use of Proceeds</h2>
<p>We intend to use the net proceeds for working capital, research
and development, and general corporate purposes.</p>
<h2>Underwriting</h2>
<p>Goldman Sachs & Co. LLC and Morgan Stanley are acting as
representatives of the underwriters.</p>
</body></html>"#;
// The fixture is tagged "ipo" and yields the share count, the price, and net
// proceeds with the "million" multiplier applied.
#[test]
fn extracts_ipo_summary() {
let o = extract_offering(IPO).expect("offering");
assert_eq!(o.offering_type, "ipo");
assert_eq!(o.shares_offered, Some(10_000_000.0));
assert_eq!(o.price_per_share, Some(18.0));
assert_eq!(o.net_proceeds, Some(168_000_000.0));
}
// Both underwriters named in the fixture's Underwriting section are reported.
#[test]
fn extracts_underwriters() {
let u = extract_underwriters(IPO);
let names: Vec<&str> = u.iter().map(|x| x.underwriter_name.as_str()).collect();
assert!(names.contains(&"Goldman Sachs"), "got {names:?}");
assert!(names.contains(&"Morgan Stanley"), "got {names:?}");
}
// The narrative carries the text that follows the "Use of Proceeds" heading.
#[test]
fn extracts_use_of_proceeds() {
let uop = extract_use_of_proceeds(IPO).expect("use of proceeds");
assert!(uop.narrative.to_lowercase().contains("working capital"));
}
// A document with neither "prospectus" nor "registration statement" is ignored.
#[test]
fn non_prospectus_returns_none() {
assert!(extract_offering("<html><body>Item 5.02 ...</body></html>").is_none());
}
// The header row is skipped and both data rows are parsed, with the numeric
// columns read in order.
#[test]
fn extracts_selling_stockholders() {
let html = r#"<html><body>
<h2>Selling Stockholders</h2>
<table>
<tr><th>Name</th><th>Owned Before</th><th>Offered</th><th>Owned After</th></tr>
<tr><td>Riverstone Capital Partners</td><td>2,500,000</td><td>1,000,000</td><td>1,500,000</td></tr>
<tr><td>Helen R. Vance</td><td>800,000</td><td>300,000</td><td>500,000</td></tr>
</table>
</body></html>"#;
let rows = extract_selling_stockholders(html);
assert_eq!(rows.len(), 2, "got {rows:?}");
assert_eq!(rows[0].holder_name, "Riverstone Capital Partners");
assert_eq!(rows[0].shares_before, Some(2_500_000.0));
assert_eq!(rows[0].shares_offered, Some(1_000_000.0));
assert_eq!(rows[1].holder_name, "Helen R. Vance");
}
}