#![allow(dead_code)]
use std::collections::HashMap;
use std::sync::LazyLock;
use serde::Deserialize;
use crate::client::AkShareClient;
use crate::error::{Error, Result};
/// Matches one `<table>` element; group 1 is its inner markup.
/// `(?is)` makes the match case-insensitive and lets `.` span newlines.
/// The match is non-greedy, so with nested tables it ends at the first `</table>`.
static RE_TABLE: LazyLock<regex::Regex> =
LazyLock::new(|| regex::Regex::new(r"(?is)<table[^>]*>(.*?)</table>").unwrap());
/// Matches a `<thead>` section; group 1 is its inner markup.
static RE_THEAD: LazyLock<regex::Regex> =
LazyLock::new(|| regex::Regex::new(r"(?is)<thead[^>]*>(.*?)</thead>").unwrap());
/// Matches one `<tr>` row; group 1 is the row's inner markup.
static RE_TR: LazyLock<regex::Regex> =
LazyLock::new(|| regex::Regex::new(r"(?is)<tr[^>]*>(.*?)</tr>").unwrap());
/// Matches one `<th>` header cell; group 1 is the cell's inner markup.
/// NOTE: `<th[^>]*>` would also match a `<thead ...>` opening tag; in this file it is
/// only applied to the inside of a `<tr>`.
static RE_TH: LazyLock<regex::Regex> =
LazyLock::new(|| regex::Regex::new(r"(?is)<th[^>]*>(.*?)</th>").unwrap());
/// Matches one `<td>` data cell; group 1 is the cell's inner markup.
static RE_TD: LazyLock<regex::Regex> =
LazyLock::new(|| regex::Regex::new(r"(?is)<td[^>]*>(.*?)</td>").unwrap());
/// Matches any single `<...>` tag; used to strip markup from cell text.
static RE_HTML_TAG: LazyLock<regex::Regex> =
LazyLock::new(|| regex::Regex::new(r"<[^>]+>").unwrap());
/// Captures (group 1) the inner markup of the first `<table>` that follows the
/// `id="con02-1"` attribute.
///
/// The `s` flag is required: without it `.` does not match `\n`, so the pattern
/// could only match when the attribute and the whole table sat on a single line.
/// `i` is added for consistency with the other table patterns in this file.
static RE_YEAR_TABLE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(r#"(?is)id="con02-1".*?<table[^>]*>(.*?)</table>"#).unwrap()
});
/// Matches an `<a>` element whose text is exactly four digits; group 1 is that text.
static RE_LINK_YEAR: LazyLock<regex::Regex> =
LazyLock::new(|| regex::Regex::new(r"<a[^>]*>(\d{4})</a>").unwrap());
/// Matches `<li><a href="...">text</a></li>` with no whitespace between the tags;
/// group 1 is the `href` value and group 2 is the link text.
static RE_MENU_LINK: LazyLock<regex::Regex> = LazyLock::new(|| {
regex::Regex::new(r#"<li[^>]*><a[^>]*href="([^"]*)"[^>]*>([^<]*)</a></li>"#).unwrap()
});
/// Outermost JSON object of the `getFinanceReport2022` response.
#[derive(Debug, Deserialize)]
struct SinaFinanceReportEnvelope {
// Absent or null `result` deserializes to `None`; callers report it as an upstream error.
result: Option<SinaFinanceReportResult>,
}
/// The `result` member of [`SinaFinanceReportEnvelope`].
#[derive(Debug, Deserialize)]
struct SinaFinanceReportResult {
// Payload with the report dates and the per-period reports, when present.
data: Option<SinaFinanceReportData>,
}
/// The `result.data` member of the finance report response.
#[derive(Debug, Deserialize)]
struct SinaFinanceReportData {
// List of available report periods; each `date_value` is used as a key into `report_list`.
report_date: Option<Vec<ReportDateItem>>,
// Kept as raw JSON; callers treat it as an object keyed by report period.
report_list: Option<serde_json::Value>,
}
/// One entry of `report_date` in the finance report response.
#[derive(Debug, Deserialize)]
struct ReportDateItem {
// Report period identifier; entries without it are skipped by the caller.
date_value: Option<String>,
}
/// JSON envelope with a single optional raw `data` member.
// NOTE(review): not referenced anywhere in the visible part of this file; presumably meant
// for the institute-holding detail response, which is currently parsed as a raw `Value`.
#[derive(Debug, Deserialize)]
struct SinaInstituteDetailEnvelope {
data: Option<serde_json::Value>,
}
fn parse_html_tables(html: &str) -> Vec<(Vec<String>, Vec<Vec<String>>)> {
let mut tables = Vec::new();
for table_cap in RE_TABLE.captures_iter(html) {
let table_content = &table_cap[1];
let mut headers = Vec::new();
let mut rows = Vec::new();
let header_content = RE_THEAD
.captures(table_content)
.map_or_else(|| table_content.to_string(), |c| c[1].to_string());
let mut found_header = false;
for row_cap in RE_TR.captures_iter(&header_content) {
let row_content = &row_cap[1];
let ths: Vec<String> = RE_TH
.captures_iter(row_content)
.map(|c| strip_html_tags(&c[1]).trim().to_string())
.collect();
if !ths.is_empty() && !found_header {
headers = ths;
found_header = true;
continue;
}
let tds: Vec<String> = RE_TD
.captures_iter(row_content)
.map(|c| strip_html_tags(&c[1]).trim().to_string())
.collect();
if !tds.is_empty() {
rows.push(tds);
}
}
if headers.is_empty()
&& let Some(first_row) = rows.first()
{
headers.clone_from(first_row);
rows.remove(0);
}
if !headers.is_empty() || !rows.is_empty() {
tables.push((headers, rows));
}
}
tables
}
/// Returns `s` with every `<...>` tag matched by `RE_HTML_TAG` removed.
fn strip_html_tags(s: &str) -> String {
    let without_tags = RE_HTML_TAG.replace_all(s, "");
    without_tags.into_owned()
}
/// Converts table rows into one map per row, keyed by the header at the same column.
///
/// Rows with fewer cells than there are headers are skipped; cells beyond the last
/// header are ignored. All values are stored as JSON strings. If two headers have
/// the same text, the later column wins.
fn table_to_records(
    headers: &[String],
    rows: &[Vec<String>],
) -> Vec<HashMap<String, serde_json::Value>> {
    let width = headers.len();
    let mut records = Vec::new();
    for row in rows {
        if row.len() < width {
            continue;
        }
        let record: HashMap<String, serde_json::Value> = headers
            .iter()
            .zip(row)
            .map(|(name, cell)| (name.clone(), serde_json::Value::String(cell.clone())))
            .collect();
        records.push(record);
    }
    records
}
/// Downloads `url` with `http`, parses its HTML tables and returns the table at
/// `table_index` as records.
///
/// A `Referer` header is sent when `referer` is given. Returns an empty list when
/// the page has no table at that index.
///
/// # Errors
/// Propagates request and body-read failures.
async fn fetch_html_table(
    http: &reqwest::Client,
    url: &str,
    table_index: usize,
    referer: Option<&str>,
) -> Result<Vec<HashMap<String, serde_json::Value>>> {
    let request = match referer {
        Some(ref_url) => http.get(url).header("Referer", ref_url),
        None => http.get(url),
    };
    let html = request.send().await?.text().await?;
    let tables = parse_html_tables(&html);
    match tables.get(table_index) {
        Some((headers, rows)) => Ok(table_to_records(headers, rows)),
        None => Ok(vec![]),
    }
}
impl AkShareClient {
/// Fetches one financial statement for `stock` from Sina's `getFinanceReport2022` API.
///
/// `symbol` selects the statement: "资产负债表", "利润表" or "现金流量表".
/// One record is produced per report period listed in `report_date` that also has
/// an entry in `report_list`. Each record holds the period under "报告日", the
/// string-valued metadata fields that are present, and every `item_title` →
/// `item_value` pair of the report.
///
/// # Errors
/// Returns an invalid-input error for any other `symbol`, an upstream error when
/// the response has no `result.data`, and propagates HTTP/JSON failures.
pub async fn stock_financial_report_sina(
    &self,
    stock: &str,
    symbol: &str,
) -> Result<Vec<HashMap<String, serde_json::Value>>> {
    const ENDPOINT: &str =
        "https://quotes.sina.cn/cn/api/openapi.php/CompanyFinanceService.getFinanceReport2022";
    // (JSON field, output column) pairs copied over when the field is a JSON string.
    const META_FIELDS: [(&str, &str); 5] = [
        ("data_source", "数据源"),
        ("is_audit", "是否审计"),
        ("publish_date", "公告日期"),
        ("rCurrency", "币种"),
        ("rType", "类型"),
    ];

    let source = match symbol {
        "资产负债表" => "fzb",
        "利润表" => "lrb",
        "现金流量表" => "llb",
        _ => {
            return Err(Error::invalid_input(format!(
                "unsupported symbol: {symbol}"
            )));
        }
    };
    let query = [
        ("paperCode", stock),
        ("source", source),
        ("type", "0"),
        ("page", "1"),
        ("num", "1000"),
    ];
    let payload: SinaFinanceReportEnvelope = self
        .get(ENDPOINT)
        .query(&query)
        .send()
        .await?
        .error_for_status()?
        .json()
        .await?;
    let data = payload
        .result
        .and_then(|r| r.data)
        .ok_or_else(|| Error::upstream("sina financial report missing data"))?;
    let report_list = data.report_list.unwrap_or_default();
    let periods = data
        .report_date
        .unwrap_or_default()
        .into_iter()
        .filter_map(|d| d.date_value);

    let mut all_rows = Vec::new();
    for date_str in periods {
        let Some(report) = report_list.get(&date_str) else {
            continue;
        };
        let mut row = HashMap::new();
        row.insert("报告日".to_string(), serde_json::Value::String(date_str));
        for (field, column) in META_FIELDS {
            if let Some(text) = report.get(field).and_then(|v| v.as_str()) {
                row.insert(
                    column.to_string(),
                    serde_json::Value::String(text.to_string()),
                );
            }
        }
        // Statement items come last, so an item title equal to a metadata column wins.
        let items = report.get("data").and_then(|v| v.as_array());
        for item in items.into_iter().flatten() {
            let title = item.get("item_title").and_then(|v| v.as_str());
            if let (Some(title), Some(value)) = (title, item.get("item_value")) {
                row.insert(title.to_string(), value.clone());
            }
        }
        all_rows.push(row);
    }
    Ok(all_rows)
}
/// Fetches the key-indicator summary (`source=gjzb`) for `symbol` from Sina's
/// `getFinanceReport2022` API, pivoted to one record per indicator.
///
/// `symbol` is sent as-is when it starts with "sh" or "sz"; otherwise "sh" is
/// prepended. Indicator titles come from the first report period in `report_list`
/// (in the map's iteration order). Each record holds the title under "指标" plus
/// one entry per period, keyed by the period, holding the `item_value` found at
/// the same position in that period's item list.
///
/// Returns an empty list when `report_list` is not a JSON object or has no periods.
///
/// # Errors
/// Returns an upstream error when the response has no `result.data` and propagates
/// HTTP/JSON failures.
pub async fn stock_financial_abstract(
    &self,
    symbol: &str,
) -> Result<Vec<HashMap<String, serde_json::Value>>> {
    let paper_code = if symbol.starts_with("sh") || symbol.starts_with("sz") {
        symbol.to_string()
    } else {
        format!("sh{symbol}")
    };
    let resp = self
        .get("https://quotes.sina.cn/cn/api/openapi.php/CompanyFinanceService.getFinanceReport2022")
        .query(&[
            ("paperCode", paper_code.as_str()),
            ("source", "gjzb"),
            ("type", "0"),
            ("page", "1"),
            ("num", "1000"),
        ])
        .send()
        .await?
        .error_for_status()?;
    let payload: SinaFinanceReportEnvelope = resp.json().await?;
    let data = payload
        .result
        .and_then(|r| r.data)
        .ok_or_else(|| Error::upstream("sina financial abstract missing data"))?;
    let report_list = data.report_list.unwrap_or_default();
    let serde_json::Value::Object(periods) = &report_list else {
        return Ok(vec![]);
    };

    // Resolve each period's item array once, by reference, instead of deep-cloning
    // it for every indicator.
    let columns: Vec<(&String, Option<&Vec<serde_json::Value>>)> = periods
        .iter()
        .map(|(key, report)| (key, report.get("data").and_then(|v| v.as_array())))
        .collect();
    let Some(&(_, first_items)) = columns.first() else {
        return Ok(vec![]);
    };

    let all_rows: Vec<HashMap<String, serde_json::Value>> = first_items
        .into_iter()
        .flatten()
        .enumerate()
        // `i` is the position in the item array (taken before untitled items are
        // dropped) so it stays aligned with the other periods' arrays.
        .filter_map(|(i, item)| {
            let title = item.get("item_title").and_then(|v| v.as_str())?;
            let mut row = HashMap::new();
            row.insert(
                "指标".to_string(),
                serde_json::Value::String(title.to_string()),
            );
            for &(key, items) in &columns {
                let value = items
                    .and_then(|list| list.get(i))
                    .and_then(|entry| entry.get("item_value"));
                if let Some(val) = value {
                    row.insert(key.clone(), val.clone());
                }
            }
            Some(row)
        })
        .collect();
    Ok(all_rows)
}
/// Fetches Sina's historical dividend overview page and returns its first table
/// as records.
///
/// Returns an empty list when the page contains no table.
///
/// # Errors
/// Propagates request and body-read failures.
pub async fn stock_history_dividend(&self) -> Result<Vec<HashMap<String, serde_json::Value>>> {
    const URL: &str = "https://vip.stock.finance.sina.com.cn/q/go.php/vInvestConsult/kind/lsfh/index.phtml?p=1&num=50000";
    let html = self.get(URL).send().await?.text().await?;
    let tables = parse_html_tables(&html);
    let records = tables
        .first()
        .map_or_else(Vec::new, |(headers, rows)| table_to_records(headers, rows));
    Ok(records)
}
/// Fetches the share-bonus page of `symbol` and returns one of its tables as records.
///
/// `indicator == "分红"` selects parsed table 12; any other value selects table 13.
/// If that table is missing, an empty list is returned without further requests.
/// When `date` is given, the detail page for that date is fetched as well and its
/// table 12 is returned instead; if the detail page has no such table, the
/// overview records are returned.
///
/// # Errors
/// Propagates request and body-read failures.
pub async fn stock_history_dividend_detail(
    &self,
    symbol: &str,
    indicator: &str,
    date: Option<&str>,
) -> Result<Vec<HashMap<String, serde_json::Value>>> {
    const DETAIL_URL: &str =
        "https://vip.stock.finance.sina.com.cn/corp/view/vISSUE_ShareBonusDetail.php";

    let overview_url = format!(
        "https://vip.stock.finance.sina.com.cn/corp/go.php/vISSUE_ShareBonus/stockid/{symbol}.phtml"
    );
    let overview_html = self.get(&overview_url).send().await?.text().await?;
    let overview_tables = parse_html_tables(&overview_html);
    let table_index = match indicator {
        "分红" => 12,
        _ => 13,
    };
    let Some((headers, rows)) = overview_tables.get(table_index) else {
        return Ok(vec![]);
    };

    if let Some(detail_date) = date {
        let query = [
            ("stockid", symbol),
            ("type", "1"),
            ("end_date", detail_date),
        ];
        let detail_html = self
            .get(DETAIL_URL)
            .query(&query)
            .send()
            .await?
            .text()
            .await?;
        let detail_tables = parse_html_tables(&detail_html);
        if let Some((h, r)) = detail_tables.get(12) {
            return Ok(table_to_records(h, r));
        }
    }
    Ok(table_to_records(headers, rows))
}
/// Fetches the new-stock issue page of `stock` and returns parsed table 12 as
/// records (empty when the page has fewer tables).
///
/// # Errors
/// Propagates request and body-read failures.
pub async fn stock_ipo_info(
    &self,
    stock: &str,
) -> Result<Vec<HashMap<String, serde_json::Value>>> {
    const TABLE_INDEX: usize = 12;
    let page_url = format!(
        "https://vip.stock.finance.sina.com.cn/corp/go.php/vISSUE_NewStock/stockid/{stock}.phtml"
    );
    let records = fetch_html_table(&self.http, &page_url, TABLE_INDEX, None).await?;
    Ok(records)
}
/// Fetches the additional-issue page of `symbol` and flattens its per-issue tables.
///
/// Parsed table 12 acts as a gate: if it is missing or has no rows, the result is
/// empty. Every table from index 13 onward contributes one record per row with at
/// least two cells. The table's first header cell is stored under "公告日期", and
/// the row's first five cells are stored under the issue field names, in order.
///
/// # Errors
/// Propagates request and body-read failures.
pub async fn stock_add_stock(
    &self,
    symbol: &str,
) -> Result<Vec<HashMap<String, serde_json::Value>>> {
    // Output keys for row cells 0..=4; shorter rows simply omit the trailing keys.
    const FIELD_NAMES: [&str; 5] = [
        "发行方式",
        "发行价格",
        "实际公司募集资金总额",
        "发行费用总额",
        "实际发行数量",
    ];

    let url = format!(
        "https://vip.stock.finance.sina.com.cn/corp/go.php/vISSUE_AddStock/stockid/{symbol}.phtml"
    );
    let html = self.get(&url).send().await?.text().await?;
    let tables = parse_html_tables(&html);
    match tables.get(12) {
        Some((_, check_rows)) if !check_rows.is_empty() => {}
        _ => return Ok(vec![]),
    }

    let mut all_records = Vec::new();
    for (headers, rows) in tables.iter().skip(13) {
        let announce_date = headers.first().cloned().unwrap_or_default();
        for row in rows.iter().filter(|row| row.len() >= 2) {
            let mut record = HashMap::new();
            record.insert(
                "公告日期".to_string(),
                serde_json::Value::String(announce_date.clone()),
            );
            for (name, cell) in FIELD_NAMES.iter().zip(row) {
                record.insert(
                    (*name).to_string(),
                    serde_json::Value::String(cell.clone()),
                );
            }
            all_records.push(record);
        }
    }
    Ok(all_records)
}
/// Fetches the restricted-share release page for `symbol` and returns its first
/// table as records (empty when the page has no table).
///
/// # Errors
/// Propagates request and body-read failures.
pub async fn stock_restricted_release_queue_sina(
    &self,
    symbol: &str,
) -> Result<Vec<HashMap<String, serde_json::Value>>> {
    const TABLE_INDEX: usize = 0;
    let page_url = format!(
        "https://vip.stock.finance.sina.com.cn/q/go.php/vInvestConsult/kind/xsjj/index.phtml?symbol={symbol}"
    );
    let records = fetch_html_table(&self.http, &page_url, TABLE_INDEX, None).await?;
    Ok(records)
}
/// Fetches the circulating-shareholder page of `symbol` and converts parsed table 13
/// into shareholder records, with the period stored under "截止日期".
///
/// Returns an empty list when the page has fewer than 14 parsed tables.
///
/// # Errors
/// Propagates request and body-read failures.
pub async fn stock_circulate_stock_holder(
    &self,
    symbol: &str,
) -> Result<Vec<HashMap<String, serde_json::Value>>> {
    let page_url = format!(
        "https://vip.stock.finance.sina.com.cn/corp/go.php/vCI_CirculateStockHolder/stockid/{symbol}.phtml"
    );
    let page = self.get(&page_url).send().await?.text().await?;
    let tables = parse_html_tables(&page);
    match tables.get(13) {
        Some((headers, rows)) => Ok(parse_shareholder_table(headers, rows, "截止日期")),
        None => Ok(vec![]),
    }
}
/// Fetches the fund-shareholder page of `symbol` and converts parsed table 13 into
/// shareholder records, with the period stored under "截止日期".
///
/// Returns an empty list when the page has fewer than 14 parsed tables.
///
/// # Errors
/// Propagates request and body-read failures.
pub async fn stock_fund_stock_holder(
    &self,
    symbol: &str,
) -> Result<Vec<HashMap<String, serde_json::Value>>> {
    let page_url = format!(
        "https://vip.stock.finance.sina.com.cn/corp/go.php/vCI_FundStockHolder/stockid/{symbol}.phtml"
    );
    let page = self.get(&page_url).send().await?.text().await?;
    let tables = parse_html_tables(&page);
    match tables.get(13) {
        Some((headers, rows)) => Ok(parse_shareholder_table(headers, rows, "截止日期")),
        None => Ok(vec![]),
    }
}
/// Fetches the main-shareholder page of `stock` and converts parsed table 13 into
/// shareholder records, with the period stored under "截至日期".
///
/// Returns an empty list when the page has fewer than 14 parsed tables.
///
/// # Errors
/// Propagates request and body-read failures.
pub async fn stock_main_stock_holder(
    &self,
    stock: &str,
) -> Result<Vec<HashMap<String, serde_json::Value>>> {
    let page_url = format!(
        "https://vip.stock.finance.sina.com.cn/corp/go.php/vCI_StockHolder/stockid/{stock}.phtml"
    );
    let page = self.get(&page_url).send().await?.text().await?;
    let tables = parse_html_tables(&page);
    match tables.get(13) {
        Some((headers, rows)) => Ok(parse_shareholder_table(headers, rows, "截至日期")),
        None => Ok(vec![]),
    }
}
/// Collects the financial guideline indicators of `symbol`, one record per
/// (report date, indicator) pair.
///
/// The 2020 page is fetched first to read the list of available years from the
/// table following `id="con02-1"`. The list is cut after `start_year` when it
/// occurs in the list; otherwise every listed year is used. For each year, parsed
/// table 12 of that year's page is read: header cells after the first are the
/// report dates, rows whose first cell is a known category name set the current
/// category, and every other row yields one record per report date with the keys
/// "日期", "类别", "指标" and "值".
///
/// Returns an empty list when no year could be extracted.
///
/// # Errors
/// Propagates request and body-read failures.
pub async fn stock_financial_analysis_indicator(
    &self,
    symbol: &str,
    start_year: &str,
) -> Result<Vec<HashMap<String, serde_json::Value>>> {
    // Rows whose first cell is one of these are section headings, not indicators.
    const CATEGORIES: [&str; 7] = [
        "每股指标",
        "盈利能力",
        "成长能力",
        "营运能力",
        "偿债及资本结构",
        "现金流量",
        "其他指标",
    ];

    let index_url = format!(
        "https://money.finance.sina.com.cn/corp/go.php/vFD_FinancialGuideLine/stockid/{symbol}/ctrl/2020/displaytype/4.phtml"
    );
    let index_html = self.get(&index_url).send().await?.text().await?;
    let mut year_list: Vec<String> = RE_YEAR_TABLE
        .captures(&index_html)
        .map(|cap| {
            RE_LINK_YEAR
                .captures_iter(&cap[1])
                .map(|link| link[1].to_string())
                .collect()
        })
        .unwrap_or_default();
    if let Some(pos) = year_list.iter().position(|y| y == start_year) {
        year_list.truncate(pos + 1);
    }
    if year_list.is_empty() {
        return Ok(vec![]);
    }

    let mut all_rows = Vec::new();
    for year in &year_list {
        let year_url = format!(
            "https://money.finance.sina.com.cn/corp/go.php/vFD_FinancialGuideLine/stockid/{symbol}/ctrl/{year}/displaytype/4.phtml"
        );
        let year_html = self.get(&year_url).send().await?.text().await?;
        let tables = parse_html_tables(&year_html);
        let Some((headers, rows)) = tables.get(12) else {
            continue;
        };
        // The first header cell labels the indicator column; the rest are report dates.
        let Some((_, report_dates)) = headers.split_first() else {
            continue;
        };
        if rows.is_empty() {
            continue;
        }

        let mut current_category = String::new();
        for row in rows {
            let Some((first, values)) = row.split_first() else {
                continue;
            };
            if CATEGORIES.contains(&first.as_str()) {
                current_category.clone_from(first);
                continue;
            }
            for (date, value) in report_dates.iter().zip(values) {
                let record = HashMap::from([
                    ("日期".to_string(), serde_json::Value::String(date.clone())),
                    (
                        "类别".to_string(),
                        serde_json::Value::String(current_category.clone()),
                    ),
                    ("指标".to_string(), serde_json::Value::String(first.clone())),
                    ("值".to_string(), serde_json::Value::String(value.clone())),
                ]);
                all_rows.push(record);
            }
        }
    }
    Ok(all_rows)
}
pub async fn stock_institute_hold(
&self,
symbol: &str,
) -> Result<Vec<HashMap<String, serde_json::Value>>> {
let report_date = &symbol[..symbol.len() - 1];
let quarter = &symbol[symbol.len() - 1..];
let url =
"https://vip.stock.finance.sina.com.cn/q/go.php/vComStockHold/kind/jgcg/index.phtml";
let html = self
.get(url)
.query(&[
("p", "1"),
("num", "10000"),
("reportdate", report_date),
("quarter", quarter),
])
.send()
.await?
.text()
.await?;
let tables = parse_html_tables(&html);
if tables.is_empty() {
return Ok(vec![]);
}
let (headers, rows) = &tables[0];
Ok(table_to_records(headers, rows))
}
/// Fetches the institutional holding detail of `stock` for `quarter` from Sina's
/// JSONP endpoint and flattens it into records.
///
/// The JSON payload is taken from the first `{` up to (not including) the last `)`
/// of the response body. Under `data`, every nested object entry except the one
/// keyed "total" becomes a record. The part of the entry key before the first `_`
/// is mapped to a display name and stored under "持股机构类型", then all fields of
/// the entry are copied in.
///
/// Returns an empty list when the payload bounds are not found in order or when
/// `data` is not an object.
///
/// # Errors
/// Returns a decode error when the extracted text is not valid JSON and propagates
/// request and body-read failures.
pub async fn stock_institute_hold_detail(
    &self,
    stock: &str,
    quarter: &str,
) -> Result<Vec<HashMap<String, serde_json::Value>>> {
    const ENDPOINT: &str = "https://vip.stock.finance.sina.com.cn/q/api/jsonp.php/var%20details=/ComStockHoldService.getJGCGDetail";
    let body = self
        .get(ENDPOINT)
        .query(&[("symbol", stock), ("quarter", quarter)])
        .send()
        .await?
        .text()
        .await?;

    // Strip the JSONP wrapper around the JSON object.
    let start = body.find('{').unwrap_or(0);
    let end = body.rfind(')').unwrap_or(body.len());
    if start >= end {
        return Ok(vec![]);
    }
    let payload: serde_json::Value = serde_json::from_str(&body[start..end])
        .map_err(|e| Error::decode(format!("failed to parse institute detail JSON: {e}")))?;
    let Some(serde_json::Value::Object(data)) = payload.get("data") else {
        return Ok(vec![]);
    };

    let mut all_records = Vec::new();
    for inner in data.values() {
        let serde_json::Value::Object(items) = inner else {
            continue;
        };
        for (inst_key, values) in items {
            if inst_key == "total" {
                continue;
            }
            let serde_json::Value::Object(vals) = values else {
                continue;
            };
            let inst_type = inst_key.split('_').next().unwrap_or(inst_key);
            let display_type = match inst_type {
                "fund" => "基金",
                "socialSecurity" => "全国社保",
                "qfii" => "QFII",
                "insurance" => "保险",
                other => other,
            };
            let mut record: HashMap<String, serde_json::Value> = HashMap::new();
            record.insert(
                "持股机构类型".to_string(),
                serde_json::Value::String(display_type.to_string()),
            );
            // Entry fields are copied last, so a same-named field overrides the type.
            record.extend(vals.iter().map(|(field, val)| (field.clone(), val.clone())));
            all_records.push(record);
        }
    }
    Ok(all_records)
}
pub async fn stock_institute_recommend(
&self,
symbol: &str,
) -> Result<Vec<HashMap<String, serde_json::Value>>> {
let base_url = "http://stock.finance.sina.com.cn/stock/go.php/vIR_RatingNewest/index.phtml";
let index_html = self
.get(base_url)
.query(&[("num", "40"), ("p", "1")])
.send()
.await?
.text()
.await?;
let mut target_url = None;
for cap in RE_MENU_LINK.captures_iter(&index_html) {
let link_text = &cap[2];
if link_text.trim() == symbol {
target_url = Some(cap[1].to_string());
break;
}
}
let Some(url) = target_url else {
return Err(Error::not_found(format!(
"recommendation type not found: {symbol}"
)));
};
let html = self
.get(&url)
.query(&[("num", "10000"), ("p", "1")])
.send()
.await?
.text()
.await?;
let tables = parse_html_tables(&html);
if tables.is_empty() {
return Ok(vec![]);
}
let (headers, rows) = &tables[0];
Ok(table_to_records(headers, rows))
}
/// Fetches the institute recommendation search page for `symbol` and returns its
/// first table as records (empty when the page has no table).
///
/// # Errors
/// Propagates request and body-read failures.
pub async fn stock_institute_recommend_detail(
    &self,
    symbol: &str,
) -> Result<Vec<HashMap<String, serde_json::Value>>> {
    let search_url = format!(
        "http://stock.finance.sina.com.cn/stock/go.php/vIR_StockSearch/key/{symbol}.phtml"
    );
    let paging = [("num", "5000"), ("p", "1")];
    let page = self
        .get(&search_url)
        .query(&paging)
        .send()
        .await?
        .text()
        .await?;
    let tables = parse_html_tables(&page);
    match tables.first() {
        Some((headers, rows)) => Ok(table_to_records(headers, rows)),
        None => Ok(vec![]),
    }
}
}
/// Flattens a shareholder table in which marker rows carry the dates for the data
/// rows that follow them.
///
/// * A row whose first cell contains "截止日期" or "截至日期" sets the current period
///   from its second cell; one containing "公告日期" sets the current announcement date.
/// * Rows whose first cell is exactly "编号", "股东名称" or "基金名称", and rows made
///   up only of blank cells, are skipped.
/// * Every other row becomes a record with its cells under `col_0`, `col_1`, …,
///   plus the current period under `date_field` and the current announcement date
///   under "公告日期" when those are non-empty.
///
/// `_headers` is accepted for signature symmetry with the table parser but is not used.
fn parse_shareholder_table(
    _headers: &[String],
    rows: &[Vec<String>],
    date_field: &str,
) -> Vec<HashMap<String, serde_json::Value>> {
    // First-cell values that mark a repeated column-title row.
    const COLUMN_TITLES: [&str; 3] = ["编号", "股东名称", "基金名称"];

    let mut all_records = Vec::new();
    let mut current_date = String::new();
    let mut current_ann_date = String::new();
    for row in rows {
        let Some(label) = row.first() else {
            continue;
        };

        // Marker rows update the running dates and never produce a record.
        let is_period = label.contains("截止日期") || label.contains("截至日期");
        if is_period || label.contains("公告日期") {
            if let Some(value) = row.get(1) {
                let target = if is_period {
                    &mut current_date
                } else {
                    &mut current_ann_date
                };
                target.clone_from(value);
            }
            continue;
        }
        if COLUMN_TITLES.contains(&label.as_str()) {
            continue;
        }
        if row.iter().all(|cell| cell.trim().is_empty()) {
            continue;
        }

        let mut record = HashMap::new();
        if !current_date.is_empty() {
            record.insert(
                date_field.to_string(),
                serde_json::Value::String(current_date.clone()),
            );
        }
        if !current_ann_date.is_empty() {
            record.insert(
                "公告日期".to_string(),
                serde_json::Value::String(current_ann_date.clone()),
            );
        }
        record.extend(
            row.iter()
                .enumerate()
                .map(|(i, cell)| (format!("col_{i}"), serde_json::Value::String(cell.clone()))),
        );
        all_records.push(record);
    }
    all_records
}