use crate::status::KgliteStatusCode;
use crate::strings::alloc_c_string;
use kglite::api::datasets::sec::{
fetch_company_facts_blocking, fetch_company_tickers_blocking,
fetch_quarterly_master_idx_blocking, fetch_submissions_bulk_blocking, parse_tickers_json,
resolve_fetch_buckets, run_all, ExtractReport, SecClient, SecFormBucket, SliceSpec, Workdir,
YearRange,
};
use std::ffi::{c_char, CStr};
/// Opaque handle type that C callers see for an SEC client.
///
/// The type is zero-sized and is never constructed in this file; pointers to
/// it are obtained by casting a boxed `SecClientState` (see
/// `SecClientState::into_handle`) and are cast back before use.
#[repr(C)]
pub struct KgliteSecClient {
// Zero-length array: gives the struct a field without giving it any size.
_opaque: [u8; 0],
// The raw pointer in the marker opts the type out of the `Send`/`Sync` auto
// traits, and `PhantomPinned` opts it out of `Unpin`.
_marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
}
/// Heap-allocated state behind a `KgliteSecClient` handle.
///
/// `SecClientState::into_handle` boxes one of these and exposes the allocation
/// to callers as a `*mut KgliteSecClient`.
pub(crate) struct SecClientState {
/// The wrapped client that the fetch entry points borrow.
pub(crate) inner: SecClient,
}
impl SecClientState {
fn into_handle(client: SecClient) -> *mut KgliteSecClient {
let boxed = Box::new(SecClientState { inner: client });
Box::into_raw(boxed).cast::<KgliteSecClient>()
}
unsafe fn from_handle<'a>(handle: *const KgliteSecClient) -> &'a SecClientState {
unsafe { &*handle.cast::<SecClientState>() }
}
unsafe fn free_handle(handle: *mut KgliteSecClient) {
if handle.is_null() {
return;
}
let _ = unsafe { Box::from_raw(handle.cast::<SecClientState>()) };
}
}
/// Creates an SEC client and returns it to the caller as an opaque handle.
///
/// Outputs:
/// * `*out_client` receives the new handle on success and null on every
///   failure. A non-null handle must be released with
///   `kglite_datasets_sec_client_free`.
/// * `*out_error_msg` (when `out_error_msg` is non-null) receives null unless
///   `SecClient::new` fails, in which case it receives a string allocated by
///   `alloc_c_string` holding the error text. The tests in this file release
///   such strings with `kglite_free_string`.
///
/// Returns:
/// * `NullPointer` if `user_agent` or `out_client` is null.
/// * `InvalidUtf8` if `user_agent` is not valid UTF-8.
/// * `InvalidArgument` if `SecClient::new` rejects the user agent.
/// * `Ok` otherwise.
///
/// # Safety
///
/// `user_agent`, when non-null, must point to a NUL-terminated string.
/// `out_client` and `out_error_msg`, when non-null, must be valid for writes.
#[no_mangle]
pub unsafe extern "C" fn kglite_datasets_sec_client_new(
    user_agent: *const c_char,
    out_client: *mut *mut KgliteSecClient,
    out_error_msg: *mut *const c_char,
) -> KgliteStatusCode {
    // Give every output a defined value before any early return so a caller
    // that inspects them after a non-Ok status never reads an indeterminate
    // pointer.
    if !out_error_msg.is_null() {
        // SAFETY: non-null and valid for writes per the caller contract.
        unsafe { *out_error_msg = std::ptr::null() };
    }
    if !out_client.is_null() {
        // SAFETY: non-null and valid for writes per the caller contract.
        unsafe { *out_client = std::ptr::null_mut() };
    }
    if user_agent.is_null() || out_client.is_null() {
        return KgliteStatusCode::NullPointer;
    }
    // SAFETY: `user_agent` is non-null and NUL-terminated per the contract.
    let ua = match unsafe { CStr::from_ptr(user_agent) }.to_str() {
        Ok(s) => s,
        Err(_) => return KgliteStatusCode::InvalidUtf8,
    };
    match SecClient::new(ua) {
        Ok(client) => {
            // SAFETY: `out_client` was checked for null above.
            unsafe { *out_client = SecClientState::into_handle(client) };
            KgliteStatusCode::Ok
        }
        Err(err) => {
            if !out_error_msg.is_null() {
                // SAFETY: non-null and valid for writes per the contract.
                unsafe { *out_error_msg = alloc_c_string(&err.to_string()) };
            }
            KgliteStatusCode::InvalidArgument
        }
    }
}
/// Releases a client handle by delegating to `SecClientState::free_handle`.
///
/// A null `client` is accepted and ignored (`free_handle` returns early).
///
/// # Safety
///
/// A non-null `client` must be a handle produced by
/// `kglite_datasets_sec_client_new` that has not been freed already, and it
/// must not be used after this call.
#[no_mangle]
pub unsafe extern "C" fn kglite_datasets_sec_client_free(client: *mut KgliteSecClient) {
unsafe { SecClientState::free_handle(client) };
}
/// Runs `fetch_quarterly_master_idx_blocking` for the inclusive year range
/// `year_start..=year_end` against the working directory at `workdir_path`.
///
/// `current_year` and `current_quarter` are forwarded unchanged.
///
/// Outputs:
/// * `*out_pair_json` receives a JSON array `[fetched,skipped]` built from the
///   two values the fetch returns, or null on every failure.
/// * `*out_error_msg` (when `out_error_msg` is non-null) receives null on
///   success, or an `alloc_c_string` string describing an invalid year range
///   or a fetch failure.
///
/// Returns:
/// * `NullPointer` if `client`, `workdir_path` or `out_pair_json` is null.
/// * `InvalidUtf8` if `workdir_path` is not valid UTF-8.
/// * `InvalidArgument` if `year_start > year_end`.
/// * `Internal` if the fetch fails.
/// * `Ok` otherwise.
///
/// # Safety
///
/// `client` must be null or a live handle from
/// `kglite_datasets_sec_client_new`. `workdir_path`, when non-null, must point
/// to a NUL-terminated string. Non-null out pointers must be valid for writes.
#[no_mangle]
pub unsafe extern "C" fn kglite_datasets_sec_fetch_quarterly_master_idx(
    client: *const KgliteSecClient,
    workdir_path: *const c_char,
    year_start: u16,
    year_end: u16,
    current_year: u16,
    current_quarter: u8,
    out_pair_json: *mut *const c_char,
    out_error_msg: *mut *const c_char,
) -> KgliteStatusCode {
    // Define every output before any early return so callers never observe
    // indeterminate pointers after a non-Ok status.
    if !out_error_msg.is_null() {
        // SAFETY: non-null and valid for writes per the caller contract.
        unsafe { *out_error_msg = std::ptr::null() };
    }
    if !out_pair_json.is_null() {
        // SAFETY: non-null and valid for writes per the caller contract.
        unsafe { *out_pair_json = std::ptr::null() };
    }
    if client.is_null() || workdir_path.is_null() || out_pair_json.is_null() {
        return KgliteStatusCode::NullPointer;
    }
    // SAFETY: `workdir_path` is non-null and NUL-terminated per the contract.
    let workdir_str = match unsafe { CStr::from_ptr(workdir_path) }.to_str() {
        Ok(s) => s,
        Err(_) => return KgliteStatusCode::InvalidUtf8,
    };
    if year_start > year_end {
        if !out_error_msg.is_null() {
            let msg = format!(
                "invalid year range: year_start ({}) is greater than year_end ({})",
                year_start, year_end
            );
            // SAFETY: non-null and valid for writes per the contract.
            unsafe { *out_error_msg = alloc_c_string(&msg) };
        }
        return KgliteStatusCode::InvalidArgument;
    }
    // SAFETY: `client` is non-null and a live handle per the contract.
    let client_state = unsafe { SecClientState::from_handle(client) };
    let workdir = Workdir::new(workdir_str);
    let range = YearRange::new(year_start, year_end);
    match fetch_quarterly_master_idx_blocking(
        &client_state.inner,
        &workdir,
        range,
        current_year,
        current_quarter,
    ) {
        Ok((fetched, skipped)) => {
            let json = format!("[{},{}]", fetched, skipped);
            // SAFETY: `out_pair_json` was checked for null above.
            unsafe { *out_pair_json = alloc_c_string(&json) };
            KgliteStatusCode::Ok
        }
        Err(err) => {
            if !out_error_msg.is_null() {
                // SAFETY: non-null and valid for writes per the contract.
                unsafe { *out_error_msg = alloc_c_string(&err.to_string()) };
            }
            KgliteStatusCode::Internal
        }
    }
}
/// Runs `fetch_submissions_bulk_blocking` against the working directory at
/// `workdir_path`.
///
/// `staleness_hours` is forwarded unchanged; `force_refetch` is treated as a
/// boolean (any non-zero value means true).
///
/// Outputs:
/// * `*out_fetched` receives the boolean returned by the fetch as `0`/`1`, and
///   `0` on every failure.
/// * `*out_error_msg` (when `out_error_msg` is non-null) receives null unless
///   the fetch fails, in which case it receives an `alloc_c_string` string
///   with the error text.
///
/// Returns:
/// * `NullPointer` if `client`, `workdir_path` or `out_fetched` is null.
/// * `InvalidUtf8` if `workdir_path` is not valid UTF-8.
/// * `Internal` if the fetch fails.
/// * `Ok` otherwise.
///
/// # Safety
///
/// `client` must be null or a live handle from
/// `kglite_datasets_sec_client_new`. `workdir_path`, when non-null, must point
/// to a NUL-terminated string. Non-null out pointers must be valid for writes.
#[no_mangle]
pub unsafe extern "C" fn kglite_datasets_sec_fetch_submissions_bulk(
    client: *const KgliteSecClient,
    workdir_path: *const c_char,
    staleness_hours: u64,
    force_refetch: u8,
    out_fetched: *mut u8,
    out_error_msg: *mut *const c_char,
) -> KgliteStatusCode {
    // Define every output before any early return so callers never observe
    // indeterminate values after a non-Ok status.
    if !out_error_msg.is_null() {
        // SAFETY: non-null and valid for writes per the caller contract.
        unsafe { *out_error_msg = std::ptr::null() };
    }
    if !out_fetched.is_null() {
        // SAFETY: non-null and valid for writes per the caller contract.
        unsafe { *out_fetched = 0 };
    }
    if client.is_null() || workdir_path.is_null() || out_fetched.is_null() {
        return KgliteStatusCode::NullPointer;
    }
    // SAFETY: `workdir_path` is non-null and NUL-terminated per the contract.
    let workdir_str = match unsafe { CStr::from_ptr(workdir_path) }.to_str() {
        Ok(s) => s,
        Err(_) => return KgliteStatusCode::InvalidUtf8,
    };
    // SAFETY: `client` is non-null and a live handle per the contract.
    let client_state = unsafe { SecClientState::from_handle(client) };
    let workdir = Workdir::new(workdir_str);
    match fetch_submissions_bulk_blocking(
        &client_state.inner,
        &workdir,
        staleness_hours,
        force_refetch != 0,
    ) {
        Ok(fetched) => {
            // SAFETY: `out_fetched` was checked for null above.
            unsafe { *out_fetched = u8::from(fetched) };
            KgliteStatusCode::Ok
        }
        Err(err) => {
            if !out_error_msg.is_null() {
                // SAFETY: non-null and valid for writes per the contract.
                unsafe { *out_error_msg = alloc_c_string(&err.to_string()) };
            }
            KgliteStatusCode::Internal
        }
    }
}
/// Runs `fetch_company_tickers_blocking` against the working directory at
/// `workdir_path`.
///
/// `force_refetch` is treated as a boolean (any non-zero value means true).
///
/// Outputs:
/// * `*out_fetched` receives the boolean returned by the fetch as `0`/`1`, and
///   `0` on every failure.
/// * `*out_error_msg` (when `out_error_msg` is non-null) receives null unless
///   the fetch fails, in which case it receives an `alloc_c_string` string
///   with the error text.
///
/// Returns:
/// * `NullPointer` if `client`, `workdir_path` or `out_fetched` is null.
/// * `InvalidUtf8` if `workdir_path` is not valid UTF-8.
/// * `Internal` if the fetch fails.
/// * `Ok` otherwise.
///
/// # Safety
///
/// `client` must be null or a live handle from
/// `kglite_datasets_sec_client_new`. `workdir_path`, when non-null, must point
/// to a NUL-terminated string. Non-null out pointers must be valid for writes.
#[no_mangle]
pub unsafe extern "C" fn kglite_datasets_sec_fetch_company_tickers(
    client: *const KgliteSecClient,
    workdir_path: *const c_char,
    force_refetch: u8,
    out_fetched: *mut u8,
    out_error_msg: *mut *const c_char,
) -> KgliteStatusCode {
    // Define every output before any early return so callers never observe
    // indeterminate values after a non-Ok status.
    if !out_error_msg.is_null() {
        // SAFETY: non-null and valid for writes per the caller contract.
        unsafe { *out_error_msg = std::ptr::null() };
    }
    if !out_fetched.is_null() {
        // SAFETY: non-null and valid for writes per the caller contract.
        unsafe { *out_fetched = 0 };
    }
    if client.is_null() || workdir_path.is_null() || out_fetched.is_null() {
        return KgliteStatusCode::NullPointer;
    }
    // SAFETY: `workdir_path` is non-null and NUL-terminated per the contract.
    let workdir_str = match unsafe { CStr::from_ptr(workdir_path) }.to_str() {
        Ok(s) => s,
        Err(_) => return KgliteStatusCode::InvalidUtf8,
    };
    // SAFETY: `client` is non-null and a live handle per the contract.
    let client_state = unsafe { SecClientState::from_handle(client) };
    let workdir = Workdir::new(workdir_str);
    match fetch_company_tickers_blocking(&client_state.inner, &workdir, force_refetch != 0) {
        Ok(fetched) => {
            // SAFETY: `out_fetched` was checked for null above.
            unsafe { *out_fetched = u8::from(fetched) };
            KgliteStatusCode::Ok
        }
        Err(err) => {
            if !out_error_msg.is_null() {
                // SAFETY: non-null and valid for writes per the contract.
                unsafe { *out_error_msg = alloc_c_string(&err.to_string()) };
            }
            KgliteStatusCode::Internal
        }
    }
}
/// Runs `fetch_company_facts_blocking` for `cik` against the working directory
/// at `workdir_path`.
///
/// `cik` is forwarded unchanged; `force_refetch` is treated as a boolean (any
/// non-zero value means true).
///
/// Outputs:
/// * `*out_fetched` receives the boolean returned by the fetch as `0`/`1`, and
///   `0` on every failure.
/// * `*out_error_msg` (when `out_error_msg` is non-null) receives null unless
///   the fetch fails, in which case it receives an `alloc_c_string` string
///   with the error text.
///
/// Returns:
/// * `NullPointer` if `client`, `workdir_path` or `out_fetched` is null.
/// * `InvalidUtf8` if `workdir_path` is not valid UTF-8.
/// * `Internal` if the fetch fails.
/// * `Ok` otherwise.
///
/// # Safety
///
/// `client` must be null or a live handle from
/// `kglite_datasets_sec_client_new`. `workdir_path`, when non-null, must point
/// to a NUL-terminated string. Non-null out pointers must be valid for writes.
#[no_mangle]
pub unsafe extern "C" fn kglite_datasets_sec_fetch_company_facts(
    client: *const KgliteSecClient,
    workdir_path: *const c_char,
    cik: u64,
    force_refetch: u8,
    out_fetched: *mut u8,
    out_error_msg: *mut *const c_char,
) -> KgliteStatusCode {
    // Define every output before any early return so callers never observe
    // indeterminate values after a non-Ok status.
    if !out_error_msg.is_null() {
        // SAFETY: non-null and valid for writes per the caller contract.
        unsafe { *out_error_msg = std::ptr::null() };
    }
    if !out_fetched.is_null() {
        // SAFETY: non-null and valid for writes per the caller contract.
        unsafe { *out_fetched = 0 };
    }
    if client.is_null() || workdir_path.is_null() || out_fetched.is_null() {
        return KgliteStatusCode::NullPointer;
    }
    // SAFETY: `workdir_path` is non-null and NUL-terminated per the contract.
    let workdir_str = match unsafe { CStr::from_ptr(workdir_path) }.to_str() {
        Ok(s) => s,
        Err(_) => return KgliteStatusCode::InvalidUtf8,
    };
    // SAFETY: `client` is non-null and a live handle per the contract.
    let client_state = unsafe { SecClientState::from_handle(client) };
    let workdir = Workdir::new(workdir_str);
    match fetch_company_facts_blocking(&client_state.inner, &workdir, cik, force_refetch != 0) {
        Ok(fetched) => {
            // SAFETY: `out_fetched` was checked for null above.
            unsafe { *out_fetched = u8::from(fetched) };
            KgliteStatusCode::Ok
        }
        Err(err) => {
            if !out_error_msg.is_null() {
                // SAFETY: non-null and valid for writes per the contract.
                unsafe { *out_error_msg = alloc_c_string(&err.to_string()) };
            }
            KgliteStatusCode::Internal
        }
    }
}
/// Resolves a JSON list of form types into fetch buckets via
/// `resolve_fetch_buckets`.
///
/// `form_types_json` is either the JSON literal `null` (surrounding whitespace
/// allowed), which is passed on as "no list", or a JSON array of strings.
///
/// Outputs:
/// * `*out_active_json` receives a JSON array of bucket names (see
///   `bucket_str`) for the active buckets; null on failure.
/// * `*out_unmatched_json` receives the JSON serialization of the unmatched
///   value returned by `resolve_fetch_buckets`; null on failure.
/// * `*out_error_msg` (when `out_error_msg` is non-null) receives null on
///   success, or an `alloc_c_string` string with the JSON parse error.
///
/// Returns:
/// * `NullPointer` if `form_types_json`, `out_active_json` or
///   `out_unmatched_json` is null.
/// * `InvalidUtf8` if `form_types_json` is not valid UTF-8.
/// * `InvalidArgument` if it is neither `null` nor a JSON array of strings.
/// * `Ok` otherwise.
///
/// # Safety
///
/// `form_types_json`, when non-null, must point to a NUL-terminated string.
/// Non-null out pointers must be valid for writes.
#[no_mangle]
pub unsafe extern "C" fn kglite_datasets_sec_resolve_fetch_buckets(
    form_types_json: *const c_char,
    out_active_json: *mut *const c_char,
    out_unmatched_json: *mut *const c_char,
    out_error_msg: *mut *const c_char,
) -> KgliteStatusCode {
    // Define every output before any early return so callers never observe
    // indeterminate pointers after a non-Ok status.
    if !out_error_msg.is_null() {
        // SAFETY: non-null and valid for writes per the caller contract.
        unsafe { *out_error_msg = std::ptr::null() };
    }
    if !out_active_json.is_null() {
        // SAFETY: non-null and valid for writes per the caller contract.
        unsafe { *out_active_json = std::ptr::null() };
    }
    if !out_unmatched_json.is_null() {
        // SAFETY: non-null and valid for writes per the caller contract.
        unsafe { *out_unmatched_json = std::ptr::null() };
    }
    if form_types_json.is_null() || out_active_json.is_null() || out_unmatched_json.is_null() {
        return KgliteStatusCode::NullPointer;
    }
    // SAFETY: `form_types_json` is non-null and NUL-terminated per the contract.
    let json_str = match unsafe { CStr::from_ptr(form_types_json) }.to_str() {
        Ok(s) => s,
        Err(_) => return KgliteStatusCode::InvalidUtf8,
    };
    let owned_strings: Option<Vec<String>> = if json_str.trim() == "null" {
        None
    } else {
        match serde_json::from_str::<Vec<String>>(json_str) {
            Ok(v) => Some(v),
            Err(err) => {
                // Report why the input was rejected, matching the way
                // `kglite_datasets_sec_parse_tickers_json` reports bad JSON.
                if !out_error_msg.is_null() {
                    let msg = format!("invalid form_types_json: {}", err);
                    // SAFETY: non-null and valid for writes per the contract.
                    unsafe { *out_error_msg = alloc_c_string(&msg) };
                }
                return KgliteStatusCode::InvalidArgument;
            }
        }
    };
    // `resolve_fetch_buckets` wants borrowed strs, so build a view over the
    // owned strings.
    let form_types_slice: Option<Vec<&str>> = owned_strings
        .as_ref()
        .map(|v| v.iter().map(String::as_str).collect());
    let (active, unmatched) = resolve_fetch_buckets(form_types_slice.as_deref());
    let active_names: Vec<&str> = active.iter().map(bucket_str).collect();
    let active_json = serde_json::to_string(&active_names).unwrap_or_else(|_| "[]".to_string());
    let unmatched_json = serde_json::to_string(&unmatched).unwrap_or_else(|_| "[]".to_string());
    // SAFETY: both pointers were checked for null above.
    unsafe {
        *out_active_json = alloc_c_string(&active_json);
        *out_unmatched_json = alloc_c_string(&unmatched_json);
    }
    KgliteStatusCode::Ok
}
/// Returns the short lowercase name used for `bucket` in the JSON emitted by
/// `kglite_datasets_sec_resolve_fetch_buckets`.
///
/// The match has no wildcard arm on purpose: adding a variant to
/// `SecFormBucket` becomes a compile error here until it is given a name.
fn bucket_str(bucket: &SecFormBucket) -> &'static str {
    match *bucket {
        // Names carrying the "form" prefix.
        SecFormBucket::Form3 => "form3",
        SecFormBucket::Form4 => "form4",
        SecFormBucket::Form5 => "form5",
        SecFormBucket::Form144 => "form144",
        SecFormBucket::Form10k => "form10k",
        // Names without the prefix.
        SecFormBucket::Form13f => "13f",
        SecFormBucket::Form8k => "8k",
        SecFormBucket::Sc13d => "sc13d",
        SecFormBucket::Sc13g => "sc13g",
        SecFormBucket::Def14a => "def14a",
    }
}
/// Parses a tickers JSON document with `parse_tickers_json` and returns the
/// resulting map re-serialized as JSON.
///
/// Outputs:
/// * `*out_map_json` receives the serialized map on success and null on every
///   failure.
/// * `*out_error_msg` (when `out_error_msg` is non-null) receives null unless
///   parsing fails, in which case it receives an `alloc_c_string` string with
///   the error text.
///
/// Returns:
/// * `NullPointer` if `tickers_json` or `out_map_json` is null.
/// * `InvalidUtf8` if `tickers_json` is not valid UTF-8.
/// * `InvalidArgument` if `parse_tickers_json` rejects the input.
/// * `Ok` otherwise.
///
/// # Safety
///
/// `tickers_json`, when non-null, must point to a NUL-terminated string.
/// Non-null out pointers must be valid for writes.
#[no_mangle]
pub unsafe extern "C" fn kglite_datasets_sec_parse_tickers_json(
    tickers_json: *const c_char,
    out_map_json: *mut *const c_char,
    out_error_msg: *mut *const c_char,
) -> KgliteStatusCode {
    // Define every output before any early return so callers never observe
    // indeterminate pointers after a non-Ok status.
    if !out_error_msg.is_null() {
        // SAFETY: non-null and valid for writes per the caller contract.
        unsafe { *out_error_msg = std::ptr::null() };
    }
    if !out_map_json.is_null() {
        // SAFETY: non-null and valid for writes per the caller contract.
        unsafe { *out_map_json = std::ptr::null() };
    }
    if tickers_json.is_null() || out_map_json.is_null() {
        return KgliteStatusCode::NullPointer;
    }
    // SAFETY: `tickers_json` is non-null and NUL-terminated per the contract.
    let json_str = match unsafe { CStr::from_ptr(tickers_json) }.to_str() {
        Ok(s) => s,
        Err(_) => return KgliteStatusCode::InvalidUtf8,
    };
    match parse_tickers_json(json_str) {
        Ok(map) => {
            let out_json = serde_json::to_string(&map).unwrap_or_else(|_| "{}".to_string());
            // SAFETY: `out_map_json` was checked for null above.
            unsafe { *out_map_json = alloc_c_string(&out_json) };
            KgliteStatusCode::Ok
        }
        Err(err) => {
            if !out_error_msg.is_null() {
                // SAFETY: non-null and valid for writes per the contract.
                unsafe { *out_error_msg = alloc_c_string(&err.to_string()) };
            }
            KgliteStatusCode::InvalidArgument
        }
    }
}
/// Runs `run_all` over the working directory at `workdir_path`, restricted by
/// the optional slice described in `slice_json`.
///
/// `slice_json` is decoded by `parse_slice_json`; a null pointer or a blank
/// string yields `SliceSpec::default()`. `force` is treated as a boolean (any
/// non-zero value means true).
///
/// Outputs:
/// * `*out_report_json` receives the report serialized by
///   `serialize_extract_report` on success and null on every failure.
/// * `*out_error_msg` (when `out_error_msg` is non-null) receives null unless
///   `run_all` fails, in which case it receives an `alloc_c_string` string
///   with the error text.
///
/// Returns:
/// * `NullPointer` if `workdir_path` or `out_report_json` is null.
/// * `InvalidUtf8` if `workdir_path` or `slice_json` is not valid UTF-8.
/// * `InvalidArgument` if `slice_json` is rejected by `parse_slice_json`.
/// * `Internal` if `run_all` fails.
/// * `Ok` otherwise.
///
/// # Safety
///
/// `workdir_path` and `slice_json`, when non-null, must each point to a
/// NUL-terminated string. Non-null out pointers must be valid for writes.
#[no_mangle]
pub unsafe extern "C" fn kglite_datasets_sec_run_all(
    workdir_path: *const c_char,
    slice_json: *const c_char,
    force: u8,
    out_report_json: *mut *const c_char,
    out_error_msg: *mut *const c_char,
) -> KgliteStatusCode {
    // Define every output before any early return so callers never observe
    // indeterminate pointers after a non-Ok status.
    if !out_error_msg.is_null() {
        // SAFETY: non-null and valid for writes per the caller contract.
        unsafe { *out_error_msg = std::ptr::null() };
    }
    if !out_report_json.is_null() {
        // SAFETY: non-null and valid for writes per the caller contract.
        unsafe { *out_report_json = std::ptr::null() };
    }
    if workdir_path.is_null() || out_report_json.is_null() {
        return KgliteStatusCode::NullPointer;
    }
    // SAFETY: `workdir_path` is non-null and NUL-terminated per the contract.
    let workdir_str = match unsafe { CStr::from_ptr(workdir_path) }.to_str() {
        Ok(s) => s,
        Err(_) => return KgliteStatusCode::InvalidUtf8,
    };
    let slice = match parse_slice_json(slice_json) {
        Ok(s) => s,
        Err(rc) => return rc,
    };
    let workdir = Workdir::new(workdir_str);
    match run_all(&workdir, &slice, force != 0) {
        Ok(report) => {
            let json = serialize_extract_report(&report);
            // SAFETY: `out_report_json` was checked for null above.
            unsafe { *out_report_json = alloc_c_string(&json) };
            KgliteStatusCode::Ok
        }
        Err(err) => {
            if !out_error_msg.is_null() {
                // SAFETY: non-null and valid for writes per the contract.
                unsafe { *out_error_msg = alloc_c_string(&err.to_string()) };
            }
            KgliteStatusCode::Internal
        }
    }
}
/// Decodes the optional slice filter passed across the C boundary.
///
/// A null pointer or a string that is empty after trimming yields
/// `SliceSpec::default()`. Otherwise the text must be a JSON object whose
/// optional keys `cik_list`, `form_types` and `year_range` (a two-element
/// `[start, end]` array) are applied to the default spec in that order.
///
/// Errors:
/// * `InvalidUtf8` if the bytes are not valid UTF-8.
/// * `InvalidArgument` if the JSON does not match the expected shape, or if
///   `year_range` has `start > end`.
///
/// NOTE(review): this is a safe `fn` that dereferences a raw pointer; callers
/// must pass either null or a pointer to a NUL-terminated string.
fn parse_slice_json(slice_json: *const c_char) -> Result<SliceSpec, KgliteStatusCode> {
    /// Wire shape of the slice object; every key is optional.
    #[derive(serde::Deserialize)]
    struct RawSlice {
        cik_list: Option<Vec<u64>>,
        form_types: Option<Vec<String>>,
        year_range: Option<(u16, u16)>,
    }

    if slice_json.is_null() {
        return Ok(SliceSpec::default());
    }
    let text = unsafe { CStr::from_ptr(slice_json) }
        .to_str()
        .map_err(|_| KgliteStatusCode::InvalidUtf8)?;
    if text.trim().is_empty() {
        return Ok(SliceSpec::default());
    }

    let raw: RawSlice =
        serde_json::from_str(text).map_err(|_| KgliteStatusCode::InvalidArgument)?;

    let mut spec = SliceSpec::default();
    if let Some(cik_list) = raw.cik_list {
        spec = spec.with_cik_list(cik_list);
    }
    if let Some(form_types) = raw.form_types {
        spec = spec.with_form_types(form_types);
    }
    match raw.year_range {
        Some((first, last)) if first > last => return Err(KgliteStatusCode::InvalidArgument),
        Some((first, last)) => spec = spec.with_year_range(first, last),
        None => {}
    }
    Ok(spec)
}
/// Renders an `ExtractReport` as a JSON object string.
///
/// The object carries the report's `extracted_at`, `identity_ms` and
/// `total_ms` fields plus a `debug` key holding the report's `Debug`
/// rendering.
fn serialize_extract_report(report: &ExtractReport) -> String {
    let debug_dump = format!("{:?}", report);
    let summary = serde_json::json!({
        "extracted_at": report.extracted_at,
        "identity_ms": report.identity_ms,
        "total_ms": report.total_ms,
        "debug": debug_dump,
    });
    summary.to_string()
}
// Unit tests for the C entry points and the private slice parser. Strings
// returned through out parameters are released with `crate::kglite_free_string`.
#[cfg(test)]
mod tests {
use super::*;
use std::ffi::CString;
// An empty user agent must be rejected with InvalidArgument, a null client
// handle and a non-null error message.
#[test]
fn client_factory_rejects_empty_user_agent() {
let ua = CString::new("").unwrap();
let mut client: *mut KgliteSecClient = std::ptr::null_mut();
let mut err: *const c_char = std::ptr::null();
let rc = unsafe {
kglite_datasets_sec_client_new(ua.as_ptr(), &mut client as *mut _, &mut err as *mut _)
};
assert_eq!(rc, KgliteStatusCode::InvalidArgument);
assert!(client.is_null());
assert!(!err.is_null());
unsafe { crate::kglite_free_string(err) };
}
// A well-formed user agent yields Ok and a non-null handle, which is then freed.
#[test]
fn client_factory_accepts_valid_ua() {
let ua = CString::new("kglite-c test test@example.com").unwrap();
let mut client: *mut KgliteSecClient = std::ptr::null_mut();
let mut err: *const c_char = std::ptr::null();
let rc = unsafe {
kglite_datasets_sec_client_new(ua.as_ptr(), &mut client as *mut _, &mut err as *mut _)
};
assert_eq!(rc, KgliteStatusCode::Ok);
assert!(!client.is_null());
unsafe { kglite_datasets_sec_client_free(client) };
}
// The JSON literal `null` means "no explicit list"; the active bucket list
// returned for it must be non-empty.
#[test]
fn resolve_fetch_buckets_with_null_returns_defaults() {
let null_json = CString::new("null").unwrap();
let mut active: *const c_char = std::ptr::null();
let mut unmatched: *const c_char = std::ptr::null();
let mut err: *const c_char = std::ptr::null();
let rc = unsafe {
kglite_datasets_sec_resolve_fetch_buckets(
null_json.as_ptr(),
&mut active as *mut _,
&mut unmatched as *mut _,
&mut err as *mut _,
)
};
assert_eq!(rc, KgliteStatusCode::Ok);
let active_str = unsafe { CStr::from_ptr(active).to_str().unwrap() };
let parsed: Vec<String> = serde_json::from_str(active_str).unwrap();
assert!(!parsed.is_empty());
unsafe { crate::kglite_free_string(active) };
unsafe { crate::kglite_free_string(unmatched) };
}
// A form type that is not recognised must be echoed back in the unmatched output.
#[test]
fn resolve_fetch_buckets_with_known_forms() {
let forms = CString::new(r#"["10-K", "4", "ZZZ-UNKNOWN"]"#).unwrap();
let mut active: *const c_char = std::ptr::null();
let mut unmatched: *const c_char = std::ptr::null();
let mut err: *const c_char = std::ptr::null();
let rc = unsafe {
kglite_datasets_sec_resolve_fetch_buckets(
forms.as_ptr(),
&mut active as *mut _,
&mut unmatched as *mut _,
&mut err as *mut _,
)
};
assert_eq!(rc, KgliteStatusCode::Ok);
let unmatched_str = unsafe { CStr::from_ptr(unmatched).to_str().unwrap() };
assert!(unmatched_str.contains("ZZZ-UNKNOWN"));
unsafe { crate::kglite_free_string(active) };
unsafe { crate::kglite_free_string(unmatched) };
}
// A two-entry tickers document must come back as a ticker -> CIK map.
#[test]
fn parse_tickers_handles_simple_object() {
let raw = CString::new(
r#"{"0":{"cik_str":320193,"ticker":"AAPL","title":"Apple Inc."},
"1":{"cik_str":789019,"ticker":"MSFT","title":"Microsoft Corp"}}"#,
)
.unwrap();
let mut out: *const c_char = std::ptr::null();
let mut err: *const c_char = std::ptr::null();
let rc = unsafe {
kglite_datasets_sec_parse_tickers_json(
raw.as_ptr(),
&mut out as *mut _,
&mut err as *mut _,
)
};
assert_eq!(rc, KgliteStatusCode::Ok);
let s = unsafe { CStr::from_ptr(out).to_str().unwrap() };
let map: std::collections::HashMap<String, u64> = serde_json::from_str(s).unwrap();
assert_eq!(map.get("AAPL"), Some(&320193u64));
assert_eq!(map.get("MSFT"), Some(&789019u64));
unsafe { crate::kglite_free_string(out) };
}
// Malformed input must produce InvalidArgument and a null output; the error
// string is freed only if one was provided.
#[test]
fn parse_tickers_with_bad_json_returns_invalid_argument() {
let bad = CString::new("not-json").unwrap();
let mut out: *const c_char = std::ptr::null();
let mut err: *const c_char = std::ptr::null();
let rc = unsafe {
kglite_datasets_sec_parse_tickers_json(
bad.as_ptr(),
&mut out as *mut _,
&mut err as *mut _,
)
};
assert_eq!(rc, KgliteStatusCode::InvalidArgument);
assert!(out.is_null());
if !err.is_null() {
unsafe { crate::kglite_free_string(err) };
}
}
// An empty JSON object leaves every filter unset.
#[test]
fn parse_slice_json_empty_is_unrestricted() {
let null_json = CString::new("{}").unwrap();
let spec = parse_slice_json(null_json.as_ptr()).unwrap();
assert!(spec.cik_list.is_none());
assert!(spec.form_types.is_none());
assert!(spec.year_range.is_none());
}
// All three filters supplied together must each be carried into the spec.
#[test]
fn parse_slice_json_with_all_filters() {
let s =
CString::new(r#"{"cik_list":[320193],"form_types":["10-K"],"year_range":[2020,2024]}"#)
.unwrap();
let spec = parse_slice_json(s.as_ptr()).unwrap();
assert_eq!(spec.year_range, Some((2020, 2024)));
assert!(spec.cik_list.as_ref().unwrap().contains(&320193));
assert!(spec.form_types.as_ref().unwrap().contains("10-K"));
}
// A year range whose start is after its end must be rejected.
#[test]
fn parse_slice_json_invalid_year_range_rejected() {
let s = CString::new(r#"{"year_range":[2024,2020]}"#).unwrap();
let err = parse_slice_json(s.as_ptr()).unwrap_err();
assert_eq!(err, KgliteStatusCode::InvalidArgument);
}
}