1use std::collections::HashMap;
2use std::num::NonZeroU32;
3use std::sync::Arc;
4
5use governor::clock::DefaultClock;
6use governor::state::{InMemoryState, NotKeyed};
7use governor::{Quota, RateLimiter};
8
9use crate::cik::Cik;
10use crate::error::{Error, Result};
11use crate::models::*;
12
/// Requests per second a [`ClientBuilder`] allows unless `rate_limit` overrides it.
const DEFAULT_RATE_LIMIT: u32 = 10;

/// Default base URL for the ticker list and the `/Archives/edgar/data` documents.
const BASE_SEC_URL: &str = "https://www.sec.gov";
/// Default base URL for the `/submissions` and `/api/xbrl` JSON endpoints.
const BASE_DATA_URL: &str = "https://data.sec.gov";
/// Default base URL for the `/LATEST/search-index` full-text search endpoint.
const BASE_EFTS_URL: &str = "https://efts.sec.gov";
18
19type Limiter = RateLimiter<NotKeyed, InMemoryState, DefaultClock>;
20
21#[derive(Debug)]
37pub struct Client {
38 http: reqwest::Client,
39 limiter: Arc<Limiter>,
40 base_sec_url: String,
41 base_data_url: String,
42 base_efts_url: String,
43}
44
45pub struct ClientBuilder {
47 user_agent: String,
48 rate_limit: u32,
49 http_client: Option<reqwest::Client>,
50 base_sec_url: Option<String>,
51 base_data_url: Option<String>,
52 base_efts_url: Option<String>,
53}
54
55impl ClientBuilder {
56 pub fn new(user_agent: impl Into<String>) -> Self {
59 Self {
60 user_agent: user_agent.into(),
61 rate_limit: DEFAULT_RATE_LIMIT,
62 http_client: None,
63 base_sec_url: None,
64 base_data_url: None,
65 base_efts_url: None,
66 }
67 }
68
69 pub fn rate_limit(mut self, requests_per_second: u32) -> Self {
72 self.rate_limit = requests_per_second;
73 self
74 }
75
76 pub fn http_client(mut self, client: reqwest::Client) -> Self {
80 self.http_client = Some(client);
81 self
82 }
83
84 #[doc(hidden)]
86 pub fn base_sec_url(mut self, url: impl Into<String>) -> Self {
87 self.base_sec_url = Some(url.into());
88 self
89 }
90
91 #[doc(hidden)]
93 pub fn base_data_url(mut self, url: impl Into<String>) -> Self {
94 self.base_data_url = Some(url.into());
95 self
96 }
97
98 #[doc(hidden)]
100 pub fn base_efts_url(mut self, url: impl Into<String>) -> Self {
101 self.base_efts_url = Some(url.into());
102 self
103 }
104
105 pub fn build(self) -> Result<Client> {
107 if self.user_agent.is_empty() {
108 return Err(Error::Config(
109 "user-agent is required by SEC EDGAR policy".into(),
110 ));
111 }
112
113 let http = match self.http_client {
114 Some(c) => c,
115 None => {
116 let mut headers = reqwest::header::HeaderMap::new();
117 headers.insert(
118 reqwest::header::USER_AGENT,
119 reqwest::header::HeaderValue::from_str(&self.user_agent).map_err(|_| {
120 Error::Config("user-agent contains invalid header characters".into())
121 })?,
122 );
123 reqwest::Client::builder()
124 .default_headers(headers)
125 .build()
126 .map_err(|e| Error::Config(format!("failed to build HTTP client: {e}")))?
127 }
128 };
129
130 let quota = Quota::per_second(
131 NonZeroU32::new(self.rate_limit)
132 .ok_or_else(|| Error::Config("rate limit must be > 0".into()))?,
133 );
134 let limiter = Arc::new(RateLimiter::direct(quota));
135
136 Ok(Client {
137 http,
138 limiter,
139 base_sec_url: self.base_sec_url.unwrap_or_else(|| BASE_SEC_URL.to_owned()),
140 base_data_url: self
141 .base_data_url
142 .unwrap_or_else(|| BASE_DATA_URL.to_owned()),
143 base_efts_url: self
144 .base_efts_url
145 .unwrap_or_else(|| BASE_EFTS_URL.to_owned()),
146 })
147 }
148}
149
150impl Client {
151 async fn get_bytes(&self, url: &str) -> Result<Vec<u8>> {
153 self.limiter.until_ready().await;
154
155 let response = self
156 .http
157 .get(url)
158 .send()
159 .await
160 .map_err(|e| Error::Request {
161 endpoint: url.to_owned(),
162 source: e,
163 })?;
164
165 let status = response.status();
166 if !status.is_success() {
167 return Err(Error::Api {
168 status,
169 endpoint: url.to_owned(),
170 message: format!("unexpected status {status}"),
171 });
172 }
173
174 response
175 .bytes()
176 .await
177 .map(|b| b.to_vec())
178 .map_err(|e| Error::Request {
179 endpoint: url.to_owned(),
180 source: e,
181 })
182 }
183
184 async fn get_json<T: serde::de::DeserializeOwned>(&self, url: &str) -> Result<T> {
186 self.limiter.until_ready().await;
187
188 let response = self
189 .http
190 .get(url)
191 .send()
192 .await
193 .map_err(|e| Error::Request {
194 endpoint: url.to_owned(),
195 source: e,
196 })?;
197
198 let status = response.status();
199 if !status.is_success() {
200 return Err(Error::Api {
201 status,
202 endpoint: url.to_owned(),
203 message: format!("unexpected status {status}"),
204 });
205 }
206
207 response.json::<T>().await.map_err(|e| Error::Decode {
208 endpoint: url.to_owned(),
209 source: e,
210 })
211 }
212
213 pub async fn get_tickers(&self) -> Result<HashMap<String, Ticker>> {
218 let url = format!("{}/files/company_tickers.json", self.base_sec_url);
219 let raw: HashMap<String, Ticker> = self.get_json(&url).await?;
220 let result = raw.into_values().map(|t| (t.cik.to_string(), t)).collect();
221 Ok(result)
222 }
223
224 pub async fn get_submission(&self, cik: Cik) -> Result<Submission> {
227 let url = format!(
228 "{}/submissions/CIK{}.json",
229 self.base_data_url,
230 cik.to_padded_string()
231 );
232 self.get_json(&url).await
233 }
234
235 pub async fn get_document(
239 &self,
240 cik: Cik,
241 accession: &str,
242 primary_doc: &str,
243 ) -> Result<String> {
244 let clean_acc = accession.replace('-', "");
245 let url = format!(
246 "{}/Archives/edgar/data/{}/{}/{}",
247 self.base_sec_url,
248 cik.to_padded_string(),
249 clean_acc,
250 primary_doc
251 );
252 let bytes = self.get_bytes(&url).await?;
253 String::from_utf8(bytes).map_err(|e| Error::DecodeBody {
254 endpoint: url,
255 message: format!("response is not valid UTF-8: {e}"),
256 })
257 }
258
259 pub async fn get_company_concept(
263 &self,
264 cik: Cik,
265 taxonomy: &str,
266 tag: &str,
267 ) -> Result<CompanyConcept> {
268 let url = format!(
269 "{}/api/xbrl/companyconcept/CIK{}/{}/{}.json",
270 self.base_data_url,
271 cik.to_padded_string(),
272 taxonomy,
273 tag
274 );
275 self.get_json(&url).await
276 }
277
278 pub async fn get_company_facts(&self, cik: Cik) -> Result<CompanyFacts> {
282 let url = format!(
283 "{}/api/xbrl/companyfacts/CIK{}.json",
284 self.base_data_url,
285 cik.to_padded_string()
286 );
287 self.get_json(&url).await
288 }
289
290 pub async fn get_frame(
298 &self,
299 taxonomy: &str,
300 tag: &str,
301 unit: &str,
302 period: &str,
303 ) -> Result<Frame> {
304 let url = format!(
305 "{}/api/xbrl/frames/{}/{}/{}/{}.json",
306 self.base_data_url, taxonomy, tag, unit, period
307 );
308 self.get_json(&url).await
309 }
310
311 pub async fn search(
315 &self,
316 query: &str,
317 options: Option<&SearchOptions>,
318 ) -> Result<SearchResult> {
319 let mut params = vec![("q".to_owned(), query.to_owned())];
320
321 if let Some(opts) = options {
322 if !opts.forms.is_empty() {
323 params.push(("forms".to_owned(), opts.forms.join(",")));
324 }
325 if opts.date_start.is_some() || opts.date_end.is_some() {
326 params.push(("dateRange".to_owned(), "custom".to_owned()));
327 if let Some(ref start) = opts.date_start {
328 params.push(("startdt".to_owned(), start.clone()));
329 }
330 if let Some(ref end) = opts.date_end {
331 params.push(("enddt".to_owned(), end.clone()));
332 }
333 }
334 if let Some(from) = opts.from
335 && from > 0
336 {
337 params.push(("from".to_owned(), from.to_string()));
338 }
339 }
340
341 let url = reqwest::Url::parse_with_params(
342 &format!("{}/LATEST/search-index", self.base_efts_url),
343 ¶ms,
344 )
345 .map_err(|e| Error::Config(format!("failed to build search URL: {e}")))?;
346
347 let raw: EftsResponse = self.get_json(url.as_str()).await?;
348
349 let hits = raw
350 .hits
351 .hits
352 .into_iter()
353 .map(|h| SearchHit {
354 id: h.id,
355 score: h.score,
356 ciks: h.source.ciks,
357 display_names: h.source.display_names,
358 form: h.source.form.unwrap_or_default(),
359 file_date: h.source.file_date.unwrap_or_default(),
360 period_ending: h.source.period_ending,
361 accession_number: h.source.adsh.unwrap_or_default(),
362 file_type: h.source.file_type.unwrap_or_default(),
363 file_description: h.source.file_description.unwrap_or_default(),
364 })
365 .collect();
366
367 Ok(SearchResult {
368 total: raw.hits.total.value,
369 hits,
370 })
371 }
372}