Skip to main content

edgar_rs/
client.rs

1use std::collections::HashMap;
2use std::num::NonZeroU32;
3use std::sync::Arc;
4
5use governor::clock::DefaultClock;
6use governor::state::{InMemoryState, NotKeyed};
7use governor::{Quota, RateLimiter};
8
9use crate::cik::Cik;
10use crate::error::{Error, Result};
11use crate::models::*;
12
/// Default request budget, in requests per second, used when the builder's
/// rate limit is not overridden.
const DEFAULT_RATE_LIMIT: u32 = 10;

/// Default base URL for `www.sec.gov` resources (ticker list, filing archives).
const BASE_SEC_URL: &str = "https://www.sec.gov";
/// Default base URL for the `data.sec.gov` JSON APIs (submissions, XBRL).
const BASE_DATA_URL: &str = "https://data.sec.gov";
/// Default base URL for the EDGAR full-text search endpoint.
const BASE_EFTS_URL: &str = "https://efts.sec.gov";
18
19type Limiter = RateLimiter<NotKeyed, InMemoryState, DefaultClock>;
20
21/// An async HTTP client for the SEC EDGAR API.
22///
23/// Handles rate limiting, User-Agent headers, and JSON deserialization.
24/// Construct via [`ClientBuilder`].
25///
26/// # Example
27/// ```no_run
28/// # async fn example() -> edgar_rs::Result<()> {
29/// let client = edgar_rs::ClientBuilder::new("MyApp/1.0 contact@example.com")
30///     .build()?;
31///
32/// let tickers = client.get_tickers().await?;
33/// # Ok(())
34/// # }
35/// ```
36#[derive(Debug)]
37pub struct Client {
38    http: reqwest::Client,
39    limiter: Arc<Limiter>,
40    base_sec_url: String,
41    base_data_url: String,
42    base_efts_url: String,
43}
44
45/// Builder for constructing a [`Client`].
46pub struct ClientBuilder {
47    user_agent: String,
48    rate_limit: u32,
49    http_client: Option<reqwest::Client>,
50    base_sec_url: Option<String>,
51    base_data_url: Option<String>,
52    base_efts_url: Option<String>,
53}
54
55impl ClientBuilder {
56    /// Creates a new builder. The user agent is required by SEC EDGAR policy.
57    /// Typically in the format `"AppName/Version contact@email.com"`.
58    pub fn new(user_agent: impl Into<String>) -> Self {
59        Self {
60            user_agent: user_agent.into(),
61            rate_limit: DEFAULT_RATE_LIMIT,
62            http_client: None,
63            base_sec_url: None,
64            base_data_url: None,
65            base_efts_url: None,
66        }
67    }
68
69    /// Override the default rate limit of 10 requests per second.
70    /// Setting a value above 10 is not recommended per SEC fair access policy.
71    pub fn rate_limit(mut self, requests_per_second: u32) -> Self {
72        self.rate_limit = requests_per_second;
73        self
74    }
75
76    /// Provide a pre-configured `reqwest::Client`.
77    /// Useful for custom timeouts, proxies, or testing.
78    /// Note: the User-Agent default header on this client will be overridden.
79    pub fn http_client(mut self, client: reqwest::Client) -> Self {
80        self.http_client = Some(client);
81        self
82    }
83
84    /// Override the base SEC URL (for testing).
85    #[doc(hidden)]
86    pub fn base_sec_url(mut self, url: impl Into<String>) -> Self {
87        self.base_sec_url = Some(url.into());
88        self
89    }
90
91    /// Override the base data URL (for testing).
92    #[doc(hidden)]
93    pub fn base_data_url(mut self, url: impl Into<String>) -> Self {
94        self.base_data_url = Some(url.into());
95        self
96    }
97
98    /// Override the base EFTS URL (for testing).
99    #[doc(hidden)]
100    pub fn base_efts_url(mut self, url: impl Into<String>) -> Self {
101        self.base_efts_url = Some(url.into());
102        self
103    }
104
105    /// Build the [`Client`]. Returns an error if configuration is invalid.
106    pub fn build(self) -> Result<Client> {
107        if self.user_agent.is_empty() {
108            return Err(Error::Config(
109                "user-agent is required by SEC EDGAR policy".into(),
110            ));
111        }
112
113        let http = match self.http_client {
114            Some(c) => c,
115            None => {
116                let mut headers = reqwest::header::HeaderMap::new();
117                headers.insert(
118                    reqwest::header::USER_AGENT,
119                    reqwest::header::HeaderValue::from_str(&self.user_agent).map_err(|_| {
120                        Error::Config("user-agent contains invalid header characters".into())
121                    })?,
122                );
123                reqwest::Client::builder()
124                    .default_headers(headers)
125                    .build()
126                    .map_err(|e| Error::Config(format!("failed to build HTTP client: {e}")))?
127            }
128        };
129
130        let quota = Quota::per_second(
131            NonZeroU32::new(self.rate_limit)
132                .ok_or_else(|| Error::Config("rate limit must be > 0".into()))?,
133        );
134        let limiter = Arc::new(RateLimiter::direct(quota));
135
136        Ok(Client {
137            http,
138            limiter,
139            base_sec_url: self.base_sec_url.unwrap_or_else(|| BASE_SEC_URL.to_owned()),
140            base_data_url: self
141                .base_data_url
142                .unwrap_or_else(|| BASE_DATA_URL.to_owned()),
143            base_efts_url: self
144                .base_efts_url
145                .unwrap_or_else(|| BASE_EFTS_URL.to_owned()),
146        })
147    }
148}
149
150impl Client {
151    /// Internal: wait for rate limiter, send GET request, check status, return body as bytes.
152    async fn get_bytes(&self, url: &str) -> Result<Vec<u8>> {
153        self.limiter.until_ready().await;
154
155        let response = self
156            .http
157            .get(url)
158            .send()
159            .await
160            .map_err(|e| Error::Request {
161                endpoint: url.to_owned(),
162                source: e,
163            })?;
164
165        let status = response.status();
166        if !status.is_success() {
167            return Err(Error::Api {
168                status,
169                endpoint: url.to_owned(),
170                message: format!("unexpected status {status}"),
171            });
172        }
173
174        response
175            .bytes()
176            .await
177            .map(|b| b.to_vec())
178            .map_err(|e| Error::Request {
179                endpoint: url.to_owned(),
180                source: e,
181            })
182    }
183
184    /// Internal: GET + JSON deserialization.
185    async fn get_json<T: serde::de::DeserializeOwned>(&self, url: &str) -> Result<T> {
186        self.limiter.until_ready().await;
187
188        let response = self
189            .http
190            .get(url)
191            .send()
192            .await
193            .map_err(|e| Error::Request {
194                endpoint: url.to_owned(),
195                source: e,
196            })?;
197
198        let status = response.status();
199        if !status.is_success() {
200            return Err(Error::Api {
201                status,
202                endpoint: url.to_owned(),
203                message: format!("unexpected status {status}"),
204            });
205        }
206
207        response.json::<T>().await.map_err(|e| Error::Decode {
208            endpoint: url.to_owned(),
209            source: e,
210        })
211    }
212
213    // ─── Public API Methods ────────────────────────────
214
215    /// Fetches the complete mapping of CIK numbers to ticker symbols.
216    /// The returned map is keyed by CIK number as a string.
217    pub async fn get_tickers(&self) -> Result<HashMap<String, Ticker>> {
218        let url = format!("{}/files/company_tickers.json", self.base_sec_url);
219        let raw: HashMap<String, Ticker> = self.get_json(&url).await?;
220        let result = raw.into_values().map(|t| (t.cik.to_string(), t)).collect();
221        Ok(result)
222    }
223
224    /// Fetches company submission data for the given CIK.
225    /// Includes company metadata and recent filing history.
226    pub async fn get_submission(&self, cik: Cik) -> Result<Submission> {
227        let url = format!(
228            "{}/submissions/CIK{}.json",
229            self.base_data_url,
230            cik.to_padded_string()
231        );
232        self.get_json(&url).await
233    }
234
235    /// Fetches a specific filing document from EDGAR.
236    /// The accession number should be in dashed format (e.g., `"0000320193-24-000123"`);
237    /// dashes are stripped automatically for the URL path.
238    pub async fn get_document(
239        &self,
240        cik: Cik,
241        accession: &str,
242        primary_doc: &str,
243    ) -> Result<String> {
244        let clean_acc = accession.replace('-', "");
245        let url = format!(
246            "{}/Archives/edgar/data/{}/{}/{}",
247            self.base_sec_url,
248            cik.to_padded_string(),
249            clean_acc,
250            primary_doc
251        );
252        let bytes = self.get_bytes(&url).await?;
253        String::from_utf8(bytes).map_err(|e| Error::DecodeBody {
254            endpoint: url,
255            message: format!("response is not valid UTF-8: {e}"),
256        })
257    }
258
259    /// Fetches all XBRL disclosures for a single concept from a company.
260    /// Taxonomy is typically `"us-gaap"`, `"dei"`, `"ifrs-full"`, `"srt"`, or `"invest"`.
261    /// Tag is the concept name, e.g., `"AccountsPayableCurrent"` or `"Revenues"`.
262    pub async fn get_company_concept(
263        &self,
264        cik: Cik,
265        taxonomy: &str,
266        tag: &str,
267    ) -> Result<CompanyConcept> {
268        let url = format!(
269            "{}/api/xbrl/companyconcept/CIK{}/{}/{}.json",
270            self.base_data_url,
271            cik.to_padded_string(),
272            taxonomy,
273            tag
274        );
275        self.get_json(&url).await
276    }
277
278    /// Fetches all XBRL facts for a company in a single call.
279    /// The response can be very large (multi-MB) as it includes every concept
280    /// the company has ever reported.
281    pub async fn get_company_facts(&self, cik: Cik) -> Result<CompanyFacts> {
282        let url = format!(
283            "{}/api/xbrl/companyfacts/CIK{}.json",
284            self.base_data_url,
285            cik.to_padded_string()
286        );
287        self.get_json(&url).await
288    }
289
290    /// Fetches aggregated XBRL data across all companies for a given
291    /// concept, unit, and period.
292    ///
293    /// Period format examples:
294    /// - `"CY2023"` for annual
295    /// - `"CY2023Q1"` for quarterly duration
296    /// - `"CY2023Q1I"` for quarterly instantaneous
297    pub async fn get_frame(
298        &self,
299        taxonomy: &str,
300        tag: &str,
301        unit: &str,
302        period: &str,
303    ) -> Result<Frame> {
304        let url = format!(
305            "{}/api/xbrl/frames/{}/{}/{}/{}.json",
306            self.base_data_url, taxonomy, tag, unit, period
307        );
308        self.get_json(&url).await
309    }
310
311    /// Performs a full-text search across EDGAR filings.
312    /// The query string supports wildcards (`*`), boolean operators (`OR`, `NOT`),
313    /// and exact phrase matching with quotes.
314    pub async fn search(
315        &self,
316        query: &str,
317        options: Option<&SearchOptions>,
318    ) -> Result<SearchResult> {
319        let mut params = vec![("q".to_owned(), query.to_owned())];
320
321        if let Some(opts) = options {
322            if !opts.forms.is_empty() {
323                params.push(("forms".to_owned(), opts.forms.join(",")));
324            }
325            if opts.date_start.is_some() || opts.date_end.is_some() {
326                params.push(("dateRange".to_owned(), "custom".to_owned()));
327                if let Some(ref start) = opts.date_start {
328                    params.push(("startdt".to_owned(), start.clone()));
329                }
330                if let Some(ref end) = opts.date_end {
331                    params.push(("enddt".to_owned(), end.clone()));
332                }
333            }
334            if let Some(from) = opts.from
335                && from > 0
336            {
337                params.push(("from".to_owned(), from.to_string()));
338            }
339        }
340
341        let url = reqwest::Url::parse_with_params(
342            &format!("{}/LATEST/search-index", self.base_efts_url),
343            &params,
344        )
345        .map_err(|e| Error::Config(format!("failed to build search URL: {e}")))?;
346
347        let raw: EftsResponse = self.get_json(url.as_str()).await?;
348
349        let hits = raw
350            .hits
351            .hits
352            .into_iter()
353            .map(|h| SearchHit {
354                id: h.id,
355                score: h.score,
356                ciks: h.source.ciks,
357                display_names: h.source.display_names,
358                form: h.source.form.unwrap_or_default(),
359                file_date: h.source.file_date.unwrap_or_default(),
360                period_ending: h.source.period_ending,
361                accession_number: h.source.adsh.unwrap_or_default(),
362                file_type: h.source.file_type.unwrap_or_default(),
363                file_description: h.source.file_description.unwrap_or_default(),
364            })
365            .collect();
366
367        Ok(SearchResult {
368            total: raw.hits.total.value,
369            hits,
370        })
371    }
372}