edgarkit/search.rs
//! Search SEC EDGAR filings using flexible criteria and filters.
//!
//! This module provides a powerful search interface to the SEC's EDGAR full-text search system.
//! You can search by form type, date ranges, company names, CIKs, keywords, and more. The search
//! API supports both single-page queries and automatic pagination through all matching results.
//!
//! Search results include comprehensive metadata about each filing such as company names, CIKs,
//! filing dates, form types, and accession numbers. Results are returned in reverse chronological
//! order by default (newest first).
//!
//! # Search Capabilities
//!
//! - Full-text search with keyword queries
//! - Filter by form types (10-K, 8-K, S-1, etc.)
//! - Date range filtering
//! - Company name or CIK filtering
//! - SIC code and location-based filtering
//! - Pagination with configurable page sizes
//!
//! # Performance
//!
//! The `search_all()` method fetches all results across multiple pages using parallel requests
//! (up to 7 concurrent) while respecting rate limits. This provides significantly better
//! performance than sequential pagination for large result sets.
//!
//! # Example
//!
//! ```ignore
//! use edgarkit::{Edgar, SearchOperations, SearchOptions};
//!
//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
//! let edgar = Edgar::new("your_app_name contact@example.com")?;
//!
//! let options = SearchOptions::new()
//!     .with_forms(vec!["10-K".to_string()])
//!     .with_date_range("2024-01-01".to_string(), "2024-12-31".to_string())
//!     .with_count(100);
//!
//! let results = edgar.search_all(options).await?;
//! # Ok(())
//! # }
//! ```

44use super::Edgar;
45use super::error::{EdgarError, Result};
46use super::traits::SearchOperations;
47use async_trait::async_trait;
48use serde::{Deserialize, Deserializer, de};
49
50/// Response container from the EDGAR search API containing search metadata and results.
51///
52/// This structure wraps the complete search response including timing information,
53/// shard statistics from Elasticsearch, and the actual search hits. The search
54/// system uses Elasticsearch under the hood, which is why you'll see fields like
55/// `_shards` and `_score` that are specific to that search engine.
56#[derive(Debug, Clone, Deserialize)]
57pub struct SearchResponse {
58 /// Time taken to execute search (ms)
59 pub took: u32,
60
61 /// Whether the search timed out
62 pub timed_out: bool,
63
64 /// Shard information
65 pub _shards: Shards,
66
67 /// Search results
68 pub hits: Hits,
69}
70
71/// Information about Elasticsearch shards that processed the search query.
72///
73/// The EDGAR search system uses Elasticsearch which distributes data across multiple
74/// shards for performance. This struct provides diagnostic information about how many
75/// shards were involved and whether all completed successfully.
76#[derive(Debug, Clone, Deserialize)]
77pub struct Shards {
78 pub total: u32,
79 pub successful: u32,
80 pub skipped: u32,
81 pub failed: u32,
82}
83
84/// Container for search results including total count and individual hit documents.
85///
86/// This structure holds the array of matching filings along with metadata about the
87/// total number of matches and relevance scoring. The `total` field indicates how
88/// many documents matched your search criteria, while `hits` contains the actual
89/// results for the current page.
90#[derive(Debug, Clone, Deserialize)]
91pub struct Hits {
92 /// Total hits information
93 pub total: TotalHits,
94
95 /// Maximum relevance score
96 #[serde(default)]
97 pub max_score: Option<f64>,
98
99 /// Array of hit documents
100 pub hits: Vec<Hit>,
101}
102
103/// Total count of matching documents and the relationship type.
104///
105/// The `relation` field indicates whether the count is exact ("eq") or a lower bound
106/// ("gte"). For very large result sets, Elasticsearch may provide an approximate count.
107#[derive(Debug, Clone, Deserialize)]
108pub struct TotalHits {
109 pub value: u32,
110 pub relation: String,
111}
112
113/// A single search result representing a matching SEC filing.
114///
115/// Each hit contains metadata about the search match (index name, document ID, relevance
116/// score) and the actual filing data in the `_source` field. The underscore-prefixed
117/// fields are Elasticsearch conventions for system metadata.
118#[derive(Debug, Clone, Deserialize)]
119pub struct Hit {
120 /// Index name
121 pub _index: String,
122
123 /// Document ID
124 pub _id: String,
125
126 /// Relevance score
127 #[serde(default)]
128 pub _score: Option<f64>,
129
130 /// Filing information
131 pub _source: Source,
132}
133
134/// Filing information and metadata extracted from the EDGAR search index.
135///
136/// This structure contains all the details about a specific SEC filing including company
137/// identifiers, filing metadata, form type, and business information. This is the primary
138/// data payload for each search result and includes everything you need to identify and
139/// retrieve the actual filing documents.
140///
141/// Many fields are arrays because a single filing can be associated with multiple entities,
142/// locations, or classification codes. For example, merger filings may list multiple CIKs.
143#[derive(Debug, Clone, Deserialize)]
144pub struct Source {
145 /// Company CIK numbers
146 pub ciks: Vec<String>,
147
148 /// Period ending date (if applicable)
149 #[serde(default)]
150 pub period_ending: Option<String>,
151
152 /// File numbers
153 pub file_num: Option<Vec<String>>,
154
155 /// Company display names
156 pub display_names: Vec<String>,
157
158 /// XSL stylesheet reference
159 #[serde(default)]
160 pub xsl: Option<String>,
161
162 /// Sequence number
163 #[serde(deserialize_with = "deserialize_sequence")]
164 pub sequence: u32,
165
166 /// Root form types
167 pub root_forms: Vec<String>,
168
169 /// Filing date (YYYY-MM-DD)
170 pub file_date: String,
171
172 /// Business states
173 pub biz_states: Vec<String>,
174
175 /// SIC codes
176 pub sics: Vec<String>,
177
178 /// Form type (e.g., "10-K", "8-K")
179 pub form: String,
180
181 /// Accession number
182 pub adsh: String,
183
184 /// Film numbers
185 pub film_num: Vec<String>,
186
187 /// Business locations
188 pub biz_locations: Vec<String>,
189
190 /// File type
191 pub file_type: String,
192
193 /// File description
194 #[serde(default)]
195 pub file_description: Option<String>,
196
197 /// Incorporation states
198 pub inc_states: Vec<String>,
199
200 /// Item numbers (for 8-K)
201 pub items: Option<Vec<String>>,
202}
203
/// Filters and paging settings for an EDGAR full-text search.
///
/// Every field is optional: an option left as `None` is simply not sent with
/// the request, so a query can be as narrow or as broad as required. Values
/// are normally supplied through the chainable `with_*` builder methods.
///
/// The query text supports the advanced syntax of EDGAR full-text search
/// (Boolean operators, quoted phrases, wildcards); see the SEC's full-text
/// search FAQ for the details.
///
/// # Builder Pattern
///
/// Each builder method takes `self` by value and returns it, so calls chain:
///
/// ```rust
/// # use edgarkit::SearchOptions;
/// let options = SearchOptions::new()
///     .with_query("acquisition merger")
///     .with_forms(vec!["8-K".to_string()])
///     .with_date_range("2024-01-01".to_string(), "2024-12-31".to_string())
///     .with_count(100);
/// ```
///
/// # Pagination
///
/// `with_page()`, `with_from()` and `with_count()` control paging; at most 100
/// results are returned per page. To collect every match across all pages,
/// call `search_all()` rather than paginating by hand.
///
/// # Common Patterns
///
/// - **Recent filings**: `with_forms()` plus `with_count()`, no date filter
/// - **Company-specific**: `with_ciks()` with one or more company CIKs
/// - **Date-bounded**: `with_date_range()` to restrict the filing period
/// - **Form type filtering**: `with_forms()` to select specific filing types
#[derive(Clone, Debug, Default)]
pub struct SearchOptions {
    /// Keys typed so far, for typeahead.
    pub keys_typed: Option<String>,

    /// Query text (special operators are supported, see the SEC FAQ).
    pub query: Option<String>,

    /// Filing category.
    pub category: Option<String>,

    /// Company location to filter by.
    pub location_code: Option<String>,

    /// Company or individual name (cannot be combined with CIK or SIC).
    pub entity_name: Option<String>,

    /// Form types to search, e.g. ["10-K", "10-Q"].
    pub forms: Option<Vec<String>>,

    /// Several location codes to filter by.
    pub location_codes: Option<Vec<String>>,

    /// Page number used for pagination.
    pub page: Option<u32>,

    /// How many results to skip.
    pub from: Option<u32>,

    /// How many results to return (max 100).
    pub count: Option<u32>,

    /// Sort oldest first rather than newest first.
    pub reverse_order: Option<bool>,

    /// Start date (YYYY-MM-DD); requires `end_date`.
    pub start_date: Option<String>,

    /// End date (YYYY-MM-DD); requires `start_date`.
    pub end_date: Option<String>,

    /// Match on base words (the default) or exactly as entered.
    pub stemming: Option<String>,

    /// CIK codes to search (cannot be combined with name or SIC).
    pub ciks: Option<Vec<String>>,

    /// Standard Industrial Classification code.
    pub sic: Option<String>,

    /// Filter on the incorporation location rather than the HQ location.
    pub incorporated_location: Option<bool>,
}
294
295/// Custom deserializer for sequence field that can be either u32 or string
296fn deserialize_sequence<'de, D>(deserializer: D) -> std::result::Result<u32, D::Error>
297where
298 D: Deserializer<'de>,
299{
300 struct SequenceVisitor;
301
302 impl<'de> de::Visitor<'de> for SequenceVisitor {
303 type Value = u32;
304
305 fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
306 formatter.write_str("an integer or a string containing an integer")
307 }
308
309 fn visit_u64<E>(self, value: u64) -> std::result::Result<Self::Value, E>
310 where
311 E: de::Error,
312 {
313 Ok(value as u32)
314 }
315
316 fn visit_str<E>(self, value: &str) -> std::result::Result<Self::Value, E>
317 where
318 E: de::Error,
319 {
320 value.parse().map_err(de::Error::custom)
321 }
322 }
323
324 deserializer.deserialize_any(SequenceVisitor)
325}
326
327impl SearchOptions {
328 /// Creates a new instance of SearchOptions with default values
329 pub fn new() -> Self {
330 Self::default()
331 }
332
333 /// Sets the search query text.
334 ///
335 /// # Example
336 ///
337 /// ```rust
338 /// # use edgarkit::SearchOptions;
339 /// let options = SearchOptions::new()
340 /// .with_query("quarterly report");
341 /// ```
342 pub fn with_query(mut self, query: impl Into<String>) -> Self {
343 self.query = Some(query.into());
344 self
345 }
346
347 /// Sets the keys typed for typeahead search functionality
348 pub fn with_keys_typed(mut self, keys: impl Into<String>) -> Self {
349 self.keys_typed = Some(keys.into());
350 self
351 }
352
353 /// Sets the category filter for the search
354 pub fn with_category(mut self, category: impl Into<String>) -> Self {
355 self.category = Some(category.into());
356 self
357 }
358
359 /// Sets the location code filter
360 pub fn with_location_code(mut self, code: impl Into<String>) -> Self {
361 self.location_code = Some(code.into());
362 self
363 }
364
365 /// Sets the entity name filter
366 pub fn with_entity_name(mut self, name: impl Into<String>) -> Self {
367 self.entity_name = Some(name.into());
368 self
369 }
370
371 /// Sets the form types to filter by (e.g., ["10-K", "10-Q"])
372 pub fn with_forms(mut self, forms: Vec<String>) -> Self {
373 self.forms = Some(forms);
374 self
375 }
376
377 /// Sets the location codes to filter by
378 pub fn with_location_codes(mut self, codes: Vec<String>) -> Self {
379 self.location_codes = Some(codes);
380 self
381 }
382
383 /// Sets the page number for pagination (starting from 1)
384 pub fn with_page(mut self, page: u32) -> Self {
385 self.page = Some(page);
386 self
387 }
388
389 /// Sets the starting index for results
390 pub fn with_from(mut self, from: u32) -> Self {
391 self.from = Some(from);
392 self
393 }
394
395 /// Sets the maximum number of results to return
396 pub fn with_count(mut self, count: u32) -> Self {
397 self.count = Some(count);
398 self
399 }
400
401 /// Sets whether to return results in reverse order
402 pub fn with_reverse_order(mut self, reverse: bool) -> Self {
403 self.reverse_order = Some(reverse);
404 self
405 }
406
407 /// Sets the date range for the search
408 ///
409 /// # Arguments
410 /// * `start_date` - Start date in YYYY-MM-DD format
411 /// * `end_date` - End date in YYYY-MM-DD format
412 pub fn with_date_range(mut self, start_date: String, end_date: String) -> Self {
413 self.start_date = Some(start_date);
414 self.end_date = Some(end_date);
415 self
416 }
417
418 /// Sets stemming option for search
419 pub fn with_stemming(mut self, stemming: impl Into<String>) -> Self {
420 self.stemming = Some(stemming.into());
421 self
422 }
423
424 /// Sets company CIK filter(s).
425 ///
426 /// # Examples
427 ///
428 /// ```rust
429 /// # use edgarkit::SearchOptions;
430 /// // Single CIK
431 /// let options = SearchOptions::new().with_ciks(vec!["0001234567".to_string()]);
432 ///
433 /// // Multiple CIKs
434 /// let options = SearchOptions::new().with_ciks(vec!["0001234567".to_string(), "0007654321".to_string()]);
435 /// ```
436 pub fn with_ciks<T>(mut self, ciks: T) -> Self
437 where
438 T: Into<Vec<String>>,
439 {
440 self.ciks = Some(ciks.into());
441 self
442 }
443
444 /// Sets a single company CIK filter
445 ///
446 /// This is a convenience method for backwards compatibility
447 ///
448 /// # Arguments
449 /// * `cik` - A single CIK
450 pub fn with_cik(self, cik: impl Into<String>) -> Self {
451 self.with_ciks(vec![cik.into()])
452 }
453
454 /// Sets SIC code filter
455 pub fn with_sic(mut self, sic: impl Into<String>) -> Self {
456 self.sic = Some(sic.into());
457 self
458 }
459
460 /// Sets whether to use incorporation location instead of HQ
461 pub fn with_incorporated_location(mut self, incorporated: bool) -> Self {
462 self.incorporated_location = Some(incorporated);
463 self
464 }
465
466 pub fn to_query_params(&self) -> Vec<(String, String)> {
467 let mut params = Vec::new();
468
469 if let Some(ref query) = self.query {
470 params.push(("q".to_string(), query.clone()));
471 }
472
473 if let Some(ref keys) = self.keys_typed {
474 params.push(("keysTyped".to_string(), keys.clone()));
475 }
476
477 if let Some(ref category) = self.category {
478 params.push(("category".to_string(), category.clone()));
479 }
480
481 if let Some(ref code) = self.location_code {
482 params.push(("locationCode".to_string(), code.clone()));
483 }
484
485 if let Some(ref name) = self.entity_name {
486 params.push(("entityName".to_string(), name.clone()));
487 }
488
489 if let Some(ref forms) = self.forms {
490 params.push(("forms".to_string(), forms.join(",")));
491 }
492
493 if let Some(ref codes) = self.location_codes {
494 params.push(("locationCodes".to_string(), codes.join(",")));
495 }
496
497 if let Some(page) = self.page {
498 params.push(("page".to_string(), page.to_string()));
499 }
500
501 if let Some(from) = self.from {
502 params.push(("from".to_string(), from.to_string()));
503 }
504
505 if let Some(count) = self.count {
506 params.push(("count".to_string(), count.to_string()));
507 }
508
509 if let Some(reverse) = self.reverse_order {
510 params.push((
511 "reverse_order".to_string(),
512 if reverse { "TRUE" } else { "FALSE" }.to_string(),
513 ));
514 }
515
516 if let Some(ref start) = self.start_date {
517 params.push(("startdt".to_string(), start.clone()));
518 }
519
520 if let Some(ref end) = self.end_date {
521 params.push(("enddt".to_string(), end.clone()));
522 }
523
524 if let Some(ref stemming) = self.stemming {
525 params.push(("stemming".to_string(), stemming.clone()));
526 }
527
528 if let Some(ref ciks) = self.ciks {
529 params.push(("ciks".to_string(), ciks.join(",")));
530 }
531
532 if let Some(ref sic) = self.sic {
533 params.push(("sic".to_string(), sic.clone()));
534 }
535
536 if let Some(incorporated) = self.incorporated_location {
537 params.push((
538 "incorporated_location".to_string(),
539 incorporated.to_string(),
540 ));
541 }
542
543 params
544 }
545}
546
547/// Search operations for querying SEC EDGAR filings with flexible filters and criteria.
548///
549/// This trait provides two main search methods: `search()` for single-page queries and
550/// `search_all()` for comprehensive multi-page retrieval. Both methods use the same
551/// `SearchOptions` for filtering, but `search_all()` automatically handles pagination
552/// and fetches all matching results in parallel batches.
553///
554/// The search system is powered by SEC's EDGAR full-text search, which indexes filing
555/// content, company names, form types, and metadata. Results are ranked by relevance
556/// when using keyword queries, or sorted by filing date when searching by form type
557/// or date range.
558///
559/// # Performance Considerations
560///
561/// For large result sets (>100 documents), `search_all()` is significantly faster than
562/// manually paginating because it fetches multiple pages concurrently. However, it will
563/// retrieve ALL matching results, which could be thousands of documents. Consider using
564/// date ranges or other filters to limit scope when appropriate.
565///
566/// # Example
567///
568/// ```ignore
569/// use edgarkit::{Edgar, SearchOperations, SearchOptions};
570///
571/// async fn example() -> Result<(), Box<dyn std::error::Error>> {
572/// let edgar = Edgar::new("your_app_name contact@example.com")?;
573///
574/// let options = SearchOptions::new()
575/// .with_forms(vec!["10-K".to_string()])
576/// .with_count(10);
577///
578/// // Single page
579/// let first_page = edgar.search(options.clone()).await?;
580/// println!("First page: {} results", first_page.hits.hits.len());
581///
582/// // All results across pages
583/// let all_results = edgar.search_all(options).await?;
584/// println!("Total results: {}", all_results.len());
585/// Ok(())
586/// }
587/// ```
588#[async_trait]
589impl SearchOperations for Edgar {
590 /// Executes a search query and returns a single page of results.
591 ///
592 /// This method performs one search request and returns the results for the specified
593 /// page. Use this when you only need a small number of results or want to implement
594 /// custom pagination logic. For retrieving all matching results, use `search_all()`
595 /// which handles pagination automatically.
596 ///
597 /// The returned `SearchResponse` includes metadata about the search (execution time,
598 /// total hits) along with the actual results for the current page. By default, results
599 /// are sorted by filing date (newest first) unless a keyword query is provided, in
600 /// which case they're ranked by relevance.
601 ///
602 /// # Arguments
603 ///
604 /// * `options` - Search filters and pagination settings
605 ///
606 /// # Returns
607 ///
608 /// Returns a `SearchResponse` containing search metadata and results for one page.
609 ///
610 /// # Example
611 ///
612 /// ```ignore
613 /// let options = SearchOptions::new()
614 /// .with_forms(vec!["10-Q".to_string()])
615 /// .with_page(1)
616 /// .with_count(50);
617 ///
618 /// let response = edgar.search(options).await?;
619 /// println!("Found {} total matches", response.hits.total.value);
620 /// println!("This page has {} results", response.hits.hits.len());
621 /// ```
622 async fn search(&self, options: SearchOptions) -> Result<SearchResponse> {
623 let params = options.to_query_params();
624 let query_string = serde_urlencoded::to_string(¶ms)
625 .map_err(|e| EdgarError::InvalidResponse(e.to_string()))?;
626
627 let url = format!("{}?{}", self.search_url(), query_string);
628 let response = self.get(&url).await?;
629
630 Ok(serde_json::from_str(&response)?)
631 }
632
633 /// Fetches all matching results across multiple pages with automatic pagination.
634 ///
635 /// This method is designed for comprehensive data retrieval where you need all filings
636 /// matching your search criteria. It automatically handles pagination by first querying
637 /// for total count, then fetching all pages in parallel batches of up to 7 concurrent
638 /// requests. This provides excellent performance while respecting SEC rate limits.
639 ///
640 /// The method aggregates all results into a single vector of `Hit` objects, making it
641 /// easy to process the complete result set. Progress and errors are logged using the
642 /// `tracing` crate, so you can monitor long-running searches.
643 ///
644 /// # Performance Notes
645 ///
646 /// - Uses parallel requests (batch size: 7) to fetch multiple pages simultaneously
647 /// - Respects rate limiting between batches
648 /// - For 1000+ results, this is significantly faster than sequential pagination
649 /// - Memory usage scales with result set size - consider filtering for very large queries
650 ///
651 /// # Arguments
652 ///
653 /// * `options` - Search filters and criteria (pagination options are overridden)
654 ///
655 /// # Returns
656 ///
657 /// Returns a vector containing all matching `Hit` objects across all pages.
658 ///
659 /// # Example
660 ///
661 /// ```ignore
662 /// let options = SearchOptions::new()
663 /// .with_query("quarterly earnings")
664 /// .with_forms(vec!["10-Q".to_string()])
665 /// .with_date_range("2024-01-01".to_string(), "2024-03-31".to_string());
666 ///
667 /// let all_results = edgar.search_all(options).await?;
668 /// println!("Retrieved {} quarterly reports", all_results.len());
669 ///
670 /// for hit in all_results {
671 /// println!("{}: {} filed on {}",
672 /// hit._source.display_names[0],
673 /// hit._source.form,
674 /// hit._source.file_date);
675 /// }
676 /// ```
677 async fn search_all(&self, mut options: SearchOptions) -> Result<Vec<Hit>> {
678 const BATCH_SIZE: u32 = 7; // Maximum number of concurrent requests
679 const PAGE_SIZE: u32 = 100; // Results per page
680
681 // Set defaults
682 options.count = Some(PAGE_SIZE);
683 options.page = Some(1);
684 options.reverse_order = Some(false);
685
686 let initial_response = self.search(options.clone()).await?;
687 let total_hits = initial_response.hits.total.value;
688
689 tracing::info!("Found {} total hits", total_hits);
690
691 let mut all_hits = Vec::with_capacity(total_hits as usize);
692 all_hits.extend(initial_response.hits.hits);
693
694 let total_pages = (total_hits + PAGE_SIZE - 1) / PAGE_SIZE;
695 let mut current_page = 1;
696
697 while current_page < total_pages {
698 let end_page = (current_page + BATCH_SIZE).min(total_pages);
699 let mut batch_futures = Vec::with_capacity((end_page - current_page) as usize);
700
701 for page in (current_page + 1)..=end_page {
702 let skip = (page - 1) * PAGE_SIZE;
703
704 // Stop if we've gone past the total hits
705 if skip >= total_hits {
706 break;
707 }
708
709 let mut page_options = options.clone();
710 page_options.page = Some(page);
711 page_options.from = Some(skip);
712 page_options.count = Some(PAGE_SIZE.min(total_hits - skip));
713 page_options.reverse_order = Some(false);
714
715 batch_futures.push(self.search(page_options));
716 }
717
718 if batch_futures.is_empty() {
719 break;
720 }
721
722 let results = futures_util::future::join_all(batch_futures).await;
723
724 for result in results {
725 match result {
726 Ok(response) => {
727 all_hits.extend(response.hits.hits);
728 }
729 Err(e) => {
730 tracing::error!("Error fetching page: {}", e);
731 return Err(e);
732 }
733 }
734 }
735
736 current_page += BATCH_SIZE;
737 }
738
739 Ok(all_hits)
740 }
741}
742
#[cfg(test)]
mod tests {
    use super::*;

    /// Options set through the builder must show up as the matching query
    /// parameters.
    #[test]
    fn test_search_options_builder() {
        let params = SearchOptions::new()
            .with_query("test")
            .with_forms(vec!["10-K".to_string(), "10-Q".to_string()])
            .with_count(10)
            .with_reverse_order(true)
            .to_query_params();

        let expected = [
            ("q", "test"),
            ("forms", "10-K,10-Q"),
            ("count", "10"),
            ("reverse_order", "TRUE"),
        ];

        for (key, value) in expected.iter() {
            let pair = (key.to_string(), value.to_string());
            assert!(params.contains(&pair));
        }
    }
}
762}