ticker_sniffer/lib.rs
1#[cfg(doctest)]
2doc_comment::doctest!("../README.md");
3
4pub mod config;
5pub use config::DEFAULT_COMPANY_TOKEN_PROCESSOR_CONFIG;
6pub mod constants;
7pub mod structs;
8mod utils;
9pub use structs::{
10 CompanySymbolListPreprocessor, CompanyTokenMapper, CompanyTokenProcessor,
11 CompanyTokenProcessorConfig, Error, TokenMapper, TokenParityState, TokenRangeState, Tokenizer,
12};
13pub use utils::sort_results;
14pub mod types;
15pub use types::{
16 AlternateCompanyName, CompanyName, CompanySymbolList, TickerSymbol, TickerSymbolFrequencyMap,
17 Token, TokenId, TokenRef, TokenVector,
18};
19
20include!("../embed.rs");
21
22/// Extracts ticker symbols from the provided text using the default configuration.
23///
24/// # Arguments
25/// * `text` - A reference to the input text document from which ticker symbols
26/// are to be extracted.
27/// * `is_case_sensitive` - Whether or not the text document should be filtered using case sensitivity.
28///
29/// # Returns
30/// * `Ok(TickerSymbolFrequencyMap)` - A map of ticker symbols and their
31/// frequencies if the operation is successful.
32/// * `Err(Error)` - An error if processing fails.
33///
34/// # Example
35/// ```
36/// use ticker_sniffer::extract_tickers_from_text;
37///
38/// let text = "Apple and Microsoft are leading companies.";
39/// let result = extract_tickers_from_text(text, true);
40/// assert!(result.is_ok());
41/// ```
42pub fn extract_tickers_from_text(
43 text: &str,
44 is_case_sensitive: bool,
45) -> Result<TickerSymbolFrequencyMap, Error> {
46 // Skip entirely if there is no text
47 if text.is_empty() {
48 return Ok(TickerSymbolFrequencyMap::new());
49 }
50
51 let results_ticker_symbol_frequency_map = extract_tickers_from_text_with_custom_config(
52 DEFAULT_COMPANY_TOKEN_PROCESSOR_CONFIG,
53 text,
54 is_case_sensitive,
55 )?;
56
57 Ok(results_ticker_symbol_frequency_map)
58}
59
60/// Extracts ticker symbols from the provided text using a custom configuration.
61///
62/// # Arguments
63/// * `document_token_processor_config` - A reference to the custom configuration
64/// for processing tokens.
65/// * `text` - A reference to the input text document from which ticker symbols
66/// are to be extracted.
67/// * `is_case_sensitive` - Whether or not the text document should be filtered using case sensitivity.
68///
69/// # Returns
70/// * `Ok(TickerSymbolFrequencyMap)` - A map of ticker symbols and their
71/// frequencies if the operation is successful.
72/// * `Err(Error)` - An error if processing fails.
73///
74/// # Example
75/// ```
76/// use ticker_sniffer::config::DEFAULT_COMPANY_TOKEN_PROCESSOR_CONFIG;
77/// use ticker_sniffer::extract_tickers_from_text_with_custom_config;
78///
79/// let config = DEFAULT_COMPANY_TOKEN_PROCESSOR_CONFIG;
80/// let text = "Google is a tech giant.";
81/// let result = extract_tickers_from_text_with_custom_config(&config, text, true);
82/// assert!(result.is_ok());
83/// ```
84pub fn extract_tickers_from_text_with_custom_config(
85 document_token_processor_config: &CompanyTokenProcessorConfig,
86 text: &str,
87 is_case_sensitive: bool,
88) -> Result<TickerSymbolFrequencyMap, Error> {
89 // Load the company symbol list
90 let company_symbol_list =
91 CompanySymbolListPreprocessor::extract_company_symbol_list_from_bytes(
92 COMPRESSED_COMPANY_SYMBOL_LIST_BYTE_ARRAY,
93 )?;
94
95 let company_token_processor = CompanyTokenProcessor::new(
96 document_token_processor_config,
97 &company_symbol_list,
98 is_case_sensitive,
99 );
100
101 let results_ticker_symbol_frequency_map = company_token_processor?.process_text_doc(text)?;
102
103 Ok(results_ticker_symbol_frequency_map)
104}