ticker_sniffer/
lib.rs

1#[cfg(doctest)]
2doc_comment::doctest!("../README.md");
3
4pub mod config;
5pub use config::DEFAULT_COMPANY_TOKEN_PROCESSOR_CONFIG;
6pub mod constants;
7pub mod structs;
8mod utils;
9pub use structs::{
10    CompanySymbolListPreprocessor, CompanyTokenMapper, CompanyTokenProcessor,
11    CompanyTokenProcessorConfig, Error, TokenMapper, TokenParityState, TokenRangeState, Tokenizer,
12};
13pub use utils::sort_results;
14pub mod types;
15pub use types::{
16    AlternateCompanyName, CompanyName, CompanySymbolList, TickerSymbol, TickerSymbolFrequencyMap,
17    Token, TokenId, TokenRef, TokenVector,
18};
19
20include!("../embed.rs");
21
22/// Extracts ticker symbols from the provided text using the default configuration.
23///
24/// # Arguments
25/// * `text` - A reference to the input text document from which ticker symbols
26///   are to be extracted.
27/// * `is_case_sensitive` - Whether or not the text document should be filtered using case sensitivity.
28///
29/// # Returns
30/// * `Ok(TickerSymbolFrequencyMap)` - A map of ticker symbols and their
31///   frequencies if the operation is successful.
32/// * `Err(Error)` - An error if processing fails.
33///
34/// # Example
35/// ```
36/// use ticker_sniffer::extract_tickers_from_text;
37///
38/// let text = "Apple and Microsoft are leading companies.";
39/// let result = extract_tickers_from_text(text, true);
40/// assert!(result.is_ok());
41/// ```
42pub fn extract_tickers_from_text(
43    text: &str,
44    is_case_sensitive: bool,
45) -> Result<TickerSymbolFrequencyMap, Error> {
46    // Skip entirely if there is no text
47    if text.is_empty() {
48        return Ok(TickerSymbolFrequencyMap::new());
49    }
50
51    let results_ticker_symbol_frequency_map = extract_tickers_from_text_with_custom_config(
52        DEFAULT_COMPANY_TOKEN_PROCESSOR_CONFIG,
53        text,
54        is_case_sensitive,
55    )?;
56
57    Ok(results_ticker_symbol_frequency_map)
58}
59
60/// Extracts ticker symbols from the provided text using a custom configuration.
61///
62/// # Arguments
63/// * `document_token_processor_config` - A reference to the custom configuration
64///   for processing tokens.
65/// * `text` - A reference to the input text document from which ticker symbols
66///   are to be extracted.
67/// * `is_case_sensitive` - Whether or not the text document should be filtered using case sensitivity.
68///
69/// # Returns
70/// * `Ok(TickerSymbolFrequencyMap)` - A map of ticker symbols and their
71///   frequencies if the operation is successful.
72/// * `Err(Error)` - An error if processing fails.
73///
74/// # Example
75/// ```
76/// use ticker_sniffer::config::DEFAULT_COMPANY_TOKEN_PROCESSOR_CONFIG;
77/// use ticker_sniffer::extract_tickers_from_text_with_custom_config;
78///
79/// let config = DEFAULT_COMPANY_TOKEN_PROCESSOR_CONFIG;
80/// let text = "Google is a tech giant.";
81/// let result = extract_tickers_from_text_with_custom_config(&config, text, true);
82/// assert!(result.is_ok());
83/// ```
84pub fn extract_tickers_from_text_with_custom_config(
85    document_token_processor_config: &CompanyTokenProcessorConfig,
86    text: &str,
87    is_case_sensitive: bool,
88) -> Result<TickerSymbolFrequencyMap, Error> {
89    // Load the company symbol list
90    let company_symbol_list =
91        CompanySymbolListPreprocessor::extract_company_symbol_list_from_bytes(
92            COMPRESSED_COMPANY_SYMBOL_LIST_BYTE_ARRAY,
93        )?;
94
95    let company_token_processor = CompanyTokenProcessor::new(
96        document_token_processor_config,
97        &company_symbol_list,
98        is_case_sensitive,
99    );
100
101    let results_ticker_symbol_frequency_map = company_token_processor?.process_text_doc(text)?;
102
103    Ok(results_ticker_symbol_frequency_map)
104}