llm_utils 0.0.11

The best possible text chunker, text splitter, and other text tools
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
use linkify::{LinkFinder, LinkKind};
use std::{collections::HashSet, str::FromStr};
use url::Url;

/// Collects the distinct URLs found in `input`.
///
/// The text is scanned with a [`LinkFinder`] restricted to [`LinkKind::Url`],
/// so other link kinds are not reported. Each match is parsed into a [`Url`];
/// matches that fail to parse are skipped rather than reported as errors.
///
/// De-duplication compares the parsed [`Url`] values, and only the first
/// occurrence of each is kept, in the order the finder yields them.
///
/// # Arguments
///
/// * `input` - Any value that can be borrowed as a `&str`.
///
/// # Returns
///
/// A `Vec<Url>` with no repeated entries; empty when nothing parses as a URL.
pub fn extract_urls<T: AsRef<str>>(input: T) -> Vec<Url> {
    let mut finder = LinkFinder::new();
    finder.kinds(&[LinkKind::Url]);

    // `seen` owns a copy of every URL already emitted so repeats can be
    // rejected, while `found` preserves the order of first appearance.
    let mut seen = HashSet::new();
    let mut found = Vec::new();

    for candidate in finder.links(input.as_ref()) {
        if let Ok(parsed) = Url::from_str(candidate.as_str()) {
            // `insert` returns `false` when the value was already present.
            if seen.insert(parsed.clone()) {
                found.push(parsed);
            }
        }
    }

    found
}