langchain_rust/document_loaders/text_loader/
text_loader.rs1use std::pin::Pin;
2
3use async_trait::async_trait;
4use futures::{stream, Stream};
5
6use crate::{
7 document_loaders::{process_doc_stream, Loader, LoaderError},
8 schemas::Document,
9 text_splitter::TextSplitter,
10};
11
/// Document loader backed by text that is already in memory.
///
/// The loader owns the text it was constructed with; no file or network
/// access is involved.
#[derive(Debug, Clone)]
pub struct TextLoader {
    /// The raw text this loader was constructed with.
    content: String,
}

impl TextLoader {
    /// Builds a loader from anything convertible into a `String`
    /// (for example `&str` or `String`).
    pub fn new<T>(input: T) -> Self
    where
        T: Into<String>,
    {
        let content = input.into();
        Self { content }
    }
}
24
25#[async_trait]
26impl Loader for TextLoader {
27 async fn load(
28 mut self,
29 ) -> Result<
30 Pin<Box<dyn Stream<Item = Result<Document, LoaderError>> + Send + 'static>>,
31 LoaderError,
32 > {
33 let doc = Document::new(self.content);
34 let stream = stream::iter(vec![Ok(doc)]);
35 Ok(Box::pin(stream))
36 }
37
38 async fn load_and_split<TS: TextSplitter + 'static>(
39 mut self,
40 splitter: TS,
41 ) -> Result<
42 Pin<Box<dyn Stream<Item = Result<Document, LoaderError>> + Send + 'static>>,
43 LoaderError,
44 > {
45 let doc_stream = self.load().await?;
46 let stream = process_doc_stream(doc_stream, splitter).await;
47 Ok(Box::pin(stream))
48 }
49}
50
#[cfg(test)]
mod tests {
    use futures_util::StreamExt;

    use crate::text_splitter::TokenSplitter;

    use super::*;

    /// `load` must yield the input text verbatim as exactly one document, and
    /// `load_and_split` must run the same text through a splitter without
    /// producing an error.
    #[tokio::test]
    async fn test_reading_mocked_file_content() {
        let mocked_file_content = r#"
iterary Descriptive Text
Doña Uzeada de Ribera Maldonado de Bracamonte y Anaya was short, plump, and mustachioed. There was no longer any reason to call hers a figure. Her vibrant, healthy colors could overcome the lead white and ceruse she used for makeup to feign melancholies. She wore two dark patches adhered to her temples, pretending to be medicines. She had small, mischievous, mouse-like eyes. She knew how to dilate them sternly, dim them modestly, or raise them subtly. She walked swaying her impossible hips, and it was difficult, upon seeing her, not to associate her squat image with that of certain domestic waterfowl. Blue and azure rings choked her phalanges.
 • Manuel Mujica Lainez, Don Galaz de Buenos Aires
The descriptive text, in this case, a portrait of a person, evokes such an image in the receiver that the described reality takes shape, materializes in their mind. In this case, the text talks about a real character: Doña Uzeada de Ribera Maldonado de Bracamonte y Anaya. As it is a literary description, the attitude of the emitter is subjective, as it aims to transmit their own personal vision in the description, and the language function is predominantly poetic, as it seeks a particular aesthetic.
Non-Literary Descriptive Text
The west of Texas divides the border between Mexico and New Mexico. It is very beautiful but rugged, filled with cacti; in this region are found the Davis Mountains. The entire terrain is filled with limestone, twisted mesquite trees, and prickly pear cactuses. To admire the true desert beauty, visit Big Bend National Park, near Brownsville. It is a favorite location for hikers, campers, and rock enthusiasts. Small towns and ranches lie along the plains and canyons of this region. The area only has two seasons, mild and really hot. The best time to visit is from December to March when the days are warm, the nights are cool, and the desert plants bloom with moisture in the air.

"#;

        // Plain load: the whole input comes back as one untouched document.
        let loader = TextLoader::new(mocked_file_content);
        let mut documents = loader.load().await.unwrap();

        let mut loaded = 0usize;
        while let Some(doc) = documents.next().await {
            assert_eq!(doc.unwrap().page_content, mocked_file_content);
            loaded += 1;
        }
        // Without this count the loop above would pass vacuously on an empty stream.
        assert_eq!(loaded, 1, "load() should yield exactly one document");

        // Load and split: every chunk must be `Ok`, and there must be at least one.
        let loader = TextLoader::new(mocked_file_content);
        let splitter = TokenSplitter::default();

        let mut documents = loader.load_and_split(splitter).await.unwrap();

        let mut chunks = 0usize;
        while let Some(doc) = documents.next().await {
            println!("{:?}", doc.unwrap());
            println!();
            println!("-----------------------");
            chunks += 1;
        }
        assert!(
            chunks > 0,
            "load_and_split() should yield at least one chunk for non-empty input"
        );
    }
}