// swiftide/lib.rs
1// show feature flags in the generated documentation
2// https://doc.rust-lang.org/rustdoc/unstable-features.html#extensions-to-the-doc-attribute
3#![cfg_attr(docsrs, feature(doc_cfg))]
4#![cfg_attr(docsrs, feature(doc_auto_cfg))]
5#![doc(html_logo_url = "https://github.com/bosun-ai/swiftide/raw/master/images/logo.png")]
6
7//! Swiftide is a data indexing and processing library, tailored for Retrieval Augmented Generation
8//! (RAG). When building applications with large language models (LLM), these LLMs need access to
9//! external resources. Data needs to be transformed, enriched, split up, embedded, and persisted.
//! It is built in Rust, using parallel, asynchronous streams, and is blazingly fast.
11//!
12//! Part of the [bosun.ai](https://bosun.ai) project. An upcoming platform for autonomous code improvement.
13//!
14//! We <3 feedback: project ideas, suggestions, and complaints are very welcome. Feel free to open
15//! an issue.
16//!
17//! Read more about the project on the [swiftide website](https://swiftide.rs)
18//!
19//! # Features
20//!
21//! - Extremely fast streaming indexing pipeline with async, parallel processing
22//! - Integrations with `OpenAI`, `Redis`, `Qdrant`, `FastEmbed`, `Treesitter` and more
//! - A variety of loaders, transformers, embedders, and other common, generic tools
24//! - Bring your own transformers by extending straightforward traits
25//! - Splitting and merging pipelines
26//! - Jinja-like templating for prompts
27//! - Store into multiple backends
28//! - `tracing` supported for logging and tracing, see /examples and the `tracing` crate for more
29//! information.
30//!
31//! # Querying
32//!
33//! After running an indexing pipeline, you can use the [`query`] module to query the indexed data.
34//!
35//! # Examples
36//!
37//! ## Indexing markdown
38//!
39//! ```no_run
40//! # use swiftide::indexing::loaders::FileLoader;
41//! # use swiftide::indexing::transformers::{ChunkMarkdown, Embed, MetadataQAText};
42//! # use swiftide::integrations::qdrant::Qdrant;
43//! # use swiftide::integrations::openai::OpenAI;
44//! # use swiftide::indexing::Pipeline;
45//! # use anyhow::Result;
46//!
47//! # #[tokio::main]
48//! # async fn main() -> Result<()> {
49//! # let qdrant_url = "url";
50//! # let openai_client = OpenAI::builder().build()?;
51//! Pipeline::from_loader(FileLoader::new(".").with_extensions(&["md"]))
52//! .then_chunk(ChunkMarkdown::from_chunk_range(10..512))
53//! .then(MetadataQAText::new(openai_client.clone()))
54//! .then_in_batch(Embed::new(openai_client.clone()).with_batch_size(10))
55//! .then_store_with(
56//! Qdrant::try_from_url(qdrant_url)?
57//! .batch_size(50)
58//! .vector_size(1536)
59//! .collection_name("swiftide-examples".to_string())
60//! .build()?,
61//! )
62//! .run()
63//! .await
64//! # }
65//! ```
66//!
67//! ## Querying
68//!
69//! ```no_run
70//! # use anyhow::Result;
71//! # use swiftide::query::{query_transformers, self, response_transformers, answers};
72//! # use swiftide::integrations::openai::OpenAI;
73//!
74//! # #[tokio::main]
75//! # async fn main() -> Result<()> {
76//! # let qdrant_url = "url";
77//! # let openai_client = OpenAI::builder().build()?;
78//! # let qdrant = swiftide::integrations::qdrant::Qdrant::try_from_url(qdrant_url)?
79//! # .batch_size(50)
80//! # .vector_size(1536)
81//! # .collection_name("swiftide-examples".to_string())
82//! # .build()?;
83//! query::Pipeline::default()
84//! .then_transform_query(query_transformers::GenerateSubquestions::from_client(
85//! openai_client.clone(),
86//! ))
87//! .then_transform_query(query_transformers::Embed::from_client(
88//! openai_client.clone(),
89//! ))
90//! .then_retrieve(qdrant.clone())
91//! .then_transform_response(response_transformers::Summary::from_client(
92//! openai_client.clone(),
93//! ))
94//! .then_answer(answers::Simple::from_client(openai_client.clone()))
95//! .query("What is swiftide?")
96//! .await?;
97//! # Ok(())
98//! # }
99//! ```
100//!
101//! # Feature flags
102//!
//! Swiftide enables only a few features by default, as some of its integrations are
//! dependency heavy. You need to cherry-pick the tools and integrations you want to use.
105#![doc = document_features::document_features!()]
106
107#[doc(inline)]
108pub use swiftide_core::prompt;
109#[doc(inline)]
110pub use swiftide_core::type_aliases::*;
111
112#[cfg(feature = "swiftide-agents")]
113#[doc(inline)]
114pub use swiftide_agents as agents;
115
116/// Common traits for common behaviour, re-exported from indexing and query
117pub mod traits {
118 #[doc(inline)]
119 pub use swiftide_core::agent_traits::*;
120 #[doc(inline)]
121 pub use swiftide_core::chat_completion::traits::*;
122 #[doc(inline)]
123 pub use swiftide_core::indexing_traits::*;
124 #[doc(inline)]
125 pub use swiftide_core::query_traits::*;
126}
127
/// Chat completion types, re-exported from `swiftide_core::chat_completion`.
pub mod chat_completion {
    #[doc(inline)]
    pub use swiftide_core::chat_completion::*;
}
132
/// Integrations with various platforms and external services.
///
/// Everything here is re-exported from the `swiftide_integrations` crate. Per the
/// crate-level docs, integrations are cherry-picked via feature flags — see the
/// "Feature flags" section above for what is available.
pub mod integrations {
    #[doc(inline)]
    pub use swiftide_integrations::*;
}
138
/// This module serves as the main entry point for indexing in Swiftide.
///
/// The indexing system in Swiftide is designed to handle the asynchronous processing of large
/// volumes of data, including loading, transforming, and storing data chunks.
pub mod indexing {
    // Core indexing types from `swiftide_core`.
    #[doc(inline)]
    pub use swiftide_core::indexing::*;
    // Pipeline and loader implementations from `swiftide_indexing`.
    #[doc(inline)]
    pub use swiftide_indexing::*;

    /// Transformers used in indexing pipelines (e.g. `ChunkMarkdown`, `Embed`,
    /// `MetadataQAText` from the crate-level examples).
    pub mod transformers {
        // Tree-sitter based transformers are only available with the
        // `tree-sitter` feature enabled.
        #[cfg(feature = "tree-sitter")]
        #[doc(inline)]
        pub use swiftide_integrations::treesitter::transformers::*;

        pub use swiftide_indexing::transformers::*;
    }
}
157
158#[cfg(feature = "macros")]
159#[doc(inline)]
160pub use swiftide_macros::*;
161/// # Querying pipelines
162///
163/// Swiftide allows you to define sophisticated query pipelines.
164///
165/// Consider the following code that uses Swiftide to load some markdown text, chunk it, embed it,
166/// and store it in a Qdrant index:
167///
168/// ```no_run
169/// use swiftide::{
170/// indexing::{
171/// self,
172/// loaders::FileLoader,
173/// transformers::{ChunkMarkdown, Embed, MetadataQAText},
174/// },
175/// integrations::{self, qdrant::Qdrant},
176/// integrations::openai::OpenAI,
177/// query::{self, answers, query_transformers, response_transformers},
178/// };
179///
180/// async fn index() -> Result<(), Box<dyn std::error::Error>> {
181/// let openai_client = OpenAI::builder()
182/// .default_embed_model("text-embedding-3-large")
183/// .default_prompt_model("gpt-4o")
184/// .build()?;
185///
186/// let qdrant = Qdrant::builder()
187/// .batch_size(50)
188/// .vector_size(3072)
189/// .collection_name("swiftide-examples")
190/// .build()?;
191///
192/// indexing::Pipeline::from_loader(FileLoader::new("README.md"))
193/// .then_chunk(ChunkMarkdown::from_chunk_range(10..2048))
194/// .then(MetadataQAText::new(openai_client.clone()))
195/// .then_in_batch(Embed::new(openai_client.clone()).with_batch_size(10))
196/// .then_store_with(qdrant.clone())
197/// .run()
198/// .await?;
199///
200/// Ok(())
201/// }
202/// ```
203///
204/// We could then define a query pipeline that uses the Qdrant index to answer questions:
205///
206/// ```no_run
207/// # use swiftide::{
208/// # indexing::{
209/// # self,
210/// # loaders::FileLoader,
211/// # transformers::{ChunkMarkdown, Embed, MetadataQAText},
212/// # },
213/// # integrations::{self, qdrant::Qdrant},
214/// # query::{self, answers, query_transformers, response_transformers},
215/// # integrations::openai::OpenAI,
216/// # };
217/// # async fn query() -> Result<(), Box<dyn std::error::Error>> {
218/// # let openai_client = OpenAI::builder()
219/// # .default_embed_model("text-embedding-3-large")
220/// # .default_prompt_model("gpt-4o")
221/// # .build()?;
222/// # let qdrant = Qdrant::builder()
223/// # .batch_size(50)
224/// # .vector_size(3072)
225/// # .collection_name("swiftide-examples")
226/// # .build()?;
227/// // By default the search strategy is SimilaritySingleEmbedding
228/// // which takes the latest query, embeds it, and does a similarity search
229/// let pipeline = query::Pipeline::default()
230/// .then_transform_query(query_transformers::GenerateSubquestions::from_client(
231/// openai_client.clone(),
232/// ))
233/// .then_transform_query(query_transformers::Embed::from_client(
234/// openai_client.clone(),
235/// ))
236/// .then_retrieve(qdrant.clone())
237/// .then_transform_response(response_transformers::Summary::from_client(
238/// openai_client.clone(),
239/// ))
240/// .then_answer(answers::Simple::from_client(openai_client.clone()));
241///
242/// let result = pipeline
243/// .query("What is swiftide? Please provide an elaborate explanation")
244/// .await?;
245///
246/// println!("{:?}", result.answer());
247/// # Ok(())
248/// # }
249/// ```
250///
251/// By using a query pipeline to transform queries, we can improve the quality of the answers we get
252/// from our index. In this example, we used an LLM to generate subquestions, embedding those and
253/// then using them to search the index. Finally, we summarize the results and combine them together
254/// into a single answer.
pub mod query {
    // Core querying primitives from `swiftide_core`.
    #[doc(inline)]
    pub use swiftide_core::querying::*;
    // The query pipeline and its building blocks (`Pipeline`, `query_transformers`,
    // `response_transformers`, `answers` — see the examples above) from `swiftide_query`.
    #[doc(inline)]
    pub use swiftide_query::*;
}
261
/// Re-exports for macros
///
/// Hidden from the generated docs. NOTE(review): these appear to exist so that
/// macro-generated code can reference these crates through a stable
/// `swiftide::reexports::…` path instead of the user's own dependency graph —
/// confirm against `swiftide_macros`.
#[doc(hidden)]
pub mod reexports {
    pub use ::anyhow;
    pub use ::async_trait;
    pub use ::serde;
    pub use ::serde_json;
}
269}