// swiftide/lib.rs — crate root

1// show feature flags in the generated documentation
2// https://doc.rust-lang.org/rustdoc/unstable-features.html#extensions-to-the-doc-attribute
3#![cfg_attr(docsrs, feature(doc_cfg))]
4#![cfg_attr(docsrs, feature(doc_auto_cfg))]
5#![doc(html_logo_url = "https://github.com/bosun-ai/swiftide/raw/master/images/logo.png")]
6
7//! Swiftide is a data indexing and processing library, tailored for Retrieval Augmented Generation
8//! (RAG). When building applications with large language models (LLM), these LLMs need access to
9//! external resources. Data needs to be transformed, enriched, split up, embedded, and persisted.
//! It is built in Rust, using parallel, asynchronous streams and is blazingly fast.
11//!
12//! Part of the [bosun.ai](https://bosun.ai) project. An upcoming platform for autonomous code improvement.
13//!
14//! We <3 feedback: project ideas, suggestions, and complaints are very welcome. Feel free to open
15//! an issue.
16//!
17//! Read more about the project on the [swiftide website](https://swiftide.rs)
18//!
19//! # Features
20//!
21//! - Extremely fast streaming indexing pipeline with async, parallel processing
22//! - Integrations with `OpenAI`, `Redis`, `Qdrant`, `FastEmbed`, `Treesitter` and more
//! - A variety of loaders, transformers, embedders, and other common, generic tools
24//! - Bring your own transformers by extending straightforward traits
25//! - Splitting and merging pipelines
26//! - Jinja-like templating for prompts
27//! - Store into multiple backends
28//! - `tracing` supported for logging and tracing, see /examples and the `tracing` crate for more
29//!   information.
30//!
31//! # Querying
32//!
33//! After running an indexing pipeline, you can use the [`query`] module to query the indexed data.
34//!
35//! # Examples
36//!
37//! ## Indexing markdown
38//!
39//! ```no_run
40//! # use swiftide::indexing::loaders::FileLoader;
41//! # use swiftide::indexing::transformers::{ChunkMarkdown, Embed, MetadataQAText};
42//! # use swiftide::integrations::qdrant::Qdrant;
43//! # use swiftide::integrations::openai::OpenAI;
44//! # use swiftide::indexing::Pipeline;
45//! # use anyhow::Result;
46//!
47//! # #[tokio::main]
48//! # async fn main() -> Result<()> {
49//! # let qdrant_url = "url";
50//! # let openai_client = OpenAI::builder().build()?;
51//!  Pipeline::from_loader(FileLoader::new(".").with_extensions(&["md"]))
52//!          .then_chunk(ChunkMarkdown::from_chunk_range(10..512))
53//!          .then(MetadataQAText::new(openai_client.clone()))
54//!          .then_in_batch(Embed::new(openai_client.clone()).with_batch_size(10))
55//!          .then_store_with(
56//!              Qdrant::try_from_url(qdrant_url)?
57//!                  .batch_size(50)
58//!                  .vector_size(1536)
59//!                  .collection_name("swiftide-examples".to_string())
60//!                  .build()?,
61//!          )
62//!          .run()
63//!          .await
64//! # }
65//! ```
66//!
67//! ## Querying
68//!
69//! ```no_run
70//! # use anyhow::Result;
71//! # use swiftide::query::{query_transformers, self, response_transformers, answers};
72//! # use swiftide::integrations::openai::OpenAI;
73//!
74//! # #[tokio::main]
75//! # async fn main() -> Result<()> {
76//! # let qdrant_url = "url";
77//! # let openai_client = OpenAI::builder().build()?;
78//! # let qdrant = swiftide::integrations::qdrant::Qdrant::try_from_url(qdrant_url)?
79//! #                .batch_size(50)
80//! #                .vector_size(1536)
81//! #                .collection_name("swiftide-examples".to_string())
82//! #                .build()?;
83//! query::Pipeline::default()
84//!     .then_transform_query(query_transformers::GenerateSubquestions::from_client(
85//!         openai_client.clone(),
86//!     ))
87//!     .then_transform_query(query_transformers::Embed::from_client(
88//!         openai_client.clone(),
89//!     ))
90//!     .then_retrieve(qdrant.clone())
91//!     .then_transform_response(response_transformers::Summary::from_client(
92//!         openai_client.clone(),
93//!     ))
94//!     .then_answer(answers::Simple::from_client(openai_client.clone()))
95//!     .query("What is swiftide?")
96//!     .await?;
97//! # Ok(())
98//! # }
99//! ```
100//!
101//! # Feature flags
102//!
//! Swiftide has few features enabled by default, as some integrations are dependency
//! heavy. You need to cherry-pick the tools and integrations you want to use.
105#![doc = document_features::document_features!()]
106
107#[doc(inline)]
108pub use swiftide_core::prompt;
109#[doc(inline)]
110pub use swiftide_core::type_aliases::*;
111
112#[cfg(feature = "swiftide-agents")]
113#[doc(inline)]
114pub use swiftide_agents as agents;
115
116/// Common traits for common behaviour, re-exported from indexing and query
117pub mod traits {
118    #[doc(inline)]
119    pub use swiftide_core::agent_traits::*;
120    #[doc(inline)]
121    pub use swiftide_core::chat_completion::traits::*;
122    #[doc(inline)]
123    pub use swiftide_core::indexing_traits::*;
124    #[doc(inline)]
125    pub use swiftide_core::query_traits::*;
126}
127
/// Chat completion building blocks (messages, tool calls, and related traits),
/// re-exported from `swiftide_core`.
pub mod chat_completion {
    #[doc(inline)]
    pub use swiftide_core::chat_completion::*;
}
132
/// Integrations with various platforms and external services.
///
/// Integrations are opt-in; enable the corresponding feature flags for the
/// ones you need (see the "Feature flags" section in the crate docs).
pub mod integrations {
    #[doc(inline)]
    pub use swiftide_integrations::*;
}
138
139/// This module serves as the main entry point for indexing in Swiftide.
140///
141/// The indexing system in Swiftide is designed to handle the asynchronous processing of large
142/// volumes of data, including loading, transforming, and storing data chunks.
143pub mod indexing {
144    #[doc(inline)]
145    pub use swiftide_core::indexing::*;
146    #[doc(inline)]
147    pub use swiftide_indexing::*;
148
149    pub mod transformers {
150        #[cfg(feature = "tree-sitter")]
151        #[doc(inline)]
152        pub use swiftide_integrations::treesitter::transformers::*;
153
154        pub use swiftide_indexing::transformers::*;
155    }
156}
157
158#[cfg(feature = "macros")]
159#[doc(inline)]
160pub use swiftide_macros::*;
161/// # Querying pipelines
162///
163/// Swiftide allows you to define sophisticated query pipelines.
164///
165/// Consider the following code that uses Swiftide to load some markdown text, chunk it, embed it,
166/// and store it in a Qdrant index:
167///
168/// ```no_run
169/// use swiftide::{
170///     indexing::{
171///         self,
172///         loaders::FileLoader,
173///         transformers::{ChunkMarkdown, Embed, MetadataQAText},
174///     },
175///     integrations::{self, qdrant::Qdrant},
176///     integrations::openai::OpenAI,
177///     query::{self, answers, query_transformers, response_transformers},
178/// };
179///
180/// async fn index() -> Result<(), Box<dyn std::error::Error>> {
181///   let openai_client = OpenAI::builder()
182///       .default_embed_model("text-embedding-3-large")
183///       .default_prompt_model("gpt-4o")
184///       .build()?;
185///
186///   let qdrant = Qdrant::builder()
187///       .batch_size(50)
188///       .vector_size(3072)
189///       .collection_name("swiftide-examples")
190///       .build()?;
191///
192///   indexing::Pipeline::from_loader(FileLoader::new("README.md"))
193///       .then_chunk(ChunkMarkdown::from_chunk_range(10..2048))
194///       .then(MetadataQAText::new(openai_client.clone()))
195///       .then_in_batch(Embed::new(openai_client.clone()).with_batch_size(10))
196///       .then_store_with(qdrant.clone())
197///       .run()
198///       .await?;
199///
200///   Ok(())
201/// }
202/// ```
203///
204/// We could then define a query pipeline that uses the Qdrant index to answer questions:
205///
206/// ```no_run
207/// # use swiftide::{
208/// #     indexing::{
209/// #         self,
210/// #         loaders::FileLoader,
211/// #         transformers::{ChunkMarkdown, Embed, MetadataQAText},
212/// #     },
213/// #     integrations::{self, qdrant::Qdrant},
214/// #     query::{self, answers, query_transformers, response_transformers},
215/// #     integrations::openai::OpenAI,
216/// # };
217/// # async fn query() -> Result<(), Box<dyn std::error::Error>> {
218/// #  let openai_client = OpenAI::builder()
219/// #      .default_embed_model("text-embedding-3-large")
220/// #      .default_prompt_model("gpt-4o")
221/// #      .build()?;
222/// #  let qdrant = Qdrant::builder()
223/// #      .batch_size(50)
224/// #      .vector_size(3072)
225/// #      .collection_name("swiftide-examples")
226/// #      .build()?;
227/// // By default the search strategy is SimilaritySingleEmbedding
228/// // which takes the latest query, embeds it, and does a similarity search
229/// let pipeline = query::Pipeline::default()
230///     .then_transform_query(query_transformers::GenerateSubquestions::from_client(
231///         openai_client.clone(),
232///     ))
233///     .then_transform_query(query_transformers::Embed::from_client(
234///         openai_client.clone(),
235///     ))
236///     .then_retrieve(qdrant.clone())
237///     .then_transform_response(response_transformers::Summary::from_client(
238///         openai_client.clone(),
239///     ))
240///     .then_answer(answers::Simple::from_client(openai_client.clone()));
241///
242/// let result = pipeline
243///     .query("What is swiftide? Please provide an elaborate explanation")
244///     .await?;
245///
246/// println!("{:?}", result.answer());
247/// # Ok(())
248/// # }
249/// ```
250///
251/// By using a query pipeline to transform queries, we can improve the quality of the answers we get
252/// from our index. In this example, we used an LLM to generate subquestions, embedding those and
253/// then using them to search the index. Finally, we summarize the results and combine them together
254/// into a single answer.
pub mod query {
    // Core querying types shared across the workspace.
    #[doc(inline)]
    pub use swiftide_core::querying::*;
    // The query pipeline itself and its transformers, retrievers, and answerers.
    #[doc(inline)]
    pub use swiftide_query::*;
}
261
/// Re-exports for macros
///
/// Hidden from the docs: these exist so code generated by Swiftide's macros
/// can reference these crates through a stable `swiftide::reexports` path,
/// without consumers declaring them as direct dependencies.
#[doc(hidden)]
pub mod reexports {
    pub use ::anyhow;
    pub use ::async_trait;
    pub use ::serde;
    pub use ::serde_json;
}