prqlc/
lib.rs

1//! # prqlc
2//!
3//! Compiler for PRQL language. Targets SQL and exposes PL and RQ abstract
4//! syntax trees.
5//!
//! You probably want to start with the [compile] wrapper function.
7//!
8//! For more granular access, refer to this diagram:
9//! ```ascii
10//!            PRQL
11//!
12//!    (parse) │ ▲
13//! prql_to_pl │ │ pl_to_prql
14//!            │ │
15//!            ▼ │      json::from_pl
16//!                   ────────►
17//!           PL AST            PL JSON
18//!                   ◄────────
19//!            │        json::to_pl
20//!            │
21//!  (resolve) │
22//!   pl_to_rq │
23//!            │
24//!            │
25//!            ▼        json::from_rq
26//!                   ────────►
27//!           RQ AST            RQ JSON
28//!                   ◄────────
29//!            │        json::to_rq
30//!            │
31//!  rq_to_sql │
32//!            ▼
33//!
34//!            SQL
35//! ```
36//!
37#![doc = include_str!("../ARCHITECTURE.md")]
38//!
39//! ## Common use-cases
40//!
41//! - Compile PRQL queries to SQL at run time.
42//!
43//!   ```
44//!   # fn main() -> Result<(), prqlc::ErrorMessages> {
45//!   let sql = prqlc::compile(
46//!       "from albums | select {title, artist_id}",
47//!        &prqlc::Options::default().no_format()
48//!   )?;
49//!   assert_eq!(&sql[..35], "SELECT title, artist_id FROM albums");
50//!   # Ok(())
51//!   # }
52//!   ```
53//!
54//! - Compile PRQL queries to SQL at build time.
55//!
56//!   For inline strings, use the `prqlc-macros` crate; for example:
57//!   ```ignore
58//!   let sql: &str = prql_to_sql!("from albums | select {title, artist_id}");
59//!   ```
60//!
61//!   For compiling whole files (`.prql` to `.sql`), call `prqlc` from
62//!   `build.rs`. See [this example
63//!   project](https://github.com/PRQL/prql/tree/main/prqlc/prqlc/examples/compile-files).
64//!
65//! - Compile, format & debug PRQL from command line.
66//!
67//!   ```sh
68//!   $ cargo install --locked prqlc
69//!   $ prqlc compile query.prql
70//!   ```
71//!
72//! ## Feature flags
73//!
74//! The following feature flags are available:
75//!
76//! * `cli`: enables the `prqlc` CLI binary. This is enabled by default. When
77//!   consuming this crate from another rust library, it can be disabled.
78//! * `test-dbs`: enables the `prqlc` in-process test databases as part of the
79//!   crate's tests. This significantly increases compile times so is not
80//!   enabled by default.
81//! * `test-dbs-external`: enables the `prqlc` external test databases,
82//!   requiring a docker container with the test databases to be running. Check
83//!   out the [integration tests](https://github.com/PRQL/prql/tree/main/prqlc/prqlc/tests/integration/dbs)
84//!   for more details.
85//! * `serde_yaml`: Enables serialization and deserialization of ASTs to YAML.
86//!
87//! ## Large binary sizes
88//!
89//! For Linux users, the binary size contributed by this crate will probably be
90//! quite large (>20MB) by default. That is because it includes a lot of
91//! debuginfo symbols from our parser. They can be removed by adding the
92//! following to `Cargo.toml`, reducing the contribution to around 7MB:
93//! ```toml
94//! [profile.release.package.prqlc]
95//! strip = "debuginfo"
96//! ```
97
98use std::sync::OnceLock;
99use std::{collections::HashMap, path::PathBuf, str::FromStr};
100
101use anstream::adapter::strip_str;
102use semver::Version;
103use serde::{Deserialize, Serialize};
104use strum::VariantNames;
105
106pub use error_message::{ErrorMessage, ErrorMessages, SourceLocation};
107pub use prqlc_parser::error::{Error, ErrorSource, Errors, MessageKind, Reason, WithErrorInfo};
108pub use prqlc_parser::lexer::lr;
109pub use prqlc_parser::parser::pr;
110pub use prqlc_parser::span::Span;
111
112mod codegen;
113pub mod debug;
114mod error_message;
115pub mod ir;
116pub mod parser;
117pub mod semantic;
118pub mod sql;
119#[cfg(feature = "cli")]
120pub mod utils;
121#[cfg(not(feature = "cli"))]
122pub(crate) mod utils;
123
124pub type Result<T, E = Error> = core::result::Result<T, E>;
125
126/// Get the version of the compiler. This is determined by the first of:
127/// - An optional environment variable `PRQL_VERSION_OVERRIDE`; primarily useful
128///   for internal testing.
129///   - Note that this env var is checked on every call of this function.
130///     Without checking each read, we found some internal tests were flaky. If
131///     this caused any perf issues, we could adjust the tests that rely on
132///     versions to run in a more encapsulated way (for example, use `prqlc`
133///     binary tests, which we can guarantee won't have anything call this
134///     before setting up the env var).
135/// - The version returned by `git describe --tags`
136/// - The version in the cargo manifest
137pub fn compiler_version() -> Version {
138    if let Ok(prql_version_override) = std::env::var("PRQL_VERSION_OVERRIDE") {
139        return Version::parse(&prql_version_override).unwrap_or_else(|e| {
140            panic!("Could not parse PRQL version {prql_version_override}\n{e}")
141        });
142    };
143
144    static COMPILER_VERSION: OnceLock<Version> = OnceLock::new();
145    COMPILER_VERSION
146        .get_or_init(|| {
147            if let Ok(prql_version_override) = std::env::var("PRQL_VERSION_OVERRIDE") {
148                return Version::parse(&prql_version_override).unwrap_or_else(|e| {
149                    panic!("Could not parse PRQL version {prql_version_override}\n{e}")
150                });
151            }
152            let git_version = env!("VERGEN_GIT_DESCRIBE");
153            let cargo_version = env!("CARGO_PKG_VERSION");
154            Version::parse(git_version)
155                .or_else(|e| {
156                    log::info!("Could not parse git version number {git_version}\n{e}");
157                    Version::parse(cargo_version)
158                })
159                .unwrap_or_else(|e| {
160                    panic!("Could not parse prqlc version number {cargo_version}\n{e}")
161                })
162        })
163        .clone()
164}
165
166/// Compile a PRQL string into a SQL string.
167///
168/// This is a wrapper for:
169/// - [prql_to_pl] — Build PL AST from a PRQL string
170/// - [pl_to_rq] — Finds variable references, validates functions calls,
171///   determines frames and converts PL to RQ.
172/// - [rq_to_sql] — Convert RQ AST into an SQL string.
173/// # Example
174/// Use the prql compiler to convert a PRQL string to SQLite dialect
175///
176/// ```
177/// use prqlc::{compile, Options, Target, sql::Dialect};
178///
179/// let prql = "from employees | select {name,age}";
180/// let opts = Options::default().with_target(Target::Sql(Some(Dialect::SQLite))).with_signature_comment(false).with_format(false);
181/// let sql = compile(&prql, &opts).unwrap();
182/// println!("PRQL: {}\nSQLite: {}", prql, &sql);
183/// assert_eq!("SELECT name, age FROM employees", sql)
184///
185/// ```
186/// See [`sql::Options`](sql/struct.Options.html) and
187/// [`sql::Dialect`](sql/enum.Dialect.html) for options and supported SQL
188/// dialects.
189pub fn compile(prql: &str, options: &Options) -> Result<String, ErrorMessages> {
190    let sources = SourceTree::from(prql);
191
192    Ok(&sources)
193        .and_then(parser::parse)
194        .and_then(|ast| {
195            semantic::resolve_and_lower(ast, &[], None)
196                .map_err(|e| e.with_source(ErrorSource::NameResolver).into())
197        })
198        .and_then(|rq| {
199            sql::compile(rq, options).map_err(|e| e.with_source(ErrorSource::SQL).into())
200        })
201        .map_err(|e| {
202            let error_messages = ErrorMessages::from(e).composed(&sources);
203            match options.display {
204                DisplayOptions::AnsiColor => error_messages,
205                DisplayOptions::Plain => ErrorMessages {
206                    inner: error_messages
207                        .inner
208                        .into_iter()
209                        .map(|e| ErrorMessage {
210                            display: e.display.map(|s| strip_str(&s).to_string()),
211                            ..e
212                        })
213                        .collect(),
214                },
215            }
216        })
217}
218
219#[derive(Debug, Clone, Serialize, Deserialize)]
220pub enum Target {
221    /// If `None` is used, dialect is extracted from `target` query header.
222    Sql(Option<sql::Dialect>),
223}
224
225impl Default for Target {
226    fn default() -> Self {
227        Self::Sql(None)
228    }
229}
230
231impl Target {
232    pub fn names() -> Vec<String> {
233        let mut names = vec!["sql.any".to_string()];
234
235        let dialects = sql::Dialect::VARIANTS;
236        names.extend(dialects.iter().map(|d| format!("sql.{d}")));
237
238        names
239    }
240}
241
242impl FromStr for Target {
243    type Err = Error;
244
245    fn from_str(s: &str) -> Result<Target, Self::Err> {
246        if let Some(dialect) = s.strip_prefix("sql.") {
247            if dialect == "any" {
248                return Ok(Target::Sql(None));
249            }
250
251            if let Ok(dialect) = sql::Dialect::from_str(dialect) {
252                return Ok(Target::Sql(Some(dialect)));
253            }
254        }
255
256        Err(Error::new(Reason::NotFound {
257            name: format!("{s:?}"),
258            namespace: "target".to_string(),
259        }))
260    }
261}
262
263/// Compilation options for SQL backend of the compiler.
264#[derive(Debug, Clone, Serialize, Deserialize)]
265pub struct Options {
266    /// Pass generated SQL string through a formatter that splits it
267    /// into multiple lines and prettifies indentation and spacing.
268    ///
269    /// Defaults to true.
270    pub format: bool,
271
272    /// Target and dialect to compile to.
273    pub target: Target,
274
275    /// Emits the compiler signature as a comment after generated SQL
276    ///
277    /// Defaults to true.
278    pub signature_comment: bool,
279
280    /// Deprecated: use `display` instead.
281    pub color: bool,
282
283    /// Whether to use ANSI colors in error messages. This may be extended to
284    /// other formats in the future.
285    ///
286    /// Note that we don't generally recommend threading a `color` option
287    /// through an entire application. Instead, in order of preferences:
288    /// - Use a library such as `anstream` to encapsulate presentation logic and
289    ///   automatically disable colors when not connected to a TTY.
290    /// - Set an environment variable such as `CLI_COLOR=0` to disable any
291    ///   colors coming back from this library.
292    /// - Strip colors from the output (possibly also with a library such as
293    ///   `anstream`).
294    pub display: DisplayOptions,
295}
296
297impl Default for Options {
298    fn default() -> Self {
299        Self {
300            format: true,
301            target: Target::Sql(None),
302            signature_comment: true,
303            color: true,
304            display: DisplayOptions::AnsiColor,
305        }
306    }
307}
308
309impl Options {
310    pub fn with_format(mut self, format: bool) -> Self {
311        self.format = format;
312        self
313    }
314
315    pub fn no_format(self) -> Self {
316        self.with_format(false)
317    }
318
319    pub fn with_signature_comment(mut self, signature_comment: bool) -> Self {
320        self.signature_comment = signature_comment;
321        self
322    }
323
324    pub fn no_signature(self) -> Self {
325        self.with_signature_comment(false)
326    }
327
328    pub fn with_target(mut self, target: Target) -> Self {
329        self.target = target;
330        self
331    }
332
333    #[deprecated(note = "`color` is replaced by `display`; see `Options` docs for more details")]
334    pub fn with_color(mut self, color: bool) -> Self {
335        self.color = color;
336        self
337    }
338
339    pub fn with_display(mut self, display: DisplayOptions) -> Self {
340        self.display = display;
341        self
342    }
343}
344
345#[derive(Debug, Clone, Serialize, Deserialize, strum::EnumString)]
346#[strum(serialize_all = "snake_case")]
347#[non_exhaustive]
348pub enum DisplayOptions {
349    /// Plain text
350    Plain,
351    /// With ANSI colors
352    AnsiColor,
353}
354
// Exists only in doctest builds: attaching the README as this item's
// documentation makes the README's code examples run as doctests.
#[cfg(doctest)]
#[doc = include_str!("../README.md")]
pub struct ReadmeDoctests;
358
359/// Lex PRQL source into Lexer Representation.
360pub fn prql_to_tokens(prql: &str) -> Result<lr::Tokens, ErrorMessages> {
361    prqlc_parser::lexer::lex_source(prql).map_err(|e| {
362        e.into_iter()
363            .map(|e| e.into())
364            .collect::<Vec<ErrorMessage>>()
365            .into()
366    })
367}
368
369/// Parse PRQL into a PL AST
370// TODO: rename this to `prql_to_pl_simple`
371pub fn prql_to_pl(prql: &str) -> Result<pr::ModuleDef, ErrorMessages> {
372    let source_tree = SourceTree::from(prql);
373    prql_to_pl_tree(&source_tree)
374}
375
376/// Parse PRQL into a PL AST
377pub fn prql_to_pl_tree(prql: &SourceTree) -> Result<pr::ModuleDef, ErrorMessages> {
378    parser::parse(prql).map_err(|e| ErrorMessages::from(e).composed(prql))
379}
380
381/// Perform semantic analysis and convert PL to RQ.
382// TODO: rename this to `pl_to_rq_simple`
383pub fn pl_to_rq(pl: pr::ModuleDef) -> Result<ir::rq::RelationalQuery, ErrorMessages> {
384    semantic::resolve_and_lower(pl, &[], None)
385        .map_err(|e| e.with_source(ErrorSource::NameResolver).into())
386}
387
388/// Perform semantic analysis and convert PL to RQ.
389pub fn pl_to_rq_tree(
390    pl: pr::ModuleDef,
391    main_path: &[String],
392    database_module_path: &[String],
393) -> Result<ir::rq::RelationalQuery, ErrorMessages> {
394    semantic::resolve_and_lower(pl, main_path, Some(database_module_path))
395        .map_err(|e| e.with_source(ErrorSource::NameResolver).into())
396}
397
398/// Generate SQL from RQ.
399pub fn rq_to_sql(rq: ir::rq::RelationalQuery, options: &Options) -> Result<String, ErrorMessages> {
400    sql::compile(rq, options).map_err(|e| e.with_source(ErrorSource::SQL).into())
401}
402
403/// Generate PRQL code from PL AST
404pub fn pl_to_prql(pl: &pr::ModuleDef) -> Result<String, ErrorMessages> {
405    Ok(codegen::WriteSource::write(&pl.stmts, codegen::WriteOpt::default()).unwrap())
406}
407
408/// JSON serialization and deserialization functions
409pub mod json {
410    use super::*;
411
412    /// JSON serialization
413    pub fn from_pl(pl: &pr::ModuleDef) -> Result<String, ErrorMessages> {
414        serde_json::to_string(pl).map_err(convert_json_err)
415    }
416
417    /// JSON deserialization
418    pub fn to_pl(json: &str) -> Result<pr::ModuleDef, ErrorMessages> {
419        serde_json::from_str(json).map_err(convert_json_err)
420    }
421
422    /// JSON serialization
423    pub fn from_rq(rq: &ir::rq::RelationalQuery) -> Result<String, ErrorMessages> {
424        serde_json::to_string(rq).map_err(convert_json_err)
425    }
426
427    /// JSON deserialization
428    pub fn to_rq(json: &str) -> Result<ir::rq::RelationalQuery, ErrorMessages> {
429        serde_json::from_str(json).map_err(convert_json_err)
430    }
431
432    fn convert_json_err(err: serde_json::Error) -> ErrorMessages {
433        ErrorMessages::from(Error::new_simple(err.to_string()))
434    }
435}
436
437/// All paths are relative to the project root.
438// We use `SourceTree` to represent both a single file (including a "file" piped
439// from stdin), and a collection of files. (Possibly this could be implemented
440// as a Trait with a Struct for each type, which would use structure over values
441// (i.e. `Option<PathBuf>` below signifies whether it's a project or not). But
442// waiting until it's necessary before splitting it out.)
443#[derive(Debug, Clone, Default, Serialize)]
444pub struct SourceTree {
445    /// Path to the root of the source tree.
446    pub root: Option<PathBuf>,
447
448    /// Mapping from file paths into into their contents.
449    /// Paths are relative to the root.
450    pub sources: HashMap<PathBuf, String>,
451
452    /// Index of source ids to paths. Used to keep [error::Span] lean.
453    source_ids: HashMap<u16, PathBuf>,
454}
455
456impl SourceTree {
457    pub fn single(path: PathBuf, content: String) -> Self {
458        SourceTree {
459            sources: [(path.clone(), content)].into(),
460            source_ids: [(1, path)].into(),
461            root: None,
462        }
463    }
464
465    pub fn new<I>(iter: I, root: Option<PathBuf>) -> Self
466    where
467        I: IntoIterator<Item = (PathBuf, String)>,
468    {
469        let mut res = SourceTree {
470            sources: HashMap::new(),
471            source_ids: HashMap::new(),
472            root,
473        };
474
475        for (index, (path, content)) in iter.into_iter().enumerate() {
476            res.sources.insert(path.clone(), content);
477            res.source_ids.insert((index + 1) as u16, path);
478        }
479        res
480    }
481
482    pub fn insert(&mut self, path: PathBuf, content: String) {
483        let last_id = self.source_ids.keys().max().cloned().unwrap_or(0);
484        self.sources.insert(path.clone(), content);
485        self.source_ids.insert(last_id + 1, path);
486    }
487
488    pub fn get_path(&self, source_id: u16) -> Option<&PathBuf> {
489        self.source_ids.get(&source_id)
490    }
491}
492
493impl<S: ToString> From<S> for SourceTree {
494    fn from(source: S) -> Self {
495        SourceTree::single(PathBuf::from(""), source.to_string())
496    }
497}
498
499/// Debugging and unstable API functions
500pub mod internal {
501    use super::*;
502
503    /// Create column-level lineage graph
504    pub fn pl_to_lineage(
505        pl: pr::ModuleDef,
506    ) -> Result<semantic::reporting::FrameCollector, ErrorMessages> {
507        let ast = Some(pl.clone());
508
509        let root_module = semantic::resolve(pl).map_err(ErrorMessages::from)?;
510
511        let (main, _) = root_module.find_main_rel(&[]).unwrap();
512        let mut fc =
513            semantic::reporting::collect_frames(*main.clone().into_relation_var().unwrap());
514        fc.ast = ast;
515
516        Ok(fc)
517    }
518
519    pub mod json {
520        use super::*;
521
522        /// JSON serialization of FrameCollector lineage
523        pub fn from_lineage(
524            fc: &semantic::reporting::FrameCollector,
525        ) -> Result<String, ErrorMessages> {
526            serde_json::to_string(fc).map_err(convert_json_err)
527        }
528
529        fn convert_json_err(err: serde_json::Error) -> ErrorMessages {
530            ErrorMessages::from(Error::new_simple(err.to_string()))
531        }
532    }
533}
534
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use insta::assert_debug_snapshot;

    use crate::pr::Ident;
    use crate::Target;

    /// Compile `prql` with the default options minus the signature comment,
    /// after switching `anstream`'s global color choice to `Never`.
    pub fn compile(prql: &str) -> Result<String, super::ErrorMessages> {
        anstream::ColorChoice::Never.write_global();
        super::compile(prql, &super::Options::default().no_signature())
    }

    #[test]
    fn test_starts_with() {
        // Over-testing, from co-pilot, can remove some of them.
        let a = Ident::from_path(vec!["a", "b", "c"]);
        let b = Ident::from_path(vec!["a", "b"]);
        let c = Ident::from_path(vec!["a", "b", "c", "d"]);
        let d = Ident::from_path(vec!["a", "b", "d"]);
        let e = Ident::from_path(vec!["a", "c"]);
        let f = Ident::from_path(vec!["b", "c"]);
        assert!(a.starts_with(&b));
        assert!(a.starts_with(&a));
        assert!(!a.starts_with(&c));
        assert!(!a.starts_with(&d));
        assert!(!a.starts_with(&e));
        assert!(!a.starts_with(&f));
    }

    #[test]
    fn test_target_from_str() {
        assert_debug_snapshot!(Target::from_str("sql.postgres"), @r"
        Ok(
            Sql(
                Some(
                    Postgres,
                ),
            ),
        )
        ");

        assert_debug_snapshot!(Target::from_str("sql.poostgres"), @r#"
        Err(
            Error {
                kind: Error,
                span: None,
                reason: NotFound {
                    name: "\"sql.poostgres\"",
                    namespace: "target",
                },
                hints: [],
                code: None,
            },
        )
        "#);

        assert_debug_snapshot!(Target::from_str("postgres"), @r#"
        Err(
            Error {
                kind: Error,
                span: None,
                reason: NotFound {
                    name: "\"postgres\"",
                    namespace: "target",
                },
                hints: [],
                code: None,
            },
        )
        "#);
    }

    /// Confirm that all target names can be parsed.
    #[test]
    fn test_target_names() {
        // Assert on every result: merely collecting the `Result`s would let
        // an unparsable name go unnoticed.
        for name in Target::names() {
            let parsed = Target::from_str(&name);
            assert!(
                parsed.is_ok(),
                "target name {name:?} failed to parse: {parsed:?}"
            );
        }
    }
}
617}