// antimatter 2.0.13
//
// antimatter.io Rust library for data control
// Documentation
//! Capsule file format
//!
//! `capsule` contains the implementation of the capsule file format
//! and related utilities. It is mostly an internal package with just
//! a few types to understand in order to be able to encapsulate and
//! open capsules.

// Submodules that make up the capsule package. Every module tagged
// `#[doc(hidden)]` is left out of the rendered rustdoc output, so `common`
// is the only submodule declared here without that attribute; `bundle_v3`
// is additionally restricted to this crate via `pub(crate)`.
#[doc(hidden)]
pub mod aead;
#[doc(hidden)]
pub mod bundle;
#[doc(hidden)]
pub mod bundle_v2;
#[doc(hidden)]
pub(crate) mod bundle_v3;
#[doc(hidden)]
pub mod capsule;
#[doc(hidden)]
pub mod capsule_v2;
#[doc(hidden)]
pub mod classifier;
pub mod common;
#[doc(hidden)]
pub mod framer;
#[doc(hidden)]
pub mod policy_enforcer;
#[doc(hidden)]
pub mod stream;
#[doc(hidden)]
pub mod streaming_aead;
#[doc(hidden)]
pub mod util_readers;

use crate::capsule::common::{CapsuleError, CapsuleTag, Column, SpanTag};
use std::{io::Read, marker::Send};

/// An iterator for the rows in a capsule.
pub trait RowIterator: Send {
    /// Returns the domain ID for the capsule. In the case of a bundle,
    /// the ID is of the domain which created the bundle.
    fn domain_id(&self) -> String;
    /// Returns the extra data as specified in the [`EncapsulateConfig`]
    /// at capsule creation time.
    ///
    /// [`EncapsulateConfig`]: [`antimatter::session::session::EncapsulateConfig`]
    fn extra_data(&self) -> String;
    /// Returns the capsule ID in the case of a individual capsule, or the
    /// list of capsule IDs in the case of a bundle.
    fn capsule_ids(&self) -> Vec<String>;
    /// Returns the collection of all [`CapsuleTag`]s over all capsules.
    fn capsule_tags(&self) -> Vec<CapsuleTag>;
    /// Returns the [`Column`]s in the capsule data table.
    fn columns(&self) -> Vec<Column>;
    /// Returns one string for each open failure. The result will only
    /// be meaningful if the [`RowIterator`] is over a capsule bundle.
    /// Note that open failures are not fatal in the case of a bundle,
    /// since the consumer may only have access to open certain capsules
    /// within the bundle.
    fn open_failures(&self) -> Vec<String>;

    /// Get the next row in the capsule as a [`CellIterator`]. Consumers
    /// are expected to consume the entire row (i.e. by reading the data
    /// from each cell) before calling this function again, but implementers
    /// may include safety checks to consume unread data. When the method
    /// is called and all rows have already been returned, implementations
    /// must return CapsuleError::EndOfCapsule.
    ///
    /// **Arguments**
    /// * `redact_tags`: extra tags to redact. If any tag in this list
    ///      appears in the list of tags for a span, that span will be
    ///      redacted. These tags take precedence over configured access
    ///      policy rules, so even if the policy explicitly allowed access,
    ///      the presence of a tag in `redact_tags` will cause the span to
    ///      be redacted.
    ///
    /// **Returns**
    /// * `Box<dyn CellIterator + 'static>`: a [`CellIterator`] which can
    ///      be used to obtain a reader for each cell in the row.
    fn next_row(
        &mut self,
        redact_tags: Vec<CapsuleTag>,
    ) -> Result<Box<dyn CellIterator + 'static>, CapsuleError>;

    /// Iterate each row in the capsule and call back the argument function
    /// `f` with a [`CellIterator`]. The consumer is expected to call
    /// [`CellIterator::for_each_cell`] on the provided [`CellIterator`]
    /// within `f`.
    ///
    /// **Arguments**
    /// * `redact_tags`: see the documentation for [`RowIterator::next_row`].
    /// * `f`: a callback function which will receive a [`CellIterator`]
    fn for_each_row(
        &mut self,
        redact_tags: &[CapsuleTag],
        f: &mut dyn FnMut(&mut dyn CellIterator) -> Result<(), CapsuleError>,
    ) -> Result<(), CapsuleError> {
        self.for_each_row_default(redact_tags, f)
    }

    #[doc(hidden)]
    // for_each_row_default is for internal use.
    fn for_each_row_default(
        &mut self,
        redact_tags: &[CapsuleTag],
        f: &mut dyn FnMut(&mut dyn CellIterator) -> Result<(), CapsuleError>,
    ) -> Result<(), CapsuleError> {
        loop {
            match self.next_row(redact_tags.to_vec()) {
                Ok(mut cell_iterator) => {
                    f(&mut *cell_iterator)?;
                }
                Err(e) => match e {
                    CapsuleError::RowAccessDeniedByPolicy => {}
                    CapsuleError::EndOfRow => {}
                    CapsuleError::EndOfCapsule => return Ok(()),
                    e => return Err(e),
                },
            }
        }
    }

    /// Read the entire capsule into memory and return the span tags and
    /// (redacted, tokenized, and etc.) cell data. Errors returned on read
    /// should be treated as fatal.
    ///
    /// **Arguments**
    /// * `redact_tags`: see the documentation for [`RowIterator::next_row`].
    ///
    /// **Returns**
    /// * `Vec<Vec<Vec<`[`SpanTag`]`>>>`: a 2d vector of `Vec<`[`SpanTag`]`>`
    ///      where each element corresponds to the returned cell data.
    /// * `Vec<Vec<Vec<u8>>>`: a 2d vector of `Vec<u8>` where each element
    ///      contains the data for a cell in the capsule data table. The
    ///      data is in row-major order.
    fn read_all(
        &mut self,
        redact_tags: &[CapsuleTag],
    ) -> Result<(Vec<Vec<Vec<SpanTag>>>, Vec<Vec<Vec<u8>>>), CapsuleError> {
        let mut tags: Vec<Vec<Vec<SpanTag>>> = Vec::new();
        let mut data: Vec<Vec<Vec<u8>>> = Vec::new();
        self.for_each_row(redact_tags, &mut |row| {
            let mut row_data: Vec<Vec<u8>> = Vec::new();
            match row.for_each_cell(&mut |cell| {
                let mut data: Vec<u8> = Vec::new();
                match cell.read_to_end(&mut data) {
                    Ok(_) => {
                        row_data.push(data);
                        Ok(())
                    }
                    Err(e) if e.kind() == std::io::ErrorKind::Other => {
                        match e.to_string().as_str() {
                            // TODO: is there a better way to handle this?
                            "record access denied by policy" => {
                                Err(CapsuleError::RowAccessDeniedByPolicy)
                            }
                            _ => Err(CapsuleError::Generic(format!("reading cell: {}", e))),
                        }
                    }
                    Err(e) => Err(CapsuleError::Generic(format!("reading cell: {}", e))),
                }?;
                Ok(())
            }) {
                Err(CapsuleError::EndOfCapsule) => {}
                Err(CapsuleError::EndOfRow) => {}
                Err(CapsuleError::RowAccessDeniedByPolicy) => {}
                Err(e) => return Err(e),
                Ok(()) => {}
            }
            if !row.is_deny_record() {
                tags.push(row.span_tags());
                data.push(row_data);
            }
            Ok(())
        })?;
        Ok((tags, data))
    }
}

/// An iterator for the cells in a row.
pub trait CellIterator: Send {
    /// Walk the cells of the row, invoking the callback `f` once per cell
    /// with a reader over that cell's data.
    ///
    /// **Arguments**
    /// * `f`: a callback function which will receive a Reader containing
    ///      the (redacted, tokenized, and etc.) cell data.
    ///
    /// **Notes**
    /// * the default implementation keeps calling
    ///   [`CellIterator::next_cell`] until it returns an error and then
    ///   returns that error, so it never produces `Ok(())` itself; callers
    ///   should treat `CapsuleError::EndOfRow` as the regular end of a row.
    ///   NOTE(review): implementations that override this method may
    ///   return `Ok(())` instead — confirm against the implementers.
    /// * if `f` fails, [`CellIterator::cleanup`] is run before the error
    ///   from `f` is returned; should `cleanup` fail as well, the error
    ///   from `cleanup` is returned in its place.
    /// * iteration may stop early if an access policy decision evaluates to
    ///   `DenyRecord`, in which case the number of callbacks to `f` will be
    ///   less than the number of cells in the row. see
    ///   [`CellIterator::is_deny_record`], which must be checked once
    ///   `for_each_cell` has returned.
    fn for_each_cell(
        &mut self,
        f: &mut dyn FnMut(&mut dyn Read) -> Result<(), CapsuleError>,
    ) -> Result<(), CapsuleError> {
        loop {
            // The only ways out of this loop are an error from `next_cell`
            // (propagated by `?`) or an error from the callback.
            let mut reader = self.next_cell()?;
            if let Err(callback_err) = f(&mut *reader) {
                self.cleanup()?;
                return Err(callback_err);
            }
        }
    }

    /// Get the next cell in this row as a Reader. The reader must be fully
    /// consumed before calling next_cell again, but implementations may
    /// include convenience features to skip unread data. Implementations
    /// must return `CapsuleError::EndOfCapsule` when the end of the capsule
    /// has been reached, and `CapsuleError::EndOfRow` when the end of the
    /// row (but not the capsule) has been reached. If access to the row
    /// has been denied, then `CapsuleError::RowAccessDeniedByPolicy` must
    /// be returned, and `CapsuleError::CapsuleAccessDeniedByPolicy` must
    /// be returned if access to the entire capsule has been denied.
    fn next_cell(&mut self) -> Result<Box<dyn Read + Send + 'static>, CapsuleError>;

    /// Returns true iff cell iteration was terminated early because the
    /// access policy evaluation resulted in a `DenyRecord` decision. In
    /// this case, the consumer should discard any data read from the
    /// entire row as access has been denied.
    fn is_deny_record(&self) -> bool;

    /// Returns the [`SpanTag`]s for this row as a vector of
    /// `Vec<`[`SpanTag`]`>`. `RowIterator::read_all` stores the result
    /// alongside the row's cell data; presumably there is one inner vector
    /// per cell — confirm against the implementers.
    fn span_tags(&self) -> Vec<Vec<SpanTag>>;

    /// Hook invoked by the default [`CellIterator::for_each_cell`] after
    /// its callback has failed and before that failure is returned. What
    /// is cleaned up is left to the implementation.
    fn cleanup(&mut self) -> Result<(), CapsuleError>;
}