// rudof_generate 0.3.4
//
// RDF data shapes implementation in Rust
// Documentation
#[cfg(not(target_family = "wasm"))]
pub mod config;
#[cfg(not(target_family = "wasm"))]
pub mod conformance_metrics;
#[cfg(not(target_family = "wasm"))]
pub mod converters;
#[cfg(not(target_family = "wasm"))]
pub mod errors;
#[cfg(not(target_family = "wasm"))]
pub mod field_generators;
#[cfg(not(target_family = "wasm"))]
pub mod output;
#[cfg(not(target_family = "wasm"))]
pub mod parallel_generation;
#[cfg(not(target_family = "wasm"))]
pub mod shape_processing;
#[cfg(not(target_family = "wasm"))]
pub mod unified_constraints;
// #[cfg(target_family = "wasm")]
// mod wasm_stubs;

#[cfg(not(target_family = "wasm"))]
pub use config::{GeneratorConfig, SchemaFormat};
#[cfg(not(target_family = "wasm"))]
pub use conformance_metrics::{ConformanceMetrics, TranslationMetrics};
#[cfg(not(target_family = "wasm"))]
use errors::{DataGeneratorError, Result};

#[cfg(not(target_family = "wasm"))]
use crate::{output::OutputWriter, parallel_generation::ParallelGenerator, shape_processing::ShapeProcessor};
#[cfg(not(target_family = "wasm"))]
use std::{path::Path, str::FromStr};

/// Main data generator interface.
///
/// Owns the configuration together with the three pipeline stages used by
/// the methods of this type: schema processing, data generation and output
/// writing.
#[cfg(not(target_family = "wasm"))]
pub struct DataGenerator {
    /// Configuration handed to [`DataGenerator::new`]; its `generation`
    /// section is read when generating data.
    config: GeneratorConfig,
    /// Loads schema files and holds the resulting shapes / unified model.
    processor: ShapeProcessor,
    /// Receives the shape infos and produces the generated graph.
    generator: ParallelGenerator,
    /// Writes the generated graph (plus timing and metrics) to the output.
    writer: OutputWriter,
}

#[cfg(not(target_family = "wasm"))]
impl DataGenerator {
    /// Create a new data generator with the given configuration
    pub fn new(config: GeneratorConfig) -> Result<Self> {
        let processor = ShapeProcessor::new();
        let generator = ParallelGenerator::new(&config)?;
        let writer = OutputWriter::new(&config.output)?;

        Ok(Self {
            config,
            processor,
            generator,
            writer,
        })
    }

    /// Load and process a ShEx schema file
    pub async fn load_schema<P: AsRef<Path>>(&mut self, shex_path: P) -> Result<()> {
        let _shapes = self.processor.extract_shapes(shex_path).await?;

        // Get the processed shape infos from the processor
        let shape_infos: Vec<_> = self.processor.get_shapes().values().cloned().collect();

        self.generator.set_shapes(shape_infos).await?;
        Ok(())
    }

    /// Load and process a ShEx schema file
    pub async fn load_shex_schema<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
        self.processor.load_shex_schema(path).await?;

        // For backward compatibility, also populate the legacy shapes
        if let Some(unified_model) = self.processor.get_unified_model() {
            // Convert unified model back to shape infos for the generator
            let shape_infos = self.convert_unified_to_shape_infos(unified_model);
            self.generator.set_shapes(shape_infos).await?;
        }

        Ok(())
    }

    /// Load and process a SHACL schema file
    pub async fn load_shacl_schema<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
        self.processor.load_shacl_schema(path).await?;

        // Convert unified model to shape infos for the generator
        if let Some(unified_model) = self.processor.get_unified_model() {
            let shape_infos = self.convert_unified_to_shape_infos(unified_model);
            self.generator.set_shapes(shape_infos).await?;
        }

        Ok(())
    }

    /// Auto-detect schema format and load
    pub async fn load_schema_auto<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
        self.processor.load_schema_auto(path).await?;

        // Convert unified model to shape infos for the generator
        if let Some(unified_model) = self.processor.get_unified_model() {
            let shape_infos = self.convert_unified_to_shape_infos(unified_model);
            self.generator.set_shapes(shape_infos).await?;
        }

        Ok(())
    }

    /// Generate synthetic data and write to output
    pub async fn generate(&mut self) -> Result<()> {
        let start_time = std::time::Instant::now();
        let graph = self.generator.generate_data(&self.config.generation).await?;
        let generation_time = start_time.elapsed();

        let conformance_metrics = if let (Some(unified_model), Some(translation_metrics)) = (
            self.processor.get_unified_model(),
            self.processor.get_translation_metrics(),
        ) {
            Some(crate::conformance_metrics::ConformanceMetrics::from_graph_and_model(
                &graph,
                unified_model,
                translation_metrics.clone(),
            )?)
        } else {
            None
        };

        self.writer
            .write_graph_with_timing(&graph, Some(generation_time), conformance_metrics.as_ref())
            .await?;
        Ok(())
    }

    /// Run the complete generation pipeline (legacy ShEx support)
    pub async fn run<P: AsRef<Path>>(&mut self, shex_path: P) -> Result<()> {
        tracing::trace!("Loading ShEx schema from: {}", shex_path.as_ref().display());
        self.load_schema(shex_path).await?;

        tracing::trace!("Generating {} entities", self.config.generation.entity_count);
        self.generate().await?;

        tracing::trace!("Data generation completed successfully");
        Ok(())
    }

    /// Run the complete generation pipeline with schema format detection
    pub async fn run_with_format<P: AsRef<Path>>(
        &mut self,
        schema_path: P,
        format: Option<SchemaFormat>,
    ) -> Result<()> {
        let schema_path_ref = schema_path.as_ref();
        tracing::info!("Loading schema from: {}", schema_path_ref.display());

        match format.or(self.config.generation.schema_format) {
            Some(SchemaFormat::ShEx) => {
                self.load_shex_schema(schema_path).await?;
            },
            Some(SchemaFormat::Shacl) => {
                self.load_shacl_schema(schema_path).await?;
            },
            None => {
                // Auto-detect based on file extension
                self.load_schema_auto(schema_path).await?;
            },
        }

        tracing::info!("Generating {} entities", self.config.generation.entity_count);
        self.generate().await?;

        tracing::info!("Data generation completed successfully");
        Ok(())
    }

    /// Run the complete generation pipeline with automatic schema format detection
    pub async fn run_auto<P: AsRef<Path>>(&mut self, schema_path: P) -> Result<()> {
        self.run_with_format(schema_path, None).await
    }

    // Convert unified model to legacy shape infos for backward compatibility
    fn convert_unified_to_shape_infos(
        &self,
        unified_model: &crate::unified_constraints::UnifiedConstraintModel,
    ) -> Vec<crate::shape_processing::ShapeInfo> {
        use crate::shape_processing::{PropertyInfo, ShapeDependency, ShapeInfo};

        let mut shape_infos = Vec::new();

        for (shape_id, unified_shape) in &unified_model.shapes {
            let mut dependencies = Vec::new();
            let mut properties = Vec::new();

            // Convert properties
            for prop in &unified_shape.properties {
                // Extract dependencies from shape references
                for constraint in &prop.constraints {
                    if let crate::unified_constraints::UnifiedConstraint::ShapeReference(target_shape) = constraint {
                        dependencies.push(ShapeDependency {
                            target_shape: target_shape.clone(),
                            property: prop.property_iri.clone(),
                            min_cardinality: prop.min_cardinality.map(|c| c as i32),
                            max_cardinality: prop.max_cardinality.map(|c| c as i32),
                        });
                    }
                }

                // Extract datatype from constraints
                let datatype = prop.constraints.iter().find_map(|c| match c {
                    crate::unified_constraints::UnifiedConstraint::Datatype(dt) => Some(dt.clone()),
                    _ => None,
                });

                // Extract shape reference
                let shape_ref = prop.constraints.iter().find_map(|c| match c {
                    crate::unified_constraints::UnifiedConstraint::ShapeReference(sr) => Some(sr.clone()),
                    _ => None,
                });

                properties.push(PropertyInfo {
                    property_iri: prop.property_iri.clone(),
                    datatype,
                    shape_ref,
                    min_cardinality: prop.min_cardinality.map(|c| c as i32),
                    max_cardinality: prop.max_cardinality.map(|c| c as i32),
                    constraints: prop.constraints.clone(),
                });
            }

            // Create a minimal ShapeDecl for backward compatibility
            let shape_iri = match rudof_iri::IriS::from_str(shape_id) {
                Ok(iri) => prefixmap::IriRef::Iri(iri),
                Err(_) => {
                    // Fallback to a simple IRI if parsing fails
                    prefixmap::IriRef::Iri(rudof_iri::IriS::new_unchecked("http://example.org/shape"))
                },
            };

            let dummy_decl = shex_ast::ast::ShapeDecl {
                id: shex_ast::ast::ShapeExprLabel::IriRef { value: shape_iri },
                shape_expr: shex_ast::ast::ShapeExpr::Shape(shex_ast::ast::Shape {
                    expression: None,
                    extra: None,
                    closed: Some(unified_shape.closed),
                    sem_acts: None,
                    annotations: None,
                    extends: None,
                }),
                type_: "".to_string(), // Empty string for type
                is_abstract: false,
            };

            shape_infos.push(ShapeInfo {
                declaration: dummy_decl,
                dependencies,
                properties,
            });
        }

        shape_infos
    }
}