icechunk/
inspect.rs

1use chrono::{DateTime, Utc};
2use itertools::Itertools;
3use serde::{Deserialize, Serialize};
4
5use crate::{
6    asset_manager::AssetManager,
7    format::{
8        SnapshotId,
9        manifest::ManifestRef,
10        snapshot::{
11            ManifestFileInfo, NodeData, NodeSnapshot, NodeType, SnapshotProperties,
12        },
13    },
14    repository::{RepositoryErrorKind, RepositoryResult},
15};
16
17#[derive(Debug, Serialize, Deserialize)]
18struct ManifestFileInfoInspect {
19    id: String,
20    size_bytes: u64,
21    num_chunk_refs: u32,
22}
23
24impl From<ManifestFileInfo> for ManifestFileInfoInspect {
25    fn from(value: ManifestFileInfo) -> Self {
26        Self {
27            id: value.id.to_string(),
28            size_bytes: value.size_bytes,
29            num_chunk_refs: value.num_chunk_refs,
30        }
31    }
32}
33
34#[derive(Debug, Serialize, Deserialize)]
35struct ManifestRefInspect {
36    id: String,
37    extents: Vec<(u32, u32)>,
38}
39
40impl From<ManifestRef> for ManifestRefInspect {
41    fn from(value: ManifestRef) -> Self {
42        Self {
43            id: value.object_id.to_string(),
44            extents: value.extents.iter().map(|r| (r.start, r.end)).collect(),
45        }
46    }
47}
48
49#[derive(Debug, Serialize, Deserialize)]
50struct NodeSnapshotInspect {
51    id: String,
52    path: String,
53    node_type: String,
54    #[serde(skip_serializing_if = "Option::is_none")]
55    manifest_refs: Option<Vec<ManifestRefInspect>>,
56}
57
58impl From<NodeSnapshot> for NodeSnapshotInspect {
59    fn from(value: NodeSnapshot) -> Self {
60        Self {
61            id: value.id.to_string(),
62            path: value.path.to_string(),
63            node_type: match value.node_type() {
64                NodeType::Group => "group".to_string(),
65                NodeType::Array => "array".to_string(),
66            },
67            manifest_refs: match value.node_data {
68                NodeData::Array { manifests, .. } => {
69                    let ms = manifests.into_iter().map(|m| m.into()).collect();
70                    Some(ms)
71                }
72                NodeData::Group => None,
73            },
74        }
75    }
76}
77
78#[derive(Debug, Serialize, Deserialize)]
79struct SnapshotInfoInspect {
80    // TODO: add fields
81    //path: String,
82    //size_bytes: u64,
83    id: String,
84    #[serde(skip_serializing_if = "Option::is_none")]
85    parent_id: Option<String>,
86    flushed_at: DateTime<Utc>,
87    commit_message: String,
88    metadata: SnapshotProperties,
89
90    manifests: Vec<ManifestFileInfoInspect>,
91    nodes: Vec<NodeSnapshotInspect>,
92}
93
94async fn inspect_snapshot(
95    asset_manager: &AssetManager,
96    id: &SnapshotId,
97) -> RepositoryResult<SnapshotInfoInspect> {
98    let snap = asset_manager.fetch_snapshot(id).await?;
99    let res = SnapshotInfoInspect {
100        id: snap.id().to_string(),
101        parent_id: snap.parent_id().map(|p| p.to_string()),
102        flushed_at: snap.flushed_at()?,
103        commit_message: snap.message(),
104        metadata: snap.metadata()?,
105        manifests: snap.manifest_files().map(|f| f.into()).collect(),
106        nodes: snap.iter().map_ok(|n| n.into()).try_collect()?,
107    };
108
109    Ok(res)
110}
111
112pub async fn snapshot_json(
113    asset_manager: &AssetManager,
114    id: &SnapshotId,
115    pretty: bool,
116) -> RepositoryResult<String> {
117    let info = inspect_snapshot(asset_manager, id).await?;
118    let res = if pretty {
119        serde_json::to_string_pretty(&info)
120    } else {
121        serde_json::to_string(&info)
122    }
123    .map_err(|e| RepositoryErrorKind::Other(e.to_string()))?;
124    Ok(res)
125}
126
#[cfg(test)]
#[allow(clippy::panic, clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use crate::{ObjectStorage, Repository, repository::VersionInfo};
    use futures::{StreamExt, TryStreamExt};
    use std::{path::PathBuf, sync::Arc};

    /// Renders the tip snapshot of `main` in the `split-repo` fixture as
    /// pretty JSON, parses it back, and checks that the id survives the
    /// round trip.
    #[icechunk_macros::tokio_test]
    async fn test_print_snapshot() -> Result<(), Box<dyn std::error::Error>> {
        let st = Arc::new(
            ObjectStorage::new_local_filesystem(&PathBuf::from(
                "../icechunk-python/tests/data/split-repo",
            ))
            .await?,
        );
        let repo = Repository::open(None, st, Default::default()).await?;

        // The first item of the ancestry stream is the branch tip itself.
        let snap_id = repo
            .ancestry(&VersionInfo::BranchTipRef("main".to_string()))
            .await?
            .boxed()
            .try_next()
            .await?
            .expect("the fixture's main branch should have at least one snapshot")
            .id;

        let json = snapshot_json(repo.asset_manager(), &snap_id, true).await?;
        let info: SnapshotInfoInspect = serde_json::from_str(json.as_str())?;
        // `assert_eq!` prints both ids on failure, unlike `assert!(a == b)`.
        assert_eq!(info.id, snap_id.to_string());

        Ok(())
    }
}