// jax-common 0.1.11
// Core data structures and cryptography for JaxBucket - end-to-end encrypted P2P storage
//! Bucket synchronization job and execution logic
//!
//! This module contains the logic for syncing buckets between peers.

use anyhow::{anyhow, Result};
use uuid::Uuid;

use crate::bucket_log::BucketLogProvider;
use crate::crypto::PublicKey;
use crate::linked_data::Link;
use crate::mount::Manifest;
use crate::mount::PrincipalRole;
use crate::peer::Peer;

use super::{DownloadPinsJob, ProvenanceError, SyncJob};

/// Result of provenance verification for a manifest.
///
/// `Valid` and `UnsignedLegacy` both allow the sync to proceed;
/// `NotAuthorized` causes the sync to be skipped without error.
#[derive(Debug, Clone, PartialEq, Eq)]
enum ProvenanceResult {
    /// Manifest is valid - properly signed by an authorized writer
    Valid,
    /// Our key is not in the manifest's shares (not an error, just skip)
    NotAuthorized,
    /// Manifest is unsigned (allowed during migration)
    UnsignedLegacy,
}

/// Target peer and state for bucket synchronization
#[derive(Debug, Clone)]
pub struct SyncTarget {
    /// Link to the target bucket state (the head advertised by the peer)
    pub link: Link,
    /// Height of the target bucket
    pub height: u64,
    /// Public keys of peers to sync from (in priority order)
    /// First peer is the "preferred" peer that triggered the sync
    pub peer_ids: Vec<PublicKey>,
}

/// Sync bucket job definition
#[derive(Debug, Clone)]
pub struct SyncBucketJob {
    /// The bucket to synchronize
    pub bucket_id: Uuid,
    /// The advertised head state and the peers to fetch it from
    pub target: SyncTarget,
}

/// Execute a bucket sync job
///
/// This is the main entry point for syncing. It handles both cases:
/// - Updating an existing bucket we already have
/// - Cloning a new bucket we don't have yet
pub async fn execute<L>(peer: &Peer<L>, job: SyncBucketJob) -> Result<()>
where
    L: BucketLogProvider + Clone + Send + Sync + 'static,
    L::Error: std::error::Error + Send + Sync + 'static,
{
    let peer_ids_hex: Vec<String> = job.target.peer_ids.iter().map(|p| p.to_hex()).collect();
    tracing::info!(
        "Syncing bucket {} from {} peer(s) {:?} to link {:?} at height {}",
        job.bucket_id,
        job.target.peer_ids.len(),
        peer_ids_hex,
        job.target.link,
        job.target.height
    );

    let exists: bool = peer.logs().exists(job.bucket_id).await?;

    // Signal new bucket discovery so the daemon can set pending status
    if !exists {
        let shared_by = job.target.peer_ids.first().map(|p| p.to_hex());
        if let Err(e) = peer
            .logs()
            .on_new_bucket_discovered(job.bucket_id, shared_by)
            .await
        {
            tracing::warn!(
                "Failed to signal new bucket discovery for {}: {}",
                job.bucket_id,
                e
            );
        }
    }

    let common_ancestor = if exists {
        // find a common ancestor between our log and the
        //  link the peer advertised to us
        find_common_ancestor(peer, job.bucket_id, &job.target.link, &job.target.peer_ids).await?
    } else {
        None
    };

    // TODO (amiller68): between finding the common ancestor and downloading the manifest chain
    //  there are redundant operations. We should optimize this.

    // if we know the bucket exists, but we did not find a common ancestor
    //  then we have diverged / are not talking about the same bucket
    // for now just log a warning and do nothing
    if exists && common_ancestor.is_none() {
        tracing::warn!(
            "Bucket {} diverged from peer(s) {:?}",
            job.bucket_id,
            peer_ids_hex
        );
        return Ok(());
    }

    // Determine between what links we should download manifests for
    let stop_link_owned = common_ancestor.as_ref().map(|ancestor| ancestor.0.clone());
    let stop_link = stop_link_owned.as_ref();

    if stop_link.is_none() && common_ancestor.is_none() {
        // No common ancestor - we'll sync everything from the target back to genesis
        tracing::info!(
            "No common ancestor for bucket {}, syncing from genesis",
            job.bucket_id
        );
    }

    // now we know there is a valid list of manifests we should
    //  fetch and apply to our log

    // Load the common ancestor manifest as our trusted base (if we have one)
    // This manifest is already in our blobs from find_common_ancestor
    let trusted_base: Option<Manifest> = if let Some(link) = stop_link {
        Some(peer.blobs().get_cbor(&link.hash()).await?)
    } else {
        None
    };

    // Download manifest chain from peer (from target back to common ancestor)
    let manifests = download_manifest_chain(
        peer,
        &job.target.link,
        stop_link,
        &job.target.peer_ids,
        trusted_base.as_ref(),
    )
    .await?;

    // TODO (amiller68): maybe theres an optimization here in that we should know
    //  we can exit earlier by virtue of finding a common ancestor which is just
    //  our current head
    if manifests.is_empty() {
        tracing::info!("No new manifests to sync, already up to date");
        return Ok(());
    };

    // Verify provenance of the latest manifest
    // Use our local manifest as the trusted base (previous)
    let latest_manifest = &manifests.last().unwrap().0;
    let trusted_base_ref = trusted_base.as_ref();
    match verify_provenance(peer, latest_manifest, trusted_base_ref)? {
        ProvenanceResult::Valid => {
            tracing::debug!("Provenance verification passed");
        }
        ProvenanceResult::UnsignedLegacy => {
            tracing::warn!(
                "Accepting unsigned manifest for bucket {} (migration mode)",
                latest_manifest.id()
            );
        }
        ProvenanceResult::NotAuthorized => {
            tracing::warn!("Provenance verification failed: our key not in bucket shares");
            return Ok(());
        }
    }

    // apply the updates to the bucket
    apply_manifest_chain(peer, job.bucket_id, &manifests).await?;

    Ok(())
}

/// Download a chain of manifests from peers and validate provenance
///
/// Walks backwards through the manifest chain via `previous` links.
/// Stops when it reaches `stop_at` link (common ancestor) or genesis (no previous).
/// Tries multiple peers in order for each download, succeeding on first available.
///
/// After downloading, validates each manifest's provenance:
/// - The first manifest (oldest) is validated against `trusted_base` (if any)
/// - Each subsequent manifest is validated against its predecessor
///
/// Returns manifests in order from oldest to newest with their links.
/// The `stop_link` manifest itself is downloaded but NOT included in the
/// returned chain.
///
/// # Errors
///
/// Returns an error if a manifest blob cannot be downloaded or decoded, or
/// a `ProvenanceError::InvalidManifestInChain` if any manifest in the chain
/// fails author verification.
async fn download_manifest_chain<L>(
    peer: &Peer<L>,
    start_link: &Link,
    stop_link: Option<&Link>,
    peer_ids: &[PublicKey],
    trusted_base: Option<&Manifest>,
) -> Result<Vec<(Manifest, Link)>>
where
    L: BucketLogProvider + Clone + Send + Sync + 'static,
    L::Error: std::error::Error + Send + Sync + 'static,
{
    tracing::debug!(
        "Downloading manifest chain from {:?}, stop_at: {:?}, using {} peer(s)",
        start_link,
        stop_link,
        peer_ids.len()
    );

    let mut manifests = Vec::new();
    let mut current_link = start_link.clone();

    // Download manifests walking backwards (newest to oldest)
    loop {
        // Download the manifest blob from peers
        peer.blobs()
            .download_hash(current_link.hash(), peer_ids.to_vec(), peer.endpoint())
            .await
            .map_err(|e| {
                anyhow!(
                    "Failed to download manifest {:?} from peers: {}",
                    current_link,
                    e
                )
            })?;

        // Read and decode the manifest
        let manifest: Manifest = peer.blobs().get_cbor(&current_link.hash()).await?;

        // Check if we should stop.
        // NOTE: this check runs AFTER the download but BEFORE the push, so the
        // common-ancestor manifest ends up in our blob store but is excluded
        // from the returned chain.
        if let Some(stop_link) = stop_link {
            if &current_link == stop_link {
                tracing::debug!("Reached stop_at link, stopping download");
                break;
            }
        }

        manifests.push((manifest.clone(), current_link.clone()));

        // Check for previous link
        match manifest.previous() {
            Some(prev_link) => {
                current_link = prev_link.clone();
            }
            None => {
                // Reached genesis, stop
                tracing::debug!("Reached genesis manifest, stopping download");
                break;
            }
        }
    }

    // Reverse to get oldest-to-newest order
    manifests.reverse();

    tracing::debug!("Downloaded {} manifests", manifests.len());

    // Validate the chain (walking from oldest to newest)
    // The first manifest is validated against the trusted_base (common ancestor)
    // NOTE: Chain validation only checks author authorization, not receiver authorization.
    // The receiver check is done separately on the final manifest in execute().
    let mut previous: Option<&Manifest> = trusted_base;

    for (manifest, link) in manifests.iter() {
        verify_author(manifest, previous).map_err(|e| ProvenanceError::InvalidManifestInChain {
            link: link.clone(),
            reason: e.to_string(),
        })?;
        previous = Some(manifest);
    }

    Ok(manifests)
}

/// Find common ancestor by downloading manifests from peers
///
/// Starting from `start_link`, walks backwards through the peer's manifest chain,
/// downloading each manifest and checking if it exists in our local log.
/// Returns the first (most recent) link and height found in our log.
/// Tries multiple peers in order for each download, succeeding on first available.
///
/// Side effect: every manifest visited (including the returned ancestor) is
/// downloaded into our local blob store; `execute()` relies on this when it
/// later loads the ancestor via `get_cbor`.
///
/// # Arguments
///
/// * `peer` - The peer instance with access to logs and blobs
/// * `bucket_id` - The bucket to check against our local log
/// * `link` - The starting point on the peer's chain (typically their head)
/// * `peer_ids` - The peers to download manifests from (in priority order)
///
/// # Returns
///
/// * `Ok(Some((link, height)))` - Found common ancestor with its link and height
/// * `Ok(None)` - No common ancestor found (reached genesis without intersection)
/// * `Err(_)` - Download or log access error
async fn find_common_ancestor<L>(
    peer: &Peer<L>,
    bucket_id: Uuid,
    link: &Link,
    peer_ids: &[PublicKey],
) -> Result<Option<(Link, u64)>>
where
    L: BucketLogProvider + Clone + Send + Sync + 'static,
    L::Error: std::error::Error + Send + Sync + 'static,
{
    tracing::debug!(
        "Finding common ancestor starting from {:?} using {} peer(s)",
        link,
        peer_ids.len()
    );

    let mut current_link = link.clone();
    // Counter for diagnostics only - how many manifests we walked
    let mut manifests_checked = 0;

    loop {
        manifests_checked += 1;
        tracing::debug!(
            "Checking manifest {} at link {:?}",
            manifests_checked,
            current_link
        );

        // TODO (amiller68): this should build in memory
        //  but for now we just download it
        // Download the manifest from peers
        peer.blobs()
            .download_hash(current_link.hash(), peer_ids.to_vec(), peer.endpoint())
            .await
            .map_err(|e| {
                anyhow!(
                    "Failed to download manifest {:?} from peers: {}",
                    current_link,
                    e
                )
            })?;

        // Read and decode the manifest
        let manifest: Manifest = peer.blobs().get_cbor(&current_link.hash()).await?;
        let height = manifest.height();

        // Check if this link exists in our local log
        match peer.logs().has(bucket_id, current_link.clone()).await {
            Ok(heights) if !heights.is_empty() => {
                tracing::info!(
                    "Found common ancestor at link {:?} with height {} (in our log at heights {:?})",
                    current_link,
                    height,
                    heights
                );
                return Ok(Some((current_link, height)));
            }
            Ok(_) => {
                // Link not in our log, check previous
                tracing::debug!("Link {:?} not in our log, checking previous", current_link);
            }
            Err(e) => {
                tracing::warn!("Error checking for link in log: {}", e);
                // Continue checking previous links despite error
            }
        }

        // Move to previous link
        match manifest.previous() {
            Some(prev_link) => {
                current_link = prev_link.clone();
            }
            None => {
                // Reached genesis without finding common ancestor
                tracing::debug!(
                    "Reached genesis after checking {} manifests, no common ancestor found",
                    manifests_checked
                );
                return Ok(None);
            }
        }
    }
}

/// Apply a chain of manifests to the log
///
/// Appends each manifest to the log in order (oldest to newest). For buckets
/// whose content should be synced, a pin-download job is dispatched per
/// manifest; otherwise only the log entries are recorded.
async fn apply_manifest_chain<L>(
    peer: &Peer<L>,
    bucket_id: Uuid,
    manifests: &[(Manifest, Link)],
) -> Result<()>
where
    L: BucketLogProvider + Clone + Send + Sync + 'static,
    L::Error: std::error::Error + Send + Sync + 'static,
{
    tracing::info!("Applying {} manifests to log", manifests.len());

    // Decide once up front whether this bucket's content (pins/blobs) should
    // be downloaded; default to true if the log provider cannot tell us.
    let download_content = peer
        .logs()
        .should_sync_content(bucket_id)
        .await
        .unwrap_or(true);

    for (manifest, link) in manifests {
        let prev = manifest.previous().clone();
        let manifest_height = manifest.height();
        let published = manifest.is_published();

        tracing::info!(
            "Appending manifest to log: height={}, link={:?}, previous={:?}, published={}",
            manifest_height,
            link,
            prev,
            published
        );

        peer.logs()
            .append(
                bucket_id,
                manifest.name().to_string(),
                link.clone(),
                prev,
                manifest_height,
                published,
            )
            .await
            .map_err(|e| {
                anyhow!(
                    "Failed to append manifest at height {}: {}",
                    manifest_height,
                    e
                )
            })?;

        if !download_content {
            // Inactive bucket: record the log entry but skip fetching content.
            tracing::info!(
                "Skipping pin download for bucket {} (not active)",
                bucket_id
            );
            continue;
        }

        // Active bucket: fetch the pinned content from every principal that
        // holds a share of this manifest.
        let share_peers = manifest
            .shares()
            .iter()
            .map(|entry| entry.1.principal().identity)
            .collect();
        peer.dispatch(SyncJob::DownloadPins(DownloadPinsJob {
            pins_link: manifest.pins().clone(),
            peer_ids: share_peers,
        }))
        .await?;
    }

    tracing::info!("Successfully applied {} manifests to log", manifests.len());
    Ok(())
}

/// Verify the author's authorization to create a manifest.
///
/// Checks that:
/// 1. The manifest is properly signed (or unsigned during migration)
/// 2. The author was in the previous manifest's shares (authorized to make changes)
/// 3. The author has write permission (Owner role)
///
/// This is used for chain validation where we don't yet know if the receiver
/// is in the final shares.
///
/// # Arguments
///
/// * `manifest` - The manifest to verify
/// * `previous` - The previous manifest in the chain (if any). The author must
///   have been in this manifest's shares to be authorized. Pass `None` for
///   genesis manifests.
///
/// # Errors
///
/// Returns a `ProvenanceError` for an invalid signature, an author missing
/// from the authorizing shares, an author without write permission, or an
/// unauthorized share removal.
fn verify_author(
    manifest: &Manifest,
    previous: Option<&Manifest>,
) -> Result<ProvenanceResult, ProvenanceError> {
    // 1. Check manifest is signed
    if !manifest.is_signed() {
        // Allow unsigned for backwards compatibility during migration
        tracing::warn!("Received unsigned manifest for bucket {}", manifest.id());
        return Ok(ProvenanceResult::UnsignedLegacy);
    }

    // 2. Verify signature
    match manifest.verify_signature() {
        Ok(true) => {}
        Ok(false) => return Err(ProvenanceError::InvalidSignature),
        Err(e) => {
            tracing::warn!("Signature verification error: {}", e);
            return Err(ProvenanceError::InvalidSignature);
        }
    }

    // 3. Check author was authorized in PREVIOUS manifest (not current!)
    //    An attacker could add themselves to the current manifest's shares.
    //    The author must have been in the previous manifest to make this update.
    //    With no previous manifest (genesis) the manifest is self-attesting:
    //    we fall back to its own shares.
    let author = manifest.author().expect("is_signed() was true");
    let author_hex = author.to_hex();

    let check_shares = previous
        .map(|p| p.shares())
        .unwrap_or_else(|| manifest.shares());

    let author_share = check_shares
        .get(&author_hex)
        .ok_or(ProvenanceError::AuthorNotInShares)?;

    // 4. Check author has write permission (Owner role)
    if *author_share.role() != PrincipalRole::Owner {
        return Err(ProvenanceError::AuthorNotWriter);
    }

    // 5. If shares were removed, verify the author was an owner in the previous
    //    manifest. A direct key scan replaces the previous HashSet-difference
    //    approach: no intermediate collections, same result.
    if let Some(prev) = previous {
        let shares_removed = prev
            .shares()
            .keys()
            .any(|key| manifest.shares().get(key).is_none());

        if shares_removed {
            // Author must have been an owner in the previous manifest to remove shares
            let author_prev_share = prev
                .shares()
                .get(&author_hex)
                .ok_or(ProvenanceError::UnauthorizedShareRemoval)?;
            if *author_prev_share.role() != PrincipalRole::Owner {
                return Err(ProvenanceError::UnauthorizedShareRemoval);
            }
        }
    }

    tracing::debug!(
        "Author verified: bucket={}, author={}",
        manifest.id(),
        author_hex
    );

    Ok(ProvenanceResult::Valid)
}

/// Verify a manifest's full provenance including receiver authorization.
///
/// Checks that:
/// 1. Our key is in the manifest's shares (we're authorized to receive it)
/// 2. The manifest is properly signed (or unsigned during migration)
/// 3. The author was in the previous manifest's shares (authorized to make changes)
/// 4. The author has write permission (Owner role)
///
/// # Arguments
///
/// * `peer` - The peer instance (for our public key)
/// * `manifest` - The manifest to verify
/// * `previous` - The previous manifest in the chain (if any). The author must
///   have been in this manifest's shares to be authorized. Pass `None` for
///   genesis manifests or when using a locally-synced manifest as trusted base.
///
/// # Errors
///
/// Propagates any `ProvenanceError` from author verification. A missing
/// receiver key is NOT an error - it returns `ProvenanceResult::NotAuthorized`.
fn verify_provenance<L>(
    peer: &Peer<L>,
    manifest: &Manifest,
    previous: Option<&Manifest>,
) -> Result<ProvenanceResult, ProvenanceError>
where
    L: BucketLogProvider + Clone + Send + Sync + 'static,
    L::Error: std::error::Error + Send + Sync + 'static,
{
    let our_pub_key = PublicKey::from(*peer.secret().public());
    let our_key_hex = our_pub_key.to_hex();

    // 1. Check we're authorized to receive this bucket.
    //    Shares are keyed by the principal's hex-encoded public key, so a
    //    direct map lookup replaces the previous linear scan.
    if manifest.shares().get(&our_key_hex).is_none() {
        return Ok(ProvenanceResult::NotAuthorized);
    }

    // 2. Verify author (signature + role check)
    let result = verify_author(manifest, previous)?;

    // Log success with receiver info
    if manifest.is_signed() {
        let author = manifest.author().expect("is_signed() was true");
        tracing::debug!(
            "Provenance valid: bucket={}, author={}, our_key={}",
            manifest.id(),
            author.to_hex(),
            our_key_hex
        );
    }

    Ok(result)
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::crypto::{SecretKey, SecretShare};
    use crate::mount::Share;

    /// Build a minimal genesis manifest owned by `owner` (unsigned).
    fn create_test_manifest(owner: &SecretKey) -> Manifest {
        let share = SecretShare::default();
        Manifest::new(
            uuid::Uuid::new_v4(),
            "test".to_string(),
            owner.public(),
            share,
            Link::default(),
            Link::default(),
            0,
        )
    }

    /// A manifest signed by its owner passes author verification.
    #[test]
    fn test_verify_author_valid_owner() {
        let owner = SecretKey::generate();
        let mut manifest = create_test_manifest(&owner);
        manifest.sign(&owner).unwrap();

        let result = verify_author(&manifest, None).unwrap();
        assert_eq!(result, ProvenanceResult::Valid);
    }

    /// A share-holder without the Owner role cannot author a manifest.
    #[test]
    fn test_verify_author_rejects_non_writer() {
        let owner = SecretKey::generate();
        let mirror = SecretKey::generate();

        let mut manifest = create_test_manifest(&owner);
        manifest.add_share(Share::new_mirror(mirror.public()));
        manifest.sign(&mirror).unwrap();

        let result = verify_author(&manifest, None);
        assert!(matches!(result, Err(ProvenanceError::AuthorNotWriter)));
    }

    /// A signer with no share at all is rejected outright.
    #[test]
    fn test_verify_author_rejects_unknown_signer() {
        let owner = SecretKey::generate();
        let attacker = SecretKey::generate();

        let mut manifest = create_test_manifest(&owner);
        manifest.sign(&attacker).unwrap();

        let result = verify_author(&manifest, None);
        assert!(matches!(result, Err(ProvenanceError::AuthorNotInShares)));
    }

    /// Unsigned manifests are tolerated during the signing migration.
    #[test]
    fn test_verify_author_accepts_unsigned_legacy() {
        let owner = SecretKey::generate();
        let manifest = create_test_manifest(&owner);
        // Not signed

        let result = verify_author(&manifest, None).unwrap();
        assert_eq!(result, ProvenanceResult::UnsignedLegacy);
    }
}