1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
//! # Control Interface Client
//!
//! This library provides a client API for consuming the wasmCloud control interface over a
//! NATS connection. This library can be used by multiple types of tools, and is also used
//! by the control interface capability provider and the wash CLI.
use std::fmt::Debug;
use std::marker::PhantomData;
use std::{collections::HashMap, time::Duration};

use cloudevents::event::Event;
use serde::{de::DeserializeOwned, Deserialize, Serialize};
use sub_stream::collect_timeout;
use tokio::sync::mpsc::Receiver;
use tracing::{debug, error, instrument, trace};
use tracing_futures::Instrument;

mod broker;
pub mod kv;
mod otel;
mod sub_stream;
mod types;

use kv::{CachedKvStore, DirectKvStore};
pub use types::*;

use crate::kv::KvStore;
use crate::otel::OtelHeaderInjector;

/// Crate-local result alias: every fallible operation in this file reports failure as a
/// boxed, thread-safe error trait object.
type Result<T> = ::std::result::Result<T, Box<dyn std::error::Error + Send + Sync>>;

/// Lattice control interface client
///
/// Generic over the metadata store `T`, which backs the link and claims operations and is
/// exposed through `lattice_metadata_client`.
#[derive(Clone, Debug)]
pub struct Client<T: Clone + Debug> {
    /// NATS connection used for every request, publish and subscription issued by the client
    nc: async_nats::Client,
    /// Optional prefix handed to the `broker` subject builders for control topics
    topic_prefix: Option<String>,
    /// Lattice ID/prefix this client talks to
    pub lattice_prefix: String,
    /// Deadline applied to request/reply calls (see `request_timeout`)
    timeout: Duration,
    /// Collection window used by `publish_and_wait` (host queries and auctions)
    auction_timeout: Duration,
    /// Lattice metadata store used for links and claims
    kvstore: T,
}

/// A client builder that can be used to fluently provide configuration settings used to construct
/// the control interface client
///
/// The type parameter `T` only records which metadata store `build` will create; no value of
/// `T` is held by the builder itself.
pub struct ClientBuilder<T> {
    /// NATS client to build on; `build` fails when this is still `None`
    nc: Option<async_nats::Client>,
    /// Optional prefix for the control topics (distinct from the lattice prefix)
    topic_prefix: Option<String>,
    /// Lattice ID/prefix; the `Default` impl sets this to `default`
    lattice_prefix: String,
    /// Timeout for standard request/reply calls
    timeout: Duration,
    /// Timeout for auction (scatter/gather) operations
    auction_timeout: Duration,
    /// Optional JetStream domain forwarded to the metadata store constructor
    js_domain: Option<String>,
    /// Zero-sized marker carrying the store type `T`
    store_placeholder: PhantomData<T>,
}

impl Default for ClientBuilder<DirectKvStore> {
    fn default() -> Self {
        Self {
            nc: None,
            topic_prefix: None,
            lattice_prefix: "default".to_string(),
            timeout: Duration::from_secs(2),
            auction_timeout: Duration::from_secs(5),
            js_domain: None,
            store_placeholder: PhantomData,
        }
    }
}

impl ClientBuilder<DirectKvStore> {
    /// Starts a builder around an established NATS client. Every other setting keeps the
    /// value supplied by `Default` until it is overridden.
    pub fn new(nc: async_nats::Client) -> ClientBuilder<DirectKvStore> {
        let defaults = Self::default();
        Self {
            nc: Some(nc),
            ..defaults
        }
    }
}

impl<T> ClientBuilder<T> {
    /// Sets the prefix of the NATS topics used for all control requests. This is unrelated to
    /// the lattice ID/prefix.
    pub fn topic_prefix(mut self, prefix: impl Into<String>) -> ClientBuilder<T> {
        self.topic_prefix = Some(prefix.into());
        self
    }

    /// Sets the lattice ID/prefix for this client. When never called, the prefix stays at
    /// `default`.
    pub fn lattice_prefix(mut self, prefix: impl Into<String>) -> ClientBuilder<T> {
        self.lattice_prefix = prefix.into();
        self
    }

    /// Sets the timeout applied to standard calls and RPC invocations. When never called, the
    /// timeout is 2 seconds.
    pub fn rpc_timeout(mut self, timeout: Duration) -> ClientBuilder<T> {
        self.timeout = timeout;
        self
    }

    /// Sets the timeout applied to auction (scatter/gather) operations. When never called, the
    /// timeout is 5 seconds.
    pub fn auction_timeout(mut self, timeout: Duration) -> ClientBuilder<T> {
        self.auction_timeout = timeout;
        self
    }

    /// Sets the JetStream domain, which can be critical for locating the right key-value bucket
    /// holding lattice metadata. When never called, the domain is `None`.
    pub fn js_domain(mut self, domain: impl Into<String>) -> ClientBuilder<T> {
        self.js_domain = Some(domain.into());
        self
    }

    /// Switches the builder to the caching metadata store. Useful for long-running applications
    /// that fetch lattice metadata repeatedly; without it, every links or claims lookup goes to
    /// the lattice metadata bucket.
    pub fn use_caching(self) -> ClientBuilder<CachedKvStore> {
        // Only the marker type changes; every configured value is carried over unchanged.
        let ClientBuilder {
            nc,
            topic_prefix,
            lattice_prefix,
            timeout,
            auction_timeout,
            js_domain,
            store_placeholder: _,
        } = self;
        ClientBuilder {
            nc,
            topic_prefix,
            lattice_prefix,
            timeout,
            auction_timeout,
            js_domain,
            store_placeholder: PhantomData,
        }
    }
}

impl ClientBuilder<CachedKvStore> {
    /// Finishes construction of a control interface client backed by the caching metadata
    /// store. The function is async because the store is created here, which is where the
    /// client attaches to the lattice metadata key-value bucket (`LATTICEDATA_{prefix}`).
    ///
    /// # Errors
    /// Fails when no NATS client was supplied to the builder, or when constructing the
    /// `CachedKvStore` fails.
    pub async fn build(self) -> Result<Client<CachedKvStore>> {
        let nc = self
            .nc
            .ok_or("Cannot create a control interface client without a NATS client")?;
        // The store receives its own handle to the connection; the client keeps the other.
        let kvstore = CachedKvStore::new(nc.clone(), &self.lattice_prefix, self.js_domain).await?;
        Ok(Client {
            nc,
            topic_prefix: self.topic_prefix,
            lattice_prefix: self.lattice_prefix,
            timeout: self.timeout,
            auction_timeout: self.auction_timeout,
            kvstore,
        })
    }
}

impl ClientBuilder<DirectKvStore> {
    /// Finishes construction of a control interface client backed by the non-caching metadata
    /// store. The function is async because the store is created here, which is where the
    /// client attaches to the lattice metadata key-value bucket (`LATTICEDATA_{prefix}`).
    ///
    /// # Errors
    /// Fails when no NATS client was supplied to the builder, or when constructing the
    /// `DirectKvStore` fails.
    pub async fn build(self) -> Result<Client<DirectKvStore>> {
        let nc = self
            .nc
            .ok_or("Cannot create a control interface client without a NATS client")?;
        // The store receives its own handle to the connection; the client keeps the other.
        let kvstore = DirectKvStore::new(nc.clone(), &self.lattice_prefix, self.js_domain).await?;
        Ok(Client {
            nc,
            topic_prefix: self.topic_prefix,
            lattice_prefix: self.lattice_prefix,
            timeout: self.timeout,
            auction_timeout: self.auction_timeout,
            kvstore,
        })
    }
}

impl<T: KvStore + Clone + Debug + Send + Sync> Client<T> {
    /// Sends `payload` to `subject` as a NATS request, with headers produced by
    /// `OtelHeaderInjector` attached, and waits at most `timeout` for the reply.
    ///
    /// # Errors
    /// Returns an `std::io::Error` of kind `TimedOut` when no reply arrives in time, or the
    /// NATS error when the request itself fails.
    #[instrument(level = "debug", skip_all)]
    pub(crate) async fn request_timeout(
        &self,
        subject: String,
        payload: Vec<u8>,
        timeout: Duration,
    ) -> Result<async_nats::Message> {
        let request = self.nc.request_with_headers(
            subject,
            OtelHeaderInjector::default_with_span().into(),
            payload.into(),
        );
        match tokio::time::timeout(timeout, request).await {
            Ok(reply) => reply.map_err(|e| e.into()),
            Err(_elapsed) => {
                Err(std::io::Error::new(std::io::ErrorKind::TimedOut, "timed out").into())
            }
        }
    }

    /// Returns a handle to the underlying metadata client for use in advanced scenarios and queries
    ///
    /// The returned reference borrows the store owned by this client (the same one used by the
    /// link and claims methods); nothing is cloned.
    pub fn lattice_metadata_client(&self) -> &T {
        &self.kvstore
    }

    /// Asks every host in the lattice to report itself and gathers the answers. Replies are
    /// collected using the client's auction timeout, so the set of responsive hosts may be
    /// empty.
    #[instrument(level = "debug", skip_all)]
    pub async fn get_hosts(&self) -> Result<Vec<Host>> {
        let subject = broker::queries::hosts(&self.topic_prefix, &self.lattice_prefix);
        debug!("get_hosts:publish {}", &subject);
        // The host query carries no payload.
        let empty_payload = Vec::<u8>::new();
        self.publish_and_wait::<Host>(subject, empty_payload).await
    }

    /// Requests the inventory of a single running host, identified by `host_id`.
    ///
    /// # Errors
    /// Fails when the host does not answer within the RPC timeout, or when the reply cannot
    /// be decoded as a `HostInventory`.
    #[instrument(level = "debug", skip_all)]
    pub async fn get_host_inventory(&self, host_id: &str) -> Result<HostInventory> {
        let subject =
            broker::queries::host_inventory(&self.topic_prefix, &self.lattice_prefix, host_id);
        debug!("get_host_inventory:request {}", &subject);
        let reply = self
            .request_timeout(subject, Vec::new(), self.timeout)
            .await
            .map_err(|e| format!("Did not receive host inventory from target host: {}", e))?;
        json_deserialize(&reply.payload)
    }

    /// Retrieves the full set of all cached claims in the lattice.
    ///
    /// The lookup is delegated to the metadata store (`get_all_claims`); each claims entry is
    /// returned as a string-to-string map.
    #[instrument(level = "debug", skip_all)]
    pub async fn get_claims(&self) -> Result<Vec<HashMap<String, String>>> {
        self.kvstore.get_all_claims().await
    }

    /// Runs an actor auction in the lattice: the actor reference and a set of constraints are
    /// published, and the acknowledgements that arrive are gathered using the client's auction
    /// timeout. The caller then picks one of the "auction winners" and issues the actual start
    /// command. An auction may legitimately return no results at all.
    #[instrument(level = "debug", skip_all)]
    pub async fn perform_actor_auction(
        &self,
        actor_ref: &str,
        constraints: HashMap<String, String>,
    ) -> Result<Vec<ActorAuctionAck>> {
        let subject = broker::actor_auction_subject(&self.topic_prefix, &self.lattice_prefix);
        let request = ActorAuctionRequest {
            actor_ref: actor_ref.to_owned(),
            constraints,
        };
        let payload = json_serialize(request)?;
        debug!("actor_auction:publish {}", &subject);
        self.publish_and_wait::<ActorAuctionAck>(subject, payload)
            .await
    }

    /// Runs a provider auction in the lattice: the provider reference, link name and a set of
    /// constraints are published, and the acknowledgements that arrive are gathered using the
    /// client's auction timeout. The caller then picks one of the "auction winners" and issues
    /// the actual start command. An auction may legitimately return no results at all.
    #[instrument(level = "debug", skip_all)]
    pub async fn perform_provider_auction(
        &self,
        provider_ref: &str,
        link_name: &str,
        constraints: HashMap<String, String>,
    ) -> Result<Vec<ProviderAuctionAck>> {
        let subject = broker::provider_auction_subject(&self.topic_prefix, &self.lattice_prefix);
        let request = ProviderAuctionRequest {
            provider_ref: provider_ref.to_owned(),
            link_name: link_name.to_owned(),
            constraints,
        };
        let payload = json_serialize(request)?;
        debug!("provider_auction:publish {}", &subject);
        self.publish_and_wait::<ProviderAuctionAck>(subject, payload)
            .await
    }

    /// Asks the given host to start `count` instances of an actor identified by its OCI
    /// reference. The returned acknowledgement confirms _receipt_ of the command (or reports a
    /// validation failure); it does not confirm that the actor started. Hosts acknowledge
    /// before fetching the actor's OCI bytes so that callers are not blocked, so a client that
    /// needs to know whether startup completed must watch the control event stream.
    ///
    /// # Errors
    /// Fails when the command cannot be serialized, when no acknowledgement arrives within the
    /// RPC timeout, or when the reply cannot be decoded.
    #[instrument(level = "debug", skip_all)]
    pub async fn start_actor(
        &self,
        host_id: &str,
        actor_ref: &str,
        count: u16,
        annotations: Option<HashMap<String, String>>,
    ) -> Result<CtlOperationAck> {
        let subject =
            broker::commands::start_actor(&self.topic_prefix, &self.lattice_prefix, host_id);
        debug!("start_actor:request {}", &subject);
        let command = StartActorCommand {
            count,
            actor_ref: actor_ref.to_owned(),
            host_id: host_id.to_owned(),
            annotations,
        };
        let payload = json_serialize(command)?;
        let reply = self
            .request_timeout(subject, payload, self.timeout)
            .await
            .map_err(|e| format!("Did not receive start actor acknowledgement: {}", e))?;
        json_deserialize(&reply.payload)
    }

    /// Asks the given host to scale an actor to `count` instances. The returned
    /// acknowledgement confirms _receipt_ of the command (or reports a validation failure); it
    /// does not confirm that the actor scaled. Hosts acknowledge before fetching the actor's
    /// OCI bytes so that callers are not blocked, so a client that needs to know whether the
    /// operation completed must watch the control event stream.
    ///
    /// # Errors
    /// Fails when the command cannot be serialized, when no acknowledgement arrives within the
    /// RPC timeout, or when the reply cannot be decoded.
    #[instrument(level = "debug", skip_all)]
    pub async fn scale_actor(
        &self,
        host_id: &str,
        actor_ref: &str,
        actor_id: &str,
        count: u16,
        annotations: Option<HashMap<String, String>>,
    ) -> Result<CtlOperationAck> {
        let subject =
            broker::commands::scale_actor(&self.topic_prefix, &self.lattice_prefix, host_id);
        debug!("scale_actor:request {}", &subject);
        let command = ScaleActorCommand {
            count,
            actor_ref: actor_ref.to_owned(),
            host_id: host_id.to_owned(),
            actor_id: actor_id.to_owned(),
            annotations,
        };
        let payload = json_serialize(command)?;
        let reply = self
            .request_timeout(subject, payload, self.timeout)
            .await
            .map_err(|e| format!("Did not receive scale actor acknowledgement: {}", e))?;
        json_deserialize(&reply.payload)
    }

    /// Broadcasts a registry credential map on the lattice control interface. Every listening
    /// host overwrites its own registry credential map with the published one. Because the
    /// payload contains secrets, using TLS connections with NATS and isolating the control
    /// interface credentials is highly recommended in production.
    ///
    /// # Errors
    /// Fails when the map cannot be serialized or when the publish is rejected by the NATS
    /// client. This is a fire-and-forget publish; no host acknowledgement is awaited.
    #[instrument(level = "debug", skip_all)]
    pub async fn put_registries(&self, registries: RegistryCredentialMap) -> Result<()> {
        let subject = broker::publish_registries(&self.topic_prefix, &self.lattice_prefix);
        debug!("put_registries:publish {}", &subject);
        let payload = json_serialize(&registries)?;
        self.nc
            .publish_with_headers(
                subject,
                OtelHeaderInjector::default_with_span().into(),
                payload.into(),
            )
            .await
            .map_err(|e| format!("Failed to push registry credential map: {}", e))?;
        Ok(())
    }

    /// Stores a link definition, assembled from the given IDs, link name and values, in the
    /// lattice metadata key-value bucket. Returns an error when the store could not write it.
    #[instrument(level = "debug", skip_all)]
    pub async fn advertise_link(
        &self,
        actor_id: &str,
        provider_id: &str,
        contract_id: &str,
        link_name: &str,
        values: HashMap<String, String>,
    ) -> Result<()> {
        let link = LinkDefinition {
            actor_id: actor_id.to_owned(),
            provider_id: provider_id.to_owned(),
            contract_id: contract_id.to_owned(),
            link_name: link_name.to_owned(),
            values,
        };
        self.kvstore.put_link(link).await
    }

    /// Removes a link from the lattice metadata keyvalue bucket. Returns an error if it was unable
    /// to delete. This is an idempotent operation.
    ///
    /// The link is identified by the actor ID, contract ID and link name; the deletion itself
    /// is delegated to the metadata store (`delete_link`).
    #[instrument(level = "debug", skip_all)]
    pub async fn remove_link(
        &self,
        actor_id: &str,
        contract_id: &str,
        link_name: &str,
    ) -> Result<()> {
        self.kvstore
            .delete_link(actor_id, contract_id, link_name)
            .await
    }

    /// Retrieves the list of link definitions stored in the lattice metadata key-value bucket. If
    /// the client was created with caching, this will return the cached list of links. Otherwise,
    /// it will query the bucket for the list of links.
    ///
    /// The lookup is delegated to the metadata store (`get_links`), so which of the two
    /// behaviors applies depends on the store type `T` this client was built with.
    #[instrument(level = "debug", skip_all)]
    pub async fn query_links(&self) -> Result<Vec<LinkDefinition>> {
        self.kvstore.get_links().await
    }

    /// Instructs a host to replace a running actor (identified by its public key) with a new
    /// actor given as an OCI image reference. The host acknowledges as soon as it has verified
    /// that the target actor is running, which is **before** the new bytes are downloaded. A
    /// live update can take a long time, and control clients cannot block on a reply that may
    /// arrive several seconds later. To confirm that the update finished, listen for the
    /// corresponding **PublishedEvent** on the control events channel (published as JSON).
    ///
    /// # Errors
    /// Fails when the command cannot be serialized, when no acknowledgement arrives within the
    /// RPC timeout, or when the reply cannot be decoded.
    #[instrument(level = "debug", skip_all)]
    pub async fn update_actor(
        &self,
        host_id: &str,
        existing_actor_id: &str,
        new_actor_ref: &str,
        annotations: Option<HashMap<String, String>>,
    ) -> Result<CtlOperationAck> {
        let subject =
            broker::commands::update_actor(&self.topic_prefix, &self.lattice_prefix, host_id);
        debug!("update_actor:request {}", &subject);
        let command = UpdateActorCommand {
            host_id: host_id.to_owned(),
            actor_id: existing_actor_id.to_owned(),
            new_actor_ref: new_actor_ref.to_owned(),
            annotations,
        };
        let payload = json_serialize(command)?;
        let reply = self
            .request_timeout(subject, payload, self.timeout)
            .await
            .map_err(|e| format!("Did not receive update actor acknowledgement: {}", e))?;
        json_deserialize(&reply.payload)
    }

    /// Issues a command to a host to start a provider with a given OCI reference using the
    /// specified link name (or "default" if none is specified). The target wasmCloud host will
    /// acknowledge the receipt of this command _before_ downloading the provider's bytes from the
    /// OCI registry, indicating either a validation failure or success. If a client needs
    /// deterministic guarantees that the provider has completed its startup process, such a client
    /// needs to monitor the control event stream for the appropriate event. If a host ID is not
    /// supplied (empty string), then this function will return an early acknowledgement, go find a
    /// host, and then submit the start request to a target host.
    #[instrument(level = "debug", skip_all)]
    pub async fn start_provider(
        &self,
        host_id: &str,
        provider_ref: &str,
        link_name: Option<String>,
        annotations: Option<HashMap<String, String>>,
        provider_configuration: Option<String>,
    ) -> Result<CtlOperationAck> {
        let provider_ref = provider_ref.to_string();
        if !host_id.trim().is_empty() {
            start_provider_(
                &self.nc,
                &self.topic_prefix,
                &self.lattice_prefix,
                self.timeout,
                host_id,
                &provider_ref,
                link_name,
                annotations,
                provider_configuration,
            )
            .in_current_span()
            .await
        } else {
            // If a host isn't supplied, try to find one via auction.
            // If no host is found, return error.
            // If a host is found, start brackground request to start provider and return Ack
            let mut error = String::new();
            debug!("start_provider:deferred (no-host) request");
            let current_span = tracing::Span::current();
            let host = match self.get_hosts().await {
                Err(e) => {
                    error = format!("failed to query hosts for no-host provider start: {}", e);
                    None
                }
                Ok(hs) => hs.into_iter().next(),
            };
            if let Some(host) = host {
                let this = self.clone();
                tokio::spawn(async move {
                    let _ = start_provider_(
                        &this.nc,
                        &this.topic_prefix,
                        &this.lattice_prefix,
                        this.timeout,
                        &host.id,
                        &provider_ref,
                        link_name,
                        annotations,
                        provider_configuration,
                    )
                    .instrument(current_span)
                    .await;
                });
            } else if error.is_empty() {
                error = "No hosts detected in in no-host provider start.".to_string();
            }
            if !error.is_empty() {
                error!("{}", error);
            }
            Ok(CtlOperationAck {
                accepted: true,
                error,
            })
        }
    }

    /// Instructs a host to stop the provider identified by OCI reference, link name and
    /// contract ID. The host acknowledges receipt of the command but _will not_ send a separate
    /// confirmation once the provider has terminated; clients that need that information must
    /// also monitor the control event stream.
    ///
    /// # Errors
    /// Fails when the command cannot be serialized, when no acknowledgement arrives within the
    /// RPC timeout, or when the reply cannot be decoded.
    #[instrument(level = "debug", skip_all)]
    pub async fn stop_provider(
        &self,
        host_id: &str,
        provider_ref: &str,
        link_name: &str,
        contract_id: &str,
        annotations: Option<HashMap<String, String>>,
    ) -> Result<CtlOperationAck> {
        let subject =
            broker::commands::stop_provider(&self.topic_prefix, &self.lattice_prefix, host_id);
        debug!("stop_provider:request {}", &subject);
        let command = StopProviderCommand {
            host_id: host_id.to_owned(),
            provider_ref: provider_ref.to_owned(),
            link_name: link_name.to_owned(),
            contract_id: contract_id.to_owned(),
            annotations,
        };
        let payload = json_serialize(command)?;
        let reply = self
            .request_timeout(subject, payload, self.timeout)
            .await
            .map_err(|e| format!("Did not receive stop provider acknowledgement: {}", e))?;
        json_deserialize(&reply.payload)
    }

    /// Instructs a host to stop an actor identified by its OCI reference; `count` is forwarded
    /// in the stop command. The host acknowledges receipt of the command but _will not_ send a
    /// separate confirmation once the actor has terminated; clients that need that information
    /// must also monitor the control event stream.
    ///
    /// # Errors
    /// Fails when the command cannot be serialized, when no acknowledgement arrives within the
    /// RPC timeout, or when the reply cannot be decoded.
    #[instrument(level = "debug", skip_all)]
    pub async fn stop_actor(
        &self,
        host_id: &str,
        actor_ref: &str,
        count: u16,
        annotations: Option<HashMap<String, String>>,
    ) -> Result<CtlOperationAck> {
        let subject =
            broker::commands::stop_actor(&self.topic_prefix, &self.lattice_prefix, host_id);
        debug!("stop_actor:request {}", &subject);
        let command = StopActorCommand {
            host_id: host_id.to_owned(),
            actor_ref: actor_ref.to_owned(),
            count,
            annotations,
        };
        let payload = json_serialize(command)?;
        let reply = self
            .request_timeout(subject, payload, self.timeout)
            .await
            .map_err(|e| format!("Did not receive stop actor acknowledgement: {}", e))?;
        json_deserialize(&reply.payload)
    }

    /// Instructs a specific host to shut down gracefully; `timeout_ms` is forwarded as the
    /// command's optional timeout. The host acknowledges receipt before it attempts the
    /// shutdown. To verify deterministically that the host is down, monitor for the "host
    /// stopped" event or detect the absence of heartbeats.
    ///
    /// # Errors
    /// Fails when the command cannot be serialized, when no acknowledgement arrives within the
    /// RPC timeout, or when the reply cannot be decoded.
    #[instrument(level = "debug", skip_all)]
    pub async fn stop_host(
        &self,
        host_id: &str,
        timeout_ms: Option<u64>,
    ) -> Result<CtlOperationAck> {
        let subject =
            broker::commands::stop_host(&self.topic_prefix, &self.lattice_prefix, host_id);
        debug!("stop_host:request {}", &subject);
        let command = StopHostCommand {
            host_id: host_id.to_string(),
            timeout: timeout_ms,
        };
        let payload = json_serialize(command)?;
        let reply = self
            .request_timeout(subject, payload, self.timeout)
            .await
            .map_err(|e| format!("Did not receive stop host acknowledgement: {}", e))?;
        json_deserialize(&reply.payload)
    }

    /// Scatter/gather helper: subscribes to a fresh inbox, publishes `payload` to `subject`
    /// with that inbox as the reply subject, and hands the subscription to `collect_timeout`
    /// together with the client's auction timeout to gather replies decoded as `D`.
    ///
    /// # Errors
    /// Fails when subscribing to the inbox or publishing the message fails.
    async fn publish_and_wait<D: DeserializeOwned>(
        &self,
        subject: String,
        payload: Vec<u8>,
    ) -> Result<Vec<D>> {
        let inbox = self.nc.new_inbox();
        // Subscribe before publishing so that no reply can be missed.
        let replies = self.nc.subscribe(inbox.clone()).await?;
        self.nc
            .publish_with_reply_and_headers(
                subject.clone(),
                inbox,
                OtelHeaderInjector::default_with_span().into(),
                payload.into(),
            )
            .await?;
        // Flush on a separate task so reply collection starts immediately; a flush failure is
        // only logged.
        let flusher = self.nc.clone();
        tokio::spawn(async move {
            if let Err(error) = flusher.flush().await {
                error!(%error, "flush after publish");
            }
        });
        let gathered = collect_timeout::<D>(replies, self.auction_timeout, subject.as_str()).await;
        Ok(gathered)
    }

    /// Returns the receiver end of a channel that subscribes to the lattice control event stream.
    /// Any [`Event`](struct@Event)s that are published after this channel is created
    /// will be added to the receiver channel's buffer, which can be observed or handled if needed.
    /// See the example for how you could use this receiver to handle events.
    ///
    /// # Example
    /// ```rust
    /// use wasmcloud_control_interface::{Client, ClientBuilder};
    /// async {
    ///   let nc = async_nats::connect("127.0.0.1:4222").await.unwrap();
    ///   let client = ClientBuilder::new(nc)
    ///                 .rpc_timeout(std::time::Duration::from_millis(1000))
    ///                 .auction_timeout(std::time::Duration::from_millis(1000))
    ///                 .build().await.unwrap();
    ///   let mut receiver = client.events_receiver().await.unwrap();
    ///   tokio::spawn( async move {
    ///       while let Some(evt) = receiver.recv().await {
    ///           println!("Event received: {:?}", evt);
    ///       }
    ///   });
    ///   // perform other operations on client
    ///   client.get_host_inventory("NAEXHW...").await.unwrap();
    /// };
    /// ```
    ///
    /// Once you're finished with the event receiver, be sure to call `drop` with the receiver
    /// as an argument. This closes the channel and will prevent the sender from endlessly
    /// sending messages into the channel buffer.
    ///
    /// # Example
    /// ```rust
    /// use wasmcloud_control_interface::{Client, ClientBuilder};
    /// async {
    ///   let nc = async_nats::connect("0.0.0.0:4222").await.unwrap();
    ///   let client = ClientBuilder::new(nc)
    ///                 .rpc_timeout(std::time::Duration::from_millis(1000))
    ///                 .auction_timeout(std::time::Duration::from_millis(1000))
    ///                 .build().await.unwrap();    
    ///   let mut receiver = client.events_receiver().await.unwrap();
    ///   // read the docs for flume receiver. You can use it in either sync or async code
    ///   // The receiver can be cloned() as needed.
    ///   // If you drop the receiver. The subscriber will exit
    ///   // If the nats connection ic closed, the loop below will exit.
    ///   while let Some(evt) = receiver.recv().await {
    ///       println!("Event received: {:?}", evt);
    ///   }
    /// };
    /// ```
    pub async fn events_receiver(&self) -> Result<Receiver<Event>> {
        use futures::StreamExt as _;
        let (sender, receiver) = tokio::sync::mpsc::channel(5000);
        let mut sub = self
            .nc
            .subscribe(broker::control_event(&self.lattice_prefix))
            .await?;
        tokio::spawn(async move {
            while let Some(msg) = sub.next().await {
                let evt = match json_deserialize::<Event>(&msg.payload) {
                    Ok(evt) => evt,
                    Err(_) => {
                        error!("Object received on event stream was not a CloudEvent");
                        continue;
                    }
                };
                trace!("received event: {:?}", evt);
                // If the channel is disconnected, stop sending events
                if sender.send(evt).await.is_err() {
                    let _ = sub.unsubscribe().await;
                    break;
                }
            }
        });
        Ok(receiver)
    }
}

// [ss]: renamed to json_serialize and json_deserialize to avoid confusion
//   with msgpack serialize and deserialize, used for rpc messages.
//
/// Encodes `item` as JSON bytes using `serde_json`.
///
/// This is the standard serializer for codec structs exchanged as messages;
/// using a different encoder risks producing incompatible payloads.
///
/// # Errors
/// Returns a boxed error whose message starts with
/// `JSON serialization failure:` when `serde_json` cannot encode the value.
pub fn json_serialize<T>(
    item: T,
) -> ::std::result::Result<Vec<u8>, Box<dyn std::error::Error + Send + Sync>>
where
    T: Serialize,
{
    match serde_json::to_vec(&item) {
        Ok(encoded) => Ok(encoded),
        Err(cause) => Err(format!("JSON serialization failure: {}", cause).into()),
    }
}

/// Decodes a value of type `T` from the JSON bytes in `buf` using `serde_json`.
///
/// This is the standard deserializer for codec structs exchanged as messages;
/// using a different decoder risks incompatibilities.
///
/// # Errors
/// Returns a boxed `std::io::Error` of kind `Other` whose message starts with
/// `JSON deserialization failure:` when the bytes cannot be decoded as `T`.
pub fn json_deserialize<'de, T: Deserialize<'de>>(
    buf: &'de [u8],
) -> ::std::result::Result<T, Box<dyn std::error::Error + Send + Sync>> {
    match serde_json::from_slice(buf) {
        Ok(value) => Ok(value),
        Err(cause) => {
            let wrapped = std::io::Error::new(
                std::io::ErrorKind::Other,
                format!("JSON deserialization failure: {}", cause),
            );
            Err(wrapped.into())
        }
    }
}

// "selfless" helper: sends a start-provider command to a single host without
// needing a `Client` instance. The command is JSON-encoded, sent as a NATS
// request with the OTEL span headers, and the reply is decoded as a
// `CtlOperationAck`. When `link_name` is `None`, "default" is used.
#[allow(clippy::too_many_arguments)]
async fn start_provider_(
    client: &async_nats::Client,
    topic_prefix: &Option<String>,
    lattice_prefix: &str,
    timeout: Duration,
    host_id: &str,
    provider_ref: &str,
    link_name: Option<String>,
    annotations: Option<HashMap<String, String>>,
    provider_configuration: Option<String>,
) -> Result<CtlOperationAck> {
    let subject = broker::commands::start_provider(topic_prefix, lattice_prefix, host_id);
    debug!("start_provider:request {}", &subject);

    let command = StartProviderCommand {
        host_id: host_id.to_string(),
        provider_ref: provider_ref.to_string(),
        link_name: link_name.unwrap_or_else(|| "default".to_string()),
        annotations,
        configuration: provider_configuration,
    };
    let payload = json_serialize(command)?;

    let request = client.request_with_headers(
        subject,
        OtelHeaderInjector::default_with_span().into(),
        payload.into(),
    );

    // Outer result: did the request finish within `timeout`?
    // Inner result: did the NATS request itself succeed?
    let reply = match tokio::time::timeout(timeout, request).await {
        Ok(Ok(msg)) => msg,
        Ok(Err(e)) => {
            return Err(format!("Error sending or receiving message: {}", e).into());
        }
        Err(e) => {
            return Err(format!("Did not receive start provider acknowledgement: {}", e).into());
        }
    };

    let ack: CtlOperationAck = json_deserialize(&reply.payload)?;
    Ok(ack)
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Manual aid for watching the CloudEvent stream; it asserts nothing.
    /// It is marked `#[ignore]` so it does not run by default, and it needs a
    /// NATS server reachable at 127.0.0.1:4222.
    /// It prints received events for 120 seconds and then returns.
    #[tokio::test]
    #[ignore]
    async fn test_events_receiver() {
        // The same one-second timeout is used for RPC calls and auctions.
        let one_second = Duration::from_millis(1000);
        let connection = async_nats::connect("127.0.0.1:4222").await.unwrap();
        let client = ClientBuilder::new(connection)
            .rpc_timeout(one_second)
            .auction_timeout(one_second)
            .build()
            .await
            .unwrap();

        let mut events = client.events_receiver().await.unwrap();
        // Print events from a background task while this test sleeps.
        tokio::spawn(async move {
            while let Some(event) = events.recv().await {
                println!("Event received: {:?}", event);
            }
        });

        println!("Listening to Cloud Events for 120 seconds. Then we will quit.");
        tokio::time::sleep(Duration::from_secs(120)).await;
    }
}