triton_distributed/
discovery.rs

1// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16use crate::{transports::etcd, Result};
17
18pub use etcd::Lease;
19
/// Thin handle over an etcd client, used for lease-related discovery operations.
///
/// All methods visible in this file delegate to the wrapped [`etcd::Client`].
pub struct DiscoveryClient {
    /// Namespace supplied at construction time.
    // NOTE(review): not read by any method in this file — presumably intended as a
    // key prefix for discovery entries; confirm against callers.
    namespace: String,
    /// etcd client that lease operations are forwarded to.
    etcd_client: etcd::Client,
}
24
25impl DiscoveryClient {
26    /// Create a new [`DiscoveryClient`]
27    ///
28    /// This will establish a connection to the etcd server, create a primary lease,
29    /// and spawn a task to keep the lease alive and tie the lifetime of the [`Runtime`]
30    /// to the lease.
31    ///
32    /// If the lease expires, the [`Runtime`] will be shutdown.
33    /// If the [`Runtime`] is shutdown, the lease will be revoked.
34    pub(crate) fn new(namespace: String, etcd_client: etcd::Client) -> Self {
35        DiscoveryClient {
36            namespace,
37            etcd_client,
38        }
39    }
40
41    /// Get the primary lease ID
42    pub fn primary_lease_id(&self) -> i64 {
43        self.etcd_client.lease_id()
44    }
45
46    /// Create a [`Lease`] with a given time-to-live (TTL).
47    /// This [`Lease`] will be tied to the [`Runtime`], but has its own independent [`crate::CancellationToken`].
48    pub async fn create_lease(&self, ttl: i64) -> Result<Lease> {
49        self.etcd_client.create_lease(ttl).await
50    }
51
52    // the following two commented out codes are not implemented, but are placeholders for proposed ectd usage patterns
53
54    // /// Create an ephemeral key/value pair tied to a lease_id.
55    // /// This is an atomic create. If the key already exists, this will fail.
56    // /// The [`etcd_client::KeyValue`] will be removed when the lease expires or is revoked.
57    // pub async fn create_ephemerial_key(&self, key: &str, value: &str, lease_id: i64) -> Result<()> {
58    //     // self.etcd_client.create_ephemeral_key(key, value, lease_id).await
59    //     unimplemented!()
60    // }
61
62    // /// Create a shared [`etcd_client::KeyValue`] which behaves similar to a C++ `std::shared_ptr` or a
63    // /// Rust [std::sync::Arc]. Instead of having one owner of the lease, multiple owners participate in
64    // /// maintaining the lease. In this manner, when the last member of the group sharing the lease is gone,
65    // /// the lease will be expired.
66    // ///
67    // /// Implementation notes: At the time of writing, it is unclear if we have atomics that control leases,
68    // /// so in our initial implementation, the last member of the group will not revoke the lease, so the object
69    // /// will live for upto the TTL after the last member is gone.
70    // ///
71    // /// Notes
72    // /// -----
73    // ///
74    // /// - Multiple members sharing the lease and contributing to the heartbeat might cause some overheads.
75    // ///   The implementation will try to randomize the heartbeat intervals to avoid thundering herd problem,
76    // ///   and with any luck, the heartbeat watchers will be able to detect when if a external member triggered
77    // ///   the heartbeat checking this interval and skip unnecessary heartbeat messages.
78    // ///
79    // /// A new lease will be created for this object. If you wish to add an object to a shared group s
80    // ///
81    // /// The [`etcd_client::KeyValue`] will be removed when the lease expires or is revoked.
82    // pub async fn create_shared_key(&self, key: &str, value: &str, lease_id: i64) -> Result<()> {
83    //     // self.etcd_client.create_ephemeral_key(key, value, lease_id).await
84    //     unimplemented!()
85    // }
86}