1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
/*
*
* Hedera Rust SDK
*
* Copyright (C) 2022 - 2023 Hedera Hashgraph, LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
use backoff::backoff::Backoff;
use backoff::ExponentialBackoff;
use prost::Message;
use rand::thread_rng;
use time::OffsetDateTime;
use tokio::time::sleep;
use tonic::transport::Channel;
use crate::{
AccountId,
BoxGrpcFuture,
Client,
Error,
Status,
TransactionId,
ValidateChecksums,
};
/// Contract between a concrete request type and the shared `execute` driver.
///
/// An implementor describes how to build a GRPC request for a given node, how to
/// send it over a channel, how to read the pre-check status out of the reply, and
/// how to turn a successful reply (or a failing status) into the caller-facing
/// result.
pub(crate) trait Execute: ValidateChecksums {
    /// Message type that is sent to a node.
    type GrpcRequest: Clone + Message;

    /// Message type that a node answers with.
    type GrpcResponse: Message;

    /// Extra state produced next to every request by `make_request`.
    ///
    /// When a request succeeds, the context that was created together with it is
    /// handed to `make_response`.
    type Context: Send;

    /// Caller-facing value produced by `make_response`.
    type Response;

    /// Returns the nodes this request was _explicitly_ pinned to, if any.
    fn node_account_ids(&self) -> Option<&[AccountId]>;

    /// Returns the transaction ID this request was _explicitly_ given, if any.
    fn transaction_id(&self) -> Option<TransactionId>;

    /// Returns whether a transaction ID is needed to create the request.
    ///
    /// When this is `true` and no explicit ID is set, `execute` asks the client to
    /// generate one.
    fn requires_transaction_id(&self) -> bool;

    /// Decides whether a pre-check status should be retried.
    ///
    /// The default implementation never retries.
    fn should_retry_pre_check(&self, _status: Status) -> bool {
        false
    }

    /// Decides whether a response that otherwise succeeded should still be retried.
    ///
    /// The default implementation never retries.
    fn should_retry(&self, _response: &Self::GrpcResponse) -> bool {
        false
    }

    /// Builds a fresh request (and its context) aimed at `node_account_id`.
    ///
    /// NOTE(review): earlier documentation described created requests as being
    /// cached per node until a `TransactionExpired` status clears the cache; the
    /// `execute` driver in this file calls `make_request` for every attempt —
    /// confirm before relying on any caching.
    fn make_request(
        &self,
        transaction_id: &Option<TransactionId>,
        node_account_id: AccountId,
    ) -> crate::Result<(Self::GrpcRequest, Self::Context)>;

    /// Sends the prepared GRPC request over the supplied GRPC channel.
    fn execute(
        &self,
        channel: Channel,
        request: Self::GrpcRequest,
    ) -> BoxGrpcFuture<Self::GrpcResponse>;

    /// Converts a GRPC response into the final response.
    ///
    /// Receives the context saved by `make_request` together with the node account
    /// ID and transaction ID that were used for the successful request.
    fn make_response(
        &self,
        response: Self::GrpcResponse,
        context: Self::Context,
        node_account_id: AccountId,
        transaction_id: Option<TransactionId>,
    ) -> crate::Result<Self::Response>;

    /// Builds the error that represents the given pre-check status.
    fn make_error_pre_check(
        &self,
        status: Status,
        transaction_id: Option<TransactionId>,
    ) -> crate::Error;

    /// Reads the raw pre-check status code out of a GRPC response.
    fn response_pre_check_status(response: &Self::GrpcResponse) -> crate::Result<i32>;
}
pub(crate) async fn execute<E>(
client: &Client,
executable: &E,
timeout: impl Into<Option<std::time::Duration>> + Send,
) -> crate::Result<E::Response>
where
E: Execute + Sync,
{
let timeout: Option<std::time::Duration> = timeout.into();
let timeout = timeout.or_else(|| client.request_timeout()).unwrap_or_else(|| {
std::time::Duration::from_millis(backoff::default::MAX_ELAPSED_TIME_MILLIS)
});
// the overall timeout for the backoff starts measuring from here
let mut backoff =
ExponentialBackoff { max_elapsed_time: Some(timeout), ..ExponentialBackoff::default() };
let mut last_error: Option<Error> = None;
if client.auto_validate_checksums() {
if let Some(ledger_id) = &*client.ledger_id_internal() {
executable.validate_checksums(ledger_id)?;
} else {
return Err(Error::CannotPerformTaskWithoutLedgerId { task: "validate checksums" });
}
}
// TODO: cache requests to avoid signing a new request for every node in a delayed back-off
// if we need to generate a transaction ID for this request (and one was not provided),
// generate one now
let explicit_transaction_id = executable.transaction_id();
let mut transaction_id = match executable.requires_transaction_id() {
false => None,
true => match explicit_transaction_id {
Some(id) => Some(id),
None => client.generate_transaction_id().await,
},
};
// if we were explicitly given a list of nodes to use, we iterate through each
// of the given nodes (in a random order)
let explicit_node_indexes = executable
.node_account_ids()
.map(|ids| client.network().node_indexes_for_ids(ids))
.transpose()?;
let mut include_unhealthy = false;
// the outer loop continues until we timeout or reach the maximum number of "attempts"
// an attempt is counted when we have a successful response from a node that must either
// be retried immediately (on a new node) or retried after a backoff.
loop {
// if no explicit set of node account IDs, we randomly sample 1/3 of all
// healthy nodes on the client. this set of healthy nodes can change on
// each iteration
let healthy_node_indexes: Option<Vec<_>> = explicit_node_indexes
.is_none()
.then(|| client.network().healthy_node_indexes().collect());
let node_indexes =
explicit_node_indexes.as_deref().or(healthy_node_indexes.as_deref()).unwrap();
let node_sample_amount = if explicit_node_indexes.is_none() {
(node_indexes.len() + 2) / 3
} else {
node_indexes.len()
};
let node_index_indexes =
rand::seq::index::sample(&mut thread_rng(), node_indexes.len(), node_sample_amount);
for index in node_index_indexes.iter() {
// logic:
// if there are no explicit node indexes, all nodes we pick are healthy.
// if we're including unhealthy nodes, then it doesn't matter if it's healthy.
if explicit_node_indexes.is_some()
&& !include_unhealthy
&& !client.network().is_node_healthy(index, OffsetDateTime::now_utc())
{
continue;
}
let node_index = node_indexes[index];
let (node_account_id, channel) = client.network().channel(node_index);
let (request, context) = executable.make_request(&transaction_id, node_account_id)?;
let response = match executable.execute(channel, request).await {
Ok(response) => response.into_inner(),
Err(status) => {
match status.code() {
tonic::Code::Unavailable | tonic::Code::ResourceExhausted => {
// NOTE: this is an "unhealthy" node
client.network().mark_node_unhealthy(node_index);
// try the next node in our allowed list, immediately
last_error = Some(status.into());
continue;
}
_ => {
// fail immediately
return Err(status.into());
}
}
}
};
let pre_check_status = E::response_pre_check_status(&response)?;
match Status::from_i32(pre_check_status) {
Some(status) => match status {
Status::Ok if executable.should_retry(&response) => {
last_error = Some(executable.make_error_pre_check(status, transaction_id));
break;
}
Status::Ok => {
return executable.make_response(
response,
context,
node_account_id,
transaction_id,
);
}
Status::Busy | Status::PlatformNotActive => {
// NOTE: this is a "busy" node
// try the next node in our allowed list, immediately
last_error = Some(executable.make_error_pre_check(status, transaction_id));
continue;
}
Status::TransactionExpired if explicit_transaction_id.is_none() => {
// the transaction that was generated has since expired
// re-generate the transaction ID and try again, immediately
last_error = Some(executable.make_error_pre_check(status, transaction_id));
transaction_id = client.generate_transaction_id().await;
continue;
}
_ if executable.should_retry_pre_check(status) => {
// conditional retry on pre-check should back-off and try again
last_error = Some(executable.make_error_pre_check(status, transaction_id));
break;
}
_ => {
// any other pre-check is an error that the user needs to fix, fail immediately
return Err(executable.make_error_pre_check(status, transaction_id));
}
},
None => {
// not sure how to proceed, fail immediately
return Err(Error::ResponseStatusUnrecognized(pre_check_status));
}
}
}
// we tried each node, suspend execution until the next backoff interval
if let Some(duration) = backoff.next_backoff() {
sleep(duration).await;
} else {
// maximum time allowed has elapsed
// NOTE: it should be impossible to reach here without capturing at least one error
return Err(Error::TimedOut(last_error.unwrap().into()));
}
// only ever include unhealthy nodes if we have explicit nodes.
include_unhealthy = explicit_node_indexes.is_some();
}
}