domain 0.12.0

A DNS library for Rust.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
//! A transport that tries to distribute requests over multiple upstreams.
//!
//! It is assumed that the upstreams have similar performance. Use the
//! [super::redundant] transport to forward requests to the best upstream out of
//! upstreams that may have quite different performance.
//!
//! Basic mode of operation
//!
//! Associated with every upstream configured is optionally a burst length
//! and burst interval. Burst length divided by burst interval gives a
//! queries per second (QPS) value. This can be used to limit the rate and
//! especially the bursts that reach upstream servers. Once the burst
//! length has been reached, the upstream receives no new requests until
//! the burst interval has completed.
//!
//! For each upstream the object maintains an estimated response time.
//! With the configuration value slow_rt_factor, the group of upstreams
//! that have not exceeded their burst length is divided into a 'fast'
//! and a 'slow' group. The slow group consists of those upstreams that have
//! an estimated response time that is higher than slow_rt_factor times the
//! lowest estimated response time. Slow upstreams are considered only when
//! all fast upstreams failed to provide a suitable response.
//!
//! Within the group of fast upstreams, the ones with the lower queue
//! length are preferred. This tries to give each of the fast upstreams
//! an equal number of outstanding requests.
//!
//! Within a group of fast upstreams with the same queue length, the
//! one with the lowest estimated response time is preferred.
//!
//! Probing
//!
//! Upstreams with high estimated response times may not get any traffic and
//! therefore the estimated response time may remain high. Probing is
//! intended to solve that problem. Using a random number generator,
//! occasionally an upstream is selected for probing. If the selected
//! upstream currently has a non-zero queue then probing is not needed and
//! no probe will happen.
//! Otherwise, the upstream to be probed is selected first with an
//! estimated response time equal to the lowest one. If the probed upstream
//! does not provide a response within that time, the otherwise best upstream
//! also gets the request. If the probed upstream provides a suitable response
//! before the next upstream then its estimated response time will be updated.

use crate::base::iana::OptRcode;
use crate::base::iana::Rcode;
use crate::base::opt::AllOptData;
use crate::base::Message;
use crate::base::MessageBuilder;
use crate::base::StaticCompressor;
use crate::dep::octseq::OctetsInto;
use crate::net::client::request::ComposeRequest;
use crate::net::client::request::{Error, GetResponse, SendRequest};
use crate::utils::config::DefMinMax;
use bytes::Bytes;
use futures_util::stream::FuturesUnordered;
use futures_util::StreamExt;
use octseq::Octets;
use rand::{random, random_range};
use std::boxed::Box;
use std::cmp::Ordering;
use std::fmt::{Debug, Formatter};
use std::future::Future;
use std::pin::Pin;
use std::string::String;
use std::string::ToString;
use std::sync::Arc;
use std::vec::Vec;
use tokio::sync::{mpsc, oneshot};
use tokio::time::{sleep_until, Duration, Instant};

/*
Basic algorithm:
- try to distribute requests over all upstreams subject to some limitations.
- limit bursts
  - record the start of a burst interval when a request goes out over an
    upstream
  - record the number of requests since the start of the burst interval
  - if the burst is larger than the maximum configured by the user then the
    upstream is no longer available.
  - start a new burst interval when enough time has passed.
- prefer fast upstreams over slow upstreams
  - maintain a response time estimate for each upstream
  - upstreams with an estimated response time larger than slow_rt_factor
    times the lowest estimated response time are considered slow.
  - 'fast' upstreams are preferred over slow upstream. However slow upstreams
    are considered if during a single request all fast upstreams fail.
- prefer fast upstream with a low queue length
  - maintain a counter with the number of current outstanding requests on an
    upstream.
  - prefer the upstream with the lowest count.
  - prefer the upstream with the lowest estimated response time in case
    two or more upstreams have the same count.

Execution:
- set a timer to the expected response time.
- if the timer expires before reply arrives, send the query to the next lowest
  and set a timer
- when a reply arrives update the expected response time for the relevant
  upstream and for the ones that failed.

Probing:
- upstream that currently have outstanding requests do not need to be
  probed.
- for idle upstream, based on a random number generator:
  - pick a different upstream rather than the best
  - but set the timer to the expected response time of the best.
  - maybe we need a configuration parameter for the amount of head start
    given to the probed upstream.
*/

/// Capacity of the channel that transports [ChanReq].
///
/// This is the bound passed to `mpsc::channel` when a connection is created.
const DEF_CHAN_CAP: usize = 8;

/// Time in milliseconds for the initial response time estimate.
///
/// Also used (converted to seconds) to seed the mean response time of a
/// newly added upstream.
const DEFAULT_RT_MS: u64 = 300;

/// The initial response time estimate for unused connections.
const DEFAULT_RT: Duration = Duration::from_millis(DEFAULT_RT_MS);

/// Maintain a moving average for the measured response time and the
/// square of that. The window is SMOOTH_N.
///
/// Each new sample moves the averages by `1 / SMOOTH_N` of the difference
/// between the sample and the current average.
const SMOOTH_N: f64 = 8.;

/// Chance to probe a worse connection.
///
/// A new query considers probing when a uniformly random `f64` is below
/// this value.
const PROBE_P: f64 = 0.05;

//------------ Configuration Constants ----------------------------------------

/// Cut off for slow upstreams.
///
/// Default value of the `slow_rt_factor` configuration variable.
const DEF_SLOW_RT_FACTOR: f64 = 5.0;

/// Minimum value for the cut off factor.
///
/// Smaller values passed to the setter are replaced by this value.
const MIN_SLOW_RT_FACTOR: f64 = 1.0;

/// Interval for limiting upstream query bursts.
///
/// The arguments appear to be (default, minimum, maximum): a default of one
/// second, limited to the range of one millisecond to one hour.
// NOTE(review): argument order of `DefMinMax::new` is inferred from the
// documentation of `ConnConfig::set_burst_interval` -- confirm.
const BURST_INTERVAL: DefMinMax<Duration> = DefMinMax::new(
    Duration::from_secs(1),
    Duration::from_millis(1),
    Duration::from_secs(3600),
);

//------------ Config ---------------------------------------------------------

/// User configuration variables.
///
/// These settings apply to the load balancer as a whole; settings for
/// individual upstreams are kept separately per upstream.
#[derive(Clone, Copy, Debug)]
pub struct Config {
    /// Defer transport errors.
    ///
    /// When set, a transport error from an upstream is remembered and the
    /// next upstream is tried; the error is only returned if nothing better
    /// shows up.
    defer_transport_error: bool,

    /// Defer replies that report Refused.
    // NOTE(review): evaluated by the `skip` helper, which is outside this
    // view -- confirm exact semantics there.
    defer_refused: bool,

    /// Defer replies that report ServFail.
    // NOTE(review): evaluated by the `skip` helper, which is outside this
    // view -- confirm exact semantics there.
    defer_servfail: bool,

    /// Cut-off for slow upstreams as a factor of the fastest upstream.
    ///
    /// Multiplied with the lowest estimated response time to obtain the
    /// threshold used when ordering upstreams for a query.
    slow_rt_factor: f64,
}

impl Config {
    /// Return the value of the defer_transport_error configuration variable.
    pub fn defer_transport_error(&self) -> bool {
        self.defer_transport_error
    }

    /// Set the value of the defer_transport_error configuration variable.
    pub fn set_defer_transport_error(&mut self, value: bool) {
        self.defer_transport_error = value;
    }

    /// Return the value of the defer_refused configuration variable.
    pub fn defer_refused(&self) -> bool {
        self.defer_refused
    }

    /// Set the value of the defer_refused configuration variable.
    pub fn set_defer_refused(&mut self, value: bool) {
        self.defer_refused = value;
    }

    /// Return the value of the defer_servfail configuration variable.
    pub fn defer_servfail(&self) -> bool {
        self.defer_servfail
    }

    /// Set the value of the defer_servfail configuration variable.
    pub fn set_defer_servfail(&mut self, value: bool) {
        self.defer_servfail = value;
    }

    /// Return the value of the slow_rt_factor configuration variable.
    pub fn slow_rt_factor(&self) -> f64 {
        self.slow_rt_factor
    }

    /// Set the value of the slow_rt_factor configuration variable.
    ///
    /// Values below the minimum factor are silently raised to the minimum.
    /// A NaN value is also replaced by the minimum, so that the stored
    /// factor is always a number that can be compared.
    pub fn set_slow_rt_factor(&mut self, value: f64) {
        // `f64::max` returns the other operand when one of them is NaN,
        // which covers both the "too small" and the NaN case.
        self.slow_rt_factor = value.max(MIN_SLOW_RT_FACTOR);
    }
}

impl Default for Config {
    fn default() -> Self {
        Self {
            defer_transport_error: Default::default(),
            defer_refused: Default::default(),
            defer_servfail: Default::default(),
            slow_rt_factor: DEF_SLOW_RT_FACTOR,
        }
    }
}

//------------ ConnConfig -----------------------------------------------------

/// Configuration variables for each upstream.
///
/// The default configuration (from [`ConnConfig::new`] or `Default`) has no
/// burst limit and uses the default burst interval.
#[derive(Clone, Copy, Debug)]
pub struct ConnConfig {
    /// Maximum burst of upstream queries.
    ///
    /// `None` means that there is no limit.
    max_burst: Option<u64>,

    /// Interval over which the burst is counted.
    burst_interval: Duration,
}

impl ConnConfig {
    /// Create a new ConnConfig object.
    ///
    /// The new object has no burst limit and the default burst interval.
    pub fn new() -> Self {
        Self {
            max_burst: None,
            burst_interval: BURST_INTERVAL.default(),
        }
    }

    /// Return the current configuration value for the maximum burst.
    /// None means that there is no limit.
    pub fn max_burst(&self) -> Option<u64> {
        self.max_burst
    }

    /// Set the configuration value for the maximum burst.
    /// The value None means no limit.
    pub fn set_max_burst(&mut self, max_burst: Option<u64>) {
        self.max_burst = max_burst;
    }

    /// Return the current burst interval.
    pub fn burst_interval(&self) -> Duration {
        self.burst_interval
    }

    /// Set a new burst interval.
    ///
    /// The interval is silently limited to at least 1 millisecond and
    /// at most 1 hour.
    pub fn set_burst_interval(&mut self, burst_interval: Duration) {
        self.burst_interval = BURST_INTERVAL.limit(burst_interval);
    }
}

impl Default for ConnConfig {
    /// Same as [`ConnConfig::new`].
    ///
    /// This is implemented by hand because a derived implementation would
    /// produce a zero burst interval, which lies outside the range that
    /// `set_burst_interval` enforces.
    fn default() -> Self {
        Self::new()
    }
}

//------------ Connection -----------------------------------------------------

/// This type represents a transport connection.
///
/// A `Connection` is a handle: it only holds the configuration and the
/// sending side of the channel to the runner. The receiving side is held
/// by the `Transport` value created together with it.
#[derive(Debug)]
pub struct Connection<Req>
where
    Req: Send + Sync,
{
    /// User configuration.
    config: Config,

    /// To send a request to the runner.
    sender: mpsc::Sender<ChanReq<Req>>,
}

impl<Req: Clone + Debug + Send + Sync + 'static> Connection<Req> {
    /// Create a new connection.
    pub fn new() -> (Self, Transport<Req>) {
        Self::with_config(Default::default())
    }

    /// Create a new connection with a given config.
    pub fn with_config(config: Config) -> (Self, Transport<Req>) {
        let (sender, receiver) = mpsc::channel(DEF_CHAN_CAP);
        (Self { config, sender }, Transport::new(receiver))
    }

    /// Add a transport connection.
    pub async fn add(
        &self,
        label: &str,
        config: &ConnConfig,
        conn: Box<dyn SendRequest<Req> + Send + Sync>,
    ) -> Result<(), Error> {
        let (tx, rx) = oneshot::channel();
        self.sender
            .send(ChanReq::Add(AddReq {
                label: label.to_string(),
                max_burst: config.max_burst,
                burst_interval: config.burst_interval,
                conn,
                tx,
            }))
            .await
            .expect("send should not fail");
        rx.await.expect("receive should not fail")
    }

    /// Implementation of the query method.
    async fn request_impl(
        self,
        request_msg: Req,
    ) -> Result<Message<Bytes>, Error>
    where
        Req: ComposeRequest,
    {
        let (tx, rx) = oneshot::channel();
        self.sender
            .send(ChanReq::GetRT(RTReq { tx }))
            .await
            .expect("send should not fail");
        let conn_rt = rx.await.expect("receive should not fail")?;
        if conn_rt.is_empty() {
            return serve_fail(&request_msg.to_message().unwrap());
        }
        Query::new(self.config, request_msg, conn_rt, self.sender.clone())
            .get_response()
            .await
    }
}

impl<Req> Clone for Connection<Req>
where
    Req: Send + Sync,
{
    /// Create another handle with the same configuration that sends to the
    /// same runner.
    fn clone(&self) -> Self {
        let Self { config, sender } = self;
        Self {
            config: *config,
            sender: sender.clone(),
        }
    }
}

impl<Req: Clone + ComposeRequest + Debug + Send + Sync + 'static>
    SendRequest<Req> for Connection<Req>
{
    /// Start a request.
    ///
    /// The work is done by a future that owns a clone of this connection;
    /// the future is wrapped in a `Request` and returned boxed.
    fn send_request(
        &self,
        request_msg: Req,
    ) -> Box<dyn GetResponse + Send + Sync> {
        let response_fut = Box::pin(self.clone().request_impl(request_msg));
        Box::new(Request { fut: response_fut })
    }
}

//------------ Request -------------------------------------------------------

/// An active request.
///
/// Wraps the boxed future that produces the response so that it can be
/// handed out as a `GetResponse` trait object.
struct Request {
    /// The underlying future.
    fut: Pin<
        Box<dyn Future<Output = Result<Message<Bytes>, Error>> + Send + Sync>,
    >,
}

impl Request {
    /// Async function that waits for the future stored in this request to
    /// complete and returns its output.
    async fn get_response_impl(&mut self) -> Result<Message<Bytes>, Error> {
        self.fut.as_mut().await
    }
}

impl GetResponse for Request {
    /// Return a boxed future that resolves to the response of this request.
    fn get_response(
        &mut self,
    ) -> Pin<
        Box<
            dyn Future<Output = Result<Message<Bytes>, Error>>
                + Send
                + Sync
                + '_,
        >,
    > {
        let response = self.get_response_impl();
        Box::pin(response)
    }
}

impl Debug for Request {
    /// Format the request; the future itself is shown as a `_` placeholder.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("Request");
        out.field("fut", &format_args!("_"));
        out.finish()
    }
}

//------------ Query --------------------------------------------------------

/// This type represents an active query request.
///
/// It holds the state machine that sends one request to one or more
/// upstreams and collects the results.
#[derive(Debug)]
struct Query<Req>
where
    Req: Send + Sync,
{
    /// User configuration.
    config: Config,

    /// The state of the query
    state: QueryState,

    /// The request message
    request_msg: Req,

    /// List of connections identifiers and estimated response times.
    ///
    /// Upstreams are tried in the order of this list.
    conn_rt: Vec<ConnRT>,

    /// Channel to send requests to the run function.
    sender: mpsc::Sender<ChanReq<Req>>,

    /// List of futures for outstanding requests.
    fut_list: FuturesUnordered<
        Pin<Box<dyn Future<Output = FutListOutput> + Send + Sync>>,
    >,

    /// Transport error that should be reported if nothing better shows
    /// up.
    deferred_transport_error: Option<Error>,

    /// Reply that should be returned to the user if nothing better shows
    /// up.
    deferred_reply: Option<Message<Bytes>>,

    /// The result from one of the connections.
    result: Option<Result<Message<Bytes>, Error>>,

    /// Index of the connection that returned a result.
    ///
    /// This is an index into `conn_rt`.
    res_index: usize,
}

/// The various states a query can be in.
#[derive(Debug)]
enum QueryState {
    /// The initial state
    Init,

    /// Start a request on a specific connection.
    ///
    /// The value is an index into the query's list of connections.
    Probe(usize),

    /// Report the response time for a specific index in the list.
    ///
    /// Reporting starts at index zero and moves up one index at a time.
    Report(usize),

    /// Wait for one of the requests to finish.
    ///
    /// Entered when there are no more upstreams to start a request on.
    Wait,
}

/// The commands that can be sent to the run function.
///
/// Values of this type travel over the channel between connection handles
/// (and queries) and the runner.
enum ChanReq<Req>
where
    Req: Send + Sync,
{
    /// Add a connection
    Add(AddReq<Req>),

    /// Get the list of estimated response times for all connections
    GetRT(RTReq),

    /// Start a query
    Query(RequestReq<Req>),

    /// Report how long it took to get a response
    Report(TimeReport),

    /// Report that a connection failed to provide a timely response
    Failure(TimeReport),
}

impl<Req> Debug for ChanReq<Req>
where
    Req: Send + Sync,
{
    /// Format the command as an empty `ChanReq` struct; neither the variant
    /// nor its payload is shown.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("ChanReq");
        out.finish()
    }
}

/// Request to add a new connection
///
/// Sent to the runner when an upstream is added to the load balancer.
struct AddReq<Req> {
    /// Name of new connection
    label: String,

    /// Maximum length of a burst.
    ///
    /// `None` means that there is no limit.
    max_burst: Option<u64>,

    /// Interval over which bursts are counted.
    burst_interval: Duration,

    /// New connection to add
    conn: Box<dyn SendRequest<Req> + Send + Sync>,

    /// Channel to send the reply to
    tx: oneshot::Sender<AddReply>,
}

/// Reply to an Add request
type AddReply = Result<(), Error>;

/// Request to give the estimated response times for all connections
///
/// Sent to the runner at the start of every request.
struct RTReq /*<Octs>*/ {
    /// Channel to send the reply to
    tx: oneshot::Sender<RTReply>,
}

/// Reply to a RT request
///
/// On success, one entry per connection that can be used for the request.
// NOTE(review): which connections are included is decided by the runner,
// outside this view -- confirm.
type RTReply = Result<Vec<ConnRT>, Error>;

/// Request to start a request
///
/// Asks the runner to start `request_msg` on the connection identified by
/// `id`.
struct RequestReq<Req>
where
    Req: Send + Sync,
{
    /// Identifier of connection
    id: u64,

    /// Request message
    request_msg: Req,

    /// Channel to send the reply to
    tx: oneshot::Sender<RequestReply>,
}

impl<Req: Debug> Debug for RequestReq<Req>
where
    Req: Send + Sync,
{
    /// Format the identifier and the request message; the reply channel is
    /// left out.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self {
            id,
            request_msg,
            tx: _,
        } = self;
        let mut out = f.debug_struct("RequestReq");
        out.field("id", id);
        out.field("request_msg", request_msg);
        out.finish()
    }
}

/// Reply to a request request.
///
/// On success this carries the object to get the response from, together
/// with an `Arc<()>`.
// NOTE(review): the `Arc<()>` is presumably a clone of the connection's
// queue-length counter, held for as long as the request is outstanding --
// confirm against the runner.
type RequestReply =
    Result<(Box<dyn GetResponse + Send + Sync>, Arc<()>), Error>;

/// Report the amount of time until success or failure.
///
/// Used as payload for both the `Report` and the `Failure` command.
#[derive(Debug)]
struct TimeReport {
    /// Identifier of the transport connection.
    id: u64,

    /// Time spent waiting for a reply.
    elapsed: Duration,
}

/// Connection statistics to compute the estimated response time.
///
/// Also holds the burst-limiting state and the queue-length counter of
/// the connection.
struct ConnStats {
    /// Name of the connection.
    _label: String,

    /// Approximation of the windowed average of response times.
    ///
    /// The value is in seconds.
    mean: f64,

    /// Approximation of the windowed average of the square of response
    /// times.
    mean_sq: f64,

    /// Maximum upstream query burst.
    ///
    /// `None` means that there is no limit.
    max_burst: Option<u64>,

    /// Interval over which a burst is counted.
    burst_interval: Duration,

    /// Start of the current burst
    burst_start: Instant,

    /// Number of queries since the start of the burst.
    burst: u64,

    /// Use the number of references to an Arc as queue length. The number
    /// of references is one higher than the actual queue length.
    queue_length_plus_one: Arc<()>,
}

impl ConnStats {
    /// Update response time statistics.
    ///
    /// Moves the running averages of the response time and of its square
    /// towards the new sample by `1 / SMOOTH_N` of the difference.
    fn update(&mut self, elapsed: Duration) {
        let sample = elapsed.as_secs_f64();
        let sample_sq = sample * sample;
        self.mean += (sample - self.mean) / SMOOTH_N;
        self.mean_sq += (sample_sq - self.mean_sq) / SMOOTH_N;
    }

    /// Get an estimated response time.
    ///
    /// The estimate is the mean plus three standard deviations, in
    /// seconds. A negative variance is treated as zero.
    fn est_rt(&self) -> f64 {
        let variance = self.mean_sq - self.mean * self.mean;
        let std_dev = variance.max(0.).sqrt();
        self.mean + 3. * std_dev
    }
}

/// Data required to schedule requests and report timing results.
///
/// A list of these is handed to each query, which sorts it and tries the
/// connections in order.
#[derive(Clone, Debug)]
struct ConnRT {
    /// Estimated response time.
    ///
    /// Also used as the time to wait for this connection before the next
    /// one is tried.
    est_rt: Duration,

    /// Identifier of the connection.
    id: u64,

    /// Start of a request using this connection.
    ///
    /// `None` as long as no request has been started on this connection.
    start: Option<Instant>,

    /// Queue length of the connection.
    ///
    /// A value of zero marks the connection as eligible for probing.
    // NOTE(review): presumably derived from the reference count of the
    // connection's `Arc<()>` minus one when the list is handed out --
    // confirm against the runner.
    queue_length: usize,
}

/// Result of the futures in fut_list.
///
/// The first element is the index of the connection in the query's
/// connection list, the second the outcome of the request on it.
type FutListOutput = (usize, Result<Message<Bytes>, Error>);

impl<Req: Clone + Send + Sync + 'static> Query<Req> {
    /// Create a new query object.
    ///
    /// The list of connections is sorted with `conn_rt_cmp`, using a
    /// threshold of the lowest estimated response time multiplied by the
    /// configured `slow_rt_factor`. After that, with probability `PROBE_P`
    /// and only if there is more than one connection, a random connection
    /// other than the first is picked. If its queue length is zero it is
    /// moved to the front of the list and gets the lowest estimated response
    /// time as its time-out.
    ///
    /// # Panics
    ///
    /// Panics if `conn_rt` is empty.
    fn new(
        config: Config,
        request_msg: Req,
        mut conn_rt: Vec<ConnRT>,
        sender: mpsc::Sender<ChanReq<Req>>,
    ) -> Self {
        let conn_rt_len = conn_rt.len();
        // The unwrap fails on an empty list; the caller checks for that
        // before creating a query.
        let min_rt = conn_rt.iter().map(|e| e.est_rt).min().unwrap();
        let slow_rt = min_rt.as_secs_f64() * config.slow_rt_factor;
        conn_rt.sort_unstable_by(|e1, e2| conn_rt_cmp(e1, e2, slow_rt));

        // Do we want to probe a less performant upstream? We only need to
        // probe upstreams with a queue length of zero. If the queue length
        // is non-zero then the upstream recently got work and does not need
        // to be probed.
        if conn_rt_len > 1 && random::<f64>() < PROBE_P {
            // Index zero is the best upstream, so pick from the rest.
            let index = random_range(1..=conn_rt_len - 1);

            if conn_rt[index].queue_length == 0 {
                // Give the probe some head start. We may need a separate
                // configuration parameter. A multiple of min_rt. Just use
                // min_rt for now.
                let mut e = conn_rt.remove(index);
                e.est_rt = min_rt;
                conn_rt.insert(0, e);
            }
        }

        Self {
            config,
            request_msg,
            conn_rt,
            sender,
            state: QueryState::Init,
            fut_list: FuturesUnordered::new(),
            deferred_transport_error: None,
            deferred_reply: None,
            result: None,
            res_index: 0,
        }
    }

    /// Implementation of get_response.
    ///
    /// Runs the query state machine until there is a result:
    ///
    /// * `Init` moves to `Probe(0)`, or fails with
    ///   `Error::NoTransportAvailable` if there are no connections.
    /// * `Probe(ind)` starts the request on connection `ind` and waits for
    ///   any outstanding request to finish or for the estimated response
    ///   time of connection `ind` to pass. On a time-out, or when
    ///   connection `ind` itself produced a deferred result, the next
    ///   connection is probed, or `Wait` is entered if there is none left.
    /// * `Wait` waits for the remaining outstanding requests. If all of
    ///   them produced deferred results, a deferred reply is returned in
    ///   preference to a deferred transport error.
    /// * `Report(ind)` is entered once there is a result to return. It
    ///   sends a time report to the runner for every connection a request
    ///   was started on and then returns the result.
    ///
    /// # Panics
    ///
    /// Panics if an internal invariant of the state machine is violated.
    async fn get_response(&mut self) -> Result<Message<Bytes>, Error> {
        loop {
            match self.state {
                QueryState::Init => {
                    if self.conn_rt.is_empty() {
                        return Err(Error::NoTransportAvailable);
                    }
                    self.state = QueryState::Probe(0);
                    continue;
                }
                QueryState::Probe(ind) => {
                    // Record the start time; it marks the connection as
                    // used for the Report state.
                    self.conn_rt[ind].start = Some(Instant::now());
                    let fut = start_request(
                        ind,
                        self.conn_rt[ind].id,
                        self.sender.clone(),
                        self.request_msg.clone(),
                    );
                    self.fut_list.push(Box::pin(fut));
                    // The deadline is fixed once, so results from other
                    // upstreams do not extend the wait for this one.
                    let timeout = Instant::now() + self.conn_rt[ind].est_rt;
                    loop {
                        tokio::select! {
                            res = self.fut_list.next() => {
                                // The list holds at least the request that
                                // was just pushed.
                                let res = res.expect("res should not be empty");
                                match res.1 {
                                    Err(ref err) => {
                                        if self.config.defer_transport_error {
                                            // Keep only the first error.
                                            if self.deferred_transport_error.is_none() {
                                                self.deferred_transport_error = Some(err.clone());
                                            }
                                            if res.0 == ind {
                                                // The current upstream finished,
                                                // try the next one, if any.
                                                self.state =
                                                if ind+1 < self.conn_rt.len() {
                                                    QueryState::Probe(ind+1)
                                                }
                                                else
                                                {
                                                    QueryState::Wait
                                                };
                                                // Break out of receive loop
                                                break;
                                            }
                                            // Just continue receiving
                                            continue;
                                        }
                                        // Return error to the user.
                                    }
                                    Ok(ref msg) => {
                                        if skip(msg, &self.config) {
                                            // Keep only the first deferred
                                            // reply.
                                            if self.deferred_reply.is_none() {
                                                self.deferred_reply = Some(msg.clone());
                                            }
                                            if res.0 == ind {
                                                // The current upstream finished,
                                                // try the next one, if any.
                                                self.state =
                                                    if ind+1 < self.conn_rt.len() {
                                                        QueryState::Probe(ind+1)
                                                    }
                                                    else
                                                    {
                                                        QueryState::Wait
                                                    };
                                                // Break out of receive loop
                                                break;
                                            }
                                            // Just continue receiving
                                            continue;
                                        }
                                        // Now we have a reply that can be
                                        // returned to the user.
                                    }
                                }
                                // Only reached for a result that is not
                                // deferred.
                                self.result = Some(res.1);
                                self.res_index = res.0;

                                self.state = QueryState::Report(0);
                                // Break out of receive loop
                                break;
                            }
                            _ = sleep_until(timeout) => {
                                // Move to the next Probe state if there
                                // are more upstreams to try, otherwise
                                // move to the Wait state.
                                self.state =
                                if ind+1 < self.conn_rt.len() {
                                    QueryState::Probe(ind+1)
                                }
                                else {
                                    QueryState::Wait
                                };
                                // Break out of receive loop
                                break;
                            }
                        }
                    }
                    // Continue with state machine loop
                    continue;
                }
                QueryState::Report(ind) => {
                    // Connections are started in list order, so the first
                    // entry without a start time ends the reporting.
                    if ind >= self.conn_rt.len()
                        || self.conn_rt[ind].start.is_none()
                    {
                        // Nothing more to report. Return result.
                        let res = self
                            .result
                            .take()
                            .expect("result should not be empty");
                        return res;
                    }

                    let start = self.conn_rt[ind]
                        .start
                        .expect("start time should not be empty");
                    let elapsed = start.elapsed();
                    let time_report = TimeReport {
                        id: self.conn_rt[ind].id,
                        elapsed,
                    };
                    let report = if ind == self.res_index {
                        // Successful entry
                        ChanReq::Report(time_report)
                    } else {
                        // Failed entry
                        ChanReq::Failure(time_report)
                    };

                    // Send could fail but we don't care.
                    let _ = self.sender.send(report).await;

                    self.state = QueryState::Report(ind + 1);
                    continue;
                }
                QueryState::Wait => {
                    loop {
                        if self.fut_list.is_empty() {
                            // We have nothing left. There should be a reply or
                            // an error. Prefer a reply over an error.
                            if self.deferred_reply.is_some() {
                                let msg = self
                                    .deferred_reply
                                    .take()
                                    .expect("just checked for Some");
                                return Ok(msg);
                            }
                            if self.deferred_transport_error.is_some() {
                                let err = self
                                    .deferred_transport_error
                                    .take()
                                    .expect("just checked for Some");
                                return Err(err);
                            }
                            panic!("either deferred_reply or deferred_error should be present");
                        }
                        let res = self.fut_list.next().await;
                        let res = res.expect("res should not be empty");
                        match res.1 {
                            Err(ref err) => {
                                if self.config.defer_transport_error {
                                    if self.deferred_transport_error.is_none()
                                    {
                                        self.deferred_transport_error =
                                            Some(err.clone());
                                    }
                                    // Just continue with the next future, or
                                    // finish if fut_list is empty.
                                    continue;
                                }
                                // Return error to the user.
                            }
                            Ok(ref msg) => {
                                if skip(msg, &self.config) {
                                    if self.deferred_reply.is_none() {
                                        self.deferred_reply =
                                            Some(msg.clone());
                                    }
                                    // Just continue with the next future, or
                                    // finish if fut_list is empty.
                                    continue;
                                }
                                // Return reply to user.
                            }
                        }
                        // Only reached for a result that is not deferred.
                        self.result = Some(res.1);
                        self.res_index = res.0;
                        self.state = QueryState::Report(0);
                        // Break out of loop to continue with the state machine
                        break;
                    }
                    continue;
                }
            }
        }
    }
}

//------------ Transport -----------------------------------------------------

/// Back end that does the actual work of the connection.
///
/// It owns the receiving end of the request channel; the requests that
/// arrive there are handled by `run`.
#[derive(Debug)]
pub struct Transport<Req: Send + Sync> {
    /// Channel end on which the runner receives its requests.
    receiver: mpsc::Receiver<ChanReq<Req>>,
}

impl<Req: Clone + Send + Sync + 'static> Transport<Req> {
    /// Wraps the receiving end of the request channel in a new transport.
    fn new(receiver: mpsc::Receiver<ChanReq<Req>>) -> Self {
        Self { receiver }
    }

    /// Services requests from the channel until the channel is closed.
    ///
    /// Per-upstream state lives in three vectors (`conn_stats`, `conn_rt`
    /// and `conns`) that are only ever appended to, and always together,
    /// so one index refers to the same upstream in all of them.
    pub async fn run(mut self) {
        let mut next_id: u64 = 10;
        let mut conn_stats: Vec<ConnStats> = Vec::new();
        let mut conn_rt: Vec<ConnRT> = Vec::new();
        let mut conns: Vec<Box<dyn SendRequest<Req> + Send + Sync>> =
            Vec::new();

        // `recv` yields `None` once all references to connection objects
        // are dropped. That ends the loop and shuts the transport down.
        while let Some(req) = self.receiver.recv().await {
            match req {
                ChanReq::Add(add_req) => {
                    // Hand out a fresh id and register the upstream in
                    // all three vectors.
                    let id = next_id;
                    next_id += 1;

                    conn_stats.push(ConnStats {
                        _label: add_req.label,
                        mean: (DEFAULT_RT_MS as f64) / 1000.,
                        mean_sq: 0.,
                        max_burst: add_req.max_burst,
                        burst_interval: add_req.burst_interval,
                        burst_start: Instant::now(),
                        burst: 0,
                        queue_length_plus_one: Arc::new(()),
                    });
                    conn_rt.push(ConnRT {
                        id,
                        est_rt: DEFAULT_RT,
                        start: None,
                        // Placeholder that is overwritten when handling
                        // GetRT; an odd value makes mistakes easy to spot.
                        queue_length: 42,
                    });
                    conns.push(add_req.conn);

                    // A failed send is deliberately ignored.
                    let _ = add_req.tx.send(Ok(()));
                }
                ChanReq::GetRT(rt_req) => {
                    let mut candidates = conn_rt.clone();

                    // Walk backward so that `swap_remove` only ever moves
                    // an entry that has already been looked at.
                    for i in (0..candidates.len()).rev() {
                        let stats = &mut conn_stats[i];

                        // The Arc itself accounts for one reference; the
                        // remaining ones are outstanding requests.
                        candidates[i].queue_length =
                            Arc::strong_count(&stats.queue_length_plus_one)
                                - 1;

                        // Without a maximum burst there is no limit.
                        if let Some(max_burst) = stats.max_burst {
                            // Start a new burst interval when the current
                            // one is over.
                            if stats.burst_start.elapsed()
                                > stats.burst_interval
                            {
                                stats.burst_start = Instant::now();
                                stats.burst = 0;
                            }
                            // Drop upstreams that exceeded their limit.
                            if stats.burst > max_burst {
                                candidates.swap_remove(i);
                            }
                        }
                    }

                    // A failed send is deliberately ignored.
                    let _ = rt_req.tx.send(Ok(candidates));
                }
                ChanReq::Query(request_req) => {
                    let found = conn_rt
                        .iter()
                        .position(|e| e.id == request_req.id);
                    if let Some(ind) = found {
                        // Only count the request here. The burst counter
                        // is reset when handling GetRT.
                        conn_stats[ind].burst += 1;
                        let query = conns[ind]
                            .send_request(request_req.request_msg);
                        let queue_token =
                            conn_stats[ind].queue_length_plus_one.clone();
                        // A failed send is deliberately ignored.
                        let _ = request_req.tx.send(Ok((query, queue_token)));
                    } else {
                        // A failed send is deliberately ignored.
                        let _ = request_req
                            .tx
                            .send(Err(Error::RedundantTransportNotFound));
                    }
                }
                ChanReq::Report(time_report) => {
                    let found = conn_rt
                        .iter()
                        .position(|e| e.id == time_report.id);
                    if let Some(ind) = found {
                        let stats = &mut conn_stats[ind];
                        stats.update(time_report.elapsed);
                        conn_rt[ind].est_rt =
                            Duration::from_secs_f64(stats.est_rt());
                    }
                }
                ChanReq::Failure(time_report) => {
                    let found = conn_rt
                        .iter()
                        .position(|e| e.id == time_report.id);
                    if let Some(ind) = found {
                        let stats = &mut conn_stats[ind];
                        if time_report.elapsed.as_secs_f64() < stats.mean {
                            // A failure that took less time than the
                            // current mean must not update the mean.
                            continue;
                        }
                        stats.update(time_report.elapsed);
                        conn_rt[ind].est_rt =
                            Duration::from_secs_f64(stats.est_rt());
                    }
                }
            }
        }
    }
}

//------------ Utility --------------------------------------------------------

/// Sends one request through the transport and waits for its reply.
///
/// Wrapping both steps in a single async function gives one future per
/// request that can be put in a list. The returned tuple pairs `index`
/// with the outcome so the caller can tell which request has finished.
///
/// # Panics
///
/// Panics if the transport's receiver is gone or if the transport drops
/// the reply channel without answering.
async fn start_request<Req>(
    index: usize,
    id: u64,
    sender: mpsc::Sender<ChanReq<Req>>,
    request_msg: Req,
) -> (usize, Result<Message<Bytes>, Error>)
where
    Req: Send + Sync,
{
    let (reply_tx, reply_rx) = oneshot::channel();
    let query = ChanReq::Query(RequestReq {
        id,
        request_msg,
        tx: reply_tx,
    });
    sender.send(query).await.expect("receiver still exists");

    let answer = reply_rx.await.expect("receive is expected to work");
    let (mut request, queue_token) = match answer {
        Ok(pair) => pair,
        Err(err) => return (index, Err(err)),
    };

    let reply = request.get_response().await;

    // Release the queue token as soon as the response is in.
    drop(queue_token);
    (index, reply)
}

/// Compare two `ConnRT` elements to rank upstreams, best first.
///
/// An element counts as slow when its estimated response time exceeds
/// `slow_rt`, which is expressed in seconds. The ordering is:
///
/// * an element that is not slow always sorts before a slow one;
/// * two elements that are not slow are compared on queue length first
///   and on estimated response time second;
/// * two slow elements are compared on estimated response time only.
fn conn_rt_cmp(e1: &ConnRT, e2: &ConnRT, slow_rt: f64) -> Ordering {
    let e1_slow = e1.est_rt.as_secs_f64() > slow_rt;
    let e2_slow = e2.est_rt.as_secs_f64() > slow_rt;

    match (e1_slow, e2_slow) {
        (false, false) => {
            // Normal case. First check queue lengths. Then check est_rt.
            // Comparing on est_rt alone here would always pick the same
            // upstream and never take the queue length into account.
            e1.queue_length
                .cmp(&e2.queue_length)
                .then(e1.est_rt.cmp(&e2.est_rt))
        }
        (false, true) => Ordering::Less,
        (true, false) => Ordering::Greater,
        // Both are slow. Just rank on est_rt.
        (true, true) => e1.est_rt.cmp(&e2.est_rt),
    }
}

/// Tells whether a reply has to be set aside instead of being returned.
///
/// A reply is skipped when its rcode is REFUSED and `defer_refused` is
/// set, or when its rcode is SERVFAIL and `defer_servfail` is set.
fn skip<Octs: Octets>(msg: &Message<Octs>, config: &Config) -> bool {
    // With both options off there is no need to look at the message.
    if !(config.defer_refused || config.defer_servfail) {
        return false;
    }

    match msg.opt_rcode() {
        OptRcode::REFUSED => config.defer_refused,
        OptRcode::SERVFAIL => config.defer_servfail,
        _ => false,
    }
}

/// Builds a SERVFAIL message from `msg`.
///
/// The result carries the header of `msg` with the rcode set to SERVFAIL
/// and the AD flag cleared, the question section of `msg` and, if `msg`
/// has an OPT record, an OPT record with the same DNSSEC OK flag, UDP
/// payload size, version and options.
///
/// # Errors
///
/// Returns an error if a question of `msg` fails to parse.
///
/// # Panics
///
/// Panics if an option in the OPT record fails to parse or if building
/// the message fails.
// This needs to be consolidated with the one in validator and the one in
// MessageBuilder.
fn serve_fail<Octs>(msg: &Message<Octs>) -> Result<Message<Bytes>, Error>
where
    Octs: AsRef<[u8]> + Octets,
{
    let mut builder =
        MessageBuilder::from_target(StaticCompressor::new(Vec::new()))
            .expect("Vec is expected to have enough space");

    // Start from the original header and then turn it into a failure.
    {
        let header = builder.header_mut();
        *header = msg.header();
        header.set_rcode(Rcode::SERVFAIL);
        header.set_ad(false);
    }

    // Copy the question section over.
    let mut questions = builder.question();
    for question in msg.question() {
        questions.push(question?).expect("should not fail");
    }

    // Mirror the OPT record, if there is one.
    let mut additional = questions.additional();
    if let Some(opt) = msg.opt() {
        additional
            .opt(|ob| {
                ob.set_dnssec_ok(opt.dnssec_ok());
                // XXX something is missing ob.set_rcode(opt.rcode());
                ob.set_udp_payload_size(opt.udp_payload_size());
                ob.set_version(opt.version());
                for item in opt.opt().iter() {
                    let data: AllOptData<_, _> =
                        item.expect("should not fail");
                    ob.push(&data).expect("should not fail");
                }
                Ok(())
            })
            .expect("should not fail");
    }

    let finished = additional.as_builder().clone().finish();
    let reply =
        Message::<Bytes>::from_octets(finished.into_target().octets_into())
            .expect(
                "Message should be able to parse output from MessageBuilder",
            );
    Ok(reply)
}