1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
//! Sink for AWS Firehose.
use chrono::DateTime;
use chrono::naive::NaiveDateTime;
use chrono::offset::Utc;
use metric::LogLine;
use metric::TagMap;
use rusoto_core::{DefaultCredentialsProvider, Region};
use rusoto_core::default_tls_client;
use rusoto_firehose::{KinesisFirehose, KinesisFirehoseClient, PutRecordBatchInput,
Record};
use rusoto_firehose::PutRecordBatchError::*;
use serde_json;
use serde_json::Map;
use serde_json::value::Value;
use sink::{Sink, Valve};
use source::report_full_telemetry;
use uuid::Uuid;
/// User-facing configuration for the Firehose sink.
///
/// `delivery_stream` and `region` are optional here so that a default value
/// can exist, but `Firehose::init` panics if either is still `None`.
#[derive(Clone, Debug)]
pub struct FirehoseConfig {
    /// Name of the `delivery_stream` identifying the target firehose. The
    /// name does not need to be unique per sink.
    pub delivery_stream: Option<String>,
    /// Number of buffered lines submitted per publish request. Amazon caps
    /// the number of objects in one submission; a smaller value reduces how
    /// much must be re-published after a failure.
    pub batch_size: usize,
    /// AWS region hosting the firehose.
    pub region: Option<Region>,
    /// Unique name of this sink in the routing topology.
    pub config_path: Option<String>,
    /// Sink-specific `flush_interval`.
    pub flush_interval: u64,
    /// Tags applied to every `metric::Event` streaming through this sink.
    /// These overwrite any tags of the same name carried by the
    /// `metric::Event` itself.
    pub tags: TagMap,
}
impl Default for FirehoseConfig {
    /// Baseline configuration: batches of 400, a flush interval of 60, no
    /// tags, and no delivery stream, region or config path.
    ///
    /// The result is not directly usable by `Firehose::init`;
    /// `delivery_stream` and `region` have to be filled in first.
    fn default() -> Self {
        Self {
            // Settings that must be supplied by the user.
            delivery_stream: None,
            region: None,
            config_path: None,
            // Tunables with sensible starting values.
            batch_size: 400,
            flush_interval: 60,
            tags: TagMap::default(),
        }
    }
}
/// The Firehose sink.
///
/// Holds everything required to publish safely to AWS Firehose. Every field
/// is private because nothing outside the sink needs to touch them; the
/// tunable knobs live on `FirehoseConfig`.
pub struct Firehose {
    /// Log lines accepted since the last flush, awaiting publication.
    buffer: Vec<LogLine>,
    /// Delivery stream every batch is written to.
    delivery_stream_name: String,
    /// AWS region the Firehose client is created for.
    region: Region,
    /// Maximum number of buffered lines placed in a single batch request.
    batch_size: usize,
    /// Value reported back through `Sink::flush_interval`.
    flush_interval: u64,
    /// Sink-level tags handed to `LogLine::tags` when building each record.
    tags: TagMap,
}
impl Sink<FirehoseConfig> for Firehose {
fn init(config: FirehoseConfig) -> Self {
Firehose {
buffer: Vec::new(),
delivery_stream_name: config
.delivery_stream
.expect("delivery_stream cannot be None"),
region: config.region.expect("region cannot be None"),
batch_size: config.batch_size,
flush_interval: config.flush_interval,
tags: config.tags,
}
}
fn flush_interval(&self) -> Option<u64> {
Some(self.flush_interval)
}
fn flush(&mut self) {
let provider = DefaultCredentialsProvider::new().unwrap();
let dispatcher = default_tls_client().unwrap();
let client =
KinesisFirehoseClient::new(dispatcher, provider, self.region.clone());
if self.buffer.is_empty() {
return;
}
for chunk in self.buffer.chunks(self.batch_size) {
let prbi = PutRecordBatchInput {
delivery_stream_name: self.delivery_stream_name.clone(),
records: chunk
.iter()
.filter(|m| m.value.len() < 1_024_000)
.map(|m| {
let mut pyld = Map::new();
pyld.insert(
String::from("Path"),
Value::String((*m.path).to_string()),
);
pyld.insert(
String::from("Payload"),
Value::String(m.value.clone()),
);
pyld.insert(
String::from("timestamp"),
Value::String(format_time(m.time)),
);
pyld.insert(
String::from("Uuid"),
Value::String(Uuid::new_v4().hyphenated().to_string()),
);
for (k, v) in m.tags(&self.tags) {
pyld.insert(k.clone(), Value::String(v.clone()));
}
for (k, v) in &m.fields {
pyld.insert(k.clone(), Value::String(v.clone()));
}
Record {
data: serde_json::ser::to_vec(&pyld).unwrap(),
}
})
.collect(),
};
loop {
match client.put_record_batch(&prbi) {
Ok(prbo) => {
debug!(
"Wrote {} records to delivery stream {}",
prbi.records.len(),
prbi.delivery_stream_name
);
report_full_telemetry(
"cernan.sinks.firehose.records.delivery",
1.0,
Some(vec![
(
"delivery_stream_name",
prbi.delivery_stream_name.as_str(),
),
]),
);
report_full_telemetry(
"cernan.sinks.firehose.records.total_delivered",
prbi.records.len() as f64,
Some(vec![
(
"delivery_stream_name",
prbi.delivery_stream_name.as_str(),
),
]),
);
let failed_put_count = prbo.failed_put_count;
if failed_put_count > 0 {
report_full_telemetry(
"cernan.sinks.firehose.records.total_failed",
failed_put_count as f64,
Some(vec![
(
"delivery_stream_name",
prbi.delivery_stream_name.as_str(),
),
]),
);
error!("Failed to write {} put records", failed_put_count);
}
break;
}
Err(err) => {
match err {
// The following errors cannot be recovered from. We
// drop the payload lines and move on to the next
// batch. We might choose to split the chunk smaller in
// the hopes that the failure is a result of a subset of
// the payload being wonky. This is an optimization for
// the future.
ResourceNotFound(rnf_err) => {
report_full_telemetry(
"cernan.sinks.firehose.error.resource_not_found",
1.0,
Some(vec![
(
"delivery_stream_name",
prbi.delivery_stream_name.as_str(),
),
]),
);
error!(
"Unable to write to resource, not found: {}",
rnf_err
);
break;
}
InvalidArgument(ia_err) => {
report_full_telemetry(
"cernan.sinks.firehose.error.invalid_argument",
1.0,
Some(vec![
(
"delivery_stream_name",
prbi.delivery_stream_name.as_str(),
),
]),
);
error!(
"Unable to write, invalid argument: {}",
ia_err
);
break;
}
HttpDispatch(hd_err) => {
report_full_telemetry(
"cernan.sinks.firehose.error.http_dispatch",
1.0,
Some(vec![
(
"delivery_stream_name",
prbi.delivery_stream_name.as_str(),
),
]),
);
error!("Unable to write, http dispatch: {}", hd_err);
break;
}
Validation(v_err) => {
report_full_telemetry(
"cernan.sinks.firehose.error.validation",
1.0,
Some(vec![
(
"delivery_stream_name",
prbi.delivery_stream_name.as_str(),
),
]),
);
error!(
"Unable to write, validation failure: {}",
v_err
);
break;
}
Unknown(u_err) => {
report_full_telemetry(
"cernan.sinks.firehose.error.unknown",
1.0,
Some(vec![
(
"delivery_stream_name",
prbi.delivery_stream_name.as_str(),
),
]),
);
error!("Unable to write, unknown failure: {}", u_err);
break;
}
// The following errors are recoverable, potentially.
Credentials(c_err) => {
report_full_telemetry(
"cernan.sinks.firehose.error.credentials",
1.0,
Some(vec![
(
"delivery_stream_name",
prbi.delivery_stream_name.as_str(),
),
]),
);
error!(
"Unable to write, credential failure: {}",
c_err
);
}
ServiceUnavailable(su_err) => {
report_full_telemetry(
"cernan.sinks.firehose.error.service_unavailable",
1.0,
Some(vec![
(
"delivery_stream_name",
prbi.delivery_stream_name.as_str(),
),
]),
);
error!("Service unavailable, will retry: {}", su_err);
}
}
}
}
}
}
self.buffer.clear();
}
fn shutdown(mut self) -> () {
self.flush();
}
fn deliver_line(&mut self, line: LogLine) -> () {
self.buffer.append(&mut vec![line]);
}
fn valve_state(&self) -> Valve {
if self.buffer.len() > 10_000 {
Valve::Closed
} else {
Valve::Open
}
}
}
/// Render `time`, a count of whole seconds since the Unix epoch, as a UTC
/// timestamp of the form `YYYY-MM-DDTHH:MM:SS.mmmZ`.
///
/// The sub-second part is always zero because the nanosecond component
/// passed to chrono is `0`.
// NOTE(review): chrono's `NaiveDateTime::from_timestamp` is documented to
// panic on out-of-range values; confirm callers only pass sane timestamps.
#[inline]
fn format_time(time: i64) -> String {
    let seconds_since_epoch = NaiveDateTime::from_timestamp(time, 0);
    DateTime::<Utc>::from_utc(seconds_since_epoch, Utc)
        .format("%Y-%m-%dT%H:%M:%S%.3fZ")
        .to_string()
}