1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
use std::{collections::BTreeMap, fmt, iter, pin::Pin, task, time::Duration};

use async_stream::try_stream;
use aws_sdk_cloudformation::{
    error::SdkError, operation::describe_stack_events::DescribeStackEventsError,
};
use aws_smithy_types_convert::date_time::DateTimeExt;
use chrono::{DateTime, Utc};
use futures_util::{stream, Stream, TryStreamExt};

use crate::{
    status_reason::StatusReason, ResourceStatus, StackEvent, StackEventDetails, StackStatus, Status,
};

const POLL_INTERVAL_STACK_EVENT: Duration = Duration::from_secs(5);

/// Describes a failed stack operation.
///
/// This error tries to capture enough information to quickly identify the root-cause of the
/// operation's failure (such as not having permission to create or update a particular resource
/// in the stack). [`stack_status_reason`](Self::stack_status_reason) and
/// [`StackEventDetails::resource_status_reason`] may be useful for this purpose.
#[derive(Debug)]
#[allow(clippy::module_name_repetitions)]
pub struct StackFailure {
    /// The ID of the stack.
    pub stack_id: String,

    /// The failed status in which the stack settled.
    pub stack_status: StackStatus,

    /// The *first* reason the stack moved into a failing state.
    ///
    /// Note that this may not be the reason associated with the current `stack_status`, but rather
    /// the reason for the first negative status the stack entered (which is usually more
    /// descriptive).
    pub stack_status_reason: String,

    /// Resource events with negative statuses that may have precipitated the failure of the
    /// operation.
    ///
    /// **Note:** this is represented as a `Vec` or tuples to avoid having to worry about
    /// matching [`StackEvent`] variants (when it would be a logical error for them to be
    /// anything other than the `Resource` variant).
    pub resource_events: Vec<(ResourceStatus, StackEventDetails)>,
}

impl StackFailure {
    /// The *first* reason the stack moved into a failing state.
    #[must_use]
    pub fn stack_status_reason(&self) -> StatusReason {
        StatusReason::new(Some(&self.stack_status_reason))
    }
}

impl fmt::Display for StackFailure {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "Stack operation failed for {}; terminal status: {} ({})",
            self.stack_id, self.stack_status, self.stack_status_reason
        )?;

        if !self.resource_events.is_empty() {
            writeln!(f, "\nThe following resources had errors:")?;
        }
        for (resource_status, details) in &self.resource_events {
            write!(
                f,
                "\n- {} ({}): {} ({})",
                details.logical_resource_id,
                details.resource_type,
                resource_status,
                details
                    .resource_status_reason
                    .as_deref()
                    .unwrap_or("no reason reported"),
            )?;
        }

        Ok(())
    }
}

/// Describes a successful stack operation with warnings.
///
/// It is possible for resource errors to occur even when the overall operation succeeds, such
/// as failing to delete a resource during clean-up after a successful update. Rather than
/// letting this pass silently, or relying on carefully interrogating `StackEvent`s, the
/// operation returns an error.
#[derive(Debug)]
#[allow(clippy::module_name_repetitions)]
pub struct StackWarning {
    /// The ID of the stack.
    pub stack_id: String,

    /// Resource events with negative statuses that did not affect the overall operation.
    ///
    /// **Note:** this is represented as a `Vec` of tuples to avoid having to worry about
    /// matching [`StackEvent`] variants (when it would be a logical error for them to be
    /// anything other than the `Resource` variant).
    pub resource_events: Vec<(ResourceStatus, StackEventDetails)>,
}

impl fmt::Display for StackWarning {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        writeln!(
            f,
            "Stack {} applied successfully but some resources had errors:",
            self.stack_id
        )?;
        for (resource_status, details) in &self.resource_events {
            write!(
                f,
                "\n- {} ({}): {} ({})",
                details.logical_resource_id,
                details.resource_type,
                resource_status,
                details
                    .resource_status_reason
                    .as_deref()
                    .unwrap_or("no reason reported")
            )?;
        }
        Ok(())
    }
}

pub(crate) enum StackOperationError {
    Failure(StackFailure),
    Warning(StackWarning),
}

pub(crate) enum StackOperationStatus {
    InProgress,
    Complete,
    Failed,
    Unexpected,
}

pub(crate) struct StackOperation<'client, F> {
    stack_id: String,
    check_progress: F,
    events: Pin<
        Box<dyn Stream<Item = Result<StackEvent, SdkError<DescribeStackEventsError>>> + 'client>,
    >,
    stack_error_status: Option<StackStatus>,
    stack_error_status_reason: Option<String>,
    resource_error_events: Vec<(ResourceStatus, StackEventDetails)>,
}

impl<'client, F> StackOperation<'client, F>
where
    F: Fn(StackStatus) -> StackOperationStatus + Unpin,
{
    pub(crate) fn new(
        client: &'client aws_sdk_cloudformation::Client,
        stack_id: String,
        started_at: DateTime<Utc>,
        check_progress: F,
    ) -> Self {
        let root_stack_id = stack_id.clone();
        let events = try_stream! {
            let mut interval = tokio::time::interval(POLL_INTERVAL_STACK_EVENT);
            let mut since = started_at;
            let mut nested_stacks = BTreeMap::<String, String>::new();

            loop {
                interval.tick().await;

                let stack_ids = iter::once(root_stack_id.clone()).chain(nested_stacks.keys().cloned());
                let stack_events = stack_ids.into_iter().map(|stack_id| stream::once(Box::pin(async {
                    let stack_events: Vec<_> = client
                        .describe_stack_events()
                        .stack_name(stack_id)
                        .send()
                        .await?
                        .stack_events
                        .expect("DescribeStackEventsOutput without stack_events")
                        .into_iter()
                        .take_while(|event| {
                            event
                                .timestamp
                                .expect("StackEvent without timestamp")
                                .to_chrono_utc()
                                .expect("invalid timestamp")
                                > since
                        })
                        .map(|event| {
                            let stack_alias = event.stack_id().and_then(|stack_id| nested_stacks.get(stack_id)).cloned();
                            StackEvent::from_sdk(stack_alias, event)
                        })
                        .filter(|event| {
                            match event {
                                StackEvent::Stack { details, .. } => details.stack_id() == root_stack_id,
                                StackEvent::Resource{ .. } => true,
                            }
                        })
                        .collect();
                    Ok::<_, SdkError<DescribeStackEventsError>>(stack_events)
                })));
                let stack_events: Vec<_> = stream::select_all(stack_events).try_collect().await?;
                let stack_events: Vec<_> = stack_events.into_iter().flatten().collect();

                if let Some(stack_event) = stack_events.first() {
                    since = *stack_event.timestamp();
                }

                for stack_event in stack_events.into_iter().rev() {
                    let is_terminal = stack_event.is_terminal();

                    match &stack_event {
                        StackEvent::Resource {
                            details: details @ StackEventDetails {
                                physical_resource_id: Some(nested_stack_id),
                                ..
                            },
                            ..
                        } if details.resource_type() == "AWS::CloudFormation::Stack" && !nested_stack_id.is_empty() => {
                            let stack_alias = nested_stacks
                                .get(details.stack_id())
                                .map(String::as_str)
                                .into_iter()
                                .chain(iter::once(details.logical_resource_id()))
                                .collect::<Vec<_>>()
                                .join("/");
                            nested_stacks.insert(nested_stack_id.clone(), stack_alias);
                        },
                        _ => {},
                    }

                    yield stack_event;

                    if is_terminal {
                        return;
                    }
                }
            }
        };
        Self {
            stack_id,
            check_progress,
            events: Box::pin(events),
            stack_error_status: None,
            stack_error_status_reason: None,
            resource_error_events: Vec::new(),
        }
    }

    pub(crate) fn verify(self) -> Result<(), StackOperationError> {
        if let Some(stack_status) = self.stack_error_status {
            return Err(StackOperationError::Failure(StackFailure {
                stack_id: self.stack_id,
                stack_status,
                stack_status_reason: self
                    .stack_error_status_reason
                    .expect("stack op failed with no reasons"),
                resource_events: self.resource_error_events,
            }));
        }

        if self.resource_error_events.is_empty() {
            Ok(())
        } else {
            Err(StackOperationError::Warning(StackWarning {
                stack_id: self.stack_id,
                resource_events: self.resource_error_events,
            }))
        }
    }
}

impl<F> Stream for StackOperation<'_, F>
where
    F: Fn(StackStatus) -> StackOperationStatus + Unpin,
{
    type Item = Result<StackEvent, SdkError<DescribeStackEventsError>>;

    fn poll_next(
        mut self: Pin<&mut Self>,
        ctx: &mut task::Context<'_>,
    ) -> task::Poll<Option<Self::Item>> {
        match self.events.as_mut().poll_next(ctx) {
            task::Poll::Pending => task::Poll::Pending,
            task::Poll::Ready(None) => task::Poll::Ready(None),
            task::Poll::Ready(Some(Err(error))) => task::Poll::Ready(Some(Err(error))),
            task::Poll::Ready(Some(Ok(event))) => {
                match &event {
                    StackEvent::Resource {
                        resource_status,
                        details,
                    } => {
                        if resource_status.sentiment().is_negative() {
                            self.resource_error_events
                                .push((*resource_status, details.clone()));
                        }
                    }
                    StackEvent::Stack {
                        resource_status, ..
                    } if event.stack_id() == self.stack_id => {
                        if resource_status.sentiment().is_negative() {
                            if let Some(reason) = event.resource_status_reason() {
                                self.stack_error_status_reason.replace(reason.to_string());
                            }
                        }
                        match (self.check_progress)(*resource_status) {
                            StackOperationStatus::InProgress | StackOperationStatus::Complete => {}
                            StackOperationStatus::Failed => {
                                self.stack_error_status = Some(*resource_status);
                            }
                            StackOperationStatus::Unexpected => {
                                panic!("stack has unexpected status for: {}", resource_status);
                            }
                        }
                    }
                    // Do nothing for nested stack events since resource events from the parent stack will be processed.
                    StackEvent::Stack { .. } => {}
                }
                task::Poll::Ready(Some(Ok(event)))
            }
        }
    }
}