Skip to main content

tryaudex_core/
cleanup.rs

1//! Phase 3: delete resources that `--tag-session` stamped during a session.
2//!
3//! Discovery runs through AWS Resource Groups Tagging API, which is the
4//! canonical cross-service way to list resources by tag. We then dispatch
5//! per-service `delete-*` calls keyed off the ARN.
6//!
7//! Credentials: this module shells out to the user's `aws` CLI, which picks
8//! up ambient credentials (AWS_PROFILE, env vars, instance role, etc.).
9//! Cleanup runs OUTSIDE the expired session — by definition, the short-lived
10//! STS creds are already gone by the time the user asks to clean up.
11
12use crate::error::{AvError, Result};
13use serde::{Deserialize, Serialize};
14use std::path::{Path, PathBuf};
15use std::process::Command;
16
/// A single tagged resource returned by Resource Groups Tagging API.
///
/// Values are normally produced by [`TaggedResource::from_arn`]. The
/// `(service, resource_type)` pair is what the delete dispatch and the
/// cost hints in this module match on.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct TaggedResource {
    /// Full ARN as reported by AWS.
    pub arn: String,
    /// Parsed service name ("s3", "dynamodb", ...).
    pub service: String,
    /// Parsed resource type within the service ("bucket", "table", ...).
    pub resource_type: String,
    /// Human-friendly name/identifier extracted from the ARN.
    pub name: String,
}
29
30impl TaggedResource {
31    /// Parse an ARN into its (service, type, name) parts.
32    ///
33    /// ARN formats we handle:
34    /// - arn:aws:s3:::bucket-name
35    /// - arn:aws:dynamodb:us-east-1:123:table/TableName
36    /// - arn:aws:sqs:us-east-1:123:queue-name
37    /// - arn:aws:sns:us-east-1:123:topic-name
38    /// - arn:aws:lambda:us-east-1:123:function:FuncName
39    /// - arn:aws:iam::123:role/RoleName
40    /// - arn:aws:secretsmanager:us-east-1:123:secret:name-AbCdEf
41    /// - arn:aws:logs:us-east-1:123:log-group:group-name
42    /// - arn:aws:ecr:us-east-1:123:repository/repo
43    /// - arn:aws:kms:us-east-1:123:key/uuid
44    /// - arn:aws:cloudformation:us-east-1:123:stack/name/uuid
45    /// - arn:aws:rds:us-east-1:123:db:instance-name
46    pub fn from_arn(arn: &str) -> Option<Self> {
47        // Split into: ["arn", "aws", service, region, account, resource...]
48        let parts: Vec<&str> = arn.splitn(6, ':').collect();
49        if parts.len() < 6 || parts[0] != "arn" {
50            return None;
51        }
52        let service = parts[2].to_string();
53        let resource_part = parts[5];
54
55        // Resource part can be "type/name", "type:name", or just "name".
56        let (resource_type, name) = if let Some((t, n)) = resource_part.split_once('/') {
57            (t.to_string(), n.to_string())
58        } else if let Some((t, n)) = resource_part.split_once(':') {
59            (t.to_string(), n.to_string())
60        } else {
61            // S3 bucket ARNs have no type prefix — the whole thing is the name.
62            let default_type = match service.as_str() {
63                "s3" => "bucket",
64                "sqs" => "queue",
65                "sns" => "topic",
66                _ => "resource",
67            };
68            (default_type.to_string(), resource_part.to_string())
69        };
70
71        Some(TaggedResource {
72            arn: arn.to_string(),
73            service,
74            resource_type,
75            name,
76        })
77    }
78}
79
80/// Query Resource Groups Tagging API for every resource bearing
81/// `tryaudex-session=<session_id>`. Returns the raw list (may be empty).
82pub fn discover(session_id: &str) -> Result<Vec<TaggedResource>> {
83    let filter = format!("Key=tryaudex-session,Values={session_id}");
84    let output = Command::new("aws")
85        .args([
86            "resourcegroupstaggingapi",
87            "get-resources",
88            "--tag-filters",
89            &filter,
90            "--output",
91            "json",
92        ])
93        .output()
94        .map_err(AvError::Io)?;
95
96    if !output.status.success() {
97        return Err(AvError::InvalidPolicy(format!(
98            "aws resourcegroupstaggingapi failed: {}",
99            String::from_utf8_lossy(&output.stderr).trim()
100        )));
101    }
102
103    #[derive(Deserialize)]
104    struct Response {
105        #[serde(rename = "ResourceTagMappingList", default)]
106        mappings: Vec<Mapping>,
107    }
108    #[derive(Deserialize)]
109    struct Mapping {
110        #[serde(rename = "ResourceARN")]
111        arn: String,
112    }
113
114    let parsed: Response = serde_json::from_slice(&output.stdout).map_err(|e| {
115        AvError::InvalidPolicy(format!("Failed to parse tagging API response: {e}"))
116    })?;
117
118    Ok(parsed
119        .mappings
120        .into_iter()
121        .filter_map(|m| TaggedResource::from_arn(&m.arn))
122        .collect())
123}
124
/// The outcome of attempting to delete one resource.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DeleteOutcome {
    /// The delete command ran and exited successfully.
    Deleted,
    /// `dry_run` was requested: a delete command exists for the resource
    /// but was not executed.
    DryRun,
    /// No delete command is known for this service/resource-type pair.
    Unsupported,
    /// The command could not be spawned or exited non-zero. Carries the
    /// spawn error text or the command's trimmed stderr.
    Failed(String),
}
133
134/// Build the aws-CLI argv to delete this resource. None if unsupported.
135pub fn delete_command(r: &TaggedResource) -> Option<Vec<String>> {
136    let s = |s: &str| s.to_string();
137    match (r.service.as_str(), r.resource_type.as_str()) {
138        ("s3", "bucket") => Some(vec![
139            s("aws"),
140            s("s3"),
141            s("rb"),
142            format!("s3://{}", r.name),
143            s("--force"),
144        ]),
145        ("dynamodb", "table") => Some(vec![
146            s("aws"),
147            s("dynamodb"),
148            s("delete-table"),
149            s("--table-name"),
150            r.name.clone(),
151        ]),
152        ("sqs", "queue") => Some(vec![
153            s("aws"),
154            s("sqs"),
155            s("delete-queue"),
156            s("--queue-url"),
157            // SQS delete-queue needs the URL, not the name.
158            // We rebuild it from the ARN — arn:aws:sqs:region:account:name.
159            sqs_url_from_arn(&r.arn)?,
160        ]),
161        ("sns", "topic") => Some(vec![
162            s("aws"),
163            s("sns"),
164            s("delete-topic"),
165            s("--topic-arn"),
166            r.arn.clone(),
167        ]),
168        ("lambda", "function") => Some(vec![
169            s("aws"),
170            s("lambda"),
171            s("delete-function"),
172            s("--function-name"),
173            r.name.clone(),
174        ]),
175        ("rds", "db") => Some(vec![
176            s("aws"),
177            s("rds"),
178            s("delete-db-instance"),
179            s("--db-instance-identifier"),
180            r.name.clone(),
181            s("--skip-final-snapshot"),
182        ]),
183        ("iam", "role") => Some(vec![
184            s("aws"),
185            s("iam"),
186            s("delete-role"),
187            s("--role-name"),
188            r.name.clone(),
189        ]),
190        ("iam", "user") => Some(vec![
191            s("aws"),
192            s("iam"),
193            s("delete-user"),
194            s("--user-name"),
195            r.name.clone(),
196        ]),
197        ("iam", "policy") => Some(vec![
198            s("aws"),
199            s("iam"),
200            s("delete-policy"),
201            s("--policy-arn"),
202            r.arn.clone(),
203        ]),
204        ("secretsmanager", "secret") => Some(vec![
205            s("aws"),
206            s("secretsmanager"),
207            s("delete-secret"),
208            s("--secret-id"),
209            r.arn.clone(),
210            s("--force-delete-without-recovery"),
211        ]),
212        ("ssm", "parameter") => Some(vec![
213            s("aws"),
214            s("ssm"),
215            s("delete-parameter"),
216            s("--name"),
217            r.name.clone(),
218        ]),
219        ("logs", "log-group") => Some(vec![
220            s("aws"),
221            s("logs"),
222            s("delete-log-group"),
223            s("--log-group-name"),
224            r.name.clone(),
225        ]),
226        ("cloudformation", "stack") => Some(vec![
227            s("aws"),
228            s("cloudformation"),
229            s("delete-stack"),
230            s("--stack-name"),
231            r.name.clone(),
232        ]),
233        ("ecr", "repository") => Some(vec![
234            s("aws"),
235            s("ecr"),
236            s("delete-repository"),
237            s("--repository-name"),
238            r.name.clone(),
239            s("--force"),
240        ]),
241        ("kms", "key") => Some(vec![
242            s("aws"),
243            s("kms"),
244            s("schedule-key-deletion"),
245            s("--key-id"),
246            r.name.clone(),
247            s("--pending-window-in-days"),
248            s("7"),
249        ]),
250        _ => None,
251    }
252}
253
/// Reconstruct the SQS queue URL from its ARN.
///
/// Expects exactly `arn:<partition>:sqs:<region>:<account>:<queue-name>`.
/// Returns `None` for anything else: a non-SQS ARN, a missing `arn` prefix,
/// an empty region/account/name, or trailing extra fields. Rejecting those
/// is safer than issuing a delete against a garbage URL.
///
/// The endpoint domain follows the partition: `aws-cn` uses
/// `amazonaws.com.cn`; every other partition falls back to `amazonaws.com`.
fn sqs_url_from_arn(arn: &str) -> Option<String> {
    // arn:aws:sqs:us-east-1:123456789012:queue-name
    let mut fields = arn.split(':');
    let prefix = fields.next()?;
    let partition = fields.next()?;
    let service = fields.next()?;
    let region = fields.next()?;
    let account = fields.next()?;
    let name = fields.next()?;

    // Queue names cannot contain ':', so a seventh field means a malformed ARN.
    if fields.next().is_some() || prefix != "arn" || service != "sqs" {
        return None;
    }
    if region.is_empty() || account.is_empty() || name.is_empty() {
        return None;
    }

    let domain = match partition {
        "aws-cn" => "amazonaws.com.cn",
        _ => "amazonaws.com",
    };
    Some(format!("https://sqs.{region}.{domain}/{account}/{name}"))
}
268
269/// Attempt to delete a single resource. `dry_run=true` skips the actual call.
270///
271/// For IAM roles we first detach attached policies + delete inline policies,
272/// since AWS rejects `delete-role` if anything is still attached. Without
273/// this pre-step users hit cryptic "cannot delete entity because it is
274/// currently attached to 1 entities" errors and have to drop into the
275/// AWS console.
276pub fn delete(r: &TaggedResource, dry_run: bool) -> DeleteOutcome {
277    let argv = match delete_command(r) {
278        Some(a) => a,
279        None => return DeleteOutcome::Unsupported,
280    };
281    if dry_run {
282        return DeleteOutcome::DryRun;
283    }
284    if r.service == "iam" && r.resource_type == "role" {
285        if let Err(msg) = detach_iam_role_policies(&r.name) {
286            // Don't hard-fail — try delete-role anyway; if the role had
287            // nothing attached the pre-step failure is irrelevant.
288            tracing::warn!(role = %r.name, error = %msg, "role policy pre-detach failed");
289        }
290    }
291    let output = match Command::new(&argv[0]).args(&argv[1..]).output() {
292        Ok(o) => o,
293        Err(e) => return DeleteOutcome::Failed(e.to_string()),
294    };
295    if output.status.success() {
296        DeleteOutcome::Deleted
297    } else {
298        DeleteOutcome::Failed(String::from_utf8_lossy(&output.stderr).trim().to_string())
299    }
300}
301
302/// For an IAM role, list+detach all managed policies and list+delete all
303/// inline policies so `delete-role` can succeed. Best-effort: partial
304/// failures return the first error but keep going.
305fn detach_iam_role_policies(role_name: &str) -> std::result::Result<(), String> {
306    // Managed policies: list-attached-role-policies → detach each.
307    let list_attached = Command::new("aws")
308        .args([
309            "iam",
310            "list-attached-role-policies",
311            "--role-name",
312            role_name,
313            "--output",
314            "json",
315        ])
316        .output()
317        .map_err(|e| e.to_string())?;
318    if list_attached.status.success() {
319        #[derive(Deserialize)]
320        struct Attached {
321            #[serde(rename = "AttachedPolicies", default)]
322            policies: Vec<AttachedPolicy>,
323        }
324        #[derive(Deserialize)]
325        struct AttachedPolicy {
326            #[serde(rename = "PolicyArn")]
327            arn: String,
328        }
329        if let Ok(parsed) = serde_json::from_slice::<Attached>(&list_attached.stdout) {
330            for p in parsed.policies {
331                let _ = Command::new("aws")
332                    .args([
333                        "iam",
334                        "detach-role-policy",
335                        "--role-name",
336                        role_name,
337                        "--policy-arn",
338                        &p.arn,
339                    ])
340                    .output();
341            }
342        }
343    }
344
345    // Inline policies: list-role-policies → delete each.
346    let list_inline = Command::new("aws")
347        .args([
348            "iam",
349            "list-role-policies",
350            "--role-name",
351            role_name,
352            "--output",
353            "json",
354        ])
355        .output()
356        .map_err(|e| e.to_string())?;
357    if list_inline.status.success() {
358        #[derive(Deserialize)]
359        struct Inline {
360            #[serde(rename = "PolicyNames", default)]
361            names: Vec<String>,
362        }
363        if let Ok(parsed) = serde_json::from_slice::<Inline>(&list_inline.stdout) {
364            for name in parsed.names {
365                let _ = Command::new("aws")
366                    .args([
367                        "iam",
368                        "delete-role-policy",
369                        "--role-name",
370                        role_name,
371                        "--policy-name",
372                        &name,
373                    ])
374                    .output();
375            }
376        }
377    }
378
379    Ok(())
380}
381
382/// Delete priority for dependency ordering. Lower tiers are leaf resources
383/// (nothing else in our set refers to them) and get deleted first. Higher
384/// tiers sit upstream and must be torn down only after their dependents.
385///
386/// IAM policies are the main upstream case: `aws iam delete-policy` refuses
387/// if any role/user/group still holds a reference. So we delete roles
388/// (tier 1, which auto-detaches policies in `delete()`'s pre-step) before
389/// policies (tier 2). Every other resource is a leaf at tier 0.
390pub fn delete_tier(r: &TaggedResource) -> u8 {
391    match (r.service.as_str(), r.resource_type.as_str()) {
392        ("iam", "policy") => 2,
393        ("iam", "role") => 1,
394        _ => 0,
395    }
396}
397
398/// Stable-sort resources into delete order. Same-tier ordering is
399/// preserved from the caller's input (discovery order).
400pub fn sort_for_deletion(resources: &mut [TaggedResource]) {
401    resources.sort_by_key(delete_tier);
402}
403
/// Rough daily-cost hint for a single tagged resource, used in the pre-delete
/// preview so users can see "oh wait, that's a $40/day RDS instance".
///
/// These are intentionally conservative *floor* estimates based on the
/// cheapest common SKU (e.g. RDS t3.micro, EC2 t3.micro, KMS single-region).
/// Storage- and request-driven services (S3, DynamoDB, CloudWatch Logs)
/// report usage-dependent because we can't see the bytes from the tag API
/// alone — the number we show is a rock-bottom floor assuming idle.
///
/// Produced by `estimate_daily_cost`; a floor of `0.0` combined with
/// `usage_dependent = true` means "free when idle, billed by usage".
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct DailyCostHint {
    /// Conservative lower bound, USD/day.
    pub usd_per_day: f64,
    /// True when actual cost scales with size or traffic beyond this floor.
    pub usage_dependent: bool,
}
419
420/// Look up a daily cost hint for the given resource, or None if it's free
421/// / unknown. Prices are approximate us-east-1 list prices circa 2025 —
422/// precise to within "right order of magnitude", nothing more.
423pub fn estimate_daily_cost(r: &TaggedResource) -> Option<DailyCostHint> {
424    let hint = match (r.service.as_str(), r.resource_type.as_str()) {
425        // RDS on-demand, t3.micro single-AZ: ~$0.017/hr = $0.41/day floor.
426        // Anything bigger or multi-AZ is dramatically more.
427        ("rds", "db") => DailyCostHint {
428            usd_per_day: 0.41,
429            usage_dependent: true,
430        },
431        // EC2 t3.micro on-demand: $0.0104/hr = $0.25/day. Plus EBS.
432        ("ec2", "instance") => DailyCostHint {
433            usd_per_day: 0.25,
434            usage_dependent: true,
435        },
436        // KMS customer-managed key: $1/month = $0.033/day flat.
437        ("kms", "key") => DailyCostHint {
438            usd_per_day: 0.033,
439            usage_dependent: false,
440        },
441        // Secrets Manager: $0.40/secret/month = $0.013/day.
442        ("secretsmanager", "secret") => DailyCostHint {
443            usd_per_day: 0.013,
444            usage_dependent: false,
445        },
446        // S3 bucket: cost scales with stored bytes. Storage is $0.023/GB/mo
447        // = $0.00077/GB/day. Show the floor as ~free and flag usage-dependent.
448        ("s3", "bucket") => DailyCostHint {
449            usd_per_day: 0.0,
450            usage_dependent: true,
451        },
452        // DynamoDB: on-demand is pay-per-request, provisioned has a base.
453        // Idle on-demand is effectively free but storage is $0.25/GB/mo.
454        ("dynamodb", "table") => DailyCostHint {
455            usd_per_day: 0.0,
456            usage_dependent: true,
457        },
458        // CloudWatch Logs: ingest $0.50/GB, storage $0.03/GB/mo.
459        ("logs", "log-group") => DailyCostHint {
460            usd_per_day: 0.0,
461            usage_dependent: true,
462        },
463        // ECR: $0.10/GB/mo storage + data transfer.
464        ("ecr", "repository") => DailyCostHint {
465            usd_per_day: 0.0,
466            usage_dependent: true,
467        },
468        // Free/idle-free services: IAM, Lambda (idle), SQS/SNS (idle),
469        // CloudFormation, SSM parameters (standard tier).
470        ("iam", _)
471        | ("lambda", _)
472        | ("sqs", _)
473        | ("sns", _)
474        | ("cloudformation", _)
475        | ("ssm", _) => return None,
476        _ => return None,
477    };
478    Some(hint)
479}
480
481/// Sum the known daily-cost floors across a set of resources.
482/// Returns (total_usd_per_day, any_usage_dependent).
483pub fn estimate_daily_cost_total(resources: &[TaggedResource]) -> (f64, bool) {
484    let mut total = 0.0;
485    let mut any_usage = false;
486    for r in resources {
487        if let Some(h) = estimate_daily_cost(r) {
488            total += h.usd_per_day;
489            any_usage |= h.usage_dependent;
490        }
491    }
492    (total, any_usage)
493}
494
/// One stale-session entry: the session already ended (expired / completed
/// / failed / revoked) but still has resources tagged `tryaudex-session`
/// that are alive in the account.
#[derive(Debug, Clone)]
pub struct OrphanedSession {
    /// Id of the session whose tag the resources carry.
    pub session_id: String,
    /// Session status rendered as text (via its `to_string()`).
    pub status: String,
    /// When the session ended; `find_orphans` fills this from the session's
    /// `expires_at`.
    pub ended_at: chrono::DateTime<chrono::Utc>,
    /// Resources still discovered for this session (never empty when built
    /// by `find_orphans`).
    pub resources: Vec<TaggedResource>,
    /// Sum of the known daily cost floors of `resources`, USD/day.
    pub daily_cost: f64,
    /// True when at least one resource's real cost scales with usage.
    pub usage_dependent: bool,
}
507
508/// Cross-reference the local session store with the tagging API to find
509/// every non-active session that still has billing resources. Each entry
510/// can be drained with `tryaudex cleanup <session_id>`.
511///
512/// Hits the AWS tagging API once per non-active session in the input;
513/// callers should scope the input list (recent N, filter by status) when
514/// the store grows. Discovery errors on individual sessions are swallowed
515/// — one failing session shouldn't block orphan reporting for the rest.
516pub fn find_orphans(sessions: &[crate::session::Session]) -> Vec<OrphanedSession> {
517    use crate::session::SessionStatus;
518    let mut out = Vec::new();
519    for s in sessions {
520        if s.status == SessionStatus::Active {
521            continue;
522        }
523        let Ok(resources) = discover(&s.id) else {
524            continue;
525        };
526        if resources.is_empty() {
527            continue;
528        }
529        let (daily_cost, usage_dependent) = estimate_daily_cost_total(&resources);
530        out.push(OrphanedSession {
531            session_id: s.id.clone(),
532            status: s.status.to_string(),
533            ended_at: s.expires_at,
534            resources,
535            daily_cost,
536            usage_dependent,
537        });
538    }
539    // Sort by cost descending so the expensive orphans surface first.
540    out.sort_by(|a, b| {
541        b.daily_cost
542            .partial_cmp(&a.daily_cost)
543            .unwrap_or(std::cmp::Ordering::Equal)
544    });
545    out
546}
547
/// Summary of a cleanup run.
#[derive(Debug, Clone, Default)]
pub struct CleanupReport {
    /// Resources whose delete succeeded. In a dry run this instead holds
    /// the resources that WOULD be deleted — check `dry_run`.
    pub deleted: Vec<TaggedResource>,
    /// Resources whose delete failed, paired with the error text.
    pub failed: Vec<(TaggedResource, String)>,
    /// Resources for which no delete command is known.
    pub unsupported: Vec<TaggedResource>,
    /// Whether this report came from a dry run (nothing was executed).
    pub dry_run: bool,
}
556
557/// Run `delete()` over every discovered resource and aggregate results.
558pub fn cleanup_session(session_id: &str, dry_run: bool) -> Result<CleanupReport> {
559    let resources = discover(session_id)?;
560    let mut report = CleanupReport {
561        dry_run,
562        ..Default::default()
563    };
564    for r in resources {
565        match delete(&r, dry_run) {
566            DeleteOutcome::Deleted => report.deleted.push(r),
567            DeleteOutcome::DryRun => report.deleted.push(r),
568            DeleteOutcome::Unsupported => report.unsupported.push(r),
569            DeleteOutcome::Failed(err) => report.failed.push((r, err)),
570        }
571    }
572    Ok(report)
573}
574
575// --- Partial-cleanup state persistence ---
576//
577// Cleanup can crash mid-run (network blip, AWS throttle, user Ctrl-C). When
578// that happens we need to know which resources were already deleted on the
579// next invocation, otherwise the user either wastes API calls re-issuing
580// deletes or, worse, gets misleading "resource not found" errors for things
581// we handled last time. State files on disk give us that idempotency —
582// re-running `tryaudex cleanup <id>` picks up from where we left off.
583
/// On-disk record of an in-flight cleanup. Written before any deletion
/// happens, updated after each attempt, removed once nothing is left.
///
/// Serialized as JSON by `CleanupStateStore`, one file per session id.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct CleanupState {
    /// Session this cleanup belongs to; also determines the state file name.
    pub session_id: String,
    /// When this state record was first created.
    pub started_at: chrono::DateTime<chrono::Utc>,
    /// ARNs that are known to be deleted (successful outcomes from prior runs).
    pub deleted_arns: Vec<String>,
    /// Resources whose delete call returned an error — retry candidates.
    /// Holds at most one entry per ARN, with the most recent reason.
    pub failed: Vec<(TaggedResource, String)>,
}
595
596impl CleanupState {
597    fn new(session_id: &str) -> Self {
598        Self {
599            session_id: session_id.to_string(),
600            started_at: chrono::Utc::now(),
601            deleted_arns: Vec::new(),
602            failed: Vec::new(),
603        }
604    }
605
606    pub fn is_deleted(&self, arn: &str) -> bool {
607        self.deleted_arns.iter().any(|a| a == arn)
608    }
609
610    pub fn record_deleted(&mut self, arn: &str) {
611        if !self.is_deleted(arn) {
612            self.deleted_arns.push(arn.to_string());
613        }
614        // If the resource was previously failed, clear it from that list.
615        self.failed.retain(|(r, _)| r.arn != arn);
616    }
617
618    pub fn record_failed(&mut self, resource: &TaggedResource, reason: &str) {
619        self.failed.retain(|(r, _)| r.arn != resource.arn);
620        self.failed.push((resource.clone(), reason.to_string()));
621    }
622}
623
/// Manages cleanup state files keyed by session id.
///
/// Each session's state lives in `<dir>/<session_id>.json`.
pub struct CleanupStateStore {
    /// Directory holding the state files; created on construction.
    dir: PathBuf,
}
628
629impl CleanupStateStore {
630    pub fn new() -> Result<Self> {
631        let dir = dirs::data_local_dir()
632            .unwrap_or_else(|| PathBuf::from("."))
633            .join("audex")
634            .join("cleanup_state");
635        std::fs::create_dir_all(&dir)?;
636        Ok(Self { dir })
637    }
638
639    pub fn with_dir(dir: impl AsRef<Path>) -> Result<Self> {
640        let dir = dir.as_ref().to_path_buf();
641        std::fs::create_dir_all(&dir)?;
642        Ok(Self { dir })
643    }
644
645    fn path_for(&self, session_id: &str) -> PathBuf {
646        self.dir.join(format!("{session_id}.json"))
647    }
648
649    /// Load state for a session, or return a freshly-initialized state if
650    /// none exists yet.
651    pub fn load_or_new(&self, session_id: &str) -> Result<CleanupState> {
652        let path = self.path_for(session_id);
653        if !path.exists() {
654            return Ok(CleanupState::new(session_id));
655        }
656        let json = std::fs::read_to_string(&path)?;
657        serde_json::from_str(&json).map_err(|e| {
658            AvError::InvalidPolicy(format!("corrupt cleanup state file {path:?}: {e}"))
659        })
660    }
661
662    pub fn save(&self, state: &CleanupState) -> Result<()> {
663        let path = self.path_for(&state.session_id);
664        let json = serde_json::to_string_pretty(state)?;
665        std::fs::write(&path, json)?;
666        Ok(())
667    }
668
669    pub fn remove(&self, session_id: &str) {
670        let _ = std::fs::remove_file(self.path_for(session_id));
671    }
672
673    pub fn exists(&self, session_id: &str) -> bool {
674        self.path_for(session_id).exists()
675    }
676
677    /// Return every cleanup state currently on disk that still has at least
678    /// one unresolved failure. Used as a "cleanup backlog" guard before
679    /// starting a fresh --ephemeral session so users don't quietly pile up
680    /// orphaned resources. States with only successful deletes (which is an
681    /// interim state during cleanup) are skipped — the file would be removed
682    /// on next full success anyway.
683    pub fn list_pending(&self) -> Vec<CleanupState> {
684        let mut out = Vec::new();
685        let entries = match std::fs::read_dir(&self.dir) {
686            Ok(e) => e,
687            Err(_) => return out,
688        };
689        for entry in entries.flatten() {
690            let path = entry.path();
691            if path.extension().and_then(|s| s.to_str()) != Some("json") {
692                continue;
693            }
694            let Ok(json) = std::fs::read_to_string(&path) else {
695                continue;
696            };
697            let Ok(state) = serde_json::from_str::<CleanupState>(&json) else {
698                continue;
699            };
700            if !state.failed.is_empty() {
701                out.push(state);
702            }
703        }
704        // Sort by started_at for predictable listing order.
705        out.sort_by_key(|s| s.started_at);
706        out
707    }
708}
709
710#[cfg(test)]
711mod tests {
712    use super::*;
713
714    #[test]
715    fn parses_s3_bucket_arn() {
716        let r = TaggedResource::from_arn("arn:aws:s3:::my-bucket").unwrap();
717        assert_eq!(r.service, "s3");
718        assert_eq!(r.resource_type, "bucket");
719        assert_eq!(r.name, "my-bucket");
720    }
721
722    #[test]
723    fn parses_dynamodb_table_arn() {
724        let r = TaggedResource::from_arn("arn:aws:dynamodb:us-east-1:123:table/Users").unwrap();
725        assert_eq!(r.service, "dynamodb");
726        assert_eq!(r.resource_type, "table");
727        assert_eq!(r.name, "Users");
728    }
729
730    #[test]
731    fn parses_lambda_function_arn() {
732        let r = TaggedResource::from_arn("arn:aws:lambda:us-east-1:123:function:my-fn").unwrap();
733        assert_eq!(r.service, "lambda");
734        assert_eq!(r.resource_type, "function");
735        assert_eq!(r.name, "my-fn");
736    }
737
738    #[test]
739    fn parses_iam_role_arn() {
740        let r = TaggedResource::from_arn("arn:aws:iam::123:role/MyRole").unwrap();
741        assert_eq!(r.service, "iam");
742        assert_eq!(r.resource_type, "role");
743        assert_eq!(r.name, "MyRole");
744    }
745
746    #[test]
747    fn parses_sqs_queue_arn() {
748        let r = TaggedResource::from_arn("arn:aws:sqs:us-east-1:123:my-queue").unwrap();
749        assert_eq!(r.service, "sqs");
750        assert_eq!(r.resource_type, "queue");
751        assert_eq!(r.name, "my-queue");
752    }
753
754    #[test]
755    fn rejects_bad_arn() {
756        assert!(TaggedResource::from_arn("not-an-arn").is_none());
757        assert!(TaggedResource::from_arn("arn:aws:s3").is_none());
758    }
759
760    #[test]
761    fn sqs_url_rebuilt_from_arn() {
762        let url = sqs_url_from_arn("arn:aws:sqs:us-east-1:123456789012:foo").unwrap();
763        assert_eq!(url, "https://sqs.us-east-1.amazonaws.com/123456789012/foo");
764    }
765
766    #[test]
767    fn delete_command_for_s3_bucket() {
768        let r = TaggedResource::from_arn("arn:aws:s3:::mybk").unwrap();
769        let cmd = delete_command(&r).unwrap();
770        assert_eq!(cmd[0..3], vec!["aws", "s3", "rb"]);
771        assert!(cmd.contains(&"s3://mybk".to_string()));
772        assert!(cmd.contains(&"--force".to_string()));
773    }
774
775    #[test]
776    fn delete_command_for_dynamodb() {
777        let r = TaggedResource::from_arn("arn:aws:dynamodb:us-east-1:1:table/T").unwrap();
778        let cmd = delete_command(&r).unwrap();
779        assert_eq!(cmd[0..3], vec!["aws", "dynamodb", "delete-table"]);
780        assert!(cmd.contains(&"T".to_string()));
781    }
782
783    #[test]
784    fn delete_command_for_sqs_uses_url() {
785        let r = TaggedResource::from_arn("arn:aws:sqs:us-east-1:1:q1").unwrap();
786        let cmd = delete_command(&r).unwrap();
787        assert!(cmd.iter().any(|s| s.starts_with("https://sqs.")));
788    }
789
790    #[test]
791    fn delete_command_for_kms_schedules_deletion() {
792        let r = TaggedResource::from_arn("arn:aws:kms:us-east-1:1:key/uuid").unwrap();
793        let cmd = delete_command(&r).unwrap();
794        assert!(cmd.contains(&"schedule-key-deletion".to_string()));
795        assert!(cmd.contains(&"7".to_string())); // 7-day pending window
796    }
797
798    #[test]
799    fn delete_command_for_unsupported_returns_none() {
800        let r = TaggedResource {
801            arn: "arn:aws:exotic:us-east-1:1:thing/x".to_string(),
802            service: "exotic".to_string(),
803            resource_type: "thing".to_string(),
804            name: "x".to_string(),
805        };
806        assert!(delete_command(&r).is_none());
807    }
808
809    #[test]
810    fn delete_dry_run_returns_dryrun_outcome() {
811        let r = TaggedResource::from_arn("arn:aws:dynamodb:us-east-1:1:table/T").unwrap();
812        assert_eq!(delete(&r, true), DeleteOutcome::DryRun);
813    }
814
815    #[test]
816    fn cleanup_state_records_deleted_arns() {
817        let mut state = CleanupState::new("sess-1");
818        assert!(!state.is_deleted("arn:aws:s3:::bk"));
819        state.record_deleted("arn:aws:s3:::bk");
820        assert!(state.is_deleted("arn:aws:s3:::bk"));
821    }
822
823    #[test]
824    fn cleanup_state_record_deleted_is_idempotent() {
825        let mut state = CleanupState::new("s");
826        state.record_deleted("arn:x");
827        state.record_deleted("arn:x");
828        assert_eq!(state.deleted_arns.len(), 1);
829    }
830
831    #[test]
832    fn cleanup_state_record_deleted_clears_prior_failure() {
833        let mut state = CleanupState::new("s");
834        let r = TaggedResource::from_arn("arn:aws:s3:::bk").unwrap();
835        state.record_failed(&r, "temporary throttle");
836        assert_eq!(state.failed.len(), 1);
837        state.record_deleted(&r.arn);
838        assert!(state.failed.is_empty());
839        assert!(state.is_deleted(&r.arn));
840    }
841
842    #[test]
843    fn cleanup_state_record_failed_updates_reason() {
844        let mut state = CleanupState::new("s");
845        let r = TaggedResource::from_arn("arn:aws:s3:::bk").unwrap();
846        state.record_failed(&r, "first error");
847        state.record_failed(&r, "second error");
848        assert_eq!(state.failed.len(), 1);
849        assert_eq!(state.failed[0].1, "second error");
850    }
851
852    #[test]
853    fn cleanup_state_roundtrips_through_store() {
854        let tmp = tempfile::tempdir().unwrap();
855        let store = CleanupStateStore::with_dir(tmp.path()).unwrap();
856        let mut state = store.load_or_new("sess-xyz").unwrap();
857        assert!(state.deleted_arns.is_empty());
858        state.record_deleted("arn:aws:s3:::bk1");
859        store.save(&state).unwrap();
860
861        let reloaded = store.load_or_new("sess-xyz").unwrap();
862        assert_eq!(reloaded.deleted_arns, vec!["arn:aws:s3:::bk1".to_string()]);
863        assert!(store.exists("sess-xyz"));
864
865        store.remove("sess-xyz");
866        assert!(!store.exists("sess-xyz"));
867    }
868
869    #[test]
870    fn cleanup_state_store_returns_fresh_state_for_unknown_session() {
871        let tmp = tempfile::tempdir().unwrap();
872        let store = CleanupStateStore::with_dir(tmp.path()).unwrap();
873        let state = store.load_or_new("brand-new").unwrap();
874        assert_eq!(state.session_id, "brand-new");
875        assert!(state.deleted_arns.is_empty());
876        assert!(state.failed.is_empty());
877    }
878
879    #[test]
880    fn delete_tier_puts_iam_policies_last() {
881        let role = TaggedResource::from_arn("arn:aws:iam::1:role/R").unwrap();
882        let policy = TaggedResource::from_arn("arn:aws:iam::1:policy/P").unwrap();
883        let leaf = TaggedResource::from_arn("arn:aws:s3:::bk").unwrap();
884        assert!(delete_tier(&leaf) < delete_tier(&role));
885        assert!(delete_tier(&role) < delete_tier(&policy));
886    }
887
888    #[test]
889    fn sort_for_deletion_orders_leaves_roles_policies() {
890        let mut items = vec![
891            TaggedResource::from_arn("arn:aws:iam::1:policy/P").unwrap(),
892            TaggedResource::from_arn("arn:aws:s3:::bk").unwrap(),
893            TaggedResource::from_arn("arn:aws:iam::1:role/R").unwrap(),
894            TaggedResource::from_arn("arn:aws:dynamodb:us-east-1:1:table/T").unwrap(),
895        ];
896        sort_for_deletion(&mut items);
897        // Leaves (s3, dynamodb) first, role next, policy last.
898        assert_eq!(items[0].service, "s3");
899        assert_eq!(items[1].service, "dynamodb");
900        assert_eq!(
901            (items[2].service.as_str(), items[2].resource_type.as_str()),
902            ("iam", "role")
903        );
904        assert_eq!(
905            (items[3].service.as_str(), items[3].resource_type.as_str()),
906            ("iam", "policy")
907        );
908    }
909
910    #[test]
911    fn sort_for_deletion_is_stable_within_tier() {
912        // Two leaves; sort must preserve input order when tiers are equal.
913        let mut items = vec![
914            TaggedResource::from_arn("arn:aws:s3:::first").unwrap(),
915            TaggedResource::from_arn("arn:aws:s3:::second").unwrap(),
916        ];
917        sort_for_deletion(&mut items);
918        assert_eq!(items[0].name, "first");
919        assert_eq!(items[1].name, "second");
920    }
921
922    #[test]
923    fn cost_hint_rds_is_nonzero_and_usage_dependent() {
924        let db = TaggedResource::from_arn("arn:aws:rds:us-east-1:1:db:prod").unwrap();
925        let h = estimate_daily_cost(&db).expect("rds has a hint");
926        assert!(h.usd_per_day > 0.0);
927        assert!(h.usage_dependent);
928    }
929
930    #[test]
931    fn cost_hint_iam_and_lambda_are_free() {
932        let role = TaggedResource::from_arn("arn:aws:iam::1:role/R").unwrap();
933        let func = TaggedResource::from_arn("arn:aws:lambda:us-east-1:1:function:f").unwrap();
934        assert!(estimate_daily_cost(&role).is_none());
935        assert!(estimate_daily_cost(&func).is_none());
936    }
937
938    #[test]
939    fn cost_hint_kms_is_flat_and_not_usage_dependent() {
940        let key = TaggedResource::from_arn("arn:aws:kms:us-east-1:1:key/uuid").unwrap();
941        let h = estimate_daily_cost(&key).expect("kms has a hint");
942        assert!(h.usd_per_day > 0.0);
943        assert!(!h.usage_dependent);
944    }
945
946    #[test]
947    fn cost_hint_s3_is_usage_dependent_floor_zero() {
948        let bucket = TaggedResource::from_arn("arn:aws:s3:::mybk").unwrap();
949        let h = estimate_daily_cost(&bucket).expect("s3 has a hint");
950        assert_eq!(h.usd_per_day, 0.0);
951        assert!(h.usage_dependent);
952    }
953
954    #[test]
955    fn list_pending_returns_only_states_with_failures() {
956        let tmp = tempfile::tempdir().unwrap();
957        let store = CleanupStateStore::with_dir(tmp.path()).unwrap();
958        // Session A: has failures → should appear.
959        let mut a = store.load_or_new("sess-a").unwrap();
960        let res = TaggedResource::from_arn("arn:aws:s3:::stuck").unwrap();
961        a.record_failed(&res, "throttled");
962        store.save(&a).unwrap();
963        // Session B: only deleted, no failures → should NOT appear.
964        let mut b = store.load_or_new("sess-b").unwrap();
965        b.record_deleted("arn:aws:s3:::ok");
966        store.save(&b).unwrap();
967        // Session C: pristine → load_or_new doesn't persist, skip save.
968
969        let pending = store.list_pending();
970        assert_eq!(pending.len(), 1);
971        assert_eq!(pending[0].session_id, "sess-a");
972        assert_eq!(pending[0].failed.len(), 1);
973    }
974
975    #[test]
976    fn list_pending_is_empty_when_no_state_files() {
977        let tmp = tempfile::tempdir().unwrap();
978        let store = CleanupStateStore::with_dir(tmp.path()).unwrap();
979        assert!(store.list_pending().is_empty());
980    }
981
982    #[test]
983    fn total_cost_sums_hints_and_flags_usage() {
984        let items = vec![
985            TaggedResource::from_arn("arn:aws:kms:us-east-1:1:key/a").unwrap(),
986            TaggedResource::from_arn("arn:aws:secretsmanager:us-east-1:1:secret:s-AbCd").unwrap(),
987            TaggedResource::from_arn("arn:aws:iam::1:role/R").unwrap(),
988            TaggedResource::from_arn("arn:aws:s3:::bk").unwrap(),
989        ];
990        let (total, any_usage) = estimate_daily_cost_total(&items);
991        // kms ($0.033) + secret ($0.013) + iam (none) + s3 (floor 0)
992        assert!((total - 0.046).abs() < 0.0005);
993        // s3 flips the usage-dependent flag.
994        assert!(any_usage);
995    }
996}