Skip to main content

nuwax_cli/docker_service/
health_check.rs

1use crate::docker_service::{DockerServiceError, DockerServiceResult};
2use bollard::Docker;
3use bollard::models::HealthStatusEnum;
4use bollard::query_parameters::{InspectContainerOptions, ListContainersOptions};
5use client_core::constants::timeout;
6use client_core::container::DockerManager;
7use rust_i18n::t;
8use serde::{Deserialize, Serialize};
9use std::str::FromStr;
10use std::time::Duration;
11use std::{collections::HashSet, sync::Arc};
12use tracing::{debug, error, info, warn};
13
/// Restart policy of a Docker container, mirroring the values accepted by the
/// Compose `restart` key.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum RestartPolicy {
    /// Never restart automatically (`restart: no`).
    No,
    /// Always restart (`restart: always`).
    Always,
    /// Restart unless the container was stopped manually (`restart: unless-stopped`).
    UnlessStopped,
    /// Restart on failure (`restart: on-failure`).
    OnFailure,
    /// Restart on failure with a maximum retry count (e.g. `restart: on-failure:3`).
    OnFailureWithRetries(u32),
}
28
29impl FromStr for RestartPolicy {
30    type Err = anyhow::Error;
31
32    fn from_str(s: &str) -> Result<Self, Self::Err> {
33        RestartPolicy::parse(s).ok_or_else(|| anyhow::anyhow!("Invalid restart policy: {}", s))
34    }
35}
36
37impl std::fmt::Display for RestartPolicy {
38    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
39        let s = match self {
40            Self::No => "no",
41            Self::Always => "always",
42            Self::UnlessStopped => "unless-stopped",
43            Self::OnFailure => "on-failure",
44            Self::OnFailureWithRetries(retries) => return write!(f, "on-failure:{retries}"),
45        };
46        write!(f, "{}", s)
47    }
48}
49
50impl RestartPolicy {
51    /// 从字符串解析重启策略
52    pub fn parse(s: &str) -> Option<Self> {
53        match s.to_lowercase().as_str() {
54            "no" | "false" => Some(Self::No),
55            "always" => Some(Self::Always),
56            "unless-stopped" => Some(Self::UnlessStopped),
57            "on-failure" => Some(Self::OnFailure),
58            s if s.starts_with("on-failure:") => {
59                if let Ok(retries) = s[11..].parse::<u32>() {
60                    Some(Self::OnFailureWithRetries(retries))
61                } else {
62                    Some(Self::OnFailure)
63                }
64            }
65            _ => None,
66        }
67    }
68
69    /// 转换为字符串
70    pub fn as_str(&self) -> String {
71        match self {
72            Self::No => "no".to_string(),
73            Self::Always => "always".to_string(),
74            Self::UnlessStopped => "unless-stopped".to_string(),
75            Self::OnFailure => "on-failure".to_string(),
76            Self::OnFailureWithRetries(retries) => format!("on-failure:{retries}"),
77        }
78    }
79
80    /// 判断是否为一次性任务
81    pub fn is_oneshot(&self) -> bool {
82        matches!(self, Self::No)
83    }
84
85    /// 判断是否应该持续运行
86    pub fn should_keep_running(&self) -> bool {
87        matches!(
88            self,
89            Self::Always | Self::UnlessStopped | Self::OnFailure | Self::OnFailureWithRetries(_)
90        )
91    }
92
93    /// 获取显示名称
94    pub fn display_name(&self) -> String {
95        match self {
96            Self::No => t!("restart_policy.no"),
97            Self::Always => t!("restart_policy.always"),
98            Self::UnlessStopped => t!("restart_policy.unless_stopped"),
99            Self::OnFailure => t!("restart_policy.on_failure"),
100            Self::OnFailureWithRetries(_) => t!("restart_policy.on_failure_n"),
101        }
102        .to_string()
103    }
104}
105
/// Docker Compose label information attached to a container.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComposeLabels {
    /// Compose project name.
    pub project: Option<String>,
    /// Compose service name.
    pub service: Option<String>,
    /// Container number within the service.
    pub container_number: Option<String>,
    /// Whether the container is a one-off task.
    pub oneoff: Option<bool>,
    /// Path(s) of the compose configuration file(s).
    pub config_files: Option<String>,
    /// Working directory of the compose project.
    pub working_dir: Option<String>,
}
122
/// Status of a single container as seen by the health check.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum ContainerStatus {
    /// The container is running.
    Running,
    /// The container is stopped.
    Stopped,
    /// The container is starting.
    Starting,
    /// The container has completed (a one-shot task that exited successfully).
    Completed,
    /// The status is unknown.
    Unknown,
}
137
138impl ContainerStatus {
139    /// 从ducker的容器状态和退出码解析状态
140    pub fn from_ducker_status(running: bool, status: &str, is_oneshot: bool) -> Self {
141        if running {
142            ContainerStatus::Running
143        } else if status.to_lowercase().contains("exited") {
144            if is_oneshot {
145                // 一次性任务:检查退出码
146                if status.contains("(0)") {
147                    ContainerStatus::Completed // 成功完成
148                } else {
149                    ContainerStatus::Stopped // 失败退出
150                }
151            } else {
152                ContainerStatus::Stopped // 持续服务退出都视为异常
153            }
154        } else if status.to_lowercase().contains("restarting")
155            || status.to_lowercase().contains("created")
156        {
157            ContainerStatus::Starting
158        } else {
159            ContainerStatus::Unknown
160        }
161    }
162
163    /// 获取状态的显示名称
164    pub fn display_name(&self) -> String {
165        match self {
166            ContainerStatus::Running => t!("container_status.running").to_string(),
167            ContainerStatus::Stopped => t!("container_status.stopped").to_string(),
168            ContainerStatus::Starting => t!("container_status.starting").to_string(),
169            ContainerStatus::Completed => t!("container_status.completed").to_string(),
170            ContainerStatus::Unknown => t!("container_status.unknown").to_string(),
171        }
172    }
173    /// 判断是否运行中
174    pub fn is_running(&self) -> bool {
175        matches!(self, ContainerStatus::Running)
176    }
177
178    /// 判断状态是否健康(运行中或已完成都算健康)
179    pub fn is_healthy(&self) -> bool {
180        matches!(self, ContainerStatus::Running | ContainerStatus::Completed)
181    }
182
183    /// 判断状态是否为过渡状态(需要继续等待)
184    pub fn is_transitioning(&self) -> bool {
185        matches!(self, ContainerStatus::Starting)
186    }
187
188    /// 判断状态是否为失败状态
189    pub fn is_failed(&self) -> bool {
190        matches!(self, ContainerStatus::Stopped | ContainerStatus::Unknown)
191    }
192}
193
/// Information about one container (or compose service) in a health report.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContainerInfo {
    /// Container name.
    pub name: String,
    /// Container status.
    pub status: ContainerStatus,
    /// Image name.
    pub image: String,
    /// Port mappings.
    pub ports: Vec<String>,
    /// Uptime, when known.
    pub uptime: Option<String>,
    /// Docker health-check status, when available.
    pub health: Option<HealthStatusEnum>,
    /// Whether the container is a one-shot task.
    pub is_oneshot: bool,
    /// Restart policy, when known.
    pub restart: Option<RestartPolicy>,
}
214
215impl ContainerInfo {
216    /// 判断是否为一次性任务
217    /// 仅基于restart策略进行判断,不使用名称匹配
218    pub fn is_oneshot(&self) -> bool {
219        match &self.restart {
220            Some(policy) => policy.is_oneshot(),
221            None => {
222                // 如果没有restart信息,默认不是一次性任务
223                // 这样更安全,避免误判持续服务为一次性任务
224                false
225            }
226        }
227    }
228
229    /// 判断是否为持续服务(需要一直运行)
230    /// 仅基于restart策略进行判断,不使用名称匹配
231    pub fn is_persistent_service(&self) -> bool {
232        match &self.restart {
233            Some(policy) => policy.should_keep_running(),
234            None => {
235                // 如果没有restart信息,默认认为是持续服务
236                // 这样更安全,避免误判持续服务导致备份时出现问题
237                true
238            }
239        }
240    }
241
242    /// 获取restart策略的显示字符串
243    pub fn get_restart_display(&self) -> String {
244        match &self.restart {
245            Some(policy) => policy.as_str(),
246            None => t!("restart_policy.unknown").to_string(),
247        }
248    }
249}
250
/// Overall status of the whole set of services.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum ServiceStatus {
    /// All services are running.
    AllRunning,
    /// Some services are running.
    PartiallyRunning,
    /// All services are stopped.
    AllStopped,
    /// Services are starting up.
    Starting,
    /// The service status is unknown.
    Unknown,
    /// No containers were found.
    NoContainer,
}
267
268impl ServiceStatus {
269    /// 获取状态的显示名称
270    pub fn display_name(&self) -> String {
271        match self {
272            ServiceStatus::AllRunning => t!("service_status.all_running").to_string(),
273            ServiceStatus::PartiallyRunning => t!("service_status.partially_running").to_string(),
274            ServiceStatus::AllStopped => t!("service_status.all_stopped").to_string(),
275            ServiceStatus::Starting => t!("service_status.starting").to_string(),
276            ServiceStatus::Unknown => t!("service_status.unknown").to_string(),
277            ServiceStatus::NoContainer => t!("service_status.no_container").to_string(),
278        }
279    }
280
281    /// 判断状态是否健康
282    pub fn is_healthy(&self) -> bool {
283        matches!(self, ServiceStatus::AllRunning)
284    }
285}
286
/// Health check report.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthReport {
    /// Per-container details.
    pub containers: Vec<ContainerInfo>,
    /// Number of running containers.
    // NOTE(review): in the code visible here this field is only initialized to 0;
    // the `get_*_count` accessors derive their values from `containers` instead.
    running_count: usize,
    /// Number of completed containers (one-shot tasks).
    // NOTE(review): same as above — initialized to 0 and not updated in the visible code.
    one_shot_count: usize,
    /// Total number of containers.
    // NOTE(review): same as above — initialized to 0 and not updated in the visible code.
    total_count: usize,
    /// Time at which the check was performed.
    pub check_time: chrono::DateTime<chrono::Utc>,
    /// Error messages collected during the check.
    pub errors: Vec<String>,
}
303
304impl HealthReport {
305    /// 添加容器信息
306    pub fn add_container(&mut self, container: ContainerInfo) {
307        self.containers.push(container);
308    }
309
310    /// 添加错误信息
311    pub fn add_error(&mut self, error: String) {
312        self.errors.push(error);
313    }
314
315    /// 完成报告并计算整体状态
316    pub fn finalize(&self) -> ServiceStatus {
317        let healthy_count = self.get_healthy_count();
318        let total_count = self.get_total_count();
319        let one_shot_count = self.get_one_shot_count();
320        let running_count = self.get_running_count();
321
322        if total_count == 0 {
323            ServiceStatus::NoContainer
324        } else if (healthy_count + one_shot_count) == total_count {
325            ServiceStatus::AllRunning
326        } else if running_count == 0 {
327            ServiceStatus::AllStopped
328        } else {
329            // 检查是否有正在启动的容器
330            let has_starting = self.containers.iter().any(|c| c.status.is_transitioning());
331            if has_starting {
332                ServiceStatus::Starting
333            } else {
334                ServiceStatus::PartiallyRunning
335            }
336        }
337    }
338
339    /// 获取运行中的容器列表
340    pub fn get_running_containers(&self) -> Vec<&ContainerInfo> {
341        self.containers
342            .iter()
343            .filter(|c| matches!(c.status, ContainerStatus::Running))
344            .collect()
345    }
346
347    /// 获取已完成的容器列表
348    pub fn get_completed_containers(&self) -> Vec<&ContainerInfo> {
349        self.containers
350            .iter()
351            .filter(|c| matches!(c.status, ContainerStatus::Completed))
352            .collect()
353    }
354
355    /// 获取失败的容器列表
356    pub fn get_failed_containers(&self) -> Vec<&ContainerInfo> {
357        self.containers
358            .iter()
359            .filter(|c| c.status.is_failed())
360            .collect()
361    }
362
363    /// 获取运行中的容器数量 ,不保证一次性的初始化容器
364    pub fn get_running_count(&self) -> usize {
365        self.containers
366            .iter()
367            .filter(|c| c.status.is_running())
368            .count()
369    }
370
371    /// 获取总容器数
372    pub fn get_total_count(&self) -> usize {
373        self.containers.len()
374    }
375
376    /// 获取正在启动的容器列表
377    pub fn get_starting_containers(&self) -> Vec<&ContainerInfo> {
378        self.containers
379            .iter()
380            .filter(|c| c.status.is_transitioning())
381            .collect()
382    }
383
384    /// 获取一次性容器数量
385    pub fn get_one_shot_count(&self) -> usize {
386        self.containers.iter().filter(|c| c.is_oneshot()).count()
387    }
388
389    /// 获取健康容器总数
390    pub fn get_healthy_count(&self) -> usize {
391        self.containers
392            .iter()
393            .filter_map(|c| c.health)
394            .filter(|&c| c == HealthStatusEnum::HEALTHY)
395            .count()
396    }
397
398    /// 获取失败容器名称列表
399    pub fn get_failed_container_names(&self) -> Vec<String> {
400        self.get_failed_containers()
401            .iter()
402            .map(|c| c.name.clone())
403            .collect()
404    }
405
406    /// 获取状态摘要字符串
407    pub fn get_status_summary(&self) -> String {
408        let failed_containers = self.get_failed_container_names();
409        let starting_containers: Vec<String> = self
410            .get_starting_containers()
411            .iter()
412            .map(|c| c.name.clone())
413            .collect();
414
415        let mut summary = format!(
416            "📊 [Healthy: {}/{}] ✅ Running: {} | ✔️ One-shot (init): {} | ❌ Failed: {} | ⏳ Starting: {}",
417            self.get_healthy_count(),
418            self.get_total_count(),
419            self.get_running_count(),
420            self.get_one_shot_count(),
421            failed_containers.len(),
422            starting_containers.len()
423        );
424
425        if !failed_containers.is_empty() {
426            summary.push_str(&format!(
427                " | Failed containers: {}",
428                failed_containers.join(", ")
429            ));
430        }
431
432        if !starting_containers.is_empty() {
433            summary.push_str(&format!(" | Starting: {}", starting_containers.join(", ")));
434        }
435
436        summary
437    }
438
439    /// 检查是否所有服务都健康
440    pub fn is_all_healthy(&self) -> bool {
441        let healthy_count = self.get_healthy_count();
442        let one_shot_count = self.get_one_shot_count();
443        let total_count = self.get_total_count();
444        healthy_count > 0 && healthy_count == total_count - one_shot_count
445    }
446
447    /// 获取所有健康容器(运行中 + 已完成)
448    pub fn healthy_containers(&self) -> Vec<&ContainerInfo> {
449        self.containers
450            .iter()
451            .filter(|c| c.status.is_healthy())
452            .collect()
453    }
454
455    /// 获取总容器数
456    pub fn total_containers(&self) -> usize {
457        self.containers.len()
458    }
459
460    /// 获取失败容器列表(别名,兼容性)
461    pub fn failed_containers(&self) -> Vec<&ContainerInfo> {
462        self.get_failed_containers()
463    }
464}
465
466impl Default for HealthReport {
467    fn default() -> Self {
468        Self {
469            containers: Vec::new(),
470            running_count: 0,
471            one_shot_count: 0,
472            total_count: 0,
473            check_time: chrono::Utc::now(),
474            errors: Vec::new(),
475        }
476    }
477}
478
/// Health checker for the compose-managed services.
pub struct HealthChecker {
    // Shared Docker manager used to read the compose configuration and container state.
    docker_manager: Arc<DockerManager>,
}
483
484impl HealthChecker {
    /// Creates a new health checker backed by the given Docker manager.
    pub fn new(docker_manager: Arc<DockerManager>) -> Self {
        Self { docker_manager }
    }
489
490    /// 获取服务的restart策略
491    async fn get_restart_policy(&self, service_name: &str) -> Option<RestartPolicy> {
492        if let Ok(service_config) = self.docker_manager.parse_service_config(service_name).await
493            && let Some(restart_str) = service_config.restart
494        {
495            return RestartPolicy::parse(&restart_str);
496        }
497        None
498    }
499
500    /// 执行健康检查
501    /// 使用基于Docker Compose标签的精确匹配
502    pub async fn health_check(&self) -> DockerServiceResult<HealthReport> {
503        info!("🏥 Starting health check...");
504
505        // 获取 docker-compose 项目信息
506        let compose_project_name = self.docker_manager.get_compose_project_name();
507        let compose_file_path = self
508            .docker_manager
509            .get_compose_file()
510            .to_string_lossy()
511            .to_string();
512
513        info!("📋 Docker Compose Project Info:");
514        info!("   - Project name: {name}", name = compose_project_name);
515        info!("   - Config file: {path}", path = compose_file_path);
516
517        // 创建健康检查报告
518        let mut report = HealthReport::default();
519
520        // 获取compose文件中定义的所有服务
521        let compose_services = self
522            .docker_manager
523            .get_compose_service_names()
524            .await
525            .unwrap_or_else(|e| {
526                error!(
527                    "Failed to get compose services: {error}",
528                    error = e.to_string()
529                );
530                HashSet::new()
531            });
532
533        if compose_services.is_empty() {
534            warn!("⚠️ No services defined in compose file");
535            return Ok(report);
536        }
537
538        info!(
539            "🔍 Services defined in compose file: {services}",
540            services = format!("{:?}", compose_services)
541        );
542
543        // 获取系统中所有容器
544        let all_containers = self
545            .docker_manager
546            .get_all_containers_status()
547            .await
548            .unwrap_or_else(|e| {
549                error!(
550                    "Failed to get container status: {error}",
551                    error = e.to_string()
552                );
553                Vec::new()
554            });
555
556        info!(
557            "📊 Found {count} containers in system",
558            count = all_containers.len()
559        );
560
561        // 🔧 使用标签精确匹配容器
562        let mut found_services = HashSet::new();
563        let mut added_containers = HashSet::new();
564
565        // 第一轮:处理正在运行的和已停止的容器
566        for service in &all_containers {
567            // 🆕 使用标签精确匹配
568            if let Some(service_name) = self.get_container_service_name(&service.name).await {
569                // 验证是否属于当前项目
570                if self
571                    .is_container_from_compose_project(
572                        &service.name,
573                        &compose_project_name,
574                        &compose_file_path,
575                    )
576                    .await
577                {
578                    // 检查是否在compose文件中定义
579                    if compose_services.contains(&service_name) {
580                        info!(
581                            "✅ Matched compose service: {container} -> {service}",
582                            container = service.name,
583                            service = service_name
584                        );
585
586                        // 🔧 防重复:检查是否已经添加过这个compose服务
587                        if added_containers.contains(&service_name) {
588                            warn!(
589                                "⚠️ Skipping duplicate compose service: {service} (container: {container})",
590                                service = service_name,
591                                container = service.name
592                            );
593                            continue;
594                        }
595
596                        found_services.insert(service_name.clone());
597                        added_containers.insert(service_name.clone());
598
599                        // 检查是否为一次性服务
600                        let is_oneshot = self.is_oneshot_service(&service_name).await;
601
602                        // 获取restart策略
603                        let restart_policy = self.get_restart_policy(&service_name).await;
604
605                        // 使用增强的状态解析逻辑
606                        let status = self.determine_container_status(service, is_oneshot);
607
608                        // 获取容器的健康检查状态
609                        let health = self.get_container_health_status(&service.name).await;
610
611                        let container = ContainerInfo {
612                            name: service_name.clone(), // 使用compose中定义的服务名
613                            status,
614                            image: service.image.clone(),
615                            ports: service.ports.clone(),
616                            uptime: None,
617                            health,
618                            is_oneshot,
619                            restart: restart_policy,
620                        };
621
622                        debug!(
623                            "📦 Added container: {} (status: {:?}, oneshot: {})",
624                            container.name, container.status, is_oneshot
625                        );
626                        report.add_container(container);
627                    } else {
628                        // 不在compose文件中定义的容器(可能是历史遗留)
629                        warn!(
630                            "⏭️ Skipping non-project container: {container} (service: {service})",
631                            container = service.name,
632                            service = service_name
633                        );
634                    }
635                } else {
636                    // 不属于当前项目的容器
637                    debug!(
638                        "⏭️ Skipping other project container: {container} (project: other)",
639                        container = service.name
640                    );
641                }
642            } else {
643                // 无法获取服务名称,可能不是compose容器
644                debug!(
645                    "⏭️ Skipping non-compose container: {container} (no label info)",
646                    container = service.name
647                );
648            }
649        }
650
651        info!(
652            "📊 Round 1 complete: added {count} containers",
653            count = added_containers.len()
654        );
655
656        // 为未找到的compose服务创建"已停止"状态的条目
657        for service_name in &compose_services {
658            if !found_services.contains(service_name) {
659                // 🔧 防重复:再次检查是否已经添加过
660                if added_containers.contains(service_name) {
661                    warn!(
662                        "⚠️ Skipping duplicate stopped service: {service}",
663                        service = service_name
664                    );
665                    continue;
666                }
667
668                let is_oneshot = self.is_oneshot_service(service_name).await;
669
670                // 获取restart策略
671                let restart_policy = self.get_restart_policy(service_name).await;
672
673                let status = if is_oneshot {
674                    // 一次性服务未运行通常表示已完成
675                    ContainerStatus::Completed
676                } else {
677                    // 持续服务未运行表示已停止
678                    ContainerStatus::Stopped
679                };
680
681                let container = ContainerInfo {
682                    name: service_name.clone(),
683                    status,
684                    image: t!("health_check.not_started_label").to_string(),
685                    ports: Vec::new(),
686                    uptime: None,
687                    health: None,
688                    is_oneshot,
689                    restart: restart_policy,
690                };
691
692                info!(
693                    "📦 Adding stopped service: {name} (status: {status}, oneshot: {oneshot})",
694                    name = container.name,
695                    status = format!("{:?}", container.status),
696                    oneshot = is_oneshot
697                );
698                report.add_container(container);
699                added_containers.insert(service_name.clone());
700            }
701        }
702
703        info!(
704            "📊 Final stats: compose services={compose}, added containers={containers}",
705            compose = compose_services.len(),
706            containers = added_containers.len()
707        );
708
709        // 生成健康检查摘要
710        let summary = format!(
711            "{}: {}/{}",
712            t!("health_check.complete"),
713            report.get_healthy_count(),
714            report.get_total_count()
715        );
716
717        info!("🎯 {summary}", summary = summary);
718
719        Ok(report)
720    }
721
722    /// 智能判断容器状态
723    fn determine_container_status(
724        &self,
725        service: &client_core::container::ServiceInfo,
726        is_oneshot: bool,
727    ) -> ContainerStatus {
728        match service.status {
729            client_core::container::ServiceStatus::Running => ContainerStatus::Running,
730            client_core::container::ServiceStatus::Stopped => {
731                if is_oneshot {
732                    // 一次性任务停止通常表示已完成
733                    ContainerStatus::Completed
734                } else {
735                    ContainerStatus::Stopped
736                }
737            }
738            client_core::container::ServiceStatus::Unknown => ContainerStatus::Unknown,
739            client_core::container::ServiceStatus::Created => ContainerStatus::Unknown,
740            client_core::container::ServiceStatus::Restarting => ContainerStatus::Starting,
741        }
742    }
743
744    /// 检查服务是否为一次性任务 - 增强版
745    async fn is_oneshot_service(&self, service_name: &str) -> bool {
746        // 1. 尝试从docker-compose.yml文件解析restart策略
747        if let Ok(service_config) = self.docker_manager.parse_service_config(service_name).await
748            && let Some(restart_policy) = service_config.restart
749        {
750            // restart: "no" 表示不自动重启,通常是一次性任务
751            if restart_policy == "no" || restart_policy == "false" {
752                info!(
753                    "Service {service} restart policy: {policy} (oneshot)",
754                    service = service_name,
755                    policy = restart_policy
756                );
757                return true;
758            }
759            // restart: "always" 或 "unless-stopped" 表示应该一直运行
760            if restart_policy == "always"
761                || restart_policy == "unless-stopped"
762                || restart_policy == "on-failure"
763            {
764                info!(
765                    "Service {service} restart policy: {policy} (persistent)",
766                    service = service_name,
767                    policy = restart_policy
768                );
769                return false;
770            }
771        }
772
773        false
774    }
775
776    /// 获取容器的Docker Compose标签信息
777    /// 使用bollard库直接从Docker API获取容器标签信息
778    async fn get_container_labels(&self, container_name: &str) -> Option<ComposeLabels> {
779        match Docker::connect_with_socket_defaults() {
780            Ok(docker) => {
781                // 获取容器列表,查找指定容器
782                match docker
783                    .list_containers(Some(ListContainersOptions {
784                        all: true,
785                        ..Default::default()
786                    }))
787                    .await
788                {
789                    Ok(containers) => {
790                        for container in containers {
791                            // 检查容器名称是否匹配
792                            if let Some(names) = &container.names {
793                                let container_matches = names.iter().any(|name| {
794                                    // Docker容器名称通常以/开头,需要去掉
795                                    let clean_name = name.strip_prefix('/').unwrap_or(name);
796                                    clean_name == container_name
797                                });
798
799                                if container_matches {
800                                    if let Some(labels) = &container.labels {
801                                        return Some(ComposeLabels {
802                                            project: labels
803                                                .get("com.docker.compose.project")
804                                                .cloned(),
805                                            service: labels
806                                                .get("com.docker.compose.service")
807                                                .cloned(),
808                                            container_number: labels
809                                                .get("com.docker.compose.container-number")
810                                                .cloned(),
811                                            oneoff: labels
812                                                .get("com.docker.compose.oneoff")
813                                                .and_then(|v| v.parse::<bool>().ok())
814                                                .or_else(|| {
815                                                    labels
816                                                        .get("com.docker.compose.oneoff")
817                                                        .map(|v| v.to_lowercase() == "true")
818                                                }),
819                                            config_files: labels
820                                                .get("com.docker.compose.project.config_files")
821                                                .cloned(),
822                                            working_dir: labels
823                                                .get("com.docker.compose.project.working_dir")
824                                                .cloned(),
825                                        });
826                                    }
827                                    return None; // 找到容器但没有标签
828                                }
829                            }
830                        }
831                        None // 没有找到匹配的容器
832                    }
833                    Err(e) => {
834                        warn!(
835                            "Bollard failed to get container list: {error}",
836                            error = e.to_string()
837                        );
838                        None
839                    }
840                }
841            }
842            Err(e) => {
843                warn!(
844                    "Bollard failed to connect to Docker: {error}",
845                    error = e.to_string()
846                );
847                None
848            }
849        }
850    }
851
852    /// 验证容器是否属于指定的docker-compose项目
853    /// 基于标签精确匹配,避免名称匹配的不准确性
854    async fn is_container_from_compose_project(
855        &self,
856        container_name: &str,
857        project_name: &str,
858        compose_file_path: &str,
859    ) -> bool {
860        if let Some(labels) = self.get_container_labels(container_name).await {
861            // 1. 检查项目名称是否匹配
862            if let Some(label_project) = &labels.project {
863                if label_project != project_name {
864                    info!(
865                        "⚠️ Container {container} project mismatch: {label} vs {expected}",
866                        container = container_name,
867                        label = label_project,
868                        expected = project_name
869                    );
870                    return false;
871                }
872            } else {
873                info!(
874                    "⚠️ Container {container} missing project label",
875                    container = container_name
876                );
877                return false;
878            }
879
880            // 2. 检查配置文件路径是否匹配(处理相对路径vs绝对路径问题)
881            if let Some(label_config_files) = &labels.config_files {
882                // 将我们的配置文件路径转换为绝对路径
883                let compose_file_absolute =
884                    match std::path::Path::new(compose_file_path).canonicalize() {
885                        Ok(abs_path) => abs_path.to_string_lossy().to_string(),
886                        Err(_) => {
887                            // 如果无法获取绝对路径,尝试基于当前目录构建
888                            let current_dir = std::env::current_dir().unwrap_or_default();
889                            let full_path = current_dir.join(compose_file_path);
890                            full_path.to_string_lossy().to_string()
891                        }
892                    };
893
894                debug!(
895                    "🔍 Path comparison: container label path={}, local absolute path={}",
896                    label_config_files, compose_file_absolute
897                );
898
899                #[cfg(windows)]
900                fn normalize_win_path(path: &str) -> &str {
901                    if path.starts_with(r"\\?\") {
902                        &path[4..]
903                    } else {
904                        path
905                    }
906                }
907
908                #[cfg(windows)]
909                let matched = normalize_win_path(label_config_files)
910                    .eq_ignore_ascii_case(normalize_win_path(&compose_file_absolute));
911                #[cfg(not(windows))]
912                let matched = label_config_files == &compose_file_absolute;
913
914                if matched {
915                    debug!(
916                        "✅ Container {container} config path matched",
917                        container = container_name
918                    );
919                    return true;
920                } else {
921                    debug!(
922                        "❌ Container {container} config path mismatch: {label} vs {expected}",
923                        container = container_name,
924                        label = label_config_files,
925                        expected = compose_file_absolute
926                    );
927                    return false;
928                }
929            }
930
931            // 3. 如果没有配置文件路径信息,但项目名称匹配,则认为匹配
932            info!(
933                "⚠️ Container {container} missing config path, but project name matched",
934                container = container_name
935            );
936            true
937        } else {
938            // 如果无法获取标签,说明不是compose容器
939            info!(
940                "⚠️ Container {container} cannot get compose labels",
941                container = container_name
942            );
943            false
944        }
945    }
946
947    /// 根据标签获取容器的服务名称
948    async fn get_container_service_name(&self, container_name: &str) -> Option<String> {
949        self.get_container_labels(container_name)
950            .await
951            .and_then(|labels| labels.service)
952    }
953
954    /// 获取Docker容器的健康检查状态
955    async fn get_container_health_status(&self, container_name: &str) -> Option<HealthStatusEnum> {
956        match Docker::connect_with_socket_defaults() {
957            Ok(docker) => {
958                match docker
959                    .inspect_container(container_name, None::<InspectContainerOptions>)
960                    .await
961                {
962                    Ok(container_info) => container_info
963                        .state
964                        .and_then(|state| state.health.and_then(|health| health.status)),
965                    Err(e) => {
966                        warn!(
967                            "Cannot get health status for container {container}: {error}",
968                            container = container_name,
969                            error = e.to_string()
970                        );
971                        None
972                    }
973                }
974            }
975            Err(e) => {
976                warn!(
977                    "Cannot connect to Docker for health check: {error}",
978                    error = e.to_string()
979                );
980                None
981            }
982        }
983    }
984
985    /// 等待服务启动完成 - 智能等待策略
986    pub async fn wait_for_services_ready(
987        &self,
988        check_interval: Duration,
989    ) -> DockerServiceResult<HealthReport> {
990        use std::time::Instant;
991
992        // 最长检查180秒
993        let timeout = Duration::from_secs(timeout::HEALTH_CHECK_TIMEOUT);
994
995        let start_time = Instant::now();
996
997        info!(
998            "⏳ Starting service startup check, timeout: {timeout}s",
999            timeout = timeout.as_secs()
1000        );
1001
1002        loop {
1003            let elapsed = start_time.elapsed();
1004            if elapsed >= timeout {
1005                error!(
1006                    "⏰ Health check timeout! elapsed: {elapsed}s",
1007                    elapsed = elapsed.as_secs()
1008                );
1009                return Err(DockerServiceError::Timeout {
1010                    operation: t!("health_check.wait_operation").to_string(),
1011                    timeout_seconds: timeout.as_secs(),
1012                });
1013            }
1014
1015            // 执行健康检查
1016            let report = self.health_check().await?;
1017
1018            // 检查是否所有服务都已就绪
1019            if report.is_all_healthy() {
1020                info!(
1021                    "🎉 All services started! elapsed: {elapsed}s",
1022                    elapsed = elapsed.as_secs()
1023                );
1024                return Ok(report);
1025            } else {
1026                info!(
1027                    "⏳ Services starting... elapsed: {elapsed}s",
1028                    elapsed = elapsed.as_secs()
1029                );
1030                //打印尚未启动成功容器
1031                let failed_containers = report.failed_containers();
1032                if !failed_containers.is_empty() {
1033                    let failed_names: Vec<&str> =
1034                        failed_containers.iter().map(|c| c.name.as_str()).collect();
1035                    info!(
1036                        "⚠️ Not started containers: {names}",
1037                        names = format!("{:?}", failed_names)
1038                    );
1039                }
1040            }
1041
1042            tokio::time::sleep(check_interval).await;
1043        }
1044    }
1045
1046    /// 获取服务状态摘要
1047    pub async fn get_status_summary(&self) -> DockerServiceResult<String> {
1048        let report = self.health_check().await?;
1049
1050        let mut summary = format!(
1051            "{}: {} ({}/{})",
1052            t!("health_check.service_status"),
1053            t!("health_check.healthy"),
1054            report.healthy_containers().len(),
1055            report.total_containers()
1056        );
1057
1058        if !report.errors.is_empty() {
1059            summary.push_str(&format!(
1060                "\n{}: {}",
1061                t!("health_check.errors"),
1062                report.errors.join(", ")
1063            ));
1064        }
1065
1066        let failed_containers = report.failed_containers();
1067        if !failed_containers.is_empty() {
1068            let failed_names: Vec<&str> =
1069                failed_containers.iter().map(|c| c.name.as_str()).collect();
1070            summary.push_str(&format!(
1071                "\n{}: {:?}",
1072                t!("health_check.failed_containers"),
1073                failed_names
1074            ));
1075        }
1076
1077        Ok(summary)
1078    }
1079}
1080
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a non-oneshot container fixture without uptime or health data.
    fn fixture(
        name: &str,
        status: ContainerStatus,
        image: &str,
        ports: &[&str],
        restart: RestartPolicy,
    ) -> ContainerInfo {
        ContainerInfo {
            name: name.to_string(),
            status,
            image: image.to_string(),
            ports: ports.iter().map(|p| p.to_string()).collect(),
            uptime: None,
            health: None,
            is_oneshot: false,
            restart: Some(restart),
        }
    }

    /// One running and one starting container: the report finalizes as
    /// `Starting` with one running container out of two.
    #[test]
    fn test_health_report() {
        let mut report = HealthReport::default();

        report.add_container(fixture(
            "service1",
            ContainerStatus::Running,
            "test:latest",
            &["8080:8080"],
            RestartPolicy::UnlessStopped,
        ));
        report.add_container(fixture(
            "service2",
            ContainerStatus::Starting,
            "test2:latest",
            &[],
            RestartPolicy::Always,
        ));

        assert_eq!(report.finalize(), ServiceStatus::Starting);
        assert_eq!(report.get_running_count(), 1);
        assert_eq!(report.get_total_count(), 2);
    }
}