git-paw 0.3.0

Parallel AI Worktrees — orchestrate multiple AI coding CLI sessions across git worktrees
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
//! HTTP broker for agent coordination.
//!
//! Provides an HTTP server that agents use to publish messages, poll for
//! incoming messages, and report status. The broker runs on a background
//! tokio runtime and is managed through [`BrokerHandle`].
//!
//! # Lock discipline
//!
//! [`BrokerState`] wraps its inner state in an `RwLock`. **Guards MUST NOT be
//! held across `.await` boundaries.** The `clippy::await_holding_lock` lint is
//! enabled project-wide to catch violations at compile time. Use the
//! `read()` / `write()` methods to obtain guards inside synchronous closures
//! only.

pub mod delivery;
pub mod messages;
pub mod server;

use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::{Arc, RwLock};
use std::thread::JoinHandle;
use std::time::Instant;

use serde::Serialize;

use crate::config::BrokerConfig;
pub use messages::BrokerMessage;

/// Record of a known agent's latest state.
///
/// One record is kept per agent in [`BrokerStateInner::agents`], keyed by the
/// agent ID. The record is `Clone`, so readers can copy it out while holding
/// the state lock and then work on the copy.
#[derive(Debug, Clone)]
pub struct AgentRecord {
    /// Agent identifier (slugified branch name).
    pub agent_id: String,
    /// Last reported status label.
    pub status: String,
    /// When the agent last published a message.
    ///
    /// This is a monotonic [`Instant`], so it is only meaningful for computing
    /// ages within this process; it cannot be converted to wall-clock time.
    pub last_seen: Instant,
    /// The most recent message from this agent.
    ///
    /// NOTE(review): presumably `None` until the agent has published its first
    /// message — confirm against the code that creates records.
    pub last_message: Option<BrokerMessage>,
}

/// JSON-serializable snapshot of an agent's status for the `/status` endpoint
/// and the dashboard TUI.
///
/// Every field except `last_seen` is part of the serialized output; see the
/// note on that field.
#[derive(Debug, Clone, Serialize)]
pub struct AgentStatusEntry {
    /// Agent identifier (slugified branch name).
    pub agent_id: String,
    /// CLI name running in this agent's pane (e.g. "claude").
    pub cli: String,
    /// Current status label (e.g. "working", "done", "blocked").
    pub status: String,
    /// Seconds since the agent was last seen.
    ///
    /// NOTE(review): presumably computed from `last_seen` when the snapshot is
    /// built, so it goes stale as the snapshot ages — confirm at the call site.
    pub last_seen_seconds: u64,
    /// One-line summary from the last message.
    pub summary: String,
    /// When the agent was last seen (for age calculations in the dashboard).
    ///
    /// Excluded from the serialized output via `#[serde(skip)]`; only
    /// in-process consumers can read it.
    #[serde(skip)]
    pub last_seen: Instant,
}

/// Mutable broker state protected by an `RwLock`.
///
/// Instances live inside [`BrokerState`] and are reached through its
/// `read()` / `write()` guard accessors. All fields are public so that code
/// holding a guard can inspect and update them directly.
#[derive(Debug)]
pub struct BrokerStateInner {
    /// Known agents keyed by agent ID.
    pub agents: HashMap<String, AgentRecord>,
    /// Per-agent message inboxes: `(sequence_number, message)`.
    ///
    /// The map key is an agent ID.
    pub queues: HashMap<String, Vec<(u64, BrokerMessage)>>,
    /// Append-only message log for disk flush.
    ///
    /// NOTE(review): the tuple is presumably `(sequence_number, time the
    /// message was recorded, message)` — confirm against the writer.
    pub message_log: Vec<(u64, std::time::SystemTime, BrokerMessage)>,
}

/// Shared broker state.
///
/// Wraps [`BrokerStateInner`] in an `RwLock` for concurrent read access.
/// The sequence counter is a standalone [`AtomicU64`] outside the lock so
/// that sequence numbers can be allocated without coupling to the write
/// lock.
#[derive(Debug)]
pub struct BrokerState {
    /// Protected mutable state.
    inner: RwLock<BrokerStateInner>,
    /// Global sequence counter (starts at 0; first assigned value is 1).
    next_seq: AtomicU64,
    /// Optional path for periodic log flush to disk.
    pub log_path: Option<PathBuf>,
}

impl BrokerState {
    /// Creates a new empty broker state.
    pub fn new(log_path: Option<PathBuf>) -> Self {
        Self {
            inner: RwLock::new(BrokerStateInner {
                agents: HashMap::new(),
                queues: HashMap::new(),
                message_log: Vec::new(),
            }),
            next_seq: AtomicU64::new(0),
            log_path,
        }
    }

    /// Acquires a read lock on the inner state.
    ///
    /// # Panics
    ///
    /// Panics if the lock is poisoned (a thread panicked while holding it).
    pub fn read(&self) -> std::sync::RwLockReadGuard<'_, BrokerStateInner> {
        self.inner.read().expect("broker state lock poisoned")
    }

    /// Acquires a write lock on the inner state.
    ///
    /// # Panics
    ///
    /// Panics if the lock is poisoned (a thread panicked while holding it).
    pub fn write(&self) -> std::sync::RwLockWriteGuard<'_, BrokerStateInner> {
        self.inner.write().expect("broker state lock poisoned")
    }

    /// Atomically allocates the next sequence number (starting at 1).
    pub fn next_seq(&self) -> u64 {
        self.next_seq.fetch_add(1, Ordering::Relaxed) + 1
    }

    /// Returns the number of seconds since the broker was started.
    ///
    /// Used by the HTTP `/status` handler to report uptime. This value is
    /// informational only; callers should handle `0` gracefully.
    pub fn uptime_seconds(&self) -> u64 {
        // A dedicated `started_at` field would be more precise, but since
        // `uptime_seconds` is a best-effort diagnostic metric we return 0 here
        // to keep the struct lean.
        0
    }
}

/// Errors specific to broker operations.
///
/// All variants are produced by [`start_broker`]; the `Display` text of each
/// is intended to be shown to the user as-is.
#[derive(Debug, thiserror::Error)]
pub enum BrokerError {
    /// The configured port is already in use by a non-broker process.
    ///
    /// Returned when the pre-bind probe finds something listening on the port
    /// that does not identify itself as a git-paw broker.
    #[error(
        "port {port} is already in use by another process — change [broker] port in .git-paw/config.toml"
    )]
    PortInUse {
        /// The port that was occupied.
        port: u16,
        /// The underlying I/O error.
        source: std::io::Error,
    },

    /// A probe to an existing listener on the port timed out.
    ///
    /// Something accepted the connection but the probe could not classify it.
    #[error("broker probe timed out on port {port} — check for stuck processes on this port")]
    ProbeTimeout {
        /// The port that timed out.
        port: u16,
    },

    /// Binding to the address failed.
    ///
    /// Also used when the configured bind address cannot be parsed and when
    /// creating, configuring, or listening on the socket fails.
    #[error("failed to bind broker: {0}")]
    BindFailed(std::io::Error),

    /// Creating the tokio runtime failed.
    #[error("failed to create broker runtime: {0}")]
    RuntimeFailed(std::io::Error),
}

/// Handle to a running broker, including the optional flush thread.
///
/// When dropped, signals the flush thread to stop and joins it, then
/// shuts down the tokio runtime. If the handle is in "reattached" mode
/// (connected to an existing broker), dropping it is a no-op.
///
/// In reattached mode `runtime`, `shutdown_tx` and `flush_thread` are all
/// `None`, so the only thing `Drop` touches is the handle's own stop flag.
pub struct BrokerHandle {
    /// Shared broker state.
    pub state: Arc<BrokerState>,
    /// The tokio runtime powering the broker server.
    /// `None` when reattached to an existing broker.
    runtime: Option<tokio::runtime::Runtime>,
    /// Sends a shutdown signal to the server task.
    /// `None` when reattached to an existing broker.
    shutdown_tx: Option<tokio::sync::oneshot::Sender<()>>,
    /// The URL the broker is listening on.
    pub url: String,
    /// Flag to signal the flush thread to exit.
    /// Set to `true` (with `Release` ordering) when the handle is dropped.
    stop_flag: Arc<AtomicBool>,
    /// Flush thread join handle (present only when `log_path` is `Some`).
    flush_thread: Option<JoinHandle<()>>,
}

impl BrokerHandle {
    /// Builds a handle for a broker that is already running elsewhere.
    ///
    /// Nothing is owned in this mode: there is no runtime, no shutdown
    /// channel and no flush thread, so dropping the handle leaves the
    /// existing broker untouched. `state` is whatever the caller passed in.
    fn reattached(url: String, state: Arc<BrokerState>) -> Self {
        // Never observed by any thread; it exists only because the field is
        // not optional.
        let stop_flag = Arc::new(AtomicBool::new(false));

        Self {
            url,
            state,
            stop_flag,
            runtime: None,
            shutdown_tx: None,
            flush_thread: None,
        }
    }
}

impl Drop for BrokerHandle {
    /// Tears the broker down in three ordered steps: flush thread, server
    /// task, runtime. Every step is skipped when its resource is absent, as
    /// is the case for a reattached handle.
    fn drop(&mut self) {
        // Step 1: raise the stop flag, then wait for the flush thread to
        // notice it and exit. A join error means the thread panicked; there
        // is nothing useful to do about that while dropping.
        self.stop_flag.store(true, Ordering::Release);
        if let Some(flusher) = self.flush_thread.take() {
            flusher.join().ok();
        }

        // Step 2: ask the server task to begin its graceful shutdown. The
        // send fails only if the receiving side is already gone.
        if let Some(signal) = self.shutdown_tx.take() {
            signal.send(()).ok();
        }

        // Step 3: allow in-flight requests up to 2 seconds to drain before
        // the runtime is torn down.
        if let Some(runtime) = self.runtime.take() {
            let grace = std::time::Duration::from_secs(2);
            runtime.shutdown_timeout(grace);
        }
    }
}

/// Result of probing an existing listener on the broker port.
///
/// [`start_broker`] maps these to its own outcomes: `NoListener` proceeds to
/// bind, `LiveBroker` yields a reattached handle, `ForeignServer` becomes
/// [`BrokerError::PortInUse`], and `Timeout` becomes
/// [`BrokerError::ProbeTimeout`].
#[derive(Debug, PartialEq, Eq)]
pub enum ProbeResult {
    /// Nothing is listening — safe to bind.
    ///
    /// Also reported when the address cannot be resolved or the TCP
    /// connection cannot be established in time.
    NoListener,
    /// A git-paw broker is already running.
    ///
    /// Recognised by a `"git_paw": true` marker in the `/status` response.
    LiveBroker,
    /// Something else is using the port.
    ///
    /// The listener answered with an HTTP response that lacks the broker
    /// marker.
    ForeignServer,
    /// The probe timed out.
    ///
    /// Covers every case where a connection was made but the listener could
    /// not be classified: the request could not be written, no response data
    /// was read, or the reply did not look like HTTP.
    Timeout,
}

/// Probes a URL to determine what broker (if any) is running there.
///
/// Public entry point for callers that need to inspect broker status without
/// starting a new server (e.g. the `status` subcommand).
///
/// `url` is expected to look like `http://127.0.0.1:9119`; the scheme is
/// optional and any trailing path (including a bare `/`) is ignored.
pub fn probe_broker(url: &str) -> ProbeResult {
    probe_existing_broker(url)
}

/// Probes an existing listener at the given URL to determine what is running.
///
/// Uses a lightweight `TcpStream` with a manual HTTP/1.1 GET to `/status`
/// to avoid pulling in a full HTTP client dependency.
///
/// Classification:
/// - address unresolvable or connection not established → `NoListener`
/// - response contains the `"git_paw": true` marker → `LiveBroker`
/// - response starts with `HTTP/` but has no marker → `ForeignServer`
/// - request could not be written, nothing was read, or the reply is not
///   HTTP → `Timeout`
///
/// Connect, read and write each use a 500 ms timeout. At most 64 KiB of the
/// response is buffered, and it is decoded lossily, so a listener that
/// streams endlessly or sends non-UTF-8 bytes is still classified.
fn probe_existing_broker(url: &str) -> ProbeResult {
    use std::io::{Read, Write};
    use std::net::{SocketAddr, TcpStream, ToSocketAddrs};
    use std::time::Duration;

    /// Timeout applied to connect and to each individual read/write call.
    const IO_TIMEOUT: Duration = Duration::from_millis(500);
    /// Cap on buffered response bytes; a status reply is far smaller.
    const MAX_RESPONSE_BYTES: u64 = 64 * 1024;

    // Reduce a URL like "http://127.0.0.1:9119/status" to "127.0.0.1:9119".
    let authority = url.strip_prefix("http://").unwrap_or(url);
    let authority = authority
        .split_once('/')
        .map_or(authority, |(host_port, _path)| host_port);

    // Fast path for literal "ip:port"; otherwise fall back to name resolution.
    let Some(socket_addr) = authority
        .parse::<SocketAddr>()
        .ok()
        .or_else(|| authority.to_socket_addrs().ok()?.next())
    else {
        return ProbeResult::NoListener;
    };

    let Ok(mut stream) = TcpStream::connect_timeout(&socket_addr, IO_TIMEOUT) else {
        return ProbeResult::NoListener;
    };

    // Best effort: if a timeout cannot be set the probe still proceeds.
    stream.set_read_timeout(Some(IO_TIMEOUT)).ok();
    stream.set_write_timeout(Some(IO_TIMEOUT)).ok();

    let request =
        format!("GET /status HTTP/1.1\r\nHost: {authority}\r\nConnection: close\r\n\r\n");
    if stream.write_all(request.as_bytes()).is_err() {
        return ProbeResult::Timeout;
    }

    // Read raw bytes rather than `read_to_string`: the latter throws away the
    // entire response if any byte is not valid UTF-8. A read error (typically
    // the read timeout on a keep-alive connection) is tolerated as long as
    // some data arrived before it.
    let mut raw = Vec::new();
    let read_failed = Read::take(&mut stream, MAX_RESPONSE_BYTES)
        .read_to_end(&mut raw)
        .is_err();
    if read_failed && raw.is_empty() {
        return ProbeResult::Timeout;
    }

    let response = String::from_utf8_lossy(&raw);
    if response.contains("\"git_paw\":true") || response.contains("\"git_paw\": true") {
        ProbeResult::LiveBroker
    } else if response.starts_with("HTTP/") {
        ProbeResult::ForeignServer
    } else {
        ProbeResult::Timeout
    }
}

/// Starts the HTTP broker server.
///
/// Probes the configured port first:
/// - If a live git-paw broker is found, returns a reattached handle.
/// - If a foreign server occupies the port, returns [`BrokerError::PortInUse`].
/// - If the probe times out, returns [`BrokerError::ProbeTimeout`].
/// - If nothing is listening, binds and starts the server.
///
/// Also spawns the background flush thread if `state.log_path` is set. The
/// thread is started only after the listener has been bound, so a failed
/// start never leaves a flush thread running.
///
/// # Errors
///
/// Besides the two probe errors above, returns
/// [`BrokerError::RuntimeFailed`] if the tokio runtime cannot be built and
/// [`BrokerError::BindFailed`] if the bind address cannot be parsed or the
/// socket cannot be created, configured, bound, or put into listening mode.
pub fn start_broker(
    config: &BrokerConfig,
    state: BrokerState,
) -> Result<BrokerHandle, BrokerError> {
    let url = config.url();
    let state = Arc::new(state);

    match probe_existing_broker(&url) {
        ProbeResult::LiveBroker => return Ok(BrokerHandle::reattached(url, state)),
        ProbeResult::ForeignServer => {
            return Err(BrokerError::PortInUse {
                port: config.port,
                source: std::io::Error::new(
                    std::io::ErrorKind::AddrInUse,
                    "port occupied by non-broker process",
                ),
            });
        }
        ProbeResult::Timeout => {
            return Err(BrokerError::ProbeTimeout { port: config.port });
        }
        ProbeResult::NoListener => {}
    }

    // --- Fallible setup. Nothing long-lived is spawned until all of this has
    // succeeded, so every early return below leaves no background work behind.

    let runtime = tokio::runtime::Builder::new_multi_thread()
        .enable_all()
        .build()
        .map_err(BrokerError::RuntimeFailed)?;

    let addr: std::net::SocketAddr = format!("{}:{}", config.bind, config.port).parse().map_err(
        |e: std::net::AddrParseError| {
            BrokerError::BindFailed(std::io::Error::new(std::io::ErrorKind::InvalidInput, e))
        },
    )?;

    // The socket is created inside the runtime because tokio's listener must
    // be registered with the runtime's I/O driver.
    let listener = runtime.block_on(async {
        // The socket family has to match the address family, otherwise the
        // bind below is rejected by the OS.
        let socket = match addr {
            std::net::SocketAddr::V4(_) => tokio::net::TcpSocket::new_v4(),
            std::net::SocketAddr::V6(_) => tokio::net::TcpSocket::new_v6(),
        }
        .map_err(BrokerError::BindFailed)?;
        socket
            .set_reuseaddr(true)
            .map_err(BrokerError::BindFailed)?;
        socket.bind(addr).map_err(BrokerError::BindFailed)?;
        socket.listen(1024).map_err(BrokerError::BindFailed)
    })?;

    // --- Infallible from here on.

    // Spawn flush thread if log_path is configured. The stop flag is shared
    // with the returned handle, whose `Drop` sets it and joins the thread.
    let stop_flag = Arc::new(AtomicBool::new(false));
    let flush_thread = if state.log_path.is_some() {
        let s = Arc::clone(&state);
        let f = Arc::clone(&stop_flag);
        Some(std::thread::spawn(move || {
            delivery::flush_loop(&s, &f);
        }))
    } else {
        None
    };

    let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>();

    let router = server::router(Arc::clone(&state));

    // Install SIGINT handler so the broker does not die on Ctrl+C.
    // The dashboard process is responsible for user-facing Ctrl+C handling.
    runtime.spawn(async {
        let _ = tokio::signal::ctrl_c().await;
    });

    // Serve until the handle's `Drop` fires the shutdown channel (or the
    // sender is dropped, which also resolves the receiver).
    runtime.spawn(async move {
        axum::serve(listener, router)
            .with_graceful_shutdown(async {
                let _ = shutdown_rx.await;
            })
            .await
            .ok();
    });

    Ok(BrokerHandle {
        state,
        runtime: Some(runtime),
        shutdown_tx: Some(shutdown_tx),
        url,
        stop_flag,
        flush_thread,
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Asks the OS for a loopback port that is currently unused.
    ///
    /// The temporary listener is closed before returning, so the port is free
    /// for the caller. Returns `None` when the environment does not allow
    /// binding at all, in which case the calling test is inconclusive.
    ///
    /// Hard-coded port ranges derived from the process ID are avoided on
    /// purpose: the tests in this module run in parallel within one process,
    /// so ranges that overlap can hand two tests the same port and make one
    /// of them reattach to the other's broker.
    fn free_port() -> Option<u16> {
        let listener = std::net::TcpListener::bind("127.0.0.1:0").ok()?;
        Some(listener.local_addr().ok()?.port())
    }

    /// Builds a loopback broker config for the given port.
    fn loopback_config(port: u16) -> BrokerConfig {
        BrokerConfig {
            enabled: true,
            port,
            bind: "127.0.0.1".to_string(),
        }
    }

    #[test]
    fn broker_state_new_is_empty() {
        let state = BrokerState::new(None);
        let inner = state.read();
        assert!(inner.agents.is_empty());
        assert!(inner.queues.is_empty());
        assert!(inner.message_log.is_empty());
    }

    #[test]
    fn next_seq_starts_at_one() {
        let state = BrokerState::new(None);
        assert_eq!(state.next_seq(), 1);
        assert_eq!(state.next_seq(), 2);
        assert_eq!(state.next_seq(), 3);
    }

    #[test]
    fn probe_no_listener() {
        // A port the OS just handed out and we released again.
        let Some(port) = free_port() else {
            return;
        };
        let result = probe_existing_broker(&format!("http://127.0.0.1:{port}"));
        assert_eq!(result, ProbeResult::NoListener);
    }

    #[test]
    fn reattached_handle_has_no_runtime() {
        let state = Arc::new(BrokerState::new(None));
        let h = BrokerHandle::reattached("http://127.0.0.1:9119".into(), state);
        assert!(h.runtime.is_none());
        assert!(h.shutdown_tx.is_none());
        assert!(h.flush_thread.is_none());
    }

    #[test]
    fn start_broker_on_free_port() {
        let Some(port) = free_port() else {
            return;
        };
        let config = loopback_config(port);
        let state = BrokerState::new(None);
        let handle = start_broker(&config, state);
        // If the port happens to be in use, the test is inconclusive — not a failure.
        if let Ok(h) = handle {
            assert!(h.url.contains(&config.port.to_string()));
            drop(h);
        }
    }

    #[test]
    fn start_broker_no_log_path_no_flush_thread() {
        let Some(port) = free_port() else {
            return;
        };
        let config = loopback_config(port);
        let state = BrokerState::new(None);
        if let Ok(handle) = start_broker(&config, state) {
            assert!(handle.flush_thread.is_none());
            drop(handle);
        }
    }

    #[test]
    fn start_broker_with_log_path_spawns_flush_thread() {
        let tmp = tempfile::tempdir().unwrap();
        let log_path = tmp.path().join("broker.log");
        let Some(port) = free_port() else {
            return;
        };
        let config = loopback_config(port);
        let state = BrokerState::new(Some(log_path));
        if let Ok(handle) = start_broker(&config, state) {
            assert!(handle.flush_thread.is_some());
            drop(handle);
        }
    }
}