# Identity and metadata of the experiment.
# NOTE(review): nesting was reconstructed from a whitespace-flattened copy;
# confirm that exactly these four keys belong under `experiment`.
experiment:
  name: pure-pull
  # Folded scalar: the lines below load as one space-joined paragraph.
  description: >
    All agents use list_tasks(ready=true) + claim() to self-select work.
    No coordinator, no push assignments, no specialization. Measures
    contention, distribution, and throughput under pure pull coordination.
  # Both values are quoted so loaders keep them as strings instead of
  # inferring a float (1.0) or a date (2026-01-29).
  version: '1.0'
  created: '2026-01-29'
# Workflow name and the coordination topology this experiment exercises.
# NOTE(review): the nesting level was reconstructed from a flattened copy;
# confirm whether `workflow` / `topology` live at top level or under
# `experiment`.
workflow: swarm
topology:
  # One mapping entry per line. Several `key: value` pairs on a single line
  # form a plain scalar containing ": ", which YAML parsers reject.
  phase_model: field
  coordination: pull
  granularity: fine
  default_deps: loose
  agents: generalists
# Agent-count sweep: four runs whose only differences are `name`,
# `description` and `agents` (1, 2, 4, 8). The `agent_config` mapping is
# repeated verbatim in every run (swarm workflow, same three tags,
# max_claims of 1).
runs:
  - name: pull-1
    # Quoted because the text contains ": ".
    description: 'Baseline: single agent (no contention possible)'
    agents: 1
    agent_config:
      workflow: swarm
      tags:
        - worker
        - implementer
        - code
      max_claims: 1
  - name: pull-2
    description: 'Two agents (minimal contention expected)'
    agents: 2
    agent_config:
      workflow: swarm
      tags:
        - worker
        - implementer
        - code
      max_claims: 1
  - name: pull-4
    description: 'Four agents (moderate parallelism)'
    agents: 4
    agent_config:
      workflow: swarm
      tags:
        - worker
        - implementer
        - code
      max_claims: 1
  - name: pull-8
    description: 'Eight agents (high parallelism, potential contention)'
    agents: 8
    agent_config:
      workflow: swarm
      tags:
        - worker
        - implementer
        - code
      max_claims: 1

# Task template for the experiment; the same path is passed as --template
# in the execution commands of this file.
template: experiments/templates/browser-parallel.json
# Per-agent protocol: connect once, then cycle
# find_ready -> claim -> work -> complete -> loop.
# NOTE(review): step-level placement of `loop_until`, `on_failure`, `notes`
# and `sub_steps` (as siblings of `params`) was reconstructed from a
# flattened copy; confirm against the consumer's schema.
protocol:
  name: pure-pull-loop
  description: >
    Each agent runs an autonomous loop: find ready tasks, claim one, work it,
    complete it, repeat until no tasks remain.
  steps:
    - action: connect
      description: 'Register with task-graph server'
      tool: connect
      params:
        workflow: swarm
        tags:
          - worker
          - implementer
          - code

    - action: find_ready
      description: 'Query for tasks that are ready to claim'
      tool: list_tasks
      params:
        ready: true
      loop_until: 'no ready tasks AND no working tasks'

    - action: claim
      description: 'Attempt to claim the highest-priority ready task'
      tool: claim
      # Explicit empty mapping (a bare `params:` would load as null).
      params: {}
      # NOTE(review): `retry_find` matches no `action` name in this list;
      # the loop step's `goto` uses `find_ready`. Confirm the consumer
      # recognizes this value.
      on_failure: retry_find
      notes: >
        If claim fails (race condition with another agent),
        go back to find_ready. This is the expected contention point.

    - action: work
      description: 'Execute the task'
      tool: update
      params:
        phase: implement
      sub_steps:
        - mark_file for any files being edited
        - Execute task implementation
        - Run tests if applicable
        - Attach results

    - action: complete
      description: 'Mark task completed and release resources'
      tool: update
      params:
        status: completed
      sub_steps:
        - unmark_file all held files
        - attach results with type "gate/results"
        - log_metrics with cost and token data

    - action: loop
      description: 'Return to find_ready'
      goto: find_ready
# Metric catalogue in four families: contention, distribution, throughput,
# coordination. Each measurement has either a SQL `query` or a prose
# `derivation`, and usually a human-readable `target`.
#
# Queries are folded scalars (`>`): every SQL line is kept at the same indent
# so the text folds into a single space-joined statement. Do not put comments
# inside the scalars; they would become part of the SQL.
#
# Timestamps are handled as epoch milliseconds: tasks_per_hour divides by
# 3600000.0 (ms per hour) and blocking_ratio scales strftime('%s') by 1000.
metrics:
  contention:
    description: 'Measure how agents compete for the same tasks'
    measurements:
      - name: claim_success_rate
        description: 'Ratio of successful claims to total claim attempts'
        # Per worker: counted rows, rows in 'working', and their ratio.
        # `success_rate` supplies the ratio that the description, the target
        # and the acceptable_contention criterion refer to; the two count
        # columns are unchanged. COUNT(*) is >= 1 in every group, so the
        # division cannot hit zero.
        # NOTE(review): this counts every 'working'/'pending' row of
        # task_sequence as a claim attempt; confirm that failed claims are
        # actually recorded that way.
        query: >
          SELECT
          worker_id,
          COUNT(*) as claim_attempts,
          SUM(CASE WHEN status = 'working' THEN 1 ELSE 0 END) as successful,
          1.0 * SUM(CASE WHEN status = 'working' THEN 1 ELSE 0 END)
          / COUNT(*) as success_rate
          FROM task_sequence
          WHERE status IN ('working', 'pending')
          GROUP BY worker_id
        target: '> 90% success rate with 4 agents'

      - name: claim_retry_count
        description: 'Number of times agents had to retry after a failed claim'
        # Uses the column aliases produced by claim_success_rate above.
        derivation: 'claim_attempts - successful per agent (columns of claim_success_rate)'
        target: '< 2 retries average per task'

      - name: queue_wait_time
        description: 'Time from task becoming ready to being claimed'
        # NOTE(review): the query measures from created_at, not from the
        # moment a task became ready; confirm these coincide for this
        # template (e.g. tasks without dependencies).
        query: >
          SELECT
          AVG(started_at - created_at) as avg_queue_wait_ms,
          MAX(started_at - created_at) as max_queue_wait_ms
          FROM tasks
          WHERE started_at IS NOT NULL AND created_at IS NOT NULL
        target: 'Queue wait should decrease with more agents'

  distribution:
    description: 'Measure how evenly work is spread across agents'
    measurements:
      - name: tasks_per_agent
        description: 'Number of completed tasks per agent'
        # Tasks with NULL points count as 1 point via COALESCE.
        query: >
          SELECT
          worker_id,
          COUNT(*) as tasks_completed,
          SUM(COALESCE(points, 1)) as points_completed
          FROM tasks
          WHERE status = 'completed' AND worker_id IS NOT NULL
          GROUP BY worker_id
        target: 'Coefficient of variation < 0.3 (roughly even distribution)'

      - name: load_balance_gini
        description: 'Gini coefficient of task distribution (0=perfect, 1=all to one)'
        derivation: 'Gini coefficient computed from tasks_per_agent counts'
        target: '< 0.15 for 4+ agents'

      - name: time_per_agent
        description: 'Total working time per agent'
        query: >
          SELECT
          worker_id,
          SUM(time_actual_ms) as total_work_ms,
          AVG(time_actual_ms) as avg_task_ms
          FROM tasks
          WHERE status = 'completed' AND worker_id IS NOT NULL
          GROUP BY worker_id
        target: 'Agents should have similar total work times'

      - name: idle_time_ratio
        description: 'Time agents spend not working (looking for tasks, failed claims)'
        derivation: '(wall_clock_per_agent - working_time_per_agent) / wall_clock_per_agent'
        target: '< 15% idle time per agent'

  throughput:
    description: 'Measure task completion velocity'
    measurements:
      - name: tasks_per_hour
        description: 'Overall task completion rate'
        # Elapsed span runs from the earliest start to the latest completion
        # among completed tasks; 3600000.0 converts milliseconds to hours and
        # forces floating-point division.
        query: >
          SELECT
          COUNT(*) as completed,
          (MAX(completed_at) - MIN(started_at)) / 3600000.0 as hours,
          COUNT(*) / ((MAX(completed_at) - MIN(started_at)) / 3600000.0) as tasks_per_hour
          FROM tasks
          WHERE status = 'completed'
        target: 'Should scale near-linearly with agent count for independent tasks'

      - name: throughput_per_agent
        description: 'Per-agent throughput (scaling efficiency)'
        derivation: 'tasks_per_hour / agent_count'
        target: '> 80% of single-agent throughput per agent'

      - name: scaling_efficiency
        description: 'How well throughput scales with agent count'
        derivation: 'throughput_N_agents / (N * throughput_1_agent)'
        target: '> 0.8 for 4 agents, > 0.6 for 8 agents'

      - name: completion_timeline
        description: 'Cumulative task completion over time'
        # Time-series output, one row per completed task; no target defined.
        query: >
          SELECT
          completed_at as timestamp_ms,
          id as task_id,
          worker_id,
          ROW_NUMBER() OVER (ORDER BY completed_at) as cumulative_count
          FROM tasks
          WHERE status = 'completed'
          ORDER BY completed_at

  coordination:
    description: 'Measure overhead from the pull coordination mechanism'
    measurements:
      - name: blocking_ratio
        description: 'Fraction of time spent waiting vs working'
        # Sums the duration of 'working' rows and of 'pending'/'assigned'
        # rows; rows with no end_timestamp are measured up to "now".
        # Returns the two totals only; the ratio itself is not computed here.
        query: >
          SELECT
          SUM(CASE WHEN status = 'working' THEN
          COALESCE(end_timestamp, CAST(strftime('%s','now') AS INTEGER)*1000) - timestamp
          ELSE 0 END) as working_ms,
          SUM(CASE WHEN status IN ('pending', 'assigned') THEN
          COALESCE(end_timestamp, CAST(strftime('%s','now') AS INTEGER)*1000) - timestamp
          ELSE 0 END) as blocked_ms
          FROM task_sequence
        target: '< 10% blocking ratio for independent tasks'

      - name: file_contention
        description: 'Instances of multiple agents trying to edit the same file'
        derivation: 'Count of mark_file warnings from concurrent marks'
        target: 'Zero for truly independent tasks'

      - name: rework_rate
        description: 'Tasks that had to be reworked (multiple working periods)'
        # Lists the tasks with more than one 'working' row; the percentage
        # in `target` has to be derived from this list by the consumer.
        query: >
          SELECT
          task_id,
          COUNT(*) as working_periods
          FROM task_sequence
          WHERE status = 'working'
          GROUP BY task_id
          HAVING COUNT(*) > 1
        target: '< 5% rework rate'
# Pass/fail gates for a run. The first two are mandatory (required: true);
# the remaining three are advisory (required: false).
# The numeric thresholds here are looser than the `target` strings of the
# matching metrics (e.g. 0.85 here vs "> 90%" for claim_success_rate).
success_criteria:
  - name: all_tasks_complete
    description: 'All tasks reach terminal state (completed or cancelled)'
    # NOTE(review): only pending and working counts are checked; the
    # blocking_ratio query also references an 'assigned' status. Confirm it
    # cannot occur in this workflow.
    condition: 'pending_count == 0 AND working_count == 0'
    required: true

  - name: acceptable_contention
    description: 'Claim contention does not cause significant waste'
    condition: 'claim_success_rate > 0.85'
    required: true

  - name: even_distribution
    description: 'Work is reasonably distributed across agents'
    # NOTE(review): the metric is named load_balance_gini; confirm the
    # evaluator exposes it as gini_coefficient.
    condition: 'gini_coefficient < 0.25'
    required: false

  - name: scaling_efficiency
    description: 'Adding agents provides meaningful throughput improvement'
    condition: 'throughput_4_agents > 2.5 * throughput_1_agent'
    required: false

  - name: low_overhead
    description: 'Coordination overhead is minimal'
    condition: 'blocking_ratio < 0.15'
    required: false
# Shell commands for running and comparing the sweep.
# Every `command` is a folded scalar: all argument lines sit at the same
# indent so they fold into ONE space-joined command line. Indenting an
# argument deeper than the first line would keep its line break.
execution:
  # Single-agent run; writes to results/pure-pull/pull-1.
  baseline:
    command: >
      python scripts/run_experiment.py
      --template experiments/templates/browser-parallel.json
      --workflow swarm
      --agents 1
      --output results/pure-pull/pull-1
    notes: 'Run first to establish baseline throughput'

  # Same invocation with 2, 4 and 8 agents, each with its own output dir.
  variants:
    - command: >
        python scripts/run_experiment.py
        --template experiments/templates/browser-parallel.json
        --workflow swarm
        --agents 2
        --output results/pure-pull/pull-2
    - command: >
        python scripts/run_experiment.py
        --template experiments/templates/browser-parallel.json
        --workflow swarm
        --agents 4
        --output results/pure-pull/pull-4
    - command: >
        python scripts/run_experiment.py
        --template experiments/templates/browser-parallel.json
        --workflow swarm
        --agents 8
        --output results/pure-pull/pull-8

  # Feeds the four tasks.db files to the comparison script; the --labels
  # order matches the order of the database arguments.
  comparison:
    command: >
      python scripts/compare_experiments.py
      results/pure-pull/pull-1/tasks.db
      results/pure-pull/pull-2/tasks.db
      results/pure-pull/pull-4/tasks.db
      results/pure-pull/pull-8/tasks.db
      --labels "1-agent,2-agent,4-agent,8-agent"
      --charts
      --output results/pure-pull/comparison
    notes: 'Generates markdown report and charts comparing all runs'

  # Unlike the runs above, this invocation passes no --template, --workflow
  # or --agents; it only waits on the pull-4 output location.
  wait_mode:
    command: >
      python scripts/run_experiment.py
      --wait
      --poll-interval 15
      --output results/pure-pull/pull-4
    notes: 'Polls DB every 15s, auto-exports metrics when all tasks are terminal'
# Analysis plan: hypotheses per metric family, cross-workflow comparisons,
# and the charts to produce. All free text here is descriptive.
analysis:
  expected_outcomes:
    - metric: contention
      expectation: >
        With independent tasks and atomic claiming, contention should be low
        (< 10% failed claims) for up to 4 agents. At 8 agents, contention may
        increase as agents race to claim from a shrinking pool. The claim()
        operation is atomic in SQLite, so no data corruption -- just wasted
        list_tasks() calls when another agent claims first.

    - metric: distribution
      # NOTE(review): three Gini thresholds exist in this file: 0.2 here,
      # "< 0.15 for 4+ agents" as the load_balance_gini target, and 0.25 in
      # the even_distribution criterion. Confirm the spread is intentional.
      expectation: >
        Task distribution should be roughly even because all agents are
        generalists with the same tags. Minor imbalance expected due to
        variable task durations -- agents that finish fast claim more tasks.
        The Gini coefficient should be < 0.2 for 4+ agents.

    - metric: throughput
      expectation: >
        Throughput should scale near-linearly for 1-4 agents since the
        browser-parallel template has 20+ independent tasks. Scaling
        efficiency may drop at 8 agents as the task pool depletes and
        agents start competing for the last few tasks.

    - metric: overhead
      expectation: >
        Pure pull has minimal coordination overhead -- no coordinator
        bottleneck, no assignment delays. The main overhead is from
        list_tasks() calls that return tasks already claimed by another
        agent. This overhead should be < 5% of total agent time.

  comparison_dimensions:
    - name: 'Pure Pull vs Hierarchical'
      description: >
        Compare with workflow-hierarchical where a lead assigns tasks.
        Expected: pull has lower overhead for independent tasks but
        hierarchical handles dependencies better.

    - name: 'Pure Pull vs Relay'
      description: >
        Compare with workflow-relay where specialists hand off.
        Expected: pull has higher throughput for generalist tasks but
        relay produces higher quality through review gates.

    - name: 'Agent Count Scaling'
      description: >
        Compare pull-1 through pull-8 to find the optimal agent count
        before contention overhead exceeds parallelism gains.
        Plot: throughput vs agent count (expect sublinear but positive).

  # Chart specifications: `x` / `y` (and optional `series`) name the plotted
  # fields.
  # NOTE(review): elapsed_time, cumulative_completed and failed_claim_pct are
  # not column names produced by the metric queries in this file; presumably
  # the charting layer derives them. Verify.
  key_charts:
    - title: 'Throughput vs Agent Count'
      type: line
      x: agent_count
      y: tasks_per_hour
      description: 'Shows scaling behavior of pure pull coordination'

    - title: 'Task Distribution Heatmap'
      type: heatmap
      x: task_id
      y: worker_id
      description: 'Shows which agent completed which tasks'

    - title: 'Cumulative Completion Timeline'
      type: area
      x: elapsed_time
      y: cumulative_completed
      series: run_name
      description: 'Overlay completion curves for each agent count'

    - title: 'Claim Contention Rate'
      type: bar
      x: agent_count
      y: failed_claim_pct
      description: 'Shows how contention increases with agents'