agent-tools-interface 0.7.11

Agent Tools Interface — secure CLI for AI agent tool execution
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
"""Tests for AtiOrchestrator — end-to-end provisioning."""

import io
import json
import warnings
from unittest.mock import patch

from ati import AtiOrchestrator, build_skill_instructions, inspect_token, validate_token

TEST_SECRET = "17332cf135d362f79a2ed700b13e1215978be1d6ae6e133d25b6b3f21fa10299"


class TestAtiOrchestrator:
    """Provisioning tests: mint sandbox env vars and inspect the issued token."""

    def setup_method(self):
        """Create a fresh orchestrator bound to the shared test secret."""
        self.orch = AtiOrchestrator(
            secret=TEST_SECRET,
            proxy_url="https://ati-proxy.example.com",
        )

    def test_provision_returns_env_vars(self):
        """Both ATI env vars are returned and the proxy URL is passed through."""
        provisioned = self.orch.provision_sandbox(
            tools=["web_search", "finnhub_quote"],
            agent_id="sandbox:abc123",
        )
        for required_key in ("ATI_PROXY_URL", "ATI_SESSION_TOKEN"):
            assert required_key in provisioned
        assert provisioned["ATI_PROXY_URL"] == "https://ati-proxy.example.com"

    def test_provision_token_is_valid(self):
        """The minted token validates and carries the expected claims."""
        provisioned = self.orch.provision_sandbox(
            agent_id="sandbox:xyz",
            tools=["web_search"],
            skills=["research-*"],
            ttl_seconds=1800,
        )
        session_token = provisioned["ATI_SESSION_TOKEN"]
        decoded = validate_token(session_token, secret=TEST_SECRET)

        # Identity claims.
        assert decoded.sub == "sandbox:xyz"
        assert decoded.aud == "ati-proxy"
        assert decoded.iss == "ati-orchestrator"
        # Tools and skills both land in the scope claim.
        assert "tool:web_search" in decoded.scope
        assert "skill:research-*" in decoded.scope
        # Token lifetime equals the requested TTL.
        lifetime = decoded.exp - decoded.iat
        assert lifetime == 1800

    def test_provision_with_rate_limits(self):
        """Rate limits passed at provisioning time appear in the token's ati claim."""
        provisioned = self.orch.provision_sandbox(
            agent_id="agent-1",
            tools=["github:*"],
            rate={"tool:github:*": "10/hour"},
        )
        decoded = inspect_token(provisioned["ATI_SESSION_TOKEN"])
        assert decoded.ati is not None
        expected_rate = {"tool:github:*": "10/hour"}
        assert decoded.ati.rate == expected_rate

    def test_provision_with_extra_scopes(self):
        """Extra scopes are granted alongside the tool-derived scopes."""
        provisioned = self.orch.provision_sandbox(
            agent_id="agent-2",
            tools=["web_search"],
            extra_scopes=["help"],
        )
        decoded = inspect_token(provisioned["ATI_SESSION_TOKEN"])
        for expected_scope in ("help", "tool:web_search"):
            assert expected_scope in decoded.scopes()

    def test_provision_strips_trailing_slash(self):
        """A trailing slash on the configured proxy URL is absent from the env var."""
        slashed = AtiOrchestrator(
            secret=TEST_SECRET,
            proxy_url="https://ati-proxy.example.com/",
        )
        provisioned = slashed.provision_sandbox(agent_id="test", tools=["x"])
        assert provisioned["ATI_PROXY_URL"] == "https://ati-proxy.example.com"

    def test_custom_aud_and_iss(self):
        """Orchestrator-level default audience and issuer end up in the token."""
        custom = AtiOrchestrator(
            proxy_url="https://proxy.test",
            secret=TEST_SECRET,
            default_aud="custom-aud",
            default_iss="custom-iss",
        )
        provisioned = custom.provision_sandbox(agent_id="test", tools=["x"])
        session_token = provisioned["ATI_SESSION_TOKEN"]
        decoded = validate_token(
            session_token,
            secret=TEST_SECRET,
            audience="custom-aud",
        )
        assert decoded.aud == "custom-aud"
        assert decoded.iss == "custom-iss"

    def test_validate_own_token(self):
        """The orchestrator can validate a token it issued itself."""
        provisioned = self.orch.provision_sandbox(
            agent_id="validate-me",
            tools=["web_search"],
        )
        session_token = provisioned["ATI_SESSION_TOKEN"]
        decoded = self.orch.validate_token(session_token)
        assert decoded.sub == "validate-me"

    def test_provision_empty_tools(self):
        """Provisioning with no tools yields an empty scope string."""
        provisioned = self.orch.provision_sandbox(agent_id="empty")
        decoded = inspect_token(provisioned["ATI_SESSION_TOKEN"])
        assert decoded.scope == ""


# --- build_skill_listing / listing formatter regression tests (0.7.5+) ---


def _fake_urlopen(payload_bytes: bytes):
    """Return a mock context-manager that `urllib.request.urlopen` can
    accept. Captures the Request passed in via a side-effect list so
    tests can assert URL + headers."""
    captured: list = []

    class _FakeResp:
        def __init__(self, data):
            self._data = data

        def __enter__(self):
            return self

        def __exit__(self, *_exc):
            return False

        def read(self):
            return self._data

    def _urlopen(req, timeout=None):
        captured.append((req, timeout))
        return _FakeResp(payload_bytes)

    return captured, _urlopen


class TestBuildSkillListing:
    """Regression tests for ``AtiOrchestrator.build_skill_listing``.

    Every test stubs ``urllib.request.urlopen`` with ``_fake_urlopen`` so
    the catalog payload is canned and no network access happens. The
    last test covers the legacy ``build_skill_instructions`` helper."""

    def setup_method(self):
        """Create a fresh orchestrator bound to the shared test secret."""
        self.orch = AtiOrchestrator(
            proxy_url="https://ati-proxy.example.com",
            secret=TEST_SECRET,
        )

    def test_fetches_from_proxy_with_bearer_token(self):
        """The catalog is fetched from ``/skillati/catalog`` with an
        ``Authorization: Bearer <token>`` header, and the result is a
        ``<system-reminder>``-wrapped list of ``name: description``
        entries (with ``when_to_use`` appended after `` - ``)."""
        payload = json.dumps(
            {
                "skills": [
                    {
                        "name": "slidedeck-production",
                        "description": "Create animation-rich HTML slides",
                    },
                    {
                        "name": "html-app-architecture",
                        "description": "Build self-contained HTML apps",
                        "when_to_use": "use when asked for an HTML artifact",
                    },
                ]
            }
        ).encode()

        captured, urlopen = _fake_urlopen(payload)
        with patch("urllib.request.urlopen", urlopen):
            listing = self.orch.build_skill_listing(token="tok-abc")

        assert "<system-reminder>" in listing
        assert "</system-reminder>" in listing
        assert "slidedeck-production: Create animation-rich HTML slides" in listing
        assert (
            "html-app-architecture: Build self-contained HTML apps - "
            "use when asked for an HTML artifact"
        ) in listing

        # Exactly one HTTP request, to the catalog endpoint, with the token.
        assert len(captured) == 1
        req, _timeout = captured[0]
        assert req.full_url == "https://ati-proxy.example.com/skillati/catalog"
        assert req.get_header("Authorization") == "Bearer tok-abc"

    def test_forwards_search_query_param(self):
        """``search=`` is forwarded to the catalog endpoint as a
        ``?search=`` query parameter."""
        captured, urlopen = _fake_urlopen(b'{"skills": []}')
        with patch("urllib.request.urlopen", urlopen):
            self.orch.build_skill_listing(token="tok", search="html")

        req, _ = captured[0]
        assert req.full_url == (
            "https://ati-proxy.example.com/skillati/catalog?search=html"
        )

    @staticmethod
    def _extract_body(listing: str) -> str:
        """Pluck the bullet-content body out of a wrapped listing so
        tests can assert against the 8000-char CC budget directly.

        The body is everything between the first blank line (end of the
        preamble) and the closing ``</system-reminder>`` line. Fails the
        calling test if either delimiter is missing."""
        preamble_end = listing.find("\n\n")
        closing = listing.find("\n</system-reminder>")
        assert preamble_end >= 0 and closing >= 0, (
            f"malformed listing: {listing[:200]}"
        )
        # +2 skips the blank-line separator itself.
        return listing[preamble_end + 2 : closing]

    def test_respects_body_budget_across_many_skills(self):
        """CC's 8000-char budget applies to the bullet-content body, not
        the wrapped output. The `<system-reminder>` envelope adds
        header/footer overhead on top. 400 skills * ~160 chars forces
        the pass-2 (truncated descriptions) cascade; the body
        (excluding the wrap) must stay under 8000 chars — matching CC's
        ``formatCommandsWithinBudget`` which returns just the body
        string and lets ``wrapInSystemReminder`` add its own overhead
        later (``~/cc/src/utils/messages.ts:3098``)."""
        many = [
            {
                "name": f"skill-{i:03}",
                "description": "A description that pushes us past the per-entry cap " * 3,
            }
            for i in range(400)
        ]
        payload = json.dumps({"skills": many}).encode()
        _, urlopen = _fake_urlopen(payload)
        with patch("urllib.request.urlopen", urlopen):
            listing = self.orch.build_skill_listing(token="tok")
        assert "<system-reminder>" in listing
        body = self._extract_body(listing)
        assert len(body) <= 8000, (
            f"body {len(body)} chars exceeds 8000 budget (total listing: "
            f"{len(listing)} chars)"
        )

    def test_pass_three_overflow_footer_appends_plus_n_more(self):
        """Pass 3' — the ATI-specific overflow footer — must fire when
        even names-only entries exceed the 8000-char body budget. CC
        never exercises this path (built-in skill counts are <30); our
        Parcha GCS catalog carries 286+ skills and agents provisioned
        with a narrow custom catalog of 700+ long-named skills will
        legitimately hit it. Assert the footer says "+N more" with the
        correct drop count."""
        # Each bare bullet line is `- <name>` (len 2 + len(name)).
        # Names are `a-verbose-skill-name-{i:04}`: 21 + 4 = 25 chars, so
        # a bare bullet is 27 chars and the names-only body would be
        # 700 × 27 + 699 newlines = 19599 chars, well over the 8000
        # budget. Forces the Pass 3' cascade.
        many = [
            {
                "name": f"a-verbose-skill-name-{i:04}",
                "description": "x" * 300,
            }
            for i in range(700)
        ]
        payload = json.dumps({"skills": many}).encode()
        _, urlopen = _fake_urlopen(payload)
        with patch("urllib.request.urlopen", urlopen):
            listing = self.orch.build_skill_listing(token="tok")
        assert "<system-reminder>" in listing
        body = self._extract_body(listing)
        assert len(body) <= 8000, (
            f"body {len(body)} chars exceeds 8000 budget in pass 3' — "
            f"overflow reservation should have prevented this"
        )
        footer_text = " more — run `ati skill fetch catalog`"
        assert "+ " not in body, body  # belt-and-suspenders: no blank counts
        assert footer_text in body, (
            f"pass 3' must emit the '+N more' footer: {body[-300:]}"
        )
        # The footer line must appear exactly once.
        assert body.count(footer_text) == 1, (
            f"overflow footer emitted {body.count(footer_text)} times, "
            f"expected exactly 1: {body[-300:]}"
        )
        # Parse the count out of the footer and verify it equals
        # (700 - number-of-included-bullets).
        import re

        match = re.search(r"\+(\d+) more — run `ati skill fetch catalog`", body)
        assert match, f"footer format changed: {body[-300:]}"
        reported_missing = int(match.group(1))
        # Count actual bullets that survived (skip the overflow line).
        bullet_lines = [
            ln for ln in body.splitlines()
            if ln.startswith("- ") and "more — run" not in ln
        ]
        assert len(bullet_lines) + reported_missing == 700, (
            f"overflow math mismatch: {len(bullet_lines)} bullets + "
            f"{reported_missing} reported as missing = "
            f"{len(bullet_lines) + reported_missing}, expected 700"
        )
        assert reported_missing > 0, "pass 3' must drop at least one skill"

    def test_truncates_description_at_250_chars_not_full_line(self):
        """CC's 250-char cap applies to the description portion only,
        not the whole bullet line. A skill with a 2000-char description
        should have its description clipped to 249 chars + `\\u2026`,
        then the `- name: ` prefix is added on top. Matches
        ``~/cc/src/tools/SkillTool/prompt.ts:43-50``
        (`getCommandDescription`)."""
        long_desc = "x" * 2000
        payload = json.dumps(
            {
                "skills": [
                    {
                        "name": "bloated-skill",
                        "description": long_desc,
                    }
                ]
            }
        ).encode()
        _, urlopen = _fake_urlopen(payload)
        with patch("urllib.request.urlopen", urlopen):
            listing = self.orch.build_skill_listing(token="tok")
        for line in listing.splitlines():
            if line.startswith("- bloated-skill"):
                # Description portion = the "x...x\u2026" after "- name: ".
                prefix = "- bloated-skill: "
                assert line.startswith(prefix)
                description_part = line[len(prefix):]
                assert len(description_part) == 250, (
                    f"description portion is {len(description_part)} chars, "
                    f"expected 250 (249 x's + 1 ellipsis): {description_part[:50]}..."
                )
                assert description_part.endswith("\u2026")
                # Make sure CC's rule applies: 249 content chars + 1 ellipsis.
                assert description_part[:-1] == "x" * 249
                return
        # Falling out of the loop means the bullet was never emitted.
        raise AssertionError(f"bloated-skill line not found in listing:\n{listing}")

    def test_long_skill_name_does_not_force_description_truncation(self):
        """A skill with a moderate description and a very long name
        should NOT have its description truncated — the 250-char cap
        applies to the description, not `- name: description` as a
        whole. This is the regression the previous test was hiding."""
        payload = json.dumps(
            {
                "skills": [
                    {
                        "name": "a" * 200,  # 200-char skill name
                        "description": "b" * 100,  # 100-char description
                    }
                ]
            }
        ).encode()
        _, urlopen = _fake_urlopen(payload)
        with patch("urllib.request.urlopen", urlopen):
            listing = self.orch.build_skill_listing(token="tok")
        # Description should be intact — no ellipsis anywhere in the body.
        for line in listing.splitlines():
            if line.startswith("- " + "a" * 10):
                assert "\u2026" not in line, (
                    f"description must not be truncated when the bullet's "
                    f"total length exceeds 250 chars due to a long name: {line}"
                )
                assert line.endswith("b" * 100)
                return
        # Falling out of the loop means the bullet was never emitted.
        raise AssertionError(f"skill line not found in listing:\n{listing}")

    def test_ellipsis_uses_single_codepoint_not_three_dots(self):
        """CC uses the U+2026 ellipsis codepoint (`\\u2026`), not three
        ASCII periods. Assert our truncation matches byte-for-byte."""
        long_desc = "z" * 500
        payload = json.dumps(
            {"skills": [{"name": "ellipsis-check", "description": long_desc}]}
        ).encode()
        _, urlopen = _fake_urlopen(payload)
        with patch("urllib.request.urlopen", urlopen):
            listing = self.orch.build_skill_listing(token="tok")
        for line in listing.splitlines():
            if line.startswith("- ellipsis-check:"):
                assert "..." not in line, (
                    f"three-dot ellipsis leaked into output: {line[:80]}"
                )
                assert line.endswith("\u2026")
                return
        # Falling out of the loop means the bullet was never emitted.
        raise AssertionError("ellipsis-check skill not found in listing")

    def test_middle_pass_truncates_descriptions_before_dropping_to_names(self):
        """When full descriptions don't fit but there's still room for
        moderately-sized per-description budgets (>=20 chars per entry),
        CC trims descriptions via the middle pass rather than dropping
        straight to names-only. Matches
        ``~/cc/src/tools/SkillTool/prompt.ts:117-170``."""
        # 100 skills * 150-char descriptions = 15000+ chars in full mode
        # (overflows 8000). Per-description budget after overhead should
        # still be well above MIN_DESC_LENGTH (20), so we expect Pass 2.
        skills = [
            {
                "name": f"skill-{i:03}",
                "description": "Detailed methodology for handling scenario " + ("x" * 100),
            }
            for i in range(100)
        ]
        payload = json.dumps({"skills": skills}).encode()
        _, urlopen = _fake_urlopen(payload)
        with patch("urllib.request.urlopen", urlopen):
            listing = self.orch.build_skill_listing(token="tok")

        # Pass 2 signature: every bullet still has a description (colon +
        # space + text), and at least one of them ends with the ellipsis
        # marker because it got trimmed.
        lines = [
            ln for ln in listing.splitlines() if ln.startswith("- skill-")
        ]
        assert len(lines) == 100, f"all 100 skills should be listed, got {len(lines)}"
        with_desc = [ln for ln in lines if ": " in ln]
        assert len(with_desc) == 100, (
            f"middle pass must preserve descriptions, got "
            f"{len(with_desc)} / 100 with descriptions"
        )
        truncated = [ln for ln in lines if ln.endswith("\u2026")]
        assert truncated, (
            "at least one description should be trimmed in the middle pass"
        )
        assert len(listing) <= 8000 + 500  # header+footer overhead

    def test_empty_catalog_returns_empty_string(self):
        """An empty ``skills`` array yields ``""``."""
        _, urlopen = _fake_urlopen(b'{"skills": []}')
        with patch("urllib.request.urlopen", urlopen):
            listing = self.orch.build_skill_listing(token="tok")
        assert listing == ""

    def test_legacy_build_skill_instructions_still_works_with_warning(self):
        """The legacy ``build_skill_instructions`` helper keeps its old
        Markdown output and emits a ``DeprecationWarning``."""
        with warnings.catch_warnings(record=True) as captured_warnings:
            # "always" so the warning is recorded even if already seen.
            warnings.simplefilter("always")
            out = build_skill_instructions(["foo", "bar"])

        assert any(
            issubclass(w.category, DeprecationWarning) for w in captured_warnings
        ), "build_skill_instructions should emit a DeprecationWarning"
        # Legacy format preserved.
        assert "# Available Skills" in out
        assert "- **foo**: `ati skill fetch read foo`" in out
        assert "- **bar**: `ati skill fetch read bar`" in out