From c5564ff3e551fd118103ad5cb76694c5e21f48bd Mon Sep 17 00:00:00 2001
From: GAURAV KARMAKAR <gaurav.k@graeon.ai>
Date: Sun, 14 Jun 2026 15:15:10 +0530
Subject: [PATCH 1/4] WIP: sp_cli command-line client for Sample Platform

Adds the sp_cli package (client, commands, runner, output, classifier,
triage) plus test_cli suite and pyproject packaging.
---
 .gitignore                        |   3 +
 pyproject.toml                    |  16 +++
 run.py                            |  12 +-
 sp_cli/__init__.py                |   9 ++
 sp_cli/__main__.py                |   6 +
 sp_cli/banner.py                  |  54 +++++++++
 sp_cli/classifier.py              | 110 ++++++++++++++++++
 sp_cli/client.py                  | 168 +++++++++++++++++++++++++++
 sp_cli/commands/__init__.py       |   1 +
 sp_cli/commands/auth.py           |  47 ++++++++
 sp_cli/commands/investigate.py    |  72 ++++++++++++
 sp_cli/commands/regression.py     |  29 +++++
 sp_cli/commands/run.py            | 183 ++++++++++++++++++++++++++++++
 sp_cli/commands/sample.py         |  47 ++++++++
 sp_cli/commands/system.py         |  23 ++++
 sp_cli/main.py                    |  51 +++++++++
 sp_cli/output.py                  | 138 ++++++++++++++++++++++
 sp_cli/runner.py                  |  41 +++++++
 sp_cli/triage.py                  |  85 ++++++++++++++
 tests/test_cli/__init__.py        |   1 +
 tests/test_cli/test_classifier.py |  69 +++++++++++
 tests/test_cli/test_cli.py        | 156 +++++++++++++++++++++++++
 tests/test_cli/test_client.py     |  90 +++++++++++++++
 23 files changed, 1407 insertions(+), 4 deletions(-)
 create mode 100644 pyproject.toml
 create mode 100644 sp_cli/__init__.py
 create mode 100644 sp_cli/__main__.py
 create mode 100644 sp_cli/banner.py
 create mode 100644 sp_cli/classifier.py
 create mode 100644 sp_cli/client.py
 create mode 100644 sp_cli/commands/__init__.py
 create mode 100644 sp_cli/commands/auth.py
 create mode 100644 sp_cli/commands/investigate.py
 create mode 100644 sp_cli/commands/regression.py
 create mode 100644 sp_cli/commands/run.py
 create mode 100644 sp_cli/commands/sample.py
 create mode 100644 sp_cli/commands/system.py
 create mode 100644 sp_cli/main.py
 create mode 100644 sp_cli/output.py
 create mode 100644 sp_cli/runner.py
 create mode 100644 sp_cli/triage.py
 create mode 100644 tests/test_cli/__init__.py
 create mode 100644 tests/test_cli/test_classifier.py
 create mode 100644 tests/test_cli/test_cli.py
 create mode 100644 tests/test_cli/test_client.py

diff --git a/.gitignore b/.gitignore
index 4f8ca2d8d..9ca6892b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -53,3 +53,6 @@ monkeytype.sqlite3
 
 # Test related data
 temp/
+
+# Python packaging
+*.egg-info/
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 000000000..58784e5b1
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,16 @@
+[build-system]
+requires = ["setuptools>=61"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "sp-cli"
+version = "0.1.0"
+description = "AI-friendly CLI for the CCExtractor CI / Sample Platform"
+requires-python = ">=3.10"
+dependencies = ["click", "requests"]
+
+[project.scripts]
+sp = "sp_cli.main:cli"
+
+[tool.setuptools]
+packages = ["sp_cli", "sp_cli.commands"]
diff --git a/run.py b/run.py
index e277c6d97..a27ea2a3b 100755
--- a/run.py
+++ b/run.py
@@ -60,10 +60,14 @@
                                      app.config['DEBUG'])
 log = log_configuration.create_logger("Platform")
 
-# Create bucket objext using GCS storage client
-sa_file = os.path.join(app.config.get('INSTALL_FOLDER', ''), app.config.get('SERVICE_ACCOUNT_FILE', ''))
-storage_client = Client.from_service_account_json(sa_file)
-storage_client_bucket = storage_client.bucket(app.config.get('GCS_BUCKET_NAME', ''))
+# Create bucket object using GCS storage client, unless explicitly disabled (local dev)
+if os.environ.get('DISABLE_GCS', '0') == '1':
+    storage_client = None
+    storage_client_bucket = None
+else:
+    sa_file = os.path.join(app.config.get('INSTALL_FOLDER', ''), app.config.get('SERVICE_ACCOUNT_FILE', ''))
+    storage_client = Client.from_service_account_json(sa_file)
+    storage_client_bucket = storage_client.bucket(app.config.get('GCS_BUCKET_NAME', ''))
 
 # Save build commit
 repo = git.Repo(app.config.get('INSTALL_FOLDER', ''))
diff --git a/sp_cli/__init__.py b/sp_cli/__init__.py
new file mode 100644
index 000000000..c3179aac4
--- /dev/null
+++ b/sp_cli/__init__.py
@@ -0,0 +1,9 @@
+"""``sp`` — an AI-friendly command-line client for the CCExtractor Sample Platform.
+
+The CLI is a thin layer over the Sample Platform JSON API (``/api/v1``). It is
+designed to be driven by AI agents as well as humans: it emits machine-readable
+JSON by default and uses non-zero exit codes plus a consistent error envelope on
+failure, so it can be scripted without screen-scraping the web UI.
+"""
+
+__version__ = "0.1.0"
diff --git a/sp_cli/__main__.py b/sp_cli/__main__.py
new file mode 100644
index 000000000..d5e9b2ebd
--- /dev/null
+++ b/sp_cli/__main__.py
@@ -0,0 +1,6 @@
+"""Allow the CLI to be run as ``python -m sp_cli``."""
+
+from sp_cli.main import cli
+
+if __name__ == '__main__':
+    cli()
diff --git a/sp_cli/banner.py b/sp_cli/banner.py
new file mode 100644
index 000000000..d0cb6de4d
--- /dev/null
+++ b/sp_cli/banner.py
@@ -0,0 +1,54 @@
+"""Branded welcome screen for the ``sp`` CLI.
+
+Shown only when ``sp`` is invoked with no subcommand. Never emitted on command
+output, so machine consumers (agents parsing JSON) are unaffected. Colors are
+applied via :func:`click.style` and are auto-stripped when output is piped.
+"""
+
+import click
+
+from sp_cli import __version__
+
+#: Figlet-style "sp" wordmark.
+LOGO = r"""  ___ _ __
+ / __| '_ \
+ \__ \ |_) |
+ |___/ .__/
+     |_|"""
+
+_GROUPS = [
+    ('TRIAGE', 'sp investigate <run>     ← one-shot: what failed and why'),
+    ('RUNS', 'sp run ls · show · summary · failures · results · result · diff · artifacts · logs · errors'),
+    ('SAMPLES', 'sp sample ls · show · history'),
+    ('TESTS', 'sp regression ls'),
+    ('SYSTEM', 'sp health · queue'),
+    ('AUTH', 'sp auth login · logout'),
+]
+
+_EXAMPLES = [
+    ('sp investigate 9299', 'triage a run end-to-end'),
+    ('sp run failures 9299', 'failing tests, each labeled with why'),
+    ('sp run diff 9299 137', 'expected-vs-actual diff (ids auto-resolved)'),
+]
+
+
+def show_welcome() -> None:
+    """Print the branded welcome screen (banner, command map, examples)."""
+    click.echo()
+    click.echo(click.style(LOGO, fg='cyan'))
+    click.echo(f"  {click.style('CCExtractor CI', bold=True)} · AI-friendly CLI · v{__version__}")
+    click.echo("  drive CI investigations from the terminal — no UI, no HTML scraping")
+    click.echo()
+
+    for name, line in _GROUPS:
+        click.echo(f"  {click.style(name.ljust(8), fg='green', bold=True)} {line}")
+    click.echo()
+
+    click.echo(f"  {click.style('Examples', bold=True)}")
+    for command, note in _EXAMPLES:
+        click.echo(f"    {command.ljust(28)} {click.style('# ' + note, fg='bright_black')}")
+    click.echo()
+
+    click.echo(f"  {click.style('Help', bold=True)} sp COMMAND --help"
+               f"     {click.style('Config', bold=True)} SP_BASE_URL · SP_API_TOKEN")
+    click.echo()
diff --git a/sp_cli/classifier.py b/sp_cli/classifier.py
new file mode 100644
index 000000000..ec67ef9d5
--- /dev/null
+++ b/sp_cli/classifier.py
@@ -0,0 +1,110 @@
+"""Rule-based classification of regression-test failures into stable codes.
+
+Deterministic, no ML: maps the raw signals a test run exposes (exit code,
+expected return code, output presence, pass-history) onto a small, stable
+taxonomy so an agent can branch on *why* a test failed instead of parsing
+prose. Platform differences are normalized — e.g. a segfault surfaces as ``139``
+on Linux and ``-1073741819`` (0xC0000005) on Windows; both classify as
+``SEGFAULT``.
+
+Each classification returns a ``code`` (stable, machine-readable), a
+``confidence`` (``high`` for unambiguous exit-code rules, ``medium`` for
+output-based ones), a human ``reason``, and ``regression`` (True if the test was
+passing before — a real regression; False if it never passed; None if unknown).
+"""
+
+from typing import Any, Dict, Optional
+
+# --- Failure codes (stable; downstream tools may pin on these) ---------------
+CODE_PASS = "PASS"
+CODE_SEGFAULT = "SEGFAULT"
+CODE_ABORT = "ABORT"
+CODE_TIMEOUT = "TIMEOUT"
+CODE_MISSING_OUTPUT = "MISSING_OUTPUT"
+CODE_EXIT_CODE_MISMATCH = "EXIT_CODE_MISMATCH"
+CODE_OUTPUT_DIFF = "OUTPUT_DIFF"
+CODE_UNKNOWN = "UNKNOWN"
+
+# --- Exit codes that denote a crash, normalized across platforms -------------
+#: SIGSEGV (128+11) on Linux, raw -11, and 0xC0000005 access violation on Windows.
+_SEGFAULT_CODES = frozenset({139, -11, -1073741819})
+#: SIGABRT (128+6) on Linux and raw -6.
+_ABORT_CODES = frozenset({134, -6})
+#: `timeout` exit (124) and SIGTERM (143 / -15).
+_TIMEOUT_CODES = frozenset({124, 143, -15})
+
+
+def classify(exit_code: Optional[int], expected_rc: Optional[int], *,
+             has_output_diff: bool = False, missing_output: bool = False,
+             has_ever_passed: Optional[bool] = None) -> Dict[str, Any]:
+    """
+    Classify a single regression-test result into a stable failure code.
+
+    Rules are evaluated most-severe first (crash > timeout > missing output >
+    exit-code mismatch > output diff), so the most actionable signal wins.
+
+    :param exit_code: The process exit code observed for the test.
+    :type exit_code: Optional[int]
+    :param expected_rc: The exit code the test was expected to return.
+    :type expected_rc: Optional[int]
+    :param has_output_diff: True if a differing output file was recorded.
+    :type has_output_diff: bool
+    :param missing_output: True if output was expected but none was produced.
+    :type missing_output: bool
+    :param has_ever_passed: Whether this test has ever passed (history), if known.
+    :type has_ever_passed: Optional[bool]
+    :return: ``{code, confidence, reason, regression}``.
+    :rtype: Dict[str, Any]
+    """
+    regression = _regression_state(has_ever_passed)
+
+    if exit_code in _SEGFAULT_CODES:
+        return _result(CODE_SEGFAULT, "high",
+                       f"Crash (segfault / access violation), exit {exit_code}", regression)
+    if exit_code in _ABORT_CODES:
+        return _result(CODE_ABORT, "high", f"Aborted (SIGABRT), exit {exit_code}", regression)
+    if exit_code in _TIMEOUT_CODES:
+        return _result(CODE_TIMEOUT, "high", f"Timed out / terminated, exit {exit_code}", regression)
+    if missing_output:
+        return _result(CODE_MISSING_OUTPUT, "high",
+                       "No output was produced but one was expected", regression)
+    if exit_code != expected_rc:
+        return _result(CODE_EXIT_CODE_MISMATCH, "high",
+                       f"Exited {exit_code}, expected {expected_rc}", regression)
+    if has_output_diff:
+        return _result(CODE_OUTPUT_DIFF, "medium",
+                       "Exit code matched but output differs from expected", regression)
+
+    return _result(CODE_PASS, "high", "Exit code matched and no output diff recorded", regression)
+
+
+def _regression_state(has_ever_passed: Optional[bool]) -> Optional[bool]:
+    """
+    Translate pass-history into the ``regression`` flag.
+
+    :param has_ever_passed: Whether the test has ever passed, if known.
+    :type has_ever_passed: Optional[bool]
+    :return: True if a real regression, False if never worked, None if unknown.
+    :rtype: Optional[bool]
+    """
+    if has_ever_passed is None:
+        return None
+    return bool(has_ever_passed)
+
+
+def _result(code: str, confidence: str, reason: str, regression: Optional[bool]) -> Dict[str, Any]:
+    """
+    Assemble a classification result dict.
+
+    :param code: The stable failure code.
+    :type code: str
+    :param confidence: ``high`` or ``medium``.
+    :type confidence: str
+    :param reason: Human-readable explanation.
+    :type reason: str
+    :param regression: Regression flag (see :func:`_regression_state`).
+    :type regression: Optional[bool]
+    :return: The assembled result.
+    :rtype: Dict[str, Any]
+    """
+    return {"code": code, "confidence": confidence, "reason": reason, "regression": regression}
diff --git a/sp_cli/client.py b/sp_cli/client.py
new file mode 100644
index 000000000..2719deeb7
--- /dev/null
+++ b/sp_cli/client.py
@@ -0,0 +1,168 @@
+"""HTTP client for the CCExtractor CI System API (`/api/v1`)."""
+
+from typing import Any, Dict, List, Optional
+
+import requests  # type: ignore[import-untyped]
+
+
+class ApiError(Exception):
+    """Raised when an API request fails, carrying the structured error envelope."""
+
+    def __init__(self, code: str, message: str, status: Optional[int] = None,
+                 details: Optional[Dict[str, Any]] = None) -> None:
+        """
+        Build an API error.
+
+        :param code: Stable machine-readable error code (e.g. ``not_found``).
+        :type code: str
+        :param message: Human-readable explanation.
+        :type message: str
+        :param status: HTTP status code, if the failure was an HTTP response.
+        :type status: Optional[int]
+        :param details: Optional structured context echoed from the API.
+        :type details: Optional[Dict[str, Any]]
+        """
+        super().__init__(message)
+        self.code = code
+        self.message = message
+        self.status = status
+        self.details = details
+
+    @property
+    def exit_code(self) -> int:
+        """
+        Map the error to a process exit code so callers can branch on it.
+
+        :return: 3 connection · 4 not-found · 5 validation · 6 auth · 7 rate-limited · 1 other.
+        :rtype: int
+        """
+        if self.code == 'connection_error':
+            return 3
+        if self.status == 404:
+            return 4
+        if self.status in (400, 422):
+            return 5
+        if self.status in (401, 403):
+            return 6
+        if self.status == 429:
+            return 7
+        return 1
+
+
+class ApiClient:
+    """Minimal client over the JSON API. Sends a bearer token when configured."""
+
+    def __init__(self, base_url: str, token: Optional[str] = None, timeout: int = 30) -> None:
+        """
+        Configure the client.
+
+        :param base_url: Root URL of the platform (without the ``/api/v1`` prefix).
+        :type base_url: str
+        :param token: Optional opaque bearer token sent on every request.
+        :type token: Optional[str]
+        :param timeout: Per-request timeout in seconds.
+        :type timeout: int
+        """
+        self.base_url = base_url.rstrip('/')
+        self.token = token
+        self.timeout = timeout
+        self.session = requests.Session()
+
+    def _headers(self) -> Dict[str, str]:
+        """
+        Build request headers, including the bearer token when set.
+
+        :return: Header mapping.
+        :rtype: Dict[str, str]
+        """
+        headers = {'Accept': 'application/json'}
+        if self.token:
+            headers['Authorization'] = f'Bearer {self.token}'
+        return headers
+
+    def request(self, method: str, path: str, params: Optional[Dict[str, Any]] = None,
+                json_body: Optional[Dict[str, Any]] = None) -> Any:
+        """
+        Perform a request against an API path and return the decoded JSON body.
+
+        :param method: HTTP method (``GET``, ``POST``, ``DELETE`` …).
+        :type method: str
+        :param path: API path below ``/api/v1`` (e.g. ``/runs``).
+        :type path: str
+        :param params: Optional query-string parameters.
+        :type params: Optional[Dict[str, Any]]
+        :param json_body: Optional JSON request body.
+        :type json_body: Optional[Dict[str, Any]]
+        :raises ApiError: on connection failure, a non-JSON body, or an HTTP error.
+        :return: The decoded JSON response body (or ``None`` for ``204``).
+        :rtype: Any
+        """
+        url = f"{self.base_url}{path}"
+        try:
+            response = self.session.request(method, url, params=params, json=json_body,
+                                            headers=self._headers(), timeout=self.timeout)
+        except requests.RequestException as exc:
+            raise ApiError('connection_error', f'Could not reach {url}: {exc}')
+
+        if response.status_code == 204:
+            return None
+
+        try:
+            payload = response.json()
+        except ValueError:
+            raise ApiError('invalid_response',
+                           f'Expected JSON but got HTTP {response.status_code}', response.status_code)
+
+        if response.status_code >= 400:
+            error = payload if isinstance(payload, dict) else {}
+            raise ApiError(
+                error.get('code', 'http_error'),
+                error.get('message', f'Request failed with HTTP {response.status_code}'),
+                response.status_code,
+                error.get('details'),
+            )
+
+        return payload
+
+    def get(self, path: str, params: Optional[Dict[str, Any]] = None) -> Any:
+        """
+        Perform a GET and return the decoded body.
+
+        :param path: API path below ``/api/v1``.
+        :type path: str
+        :param params: Optional query-string parameters.
+        :type params: Optional[Dict[str, Any]]
+        :return: The decoded JSON body.
+        :rtype: Any
+        """
+        return self.request('GET', path, params=params)
+
+    def get_paginated(self, path: str, params: Optional[Dict[str, Any]] = None,
+                      max_items: int = 1000) -> List[Any]:
+        """
+        Follow offset pagination and return the combined ``data`` list.
+
+        :param path: API path below ``/api/v1``.
+        :type path: str
+        :param params: Optional query-string parameters (``limit``/``offset`` are managed).
+        :type params: Optional[Dict[str, Any]]
+        :param max_items: Safety cap on total items collected.
+        :type max_items: int
+        :return: All items across pages.
+        :rtype: List[Any]
+        """
+        merged = dict(params or {})
+        merged.setdefault('limit', 100)
+        offset = 0
+        items: List[Any] = []
+        while True:
+            merged['offset'] = offset
+            payload = self.get(path, params=merged)
+            data = payload.get('data', []) if isinstance(payload, dict) else []
+            items.extend(data)
+            pagination = payload.get('pagination', {}) if isinstance(payload, dict) else {}
+            next_offset = pagination.get('next_offset')
+            if not data or next_offset is None or len(items) >= max_items:
+                break
+            offset = next_offset
+        return items
diff --git a/sp_cli/commands/__init__.py b/sp_cli/commands/__init__.py
new file mode 100644
index 000000000..38d122629
--- /dev/null
+++ b/sp_cli/commands/__init__.py
@@ -0,0 +1 @@
+"""Command groups for the ``sp`` CLI, grouped by resource (noun-verb)."""
diff --git a/sp_cli/commands/auth.py b/sp_cli/commands/auth.py
new file mode 100644
index 000000000..3de56a836
--- /dev/null
+++ b/sp_cli/commands/auth.py
@@ -0,0 +1,47 @@
+"""``sp auth`` — obtain and revoke API tokens."""
+
+import click
+
+from sp_cli.client import ApiError
+from sp_cli.output import render, render_error
+
+
+@click.group()
+def auth() -> None:
+    """Obtain and revoke API tokens."""
+
+
+@auth.command('login')
+@click.option('--email', prompt=True, help='Account email.')
+@click.option('--password', prompt=True, hide_input=True, help='Account password (never stored).')
+@click.option('--name', 'token_name', default='sp-cli', show_default=True, help='Token label.')
+@click.option('--days', 'expires_in_days', type=int, default=30, show_default=True,
+              help='Token lifetime in days (max 90).')
+@click.pass_context
+def auth_login(ctx: click.Context, email: str, password: str,
+               token_name: str, expires_in_days: int) -> None:
+    """Create an API token; store the printed value in SP_API_TOKEN."""
+    client = ctx.obj['client']
+    output = ctx.obj['output']
+    body = {'email': email, 'password': password,
+            'token_name': token_name, 'expires_in_days': expires_in_days}
+    try:
+        result = client.request('POST', '/auth/tokens', json_body=body)
+    except ApiError as error:
+        render_error(error, output)
+        raise SystemExit(error.exit_code)
+    render(result, output)
+
+
+@auth.command('logout')
+@click.pass_context
+def auth_logout(ctx: click.Context) -> None:
+    """Revoke the current API token."""
+    client = ctx.obj['client']
+    output = ctx.obj['output']
+    try:
+        client.request('DELETE', '/auth/tokens/current')
+    except ApiError as error:
+        render_error(error, output)
+        raise SystemExit(error.exit_code)
+    click.echo('Token revoked.')
diff --git a/sp_cli/commands/investigate.py b/sp_cli/commands/investigate.py
new file mode 100644
index 000000000..16c2a2436
--- /dev/null
+++ b/sp_cli/commands/investigate.py
@@ -0,0 +1,72 @@
+"""``sp investigate`` — one-shot triage of a run (status + counts + classified failures)."""
+
+from typing import Any, Dict, List
+
+import click
+
+from sp_cli.client import ApiError
+from sp_cli.output import render, render_error
+from sp_cli.triage import classify_sample, group_by_code, is_failure
+
+_RUN_FIELDS = ('run_id', 'pr_number', 'platform', 'commit_sha', 'branch', 'status', 'github_link')
+
+
+@click.command('investigate')
+@click.argument('run_id', type=int)
+@click.pass_context
+def investigate(ctx: click.Context, run_id: int) -> None:
+    """Triage a run in one shot: run info, pass/fail counts, and classified failures.
+
+    Combines the run detail, summary, and per-result classification into a single
+    report — the whole "what failed and why" investigation in one command.
+    """
+    client = ctx.obj['client']
+    output = ctx.obj['output']
+    try:
+        run = client.get(f'/runs/{run_id}')
+        summary = client.get(f'/runs/{run_id}/summary')
+        samples = client.get_paginated(f'/runs/{run_id}/samples')
+    except ApiError as error:
+        render_error(error, output)
+        raise SystemExit(error.exit_code)
+
+    failures = [classify_sample(s) for s in samples if is_failure(s)]
+    report = {
+        'run': {field: run.get(field) for field in _RUN_FIELDS},
+        'summary': summary,
+        'by_code': group_by_code(failures),
+        'failures': failures,
+    }
+
+    if output == 'json':
+        render(report, 'json')
+    else:
+        _print_digest(report)
+
+
+def _print_digest(report: Dict[str, Any]) -> None:
+    """
+    Print a human-readable investigation digest.
+
+    :param report: The assembled investigation report.
+    :type report: Dict[str, Any]
+    """
+    run = report['run']
+    summary = report['summary']
+    header = (f"Run {run.get('run_id')} · PR {run.get('pr_number')} · {run.get('platform')} · "
+              f"{run.get('commit_sha')} · {str(run.get('status')).upper()}")
+    click.echo(header)
+    click.echo(f"  {summary.get('fail_count')} failed / {summary.get('total_samples')} total"
+               f"  ({summary.get('pass_count')} pass)")
+
+    by_code = report['by_code']
+    if by_code:
+        click.echo()
+        click.echo("  by code:")
+        for code, count in by_code.items():
+            click.echo(f"    {str(count).rjust(4)}  {code}")
+
+    failures: List[Dict[str, Any]] = report['failures']
+    if failures:
+        click.echo()
+        render({'data': failures}, 'table')
diff --git a/sp_cli/commands/regression.py b/sp_cli/commands/regression.py
new file mode 100644
index 000000000..fac9e37b2
--- /dev/null
+++ b/sp_cli/commands/regression.py
@@ -0,0 +1,29 @@
+"""``sp regression`` — list regression-test definitions."""
+
+from typing import Optional
+
+import click
+
+from sp_cli.runner import clean_params, fetch_and_render
+
+
+@click.group()
+def regression() -> None:
+    """List regression-test definitions."""
+
+
+@regression.command('ls')
+@click.option('--category', default=None, help='Filter by category name.')
+@click.option('--tag', default=None, help='Filter by tag.')
+@click.option('--active/--all', 'active', default=None, help='Only active tests (default: all).')
+@click.option('--sample-id', type=int, default=None, help='Filter by sample id.')
+@click.option('--limit', type=int, default=None, help='Page size (max 100).')
+@click.option('--offset', type=int, default=None, help='Pagination offset.')
+@click.pass_context
+def regression_ls(ctx: click.Context, category: Optional[str], tag: Optional[str],
+                  active: Optional[bool], sample_id: Optional[int],
+                  limit: Optional[int], offset: Optional[int]) -> None:
+    """List regression-test definitions."""
+    params = clean_params({'category': category, 'tag': tag, 'active': active,
+                           'sample_id': sample_id, 'limit': limit, 'offset': offset})
+    fetch_and_render(ctx, '/regression-tests', params)
diff --git a/sp_cli/commands/run.py b/sp_cli/commands/run.py
new file mode 100644
index 000000000..64a1b16c8
--- /dev/null
+++ b/sp_cli/commands/run.py
@@ -0,0 +1,183 @@
+"""``sp run`` — list, inspect, and triage CI runs."""
+
+from typing import Any, Dict, List, Optional, Tuple
+
+import click
+
+from sp_cli.client import ApiError
+from sp_cli.output import render, render_error
+from sp_cli.runner import clean_params, fetch_and_render
+from sp_cli.triage import classify_sample, is_failure
+
+
+@click.group()
+def run() -> None:
+    """List, inspect, and triage CI runs."""
+
+
+@run.command('ls')
+@click.option('--status', default=None, help='queued|running|pass|fail|canceled|error|incomplete')
+@click.option('--platform', default=None, help='linux|windows')
+@click.option('--branch', default=None, help='Filter by branch name.')
+@click.option('--commit', 'commit_sha', default=None, help='Full 40-char commit SHA.')
+@click.option('--limit', type=int, default=None, help='Page size (max 100).')
+@click.option('--offset', type=int, default=None, help='Pagination offset.')
+@click.pass_context
+def run_ls(ctx: click.Context, status: Optional[str], platform: Optional[str], branch: Optional[str],
+           commit_sha: Optional[str], limit: Optional[int], offset: Optional[int]) -> None:
+    """List CI runs (newest first)."""
+    params = clean_params({'status': status, 'platform': platform, 'branch': branch,
+                           'commit_sha': commit_sha, 'limit': limit, 'offset': offset})
+    fetch_and_render(ctx, '/runs', params)
+
+
+@run.command('show')
+@click.argument('run_id', type=int)
+@click.pass_context
+def run_show(ctx: click.Context, run_id: int) -> None:
+    """Show a single run's details."""
+    fetch_and_render(ctx, f'/runs/{run_id}')
+
+
+@run.command('summary')
+@click.argument('run_id', type=int)
+@click.pass_context
+def run_summary(ctx: click.Context, run_id: int) -> None:
+    """Show a run's pass/fail summary."""
+    fetch_and_render(ctx, f'/runs/{run_id}/summary')
+
+
+@run.command('failures')
+@click.argument('run_id', type=int)
+@click.pass_context
+def run_failures(ctx: click.Context, run_id: int) -> None:
+    """Show a run's failing tests, each labeled with a classification code."""
+    client = ctx.obj['client']
+    output = ctx.obj['output']
+    try:
+        samples = client.get_paginated(f'/runs/{run_id}/samples')
+    except ApiError as error:
+        render_error(error, output)
+        raise SystemExit(error.exit_code)
+
+    rows = [classify_sample(s) for s in samples if is_failure(s)]
+    render({'data': rows, 'summary': {'failures': len(rows), 'of_total': len(samples)}}, output)
+
+
+@run.command('results')
+@click.argument('run_id', type=int)
+@click.option('--status', default=None, help='pass|fail|skipped|missing_output|running|not_started')
+@click.option('--limit', type=int, default=None, help='Page size (max 100).')
+@click.option('--offset', type=int, default=None, help='Pagination offset.')
+@click.pass_context
+def run_results(ctx: click.Context, run_id: int, status: Optional[str],
+                limit: Optional[int], offset: Optional[int]) -> None:
+    """List all regression-test results in a run."""
+    params = clean_params({'status': status, 'limit': limit, 'offset': offset})
+    fetch_and_render(ctx, f'/runs/{run_id}/samples', params)
+
+
+@run.command('result')
+@click.argument('run_id', type=int)
+@click.argument('sample_id', type=int)
+@click.pass_context
+def run_result(ctx: click.Context, run_id: int, sample_id: int) -> None:
+    """Show full details for a single regression-test result in a run."""
+    fetch_and_render(ctx, f'/runs/{run_id}/samples/{sample_id}')
+
+
+@run.command('diff')
+@click.argument('run_id', type=int)
+@click.argument('sample_id', type=int)
+@click.option('--regression', 'regression_id', type=int, default=None,
+              help='Regression test id (auto-resolved if omitted).')
+@click.option('--output', 'output_id', type=int, default=None,
+              help='Output file id (auto-resolved if omitted).')
+@click.option('--context', 'context_lines', type=int, default=None, help='Diff context lines.')
+@click.pass_context
+def run_diff(ctx: click.Context, run_id: int, sample_id: int, regression_id: Optional[int],
+             output_id: Optional[int], context_lines: Optional[int]) -> None:
+    """Show the expected-vs-actual diff for a failing result.
+
+    Resolves the (regression, output) ids automatically when omitted, so you
+    don't need the hidden ids the web UI requires to build a diff URL.
+    """
+    client = ctx.obj['client']
+    output = ctx.obj['output']
+    try:
+        targets = _resolve_diff_targets(client, run_id, sample_id, regression_id, output_id)
+        if not targets:
+            raise ApiError('not_found', 'No differing output to diff for this result', 404)
+        diffs = []
+        for reg_id, out_id in targets:
+            params = clean_params({'regression_id': reg_id, 'output_id': out_id,
+                                   'context_lines': context_lines})
+            diffs.append(client.get(f'/runs/{run_id}/samples/{sample_id}/diff', params=params))
+    except ApiError as error:
+        render_error(error, output)
+        raise SystemExit(error.exit_code)
+
+    render(diffs[0] if len(diffs) == 1 else {'data': diffs}, output)
+
+
+@run.command('artifacts')
+@click.argument('run_id', type=int)
+@click.pass_context
+def run_artifacts(ctx: click.Context, run_id: int) -> None:
+    """List downloadable artifacts for a run (signed URLs)."""
+    fetch_and_render(ctx, f'/runs/{run_id}/artifacts')
+
+
+@run.command('logs')
+@click.argument('run_id', type=int)
+@click.pass_context
+def run_logs(ctx: click.Context, run_id: int) -> None:
+    """Show raw logs for a run."""
+    fetch_and_render(ctx, f'/runs/{run_id}/logs')
+
+
+@run.command('errors')
+@click.argument('run_id', type=int)
+@click.option('--type', 'error_type', default=None,
+              help='test_failure|exit_code_mismatch|missing_output|diff_mismatch')
+@click.pass_context
+def run_errors(ctx: click.Context, run_id: int, error_type: Optional[str]) -> None:
+    """Show structured test errors for a run."""
+    fetch_and_render(ctx, f'/runs/{run_id}/errors', clean_params({'type': error_type}))
+
+
+def _resolve_diff_targets(client: Any, run_id: int, sample_id: int,
+                          regression_id: Optional[int],
+                          output_id: Optional[int]) -> List[Tuple[int, int]]:
+    """
+    Resolve the (regression_id, output_id) pairs to diff for a result.
+
+    If both ids are supplied, uses them directly. Otherwise fetches the result
+    detail and picks the differing output(s), so the caller never needs the
+    hidden ids the diff endpoint requires.
+
+    :param client: The API client.
+    :type client: Any
+    :param run_id: The run id.
+    :type run_id: int
+    :param sample_id: The result id within the run.
+    :type sample_id: int
+    :param regression_id: Optional explicit regression id.
+    :type regression_id: Optional[int]
+    :param output_id: Optional explicit output id.
+    :type output_id: Optional[int]
+    :return: A list of ``(regression_id, output_id)`` pairs to diff.
+    :rtype: List[Tuple[int, int]]
+    """
+    if regression_id is not None and output_id is not None:
+        return [(regression_id, output_id)]
+
+    detail = client.get(f'/runs/{run_id}/samples/{sample_id}')
+    reg_id = regression_id if regression_id is not None else detail.get('regression_test_id')
+    outputs = detail.get('outputs') or []
+    differing = [o for o in outputs if o.get('status') not in (None, 'match')] or outputs
+
+    if output_id is not None:
+        return [(reg_id, output_id)] if reg_id is not None else []
+    return [(reg_id, o.get('output_id')) for o in differing
+            if reg_id is not None and o.get('output_id') is not None]
diff --git a/sp_cli/commands/sample.py b/sp_cli/commands/sample.py
new file mode 100644
index 000000000..d8bd8ae6c
--- /dev/null
+++ b/sp_cli/commands/sample.py
@@ -0,0 +1,47 @@
+"""``sp sample`` — list and inspect media samples."""
+
+from typing import Optional
+
+import click
+
+from sp_cli.runner import clean_params, fetch_and_render
+
+
+@click.group()
+def sample() -> None:
+    """List and inspect media samples."""
+
+
+@sample.command('ls')
+@click.option('--name', default=None, help='Filter by sample name.')
+@click.option('--tag', default=None, help='Filter by tag.')
+@click.option('--extension', default=None, help='Filter by file extension.')
+@click.option('--limit', type=int, default=None, help='Page size (max 100).')
+@click.option('--offset', type=int, default=None, help='Pagination offset.')
+@click.pass_context
+def sample_ls(ctx: click.Context, name: Optional[str], tag: Optional[str],
+              extension: Optional[str], limit: Optional[int], offset: Optional[int]) -> None:
+    """List known media samples."""
+    params = clean_params({'name': name, 'tag': tag, 'extension': extension,
+                           'limit': limit, 'offset': offset})
+    fetch_and_render(ctx, '/samples', params)
+
+
+@sample.command('show')
+@click.argument('sample_id', type=int)
+@click.pass_context
+def sample_show(ctx: click.Context, sample_id: int) -> None:
+    """Show metadata for a single sample."""
+    fetch_and_render(ctx, f'/samples/{sample_id}')
+
+
+@sample.command('history')
+@click.argument('sample_id', type=int)
+@click.option('--platform', default=None, help='linux|windows')
+@click.option('--limit', type=int, default=None, help='Page size (max 100).')
+@click.pass_context
+def sample_history(ctx: click.Context, sample_id: int, platform: Optional[str],
+                   limit: Optional[int]) -> None:
+    """Show this sample's result history across runs."""
+    params = clean_params({'platform': platform, 'limit': limit})
+    fetch_and_render(ctx, f'/samples/{sample_id}/history', params)
diff --git a/sp_cli/commands/system.py b/sp_cli/commands/system.py
new file mode 100644
index 000000000..cb913a2e2
--- /dev/null
+++ b/sp_cli/commands/system.py
@@ -0,0 +1,23 @@
+"""``sp health`` and ``sp queue`` — system status commands."""
+
+from typing import Optional
+
+import click
+
+from sp_cli.runner import clean_params, fetch_and_render
+
+
+@click.command('health')
+@click.pass_context
+def health(ctx: click.Context) -> None:
+    """Show CI system health and dependency status."""
+    fetch_and_render(ctx, '/system/health')
+
+
+@click.command('queue')
+@click.option('--platform', default=None, help='linux|windows')
+@click.option('--status', default=None, help='queued|running')
+@click.pass_context
+def queue(ctx: click.Context, platform: Optional[str], status: Optional[str]) -> None:
+    """Show queue depth and currently running jobs."""
+    fetch_and_render(ctx, '/system/queue', clean_params({'platform': platform, 'status': status}))
diff --git a/sp_cli/main.py b/sp_cli/main.py
new file mode 100644
index 000000000..47d8c14b3
--- /dev/null
+++ b/sp_cli/main.py
@@ -0,0 +1,51 @@
+"""Entry point and root command group for the ``sp`` CLI."""
+
+from typing import Optional
+
+import click
+
+from sp_cli import __version__
+from sp_cli.client import ApiClient
+from sp_cli.commands.auth import auth
+from sp_cli.commands.investigate import investigate
+from sp_cli.commands.regression import regression
+from sp_cli.commands.run import run
+from sp_cli.commands.sample import sample
+from sp_cli.commands.system import health, queue
+
+DEFAULT_BASE_URL = 'http://localhost:5000/api/v1'
+
+
+@click.group(invoke_without_command=True)
+@click.option('--base-url', envvar='SP_BASE_URL', default=DEFAULT_BASE_URL, show_default=True,
+              help='API base URL incl. the /api/v1 prefix. Env: SP_BASE_URL.')
+@click.option('--token', envvar='SP_API_TOKEN', default=None,
+              help='Bearer token sent with each request. Env: SP_API_TOKEN.')
+@click.option('--output', '-o', type=click.Choice(['json', 'table']), default='json', show_default=True,
+              help='Output format.')
+@click.option('--timeout', type=int, default=30, show_default=True, help='Per-request timeout (seconds).')
+@click.version_option(__version__, prog_name='sp')
+@click.pass_context
+def cli(ctx: click.Context, base_url: str, token: Optional[str], output: str, timeout: int) -> None:
+    """AI-friendly CLI for the CCExtractor CI / Sample Platform.
+
+    Emits JSON by default so it can be driven by agents and scripts. Point it at
+    a running platform with --base-url or the SP_BASE_URL environment variable,
+    and authenticate with a token via --token / SP_API_TOKEN (see `sp auth login`).
+    """
+    ctx.obj = {
+        'client': ApiClient(base_url, token=token, timeout=timeout),
+        'output': output,
+    }
+    if ctx.invoked_subcommand is None:
+        from sp_cli.banner import show_welcome
+        show_welcome()
+
+
+cli.add_command(investigate)
+cli.add_command(run)
+cli.add_command(sample)
+cli.add_command(regression)
+cli.add_command(auth)
+cli.add_command(health)
+cli.add_command(queue)
diff --git a/sp_cli/output.py b/sp_cli/output.py
new file mode 100644
index 000000000..2fa0556d4
--- /dev/null
+++ b/sp_cli/output.py
@@ -0,0 +1,138 @@
+"""Render API responses to the terminal as JSON (default) or a simple table."""
+
+import json
+from typing import Any, Dict, List
+
+import click
+
+from sp_cli.client import ApiError
+
+#: Value types rendered as plain table columns; nested structures are skipped.
+_SCALAR = (str, int, float, bool, type(None))
+
+
+def render(payload: Any, output: str) -> None:
+    """
+    Render a successful API payload in the requested format.
+
+    Handles the API's three shapes: a list wrapper (``{data, pagination}``), a
+    flat single object (run/summary/health), and bare values.
+
+    :param payload: The decoded JSON body returned by the API.
+    :type payload: Any
+    :param output: Either ``json`` or ``table``.
+    :type output: str
+    """
+    if output == 'json':
+        click.echo(json.dumps(payload, indent=2))
+        return
+
+    if isinstance(payload, dict) and isinstance(payload.get('data'), list):
+        _print_rows(payload['data'])
+        footer = _footer(payload)
+        if footer:
+            click.echo(f"\n{footer}")
+    elif isinstance(payload, dict):
+        _print_kv(payload)
+    else:
+        click.echo(json.dumps(payload, indent=2))
+
+
+def render_error(error: ApiError, output: str) -> None:
+    """
+    Render an API error as a JSON envelope on stderr, regardless of output mode.
+
+    :param error: The error to render.
+    :type error: ApiError
+    :param output: The selected output mode (unused; kept for symmetry).
+    :type output: str
+    """
+    envelope: Dict[str, Any] = {'error': {'code': error.code, 'message': error.message}}
+    if error.status is not None:
+        envelope['error']['status'] = error.status
+    if error.details:
+        envelope['error']['details'] = error.details
+    click.echo(json.dumps(envelope, indent=2), err=True)
+
+
+def _footer(payload: Dict[str, Any]) -> str:
+    """
+    Build a one-line footer from a ``summary`` or ``pagination`` block.
+
+    :param payload: The full response payload.
+    :type payload: Dict[str, Any]
+    :return: A footer string (possibly empty).
+    :rtype: str
+    """
+    summary = payload.get('summary')
+    if isinstance(summary, dict):
+        return ' · '.join(f"{k}: {v}" for k, v in summary.items())
+    pagination = payload.get('pagination')
+    if isinstance(pagination, dict):
+        parts = []
+        if pagination.get('total') is not None:
+            parts.append(f"{pagination['total']} total")
+        if pagination.get('next_offset') is not None:
+            parts.append(f"more at offset {pagination['next_offset']}")
+        return ' · '.join(parts)
+    return ''
+
+
+def _print_rows(rows: List[Any]) -> None:
+    """
+    Print a list of flat dicts as an aligned table of their scalar fields.
+
+    :param rows: The list of row dicts to render.
+    :type rows: List[Any]
+    """
+    if not rows:
+        click.echo('(no results)')
+        return
+    if not all(isinstance(row, dict) for row in rows):
+        click.echo(json.dumps(rows, indent=2))
+        return
+
+    columns: List[str] = []
+    for row in rows:
+        for key, value in row.items():
+            if key not in columns and isinstance(value, _SCALAR):
+                columns.append(key)
+
+    widths = {col: len(col) for col in columns}
+    for row in rows:
+        for col in columns:
+            widths[col] = max(widths[col], len(_cell(row.get(col))))
+
+    click.echo('  '.join(col.ljust(widths[col]) for col in columns))
+    click.echo('  '.join('-' * widths[col] for col in columns))
+    for row in rows:
+        click.echo('  '.join(_cell(row.get(col)).ljust(widths[col]) for col in columns))
+
+
+def _print_kv(record: Dict[str, Any]) -> None:
+    """
+    Print a single record as ``key: value`` lines, JSON-encoding nested values.
+
+    :param record: The record to render.
+    :type record: Dict[str, Any]
+    """
+    width = max((len(key) for key in record), default=0)
+    for key, value in record.items():
+        rendered = _cell(value) if isinstance(value, _SCALAR) else json.dumps(value)
+        click.echo(f"{key.ljust(width)} : {rendered}")
+
+
+def _cell(value: Any) -> str:
+    """
+    Format a scalar cell value for table display.
+
+    :param value: The value to format.
+    :type value: Any
+    :return: A string representation (empty string for ``None``).
+    :rtype: str
+    """
+    if value is None:
+        return ''
+    if isinstance(value, bool):
+        return 'true' if value else 'false'
+    return str(value)
diff --git a/sp_cli/runner.py b/sp_cli/runner.py
new file mode 100644
index 000000000..52767722f
--- /dev/null
+++ b/sp_cli/runner.py
@@ -0,0 +1,41 @@
+"""Shared helper that fetches from the API and renders the result or error."""
+
+from typing import Any, Dict, Optional
+
+import click
+
+from sp_cli.client import ApiError
+from sp_cli.output import render, render_error
+
+
+def fetch_and_render(ctx: click.Context, path: str, params: Optional[Dict[str, Any]] = None) -> None:
+    """
+    Fetch a path via the configured client and render it, exiting on error.
+
+    :param ctx: The active Click context (carries the client and output mode).
+    :type ctx: click.Context
+    :param path: API path below ``/api/v1``.
+    :type path: str
+    :param params: Optional query-string parameters.
+    :type params: Optional[Dict[str, Any]]
+    """
+    client = ctx.obj['client']
+    output = ctx.obj['output']
+    try:
+        payload = client.get(path, params=params)
+    except ApiError as error:
+        render_error(error, output)
+        raise SystemExit(error.exit_code)
+    render(payload, output)
+
+
+def clean_params(params: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Drop ``None`` values so unset options are not sent as query parameters.
+
+    :param params: Raw mapping of option names to values.
+    :type params: Dict[str, Any]
+    :return: The mapping without ``None`` values.
+    :rtype: Dict[str, Any]
+    """
+    return {key: value for key, value in params.items() if value is not None}
diff --git a/sp_cli/triage.py b/sp_cli/triage.py
new file mode 100644
index 000000000..bbfc1e84e
--- /dev/null
+++ b/sp_cli/triage.py
@@ -0,0 +1,85 @@
+"""Triage helpers: turn raw RunSample results into classified failure rows.
+
+Shared by ``sp run failures`` and ``sp investigate`` so both label failures the
+same way. The classification itself lives in :mod:`sp_cli.classifier`; this module
+adapts a ``RunSample`` (from ``/runs/{id}/samples``) into a flat, agent-friendly row.
+"""
+
+from typing import Any, Dict, List
+
+from sp_cli.classifier import classify
+
+#: RunSample statuses that count as a failure worth triaging.
+FAILURE_STATUSES = ('fail', 'missing_output')
+
+
+def is_failure(sample: Dict[str, Any]) -> bool:
+    """
+    Report whether a RunSample result is a failure.
+
+    :param sample: One ``RunSample`` object.
+    :type sample: Dict[str, Any]
+    :return: True if the result failed or produced no output.
+    :rtype: bool
+    """
+    return sample.get('status') in FAILURE_STATUSES
+
+
+def has_output_diff(sample: Dict[str, Any]) -> bool:
+    """
+    Decide whether a failing sample recorded a differing output file.
+
+    Prefers the per-output ``status`` when present; otherwise falls back to
+    "failed but the exit code matched, so the failure must be an output diff."
+
+    :param sample: One ``RunSample`` object.
+    :type sample: Dict[str, Any]
+    :return: True if an output differed from expected.
+    :rtype: bool
+    """
+    outputs = sample.get('outputs') or []
+    if outputs:
+        return any(item.get('status') not in (None, 'match') for item in outputs)
+    return sample.get('status') == 'fail' and sample.get('exit_code') == sample.get('expected_rc')
+
+
+def classify_sample(sample: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Map a RunSample result onto a classified failure row.
+
+    :param sample: One ``RunSample`` object from ``/runs/{id}/samples``.
+    :type sample: Dict[str, Any]
+    :return: A flat row with ids plus the classification code, confidence and reason.
+    :rtype: Dict[str, Any]
+    """
+    label = classify(
+        sample.get('exit_code'), sample.get('expected_rc'),
+        missing_output=(sample.get('status') == 'missing_output'),
+        has_output_diff=has_output_diff(sample),
+    )
+    return {
+        'regression_test_id': sample.get('regression_test_id'),
+        'sample_id': sample.get('sample_id'),
+        'sample_name': sample.get('sample_name'),
+        'category': sample.get('category'),
+        'exit_code': sample.get('exit_code'),
+        'expected_rc': sample.get('expected_rc'),
+        'code': label['code'],
+        'confidence': label['confidence'],
+        'reason': label['reason'],
+    }
+
+
+def group_by_code(failures: List[Dict[str, Any]]) -> Dict[str, int]:
+    """
+    Count classified failures by their code.
+
+    :param failures: Classified failure rows.
+    :type failures: List[Dict[str, Any]]
+    :return: A mapping of code → count, highest first.
+    :rtype: Dict[str, int]
+    """
+    counts: Dict[str, int] = {}
+    for failure in failures:
+        counts[failure['code']] = counts.get(failure['code'], 0) + 1
+    return dict(sorted(counts.items(), key=lambda item: item[1], reverse=True))
diff --git a/tests/test_cli/__init__.py b/tests/test_cli/__init__.py
new file mode 100644
index 000000000..ccccefe2c
--- /dev/null
+++ b/tests/test_cli/__init__.py
@@ -0,0 +1 @@
+"""Tests for the sp CLI (sp_cli)."""
diff --git a/tests/test_cli/test_classifier.py b/tests/test_cli/test_classifier.py
new file mode 100644
index 000000000..34df46cf9
--- /dev/null
+++ b/tests/test_cli/test_classifier.py
@@ -0,0 +1,69 @@
+"""Tests for the rule-based failure classifier, using real examples from run #9299."""
+
+import unittest
+
+from sp_cli import classifier
+
+
+class ClassifierTests(unittest.TestCase):
+    """Each case is grounded in a real failure observed in the friction study."""
+
+    def test_exit_code_mismatch(self):
+        """`10 (Expected 0)` — the common CEA-708 failure in run #9299."""
+        result = classifier.classify(10, 0)
+        self.assertEqual(result["code"], classifier.CODE_EXIT_CODE_MISMATCH)
+        self.assertEqual(result["confidence"], "high")
+        self.assertIn("10", result["reason"])
+
+    def test_windows_segfault_normalized(self):
+        """`-1073741819` (0xC0000005) on Windows — the DVB failure in #9299."""
+        result = classifier.classify(-1073741819, 0)
+        self.assertEqual(result["code"], classifier.CODE_SEGFAULT)
+        self.assertEqual(result["confidence"], "high")
+
+    def test_linux_segfault_normalized(self):
+        """`139` on Linux is the same crash — must map to the same code."""
+        self.assertEqual(classifier.classify(139, 0)["code"], classifier.CODE_SEGFAULT)
+
+    def test_abort(self):
+        """`134` (SIGABRT) classifies as ABORT."""
+        self.assertEqual(classifier.classify(134, 0)["code"], classifier.CODE_ABORT)
+
+    def test_timeout(self):
+        """`124` (timeout) classifies as TIMEOUT."""
+        self.assertEqual(classifier.classify(124, 0)["code"], classifier.CODE_TIMEOUT)
+
+    def test_missing_output(self):
+        """'No output generated but there should be' — exit matches but output missing."""
+        result = classifier.classify(0, 0, missing_output=True)
+        self.assertEqual(result["code"], classifier.CODE_MISSING_OUTPUT)
+
+    def test_output_diff(self):
+        """Exit code matches but the output file differs."""
+        result = classifier.classify(0, 0, has_output_diff=True)
+        self.assertEqual(result["code"], classifier.CODE_OUTPUT_DIFF)
+        self.assertEqual(result["confidence"], "medium")
+
+    def test_pass(self):
+        """Exit matches, no diff, nothing missing → PASS."""
+        self.assertEqual(classifier.classify(0, 0)["code"], classifier.CODE_PASS)
+
+    def test_crash_beats_exit_mismatch(self):
+        """A segfault is reported as SEGFAULT, not a generic exit mismatch."""
+        self.assertEqual(classifier.classify(139, 0)["code"], classifier.CODE_SEGFAULT)
+
+    def test_regression_flag_true_when_previously_passed(self):
+        """A failure on a test that has passed before is a real regression."""
+        self.assertTrue(classifier.classify(10, 0, has_ever_passed=True)["regression"])
+
+    def test_regression_flag_false_when_never_passed(self):
+        """A failure on a test that never passed is pre-existing (never worked)."""
+        self.assertFalse(classifier.classify(10, 0, has_ever_passed=False)["regression"])
+
+    def test_regression_flag_none_when_unknown(self):
+        """Without history, the regression flag is None (unknown)."""
+        self.assertIsNone(classifier.classify(10, 0)["regression"])
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_cli/test_cli.py b/tests/test_cli/test_cli.py
new file mode 100644
index 000000000..b1744284d
--- /dev/null
+++ b/tests/test_cli/test_cli.py
@@ -0,0 +1,156 @@
+"""Tests for the sp CLI command surface, mocking the API client."""
+
+import json
+import unittest
+from unittest import mock
+
+from click.testing import CliRunner
+
+from sp_cli.client import ApiError
+from sp_cli.main import cli
+
+RUNS_PAGE = {
+    'data': [{'run_id': 9299, 'status': 'fail', 'platform': 'windows', 'commit_sha': 'e6cd34e'}],
+    'pagination': {'limit': 50, 'offset': 0, 'total': 1, 'next_offset': None},
+}
+
+# A run's results: a segfault, an exit mismatch, a missing output, and a pass.
+RUN_SAMPLES = [
+    {'regression_test_id': 18, 'sample_name': 'dvb', 'category': 'DVB',
+     'status': 'fail', 'exit_code': -1073741819, 'expected_rc': 0, 'outputs': []},
+    {'regression_test_id': 137, 'sample_name': 'cea708', 'category': 'CEA-708',
+     'status': 'fail', 'exit_code': 10, 'expected_rc': 0, 'outputs': []},
+    {'regression_test_id': 7, 'sample_name': 'broken', 'category': 'Broken',
+     'status': 'missing_output', 'exit_code': 0, 'expected_rc': 0, 'outputs': []},
+    {'regression_test_id': 1, 'sample_name': 'ok', 'category': 'General',
+     'status': 'pass', 'exit_code': 0, 'expected_rc': 0, 'outputs': []},
+]
+
+
+class CliCommandTests(unittest.TestCase):
+    """Exercise the CLI commands with a mocked client."""
+
+    def setUp(self):
+        """Create a runner for each test."""
+        self.runner = CliRunner()
+
+    @mock.patch('sp_cli.client.ApiClient.get')
+    def test_run_ls_calls_runs_with_filters(self, mock_get):
+        """`run ls` hits /runs and forwards set filters only."""
+        mock_get.return_value = RUNS_PAGE
+        result = self.runner.invoke(cli, ['run', 'ls', '--platform', 'windows'])
+
+        self.assertEqual(result.exit_code, 0)
+        mock_get.assert_called_once_with('/runs', params={'platform': 'windows'})
+
+    @mock.patch('sp_cli.client.ApiClient.get')
+    def test_run_show(self, mock_get):
+        """`run show <id>` targets the run detail path."""
+        mock_get.return_value = {'run_id': 9299, 'status': 'fail'}
+        result = self.runner.invoke(cli, ['run', 'show', '9299'])
+
+        self.assertEqual(result.exit_code, 0)
+        mock_get.assert_called_once_with('/runs/9299', params=None)
+
+    @mock.patch('sp_cli.client.ApiClient.get_paginated')
+    def test_run_failures_classifies(self, mock_paginated):
+        """`run failures` keeps only failures and labels each with a code."""
+        mock_paginated.return_value = RUN_SAMPLES
+        result = self.runner.invoke(cli, ['run', 'failures', '9299'])
+
+        self.assertEqual(result.exit_code, 0)
+        mock_paginated.assert_called_once_with('/runs/9299/samples')
+        data = json.loads(result.output)
+        codes = {row['regression_test_id']: row['code'] for row in data['data']}
+        self.assertEqual(codes, {18: 'SEGFAULT', 137: 'EXIT_CODE_MISMATCH', 7: 'MISSING_OUTPUT'})
+        self.assertEqual(data['summary'], {'failures': 3, 'of_total': 4})
+
+    @mock.patch('sp_cli.client.ApiClient.get_paginated')
+    def test_run_failures_table_output(self, mock_paginated):
+        """Table mode renders the classification columns."""
+        mock_paginated.return_value = RUN_SAMPLES
+        result = self.runner.invoke(cli, ['-o', 'table', 'run', 'failures', '9299'])
+
+        self.assertEqual(result.exit_code, 0)
+        self.assertIn('SEGFAULT', result.output)
+        self.assertIn('code', result.output)
+
+    @mock.patch('sp_cli.client.ApiClient.get')
+    def test_sample_ls(self, mock_get):
+        """`sample ls` hits /samples."""
+        mock_get.return_value = {'data': [], 'pagination': {'total': 0, 'next_offset': None}}
+        result = self.runner.invoke(cli, ['sample', 'ls', '--tag', 'teletext'])
+
+        self.assertEqual(result.exit_code, 0)
+        mock_get.assert_called_once_with('/samples', params={'tag': 'teletext'})
+
+    @mock.patch('sp_cli.client.ApiClient.get')
+    def test_health(self, mock_get):
+        """`sp health` hits /system/health."""
+        mock_get.return_value = {'status': 'ok', 'dependencies': []}
+        result = self.runner.invoke(cli, ['health'])
+
+        self.assertEqual(result.exit_code, 0)
+        mock_get.assert_called_once_with('/system/health', params=None)
+
+    @mock.patch('sp_cli.client.ApiClient.get')
+    def test_not_found_maps_to_exit_code_and_stderr(self, mock_get):
+        """A not-found error exits 4 with a JSON envelope on stderr."""
+        mock_get.side_effect = ApiError('not_found', 'Run 9 not found', 404)
+        result = self.runner.invoke(cli, ['run', 'show', '9'])
+
+        self.assertEqual(result.exit_code, 4)
+        self.assertEqual(json.loads(result.stderr)['error']['code'], 'not_found')
+
+    @mock.patch('sp_cli.client.ApiClient.get')
+    def test_run_result(self, mock_get):
+        """`run result <run> <sample>` targets the result-detail path."""
+        mock_get.return_value = {'regression_test_id': 137, 'status': 'fail'}
+        result = self.runner.invoke(cli, ['run', 'result', '9299', '5'])
+
+        self.assertEqual(result.exit_code, 0)
+        mock_get.assert_called_once_with('/runs/9299/samples/5', params=None)
+
+    @mock.patch('sp_cli.client.ApiClient.get')
+    def test_run_diff_auto_resolves_hidden_ids(self, mock_get):
+        """`run diff` fetches the result detail to resolve regression_id + output_id."""
+        mock_get.side_effect = [
+            {'regression_test_id': 137, 'outputs': [{'output_id': 2, 'status': 'different'}]},
+            {'status': 'different', 'hunks': []},
+        ]
+        result = self.runner.invoke(cli, ['run', 'diff', '9299', '5'])
+
+        self.assertEqual(result.exit_code, 0)
+        self.assertEqual(mock_get.call_count, 2)
+        args, kwargs = mock_get.call_args
+        self.assertEqual(args[0], '/runs/9299/samples/5/diff')
+        self.assertEqual(kwargs['params'], {'regression_id': 137, 'output_id': 2})
+
+    @mock.patch('sp_cli.client.ApiClient.get')
+    def test_run_diff_with_explicit_ids_skips_resolution(self, mock_get):
+        """Passing --regression and --output diffs directly with no extra call."""
+        mock_get.return_value = {'status': 'different', 'hunks': []}
+        result = self.runner.invoke(cli, ['run', 'diff', '9299', '5', '--regression', '137', '--output', '2'])
+
+        self.assertEqual(result.exit_code, 0)
+        mock_get.assert_called_once_with('/runs/9299/samples/5/diff',
+                                         params={'regression_id': 137, 'output_id': 2})
+
+    @mock.patch('sp_cli.client.ApiClient.get_paginated')
+    @mock.patch('sp_cli.client.ApiClient.get')
+    def test_investigate_combines_run_summary_and_failures(self, mock_get, mock_paginated):
+        """`investigate` merges run detail, summary, and classified failures."""
+        mock_get.side_effect = [
+            {'run_id': 9299, 'pr_number': 2264, 'platform': 'windows', 'status': 'fail'},
+            {'run_id': 9299, 'total_samples': 4, 'pass_count': 1, 'fail_count': 3},
+        ]
+        mock_paginated.return_value = RUN_SAMPLES
+        result = self.runner.invoke(cli, ['investigate', '9299'])
+
+        self.assertEqual(result.exit_code, 0)
+        report = json.loads(result.output)
+        self.assertEqual(report['run']['pr_number'], 2264)
+        self.assertEqual(report['summary']['fail_count'], 3)
+        self.assertEqual(report['by_code'],
+                         {'SEGFAULT': 1, 'EXIT_CODE_MISMATCH': 1, 'MISSING_OUTPUT': 1})
+        self.assertEqual(len(report['failures']), 3)
diff --git a/tests/test_cli/test_client.py b/tests/test_cli/test_client.py
new file mode 100644
index 000000000..ffc5086cb
--- /dev/null
+++ b/tests/test_cli/test_client.py
@@ -0,0 +1,90 @@
+"""Tests for the CLI's HTTP client, mocking the requests session."""
+
+import unittest
+from unittest import mock
+
+import requests  # type: ignore[import-untyped]
+
+from sp_cli.client import ApiClient, ApiError
+
+
+class FakeResponse:
+    """Minimal stand-in for a requests Response."""
+
+    def __init__(self, status_code, json_data=None, raise_json=False):
+        """Store the canned status and body."""
+        self.status_code = status_code
+        self._json = json_data
+        self._raise_json = raise_json
+
+    def json(self):
+        """Return the canned JSON body or raise like requests does on non-JSON."""
+        if self._raise_json:
+            raise ValueError('No JSON could be decoded')
+        return self._json
+
+
+class ApiClientTests(unittest.TestCase):
+    """Exercise request building and error mapping in the client."""
+
+    @mock.patch('requests.Session.request')
+    def test_get_returns_payload_and_builds_url(self, mock_request):
+        """A 2xx response is returned and the API prefix is applied."""
+        mock_request.return_value = FakeResponse(200, {'data': []})
+        client = ApiClient('http://host/api/v1')
+
+        self.assertEqual(client.get('/runs'), {'data': []})
+        args, _ = mock_request.call_args
+        self.assertEqual(args[0], 'GET')
+        self.assertEqual(args[1], 'http://host/api/v1/runs')
+
+    @mock.patch('requests.Session.request')
+    def test_204_returns_none(self, mock_request):
+        """A 204 (e.g. token revoke) returns None, not a parse error."""
+        mock_request.return_value = FakeResponse(204)
+        self.assertIsNone(ApiClient('http://host').request('DELETE', '/auth/tokens/current'))
+
+    @mock.patch('requests.Session.request')
+    def test_error_codes_map_to_exit_codes(self, mock_request):
+        """Each HTTP error maps to its documented exit code."""
+        cases = {404: 4, 422: 5, 400: 5, 401: 6, 403: 6, 429: 7}
+        client = ApiClient('http://host')
+        for status, expected_exit in cases.items():
+            mock_request.return_value = FakeResponse(status, {'code': 'x', 'message': 'm'})
+            with self.assertRaises(ApiError) as caught:
+                client.get('/runs/9')
+            self.assertEqual(caught.exception.exit_code, expected_exit, f'status {status}')
+
+    @mock.patch('requests.Session.request')
+    def test_token_is_sent_as_bearer_header(self, mock_request):
+        """A configured token is sent as an Authorization header."""
+        mock_request.return_value = FakeResponse(200, {})
+        ApiClient('http://host', token='secret').get('/runs')
+        _, kwargs = mock_request.call_args
+        self.assertEqual(kwargs['headers']['Authorization'], 'Bearer secret')
+
+    @mock.patch('requests.Session.request', side_effect=requests.RequestException('boom'))
+    def test_connection_failure(self, mock_request):
+        """A transport failure maps to a connection_error with exit code 3."""
+        with self.assertRaises(ApiError) as caught:
+            ApiClient('http://host').get('/runs')
+        self.assertEqual(caught.exception.code, 'connection_error')
+        self.assertEqual(caught.exception.exit_code, 3)
+
+    @mock.patch('requests.Session.request')
+    def test_non_json_body_raises_invalid_response(self, mock_request):
+        """A non-JSON body raises invalid_response rather than crashing."""
+        mock_request.return_value = FakeResponse(500, raise_json=True)
+        with self.assertRaises(ApiError) as caught:
+            ApiClient('http://host').get('/runs')
+        self.assertEqual(caught.exception.code, 'invalid_response')
+
+    @mock.patch('requests.Session.request')
+    def test_get_paginated_follows_offset(self, mock_request):
+        """Pagination is followed across pages until next_offset is null."""
+        mock_request.side_effect = [
+            FakeResponse(200, {'data': [1, 2, 3], 'pagination': {'next_offset': 3}}),
+            FakeResponse(200, {'data': [4, 5], 'pagination': {'next_offset': None}}),
+        ]
+        items = ApiClient('http://host').get_paginated('/runs/9/samples')
+        self.assertEqual(items, [1, 2, 3, 4, 5])

From 50379f4dbb176f79751d96938295e2f2fcf29c27 Mon Sep 17 00:00:00 2001
From: GAURAV KARMAKAR <gaurav.k@graeon.ai>
Date: Sun, 14 Jun 2026 15:27:05 +0530
Subject: [PATCH 2/4] fix(cli): align sp run diff with REST API contract

Validated against pulk17:Feature/REST-API-Endpoints (PR #1117):
- diff is keyed by media sample_id + regression_id + output_id as PATH
  segments, not query params on /samples/<id>/diff
- the diff route's <sample_id> slot is the media sample id (different from
  the regression_test_id used by the result-detail route); pull it from
  detail['sample_id'] instead of reusing the CLI argument
- per-output status vocab is pass|fail|missing_output, not 'match'
---
 sp_cli/commands/run.py | 50 ++++++++++++++++++++++++------------------
 1 file changed, 29 insertions(+), 21 deletions(-)

diff --git a/sp_cli/commands/run.py b/sp_cli/commands/run.py
index 64a1b16c8..b9f79f2d6 100644
--- a/sp_cli/commands/run.py
+++ b/sp_cli/commands/run.py
@@ -109,10 +109,12 @@ def run_diff(ctx: click.Context, run_id: int, sample_id: int, regression_id: Opt
         if not targets:
             raise ApiError('not_found', 'No differing output to diff for this result', 404)
         diffs = []
-        for reg_id, out_id in targets:
-            params = clean_params({'regression_id': reg_id, 'output_id': out_id,
-                                   'context_lines': context_lines})
-            diffs.append(client.get(f'/runs/{run_id}/samples/{sample_id}/diff', params=params))
+        for media_sample_id, reg_id, out_id in targets:
+            params = clean_params({'context_lines': context_lines})
+            diffs.append(client.get(
+                f'/runs/{run_id}/samples/{media_sample_id}'
+                f'/regression-tests/{reg_id}/outputs/{out_id}/diff',
+                params=params))
     except ApiError as error:
         render_error(error, output)
         raise SystemExit(error.exit_code)
@@ -148,36 +150,42 @@ def run_errors(ctx: click.Context, run_id: int, error_type: Optional[str]) -> No
 
 def _resolve_diff_targets(client: Any, run_id: int, sample_id: int,
                           regression_id: Optional[int],
-                          output_id: Optional[int]) -> List[Tuple[int, int]]:
+                          output_id: Optional[int]) -> List[Tuple[int, int, int]]:
     """
-    Resolve the (regression_id, output_id) pairs to diff for a result.
+    Resolve the (media_sample_id, regression_id, output_id) triples to diff.
 
-    If both ids are supplied, uses them directly. Otherwise fetches the result
-    detail and picks the differing output(s), so the caller never needs the
-    hidden ids the diff endpoint requires.
+    The diff endpoint is keyed by the *media* sample id, the regression test
+    id, and the output file id -- three different numbers. The CLI's
+    ``sample_id`` argument is the result id within the run (the regression test
+    id), so we always fetch the result detail to recover the media sample id
+    (``detail['sample_id']``) and the differing output(s), and let the caller
+    pass explicit ids only to narrow which output(s) to diff.
 
     :param client: The API client.
     :type client: Any
     :param run_id: The run id.
     :type run_id: int
-    :param sample_id: The result id within the run.
+    :param sample_id: The result id within the run (the regression test id).
     :type sample_id: int
-    :param regression_id: Optional explicit regression id.
+    :param regression_id: Optional explicit regression id override.
     :type regression_id: Optional[int]
-    :param output_id: Optional explicit output id.
+    :param output_id: Optional explicit output id to diff.
     :type output_id: Optional[int]
-    :return: A list of ``(regression_id, output_id)`` pairs to diff.
-    :rtype: List[Tuple[int, int]]
+    :return: A list of ``(media_sample_id, regression_id, output_id)`` triples.
+    :rtype: List[Tuple[int, int, int]]
     """
-    if regression_id is not None and output_id is not None:
-        return [(regression_id, output_id)]
-
     detail = client.get(f'/runs/{run_id}/samples/{sample_id}')
+    media_sample_id = detail.get('sample_id')
     reg_id = regression_id if regression_id is not None else detail.get('regression_test_id')
+    if media_sample_id is None or reg_id is None:
+        return []
+
     outputs = detail.get('outputs') or []
-    differing = [o for o in outputs if o.get('status') not in (None, 'match')] or outputs
+    # The API reports per-output status as pass|fail|missing_output; anything
+    # not 'pass' is worth diffing. Fall back to all outputs if none qualify.
+    differing = [o for o in outputs if o.get('status') not in (None, 'pass')] or outputs
 
     if output_id is not None:
-        return [(reg_id, output_id)] if reg_id is not None else []
-    return [(reg_id, o.get('output_id')) for o in differing
-            if reg_id is not None and o.get('output_id') is not None]
+        return [(media_sample_id, reg_id, output_id)]
+    return [(media_sample_id, reg_id, o.get('output_id')) for o in differing
+            if o.get('output_id') is not None]

From 78a9e8ed8d0fd715b26ea0670fd781c2a4740212 Mon Sep 17 00:00:00 2001
From: GAURAV KARMAKAR <gaurav.k@graeon.ai>
Date: Sun, 14 Jun 2026 16:45:54 +0530
Subject: [PATCH 3/4] test(cli): update diff tests for path-based REST API
 contract

---
 tests/test_cli/test_cli.py | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/tests/test_cli/test_cli.py b/tests/test_cli/test_cli.py
index b1744284d..202e37904 100644
--- a/tests/test_cli/test_cli.py
+++ b/tests/test_cli/test_cli.py
@@ -113,9 +113,10 @@ def test_run_result(self, mock_get):
 
     @mock.patch('sp_cli.client.ApiClient.get')
     def test_run_diff_auto_resolves_hidden_ids(self, mock_get):
-        """`run diff` fetches the result detail to resolve regression_id + output_id."""
+        """`run diff` resolves the media sample id + regression/output ids from detail."""
         mock_get.side_effect = [
-            {'regression_test_id': 137, 'outputs': [{'output_id': 2, 'status': 'different'}]},
+            {'regression_test_id': 137, 'sample_id': 42,
+             'outputs': [{'output_id': 2, 'status': 'fail'}]},
             {'status': 'different', 'hunks': []},
         ]
         result = self.runner.invoke(cli, ['run', 'diff', '9299', '5'])
@@ -123,18 +124,23 @@ def test_run_diff_auto_resolves_hidden_ids(self, mock_get):
         self.assertEqual(result.exit_code, 0)
         self.assertEqual(mock_get.call_count, 2)
         args, kwargs = mock_get.call_args
-        self.assertEqual(args[0], '/runs/9299/samples/5/diff')
-        self.assertEqual(kwargs['params'], {'regression_id': 137, 'output_id': 2})
+        self.assertEqual(args[0], '/runs/9299/samples/42/regression-tests/137/outputs/2/diff')
+        self.assertEqual(kwargs['params'], {})
 
     @mock.patch('sp_cli.client.ApiClient.get')
-    def test_run_diff_with_explicit_ids_skips_resolution(self, mock_get):
-        """Passing --regression and --output diffs directly with no extra call."""
-        mock_get.return_value = {'status': 'different', 'hunks': []}
+    def test_run_diff_with_explicit_ids_uses_media_sample_from_detail(self, mock_get):
+        """Explicit --regression/--output still fetch detail for the media sample id."""
+        mock_get.side_effect = [
+            {'regression_test_id': 137, 'sample_id': 42, 'outputs': []},
+            {'status': 'different', 'hunks': []},
+        ]
         result = self.runner.invoke(cli, ['run', 'diff', '9299', '5', '--regression', '137', '--output', '2'])
 
         self.assertEqual(result.exit_code, 0)
-        mock_get.assert_called_once_with('/runs/9299/samples/5/diff',
-                                         params={'regression_id': 137, 'output_id': 2})
+        self.assertEqual(mock_get.call_count, 2)
+        args, kwargs = mock_get.call_args
+        self.assertEqual(args[0], '/runs/9299/samples/42/regression-tests/137/outputs/2/diff')
+        self.assertEqual(kwargs['params'], {})
 
     @mock.patch('sp_cli.client.ApiClient.get_paginated')
     @mock.patch('sp_cli.client.ApiClient.get')

From 36fde366eebec22cc40f075ac5bf6d64ffa823d0 Mon Sep 17 00:00:00 2001
From: GAURAV KARMAKAR <gaurav.k@graeon.ai>
Date: Sun, 14 Jun 2026 18:13:42 +0530
Subject: [PATCH 4/4] feat(cli): add run approve-baseline + document logs
 access control

Aligns the CLI with the mentor's review of the REST API (PR #1117):
- new 'sp run approve-baseline' (POST baseline-approval): requires explicit
  --regression/--output (destructive), maps --remove-variants to the API's
  remove_variants, resolves the media sample id from result detail (the
  endpoint's <sample_id> slot is the media sample, not the result id), and
  carries the cross-platform warning + admin-only note in its help
- 'sp run logs' help now states it requires contributor/admin privileges

Read-only behaviour is unchanged; approve-baseline is the CLI's only
mutating command besides auth.
---
 sp_cli/commands/run.py     | 47 +++++++++++++++++++++++++++++++++++++-
 tests/test_cli/test_cli.py | 24 +++++++++++++++++++
 2 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/sp_cli/commands/run.py b/sp_cli/commands/run.py
index b9f79f2d6..b304248e9 100644
--- a/sp_cli/commands/run.py
+++ b/sp_cli/commands/run.py
@@ -122,6 +122,51 @@ def run_diff(ctx: click.Context, run_id: int, sample_id: int, regression_id: Opt
     render(diffs[0] if len(diffs) == 1 else {'data': diffs}, output)
 
 
+@run.command('approve-baseline')
+@click.argument('run_id', type=int)
+@click.argument('sample_id', type=int)
+@click.option('--regression', 'regression_id', type=int, required=True,
+              help='Regression test id of the result to approve.')
+@click.option('--output', 'output_id', type=int, required=True,
+              help="Output file id whose actual output becomes the new baseline.")
+@click.option('--remove-variants', is_flag=True, default=False,
+              help='Remove all platform-specific variants (see WARNING below).')
+@click.pass_context
+def run_approve_baseline(ctx: click.Context, run_id: int, sample_id: int,
+                         regression_id: int, output_id: int, remove_variants: bool) -> None:
+    """Approve a result's actual output as the new expected baseline.
+
+    Requires admin privileges (the ``baselines:write`` scope).
+
+    WARNING: --remove-variants makes this output the single source of truth
+    across ALL platforms by deleting every platform-specific variant. This
+    applies globally and cannot be undone from the CLI -- use with care.
+
+    --regression and --output are required (no auto-resolution): approving a
+    baseline is destructive, so the exact target must be stated explicitly.
+    """
+    client = ctx.obj['client']
+    output = ctx.obj['output']
+    try:
+        # The endpoint's <sample_id> path slot is the *media* sample id, which
+        # differs from the regression-result id passed on the command line.
+        # Resolve it from the result detail (same contract as `run diff`).
+        detail = client.get(f'/runs/{run_id}/samples/{sample_id}')
+        media_sample_id = detail.get('sample_id')
+        if media_sample_id is None:
+            raise ApiError('not_found', 'Could not resolve the media sample for this result', 404)
+        body = {'regression_id': regression_id, 'output_id': output_id,
+                'remove_variants': remove_variants}
+        result = client.request(
+            'POST', f'/runs/{run_id}/samples/{media_sample_id}/baseline-approval',
+            json_body=body)
+    except ApiError as error:
+        render_error(error, output)
+        raise SystemExit(error.exit_code)
+
+    render(result, output)
+
+
 @run.command('artifacts')
 @click.argument('run_id', type=int)
 @click.pass_context
@@ -134,7 +179,7 @@ def run_artifacts(ctx: click.Context, run_id: int) -> None:
 @click.argument('run_id', type=int)
 @click.pass_context
 def run_logs(ctx: click.Context, run_id: int) -> None:
-    """Show raw logs for a run."""
+    """Show raw logs for a run (requires contributor or admin privileges)."""
     fetch_and_render(ctx, f'/runs/{run_id}/logs')
 
 
diff --git a/tests/test_cli/test_cli.py b/tests/test_cli/test_cli.py
index 202e37904..4685d8247 100644
--- a/tests/test_cli/test_cli.py
+++ b/tests/test_cli/test_cli.py
@@ -142,6 +142,30 @@ def test_run_diff_with_explicit_ids_uses_media_sample_from_detail(self, mock_get
         self.assertEqual(args[0], '/runs/9299/samples/42/regression-tests/137/outputs/2/diff')
         self.assertEqual(kwargs['params'], {})
 
+    @mock.patch('sp_cli.client.ApiClient.request')
+    @mock.patch('sp_cli.client.ApiClient.get')
+    def test_run_approve_baseline_resolves_media_sample_and_posts(self, mock_get, mock_request):
+        """`run approve-baseline` POSTs to the media-sample path resolved from detail."""
+        mock_get.return_value = {'regression_test_id': 137, 'sample_id': 42, 'outputs': []}
+        mock_request.return_value = {'status': 'approved', 'run_id': 9299, 'sample_id': 42,
+                                     'regression_id': 137, 'output_id': 2}
+        result = self.runner.invoke(cli, ['run', 'approve-baseline', '9299', '5',
+                                          '--regression', '137', '--output', '2', '--remove-variants'])
+
+        self.assertEqual(result.exit_code, 0)
+        mock_get.assert_called_once_with('/runs/9299/samples/5')
+        mock_request.assert_called_once_with(
+            'POST', '/runs/9299/samples/42/baseline-approval',
+            json_body={'regression_id': 137, 'output_id': 2, 'remove_variants': True})
+
+    @mock.patch('sp_cli.client.ApiClient.get')
+    def test_run_approve_baseline_requires_regression_and_output(self, mock_get):
+        """Approving a baseline refuses to run without the explicit target ids."""
+        result = self.runner.invoke(cli, ['run', 'approve-baseline', '9299', '5'])
+
+        self.assertNotEqual(result.exit_code, 0)
+        mock_get.assert_not_called()
+
     @mock.patch('sp_cli.client.ApiClient.get_paginated')
     @mock.patch('sp_cli.client.ApiClient.get')
     def test_investigate_combines_run_summary_and_failures(self, mock_get, mock_paginated):