From c5564ff3e551fd118103ad5cb76694c5e21f48bd Mon Sep 17 00:00:00 2001 From: GAURAV KARMAKAR Date: Sun, 14 Jun 2026 15:15:10 +0530 Subject: [PATCH 1/4] WIP: sp_cli command-line client for Sample Platform Adds the sp_cli package (client, commands, runner, output, classifier, triage) plus test_cli suite and pyproject packaging. --- .gitignore | 3 + pyproject.toml | 16 +++ run.py | 12 +- sp_cli/__init__.py | 9 ++ sp_cli/__main__.py | 6 + sp_cli/banner.py | 54 +++++++++ sp_cli/classifier.py | 110 ++++++++++++++++++ sp_cli/client.py | 168 +++++++++++++++++++++++++++ sp_cli/commands/__init__.py | 1 + sp_cli/commands/auth.py | 47 ++++++++ sp_cli/commands/investigate.py | 72 ++++++++++++ sp_cli/commands/regression.py | 29 +++++ sp_cli/commands/run.py | 183 ++++++++++++++++++++++++++++++ sp_cli/commands/sample.py | 47 ++++++++ sp_cli/commands/system.py | 23 ++++ sp_cli/main.py | 51 +++++++++ sp_cli/output.py | 138 ++++++++++++++++++++++ sp_cli/runner.py | 41 +++++++ sp_cli/triage.py | 85 ++++++++++++++ tests/test_cli/__init__.py | 1 + tests/test_cli/test_classifier.py | 69 +++++++++++ tests/test_cli/test_cli.py | 156 +++++++++++++++++++++++++ tests/test_cli/test_client.py | 90 +++++++++++++++ 23 files changed, 1407 insertions(+), 4 deletions(-) create mode 100644 pyproject.toml create mode 100644 sp_cli/__init__.py create mode 100644 sp_cli/__main__.py create mode 100644 sp_cli/banner.py create mode 100644 sp_cli/classifier.py create mode 100644 sp_cli/client.py create mode 100644 sp_cli/commands/__init__.py create mode 100644 sp_cli/commands/auth.py create mode 100644 sp_cli/commands/investigate.py create mode 100644 sp_cli/commands/regression.py create mode 100644 sp_cli/commands/run.py create mode 100644 sp_cli/commands/sample.py create mode 100644 sp_cli/commands/system.py create mode 100644 sp_cli/main.py create mode 100644 sp_cli/output.py create mode 100644 sp_cli/runner.py create mode 100644 sp_cli/triage.py create mode 100644 tests/test_cli/__init__.py create mode 100644 tests/test_cli/test_classifier.py create mode 100644 tests/test_cli/test_cli.py create mode 100644 tests/test_cli/test_client.py diff --git a/.gitignore b/.gitignore index 4f8ca2d8d..9ca6892b0 100644 --- a/.gitignore +++ b/.gitignore @@ -53,3 +53,6 @@ monkeytype.sqlite3 # Test related data temp/ + +# Python packaging +*.egg-info/ diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..58784e5b1 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,16 @@ +[build-system] +requires = ["setuptools>=61"] +build-backend = "setuptools.build_meta" + +[project] +name = "sp-cli" +version = "0.1.0" +description = "AI-friendly CLI for the CCExtractor CI / Sample Platform" +requires-python = ">=3.10" +dependencies = ["click", "requests"] + +[project.scripts] +sp = "sp_cli.main:cli" + +[tool.setuptools] +packages = ["sp_cli", "sp_cli.commands"] diff --git a/run.py b/run.py index e277c6d97..a27ea2a3b 100755 --- a/run.py +++ b/run.py @@ -60,10 +60,14 @@ app.config['DEBUG']) log = log_configuration.create_logger("Platform") -# Create bucket objext using GCS storage client -sa_file = os.path.join(app.config.get('INSTALL_FOLDER', ''), app.config.get('SERVICE_ACCOUNT_FILE', '')) -storage_client = Client.from_service_account_json(sa_file) -storage_client_bucket = storage_client.bucket(app.config.get('GCS_BUCKET_NAME', '')) +# Create bucket object using GCS storage client, unless explicitly disabled (local dev) +if os.environ.get('DISABLE_GCS', '0') == '1': + storage_client = None + storage_client_bucket = None +else: + sa_file = os.path.join(app.config.get('INSTALL_FOLDER', ''), app.config.get('SERVICE_ACCOUNT_FILE', '')) + storage_client = Client.from_service_account_json(sa_file) + storage_client_bucket = storage_client.bucket(app.config.get('GCS_BUCKET_NAME', '')) # Save build commit repo = git.Repo(app.config.get('INSTALL_FOLDER', '')) diff --git a/sp_cli/__init__.py b/sp_cli/__init__.py new file mode 100644 index 000000000..c3179aac4 --- /dev/null +++ b/sp_cli/__init__.py @@ -0,0 +1,9 @@ +"""``sp`` — an AI-friendly command-line client for the CCExtractor Sample Platform. + +The CLI is a thin layer over the Sample Platform JSON API (``/api/v1``). It is +designed to be driven by AI agents as well as humans: it emits machine-readable +JSON by default and uses non-zero exit codes plus a consistent error envelope on +failure, so it can be scripted without screen-scraping the web UI. +""" + +__version__ = "0.1.0" diff --git a/sp_cli/__main__.py b/sp_cli/__main__.py new file mode 100644 index 000000000..d5e9b2ebd --- /dev/null +++ b/sp_cli/__main__.py @@ -0,0 +1,6 @@ +"""Allow the CLI to be run as ``python -m sp_cli``.""" + +from sp_cli.main import cli + +if __name__ == '__main__': + cli() diff --git a/sp_cli/banner.py b/sp_cli/banner.py new file mode 100644 index 000000000..d0cb6de4d --- /dev/null +++ b/sp_cli/banner.py @@ -0,0 +1,54 @@ +"""Branded welcome screen for the ``sp`` CLI. + +Shown only when ``sp`` is invoked with no subcommand. Never emitted on command +output, so machine consumers (agents parsing JSON) are unaffected. Colors are +applied via :func:`click.style` and are auto-stripped when output is piped. +""" + +import click + +from sp_cli import __version__ + +#: Figlet-style "sp" wordmark. +LOGO = r""" ___ _ __ + / __| '_ \ + \__ \ |_) | + |___/ .__/ + |_|""" + +_GROUPS = [ + ('TRIAGE', 'sp investigate ← one-shot: what failed and why'), + ('RUNS', 'sp run ls · show · summary · failures · results · result · diff · artifacts · logs · errors'), + ('SAMPLES', 'sp sample ls · show · history'), + ('TESTS', 'sp regression ls'), + ('SYSTEM', 'sp health · queue'), + ('AUTH', 'sp auth login · logout'), +] + +_EXAMPLES = [ + ('sp investigate 9299', 'triage a run end-to-end'), + ('sp run failures 9299', 'failing tests, each labeled with why'), + ('sp run diff 9299 137', 'expected-vs-actual diff (ids auto-resolved)'), +] + + +def show_welcome() -> None: + """Print the branded welcome screen (banner, command map, examples).""" + click.echo() + click.echo(click.style(LOGO, fg='cyan')) + click.echo(f" {click.style('CCExtractor CI', bold=True)} · AI-friendly CLI · v{__version__}") + click.echo(" drive CI investigations from the terminal — no UI, no HTML scraping") + click.echo() + + for name, line in _GROUPS: + click.echo(f" {click.style(name.ljust(8), fg='green', bold=True)} {line}") + click.echo() + + click.echo(f" {click.style('Examples', bold=True)}") + for command, note in _EXAMPLES: + click.echo(f" {command.ljust(28)} {click.style('# ' + note, fg='bright_black')}") + click.echo() + + click.echo(f" {click.style('Help', bold=True)} sp COMMAND --help" + f" {click.style('Config', bold=True)} SP_BASE_URL · SP_API_TOKEN") + click.echo() diff --git a/sp_cli/classifier.py b/sp_cli/classifier.py new file mode 100644 index 000000000..ec67ef9d5 --- /dev/null +++ b/sp_cli/classifier.py @@ -0,0 +1,110 @@ +"""Rule-based classification of regression-test failures into stable codes. + +Deterministic, no ML: maps the raw signals a test run exposes (exit code, +expected return code, output presence, pass-history) onto a small, stable +taxonomy so an agent can branch on *why* a test failed instead of parsing +prose. Platform differences are normalized — e.g. a segfault surfaces as ``139`` +on Linux and ``-1073741819`` (0xC0000005) on Windows; both classify as +``SEGFAULT``. + +Each classification returns a ``code`` (stable, machine-readable), a +``confidence`` (``high`` for unambiguous exit-code rules, ``medium`` for +output-based ones), a human ``reason``, and ``regression`` (True if the test was +passing before — a real regression; False if it never passed; None if unknown). +""" + +from typing import Any, Dict, Optional + +# --- Failure codes (stable; downstream tools may pin on these) --------------- +CODE_PASS = "PASS" +CODE_SEGFAULT = "SEGFAULT" +CODE_ABORT = "ABORT" +CODE_TIMEOUT = "TIMEOUT" +CODE_MISSING_OUTPUT = "MISSING_OUTPUT" +CODE_EXIT_CODE_MISMATCH = "EXIT_CODE_MISMATCH" +CODE_OUTPUT_DIFF = "OUTPUT_DIFF" +CODE_UNKNOWN = "UNKNOWN" + +# --- Exit codes that denote a crash, normalized across platforms ------------- +#: SIGSEGV (128+11) on Linux, raw -11, and 0xC0000005 access violation on Windows. +_SEGFAULT_CODES = frozenset({139, -11, -1073741819}) +#: SIGABRT (128+6) on Linux and raw -6. +_ABORT_CODES = frozenset({134, -6}) +#: `timeout` exit (124) and SIGTERM (143 / -15). +_TIMEOUT_CODES = frozenset({124, 143, -15}) + + +def classify(exit_code: Optional[int], expected_rc: Optional[int], *, + has_output_diff: bool = False, missing_output: bool = False, + has_ever_passed: Optional[bool] = None) -> Dict[str, Any]: + """ + Classify a single regression-test result into a stable failure code. + + Rules are evaluated most-severe first (crash > timeout > missing output > + exit-code mismatch > output diff), so the most actionable signal wins. + + :param exit_code: The process exit code observed for the test. + :type exit_code: Optional[int] + :param expected_rc: The exit code the test was expected to return. + :type expected_rc: Optional[int] + :param has_output_diff: True if a differing output file was recorded. + :type has_output_diff: bool + :param missing_output: True if output was expected but none was produced. + :type missing_output: bool + :param has_ever_passed: Whether this test has ever passed (history), if known. + :type has_ever_passed: Optional[bool] + :return: ``{code, confidence, reason, regression}``. + :rtype: Dict[str, Any] + """ + regression = _regression_state(has_ever_passed) + + if exit_code in _SEGFAULT_CODES: + return _result(CODE_SEGFAULT, "high", + f"Crash (segfault / access violation), exit {exit_code}", regression) + if exit_code in _ABORT_CODES: + return _result(CODE_ABORT, "high", f"Aborted (SIGABRT), exit {exit_code}", regression) + if exit_code in _TIMEOUT_CODES: + return _result(CODE_TIMEOUT, "high", f"Timed out / terminated, exit {exit_code}", regression) + if missing_output: + return _result(CODE_MISSING_OUTPUT, "high", + "No output was produced but one was expected", regression) + if exit_code != expected_rc: + return _result(CODE_EXIT_CODE_MISMATCH, "high", + f"Exited {exit_code}, expected {expected_rc}", regression) + if has_output_diff: + return _result(CODE_OUTPUT_DIFF, "medium", + "Exit code matched but output differs from expected", regression) + + return _result(CODE_PASS, "high", "Exit code matched and no output diff recorded", regression) + + +def _regression_state(has_ever_passed: Optional[bool]) -> Optional[bool]: + """ + Translate pass-history into the ``regression`` flag. + + :param has_ever_passed: Whether the test has ever passed, if known. + :type has_ever_passed: Optional[bool] + :return: True if a real regression, False if never worked, None if unknown. + :rtype: Optional[bool] + """ + if has_ever_passed is None: + return None + return bool(has_ever_passed) + + +def _result(code: str, confidence: str, reason: str, regression: Optional[bool]) -> Dict[str, Any]: + """ + Assemble a classification result dict. + + :param code: The stable failure code. + :type code: str + :param confidence: ``high`` or ``medium``. + :type confidence: str + :param reason: Human-readable explanation. + :type reason: str + :param regression: Regression flag (see :func:`_regression_state`). + :type regression: Optional[bool] + :return: The assembled result. + :rtype: Dict[str, Any] + """ + return {"code": code, "confidence": confidence, "reason": reason, "regression": regression} diff --git a/sp_cli/client.py b/sp_cli/client.py new file mode 100644 index 000000000..2719deeb7 --- /dev/null +++ b/sp_cli/client.py @@ -0,0 +1,168 @@ +"""HTTP client for the CCExtractor CI System API (`/api/v1`).""" + +from typing import Any, Dict, List, Optional + +import requests # type: ignore[import-untyped] + + +class ApiError(Exception): + """Raised when an API request fails, carrying the structured error envelope.""" + + def __init__(self, code: str, message: str, status: Optional[int] = None, + details: Optional[Dict[str, Any]] = None) -> None: + """ + Build an API error. + + :param code: Stable machine-readable error code (e.g. ``not_found``). + :type code: str + :param message: Human-readable explanation. + :type message: str + :param status: HTTP status code, if the failure was an HTTP response. + :type status: Optional[int] + :param details: Optional structured context echoed from the API. + :type details: Optional[Dict[str, Any]] + """ + super().__init__(message) + self.code = code + self.message = message + self.status = status + self.details = details + + @property + def exit_code(self) -> int: + """ + Map the error to a process exit code so callers can branch on it. + + :return: 3 connection · 4 not-found · 5 validation · 6 auth · 7 rate-limited · 1 other. + :rtype: int + """ + if self.code == 'connection_error': + return 3 + if self.status == 404: + return 4 + if self.status in (400, 422): + return 5 + if self.status in (401, 403): + return 6 + if self.status == 429: + return 7 + return 1 + + +class ApiClient: + """Minimal client over the JSON API. Sends a bearer token when configured.""" + + def __init__(self, base_url: str, token: Optional[str] = None, timeout: int = 30) -> None: + """ + Configure the client. + + :param base_url: Root URL of the platform (without the ``/api/v1`` prefix). + :type base_url: str + :param token: Optional opaque bearer token sent on every request. + :type token: Optional[str] + :param timeout: Per-request timeout in seconds. + :type timeout: int + """ + self.base_url = base_url.rstrip('/') + self.token = token + self.timeout = timeout + self.session = requests.Session() + + def _headers(self) -> Dict[str, str]: + """ + Build request headers, including the bearer token when set. + + :return: Header mapping. + :rtype: Dict[str, str] + """ + headers = {'Accept': 'application/json'} + if self.token: + headers['Authorization'] = f'Bearer {self.token}' + return headers + + def request(self, method: str, path: str, params: Optional[Dict[str, Any]] = None, + json_body: Optional[Dict[str, Any]] = None) -> Any: + """ + Perform a request against an API path and return the decoded JSON body. + + :param method: HTTP method (``GET``, ``POST``, ``DELETE`` …). + :type method: str + :param path: API path below ``/api/v1`` (e.g. ``/runs``). + :type path: str + :param params: Optional query-string parameters. + :type params: Optional[Dict[str, Any]] + :param json_body: Optional JSON request body. + :type json_body: Optional[Dict[str, Any]] + :raises ApiError: on connection failure, a non-JSON body, or an HTTP error. + :return: The decoded JSON response body (or ``None`` for ``204``). + :rtype: Any + """ + url = f"{self.base_url}{path}" + try: + response = self.session.request(method, url, params=params, json=json_body, + headers=self._headers(), timeout=self.timeout) + except requests.RequestException as exc: + raise ApiError('connection_error', f'Could not reach {url}: {exc}') + + if response.status_code == 204: + return None + + try: + payload = response.json() + except ValueError: + raise ApiError('invalid_response', + f'Expected JSON but got HTTP {response.status_code}', response.status_code) + + if response.status_code >= 400: + error = payload if isinstance(payload, dict) else {} + raise ApiError( + error.get('code', 'http_error'), + error.get('message', f'Request failed with HTTP {response.status_code}'), + response.status_code, + error.get('details'), + ) + + return payload + + def get(self, path: str, params: Optional[Dict[str, Any]] = None) -> Any: + """ + Perform a GET and return the decoded body. + + :param path: API path below ``/api/v1``. + :type path: str + :param params: Optional query-string parameters. + :type params: Optional[Dict[str, Any]] + :return: The decoded JSON body. + :rtype: Any + """ + return self.request('GET', path, params=params) + + def get_paginated(self, path: str, params: Optional[Dict[str, Any]] = None, + max_items: int = 1000) -> List[Any]: + """ + Follow offset pagination and return the combined ``data`` list. + + :param path: API path below ``/api/v1``. + :type path: str + :param params: Optional query-string parameters (``limit``/``offset`` are managed). + :type params: Optional[Dict[str, Any]] + :param max_items: Safety cap on total items collected. + :type max_items: int + :return: All items across pages. + :rtype: List[Any] + """ + merged = dict(params or {}) + merged.setdefault('limit', 100) + offset = 0 + items: List[Any] = [] + while True: + merged['offset'] = offset + payload = self.get(path, params=merged) + data = payload.get('data', []) if isinstance(payload, dict) else [] + items.extend(data) + pagination = payload.get('pagination', {}) if isinstance(payload, dict) else {} + next_offset = pagination.get('next_offset') + if not data or next_offset is None or len(items) >= max_items: + break + offset = next_offset + return items diff --git a/sp_cli/commands/__init__.py b/sp_cli/commands/__init__.py new file mode 100644 index 000000000..38d122629 --- /dev/null +++ b/sp_cli/commands/__init__.py @@ -0,0 +1 @@ +"""Command groups for the ``sp`` CLI, grouped by resource (noun-verb).""" diff --git a/sp_cli/commands/auth.py b/sp_cli/commands/auth.py new file mode 100644 index 000000000..3de56a836 --- /dev/null +++ b/sp_cli/commands/auth.py @@ -0,0 +1,47 @@ +"""``sp auth`` — obtain and revoke API tokens.""" + +import click + +from sp_cli.client import ApiError +from sp_cli.output import render, render_error + + +@click.group() +def auth() -> None: + """Obtain and revoke API tokens.""" + + +@auth.command('login') +@click.option('--email', prompt=True, help='Account email.') +@click.option('--password', prompt=True, hide_input=True, help='Account password (never stored).') +@click.option('--name', 'token_name', default='sp-cli', show_default=True, help='Token label.') +@click.option('--days', 'expires_in_days', type=int, default=30, show_default=True, + help='Token lifetime in days (max 90).') +@click.pass_context +def auth_login(ctx: click.Context, email: str, password: str, + token_name: str, expires_in_days: int) -> None: + """Create an API token; store the printed value in SP_API_TOKEN.""" + client = ctx.obj['client'] + output = ctx.obj['output'] + body = {'email': email, 'password': password, + 'token_name': token_name, 'expires_in_days': expires_in_days} + try: + result = client.request('POST', '/auth/tokens', json_body=body) + except ApiError as error: + render_error(error, output) + raise SystemExit(error.exit_code) + render(result, output) + + +@auth.command('logout') +@click.pass_context +def auth_logout(ctx: click.Context) -> None: + """Revoke the current API token.""" + client = ctx.obj['client'] + output = ctx.obj['output'] + try: + client.request('DELETE', '/auth/tokens/current') + except ApiError as error: + render_error(error, output) + raise SystemExit(error.exit_code) + click.echo('Token revoked.') diff --git a/sp_cli/commands/investigate.py b/sp_cli/commands/investigate.py new file mode 100644 index 000000000..16c2a2436 --- /dev/null +++ b/sp_cli/commands/investigate.py @@ -0,0 +1,72 @@ +"""``sp investigate`` — one-shot triage of a run (status + counts + classified failures).""" + +from typing import Any, Dict, List + +import click + +from sp_cli.client import ApiError +from sp_cli.output import render, render_error +from sp_cli.triage import classify_sample, group_by_code, is_failure + +_RUN_FIELDS = ('run_id', 'pr_number', 'platform', 'commit_sha', 'branch', 'status', 'github_link') + + +@click.command('investigate') +@click.argument('run_id', type=int) +@click.pass_context +def investigate(ctx: click.Context, run_id: int) -> None: + """Triage a run in one shot: run info, pass/fail counts, and classified failures. + + Combines the run detail, summary, and per-result classification into a single + report — the whole "what failed and why" investigation in one command. + """ + client = ctx.obj['client'] + output = ctx.obj['output'] + try: + run = client.get(f'/runs/{run_id}') + summary = client.get(f'/runs/{run_id}/summary') + samples = client.get_paginated(f'/runs/{run_id}/samples') + except ApiError as error: + render_error(error, output) + raise SystemExit(error.exit_code) + + failures = [classify_sample(s) for s in samples if is_failure(s)] + report = { + 'run': {field: run.get(field) for field in _RUN_FIELDS}, + 'summary': summary, + 'by_code': group_by_code(failures), + 'failures': failures, + } + + if output == 'json': + render(report, 'json') + else: + _print_digest(report) + + +def _print_digest(report: Dict[str, Any]) -> None: + """ + Print a human-readable investigation digest. + + :param report: The assembled investigation report. + :type report: Dict[str, Any] + """ + run = report['run'] + summary = report['summary'] + header = (f"Run {run.get('run_id')} · PR {run.get('pr_number')} · {run.get('platform')} · " + f"{run.get('commit_sha')} · {str(run.get('status')).upper()}") + click.echo(header) + click.echo(f" {summary.get('fail_count')} failed / {summary.get('total_samples')} total" + f" ({summary.get('pass_count')} pass)") + + by_code = report['by_code'] + if by_code: + click.echo() + click.echo(" by code:") + for code, count in by_code.items(): + click.echo(f" {str(count).rjust(4)} {code}") + + failures: List[Dict[str, Any]] = report['failures'] + if failures: + click.echo() + render({'data': failures}, 'table') diff --git a/sp_cli/commands/regression.py b/sp_cli/commands/regression.py new file mode 100644 index 000000000..fac9e37b2 --- /dev/null +++ b/sp_cli/commands/regression.py @@ -0,0 +1,29 @@ +"""``sp regression`` — list regression-test definitions.""" + +from typing import Optional + +import click + +from sp_cli.runner import clean_params, fetch_and_render + + +@click.group() +def regression() -> None: + """List regression-test definitions.""" + + +@regression.command('ls') +@click.option('--category', default=None, help='Filter by category name.') +@click.option('--tag', default=None, help='Filter by tag.') +@click.option('--active/--all', 'active', default=None, help='Only active tests (default: all).') +@click.option('--sample-id', type=int, default=None, help='Filter by sample id.') +@click.option('--limit', type=int, default=None, help='Page size (max 100).') +@click.option('--offset', type=int, default=None, help='Pagination offset.') +@click.pass_context +def regression_ls(ctx: click.Context, category: Optional[str], tag: Optional[str], + active: Optional[bool], sample_id: Optional[int], + limit: Optional[int], offset: Optional[int]) -> None: + """List regression-test definitions.""" + params = clean_params({'category': category, 'tag': tag, 'active': active, + 'sample_id': sample_id, 'limit': limit, 'offset': offset}) + fetch_and_render(ctx, '/regression-tests', params) diff --git a/sp_cli/commands/run.py b/sp_cli/commands/run.py new file mode 100644 index 000000000..64a1b16c8 --- /dev/null +++ b/sp_cli/commands/run.py @@ -0,0 +1,183 @@ +"""``sp run`` — list, inspect, and triage CI runs.""" + +from typing import Any, Dict, List, Optional, Tuple + +import click + +from sp_cli.client import ApiError +from sp_cli.output import render, render_error +from sp_cli.runner import clean_params, fetch_and_render +from sp_cli.triage import classify_sample, is_failure + + +@click.group() +def run() -> None: + """List, inspect, and triage CI runs.""" + + +@run.command('ls') +@click.option('--status', default=None, help='queued|running|pass|fail|canceled|error|incomplete') +@click.option('--platform', default=None, help='linux|windows') +@click.option('--branch', default=None, help='Filter by branch name.') +@click.option('--commit', 'commit_sha', default=None, help='Full 40-char commit SHA.') +@click.option('--limit', type=int, default=None, help='Page size (max 100).') +@click.option('--offset', type=int, default=None, help='Pagination offset.') +@click.pass_context +def run_ls(ctx: click.Context, status: Optional[str], platform: Optional[str], branch: Optional[str], + commit_sha: Optional[str], limit: Optional[int], offset: Optional[int]) -> None: + """List CI runs (newest first).""" + params = clean_params({'status': status, 'platform': platform, 'branch': branch, + 'commit_sha': commit_sha, 'limit': limit, 'offset': offset}) + fetch_and_render(ctx, '/runs', params) + + +@run.command('show') +@click.argument('run_id', type=int) +@click.pass_context +def run_show(ctx: click.Context, run_id: int) -> None: + """Show a single run's details.""" + fetch_and_render(ctx, f'/runs/{run_id}') + + +@run.command('summary') +@click.argument('run_id', type=int) +@click.pass_context +def run_summary(ctx: click.Context, run_id: int) -> None: + """Show a run's pass/fail summary.""" + fetch_and_render(ctx, f'/runs/{run_id}/summary') + + +@run.command('failures') +@click.argument('run_id', type=int) +@click.pass_context +def run_failures(ctx: click.Context, run_id: int) -> None: + """Show a run's failing tests, each labeled with a classification code.""" + client = ctx.obj['client'] + output = ctx.obj['output'] + try: + samples = client.get_paginated(f'/runs/{run_id}/samples') + except ApiError as error: + render_error(error, output) + raise SystemExit(error.exit_code) + + rows = [classify_sample(s) for s in samples if is_failure(s)] + render({'data': rows, 'summary': {'failures': len(rows), 'of_total': len(samples)}}, output) + + +@run.command('results') +@click.argument('run_id', type=int) +@click.option('--status', default=None, help='pass|fail|skipped|missing_output|running|not_started') +@click.option('--limit', type=int, default=None, help='Page size (max 100).') +@click.option('--offset', type=int, default=None, help='Pagination offset.') +@click.pass_context +def run_results(ctx: click.Context, run_id: int, status: Optional[str], + limit: Optional[int], offset: Optional[int]) -> None: + """List all regression-test results in a run.""" + params = clean_params({'status': status, 'limit': limit, 'offset': offset}) + fetch_and_render(ctx, f'/runs/{run_id}/samples', params) + + +@run.command('result') +@click.argument('run_id', type=int) +@click.argument('sample_id', type=int) +@click.pass_context +def run_result(ctx: click.Context, run_id: int, sample_id: int) -> None: + """Show full details for a single regression-test result in a run.""" + fetch_and_render(ctx, f'/runs/{run_id}/samples/{sample_id}') + + +@run.command('diff') +@click.argument('run_id', type=int) +@click.argument('sample_id', type=int) +@click.option('--regression', 'regression_id', type=int, default=None, + help='Regression test id (auto-resolved if omitted).') +@click.option('--output', 'output_id', type=int, default=None, + help='Output file id (auto-resolved if omitted).') +@click.option('--context', 'context_lines', type=int, default=None, help='Diff context lines.') +@click.pass_context +def run_diff(ctx: click.Context, run_id: int, sample_id: int, regression_id: Optional[int], + output_id: Optional[int], context_lines: Optional[int]) -> None: + """Show the expected-vs-actual diff for a failing result. + + Resolves the (regression, output) ids automatically when omitted, so you + don't need the hidden ids the web UI requires to build a diff URL. + """ + client = ctx.obj['client'] + output = ctx.obj['output'] + try: + targets = _resolve_diff_targets(client, run_id, sample_id, regression_id, output_id) + if not targets: + raise ApiError('not_found', 'No differing output to diff for this result', 404) + diffs = [] + for reg_id, out_id in targets: + params = clean_params({'regression_id': reg_id, 'output_id': out_id, + 'context_lines': context_lines}) + diffs.append(client.get(f'/runs/{run_id}/samples/{sample_id}/diff', params=params)) + except ApiError as error: + render_error(error, output) + raise SystemExit(error.exit_code) + + render(diffs[0] if len(diffs) == 1 else {'data': diffs}, output) + + +@run.command('artifacts') +@click.argument('run_id', type=int) +@click.pass_context +def run_artifacts(ctx: click.Context, run_id: int) -> None: + """List downloadable artifacts for a run (signed URLs).""" + fetch_and_render(ctx, f'/runs/{run_id}/artifacts') + + +@run.command('logs') +@click.argument('run_id', type=int) +@click.pass_context +def run_logs(ctx: click.Context, run_id: int) -> None: + """Show raw logs for a run.""" + fetch_and_render(ctx, f'/runs/{run_id}/logs') + + +@run.command('errors') +@click.argument('run_id', type=int) +@click.option('--type', 'error_type', default=None, + help='test_failure|exit_code_mismatch|missing_output|diff_mismatch') +@click.pass_context +def run_errors(ctx: click.Context, run_id: int, error_type: Optional[str]) -> None: + """Show structured test errors for a run.""" + fetch_and_render(ctx, f'/runs/{run_id}/errors', clean_params({'type': error_type})) + + +def _resolve_diff_targets(client: Any, run_id: int, sample_id: int, + regression_id: Optional[int], + output_id: Optional[int]) -> List[Tuple[int, int]]: + """ + Resolve the (regression_id, output_id) pairs to diff for a result. + + If both ids are supplied, uses them directly. Otherwise fetches the result + detail and picks the differing output(s), so the caller never needs the + hidden ids the diff endpoint requires. + + :param client: The API client. + :type client: Any + :param run_id: The run id. + :type run_id: int + :param sample_id: The result id within the run. + :type sample_id: int + :param regression_id: Optional explicit regression id. + :type regression_id: Optional[int] + :param output_id: Optional explicit output id. + :type output_id: Optional[int] + :return: A list of ``(regression_id, output_id)`` pairs to diff. + :rtype: List[Tuple[int, int]] + """ + if regression_id is not None and output_id is not None: + return [(regression_id, output_id)] + + detail = client.get(f'/runs/{run_id}/samples/{sample_id}') + reg_id = regression_id if regression_id is not None else detail.get('regression_test_id') + outputs = detail.get('outputs') or [] + differing = [o for o in outputs if o.get('status') not in (None, 'match')] or outputs + + if output_id is not None: + return [(reg_id, output_id)] if reg_id is not None else [] + return [(reg_id, o.get('output_id')) for o in differing + if reg_id is not None and o.get('output_id') is not None] diff --git a/sp_cli/commands/sample.py b/sp_cli/commands/sample.py new file mode 100644 index 000000000..d8bd8ae6c --- /dev/null +++ b/sp_cli/commands/sample.py @@ -0,0 +1,47 @@ +"""``sp sample`` — list and inspect media samples.""" + +from typing import Optional + +import click + +from sp_cli.runner import clean_params, fetch_and_render + + +@click.group() +def sample() -> None: + """List and inspect media samples.""" + + +@sample.command('ls') +@click.option('--name', default=None, help='Filter by sample name.') +@click.option('--tag', default=None, help='Filter by tag.') +@click.option('--extension', default=None, help='Filter by file extension.') +@click.option('--limit', type=int, default=None, help='Page size (max 100).') +@click.option('--offset', type=int, default=None, help='Pagination offset.') +@click.pass_context +def sample_ls(ctx: click.Context, name: Optional[str], tag: Optional[str], + extension: Optional[str], limit: Optional[int], offset: Optional[int]) -> None: + """List known media samples.""" + params = clean_params({'name': name, 'tag': tag, 'extension': extension, + 'limit': limit, 'offset': offset}) + fetch_and_render(ctx, '/samples', params) + + +@sample.command('show') +@click.argument('sample_id', type=int) +@click.pass_context +def sample_show(ctx: click.Context, sample_id: int) -> None: + """Show metadata for a single sample.""" + fetch_and_render(ctx, f'/samples/{sample_id}') + + +@sample.command('history') +@click.argument('sample_id', type=int) +@click.option('--platform', default=None, help='linux|windows') +@click.option('--limit', type=int, default=None, help='Page size (max 100).') +@click.pass_context +def sample_history(ctx: click.Context, sample_id: int, platform: Optional[str], + limit: Optional[int]) -> None: + """Show this sample's result history across runs.""" + params = clean_params({'platform': platform, 'limit': limit}) + fetch_and_render(ctx, f'/samples/{sample_id}/history', params) diff --git a/sp_cli/commands/system.py b/sp_cli/commands/system.py new file mode 100644 index 000000000..cb913a2e2 --- /dev/null +++ b/sp_cli/commands/system.py @@ -0,0 +1,23 @@ +"""``sp health`` and ``sp queue`` — system status commands.""" + +from typing import Optional + +import click + +from sp_cli.runner import clean_params, fetch_and_render + + +@click.command('health') +@click.pass_context +def health(ctx: click.Context) -> None: + """Show CI system health and dependency status.""" + fetch_and_render(ctx, '/system/health') + + +@click.command('queue') +@click.option('--platform', default=None, help='linux|windows') +@click.option('--status', default=None, help='queued|running') +@click.pass_context +def queue(ctx: click.Context, platform: Optional[str], status: Optional[str]) -> None: + """Show queue depth and currently running jobs.""" + fetch_and_render(ctx, '/system/queue', clean_params({'platform': platform, 'status': status})) diff --git a/sp_cli/main.py b/sp_cli/main.py new file mode 100644 index 000000000..47d8c14b3 --- /dev/null +++ b/sp_cli/main.py @@ -0,0 +1,51 @@ +"""Entry point and root command group for the ``sp`` CLI.""" + +from typing import Optional + +import click + +from sp_cli import __version__ +from sp_cli.client import ApiClient +from sp_cli.commands.auth import auth +from sp_cli.commands.investigate import investigate +from sp_cli.commands.regression import regression +from sp_cli.commands.run import run +from sp_cli.commands.sample import sample +from sp_cli.commands.system import health, queue + +DEFAULT_BASE_URL = 'http://localhost:5000/api/v1' + + +@click.group(invoke_without_command=True) +@click.option('--base-url', envvar='SP_BASE_URL', default=DEFAULT_BASE_URL, show_default=True, + help='API base URL incl. the /api/v1 prefix. Env: SP_BASE_URL.') +@click.option('--token', envvar='SP_API_TOKEN', default=None, + help='Bearer token sent with each request. Env: SP_API_TOKEN.') +@click.option('--output', '-o', type=click.Choice(['json', 'table']), default='json', show_default=True, + help='Output format.') +@click.option('--timeout', type=int, default=30, show_default=True, help='Per-request timeout (seconds).') +@click.version_option(__version__, prog_name='sp') +@click.pass_context +def cli(ctx: click.Context, base_url: str, token: Optional[str], output: str, timeout: int) -> None: + """AI-friendly CLI for the CCExtractor CI / Sample Platform. + + Emits JSON by default so it can be driven by agents and scripts. Point it at + a running platform with --base-url or the SP_BASE_URL environment variable, + and authenticate with a token via --token / SP_API_TOKEN (see `sp auth login`). + """ + ctx.obj = { + 'client': ApiClient(base_url, token=token, timeout=timeout), + 'output': output, + } + if ctx.invoked_subcommand is None: + from sp_cli.banner import show_welcome + show_welcome() + + +cli.add_command(investigate) +cli.add_command(run) +cli.add_command(sample) +cli.add_command(regression) +cli.add_command(auth) +cli.add_command(health) +cli.add_command(queue) diff --git a/sp_cli/output.py b/sp_cli/output.py new file mode 100644 index 000000000..2fa0556d4 --- /dev/null +++ b/sp_cli/output.py @@ -0,0 +1,138 @@ +"""Render API responses to the terminal as JSON (default) or a simple table.""" + +import json +from typing import Any, Dict, List + +import click + +from sp_cli.client import ApiError + +#: Value types rendered as plain table columns; nested structures are skipped. +_SCALAR = (str, int, float, bool, type(None)) + + +def render(payload: Any, output: str) -> None: + """ + Render a successful API payload in the requested format. + + Handles the API's three shapes: a list wrapper (``{data, pagination}``), a + flat single object (run/summary/health), and bare values. + + :param payload: The decoded JSON body returned by the API. + :type payload: Any + :param output: Either ``json`` or ``table``. + :type output: str + """ + if output == 'json': + click.echo(json.dumps(payload, indent=2)) + return + + if isinstance(payload, dict) and isinstance(payload.get('data'), list): + _print_rows(payload['data']) + footer = _footer(payload) + if footer: + click.echo(f"\n{footer}") + elif isinstance(payload, dict): + _print_kv(payload) + else: + click.echo(json.dumps(payload, indent=2)) + + +def render_error(error: ApiError, output: str) -> None: + """ + Render an API error as a JSON envelope on stderr, regardless of output mode. + + :param error: The error to render. + :type error: ApiError + :param output: The selected output mode (unused; kept for symmetry). + :type output: str + """ + envelope: Dict[str, Any] = {'error': {'code': error.code, 'message': error.message}} + if error.status is not None: + envelope['error']['status'] = error.status + if error.details: + envelope['error']['details'] = error.details + click.echo(json.dumps(envelope, indent=2), err=True) + + +def _footer(payload: Dict[str, Any]) -> str: + """ + Build a one-line footer from a ``summary`` or ``pagination`` block. + + :param payload: The full response payload. + :type payload: Dict[str, Any] + :return: A footer string (possibly empty). + :rtype: str + """ + summary = payload.get('summary') + if isinstance(summary, dict): + return ' · '.join(f"{k}: {v}" for k, v in summary.items()) + pagination = payload.get('pagination') + if isinstance(pagination, dict): + parts = [] + if pagination.get('total') is not None: + parts.append(f"{pagination['total']} total") + if pagination.get('next_offset') is not None: + parts.append(f"more at offset {pagination['next_offset']}") + return ' · '.join(parts) + return '' + + +def _print_rows(rows: List[Any]) -> None: + """ + Print a list of flat dicts as an aligned table of their scalar fields. + + :param rows: The list of row dicts to render. + :type rows: List[Any] + """ + if not rows: + click.echo('(no results)') + return + if not all(isinstance(row, dict) for row in rows): + click.echo(json.dumps(rows, indent=2)) + return + + columns: List[str] = [] + for row in rows: + for key, value in row.items(): + if key not in columns and isinstance(value, _SCALAR): + columns.append(key) + + widths = {col: len(col) for col in columns} + for row in rows: + for col in columns: + widths[col] = max(widths[col], len(_cell(row.get(col)))) + + click.echo(' '.join(col.ljust(widths[col]) for col in columns)) + click.echo(' '.join('-' * widths[col] for col in columns)) + for row in rows: + click.echo(' '.join(_cell(row.get(col)).ljust(widths[col]) for col in columns)) + + +def _print_kv(record: Dict[str, Any]) -> None: + """ + Print a single record as ``key: value`` lines, JSON-encoding nested values. + + :param record: The record to render. + :type record: Dict[str, Any] + """ + width = max((len(key) for key in record), default=0) + for key, value in record.items(): + rendered = _cell(value) if isinstance(value, _SCALAR) else json.dumps(value) + click.echo(f"{key.ljust(width)} : {rendered}") + + +def _cell(value: Any) -> str: + """ + Format a scalar cell value for table display. + + :param value: The value to format. + :type value: Any + :return: A string representation (empty string for ``None``). + :rtype: str + """ + if value is None: + return '' + if isinstance(value, bool): + return 'true' if value else 'false' + return str(value) diff --git a/sp_cli/runner.py b/sp_cli/runner.py new file mode 100644 index 000000000..52767722f --- /dev/null +++ b/sp_cli/runner.py @@ -0,0 +1,41 @@ +"""Shared helper that fetches from the API and renders the result or error.""" + +from typing import Any, Dict, Optional + +import click + +from sp_cli.client import ApiError +from sp_cli.output import render, render_error + + +def fetch_and_render(ctx: click.Context, path: str, params: Optional[Dict[str, Any]] = None) -> None: + """ + Fetch a path via the configured client and render it, exiting on error. + + :param ctx: The active Click context (carries the client and output mode). + :type ctx: click.Context + :param path: API path below ``/api/v1``. + :type path: str + :param params: Optional query-string parameters. + :type params: Optional[Dict[str, Any]] + """ + client = ctx.obj['client'] + output = ctx.obj['output'] + try: + payload = client.get(path, params=params) + except ApiError as error: + render_error(error, output) + raise SystemExit(error.exit_code) + render(payload, output) + + +def clean_params(params: Dict[str, Any]) -> Dict[str, Any]: + """ + Drop ``None`` values so unset options are not sent as query parameters. + + :param params: Raw mapping of option names to values. + :type params: Dict[str, Any] + :return: The mapping without ``None`` values. + :rtype: Dict[str, Any] + """ + return {key: value for key, value in params.items() if value is not None} diff --git a/sp_cli/triage.py b/sp_cli/triage.py new file mode 100644 index 000000000..bbfc1e84e --- /dev/null +++ b/sp_cli/triage.py @@ -0,0 +1,85 @@ +"""Triage helpers: turn raw RunSample results into classified failure rows. + +Shared by ``sp run failures`` and ``sp investigate`` so both label failures the +same way. The classification itself lives in :mod:`sp_cli.classifier`; this module +adapts a ``RunSample`` (from ``/runs/{id}/samples``) into a flat, agent-friendly row. +""" + +from typing import Any, Dict, List + +from sp_cli.classifier import classify + +#: RunSample statuses that count as a failure worth triaging. +FAILURE_STATUSES = ('fail', 'missing_output') + + +def is_failure(sample: Dict[str, Any]) -> bool: + """ + Report whether a RunSample result is a failure. + + :param sample: One ``RunSample`` object. + :type sample: Dict[str, Any] + :return: True if the result failed or produced no output. + :rtype: bool + """ + return sample.get('status') in FAILURE_STATUSES + + +def has_output_diff(sample: Dict[str, Any]) -> bool: + """ + Decide whether a failing sample recorded a differing output file. + + Prefers the per-output ``status`` when present; otherwise falls back to + "failed but the exit code matched, so the failure must be an output diff." + + :param sample: One ``RunSample`` object. + :type sample: Dict[str, Any] + :return: True if an output differed from expected. + :rtype: bool + """ + outputs = sample.get('outputs') or [] + if outputs: + return any(item.get('status') not in (None, 'match') for item in outputs) + return sample.get('status') == 'fail' and sample.get('exit_code') == sample.get('expected_rc') + + +def classify_sample(sample: Dict[str, Any]) -> Dict[str, Any]: + """ + Map a RunSample result onto a classified failure row. + + :param sample: One ``RunSample`` object from ``/runs/{id}/samples``. + :type sample: Dict[str, Any] + :return: A flat row with ids plus the classification code, confidence and reason. + :rtype: Dict[str, Any] + """ + label = classify( + sample.get('exit_code'), sample.get('expected_rc'), + missing_output=(sample.get('status') == 'missing_output'), + has_output_diff=has_output_diff(sample), + ) + return { + 'regression_test_id': sample.get('regression_test_id'), + 'sample_id': sample.get('sample_id'), + 'sample_name': sample.get('sample_name'), + 'category': sample.get('category'), + 'exit_code': sample.get('exit_code'), + 'expected_rc': sample.get('expected_rc'), + 'code': label['code'], + 'confidence': label['confidence'], + 'reason': label['reason'], + } + + +def group_by_code(failures: List[Dict[str, Any]]) -> Dict[str, int]: + """ + Count classified failures by their code. + + :param failures: Classified failure rows. + :type failures: List[Dict[str, Any]] + :return: A mapping of code → count, highest first. + :rtype: Dict[str, int] + """ + counts: Dict[str, int] = {} + for failure in failures: + counts[failure['code']] = counts.get(failure['code'], 0) + 1 + return dict(sorted(counts.items(), key=lambda item: item[1], reverse=True)) diff --git a/tests/test_cli/__init__.py b/tests/test_cli/__init__.py new file mode 100644 index 000000000..ccccefe2c --- /dev/null +++ b/tests/test_cli/__init__.py @@ -0,0 +1 @@ +"""Tests for the sp CLI (sp_cli).""" diff --git a/tests/test_cli/test_classifier.py b/tests/test_cli/test_classifier.py new file mode 100644 index 000000000..34df46cf9 --- /dev/null +++ b/tests/test_cli/test_classifier.py @@ -0,0 +1,69 @@ +"""Tests for the rule-based failure classifier, using real examples from run #9299.""" + +import unittest + +from sp_cli import classifier + + +class ClassifierTests(unittest.TestCase): + """Each case is grounded in a real failure observed in the friction study.""" + + def test_exit_code_mismatch(self): + """`10 (Expected 0)` — the common CEA-708 failure in run #9299.""" + result = classifier.classify(10, 0) + self.assertEqual(result["code"], classifier.CODE_EXIT_CODE_MISMATCH) + self.assertEqual(result["confidence"], "high") + self.assertIn("10", result["reason"]) + + def test_windows_segfault_normalized(self): + """`-1073741819` (0xC0000005) on Windows — the DVB failure in #9299.""" + result = classifier.classify(-1073741819, 0) + self.assertEqual(result["code"], classifier.CODE_SEGFAULT) + self.assertEqual(result["confidence"], "high") + + def test_linux_segfault_normalized(self): + """`139` on Linux is the same crash — must map to the same code.""" + self.assertEqual(classifier.classify(139, 0)["code"], classifier.CODE_SEGFAULT) + + def test_abort(self): + """`134` (SIGABRT) classifies as ABORT.""" + self.assertEqual(classifier.classify(134, 0)["code"], classifier.CODE_ABORT) + + def test_timeout(self): + """`124` (timeout) classifies as TIMEOUT.""" + self.assertEqual(classifier.classify(124, 0)["code"], classifier.CODE_TIMEOUT) + + def test_missing_output(self): + """'No output generated but there should be' — exit matches but output missing.""" + result = classifier.classify(0, 0, missing_output=True) + self.assertEqual(result["code"], classifier.CODE_MISSING_OUTPUT) + + def test_output_diff(self): + """Exit code matches but the output file differs.""" + result = classifier.classify(0, 0, has_output_diff=True) + self.assertEqual(result["code"], classifier.CODE_OUTPUT_DIFF) + self.assertEqual(result["confidence"], "medium") + + def test_pass(self): + """Exit matches, no diff, nothing missing → PASS.""" + self.assertEqual(classifier.classify(0, 0)["code"], classifier.CODE_PASS) + + def test_crash_beats_exit_mismatch(self): + """A segfault is reported as SEGFAULT, not a generic exit mismatch.""" + self.assertEqual(classifier.classify(139, 0)["code"], classifier.CODE_SEGFAULT) + + def test_regression_flag_true_when_previously_passed(self): + """A failure on a test that has passed before is a real regression.""" + self.assertTrue(classifier.classify(10, 0, has_ever_passed=True)["regression"]) + + def test_regression_flag_false_when_never_passed(self): + """A failure on a test that never passed is pre-existing (never worked).""" + self.assertFalse(classifier.classify(10, 0, has_ever_passed=False)["regression"]) + + def test_regression_flag_none_when_unknown(self): + """Without history, the regression flag is None (unknown).""" + self.assertIsNone(classifier.classify(10, 0)["regression"]) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_cli/test_cli.py b/tests/test_cli/test_cli.py new file mode 100644 index 000000000..b1744284d --- /dev/null +++ b/tests/test_cli/test_cli.py @@ -0,0 +1,156 @@ +"""Tests for the sp CLI command surface, mocking the API client.""" + +import json +import unittest +from unittest import mock + +from click.testing import CliRunner + +from sp_cli.client import ApiError +from sp_cli.main import cli + +RUNS_PAGE = { + 'data': [{'run_id': 9299, 'status': 'fail', 'platform': 'windows', 'commit_sha': 'e6cd34e'}], + 'pagination': {'limit': 50, 'offset': 0, 'total': 1, 'next_offset': None}, +} + +# A run's results: a segfault, an exit mismatch, a missing output, and a pass. +RUN_SAMPLES = [ + {'regression_test_id': 18, 'sample_name': 'dvb', 'category': 'DVB', + 'status': 'fail', 'exit_code': -1073741819, 'expected_rc': 0, 'outputs': []}, + {'regression_test_id': 137, 'sample_name': 'cea708', 'category': 'CEA-708', + 'status': 'fail', 'exit_code': 10, 'expected_rc': 0, 'outputs': []}, + {'regression_test_id': 7, 'sample_name': 'broken', 'category': 'Broken', + 'status': 'missing_output', 'exit_code': 0, 'expected_rc': 0, 'outputs': []}, + {'regression_test_id': 1, 'sample_name': 'ok', 'category': 'General', + 'status': 'pass', 'exit_code': 0, 'expected_rc': 0, 'outputs': []}, +] + + +class CliCommandTests(unittest.TestCase): + """Exercise the CLI commands with a mocked client.""" + + def setUp(self): + """Create a runner for each test.""" + self.runner = CliRunner() + + @mock.patch('sp_cli.client.ApiClient.get') + def test_run_ls_calls_runs_with_filters(self, mock_get): + """`run ls` hits /runs and forwards set filters only.""" + mock_get.return_value = RUNS_PAGE + result = self.runner.invoke(cli, ['run', 'ls', '--platform', 'windows']) + + self.assertEqual(result.exit_code, 0) + mock_get.assert_called_once_with('/runs', params={'platform': 'windows'}) + + @mock.patch('sp_cli.client.ApiClient.get') + def test_run_show(self, mock_get): + """`run show ` targets the run detail path.""" + mock_get.return_value = {'run_id': 9299, 'status': 'fail'} + result = self.runner.invoke(cli, ['run', 'show', '9299']) + + self.assertEqual(result.exit_code, 0) + mock_get.assert_called_once_with('/runs/9299', params=None) + + @mock.patch('sp_cli.client.ApiClient.get_paginated') + def test_run_failures_classifies(self, mock_paginated): + """`run failures` keeps only failures and labels each with a code.""" + mock_paginated.return_value = RUN_SAMPLES + result = self.runner.invoke(cli, ['run', 'failures', '9299']) + + self.assertEqual(result.exit_code, 0) + mock_paginated.assert_called_once_with('/runs/9299/samples') + data = json.loads(result.output) + codes = {row['regression_test_id']: row['code'] for row in data['data']} + self.assertEqual(codes, {18: 'SEGFAULT', 137: 'EXIT_CODE_MISMATCH', 7: 'MISSING_OUTPUT'}) + self.assertEqual(data['summary'], {'failures': 3, 'of_total': 4}) + + @mock.patch('sp_cli.client.ApiClient.get_paginated') + def test_run_failures_table_output(self, mock_paginated): + """Table mode renders the classification columns.""" + mock_paginated.return_value = RUN_SAMPLES + result = self.runner.invoke(cli, ['-o', 'table', 'run', 'failures', '9299']) + + self.assertEqual(result.exit_code, 0) + self.assertIn('SEGFAULT', result.output) + self.assertIn('code', result.output) + + @mock.patch('sp_cli.client.ApiClient.get') + def test_sample_ls(self, mock_get): + """`sample ls` hits /samples.""" + mock_get.return_value = {'data': [], 'pagination': {'total': 0, 'next_offset': None}} + result = self.runner.invoke(cli, ['sample', 'ls', '--tag', 'teletext']) + + self.assertEqual(result.exit_code, 0) + mock_get.assert_called_once_with('/samples', params={'tag': 'teletext'}) + + @mock.patch('sp_cli.client.ApiClient.get') + def test_health(self, mock_get): + """`sp health` hits /system/health.""" + mock_get.return_value = {'status': 'ok', 'dependencies': []} + result = self.runner.invoke(cli, ['health']) + + self.assertEqual(result.exit_code, 0) + mock_get.assert_called_once_with('/system/health', params=None) + + @mock.patch('sp_cli.client.ApiClient.get') + def test_not_found_maps_to_exit_code_and_stderr(self, mock_get): + """A not-found error exits 4 with a JSON envelope on stderr.""" + mock_get.side_effect = ApiError('not_found', 'Run 9 not found', 404) + result = self.runner.invoke(cli, ['run', 'show', '9']) + + self.assertEqual(result.exit_code, 4) + self.assertEqual(json.loads(result.stderr)['error']['code'], 'not_found') + + @mock.patch('sp_cli.client.ApiClient.get') + def test_run_result(self, mock_get): + """`run result ` targets the result-detail path.""" + mock_get.return_value = {'regression_test_id': 137, 'status': 'fail'} + result = self.runner.invoke(cli, ['run', 'result', '9299', '5']) + + self.assertEqual(result.exit_code, 0) + mock_get.assert_called_once_with('/runs/9299/samples/5', params=None) + + @mock.patch('sp_cli.client.ApiClient.get') + def test_run_diff_auto_resolves_hidden_ids(self, mock_get): + """`run diff` fetches the result detail to resolve regression_id + output_id.""" + mock_get.side_effect = [ + {'regression_test_id': 137, 'outputs': [{'output_id': 2, 'status': 'different'}]}, + {'status': 'different', 'hunks': []}, + ] + result = self.runner.invoke(cli, ['run', 'diff', '9299', '5']) + + self.assertEqual(result.exit_code, 0) + self.assertEqual(mock_get.call_count, 2) + args, kwargs = mock_get.call_args + self.assertEqual(args[0], '/runs/9299/samples/5/diff') + self.assertEqual(kwargs['params'], {'regression_id': 137, 'output_id': 2}) + + @mock.patch('sp_cli.client.ApiClient.get') + def test_run_diff_with_explicit_ids_skips_resolution(self, mock_get): + """Passing --regression and --output diffs directly with no extra call.""" + mock_get.return_value = {'status': 'different', 'hunks': []} + result = self.runner.invoke(cli, ['run', 'diff', '9299', '5', '--regression', '137', '--output', '2']) + + self.assertEqual(result.exit_code, 0) + mock_get.assert_called_once_with('/runs/9299/samples/5/diff', + params={'regression_id': 137, 'output_id': 2}) + + @mock.patch('sp_cli.client.ApiClient.get_paginated') + @mock.patch('sp_cli.client.ApiClient.get') + def test_investigate_combines_run_summary_and_failures(self, mock_get, mock_paginated): + """`investigate` merges run detail, summary, and classified failures.""" + mock_get.side_effect = [ + {'run_id': 9299, 'pr_number': 2264, 'platform': 'windows', 'status': 'fail'}, + {'run_id': 9299, 'total_samples': 4, 'pass_count': 1, 'fail_count': 3}, + ] + mock_paginated.return_value = RUN_SAMPLES + result = self.runner.invoke(cli, ['investigate', '9299']) + + self.assertEqual(result.exit_code, 0) + report = json.loads(result.output) + self.assertEqual(report['run']['pr_number'], 2264) + self.assertEqual(report['summary']['fail_count'], 3) + self.assertEqual(report['by_code'], + {'SEGFAULT': 1, 'EXIT_CODE_MISMATCH': 1, 'MISSING_OUTPUT': 1}) + self.assertEqual(len(report['failures']), 3) diff --git a/tests/test_cli/test_client.py b/tests/test_cli/test_client.py new file mode 100644 index 000000000..ffc5086cb --- /dev/null +++ b/tests/test_cli/test_client.py @@ -0,0 +1,90 @@ +"""Tests for the CLI's HTTP client, mocking the requests session.""" + +import unittest +from unittest import mock + +import requests # type: ignore[import-untyped] + +from sp_cli.client import ApiClient, ApiError + + +class FakeResponse: + """Minimal stand-in for a requests Response.""" + + def __init__(self, status_code, json_data=None, raise_json=False): + """Store the canned status and body.""" + self.status_code = status_code + self._json = json_data + self._raise_json = raise_json + + def json(self): + """Return the canned JSON body or raise like requests does on non-JSON.""" + if self._raise_json: + raise ValueError('No JSON could be decoded') + return self._json + + +class ApiClientTests(unittest.TestCase): + """Exercise request building and error mapping in the client.""" + + @mock.patch('requests.Session.request') + def test_get_returns_payload_and_builds_url(self, mock_request): + """A 2xx response is returned and the API prefix is applied.""" + mock_request.return_value = FakeResponse(200, {'data': []}) + client = ApiClient('http://host/api/v1') + + self.assertEqual(client.get('/runs'), {'data': []}) + args, _ = mock_request.call_args + self.assertEqual(args[0], 'GET') + self.assertEqual(args[1], 'http://host/api/v1/runs') + + @mock.patch('requests.Session.request') + def test_204_returns_none(self, mock_request): + """A 204 (e.g. token revoke) returns None, not a parse error.""" + mock_request.return_value = FakeResponse(204) + self.assertIsNone(ApiClient('http://host').request('DELETE', '/auth/tokens/current')) + + @mock.patch('requests.Session.request') + def test_error_codes_map_to_exit_codes(self, mock_request): + """Each HTTP error maps to its documented exit code.""" + cases = {404: 4, 422: 5, 400: 5, 401: 6, 403: 6, 429: 7} + client = ApiClient('http://host') + for status, expected_exit in cases.items(): + mock_request.return_value = FakeResponse(status, {'code': 'x', 'message': 'm'}) + with self.assertRaises(ApiError) as caught: + client.get('/runs/9') + self.assertEqual(caught.exception.exit_code, expected_exit, f'status {status}') + + @mock.patch('requests.Session.request') + def test_token_is_sent_as_bearer_header(self, mock_request): + """A configured token is sent as an Authorization header.""" + mock_request.return_value = FakeResponse(200, {}) + ApiClient('http://host', token='secret').get('/runs') + _, kwargs = mock_request.call_args + self.assertEqual(kwargs['headers']['Authorization'], 'Bearer secret') + + @mock.patch('requests.Session.request', side_effect=requests.RequestException('boom')) + def test_connection_failure(self, mock_request): + """A transport failure maps to a connection_error with exit code 3.""" + with self.assertRaises(ApiError) as caught: + ApiClient('http://host').get('/runs') + self.assertEqual(caught.exception.code, 'connection_error') + self.assertEqual(caught.exception.exit_code, 3) + + @mock.patch('requests.Session.request') + def test_non_json_body_raises_invalid_response(self, mock_request): + """A non-JSON body raises invalid_response rather than crashing.""" + mock_request.return_value = FakeResponse(500, raise_json=True) + with self.assertRaises(ApiError) as caught: + ApiClient('http://host').get('/runs') + self.assertEqual(caught.exception.code, 'invalid_response') + + @mock.patch('requests.Session.request') + def test_get_paginated_follows_offset(self, mock_request): + """Pagination is followed across pages until next_offset is null.""" + mock_request.side_effect = [ + FakeResponse(200, {'data': [1, 2, 3], 'pagination': {'next_offset': 3}}), + FakeResponse(200, {'data': [4, 5], 'pagination': {'next_offset': None}}), + ] + items = ApiClient('http://host').get_paginated('/runs/9/samples') + self.assertEqual(items, [1, 2, 3, 4, 5]) From 50379f4dbb176f79751d96938295e2f2fcf29c27 Mon Sep 17 00:00:00 2001 From: GAURAV KARMAKAR Date: Sun, 14 Jun 2026 15:27:05 +0530 Subject: [PATCH 2/4] fix(cli): align sp run diff with REST API contract Validated against pulk17:Feature/REST-API-Endpoints (PR #1117): - diff is keyed by media sample_id + regression_id + output_id as PATH segments, not query params on /samples//diff - the diff route's slot is the media sample id (different from the regression_test_id used by the result-detail route); pull it from detail['sample_id'] instead of reusing the CLI argument - per-output status vocab is pass|fail|missing_output, not 'match' --- sp_cli/commands/run.py | 50 ++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/sp_cli/commands/run.py b/sp_cli/commands/run.py index 64a1b16c8..b9f79f2d6 100644 --- a/sp_cli/commands/run.py +++ b/sp_cli/commands/run.py @@ -109,10 +109,12 @@ def run_diff(ctx: click.Context, run_id: int, sample_id: int, regression_id: Opt if not targets: raise ApiError('not_found', 'No differing output to diff for this result', 404) diffs = [] - for reg_id, out_id in targets: - params = clean_params({'regression_id': reg_id, 'output_id': out_id, - 'context_lines': context_lines}) - diffs.append(client.get(f'/runs/{run_id}/samples/{sample_id}/diff', params=params)) + for media_sample_id, reg_id, out_id in targets: + params = clean_params({'context_lines': context_lines}) + diffs.append(client.get( + f'/runs/{run_id}/samples/{media_sample_id}' + f'/regression-tests/{reg_id}/outputs/{out_id}/diff', + params=params)) except ApiError as error: render_error(error, output) raise SystemExit(error.exit_code) @@ -148,36 +150,42 @@ def run_errors(ctx: click.Context, run_id: int, error_type: Optional[str]) -> No def _resolve_diff_targets(client: Any, run_id: int, sample_id: int, regression_id: Optional[int], - output_id: Optional[int]) -> List[Tuple[int, int]]: + output_id: Optional[int]) -> List[Tuple[int, int, int]]: """ - Resolve the (regression_id, output_id) pairs to diff for a result. + Resolve the (media_sample_id, regression_id, output_id) triples to diff. - If both ids are supplied, uses them directly. Otherwise fetches the result - detail and picks the differing output(s), so the caller never needs the - hidden ids the diff endpoint requires. + The diff endpoint is keyed by the *media* sample id, the regression test + id, and the output file id -- three different numbers. The CLI's + ``sample_id`` argument is the result id within the run (the regression test + id), so we always fetch the result detail to recover the media sample id + (``detail['sample_id']``) and the differing output(s), and let the caller + pass explicit ids only to narrow which output(s) to diff. :param client: The API client. :type client: Any :param run_id: The run id. :type run_id: int - :param sample_id: The result id within the run. + :param sample_id: The result id within the run (the regression test id). :type sample_id: int - :param regression_id: Optional explicit regression id. + :param regression_id: Optional explicit regression id override. :type regression_id: Optional[int] - :param output_id: Optional explicit output id. + :param output_id: Optional explicit output id to diff. :type output_id: Optional[int] - :return: A list of ``(regression_id, output_id)`` pairs to diff. - :rtype: List[Tuple[int, int]] + :return: A list of ``(media_sample_id, regression_id, output_id)`` triples. + :rtype: List[Tuple[int, int, int]] """ - if regression_id is not None and output_id is not None: - return [(regression_id, output_id)] - detail = client.get(f'/runs/{run_id}/samples/{sample_id}') + media_sample_id = detail.get('sample_id') reg_id = regression_id if regression_id is not None else detail.get('regression_test_id') + if media_sample_id is None or reg_id is None: + return [] + outputs = detail.get('outputs') or [] - differing = [o for o in outputs if o.get('status') not in (None, 'match')] or outputs + # The API reports per-output status as pass|fail|missing_output; anything + # not 'pass' is worth diffing. Fall back to all outputs if none qualify. + differing = [o for o in outputs if o.get('status') not in (None, 'pass')] or outputs if output_id is not None: - return [(reg_id, output_id)] if reg_id is not None else [] - return [(reg_id, o.get('output_id')) for o in differing - if reg_id is not None and o.get('output_id') is not None] + return [(media_sample_id, reg_id, output_id)] + return [(media_sample_id, reg_id, o.get('output_id')) for o in differing + if o.get('output_id') is not None] From 78a9e8ed8d0fd715b26ea0670fd781c2a4740212 Mon Sep 17 00:00:00 2001 From: GAURAV KARMAKAR Date: Sun, 14 Jun 2026 16:45:54 +0530 Subject: [PATCH 3/4] test(cli): update diff tests for path-based REST API contract --- tests/test_cli/test_cli.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/tests/test_cli/test_cli.py b/tests/test_cli/test_cli.py index b1744284d..202e37904 100644 --- a/tests/test_cli/test_cli.py +++ b/tests/test_cli/test_cli.py @@ -113,9 +113,10 @@ def test_run_result(self, mock_get): @mock.patch('sp_cli.client.ApiClient.get') def test_run_diff_auto_resolves_hidden_ids(self, mock_get): - """`run diff` fetches the result detail to resolve regression_id + output_id.""" + """`run diff` resolves the media sample id + regression/output ids from detail.""" mock_get.side_effect = [ - {'regression_test_id': 137, 'outputs': [{'output_id': 2, 'status': 'different'}]}, + {'regression_test_id': 137, 'sample_id': 42, + 'outputs': [{'output_id': 2, 'status': 'fail'}]}, {'status': 'different', 'hunks': []}, ] result = self.runner.invoke(cli, ['run', 'diff', '9299', '5']) @@ -123,18 +124,23 @@ def test_run_diff_auto_resolves_hidden_ids(self, mock_get): self.assertEqual(result.exit_code, 0) self.assertEqual(mock_get.call_count, 2) args, kwargs = mock_get.call_args - self.assertEqual(args[0], '/runs/9299/samples/5/diff') - self.assertEqual(kwargs['params'], {'regression_id': 137, 'output_id': 2}) + self.assertEqual(args[0], '/runs/9299/samples/42/regression-tests/137/outputs/2/diff') + self.assertEqual(kwargs['params'], {}) @mock.patch('sp_cli.client.ApiClient.get') - def test_run_diff_with_explicit_ids_skips_resolution(self, mock_get): - """Passing --regression and --output diffs directly with no extra call.""" - mock_get.return_value = {'status': 'different', 'hunks': []} + def test_run_diff_with_explicit_ids_uses_media_sample_from_detail(self, mock_get): + """Explicit --regression/--output still fetch detail for the media sample id.""" + mock_get.side_effect = [ + {'regression_test_id': 137, 'sample_id': 42, 'outputs': []}, + {'status': 'different', 'hunks': []}, + ] result = self.runner.invoke(cli, ['run', 'diff', '9299', '5', '--regression', '137', '--output', '2']) self.assertEqual(result.exit_code, 0) - mock_get.assert_called_once_with('/runs/9299/samples/5/diff', - params={'regression_id': 137, 'output_id': 2}) + self.assertEqual(mock_get.call_count, 2) + args, kwargs = mock_get.call_args + self.assertEqual(args[0], '/runs/9299/samples/42/regression-tests/137/outputs/2/diff') + self.assertEqual(kwargs['params'], {}) @mock.patch('sp_cli.client.ApiClient.get_paginated') @mock.patch('sp_cli.client.ApiClient.get') From 36fde366eebec22cc40f075ac5bf6d64ffa823d0 Mon Sep 17 00:00:00 2001 From: GAURAV KARMAKAR Date: Sun, 14 Jun 2026 18:13:42 +0530 Subject: [PATCH 4/4] feat(cli): add run approve-baseline + document logs access control Aligns the CLI with the mentor's review of the REST API (PR #1117): - new 'sp run approve-baseline' (POST baseline-approval): requires explicit --regression/--output (destructive), maps --remove-variants to the API's remove_variants, resolves the media sample id from result detail (the endpoint's slot is the media sample, not the result id), and carries the cross-platform warning + admin-only note in its help - 'sp run logs' help now states it requires contributor/admin privileges Read-only behaviour is unchanged; approve-baseline is the CLI's only mutating command besides auth. --- sp_cli/commands/run.py | 47 +++++++++++++++++++++++++++++++++++++- tests/test_cli/test_cli.py | 24 +++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/sp_cli/commands/run.py b/sp_cli/commands/run.py index b9f79f2d6..b304248e9 100644 --- a/sp_cli/commands/run.py +++ b/sp_cli/commands/run.py @@ -122,6 +122,51 @@ def run_diff(ctx: click.Context, run_id: int, sample_id: int, regression_id: Opt render(diffs[0] if len(diffs) == 1 else {'data': diffs}, output) +@run.command('approve-baseline') +@click.argument('run_id', type=int) +@click.argument('sample_id', type=int) +@click.option('--regression', 'regression_id', type=int, required=True, + help='Regression test id of the result to approve.') +@click.option('--output', 'output_id', type=int, required=True, + help="Output file id whose actual output becomes the new baseline.") +@click.option('--remove-variants', is_flag=True, default=False, + help='Remove all platform-specific variants (see WARNING below).') +@click.pass_context +def run_approve_baseline(ctx: click.Context, run_id: int, sample_id: int, + regression_id: int, output_id: int, remove_variants: bool) -> None: + """Approve a result's actual output as the new expected baseline. + + Requires admin privileges (the ``baselines:write`` scope). + + WARNING: --remove-variants makes this output the single source of truth + across ALL platforms by deleting every platform-specific variant. This + applies globally and cannot be undone from the CLI -- use with care. + + --regression and --output are required (no auto-resolution): approving a + baseline is destructive, so the exact target must be stated explicitly. + """ + client = ctx.obj['client'] + output = ctx.obj['output'] + try: + # The endpoint's path slot is the *media* sample id, which + # differs from the regression-result id passed on the command line. + # Resolve it from the result detail (same contract as `run diff`). + detail = client.get(f'/runs/{run_id}/samples/{sample_id}') + media_sample_id = detail.get('sample_id') + if media_sample_id is None: + raise ApiError('not_found', 'Could not resolve the media sample for this result', 404) + body = {'regression_id': regression_id, 'output_id': output_id, + 'remove_variants': remove_variants} + result = client.request( + 'POST', f'/runs/{run_id}/samples/{media_sample_id}/baseline-approval', + json_body=body) + except ApiError as error: + render_error(error, output) + raise SystemExit(error.exit_code) + + render(result, output) + + @run.command('artifacts') @click.argument('run_id', type=int) @click.pass_context @@ -134,7 +179,7 @@ def run_artifacts(ctx: click.Context, run_id: int) -> None: @click.argument('run_id', type=int) @click.pass_context def run_logs(ctx: click.Context, run_id: int) -> None: - """Show raw logs for a run.""" + """Show raw logs for a run (requires contributor or admin privileges).""" fetch_and_render(ctx, f'/runs/{run_id}/logs') diff --git a/tests/test_cli/test_cli.py b/tests/test_cli/test_cli.py index 202e37904..4685d8247 100644 --- a/tests/test_cli/test_cli.py +++ b/tests/test_cli/test_cli.py @@ -142,6 +142,30 @@ def test_run_diff_with_explicit_ids_uses_media_sample_from_detail(self, mock_get self.assertEqual(args[0], '/runs/9299/samples/42/regression-tests/137/outputs/2/diff') self.assertEqual(kwargs['params'], {}) + @mock.patch('sp_cli.client.ApiClient.request') + @mock.patch('sp_cli.client.ApiClient.get') + def test_run_approve_baseline_resolves_media_sample_and_posts(self, mock_get, mock_request): + """`run approve-baseline` POSTs to the media-sample path resolved from detail.""" + mock_get.return_value = {'regression_test_id': 137, 'sample_id': 42, 'outputs': []} + mock_request.return_value = {'status': 'approved', 'run_id': 9299, 'sample_id': 42, + 'regression_id': 137, 'output_id': 2} + result = self.runner.invoke(cli, ['run', 'approve-baseline', '9299', '5', + '--regression', '137', '--output', '2', '--remove-variants']) + + self.assertEqual(result.exit_code, 0) + mock_get.assert_called_once_with('/runs/9299/samples/5') + mock_request.assert_called_once_with( + 'POST', '/runs/9299/samples/42/baseline-approval', + json_body={'regression_id': 137, 'output_id': 2, 'remove_variants': True}) + + @mock.patch('sp_cli.client.ApiClient.get') + def test_run_approve_baseline_requires_regression_and_output(self, mock_get): + """Approving a baseline refuses to run without the explicit target ids.""" + result = self.runner.invoke(cli, ['run', 'approve-baseline', '9299', '5']) + + self.assertNotEqual(result.exit_code, 0) + mock_get.assert_not_called() + @mock.patch('sp_cli.client.ApiClient.get_paginated') @mock.patch('sp_cli.client.ApiClient.get') def test_investigate_combines_run_summary_and_failures(self, mock_get, mock_paginated):