CCExtractor · gaurav02081 · Jun 14, 2026 · Jun 14, 2026 · Jun 14, 2026 · Jun 14, 2026
@@ -53,3 +53,6 @@ monkeytype.sqlite3
 
 # Test related data
 temp/
+
+# Python packaging
+*.egg-info/
@@ -0,0 +1,16 @@
+[build-system]
+# setuptools 61 is the first release that reads the [project] table below.
+requires = ["setuptools>=61"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "sp-cli"
+# Same value as sp_cli.__version__ in sp_cli/__init__.py; bump both together.
+version = "0.1.0"
+description = "AI-friendly CLI for the CCExtractor CI / Sample Platform"
+requires-python = ">=3.10"
+# Runtime dependencies are declared without version constraints.
+dependencies = ["click", "requests"]
+
+[project.scripts]
+# Installs an `sp` console command that invokes `cli` from sp_cli/main.py.
+sp = "sp_cli.main:cli"
+
+[tool.setuptools]
+# Packages are listed explicitly, so a new sub-package must be added here to be shipped.
+packages = ["sp_cli", "sp_cli.commands"]
@@ -60,10 +60,14 @@
                                      app.config['DEBUG'])
 log = log_configuration.create_logger("Platform")
 
-# Create bucket objext using GCS storage client
-sa_file = os.path.join(app.config.get('INSTALL_FOLDER', ''), app.config.get('SERVICE_ACCOUNT_FILE', ''))
-storage_client = Client.from_service_account_json(sa_file)
-storage_client_bucket = storage_client.bucket(app.config.get('GCS_BUCKET_NAME', ''))
+# Create bucket object using GCS storage client, unless explicitly disabled (local dev)
+if os.environ.get('DISABLE_GCS', '0') == '1':
+    storage_client = None
+    storage_client_bucket = None
+else:
+    sa_file = os.path.join(app.config.get('INSTALL_FOLDER', ''), app.config.get('SERVICE_ACCOUNT_FILE', ''))
+    storage_client = Client.from_service_account_json(sa_file)
+    storage_client_bucket = storage_client.bucket(app.config.get('GCS_BUCKET_NAME', ''))
 
 # Save build commit
 repo = git.Repo(app.config.get('INSTALL_FOLDER', ''))

@@ -0,0 +1,9 @@
+"""``sp`` — an AI-friendly command-line client for the CCExtractor Sample Platform.
+
+The CLI is a thin layer over the Sample Platform JSON API (``/api/v1``). It is
+designed to be driven by AI agents as well as humans: it emits machine-readable
+JSON by default and uses non-zero exit codes plus a consistent error envelope on
+failure, so it can be scripted without screen-scraping the web UI.
+"""
+
+__version__ = "0.1.0"
@@ -0,0 +1,6 @@
+"""Allow the CLI to be run as ``python -m sp_cli``."""
+
+from sp_cli.main import cli
+
+# Guarded so that merely importing this module does not start the CLI; only
+# direct execution (``python -m sp_cli``) does.
+if __name__ == '__main__':
+    cli()
@@ -0,0 +1,54 @@
+"""Branded welcome screen for the ``sp`` CLI.
+
+Shown only when ``sp`` is invoked with no subcommand. Never emitted on command
+output, so machine consumers (agents parsing JSON) are unaffected. Colors are
+applied via :func:`click.style` and are auto-stripped when output is piped.
+"""
+
+import click
+
+from sp_cli import __version__
+
+#: Figlet-style "sp" wordmark. A raw string, so the backslashes in the art are
+#: kept literally instead of being read as escape sequences.
+LOGO = r"""  ___ _ __
+ / __| '_ \
+ \__ \ |_) |
+ |___/ .__/
+     |_|"""
+
+#: (label, command summary) pairs. ``show_welcome`` prints one row per pair with
+#: the label left-justified to 8 columns.
+_GROUPS = [
+    ('TRIAGE', 'sp investigate <run>     ← one-shot: what failed and why'),
+    ('RUNS', 'sp run ls · show · summary · failures · results · result · diff · artifacts · logs · errors'),
+    ('SAMPLES', 'sp sample ls · show · history'),
+    ('TESTS', 'sp regression ls'),
+    ('SYSTEM', 'sp health · queue'),
+    ('AUTH', 'sp auth login · logout'),
+]
+
+#: (command, note) pairs. ``show_welcome`` prints the command padded to 28
+#: columns, followed by the note rendered as a trailing ``# ...`` comment.
+_EXAMPLES = [
+    ('sp investigate 9299', 'triage a run end-to-end'),
+    ('sp run failures 9299', 'failing tests, each labeled with why'),
+    ('sp run diff 9299 137', 'expected-vs-actual diff (ids auto-resolved)'),
+]
+
+
+def show_welcome() -> None:
+    """
+    Print the welcome screen: logo, tagline, command map, examples and footer.
+
+    Everything is written with :func:`click.echo`; nothing is returned.
+    """
+    def bold(text: str) -> str:
+        # Shorthand for the bold styling used by every heading on the screen.
+        return click.style(text, bold=True)
+
+    # --- banner ---------------------------------------------------------
+    click.echo()
+    click.echo(click.style(LOGO, fg='cyan'))
+    click.echo(f"  {bold('CCExtractor CI')} · AI-friendly CLI · v{__version__}")
+    click.echo("  drive CI investigations from the terminal — no UI, no HTML scraping")
+    click.echo()
+
+    # --- command map: one row per group, label padded to 8 columns ---------
+    for label, summary in _GROUPS:
+        styled_label = click.style(f"{label:<8}", fg='green', bold=True)
+        click.echo(f"  {styled_label} {summary}")
+    click.echo()
+
+    # --- examples: command padded to 28 columns, note shown as a comment ---
+    click.echo(f"  {bold('Examples')}")
+    for invocation, remark in _EXAMPLES:
+        dimmed_remark = click.style('# ' + remark, fg='bright_black')
+        click.echo(f"    {invocation:<28} {dimmed_remark}")
+    click.echo()
+
+    # --- footer: help hint and the environment variables used for config ---
+    help_hint = f"  {bold('Help')} sp COMMAND --help"
+    config_hint = f"     {bold('Config')} SP_BASE_URL · SP_API_TOKEN"
+    click.echo(help_hint + config_hint)
+    click.echo()
@@ -0,0 +1,110 @@
+"""Rule-based classification of regression-test failures into stable codes.
+
+Deterministic, no ML: maps the raw signals a test run exposes (exit code,
+expected return code, output presence, pass-history) onto a small, stable
+taxonomy so an agent can branch on *why* a test failed instead of parsing
+prose. Platform differences are normalized — e.g. a segfault surfaces as ``139``
+on Linux and ``-1073741819`` (0xC0000005) on Windows; both classify as
+``SEGFAULT``.
+
+Each classification returns a ``code`` (stable, machine-readable), a
+``confidence`` (``high`` for unambiguous exit-code rules, ``medium`` for
+output-based ones), a human ``reason``, and ``regression`` (True if the test was
+passing before — a real regression; False if it never passed; None if unknown).
+"""
+
+from typing import Any, Dict, Optional
+
+# --- Failure codes (stable; downstream tools may pin on these) ---------------
+# Plain strings so they serialize unchanged into the JSON result of classify().
+CODE_PASS = "PASS"
+CODE_SEGFAULT = "SEGFAULT"
+CODE_ABORT = "ABORT"
+CODE_TIMEOUT = "TIMEOUT"
+CODE_MISSING_OUTPUT = "MISSING_OUTPUT"
+CODE_EXIT_CODE_MISMATCH = "EXIT_CODE_MISMATCH"
+CODE_OUTPUT_DIFF = "OUTPUT_DIFF"
+CODE_UNKNOWN = "UNKNOWN"
+
+# --- Exit codes that denote a crash or forced termination, normalized across
+# --- platforms. classify() matches them by exact set membership. -------------
+#: SIGSEGV (128+11) on Linux, raw -11, and 0xC0000005 access violation on Windows
+#: (-1073741819 is 0xC0000005 read as a signed 32-bit integer).
+_SEGFAULT_CODES = frozenset({139, -11, -1073741819})
+#: SIGABRT (128+6) on Linux and raw -6.
+_ABORT_CODES = frozenset({134, -6})
+#: `timeout` exit (124) and SIGTERM (143 / -15).
+_TIMEOUT_CODES = frozenset({124, 143, -15})
+
+
+def classify(exit_code: Optional[int], expected_rc: Optional[int], *,
+             has_output_diff: bool = False, missing_output: bool = False,
+             has_ever_passed: Optional[bool] = None) -> Dict[str, Any]:
+    """
+    Classify a single regression-test result into a stable failure code.
+
+    Rules are evaluated most-severe first (segfault > abort > timeout >
+    missing output > unknown exit code > exit-code mismatch > output diff),
+    so the most actionable signal wins.
+
+    If no exit code was recorded (``exit_code is None``) and no earlier rule
+    matched, the result is ``UNKNOWN``: without an exit code there is nothing
+    to compare against ``expected_rc``, so neither a mismatch nor a pass can
+    be claimed.
+
+    :param exit_code: The process exit code observed for the test, or None if
+        none was recorded.
+    :type exit_code: Optional[int]
+    :param expected_rc: The exit code the test was expected to return.
+    :type expected_rc: Optional[int]
+    :param has_output_diff: True if a differing output file was recorded.
+    :type has_output_diff: bool
+    :param missing_output: True if output was expected but none was produced.
+    :type missing_output: bool
+    :param has_ever_passed: Whether this test has ever passed (history), if known.
+    :type has_ever_passed: Optional[bool]
+    :return: ``{code, confidence, reason, regression}``.
+    :rtype: Dict[str, Any]
+    """
+    # The regression flag depends only on history, so compute it once and
+    # attach it to whichever rule fires.
+    regression = _regression_state(has_ever_passed)
+
+    # Crash / termination rules: exact exit-code membership, hence "high".
+    if exit_code in _SEGFAULT_CODES:
+        return _result(CODE_SEGFAULT, "high",
+                       f"Crash (segfault / access violation), exit {exit_code}", regression)
+    if exit_code in _ABORT_CODES:
+        return _result(CODE_ABORT, "high", f"Aborted (SIGABRT), exit {exit_code}", regression)
+    if exit_code in _TIMEOUT_CODES:
+        return _result(CODE_TIMEOUT, "high", f"Timed out / terminated, exit {exit_code}", regression)
+    if missing_output:
+        return _result(CODE_MISSING_OUTPUT, "high",
+                       "No output was produced but one was expected", regression)
+    if exit_code is None:
+        # A missing exit code used to fall through to the comparison below and
+        # surface as "Exited None, expected N" (or as PASS when expected_rc
+        # was also None). Report it as UNKNOWN instead of guessing.
+        return _result(CODE_UNKNOWN, "medium",
+                       "No exit code was recorded, so the outcome cannot be determined", regression)
+    if exit_code != expected_rc:
+        return _result(CODE_EXIT_CODE_MISMATCH, "high",
+                       f"Exited {exit_code}, expected {expected_rc}", regression)
+    if has_output_diff:
+        # Output-based rule: less certain than an exit-code rule, hence "medium".
+        return _result(CODE_OUTPUT_DIFF, "medium",
+                       "Exit code matched but output differs from expected", regression)
+
+    return _result(CODE_PASS, "high", "Exit code matched and no output diff recorded", regression)
+
+
+def _regression_state(has_ever_passed: Optional[bool]) -> Optional[bool]:
+    """
+    Derive the ``regression`` flag from a test's pass-history.
+
+    :param has_ever_passed: Whether the test has ever passed; None when unknown.
+    :type has_ever_passed: Optional[bool]
+    :return: None when history is unknown, otherwise the truthiness of
+        ``has_ever_passed`` (True = it used to pass, False = it never did).
+    :rtype: Optional[bool]
+    """
+    # Unknown history must stay None rather than collapse to False.
+    return None if has_ever_passed is None else bool(has_ever_passed)
+
+
+def _result(code: str, confidence: str, reason: str, regression: Optional[bool]) -> Dict[str, Any]:
+    """
+    Bundle the four classification fields into one result mapping.
+
+    :param code: The stable failure code.
+    :type code: str
+    :param confidence: Confidence label for the classification.
+    :type confidence: str
+    :param reason: Human-readable explanation.
+    :type reason: str
+    :param regression: Regression flag; None when pass-history is unknown.
+    :type regression: Optional[bool]
+    :return: Mapping with the keys ``code``, ``confidence``, ``reason`` and
+        ``regression``, in that order.
+    :rtype: Dict[str, Any]
+    """
+    return dict(code=code, confidence=confidence, reason=reason, regression=regression)
@@ -0,0 +1,168 @@
+"""HTTP client for the CCExtractor CI System API (`/api/v1`)."""
+
+from typing import Any, Dict, List, Optional
+
+import requests  # type: ignore[import-untyped]
+
+
+class ApiError(Exception):
+    """Failure of an API request, carrying the fields of the structured error envelope."""
+
+    def __init__(self, code: str, message: str, status: Optional[int] = None,
+                 details: Optional[Dict[str, Any]] = None) -> None:
+        """
+        Store the error fields; ``message`` also becomes the exception text.
+
+        :param code: Stable machine-readable error code (e.g. ``not_found``).
+        :type code: str
+        :param message: Human-readable explanation.
+        :type message: str
+        :param status: HTTP status code, if the failure was an HTTP response.
+        :type status: Optional[int]
+        :param details: Optional structured context echoed from the API.
+        :type details: Optional[Dict[str, Any]]
+        """
+        super().__init__(message)
+        self.code, self.message = code, message
+        self.status, self.details = status, details
+
+    @property
+    def exit_code(self) -> int:
+        """
+        Translate the error into a process exit code callers can branch on.
+
+        :return: 3 connection · 4 not-found · 5 validation · 6 auth · 7 rate-limited · 1 other.
+        :rtype: int
+        """
+        # A connection failure wins over any HTTP status that may be set.
+        if self.code == 'connection_error':
+            return 3
+        # HTTP status -> exit code; anything unlisted (or no status) is generic.
+        by_status = {404: 4, 400: 5, 422: 5, 401: 6, 403: 6, 429: 7}
+        return by_status.get(self.status, 1)
+
+
+class ApiClient:
+    """
+    Minimal client over the JSON API. Sends a bearer token when configured.
+
+    All requests go through a single ``requests.Session`` held by the instance.
+
+    NOTE(review): URLs are built as ``base_url + path`` and nothing in this
+    class inserts an ``/api/v1`` segment, although the parameter docs describe
+    paths as being "below ``/api/v1``" — confirm that callers include the
+    prefix in ``path`` (or in ``base_url``).
+    """
+
+    def __init__(self, base_url: str, token: Optional[str] = None, timeout: int = 30) -> None:
+        """
+        Configure the client.
+
+        :param base_url: Root URL of the platform (without the ``/api/v1`` prefix).
+            Trailing slashes are stripped.
+        :type base_url: str
+        :param token: Optional opaque bearer token sent on every request.
+        :type token: Optional[str]
+        :param timeout: Per-request timeout in seconds.
+        :type timeout: int
+        """
+        self.base_url = base_url.rstrip('/')
+        self.token = token
+        self.timeout = timeout
+        self.session = requests.Session()
+
+    def _headers(self) -> Dict[str, str]:
+        """
+        Build request headers, including the bearer token when set.
+
+        :return: Header mapping.
+        :rtype: Dict[str, str]
+        """
+        headers = {'Accept': 'application/json'}
+        if self.token:
+            headers['Authorization'] = f'Bearer {self.token}'
+        return headers
+
+    def request(self, method: str, path: str, params: Optional[Dict[str, Any]] = None,
+                json_body: Optional[Dict[str, Any]] = None) -> Any:
+        """
+        Perform a request against an API path and return the decoded JSON body.
+
+        :param method: HTTP method (``GET``, ``POST``, ``DELETE`` …).
+        :type method: str
+        :param path: API path below ``/api/v1`` (e.g. ``/runs``); appended
+            verbatim to ``base_url``.
+        :type path: str
+        :param params: Optional query-string parameters.
+        :type params: Optional[Dict[str, Any]]
+        :param json_body: Optional JSON request body.
+        :type json_body: Optional[Dict[str, Any]]
+        :raises ApiError: on connection failure, a non-JSON body, or an HTTP error.
+            The underlying exception, when there is one, is chained as
+            ``__cause__``.
+        :return: The decoded JSON response body (or ``None`` for ``204``).
+        :rtype: Any
+        """
+        url = f"{self.base_url}{path}"
+        try:
+            response = self.session.request(method, url, params=params, json=json_body,
+                                            headers=self._headers(), timeout=self.timeout)
+        except requests.RequestException as exc:
+            # Chain the cause so the original transport error stays in tracebacks.
+            raise ApiError('connection_error', f'Could not reach {url}: {exc}') from exc
+
+        # 204 No Content carries no body, so there is nothing to decode.
+        if response.status_code == 204:
+            return None
+
+        try:
+            payload = response.json()
+        except ValueError as exc:
+            raise ApiError('invalid_response',
+                           f'Expected JSON but got HTTP {response.status_code}',
+                           response.status_code) from exc
+
+        if response.status_code >= 400:
+            # Error bodies are expected to be an envelope dict; fall back to
+            # generic values when the body is some other JSON value.
+            error = payload if isinstance(payload, dict) else {}
+            raise ApiError(
+                error.get('code', 'http_error'),
+                error.get('message', f'Request failed with HTTP {response.status_code}'),
+                response.status_code,
+                error.get('details'),
+            )
+
+        return payload
+
+    def get(self, path: str, params: Optional[Dict[str, Any]] = None) -> Any:
+        """
+        Perform a GET and return the decoded body.
+
+        :param path: API path below ``/api/v1``.
+        :type path: str
+        :param params: Optional query-string parameters.
+        :type params: Optional[Dict[str, Any]]
+        :return: The decoded JSON body.
+        :rtype: Any
+        """
+        return self.request('GET', path, params=params)
+
+    def get_paginated(self, path: str, params: Optional[Dict[str, Any]] = None,
+                      max_items: int = 1000) -> List[Any]:
+        """
+        Follow offset pagination and return the combined ``data`` list.
+
+        Pages are fetched until one comes back empty, the response carries no
+        ``pagination.next_offset``, or ``max_items`` items have been collected.
+        The result never holds more than ``max_items`` items: the final page is
+        truncated if it overshoots the cap.
+
+        :param path: API path below ``/api/v1``.
+        :type path: str
+        :param params: Optional query-string parameters (``limit``/``offset`` are managed).
+            The caller's mapping is copied, not mutated; ``limit`` defaults to 100.
+        :type params: Optional[Dict[str, Any]]
+        :param max_items: Hard cap on total items returned (negative values count as 0).
+        :type max_items: int
+        :return: Items across pages, at most ``max_items`` of them.
+        :rtype: List[Any]
+        """
+        merged = dict(params or {})
+        merged.setdefault('limit', 100)
+        # A negative cap would turn the final slice into "drop from the end".
+        cap = max(max_items, 0)
+        offset = 0
+        items: List[Any] = []
+        while True:
+            merged['offset'] = offset
+            payload = self.get(path, params=merged)
+            body = payload if isinstance(payload, dict) else {}
+            # Treat a missing, null or non-list "data" as an empty page.
+            data = body.get('data')
+            if not isinstance(data, list):
+                data = []
+            items.extend(data)
+            # Likewise tolerate a missing, null or malformed "pagination" object.
+            pagination = body.get('pagination')
+            next_offset = pagination.get('next_offset') if isinstance(pagination, dict) else None
+            if not data or next_offset is None or len(items) >= cap:
+                break
+            offset = next_offset
+        # The last page may overshoot the cap; enforce it on the way out.
+        return items[:cap]
@@ -0,0 +1 @@
+"""Command groups for the ``sp`` CLI, grouped by resource (noun-verb)."""
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		"""Command groups for the ``sp`` CLI, grouped by resource (noun-verb)."""