diff --git a/pkg/rest/vd_put_size_bounds_round4_test.go b/pkg/rest/vd_put_size_bounds_round4_test.go new file mode 100644 index 00000000..41b64549 --- /dev/null +++ b/pkg/rest/vd_put_size_bounds_round4_test.go @@ -0,0 +1,232 @@ +// SPDX-License-Identifier: Apache-2.0 + +/* +Copyright 2026 Cozystack contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package rest + +import ( + "encoding/json" + "io" + "net/http" + "strings" + "testing" + + apiv1 "github.com/cozystack/blockstor/pkg/api/v1" + "github.com/cozystack/blockstor/pkg/store" +) + +// VD-resize size-bounds gap (adversarial round 4, 2026-07-03): the VD +// CREATE path gates size_kib into [4 MiB, 16 TiB] via validateVDSize +// (Bug 155), so the satellite never hot-loops on `drbdadm create-md` +// for an unmaterializable size. The RESIZE path +// (`PUT /v1/resource-definitions/{rd}/volume-definitions/{vn}`, i.e. +// `linstor vd set-size`) enforced only the Bug 383 non-positive floor +// and the scenario 4.W13 shrink-vs-force gate — it never called +// validateVDSize. So a `vd set-size` below the 4 MiB metadata floor +// (with force to clear the shrink gate) or above the 16 TiB ceiling +// was accepted (200) and stored verbatim, reproducing the exact Bug +// 155 satellite hot-loop through the resize verb instead of create. 
+// +// The fix mirrors the create gate on the PUT branch (rejectVDPatchOutOfBounds), +// evaluated — like the Bug 383 non-positive floor — BEFORE +// the shrink-vs-force check: `force` waives the shrink-direction opt-in, +// never the physical floor/ceiling, and running the bounds check first +// hands the operator the accurate "invalid size" envelope instead of an +// "add --force" hint on a size that would be rejected even with force. +// +// These are the L1 fail-on-bug regressions: each FAILS on the pre-fix +// tree (PUT 200s and mutates the stored row) and PASSES with the gate. + +// TestVDPutBelowFloorWithForceRejected: a force-shrink to a positive +// size below the 4 MiB DRBD metadata floor must be refused with the +// same 400 + FAIL_INVLD_VLM_SIZE envelope the create path returns, and +// must NOT mutate the stored size. +func TestVDPutBelowFloorWithForceRejected(t *testing.T) { + st := store.NewInMemory() + const origSize = int64(1024 * 1024) // 1 GiB, comfortably in-bounds + seedRDWithVD(t, st, "r4-floor-rd", origSize) + + base, stop := startServerWithStore(t, st) + defer stop() + + belowFloor := minVolumeDefinitionSizeKib - 1024 // 1 MiB below the 4 MiB floor, still > 0 + body, err := json.Marshal(volumeDefinitionModifyBody{ + SizeKib: &belowFloor, + Force: true, // clears the shrink-without-force gate + }) + if err != nil { + t.Fatalf("marshal: %v", err) + } + + resp := httpPut(t, base+"/v1/resource-definitions/r4-floor-rd/volume-definitions/0", body) + defer func() { _ = resp.Body.Close() }() + + respBody, err := io.ReadAll(resp.Body) + if err != nil { + t.Fatalf("read body: %v", err) + } + + if resp.StatusCode != http.StatusBadRequest { + t.Fatalf("status: got %d, want 400 (sub-floor size must be refused like create); body=%s", + resp.StatusCode, respBody) + } + + assertVDSizeRejectionEnvelope(t, respBody, "below minimum") + + // The stored row must stay at the pre-PUT size — a rejected resize + // leaves the spec untouched. 
+ assertVDSize(t, st, "r4-floor-rd", origSize) +} + +// TestVDPutAboveMaxRejected: a grow above the 16 TiB DRBD per-device +// ceiling (a pure grow, so only a max-bound gate can stop it) must be +// refused and must not mutate the stored size. +func TestVDPutAboveMaxRejected(t *testing.T) { + st := store.NewInMemory() + const origSize = int64(8192) + seedRDWithVD(t, st, "r4-max-rd", origSize) + + base, stop := startServerWithStore(t, st) + defer stop() + + aboveMax := maxVolumeDefinitionSizeKib + (1024 * 1024) // 1 GiB past the ceiling + body, err := json.Marshal(volumeDefinitionModifyBody{ + SizeKib: &aboveMax, + }) + if err != nil { + t.Fatalf("marshal: %v", err) + } + + resp := httpPut(t, base+"/v1/resource-definitions/r4-max-rd/volume-definitions/0", body) + defer func() { _ = resp.Body.Close() }() + + respBody, err := io.ReadAll(resp.Body) + if err != nil { + t.Fatalf("read body: %v", err) + } + + if resp.StatusCode != http.StatusBadRequest { + t.Fatalf("status: got %d, want 400 (over-ceiling size must be refused like create); body=%s", + resp.StatusCode, respBody) + } + + assertVDSizeRejectionEnvelope(t, respBody, "above maximum") + + assertVDSize(t, st, "r4-max-rd", origSize) +} + +// TestVDPutAtBoundsAccepted: the boundary is inclusive on both ends +// (validateVDSize rejects `< min` and `> max`), so a resize to exactly +// the floor (force-shrink) and exactly the ceiling (grow) must still +// land. Guards the new gate against an off-by-one (over-broad) bound. 
+func TestVDPutAtBoundsAccepted(t *testing.T) { + t.Run("exact-floor-force-shrink", func(t *testing.T) { + st := store.NewInMemory() + seedRDWithVD(t, st, "r4-atfloor-rd", 1024*1024) + + base, stop := startServerWithStore(t, st) + defer stop() + + atFloor := minVolumeDefinitionSizeKib + body, _ := json.Marshal(volumeDefinitionModifyBody{SizeKib: &atFloor, Force: true}) + + resp := httpPut(t, base+"/v1/resource-definitions/r4-atfloor-rd/volume-definitions/0", body) + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode != http.StatusOK { + rb, _ := io.ReadAll(resp.Body) + t.Fatalf("status: got %d, want 200 (exact floor is in-bounds); body=%s", resp.StatusCode, rb) + } + + assertVDSize(t, st, "r4-atfloor-rd", atFloor) + }) + + t.Run("exact-ceiling-grow", func(t *testing.T) { + st := store.NewInMemory() + seedRDWithVD(t, st, "r4-atmax-rd", 8192) + + base, stop := startServerWithStore(t, st) + defer stop() + + atMax := maxVolumeDefinitionSizeKib + body, _ := json.Marshal(volumeDefinitionModifyBody{SizeKib: &atMax}) + + resp := httpPut(t, base+"/v1/resource-definitions/r4-atmax-rd/volume-definitions/0", body) + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode != http.StatusOK { + rb, _ := io.ReadAll(resp.Body) + t.Fatalf("status: got %d, want 200 (exact ceiling is in-bounds); body=%s", resp.StatusCode, rb) + } + + assertVDSize(t, st, "r4-atmax-rd", atMax) + }) +} + +// TestVDPutInBoundsForceShrinkStillWorks: a legitimate in-bounds +// force-shrink (1 GiB → 8 MiB, well above the 4 MiB floor) must still +// land 200 and persist. The bounds gate must not regress the +// scenario-4.W13 force-shrink path linstor-csi drives after a +// `resize2fs -s` on the consumer — `force` still clears the shrink- +// direction gate, and the in-bounds size passes the new floor/ceiling. 
+func TestVDPutInBoundsForceShrinkStillWorks(t *testing.T) { + st := store.NewInMemory() + seedRDWithVD(t, st, "r4-inbounds-shrink-rd", 1024*1024) // 1 GiB + + base, stop := startServerWithStore(t, st) + defer stop() + + newSize := int64(8 * 1024) // 8 MiB, comfortably in-bounds + body, err := json.Marshal(volumeDefinitionModifyBody{SizeKib: &newSize, Force: true}) + if err != nil { + t.Fatalf("marshal: %v", err) + } + + resp := httpPut(t, base+"/v1/resource-definitions/r4-inbounds-shrink-rd/volume-definitions/0", body) + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode != http.StatusOK { + rb, _ := io.ReadAll(resp.Body) + t.Fatalf("status: got %d, want 200 (in-bounds force-shrink must still land); body=%s", resp.StatusCode, rb) + } + + assertVDSize(t, st, "r4-inbounds-shrink-rd", newSize) +} + +// assertVDSizeRejectionEnvelope pins that a rejected resize returns the +// single-entry LINSTOR envelope with the FAIL_INVLD_VLM_SIZE sub-code +// (create-path parity) and a message naming the specific bound. 
+func assertVDSizeRejectionEnvelope(t *testing.T, respBody []byte, wantBound string) { + t.Helper() + + var rcs []apiv1.APICallRc + if err := json.Unmarshal(respBody, &rcs); err != nil { + t.Fatalf("unmarshal envelope: %v; body=%s", err, respBody) + } + + if len(rcs) != 1 { + t.Fatalf("envelope length: got %d, want 1; body=%s", len(rcs), respBody) + } + + if rcs[0].RetCode&apiCallRcFailInvldVlmSize == 0 { + t.Errorf("ret_code missing FAIL_INVLD_VLM_SIZE sub-code (create-path parity); got %d", rcs[0].RetCode) + } + + if !strings.Contains(rcs[0].Message, wantBound) { + t.Errorf("message must name the %q bound; got %q", wantBound, rcs[0].Message) + } +} diff --git a/pkg/rest/volume_definitions.go b/pkg/rest/volume_definitions.go index c5740f3a..1b37d8dc 100644 --- a/pkg/rest/volume_definitions.go +++ b/pkg/rest/volume_definitions.go @@ -819,6 +819,21 @@ func rejectVDPatchSize( return true } + // Adversarial round 4 (2026-07-03): mirror the CREATE path's Bug 155 + // bounds gate on the RESIZE path. The create path refuses size_kib + // outside [4 MiB, 16 TiB] via validateVDSize so the satellite never + // hot-loops on `drbdadm create-md`; `linstor vd set-size` previously + // skipped that check, so a below-floor force-shrink or an over-ceiling + // grow was stored verbatim and reproduced the Bug 155 hot-loop through + // the resize verb. Runs — like the Bug 383 non-positive floor above — + // BEFORE the shrink-vs-force gate: `force` waives the shrink-direction + // opt-in, never the physical floor/ceiling, and checking bounds first + // gives the operator the accurate "invalid size" envelope instead of an + // "add --force" hint on a size that would be refused even with force. + if rejectVDPatchOutOfBounds(w, patch, rd, vn) { + return true + } + // Scenario 4.W13: reject any shrink (`new < previous`) unless the // operator opted in via `force=true`. 
Runs BEFORE the merge + store // write so a rejected shrink leaves the stored spec untouched — a @@ -827,6 +842,30 @@ func rejectVDPatchSize( return rejectShrinkWithoutForce(w, r, patch, rd, vn, previousSizeKib) } +// rejectVDPatchOutOfBounds writes a 400 + FAIL_INVLD_VLM_SIZE envelope +// when the patch carries a `size_kib` outside the accepted +// [minVolumeDefinitionSizeKib, maxVolumeDefinitionSizeKib] range and +// returns true to signal the caller to short-circuit. Reuses the create +// path's validateVDSize + writeVDSizeRejection so the wire shape is +// byte-identical across the create and resize verbs (Bug 155 parity). +// A patch that does not touch size (`SizeKib == nil`) is left alone. +func rejectVDPatchOutOfBounds( + w http.ResponseWriter, patch *volumeDefinitionModifyBody, rd string, vn int32, +) bool { + if patch.SizeKib == nil { + return false + } + + sizeErr := validateVDSize(*patch.SizeKib) + if sizeErr == nil { + return false + } + + writeVDSizeRejection(w, rd, vn, *patch.SizeKib, sizeErr) + + return true +} + // rejectVDNonPositiveSize writes a 400 + FAIL_INVLD_VLM_SIZE envelope // when the patch carries a non-positive `size_kib` and returns true // to signal the caller to short-circuit. diff --git a/pkg/rest/volume_definitions_test.go b/pkg/rest/volume_definitions_test.go index d40bc1e1..b60389fd 100644 --- a/pkg/rest/volume_definitions_test.go +++ b/pkg/rest/volume_definitions_test.go @@ -810,12 +810,18 @@ func TestVolumeDefinitionsUpdateShrinkWithForceQueryAccepted(t *testing.T) { } } -// TestVolumeDefinitionsUpdateLargeSizeKibRoundTrip pins that -// petabyte-scale `size_kib` values survive the JSON round-trip -// without truncation. The wire field is int64 on our side and uint64 -// in golinstor; a regression that decoded into int32 would clamp -// anything above ~2 TiB. 2^40 KiB = 1 PiB — covers the largest -// volumes any sane cluster would carve. 
+// TestVolumeDefinitionsUpdateLargeSizeKibRoundTrip pins that a large +// (multi-TiB) `size_kib` survives the JSON round-trip without +// truncation. The wire field is int64 on our side and uint64 in +// golinstor; a regression that decoded into int32 would clamp anything +// above ~2 TiB. The guard uses DRBD's 16 TiB per-device ceiling +// (maxVolumeDefinitionSizeKib, Bug 155) — the largest accepted size and +// still 8× above the int32 clamp point, so it exercises the >int32 wire +// path while remaining a size the resize bounds gate accepts. Petabyte- +// scale sizes are refused on BOTH create and resize (see +// TestBug155VDCreateRefusesAbsurdSize and the round-4 resize-bounds +// regressions); this test previously used 1 PiB, which pinned the exact +// create/resize bounds asymmetry the round-4 gate closed. func TestVolumeDefinitionsUpdateLargeSizeKibRoundTrip(t *testing.T) { st := store.NewInMemory() ctx := t.Context() @@ -832,9 +838,9 @@ func TestVolumeDefinitionsUpdateLargeSizeKibRoundTrip(t *testing.T) { base, stop := startServerWithStore(t, st) defer stop() - const oneEiB = int64(1) << 40 // 1 PiB in KiB + const largeInBoundsKib = maxVolumeDefinitionSizeKib // 16 TiB, the largest accepted size (~8× int32 max) - body, _ := json.Marshal(apiv1.VolumeDefinition{SizeKib: oneEiB}) + body, _ := json.Marshal(apiv1.VolumeDefinition{SizeKib: largeInBoundsKib}) resp := httpPut(t, base+"/v1/resource-definitions/pvc-pib/volume-definitions/0", body) _ = resp.Body.Close() @@ -848,8 +854,8 @@ func TestVolumeDefinitionsUpdateLargeSizeKibRoundTrip(t *testing.T) { t.Fatalf("Get: %v", err) } - if got.SizeKib != oneEiB { - t.Errorf("SizeKib after large PUT: got %d, want %d (truncation?)", got.SizeKib, oneEiB) + if got.SizeKib != largeInBoundsKib { + t.Errorf("SizeKib after large PUT: got %d, want %d (truncation?)", got.SizeKib, largeInBoundsKib) } } diff --git a/tests/e2e/cli-matrix/vd-resize-bounds-rejected.sh b/tests/e2e/cli-matrix/vd-resize-bounds-rejected.sh new file 
mode 100755 index 00000000..619fc9aa --- /dev/null +++ b/tests/e2e/cli-matrix/vd-resize-bounds-rejected.sh @@ -0,0 +1,157 @@ +#!/usr/bin/env bash +# +# usage: vd-resize-bounds-rejected.sh WORK_DIR +# +# L6 cli-matrix cell — VD resize size-bounds rejection (adversarial +# round 4, 2026-07-03). +# +# The CREATE path gates size_kib into [4 MiB, 16 TiB] (Bug 155) so the +# satellite never hot-loops on `drbdadm create-md` for a size DRBD +# cannot address. `linstor vd set-size` (RESIZE) must enforce the SAME +# floor/ceiling — otherwise an operator/CSI resize persists an +# unmaterializable spec and the satellite hot-loops forever (the Bug +# 155 failure mode, reached through the resize verb). Does not self-heal. +# +# RUN-DEFERRED (2026-07-03): authored as the CLI-level paper trail per +# the blockstor CLAUDE.md CLI-bug-fix protocol. The fix itself is proven +# in the SAME PR at the L1 (handler) + integration (real apiserver via +# envtest) tiers — a REST-layer input REJECTION is fully provable there +# (the L6/L7 tiers validate DRBD-state convergence, which is N/A for a +# refused request). This cell needs the live dev-kvaps stand, which is +# unavailable this session; a stand-pending task tracks running it once +# the oracle is up. +# +# Steps: +# 1. rd c + vd c 1G + r c --auto-place=2 -s $POOL ; wait UpToDate. +# 2. over-ceiling GROW via CLI: `vd s $RD 0 16385G` (16 TiB + 1 GiB) +# MUST exit non-zero, error names the size/maximum, size still 1G. +# 3. below-floor FORCE-shrink via raw REST PUT (force=true, 3072 KiB +# = 3 MiB < 4 MiB floor) MUST return 400 + FAIL_INVLD_VLM_SIZE, +# size still 1G. The bounds gate runs before the shrink-vs-force gate, +# so force must not waive the floor; python-linstor `vd set-size` +# exposes no force flag, so — like rd-c-layer-list-rejected.sh's +# server-side case — this is exercised via a raw REST PUT with force=true. +# 4. in-bounds GROW via CLI: `vd s $RD 0 2G` MUST succeed (exit 0, +# size becomes 2G) — the gate must not regress legitimate resizes. 
+ +set -euo pipefail + +WORK_DIR=${1:?work_dir required} +export KUBECONFIG="$WORK_DIR/kubeconfig" + +SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=lib.sh +source "$SCRIPT_DIR/lib.sh" + +require_workers 2 + +linstor_cli_setup + +RD=cli-matrix-resize-bounds-rejected +POOL=${POOL:-lvm-thin} +SIZE_1G_KIB=1048576 +SIZE_2G_KIB=2097152 + +cleanup() { + delete_rd "$RD" + assert_no_orphans "$RD" + linstor_cli_teardown +} +trap cleanup EXIT + +echo ">> pre-flight: 2 healthy $POOL SPs" +sp_json=$("${LCTL[@]}" --machine-readable storage-pool list --storage-pools "$POOL" 2>/dev/null || echo "[]") +ok_nodes=$(jq -r '[.[]? | .[]? | select(.provider_kind != null) | .node_name] | unique | length' <<<"$sp_json" 2>/dev/null || echo 0) +if (( ok_nodes < 2 )); then + echo "SKIP: $POOL SP not on >=2 nodes (got $ok_nodes)" + exit 0 +fi + +echo ">> rd c + vd c 1G + r c --auto-place=2 -s $POOL" +_rc_out=$("${LCTL[@]}" resource-definition create "$RD" 2>&1) \ + || { echo "FAIL: rd c $RD: $_rc_out" >&2; exit 1; } +_rc_out=$("${LCTL[@]}" volume-definition create "$RD" 1G 2>&1) \ + || { echo "FAIL: vd c $RD 1G: $_rc_out" >&2; exit 1; } +_rc_out=$("${LCTL[@]}" resource create --auto-place=2 --storage-pool="$POOL" "$RD" 2>&1) \ + || { echo "FAIL: r c --auto-place=2 -s $POOL $RD: $_rc_out" >&2; exit 1; } + +# Wait for both diskful replicas before any resize so we assert the +# steady-state gate, not a mid-sync race. Diskful = Spec.Flags carries +# NEITHER "DISKLESS" NOR "TIE_BREAKER" (see vd-shrink-rejected.sh). +deadline=$(( $(date +%s) + 90 )) +placed_nodes=() +while (( $(date +%s) < deadline )); do + mapfile -t placed_nodes < <( + kubectl get resources.blockstor.cozystack.io -o json 2>/dev/null \ + | jq -r --arg rd "$RD" ' + .items[]? 
+ | select(.spec.resourceDefinitionName==$rd) + | select(((.spec.flags // []) + | map(select(.=="DISKLESS" or .=="TIE_BREAKER")) + | length) == 0) + | .spec.nodeName' + ) + if (( ${#placed_nodes[@]} >= 2 )); then break; fi + sleep 2 +done +if (( ${#placed_nodes[@]} < 2 )); then + echo "FAIL: autoplace did not stage 2 diskful replicas (got ${#placed_nodes[@]})" >&2 + exit 1 +fi +wait_uptodate "$RD" "${placed_nodes[0]}" "${placed_nodes[1]}" + +echo ">> over-ceiling grow vd s $RD 0 16385G (16 TiB + 1 GiB — MUST exit non-zero)" +err_file=$(mktemp) +if "${LCTL[@]}" volume-definition set-size "$RD" 0 16385G >"$err_file" 2>&1; then + echo "FAIL: over-ceiling grow (16 TiB + 1 GiB) unexpectedly succeeded" >&2 + echo " size_kib > 16 TiB is unaddressable by DRBD — REST must reject." >&2 + cat "$err_file" >&2 + rm -f "$err_file" + exit 1 +fi +if ! grep -qiE 'above maximum|maximum|ceiling|invalid volume definition size' "$err_file"; then + echo "FAIL: over-ceiling rejected but error text is unhelpful:" >&2 + cat "$err_file" >&2 + rm -f "$err_file" + exit 1 +fi +rm -f "$err_file" + +cur_kib=$(linstor_vd_size_kib "$RD" 0) +if (( cur_kib != SIZE_1G_KIB )); then + echo "FAIL: post-reject SizeKib=$cur_kib != $SIZE_1G_KIB (over-ceiling reject mutated state)" >&2 + exit 1 +fi + +echo ">> below-floor force-shrink via raw REST PUT (3072 KiB, force=true — MUST be 400)" +floor_json=$(mktemp) +code=$(curl -s -m 5 -o "$floor_json" -w '%{http_code}' -X PUT \ + -H 'Content-Type: application/json' \ + -d '{"size_kib":3072,"force":true}' \ + "http://localhost:${LCTL_PORT}/v1/resource-definitions/${RD}/volume-definitions/0") +if [[ "$code" != "400" ]]; then + echo "FAIL: below-floor force-shrink returned HTTP $code, want 400" >&2 + cat "$floor_json" >&2 + rm -f "$floor_json" + exit 1 +fi +if ! 
grep -qiE 'below minimum|invalid volume definition size' "$floor_json"; then + echo "FAIL: 400 but envelope is not a size rejection:" >&2 + cat "$floor_json" >&2 + rm -f "$floor_json" + exit 1 +fi +rm -f "$floor_json" + +cur_kib=$(linstor_vd_size_kib "$RD" 0) +if (( cur_kib != SIZE_1G_KIB )); then + echo "FAIL: post-reject SizeKib=$cur_kib != $SIZE_1G_KIB (below-floor reject mutated state)" >&2 + exit 1 +fi + +echo ">> in-bounds grow vd s $RD 0 2G (MUST succeed — no over-broad regression)" +_out=$("${LCTL[@]}" volume-definition set-size "$RD" 0 2G 2>&1) \ + || { echo "FAIL: in-bounds grow 1G->2G rejected: $_out" >&2; exit 1; } +wait_vd_size "$RD" 0 "$SIZE_2G_KIB" + +echo ">> vd-resize-bounds-rejected OK (out-of-bounds refused, in-bounds grow landed, no partial writes)" diff --git a/tests/integration/vd_resize_bounds_test.go b/tests/integration/vd_resize_bounds_test.go new file mode 100644 index 00000000..a8aed99b --- /dev/null +++ b/tests/integration/vd_resize_bounds_test.go @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: Apache-2.0 + +//go:build integration + +/* +Copyright 2026 Cozystack contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package integration + +import ( + "bytes" + "context" + "encoding/json" + "io" + "net/http" + "testing" + + "k8s.io/apimachinery/pkg/types" + + blockstoriov1alpha1 "github.com/cozystack/blockstor/api/v1alpha1" + "github.com/cozystack/blockstor/tests/integration/harness" +) + +// VD-resize size-bounds regression (adversarial round 4, 2026-07-03). +// The CREATE path gates size_kib into [4 MiB, 16 TiB] (Bug 155) so the +// satellite never hot-loops on `drbdadm create-md`. The RESIZE path +// (`PUT .../volume-definitions/{vn}`, `linstor vd set-size`) did not, +// so an out-of-range size could be persisted through the resize verb +// and wedge the satellite exactly as Bug 155 described. These envtest +// regressions drive the real apiserver + store round-trip and assert +// both the wire rejection and that the durable spec is left untouched. + +// VD size bounds mirrored from pkg/rest/volume_definitions.go (Bug 155). +const ( + vdBoundsMinSizeKib int64 = 4 * 1024 // 4 MiB + vdBoundsMaxSizeKib int64 = 16 * 1024 * 1024 * 1024 // 16 TiB +) + +// vdBoundsPut issues a PUT with a JSON body and returns (status, body). 
+func vdBoundsPut(t *testing.T, url string, payload any) (int, []byte) { + t.Helper() + + raw, err := json.Marshal(payload) + if err != nil { + t.Fatalf("marshal: %v", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), groupGTimeout) + defer cancel() + + req, err := http.NewRequestWithContext(ctx, http.MethodPut, url, bytes.NewReader(raw)) + if err != nil { + t.Fatalf("build PUT: %v", err) + } + + req.Header.Set("Content-Type", "application/json") + + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("PUT %s: %v", url, err) + } + defer func() { _ = resp.Body.Close() }() + + body, err := io.ReadAll(resp.Body) + if err != nil { + t.Fatalf("read body: %v", err) + } + + return resp.StatusCode, body +} + +// vdBoundsStoredSize reads the durable SizeKib of vol-0 on rd from the +// RD CRD (the source of truth the satellite would reconcile against). +func vdBoundsStoredSize(t *testing.T, stack *harness.Stack, rd string) int64 { + t.Helper() + + var rdObj blockstoriov1alpha1.ResourceDefinition + if err := stack.Env.Client.Get(context.Background(), types.NamespacedName{Name: rd}, &rdObj); err != nil { + t.Fatalf("get RD %q: %v", rd, err) + } + + for i := range rdObj.Spec.VolumeDefinitions { + if rdObj.Spec.VolumeDefinitions[i].VolumeNumber == 0 { + return rdObj.Spec.VolumeDefinitions[i].SizeKib + } + } + + t.Fatalf("RD %q has no vol-0", rd) + + return 0 +} + +// TestVDResizeRejectsBelowFloor: `vd set-size` (PUT) with force=true to +// a positive size below the 4 MiB DRBD floor must be refused, and the +// stored size must stay unchanged. Pre-fix the resize path accepted it +// (200) and persisted the sub-floor size, reproducing the Bug 155 +// satellite hot-loop through the resize verb. 
+func TestVDResizeRejectsBelowFloor(t *testing.T) { + stack := harness.StartStack(t) + harness.SeedThreeNodeCluster(t, stack) + + rd := seedRDWithVolume(t, stack, "r4-floor") + orig := vdBoundsStoredSize(t, stack, rd) + + belowFloor := vdBoundsMinSizeKib - 1024 // 1 MiB below the floor, still > 0 + url := stack.RestURL + "/v1/resource-definitions/" + rd + "/volume-definitions/0" + + status, body := vdBoundsPut(t, url, map[string]any{"size_kib": belowFloor, "force": true}) + t.Logf("resize to %d KiB (below 4 MiB floor, force) → status=%d body=%s", belowFloor, status, string(body)) + + if status >= 200 && status < 300 { + t.Fatalf("resize ACCEPTED a sub-floor size %d KiB < %d KiB min (Bug-155 class via vd set-size)", + belowFloor, vdBoundsMinSizeKib) + } + + if got := vdBoundsStoredSize(t, stack, rd); got != orig { + t.Fatalf("rejected sub-floor resize STILL mutated the stored size: got %d KiB, want %d", got, orig) + } +} + +// TestVDResizeRejectsAboveMax: `vd set-size` (PUT) grow above the 16 TiB +// ceiling (a pure grow, so only a max-bound gate can stop it) must be +// refused, and the stored size must stay unchanged. 
+func TestVDResizeRejectsAboveMax(t *testing.T) { + stack := harness.StartStack(t) + harness.SeedThreeNodeCluster(t, stack) + + rd := seedRDWithVolume(t, stack, "r4-max") + orig := vdBoundsStoredSize(t, stack, rd) + + aboveMax := vdBoundsMaxSizeKib + (1024 * 1024) // 1 GiB past the ceiling + url := stack.RestURL + "/v1/resource-definitions/" + rd + "/volume-definitions/0" + + status, body := vdBoundsPut(t, url, map[string]any{"size_kib": aboveMax}) + t.Logf("resize to %d KiB (above 16 TiB max) → status=%d body=%s", aboveMax, status, string(body)) + + if status >= 200 && status < 300 { + t.Fatalf("resize ACCEPTED an over-max size %d KiB > %d KiB max (Bug-155 class via vd set-size)", + aboveMax, vdBoundsMaxSizeKib) + } + + if got := vdBoundsStoredSize(t, stack, rd); got != orig { + t.Fatalf("rejected over-max resize STILL mutated the stored size: got %d KiB, want %d", got, orig) + } +} diff --git a/tests/operator-harness/replay/vd-resize-bounds-rejected.yaml b/tests/operator-harness/replay/vd-resize-bounds-rejected.yaml new file mode 100644 index 00000000..2e6acee1 --- /dev/null +++ b/tests/operator-harness/replay/vd-resize-bounds-rejected.yaml @@ -0,0 +1,128 @@ +name: vd-resize-bounds-rejected +description: | + Adversarial round 4 (2026-07-03): `linstor vd set-size` (RESIZE) must + enforce the SAME [4 MiB, 16 TiB] size bounds the CREATE path enforces + (Bug 155). Pre-fix the resize path skipped validateVDSize, so an + over-ceiling grow or a below-floor force-shrink was stored verbatim + and the satellite hot-looped on `drbdadm create-md` — the Bug 155 + failure mode reached through the resize verb, no self-heal. 
+ + Operator sequence codified here (CLI-driveable contract only — the + runner drives the operator CLI): + - grow to 16 TiB + 1 GiB (over ceiling) -> MUST be rejected + - size unchanged after the rejected grow + - shrink to 3 MiB (below the 4 MiB floor) -> MUST be rejected + - size unchanged after the rejected shrink + - grow to 2 GiB (in-bounds) -> MUST succeed + + Note on the below-floor step: the python-linstor `vd set-size` exposes + no force flag, so this step sends the 3 MiB shrink without force. The + bounds gate runs BEFORE the shrink-vs-force gate, so the floor check + refuses it first; force would not change that. The size is unchanged. + The force-specific proof (force-shrink below 4 MiB -> 400 + FAIL_INVLD_VLM_SIZE) lives in the same PR at the L1 handler and + integration (envtest) tiers and in the L6 cell's raw-REST step + (tests/e2e/cli-matrix/vd-resize-bounds-rejected.sh). + + RUN-DEFERRED (2026-07-03): the on-live-stand run needs the dev-kvaps + oracle, unavailable this session. This YAML is the CLI-level paper + trail required by the blockstor CLAUDE.md CLI-bug-fix protocol; a + stand-pending task tracks running it (and the L6 cell) once the stand + is up. The fix is already proven at the L1 + integration tiers, which + fully cover a REST-layer input rejection. + + If this replay goes red, no fix in pkg/rest/volume_definitions.go + (rejectVDPatchSize / rejectVDPatchOutOfBounds) may be claimed closed. 
+ +prerequisites: + min_nodes: 2 + storage_pool: stand + +vars: + sp: stand + +steps: + - name: create-rd + cmd: ["resource-definition", "create", "{{rd}}"] + expect_exit: 0 + - name: create-vd-1g + cmd: ["volume-definition", "create", "{{rd}}", "1G"] + expect_exit: 0 + await: + kind: vd_size_kib + rd: "{{rd}}" + vol: 0 + expected_kib: 1048576 + timeout_s: 30 + - name: auto-place-2 + cmd: ["resource", "create", "--auto-place", "2", "--storage-pool={{sp}}", "{{rd}}"] + expect_exit: 0 + await: + kind: replica_count + rd: "{{rd}}" + min: 2 + timeout_s: 120 + - name: wait-uptodate-1g + cmd: ["resource", "list", "--resources", "{{rd}}"] + expect_exit: 0 + await: + kind: all_uptodate + rd: "{{rd}}" + timeout_s: 600 + + # ---- Over-ceiling grow MUST be rejected ---- + # 16385G = 16 TiB + 1 GiB > maxVolumeDefinitionSizeKib (16 TiB). + # python-linstor surfaces an API-level rejection as exit 10 and a + # client/usage failure as exit 1; both mean "rejected", the load- + # bearing contract (size does NOT change) is pinned below. + - name: grow-over-ceiling-rejected + cmd: ["volume-definition", "set-size", "{{rd}}", "0", "16385G"] + expect_exit: [1, 10] + - name: size-unchanged-after-ceiling-reject + cmd: ["volume-definition", "list", "--resource-definitions", "{{rd}}"] + expect_exit: 0 + await: + kind: vd_size_kib + rd: "{{rd}}" + vol: 0 + expected_kib: 1048576 + timeout_s: 10 + + # ---- Below-floor shrink MUST be rejected ---- + # 3M = 3 MiB < minVolumeDefinitionSizeKib (4 MiB). The CLI sends no + # force, but the bounds gate runs before the shrink-vs-force gate, so + # the floor check answers first; with or without force the size stays 1G. 
+ - name: shrink-below-floor-rejected + cmd: ["volume-definition", "set-size", "{{rd}}", "0", "3M"] + expect_exit: [1, 10] + - name: size-unchanged-after-floor-reject + cmd: ["volume-definition", "list", "--resource-definitions", "{{rd}}"] + expect_exit: 0 + await: + kind: vd_size_kib + rd: "{{rd}}" + vol: 0 + expected_kib: 1048576 + timeout_s: 10 + + # ---- In-bounds grow MUST succeed (no over-broad regression) ---- + - name: grow-in-bounds-2g + cmd: ["volume-definition", "set-size", "{{rd}}", "0", "2G"] + expect_exit: 0 + await: + kind: vd_size_kib + rd: "{{rd}}" + vol: 0 + expected_kib: 2097152 + timeout_s: 60 + +teardown: + - cmd: ["resource-definition", "delete", "{{rd}}"] + expect_exit: 0 + await: + kind: rd_absent + rd: "{{rd}}" + timeout_s: 60 + +invariants: + - no_orphans