From c62e79367485fcf378f23e40a712506a27a592ff Mon Sep 17 00:00:00 2001 From: Andrey Kolkov Date: Thu, 18 Jun 2026 19:15:27 +0400 Subject: [PATCH 1/2] feat(main): add image parameters for air-gapped cases Signed-off-by: Andrey Kolkov --- Makefile | 1 + api/v1alpha2/etcdcluster_types.go | 71 +++++++ api/v1alpha2/etcdmember_types.go | 13 ++ api/v1alpha2/zz_generated.deepcopy.go | 45 +++++ ...cd-operator.cozystack.io_etcdclusters.yaml | 129 +++++++++++++ ...tcd-operator.cozystack.io_etcdmembers.yaml | 57 ++++++ .../etcd-operator/templates/deployment.yaml | 6 + charts/etcd-operator/values.yaml | 26 +++ controllers/etcdcluster_controller.go | 10 +- controllers/etcdmember_controller.go | 15 +- controllers/etcdmember_controller_test.go | 43 +++++ controllers/helpers.go | 35 +++- controllers/helpers_test.go | 55 ++++++ docs/installation.md | 26 ++- docs/migration.md | 6 +- hack/e2e.sh | 27 ++- internal/migrate/adopt.go | 4 + internal/migrate/translate.go | 62 +++++- internal/migrate/translate_test.go | 91 +++++++++ main.go | 14 +- test/e2e/image_override_test.go | 182 ++++++++++++++++++ 21 files changed, 905 insertions(+), 13 deletions(-) create mode 100644 test/e2e/image_override_test.go diff --git a/Makefile b/Makefile index f7829447..8dcd2bcb 100644 --- a/Makefile +++ b/Makefile @@ -192,6 +192,7 @@ deploy: manifests require-helm ## Install/upgrade the operator (CRDs + RBAC + ma img='$(IMG)'; $(HELM) upgrade --install $(HELM_RELEASE) charts/etcd-operator \ --namespace $(NAMESPACE) --create-namespace \ --set image.repository="$${img%:*}" --set image.tag="$${img##*:}" \ + $(HELM_EXTRA_ARGS) \ --wait --timeout 5m .PHONY: undeploy diff --git a/api/v1alpha2/etcdcluster_types.go b/api/v1alpha2/etcdcluster_types.go index 67eba1c4..a5bbb003 100644 --- a/api/v1alpha2/etcdcluster_types.go +++ b/api/v1alpha2/etcdcluster_types.go @@ -558,6 +558,66 @@ type EtcdClusterSpec struct { // tuning change in place. // +optional Options *EtcdOptions `json:"options,omitempty"` + + // Image overrides the etcd container image for this cluster's member + // Pods. The primary use is air-gapped environments that mirror the + // upstream image to a private registry. Each unset field falls back to + // an operator-wide default: the repository to the operator's + // --etcd-image-repository / ETCD_IMAGE_REPOSITORY (itself defaulting to + // quay.io/coreos/etcd), the tag to "v"+spec.version. The pull policy is + // apiserver-defaulted to IfNotPresent as soon as this image block is + // present at all (even with only repository set); it falls back to the + // kubelet default only when no image block is given. See EtcdImageSpec + // for the per-field rules. + // + // Updates take effect on newly-created members (scale-up, replacement); + // the operator does not roll existing Pods to apply an image change in + // place. Latched through status.observed with the rest of the target + // spec, so a mid-flight edit only applies once the current target is + // reached. + // +optional + Image *EtcdImageSpec `json:"image,omitempty"` + + // ImagePullSecrets is a list of Secret references in the cluster's + // namespace used to pull the etcd (and restore initContainer) image. + // Passed straight through to each member Pod's spec.imagePullSecrets. + // Required when spec.image points at a private registry that needs + // credentials. + // + // Like spec.image, changes take effect on newly-created members; the + // operator does not roll existing Pods. Latched through status.observed. + // +optional + ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets,omitempty"` +} + +// EtcdImageSpec overrides the etcd container image. Every field is optional; +// an unset field falls back to its operator-wide default so a cluster can +// override just the registry (the common air-gap case) without restating the +// version-derived tag. +type EtcdImageSpec struct { + // Repository is the image repository (registry host + path, no tag), + // e.g. "registry.internal/mirror/etcd". When empty the operator falls + // back to its --etcd-image-repository / ETCD_IMAGE_REPOSITORY default, + // which itself defaults to "quay.io/coreos/etcd". + // +optional + Repository string `json:"repository,omitempty"` + + // Tag overrides the image tag. When empty the operator derives it from + // spec.version as "v"+version (e.g. "v3.6.11"). Set this only when the + // mirror uses a tag scheme that differs from the upstream "vX.Y.Z" — the + // operator still keys all of its version-dependent behaviour off + // spec.version, not off this tag. + // +optional + Tag string `json:"tag,omitempty"` + + // PullPolicy is the etcd container's imagePullPolicy. Defaults to + // IfNotPresent — the right policy for the fixed "v" tags this + // operator runs (it also matches the kubelet's own default for a fixed + // tag). Set Always only when a mutable tag is used via Tag. + // +kubebuilder:default=IfNotPresent + // +kubebuilder:validation:Enum=Always;IfNotPresent;Never + // +optional + PullPolicy corev1.PullPolicy `json:"pullPolicy,omitempty"` } // AdditionalMetadata is a set of labels and annotations the operator merges @@ -624,6 +684,17 @@ type ObservedClusterSpec struct { // reached. // +optional Options *EtcdOptions `json:"options,omitempty"` + + // Image is the locked target etcd image override for member Pods. + // Latched with the rest of the target spec so a mid-flight image edit + // only applies to members created once the current target is reached. + // +optional + Image *EtcdImageSpec `json:"image,omitempty"` + + // ImagePullSecrets is the locked target pull-secret list for member + // Pods. Latched with the rest of the target spec. + // +optional + ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets,omitempty"` } // EtcdClusterStatus defines the observed state of an etcd cluster. diff --git a/api/v1alpha2/etcdmember_types.go b/api/v1alpha2/etcdmember_types.go index b6c737a7..2ef697fe 100644 --- a/api/v1alpha2/etcdmember_types.go +++ b/api/v1alpha2/etcdmember_types.go @@ -116,6 +116,19 @@ type EtcdMemberSpec struct { // +optional Options *EtcdOptions `json:"options,omitempty"` + // Image mirrors EtcdCluster.spec.image at the time this member was + // created. The member controller resolves it (against the operator-wide + // repository default and spec.version) into the etcd container's image + // and imagePullPolicy at Pod-build time. + // +optional + Image *EtcdImageSpec `json:"image,omitempty"` + + // ImagePullSecrets mirrors EtcdCluster.spec.imagePullSecrets at the time + // this member was created. Passed straight to the Pod's + // spec.imagePullSecrets at build time. + // +optional + ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets,omitempty"` + // Bootstrap indicates this member is part of the initial cluster formation. // When true the member starts with --initial-cluster-state=new. // +optional diff --git a/api/v1alpha2/zz_generated.deepcopy.go b/api/v1alpha2/zz_generated.deepcopy.go index 47b13ce8..8b3710e3 100644 --- a/api/v1alpha2/zz_generated.deepcopy.go +++ b/api/v1alpha2/zz_generated.deepcopy.go @@ -257,6 +257,16 @@ func (in *EtcdClusterSpec) DeepCopyInto(out *EtcdClusterSpec) { *out = new(EtcdOptions) (*in).DeepCopyInto(*out) } + if in.Image != nil { + in, out := &in.Image, &out.Image + *out = new(EtcdImageSpec) + **out = **in + } + if in.ImagePullSecrets != nil { + in, out := &in.ImagePullSecrets, &out.ImagePullSecrets + *out = make([]v1.LocalObjectReference, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EtcdClusterSpec. @@ -325,6 +335,21 @@ func (in *EtcdClusterTLS) DeepCopy() *EtcdClusterTLS { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EtcdImageSpec) DeepCopyInto(out *EtcdImageSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EtcdImageSpec. +func (in *EtcdImageSpec) DeepCopy() *EtcdImageSpec { + if in == nil { + return nil + } + out := new(EtcdImageSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *EtcdMember) DeepCopyInto(out *EtcdMember) { *out = *in @@ -411,6 +436,16 @@ func (in *EtcdMemberSpec) DeepCopyInto(out *EtcdMemberSpec) { *out = new(EtcdOptions) (*in).DeepCopyInto(*out) } + if in.Image != nil { + in, out := &in.Image, &out.Image + *out = new(EtcdImageSpec) + **out = **in + } + if in.ImagePullSecrets != nil { + in, out := &in.ImagePullSecrets, &out.ImagePullSecrets + *out = make([]v1.LocalObjectReference, len(*in)) + copy(*out, *in) + } if in.TLS != nil { in, out := &in.TLS, &out.TLS *out = new(EtcdMemberTLS) @@ -650,6 +685,16 @@ func (in *ObservedClusterSpec) DeepCopyInto(out *ObservedClusterSpec) { *out = new(EtcdOptions) (*in).DeepCopyInto(*out) } + if in.Image != nil { + in, out := &in.Image, &out.Image + *out = new(EtcdImageSpec) + **out = **in + } + if in.ImagePullSecrets != nil { + in, out := &in.ImagePullSecrets, &out.ImagePullSecrets + *out = make([]v1.LocalObjectReference, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ObservedClusterSpec. diff --git a/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdclusters.yaml b/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdclusters.yaml index 6be40900..6ac683de 100644 --- a/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdclusters.yaml +++ b/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdclusters.yaml @@ -1166,6 +1166,80 @@ spec: rule: '!has(self.source.pvc) || (has(self.source.pvc.subPath) && size(self.source.pvc.subPath) > 0)' type: object + image: + description: |- + Image overrides the etcd container image for this cluster's member + Pods. The primary use is air-gapped environments that mirror the + upstream image to a private registry. Each unset field falls back to + an operator-wide default: the repository to the operator's + --etcd-image-repository / ETCD_IMAGE_REPOSITORY (itself defaulting to + quay.io/coreos/etcd), the tag to "v"+spec.version. The pull policy is + apiserver-defaulted to IfNotPresent as soon as this image block is + present at all (even with only repository set); it falls back to the + kubelet default only when no image block is given. See EtcdImageSpec + for the per-field rules. + + Updates take effect on newly-created members (scale-up, replacement); + the operator does not roll existing Pods to apply an image change in + place. Latched through status.observed with the rest of the target + spec, so a mid-flight edit only applies once the current target is + reached. + properties: + pullPolicy: + default: IfNotPresent + description: |- + PullPolicy is the etcd container's imagePullPolicy. Defaults to + IfNotPresent — the right policy for the fixed "v" tags this + operator runs (it also matches the kubelet's own default for a fixed + tag). Set Always only when a mutable tag is used via Tag. + enum: + - Always + - IfNotPresent + - Never + type: string + repository: + description: |- + Repository is the image repository (registry host + path, no tag), + e.g. "registry.internal/mirror/etcd". When empty the operator falls + back to its --etcd-image-repository / ETCD_IMAGE_REPOSITORY default, + which itself defaults to "quay.io/coreos/etcd". + type: string + tag: + description: |- + Tag overrides the image tag. When empty the operator derives it from + spec.version as "v"+version (e.g. "v3.6.11"). Set this only when the + mirror uses a tag scheme that differs from the upstream "vX.Y.Z" — the + operator still keys all of its version-dependent behaviour off + spec.version, not off this tag. + type: string + type: object + imagePullSecrets: + description: |- + ImagePullSecrets is a list of Secret references in the cluster's + namespace used to pull the etcd (and restore initContainer) image. + Passed straight through to each member Pod's spec.imagePullSecrets. + Required when spec.image points at a private registry that needs + credentials. + + Like spec.image, changes take effect on newly-created members; the + operator does not roll existing Pods. Latched through status.observed. + items: + description: |- + LocalObjectReference contains enough information to let you locate the + referenced object inside the same namespace. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + type: array options: description: |- Options carries etcd server tuning flags (backend quota, @@ -2856,6 +2930,61 @@ spec: x-kubernetes-list-type: atomic type: object type: object + image: + description: |- + Image is the locked target etcd image override for member Pods. + Latched with the rest of the target spec so a mid-flight image edit + only applies to members created once the current target is reached. + properties: + pullPolicy: + default: IfNotPresent + description: |- + PullPolicy is the etcd container's imagePullPolicy. Defaults to + IfNotPresent — the right policy for the fixed "v" tags this + operator runs (it also matches the kubelet's own default for a fixed + tag). Set Always only when a mutable tag is used via Tag. + enum: + - Always + - IfNotPresent + - Never + type: string + repository: + description: |- + Repository is the image repository (registry host + path, no tag), + e.g. "registry.internal/mirror/etcd". When empty the operator falls + back to its --etcd-image-repository / ETCD_IMAGE_REPOSITORY default, + which itself defaults to "quay.io/coreos/etcd". + type: string + tag: + description: |- + Tag overrides the image tag. When empty the operator derives it from + spec.version as "v"+version (e.g. "v3.6.11"). Set this only when the + mirror uses a tag scheme that differs from the upstream "vX.Y.Z" — the + operator still keys all of its version-dependent behaviour off + spec.version, not off this tag. + type: string + type: object + imagePullSecrets: + description: |- + ImagePullSecrets is the locked target pull-secret list for member + Pods. Latched with the rest of the target spec. + items: + description: |- + LocalObjectReference contains enough information to let you locate the + referenced object inside the same namespace. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + type: array options: description: |- Options is the locked target etcd tuning flags for member Pods. diff --git a/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdmembers.yaml b/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdmembers.yaml index 7d47d313..a50f28b4 100644 --- a/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdmembers.yaml +++ b/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdmembers.yaml @@ -1020,6 +1020,63 @@ spec: the same ClusterID and member ID. While dormant, the member does not count toward the EtcdCluster's `current` replica accounting. type: boolean + image: + description: |- + Image mirrors EtcdCluster.spec.image at the time this member was + created. The member controller resolves it (against the operator-wide + repository default and spec.version) into the etcd container's image + and imagePullPolicy at Pod-build time. + properties: + pullPolicy: + default: IfNotPresent + description: |- + PullPolicy is the etcd container's imagePullPolicy. Defaults to + IfNotPresent — the right policy for the fixed "v" tags this + operator runs (it also matches the kubelet's own default for a fixed + tag). Set Always only when a mutable tag is used via Tag. + enum: + - Always + - IfNotPresent + - Never + type: string + repository: + description: |- + Repository is the image repository (registry host + path, no tag), + e.g. "registry.internal/mirror/etcd". When empty the operator falls + back to its --etcd-image-repository / ETCD_IMAGE_REPOSITORY default, + which itself defaults to "quay.io/coreos/etcd". + type: string + tag: + description: |- + Tag overrides the image tag. When empty the operator derives it from + spec.version as "v"+version (e.g. "v3.6.11"). Set this only when the + mirror uses a tag scheme that differs from the upstream "vX.Y.Z" — the + operator still keys all of its version-dependent behaviour off + spec.version, not off this tag. + type: string + type: object + imagePullSecrets: + description: |- + ImagePullSecrets mirrors EtcdCluster.spec.imagePullSecrets at the time + this member was created. Passed straight to the Pod's + spec.imagePullSecrets at build time. + items: + description: |- + LocalObjectReference contains enough information to let you locate the + referenced object inside the same namespace. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + type: array initialCluster: description: |- InitialCluster is the value passed to etcd's --initial-cluster flag. diff --git a/charts/etcd-operator/templates/deployment.yaml b/charts/etcd-operator/templates/deployment.yaml index 69bbb487..58a8f685 100644 --- a/charts/etcd-operator/templates/deployment.yaml +++ b/charts/etcd-operator/templates/deployment.yaml @@ -53,6 +53,12 @@ spec: # is left at a placeholder. - name: OPERATOR_IMAGE value: {{ include "etcd-operator.image" . }} + # Operator-wide default etcd image repository for member Pods. Always + # set, so the operator's built-in fallback is only reached when the + # binary runs outside this chart. An EtcdCluster's spec.image.repository + # overrides it per cluster; repoint here for an air-gapped mirror. + - name: ETCD_IMAGE_REPOSITORY + value: {{ .Values.etcdImage.repository | quote }} livenessProbe: httpGet: path: /healthz diff --git a/charts/etcd-operator/values.yaml b/charts/etcd-operator/values.yaml index fb68d85c..243ff413 100644 --- a/charts/etcd-operator/values.yaml +++ b/charts/etcd-operator/values.yaml @@ -29,6 +29,32 @@ image: # -- Image pull policy. pullPolicy: IfNotPresent +# Operator-wide default for the etcd image that runs in member Pods (NOT the +# operator's own image above). An EtcdCluster's spec.image overrides this per +# cluster; set the repository here to point every cluster at an air-gapped +# mirror once. Per-cluster pull credentials go on spec.imagePullSecrets (a +# Secret in the cluster's own namespace), not here. +etcdImage: + # -- Default etcd image repository (registry host + path, no tag) for member + # Pods. Repoint at an air-gapped mirror, e.g. registry.internal/mirror/etcd. + # The chart always wires this into the operator's ETCD_IMAGE_REPOSITORY, so + # this value is the single operator-wide default; an EtcdCluster's spec.image + # overrides it per cluster. + # + # Only the repository is settable operator-wide; the other two parts of the + # image are NOT (no fleet-wide knob, override them per cluster): + # - tag: always derived from each cluster's spec.version as "v" + # (e.g. v3.6.11). Override per cluster via EtcdCluster.spec.image.tag — + # use that only when a mirror's tag scheme differs from upstream vX.Y.Z. + # - pullPolicy: defaults to IfNotPresent (set in the CRD schema). Override + # per cluster via EtcdCluster.spec.image.pullPolicy. + # + # Keep this in sync with the controllers.EtcdImage constant — the operator's + # built-in fallback used only when this env is unset (outside the chart). The + # chart always sets ETCD_IMAGE_REPOSITORY, so a drift between the two is + # harmless here, but bump both together to keep the no-chart default honest. + repository: quay.io/coreos/etcd + # -- Number of operator replicas (leader election picks the active one). replicaCount: 1 diff --git a/controllers/etcdcluster_controller.go b/controllers/etcdcluster_controller.go index 93ec54ea..2e989adc 100644 --- a/controllers/etcdcluster_controller.go +++ b/controllers/etcdcluster_controller.go @@ -430,6 +430,8 @@ func (r *EtcdClusterReconciler) bootstrap( Affinity: cluster.Status.Observed.Affinity, TopologySpreadConstraints: cluster.Status.Observed.TopologySpreadConstraints, Options: cluster.Status.Observed.Options, + Image: cluster.Status.Observed.Image, + ImagePullSecrets: cluster.Status.Observed.ImagePullSecrets, Bootstrap: true, ClusterToken: cluster.Status.ClusterToken, TLS: deriveMemberTLS(cluster), @@ -834,6 +836,8 @@ func (r *EtcdClusterReconciler) scaleUp( Affinity: cluster.Status.Observed.Affinity, TopologySpreadConstraints: cluster.Status.Observed.TopologySpreadConstraints, Options: cluster.Status.Observed.Options, + Image: cluster.Status.Observed.Image, + ImagePullSecrets: cluster.Status.Observed.ImagePullSecrets, Bootstrap: false, ClusterToken: cluster.Status.ClusterToken, TLS: deriveMemberTLS(cluster), @@ -2028,6 +2032,8 @@ func snapshotSpecIntoObserved(cluster *lll.EtcdCluster) { TopologySpreadConstraints: cluster.Spec.TopologySpreadConstraints, AdditionalMetadata: cluster.Spec.AdditionalMetadata, Options: cluster.Spec.Options, + Image: cluster.Spec.Image, + ImagePullSecrets: cluster.Spec.ImagePullSecrets, } } @@ -2048,7 +2054,9 @@ func specEqualsObserved(cluster *lll.EtcdCluster) bool { equality.Semantic.DeepEqual(o.Affinity, cluster.Spec.Affinity) && equality.Semantic.DeepEqual(o.TopologySpreadConstraints, cluster.Spec.TopologySpreadConstraints) && equality.Semantic.DeepEqual(o.AdditionalMetadata, cluster.Spec.AdditionalMetadata) && - equality.Semantic.DeepEqual(o.Options, cluster.Spec.Options) + equality.Semantic.DeepEqual(o.Options, cluster.Spec.Options) && + equality.Semantic.DeepEqual(o.Image, cluster.Spec.Image) && + equality.Semantic.DeepEqual(o.ImagePullSecrets, cluster.Spec.ImagePullSecrets) } // observedAdditionalMetadata returns the latched additionalMetadata target diff --git a/controllers/etcdmember_controller.go b/controllers/etcdmember_controller.go index fe2274ac..3ed26890 100644 --- a/controllers/etcdmember_controller.go +++ b/controllers/etcdmember_controller.go @@ -49,6 +49,13 @@ type EtcdMemberReconciler struct { // OperatorImage is the operator's own image; the restore agent runs from // it as an initContainer on the bootstrap seed Pod. OperatorImage string + + // EtcdImageRepository is the operator-wide default etcd image repository + // (registry host + path, no tag) used for member Pods whose EtcdCluster + // does not set spec.image.repository. Empty falls back to the EtcdImage + // built-in. Set from --etcd-image-repository / ETCD_IMAGE_REPOSITORY; the + // common use is pointing every cluster at an air-gapped mirror once. + EtcdImageRepository string } //+kubebuilder:rbac:groups=etcd-operator.cozystack.io,resources=etcdmembers,verbs=get;list;watch;update;patch @@ -751,6 +758,8 @@ func (r *EtcdMemberReconciler) buildPod(member *lll.EtcdMember) *corev1.Pod { volumes = append(volumes, extraVols...) } + etcdImage, etcdPullPolicy := resolveEtcdImage(member, r.EtcdImageRepository) + return &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: member.Name, @@ -763,6 +772,7 @@ func (r *EtcdMemberReconciler) buildPod(member *lll.EtcdMember) *corev1.Pod { Subdomain: memberServiceName(member), Affinity: member.Spec.Affinity, TopologySpreadConstraints: member.Spec.TopologySpreadConstraints, + ImagePullSecrets: member.Spec.ImagePullSecrets, InitContainers: initContainers, // etcd and the restore agent never call the Kubernetes API, so // don't mount a ServiceAccount token into the member Pod (matches @@ -778,8 +788,9 @@ func (r *EtcdMemberReconciler) buildPod(member *lll.EtcdMember) *corev1.Pod { }, }, Containers: []corev1.Container{{ - Name: "etcd", - Image: fmt.Sprintf("%s:v%s", EtcdImage, member.Spec.Version), + Name: "etcd", + Image: etcdImage, + ImagePullPolicy: etcdPullPolicy, SecurityContext: &corev1.SecurityContext{ AllowPrivilegeEscalation: ptrBool(false), Capabilities: &corev1.Capabilities{ diff --git a/controllers/etcdmember_controller_test.go b/controllers/etcdmember_controller_test.go index 69fd0565..9e054290 100644 --- a/controllers/etcdmember_controller_test.go +++ b/controllers/etcdmember_controller_test.go @@ -1016,6 +1016,49 @@ func TestBuildPod_LivenessIsNotQuorumAware(t *testing.T) { } } +// TestBuildPod_ImageOverrideAndPullSecrets covers the air-gap path: buildPod +// must honour the operator-wide default repository, the per-member spec.image +// override, and stamp imagePullSecrets onto the Pod. +func TestBuildPod_ImageOverrideAndPullSecrets(t *testing.T) { + t.Run("operator default repo, version-derived tag", func(t *testing.T) { + r := &EtcdMemberReconciler{EtcdImageRepository: "registry.internal/mirror/etcd"} + pod := r.buildPod(&lll.EtcdMember{ + ObjectMeta: metav1.ObjectMeta{Name: "test-0", Namespace: "ns"}, + Spec: lll.EtcdMemberSpec{ClusterName: "test", Version: "3.6.11"}, + }) + if got := pod.Spec.Containers[0].Image; got != "registry.internal/mirror/etcd:v3.6.11" { + t.Errorf("image = %q, want operator-default mirror", got) + } + }) + + t.Run("per-member override and pull secrets win", func(t *testing.T) { + r := &EtcdMemberReconciler{EtcdImageRepository: "registry.internal/mirror/etcd"} + pod := r.buildPod(&lll.EtcdMember{ + ObjectMeta: metav1.ObjectMeta{Name: "test-0", Namespace: "ns"}, + Spec: lll.EtcdMemberSpec{ + ClusterName: "test", + Version: "3.6.11", + Image: &lll.EtcdImageSpec{ + Repository: "private.example/etcd", + Tag: "3.6.11-custom", + PullPolicy: corev1.PullAlways, + }, + ImagePullSecrets: []corev1.LocalObjectReference{{Name: "regcreds"}}, + }, + }) + c := pod.Spec.Containers[0] + if c.Image != "private.example/etcd:3.6.11-custom" { + t.Errorf("image = %q, want per-member override", c.Image) + } + if c.ImagePullPolicy != corev1.PullAlways { + t.Errorf("pullPolicy = %q, want Always", c.ImagePullPolicy) + } + if len(pod.Spec.ImagePullSecrets) != 1 || pod.Spec.ImagePullSecrets[0].Name != "regcreds" { + t.Errorf("pod.imagePullSecrets = %+v, want [regcreds]", pod.Spec.ImagePullSecrets) + } + }) +} + // TestBuildPod_AppliesSchedulingAndMetadata covers the additionalMetadata, // affinity, and topologySpreadConstraints passthrough: buildPod must stamp // the Pod with the member's scheduling fields and merge the extra diff --git a/controllers/helpers.go b/controllers/helpers.go index a67269ce..ef725dc2 100644 --- a/controllers/helpers.go +++ b/controllers/helpers.go @@ -23,7 +23,12 @@ const ( LabelRole = "etcd-operator.cozystack.io/role" RoleVoter = "voter" - // EtcdImage is the container image repository for etcd. + // EtcdImage is the built-in fallback etcd image repository (registry + // host + path, no tag). It is the last resort in the resolution chain: + // an EtcdCluster's spec.image.repository wins first, then the + // operator-wide --etcd-image-repository / ETCD_IMAGE_REPOSITORY default, + // then this constant. See resolveEtcdImage. Keep in sync with the chart's + // etcdImage.repository default in charts/etcd-operator/values.yaml. EtcdImage = "quay.io/coreos/etcd" // MemberFinalizer is placed on EtcdMember resources to ensure @@ -108,6 +113,34 @@ func memberDataDir(member *lll.EtcdMember) string { return path.Join(etcdDataDirRoot, sub) } +// resolveEtcdImage resolves a member's etcd container image and pull policy +// from (in precedence order) the member's mirrored spec.image override, the +// operator-wide repository default (defaultRepo, from the operator's +// --etcd-image-repository flag), and the EtcdImage built-in. The tag defaults +// to "v"+spec.version — the operator keys every version-dependent behaviour +// off spec.version, so an explicit spec.image.tag changes only the pulled +// reference, never that behaviour. The pull policy comes from spec.image +// (apiserver-defaulted to IfNotPresent whenever the image block is present); +// when no image block is set at all it stays empty and the container field is +// left unset, so the kubelet default applies. +func resolveEtcdImage(member *lll.EtcdMember, defaultRepo string) (image string, pullPolicy corev1.PullPolicy) { + repo := defaultRepo + if repo == "" { + repo = EtcdImage + } + tag := "v" + member.Spec.Version + if img := member.Spec.Image; img != nil { + if img.Repository != "" { + repo = img.Repository + } + if img.Tag != "" { + tag = img.Tag + } + pullPolicy = img.PullPolicy + } + return repo + ":" + tag, pullPolicy +} + // peerURL returns the etcd peer URL for a member, using the headless Service // DNS. `service` is the headless Service name the member resolves under — // resolve it per-member via memberServiceName (the cluster's own name by diff --git a/controllers/helpers_test.go b/controllers/helpers_test.go index 5457d165..a7d7fed4 100644 --- a/controllers/helpers_test.go +++ b/controllers/helpers_test.go @@ -13,6 +13,7 @@ package controllers import ( "testing" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" lll "github.com/cozystack/etcd-operator/api/v1alpha2" @@ -141,3 +142,57 @@ func TestMemberEndpoints_PerMemberServiceName(t *testing.T) { } } } + +func TestResolveEtcdImage(t *testing.T) { + member := func(version string, img *lll.EtcdImageSpec) *lll.EtcdMember { + return &lll.EtcdMember{Spec: lll.EtcdMemberSpec{Version: version, Image: img}} + } + + cases := []struct { + name string + member *lll.EtcdMember + defaultRepo string + wantImage string + wantPolicy corev1.PullPolicy + }{ + { + name: "no override, no operator default → built-in repo + v", + member: member("3.6.11", nil), + wantImage: EtcdImage + ":v3.6.11", + }, + { + name: "operator default repo, version-derived tag", + member: member("3.6.11", nil), + defaultRepo: "registry.internal/mirror/etcd", + wantImage: "registry.internal/mirror/etcd:v3.6.11", + }, + { + name: "per-cluster repository overrides the operator default", + member: member("3.6.11", &lll.EtcdImageSpec{Repository: "private.example/etcd"}), + defaultRepo: "registry.internal/mirror/etcd", + wantImage: "private.example/etcd:v3.6.11", + }, + { + name: "explicit tag overrides the version-derived default", + member: member("3.6.11", &lll.EtcdImageSpec{Tag: "3.6.11-mirror"}), + wantImage: EtcdImage + ":3.6.11-mirror", + }, + { + name: "pull policy is propagated", + member: member("3.6.11", &lll.EtcdImageSpec{PullPolicy: corev1.PullAlways}), + wantImage: EtcdImage + ":v3.6.11", + wantPolicy: corev1.PullAlways, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + img, policy := resolveEtcdImage(tc.member, tc.defaultRepo) + if img != tc.wantImage { + t.Errorf("image = %q, want %q", img, tc.wantImage) + } + if policy != tc.wantPolicy { + t.Errorf("pullPolicy = %q, want %q", policy, tc.wantPolicy) + } + }) + } +} diff --git a/docs/installation.md b/docs/installation.md index 73d3378b..7f0416d8 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -97,7 +97,8 @@ Common values (`--set key=value`, or a `-f my-values.yaml`): | `metrics.serviceMonitor.enabled` | `false` | Create a prometheus-operator `ServiceMonitor` for the metrics endpoint (needs the `monitoring.coreos.com` CRDs and `kubeRbacProxy.enabled`). | | `crds.enabled` / `crds.keep` | `true` / `true` | Render the CRDs with the release / annotate them so uninstall keeps them. | | `manager.resources` | 10m/64Mi → 500m/128Mi | Manager container requests/limits. | -| `imagePullSecrets` | `[]` | Pull secrets for a private registry mirror. | +| `imagePullSecrets` | `[]` | Pull secrets for the **operator's own** image (private registry mirror). | +| `etcdImage.repository` | `quay.io/coreos/etcd` | Operator-wide default **etcd** image repo for member Pods (always wired into `ETCD_IMAGE_REPOSITORY`). Repoint at an air-gapped mirror once; an `EtcdCluster`'s `spec.image` overrides it per cluster. | See `charts/etcd-operator/values.yaml` for the complete, annotated list. Verify the install: @@ -243,7 +244,28 @@ The `spec.tls` subtree is immutable post-create — flipping TLS on or off on an ## Image versions -`spec.version` in an `EtcdCluster` becomes `quay.io/coreos/etcd:v`. The image repository is hard-coded in `controllers/helpers.go:EtcdImage`. Override it by patching the operator image with your own registry/repo if you mirror etcd internally. +By default `spec.version` in an `EtcdCluster` becomes `quay.io/coreos/etcd:v`. For air-gapped environments that mirror the image to a private registry there are two override surfaces, lowest-precedence first: + +- **Operator-wide default** — set `etcdImage.repository` in the chart (env `ETCD_IMAGE_REPOSITORY` / flag `--etcd-image-repository`) to a registry/path, e.g. `registry.internal/mirror/etcd`. Every cluster that doesn't override picks it up; the tag stays `v`. +- **Per-cluster** — `spec.image` on an `EtcdCluster` overrides the repository, the tag, and the pull policy for that cluster's member Pods: + + ```yaml + spec: + version: "3.6.11" + image: + repository: registry.internal/mirror/etcd # optional; falls back to the operator default + tag: "" # optional; defaults to v3.6.11 + pullPolicy: IfNotPresent # optional; this is the default + imagePullSecrets: + - name: regcreds # Secret in the cluster's namespace + ``` + + `spec.imagePullSecrets` references pull-credential Secrets in the cluster's own namespace and is passed straight through to each member Pod. Note the operator still keys every version-dependent behaviour off `spec.version`, not off `spec.image.tag` — set the tag only when the mirror's tag scheme differs from the upstream `vX.Y.Z`. Like `spec.resources`, an image change applies to **newly-created** members (scale-up, replacement), not existing Pods in place. + + These two surfaces cover the **etcd member image** only. Two caveats in a fully air-gapped install where the operator image is mirrored too: + + - `spec.imagePullSecrets` is set Pod-wide, so it **does** cover the member Pod's restore initContainer (which runs the operator image at bootstrap-from-snapshot) — a `spec.bootstrap.restore` can pull the mirrored operator image via these secrets. + - Standalone `EtcdSnapshot` backup/restore **Jobs** also run the operator image but are **not** covered by `spec.imagePullSecrets`. Repoint the operator image (chart `image.repository`) and make sure the snapshot's namespace can already pull it, or those Jobs `ImagePullBackOff`. The operator Pod itself uses the chart-level `imagePullSecrets`. The `spec.version` examples throughout these docs use **3.6.x**, to match the `etcdutl` bundled in the operator image: `spec.bootstrap.restore` requires `spec.version` and that `etcdutl` to share a minor (see the [restore runbook](operations.md#restoring-a-cluster-from-a-snapshot)). The operator's etcd client is v3.6.x and is wire-compatible with 3.5.x servers, so a cluster you never restore into can still run 3.5.x — but to back up and restore on the same version, run 3.6.x. diff --git a/docs/migration.md b/docs/migration.md index 41bbf308..94046f9c 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -147,7 +147,11 @@ What gets migrated: Every legacy knob with no v1alpha2 equivalent (`spec.options` keys beyond the [four typed ones](#specoptions-free-form-map--typed-fields), service/PDB templates, podTemplate overrides beyond affinity/topology-spread/resources/ -metadata) is reported as a warning — review them before `--apply`. Hard +metadata) is reported as a warning — review them before `--apply`. A legacy +`podTemplate` etcd image from a non-default registry/tag is **preserved** as +`spec.image`, and `podTemplate.spec.imagePullSecrets` is **carried** into +`spec.imagePullSecrets` (both used to be dropped) — so an air-gapped cluster +keeps pulling from its mirror after the operator rolls a replacement Pod. Hard blockers (`emptyDir` storage — nothing to adopt, an unparsable etcd image tag without `--version`, `enableAuth` without server TLS, a non-integer `quota-backend-bytes`/`snapshot-count`, a failed inspection) skip that diff --git a/hack/e2e.sh b/hack/e2e.sh index dcb2ffb5..3fc22a6b 100755 --- a/hack/e2e.sh +++ b/hack/e2e.sh @@ -109,9 +109,34 @@ helm upgrade --install kamaji clastix/kamaji \ echo "--- building and deploying the operator ($IMG)" docker build -t "$IMG" . kind load docker-image "$IMG" --name "$KIND_CLUSTER_NAME" + +# Air-gap image-override coverage (TestEtcdImageOverride). The mirror +# registries below never resolve over the network, so re-tag the upstream +# etcd image under both names and side-load them into the node. With the +# CRD-defaulted pullPolicy=IfNotPresent the kubelet uses the locally-present +# image and never dials registry.internal — exactly how a private air-gapped +# mirror behaves, but with no registry to stand up. +# +# ETCD_IMAGE_TAG must track test/e2e/testdata/02-etcdcluster.yaml's +# spec.version (operator pulls "v"); the override test pins the same. +ETCD_UPSTREAM=quay.io/coreos/etcd:v3.6.11 +OPERATOR_DEFAULT_MIRROR=registry.internal/mirror/etcd +PER_CLUSTER_MIRROR=registry.internal/percluster/etcd +echo "--- side-loading mirrored etcd images for the air-gap override test" +docker pull "$ETCD_UPSTREAM" +for mirror in "$OPERATOR_DEFAULT_MIRROR" "$PER_CLUSTER_MIRROR"; do + docker tag "$ETCD_UPSTREAM" "$mirror:v3.6.11" + kind load docker-image "$mirror:v3.6.11" --name "$KIND_CLUSTER_NAME" +done + # Helm install: CRDs are templated into the release and image == OPERATOR_IMAGE # is wired by the chart, so this one command lands CRDs + RBAC + manager. -make deploy IMG="$IMG" +# etcdImage.repository points the operator-wide default at the mirror: this is +# what exercises the chart-value -> ETCD_IMAGE_REPOSITORY env -> flag -> +# resolveEtcdImage -> member Pod chain (the value differs from the built-in +# EtcdImage constant, so a typo anywhere in that chain is caught). Per-cluster +# spec.image (which outranks this default) is covered by the same test. +make deploy IMG="$IMG" HELM_EXTRA_ARGS="--set etcdImage.repository=$OPERATOR_DEFAULT_MIRROR" # Select by the chart's control-plane label rather than a fixed Deployment name. kubectl -n etcd-operator-system wait deploy \ -l control-plane=controller-manager \ diff --git a/internal/migrate/adopt.go b/internal/migrate/adopt.go index 66deed13..8b434421 100644 --- a/internal/migrate/adopt.go +++ b/internal/migrate/adopt.go @@ -279,6 +279,8 @@ func BuildAdoption(name, namespace string, spec legacy.EtcdClusterSpec, facts Cl Affinity: cluster.Spec.Affinity, TopologySpreadConstraints: cluster.Spec.TopologySpreadConstraints, Options: cluster.Spec.Options, + Image: cluster.Spec.Image, + ImagePullSecrets: cluster.Spec.ImagePullSecrets, Bootstrap: false, InitialCluster: initialCluster, ClusterToken: token, @@ -311,6 +313,8 @@ func BuildAdoption(name, namespace string, spec legacy.EtcdClusterSpec, facts Cl TopologySpreadConstraints: cluster.Spec.TopologySpreadConstraints, AdditionalMetadata: cluster.Spec.AdditionalMetadata, Options: cluster.Spec.Options, + Image: cluster.Spec.Image, + ImagePullSecrets: cluster.Spec.ImagePullSecrets, }, } plan.Adoption.StatefulSetName = name diff --git a/internal/migrate/translate.go b/internal/migrate/translate.go index 85e47dcb..0fab46d3 100644 --- a/internal/migrate/translate.go +++ b/internal/migrate/translate.go @@ -26,6 +26,7 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" lll "github.com/cozystack/etcd-operator/api/v1alpha2" + "github.com/cozystack/etcd-operator/controllers" "github.com/cozystack/etcd-operator/internal/migrate/legacy" ) @@ -170,6 +171,50 @@ func extractVersion(podSpec corev1.PodSpec, override string) (string, []string, return tag, warns, nil } +// etcdImageOverride returns the spec.image override needed to make the new +// operator pull the same etcd image the legacy podTemplate ran, or nil when +// the legacy image already matches the new operator's defaults (repository +// controllers.EtcdImage, tag "v"+version). Only the differing parts are set; +// spec.image fills the rest from those defaults. +// +// A digest reference (repo@sha256:... or repo:tag@sha256:...) carries no +// per-digest field in spec.image, but its registry/repository — the load- +// bearing part for an air-gapped mirror — is fully recoverable: strip the +// digest and continue with the repo[:tag] prefix. A missing tag then defaults +// to v via spec.image, which extractVersion already forced an +// explicit --version to supply. Dropping the whole override here (the old +// behaviour) silently repointed digest-pinned mirrors back at the operator's +// default registry and ImagePullBackOff'd — the very air-gap case this exists +// to fix. +func etcdImageOverride(image, version string) *lll.EtcdImageSpec { + if image == "" { + return nil + } + if at := strings.Index(image, "@"); at >= 0 { + image = image[:at] + } + + // Split on the last ":" only when it introduces a tag. A registry-port + // colon (registry:5000/etcd) is followed by a path component; a tag is + // not, so the presence of a "/" after the colon tells them apart. + repo, tag := image, "" + if idx := strings.LastIndex(image, ":"); idx >= 0 && !strings.Contains(image[idx+1:], "/") { + repo, tag = image[:idx], image[idx+1:] + } + + override := &lll.EtcdImageSpec{} + if repo != controllers.EtcdImage { + override.Repository = repo + } + if tag != "" && tag != "v"+version { + override.Tag = tag + } + if override.Repository == "" && override.Tag == "" { + return nil + } + return override +} + func findContainer(containers []corev1.Container, name string) *corev1.Container { for i := range containers { if containers[i].Name == name { @@ -290,10 +335,24 @@ func translatePodTemplate(pt legacy.PodTemplate, out *lll.EtcdCluster, plan *Res out.Spec.Affinity = ps.Affinity out.Spec.TopologySpreadConstraints = ps.TopologySpreadConstraints + // Carry pull secrets so the new operator can still pull from a private + // (e.g. air-gapped) registry. v1alpha2 grew spec.imagePullSecrets, so + // this is no longer dropped. + if len(ps.ImagePullSecrets) > 0 { + out.Spec.ImagePullSecrets = ps.ImagePullSecrets + } + var dropped []string if c := findContainer(ps.Containers, "etcd"); c != nil { out.Spec.Resources = c.Resources - // Image and Resources are consumed above; everything else on the + // Preserve a custom etcd image as spec.image so the replacement Pods + // the new operator builds pull the same reference the legacy cluster + // ran — load-bearing for air-gapped mirrors whose registry or tag + // scheme differs from the new operator's defaults. + if img := etcdImageOverride(c.Image, out.Spec.Version); img != nil { + out.Spec.Image = img + } + // Image (above), Resources are consumed here; everything else on the // etcd container is an unmappable override. for field, set := range map[string]bool{ "command": len(c.Command) > 0, @@ -325,7 +384,6 @@ func translatePodTemplate(pt legacy.PodTemplate, out *lll.EtcdCluster, plan *Res "serviceAccountName": ps.ServiceAccountName != "", "securityContext": ps.SecurityContext != nil && !equality.Semantic.DeepEqual(*ps.SecurityContext, corev1.PodSecurityContext{}), "priorityClassName": ps.PriorityClassName != "", - "imagePullSecrets": len(ps.ImagePullSecrets) > 0, "hostNetwork": ps.HostNetwork, "hostAliases": len(ps.HostAliases) > 0, "dnsPolicy": ps.DNSPolicy != "", diff --git a/internal/migrate/translate_test.go b/internal/migrate/translate_test.go index 23be94ba..13d40094 100644 --- a/internal/migrate/translate_test.go +++ b/internal/migrate/translate_test.go @@ -208,6 +208,97 @@ func TestTranslateCluster_KitchenSink(t *testing.T) { } } +// TestTranslateCluster_ImageAndPullSecrets pins the air-gap migration path: +// a custom etcd registry/tag is preserved as spec.image so replacement Pods +// pull the same reference, and pull secrets are carried into +// spec.imagePullSecrets instead of being dropped. +func TestTranslateCluster_ImageAndPullSecrets(t *testing.T) { + base := legacy.EtcdClusterSpec{ + Storage: legacy.StorageSpec{VolumeClaimTemplate: legacy.EmbeddedPersistentVolumeClaim{ + Spec: corev1.PersistentVolumeClaimSpec{Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: qty(t, "1Gi")}}}, + }}, + } + + t.Run("custom registry preserved, version-matching tag elided", func(t *testing.T) { + spec := base + spec.PodTemplate.Spec.Containers = []corev1.Container{ + {Name: "etcd", Image: "registry.internal/mirror/etcd:v3.6.11"}} + spec.PodTemplate.Spec.ImagePullSecrets = []corev1.LocalObjectReference{{Name: "regcreds"}} + + plan := TranslateCluster("c", "ns", spec, TranslateOptions{}) + out := clusterTarget(t, plan) + + if out.Spec.Image == nil || out.Spec.Image.Repository != "registry.internal/mirror/etcd" { + t.Fatalf("spec.image.repository = %+v, want mirror repo", out.Spec.Image) + } + if out.Spec.Image.Tag != "" { + t.Errorf("spec.image.tag = %q, want empty (tag equals v+version, so derived)", out.Spec.Image.Tag) + } + if len(out.Spec.ImagePullSecrets) != 1 || out.Spec.ImagePullSecrets[0].Name != "regcreds" { + t.Errorf("spec.imagePullSecrets = %+v, want [regcreds]", out.Spec.ImagePullSecrets) + } + if hasWarning(plan.Warnings, "imagePullSecrets") { + t.Errorf("imagePullSecrets must no longer be dropped; warnings: %v", plan.Warnings) + } + }) + + t.Run("non-default tag scheme preserved", func(t *testing.T) { + spec := base + // Mirror uses a bare "X.Y.Z" tag (no v prefix); the operator would + // otherwise default to v3.6.11 and ImagePullBackOff against the mirror. + spec.PodTemplate.Spec.Containers = []corev1.Container{ + {Name: "etcd", Image: "registry.internal/mirror/etcd:3.6.11"}} + + out := clusterTarget(t, TranslateCluster("c", "ns", spec, TranslateOptions{})) + if out.Spec.Image == nil || out.Spec.Image.Tag != "3.6.11" { + t.Fatalf("spec.image.tag = %+v, want 3.6.11 preserved", out.Spec.Image) + } + }) + + t.Run("digest-pinned mirror preserves the registry", func(t *testing.T) { + // Legacy air-gapped clusters often pin by digest. spec.image has no + // digest field, but the mirror registry must still be carried or the + // replacement Pods ImagePullBackOff against the operator's default + // registry. extractVersion forces --version for digest refs, so the + // tag defaults to v; only the repository is recoverable. + spec := base + spec.PodTemplate.Spec.Containers = []corev1.Container{ + {Name: "etcd", Image: "registry.internal/mirror/etcd@sha256:" + strings.Repeat("d", 64)}} + + out := clusterTarget(t, TranslateCluster("c", "ns", spec, TranslateOptions{VersionOverride: "3.6.11"})) + if out.Spec.Image == nil || out.Spec.Image.Repository != "registry.internal/mirror/etcd" { + t.Fatalf("spec.image.repository = %+v, want mirror repo preserved from digest ref", out.Spec.Image) + } + if out.Spec.Image.Tag != "" { + t.Errorf("spec.image.tag = %q, want empty (digest carries no tag; defaults to v+version)", out.Spec.Image.Tag) + } + }) + + t.Run("digest-pinned upstream default yields no override", func(t *testing.T) { + // The mirror is the upstream default registry: nothing to preserve. + spec := base + spec.PodTemplate.Spec.Containers = []corev1.Container{ + {Name: "etcd", Image: "quay.io/coreos/etcd@sha256:" + strings.Repeat("d", 64)}} + + out := clusterTarget(t, TranslateCluster("c", "ns", spec, TranslateOptions{VersionOverride: "3.6.11"})) + if out.Spec.Image != nil { + t.Errorf("spec.image = %+v, want nil for a digest ref on the default registry", out.Spec.Image) + } + }) + + t.Run("upstream default image yields no override", func(t *testing.T) { + spec := base + spec.PodTemplate.Spec.Containers = []corev1.Container{ + {Name: "etcd", Image: "quay.io/coreos/etcd:v3.6.11"}} + + out := clusterTarget(t, TranslateCluster("c", "ns", spec, TranslateOptions{})) + if out.Spec.Image != nil { + t.Errorf("spec.image = %+v, want nil for the upstream default image", out.Spec.Image) + } + }) +} + // TestTranslateCluster_VersionExtraction pins the image-tag → spec.version // rules across default, override, and unparsable images. func TestTranslateCluster_VersionExtraction(t *testing.T) { diff --git a/main.go b/main.go index 1aac2b56..18b4919b 100644 --- a/main.go +++ b/main.go @@ -114,6 +114,7 @@ func main() { var probeAddr string var clusterDomain string var operatorImage string + var etcdImageRepository string flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.") flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") flag.BoolVar(&enableLeaderElection, "leader-elect", false, @@ -132,6 +133,12 @@ func main() { "Operator image reference. The snapshot/restore agents run from this same "+ "image (Job / initContainer). Defaults to $OPERATOR_IMAGE; required for "+ "EtcdSnapshot and spec.bootstrap.restore to function.") + flag.StringVar(&etcdImageRepository, "etcd-image-repository", os.Getenv("ETCD_IMAGE_REPOSITORY"), + "Operator-wide default etcd image repository (registry host + path, no tag), "+ + "e.g. 'registry.internal/mirror/etcd'. Used for every member Pod whose "+ + "EtcdCluster does not set spec.image.repository — point air-gapped "+ + "deployments at a mirror once instead of per cluster. Defaults to "+ + "$ETCD_IMAGE_REPOSITORY; when empty the built-in quay.io/coreos/etcd is used.") opts := zap.Options{ Development: true, } @@ -204,9 +211,10 @@ func main() { os.Exit(1) } if err = (&controllers.EtcdMemberReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - OperatorImage: operatorImage, + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + OperatorImage: operatorImage, + EtcdImageRepository: etcdImageRepository, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "EtcdMember") os.Exit(1) diff --git a/test/e2e/image_override_test.go b/test/e2e/image_override_test.go new file mode 100644 index 00000000..b3559c34 --- /dev/null +++ b/test/e2e/image_override_test.go @@ -0,0 +1,182 @@ +//go:build e2e + +package e2e + +import ( + "context" + "testing" + "time" + + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + etcdv1alpha2 "github.com/cozystack/etcd-operator/api/v1alpha2" +) + +const ( + // imageOverrideNamespace isolates this test's clusters from the Kamaji + // suite. labelCluster matches controllers.LabelCluster (kept as a literal + // here, mirroring kamaji_datastore_test.go, to avoid importing the + // controllers package into the e2e suite). + imageOverrideNamespace = "airgap-e2e" + labelCluster = "etcd-operator.cozystack.io/cluster" + + // These three must stay in sync with hack/e2e.sh, which side-loads the + // upstream etcd image under both mirror names and deploys the operator + // with etcdImage.repository=operatorDefaultMirror. The version tracks + // test/e2e/testdata/02-etcdcluster.yaml. + imageOverrideVersion = "3.6.11" + operatorDefaultMirror = "registry.internal/mirror/etcd" + perClusterMirror = "registry.internal/percluster/etcd" +) + +// TestEtcdImageOverride proves the air-gap image-override contract end to end +// against a real cluster — the thing the unit tests cannot: that the resolved +// repository actually reaches a member Pod and the member comes up pulling +// from it. +// +// - Operator-wide default: a cluster with no spec.image resolves to the +// operator's etcdImage.repository (chart value -> ETCD_IMAGE_REPOSITORY env +// -> --etcd-image-repository flag -> resolveEtcdImage -> buildPod). Because +// the harness points that default at a mirror whose name differs from the +// built-in EtcdImage constant, a typo anywhere in that chain would surface +// here as the wrong (or unpullable) image. +// - Per-cluster override: a cluster with spec.image.repository outranks the +// operator default, and spec.imagePullSecrets rides through to the Pod. +// +// Both clusters reach Available, which means the kubelet actually pulled the +// mirror reference (side-loaded as IfNotPresent) and the member joined quorum. +func TestEtcdImageOverride(t *testing.T) { + ctx := context.Background() + + // TypeMeta is mandatory for server-side apply: the apiserver resolves the + // target resource from apiVersion/Kind, which a Go-constructed object + // (unlike one decoded from YAML) does not carry by default. + ns := &corev1.Namespace{ + TypeMeta: metav1.TypeMeta{APIVersion: "v1", Kind: "Namespace"}, + ObjectMeta: metav1.ObjectMeta{Name: imageOverrideNamespace}, + } + if err := kube.Patch(ctx, ns, client.Apply, fieldOwner, client.ForceOwnership); err != nil { + t.Fatalf("create namespace %s: %v", imageOverrideNamespace, err) + } + t.Cleanup(func() { + _ = kube.Delete(context.Background(), &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{Name: imageOverrideNamespace}}) + }) + + t.Run("operator-wide default reaches the member Pod", func(t *testing.T) { + const name = "etcd-default" + createImageCluster(ctx, t, name, nil, nil) + defer deleteImageCluster(ctx, t, name) + + waitFor(ctx, t, 5*time.Minute, name+" Available", + etcdClusterAvailable(imageOverrideNamespace, name)) + + img := etcdMemberImage(ctx, t, name) + if want := operatorDefaultMirror + ":v" + imageOverrideVersion; img != want { + t.Errorf("etcd member image = %q, want operator-wide mirror default %q", img, want) + } + // pullPolicy is deliberately not asserted on the live Pod: the apiserver + // defaults any fixed (non-:latest) tag to IfNotPresent, so it reads back + // as IfNotPresent regardless of whether the operator set it. The + // operator-level contract (unset when no image block, propagated when + // set) is covered by the unit tests TestResolveEtcdImage and + // TestBuildPod_ImageOverrideAndPullSecrets. + }) + + t.Run("per-cluster spec.image outranks the default", func(t *testing.T) { + const name = "etcd-percluster" + // A pull-credentials Secret in the cluster's own namespace, referenced + // by spec.imagePullSecrets. The side-loaded image needs no real pull, + // but the Secret must exist and flow through to the Pod unchanged. + pullSecret := "mirror-regcreds" + sec := &corev1.Secret{ + TypeMeta: metav1.TypeMeta{APIVersion: "v1", Kind: "Secret"}, + ObjectMeta: metav1.ObjectMeta{Name: pullSecret, Namespace: imageOverrideNamespace}, + Type: corev1.SecretTypeDockerConfigJson, + StringData: map[string]string{".dockerconfigjson": `{"auths":{}}`}, + } + if err := kube.Patch(ctx, sec, client.Apply, fieldOwner, client.ForceOwnership); err != nil { + t.Fatalf("create pull secret: %v", err) + } + + createImageCluster(ctx, t, name, + &etcdv1alpha2.EtcdImageSpec{Repository: perClusterMirror}, + []corev1.LocalObjectReference{{Name: pullSecret}}) + defer deleteImageCluster(ctx, t, name) + + waitFor(ctx, t, 5*time.Minute, name+" Available", + etcdClusterAvailable(imageOverrideNamespace, name)) + + img := etcdMemberImage(ctx, t, name) + if want := perClusterMirror + ":v" + imageOverrideVersion; img != want { + t.Errorf("etcd member image = %q, want per-cluster mirror %q", img, want) + } + + // The pull-secret passthrough (spec.imagePullSecrets -> member Pod) is + // the genuinely e2e-only half of the contract. + pod := etcdMemberPod(ctx, t, name) + if len(pod.Spec.ImagePullSecrets) != 1 || pod.Spec.ImagePullSecrets[0].Name != pullSecret { + t.Errorf("pod imagePullSecrets = %+v, want [%s]", pod.Spec.ImagePullSecrets, pullSecret) + } + }) +} + +// createImageCluster applies a minimal plaintext single-member EtcdCluster. +func createImageCluster(ctx context.Context, t *testing.T, name string, + image *etcdv1alpha2.EtcdImageSpec, pullSecrets []corev1.LocalObjectReference) { + t.Helper() + one := int32(1) + ec := &etcdv1alpha2.EtcdCluster{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: imageOverrideNamespace}, + Spec: etcdv1alpha2.EtcdClusterSpec{ + Replicas: &one, + Version: imageOverrideVersion, + Storage: etcdv1alpha2.StorageSpec{Size: resource.MustParse("1Gi")}, + Image: image, + }, + } + ec.Spec.ImagePullSecrets = pullSecrets + if err := kube.Create(ctx, ec); err != nil { + t.Fatalf("create EtcdCluster %s: %v", name, err) + } +} + +func deleteImageCluster(ctx context.Context, t *testing.T, name string) { + t.Helper() + ec := &etcdv1alpha2.EtcdCluster{ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: imageOverrideNamespace}} + if err := kube.Delete(ctx, ec); err != nil && !apierrors.IsNotFound(err) { + t.Errorf("delete EtcdCluster %s: %v", name, err) + } +} + +// etcdMemberPod returns one member Pod of the named cluster. +func etcdMemberPod(ctx context.Context, t *testing.T, cluster string) *corev1.Pod { + t.Helper() + pods := &corev1.PodList{} + if err := kube.List(ctx, pods, client.InNamespace(imageOverrideNamespace), + client.MatchingLabels{labelCluster: cluster}); err != nil { + t.Fatalf("list member pods for %s: %v", cluster, err) + } + if len(pods.Items) == 0 { + t.Fatalf("no member pods for cluster %s", cluster) + } + return &pods.Items[0] +} + +// etcdMemberImage returns the etcd container's image from a member Pod of the +// named cluster. +func etcdMemberImage(ctx context.Context, t *testing.T, cluster string) string { + t.Helper() + pod := etcdMemberPod(ctx, t, cluster) + for _, c := range pod.Spec.Containers { + if c.Name == "etcd" { + return c.Image + } + } + t.Fatalf("pod %s has no etcd container", pod.Name) + return "" // unreachable +} From b3b926575277aa5320b6a07b6cbb0990ca500eef Mon Sep 17 00:00:00 2001 From: Andrey Kolkov Date: Mon, 29 Jun 2026 11:02:27 +0400 Subject: [PATCH 2/2] review fixes Signed-off-by: Andrey Kolkov --- api/v1alpha2/etcdcluster_types.go | 65 +------ api/v1alpha2/etcdmember_types.go | 7 - api/v1alpha2/zz_generated.deepcopy.go | 30 ---- ...cd-operator.cozystack.io_etcdclusters.yaml | 91 +--------- ...tcd-operator.cozystack.io_etcdmembers.yaml | 35 ---- .../etcd-operator/templates/deployment.yaml | 4 +- charts/etcd-operator/values.yaml | 24 ++- controllers/etcdcluster_controller.go | 4 - controllers/etcdmember_controller.go | 7 +- controllers/etcdmember_controller_test.go | 26 +-- controllers/helpers.go | 37 ++-- controllers/helpers_test.go | 38 +---- docs/installation.md | 21 +-- docs/migration.md | 12 +- hack/e2e.sh | 28 ++- internal/migrate/adopt.go | 2 - internal/migrate/translate.go | 60 +------ internal/migrate/translate_test.go | 99 +++-------- main.go | 8 +- test/e2e/image_override_test.go | 161 ++++++------------ 20 files changed, 161 insertions(+), 598 deletions(-) diff --git a/api/v1alpha2/etcdcluster_types.go b/api/v1alpha2/etcdcluster_types.go index a5bbb003..8bdd5253 100644 --- a/api/v1alpha2/etcdcluster_types.go +++ b/api/v1alpha2/etcdcluster_types.go @@ -559,67 +559,18 @@ type EtcdClusterSpec struct { // +optional Options *EtcdOptions `json:"options,omitempty"` - // Image overrides the etcd container image for this cluster's member - // Pods. The primary use is air-gapped environments that mirror the - // upstream image to a private registry. Each unset field falls back to - // an operator-wide default: the repository to the operator's - // --etcd-image-repository / ETCD_IMAGE_REPOSITORY (itself defaulting to - // quay.io/coreos/etcd), the tag to "v"+spec.version. The pull policy is - // apiserver-defaulted to IfNotPresent as soon as this image block is - // present at all (even with only repository set); it falls back to the - // kubelet default only when no image block is given. See EtcdImageSpec - // for the per-field rules. - // - // Updates take effect on newly-created members (scale-up, replacement); - // the operator does not roll existing Pods to apply an image change in - // place. Latched through status.observed with the rest of the target - // spec, so a mid-flight edit only applies once the current target is - // reached. - // +optional - Image *EtcdImageSpec `json:"image,omitempty"` - // ImagePullSecrets is a list of Secret references in the cluster's - // namespace used to pull the etcd (and restore initContainer) image. + // namespace used to pull the etcd (and restore initContainer) image from + // a private registry — e.g. an air-gapped mirror behind credentials. // Passed straight through to each member Pod's spec.imagePullSecrets. - // Required when spec.image points at a private registry that needs - // credentials. // - // Like spec.image, changes take effect on newly-created members; the - // operator does not roll existing Pods. Latched through status.observed. + // Changes take effect on newly-created members (scale-up, replacement); + // the operator does not roll existing Pods. Latched through + // status.observed. // +optional ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets,omitempty"` } -// EtcdImageSpec overrides the etcd container image. Every field is optional; -// an unset field falls back to its operator-wide default so a cluster can -// override just the registry (the common air-gap case) without restating the -// version-derived tag. -type EtcdImageSpec struct { - // Repository is the image repository (registry host + path, no tag), - // e.g. "registry.internal/mirror/etcd". When empty the operator falls - // back to its --etcd-image-repository / ETCD_IMAGE_REPOSITORY default, - // which itself defaults to "quay.io/coreos/etcd". - // +optional - Repository string `json:"repository,omitempty"` - - // Tag overrides the image tag. When empty the operator derives it from - // spec.version as "v"+version (e.g. "v3.6.11"). Set this only when the - // mirror uses a tag scheme that differs from the upstream "vX.Y.Z" — the - // operator still keys all of its version-dependent behaviour off - // spec.version, not off this tag. - // +optional - Tag string `json:"tag,omitempty"` - - // PullPolicy is the etcd container's imagePullPolicy. Defaults to - // IfNotPresent — the right policy for the fixed "v" tags this - // operator runs (it also matches the kubelet's own default for a fixed - // tag). Set Always only when a mutable tag is used via Tag. - // +kubebuilder:default=IfNotPresent - // +kubebuilder:validation:Enum=Always;IfNotPresent;Never - // +optional - PullPolicy corev1.PullPolicy `json:"pullPolicy,omitempty"` -} - // AdditionalMetadata is a set of labels and annotations the operator merges // onto every resource it creates for a cluster. See // EtcdClusterSpec.AdditionalMetadata for the precedence and timing rules. @@ -685,12 +636,6 @@ type ObservedClusterSpec struct { // +optional Options *EtcdOptions `json:"options,omitempty"` - // Image is the locked target etcd image override for member Pods. - // Latched with the rest of the target spec so a mid-flight image edit - // only applies to members created once the current target is reached. - // +optional - Image *EtcdImageSpec `json:"image,omitempty"` - // ImagePullSecrets is the locked target pull-secret list for member // Pods. Latched with the rest of the target spec. // +optional diff --git a/api/v1alpha2/etcdmember_types.go b/api/v1alpha2/etcdmember_types.go index 2ef697fe..7c50fbd3 100644 --- a/api/v1alpha2/etcdmember_types.go +++ b/api/v1alpha2/etcdmember_types.go @@ -116,13 +116,6 @@ type EtcdMemberSpec struct { // +optional Options *EtcdOptions `json:"options,omitempty"` - // Image mirrors EtcdCluster.spec.image at the time this member was - // created. The member controller resolves it (against the operator-wide - // repository default and spec.version) into the etcd container's image - // and imagePullPolicy at Pod-build time. - // +optional - Image *EtcdImageSpec `json:"image,omitempty"` - // ImagePullSecrets mirrors EtcdCluster.spec.imagePullSecrets at the time // this member was created. Passed straight to the Pod's // spec.imagePullSecrets at build time. diff --git a/api/v1alpha2/zz_generated.deepcopy.go b/api/v1alpha2/zz_generated.deepcopy.go index 8b3710e3..a3a5794e 100644 --- a/api/v1alpha2/zz_generated.deepcopy.go +++ b/api/v1alpha2/zz_generated.deepcopy.go @@ -257,11 +257,6 @@ func (in *EtcdClusterSpec) DeepCopyInto(out *EtcdClusterSpec) { *out = new(EtcdOptions) (*in).DeepCopyInto(*out) } - if in.Image != nil { - in, out := &in.Image, &out.Image - *out = new(EtcdImageSpec) - **out = **in - } if in.ImagePullSecrets != nil { in, out := &in.ImagePullSecrets, &out.ImagePullSecrets *out = make([]v1.LocalObjectReference, len(*in)) @@ -335,21 +330,6 @@ func (in *EtcdClusterTLS) DeepCopy() *EtcdClusterTLS { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *EtcdImageSpec) DeepCopyInto(out *EtcdImageSpec) { - *out = *in -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EtcdImageSpec. -func (in *EtcdImageSpec) DeepCopy() *EtcdImageSpec { - if in == nil { - return nil - } - out := new(EtcdImageSpec) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *EtcdMember) DeepCopyInto(out *EtcdMember) { *out = *in @@ -436,11 +416,6 @@ func (in *EtcdMemberSpec) DeepCopyInto(out *EtcdMemberSpec) { *out = new(EtcdOptions) (*in).DeepCopyInto(*out) } - if in.Image != nil { - in, out := &in.Image, &out.Image - *out = new(EtcdImageSpec) - **out = **in - } if in.ImagePullSecrets != nil { in, out := &in.ImagePullSecrets, &out.ImagePullSecrets *out = make([]v1.LocalObjectReference, len(*in)) @@ -685,11 +660,6 @@ func (in *ObservedClusterSpec) DeepCopyInto(out *ObservedClusterSpec) { *out = new(EtcdOptions) (*in).DeepCopyInto(*out) } - if in.Image != nil { - in, out := &in.Image, &out.Image - *out = new(EtcdImageSpec) - **out = **in - } if in.ImagePullSecrets != nil { in, out := &in.ImagePullSecrets, &out.ImagePullSecrets *out = make([]v1.LocalObjectReference, len(*in)) diff --git a/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdclusters.yaml b/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdclusters.yaml index 6ac683de..ed4d78ad 100644 --- a/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdclusters.yaml +++ b/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdclusters.yaml @@ -1166,63 +1166,16 @@ spec: rule: '!has(self.source.pvc) || (has(self.source.pvc.subPath) && size(self.source.pvc.subPath) > 0)' type: object - image: - description: |- - Image overrides the etcd container image for this cluster's member - Pods. The primary use is air-gapped environments that mirror the - upstream image to a private registry. Each unset field falls back to - an operator-wide default: the repository to the operator's - --etcd-image-repository / ETCD_IMAGE_REPOSITORY (itself defaulting to - quay.io/coreos/etcd), the tag to "v"+spec.version. The pull policy is - apiserver-defaulted to IfNotPresent as soon as this image block is - present at all (even with only repository set); it falls back to the - kubelet default only when no image block is given. See EtcdImageSpec - for the per-field rules. - - Updates take effect on newly-created members (scale-up, replacement); - the operator does not roll existing Pods to apply an image change in - place. Latched through status.observed with the rest of the target - spec, so a mid-flight edit only applies once the current target is - reached. - properties: - pullPolicy: - default: IfNotPresent - description: |- - PullPolicy is the etcd container's imagePullPolicy. Defaults to - IfNotPresent — the right policy for the fixed "v" tags this - operator runs (it also matches the kubelet's own default for a fixed - tag). Set Always only when a mutable tag is used via Tag. - enum: - - Always - - IfNotPresent - - Never - type: string - repository: - description: |- - Repository is the image repository (registry host + path, no tag), - e.g. "registry.internal/mirror/etcd". When empty the operator falls - back to its --etcd-image-repository / ETCD_IMAGE_REPOSITORY default, - which itself defaults to "quay.io/coreos/etcd". - type: string - tag: - description: |- - Tag overrides the image tag. When empty the operator derives it from - spec.version as "v"+version (e.g. "v3.6.11"). Set this only when the - mirror uses a tag scheme that differs from the upstream "vX.Y.Z" — the - operator still keys all of its version-dependent behaviour off - spec.version, not off this tag. - type: string - type: object imagePullSecrets: description: |- ImagePullSecrets is a list of Secret references in the cluster's - namespace used to pull the etcd (and restore initContainer) image. + namespace used to pull the etcd (and restore initContainer) image from + a private registry — e.g. an air-gapped mirror behind credentials. Passed straight through to each member Pod's spec.imagePullSecrets. - Required when spec.image points at a private registry that needs - credentials. - Like spec.image, changes take effect on newly-created members; the - operator does not roll existing Pods. Latched through status.observed. + Changes take effect on newly-created members (scale-up, replacement); + the operator does not roll existing Pods. Latched through + status.observed. items: description: |- LocalObjectReference contains enough information to let you locate the @@ -2930,40 +2883,6 @@ spec: x-kubernetes-list-type: atomic type: object type: object - image: - description: |- - Image is the locked target etcd image override for member Pods. - Latched with the rest of the target spec so a mid-flight image edit - only applies to members created once the current target is reached. - properties: - pullPolicy: - default: IfNotPresent - description: |- - PullPolicy is the etcd container's imagePullPolicy. Defaults to - IfNotPresent — the right policy for the fixed "v" tags this - operator runs (it also matches the kubelet's own default for a fixed - tag). Set Always only when a mutable tag is used via Tag. - enum: - - Always - - IfNotPresent - - Never - type: string - repository: - description: |- - Repository is the image repository (registry host + path, no tag), - e.g. "registry.internal/mirror/etcd". When empty the operator falls - back to its --etcd-image-repository / ETCD_IMAGE_REPOSITORY default, - which itself defaults to "quay.io/coreos/etcd". - type: string - tag: - description: |- - Tag overrides the image tag. When empty the operator derives it from - spec.version as "v"+version (e.g. "v3.6.11"). Set this only when the - mirror uses a tag scheme that differs from the upstream "vX.Y.Z" — the - operator still keys all of its version-dependent behaviour off - spec.version, not off this tag. - type: string - type: object imagePullSecrets: description: |- ImagePullSecrets is the locked target pull-secret list for member diff --git a/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdmembers.yaml b/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdmembers.yaml index a50f28b4..36c945ab 100644 --- a/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdmembers.yaml +++ b/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdmembers.yaml @@ -1020,41 +1020,6 @@ spec: the same ClusterID and member ID. While dormant, the member does not count toward the EtcdCluster's `current` replica accounting. type: boolean - image: - description: |- - Image mirrors EtcdCluster.spec.image at the time this member was - created. The member controller resolves it (against the operator-wide - repository default and spec.version) into the etcd container's image - and imagePullPolicy at Pod-build time. - properties: - pullPolicy: - default: IfNotPresent - description: |- - PullPolicy is the etcd container's imagePullPolicy. Defaults to - IfNotPresent — the right policy for the fixed "v" tags this - operator runs (it also matches the kubelet's own default for a fixed - tag). Set Always only when a mutable tag is used via Tag. - enum: - - Always - - IfNotPresent - - Never - type: string - repository: - description: |- - Repository is the image repository (registry host + path, no tag), - e.g. "registry.internal/mirror/etcd". When empty the operator falls - back to its --etcd-image-repository / ETCD_IMAGE_REPOSITORY default, - which itself defaults to "quay.io/coreos/etcd". - type: string - tag: - description: |- - Tag overrides the image tag. When empty the operator derives it from - spec.version as "v"+version (e.g. "v3.6.11"). Set this only when the - mirror uses a tag scheme that differs from the upstream "vX.Y.Z" — the - operator still keys all of its version-dependent behaviour off - spec.version, not off this tag. - type: string - type: object imagePullSecrets: description: |- ImagePullSecrets mirrors EtcdCluster.spec.imagePullSecrets at the time diff --git a/charts/etcd-operator/templates/deployment.yaml b/charts/etcd-operator/templates/deployment.yaml index 58a8f685..feb4eecd 100644 --- a/charts/etcd-operator/templates/deployment.yaml +++ b/charts/etcd-operator/templates/deployment.yaml @@ -55,8 +55,8 @@ spec: value: {{ include "etcd-operator.image" . }} # Operator-wide default etcd image repository for member Pods. Always # set, so the operator's built-in fallback is only reached when the - # binary runs outside this chart. An EtcdCluster's spec.image.repository - # overrides it per cluster; repoint here for an air-gapped mirror. + # binary runs outside this chart. Repoint here for an air-gapped mirror; + # the tag is always v. - name: ETCD_IMAGE_REPOSITORY value: {{ .Values.etcdImage.repository | quote }} livenessProbe: diff --git a/charts/etcd-operator/values.yaml b/charts/etcd-operator/values.yaml index 243ff413..806ca27d 100644 --- a/charts/etcd-operator/values.yaml +++ b/charts/etcd-operator/values.yaml @@ -30,24 +30,20 @@ image: pullPolicy: IfNotPresent # Operator-wide default for the etcd image that runs in member Pods (NOT the -# operator's own image above). An EtcdCluster's spec.image overrides this per -# cluster; set the repository here to point every cluster at an air-gapped -# mirror once. Per-cluster pull credentials go on spec.imagePullSecrets (a -# Secret in the cluster's own namespace), not here. +# operator's own image above). Set the repository here to point every cluster +# at an air-gapped mirror once. Per-cluster pull credentials go on an +# EtcdCluster's spec.imagePullSecrets (a Secret in the cluster's own +# namespace), not here. etcdImage: # -- Default etcd image repository (registry host + path, no tag) for member # Pods. Repoint at an air-gapped mirror, e.g. registry.internal/mirror/etcd. - # The chart always wires this into the operator's ETCD_IMAGE_REPOSITORY, so - # this value is the single operator-wide default; an EtcdCluster's spec.image - # overrides it per cluster. + # The chart always wires this into the operator's ETCD_IMAGE_REPOSITORY. # - # Only the repository is settable operator-wide; the other two parts of the - # image are NOT (no fleet-wide knob, override them per cluster): - # - tag: always derived from each cluster's spec.version as "v" - # (e.g. v3.6.11). Override per cluster via EtcdCluster.spec.image.tag — - # use that only when a mirror's tag scheme differs from upstream vX.Y.Z. - # - pullPolicy: defaults to IfNotPresent (set in the CRD schema). Override - # per cluster via EtcdCluster.spec.image.pullPolicy. + # Only the repository is configurable; the tag is always derived from each + # cluster's spec.version as "v" (e.g. v3.6.11). There is no + # per-cluster repository/tag override — the operator keys all + # version-dependent behaviour off spec.version, so a separate tag could + # silently disagree with it. # # Keep this in sync with the controllers.EtcdImage constant — the operator's # built-in fallback used only when this env is unset (outside the chart). The diff --git a/controllers/etcdcluster_controller.go b/controllers/etcdcluster_controller.go index 2e989adc..32ab7632 100644 --- a/controllers/etcdcluster_controller.go +++ b/controllers/etcdcluster_controller.go @@ -430,7 +430,6 @@ func (r *EtcdClusterReconciler) bootstrap( Affinity: cluster.Status.Observed.Affinity, TopologySpreadConstraints: cluster.Status.Observed.TopologySpreadConstraints, Options: cluster.Status.Observed.Options, - Image: cluster.Status.Observed.Image, ImagePullSecrets: cluster.Status.Observed.ImagePullSecrets, Bootstrap: true, ClusterToken: cluster.Status.ClusterToken, @@ -836,7 +835,6 @@ func (r *EtcdClusterReconciler) scaleUp( Affinity: cluster.Status.Observed.Affinity, TopologySpreadConstraints: cluster.Status.Observed.TopologySpreadConstraints, Options: cluster.Status.Observed.Options, - Image: cluster.Status.Observed.Image, ImagePullSecrets: cluster.Status.Observed.ImagePullSecrets, Bootstrap: false, ClusterToken: cluster.Status.ClusterToken, @@ -2032,7 +2030,6 @@ func snapshotSpecIntoObserved(cluster *lll.EtcdCluster) { TopologySpreadConstraints: cluster.Spec.TopologySpreadConstraints, AdditionalMetadata: cluster.Spec.AdditionalMetadata, Options: cluster.Spec.Options, - Image: cluster.Spec.Image, ImagePullSecrets: cluster.Spec.ImagePullSecrets, } } @@ -2055,7 +2052,6 @@ func specEqualsObserved(cluster *lll.EtcdCluster) bool { equality.Semantic.DeepEqual(o.TopologySpreadConstraints, cluster.Spec.TopologySpreadConstraints) && equality.Semantic.DeepEqual(o.AdditionalMetadata, cluster.Spec.AdditionalMetadata) && equality.Semantic.DeepEqual(o.Options, cluster.Spec.Options) && - equality.Semantic.DeepEqual(o.Image, cluster.Spec.Image) && equality.Semantic.DeepEqual(o.ImagePullSecrets, cluster.Spec.ImagePullSecrets) } diff --git a/controllers/etcdmember_controller.go b/controllers/etcdmember_controller.go index 3ed26890..6af23952 100644 --- a/controllers/etcdmember_controller.go +++ b/controllers/etcdmember_controller.go @@ -758,7 +758,7 @@ func (r *EtcdMemberReconciler) buildPod(member *lll.EtcdMember) *corev1.Pod { volumes = append(volumes, extraVols...) } - etcdImage, etcdPullPolicy := resolveEtcdImage(member, r.EtcdImageRepository) + etcdImage := resolveEtcdImage(member, r.EtcdImageRepository) return &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ @@ -788,9 +788,8 @@ func (r *EtcdMemberReconciler) buildPod(member *lll.EtcdMember) *corev1.Pod { }, }, Containers: []corev1.Container{{ - Name: "etcd", - Image: etcdImage, - ImagePullPolicy: etcdPullPolicy, + Name: "etcd", + Image: etcdImage, SecurityContext: &corev1.SecurityContext{ AllowPrivilegeEscalation: ptrBool(false), Capabilities: &corev1.Capabilities{ diff --git a/controllers/etcdmember_controller_test.go b/controllers/etcdmember_controller_test.go index 9e054290..8fc2ab8a 100644 --- a/controllers/etcdmember_controller_test.go +++ b/controllers/etcdmember_controller_test.go @@ -1016,10 +1016,10 @@ func TestBuildPod_LivenessIsNotQuorumAware(t *testing.T) { } } -// TestBuildPod_ImageOverrideAndPullSecrets covers the air-gap path: buildPod -// must honour the operator-wide default repository, the per-member spec.image -// override, and stamp imagePullSecrets onto the Pod. -func TestBuildPod_ImageOverrideAndPullSecrets(t *testing.T) { +// TestBuildPod_ImageRepoAndPullSecrets covers the air-gap path: buildPod +// resolves the etcd image against the operator-wide default repository (pinned +// to spec.version) and stamps the member's imagePullSecrets onto the Pod. +func TestBuildPod_ImageRepoAndPullSecrets(t *testing.T) { t.Run("operator default repo, version-derived tag", func(t *testing.T) { r := &EtcdMemberReconciler{EtcdImageRepository: "registry.internal/mirror/etcd"} pod := r.buildPod(&lll.EtcdMember{ @@ -1031,28 +1031,16 @@ func TestBuildPod_ImageOverrideAndPullSecrets(t *testing.T) { } }) - t.Run("per-member override and pull secrets win", func(t *testing.T) { + t.Run("pull secrets are stamped onto the Pod", func(t *testing.T) { r := &EtcdMemberReconciler{EtcdImageRepository: "registry.internal/mirror/etcd"} pod := r.buildPod(&lll.EtcdMember{ ObjectMeta: metav1.ObjectMeta{Name: "test-0", Namespace: "ns"}, Spec: lll.EtcdMemberSpec{ - ClusterName: "test", - Version: "3.6.11", - Image: &lll.EtcdImageSpec{ - Repository: "private.example/etcd", - Tag: "3.6.11-custom", - PullPolicy: corev1.PullAlways, - }, + ClusterName: "test", + Version: "3.6.11", ImagePullSecrets: []corev1.LocalObjectReference{{Name: "regcreds"}}, }, }) - c := pod.Spec.Containers[0] - if c.Image != "private.example/etcd:3.6.11-custom" { - t.Errorf("image = %q, want per-member override", c.Image) - } - if c.ImagePullPolicy != corev1.PullAlways { - t.Errorf("pullPolicy = %q, want Always", c.ImagePullPolicy) - } if len(pod.Spec.ImagePullSecrets) != 1 || pod.Spec.ImagePullSecrets[0].Name != "regcreds" { t.Errorf("pod.imagePullSecrets = %+v, want [regcreds]", pod.Spec.ImagePullSecrets) } diff --git a/controllers/helpers.go b/controllers/helpers.go index ef725dc2..087038a7 100644 --- a/controllers/helpers.go +++ b/controllers/helpers.go @@ -24,11 +24,10 @@ const ( RoleVoter = "voter" // EtcdImage is the built-in fallback etcd image repository (registry - // host + path, no tag). It is the last resort in the resolution chain: - // an EtcdCluster's spec.image.repository wins first, then the - // operator-wide --etcd-image-repository / ETCD_IMAGE_REPOSITORY default, - // then this constant. See resolveEtcdImage. Keep in sync with the chart's - // etcdImage.repository default in charts/etcd-operator/values.yaml. + // host + path, no tag). It is used when the operator-wide + // --etcd-image-repository / ETCD_IMAGE_REPOSITORY default is unset. See + // resolveEtcdImage. Keep in sync with the chart's etcdImage.repository + // default in charts/etcd-operator/values.yaml. EtcdImage = "quay.io/coreos/etcd" // MemberFinalizer is placed on EtcdMember resources to ensure @@ -113,32 +112,18 @@ func memberDataDir(member *lll.EtcdMember) string { return path.Join(etcdDataDirRoot, sub) } -// resolveEtcdImage resolves a member's etcd container image and pull policy -// from (in precedence order) the member's mirrored spec.image override, the +// resolveEtcdImage resolves a member's etcd container image from the // operator-wide repository default (defaultRepo, from the operator's -// --etcd-image-repository flag), and the EtcdImage built-in. The tag defaults -// to "v"+spec.version — the operator keys every version-dependent behaviour -// off spec.version, so an explicit spec.image.tag changes only the pulled -// reference, never that behaviour. The pull policy comes from spec.image -// (apiserver-defaulted to IfNotPresent whenever the image block is present); -// when no image block is set at all it stays empty and the container field is -// left unset, so the kubelet default applies. -func resolveEtcdImage(member *lll.EtcdMember, defaultRepo string) (image string, pullPolicy corev1.PullPolicy) { +// --etcd-image-repository flag) or the EtcdImage built-in when that is empty, +// tagged with "v"+spec.version. The operator keys every version-dependent +// behaviour off spec.version, so the image is always pinned to that version — +// there is no per-cluster tag override that could disagree with it. +func resolveEtcdImage(member *lll.EtcdMember, defaultRepo string) string { repo := defaultRepo if repo == "" { repo = EtcdImage } - tag := "v" + member.Spec.Version - if img := member.Spec.Image; img != nil { - if img.Repository != "" { - repo = img.Repository - } - if img.Tag != "" { - tag = img.Tag - } - pullPolicy = img.PullPolicy - } - return repo + ":" + tag, pullPolicy + return repo + ":v" + member.Spec.Version } // peerURL returns the etcd peer URL for a member, using the headless Service diff --git a/controllers/helpers_test.go b/controllers/helpers_test.go index a7d7fed4..e594908b 100644 --- a/controllers/helpers_test.go +++ b/controllers/helpers_test.go @@ -13,7 +13,6 @@ package controllers import ( "testing" - corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" lll "github.com/cozystack/etcd-operator/api/v1alpha2" @@ -143,9 +142,12 @@ func TestMemberEndpoints_PerMemberServiceName(t *testing.T) { } } +// TestResolveEtcdImage pins the operator-wide repository resolution: the +// --etcd-image-repository default (or the EtcdImage built-in when unset), +// always tagged "v"+spec.version. func TestResolveEtcdImage(t *testing.T) { - member := func(version string, img *lll.EtcdImageSpec) *lll.EtcdMember { - return &lll.EtcdMember{Spec: lll.EtcdMemberSpec{Version: version, Image: img}} + member := func(version string) *lll.EtcdMember { + return &lll.EtcdMember{Spec: lll.EtcdMemberSpec{Version: version}} } cases := []struct { @@ -153,46 +155,24 @@ func TestResolveEtcdImage(t *testing.T) { member *lll.EtcdMember defaultRepo string wantImage string - wantPolicy corev1.PullPolicy }{ { - name: "no override, no operator default → built-in repo + v", - member: member("3.6.11", nil), + name: "no operator default → built-in repo + v", + member: member("3.6.11"), wantImage: EtcdImage + ":v3.6.11", }, { name: "operator default repo, version-derived tag", - member: member("3.6.11", nil), + member: member("3.6.11"), defaultRepo: "registry.internal/mirror/etcd", wantImage: "registry.internal/mirror/etcd:v3.6.11", }, - { - name: "per-cluster repository overrides the operator default", - member: member("3.6.11", &lll.EtcdImageSpec{Repository: "private.example/etcd"}), - defaultRepo: "registry.internal/mirror/etcd", - wantImage: "private.example/etcd:v3.6.11", - }, - { - name: "explicit tag overrides the version-derived default", - member: member("3.6.11", &lll.EtcdImageSpec{Tag: "3.6.11-mirror"}), - wantImage: EtcdImage + ":3.6.11-mirror", - }, - { - name: "pull policy is propagated", - member: member("3.6.11", &lll.EtcdImageSpec{PullPolicy: corev1.PullAlways}), - wantImage: EtcdImage + ":v3.6.11", - wantPolicy: corev1.PullAlways, - }, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { - img, policy := resolveEtcdImage(tc.member, tc.defaultRepo) - if img != tc.wantImage { + if img := resolveEtcdImage(tc.member, tc.defaultRepo); img != tc.wantImage { t.Errorf("image = %q, want %q", img, tc.wantImage) } - if policy != tc.wantPolicy { - t.Errorf("pullPolicy = %q, want %q", policy, tc.wantPolicy) - } }) } } diff --git a/docs/installation.md b/docs/installation.md index 7f0416d8..bcf2a932 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -98,7 +98,7 @@ Common values (`--set key=value`, or a `-f my-values.yaml`): | `crds.enabled` / `crds.keep` | `true` / `true` | Render the CRDs with the release / annotate them so uninstall keeps them. | | `manager.resources` | 10m/64Mi → 500m/128Mi | Manager container requests/limits. | | `imagePullSecrets` | `[]` | Pull secrets for the **operator's own** image (private registry mirror). | -| `etcdImage.repository` | `quay.io/coreos/etcd` | Operator-wide default **etcd** image repo for member Pods (always wired into `ETCD_IMAGE_REPOSITORY`). Repoint at an air-gapped mirror once; an `EtcdCluster`'s `spec.image` overrides it per cluster. | +| `etcdImage.repository` | `quay.io/coreos/etcd` | Operator-wide default **etcd** image repo for member Pods (always wired into `ETCD_IMAGE_REPOSITORY`). Repoint at an air-gapped mirror once; the tag is always `v`. | See `charts/etcd-operator/values.yaml` for the complete, annotated list. Verify the install: @@ -244,28 +244,21 @@ The `spec.tls` subtree is immutable post-create — flipping TLS on or off on an ## Image versions -By default `spec.version` in an `EtcdCluster` becomes `quay.io/coreos/etcd:v`. For air-gapped environments that mirror the image to a private registry there are two override surfaces, lowest-precedence first: +By default `spec.version` in an `EtcdCluster` becomes `quay.io/coreos/etcd:v`. For an air-gapped environment that mirrors the image to a private registry, repoint the **repository** operator-wide and supply per-cluster pull credentials: -- **Operator-wide default** — set `etcdImage.repository` in the chart (env `ETCD_IMAGE_REPOSITORY` / flag `--etcd-image-repository`) to a registry/path, e.g. `registry.internal/mirror/etcd`. Every cluster that doesn't override picks it up; the tag stays `v`. -- **Per-cluster** — `spec.image` on an `EtcdCluster` overrides the repository, the tag, and the pull policy for that cluster's member Pods: +- **Repository (operator-wide)** — set `etcdImage.repository` in the chart (env `ETCD_IMAGE_REPOSITORY` / flag `--etcd-image-repository`) to a registry/path, e.g. `registry.internal/mirror/etcd`. Every member Pod the operator creates pulls from it; the tag is always `v`. Mirror once per fleet — there is intentionally no per-cluster repository/tag override, because the operator keys every version-dependent behaviour (the restore version-compat pre-flight, the latched target, drift detection) off `spec.version`, and a per-cluster `tag` could silently disagree with it. +- **Pull credentials (per-cluster)** — `spec.imagePullSecrets` on an `EtcdCluster`: ```yaml spec: version: "3.6.11" - image: - repository: registry.internal/mirror/etcd # optional; falls back to the operator default - tag: "" # optional; defaults to v3.6.11 - pullPolicy: IfNotPresent # optional; this is the default imagePullSecrets: - - name: regcreds # Secret in the cluster's namespace + - name: regcreds # Secret in the cluster's namespace ``` - `spec.imagePullSecrets` references pull-credential Secrets in the cluster's own namespace and is passed straight through to each member Pod. Note the operator still keys every version-dependent behaviour off `spec.version`, not off `spec.image.tag` — set the tag only when the mirror's tag scheme differs from the upstream `vX.Y.Z`. Like `spec.resources`, an image change applies to **newly-created** members (scale-up, replacement), not existing Pods in place. + It references pull-credential Secrets in the cluster's own namespace and is passed straight through to each member Pod. Like `spec.resources`, a change applies to **newly-created** members (scale-up, replacement), not existing Pods in place. - These two surfaces cover the **etcd member image** only. Two caveats in a fully air-gapped install where the operator image is mirrored too: - - - `spec.imagePullSecrets` is set Pod-wide, so it **does** cover the member Pod's restore initContainer (which runs the operator image at bootstrap-from-snapshot) — a `spec.bootstrap.restore` can pull the mirrored operator image via these secrets. - - Standalone `EtcdSnapshot` backup/restore **Jobs** also run the operator image but are **not** covered by `spec.imagePullSecrets`. Repoint the operator image (chart `image.repository`) and make sure the snapshot's namespace can already pull it, or those Jobs `ImagePullBackOff`. The operator Pod itself uses the chart-level `imagePullSecrets`. + `spec.imagePullSecrets` is set Pod-wide, so it **does** cover the member Pod's restore initContainer (which runs the operator image at bootstrap-from-snapshot) — a `spec.bootstrap.restore` can pull the mirrored operator image via these secrets. Standalone `EtcdSnapshot` backup/restore **Jobs** also run the operator image but are **not** covered by `spec.imagePullSecrets`: in a fully air-gapped install repoint the operator image (chart `image.repository`) and make sure the snapshot's namespace can already pull it, or those Jobs `ImagePullBackOff`. The operator Pod itself uses the chart-level `imagePullSecrets`. The `spec.version` examples throughout these docs use **3.6.x**, to match the `etcdutl` bundled in the operator image: `spec.bootstrap.restore` requires `spec.version` and that `etcdutl` to share a minor (see the [restore runbook](operations.md#restoring-a-cluster-from-a-snapshot)). The operator's etcd client is v3.6.x and is wire-compatible with 3.5.x servers, so a cluster you never restore into can still run 3.5.x — but to back up and restore on the same version, run 3.6.x. diff --git a/docs/migration.md b/docs/migration.md index 94046f9c..d5e2bcb9 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -147,11 +147,13 @@ What gets migrated: Every legacy knob with no v1alpha2 equivalent (`spec.options` keys beyond the [four typed ones](#specoptions-free-form-map--typed-fields), service/PDB templates, podTemplate overrides beyond affinity/topology-spread/resources/ -metadata) is reported as a warning — review them before `--apply`. A legacy -`podTemplate` etcd image from a non-default registry/tag is **preserved** as -`spec.image`, and `podTemplate.spec.imagePullSecrets` is **carried** into -`spec.imagePullSecrets` (both used to be dropped) — so an air-gapped cluster -keeps pulling from its mirror after the operator rolls a replacement Pod. Hard +metadata) is reported as a warning — review them before `--apply`. +`podTemplate.spec.imagePullSecrets` is **carried** into `spec.imagePullSecrets` +(it used to be dropped) — so an air-gapped cluster keeps its credentials to pull +from its mirror after the operator rolls a replacement Pod. (The etcd image's +registry/tag is not carried; repoint the mirror operator-wide via +`--etcd-image-repository`, since the operator pins the image to `spec.version`.) +Hard blockers (`emptyDir` storage — nothing to adopt, an unparsable etcd image tag without `--version`, `enableAuth` without server TLS, a non-integer `quota-backend-bytes`/`snapshot-count`, a failed inspection) skip that diff --git a/hack/e2e.sh b/hack/e2e.sh index 3fc22a6b..34d14814 100755 --- a/hack/e2e.sh +++ b/hack/e2e.sh @@ -110,32 +110,28 @@ echo "--- building and deploying the operator ($IMG)" docker build -t "$IMG" . kind load docker-image "$IMG" --name "$KIND_CLUSTER_NAME" -# Air-gap image-override coverage (TestEtcdImageOverride). The mirror -# registries below never resolve over the network, so re-tag the upstream -# etcd image under both names and side-load them into the node. With the -# CRD-defaulted pullPolicy=IfNotPresent the kubelet uses the locally-present -# image and never dials registry.internal — exactly how a private air-gapped -# mirror behaves, but with no registry to stand up. +# Air-gap image-repository coverage (TestEtcdImageOverride). The mirror +# registry below never resolves over the network, so re-tag the upstream etcd +# image under that name and side-load it into the node. With the kubelet's +# default IfNotPresent policy for a fixed tag it uses the locally-present image +# and never dials registry.internal — exactly how a private air-gapped mirror +# behaves, but with no registry to stand up. # -# ETCD_IMAGE_TAG must track test/e2e/testdata/02-etcdcluster.yaml's -# spec.version (operator pulls "v"); the override test pins the same. +# The tag must track test/e2e/testdata/02-etcdcluster.yaml's spec.version +# (operator pulls "v"); the override test pins the same. ETCD_UPSTREAM=quay.io/coreos/etcd:v3.6.11 OPERATOR_DEFAULT_MIRROR=registry.internal/mirror/etcd -PER_CLUSTER_MIRROR=registry.internal/percluster/etcd -echo "--- side-loading mirrored etcd images for the air-gap override test" +echo "--- side-loading the mirrored etcd image for the air-gap repository test" docker pull "$ETCD_UPSTREAM" -for mirror in "$OPERATOR_DEFAULT_MIRROR" "$PER_CLUSTER_MIRROR"; do - docker tag "$ETCD_UPSTREAM" "$mirror:v3.6.11" - kind load docker-image "$mirror:v3.6.11" --name "$KIND_CLUSTER_NAME" -done +docker tag "$ETCD_UPSTREAM" "$OPERATOR_DEFAULT_MIRROR:v3.6.11" +kind load docker-image "$OPERATOR_DEFAULT_MIRROR:v3.6.11" --name "$KIND_CLUSTER_NAME" # Helm install: CRDs are templated into the release and image == OPERATOR_IMAGE # is wired by the chart, so this one command lands CRDs + RBAC + manager. # etcdImage.repository points the operator-wide default at the mirror: this is # what exercises the chart-value -> ETCD_IMAGE_REPOSITORY env -> flag -> # resolveEtcdImage -> member Pod chain (the value differs from the built-in -# EtcdImage constant, so a typo anywhere in that chain is caught). Per-cluster -# spec.image (which outranks this default) is covered by the same test. +# EtcdImage constant, so a typo anywhere in that chain is caught). make deploy IMG="$IMG" HELM_EXTRA_ARGS="--set etcdImage.repository=$OPERATOR_DEFAULT_MIRROR" # Select by the chart's control-plane label rather than a fixed Deployment name. kubectl -n etcd-operator-system wait deploy \ diff --git a/internal/migrate/adopt.go b/internal/migrate/adopt.go index 8b434421..7410ecee 100644 --- a/internal/migrate/adopt.go +++ b/internal/migrate/adopt.go @@ -279,7 +279,6 @@ func BuildAdoption(name, namespace string, spec legacy.EtcdClusterSpec, facts Cl Affinity: cluster.Spec.Affinity, TopologySpreadConstraints: cluster.Spec.TopologySpreadConstraints, Options: cluster.Spec.Options, - Image: cluster.Spec.Image, ImagePullSecrets: cluster.Spec.ImagePullSecrets, Bootstrap: false, InitialCluster: initialCluster, @@ -313,7 +312,6 @@ func BuildAdoption(name, namespace string, spec legacy.EtcdClusterSpec, facts Cl TopologySpreadConstraints: cluster.Spec.TopologySpreadConstraints, AdditionalMetadata: cluster.Spec.AdditionalMetadata, Options: cluster.Spec.Options, - Image: cluster.Spec.Image, ImagePullSecrets: cluster.Spec.ImagePullSecrets, }, } diff --git a/internal/migrate/translate.go b/internal/migrate/translate.go index 0fab46d3..3dce42db 100644 --- a/internal/migrate/translate.go +++ b/internal/migrate/translate.go @@ -26,7 +26,6 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" lll "github.com/cozystack/etcd-operator/api/v1alpha2" - "github.com/cozystack/etcd-operator/controllers" "github.com/cozystack/etcd-operator/internal/migrate/legacy" ) @@ -171,50 +170,6 @@ func extractVersion(podSpec corev1.PodSpec, override string) (string, []string, return tag, warns, nil } -// etcdImageOverride returns the spec.image override needed to make the new -// operator pull the same etcd image the legacy podTemplate ran, or nil when -// the legacy image already matches the new operator's defaults (repository -// controllers.EtcdImage, tag "v"+version). Only the differing parts are set; -// spec.image fills the rest from those defaults. -// -// A digest reference (repo@sha256:... or repo:tag@sha256:...) carries no -// per-digest field in spec.image, but its registry/repository — the load- -// bearing part for an air-gapped mirror — is fully recoverable: strip the -// digest and continue with the repo[:tag] prefix. A missing tag then defaults -// to v via spec.image, which extractVersion already forced an -// explicit --version to supply. Dropping the whole override here (the old -// behaviour) silently repointed digest-pinned mirrors back at the operator's -// default registry and ImagePullBackOff'd — the very air-gap case this exists -// to fix. -func etcdImageOverride(image, version string) *lll.EtcdImageSpec { - if image == "" { - return nil - } - if at := strings.Index(image, "@"); at >= 0 { - image = image[:at] - } - - // Split on the last ":" only when it introduces a tag. A registry-port - // colon (registry:5000/etcd) is followed by a path component; a tag is - // not, so the presence of a "/" after the colon tells them apart. - repo, tag := image, "" - if idx := strings.LastIndex(image, ":"); idx >= 0 && !strings.Contains(image[idx+1:], "/") { - repo, tag = image[:idx], image[idx+1:] - } - - override := &lll.EtcdImageSpec{} - if repo != controllers.EtcdImage { - override.Repository = repo - } - if tag != "" && tag != "v"+version { - override.Tag = tag - } - if override.Repository == "" && override.Tag == "" { - return nil - } - return override -} - func findContainer(containers []corev1.Container, name string) *corev1.Container { for i := range containers { if containers[i].Name == name { @@ -345,15 +300,12 @@ func translatePodTemplate(pt legacy.PodTemplate, out *lll.EtcdCluster, plan *Res var dropped []string if c := findContainer(ps.Containers, "etcd"); c != nil { out.Spec.Resources = c.Resources - // Preserve a custom etcd image as spec.image so the replacement Pods - // the new operator builds pull the same reference the legacy cluster - // ran — load-bearing for air-gapped mirrors whose registry or tag - // scheme differs from the new operator's defaults. - if img := etcdImageOverride(c.Image, out.Spec.Version); img != nil { - out.Spec.Image = img - } - // Image (above), Resources are consumed here; everything else on the - // etcd container is an unmappable override. + // Image (consumed above by extractVersion → spec.version) and Resources + // are mapped; everything else on the etcd container is an unmappable + // override. The image's registry/tag is deliberately not carried: the + // operator pins the etcd image to spec.version, so a private mirror is + // repointed via the operator-wide --etcd-image-repository, not per + // cluster. for field, set := range map[string]bool{ "command": len(c.Command) > 0, "args": len(c.Args) > 0, diff --git a/internal/migrate/translate_test.go b/internal/migrate/translate_test.go index 13d40094..5549adfa 100644 --- a/internal/migrate/translate_test.go +++ b/internal/migrate/translate_test.go @@ -208,11 +208,13 @@ func TestTranslateCluster_KitchenSink(t *testing.T) { } } -// TestTranslateCluster_ImageAndPullSecrets pins the air-gap migration path: -// a custom etcd registry/tag is preserved as spec.image so replacement Pods -// pull the same reference, and pull secrets are carried into -// spec.imagePullSecrets instead of being dropped. -func TestTranslateCluster_ImageAndPullSecrets(t *testing.T) { +// TestTranslateCluster_PullSecretsCarried pins the air-gap migration path: +// a legacy podTemplate's imagePullSecrets are carried into +// spec.imagePullSecrets (they used to be dropped), so an adopted cluster keeps +// its credentials to pull from a private mirror. The etcd image's +// registry/tag is deliberately NOT carried — the operator pins the image to +// spec.version and repoints mirrors operator-wide. +func TestTranslateCluster_PullSecretsCarried(t *testing.T) { base := legacy.EtcdClusterSpec{ Storage: legacy.StorageSpec{VolumeClaimTemplate: legacy.EmbeddedPersistentVolumeClaim{ Spec: corev1.PersistentVolumeClaimSpec{Resources: corev1.VolumeResourceRequirements{ @@ -220,83 +222,20 @@ func TestTranslateCluster_ImageAndPullSecrets(t *testing.T) { }}, } - t.Run("custom registry preserved, version-matching tag elided", func(t *testing.T) { - spec := base - spec.PodTemplate.Spec.Containers = []corev1.Container{ - {Name: "etcd", Image: "registry.internal/mirror/etcd:v3.6.11"}} - spec.PodTemplate.Spec.ImagePullSecrets = []corev1.LocalObjectReference{{Name: "regcreds"}} - - plan := TranslateCluster("c", "ns", spec, TranslateOptions{}) - out := clusterTarget(t, plan) - - if out.Spec.Image == nil || out.Spec.Image.Repository != "registry.internal/mirror/etcd" { - t.Fatalf("spec.image.repository = %+v, want mirror repo", out.Spec.Image) - } - if out.Spec.Image.Tag != "" { - t.Errorf("spec.image.tag = %q, want empty (tag equals v+version, so derived)", out.Spec.Image.Tag) - } - if len(out.Spec.ImagePullSecrets) != 1 || out.Spec.ImagePullSecrets[0].Name != "regcreds" { - t.Errorf("spec.imagePullSecrets = %+v, want [regcreds]", out.Spec.ImagePullSecrets) - } - if hasWarning(plan.Warnings, "imagePullSecrets") { - t.Errorf("imagePullSecrets must no longer be dropped; warnings: %v", plan.Warnings) - } - }) - - t.Run("non-default tag scheme preserved", func(t *testing.T) { - spec := base - // Mirror uses a bare "X.Y.Z" tag (no v prefix); the operator would - // otherwise default to v3.6.11 and ImagePullBackOff against the mirror. - spec.PodTemplate.Spec.Containers = []corev1.Container{ - {Name: "etcd", Image: "registry.internal/mirror/etcd:3.6.11"}} - - out := clusterTarget(t, TranslateCluster("c", "ns", spec, TranslateOptions{})) - if out.Spec.Image == nil || out.Spec.Image.Tag != "3.6.11" { - t.Fatalf("spec.image.tag = %+v, want 3.6.11 preserved", out.Spec.Image) - } - }) - - t.Run("digest-pinned mirror preserves the registry", func(t *testing.T) { - // Legacy air-gapped clusters often pin by digest. spec.image has no - // digest field, but the mirror registry must still be carried or the - // replacement Pods ImagePullBackOff against the operator's default - // registry. extractVersion forces --version for digest refs, so the - // tag defaults to v; only the repository is recoverable. - spec := base - spec.PodTemplate.Spec.Containers = []corev1.Container{ - {Name: "etcd", Image: "registry.internal/mirror/etcd@sha256:" + strings.Repeat("d", 64)}} + spec := base + spec.PodTemplate.Spec.Containers = []corev1.Container{ + {Name: "etcd", Image: "registry.internal/mirror/etcd:v3.6.11"}} + spec.PodTemplate.Spec.ImagePullSecrets = []corev1.LocalObjectReference{{Name: "regcreds"}} - out := clusterTarget(t, TranslateCluster("c", "ns", spec, TranslateOptions{VersionOverride: "3.6.11"})) - if out.Spec.Image == nil || out.Spec.Image.Repository != "registry.internal/mirror/etcd" { - t.Fatalf("spec.image.repository = %+v, want mirror repo preserved from digest ref", out.Spec.Image) - } - if out.Spec.Image.Tag != "" { - t.Errorf("spec.image.tag = %q, want empty (digest carries no tag; defaults to v+version)", out.Spec.Image.Tag) - } - }) - - t.Run("digest-pinned upstream default yields no override", func(t *testing.T) { - // The mirror is the upstream default registry: nothing to preserve. - spec := base - spec.PodTemplate.Spec.Containers = []corev1.Container{ - {Name: "etcd", Image: "quay.io/coreos/etcd@sha256:" + strings.Repeat("d", 64)}} - - out := clusterTarget(t, TranslateCluster("c", "ns", spec, TranslateOptions{VersionOverride: "3.6.11"})) - if out.Spec.Image != nil { - t.Errorf("spec.image = %+v, want nil for a digest ref on the default registry", out.Spec.Image) - } - }) - - t.Run("upstream default image yields no override", func(t *testing.T) { - spec := base - spec.PodTemplate.Spec.Containers = []corev1.Container{ - {Name: "etcd", Image: "quay.io/coreos/etcd:v3.6.11"}} + plan := TranslateCluster("c", "ns", spec, TranslateOptions{}) + out := clusterTarget(t, plan) - out := clusterTarget(t, TranslateCluster("c", "ns", spec, TranslateOptions{})) - if out.Spec.Image != nil { - t.Errorf("spec.image = %+v, want nil for the upstream default image", out.Spec.Image) - } - }) + if len(out.Spec.ImagePullSecrets) != 1 || out.Spec.ImagePullSecrets[0].Name != "regcreds" { + t.Errorf("spec.imagePullSecrets = %+v, want [regcreds]", out.Spec.ImagePullSecrets) + } + if hasWarning(plan.Warnings, "imagePullSecrets") { + t.Errorf("imagePullSecrets must no longer be dropped; warnings: %v", plan.Warnings) + } } // TestTranslateCluster_VersionExtraction pins the image-tag → spec.version diff --git a/main.go b/main.go index 18b4919b..3d6450ba 100644 --- a/main.go +++ b/main.go @@ -135,10 +135,10 @@ func main() { "EtcdSnapshot and spec.bootstrap.restore to function.") flag.StringVar(&etcdImageRepository, "etcd-image-repository", os.Getenv("ETCD_IMAGE_REPOSITORY"), "Operator-wide default etcd image repository (registry host + path, no tag), "+ - "e.g. 'registry.internal/mirror/etcd'. Used for every member Pod whose "+ - "EtcdCluster does not set spec.image.repository — point air-gapped "+ - "deployments at a mirror once instead of per cluster. Defaults to "+ - "$ETCD_IMAGE_REPOSITORY; when empty the built-in quay.io/coreos/etcd is used.") + "e.g. 'registry.internal/mirror/etcd'. Used for every member Pod — point "+ + "air-gapped deployments at a mirror once; the tag is always v. "+ + "Defaults to $ETCD_IMAGE_REPOSITORY; when empty the built-in "+ + "quay.io/coreos/etcd is used.") opts := zap.Options{ Development: true, } diff --git a/test/e2e/image_override_test.go b/test/e2e/image_override_test.go index b3559c34..47918d3e 100644 --- a/test/e2e/image_override_test.go +++ b/test/e2e/image_override_test.go @@ -8,7 +8,6 @@ import ( "time" corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" @@ -17,38 +16,33 @@ import ( ) const ( - // imageOverrideNamespace isolates this test's clusters from the Kamaji + // imageOverrideNamespace isolates this test's cluster from the Kamaji // suite. labelCluster matches controllers.LabelCluster (kept as a literal // here, mirroring kamaji_datastore_test.go, to avoid importing the // controllers package into the e2e suite). imageOverrideNamespace = "airgap-e2e" labelCluster = "etcd-operator.cozystack.io/cluster" - // These three must stay in sync with hack/e2e.sh, which side-loads the - // upstream etcd image under both mirror names and deploys the operator - // with etcdImage.repository=operatorDefaultMirror. The version tracks + // Must stay in sync with hack/e2e.sh, which side-loads the upstream etcd + // image under operatorDefaultMirror and deploys the operator with + // etcdImage.repository=operatorDefaultMirror. The version tracks // test/e2e/testdata/02-etcdcluster.yaml. imageOverrideVersion = "3.6.11" operatorDefaultMirror = "registry.internal/mirror/etcd" - perClusterMirror = "registry.internal/percluster/etcd" ) -// TestEtcdImageOverride proves the air-gap image-override contract end to end -// against a real cluster — the thing the unit tests cannot: that the resolved -// repository actually reaches a member Pod and the member comes up pulling -// from it. +// TestEtcdImageOverride proves the air-gap contract end to end against a real +// cluster — the thing the unit tests cannot: that the operator-wide repository +// default actually reaches a member Pod and the member comes up pulling from +// it, and that spec.imagePullSecrets rides through to the Pod. // -// - Operator-wide default: a cluster with no spec.image resolves to the -// operator's etcdImage.repository (chart value -> ETCD_IMAGE_REPOSITORY env -// -> --etcd-image-repository flag -> resolveEtcdImage -> buildPod). Because -// the harness points that default at a mirror whose name differs from the -// built-in EtcdImage constant, a typo anywhere in that chain would surface -// here as the wrong (or unpullable) image. -// - Per-cluster override: a cluster with spec.image.repository outranks the -// operator default, and spec.imagePullSecrets rides through to the Pod. -// -// Both clusters reach Available, which means the kubelet actually pulled the -// mirror reference (side-loaded as IfNotPresent) and the member joined quorum. +// The operator-wide default resolves through chart value -> ETCD_IMAGE_REPOSITORY +// env -> --etcd-image-repository flag -> resolveEtcdImage -> buildPod. Because +// the harness points that default at a mirror whose name differs from the +// built-in EtcdImage constant, a typo anywhere in that chain would surface here +// as the wrong (or unpullable) image. The cluster reaching Available means the +// kubelet actually pulled the mirror reference (side-loaded as IfNotPresent) +// and the member joined quorum. func TestEtcdImageOverride(t *testing.T) { ctx := context.Background() @@ -67,89 +61,56 @@ func TestEtcdImageOverride(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Name: imageOverrideNamespace}}) }) - t.Run("operator-wide default reaches the member Pod", func(t *testing.T) { - const name = "etcd-default" - createImageCluster(ctx, t, name, nil, nil) - defer deleteImageCluster(ctx, t, name) - - waitFor(ctx, t, 5*time.Minute, name+" Available", - etcdClusterAvailable(imageOverrideNamespace, name)) - - img := etcdMemberImage(ctx, t, name) - if want := operatorDefaultMirror + ":v" + imageOverrideVersion; img != want { - t.Errorf("etcd member image = %q, want operator-wide mirror default %q", img, want) - } - // pullPolicy is deliberately not asserted on the live Pod: the apiserver - // defaults any fixed (non-:latest) tag to IfNotPresent, so it reads back - // as IfNotPresent regardless of whether the operator set it. The - // operator-level contract (unset when no image block, propagated when - // set) is covered by the unit tests TestResolveEtcdImage and - // TestBuildPod_ImageOverrideAndPullSecrets. - }) - - t.Run("per-cluster spec.image outranks the default", func(t *testing.T) { - const name = "etcd-percluster" - // A pull-credentials Secret in the cluster's own namespace, referenced - // by spec.imagePullSecrets. The side-loaded image needs no real pull, - // but the Secret must exist and flow through to the Pod unchanged. - pullSecret := "mirror-regcreds" - sec := &corev1.Secret{ - TypeMeta: metav1.TypeMeta{APIVersion: "v1", Kind: "Secret"}, - ObjectMeta: metav1.ObjectMeta{Name: pullSecret, Namespace: imageOverrideNamespace}, - Type: corev1.SecretTypeDockerConfigJson, - StringData: map[string]string{".dockerconfigjson": `{"auths":{}}`}, - } - if err := kube.Patch(ctx, sec, client.Apply, fieldOwner, client.ForceOwnership); err != nil { - t.Fatalf("create pull secret: %v", err) - } - - createImageCluster(ctx, t, name, - &etcdv1alpha2.EtcdImageSpec{Repository: perClusterMirror}, - []corev1.LocalObjectReference{{Name: pullSecret}}) - defer deleteImageCluster(ctx, t, name) - - waitFor(ctx, t, 5*time.Minute, name+" Available", - etcdClusterAvailable(imageOverrideNamespace, name)) - - img := etcdMemberImage(ctx, t, name) - if want := perClusterMirror + ":v" + imageOverrideVersion; img != want { - t.Errorf("etcd member image = %q, want per-cluster mirror %q", img, want) - } - - // The pull-secret passthrough (spec.imagePullSecrets -> member Pod) is - // the genuinely e2e-only half of the contract. - pod := etcdMemberPod(ctx, t, name) - if len(pod.Spec.ImagePullSecrets) != 1 || pod.Spec.ImagePullSecrets[0].Name != pullSecret { - t.Errorf("pod imagePullSecrets = %+v, want [%s]", pod.Spec.ImagePullSecrets, pullSecret) - } - }) -} + // A pull-credentials Secret in the cluster's own namespace, referenced by + // spec.imagePullSecrets. The side-loaded image needs no real pull, but the + // Secret must exist and flow through to the Pod unchanged. + const name, pullSecret = "etcd-airgap", "mirror-regcreds" + sec := &corev1.Secret{ + TypeMeta: metav1.TypeMeta{APIVersion: "v1", Kind: "Secret"}, + ObjectMeta: metav1.ObjectMeta{Name: pullSecret, Namespace: imageOverrideNamespace}, + Type: corev1.SecretTypeDockerConfigJson, + StringData: map[string]string{".dockerconfigjson": `{"auths":{}}`}, + } + if err := kube.Patch(ctx, sec, client.Apply, fieldOwner, client.ForceOwnership); err != nil { + t.Fatalf("create pull secret: %v", err) + } -// createImageCluster applies a minimal plaintext single-member EtcdCluster. -func createImageCluster(ctx context.Context, t *testing.T, name string, - image *etcdv1alpha2.EtcdImageSpec, pullSecrets []corev1.LocalObjectReference) { - t.Helper() one := int32(1) ec := &etcdv1alpha2.EtcdCluster{ ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: imageOverrideNamespace}, Spec: etcdv1alpha2.EtcdClusterSpec{ - Replicas: &one, - Version: imageOverrideVersion, - Storage: etcdv1alpha2.StorageSpec{Size: resource.MustParse("1Gi")}, - Image: image, + Replicas: &one, + Version: imageOverrideVersion, + Storage: etcdv1alpha2.StorageSpec{Size: resource.MustParse("1Gi")}, + ImagePullSecrets: []corev1.LocalObjectReference{{Name: pullSecret}}, }, } - ec.Spec.ImagePullSecrets = pullSecrets if err := kube.Create(ctx, ec); err != nil { t.Fatalf("create EtcdCluster %s: %v", name, err) } -} + t.Cleanup(func() { + _ = kube.Delete(context.Background(), &etcdv1alpha2.EtcdCluster{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: imageOverrideNamespace}}) + }) -func deleteImageCluster(ctx context.Context, t *testing.T, name string) { - t.Helper() - ec := &etcdv1alpha2.EtcdCluster{ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: imageOverrideNamespace}} - if err := kube.Delete(ctx, ec); err != nil && !apierrors.IsNotFound(err) { - t.Errorf("delete EtcdCluster %s: %v", name, err) + waitFor(ctx, t, 5*time.Minute, name+" Available", etcdClusterAvailable(imageOverrideNamespace, name)) + + pod := etcdMemberPod(ctx, t, name) + + // Operator-wide repository default reaches the member Pod's etcd container. + var etcdImage string + for _, c := range pod.Spec.Containers { + if c.Name == "etcd" { + etcdImage = c.Image + } + } + if want := operatorDefaultMirror + ":v" + imageOverrideVersion; etcdImage != want { + t.Errorf("etcd member image = %q, want operator-wide mirror default %q", etcdImage, want) + } + + // Pull-secret passthrough (spec.imagePullSecrets -> member Pod). + if len(pod.Spec.ImagePullSecrets) != 1 || pod.Spec.ImagePullSecrets[0].Name != pullSecret { + t.Errorf("pod imagePullSecrets = %+v, want [%s]", pod.Spec.ImagePullSecrets, pullSecret) } } @@ -166,17 +127,3 @@ func etcdMemberPod(ctx context.Context, t *testing.T, cluster string) *corev1.Po } return &pods.Items[0] } - -// etcdMemberImage returns the etcd container's image from a member Pod of the -// named cluster. -func etcdMemberImage(ctx context.Context, t *testing.T, cluster string) string { - t.Helper() - pod := etcdMemberPod(ctx, t, cluster) - for _, c := range pod.Spec.Containers { - if c.Name == "etcd" { - return c.Image - } - } - t.Fatalf("pod %s has no etcd container", pod.Name) - return "" // unreachable -}