diff --git a/Makefile b/Makefile index f7829447..8dcd2bcb 100644 --- a/Makefile +++ b/Makefile @@ -192,6 +192,7 @@ deploy: manifests require-helm ## Install/upgrade the operator (CRDs + RBAC + ma img='$(IMG)'; $(HELM) upgrade --install $(HELM_RELEASE) charts/etcd-operator \ --namespace $(NAMESPACE) --create-namespace \ --set image.repository="$${img%:*}" --set image.tag="$${img##*:}" \ + $(HELM_EXTRA_ARGS) \ --wait --timeout 5m .PHONY: undeploy diff --git a/api/v1alpha2/etcdcluster_types.go b/api/v1alpha2/etcdcluster_types.go index 67eba1c4..8bdd5253 100644 --- a/api/v1alpha2/etcdcluster_types.go +++ b/api/v1alpha2/etcdcluster_types.go @@ -558,6 +558,17 @@ type EtcdClusterSpec struct { // tuning change in place. // +optional Options *EtcdOptions `json:"options,omitempty"` + + // ImagePullSecrets is a list of Secret references in the cluster's + // namespace used to pull the etcd (and restore initContainer) image from + // a private registry — e.g. an air-gapped mirror behind credentials. + // Passed straight through to each member Pod's spec.imagePullSecrets. + // + // Changes take effect on newly-created members (scale-up, replacement); + // the operator does not roll existing Pods. Latched through + // status.observed. + // +optional + ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets,omitempty"` } // AdditionalMetadata is a set of labels and annotations the operator merges @@ -624,6 +635,11 @@ type ObservedClusterSpec struct { // reached. // +optional Options *EtcdOptions `json:"options,omitempty"` + + // ImagePullSecrets is the locked target pull-secret list for member + // Pods. Latched with the rest of the target spec. + // +optional + ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets,omitempty"` } // EtcdClusterStatus defines the observed state of an etcd cluster. 
diff --git a/api/v1alpha2/etcdmember_types.go b/api/v1alpha2/etcdmember_types.go index b6c737a7..7c50fbd3 100644 --- a/api/v1alpha2/etcdmember_types.go +++ b/api/v1alpha2/etcdmember_types.go @@ -116,6 +116,12 @@ type EtcdMemberSpec struct { // +optional Options *EtcdOptions `json:"options,omitempty"` + // ImagePullSecrets mirrors EtcdCluster.spec.imagePullSecrets at the time + // this member was created. Passed straight to the Pod's + // spec.imagePullSecrets at build time. + // +optional + ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets,omitempty"` + // Bootstrap indicates this member is part of the initial cluster formation. // When true the member starts with --initial-cluster-state=new. // +optional diff --git a/api/v1alpha2/zz_generated.deepcopy.go b/api/v1alpha2/zz_generated.deepcopy.go index 47b13ce8..a3a5794e 100644 --- a/api/v1alpha2/zz_generated.deepcopy.go +++ b/api/v1alpha2/zz_generated.deepcopy.go @@ -257,6 +257,11 @@ func (in *EtcdClusterSpec) DeepCopyInto(out *EtcdClusterSpec) { *out = new(EtcdOptions) (*in).DeepCopyInto(*out) } + if in.ImagePullSecrets != nil { + in, out := &in.ImagePullSecrets, &out.ImagePullSecrets + *out = make([]v1.LocalObjectReference, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EtcdClusterSpec. 
@@ -411,6 +416,11 @@ func (in *EtcdMemberSpec) DeepCopyInto(out *EtcdMemberSpec) { *out = new(EtcdOptions) (*in).DeepCopyInto(*out) } + if in.ImagePullSecrets != nil { + in, out := &in.ImagePullSecrets, &out.ImagePullSecrets + *out = make([]v1.LocalObjectReference, len(*in)) + copy(*out, *in) + } if in.TLS != nil { in, out := &in.TLS, &out.TLS *out = new(EtcdMemberTLS) @@ -650,6 +660,11 @@ func (in *ObservedClusterSpec) DeepCopyInto(out *ObservedClusterSpec) { *out = new(EtcdOptions) (*in).DeepCopyInto(*out) } + if in.ImagePullSecrets != nil { + in, out := &in.ImagePullSecrets, &out.ImagePullSecrets + *out = make([]v1.LocalObjectReference, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ObservedClusterSpec. diff --git a/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdclusters.yaml b/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdclusters.yaml index 6be40900..ed4d78ad 100644 --- a/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdclusters.yaml +++ b/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdclusters.yaml @@ -1166,6 +1166,33 @@ spec: rule: '!has(self.source.pvc) || (has(self.source.pvc.subPath) && size(self.source.pvc.subPath) > 0)' type: object + imagePullSecrets: + description: |- + ImagePullSecrets is a list of Secret references in the cluster's + namespace used to pull the etcd (and restore initContainer) image from + a private registry — e.g. an air-gapped mirror behind credentials. + Passed straight through to each member Pod's spec.imagePullSecrets. + + Changes take effect on newly-created members (scale-up, replacement); + the operator does not roll existing Pods. Latched through + status.observed. + items: + description: |- + LocalObjectReference contains enough information to let you locate the + referenced object inside the same namespace. + properties: + name: + default: "" + description: |- + Name of the referent. 
+ This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + type: array options: description: |- Options carries etcd server tuning flags (backend quota, @@ -2856,6 +2883,27 @@ spec: x-kubernetes-list-type: atomic type: object type: object + imagePullSecrets: + description: |- + ImagePullSecrets is the locked target pull-secret list for member + Pods. Latched with the rest of the target spec. + items: + description: |- + LocalObjectReference contains enough information to let you locate the + referenced object inside the same namespace. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + type: array options: description: |- Options is the locked target etcd tuning flags for member Pods. diff --git a/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdmembers.yaml b/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdmembers.yaml index 7d47d313..36c945ab 100644 --- a/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdmembers.yaml +++ b/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdmembers.yaml @@ -1020,6 +1020,28 @@ spec: the same ClusterID and member ID. While dormant, the member does not count toward the EtcdCluster's `current` replica accounting. type: boolean + imagePullSecrets: + description: |- + ImagePullSecrets mirrors EtcdCluster.spec.imagePullSecrets at the time + this member was created. 
Passed straight to the Pod's + spec.imagePullSecrets at build time. + items: + description: |- + LocalObjectReference contains enough information to let you locate the + referenced object inside the same namespace. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + type: array initialCluster: description: |- InitialCluster is the value passed to etcd's --initial-cluster flag. diff --git a/charts/etcd-operator/templates/deployment.yaml b/charts/etcd-operator/templates/deployment.yaml index 69bbb487..feb4eecd 100644 --- a/charts/etcd-operator/templates/deployment.yaml +++ b/charts/etcd-operator/templates/deployment.yaml @@ -53,6 +53,12 @@ spec: # is left at a placeholder. - name: OPERATOR_IMAGE value: {{ include "etcd-operator.image" . }} + # Operator-wide default etcd image repository for member Pods. Always + # set, so the operator's built-in fallback is only reached when the + # binary runs outside this chart. Repoint here for an air-gapped mirror; + # the tag is always v<spec.version>. + - name: ETCD_IMAGE_REPOSITORY + value: {{ .Values.etcdImage.repository | quote }} livenessProbe: httpGet: path: /healthz diff --git a/charts/etcd-operator/values.yaml b/charts/etcd-operator/values.yaml index fb68d85c..806ca27d 100644 --- a/charts/etcd-operator/values.yaml +++ b/charts/etcd-operator/values.yaml @@ -29,6 +29,28 @@ image: # -- Image pull policy. pullPolicy: IfNotPresent +# Operator-wide default for the etcd image that runs in member Pods (NOT the +# operator's own image above). Set the repository here to point every cluster +# at an air-gapped mirror once.
Per-cluster pull credentials go on an +# EtcdCluster's spec.imagePullSecrets (a Secret in the cluster's own +# namespace), not here. +etcdImage: + # -- Default etcd image repository (registry host + path, no tag) for member + # Pods. Repoint at an air-gapped mirror, e.g. registry.internal/mirror/etcd. + # The chart always wires this into the operator's ETCD_IMAGE_REPOSITORY. + # + # Only the repository is configurable; the tag is always derived from each + # cluster's spec.version as "v<spec.version>" (e.g. v3.6.11). There is no + # per-cluster repository/tag override — the operator keys all + # version-dependent behaviour off spec.version, so a separate tag could + # silently disagree with it. + # + # Keep this in sync with the controllers.EtcdImage constant — the operator's + # built-in fallback used only when this env is unset (outside the chart). The + # chart always sets ETCD_IMAGE_REPOSITORY, so a drift between the two is + # harmless here, but bump both together to keep the no-chart default honest. + repository: quay.io/coreos/etcd + # -- Number of operator replicas (leader election picks the active one). 
replicaCount: 1 diff --git a/controllers/etcdcluster_controller.go b/controllers/etcdcluster_controller.go index 93ec54ea..32ab7632 100644 --- a/controllers/etcdcluster_controller.go +++ b/controllers/etcdcluster_controller.go @@ -430,6 +430,7 @@ func (r *EtcdClusterReconciler) bootstrap( Affinity: cluster.Status.Observed.Affinity, TopologySpreadConstraints: cluster.Status.Observed.TopologySpreadConstraints, Options: cluster.Status.Observed.Options, + ImagePullSecrets: cluster.Status.Observed.ImagePullSecrets, Bootstrap: true, ClusterToken: cluster.Status.ClusterToken, TLS: deriveMemberTLS(cluster), @@ -834,6 +835,7 @@ func (r *EtcdClusterReconciler) scaleUp( Affinity: cluster.Status.Observed.Affinity, TopologySpreadConstraints: cluster.Status.Observed.TopologySpreadConstraints, Options: cluster.Status.Observed.Options, + ImagePullSecrets: cluster.Status.Observed.ImagePullSecrets, Bootstrap: false, ClusterToken: cluster.Status.ClusterToken, TLS: deriveMemberTLS(cluster), @@ -2028,6 +2030,7 @@ func snapshotSpecIntoObserved(cluster *lll.EtcdCluster) { TopologySpreadConstraints: cluster.Spec.TopologySpreadConstraints, AdditionalMetadata: cluster.Spec.AdditionalMetadata, Options: cluster.Spec.Options, + ImagePullSecrets: cluster.Spec.ImagePullSecrets, } } @@ -2048,7 +2051,8 @@ func specEqualsObserved(cluster *lll.EtcdCluster) bool { equality.Semantic.DeepEqual(o.Affinity, cluster.Spec.Affinity) && equality.Semantic.DeepEqual(o.TopologySpreadConstraints, cluster.Spec.TopologySpreadConstraints) && equality.Semantic.DeepEqual(o.AdditionalMetadata, cluster.Spec.AdditionalMetadata) && - equality.Semantic.DeepEqual(o.Options, cluster.Spec.Options) + equality.Semantic.DeepEqual(o.Options, cluster.Spec.Options) && + equality.Semantic.DeepEqual(o.ImagePullSecrets, cluster.Spec.ImagePullSecrets) } // observedAdditionalMetadata returns the latched additionalMetadata target diff --git a/controllers/etcdmember_controller.go b/controllers/etcdmember_controller.go index 
fe2274ac..6af23952 100644 --- a/controllers/etcdmember_controller.go +++ b/controllers/etcdmember_controller.go @@ -49,6 +49,13 @@ type EtcdMemberReconciler struct { // OperatorImage is the operator's own image; the restore agent runs from // it as an initContainer on the bootstrap seed Pod. OperatorImage string + + // EtcdImageRepository is the operator-wide default etcd image repository + // (registry host + path, no tag) used for every member Pod; there is no + // per-cluster repository override. Empty falls back to the EtcdImage + // built-in. Set from --etcd-image-repository / ETCD_IMAGE_REPOSITORY; the + // common use is pointing every cluster at an air-gapped mirror once. + EtcdImageRepository string } //+kubebuilder:rbac:groups=etcd-operator.cozystack.io,resources=etcdmembers,verbs=get;list;watch;update;patch @@ -751,6 +758,8 @@ func (r *EtcdMemberReconciler) buildPod(member *lll.EtcdMember) *corev1.Pod { volumes = append(volumes, extraVols...) } + etcdImage := resolveEtcdImage(member, r.EtcdImageRepository) + return &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: member.Name, @@ -763,6 +772,7 @@ func (r *EtcdMemberReconciler) buildPod(member *lll.EtcdMember) *corev1.Pod { Subdomain: memberServiceName(member), Affinity: member.Spec.Affinity, TopologySpreadConstraints: member.Spec.TopologySpreadConstraints, + ImagePullSecrets: member.Spec.ImagePullSecrets, InitContainers: initContainers, // etcd and the restore agent never call the Kubernetes API, so // don't mount a ServiceAccount token into the member Pod (matches @@ -779,7 +789,7 @@ func (r *EtcdMemberReconciler) buildPod(member *lll.EtcdMember) *corev1.Pod { }, Containers: []corev1.Container{{ Name: "etcd", - Image: fmt.Sprintf("%s:v%s", EtcdImage, member.Spec.Version), + Image: etcdImage, SecurityContext: &corev1.SecurityContext{ AllowPrivilegeEscalation: ptrBool(false), Capabilities: &corev1.Capabilities{ diff --git a/controllers/etcdmember_controller_test.go 
b/controllers/etcdmember_controller_test.go index 69fd0565..8fc2ab8a 100644 --- a/controllers/etcdmember_controller_test.go +++ b/controllers/etcdmember_controller_test.go @@ -1016,6 +1016,37 @@ func TestBuildPod_LivenessIsNotQuorumAware(t *testing.T) { } } +// TestBuildPod_ImageRepoAndPullSecrets covers the air-gap path: buildPod +// resolves the etcd image against the operator-wide default repository (pinned +// to spec.version) and stamps the member's imagePullSecrets onto the Pod. +func TestBuildPod_ImageRepoAndPullSecrets(t *testing.T) { + t.Run("operator default repo, version-derived tag", func(t *testing.T) { + r := &EtcdMemberReconciler{EtcdImageRepository: "registry.internal/mirror/etcd"} + pod := r.buildPod(&lll.EtcdMember{ + ObjectMeta: metav1.ObjectMeta{Name: "test-0", Namespace: "ns"}, + Spec: lll.EtcdMemberSpec{ClusterName: "test", Version: "3.6.11"}, + }) + if got := pod.Spec.Containers[0].Image; got != "registry.internal/mirror/etcd:v3.6.11" { + t.Errorf("image = %q, want operator-default mirror", got) + } + }) + + t.Run("pull secrets are stamped onto the Pod", func(t *testing.T) { + r := &EtcdMemberReconciler{EtcdImageRepository: "registry.internal/mirror/etcd"} + pod := r.buildPod(&lll.EtcdMember{ + ObjectMeta: metav1.ObjectMeta{Name: "test-0", Namespace: "ns"}, + Spec: lll.EtcdMemberSpec{ + ClusterName: "test", + Version: "3.6.11", + ImagePullSecrets: []corev1.LocalObjectReference{{Name: "regcreds"}}, + }, + }) + if len(pod.Spec.ImagePullSecrets) != 1 || pod.Spec.ImagePullSecrets[0].Name != "regcreds" { + t.Errorf("pod.imagePullSecrets = %+v, want [regcreds]", pod.Spec.ImagePullSecrets) + } + }) +} + // TestBuildPod_AppliesSchedulingAndMetadata covers the additionalMetadata, // affinity, and topologySpreadConstraints passthrough: buildPod must stamp // the Pod with the member's scheduling fields and merge the extra diff --git a/controllers/helpers.go b/controllers/helpers.go index a67269ce..087038a7 100644 --- a/controllers/helpers.go +++ 
b/controllers/helpers.go @@ -23,7 +23,11 @@ const ( LabelRole = "etcd-operator.cozystack.io/role" RoleVoter = "voter" - // EtcdImage is the container image repository for etcd. + // EtcdImage is the built-in fallback etcd image repository (registry + // host + path, no tag). It is used when the operator-wide + // --etcd-image-repository / ETCD_IMAGE_REPOSITORY default is unset. See + // resolveEtcdImage. Keep in sync with the chart's etcdImage.repository + // default in charts/etcd-operator/values.yaml. EtcdImage = "quay.io/coreos/etcd" // MemberFinalizer is placed on EtcdMember resources to ensure @@ -108,6 +112,20 @@ func memberDataDir(member *lll.EtcdMember) string { return path.Join(etcdDataDirRoot, sub) } +// resolveEtcdImage resolves a member's etcd container image from the +// operator-wide repository default (defaultRepo, from the operator's +// --etcd-image-repository flag) or the EtcdImage built-in when that is empty, +// tagged with "v"+spec.version. The operator keys every version-dependent +// behaviour off spec.version, so the image is always pinned to that version — +// there is no per-cluster tag override that could disagree with it. +func resolveEtcdImage(member *lll.EtcdMember, defaultRepo string) string { + repo := defaultRepo + if repo == "" { + repo = EtcdImage + } + return repo + ":v" + member.Spec.Version +} + // peerURL returns the etcd peer URL for a member, using the headless Service // DNS. 
`service` is the headless Service name the member resolves under — // resolve it per-member via memberServiceName (the cluster's own name by diff --git a/controllers/helpers_test.go b/controllers/helpers_test.go index 5457d165..e594908b 100644 --- a/controllers/helpers_test.go +++ b/controllers/helpers_test.go @@ -141,3 +141,38 @@ func TestMemberEndpoints_PerMemberServiceName(t *testing.T) { } } } + +// TestResolveEtcdImage pins the operator-wide repository resolution: the +// --etcd-image-repository default (or the EtcdImage built-in when unset), +// always tagged "v"+spec.version. +func TestResolveEtcdImage(t *testing.T) { + member := func(version string) *lll.EtcdMember { + return &lll.EtcdMember{Spec: lll.EtcdMemberSpec{Version: version}} + } + + cases := []struct { + name string + member *lll.EtcdMember + defaultRepo string + wantImage string + }{ + { + name: "no operator default → built-in repo + v", + member: member("3.6.11"), + wantImage: EtcdImage + ":v3.6.11", + }, + { + name: "operator default repo, version-derived tag", + member: member("3.6.11"), + defaultRepo: "registry.internal/mirror/etcd", + wantImage: "registry.internal/mirror/etcd:v3.6.11", + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if img := resolveEtcdImage(tc.member, tc.defaultRepo); img != tc.wantImage { + t.Errorf("image = %q, want %q", img, tc.wantImage) + } + }) + } +} diff --git a/docs/installation.md b/docs/installation.md index 73d3378b..bcf2a932 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -97,7 +97,8 @@ Common values (`--set key=value`, or a `-f my-values.yaml`): | `metrics.serviceMonitor.enabled` | `false` | Create a prometheus-operator `ServiceMonitor` for the metrics endpoint (needs the `monitoring.coreos.com` CRDs and `kubeRbacProxy.enabled`). | | `crds.enabled` / `crds.keep` | `true` / `true` | Render the CRDs with the release / annotate them so uninstall keeps them. 
| | `manager.resources` | 10m/64Mi → 500m/128Mi | Manager container requests/limits. | -| `imagePullSecrets` | `[]` | Pull secrets for a private registry mirror. | +| `imagePullSecrets` | `[]` | Pull secrets for the **operator's own** image (private registry mirror). | +| `etcdImage.repository` | `quay.io/coreos/etcd` | Operator-wide default **etcd** image repo for member Pods (always wired into `ETCD_IMAGE_REPOSITORY`). Repoint at an air-gapped mirror once; the tag is always `v<spec.version>`. | See `charts/etcd-operator/values.yaml` for the complete, annotated list. Verify the install: @@ -243,7 +244,21 @@ The `spec.tls` subtree is immutable post-create — flipping TLS on or off on an ## Image versions -`spec.version` in an `EtcdCluster` becomes `quay.io/coreos/etcd:v<spec.version>`. The image repository is hard-coded in `controllers/helpers.go:EtcdImage`. Override it by patching the operator image with your own registry/repo if you mirror etcd internally. +By default `spec.version` in an `EtcdCluster` becomes `quay.io/coreos/etcd:v<spec.version>`. For an air-gapped environment that mirrors the image to a private registry, repoint the **repository** operator-wide and supply per-cluster pull credentials: + +- **Repository (operator-wide)** — set `etcdImage.repository` in the chart (env `ETCD_IMAGE_REPOSITORY` / flag `--etcd-image-repository`) to a registry/path, e.g. `registry.internal/mirror/etcd`. Every member Pod the operator creates pulls from it; the tag is always `v<spec.version>`. Mirror once per fleet — there is intentionally no per-cluster repository/tag override, because the operator keys every version-dependent behaviour (the restore version-compat pre-flight, the latched target, drift detection) off `spec.version`, and a per-cluster `tag` could silently disagree with it. 
+- **Pull credentials (per-cluster)** — `spec.imagePullSecrets` on an `EtcdCluster`: + + ```yaml + spec: + version: "3.6.11" + imagePullSecrets: + - name: regcreds # Secret in the cluster's namespace + ``` + + It references pull-credential Secrets in the cluster's own namespace and is passed straight through to each member Pod. Like `spec.resources`, a change applies to **newly-created** members (scale-up, replacement), not existing Pods in place. + + `spec.imagePullSecrets` is set Pod-wide, so it **does** cover the member Pod's restore initContainer (which runs the operator image at bootstrap-from-snapshot) — a `spec.bootstrap.restore` can pull the mirrored operator image via these secrets. Standalone `EtcdSnapshot` backup/restore **Jobs** also run the operator image but are **not** covered by `spec.imagePullSecrets`: in a fully air-gapped install repoint the operator image (chart `image.repository`) and make sure the snapshot's namespace can already pull it, or those Jobs `ImagePullBackOff`. The operator Pod itself uses the chart-level `imagePullSecrets`. The `spec.version` examples throughout these docs use **3.6.x**, to match the `etcdutl` bundled in the operator image: `spec.bootstrap.restore` requires `spec.version` and that `etcdutl` to share a minor (see the [restore runbook](operations.md#restoring-a-cluster-from-a-snapshot)). The operator's etcd client is v3.6.x and is wire-compatible with 3.5.x servers, so a cluster you never restore into can still run 3.5.x — but to back up and restore on the same version, run 3.6.x. 
diff --git a/docs/migration.md b/docs/migration.md index 41bbf308..d5e2bcb9 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -147,7 +147,13 @@ What gets migrated: Every legacy knob with no v1alpha2 equivalent (`spec.options` keys beyond the [four typed ones](#specoptions-free-form-map--typed-fields), service/PDB templates, podTemplate overrides beyond affinity/topology-spread/resources/ -metadata) is reported as a warning — review them before `--apply`. Hard +metadata) is reported as a warning — review them before `--apply`. +`podTemplate.spec.imagePullSecrets` is **carried** into `spec.imagePullSecrets` +(it used to be dropped) — so an air-gapped cluster keeps its credentials to pull +from its mirror after the operator rolls a replacement Pod. (The etcd image's +registry/tag is not carried; repoint the mirror operator-wide via +`--etcd-image-repository`, since the operator pins the image to `spec.version`.) +Hard blockers (`emptyDir` storage — nothing to adopt, an unparsable etcd image tag without `--version`, `enableAuth` without server TLS, a non-integer `quota-backend-bytes`/`snapshot-count`, a failed inspection) skip that diff --git a/hack/e2e.sh b/hack/e2e.sh index dcb2ffb5..34d14814 100755 --- a/hack/e2e.sh +++ b/hack/e2e.sh @@ -109,9 +109,30 @@ helm upgrade --install kamaji clastix/kamaji \ echo "--- building and deploying the operator ($IMG)" docker build -t "$IMG" . kind load docker-image "$IMG" --name "$KIND_CLUSTER_NAME" + +# Air-gap image-repository coverage (TestEtcdImageOverride). The mirror +# registry below never resolves over the network, so re-tag the upstream etcd +# image under that name and side-load it into the node. With the kubelet's +# default IfNotPresent policy for a fixed tag it uses the locally-present image +# and never dials registry.internal — exactly how a private air-gapped mirror +# behaves, but with no registry to stand up. 
+# +# The tag must track test/e2e/testdata/02-etcdcluster.yaml's spec.version +# (operator pulls "v<spec.version>"); the override test pins the same. +ETCD_UPSTREAM=quay.io/coreos/etcd:v3.6.11 +OPERATOR_DEFAULT_MIRROR=registry.internal/mirror/etcd +echo "--- side-loading the mirrored etcd image for the air-gap repository test" +docker pull "$ETCD_UPSTREAM" +docker tag "$ETCD_UPSTREAM" "$OPERATOR_DEFAULT_MIRROR:v3.6.11" +kind load docker-image "$OPERATOR_DEFAULT_MIRROR:v3.6.11" --name "$KIND_CLUSTER_NAME" + # Helm install: CRDs are templated into the release and image == OPERATOR_IMAGE # is wired by the chart, so this one command lands CRDs + RBAC + manager. -make deploy IMG="$IMG" +# etcdImage.repository points the operator-wide default at the mirror: this is +# what exercises the chart-value -> ETCD_IMAGE_REPOSITORY env -> flag -> +# resolveEtcdImage -> member Pod chain (the value differs from the built-in +# EtcdImage constant, so a typo anywhere in that chain is caught). +make deploy IMG="$IMG" HELM_EXTRA_ARGS="--set etcdImage.repository=$OPERATOR_DEFAULT_MIRROR" # Select by the chart's control-plane label rather than a fixed Deployment name. 
kubectl -n etcd-operator-system wait deploy \ -l control-plane=controller-manager \ diff --git a/internal/migrate/adopt.go b/internal/migrate/adopt.go index 66deed13..7410ecee 100644 --- a/internal/migrate/adopt.go +++ b/internal/migrate/adopt.go @@ -279,6 +279,7 @@ func BuildAdoption(name, namespace string, spec legacy.EtcdClusterSpec, facts Cl Affinity: cluster.Spec.Affinity, TopologySpreadConstraints: cluster.Spec.TopologySpreadConstraints, Options: cluster.Spec.Options, + ImagePullSecrets: cluster.Spec.ImagePullSecrets, Bootstrap: false, InitialCluster: initialCluster, ClusterToken: token, @@ -311,6 +312,7 @@ func BuildAdoption(name, namespace string, spec legacy.EtcdClusterSpec, facts Cl TopologySpreadConstraints: cluster.Spec.TopologySpreadConstraints, AdditionalMetadata: cluster.Spec.AdditionalMetadata, Options: cluster.Spec.Options, + ImagePullSecrets: cluster.Spec.ImagePullSecrets, }, } plan.Adoption.StatefulSetName = name diff --git a/internal/migrate/translate.go b/internal/migrate/translate.go index 85e47dcb..3dce42db 100644 --- a/internal/migrate/translate.go +++ b/internal/migrate/translate.go @@ -290,11 +290,22 @@ func translatePodTemplate(pt legacy.PodTemplate, out *lll.EtcdCluster, plan *Res out.Spec.Affinity = ps.Affinity out.Spec.TopologySpreadConstraints = ps.TopologySpreadConstraints + // Carry pull secrets so the new operator can still pull from a private + // (e.g. air-gapped) registry. v1alpha2 grew spec.imagePullSecrets, so + // this is no longer dropped. + if len(ps.ImagePullSecrets) > 0 { + out.Spec.ImagePullSecrets = ps.ImagePullSecrets + } + var dropped []string if c := findContainer(ps.Containers, "etcd"); c != nil { out.Spec.Resources = c.Resources - // Image and Resources are consumed above; everything else on the - // etcd container is an unmappable override. + // Image (consumed above by extractVersion → spec.version) and Resources + // are mapped; everything else on the etcd container is an unmappable + // override. 
The image's registry/tag is deliberately not carried: the + // operator pins the etcd image to spec.version, so a private mirror is + // repointed via the operator-wide --etcd-image-repository, not per + // cluster. for field, set := range map[string]bool{ "command": len(c.Command) > 0, "args": len(c.Args) > 0, @@ -325,7 +336,6 @@ func translatePodTemplate(pt legacy.PodTemplate, out *lll.EtcdCluster, plan *Res "serviceAccountName": ps.ServiceAccountName != "", "securityContext": ps.SecurityContext != nil && !equality.Semantic.DeepEqual(*ps.SecurityContext, corev1.PodSecurityContext{}), "priorityClassName": ps.PriorityClassName != "", - "imagePullSecrets": len(ps.ImagePullSecrets) > 0, "hostNetwork": ps.HostNetwork, "hostAliases": len(ps.HostAliases) > 0, "dnsPolicy": ps.DNSPolicy != "", diff --git a/internal/migrate/translate_test.go b/internal/migrate/translate_test.go index 23be94ba..5549adfa 100644 --- a/internal/migrate/translate_test.go +++ b/internal/migrate/translate_test.go @@ -208,6 +208,36 @@ func TestTranslateCluster_KitchenSink(t *testing.T) { } } +// TestTranslateCluster_PullSecretsCarried pins the air-gap migration path: +// a legacy podTemplate's imagePullSecrets are carried into +// spec.imagePullSecrets (they used to be dropped), so an adopted cluster keeps +// its credentials to pull from a private mirror. The etcd image's +// registry/tag is deliberately NOT carried — the operator pins the image to +// spec.version and repoints mirrors operator-wide. 
+func TestTranslateCluster_PullSecretsCarried(t *testing.T) {
+	base := legacy.EtcdClusterSpec{
+		Storage: legacy.StorageSpec{VolumeClaimTemplate: legacy.EmbeddedPersistentVolumeClaim{
+			Spec: corev1.PersistentVolumeClaimSpec{Resources: corev1.VolumeResourceRequirements{
+				Requests: corev1.ResourceList{corev1.ResourceStorage: qty(t, "1Gi")}}},
+		}},
+	}
+
+	spec := base
+	spec.PodTemplate.Spec.Containers = []corev1.Container{
+		{Name: "etcd", Image: "registry.internal/mirror/etcd:v3.6.11"}}
+	spec.PodTemplate.Spec.ImagePullSecrets = []corev1.LocalObjectReference{{Name: "regcreds"}}
+
+	plan := TranslateCluster("c", "ns", spec, TranslateOptions{})
+	out := clusterTarget(t, plan)
+
+	if len(out.Spec.ImagePullSecrets) != 1 || out.Spec.ImagePullSecrets[0].Name != "regcreds" {
+		t.Errorf("spec.imagePullSecrets = %+v, want [regcreds]", out.Spec.ImagePullSecrets)
+	}
+	if hasWarning(plan.Warnings, "imagePullSecrets") {
+		t.Errorf("imagePullSecrets must no longer be dropped; warnings: %v", plan.Warnings)
+	}
+}
+
 // TestTranslateCluster_VersionExtraction pins the image-tag → spec.version
 // rules across default, override, and unparsable images.
 func TestTranslateCluster_VersionExtraction(t *testing.T) {
diff --git a/main.go b/main.go
index 1aac2b56..3d6450ba 100644
--- a/main.go
+++ b/main.go
@@ -114,6 +114,7 @@ func main() {
 	var probeAddr string
 	var clusterDomain string
 	var operatorImage string
+	var etcdImageRepository string
 	flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
 	flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
 	flag.BoolVar(&enableLeaderElection, "leader-elect", false,
@@ -132,6 +133,12 @@ func main() {
 		"Operator image reference. The snapshot/restore agents run from this same "+
 		"image (Job / initContainer). Defaults to $OPERATOR_IMAGE; required for "+
 		"EtcdSnapshot and spec.bootstrap.restore to function.")
+	flag.StringVar(&etcdImageRepository, "etcd-image-repository", os.Getenv("ETCD_IMAGE_REPOSITORY"),
+		"Operator-wide default etcd image repository (registry host + path, no tag), "+
+		"e.g. 'registry.internal/mirror/etcd'. Used for every member Pod — point "+
+		"air-gapped deployments at a mirror once; the tag is always v{spec.version}. "+
+		"Defaults to $ETCD_IMAGE_REPOSITORY; when empty the built-in "+
+		"quay.io/coreos/etcd is used.")
 	opts := zap.Options{
 		Development: true,
 	}
@@ -204,9 +211,10 @@ func main() {
 		os.Exit(1)
 	}
 	if err = (&controllers.EtcdMemberReconciler{
-		Client:        mgr.GetClient(),
-		Scheme:        mgr.GetScheme(),
-		OperatorImage: operatorImage,
+		Client:              mgr.GetClient(),
+		Scheme:              mgr.GetScheme(),
+		OperatorImage:       operatorImage,
+		EtcdImageRepository: etcdImageRepository,
 	}).SetupWithManager(mgr); err != nil {
 		setupLog.Error(err, "unable to create controller", "controller", "EtcdMember")
 		os.Exit(1)
diff --git a/test/e2e/image_override_test.go b/test/e2e/image_override_test.go
new file mode 100644
index 00000000..47918d3e
--- /dev/null
+++ b/test/e2e/image_override_test.go
@@ -0,0 +1,134 @@
+//go:build e2e
+
+package e2e
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	etcdv1alpha2 "github.com/cozystack/etcd-operator/api/v1alpha2"
+)
+
+const (
+	// imageOverrideNamespace isolates this test's cluster from the Kamaji
+	// suite. labelCluster matches controllers.LabelCluster (kept as a literal
+	// here, mirroring kamaji_datastore_test.go, to avoid importing the
+	// controllers package into the e2e suite).
+	imageOverrideNamespace = "airgap-e2e"
+	labelCluster           = "etcd-operator.cozystack.io/cluster"
+
+	// Must stay in sync with hack/e2e.sh, which side-loads the upstream etcd
+	// image under operatorDefaultMirror and deploys the operator with
+	// etcdImage.repository=operatorDefaultMirror. The version tracks
+	// test/e2e/testdata/02-etcdcluster.yaml.
+	imageOverrideVersion  = "3.6.11"
+	operatorDefaultMirror = "registry.internal/mirror/etcd"
+)
+
+// TestEtcdImageOverride proves the air-gap contract end to end against a real
+// cluster — the thing the unit tests cannot: that the operator-wide repository
+// default actually reaches a member Pod and the member comes up pulling from
+// it, and that spec.imagePullSecrets rides through to the Pod.
+//
+// The operator-wide default resolves through chart value -> ETCD_IMAGE_REPOSITORY
+// env -> --etcd-image-repository flag -> resolveEtcdImage -> buildPod. Because
+// the harness points that default at a mirror whose name differs from the
+// built-in EtcdImage constant, a typo anywhere in that chain would surface here
+// as the wrong (or unpullable) image. The cluster reaching Available means the
+// kubelet actually pulled the mirror reference (side-loaded as IfNotPresent)
+// and the member joined quorum.
+func TestEtcdImageOverride(t *testing.T) {
+	ctx := context.Background()
+
+	// TypeMeta is mandatory for server-side apply: the apiserver resolves the
+	// target resource from apiVersion/Kind, which a Go-constructed object
+	// (unlike one decoded from YAML) does not carry by default.
+	ns := &corev1.Namespace{
+		TypeMeta:   metav1.TypeMeta{APIVersion: "v1", Kind: "Namespace"},
+		ObjectMeta: metav1.ObjectMeta{Name: imageOverrideNamespace},
+	}
+	if err := kube.Patch(ctx, ns, client.Apply, fieldOwner, client.ForceOwnership); err != nil {
+		t.Fatalf("create namespace %s: %v", imageOverrideNamespace, err)
+	}
+	t.Cleanup(func() {
+		// Best-effort teardown: the Delete error is deliberately discarded.
+		_ = kube.Delete(context.Background(), &corev1.Namespace{
+			ObjectMeta: metav1.ObjectMeta{Name: imageOverrideNamespace}})
+	})
+
+	// A pull-credentials Secret in the cluster's own namespace, referenced by
+	// spec.imagePullSecrets. The side-loaded image needs no real pull, but the
+	// Secret must exist and flow through to the Pod unchanged.
+	const name, pullSecret = "etcd-airgap", "mirror-regcreds"
+	sec := &corev1.Secret{
+		TypeMeta:   metav1.TypeMeta{APIVersion: "v1", Kind: "Secret"},
+		ObjectMeta: metav1.ObjectMeta{Name: pullSecret, Namespace: imageOverrideNamespace},
+		Type:       corev1.SecretTypeDockerConfigJson,
+		StringData: map[string]string{".dockerconfigjson": `{"auths":{}}`},
+	}
+	if err := kube.Patch(ctx, sec, client.Apply, fieldOwner, client.ForceOwnership); err != nil {
+		t.Fatalf("create pull secret: %v", err)
+	}
+
+	one := int32(1)
+	ec := &etcdv1alpha2.EtcdCluster{
+		ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: imageOverrideNamespace},
+		Spec: etcdv1alpha2.EtcdClusterSpec{
+			Replicas:         &one,
+			Version:          imageOverrideVersion,
+			Storage:          etcdv1alpha2.StorageSpec{Size: resource.MustParse("1Gi")},
+			ImagePullSecrets: []corev1.LocalObjectReference{{Name: pullSecret}},
+		},
+	}
+	if err := kube.Create(ctx, ec); err != nil {
+		t.Fatalf("create EtcdCluster %s: %v", name, err)
+	}
+	t.Cleanup(func() {
+		// Best-effort teardown: the Delete error is deliberately discarded.
+		_ = kube.Delete(context.Background(), &etcdv1alpha2.EtcdCluster{
+			ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: imageOverrideNamespace}})
+	})
+
+	waitFor(ctx, t, 5*time.Minute, name+" Available", etcdClusterAvailable(imageOverrideNamespace, name))
+
+	pod := etcdMemberPod(ctx, t, name)
+
+	// Operator-wide repository default reaches the member Pod's etcd container.
+	var etcdImage string
+	for _, c := range pod.Spec.Containers {
+		if c.Name == "etcd" {
+			etcdImage = c.Image
+		}
+	}
+	if want := operatorDefaultMirror + ":v" + imageOverrideVersion; etcdImage != want {
+		t.Errorf("etcd member image = %q, want operator-wide mirror default %q", etcdImage, want)
+	}
+
+	// Pull-secret passthrough (spec.imagePullSecrets -> member Pod).
+	if len(pod.Spec.ImagePullSecrets) != 1 || pod.Spec.ImagePullSecrets[0].Name != pullSecret {
+		t.Errorf("pod imagePullSecrets = %+v, want [%s]", pod.Spec.ImagePullSecrets, pullSecret)
+	}
+}
+
+// etcdMemberPod returns one member Pod of the named cluster. It lists Pods in
+// imageOverrideNamespace whose labelCluster label equals cluster and fails the
+// test immediately if the list call errors or matches no Pod; when several
+// Pods match, a pointer to the first list item is returned.
+func etcdMemberPod(ctx context.Context, t *testing.T, cluster string) *corev1.Pod {
+	t.Helper()
+	pods := &corev1.PodList{}
+	if err := kube.List(ctx, pods, client.InNamespace(imageOverrideNamespace),
+		client.MatchingLabels{labelCluster: cluster}); err != nil {
+		t.Fatalf("list member pods for %s: %v", cluster, err)
+	}
+	if len(pods.Items) == 0 {
+		t.Fatalf("no member pods for cluster %s", cluster)
+	}
+	return &pods.Items[0]
+}