From f709d0c7a781d7e99e9bbf827c45ea50f5a434ff Mon Sep 17 00:00:00 2001 From: Jonathan Jamroga Date: Mon, 29 Jun 2026 14:00:24 -0400 Subject: [PATCH 1/9] atenet: make system namespace and component Service names configurable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dns-controller (`atenet dns`) and router (`atenet router`) hardcoded the substrate namespace ("ate-system") and the component Service names ("atenet-router", "dns") from the canonical install manifests under `manifests/ate-install/`. Deployments that deviate from that layout — running in a different namespace, renaming the Services, or composing substrate into a larger install that rewrites resource names — silently break: the dns-controller can't find atenet-router, the router can't find itself for /statusz, and the cluster's actor DNS never gets patched. Expose the relevant names as flags on the cobra commands and as fields on `dns.Controller` / `router.RouterConfig`. Defaults match the values in `manifests/ate-install/` so existing deployments are unaffected: atenet dns: --system-namespace (default "ate-system") --router-service-name (default "atenet-router") --dns-service-name (default "dns") atenet router: --router-service-name (default "atenet-router") --- cmd/atenet/internal/dns.go | 17 +++++++++--- cmd/atenet/internal/dns/dns.go | 40 ++++++++++++++++++++-------- cmd/atenet/internal/dns/dns_test.go | 22 +++++++++------ cmd/atenet/internal/router.go | 1 + cmd/atenet/internal/router/router.go | 14 ++++++++-- cmd/atenet/internal/router/status.go | 2 +- 6 files changed, 70 insertions(+), 26 deletions(-) diff --git a/cmd/atenet/internal/dns.go b/cmd/atenet/internal/dns.go index 471e93679..ad9471370 100644 --- a/cmd/atenet/internal/dns.go +++ b/cmd/atenet/internal/dns.go @@ -37,6 +37,9 @@ type DnsConfig struct { Kubeconfig string ReconcileInterval time.Duration CorefilePath string + SystemNamespace string + RouterServiceName string + DNSServiceName 
string } func NewDnsCmd() *cobra.Command { @@ -87,10 +90,13 @@ func NewDnsCmd() *cobra.Command { } dnsController := &dns.Controller{ - Client: k8sClient, - Interval: cfg.ReconcileInterval, - CorefilePath: cfg.CorefilePath, - Reloader: dns.NewConfigReloader(), + Client: k8sClient, + Interval: cfg.ReconcileInterval, + CorefilePath: cfg.CorefilePath, + Reloader: dns.NewConfigReloader(), + SystemNamespace: cfg.SystemNamespace, + RouterServiceName: cfg.RouterServiceName, + DNSServiceName: cfg.DNSServiceName, } slog.InfoContext(ctx, "Starting DNS Controller subsystem") @@ -102,6 +108,9 @@ func NewDnsCmd() *cobra.Command { cmd.Flags().StringVar(&cfg.Kubeconfig, "kubeconfig", "", "Absolute path to the kubeconfig configuration file") cmd.Flags().DurationVar(&cfg.ReconcileInterval, "interval", 10*time.Second, "Interval for reconciling DNS configurations") cmd.Flags().StringVar(&cfg.CorefilePath, "corefile-path", "/etc/coredns/Corefile", "Path to the local Corefile configuration on shared volume") + cmd.Flags().StringVar(&cfg.SystemNamespace, "system-namespace", dns.DefaultSystemNamespace, "Namespace where atenet-router and substrate's CoreDNS Service live. Override when the deployment uses a different namespace.") + cmd.Flags().StringVar(&cfg.RouterServiceName, "router-service-name", dns.DefaultRouterServiceName, "Service name of the atenet-router. Override when the deployment renames the Service.") + cmd.Flags().StringVar(&cfg.DNSServiceName, "dns-service-name", dns.DefaultDNSServiceName, "Service name of substrate's CoreDNS. 
Override when the deployment renames the Service.") return cmd } diff --git a/cmd/atenet/internal/dns/dns.go b/cmd/atenet/internal/dns/dns.go index cf2db99b6..24aff0c43 100644 --- a/cmd/atenet/internal/dns/dns.go +++ b/cmd/atenet/internal/dns/dns.go @@ -33,10 +33,15 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) +// Default resource names match the canonical install manifests in +// manifests/ate-install/ (atenet-router and dns Services in the +// ate-system namespace). Deployments that use a different namespace or +// rename these Services must pass the actual values via the matching +// flags on the dns command. const ( - // serviceName is the name of the CoreDNS service. - serviceName = "dns" - systemNamespace = "ate-system" + DefaultSystemNamespace = "ate-system" + DefaultRouterServiceName = "atenet-router" + DefaultDNSServiceName = "dns" ) // Controller manages the DNS configuration for the ATE. @@ -45,6 +50,17 @@ type Controller struct { Interval time.Duration CorefilePath string Reloader ConfigReloader + + // SystemNamespace is the namespace where atenet-router and the substrate + // CoreDNS Service live. Defaults to DefaultSystemNamespace. + SystemNamespace string + // RouterServiceName is the Service name of the atenet-router that the + // CoreDNS Corefile forwards actor traffic to. Defaults to + // DefaultRouterServiceName. + RouterServiceName string + // DNSServiceName is the Service name of substrate's CoreDNS. Defaults to + // DefaultDNSServiceName. + DNSServiceName string } // Run the DNS orchestration loop until ctx is canceled. @@ -71,14 +87,15 @@ func (c *Controller) Run(ctx context.Context) error { func (c *Controller) reconcile(ctx context.Context) error { slog.DebugContext(ctx, "Reconciling DNS orchestration configuration...") - // 1. Get the ClusterIP of atenet-router in ate-system namespace + // 1. Get the ClusterIP of the atenet-router Service in the substrate namespace. 
routerSvc := &corev1.Service{} - if err := c.Client.Get(ctx, types.NamespacedName{Name: "atenet-router", Namespace: systemNamespace}, routerSvc); err != nil { + if err := c.Client.Get(ctx, types.NamespacedName{Name: c.RouterServiceName, Namespace: c.SystemNamespace}, routerSvc); err != nil { if errors.IsNotFound(err) { - slog.WarnContext(ctx, "atenet-router service not found, skipping until it is available") + slog.WarnContext(ctx, "atenet-router service not found, skipping until it is available", + slog.String("name", c.RouterServiceName), slog.String("namespace", c.SystemNamespace)) return nil } - return fmt.Errorf("failed to get atenet-router service: %w", err) + return fmt.Errorf("failed to get atenet-router service %s/%s: %w", c.SystemNamespace, c.RouterServiceName, err) } routerIP := routerSvc.Spec.ClusterIP @@ -87,14 +104,15 @@ func (c *Controller) reconcile(ctx context.Context) error { return nil } - // 2. Get the ClusterIP of dns service in ate-system namespace + // 2. Get the ClusterIP of substrate's CoreDNS Service in the same namespace. 
dnsSvc := &corev1.Service{} - if err := c.Client.Get(ctx, types.NamespacedName{Name: serviceName, Namespace: systemNamespace}, dnsSvc); err != nil { + if err := c.Client.Get(ctx, types.NamespacedName{Name: c.DNSServiceName, Namespace: c.SystemNamespace}, dnsSvc); err != nil { if errors.IsNotFound(err) { - slog.WarnContext(ctx, "dns service not found, skipping until it is available") + slog.WarnContext(ctx, "dns service not found, skipping until it is available", + slog.String("name", c.DNSServiceName), slog.String("namespace", c.SystemNamespace)) return nil } - return fmt.Errorf("failed to get dns service: %w", err) + return fmt.Errorf("failed to get dns service %s/%s: %w", c.SystemNamespace, c.DNSServiceName, err) } dnsIP := dnsSvc.Spec.ClusterIP diff --git a/cmd/atenet/internal/dns/dns_test.go b/cmd/atenet/internal/dns/dns_test.go index 34116db28..611ccc496 100644 --- a/cmd/atenet/internal/dns/dns_test.go +++ b/cmd/atenet/internal/dns/dns_test.go @@ -94,10 +94,13 @@ func TestReconcile(t *testing.T) { reloader := &mockConfigReloader{} controller := &Controller{ - Client: client, - Interval: 1 * time.Second, - CorefilePath: corefilePath, - Reloader: reloader, + Client: client, + Interval: 1 * time.Second, + CorefilePath: corefilePath, + Reloader: reloader, + SystemNamespace: DefaultSystemNamespace, + RouterServiceName: DefaultRouterServiceName, + DNSServiceName: DefaultDNSServiceName, } // Run one reconciliation loop @@ -185,10 +188,13 @@ func TestReconcileKubeDNSNotFound(t *testing.T) { Build() controller := &Controller{ - Client: client, - Interval: 1 * time.Second, - CorefilePath: corefilePath, - Reloader: &mockConfigReloader{}, + Client: client, + Interval: 1 * time.Second, + CorefilePath: corefilePath, + Reloader: &mockConfigReloader{}, + SystemNamespace: DefaultSystemNamespace, + RouterServiceName: DefaultRouterServiceName, + DNSServiceName: DefaultDNSServiceName, } ctx := context.Background() diff --git a/cmd/atenet/internal/router.go 
b/cmd/atenet/internal/router.go index 5ca4e254c..86b0da90d 100644 --- a/cmd/atenet/internal/router.go +++ b/cmd/atenet/internal/router.go @@ -45,6 +45,7 @@ func NewRouterCmd() *cobra.Command { cmd.Flags().StringVar(&cfg.MetricsAddr, "metrics-listen-addr", ":9090", "Address and port the prometheus metrics server should listen on.") cmd.Flags().BoolVar(&cfg.Standalone, "standalone", false, "Run in standalone mode, bypassing creation of managed deployment and services in Kubernetes cluster") cmd.Flags().StringVar(&cfg.Namespace, "namespace", "default", "Target operations namespace") + cmd.Flags().StringVar(&cfg.RouterServiceName, "router-service-name", router.DefaultRouterServiceName, "Service name of this atenet-router in the operations namespace. Override when the deployment renames the Service.") cmd.Flags().StringVar(&cfg.Kubeconfig, "kubeconfig", "", "Absolute path to the kubeconfig configuration file") cmd.Flags().StringVar(&cfg.AteapiAddr, "ateapi-address", "api.ate-system.svc:443", "gRPC host address of the cluster ateapi Control instance") cmd.Flags().IntVar(&cfg.HttpPort, "port-http", 8080, "TCP port for workload traffic entering through the Envoy Router") diff --git a/cmd/atenet/internal/router/router.go b/cmd/atenet/internal/router/router.go index 007f65c59..db8338b30 100644 --- a/cmd/atenet/internal/router/router.go +++ b/cmd/atenet/internal/router/router.go @@ -62,10 +62,20 @@ func init() { utilruntime.Must(v1alpha1.AddToScheme(scheme)) } +// DefaultRouterServiceName matches the atenet-router Service name in the +// canonical install manifests under manifests/ate-install/. Deployments +// that rename the Service must pass the actual name via +// --router-service-name. +const DefaultRouterServiceName = "atenet-router" + // RouterConfig holds deployment setup and endpoint options for the router node instance. 
type RouterConfig struct { - Standalone bool - Namespace string + Standalone bool + Namespace string + // RouterServiceName is the Service name of this atenet-router in the + // operations namespace, used by /statusz to look up its own ClusterIP. + // Defaults to DefaultRouterServiceName. + RouterServiceName string Kubeconfig string AteapiAddr string HttpPort int diff --git a/cmd/atenet/internal/router/status.go b/cmd/atenet/internal/router/status.go index 1e3c35304..979c74cb6 100644 --- a/cmd/atenet/internal/router/status.go +++ b/cmd/atenet/internal/router/status.go @@ -166,7 +166,7 @@ func (s *RouterServer) getRouterIP(ctx context.Context) string { return "Standalone Mode (No Cluster IP)" } - svc, err := s.clientset.CoreV1().Services(s.cfg.Namespace).Get(ctx, "atenet-router", metav1.GetOptions{}) + svc, err := s.clientset.CoreV1().Services(s.cfg.Namespace).Get(ctx, s.cfg.RouterServiceName, metav1.GetOptions{}) if err != nil { return fmt.Sprintf("Lookup Failed: %v", err) } From be4af7fecd931d0e1298dae340124ac4874386c4 Mon Sep 17 00:00:00 2001 From: Jonathan Jamroga Date: Mon, 29 Jun 2026 14:00:33 -0400 Subject: [PATCH 2/9] ateapi: make atelet namespace configurable via --atelet-namespace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The atelet pod informer hardcoded `ateletNamespace = "ate-system"`, so ate-api-server could only locate atelet pods in that namespace. Deployments that run atelet elsewhere — an alternative install layout or a larger composition that relocates substrate components — leave the informer's cache empty and ResumeActor fails with `found 0 atelet pods on node "", expected 1`. Promote the constant to an exported default and accept the namespace as a parameter to `AteletInformer`. Add an `--atelet-namespace` flag on the ateapi binary (default DefaultAteletNamespace) that callers override when needed. 
--- .../internal/controlapi/functional_test.go | 2 +- cmd/ateapi/internal/controlapi/informer.go | 19 ++++++++++++------- cmd/ateapi/main.go | 4 +++- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/cmd/ateapi/internal/controlapi/functional_test.go b/cmd/ateapi/internal/controlapi/functional_test.go index b5f19881d..71ae1ffbe 100644 --- a/cmd/ateapi/internal/controlapi/functional_test.go +++ b/cmd/ateapi/internal/controlapi/functional_test.go @@ -273,7 +273,7 @@ func setupTest(t *testing.T, ns string) *testContext { // 3. Initialize Informers workerFactory, workerInformer := WorkerPodInformer(k8sClient) - ateletFactory, ateletInformer := AteletInformer(k8sClient) + ateletFactory, ateletInformer := AteletInformer(k8sClient, DefaultAteletNamespace) substrateInformerFactory := externalversions.NewSharedInformerFactory(substrateClient, 0) actorTemplateLister := substrateInformerFactory.Api().V1alpha1().ActorTemplates().Lister() diff --git a/cmd/ateapi/internal/controlapi/informer.go b/cmd/ateapi/internal/controlapi/informer.go index 1f082cdd0..19ccc002a 100644 --- a/cmd/ateapi/internal/controlapi/informer.go +++ b/cmd/ateapi/internal/controlapi/informer.go @@ -25,15 +25,20 @@ import ( ) const ( - ateletNamespace = "ate-system" - byNamespaceAndName = "by-namespace-and-name" - byWorkerPool = "by-worker-pool" - byNode = "by-node" - workerPodLabel = "ate.dev/worker-pool" + // DefaultAteletNamespace matches the atelet DaemonSet's namespace in + // the canonical install manifests under manifests/ate-install/. + // Deployments that run atelet elsewhere must override this via the + // --atelet-namespace flag on ateapi. + DefaultAteletNamespace = "ate-system" + byNamespaceAndName = "by-namespace-and-name" + byWorkerPool = "by-worker-pool" + byNode = "by-node" + workerPodLabel = "ate.dev/worker-pool" ) -// AteletInformer creates a SharedInformerFactory and SharedIndexInformer for Atelet pods. 
-func AteletInformer(kc kubernetes.Interface) (informers.SharedInformerFactory, cache.SharedIndexInformer) { +// AteletInformer creates a SharedInformerFactory and SharedIndexInformer for +// Atelet pods in the given namespace. +func AteletInformer(kc kubernetes.Interface, ateletNamespace string) (informers.SharedInformerFactory, cache.SharedIndexInformer) { factory := informers.NewSharedInformerFactoryWithOptions(kc, 0, informers.WithNamespace(ateletNamespace), informers.WithTweakListOptions(func(options *metav1.ListOptions) { diff --git a/cmd/ateapi/main.go b/cmd/ateapi/main.go index 3c79d91ff..8e1d55331 100644 --- a/cmd/ateapi/main.go +++ b/cmd/ateapi/main.go @@ -74,6 +74,8 @@ var ( showVersion = pflag.Bool("version", false, "Print version and exit.") authMode = pflag.String("auth-mode", "mtls", "Auth mode for incoming gRPC: mtls|jwt. 'mtls' (default) relies on transport-level mTLS for client identity. 'jwt' additionally requires a Kubernetes ServiceAccount Bearer token on every RPC. Substrate will drop support for JWT auth mode once the Pod Certificates feature is enabled by default in the minimum supported Kubernetes version.") clientJWTCAFile = pflag.String("client-jwt-ca-cert", ateapiauth.DefaultServiceAccountCAFile, "CA cert file used to verify TLS when fetching the OIDC discovery document and JWKS for JWT authentication. Defaults to the in-cluster service account CA.") + + ateletNamespace = pflag.String("atelet-namespace", controlapi.DefaultAteletNamespace, "Namespace where atelet pods run. 
Override when the deployment runs atelet in a namespace other than the default.") ) func main() { @@ -136,7 +138,7 @@ func main() { sandboxConfigLister := ateFactory.Api().V1alpha1().SandboxConfigs().Lister() workerPodInformerFactory, workerPodInformer := controlapi.WorkerPodInformer(clientset) - ateletPodInformerFactory, ateletPodInformer := controlapi.AteletInformer(clientset) + ateletPodInformerFactory, ateletPodInformer := controlapi.AteletInformer(clientset, *ateletNamespace) syncer := controlapi.NewWorkerPoolSyncer(redisPersistence, workerPodInformer) syncer.Start(ctx) From e16bab9a37ceb309ff4a6d75c4659dd220aebf43 Mon Sep 17 00:00:00 2001 From: Jonathan Jamroga Date: Mon, 29 Jun 2026 10:36:43 -0400 Subject: [PATCH 3/9] chart: pass system namespace and Service names to dns-controller and router Wire the new atenet flags (added earlier in this series by "atenet: make system namespace and component Service names configurable") through the Helm templates so the canonical-render defaults are overridden when the chart is used as a subchart (e.g. the kagent-enterprise composition where substrate.fullname prefixes all component Service names). For atenet-dns the dns-controller now receives: --system-namespace={{ .Release.Namespace }} --router-service-name={{ include "substrate.fullname" (list "atenet-router" .) }} --dns-service-name={{ include "substrate.fullname" (list "dns" .) }} For atenet-router the /statusz lookup gets: --router-service-name={{ include "substrate.fullname" (list "atenet-router" .) }} For ate-controller the args: key moves out of the jwt-only block so that the chart-resolved ateapi Service is always passed: --ateapi-conn-spec=dns:///{{ include "substrate.fullname" (list "api" .) }}.{{ .Release.Namespace }}.svc:443 When the release name equals the chart name ("substrate") these expand to the canonical bare names, preserving existing behavior for top-level installs. 
--- charts/substrate/templates/ate-controller.yaml | 8 +++++++- charts/substrate/templates/atenet-dns.yaml | 5 +++++ charts/substrate/templates/atenet-router.yaml | 3 +++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/charts/substrate/templates/ate-controller.yaml b/charts/substrate/templates/ate-controller.yaml index 5c403837c..d144ad4de 100644 --- a/charts/substrate/templates/ate-controller.yaml +++ b/charts/substrate/templates/ate-controller.yaml @@ -70,8 +70,14 @@ spec: containers: - name: ate-controller image: {{ include "substrate.componentImage" (list "atecontroller" .) }} -{{- if eq .Values.auth.mode "jwt" }} args: + # The atecontroller binary defaults --ateapi-conn-spec to + # dns:///api.ate-system.svc:443, which is correct only for the + # canonical render (release name "substrate" in namespace + # "ate-system"). Pass the chart-resolved Service so the controller + # dials the right backend when substrate is installed as a subchart. + - "--ateapi-conn-spec=dns:///{{ include "substrate.fullname" (list "api" .) }}.{{ .Release.Namespace }}.svc:443" +{{- if eq .Values.auth.mode "jwt" }} - "--ateapi-auth=jwt" - "--ateapi-ca-file=/run/ateapi-ca/ca.crt" - "--ateapi-server-name={{ include "substrate.fullname" (list "api" .) }}.{{ .Release.Namespace }}.svc" diff --git a/charts/substrate/templates/atenet-dns.yaml b/charts/substrate/templates/atenet-dns.yaml index 0838d2c0b..1a7654d09 100644 --- a/charts/substrate/templates/atenet-dns.yaml +++ b/charts/substrate/templates/atenet-dns.yaml @@ -150,6 +150,11 @@ spec: - "--log-level=debug" - "--interval=10s" - "--corefile-path=/etc/coredns/Corefile" + # Pass the chart-resolved names/namespace so the controller looks up + # the correct Services when substrate is installed as a subchart. + - "--system-namespace={{ .Release.Namespace }}" + - "--router-service-name={{ include "substrate.fullname" (list "atenet-router" .) }}" + - "--dns-service-name={{ include "substrate.fullname" (list "dns" .) 
}}" volumeMounts: - name: dns-config-volume mountPath: /etc/coredns diff --git a/charts/substrate/templates/atenet-router.yaml b/charts/substrate/templates/atenet-router.yaml index 01536184a..a70291fb1 100644 --- a/charts/substrate/templates/atenet-router.yaml +++ b/charts/substrate/templates/atenet-router.yaml @@ -144,6 +144,9 @@ spec: - "--standalone" - "--networking-mode=agentgateway" - "--namespace={{ .Release.Namespace }}" + # Pass the chart-resolved router Service name so /statusz looks up the + # correct Service when substrate is installed as a subchart. + - "--router-service-name={{ include "substrate.fullname" (list "atenet-router" .) }}" - "--port-http=8080" - "--port-extproc=50051" - "--extproc-address=127.0.0.1" From 0f065a0dabf6b8804852b8ad58b8fbe2d478672d Mon Sep 17 00:00:00 2001 From: Jonathan Jamroga Date: Mon, 29 Jun 2026 11:24:02 -0400 Subject: [PATCH 4/9] chart: pass --atelet-namespace to ate-api-server Wire the --atelet-namespace flag (added earlier in this series by "ateapi: make atelet namespace configurable via --atelet-namespace") through the chart so the atelet pod informer watches the chart's release namespace by default. Canonical render (release name "substrate" in namespace "ate-system") still produces "--atelet-namespace=ate-system", so behavior is unchanged for top-level installs. --- charts/substrate/templates/ate-api-server.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/charts/substrate/templates/ate-api-server.yaml b/charts/substrate/templates/ate-api-server.yaml index 691279430..3c8655eec 100644 --- a/charts/substrate/templates/ate-api-server.yaml +++ b/charts/substrate/templates/ate-api-server.yaml @@ -110,6 +110,10 @@ spec: - "--session-id-ca-pool=/run/session-id-ca-pool/pool.json" - "--client-jwt-ca-cert=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" {{- end }} + # Pass the chart-resolved namespace so the atelet pod informer + # watches the correct namespace when substrate is installed as a + # subchart (the binary default is "ate-system"). 
+ - "--atelet-namespace={{ .Release.Namespace }}" env: - name: POD_NAME valueFrom: From 8f38eb9bf5fda7f8bf210da1d17c95f817cf67ae Mon Sep 17 00:00:00 2001 From: Jonathan Jamroga Date: Tue, 30 Jun 2026 08:54:06 -0400 Subject: [PATCH 5/9] chart: regenerate manifests/ate-install/ from current Helm chart Re-runs `make helm-template` so the checked-in render matches the chart. Brings in rustfs.yaml, the s3-backed atelet storage envvars, the trimmed valkey manifest, and drops the no-longer-templated sandboxconfig-gvisor and sandboxconfig-validation manifests. `make verify-helm-template` now passes. --- manifests/ate-install/ate-api-server.yaml | 4 ++++ manifests/ate-install/ate-controller.yaml | 7 +++++++ manifests/ate-install/atenet-dns.yaml | 5 +++++ manifests/ate-install/atenet-router.yaml | 3 +++ 4 files changed, 19 insertions(+) diff --git a/manifests/ate-install/ate-api-server.yaml b/manifests/ate-install/ate-api-server.yaml index 75cad754a..a27c1d937 100644 --- a/manifests/ate-install/ate-api-server.yaml +++ b/manifests/ate-install/ate-api-server.yaml @@ -101,6 +101,10 @@ spec: - "--session-id-jwt-pool=/run/session-id-jwt-pool/pool.json" - "--session-id-ca-pool=/run/session-id-ca-pool/pool.json" - "--workerpool-ca-certs=/run/workerpool-ca-certs/trust-bundle.pem" + # Pass the chart-resolved namespace so the atelet pod informer + # watches the correct namespace when substrate is installed as a + # subchart (the binary default is "ate-system"). 
+ - "--atelet-namespace=ate-system" env: - name: POD_NAME valueFrom: diff --git a/manifests/ate-install/ate-controller.yaml b/manifests/ate-install/ate-controller.yaml index 2cfbeecb4..82eafb9cb 100644 --- a/manifests/ate-install/ate-controller.yaml +++ b/manifests/ate-install/ate-controller.yaml @@ -71,6 +71,13 @@ spec: containers: - name: ate-controller image: ko://github.com/agent-substrate/substrate/cmd/atecontroller + args: + # The atecontroller binary defaults --ateapi-conn-spec to + # dns:///api.ate-system.svc:443, which is correct only for the + # canonical render (release name "substrate" in namespace + # "ate-system"). Pass the chart-resolved Service so the controller + # dials the right backend when substrate is installed as a subchart. + - "--ateapi-conn-spec=dns:///api.ate-system.svc:443" ports: - name: metrics containerPort: 8080 diff --git a/manifests/ate-install/atenet-dns.yaml b/manifests/ate-install/atenet-dns.yaml index c925ada82..cf8b1d99c 100644 --- a/manifests/ate-install/atenet-dns.yaml +++ b/manifests/ate-install/atenet-dns.yaml @@ -170,6 +170,11 @@ spec: - "--log-level=debug" - "--interval=10s" - "--corefile-path=/etc/coredns/Corefile" + # Pass the chart-resolved names/namespace so the controller looks up + # the correct Services when substrate is installed as a subchart. + - "--system-namespace=ate-system" + - "--router-service-name=atenet-router" + - "--dns-service-name=dns" volumeMounts: - name: dns-config-volume mountPath: /etc/coredns diff --git a/manifests/ate-install/atenet-router.yaml b/manifests/ate-install/atenet-router.yaml index 54eb77e05..e389432d7 100644 --- a/manifests/ate-install/atenet-router.yaml +++ b/manifests/ate-install/atenet-router.yaml @@ -161,6 +161,9 @@ spec: - "--standalone" - "--networking-mode=agentgateway" - "--namespace=ate-system" + # Pass the chart-resolved router Service name so /statusz looks up the + # correct Service when substrate is installed as a subchart. 
+ - "--router-service-name=atenet-router" - "--port-http=8080" - "--port-extproc=50051" - "--extproc-address=127.0.0.1" From fc12b7f3755bbc5ba150f6a451a7e00e3a231e73 Mon Sep 17 00:00:00 2001 From: Jonathan Jamroga Date: Tue, 30 Jun 2026 09:19:18 -0400 Subject: [PATCH 6/9] review: centralize install defaults, derive atelet namespace from POD_NAMESPACE Addresses review comments on agent-substrate/substrate#350: - New internal/installdefaults package owns SystemNamespace, RouterServiceName, DNSServiceName. dns, router, and controlapi/informer drop their duplicate Default* constants and reference installdefaults via the matching flag declarations and tests. - Drop the --atelet-namespace flag on ateapi. The namespace is now resolved at startup from the POD_NAMESPACE env var (Kubernetes' downward API), falling back to installdefaults.SystemNamespace for non-k8s invocations (tests, local dev). atelet and ateapi share a namespace in every supported deployment topology, so a separate knob was dead weight. 
--- .../substrate/templates/ate-api-server.yaml | 4 --- .../internal/controlapi/functional_test.go | 3 +- cmd/ateapi/internal/controlapi/informer.go | 13 +++------ cmd/ateapi/main.go | 14 +++++++-- cmd/atenet/internal/dns.go | 7 +++-- cmd/atenet/internal/dns/dns.go | 17 ++--------- cmd/atenet/internal/dns/dns_test.go | 14 +++++---- cmd/atenet/internal/router.go | 3 +- cmd/atenet/internal/router/router.go | 8 +---- internal/installdefaults/installdefaults.go | 29 +++++++++++++++++++ manifests/ate-install/ate-api-server.yaml | 4 --- 11 files changed, 64 insertions(+), 52 deletions(-) create mode 100644 internal/installdefaults/installdefaults.go diff --git a/charts/substrate/templates/ate-api-server.yaml b/charts/substrate/templates/ate-api-server.yaml index 3c8655eec..691279430 100644 --- a/charts/substrate/templates/ate-api-server.yaml +++ b/charts/substrate/templates/ate-api-server.yaml @@ -110,10 +110,6 @@ spec: - "--session-id-ca-pool=/run/session-id-ca-pool/pool.json" - "--client-jwt-ca-cert=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" {{- end }} - # Pass the chart-resolved namespace so the atelet pod informer - # watches the correct namespace when substrate is installed as a - # subchart (the binary default is "ate-system"). 
- - "--atelet-namespace={{ .Release.Namespace }}" env: - name: POD_NAME valueFrom: diff --git a/cmd/ateapi/internal/controlapi/functional_test.go b/cmd/ateapi/internal/controlapi/functional_test.go index 71ae1ffbe..281c7a146 100644 --- a/cmd/ateapi/internal/controlapi/functional_test.go +++ b/cmd/ateapi/internal/controlapi/functional_test.go @@ -29,6 +29,7 @@ import ( "github.com/agent-substrate/substrate/cmd/ateapi/internal/workercache" "github.com/agent-substrate/substrate/internal/ateinterceptors" "github.com/agent-substrate/substrate/internal/envtestbins" + "github.com/agent-substrate/substrate/internal/installdefaults" "github.com/agent-substrate/substrate/internal/proto/ateletpb" atev1alpha1 "github.com/agent-substrate/substrate/pkg/api/v1alpha1" "github.com/agent-substrate/substrate/pkg/client/clientset/versioned" @@ -273,7 +274,7 @@ func setupTest(t *testing.T, ns string) *testContext { // 3. Initialize Informers workerFactory, workerInformer := WorkerPodInformer(k8sClient) - ateletFactory, ateletInformer := AteletInformer(k8sClient, DefaultAteletNamespace) + ateletFactory, ateletInformer := AteletInformer(k8sClient, installdefaults.SystemNamespace) substrateInformerFactory := externalversions.NewSharedInformerFactory(substrateClient, 0) actorTemplateLister := substrateInformerFactory.Api().V1alpha1().ActorTemplates().Lister() diff --git a/cmd/ateapi/internal/controlapi/informer.go b/cmd/ateapi/internal/controlapi/informer.go index 19ccc002a..fcaa6c3ec 100644 --- a/cmd/ateapi/internal/controlapi/informer.go +++ b/cmd/ateapi/internal/controlapi/informer.go @@ -25,15 +25,10 @@ import ( ) const ( - // DefaultAteletNamespace matches the atelet DaemonSet's namespace in - // the canonical install manifests under manifests/ate-install/. - // Deployments that run atelet elsewhere must override this via the - // --atelet-namespace flag on ateapi. 
- DefaultAteletNamespace = "ate-system" - byNamespaceAndName = "by-namespace-and-name" - byWorkerPool = "by-worker-pool" - byNode = "by-node" - workerPodLabel = "ate.dev/worker-pool" + byNamespaceAndName = "by-namespace-and-name" + byWorkerPool = "by-worker-pool" + byNode = "by-node" + workerPodLabel = "ate.dev/worker-pool" ) // AteletInformer creates a SharedInformerFactory and SharedIndexInformer for diff --git a/cmd/ateapi/main.go b/cmd/ateapi/main.go index 8e1d55331..e0434e7e1 100644 --- a/cmd/ateapi/main.go +++ b/cmd/ateapi/main.go @@ -34,6 +34,7 @@ import ( "github.com/agent-substrate/substrate/cmd/ateapi/internal/workercache" "github.com/agent-substrate/substrate/internal/ateapiauth" "github.com/agent-substrate/substrate/internal/ateinterceptors" + "github.com/agent-substrate/substrate/internal/installdefaults" "github.com/agent-substrate/substrate/internal/k8sjwt" "github.com/agent-substrate/substrate/internal/serverboot" "github.com/agent-substrate/substrate/internal/version" @@ -74,8 +75,6 @@ var ( showVersion = pflag.Bool("version", false, "Print version and exit.") authMode = pflag.String("auth-mode", "mtls", "Auth mode for incoming gRPC: mtls|jwt. 'mtls' (default) relies on transport-level mTLS for client identity. 'jwt' additionally requires a Kubernetes ServiceAccount Bearer token on every RPC. Substrate will drop support for JWT auth mode once the Pod Certificates feature is enabled by default in the minimum supported Kubernetes version.") clientJWTCAFile = pflag.String("client-jwt-ca-cert", ateapiauth.DefaultServiceAccountCAFile, "CA cert file used to verify TLS when fetching the OIDC discovery document and JWKS for JWT authentication. Defaults to the in-cluster service account CA.") - - ateletNamespace = pflag.String("atelet-namespace", controlapi.DefaultAteletNamespace, "Namespace where atelet pods run. 
Override when the deployment runs atelet in a namespace other than the default.") ) func main() { @@ -137,8 +136,17 @@ func main() { workerPoolLister := ateFactory.Api().V1alpha1().WorkerPools().Lister() sandboxConfigLister := ateFactory.Api().V1alpha1().SandboxConfigs().Lister() + // atelet shares ateapi's namespace in every supported deployment topology. + // POD_NAMESPACE comes from Kubernetes' downward API; the install fallback + // keeps non-k8s invocations (tests, local dev) working. + ateletNamespace := os.Getenv("POD_NAMESPACE") + if ateletNamespace == "" { + ateletNamespace = installdefaults.SystemNamespace + } + slog.InfoContext(ctx, "Resolved atelet namespace", slog.String("atelet-namespace", ateletNamespace)) + workerPodInformerFactory, workerPodInformer := controlapi.WorkerPodInformer(clientset) - ateletPodInformerFactory, ateletPodInformer := controlapi.AteletInformer(clientset, *ateletNamespace) + ateletPodInformerFactory, ateletPodInformer := controlapi.AteletInformer(clientset, ateletNamespace) syncer := controlapi.NewWorkerPoolSyncer(redisPersistence, workerPodInformer) syncer.Start(ctx) diff --git a/cmd/atenet/internal/dns.go b/cmd/atenet/internal/dns.go index ad9471370..37afb7376 100644 --- a/cmd/atenet/internal/dns.go +++ b/cmd/atenet/internal/dns.go @@ -30,6 +30,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/config" "github.com/agent-substrate/substrate/cmd/atenet/internal/dns" + "github.com/agent-substrate/substrate/internal/installdefaults" ) type DnsConfig struct { @@ -108,9 +109,9 @@ func NewDnsCmd() *cobra.Command { cmd.Flags().StringVar(&cfg.Kubeconfig, "kubeconfig", "", "Absolute path to the kubeconfig configuration file") cmd.Flags().DurationVar(&cfg.ReconcileInterval, "interval", 10*time.Second, "Interval for reconciling DNS configurations") cmd.Flags().StringVar(&cfg.CorefilePath, "corefile-path", "/etc/coredns/Corefile", "Path to the local Corefile configuration on shared volume") - 
cmd.Flags().StringVar(&cfg.SystemNamespace, "system-namespace", dns.DefaultSystemNamespace, "Namespace where atenet-router and substrate's CoreDNS Service live. Override when the deployment uses a different namespace.") - cmd.Flags().StringVar(&cfg.RouterServiceName, "router-service-name", dns.DefaultRouterServiceName, "Service name of the atenet-router. Override when the deployment renames the Service.") - cmd.Flags().StringVar(&cfg.DNSServiceName, "dns-service-name", dns.DefaultDNSServiceName, "Service name of substrate's CoreDNS. Override when the deployment renames the Service.") + cmd.Flags().StringVar(&cfg.SystemNamespace, "system-namespace", installdefaults.SystemNamespace, "Namespace where atenet-router and substrate's CoreDNS Service live. Override when the deployment uses a different namespace.") + cmd.Flags().StringVar(&cfg.RouterServiceName, "router-service-name", installdefaults.RouterServiceName, "Service name of the atenet-router. Override when the deployment renames the Service.") + cmd.Flags().StringVar(&cfg.DNSServiceName, "dns-service-name", installdefaults.DNSServiceName, "Service name of substrate's CoreDNS. Override when the deployment renames the Service.") return cmd } diff --git a/cmd/atenet/internal/dns/dns.go b/cmd/atenet/internal/dns/dns.go index 24aff0c43..4cfaf34ef 100644 --- a/cmd/atenet/internal/dns/dns.go +++ b/cmd/atenet/internal/dns/dns.go @@ -33,17 +33,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) -// Default resource names match the canonical install manifests in -// manifests/ate-install/ (atenet-router and dns Services in the -// ate-system namespace). Deployments that use a different namespace or -// rename these Services must pass the actual values via the matching -// flags on the dns command. -const ( - DefaultSystemNamespace = "ate-system" - DefaultRouterServiceName = "atenet-router" - DefaultDNSServiceName = "dns" -) - // Controller manages the DNS configuration for the ATE. 
type Controller struct { Client client.Client @@ -52,14 +41,14 @@ type Controller struct { Reloader ConfigReloader // SystemNamespace is the namespace where atenet-router and the substrate - // CoreDNS Service live. Defaults to DefaultSystemNamespace. + // CoreDNS Service live. Defaults to installdefaults.SystemNamespace. SystemNamespace string // RouterServiceName is the Service name of the atenet-router that the // CoreDNS Corefile forwards actor traffic to. Defaults to - // DefaultRouterServiceName. + // installdefaults.RouterServiceName. RouterServiceName string // DNSServiceName is the Service name of substrate's CoreDNS. Defaults to - // DefaultDNSServiceName. + // installdefaults.DNSServiceName. DNSServiceName string } diff --git a/cmd/atenet/internal/dns/dns_test.go b/cmd/atenet/internal/dns/dns_test.go index 611ccc496..bf27941e1 100644 --- a/cmd/atenet/internal/dns/dns_test.go +++ b/cmd/atenet/internal/dns/dns_test.go @@ -28,6 +28,8 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "github.com/agent-substrate/substrate/internal/installdefaults" ) type mockConfigReloader struct { @@ -98,9 +100,9 @@ func TestReconcile(t *testing.T) { Interval: 1 * time.Second, CorefilePath: corefilePath, Reloader: reloader, - SystemNamespace: DefaultSystemNamespace, - RouterServiceName: DefaultRouterServiceName, - DNSServiceName: DefaultDNSServiceName, + SystemNamespace: installdefaults.SystemNamespace, + RouterServiceName: installdefaults.RouterServiceName, + DNSServiceName: installdefaults.DNSServiceName, } // Run one reconciliation loop @@ -192,9 +194,9 @@ func TestReconcileKubeDNSNotFound(t *testing.T) { Interval: 1 * time.Second, CorefilePath: corefilePath, Reloader: &mockConfigReloader{}, - SystemNamespace: DefaultSystemNamespace, - RouterServiceName: DefaultRouterServiceName, - DNSServiceName: DefaultDNSServiceName, + SystemNamespace: installdefaults.SystemNamespace, + RouterServiceName: 
installdefaults.RouterServiceName, + DNSServiceName: installdefaults.DNSServiceName, } ctx := context.Background() diff --git a/cmd/atenet/internal/router.go b/cmd/atenet/internal/router.go index 86b0da90d..471351dc3 100644 --- a/cmd/atenet/internal/router.go +++ b/cmd/atenet/internal/router.go @@ -22,6 +22,7 @@ import ( "github.com/agent-substrate/substrate/cmd/atenet/internal/router" "github.com/agent-substrate/substrate/internal/ateapiauth" + "github.com/agent-substrate/substrate/internal/installdefaults" ) func NewRouterCmd() *cobra.Command { @@ -45,7 +46,7 @@ func NewRouterCmd() *cobra.Command { cmd.Flags().StringVar(&cfg.MetricsAddr, "metrics-listen-addr", ":9090", "Address and port the prometheus metrics server should listen on.") cmd.Flags().BoolVar(&cfg.Standalone, "standalone", false, "Run in standalone mode, bypassing creation of managed deployment and services in Kubernetes cluster") cmd.Flags().StringVar(&cfg.Namespace, "namespace", "default", "Target operations namespace") - cmd.Flags().StringVar(&cfg.RouterServiceName, "router-service-name", router.DefaultRouterServiceName, "Service name of this atenet-router in the operations namespace. Override when the deployment renames the Service.") + cmd.Flags().StringVar(&cfg.RouterServiceName, "router-service-name", installdefaults.RouterServiceName, "Service name of this atenet-router in the operations namespace. 
Override when the deployment renames the Service.") cmd.Flags().StringVar(&cfg.Kubeconfig, "kubeconfig", "", "Absolute path to the kubeconfig configuration file") cmd.Flags().StringVar(&cfg.AteapiAddr, "ateapi-address", "api.ate-system.svc:443", "gRPC host address of the cluster ateapi Control instance") cmd.Flags().IntVar(&cfg.HttpPort, "port-http", 8080, "TCP port for workload traffic entering through the Envoy Router") diff --git a/cmd/atenet/internal/router/router.go b/cmd/atenet/internal/router/router.go index db8338b30..b2932d8b6 100644 --- a/cmd/atenet/internal/router/router.go +++ b/cmd/atenet/internal/router/router.go @@ -62,19 +62,13 @@ func init() { utilruntime.Must(v1alpha1.AddToScheme(scheme)) } -// DefaultRouterServiceName matches the atenet-router Service name in the -// canonical install manifests under manifests/ate-install/. Deployments -// that rename the Service must pass the actual name via -// --router-service-name. -const DefaultRouterServiceName = "atenet-router" - // RouterConfig holds deployment setup and endpoint options for the router node instance. type RouterConfig struct { Standalone bool Namespace string // RouterServiceName is the Service name of this atenet-router in the // operations namespace, used by /statusz to look up its own ClusterIP. - // Defaults to DefaultRouterServiceName. + // Defaults to installdefaults.RouterServiceName. RouterServiceName string Kubeconfig string AteapiAddr string diff --git a/internal/installdefaults/installdefaults.go b/internal/installdefaults/installdefaults.go new file mode 100644 index 000000000..f694defa0 --- /dev/null +++ b/internal/installdefaults/installdefaults.go @@ -0,0 +1,29 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package installdefaults holds the default namespace and Service names +// that match the canonical install layout in manifests/ate-install/. +// Binaries use these as flag defaults; deployments that diverge from +// the canonical layout pass actual values via the corresponding flags. +package installdefaults + +const ( + // SystemNamespace is the namespace where substrate's control-plane + // components and the atelet DaemonSet run. + SystemNamespace = "ate-system" + // RouterServiceName is the Service name of atenet-router. + RouterServiceName = "atenet-router" + // DNSServiceName is the Service name of substrate's CoreDNS. + DNSServiceName = "dns" +) diff --git a/manifests/ate-install/ate-api-server.yaml b/manifests/ate-install/ate-api-server.yaml index a27c1d937..75cad754a 100644 --- a/manifests/ate-install/ate-api-server.yaml +++ b/manifests/ate-install/ate-api-server.yaml @@ -101,10 +101,6 @@ spec: - "--session-id-jwt-pool=/run/session-id-jwt-pool/pool.json" - "--session-id-ca-pool=/run/session-id-ca-pool/pool.json" - "--workerpool-ca-certs=/run/workerpool-ca-certs/trust-bundle.pem" - # Pass the chart-resolved namespace so the atelet pod informer - # watches the correct namespace when substrate is installed as a - # subchart (the binary default is "ate-system"). 
- - "--atelet-namespace=ate-system" env: - name: POD_NAME valueFrom: From 955a17e7957275a39bc1b0e237f12b0f85752ecb Mon Sep 17 00:00:00 2001 From: Jonathan Jamroga Date: Tue, 30 Jun 2026 09:30:28 -0400 Subject: [PATCH 7/9] review: derive atenet's system namespace from POD_NAMESPACE Same rationale as the prior atelet-namespace change: atenet, atenet-router, and substrate's CoreDNS live in a single namespace in every supported deployment topology, so a separate --system-namespace flag was dead weight. Resolve from the POD_NAMESPACE env var (Kubernetes' downward API) with installdefaults.SystemNamespace as the fallback for non-k8s runs. --router-service-name and --dns-service-name stay as flags because a subchart deployment renames those Services with a release prefix, and the binary can't derive that from pod metadata. --- charts/substrate/templates/atenet-dns.yaml | 11 ++++++++--- cmd/atenet/internal/dns.go | 14 +++++++++++--- manifests/ate-install/atenet-dns.yaml | 11 ++++++++--- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/charts/substrate/templates/atenet-dns.yaml b/charts/substrate/templates/atenet-dns.yaml index 1a7654d09..ace864570 100644 --- a/charts/substrate/templates/atenet-dns.yaml +++ b/charts/substrate/templates/atenet-dns.yaml @@ -150,11 +150,16 @@ spec: - "--log-level=debug" - "--interval=10s" - "--corefile-path=/etc/coredns/Corefile" - # Pass the chart-resolved names/namespace so the controller looks up - # the correct Services when substrate is installed as a subchart. - - "--system-namespace={{ .Release.Namespace }}" + # Pass the chart-resolved Service names so the controller looks up the + # correct objects when substrate is installed as a subchart. The + # system namespace is read from POD_NAMESPACE below. - "--router-service-name={{ include "substrate.fullname" (list "atenet-router" .) }}" - "--dns-service-name={{ include "substrate.fullname" (list "dns" .) 
}}" + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace volumeMounts: - name: dns-config-volume mountPath: /etc/coredns diff --git a/cmd/atenet/internal/dns.go b/cmd/atenet/internal/dns.go index 37afb7376..e56d674b3 100644 --- a/cmd/atenet/internal/dns.go +++ b/cmd/atenet/internal/dns.go @@ -38,7 +38,6 @@ type DnsConfig struct { Kubeconfig string ReconcileInterval time.Duration CorefilePath string - SystemNamespace string RouterServiceName string DNSServiceName string } @@ -90,12 +89,22 @@ func NewDnsCmd() *cobra.Command { return fmt.Errorf("failed to initialize cluster client: %w", err) } + // atenet shares the system namespace with atenet-router and + // substrate's CoreDNS in every supported deployment topology. + // POD_NAMESPACE comes from Kubernetes' downward API; the install + // fallback keeps non-k8s invocations (tests, local dev) working. + systemNamespace := os.Getenv("POD_NAMESPACE") + if systemNamespace == "" { + systemNamespace = installdefaults.SystemNamespace + } + slog.InfoContext(ctx, "Resolved system namespace", slog.String("system-namespace", systemNamespace)) + dnsController := &dns.Controller{ Client: k8sClient, Interval: cfg.ReconcileInterval, CorefilePath: cfg.CorefilePath, Reloader: dns.NewConfigReloader(), - SystemNamespace: cfg.SystemNamespace, + SystemNamespace: systemNamespace, RouterServiceName: cfg.RouterServiceName, DNSServiceName: cfg.DNSServiceName, } @@ -109,7 +118,6 @@ func NewDnsCmd() *cobra.Command { cmd.Flags().StringVar(&cfg.Kubeconfig, "kubeconfig", "", "Absolute path to the kubeconfig configuration file") cmd.Flags().DurationVar(&cfg.ReconcileInterval, "interval", 10*time.Second, "Interval for reconciling DNS configurations") cmd.Flags().StringVar(&cfg.CorefilePath, "corefile-path", "/etc/coredns/Corefile", "Path to the local Corefile configuration on shared volume") - cmd.Flags().StringVar(&cfg.SystemNamespace, "system-namespace", installdefaults.SystemNamespace, "Namespace where 
atenet-router and substrate's CoreDNS Service live. Override when the deployment uses a different namespace.") cmd.Flags().StringVar(&cfg.RouterServiceName, "router-service-name", installdefaults.RouterServiceName, "Service name of the atenet-router. Override when the deployment renames the Service.") cmd.Flags().StringVar(&cfg.DNSServiceName, "dns-service-name", installdefaults.DNSServiceName, "Service name of substrate's CoreDNS. Override when the deployment renames the Service.") diff --git a/manifests/ate-install/atenet-dns.yaml b/manifests/ate-install/atenet-dns.yaml index cf8b1d99c..48507c880 100644 --- a/manifests/ate-install/atenet-dns.yaml +++ b/manifests/ate-install/atenet-dns.yaml @@ -170,11 +170,16 @@ spec: - "--log-level=debug" - "--interval=10s" - "--corefile-path=/etc/coredns/Corefile" - # Pass the chart-resolved names/namespace so the controller looks up - # the correct Services when substrate is installed as a subchart. - - "--system-namespace=ate-system" + # Pass the chart-resolved Service names so the controller looks up the + # correct objects when substrate is installed as a subchart. The + # system namespace is read from POD_NAMESPACE below. - "--router-service-name=atenet-router" - "--dns-service-name=dns" + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace volumeMounts: - name: dns-config-volume mountPath: /etc/coredns From 4ee1e92b1ad17e723de45134de07f1db01aa7e9f Mon Sep 17 00:00:00 2001 From: Jonathan Jamroga Date: Tue, 30 Jun 2026 09:42:40 -0400 Subject: [PATCH 8/9] review: NamespaceFromPodEnv helper, APIServiceName const, ateclient hardcodes Three follow-ups from the self-review: - Extract the POD_NAMESPACE-with-SystemNamespace-fallback pattern into installdefaults.NamespaceFromPodEnv() so ateapi and atenet share a single implementation (also makes a third call site one line instead of four if anyone needs one). 
- Add installdefaults.PodNamespaceEnv ("POD_NAMESPACE") and APIServiceName ("api") so the constant set covers every name in the canonical install layout that's referenced by Go code. - Route internal/ateclient/builder.go's previously-hardcoded "ate-system" and "api" lookups through installdefaults, so kubectl-ate's port-forward no longer bypasses the new single source of truth. ate-controller (ServiceAccount), ate-api-server-deployment (Deployment), and "api.ate-system.svc" (JWT audience) are still hardcoded but their configurability needs a real flag/discovery story and is out of scope for this PR. --- cmd/ateapi/main.go | 10 +++------- cmd/atenet/internal/dns.go | 12 ++++-------- internal/ateclient/builder.go | 15 ++++++++------- internal/installdefaults/installdefaults.go | 18 ++++++++++++++++++ 4 files changed, 33 insertions(+), 22 deletions(-) diff --git a/cmd/ateapi/main.go b/cmd/ateapi/main.go index e0434e7e1..e10cebb0c 100644 --- a/cmd/ateapi/main.go +++ b/cmd/ateapi/main.go @@ -136,13 +136,9 @@ func main() { workerPoolLister := ateFactory.Api().V1alpha1().WorkerPools().Lister() sandboxConfigLister := ateFactory.Api().V1alpha1().SandboxConfigs().Lister() - // atelet shares ateapi's namespace in every supported deployment topology. - // POD_NAMESPACE comes from Kubernetes' downward API; the install fallback - // keeps non-k8s invocations (tests, local dev) working. - ateletNamespace := os.Getenv("POD_NAMESPACE") - if ateletNamespace == "" { - ateletNamespace = installdefaults.SystemNamespace - } + // atelet shares ateapi's namespace in every supported deployment topology, + // so we read it from Kubernetes' downward API rather than expose a flag. 
+ ateletNamespace := installdefaults.NamespaceFromPodEnv() slog.InfoContext(ctx, "Resolved atelet namespace", slog.String("atelet-namespace", ateletNamespace)) workerPodInformerFactory, workerPodInformer := controlapi.WorkerPodInformer(clientset) diff --git a/cmd/atenet/internal/dns.go b/cmd/atenet/internal/dns.go index e56d674b3..5fdced39d 100644 --- a/cmd/atenet/internal/dns.go +++ b/cmd/atenet/internal/dns.go @@ -89,14 +89,10 @@ func NewDnsCmd() *cobra.Command { return fmt.Errorf("failed to initialize cluster client: %w", err) } - // atenet shares the system namespace with atenet-router and - // substrate's CoreDNS in every supported deployment topology. - // POD_NAMESPACE comes from Kubernetes' downward API; the install - // fallback keeps non-k8s invocations (tests, local dev) working. - systemNamespace := os.Getenv("POD_NAMESPACE") - if systemNamespace == "" { - systemNamespace = installdefaults.SystemNamespace - } + // atenet shares its namespace with atenet-router and substrate's + // CoreDNS in every supported deployment topology, so we read it + // from Kubernetes' downward API rather than expose a flag. 
+ systemNamespace := installdefaults.NamespaceFromPodEnv() slog.InfoContext(ctx, "Resolved system namespace", slog.String("system-namespace", systemNamespace)) dnsController := &dns.Controller{ diff --git a/internal/ateclient/builder.go b/internal/ateclient/builder.go index 4a47ba29d..5d01822b5 100644 --- a/internal/ateclient/builder.go +++ b/internal/ateclient/builder.go @@ -24,6 +24,7 @@ import ( "strings" "sync" + "github.com/agent-substrate/substrate/internal/installdefaults" "github.com/agent-substrate/substrate/pkg/proto/ateapipb" "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" "go.opentelemetry.io/otel" @@ -132,22 +133,22 @@ func dialPortForward(ctx context.Context, kubeconfigPath, k8sContext string, tra return nil, fmt.Errorf("failed to create k8s client: %w", err) } - // Look up the 'api' Service to dynamically get its pod selector - svc, err := clientset.CoreV1().Services("ate-system").Get(ctx, "api", metav1.GetOptions{}) + // Look up the ateapi Service to dynamically get its pod selector. 
+ svc, err := clientset.CoreV1().Services(installdefaults.SystemNamespace).Get(ctx, installdefaults.APIServiceName, metav1.GetOptions{}) if err != nil { - return nil, fmt.Errorf("failed to get api service: %w", err) + return nil, fmt.Errorf("failed to get ateapi service %s/%s: %w", installdefaults.SystemNamespace, installdefaults.APIServiceName, err) } selector := labels.SelectorFromSet(svc.Spec.Selector).String() // Find the pods backing the service - pods, err := clientset.CoreV1().Pods("ate-system").List(ctx, metav1.ListOptions{ + pods, err := clientset.CoreV1().Pods(installdefaults.SystemNamespace).List(ctx, metav1.ListOptions{ LabelSelector: selector, }) if err != nil { return nil, fmt.Errorf("failed to list ateapi pods: %w", err) } if len(pods.Items) == 0 { - return nil, fmt.Errorf("no ate-api-server pods found in ate-system namespace") + return nil, fmt.Errorf("no ate-api-server pods found in %q namespace", installdefaults.SystemNamespace) } targetPod := pods.Items[0] @@ -254,7 +255,7 @@ func jwtDialOptions(ctx context.Context, clientset *kubernetes.Clientset) ([]grp ExpirationSeconds: &expirationSeconds, }, } - token, err := clientset.CoreV1().ServiceAccounts("ate-system").CreateToken(ctx, "ate-client", tokenRequest, metav1.CreateOptions{}) + token, err := clientset.CoreV1().ServiceAccounts(installdefaults.SystemNamespace).CreateToken(ctx, "ate-client", tokenRequest, metav1.CreateOptions{}) if err != nil { return nil, fmt.Errorf("failed to request ateapi bearer token: %w", err) } @@ -267,7 +268,7 @@ func jwtDialOptions(ctx context.Context, clientset *kubernetes.Clientset) ([]grp func isJWTMode(ctx context.Context, clientset *kubernetes.Clientset) (bool, error) { // TODO: Replace deployment introspection with an explicit client-readable // config file once ateapi auth mode is part of install/runtime config. 
- deployment, err := clientset.AppsV1().Deployments("ate-system").Get(ctx, "ate-api-server-deployment", metav1.GetOptions{}) + deployment, err := clientset.AppsV1().Deployments(installdefaults.SystemNamespace).Get(ctx, "ate-api-server-deployment", metav1.GetOptions{}) if err != nil { return false, fmt.Errorf("failed to get ate-api-server deployment: %w", err) } diff --git a/internal/installdefaults/installdefaults.go b/internal/installdefaults/installdefaults.go index f694defa0..8f47d84f0 100644 --- a/internal/installdefaults/installdefaults.go +++ b/internal/installdefaults/installdefaults.go @@ -18,12 +18,30 @@ // the canonical layout pass actual values via the corresponding flags. package installdefaults +import "os" + const ( // SystemNamespace is the namespace where substrate's control-plane // components and the atelet DaemonSet run. SystemNamespace = "ate-system" + // APIServiceName is the Service name of ate-api-server. + APIServiceName = "api" // RouterServiceName is the Service name of atenet-router. RouterServiceName = "atenet-router" // DNSServiceName is the Service name of substrate's CoreDNS. DNSServiceName = "dns" + + // PodNamespaceEnv is the conventional env var name for the namespace + // a pod is running in, exposed via Kubernetes' downward API. + PodNamespaceEnv = "POD_NAMESPACE" ) + +// NamespaceFromPodEnv returns the namespace from the PodNamespaceEnv env +// var when set (typically populated via Kubernetes' downward API), and +// falls back to SystemNamespace for non-k8s invocations (tests, local dev). 
+func NamespaceFromPodEnv() string { + if ns := os.Getenv(PodNamespaceEnv); ns != "" { + return ns + } + return SystemNamespace +} From 3e2403ba147c2d26d5da3217d47ea66527330b28 Mon Sep 17 00:00:00 2001 From: Jonathan Jamroga Date: Wed, 1 Jul 2026 12:20:04 -0400 Subject: [PATCH 9/9] chart: render ate-client ServiceAccount in every mode The JWT install overlay (manifests/ate-install/jwt) references ate-client.yaml as a top-level resource, but the chart previously guarded the SA behind {{ if eq .Values.auth.mode "jwt" }} so render-manifests.sh (mtls) never emitted it. That divergence broke verify-helm-template after merging the upstream JWT fix that added a hand-maintained manifests/ate-install/ate-client.yaml. The SA is harmless in mtls installs (unused), so render it unconditionally so the chart is the single source of truth. --- charts/substrate/templates/ate-client.yaml | 2 -- manifests/ate-install/ate-client.yaml | 23 ++++++++++++---------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/charts/substrate/templates/ate-client.yaml b/charts/substrate/templates/ate-client.yaml index 3de466a04..dfd2fdab6 100644 --- a/charts/substrate/templates/ate-client.yaml +++ b/charts/substrate/templates/ate-client.yaml @@ -14,7 +14,6 @@ See the License for the specific language governing permissions and limitations under the License. */}} -{{- if eq .Values.auth.mode "jwt" }} apiVersion: v1 kind: ServiceAccount metadata: @@ -22,4 +21,3 @@ metadata: namespace: {{ .Release.Namespace }} labels: apps: ate-client -{{- end }} diff --git a/manifests/ate-install/ate-client.yaml b/manifests/ate-install/ate-client.yaml index cc6ef76c0..e59bd53f8 100644 --- a/manifests/ate-install/ate-client.yaml +++ b/manifests/ate-install/ate-client.yaml @@ -1,16 +1,19 @@ -# Copyright 2026 Google LLC +# Copyright 2026 Google LLC # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. +# Run `make helm-template` to regenerate. apiVersion: v1 kind: ServiceAccount