diff --git a/.changelog/23088.txt b/.changelog/23088.txt new file mode 100644 index 000000000000..614d0e722769 --- /dev/null +++ b/.changelog/23088.txt @@ -0,0 +1,3 @@ +```release-note:bug +agent: fix bug prevents default TCP checks from being re-added on service reload when they were explicitly disabled or when custom checks were specified during initial registration. +``` \ No newline at end of file diff --git a/agent/agent.go b/agent/agent.go index c177ac3f13c4..7d4bbcc18518 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -2159,6 +2159,10 @@ type persistedService struct { // to exclude it from API output, but we need it to properly deregister // persisted sidecars. LocallyRegisteredAsSidecar bool `json:",omitempty"` + // we need it tracks whether default sidecar checks (TCP + alias) were + // disabled at registration time. Persisted to maintain consistent check configuration + // across agent restarts/reloads. Excluded from API responses but required for state restoration. + DisableSidecarDefaultChecks bool `json:",omitempty"` } func (a *Agent) makeServiceFilePath(svcID structs.ServiceID) string { @@ -2166,15 +2170,16 @@ func (a *Agent) makeServiceFilePath(svcID structs.ServiceID) string { } // persistService saves a service definition to a JSON file in the data dir -func (a *Agent) persistService(service *structs.NodeService, source configSource) error { +func (a *Agent) persistService(service *structs.NodeService, source configSource, disableSidecarDefaultChecks bool) error { svcID := service.CompoundServiceID() svcPath := a.makeServiceFilePath(svcID) wrapped := persistedService{ - Token: a.State.ServiceToken(service.CompoundServiceID()), - Service: service, - Source: source.String(), - LocallyRegisteredAsSidecar: service.LocallyRegisteredAsSidecar, + Token: a.State.ServiceToken(service.CompoundServiceID()), + Service: service, + Source: source.String(), + LocallyRegisteredAsSidecar: service.LocallyRegisteredAsSidecar, + DisableSidecarDefaultChecks: disableSidecarDefaultChecks, } encoded, err := json.Marshal(wrapped) if err != nil { @@ -2374,8 +2379,10 @@ func (a *Agent) addServiceLocked(req addServiceLockedRequest) error { req.Service.Port = port } // Setup default check if none given. - if len(req.chkTypes) < 1 { + if len(req.chkTypes) < 1 && !req.disableSidecarDefaultChecks { req.chkTypes = sidecarDefaultChecks(req.Service.ID, req.Service.Address, req.Service.Proxy.LocalServiceAddress, req.Service.Port) + } else { + req.disableSidecarDefaultChecks = true } } @@ -2416,12 +2423,13 @@ type addServiceLockedRequest struct { // AddServiceRequest contains the fields used to register a service on the local // agent using Agent.AddService. type AddServiceRequest struct { - Service *structs.NodeService - chkTypes []*structs.CheckType - persist bool - token string - replaceExistingChecks bool - Source configSource + Service *structs.NodeService + chkTypes []*structs.CheckType + persist bool + token string + replaceExistingChecks bool + Source configSource + disableSidecarDefaultChecks bool } type addServiceInternalRequest struct { @@ -2614,7 +2622,7 @@ func (a *Agent) addServiceInternal(req addServiceInternalRequest) error { req.persistService = service } - if err := a.persistService(req.persistService, source); err != nil { + if err := a.persistService(req.persistService, source, req.disableSidecarDefaultChecks); err != nil { a.cleanupRegistration(cleanupServices, cleanupChecks) return err } @@ -3882,12 +3890,13 @@ func (a *Agent) loadServices(conf *config.RuntimeConfig, snap map[structs.CheckI ) err = a.addServiceLocked(addServiceLockedRequest{ AddServiceRequest: AddServiceRequest{ - Service: p.Service, - chkTypes: nil, - persist: false, // don't rewrite the file with the same data we just read - token: p.Token, - replaceExistingChecks: false, // do default behavior - Source: source, + Service: p.Service, + chkTypes: nil, + persist: false, // don't rewrite the file with the same data we just read + token: p.Token, + replaceExistingChecks: false, // do default behavior + Source: source, + disableSidecarDefaultChecks: p.DisableSidecarDefaultChecks, }, serviceDefaults: serviceDefaultsFromStruct(persistedServiceConfigs[serviceID]), persistServiceConfig: false, // don't rewrite the file with the same data we just read diff --git a/agent/agent_test.go b/agent/agent_test.go index cf5abf85a792..801cccd46e68 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -2469,6 +2469,105 @@ func testAgent_PersistService(t *testing.T, extraHCL string) { } } +func TestAgent_Reload_HonorsDisableDefaultSidecarChecks(t *testing.T) { + if testing.Short() { + t.Skip("too slow for testing.Short") + } + + t.Run("no custom checks - should add default TCP and alias checks", func(t *testing.T) { + t.Parallel() + testAgent_Reload_HonorsDisableDefaultSidecarChecks(t, nil, false) + }) + + t.Run("with custom alias check - should not add default checks", func(t *testing.T) { + t.Parallel() + testAgent_Reload_HonorsDisableDefaultSidecarChecks(t, []*structs.CheckType{ + { + Name: "Custom Connect Sidecar Aliasing redis", + AliasService: "redis", + }, + }, true) + }) +} + +func testAgent_Reload_HonorsDisableDefaultSidecarChecks(t *testing.T, checks []*structs.CheckType, disable_default_tcp_check bool) { + t.Helper() + + cfg := ` + server = false + bootstrap = false + ` + a := StartTestAgent(t, TestAgent{HCL: cfg}) + defer a.Shutdown() + + svc := &structs.NodeService{ + Kind: "connect-proxy", + ID: "redis-proxy", + Service: "redis", + Tags: []string{"foo"}, + Port: 8000, + LocallyRegisteredAsSidecar: true, + } + + file := filepath.Join(a.Config.DataDir, servicesDir, structs.NewServiceID(svc.ID, nil).StringHashSHA256()) + + if err := a.addServiceFromSource(svc, checks, true, "mytoken", ConfigSourceLocal); err != nil { + t.Fatalf("err: %v", err) + } + + expected, err := json.Marshal(persistedService{ + Token: "mytoken", + Service: svc, + Source: "local", + LocallyRegisteredAsSidecar: true, + DisableSidecarDefaultChecks: disable_default_tcp_check, + }) + if err != nil { + t.Fatalf("err: %s", err) + } + content, err := os.ReadFile(file) + if err != nil { + t.Fatalf("err: %s", err) + } + if !bytes.Equal(expected, content) { + t.Fatalf("bad: %s %s", string(expected), string(content)) + } + + // Shutdown the agent + a.Shutdown() + + // Restart the agent + a2 := StartTestAgent(t, TestAgent{HCL: cfg, DataDir: a.DataDir}) + defer a2.Shutdown() + + restored := a2.State.ServiceState(structs.NewServiceID(svc.ID, nil)) + if restored == nil { + t.Fatalf("service %q missing", svc.ID) + } + // check service does not have default sidecar TCP checks after restart + checkStateSnapshot := a2.State.AllChecks() + if disable_default_tcp_check { + require.Len(t, checkStateSnapshot, 1) + } else { + require.Len(t, checkStateSnapshot, 2) + } + + // Now reload the config and ensure the flag is still honored + a2.reloadConfig(false) + + reloadRestored := a2.State.ServiceState(structs.NewServiceID(svc.ID, nil)) + if reloadRestored == nil { + t.Fatalf("service %q missing", svc.ID) + } + // check service does not have default sidecar TCP checks after config reload + checkStateSnapshotAfterReload := a2.State.AllChecks() + if disable_default_tcp_check { + require.Len(t, checkStateSnapshotAfterReload, 1) + } else { + require.Len(t, checkStateSnapshotAfterReload, 2) + } +} + func TestAgent_persistedService_compat(t *testing.T) { if testing.Short() { t.Skip("too slow for testing.Short")