diff --git a/cgroups.go b/cgroups.go index 5a97bd3..12bf74e 100644 --- a/cgroups.go +++ b/cgroups.go @@ -44,6 +44,10 @@ type Manager interface { // GetStats returns cgroups statistics. GetStats() (*Stats, error) + // Stats returns statistics for specified controllers. + // If opts is nil or opts.Controllers is 0, all controllers are queried. + Stats(opts *StatsOptions) (*Stats, error) + // Freeze sets the freezer cgroup to the specified state. Freeze(state FreezerState) error diff --git a/fs/blkio.go b/fs/blkio.go index f3c4c5c..e5b2d31 100644 --- a/fs/blkio.go +++ b/fs/blkio.go @@ -19,6 +19,11 @@ func (s *BlkioGroup) Name() string { return "blkio" } +// ID returns the controller ID for blkio subsystem. +func (s *BlkioGroup) ID() cgroups.Controller { + return cgroups.IO +} + func (s *BlkioGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/fs/cpu.go b/fs/cpu.go index 3e05788..84f9f74 100644 --- a/fs/cpu.go +++ b/fs/cpu.go @@ -18,6 +18,11 @@ func (s *CpuGroup) Name() string { return "cpu" } +// ID returns the controller ID for CPU subsystem. +func (s *CpuGroup) ID() cgroups.Controller { + return cgroups.CPU +} + func (s *CpuGroup) Apply(path string, r *cgroups.Resources, pid int) error { if err := os.MkdirAll(path, 0o755); err != nil { return err diff --git a/fs/cpuacct.go b/fs/cpuacct.go index 5930dfc..5a1be75 100644 --- a/fs/cpuacct.go +++ b/fs/cpuacct.go @@ -26,6 +26,11 @@ func (s *CpuacctGroup) Name() string { return "cpuacct" } +// ID returns the controller ID for cpuacct subsystem. +func (s *CpuacctGroup) ID() cgroups.Controller { + return cgroups.CPU +} + func (s *CpuacctGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/fs/cpuset.go b/fs/cpuset.go index f3f96df..327d5e7 100644 --- a/fs/cpuset.go +++ b/fs/cpuset.go @@ -54,6 +54,11 @@ func (s *CpusetGroup) Name() string { return "cpuset" } +// ID returns the controller ID for cpuset subsystem. 
+func (s *CpusetGroup) ID() cgroups.Controller { + return cgroups.CPUSet +} + func (s *CpusetGroup) Apply(path string, r *cgroups.Resources, pid int) error { return s.ApplyDir(path, r, pid) } diff --git a/fs/devices.go b/fs/devices.go index 26483ec..5eee641 100644 --- a/fs/devices.go +++ b/fs/devices.go @@ -10,6 +10,12 @@ func (s *DevicesGroup) Name() string { return "devices" } +// ID returns the controller ID for devices subsystem. +// Returns 0 as devices is not a cgroups.Controller. +func (s *DevicesGroup) ID() cgroups.Controller { + return 0 +} + func (s *DevicesGroup) Apply(path string, r *cgroups.Resources, pid int) error { if r.SkipDevices { return nil diff --git a/fs/freezer.go b/fs/freezer.go index fe0f0dd..3edc7a8 100644 --- a/fs/freezer.go +++ b/fs/freezer.go @@ -18,6 +18,12 @@ func (s *FreezerGroup) Name() string { return "freezer" } +// ID returns the controller ID for freezer subsystem. +// Returns 0 as freezer is not a cgroups.Controller. +func (s *FreezerGroup) ID() cgroups.Controller { + return 0 +} + func (s *FreezerGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/fs/fs.go b/fs/fs.go index 6259311..ca0a65e 100644 --- a/fs/fs.go +++ b/fs/fs.go @@ -29,7 +29,7 @@ var subsystems = []subsystem{ &FreezerGroup{}, &RdmaGroup{}, &NameGroup{GroupName: "name=systemd", Join: true}, - &NameGroup{GroupName: "misc", Join: true}, + &NameGroup{GroupName: "misc", Join: true, GroupID: cgroups.Misc}, } var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist") @@ -45,6 +45,8 @@ func init() { type subsystem interface { // Name returns the name of the subsystem. Name() string + // ID returns the controller ID for filtering. + ID() cgroups.Controller // GetStats fills in the stats for the subsystem. GetStats(path string, stats *cgroups.Stats) error // Apply creates and joins a cgroup, adding pid into it. 
Some @@ -181,14 +183,33 @@ func (m *Manager) Path(subsys string) string { } func (m *Manager) GetStats() (*cgroups.Stats, error) { + return m.Stats(nil) +} + +// Stats returns cgroup statistics for the specified controllers. +// If opts is nil or opts.Controllers is zero, statistics for all controllers are returned. +func (m *Manager) Stats(opts *cgroups.StatsOptions) (*cgroups.Stats, error) { m.mu.Lock() defer m.mu.Unlock() + + // Default: query all controllers + controllers := cgroups.AllControllers + if opts != nil && opts.Controllers != 0 { + controllers = opts.Controllers + } + stats := cgroups.NewStats() for _, sys := range subsystems { path := m.paths[sys.Name()] if path == "" { continue } + + // Filter based on controller type + if sys.ID()&controllers == 0 { + continue + } + if err := sys.GetStats(path, stats); err != nil { return nil, err } diff --git a/fs/fs_test.go b/fs/fs_test.go index 331e9f1..a5a5fa0 100644 --- a/fs/fs_test.go +++ b/fs/fs_test.go @@ -6,6 +6,215 @@ import ( "github.com/opencontainers/cgroups" ) +// pointerTo returns a pointer to the given controller value. 
+func pointerTo(c cgroups.Controller) *cgroups.Controller { + return &c +} + +func TestStats(t *testing.T) { + testCases := []struct { + name string + controller *cgroups.Controller + subsystems map[string]map[string]string // subsystem -> file contents + validate func(*testing.T, *cgroups.Stats) + }{ + { + name: "CPU stats", + controller: pointerTo(cgroups.CPU), + subsystems: map[string]map[string]string{ + "cpu": { + "cpu.stat": "nr_periods 2000\nnr_throttled 200\nthrottled_time 18446744073709551615\n", + }, + "cpuacct": { + "cpuacct.usage": cpuAcctUsageContents, + "cpuacct.usage_percpu": cpuAcctUsagePerCPUContents, + "cpuacct.stat": cpuAcctStatContents, + }, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify throttling data from cpu.stat + expectedThrottling := cgroups.ThrottlingData{ + Periods: 2000, + ThrottledPeriods: 200, + ThrottledTime: 18446744073709551615, + } + expectThrottlingDataEquals(t, expectedThrottling, stats.CpuStats.ThrottlingData) + + // Verify total usage from cpuacct.usage + if stats.CpuStats.CpuUsage.TotalUsage != 12262454190222160 { + t.Errorf("expected TotalUsage 12262454190222160, got %d", stats.CpuStats.CpuUsage.TotalUsage) + } + }, + }, + { + name: "Memory stats", + controller: pointerTo(cgroups.Memory), + subsystems: map[string]map[string]string{ + "memory": { + "memory.stat": memoryStatContents, + "memory.usage_in_bytes": "2048", + "memory.max_usage_in_bytes": "4096", + "memory.failcnt": "100", + "memory.limit_in_bytes": "8192", + "memory.use_hierarchy": "1", + }, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + expected := cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192} + expectMemoryDataEquals(t, expected, stats.MemoryStats.Usage) + }, + }, + { + name: "Pids stats", + controller: pointerTo(cgroups.Pids), + subsystems: map[string]map[string]string{ + "pids": { + "pids.current": "1337", + "pids.max": "1024", + }, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + 
if stats.PidsStats.Current != 1337 { + t.Errorf("expected Current 1337, got %d", stats.PidsStats.Current) + } + if stats.PidsStats.Limit != 1024 { + t.Errorf("expected Limit 1024, got %d", stats.PidsStats.Limit) + } + }, + }, + { + name: "IO stats", + controller: pointerTo(cgroups.IO), + subsystems: map[string]map[string]string{ + "blkio": blkioBFQStatsTestFiles, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify we have entries + if len(stats.BlkioStats.IoServiceBytesRecursive) == 0 { + t.Error("expected IoServiceBytesRecursive to have entries") + } + if len(stats.BlkioStats.IoServicedRecursive) == 0 { + t.Error("expected IoServicedRecursive to have entries") + } + }, + }, + { + name: "Multiple controllers - CPU+Pids", + controller: pointerTo(cgroups.CPU | cgroups.Pids), + subsystems: map[string]map[string]string{ + "cpu": { + "cpu.stat": "nr_periods 100\nnr_throttled 10\nthrottled_time 5000\n", + }, + "pids": { + "pids.current": "42", + "pids.max": "1000", + }, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify both are populated + if stats.CpuStats.ThrottlingData.Periods != 100 { + t.Errorf("expected Periods 100, got %d", stats.CpuStats.ThrottlingData.Periods) + } + if stats.PidsStats.Current != 42 { + t.Errorf("expected Current 42, got %d", stats.PidsStats.Current) + } + if stats.PidsStats.Limit != 1000 { + t.Errorf("expected Limit 1000, got %d", stats.PidsStats.Limit) + } + }, + }, + { + name: "All controllers with nil options", + controller: nil, // nil means all controllers (default behavior) + subsystems: map[string]map[string]string{ + "cpu": { + "cpu.stat": "nr_periods 2000\nnr_throttled 200\nthrottled_time 18446744073709551615\n", + }, + "cpuacct": { + "cpuacct.usage": cpuAcctUsageContents, + "cpuacct.usage_percpu": cpuAcctUsagePerCPUContents, + "cpuacct.stat": cpuAcctStatContents, + }, + "memory": { + "memory.stat": memoryStatContents, + "memory.usage_in_bytes": "2048", + "memory.max_usage_in_bytes": "4096", + 
"memory.failcnt": "100", + "memory.limit_in_bytes": "8192", + "memory.use_hierarchy": "1", + }, + "pids": { + "pids.current": "1337", + "pids.max": "1024", + }, + "blkio": blkioBFQStatsTestFiles, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify CPU stats + expectedThrottling := cgroups.ThrottlingData{ + Periods: 2000, + ThrottledPeriods: 200, + ThrottledTime: 18446744073709551615, + } + expectThrottlingDataEquals(t, expectedThrottling, stats.CpuStats.ThrottlingData) + if stats.CpuStats.CpuUsage.TotalUsage != 12262454190222160 { + t.Errorf("expected TotalUsage 12262454190222160, got %d", stats.CpuStats.CpuUsage.TotalUsage) + } + + // Verify Memory stats + expectedMemory := cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192} + expectMemoryDataEquals(t, expectedMemory, stats.MemoryStats.Usage) + + // Verify Pids stats + if stats.PidsStats.Current != 1337 { + t.Errorf("expected Current 1337, got %d", stats.PidsStats.Current) + } + if stats.PidsStats.Limit != 1024 { + t.Errorf("expected Limit 1024, got %d", stats.PidsStats.Limit) + } + + // Verify IO stats + if len(stats.BlkioStats.IoServiceBytesRecursive) == 0 { + t.Error("expected IoServiceBytesRecursive to have entries") + } + if len(stats.BlkioStats.IoServicedRecursive) == 0 { + t.Error("expected IoServicedRecursive to have entries") + } + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Create temp directories for each subsystem and write files + paths := make(map[string]string) + for subsystem, files := range tc.subsystems { + path := tempDir(t, subsystem) + writeFileContents(t, path, files) + paths[subsystem] = path + } + m := &Manager{ + cgroups: &cgroups.Cgroup{Resources: &cgroups.Resources{}}, + paths: paths, + } + + var stats *cgroups.Stats + var err error + if tc.controller != nil { + stats, err = m.Stats(&cgroups.StatsOptions{Controllers: *tc.controller}) + } else { + stats, err = m.Stats(nil) + } + if err != nil { + 
t.Fatal(err) + } + + // Validate the results + tc.validate(t, stats) + }) + } +} + func BenchmarkGetStats(b *testing.B) { if cgroups.IsCgroup2UnifiedMode() { b.Skip("cgroup v2 is not supported") diff --git a/fs/hugetlb.go b/fs/hugetlb.go index 698fd69..39b8f80 100644 --- a/fs/hugetlb.go +++ b/fs/hugetlb.go @@ -15,6 +15,11 @@ func (s *HugetlbGroup) Name() string { return "hugetlb" } +// ID returns the controller ID for hugetlb subsystem. +func (s *HugetlbGroup) ID() cgroups.Controller { + return cgroups.HugeTLB +} + func (s *HugetlbGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/fs/memory.go b/fs/memory.go index d92f232..0250819 100644 --- a/fs/memory.go +++ b/fs/memory.go @@ -29,6 +29,11 @@ func (s *MemoryGroup) Name() string { return "memory" } +// ID returns the controller ID for memory subsystem. +func (s *MemoryGroup) ID() cgroups.Controller { + return cgroups.Memory +} + func (s *MemoryGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/fs/name.go b/fs/name.go index 2864351..47c6022 100644 --- a/fs/name.go +++ b/fs/name.go @@ -7,12 +7,18 @@ import ( type NameGroup struct { GroupName string Join bool + GroupID cgroups.Controller } func (s *NameGroup) Name() string { return s.GroupName } +// ID returns the controller ID for named subsystem. +func (s *NameGroup) ID() cgroups.Controller { + return s.GroupID +} + func (s *NameGroup) Apply(path string, _ *cgroups.Resources, pid int) error { if s.Join { // Ignore errors if the named cgroup does not exist. diff --git a/fs/net_cls.go b/fs/net_cls.go index 2bd6c5a..c61bded 100644 --- a/fs/net_cls.go +++ b/fs/net_cls.go @@ -12,6 +12,12 @@ func (s *NetClsGroup) Name() string { return "net_cls" } +// ID returns the controller ID for net_cls subsystem. +// Returns 0 as net_cls is not a cgroups.Controller. 
+func (s *NetClsGroup) ID() cgroups.Controller { + return 0 +} + func (s *NetClsGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/fs/net_prio.go b/fs/net_prio.go index b51682b..228eb34 100644 --- a/fs/net_prio.go +++ b/fs/net_prio.go @@ -10,6 +10,12 @@ func (s *NetPrioGroup) Name() string { return "net_prio" } +// ID returns the controller ID for net_prio subsystem. +// Returns 0 as net_prio is not a cgroups.Controller. +func (s *NetPrioGroup) ID() cgroups.Controller { + return 0 +} + func (s *NetPrioGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/fs/perf_event.go b/fs/perf_event.go index 929c412..ba6f448 100644 --- a/fs/perf_event.go +++ b/fs/perf_event.go @@ -10,6 +10,12 @@ func (s *PerfEventGroup) Name() string { return "perf_event" } +// ID returns the controller ID for perf_event subsystem. +// Returns 0 as perf_event is not a cgroups.Controller. +func (s *PerfEventGroup) ID() cgroups.Controller { + return 0 +} + func (s *PerfEventGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/fs/pids.go b/fs/pids.go index 36bd339..fa5b1c8 100644 --- a/fs/pids.go +++ b/fs/pids.go @@ -14,6 +14,11 @@ func (s *PidsGroup) Name() string { return "pids" } +// ID returns the controller ID for pids subsystem. +func (s *PidsGroup) ID() cgroups.Controller { + return cgroups.Pids +} + func (s *PidsGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/fs/rdma.go b/fs/rdma.go index 4b17536..8cc436c 100644 --- a/fs/rdma.go +++ b/fs/rdma.go @@ -11,6 +11,11 @@ func (s *RdmaGroup) Name() string { return "rdma" } +// ID returns the controller ID for rdma subsystem. 
+func (s *RdmaGroup) ID() cgroups.Controller { + return cgroups.RDMA +} + func (s *RdmaGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/fs2/fs2.go b/fs2/fs2.go index 356d087..d129a92 100644 --- a/fs2/fs2.go +++ b/fs2/fs2.go @@ -105,50 +105,86 @@ func (m *Manager) GetAllPids() ([]int, error) { } func (m *Manager) GetStats() (*cgroups.Stats, error) { - var errs []error + return m.Stats(nil) +} + +// Stats returns cgroup statistics for the specified controllers. +// If opts is nil or opts.Controllers is zero, statistics for all controllers are returned. +func (m *Manager) Stats(opts *cgroups.StatsOptions) (*cgroups.Stats, error) { + // Default: query all controllers + controllers := cgroups.AllControllers + if opts != nil && opts.Controllers != 0 { + controllers = opts.Controllers + } + var errs []error + var err error st := cgroups.NewStats() // pids (since kernel 4.5) - if err := statPids(m.dirPath, st); err != nil { - errs = append(errs, err) + if controllers&cgroups.Pids != 0 { + if err = statPids(m.dirPath, st); err != nil { + errs = append(errs, err) + } } + // memory (since kernel 4.5) - if err := statMemory(m.dirPath, st); err != nil && !os.IsNotExist(err) { - errs = append(errs, err) + if controllers&cgroups.Memory != 0 { + if err = statMemory(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } + + if st.MemoryStats.PSI, err = statPSI(m.dirPath, "memory.pressure"); err != nil { + errs = append(errs, err) + } } + // io (since kernel 4.5) - if err := statIo(m.dirPath, st); err != nil && !os.IsNotExist(err) { - errs = append(errs, err) + if controllers&cgroups.IO != 0 { + if err = statIo(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } + + if st.BlkioStats.PSI, err = statPSI(m.dirPath, "io.pressure"); err != nil { + errs = append(errs, err) + } } + // cpu (since kernel 4.15) // Note cpu.stat is available even if the controller is not enabled. 
- if err := statCpu(m.dirPath, st); err != nil && !os.IsNotExist(err) { - errs = append(errs, err) - } - // PSI (since kernel 4.20). - var err error - if st.CpuStats.PSI, err = statPSI(m.dirPath, "cpu.pressure"); err != nil { - errs = append(errs, err) - } - if st.MemoryStats.PSI, err = statPSI(m.dirPath, "memory.pressure"); err != nil { - errs = append(errs, err) - } - if st.BlkioStats.PSI, err = statPSI(m.dirPath, "io.pressure"); err != nil { - errs = append(errs, err) + if controllers&cgroups.CPU != 0 { + if err = statCpu(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } + + // PSI (since kernel 4.20) + if st.CpuStats.PSI, err = statPSI(m.dirPath, "cpu.pressure"); err != nil { + errs = append(errs, err) + } + } + // hugetlb (since kernel 5.6) - if err := statHugeTlb(m.dirPath, st); err != nil && !os.IsNotExist(err) { - errs = append(errs, err) + if controllers&cgroups.HugeTLB != 0 { + if err := statHugeTlb(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } } + // rdma (since kernel 4.11) - if err := fscommon.RdmaGetStats(m.dirPath, st); err != nil && !os.IsNotExist(err) { - errs = append(errs, err) + if controllers&cgroups.RDMA != 0 { + if err := fscommon.RdmaGetStats(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } } + // misc (since kernel 5.13) - if err := statMisc(m.dirPath, st); err != nil && !os.IsNotExist(err) { - errs = append(errs, err) + if controllers&cgroups.Misc != 0 { + if err := statMisc(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } } + if len(errs) > 0 && !m.config.Rootless { return st, fmt.Errorf("error while statting cgroup v2: %+v", errs) } diff --git a/fs2/fs2_test.go b/fs2/fs2_test.go new file mode 100644 index 0000000..736dc86 --- /dev/null +++ b/fs2/fs2_test.go @@ -0,0 +1,306 @@ +package fs2 + +import ( + "os" + "path/filepath" + "testing" + + "github.com/opencontainers/cgroups" +) + +const ( + 
exampleCPUStatData = `usage_usec 1000000 +user_usec 600000 +system_usec 400000 +nr_periods 100 +nr_throttled 10 +throttled_usec 50000 +nr_bursts 5 +burst_usec 10000` + + exampleCPUStatDataShort = `usage_usec 1000000 +user_usec 600000 +system_usec 400000` + + exampleMemoryCurrent = "4194304" + exampleMemoryMax = "max" + + examplePSIData = `some avg10=1.00 avg60=2.00 avg300=3.00 total=100000 +full avg10=0.50 avg60=1.00 avg300=1.50 total=50000` + + exampleRdmaCurrent = `mlx5_0 hca_handle=10 hca_object=20` +) + +func pointerTo(c cgroups.Controller) *cgroups.Controller { + return &c +} + +func TestStats(t *testing.T) { + // We're using a fake cgroupfs. + cgroups.TestMode = true + + testCases := []struct { + name string + controller *cgroups.Controller + setupFiles map[string]string + validate func(*testing.T, *cgroups.Stats) + }{ + { + name: "CPU stats", + controller: pointerTo(cgroups.CPU), + setupFiles: map[string]string{ + "cpu.stat": exampleCPUStatData, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify CPU stats populated correctly (values are converted from usec to nsec) + if stats.CpuStats.CpuUsage.TotalUsage != 1000000000 { + t.Errorf("expected TotalUsage 1000000000, got %d", stats.CpuStats.CpuUsage.TotalUsage) + } + if stats.CpuStats.CpuUsage.UsageInUsermode != 600000000 { + t.Errorf("expected UsageInUsermode 600000000, got %d", stats.CpuStats.CpuUsage.UsageInUsermode) + } + if stats.CpuStats.CpuUsage.UsageInKernelmode != 400000000 { + t.Errorf("expected UsageInKernelmode 400000000, got %d", stats.CpuStats.CpuUsage.UsageInKernelmode) + } + if stats.CpuStats.ThrottlingData.Periods != 100 { + t.Errorf("expected Periods 100, got %d", stats.CpuStats.ThrottlingData.Periods) + } + if stats.CpuStats.ThrottlingData.ThrottledPeriods != 10 { + t.Errorf("expected ThrottledPeriods 10, got %d", stats.CpuStats.ThrottlingData.ThrottledPeriods) + } + }, + }, + { + name: "CPU stats with PSI", + controller: pointerTo(cgroups.CPU), + setupFiles: 
map[string]string{ + "cpu.stat": exampleCPUStatData, + "cpu.pressure": examplePSIData, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify PSI data is populated + if stats.CpuStats.PSI == nil { + t.Fatal("expected PSI to be populated") + } + if stats.CpuStats.PSI.Some.Avg10 != 1.00 { + t.Errorf("expected PSI.Some.Avg10 1.00, got %f", stats.CpuStats.PSI.Some.Avg10) + } + if stats.CpuStats.PSI.Full.Total != 50000 { + t.Errorf("expected PSI.Full.Total 50000, got %d", stats.CpuStats.PSI.Full.Total) + } + }, + }, + { + name: "Memory stats", + controller: pointerTo(cgroups.Memory), + setupFiles: map[string]string{ + "memory.stat": exampleMemoryStatData, + "memory.current": exampleMemoryCurrent, + "memory.max": exampleMemoryMax, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify memory stats + if stats.MemoryStats.Usage.Usage != 4194304 { + t.Errorf("expected Usage 4194304, got %d", stats.MemoryStats.Usage.Usage) + } + // Cache comes from "file" field in memory.stat (6502666240 from exampleMemoryStatData) + if stats.MemoryStats.Cache != 6502666240 { + t.Errorf("expected Cache 6502666240, got %d", stats.MemoryStats.Cache) + } + }, + }, + { + name: "Memory stats with PSI", + controller: pointerTo(cgroups.Memory), + setupFiles: map[string]string{ + "memory.stat": exampleMemoryStatData, + "memory.current": exampleMemoryCurrent, + "memory.max": exampleMemoryMax, + "memory.pressure": examplePSIData, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify PSI data is populated + if stats.MemoryStats.PSI == nil { + t.Fatal("expected PSI to be populated") + } + if stats.MemoryStats.PSI.Some.Avg60 != 2.00 { + t.Errorf("expected PSI.Some.Avg60 2.00, got %f", stats.MemoryStats.PSI.Some.Avg60) + } + }, + }, + { + name: "Pids stats", + controller: pointerTo(cgroups.Pids), + setupFiles: map[string]string{ + "pids.current": "42\n", + "pids.max": "1000\n", + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + if 
stats.PidsStats.Current != 42 { + t.Errorf("expected Current 42, got %d", stats.PidsStats.Current) + } + if stats.PidsStats.Limit != 1000 { + t.Errorf("expected Limit 1000, got %d", stats.PidsStats.Limit) + } + }, + }, + { + name: "IO stats", + controller: pointerTo(cgroups.IO), + setupFiles: map[string]string{ + "io.stat": exampleIoStatData, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify IO stats - check that we have entries + if len(stats.BlkioStats.IoServiceBytesRecursive) == 0 { + t.Error("expected IoServiceBytesRecursive to have entries") + } + if len(stats.BlkioStats.IoServicedRecursive) == 0 { + t.Error("expected IoServicedRecursive to have entries") + } + }, + }, + { + name: "IO stats with PSI", + controller: pointerTo(cgroups.IO), + setupFiles: map[string]string{ + "io.stat": exampleIoStatData, + "io.pressure": examplePSIData, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify PSI data is populated + if stats.BlkioStats.PSI == nil { + t.Fatal("expected PSI to be populated") + } + if stats.BlkioStats.PSI.Full.Avg300 != 1.50 { + t.Errorf("expected PSI.Full.Avg300 1.50, got %f", stats.BlkioStats.PSI.Full.Avg300) + } + }, + }, + { + name: "Misc stats", + controller: pointerTo(cgroups.Misc), + setupFiles: map[string]string{ + "misc.current": exampleMiscCurrentData, + "misc.events": exampleMiscEventsData, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify misc stats - exampleMiscCurrentData has res_a, res_b, res_c + if _, ok := stats.MiscStats["res_a"]; !ok { + t.Error("expected MiscStats to have 'res_a' entry") + } + if _, ok := stats.MiscStats["res_b"]; !ok { + t.Error("expected MiscStats to have 'res_b' entry") + } + if _, ok := stats.MiscStats["res_c"]; !ok { + t.Error("expected MiscStats to have 'res_c' entry") + } + }, + }, + { + name: "RDMA stats", + controller: pointerTo(cgroups.RDMA), + setupFiles: map[string]string{ + "rdma.current": exampleRdmaCurrent, + "rdma.max": "mlx5_0 
hca_handle=max hca_object=max", + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify RDMA stats are populated + if len(stats.RdmaStats.RdmaCurrent) == 0 { + t.Error("expected RdmaStats.RdmaCurrent to have entries") + } + }, + }, + { + name: "HugeTLB stats", + controller: pointerTo(cgroups.HugeTLB), + setupFiles: map[string]string{}, + validate: func(_ *testing.T, _ *cgroups.Stats) { + // HugePageSizes() returns available page sizes from the system + // We can only test if files don't exist (should not error) + // No specific assertions needed - just verifying it doesn't error + }, + }, + { + name: "Multiple controllers - CPU+Pids", + controller: pointerTo(cgroups.CPU | cgroups.Pids), + setupFiles: map[string]string{ + "cpu.stat": exampleCPUStatDataShort, + "pids.current": "42\n", + "pids.max": "1000\n", + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify both stats are populated in the same object + if stats.CpuStats.CpuUsage.TotalUsage != 1000000000 { + t.Errorf("expected TotalUsage 1000000000, got %d", stats.CpuStats.CpuUsage.TotalUsage) + } + if stats.PidsStats.Current != 42 { + t.Errorf("expected Current 42, got %d", stats.PidsStats.Current) + } + if stats.PidsStats.Limit != 1000 { + t.Errorf("expected Limit 1000, got %d", stats.PidsStats.Limit) + } + }, + }, + { + name: "All controllers with nil options", + controller: nil, + setupFiles: map[string]string{ + "cpu.stat": exampleCPUStatData, + "memory.stat": exampleMemoryStatData, + "memory.current": exampleMemoryCurrent, + "memory.max": exampleMemoryMax, + "pids.current": "42\n", + "pids.max": "1000\n", + "io.stat": exampleIoStatData, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify all stats are populated (non-zero values) + if stats.CpuStats.CpuUsage.TotalUsage == 0 { + t.Error("expected non-zero CPU TotalUsage") + } + if stats.MemoryStats.Usage.Usage == 0 { + t.Error("expected non-zero Memory Usage") + } + if stats.PidsStats.Current == 0 { + 
t.Error("expected non-zero Pids Current") + } + if len(stats.BlkioStats.IoServiceBytesRecursive) == 0 { + t.Error("expected non-empty IO stats") + } + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + fakeCgroupDir := t.TempDir() + + // Setup + for filename, content := range tc.setupFiles { + if err := os.WriteFile(filepath.Join(fakeCgroupDir, filename), []byte(content), 0o644); err != nil { + t.Fatal(err) + } + } + config := &cgroups.Cgroup{} + m, err := NewManager(config, fakeCgroupDir) + if err != nil { + t.Fatal(err) + } + + var stats *cgroups.Stats + if tc.controller == nil { + stats, err = m.Stats(nil) + } else { + stats, err = m.Stats(&cgroups.StatsOptions{Controllers: *tc.controller}) + } + if err != nil { + t.Fatal(err) + } + + if tc.validate != nil { + tc.validate(t, stats) + } + }) + } +} diff --git a/stats.go b/stats.go index debc2df..7501000 100644 --- a/stats.go +++ b/stats.go @@ -211,3 +211,53 @@ func NewStats() *Stats { miscStats := make(map[string]MiscStats) return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats, MiscStats: miscStats} } + +// Controller represents a cgroup controller type for stats collection. +type Controller int + +// Controller types for cgroup stats collection. +const ( + CPU Controller = 1 << iota + Memory + Pids + IO + HugeTLB + RDMA + Misc + CPUSet // v1 only +) + +// AllControllers is a bitmask of all available controllers. +const AllControllers = CPU | Memory | Pids | IO | HugeTLB | RDMA | Misc | CPUSet + +// String returns the controller name. +func (c Controller) String() string { + switch c { + case CPU: + return "cpu" + case Memory: + return "memory" + case Pids: + return "pids" + case IO: + return "io" + case HugeTLB: + return "hugetlb" + case RDMA: + return "rdma" + case Misc: + return "misc" + case CPUSet: + return "cpuset" + default: + return "unknown" + } +} + +// StatsOptions specifies which controllers to retrieve statistics for. 
+type StatsOptions struct { + // Controllers is a bitmask of Controller values. + // If 0, all available controllers are queried (default behavior). + // Use Controller constants like: CPU | Memory | Pids + Controllers Controller +} diff --git a/systemd/v1.go b/systemd/v1.go index 96e69bb..4e71377 100644 --- a/systemd/v1.go +++ b/systemd/v1.go @@ -46,6 +46,8 @@ func NewLegacyManager(cg *cgroups.Cgroup, paths map[string]string) (*LegacyManag type subsystem interface { // Name returns the name of the subsystem. Name() string + // ID returns the controller ID for filtering. + ID() cgroups.Controller // GetStats returns the stats, as 'stats', corresponding to the cgroup under 'path'. GetStats(path string, stats *cgroups.Stats) error // Set sets cgroup resource limits. @@ -69,7 +71,7 @@ var legacySubsystems = []subsystem{ &fs.NetClsGroup{}, &fs.NameGroup{GroupName: "name=systemd"}, &fs.RdmaGroup{}, - &fs.NameGroup{GroupName: "misc"}, + &fs.NameGroup{GroupName: "misc", GroupID: cgroups.Misc}, } func genV1ResourcesProperties(r *cgroups.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) { @@ -339,14 +341,33 @@ func (m *LegacyManager) GetAllPids() ([]int, error) { } func (m *LegacyManager) GetStats() (*cgroups.Stats, error) { + return m.Stats(nil) +} + +// Stats returns cgroup statistics for the specified controllers. +// If opts is nil or opts.Controllers is zero, statistics for all controllers are returned. 
+func (m *LegacyManager) Stats(opts *cgroups.StatsOptions) (*cgroups.Stats, error) { m.mu.Lock() defer m.mu.Unlock() + + // Default: query all controllers (same as original GetStats behavior) + controllers := cgroups.AllControllers + if opts != nil && opts.Controllers != 0 { + controllers = opts.Controllers + } + stats := cgroups.NewStats() for _, sys := range legacySubsystems { path := m.paths[sys.Name()] if path == "" { continue } + + // Filter based on controller type + if sys.ID()&controllers == 0 { + continue + } + if err := sys.GetStats(path, stats); err != nil { return nil, err } diff --git a/systemd/v2.go b/systemd/v2.go index f76c93e..6e1b7ec 100644 --- a/systemd/v2.go +++ b/systemd/v2.go @@ -497,6 +497,12 @@ func (m *UnifiedManager) GetStats() (*cgroups.Stats, error) { return m.fsMgr.GetStats() } +// Stats returns cgroup statistics for the specified controllers. +// If opts is nil or opts.Controllers is zero, statistics for all controllers are returned. +func (m *UnifiedManager) Stats(opts *cgroups.StatsOptions) (*cgroups.Stats, error) { + return m.fsMgr.Stats(opts) +} + func (m *UnifiedManager) Set(r *cgroups.Resources) error { if r == nil { return nil