gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

memory.go (13738B)


      1 /*
      2    Copyright The containerd Authors.
      3 
      4    Licensed under the Apache License, Version 2.0 (the "License");
      5    you may not use this file except in compliance with the License.
      6    You may obtain a copy of the License at
      7 
      8        http://www.apache.org/licenses/LICENSE-2.0
      9 
     10    Unless required by applicable law or agreed to in writing, software
     11    distributed under the License is distributed on an "AS IS" BASIS,
     12    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13    See the License for the specific language governing permissions and
     14    limitations under the License.
     15 */
     16 
     17 package cgroup1
     18 
     19 import (
     20 	"bufio"
     21 	"fmt"
     22 	"io"
     23 	"os"
     24 	"path/filepath"
     25 	"strconv"
     26 	"strings"
     27 
     28 	v1 "github.com/containerd/cgroups/v3/cgroup1/stats"
     29 	specs "github.com/opencontainers/runtime-spec/specs-go"
     30 	"golang.org/x/sys/unix"
     31 )
     32 
// MemoryEvent is an interface that V1 memory Cgroup notifications implement.
// EventFile returns the name of the file, inside the cgroup directory, that supports
// the notification api e.g. "memory.usage_in_bytes"; that file is opened and its fd
// is written to "cgroup.event_control". Arg returns the event-specific argument
// string written after that fd (it is empty for OOM events).
type MemoryEvent interface {
	Arg() string
	EventFile() string
}
     40 
// memoryThresholdEvent is the MemoryEvent implementation for memory usage thresholds.
type memoryThresholdEvent struct {
	// threshold is the value Arg reports, formatted as a base-10 string.
	threshold uint64
	// swap selects "memory.memsw.usage_in_bytes" over "memory.usage_in_bytes" as the event file.
	swap      bool
}
     45 
     46 // MemoryThresholdEvent returns a new [MemoryEvent] representing the memory threshold set.
     47 // If swap is true, the event will be registered using memory.memsw.usage_in_bytes
     48 func MemoryThresholdEvent(threshold uint64, swap bool) MemoryEvent {
     49 	return &memoryThresholdEvent{
     50 		threshold,
     51 		swap,
     52 	}
     53 }
     54 
     55 func (m *memoryThresholdEvent) Arg() string {
     56 	return strconv.FormatUint(m.threshold, 10)
     57 }
     58 
     59 func (m *memoryThresholdEvent) EventFile() string {
     60 	if m.swap {
     61 		return "memory.memsw.usage_in_bytes"
     62 	}
     63 	return "memory.usage_in_bytes"
     64 }
     65 
// oomEvent is the MemoryEvent implementation for OOM notifications; it carries no state.
type oomEvent struct{}
     67 
     68 // OOMEvent returns a new oom event to be used with RegisterMemoryEvent.
     69 func OOMEvent() MemoryEvent {
     70 	return &oomEvent{}
     71 }
     72 
     73 func (oom *oomEvent) Arg() string {
     74 	return ""
     75 }
     76 
     77 func (oom *oomEvent) EventFile() string {
     78 	return "memory.oom_control"
     79 }
     80 
// memoryPressureEvent is the MemoryEvent implementation for memory pressure notifications.
type memoryPressureEvent struct {
	// pressureLevel is the first component of the string returned by Arg.
	pressureLevel MemoryPressureLevel
	// hierarchy is the notification mode, the second component of the string returned by Arg.
	hierarchy     EventNotificationMode
}
     85 
     86 // MemoryPressureEvent returns a new [MemoryEvent] representing the memory pressure set.
     87 func MemoryPressureEvent(pressureLevel MemoryPressureLevel, hierarchy EventNotificationMode) MemoryEvent {
     88 	return &memoryPressureEvent{
     89 		pressureLevel,
     90 		hierarchy,
     91 	}
     92 }
     93 
     94 func (m *memoryPressureEvent) Arg() string {
     95 	return string(m.pressureLevel) + "," + string(m.hierarchy)
     96 }
     97 
     98 func (m *memoryPressureEvent) EventFile() string {
     99 	return "memory.pressure_level"
    100 }
    101 
// MemoryPressureLevel corresponds to the memory pressure levels defined
// for memory cgroups. Its string value is used verbatim as the first
// component of a memory pressure event's argument.
type MemoryPressureLevel string
    105 
// The three memory pressure levels are as follows.
//   - The "low" level means that the system is reclaiming memory for new
//     allocations. Monitoring this reclaiming activity might be useful for
//     maintaining cache level. Upon notification, the program (typically
//     "Activity Manager") might analyze vmstat and act in advance (i.e.
//     prematurely shutdown unimportant services).
//   - The "medium" level means that the system is experiencing medium memory
//     pressure, the system might be making swap, paging out active file caches,
//     etc. Upon this event applications may decide to further analyze
//     vmstat/zoneinfo/memcg or internal memory usage statistics and free any
//     resources that can be easily reconstructed or re-read from a disk.
//   - The "critical" level means that the system is actively thrashing, it is
//     about to run out of memory (OOM) or even the in-kernel OOM killer is on
//     its way to trigger. Applications should do whatever they can to help the
//     system. It might be too late to consult with vmstat or any other
//     statistics, so it is advisable to take an immediate action.
//
// See "https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt" Section 11.
const (
	LowPressure      MemoryPressureLevel = "low"
	MediumPressure   MemoryPressureLevel = "medium"
	CriticalPressure MemoryPressureLevel = "critical"
)
    128 
// EventNotificationMode corresponds to the notification modes
// for the memory cgroups pressure level notifications. Its string value is
// used verbatim as the second component of a memory pressure event's argument.
type EventNotificationMode string
    132 
// There are three optional modes that specify different propagation behavior.
// The descriptions below are quoted from the kernel documentation; "above" and
// "the above example" refer to that document, not to this file.
//   - "default": this is the default behavior specified above. This mode is the
//     same as omitting the optional mode parameter, preserved for backwards
//     compatibility.
//   - "hierarchy": events always propagate up to the root, similar to the default
//     behavior, except that propagation continues regardless of whether there are
//     event listeners at each level, with the "hierarchy" mode. In the above
//     example, groups A, B, and C will receive notification of memory pressure.
//   - "local": events are pass-through, i.e. they only receive notifications when
//     memory pressure is experienced in the memcg for which the notification is
//     registered. In the above example, group C will receive notification if
//     registered for "local" notification and the group experiences memory
//     pressure. However, group B will never receive notification, regardless if
//     there is an event listener for group C or not, if group B is registered for
//     local notification.
//
// See "https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt" Section 11.
const (
	DefaultMode   EventNotificationMode = "default"
	LocalMode     EventNotificationMode = "local"
	HierarchyMode EventNotificationMode = "hierarchy"
)
    154 
    155 // NewMemory returns a Memory controller given the root folder of cgroups.
    156 // It may optionally accept other configuration options, such as IgnoreModules(...)
    157 func NewMemory(root string, options ...func(*memoryController)) *memoryController {
    158 	mc := &memoryController{
    159 		root:    filepath.Join(root, string(Memory)),
    160 		ignored: map[string]struct{}{},
    161 	}
    162 	for _, opt := range options {
    163 		opt(mc)
    164 	}
    165 	return mc
    166 }
    167 
    168 // IgnoreModules configure the memory controller to not read memory metrics for some
    169 // module names (e.g. passing "memsw" would avoid all the memory.memsw.* entries)
    170 func IgnoreModules(names ...string) func(*memoryController) {
    171 	return func(mc *memoryController) {
    172 		for _, name := range names {
    173 			mc.ignored[name] = struct{}{}
    174 		}
    175 	}
    176 }
    177 
    178 // OptionalSwap allows the memory controller to not fail if cgroups is not accounting
    179 // Swap memory (there are no memory.memsw.* entries)
    180 func OptionalSwap() func(*memoryController) {
    181 	return func(mc *memoryController) {
    182 		_, err := os.Stat(filepath.Join(mc.root, "memory.memsw.usage_in_bytes"))
    183 		if os.IsNotExist(err) {
    184 			mc.ignored["memsw"] = struct{}{}
    185 		}
    186 	}
    187 }
    188 
// memoryController implements the V1 memory cgroup controller.
type memoryController struct {
	// root is the memory subsystem directory; cgroup paths are joined onto it.
	root    string
	// ignored is the set of module names (e.g. "memsw") whose counters Stat skips.
	ignored map[string]struct{}
}
    193 
    194 func (m *memoryController) Name() Name {
    195 	return Memory
    196 }
    197 
    198 func (m *memoryController) Path(path string) string {
    199 	return filepath.Join(m.root, path)
    200 }
    201 
    202 func (m *memoryController) Create(path string, resources *specs.LinuxResources) error {
    203 	if err := os.MkdirAll(m.Path(path), defaultDirPerm); err != nil {
    204 		return err
    205 	}
    206 	if resources.Memory == nil {
    207 		return nil
    208 	}
    209 	return m.set(path, getMemorySettings(resources))
    210 }
    211 
    212 func (m *memoryController) Update(path string, resources *specs.LinuxResources) error {
    213 	if resources.Memory == nil {
    214 		return nil
    215 	}
    216 	g := func(v *int64) bool {
    217 		return v != nil && *v > 0
    218 	}
    219 	settings := getMemorySettings(resources)
    220 	if g(resources.Memory.Limit) && g(resources.Memory.Swap) {
    221 		// if the updated swap value is larger than the current memory limit set the swap changes first
    222 		// then set the memory limit as swap must always be larger than the current limit
    223 		current, err := readUint(filepath.Join(m.Path(path), "memory.limit_in_bytes"))
    224 		if err != nil {
    225 			return err
    226 		}
    227 		if current < uint64(*resources.Memory.Swap) {
    228 			settings[0], settings[1] = settings[1], settings[0]
    229 		}
    230 	}
    231 	return m.set(path, settings)
    232 }
    233 
    234 func (m *memoryController) Stat(path string, stats *v1.Metrics) error {
    235 	fMemStat, err := os.Open(filepath.Join(m.Path(path), "memory.stat"))
    236 	if err != nil {
    237 		return err
    238 	}
    239 	defer fMemStat.Close()
    240 	stats.Memory = &v1.MemoryStat{
    241 		Usage:     &v1.MemoryEntry{},
    242 		Swap:      &v1.MemoryEntry{},
    243 		Kernel:    &v1.MemoryEntry{},
    244 		KernelTCP: &v1.MemoryEntry{},
    245 	}
    246 	if err := m.parseStats(fMemStat, stats.Memory); err != nil {
    247 		return err
    248 	}
    249 
    250 	fMemOomControl, err := os.Open(filepath.Join(m.Path(path), "memory.oom_control"))
    251 	if err != nil {
    252 		return err
    253 	}
    254 	defer fMemOomControl.Close()
    255 	stats.MemoryOomControl = &v1.MemoryOomControl{}
    256 	if err := m.parseOomControlStats(fMemOomControl, stats.MemoryOomControl); err != nil {
    257 		return err
    258 	}
    259 	for _, t := range []struct {
    260 		module string
    261 		entry  *v1.MemoryEntry
    262 	}{
    263 		{
    264 			module: "",
    265 			entry:  stats.Memory.Usage,
    266 		},
    267 		{
    268 			module: "memsw",
    269 			entry:  stats.Memory.Swap,
    270 		},
    271 		{
    272 			module: "kmem",
    273 			entry:  stats.Memory.Kernel,
    274 		},
    275 		{
    276 			module: "kmem.tcp",
    277 			entry:  stats.Memory.KernelTCP,
    278 		},
    279 	} {
    280 		if _, ok := m.ignored[t.module]; ok {
    281 			continue
    282 		}
    283 		for _, tt := range []struct {
    284 			name  string
    285 			value *uint64
    286 		}{
    287 			{
    288 				name:  "usage_in_bytes",
    289 				value: &t.entry.Usage,
    290 			},
    291 			{
    292 				name:  "max_usage_in_bytes",
    293 				value: &t.entry.Max,
    294 			},
    295 			{
    296 				name:  "failcnt",
    297 				value: &t.entry.Failcnt,
    298 			},
    299 			{
    300 				name:  "limit_in_bytes",
    301 				value: &t.entry.Limit,
    302 			},
    303 		} {
    304 			parts := []string{"memory"}
    305 			if t.module != "" {
    306 				parts = append(parts, t.module)
    307 			}
    308 			parts = append(parts, tt.name)
    309 			v, err := readUint(filepath.Join(m.Path(path), strings.Join(parts, ".")))
    310 			if err != nil {
    311 				return err
    312 			}
    313 			*tt.value = v
    314 		}
    315 	}
    316 	return nil
    317 }
    318 
    319 func (m *memoryController) parseStats(r io.Reader, stat *v1.MemoryStat) error {
    320 	var (
    321 		raw  = make(map[string]uint64)
    322 		sc   = bufio.NewScanner(r)
    323 		line int
    324 	)
    325 	for sc.Scan() {
    326 		key, v, err := parseKV(sc.Text())
    327 		if err != nil {
    328 			return fmt.Errorf("%d: %v", line, err)
    329 		}
    330 		raw[key] = v
    331 		line++
    332 	}
    333 	if err := sc.Err(); err != nil {
    334 		return err
    335 	}
    336 	stat.Cache = raw["cache"]
    337 	stat.RSS = raw["rss"]
    338 	stat.RSSHuge = raw["rss_huge"]
    339 	stat.MappedFile = raw["mapped_file"]
    340 	stat.Dirty = raw["dirty"]
    341 	stat.Writeback = raw["writeback"]
    342 	stat.PgPgIn = raw["pgpgin"]
    343 	stat.PgPgOut = raw["pgpgout"]
    344 	stat.PgFault = raw["pgfault"]
    345 	stat.PgMajFault = raw["pgmajfault"]
    346 	stat.InactiveAnon = raw["inactive_anon"]
    347 	stat.ActiveAnon = raw["active_anon"]
    348 	stat.InactiveFile = raw["inactive_file"]
    349 	stat.ActiveFile = raw["active_file"]
    350 	stat.Unevictable = raw["unevictable"]
    351 	stat.HierarchicalMemoryLimit = raw["hierarchical_memory_limit"]
    352 	stat.HierarchicalSwapLimit = raw["hierarchical_memsw_limit"]
    353 	stat.TotalCache = raw["total_cache"]
    354 	stat.TotalRSS = raw["total_rss"]
    355 	stat.TotalRSSHuge = raw["total_rss_huge"]
    356 	stat.TotalMappedFile = raw["total_mapped_file"]
    357 	stat.TotalDirty = raw["total_dirty"]
    358 	stat.TotalWriteback = raw["total_writeback"]
    359 	stat.TotalPgPgIn = raw["total_pgpgin"]
    360 	stat.TotalPgPgOut = raw["total_pgpgout"]
    361 	stat.TotalPgFault = raw["total_pgfault"]
    362 	stat.TotalPgMajFault = raw["total_pgmajfault"]
    363 	stat.TotalInactiveAnon = raw["total_inactive_anon"]
    364 	stat.TotalActiveAnon = raw["total_active_anon"]
    365 	stat.TotalInactiveFile = raw["total_inactive_file"]
    366 	stat.TotalActiveFile = raw["total_active_file"]
    367 	stat.TotalUnevictable = raw["total_unevictable"]
    368 	return nil
    369 }
    370 
    371 func (m *memoryController) parseOomControlStats(r io.Reader, stat *v1.MemoryOomControl) error {
    372 	var (
    373 		raw  = make(map[string]uint64)
    374 		sc   = bufio.NewScanner(r)
    375 		line int
    376 	)
    377 	for sc.Scan() {
    378 		key, v, err := parseKV(sc.Text())
    379 		if err != nil {
    380 			return fmt.Errorf("%d: %v", line, err)
    381 		}
    382 		raw[key] = v
    383 		line++
    384 	}
    385 	if err := sc.Err(); err != nil {
    386 		return err
    387 	}
    388 	stat.OomKillDisable = raw["oom_kill_disable"]
    389 	stat.UnderOom = raw["under_oom"]
    390 	stat.OomKill = raw["oom_kill"]
    391 	return nil
    392 }
    393 
    394 func (m *memoryController) set(path string, settings []memorySettings) error {
    395 	for _, t := range settings {
    396 		if t.value != nil {
    397 			if err := os.WriteFile(
    398 				filepath.Join(m.Path(path), "memory."+t.name),
    399 				[]byte(strconv.FormatInt(*t.value, 10)),
    400 				defaultFilePerm,
    401 			); err != nil {
    402 				return err
    403 			}
    404 		}
    405 	}
    406 	return nil
    407 }
    408 
// memorySettings is a single value to write to a memory cgroup file.
type memorySettings struct {
	// name is the file name without its "memory." prefix, e.g. "limit_in_bytes".
	name  string
	// value is the number to write; a nil value means the setting is skipped.
	value *int64
}
    413 
    414 func getMemorySettings(resources *specs.LinuxResources) []memorySettings {
    415 	mem := resources.Memory
    416 	var swappiness *int64
    417 	if mem.Swappiness != nil {
    418 		v := int64(*mem.Swappiness)
    419 		swappiness = &v
    420 	}
    421 	return []memorySettings{
    422 		{
    423 			name:  "limit_in_bytes",
    424 			value: mem.Limit,
    425 		},
    426 		{
    427 			name:  "soft_limit_in_bytes",
    428 			value: mem.Reservation,
    429 		},
    430 		{
    431 			name:  "memsw.limit_in_bytes",
    432 			value: mem.Swap,
    433 		},
    434 		{
    435 			name:  "kmem.limit_in_bytes",
    436 			value: mem.Kernel,
    437 		},
    438 		{
    439 			name:  "kmem.tcp.limit_in_bytes",
    440 			value: mem.KernelTCP,
    441 		},
    442 		{
    443 			name:  "oom_control",
    444 			value: getOomControlValue(mem),
    445 		},
    446 		{
    447 			name:  "swappiness",
    448 			value: swappiness,
    449 		},
    450 	}
    451 }
    452 
    453 func getOomControlValue(mem *specs.LinuxMemory) *int64 {
    454 	if mem.DisableOOMKiller != nil && *mem.DisableOOMKiller {
    455 		i := int64(1)
    456 		return &i
    457 	}
    458 	return nil
    459 }
    460 
    461 func (m *memoryController) memoryEvent(path string, event MemoryEvent) (uintptr, error) {
    462 	root := m.Path(path)
    463 	efd, err := unix.Eventfd(0, unix.EFD_CLOEXEC)
    464 	if err != nil {
    465 		return 0, err
    466 	}
    467 	evtFile, err := os.Open(filepath.Join(root, event.EventFile()))
    468 	if err != nil {
    469 		unix.Close(efd)
    470 		return 0, err
    471 	}
    472 	defer evtFile.Close()
    473 	data := fmt.Sprintf("%d %d %s", efd, evtFile.Fd(), event.Arg())
    474 	evctlPath := filepath.Join(root, "cgroup.event_control")
    475 	if err := os.WriteFile(evctlPath, []byte(data), 0700); err != nil {
    476 		unix.Close(efd)
    477 		return 0, err
    478 	}
    479 	return uintptr(efd), nil
    480 }