utils.go (10837B)
/*
   Copyright The containerd Authors.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
*/

package cgroup2

import (
	"bufio"
	"fmt"
	"io"
	"math"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"
	"unsafe"

	"github.com/containerd/cgroups/v3/cgroup2/stats"

	"github.com/godbus/dbus/v5"
	"github.com/opencontainers/runtime-spec/specs-go"
	"github.com/sirupsen/logrus"
	"golang.org/x/sys/unix"
)

const (
	// cgroupProcs and cgroupThreads are file names; presumably the
	// per-cgroup control files listing member processes and threads.
	cgroupProcs   = "cgroup.procs"
	cgroupThreads = "cgroup.threads"
	// defaultDirPerm is a permission mode (rwxr-xr-x); presumably applied to
	// directories created by this package — confirm against the callers.
	defaultDirPerm = 0755
)

// defaultFilePerm is a var so that the test framework can change the filemode
// of all files created when the tests are running.  The difference between the
// tests and real world use is that files like "cgroup.procs" will exist when writing
// to a real cgroup filesystem and do not exist prior when running in the tests.
49 // this is set to a non 0 value in the test code 50 var defaultFilePerm = os.FileMode(0) 51 52 // remove will remove a cgroup path handling EAGAIN and EBUSY errors and 53 // retrying the remove after a exp timeout 54 func remove(path string) error { 55 var err error 56 delay := 10 * time.Millisecond 57 for i := 0; i < 5; i++ { 58 if i != 0 { 59 time.Sleep(delay) 60 delay *= 2 61 } 62 if err = os.RemoveAll(path); err == nil { 63 return nil 64 } 65 } 66 return fmt.Errorf("cgroups: unable to remove path %q: %w", path, err) 67 } 68 69 // parseCgroupProcsFile parses /sys/fs/cgroup/$GROUPPATH/cgroup.procs 70 func parseCgroupProcsFile(path string) ([]uint64, error) { 71 f, err := os.Open(path) 72 if err != nil { 73 return nil, err 74 } 75 defer f.Close() 76 var ( 77 out []uint64 78 s = bufio.NewScanner(f) 79 ) 80 for s.Scan() { 81 if t := s.Text(); t != "" { 82 pid, err := strconv.ParseUint(t, 10, 0) 83 if err != nil { 84 return nil, err 85 } 86 out = append(out, pid) 87 } 88 } 89 if err := s.Err(); err != nil { 90 return nil, err 91 } 92 return out, nil 93 } 94 95 func parseKV(raw string) (string, interface{}, error) { 96 parts := strings.Fields(raw) 97 switch len(parts) { 98 case 2: 99 v, err := parseUint(parts[1], 10, 64) 100 if err != nil { 101 // if we cannot parse as a uint, parse as a string 102 return parts[0], parts[1], nil 103 } 104 return parts[0], v, nil 105 default: 106 return "", 0, ErrInvalidFormat 107 } 108 } 109 110 func parseUint(s string, base, bitSize int) (uint64, error) { 111 v, err := strconv.ParseUint(s, base, bitSize) 112 if err != nil { 113 intValue, intErr := strconv.ParseInt(s, base, bitSize) 114 // 1. Handle negative values greater than MinInt64 (and) 115 // 2. 
Handle negative values lesser than MinInt64 116 if intErr == nil && intValue < 0 { 117 return 0, nil 118 } else if intErr != nil && 119 intErr.(*strconv.NumError).Err == strconv.ErrRange && 120 intValue < 0 { 121 return 0, nil 122 } 123 return 0, err 124 } 125 return v, nil 126 } 127 128 // parseCgroupFile parses /proc/PID/cgroup file and return string 129 func parseCgroupFile(path string) (string, error) { 130 f, err := os.Open(path) 131 if err != nil { 132 return "", err 133 } 134 defer f.Close() 135 return parseCgroupFromReader(f) 136 } 137 138 func parseCgroupFromReader(r io.Reader) (string, error) { 139 var ( 140 s = bufio.NewScanner(r) 141 ) 142 for s.Scan() { 143 var ( 144 text = s.Text() 145 parts = strings.SplitN(text, ":", 3) 146 ) 147 if len(parts) < 3 { 148 return "", fmt.Errorf("invalid cgroup entry: %q", text) 149 } 150 // text is like "0::/user.slice/user-1001.slice/session-1.scope" 151 if parts[0] == "0" && parts[1] == "" { 152 return parts[2], nil 153 } 154 } 155 if err := s.Err(); err != nil { 156 return "", err 157 } 158 return "", fmt.Errorf("cgroup path not found") 159 } 160 161 // ToResources converts the oci LinuxResources struct into a 162 // v2 Resources type for use with this package. 
163 // 164 // converting cgroups configuration from v1 to v2 165 // ref: https://github.com/containers/crun/blob/master/crun.1.md#cgroup-v2 166 func ToResources(spec *specs.LinuxResources) *Resources { 167 var resources Resources 168 if cpu := spec.CPU; cpu != nil { 169 resources.CPU = &CPU{ 170 Cpus: cpu.Cpus, 171 Mems: cpu.Mems, 172 } 173 if shares := cpu.Shares; shares != nil { 174 convertedWeight := 1 + ((*shares-2)*9999)/262142 175 resources.CPU.Weight = &convertedWeight 176 } 177 if period := cpu.Period; period != nil { 178 resources.CPU.Max = NewCPUMax(cpu.Quota, period) 179 } 180 } 181 if mem := spec.Memory; mem != nil { 182 resources.Memory = &Memory{} 183 if swap := mem.Swap; swap != nil { 184 resources.Memory.Swap = swap 185 } 186 if l := mem.Limit; l != nil { 187 resources.Memory.Max = l 188 } 189 if l := mem.Reservation; l != nil { 190 resources.Memory.Low = l 191 } 192 } 193 if hugetlbs := spec.HugepageLimits; hugetlbs != nil { 194 hugeTlbUsage := HugeTlb{} 195 for _, hugetlb := range hugetlbs { 196 hugeTlbUsage = append(hugeTlbUsage, HugeTlbEntry{ 197 HugePageSize: hugetlb.Pagesize, 198 Limit: hugetlb.Limit, 199 }) 200 } 201 resources.HugeTlb = &hugeTlbUsage 202 } 203 if pids := spec.Pids; pids != nil { 204 resources.Pids = &Pids{ 205 Max: pids.Limit, 206 } 207 } 208 if i := spec.BlockIO; i != nil { 209 resources.IO = &IO{} 210 if i.Weight != nil { 211 resources.IO.BFQ.Weight = 1 + (*i.Weight-10)*9999/990 212 } 213 for t, devices := range map[IOType][]specs.LinuxThrottleDevice{ 214 ReadBPS: i.ThrottleReadBpsDevice, 215 WriteBPS: i.ThrottleWriteBpsDevice, 216 ReadIOPS: i.ThrottleReadIOPSDevice, 217 WriteIOPS: i.ThrottleWriteIOPSDevice, 218 } { 219 for _, d := range devices { 220 resources.IO.Max = append(resources.IO.Max, Entry{ 221 Type: t, 222 Major: d.Major, 223 Minor: d.Minor, 224 Rate: d.Rate, 225 }) 226 } 227 } 228 } 229 if i := spec.Rdma; i != nil { 230 resources.RDMA = &RDMA{} 231 for device, value := range spec.Rdma { 232 if device != "" && 
(value.HcaHandles != nil && value.HcaObjects != nil) { 233 resources.RDMA.Limit = append(resources.RDMA.Limit, RDMAEntry{ 234 Device: device, 235 HcaHandles: *value.HcaHandles, 236 HcaObjects: *value.HcaObjects, 237 }) 238 } 239 } 240 } 241 242 return &resources 243 } 244 245 // Gets uint64 parsed content of single value cgroup stat file 246 func getStatFileContentUint64(filePath string) uint64 { 247 contents, err := os.ReadFile(filePath) 248 if err != nil { 249 return 0 250 } 251 trimmed := strings.TrimSpace(string(contents)) 252 if trimmed == "max" { 253 return math.MaxUint64 254 } 255 256 res, err := parseUint(trimmed, 10, 64) 257 if err != nil { 258 logrus.Errorf("unable to parse %q as a uint from Cgroup file %q", string(contents), filePath) 259 return res 260 } 261 262 return res 263 } 264 265 func readIoStats(path string) []*stats.IOEntry { 266 // more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt 267 var usage []*stats.IOEntry 268 fpath := filepath.Join(path, "io.stat") 269 currentData, err := os.ReadFile(fpath) 270 if err != nil { 271 return usage 272 } 273 entries := strings.Split(string(currentData), "\n") 274 275 for _, entry := range entries { 276 parts := strings.Split(entry, " ") 277 if len(parts) < 2 { 278 continue 279 } 280 majmin := strings.Split(parts[0], ":") 281 if len(majmin) != 2 { 282 continue 283 } 284 major, err := strconv.ParseUint(majmin[0], 10, 0) 285 if err != nil { 286 return usage 287 } 288 minor, err := strconv.ParseUint(majmin[1], 10, 0) 289 if err != nil { 290 return usage 291 } 292 parts = parts[1:] 293 ioEntry := stats.IOEntry{ 294 Major: major, 295 Minor: minor, 296 } 297 for _, s := range parts { 298 keyPairValue := strings.Split(s, "=") 299 if len(keyPairValue) != 2 { 300 continue 301 } 302 v, err := strconv.ParseUint(keyPairValue[1], 10, 0) 303 if err != nil { 304 continue 305 } 306 switch keyPairValue[0] { 307 case "rbytes": 308 ioEntry.Rbytes = v 309 case "wbytes": 310 
ioEntry.Wbytes = v 311 case "rios": 312 ioEntry.Rios = v 313 case "wios": 314 ioEntry.Wios = v 315 } 316 } 317 usage = append(usage, &ioEntry) 318 } 319 return usage 320 } 321 322 func rdmaStats(filepath string) []*stats.RdmaEntry { 323 currentData, err := os.ReadFile(filepath) 324 if err != nil { 325 return []*stats.RdmaEntry{} 326 } 327 return toRdmaEntry(strings.Split(string(currentData), "\n")) 328 } 329 330 func parseRdmaKV(raw string, entry *stats.RdmaEntry) { 331 var value uint64 332 var err error 333 334 parts := strings.Split(raw, "=") 335 switch len(parts) { 336 case 2: 337 if parts[1] == "max" { 338 value = math.MaxUint32 339 } else { 340 value, err = parseUint(parts[1], 10, 32) 341 if err != nil { 342 return 343 } 344 } 345 if parts[0] == "hca_handle" { 346 entry.HcaHandles = uint32(value) 347 } else if parts[0] == "hca_object" { 348 entry.HcaObjects = uint32(value) 349 } 350 } 351 } 352 353 func toRdmaEntry(strEntries []string) []*stats.RdmaEntry { 354 var rdmaEntries []*stats.RdmaEntry 355 for i := range strEntries { 356 parts := strings.Fields(strEntries[i]) 357 switch len(parts) { 358 case 3: 359 entry := new(stats.RdmaEntry) 360 entry.Device = parts[0] 361 parseRdmaKV(parts[1], entry) 362 parseRdmaKV(parts[2], entry) 363 364 rdmaEntries = append(rdmaEntries, entry) 365 default: 366 continue 367 } 368 } 369 return rdmaEntries 370 } 371 372 // isUnitExists returns true if the error is that a systemd unit already exists. 
373 func isUnitExists(err error) bool { 374 if err != nil { 375 if dbusError, ok := err.(dbus.Error); ok { 376 return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists") 377 } 378 } 379 return false 380 } 381 382 func systemdUnitFromPath(path string) string { 383 _, unit := filepath.Split(path) 384 return unit 385 } 386 387 func readHugeTlbStats(path string) []*stats.HugeTlbStat { 388 var usage = []*stats.HugeTlbStat{} 389 var keyUsage = make(map[string]*stats.HugeTlbStat) 390 f, err := os.Open(path) 391 if err != nil { 392 return usage 393 } 394 files, err := f.Readdir(-1) 395 f.Close() 396 if err != nil { 397 return usage 398 } 399 400 for _, file := range files { 401 if strings.Contains(file.Name(), "hugetlb") && 402 (strings.HasSuffix(file.Name(), "max") || strings.HasSuffix(file.Name(), "current")) { 403 var hugeTlb *stats.HugeTlbStat 404 var ok bool 405 fileName := strings.Split(file.Name(), ".") 406 pageSize := fileName[1] 407 if hugeTlb, ok = keyUsage[pageSize]; !ok { 408 hugeTlb = &stats.HugeTlbStat{} 409 } 410 hugeTlb.Pagesize = pageSize 411 out, err := os.ReadFile(filepath.Join(path, file.Name())) 412 if err != nil { 413 continue 414 } 415 var value uint64 416 stringVal := strings.TrimSpace(string(out)) 417 if stringVal == "max" { 418 value = math.MaxUint64 419 } else { 420 value, err = strconv.ParseUint(stringVal, 10, 64) 421 } 422 if err != nil { 423 continue 424 } 425 switch fileName[2] { 426 case "max": 427 hugeTlb.Max = value 428 case "current": 429 hugeTlb.Current = value 430 } 431 keyUsage[pageSize] = hugeTlb 432 } 433 } 434 for _, entry := range keyUsage { 435 usage = append(usage, entry) 436 } 437 return usage 438 } 439 440 func getSubreaper() (int, error) { 441 var i uintptr 442 if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil { 443 return -1, err 444 } 445 return int(i), nil 446 }