map.go (37505B)
1 package ebpf 2 3 import ( 4 "bytes" 5 "errors" 6 "fmt" 7 "io" 8 "math/rand" 9 "path/filepath" 10 "reflect" 11 "time" 12 "unsafe" 13 14 "github.com/cilium/ebpf/btf" 15 "github.com/cilium/ebpf/internal" 16 "github.com/cilium/ebpf/internal/sys" 17 "github.com/cilium/ebpf/internal/unix" 18 ) 19 20 // Errors returned by Map and MapIterator methods. 21 var ( 22 ErrKeyNotExist = errors.New("key does not exist") 23 ErrKeyExist = errors.New("key already exists") 24 ErrIterationAborted = errors.New("iteration aborted") 25 ErrMapIncompatible = errors.New("map spec is incompatible with existing map") 26 errMapNoBTFValue = errors.New("map spec does not contain a BTF Value") 27 ) 28 29 // MapOptions control loading a map into the kernel. 30 type MapOptions struct { 31 // The base path to pin maps in if requested via PinByName. 32 // Existing maps will be re-used if they are compatible, otherwise an 33 // error is returned. 34 PinPath string 35 LoadPinOptions LoadPinOptions 36 } 37 38 // MapID represents the unique ID of an eBPF map 39 type MapID uint32 40 41 // MapSpec defines a Map. 42 type MapSpec struct { 43 // Name is passed to the kernel as a debug aid. Must only contain 44 // alpha numeric and '_' characters. 45 Name string 46 Type MapType 47 KeySize uint32 48 ValueSize uint32 49 MaxEntries uint32 50 51 // Flags is passed to the kernel and specifies additional map 52 // creation attributes. 53 Flags uint32 54 55 // Automatically pin and load a map from MapOptions.PinPath. 56 // Generates an error if an existing pinned map is incompatible with the MapSpec. 57 Pinning PinType 58 59 // Specify numa node during map creation 60 // (effective only if unix.BPF_F_NUMA_NODE flag is set, 61 // which can be imported from golang.org/x/sys/unix) 62 NumaNode uint32 63 64 // The initial contents of the map. May be nil. 65 Contents []MapKV 66 67 // Whether to freeze a map after setting its initial contents. 68 Freeze bool 69 70 // InnerMap is used as a template for ArrayOfMaps and HashOfMaps 71 InnerMap *MapSpec 72 73 // Extra trailing bytes found in the ELF map definition when using structs 74 // larger than libbpf's bpf_map_def. nil if no trailing bytes were present. 75 // Must be nil or empty before instantiating the MapSpec into a Map. 76 Extra *bytes.Reader 77 78 // The key and value type of this map. May be nil. 79 Key, Value btf.Type 80 81 // The BTF associated with this map. 82 BTF *btf.Spec 83 } 84 85 func (ms *MapSpec) String() string { 86 return fmt.Sprintf("%s(keySize=%d, valueSize=%d, maxEntries=%d, flags=%d)", ms.Type, ms.KeySize, ms.ValueSize, ms.MaxEntries, ms.Flags) 87 } 88 89 // Copy returns a copy of the spec. 90 // 91 // MapSpec.Contents is a shallow copy. 92 func (ms *MapSpec) Copy() *MapSpec { 93 if ms == nil { 94 return nil 95 } 96 97 cpy := *ms 98 99 cpy.Contents = make([]MapKV, len(ms.Contents)) 100 copy(cpy.Contents, ms.Contents) 101 102 cpy.InnerMap = ms.InnerMap.Copy() 103 104 return &cpy 105 } 106 107 // hasBTF returns true if the MapSpec has a valid BTF spec and if its 108 // map type supports associated BTF metadata in the kernel. 109 func (ms *MapSpec) hasBTF() bool { 110 return ms.BTF != nil && ms.Type.hasBTF() 111 } 112 113 func (ms *MapSpec) clampPerfEventArraySize() error { 114 if ms.Type != PerfEventArray { 115 return nil 116 } 117 118 n, err := internal.PossibleCPUs() 119 if err != nil { 120 return fmt.Errorf("perf event array: %w", err) 121 } 122 123 if n := uint32(n); ms.MaxEntries > n { 124 ms.MaxEntries = n 125 } 126 127 return nil 128 } 129 130 // dataSection returns the contents and BTF Datasec descriptor of the spec. 131 func (ms *MapSpec) dataSection() ([]byte, *btf.Datasec, error) { 132 133 if ms.Value == nil { 134 return nil, nil, errMapNoBTFValue 135 } 136 137 ds, ok := ms.Value.(*btf.Datasec) 138 if !ok { 139 return nil, nil, fmt.Errorf("map value BTF is a %T, not a *btf.Datasec", ms.Value) 140 } 141 142 if n := len(ms.Contents); n != 1 { 143 return nil, nil, fmt.Errorf("expected one key, found %d", n) 144 } 145 146 kv := ms.Contents[0] 147 value, ok := kv.Value.([]byte) 148 if !ok { 149 return nil, nil, fmt.Errorf("value at first map key is %T, not []byte", kv.Value) 150 } 151 152 return value, ds, nil 153 } 154 155 // MapKV is used to initialize the contents of a Map. 156 type MapKV struct { 157 Key interface{} 158 Value interface{} 159 } 160 161 func (ms *MapSpec) checkCompatibility(m *Map) error { 162 switch { 163 case m.typ != ms.Type: 164 return fmt.Errorf("expected type %v, got %v: %w", ms.Type, m.typ, ErrMapIncompatible) 165 166 case m.keySize != ms.KeySize: 167 return fmt.Errorf("expected key size %v, got %v: %w", ms.KeySize, m.keySize, ErrMapIncompatible) 168 169 case m.valueSize != ms.ValueSize: 170 return fmt.Errorf("expected value size %v, got %v: %w", ms.ValueSize, m.valueSize, ErrMapIncompatible) 171 172 case !(ms.Type == PerfEventArray && ms.MaxEntries == 0) && 173 m.maxEntries != ms.MaxEntries: 174 return fmt.Errorf("expected max entries %v, got %v: %w", ms.MaxEntries, m.maxEntries, ErrMapIncompatible) 175 176 case m.flags != ms.Flags: 177 return fmt.Errorf("expected flags %v, got %v: %w", ms.Flags, m.flags, ErrMapIncompatible) 178 } 179 return nil 180 } 181 182 // Map represents a Map file descriptor. 183 // 184 // It is not safe to close a map which is used by other goroutines. 185 // 186 // Methods which take interface{} arguments by default encode 187 // them using binary.Read/Write in the machine's native endianness. 188 // 189 // Implement encoding.BinaryMarshaler or encoding.BinaryUnmarshaler 190 // if you require custom encoding. 191 type Map struct { 192 name string 193 fd *sys.FD 194 typ MapType 195 keySize uint32 196 valueSize uint32 197 maxEntries uint32 198 flags uint32 199 pinnedPath string 200 // Per CPU maps return values larger than the size in the spec 201 fullValueSize int 202 } 203 204 // NewMapFromFD creates a map from a raw fd. 205 // 206 // You should not use fd after calling this function. 207 func NewMapFromFD(fd int) (*Map, error) { 208 f, err := sys.NewFD(fd) 209 if err != nil { 210 return nil, err 211 } 212 213 return newMapFromFD(f) 214 } 215 216 func newMapFromFD(fd *sys.FD) (*Map, error) { 217 info, err := newMapInfoFromFd(fd) 218 if err != nil { 219 fd.Close() 220 return nil, fmt.Errorf("get map info: %w", err) 221 } 222 223 return newMap(fd, info.Name, info.Type, info.KeySize, info.ValueSize, info.MaxEntries, info.Flags) 224 } 225 226 // NewMap creates a new Map. 227 // 228 // It's equivalent to calling NewMapWithOptions with default options. 229 func NewMap(spec *MapSpec) (*Map, error) { 230 return NewMapWithOptions(spec, MapOptions{}) 231 } 232 233 // NewMapWithOptions creates a new Map. 234 // 235 // Creating a map for the first time will perform feature detection 236 // by creating small, temporary maps. 237 // 238 // The caller is responsible for ensuring the process' rlimit is set 239 // sufficiently high for locking memory during map creation. This can be done 240 // by calling rlimit.RemoveMemlock() prior to calling NewMapWithOptions. 241 // 242 // May return an error wrapping ErrMapIncompatible. 243 func NewMapWithOptions(spec *MapSpec, opts MapOptions) (*Map, error) { 244 handles := newHandleCache() 245 defer handles.close() 246 247 m, err := newMapWithOptions(spec, opts, handles) 248 if err != nil { 249 return nil, fmt.Errorf("creating map: %w", err) 250 } 251 252 if err := m.finalize(spec); err != nil { 253 m.Close() 254 return nil, fmt.Errorf("populating map: %w", err) 255 } 256 257 return m, nil 258 } 259 260 func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_ *Map, err error) { 261 closeOnError := func(c io.Closer) { 262 if err != nil { 263 c.Close() 264 } 265 } 266 267 switch spec.Pinning { 268 case PinByName: 269 if spec.Name == "" { 270 return nil, fmt.Errorf("pin by name: missing Name") 271 } 272 273 if opts.PinPath == "" { 274 return nil, fmt.Errorf("pin by name: missing MapOptions.PinPath") 275 } 276 277 path := filepath.Join(opts.PinPath, spec.Name) 278 m, err := LoadPinnedMap(path, &opts.LoadPinOptions) 279 if errors.Is(err, unix.ENOENT) { 280 break 281 } 282 if err != nil { 283 return nil, fmt.Errorf("load pinned map: %w", err) 284 } 285 defer closeOnError(m) 286 287 if err := spec.checkCompatibility(m); err != nil { 288 return nil, fmt.Errorf("use pinned map %s: %w", spec.Name, err) 289 } 290 291 return m, nil 292 293 case PinNone: 294 // Nothing to do here 295 296 default: 297 return nil, fmt.Errorf("pin type %d: %w", int(spec.Pinning), ErrNotSupported) 298 } 299 300 var innerFd *sys.FD 301 if spec.Type == ArrayOfMaps || spec.Type == HashOfMaps { 302 if spec.InnerMap == nil { 303 return nil, fmt.Errorf("%s requires InnerMap", spec.Type) 304 } 305 306 if spec.InnerMap.Pinning != PinNone { 307 return nil, errors.New("inner maps cannot be pinned") 308 } 309 310 template, err := spec.InnerMap.createMap(nil, opts, handles) 311 if err != nil { 312 return nil, fmt.Errorf("inner map: %w", err) 313 } 314 defer template.Close() 315 316 // Intentionally skip populating and freezing (finalizing) 317 // the inner map template since it will be removed shortly. 318 319 innerFd = template.fd 320 } 321 322 m, err := spec.createMap(innerFd, opts, handles) 323 if err != nil { 324 return nil, err 325 } 326 defer closeOnError(m) 327 328 if spec.Pinning == PinByName { 329 path := filepath.Join(opts.PinPath, spec.Name) 330 if err := m.Pin(path); err != nil { 331 return nil, fmt.Errorf("pin map: %w", err) 332 } 333 } 334 335 return m, nil 336 } 337 338 // createMap validates the spec's properties and creates the map in the kernel 339 // using the given opts. It does not populate or freeze the map. 340 func (spec *MapSpec) createMap(inner *sys.FD, opts MapOptions, handles *handleCache) (_ *Map, err error) { 341 closeOnError := func(closer io.Closer) { 342 if err != nil { 343 closer.Close() 344 } 345 } 346 347 spec = spec.Copy() 348 349 // Kernels 4.13 through 5.4 used a struct bpf_map_def that contained 350 // additional 'inner_map_idx' and later 'numa_node' fields. 351 // In order to support loading these definitions, tolerate the presence of 352 // extra bytes, but require them to be zeroes. 353 if spec.Extra != nil { 354 if _, err := io.Copy(internal.DiscardZeroes{}, spec.Extra); err != nil { 355 return nil, errors.New("extra contains unhandled non-zero bytes, drain before creating map") 356 } 357 } 358 359 switch spec.Type { 360 case ArrayOfMaps, HashOfMaps: 361 if err := haveNestedMaps(); err != nil { 362 return nil, err 363 } 364 365 if spec.ValueSize != 0 && spec.ValueSize != 4 { 366 return nil, errors.New("ValueSize must be zero or four for map of map") 367 } 368 spec.ValueSize = 4 369 370 case PerfEventArray: 371 if spec.KeySize != 0 && spec.KeySize != 4 { 372 return nil, errors.New("KeySize must be zero or four for perf event array") 373 } 374 spec.KeySize = 4 375 376 if spec.ValueSize != 0 && spec.ValueSize != 4 { 377 return nil, errors.New("ValueSize must be zero or four for perf event array") 378 } 379 spec.ValueSize = 4 380 381 if spec.MaxEntries == 0 { 382 n, err := internal.PossibleCPUs() 383 if err != nil { 384 return nil, fmt.Errorf("perf event array: %w", err) 385 } 386 spec.MaxEntries = uint32(n) 387 } 388 } 389 390 if spec.Flags&(unix.BPF_F_RDONLY_PROG|unix.BPF_F_WRONLY_PROG) > 0 || spec.Freeze { 391 if err := haveMapMutabilityModifiers(); err != nil { 392 return nil, fmt.Errorf("map create: %w", err) 393 } 394 } 395 if spec.Flags&unix.BPF_F_MMAPABLE > 0 { 396 if err := haveMmapableMaps(); err != nil { 397 return nil, fmt.Errorf("map create: %w", err) 398 } 399 } 400 if spec.Flags&unix.BPF_F_INNER_MAP > 0 { 401 if err := haveInnerMaps(); err != nil { 402 return nil, fmt.Errorf("map create: %w", err) 403 } 404 } 405 if spec.Flags&unix.BPF_F_NO_PREALLOC > 0 { 406 if err := haveNoPreallocMaps(); err != nil { 407 return nil, fmt.Errorf("map create: %w", err) 408 } 409 } 410 411 attr := sys.MapCreateAttr{ 412 MapType: sys.MapType(spec.Type), 413 KeySize: spec.KeySize, 414 ValueSize: spec.ValueSize, 415 MaxEntries: spec.MaxEntries, 416 MapFlags: spec.Flags, 417 NumaNode: spec.NumaNode, 418 } 419 420 if inner != nil { 421 attr.InnerMapFd = inner.Uint() 422 } 423 424 if haveObjName() == nil { 425 attr.MapName = sys.NewObjName(spec.Name) 426 } 427 428 if spec.hasBTF() { 429 handle, err := handles.btfHandle(spec.BTF) 430 if err != nil && !errors.Is(err, btf.ErrNotSupported) { 431 return nil, fmt.Errorf("load BTF: %w", err) 432 } 433 434 if handle != nil { 435 keyTypeID, err := spec.BTF.TypeID(spec.Key) 436 if err != nil { 437 return nil, err 438 } 439 440 valueTypeID, err := spec.BTF.TypeID(spec.Value) 441 if err != nil { 442 return nil, err 443 } 444 445 attr.BtfFd = uint32(handle.FD()) 446 attr.BtfKeyTypeId = uint32(keyTypeID) 447 attr.BtfValueTypeId = uint32(valueTypeID) 448 } 449 } 450 451 fd, err := sys.MapCreate(&attr) 452 if err != nil { 453 if errors.Is(err, unix.EPERM) { 454 return nil, fmt.Errorf("map create: %w (MEMLOCK may be too low, consider rlimit.RemoveMemlock)", err) 455 } 456 if !spec.hasBTF() { 457 return nil, fmt.Errorf("map create without BTF: %w", err) 458 } 459 if errors.Is(err, unix.EINVAL) && attr.MaxEntries == 0 { 460 return nil, fmt.Errorf("map create: %w (MaxEntries may be incorrectly set to zero)", err) 461 } 462 return nil, fmt.Errorf("map create: %w", err) 463 } 464 defer closeOnError(fd) 465 466 m, err := newMap(fd, spec.Name, spec.Type, spec.KeySize, spec.ValueSize, spec.MaxEntries, spec.Flags) 467 if err != nil { 468 return nil, fmt.Errorf("map create: %w", err) 469 } 470 471 return m, nil 472 } 473 474 // newMap allocates and returns a new Map structure. 475 // Sets the fullValueSize on per-CPU maps. 476 func newMap(fd *sys.FD, name string, typ MapType, keySize, valueSize, maxEntries, flags uint32) (*Map, error) { 477 m := &Map{ 478 name, 479 fd, 480 typ, 481 keySize, 482 valueSize, 483 maxEntries, 484 flags, 485 "", 486 int(valueSize), 487 } 488 489 if !typ.hasPerCPUValue() { 490 return m, nil 491 } 492 493 possibleCPUs, err := internal.PossibleCPUs() 494 if err != nil { 495 return nil, err 496 } 497 498 m.fullValueSize = internal.Align(int(valueSize), 8) * possibleCPUs 499 return m, nil 500 } 501 502 func (m *Map) String() string { 503 if m.name != "" { 504 return fmt.Sprintf("%s(%s)#%v", m.typ, m.name, m.fd) 505 } 506 return fmt.Sprintf("%s#%v", m.typ, m.fd) 507 } 508 509 // Type returns the underlying type of the map. 510 func (m *Map) Type() MapType { 511 return m.typ 512 } 513 514 // KeySize returns the size of the map key in bytes. 515 func (m *Map) KeySize() uint32 { 516 return m.keySize 517 } 518 519 // ValueSize returns the size of the map value in bytes. 520 func (m *Map) ValueSize() uint32 { 521 return m.valueSize 522 } 523 524 // MaxEntries returns the maximum number of elements the map can hold. 525 func (m *Map) MaxEntries() uint32 { 526 return m.maxEntries 527 } 528 529 // Flags returns the flags of the map. 530 func (m *Map) Flags() uint32 { 531 return m.flags 532 } 533 534 // Info returns metadata about the map. 535 func (m *Map) Info() (*MapInfo, error) { 536 return newMapInfoFromFd(m.fd) 537 } 538 539 // MapLookupFlags controls the behaviour of the map lookup calls. 540 type MapLookupFlags uint64 541 542 // LookupLock look up the value of a spin-locked map. 543 const LookupLock MapLookupFlags = 4 544 545 // Lookup retrieves a value from a Map. 546 // 547 // Calls Close() on valueOut if it is of type **Map or **Program, 548 // and *valueOut is not nil. 549 // 550 // Returns an error if the key doesn't exist, see ErrKeyNotExist. 551 func (m *Map) Lookup(key, valueOut interface{}) error { 552 valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize) 553 if err := m.lookup(key, valuePtr, 0); err != nil { 554 return err 555 } 556 557 return m.unmarshalValue(valueOut, valueBytes) 558 } 559 560 // LookupWithFlags retrieves a value from a Map with flags. 561 // 562 // Passing LookupLock flag will look up the value of a spin-locked 563 // map without returning the lock. This must be specified if the 564 // elements contain a spinlock. 565 // 566 // Calls Close() on valueOut if it is of type **Map or **Program, 567 // and *valueOut is not nil. 568 // 569 // Returns an error if the key doesn't exist, see ErrKeyNotExist. 570 func (m *Map) LookupWithFlags(key, valueOut interface{}, flags MapLookupFlags) error { 571 valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize) 572 if err := m.lookup(key, valuePtr, flags); err != nil { 573 return err 574 } 575 576 return m.unmarshalValue(valueOut, valueBytes) 577 } 578 579 // LookupAndDelete retrieves and deletes a value from a Map. 580 // 581 // Returns ErrKeyNotExist if the key doesn't exist. 582 func (m *Map) LookupAndDelete(key, valueOut interface{}) error { 583 return m.lookupAndDelete(key, valueOut, 0) 584 } 585 586 // LookupAndDeleteWithFlags retrieves and deletes a value from a Map. 587 // 588 // Passing LookupLock flag will look up and delete the value of a spin-locked 589 // map without returning the lock. This must be specified if the elements 590 // contain a spinlock. 591 // 592 // Returns ErrKeyNotExist if the key doesn't exist. 593 func (m *Map) LookupAndDeleteWithFlags(key, valueOut interface{}, flags MapLookupFlags) error { 594 return m.lookupAndDelete(key, valueOut, flags) 595 } 596 597 // LookupBytes gets a value from Map. 598 // 599 // Returns a nil value if a key doesn't exist. 600 func (m *Map) LookupBytes(key interface{}) ([]byte, error) { 601 valueBytes := make([]byte, m.fullValueSize) 602 valuePtr := sys.NewSlicePointer(valueBytes) 603 604 err := m.lookup(key, valuePtr, 0) 605 if errors.Is(err, ErrKeyNotExist) { 606 return nil, nil 607 } 608 609 return valueBytes, err 610 } 611 612 func (m *Map) lookup(key interface{}, valueOut sys.Pointer, flags MapLookupFlags) error { 613 keyPtr, err := m.marshalKey(key) 614 if err != nil { 615 return fmt.Errorf("can't marshal key: %w", err) 616 } 617 618 attr := sys.MapLookupElemAttr{ 619 MapFd: m.fd.Uint(), 620 Key: keyPtr, 621 Value: valueOut, 622 Flags: uint64(flags), 623 } 624 625 if err = sys.MapLookupElem(&attr); err != nil { 626 return fmt.Errorf("lookup: %w", wrapMapError(err)) 627 } 628 return nil 629 } 630 631 func (m *Map) lookupAndDelete(key, valueOut interface{}, flags MapLookupFlags) error { 632 valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize) 633 634 keyPtr, err := m.marshalKey(key) 635 if err != nil { 636 return fmt.Errorf("can't marshal key: %w", err) 637 } 638 639 attr := sys.MapLookupAndDeleteElemAttr{ 640 MapFd: m.fd.Uint(), 641 Key: keyPtr, 642 Value: valuePtr, 643 Flags: uint64(flags), 644 } 645 646 if err := sys.MapLookupAndDeleteElem(&attr); err != nil { 647 return fmt.Errorf("lookup and delete: %w", wrapMapError(err)) 648 } 649 650 return m.unmarshalValue(valueOut, valueBytes) 651 } 652 653 // MapUpdateFlags controls the behaviour of the Map.Update call. 654 // 655 // The exact semantics depend on the specific MapType. 656 type MapUpdateFlags uint64 657 658 const ( 659 // UpdateAny creates a new element or update an existing one. 660 UpdateAny MapUpdateFlags = iota 661 // UpdateNoExist creates a new element. 662 UpdateNoExist MapUpdateFlags = 1 << (iota - 1) 663 // UpdateExist updates an existing element. 664 UpdateExist 665 // UpdateLock updates elements under bpf_spin_lock. 666 UpdateLock 667 ) 668 669 // Put replaces or creates a value in map. 670 // 671 // It is equivalent to calling Update with UpdateAny. 672 func (m *Map) Put(key, value interface{}) error { 673 return m.Update(key, value, UpdateAny) 674 } 675 676 // Update changes the value of a key. 677 func (m *Map) Update(key, value interface{}, flags MapUpdateFlags) error { 678 keyPtr, err := m.marshalKey(key) 679 if err != nil { 680 return fmt.Errorf("can't marshal key: %w", err) 681 } 682 683 valuePtr, err := m.marshalValue(value) 684 if err != nil { 685 return fmt.Errorf("can't marshal value: %w", err) 686 } 687 688 attr := sys.MapUpdateElemAttr{ 689 MapFd: m.fd.Uint(), 690 Key: keyPtr, 691 Value: valuePtr, 692 Flags: uint64(flags), 693 } 694 695 if err = sys.MapUpdateElem(&attr); err != nil { 696 return fmt.Errorf("update: %w", wrapMapError(err)) 697 } 698 699 return nil 700 } 701 702 // Delete removes a value. 703 // 704 // Returns ErrKeyNotExist if the key does not exist. 705 func (m *Map) Delete(key interface{}) error { 706 keyPtr, err := m.marshalKey(key) 707 if err != nil { 708 return fmt.Errorf("can't marshal key: %w", err) 709 } 710 711 attr := sys.MapDeleteElemAttr{ 712 MapFd: m.fd.Uint(), 713 Key: keyPtr, 714 } 715 716 if err = sys.MapDeleteElem(&attr); err != nil { 717 return fmt.Errorf("delete: %w", wrapMapError(err)) 718 } 719 return nil 720 } 721 722 // NextKey finds the key following an initial key. 723 // 724 // See NextKeyBytes for details. 725 // 726 // Returns ErrKeyNotExist if there is no next key. 727 func (m *Map) NextKey(key, nextKeyOut interface{}) error { 728 nextKeyPtr, nextKeyBytes := makeBuffer(nextKeyOut, int(m.keySize)) 729 730 if err := m.nextKey(key, nextKeyPtr); err != nil { 731 return err 732 } 733 734 if err := m.unmarshalKey(nextKeyOut, nextKeyBytes); err != nil { 735 return fmt.Errorf("can't unmarshal next key: %w", err) 736 } 737 return nil 738 } 739 740 // NextKeyBytes returns the key following an initial key as a byte slice. 741 // 742 // Passing nil will return the first key. 743 // 744 // Use Iterate if you want to traverse all entries in the map. 745 // 746 // Returns nil if there are no more keys. 747 func (m *Map) NextKeyBytes(key interface{}) ([]byte, error) { 748 nextKey := make([]byte, m.keySize) 749 nextKeyPtr := sys.NewSlicePointer(nextKey) 750 751 err := m.nextKey(key, nextKeyPtr) 752 if errors.Is(err, ErrKeyNotExist) { 753 return nil, nil 754 } 755 756 return nextKey, err 757 } 758 759 func (m *Map) nextKey(key interface{}, nextKeyOut sys.Pointer) error { 760 var ( 761 keyPtr sys.Pointer 762 err error 763 ) 764 765 if key != nil { 766 keyPtr, err = m.marshalKey(key) 767 if err != nil { 768 return fmt.Errorf("can't marshal key: %w", err) 769 } 770 } 771 772 attr := sys.MapGetNextKeyAttr{ 773 MapFd: m.fd.Uint(), 774 Key: keyPtr, 775 NextKey: nextKeyOut, 776 } 777 778 if err = sys.MapGetNextKey(&attr); err != nil { 779 // Kernels 4.4.131 and earlier return EFAULT instead of a pointer to the 780 // first map element when a nil key pointer is specified. 781 if key == nil && errors.Is(err, unix.EFAULT) { 782 var guessKey []byte 783 guessKey, err = m.guessNonExistentKey() 784 if err != nil { 785 return err 786 } 787 788 // Retry the syscall with a valid non-existing key. 789 attr.Key = sys.NewSlicePointer(guessKey) 790 if err = sys.MapGetNextKey(&attr); err == nil { 791 return nil 792 } 793 } 794 795 return fmt.Errorf("next key: %w", wrapMapError(err)) 796 } 797 798 return nil 799 } 800 801 // guessNonExistentKey attempts to perform a map lookup that returns ENOENT. 802 // This is necessary on kernels before 4.4.132, since those don't support 803 // iterating maps from the start by providing an invalid key pointer. 804 func (m *Map) guessNonExistentKey() ([]byte, error) { 805 // Provide an invalid value pointer to prevent a copy on the kernel side. 806 valuePtr := sys.NewPointer(unsafe.Pointer(^uintptr(0))) 807 randKey := make([]byte, int(m.keySize)) 808 809 for i := 0; i < 4; i++ { 810 switch i { 811 // For hash maps, the 0 key is less likely to be occupied. They're often 812 // used for storing data related to pointers, and their access pattern is 813 // generally scattered across the keyspace. 814 case 0: 815 // An all-0xff key is guaranteed to be out of bounds of any array, since 816 // those have a fixed key size of 4 bytes. The only corner case being 817 // arrays with 2^32 max entries, but those are prohibitively expensive 818 // in many environments. 819 case 1: 820 for r := range randKey { 821 randKey[r] = 0xff 822 } 823 // Inspired by BCC, 0x55 is an alternating binary pattern (0101), so 824 // is unlikely to be taken. 825 case 2: 826 for r := range randKey { 827 randKey[r] = 0x55 828 } 829 // Last ditch effort, generate a random key. 830 case 3: 831 rand.New(rand.NewSource(time.Now().UnixNano())).Read(randKey) 832 } 833 834 err := m.lookup(randKey, valuePtr, 0) 835 if errors.Is(err, ErrKeyNotExist) { 836 return randKey, nil 837 } 838 } 839 840 return nil, errors.New("couldn't find non-existing key") 841 } 842 843 // BatchLookup looks up many elements in a map at once. 844 // 845 // "keysOut" and "valuesOut" must be of type slice, a pointer 846 // to a slice or buffer will not work. 847 // "prevKey" is the key to start the batch lookup from, it will 848 // *not* be included in the results. Use nil to start at the first key. 849 // 850 // ErrKeyNotExist is returned when the batch lookup has reached 851 // the end of all possible results, even when partial results 852 // are returned. It should be used to evaluate when lookup is "done". 853 func (m *Map) BatchLookup(prevKey, nextKeyOut, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) { 854 return m.batchLookup(sys.BPF_MAP_LOOKUP_BATCH, prevKey, nextKeyOut, keysOut, valuesOut, opts) 855 } 856 857 // BatchLookupAndDelete looks up many elements in a map at once, 858 // 859 // It then deletes all those elements. 860 // "keysOut" and "valuesOut" must be of type slice, a pointer 861 // to a slice or buffer will not work. 862 // "prevKey" is the key to start the batch lookup from, it will 863 // *not* be included in the results. Use nil to start at the first key. 864 // 865 // ErrKeyNotExist is returned when the batch lookup has reached 866 // the end of all possible results, even when partial results 867 // are returned. It should be used to evaluate when lookup is "done". 868 func (m *Map) BatchLookupAndDelete(prevKey, nextKeyOut, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) { 869 return m.batchLookup(sys.BPF_MAP_LOOKUP_AND_DELETE_BATCH, prevKey, nextKeyOut, keysOut, valuesOut, opts) 870 } 871 872 func (m *Map) batchLookup(cmd sys.Cmd, startKey, nextKeyOut, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) { 873 if err := haveBatchAPI(); err != nil { 874 return 0, err 875 } 876 if m.typ.hasPerCPUValue() { 877 return 0, ErrNotSupported 878 } 879 keysValue := reflect.ValueOf(keysOut) 880 if keysValue.Kind() != reflect.Slice { 881 return 0, fmt.Errorf("keys must be a slice") 882 } 883 valuesValue := reflect.ValueOf(valuesOut) 884 if valuesValue.Kind() != reflect.Slice { 885 return 0, fmt.Errorf("valuesOut must be a slice") 886 } 887 count := keysValue.Len() 888 if count != valuesValue.Len() { 889 return 0, fmt.Errorf("keysOut and valuesOut must be the same length") 890 } 891 keyBuf := make([]byte, count*int(m.keySize)) 892 keyPtr := sys.NewSlicePointer(keyBuf) 893 valueBuf := make([]byte, count*int(m.fullValueSize)) 894 valuePtr := sys.NewSlicePointer(valueBuf) 895 nextPtr, nextBuf := makeBuffer(nextKeyOut, int(m.keySize)) 896 897 attr := sys.MapLookupBatchAttr{ 898 MapFd: m.fd.Uint(), 899 Keys: keyPtr, 900 Values: valuePtr, 901 Count: uint32(count), 902 OutBatch: nextPtr, 903 } 904 905 if opts != nil { 906 attr.ElemFlags = opts.ElemFlags 907 attr.Flags = opts.Flags 908 } 909 910 var err error 911 if startKey != nil { 912 attr.InBatch, err = marshalPtr(startKey, int(m.keySize)) 913 if err != nil { 914 return 0, err 915 } 916 } 917 918 _, sysErr := sys.BPF(cmd, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) 919 sysErr = wrapMapError(sysErr) 920 if sysErr != nil && !errors.Is(sysErr, unix.ENOENT) { 921 return 0, sysErr 922 } 923 924 err = m.unmarshalKey(nextKeyOut, nextBuf) 925 if err != nil { 926 return 0, err 927 } 928 err = unmarshalBytes(keysOut, keyBuf) 929 if err != nil { 930 return 0, err 931 } 932 err = unmarshalBytes(valuesOut, valueBuf) 933 if err != nil { 934 return 0, err 935 } 936 937 return int(attr.Count), sysErr 938 } 939 940 // BatchUpdate updates the map with multiple keys and values 941 // simultaneously. 942 // "keys" and "values" must be of type slice, a pointer 943 // to a slice or buffer will not work. 944 func (m *Map) BatchUpdate(keys, values interface{}, opts *BatchOptions) (int, error) { 945 if err := haveBatchAPI(); err != nil { 946 return 0, err 947 } 948 if m.typ.hasPerCPUValue() { 949 return 0, ErrNotSupported 950 } 951 keysValue := reflect.ValueOf(keys) 952 if keysValue.Kind() != reflect.Slice { 953 return 0, fmt.Errorf("keys must be a slice") 954 } 955 valuesValue := reflect.ValueOf(values) 956 if valuesValue.Kind() != reflect.Slice { 957 return 0, fmt.Errorf("values must be a slice") 958 } 959 var ( 960 count = keysValue.Len() 961 valuePtr sys.Pointer 962 err error 963 ) 964 if count != valuesValue.Len() { 965 return 0, fmt.Errorf("keys and values must be the same length") 966 } 967 keyPtr, err := marshalPtr(keys, count*int(m.keySize)) 968 if err != nil { 969 return 0, err 970 } 971 valuePtr, err = marshalPtr(values, count*int(m.valueSize)) 972 if err != nil { 973 return 0, err 974 } 975 976 attr := sys.MapUpdateBatchAttr{ 977 MapFd: m.fd.Uint(), 978 Keys: keyPtr, 979 Values: valuePtr, 980 Count: uint32(count), 981 } 982 if opts != nil { 983 attr.ElemFlags = opts.ElemFlags 984 attr.Flags = opts.Flags 985 } 986 987 err = sys.MapUpdateBatch(&attr) 988 if err != nil { 989 return int(attr.Count), fmt.Errorf("batch update: %w", wrapMapError(err)) 990 } 991 992 return int(attr.Count), nil 993 } 994 995 // BatchDelete batch deletes entries in the map by keys. 996 // "keys" must be of type slice, a pointer to a slice or buffer will not work. 997 func (m *Map) BatchDelete(keys interface{}, opts *BatchOptions) (int, error) { 998 if err := haveBatchAPI(); err != nil { 999 return 0, err 1000 } 1001 if m.typ.hasPerCPUValue() { 1002 return 0, ErrNotSupported 1003 } 1004 keysValue := reflect.ValueOf(keys) 1005 if keysValue.Kind() != reflect.Slice { 1006 return 0, fmt.Errorf("keys must be a slice") 1007 } 1008 count := keysValue.Len() 1009 keyPtr, err := marshalPtr(keys, count*int(m.keySize)) 1010 if err != nil { 1011 return 0, fmt.Errorf("cannot marshal keys: %v", err) 1012 } 1013 1014 attr := sys.MapDeleteBatchAttr{ 1015 MapFd: m.fd.Uint(), 1016 Keys: keyPtr, 1017 Count: uint32(count), 1018 } 1019 1020 if opts != nil { 1021 attr.ElemFlags = opts.ElemFlags 1022 attr.Flags = opts.Flags 1023 } 1024 1025 if err = sys.MapDeleteBatch(&attr); err != nil { 1026 return int(attr.Count), fmt.Errorf("batch delete: %w", wrapMapError(err)) 1027 } 1028 1029 return int(attr.Count), nil 1030 } 1031 1032 // Iterate traverses a map. 1033 // 1034 // It's safe to create multiple iterators at the same time. 1035 // 1036 // It's not possible to guarantee that all keys in a map will be 1037 // returned if there are concurrent modifications to the map. 1038 func (m *Map) Iterate() *MapIterator { 1039 return newMapIterator(m) 1040 } 1041 1042 // Close the Map's underlying file descriptor, which could unload the 1043 // Map from the kernel if it is not pinned or in use by a loaded Program. 1044 func (m *Map) Close() error { 1045 if m == nil { 1046 // This makes it easier to clean up when iterating maps 1047 // of maps / programs. 1048 return nil 1049 } 1050 1051 return m.fd.Close() 1052 } 1053 1054 // FD gets the file descriptor of the Map. 1055 // 1056 // Calling this function is invalid after Close has been called. 1057 func (m *Map) FD() int { 1058 return m.fd.Int() 1059 } 1060 1061 // Clone creates a duplicate of the Map. 1062 // 1063 // Closing the duplicate does not affect the original, and vice versa. 1064 // Changes made to the map are reflected by both instances however. 1065 // If the original map was pinned, the cloned map will not be pinned by default. 1066 // 1067 // Cloning a nil Map returns nil. 1068 func (m *Map) Clone() (*Map, error) { 1069 if m == nil { 1070 return nil, nil 1071 } 1072 1073 dup, err := m.fd.Dup() 1074 if err != nil { 1075 return nil, fmt.Errorf("can't clone map: %w", err) 1076 } 1077 1078 return &Map{ 1079 m.name, 1080 dup, 1081 m.typ, 1082 m.keySize, 1083 m.valueSize, 1084 m.maxEntries, 1085 m.flags, 1086 "", 1087 m.fullValueSize, 1088 }, nil 1089 } 1090 1091 // Pin persists the map on the BPF virtual file system past the lifetime of 1092 // the process that created it . 1093 // 1094 // Calling Pin on a previously pinned map will overwrite the path, except when 1095 // the new path already exists. Re-pinning across filesystems is not supported. 1096 // You can Clone a map to pin it to a different path. 1097 // 1098 // This requires bpffs to be mounted above fileName. See https://docs.cilium.io/en/k8s-doc/admin/#admin-mount-bpffs 1099 func (m *Map) Pin(fileName string) error { 1100 if err := internal.Pin(m.pinnedPath, fileName, m.fd); err != nil { 1101 return err 1102 } 1103 m.pinnedPath = fileName 1104 return nil 1105 } 1106 1107 // Unpin removes the persisted state for the map from the BPF virtual filesystem. 1108 // 1109 // Failed calls to Unpin will not alter the state returned by IsPinned. 1110 // 1111 // Unpinning an unpinned Map returns nil. 1112 func (m *Map) Unpin() error { 1113 if err := internal.Unpin(m.pinnedPath); err != nil { 1114 return err 1115 } 1116 m.pinnedPath = "" 1117 return nil 1118 } 1119 1120 // IsPinned returns true if the map has a non-empty pinned path. 1121 func (m *Map) IsPinned() bool { 1122 return m.pinnedPath != "" 1123 } 1124 1125 // Freeze prevents a map to be modified from user space. 1126 // 1127 // It makes no changes to kernel-side restrictions. 1128 func (m *Map) Freeze() error { 1129 if err := haveMapMutabilityModifiers(); err != nil { 1130 return fmt.Errorf("can't freeze map: %w", err) 1131 } 1132 1133 attr := sys.MapFreezeAttr{ 1134 MapFd: m.fd.Uint(), 1135 } 1136 1137 if err := sys.MapFreeze(&attr); err != nil { 1138 return fmt.Errorf("can't freeze map: %w", err) 1139 } 1140 return nil 1141 } 1142 1143 // finalize populates the Map according to the Contents specified 1144 // in spec and freezes the Map if requested by spec. 1145 func (m *Map) finalize(spec *MapSpec) error { 1146 for _, kv := range spec.Contents { 1147 if err := m.Put(kv.Key, kv.Value); err != nil { 1148 return fmt.Errorf("putting value: key %v: %w", kv.Key, err) 1149 } 1150 } 1151 1152 if spec.Freeze { 1153 if err := m.Freeze(); err != nil { 1154 return fmt.Errorf("freezing map: %w", err) 1155 } 1156 } 1157 1158 return nil 1159 } 1160 1161 func (m *Map) marshalKey(data interface{}) (sys.Pointer, error) { 1162 if data == nil { 1163 if m.keySize == 0 { 1164 // Queues have a key length of zero, so passing nil here is valid. 1165 return sys.NewPointer(nil), nil 1166 } 1167 return sys.Pointer{}, errors.New("can't use nil as key of map") 1168 } 1169 1170 return marshalPtr(data, int(m.keySize)) 1171 } 1172 1173 func (m *Map) unmarshalKey(data interface{}, buf []byte) error { 1174 if buf == nil { 1175 // This is from a makeBuffer call, nothing do do here. 1176 return nil 1177 } 1178 1179 return unmarshalBytes(data, buf) 1180 } 1181 1182 func (m *Map) marshalValue(data interface{}) (sys.Pointer, error) { 1183 if m.typ.hasPerCPUValue() { 1184 return marshalPerCPUValue(data, int(m.valueSize)) 1185 } 1186 1187 var ( 1188 buf []byte 1189 err error 1190 ) 1191 1192 switch value := data.(type) { 1193 case *Map: 1194 if !m.typ.canStoreMap() { 1195 return sys.Pointer{}, fmt.Errorf("can't store map in %s", m.typ) 1196 } 1197 buf, err = marshalMap(value, int(m.valueSize)) 1198 1199 case *Program: 1200 if !m.typ.canStoreProgram() { 1201 return sys.Pointer{}, fmt.Errorf("can't store program in %s", m.typ) 1202 } 1203 buf, err = marshalProgram(value, int(m.valueSize)) 1204 1205 default: 1206 return marshalPtr(data, int(m.valueSize)) 1207 } 1208 1209 if err != nil { 1210 return sys.Pointer{}, err 1211 } 1212 1213 return sys.NewSlicePointer(buf), nil 1214 } 1215 1216 func (m *Map) unmarshalValue(value interface{}, buf []byte) error { 1217 if buf == nil { 1218 // This is from a makeBuffer call, nothing do do here. 1219 return nil 1220 } 1221 1222 if m.typ.hasPerCPUValue() { 1223 return unmarshalPerCPUValue(value, int(m.valueSize), buf) 1224 } 1225 1226 switch value := value.(type) { 1227 case **Map: 1228 if !m.typ.canStoreMap() { 1229 return fmt.Errorf("can't read a map from %s", m.typ) 1230 } 1231 1232 other, err := unmarshalMap(buf) 1233 if err != nil { 1234 return err 1235 } 1236 1237 // The caller might close the map externally, so ignore errors. 1238 _ = (*value).Close() 1239 1240 *value = other 1241 return nil 1242 1243 case *Map: 1244 if !m.typ.canStoreMap() { 1245 return fmt.Errorf("can't read a map from %s", m.typ) 1246 } 1247 return errors.New("require pointer to *Map") 1248 1249 case **Program: 1250 if !m.typ.canStoreProgram() { 1251 return fmt.Errorf("can't read a program from %s", m.typ) 1252 } 1253 1254 other, err := unmarshalProgram(buf) 1255 if err != nil { 1256 return err 1257 } 1258 1259 // The caller might close the program externally, so ignore errors. 1260 _ = (*value).Close() 1261 1262 *value = other 1263 return nil 1264 1265 case *Program: 1266 if !m.typ.canStoreProgram() { 1267 return fmt.Errorf("can't read a program from %s", m.typ) 1268 } 1269 return errors.New("require pointer to *Program") 1270 } 1271 1272 return unmarshalBytes(value, buf) 1273 } 1274 1275 // LoadPinnedMap loads a Map from a BPF file. 1276 func LoadPinnedMap(fileName string, opts *LoadPinOptions) (*Map, error) { 1277 fd, err := sys.ObjGet(&sys.ObjGetAttr{ 1278 Pathname: sys.NewStringPointer(fileName), 1279 FileFlags: opts.Marshal(), 1280 }) 1281 if err != nil { 1282 return nil, err 1283 } 1284 1285 m, err := newMapFromFD(fd) 1286 if err == nil { 1287 m.pinnedPath = fileName 1288 } 1289 1290 return m, err 1291 } 1292 1293 // unmarshalMap creates a map from a map ID encoded in host endianness. 1294 func unmarshalMap(buf []byte) (*Map, error) { 1295 if len(buf) != 4 { 1296 return nil, errors.New("map id requires 4 byte value") 1297 } 1298 1299 id := internal.NativeEndian.Uint32(buf) 1300 return NewMapFromID(MapID(id)) 1301 } 1302 1303 // marshalMap marshals the fd of a map into a buffer in host endianness. 1304 func marshalMap(m *Map, length int) ([]byte, error) { 1305 if length != 4 { 1306 return nil, fmt.Errorf("can't marshal map to %d bytes", length) 1307 } 1308 1309 buf := make([]byte, 4) 1310 internal.NativeEndian.PutUint32(buf, m.fd.Uint()) 1311 return buf, nil 1312 } 1313 1314 // MapIterator iterates a Map. 1315 // 1316 // See Map.Iterate. 1317 type MapIterator struct { 1318 target *Map 1319 prevKey interface{} 1320 prevBytes []byte 1321 count, maxEntries uint32 1322 done bool 1323 err error 1324 } 1325 1326 func newMapIterator(target *Map) *MapIterator { 1327 return &MapIterator{ 1328 target: target, 1329 maxEntries: target.maxEntries, 1330 prevBytes: make([]byte, target.keySize), 1331 } 1332 } 1333 1334 // Next decodes the next key and value. 1335 // 1336 // Iterating a hash map from which keys are being deleted is not 1337 // safe. You may see the same key multiple times. Iteration may 1338 // also abort with an error, see IsIterationAborted. 1339 // 1340 // Returns false if there are no more entries. You must check 1341 // the result of Err afterwards. 1342 // 1343 // See Map.Get for further caveats around valueOut. 1344 func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool { 1345 if mi.err != nil || mi.done { 1346 return false 1347 } 1348 1349 // For array-like maps NextKeyBytes returns nil only on after maxEntries 1350 // iterations. 1351 for mi.count <= mi.maxEntries { 1352 var nextBytes []byte 1353 nextBytes, mi.err = mi.target.NextKeyBytes(mi.prevKey) 1354 if mi.err != nil { 1355 return false 1356 } 1357 1358 if nextBytes == nil { 1359 mi.done = true 1360 return false 1361 } 1362 1363 // The user can get access to nextBytes since unmarshalBytes 1364 // does not copy when unmarshaling into a []byte. 1365 // Make a copy to prevent accidental corruption of 1366 // iterator state. 1367 copy(mi.prevBytes, nextBytes) 1368 mi.prevKey = mi.prevBytes 1369 1370 mi.count++ 1371 mi.err = mi.target.Lookup(nextBytes, valueOut) 1372 if errors.Is(mi.err, ErrKeyNotExist) { 1373 // Even though the key should be valid, we couldn't look up 1374 // its value. If we're iterating a hash map this is probably 1375 // because a concurrent delete removed the value before we 1376 // could get it. This means that the next call to NextKeyBytes 1377 // is very likely to restart iteration. 1378 // If we're iterating one of the fd maps like 1379 // ProgramArray it means that a given slot doesn't have 1380 // a valid fd associated. It's OK to continue to the next slot. 1381 continue 1382 } 1383 if mi.err != nil { 1384 return false 1385 } 1386 1387 mi.err = mi.target.unmarshalKey(keyOut, nextBytes) 1388 return mi.err == nil 1389 } 1390 1391 mi.err = fmt.Errorf("%w", ErrIterationAborted) 1392 return false 1393 } 1394 1395 // Err returns any encountered error. 1396 // 1397 // The method must be called after Next returns nil. 1398 // 1399 // Returns ErrIterationAborted if it wasn't possible to do a full iteration. 1400 func (mi *MapIterator) Err() error { 1401 return mi.err 1402 } 1403 1404 // MapGetNextID returns the ID of the next eBPF map. 1405 // 1406 // Returns ErrNotExist, if there is no next eBPF map. 1407 func MapGetNextID(startID MapID) (MapID, error) { 1408 attr := &sys.MapGetNextIdAttr{Id: uint32(startID)} 1409 return MapID(attr.NextId), sys.MapGetNextId(attr) 1410 } 1411 1412 // NewMapFromID returns the map for a given id. 1413 // 1414 // Returns ErrNotExist, if there is no eBPF map with the given id. 1415 func NewMapFromID(id MapID) (*Map, error) { 1416 fd, err := sys.MapGetFdById(&sys.MapGetFdByIdAttr{ 1417 Id: uint32(id), 1418 }) 1419 if err != nil { 1420 return nil, err 1421 } 1422 1423 return newMapFromFD(fd) 1424 }