gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

perf_event.go (12213B)


      1 package link
      2 
      3 import (
      4 	"bytes"
      5 	"errors"
      6 	"fmt"
      7 	"os"
      8 	"path/filepath"
      9 	"runtime"
     10 	"strconv"
     11 	"strings"
     12 	"unsafe"
     13 
     14 	"github.com/cilium/ebpf"
     15 	"github.com/cilium/ebpf/asm"
     16 	"github.com/cilium/ebpf/internal"
     17 	"github.com/cilium/ebpf/internal/sys"
     18 	"github.com/cilium/ebpf/internal/unix"
     19 )
     20 
     21 // Getting the terminology right is usually the hardest part. For posterity and
     22 // for staying sane during implementation:
     23 //
     24 // - trace event: Representation of a kernel runtime hook. Filesystem entries
     25 //   under <tracefs>/events. Can be tracepoints (static), kprobes or uprobes.
     26 //   Can be instantiated into perf events (see below).
     27 // - tracepoint: A predetermined hook point in the kernel. Exposed as trace
     28 //   events in (sub)directories under <tracefs>/events. Cannot be closed or
     29 //   removed, they are static.
     30 // - k(ret)probe: Ephemeral trace events based on entry or exit points of
     31 //   exported kernel symbols. kprobe-based (tracefs) trace events can be
     32 //   created system-wide by writing to the <tracefs>/kprobe_events file, or
     33 //   they can be scoped to the current process by creating PMU perf events.
     34 // - u(ret)probe: Ephemeral trace events based on user-provided ELF binaries
     35 //   and offsets. uprobe-based (tracefs) trace events can be
     36 //   created system-wide by writing to the <tracefs>/uprobe_events file, or
     37 //   they can be scoped to the current process by creating PMU perf events.
     38 // - perf event: An object instantiated based on an existing trace event or
     39 //   kernel symbol. Referred to by fd in userspace.
     40 //   Exactly one eBPF program can be attached to a perf event. Multiple perf
     41 //   events can be created from a single trace event. Closing a perf event
     42 //   stops any further invocations of the attached eBPF program.
     43 
     44 var (
     45 	tracefsPath = "/sys/kernel/debug/tracing"
     46 
     47 	errInvalidInput = errors.New("invalid input")
     48 )
     49 
     50 const (
     51 	perfAllThreads = -1
     52 )
     53 
     54 type perfEventType uint8
     55 
     56 const (
     57 	tracepointEvent perfEventType = iota
     58 	kprobeEvent
     59 	kretprobeEvent
     60 	uprobeEvent
     61 	uretprobeEvent
     62 )
     63 
     64 // A perfEvent represents a perf event kernel object. Exactly one eBPF program
     65 // can be attached to it. It is created based on a tracefs trace event or a
     66 // Performance Monitoring Unit (PMU).
     67 type perfEvent struct {
     68 	// The event type determines the types of programs that can be attached.
     69 	typ perfEventType
     70 
     71 	// Group and name of the tracepoint/kprobe/uprobe.
     72 	group string
     73 	name  string
     74 
     75 	// PMU event ID read from sysfs. Valid IDs are non-zero.
     76 	pmuID uint64
     77 	// ID of the trace event read from tracefs. Valid IDs are non-zero.
     78 	tracefsID uint64
     79 
     80 	// User provided arbitrary value.
     81 	cookie uint64
     82 
     83 	// This is the perf event FD.
     84 	fd *sys.FD
     85 }
     86 
     87 func (pe *perfEvent) Close() error {
     88 	if err := pe.fd.Close(); err != nil {
     89 		return fmt.Errorf("closing perf event fd: %w", err)
     90 	}
     91 
     92 	switch pe.typ {
     93 	case kprobeEvent, kretprobeEvent:
     94 		// Clean up kprobe tracefs entry.
     95 		if pe.tracefsID != 0 {
     96 			return closeTraceFSProbeEvent(kprobeType, pe.group, pe.name)
     97 		}
     98 	case uprobeEvent, uretprobeEvent:
     99 		// Clean up uprobe tracefs entry.
    100 		if pe.tracefsID != 0 {
    101 			return closeTraceFSProbeEvent(uprobeType, pe.group, pe.name)
    102 		}
    103 	case tracepointEvent:
    104 		// Tracepoint trace events don't hold any extra resources.
    105 		return nil
    106 	}
    107 
    108 	return nil
    109 }
    110 
    111 // perfEventLink represents a bpf perf link.
    112 type perfEventLink struct {
    113 	RawLink
    114 	pe *perfEvent
    115 }
    116 
    117 func (pl *perfEventLink) isLink() {}
    118 
    119 // Pinning requires the underlying perf event FD to stay open.
    120 //
    121 // | PerfEvent FD | BpfLink FD | Works |
    122 // |--------------|------------|-------|
    123 // | Open         | Open       | Yes   |
    124 // | Closed       | Open       | No    |
    125 // | Open         | Closed     | No (Pin() -> EINVAL) |
    126 // | Closed       | Closed     | No (Pin() -> EINVAL) |
    127 //
    128 // There is currently no pretty way to recover the perf event FD
    129 // when loading a pinned link, so leave as not supported for now.
    130 func (pl *perfEventLink) Pin(string) error {
    131 	return fmt.Errorf("perf event link pin: %w", ErrNotSupported)
    132 }
    133 
    134 func (pl *perfEventLink) Unpin() error {
    135 	return fmt.Errorf("perf event link unpin: %w", ErrNotSupported)
    136 }
    137 
    138 func (pl *perfEventLink) Close() error {
    139 	if err := pl.pe.Close(); err != nil {
    140 		return fmt.Errorf("perf event link close: %w", err)
    141 	}
    142 	return pl.fd.Close()
    143 }
    144 
    145 func (pl *perfEventLink) Update(prog *ebpf.Program) error {
    146 	return fmt.Errorf("perf event link update: %w", ErrNotSupported)
    147 }
    148 
    149 // perfEventIoctl implements Link and handles the perf event lifecycle
    150 // via ioctl().
    151 type perfEventIoctl struct {
    152 	*perfEvent
    153 }
    154 
    155 func (pi *perfEventIoctl) isLink() {}
    156 
    157 // Since 4.15 (e87c6bc3852b "bpf: permit multiple bpf attachments for a single perf event"),
    158 // calling PERF_EVENT_IOC_SET_BPF appends the given program to a prog_array
    159 // owned by the perf event, which means multiple programs can be attached
    160 // simultaneously.
    161 //
    162 // Before 4.15, calling PERF_EVENT_IOC_SET_BPF more than once on a perf event
    163 // returns EEXIST.
    164 //
    165 // Detaching a program from a perf event is currently not possible, so a
    166 // program replacement mechanism cannot be implemented for perf events.
    167 func (pi *perfEventIoctl) Update(prog *ebpf.Program) error {
    168 	return fmt.Errorf("perf event ioctl update: %w", ErrNotSupported)
    169 }
    170 
    171 func (pi *perfEventIoctl) Pin(string) error {
    172 	return fmt.Errorf("perf event ioctl pin: %w", ErrNotSupported)
    173 }
    174 
    175 func (pi *perfEventIoctl) Unpin() error {
    176 	return fmt.Errorf("perf event ioctl unpin: %w", ErrNotSupported)
    177 }
    178 
    179 func (pi *perfEventIoctl) Info() (*Info, error) {
    180 	return nil, fmt.Errorf("perf event ioctl info: %w", ErrNotSupported)
    181 }
    182 
    183 // attach the given eBPF prog to the perf event stored in pe.
    184 // pe must contain a valid perf event fd.
    185 // prog's type must match the program type stored in pe.
    186 func attachPerfEvent(pe *perfEvent, prog *ebpf.Program) (Link, error) {
    187 	if prog == nil {
    188 		return nil, errors.New("cannot attach a nil program")
    189 	}
    190 	if prog.FD() < 0 {
    191 		return nil, fmt.Errorf("invalid program: %w", sys.ErrClosedFd)
    192 	}
    193 
    194 	switch pe.typ {
    195 	case kprobeEvent, kretprobeEvent, uprobeEvent, uretprobeEvent:
    196 		if t := prog.Type(); t != ebpf.Kprobe {
    197 			return nil, fmt.Errorf("invalid program type (expected %s): %s", ebpf.Kprobe, t)
    198 		}
    199 	case tracepointEvent:
    200 		if t := prog.Type(); t != ebpf.TracePoint {
    201 			return nil, fmt.Errorf("invalid program type (expected %s): %s", ebpf.TracePoint, t)
    202 		}
    203 	default:
    204 		return nil, fmt.Errorf("unknown perf event type: %d", pe.typ)
    205 	}
    206 
    207 	if err := haveBPFLinkPerfEvent(); err == nil {
    208 		return attachPerfEventLink(pe, prog)
    209 	}
    210 	return attachPerfEventIoctl(pe, prog)
    211 }
    212 
    213 func attachPerfEventIoctl(pe *perfEvent, prog *ebpf.Program) (*perfEventIoctl, error) {
    214 	if pe.cookie != 0 {
    215 		return nil, fmt.Errorf("cookies are not supported: %w", ErrNotSupported)
    216 	}
    217 
    218 	// Assign the eBPF program to the perf event.
    219 	err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_SET_BPF, prog.FD())
    220 	if err != nil {
    221 		return nil, fmt.Errorf("setting perf event bpf program: %w", err)
    222 	}
    223 
    224 	// PERF_EVENT_IOC_ENABLE and _DISABLE ignore their given values.
    225 	if err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_ENABLE, 0); err != nil {
    226 		return nil, fmt.Errorf("enable perf event: %s", err)
    227 	}
    228 
    229 	pi := &perfEventIoctl{pe}
    230 
    231 	// Close the perf event when its reference is lost to avoid leaking system resources.
    232 	runtime.SetFinalizer(pi, (*perfEventIoctl).Close)
    233 	return pi, nil
    234 }
    235 
    236 // Use the bpf api to attach the perf event (BPF_LINK_TYPE_PERF_EVENT, 5.15+).
    237 //
    238 // https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e
    239 func attachPerfEventLink(pe *perfEvent, prog *ebpf.Program) (*perfEventLink, error) {
    240 	fd, err := sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{
    241 		ProgFd:     uint32(prog.FD()),
    242 		TargetFd:   pe.fd.Uint(),
    243 		AttachType: sys.BPF_PERF_EVENT,
    244 		BpfCookie:  pe.cookie,
    245 	})
    246 	if err != nil {
    247 		return nil, fmt.Errorf("cannot create bpf perf link: %v", err)
    248 	}
    249 
    250 	pl := &perfEventLink{RawLink{fd: fd}, pe}
    251 
    252 	// Close the perf event when its reference is lost to avoid leaking system resources.
    253 	runtime.SetFinalizer(pl, (*perfEventLink).Close)
    254 	return pl, nil
    255 }
    256 
    257 // unsafeStringPtr returns an unsafe.Pointer to a NUL-terminated copy of str.
    258 func unsafeStringPtr(str string) (unsafe.Pointer, error) {
    259 	p, err := unix.BytePtrFromString(str)
    260 	if err != nil {
    261 		return nil, err
    262 	}
    263 	return unsafe.Pointer(p), nil
    264 }
    265 
    266 // getTraceEventID reads a trace event's ID from tracefs given its group and name.
    267 // The kernel requires group and name to be alphanumeric or underscore.
    268 //
    269 // name automatically has its invalid symbols converted to underscores so the caller
    270 // can pass a raw symbol name, e.g. a kernel symbol containing dots.
    271 func getTraceEventID(group, name string) (uint64, error) {
    272 	name = sanitizeSymbol(name)
    273 	tid, err := uint64FromFile(tracefsPath, "events", group, name, "id")
    274 	if errors.Is(err, os.ErrNotExist) {
    275 		return 0, fmt.Errorf("trace event %s/%s: %w", group, name, os.ErrNotExist)
    276 	}
    277 	if err != nil {
    278 		return 0, fmt.Errorf("reading trace event ID of %s/%s: %w", group, name, err)
    279 	}
    280 
    281 	return tid, nil
    282 }
    283 
    284 // getPMUEventType reads a Performance Monitoring Unit's type (numeric identifier)
    285 // from /sys/bus/event_source/devices/<pmu>/type.
    286 //
    287 // Returns ErrNotSupported if the pmu type is not supported.
    288 func getPMUEventType(typ probeType) (uint64, error) {
    289 	et, err := uint64FromFile("/sys/bus/event_source/devices", typ.String(), "type")
    290 	if errors.Is(err, os.ErrNotExist) {
    291 		return 0, fmt.Errorf("pmu type %s: %w", typ, ErrNotSupported)
    292 	}
    293 	if err != nil {
    294 		return 0, fmt.Errorf("reading pmu type %s: %w", typ, err)
    295 	}
    296 
    297 	return et, nil
    298 }
    299 
    300 // openTracepointPerfEvent opens a tracepoint-type perf event. System-wide
    301 // [k,u]probes created by writing to <tracefs>/[k,u]probe_events are tracepoints
    302 // behind the scenes, and can be attached to using these perf events.
    303 func openTracepointPerfEvent(tid uint64, pid int) (*sys.FD, error) {
    304 	attr := unix.PerfEventAttr{
    305 		Type:        unix.PERF_TYPE_TRACEPOINT,
    306 		Config:      tid,
    307 		Sample_type: unix.PERF_SAMPLE_RAW,
    308 		Sample:      1,
    309 		Wakeup:      1,
    310 	}
    311 
    312 	fd, err := unix.PerfEventOpen(&attr, pid, 0, -1, unix.PERF_FLAG_FD_CLOEXEC)
    313 	if err != nil {
    314 		return nil, fmt.Errorf("opening tracepoint perf event: %w", err)
    315 	}
    316 
    317 	return sys.NewFD(fd)
    318 }
    319 
    320 // uint64FromFile reads a uint64 from a file. All elements of path are sanitized
    321 // and joined onto base. Returns error if base no longer prefixes the path after
    322 // joining all components.
    323 func uint64FromFile(base string, path ...string) (uint64, error) {
    324 	l := filepath.Join(path...)
    325 	p := filepath.Join(base, l)
    326 	if !strings.HasPrefix(p, base) {
    327 		return 0, fmt.Errorf("path '%s' attempts to escape base path '%s': %w", l, base, errInvalidInput)
    328 	}
    329 
    330 	data, err := os.ReadFile(p)
    331 	if err != nil {
    332 		return 0, fmt.Errorf("reading file %s: %w", p, err)
    333 	}
    334 
    335 	et := bytes.TrimSpace(data)
    336 	return strconv.ParseUint(string(et), 10, 64)
    337 }
    338 
    339 // Probe BPF perf link.
    340 //
    341 // https://elixir.bootlin.com/linux/v5.16.8/source/kernel/bpf/syscall.c#L4307
    342 // https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e
    343 var haveBPFLinkPerfEvent = internal.FeatureTest("bpf_link_perf_event", "5.15", func() error {
    344 	prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
    345 		Name: "probe_bpf_perf_link",
    346 		Type: ebpf.Kprobe,
    347 		Instructions: asm.Instructions{
    348 			asm.Mov.Imm(asm.R0, 0),
    349 			asm.Return(),
    350 		},
    351 		License: "MIT",
    352 	})
    353 	if err != nil {
    354 		return err
    355 	}
    356 	defer prog.Close()
    357 
    358 	_, err = sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{
    359 		ProgFd:     uint32(prog.FD()),
    360 		AttachType: sys.BPF_PERF_EVENT,
    361 	})
    362 	if errors.Is(err, unix.EINVAL) {
    363 		return internal.ErrNotSupported
    364 	}
    365 	if errors.Is(err, unix.EBADF) {
    366 		return nil
    367 	}
    368 	return err
    369 })
    370 
    371 // isValidTraceID implements the equivalent of a regex match
    372 // against "^[a-zA-Z_][0-9a-zA-Z_]*$".
    373 //
    374 // Trace event groups, names and kernel symbols must adhere to this set
    375 // of characters. Non-empty, first character must not be a number, all
    376 // characters must be alphanumeric or underscore.
    377 func isValidTraceID(s string) bool {
    378 	if len(s) < 1 {
    379 		return false
    380 	}
    381 	for i, c := range []byte(s) {
    382 		switch {
    383 		case c >= 'a' && c <= 'z':
    384 		case c >= 'A' && c <= 'Z':
    385 		case c == '_':
    386 		case i > 0 && c >= '0' && c <= '9':
    387 
    388 		default:
    389 			return false
    390 		}
    391 	}
    392 
    393 	return true
    394 }