diff --git a/aya/src/generated/linux_bindings_aarch64.rs b/aya/src/generated/linux_bindings_aarch64.rs
index ecc46e26..e4015d67 100644
--- a/aya/src/generated/linux_bindings_aarch64.rs
+++ b/aya/src/generated/linux_bindings_aarch64.rs
@@ -117,6 +117,8 @@ pub const BTF_KIND_MAX: u32 = 15;
 pub const BTF_INT_SIGNED: u32 = 1;
 pub const BTF_INT_CHAR: u32 = 2;
 pub const BTF_INT_BOOL: u32 = 4;
+pub const PERF_MAX_STACK_DEPTH: u32 = 127;
+pub const PERF_MAX_CONTEXTS_PER_STACK: u32 = 8;
 pub const PERF_FLAG_FD_NO_GROUP: u32 = 1;
 pub const PERF_FLAG_FD_OUTPUT: u32 = 2;
 pub const PERF_FLAG_PID_CGROUP: u32 = 4;
diff --git a/aya/src/generated/linux_bindings_x86_64.rs b/aya/src/generated/linux_bindings_x86_64.rs
index fc06f4e9..89f3c9f2 100644
--- a/aya/src/generated/linux_bindings_x86_64.rs
+++ b/aya/src/generated/linux_bindings_x86_64.rs
@@ -117,6 +117,8 @@ pub const BTF_KIND_MAX: u32 = 15;
 pub const BTF_INT_SIGNED: u32 = 1;
 pub const BTF_INT_CHAR: u32 = 2;
 pub const BTF_INT_BOOL: u32 = 4;
+pub const PERF_MAX_STACK_DEPTH: u32 = 127;
+pub const PERF_MAX_CONTEXTS_PER_STACK: u32 = 8;
 pub const PERF_FLAG_FD_NO_GROUP: u32 = 1;
 pub const PERF_FLAG_FD_OUTPUT: u32 = 2;
 pub const PERF_FLAG_PID_CGROUP: u32 = 4;
diff --git a/aya/src/maps/mod.rs b/aya/src/maps/mod.rs
index 33ab060c..7b449f49 100644
--- a/aya/src/maps/mod.rs
+++ b/aya/src/maps/mod.rs
@@ -43,15 +43,18 @@ use crate::{
     Pod,
 };
 
+mod map_lock;
+
 pub mod array;
 pub mod hash_map;
-mod map_lock;
 pub mod perf;
+pub mod stack_trace;
 
 pub use array::{Array, PerCpuArray, ProgramArray};
 pub use hash_map::{HashMap, PerCpuHashMap};
 pub use map_lock::*;
 pub use perf::PerfEventArray;
+pub use stack_trace::StackTraceMap;
 
 #[derive(Error, Debug)]
 pub enum MapError {
diff --git a/aya/src/maps/stack_trace.rs b/aya/src/maps/stack_trace.rs
new file mode 100644
index 00000000..65b04457
--- /dev/null
+++ b/aya/src/maps/stack_trace.rs
@@ -0,0 +1,240 @@
+//! A hash map of kernel or user space stack traces.
+//!
+//! See [`StackTraceMap`] for documentation and examples.
+use std::{
+    collections::BTreeMap, convert::TryFrom, fs, io, mem, ops::Deref, path::Path, str::FromStr,
+};
+
+use crate::{
+    generated::bpf_map_type::BPF_MAP_TYPE_STACK_TRACE,
+    maps::{IterableMap, Map, MapError, MapIter, MapKeys, MapRef, MapRefMut},
+    sys::bpf_map_lookup_elem_ptr,
+};
+
+/// A hash map of kernel or user space stack traces.
+///
+/// Stack trace maps can be used to store stack traces captured by eBPF programs, which can be
+/// useful for profiling, to associate a trace to an event, etc. You can capture traces calling
+/// `stack_id = bpf_get_stackid(ctx, map, flags)` from eBPF, and then you can retrieve the traces
+/// from their stack ids.
+///
+/// # Example
+///
+/// ```no_run
+/// # #[derive(thiserror::Error, Debug)]
+/// # enum Error {
+/// #     #[error(transparent)]
+/// #     IO(#[from] std::io::Error),
+/// #     #[error(transparent)]
+/// #     Map(#[from] aya::maps::MapError),
+/// #     #[error(transparent)]
+/// #     Bpf(#[from] aya::BpfError)
+/// # }
+/// # let bpf = aya::Bpf::load(&[], None)?;
+/// use aya::maps::StackTraceMap;
+/// use aya::util::kernel_symbols;
+/// use std::convert::TryFrom;
+///
+/// let mut stack_traces = StackTraceMap::try_from(bpf.map("STACK_TRACES")?)?;
+/// // load kernel symbols from /proc/kallsyms
+/// let ksyms = kernel_symbols()?;
+///
+/// // NOTE: you typically send stack_ids from eBPF to user space using other maps
+/// let stack_id = 1234;
+/// let mut stack_trace = stack_traces.get(&stack_id, 0)?;
+///
+/// // here we resolve symbol names using kernel symbols. If this was a user space stack (for
+/// // example captured from a uprobe), you'd have to load the symbols using some other mechanism
+/// // (eg loading the target binary debuginfo)
+/// for frame in stack_trace.resolve(&ksyms).frames() {
+///     println!(
+///         "{:#x} {}",
+///         frame.ip,
+///         frame
+///             .symbol_name
+///             .as_ref()
+///             .unwrap_or(&"[unknown symbol name]".to_owned())
+///     );
+/// }
+///
+/// # Ok::<(), Error>(())
+/// ```
+///
+#[derive(Debug)]
+pub struct StackTraceMap<T> {
+    inner: T,
+    max_stack_depth: usize,
+}
+
+impl<T: Deref<Target = Map>> StackTraceMap<T> {
+    /// Wraps `map` after validating its type, key size and value size.
+    ///
+    /// The maximum stack depth is read from the `kernel/perf_event_max_stack` sysctl.
+    fn new(map: T) -> Result<StackTraceMap<T>, MapError> {
+        let map_type = map.obj.def.map_type;
+        if map_type != BPF_MAP_TYPE_STACK_TRACE as u32 {
+            return Err(MapError::InvalidMapType { map_type });
+        }
+        let expected = mem::size_of::<u32>();
+        let size = map.obj.def.key_size as usize;
+        if size != expected {
+            return Err(MapError::InvalidKeySize { size, expected });
+        }
+
+        let max_stack_depth =
+            sysctl::<usize>("kernel/perf_event_max_stack").map_err(|io_error| {
+                MapError::SyscallError {
+                    call: "sysctl".to_owned(),
+                    code: -1,
+                    io_error,
+                }
+            })?;
+        // a value holds at most `max_stack_depth` instruction pointers (u64 each)
+        let expected = max_stack_depth * mem::size_of::<u64>();
+        let size = map.obj.def.value_size as usize;
+        if size > expected {
+            return Err(MapError::InvalidValueSize { size, expected });
+        }
+        let _fd = map.fd_or_err()?;
+
+        Ok(StackTraceMap {
+            inner: map,
+            max_stack_depth,
+        })
+    }
+
+    /// Returns the stack trace with the given stack_id.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`MapError::KeyNotFound`] if there is no stack trace with the
+    /// given `stack_id`, or [`MapError::SyscallError`] if `bpf_map_lookup_elem` fails.
+    pub fn get(&self, stack_id: &u32, flags: u64) -> Result<StackTrace, MapError> {
+        let fd = self.inner.fd_or_err()?;
+
+        let mut frames = vec![0; self.max_stack_depth];
+        bpf_map_lookup_elem_ptr(fd, stack_id, frames.as_mut_ptr(), flags)
+            .map_err(|(code, io_error)| MapError::SyscallError {
+                call: "bpf_map_lookup_elem".to_owned(),
+                code,
+                io_error,
+            })?
+            .ok_or(MapError::KeyNotFound)?;
+
+        let frames = frames
+            .drain(..)
+            .take_while(|ip| *ip != 0)
+            .map(|ip| StackFrame {
+                ip,
+                symbol_name: None,
+            })
+            .collect::<Vec<_>>();
+
+        Ok(StackTrace {
+            id: *stack_id,
+            frames,
+        })
+    }
+
+    /// An iterator visiting all (`stack_id`, `stack_trace`) pairs in arbitrary order. The
+    /// iterator item type is `Result<(u32, StackTrace), MapError>`.
+    pub fn iter(&self) -> MapIter<'_, u32, StackTrace> {
+        MapIter::new(self)
+    }
+
+    /// An iterator visiting all the stack_ids in arbitrary order. The iterator element
+    /// type is `Result<u32, MapError>`.
+    pub fn stack_ids(&self) -> MapKeys<'_, u32> {
+        MapKeys::new(&self.inner)
+    }
+}
+
+impl<T: Deref<Target = Map>> IterableMap<u32, StackTrace> for StackTraceMap<T> {
+    fn map(&self) -> &Map {
+        &self.inner
+    }
+
+    unsafe fn get(&self, index: &u32) -> Result<StackTrace, MapError> {
+        self.get(index, 0)
+    }
+}
+
+impl TryFrom<MapRef> for StackTraceMap<MapRef> {
+    type Error = MapError;
+
+    fn try_from(a: MapRef) -> Result<StackTraceMap<MapRef>, MapError> {
+        StackTraceMap::new(a)
+    }
+}
+
+impl TryFrom<MapRefMut> for StackTraceMap<MapRefMut> {
+    type Error = MapError;
+
+    fn try_from(a: MapRefMut) -> Result<StackTraceMap<MapRefMut>, MapError> {
+        StackTraceMap::new(a)
+    }
+}
+
+impl<'a, T: Deref<Target = Map>> IntoIterator for &'a StackTraceMap<T> {
+    type Item = Result<(u32, StackTrace), MapError>;
+    type IntoIter = MapIter<'a, u32, StackTrace>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+/// A kernel or user space stack trace.
+///
+/// See the [`StackTraceMap`] documentation for examples.
+#[derive(Debug)]
+pub struct StackTrace {
+    /// The stack trace id as returned by `bpf_get_stackid()`.
+    pub id: u32,
+    frames: Vec<StackFrame>,
+}
+
+impl StackTrace {
+    /// Resolves symbol names using the given symbol map.
+    ///
+    /// You can use [`util::kernel_symbols()`](crate::util::kernel_symbols) to load kernel symbols. For
+    /// user-space traces you need to provide the symbols, for example loading
+    /// them from debug info.
+    pub fn resolve(&mut self, symbols: &BTreeMap<u64, String>) -> &StackTrace {
+        for frame in self.frames.iter_mut() {
+            frame.symbol_name = symbols
+                .range(..=frame.ip)
+                .next_back()
+                .map(|(_, s)| s.clone())
+        }
+
+        self
+    }
+
+    /// Returns the frames in this stack trace.
+    pub fn frames(&self) -> &[StackFrame] {
+        &self.frames
+    }
+}
+
+/// A stack frame.
+#[derive(Debug)]
+pub struct StackFrame {
+    /// The instruction pointer of this frame.
+    pub ip: u64,
+    /// The symbol name corresponding to the start of this frame.
+    ///
+    /// Set to `Some()` if the frame address can be found in the symbols passed
+    /// to [`StackTrace::resolve`].
+    pub symbol_name: Option<String>,
+}
+
+/// Reads `/proc/sys/<key>` and parses its trimmed contents as `T`.
+///
+/// Parse failures are reported as [`io::ErrorKind::InvalidData`] carrying the raw value.
+fn sysctl<T: FromStr>(key: &str) -> Result<T, io::Error> {
+    let val = fs::read_to_string(Path::new("/proc/sys").join(key))?;
+    val.trim()
+        .parse::<T>()
+        .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, val))
+}
diff --git a/aya/src/util.rs b/aya/src/util.rs
index c5bc773c..ea28c73c 100644
--- a/aya/src/util.rs
+++ b/aya/src/util.rs
@@ -57,6 +57,9 @@ fn parse_cpu_ranges(data: &str) -> Result<Vec<u32>, ()> {
     Ok(cpus)
 }
 
+/// Loads kernel symbols from `/proc/kallsyms`.
+///
+/// The symbols can be passed to [`StackTrace::resolve`](crate::maps::stack_trace::StackTrace::resolve).
 pub fn kernel_symbols() -> Result<BTreeMap<u64, String>, io::Error> {
     let mut reader = BufReader::new(File::open("/proc/kallsyms")?);
     parse_kernel_symbols(&mut reader)
diff --git a/xtask/src/codegen/aya.rs b/xtask/src/codegen/aya.rs
index 25817205..77ad3732 100644
--- a/xtask/src/codegen/aya.rs
+++ b/xtask/src/codegen/aya.rs
@@ -98,6 +98,7 @@ fn codegen_bindings(opts: &Options) -> Result<(), anyhow::Error> {
         // PERF
         "PERF_FLAG_.*",
         "PERF_EVENT_.*",
+        "PERF_MAX_.*",
         // see linux_wrapper.h, these are to workaround the IOC macros
         "AYA_PERF_EVENT_.*",
         // NETLINK