From 6ff54d06f8d272abee41d98c93c41a687ade47bb Mon Sep 17 00:00:00 2001
From: Tatsuyuki Ishi
Date: Thu, 21 Oct 2021 17:52:01 -0400
Subject: [PATCH] aya-bpf: implement ringbuf map

Co-authored-by: William Findlay
---
 aya/src/maps/ringbuf.rs         |  19 ++++
 bpf/aya-bpf/src/maps/mod.rs     |   2 +
 bpf/aya-bpf/src/maps/ringbuf.rs | 163 ++++++++++++++++++++++++++++++++
 3 files changed, 184 insertions(+)
 create mode 100644 aya/src/maps/ringbuf.rs
 create mode 100644 bpf/aya-bpf/src/maps/ringbuf.rs

diff --git a/aya/src/maps/ringbuf.rs b/aya/src/maps/ringbuf.rs
new file mode 100644
index 00000000..d3b0988d
--- /dev/null
+++ b/aya/src/maps/ringbuf.rs
@@ -0,0 +1,19 @@
+//! A [ring buffer map][ringbuf] that may be used to receive events from eBPF programs.
+//! As of Linux 5.8, this is the preferred way to transfer per-event data from eBPF
+//! programs to userspace.
+//!
+//! [ringbuf]: https://www.kernel.org/doc/html/latest/bpf/ringbuf.html
+
+use std::{ops::DerefMut, sync::Arc};
+
+use crate::{
+    generated::bpf_map_type::BPF_MAP_TYPE_RINGBUF,
+    maps::{Map, MapError, MapRefMut},
+};
+
+#[doc(alias = "BPF_MAP_TYPE_RINGBUF")]
+pub struct RingBuf<T: DerefMut<Target = Map>> {
+    _map: Arc<T>,
+}
+
+impl<T: DerefMut<Target = Map>> RingBuf<T> {}
diff --git a/bpf/aya-bpf/src/maps/mod.rs b/bpf/aya-bpf/src/maps/mod.rs
index 8fa375dd..e6cd390c 100644
--- a/bpf/aya-bpf/src/maps/mod.rs
+++ b/bpf/aya-bpf/src/maps/mod.rs
@@ -13,6 +13,7 @@ pub mod per_cpu_array;
 pub mod perf;
 pub mod program_array;
 pub mod queue;
+pub mod ringbuf;
 pub mod sock_hash;
 pub mod sock_map;
 pub mod stack;
@@ -26,6 +27,7 @@ pub use per_cpu_array::PerCpuArray;
 pub use perf::{PerfEventArray, PerfEventByteArray};
 pub use program_array::ProgramArray;
 pub use queue::Queue;
+pub use ringbuf::RingBuf;
 pub use sock_hash::SockHash;
 pub use sock_map::SockMap;
 pub use stack::Stack;
diff --git a/bpf/aya-bpf/src/maps/ringbuf.rs b/bpf/aya-bpf/src/maps/ringbuf.rs
new file mode 100644
index 00000000..b5ec4792
--- /dev/null
+++ b/bpf/aya-bpf/src/maps/ringbuf.rs
@@ -0,0 +1,163 @@
+use core::{
+    cell::UnsafeCell,
+    mem,
+    mem::MaybeUninit,
+    ops::{Deref, DerefMut},
+};
+
+use crate::{
+    bindings::{bpf_map_def, bpf_map_type::BPF_MAP_TYPE_RINGBUF},
+    helpers::{
+        bpf_ringbuf_discard, bpf_ringbuf_output, bpf_ringbuf_query, bpf_ringbuf_reserve,
+        bpf_ringbuf_submit,
+    },
+    maps::PinningType,
+};
+
+#[repr(transparent)]
+pub struct RingBuf {
+    def: UnsafeCell<bpf_map_def>,
+}
+
+unsafe impl Sync for RingBuf {}
+
+/// A ring buffer entry, returned from [`RingBuf::reserve`].
+///
+/// You must [`submit`] or [`discard`] this entry before this gets dropped.
+///
+/// [`submit`]: RingBufEntry::submit
+/// [`discard`]: RingBufEntry::discard
+#[must_use = "BPF verifier requires ring buffer entries to be either submitted or discarded"]
+pub struct RingBufEntry<T: 'static>(&'static mut MaybeUninit<T>);
+
+impl<T> Deref for RingBufEntry<T> {
+    type Target = MaybeUninit<T>;
+
+    fn deref(&self) -> &Self::Target {
+        self.0
+    }
+}
+
+impl<T> DerefMut for RingBufEntry<T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.0
+    }
+}
+
+impl<T> RingBufEntry<T> {
+    /// Discard this ring buffer entry. The entry will be skipped by the userspace reader.
+    pub fn discard(self, flags: u64) {
+        unsafe { bpf_ringbuf_discard(self.0.as_mut_ptr() as *mut _, flags) };
+    }
+
+    /// Commit this ring buffer entry. The entry will be made visible to the userspace reader.
+    pub fn submit(self, flags: u64) {
+        unsafe { bpf_ringbuf_submit(self.0.as_mut_ptr() as *mut _, flags) };
+    }
+}
+
+impl RingBuf {
+    /// Declare a BPF ring buffer.
+    ///
+    /// `max_entries` must be a power of two.
+    pub const fn with_max_entries(max_entries: u32, flags: u32) -> RingBuf {
+        RingBuf {
+            def: UnsafeCell::new(bpf_map_def {
+                type_: BPF_MAP_TYPE_RINGBUF,
+                key_size: 0,
+                value_size: 0,
+                max_entries,
+                map_flags: flags,
+                id: 0,
+                pinning: PinningType::None as u32,
+            }),
+        }
+    }
+
+    /// Declare a pinned BPF ring buffer.
+    ///
+    /// `max_entries` must be a power of two.
+    pub const fn pinned(max_entries: u32, flags: u32) -> RingBuf {
+        RingBuf {
+            def: UnsafeCell::new(bpf_map_def {
+                type_: BPF_MAP_TYPE_RINGBUF,
+                key_size: 0,
+                value_size: 0,
+                max_entries,
+                map_flags: flags,
+                id: 0,
+                pinning: PinningType::ByName as u32,
+            }),
+        }
+    }
+
+    /// Reserve memory in the ring buffer that can fit `T`.
+    ///
+    /// Returns `None` if the ring buffer is full, or a reference to the allocated memory if the
+    /// allocation succeeds.
+    ///
+    /// If the return value is not None, you must commit or discard the reserved entry through a
+    /// call to [`RingBufEntry::submit`] or [`RingBufEntry::discard`].
+    ///
+    /// `T` must be aligned to 1, 2, 4 or 8 bytes; it's not possible to fulfill larger alignment
+    /// requests. If you use this with a `T` that isn't properly aligned, this function will
+    /// be compiled to a panic and silently make your eBPF program fail to load.
+    pub fn reserve<T: 'static>(&self, flags: u64) -> Option<RingBufEntry<T>> {
+        // The reserved pointer may be null, which we handle with an Option.
+        // We also need to ensure that the returned pointer is of a proper sized allocation and
+        // satisfies T's alignment requirements.
+        // Finally, cast it to an MaybeUninit<T> as creating a reference to uninitialized memory is UB.
+
+        // ringbuf allocations are aligned to 8 bytes (hardcoded in kernel code).
+        assert!(8 % mem::align_of::<T>() == 0);
+
+        let ptr = unsafe {
+            bpf_ringbuf_reserve(self.def.get() as *mut _, mem::size_of::<T>() as _, flags)
+                as *mut MaybeUninit<T>
+        };
+        match ptr.is_null() {
+            true => None,
+            false => Some(RingBufEntry(unsafe { &mut *ptr })),
+        }
+    }
+
+    /// Copy `data` to the ring buffer output.
+    ///
+    /// Consider using [`reserve`] and [`submit`] if `T` is statically sized and you want to save a
+    /// redundant allocation on and a copy from the stack.
+    ///
+    /// Unlike [`reserve`], this function can handle dynamically sized types (which is hard to
+    /// create in eBPF but still possible, e.g. by slicing an array).
+    ///
+    /// `T` must be aligned to 1, 2, 4 or 8 bytes; it's not possible to fulfill larger alignment
+    /// requests. If you use this with a `T` that isn't properly aligned, this function will
+    /// be compiled to a panic and silently make your eBPF program fail to load.
+    ///
+    /// [`reserve`]: RingBuf::reserve
+    /// [`submit`]: RingBufEntry::submit
+    pub fn output<T: ?Sized>(&self, data: &T, flags: u64) -> Result<(), i64> {
+        // See `reserve` for alignment requirements.
+        assert!(8 % mem::align_of_val(data) == 0);
+
+        let ret = unsafe {
+            bpf_ringbuf_output(
+                self.def.get() as *mut _,
+                data as *const _ as *mut _,
+                mem::size_of_val(data) as _,
+                flags,
+            )
+        };
+        if ret < 0 {
+            Err(ret)
+        } else {
+            Ok(())
+        }
+    }
+
+    /// Query various information about the ring buffer.
+    ///
+    /// Consult `bpf_ringbuf_query` documentation for a list of allowed flags.
+    pub fn query(&self, flags: u64) -> u64 {
+        unsafe { bpf_ringbuf_query(self.def.get() as *mut _, flags) }
+    }
+}