mirror of https://github.com/aya-rs/aya
Merge branch 'main' into main
commit
dffb17b5b8
@ -0,0 +1,8 @@
|
|||||||
|
config:
|
||||||
|
no-duplicate-heading: false
|
||||||
|
|
||||||
|
globs:
|
||||||
|
- "**/*.md"
|
||||||
|
ignores:
|
||||||
|
- "target/**/*"
|
||||||
|
- "xtask/libbpf/**/*"
|
@ -0,0 +1,24 @@
|
|||||||
|
//! XDP programs.
|
||||||
|
|
||||||
|
use crate::generated::bpf_attach_type;
|
||||||
|
|
||||||
|
/// Defines where to attach an `XDP` program.
|
||||||
|
#[derive(Copy, Clone, Debug)]
|
||||||
|
pub enum XdpAttachType {
|
||||||
|
/// Attach to a network interface.
|
||||||
|
Interface,
|
||||||
|
/// Attach to a cpumap. Requires kernel 5.9 or later.
|
||||||
|
CpuMap,
|
||||||
|
/// Attach to a devmap. Requires kernel 5.8 or later.
|
||||||
|
DevMap,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<XdpAttachType> for bpf_attach_type {
|
||||||
|
fn from(value: XdpAttachType) -> Self {
|
||||||
|
match value {
|
||||||
|
XdpAttachType::Interface => bpf_attach_type::BPF_XDP,
|
||||||
|
XdpAttachType::CpuMap => bpf_attach_type::BPF_XDP_CPUMAP,
|
||||||
|
XdpAttachType::DevMap => bpf_attach_type::BPF_XDP_DEVMAP,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,458 @@
|
|||||||
|
//! A [ring buffer map][ringbuf] that may be used to receive events from eBPF programs.
|
||||||
|
//! As of Linux 5.8, this is the preferred way to transfer per-event data from eBPF
|
||||||
|
//! programs to userspace.
|
||||||
|
//!
|
||||||
|
//! [ringbuf]: https://www.kernel.org/doc/html/latest/bpf/ringbuf.html
|
||||||
|
|
||||||
|
use std::{
|
||||||
|
borrow::Borrow,
|
||||||
|
ffi::{c_int, c_void},
|
||||||
|
fmt::{self, Debug, Formatter},
|
||||||
|
io, mem,
|
||||||
|
ops::Deref,
|
||||||
|
os::fd::{AsFd as _, AsRawFd, BorrowedFd, RawFd},
|
||||||
|
ptr,
|
||||||
|
ptr::NonNull,
|
||||||
|
slice,
|
||||||
|
sync::atomic::{AtomicU32, AtomicUsize, Ordering},
|
||||||
|
};
|
||||||
|
|
||||||
|
use libc::{munmap, off_t, MAP_FAILED, MAP_SHARED, PROT_READ, PROT_WRITE};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
generated::{BPF_RINGBUF_BUSY_BIT, BPF_RINGBUF_DISCARD_BIT, BPF_RINGBUF_HDR_SZ},
|
||||||
|
maps::{MapData, MapError},
|
||||||
|
sys::{mmap, SyscallError},
|
||||||
|
util::page_size,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// A map that can be used to receive events from eBPF programs.
|
||||||
|
///
|
||||||
|
/// This is similar to [`crate::maps::PerfEventArray`], but different in a few ways:
|
||||||
|
/// * It's shared across all CPUs, which allows a strong ordering between events.
|
||||||
|
/// * Data notifications are delivered precisely instead of being sampled for every N events; the
|
||||||
|
/// eBPF program can also control notification delivery if sampling is desired for performance
|
||||||
|
/// reasons. By default, a notification will be sent if the consumer is caught up at the time of
|
||||||
|
/// committing. The eBPF program can use the `BPF_RB_NO_WAKEUP` or `BPF_RB_FORCE_WAKEUP` flags to
|
||||||
|
/// control this behavior.
|
||||||
|
/// * On the eBPF side, it supports the reverse-commit pattern where the event can be directly
|
||||||
|
/// written into the ring without copying from a temporary location.
|
||||||
|
/// * Dropped sample notifications go to the eBPF program as the return value of `reserve`/`output`,
|
||||||
|
/// and not the userspace reader. This might require extra code to handle, but allows for more
|
||||||
|
/// flexible schemes to handle dropped samples.
|
||||||
|
///
|
||||||
|
/// To receive events you need to:
|
||||||
|
/// * Construct [`RingBuf`] using [`RingBuf::try_from`].
|
||||||
|
/// * Call [`RingBuf::next`] to poll events from the [`RingBuf`].
|
||||||
|
///
|
||||||
|
/// To receive async notifications of data availability, you may construct an
|
||||||
|
/// [`tokio::io::unix::AsyncFd`] from the [`RingBuf`]'s file descriptor and poll it for readiness.
|
||||||
|
///
|
||||||
|
/// # Minimum kernel version
|
||||||
|
///
|
||||||
|
/// The minimum kernel version required to use this feature is 5.8.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```no_run
|
||||||
|
/// # struct PollFd<T>(T);
|
||||||
|
/// # fn poll_fd<T>(t: T) -> PollFd<T> { PollFd(t) }
|
||||||
|
/// # impl<T> PollFd<T> {
|
||||||
|
/// # fn readable(&mut self) -> Guard<'_, T> { Guard(self) }
|
||||||
|
/// # }
|
||||||
|
/// # struct Guard<'a, T>(&'a mut PollFd<T>);
|
||||||
|
/// # impl<T> Guard<'_, T> {
|
||||||
|
/// # fn inner_mut(&mut self) -> &mut T {
|
||||||
|
/// # let Guard(PollFd(t)) = self;
|
||||||
|
/// # t
|
||||||
|
/// # }
|
||||||
|
/// # fn clear_ready(&mut self) {}
|
||||||
|
/// # }
|
||||||
|
/// # let bpf = aya::Bpf::load(&[])?;
|
||||||
|
/// use aya::maps::RingBuf;
|
||||||
|
/// use std::convert::TryFrom;
|
||||||
|
///
|
||||||
|
/// let ring_buf = RingBuf::try_from(bpf.map_mut("ARRAY")?)?;
|
||||||
|
/// let poll = poll_fd(ring_buf);
|
||||||
|
/// loop {
|
||||||
|
/// let mut guard = poll.readable()?;
|
||||||
|
/// let ring_buf = guard.inner_mut()
|
||||||
|
/// while let Some(item) = ring_buf.next() {
|
||||||
|
/// println!("Received: {:?}", item);
|
||||||
|
/// }
|
||||||
|
/// guard.clear_ready();
|
||||||
|
/// }
|
||||||
|
/// # Ok::<(), aya::BpfError>(())
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// # Polling
|
||||||
|
///
|
||||||
|
/// In the example above the implementations of poll(), poll.readable(), guard.inner_mut(), and
|
||||||
|
/// guard.clear_ready() are not given. RingBuf implements the AsRawFd trait, so you can implement
|
||||||
|
/// polling using any crate that can poll file descriptors, like epoll, mio etc. The above example
|
||||||
|
/// API is motivated by that of [`tokio::io::unix::AsyncFd`].
|
||||||
|
#[doc(alias = "BPF_MAP_TYPE_RINGBUF")]
|
||||||
|
pub struct RingBuf<T> {
|
||||||
|
map: T,
|
||||||
|
consumer: ConsumerPos,
|
||||||
|
producer: ProducerData,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Borrow<MapData>> RingBuf<T> {
|
||||||
|
pub(crate) fn new(map: T) -> Result<Self, MapError> {
|
||||||
|
let data: &MapData = map.borrow();
|
||||||
|
let page_size = page_size();
|
||||||
|
let map_fd = data.fd().as_fd();
|
||||||
|
let byte_size = data.obj.max_entries();
|
||||||
|
let consumer_metadata = ConsumerMetadata::new(map_fd, 0, page_size)?;
|
||||||
|
let consumer = ConsumerPos::new(consumer_metadata);
|
||||||
|
let producer = ProducerData::new(map_fd, page_size, page_size, byte_size)?;
|
||||||
|
Ok(Self {
|
||||||
|
map,
|
||||||
|
consumer,
|
||||||
|
producer,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> RingBuf<T> {
|
||||||
|
/// Try to take a new entry from the ringbuf.
|
||||||
|
///
|
||||||
|
/// Returns `Some(item)` if the ringbuf is not empty. Returns `None` if the ringbuf is empty, in
|
||||||
|
/// which case the caller may register for availability notifications through `epoll` or other
|
||||||
|
/// APIs. Only one RingBufItem may be outstanding at a time.
|
||||||
|
//
|
||||||
|
// This is not an implementation of `Iterator` because we need to be able to refer to the
|
||||||
|
// lifetime of the iterator in the returned `RingBufItem`. If the Iterator::Item leveraged GATs,
|
||||||
|
// one could imagine an implementation of `Iterator` that would work. GATs are stabilized in
|
||||||
|
// Rust 1.65, but there's not yet a trait that the community seems to have standardized around.
|
||||||
|
#[allow(clippy::should_implement_trait)]
|
||||||
|
pub fn next(&mut self) -> Option<RingBufItem<'_>> {
|
||||||
|
let Self {
|
||||||
|
consumer, producer, ..
|
||||||
|
} = self;
|
||||||
|
producer.next(consumer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Access to the RawFd can be used to construct an AsyncFd for use with epoll.
|
||||||
|
impl<T: Borrow<MapData>> AsRawFd for RingBuf<T> {
|
||||||
|
fn as_raw_fd(&self) -> RawFd {
|
||||||
|
let Self {
|
||||||
|
map,
|
||||||
|
consumer: _,
|
||||||
|
producer: _,
|
||||||
|
} = self;
|
||||||
|
map.borrow().fd().as_fd().as_raw_fd()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The current outstanding item read from the ringbuf.
|
||||||
|
pub struct RingBufItem<'a> {
|
||||||
|
data: &'a [u8],
|
||||||
|
consumer: &'a mut ConsumerPos,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Deref for RingBufItem<'_> {
|
||||||
|
type Target = [u8];
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
let Self { data, .. } = self;
|
||||||
|
data
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for RingBufItem<'_> {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
let Self { consumer, data } = self;
|
||||||
|
consumer.consume(data.len())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Debug for RingBufItem<'_> {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||||
|
let Self {
|
||||||
|
data,
|
||||||
|
consumer:
|
||||||
|
ConsumerPos {
|
||||||
|
pos,
|
||||||
|
metadata: ConsumerMetadata { mmap: _ },
|
||||||
|
},
|
||||||
|
} = self;
|
||||||
|
// In general Relaxed here is sufficient, for debugging, it certainly is.
|
||||||
|
f.debug_struct("RingBufItem")
|
||||||
|
.field("pos", pos)
|
||||||
|
.field("len", &data.len())
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ConsumerMetadata {
|
||||||
|
mmap: MMap,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ConsumerMetadata {
|
||||||
|
fn new(fd: BorrowedFd<'_>, offset: usize, page_size: usize) -> Result<Self, MapError> {
|
||||||
|
let mmap = MMap::new(
|
||||||
|
fd,
|
||||||
|
page_size,
|
||||||
|
PROT_READ | PROT_WRITE,
|
||||||
|
MAP_SHARED,
|
||||||
|
offset.try_into().unwrap(),
|
||||||
|
)?;
|
||||||
|
Ok(Self { mmap })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AsRef<AtomicUsize> for ConsumerMetadata {
|
||||||
|
fn as_ref(&self) -> &AtomicUsize {
|
||||||
|
let Self {
|
||||||
|
mmap: MMap { ptr, .. },
|
||||||
|
} = self;
|
||||||
|
unsafe { ptr.cast::<AtomicUsize>().as_ref() }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ConsumerPos {
|
||||||
|
pos: usize,
|
||||||
|
metadata: ConsumerMetadata,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ConsumerPos {
|
||||||
|
fn new(metadata: ConsumerMetadata) -> Self {
|
||||||
|
// Load the initial value of the consumer position. SeqCst is used to be safe given we don't
|
||||||
|
// have any claims about memory synchronization performed by some previous writer.
|
||||||
|
let pos = metadata.as_ref().load(Ordering::SeqCst);
|
||||||
|
Self { pos, metadata }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn consume(&mut self, len: usize) {
|
||||||
|
let Self { pos, metadata } = self;
|
||||||
|
|
||||||
|
// TODO: Use primitive method when https://github.com/rust-lang/rust/issues/88581 is stabilized.
|
||||||
|
fn next_multiple_of(n: usize, multiple: usize) -> usize {
|
||||||
|
match n % multiple {
|
||||||
|
0 => n,
|
||||||
|
rem => n + (multiple - rem),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*pos += next_multiple_of(usize::try_from(BPF_RINGBUF_HDR_SZ).unwrap() + len, 8);
|
||||||
|
|
||||||
|
// Write operation needs to be properly ordered with respect to the producer committing new
|
||||||
|
// data to the ringbuf. The producer uses xchg (SeqCst) to commit new data [1]. The producer
|
||||||
|
// reads the consumer offset after clearing the busy bit on a new entry [2]. By using SeqCst
|
||||||
|
// here we ensure that either a subsequent read by the consumer to consume messages will see
|
||||||
|
// an available message, or the producer in the kernel will see the updated consumer offset
|
||||||
|
// that is caught up.
|
||||||
|
//
|
||||||
|
// [1]: https://github.com/torvalds/linux/blob/2772d7df/kernel/bpf/ringbuf.c#L487-L488
|
||||||
|
// [2]: https://github.com/torvalds/linux/blob/2772d7df/kernel/bpf/ringbuf.c#L494
|
||||||
|
metadata.as_ref().store(*pos, Ordering::SeqCst);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ProducerData {
|
||||||
|
mmap: MMap,
|
||||||
|
|
||||||
|
// Offset in the mmap where the data starts.
|
||||||
|
data_offset: usize,
|
||||||
|
|
||||||
|
// A cache of the value of the producer position. It is used to avoid re-reading the producer
|
||||||
|
// position when we know there is more data to consume.
|
||||||
|
pos_cache: usize,
|
||||||
|
|
||||||
|
// A bitmask which truncates u32 values to the domain of valid offsets in the ringbuf.
|
||||||
|
mask: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ProducerData {
|
||||||
|
fn new(
|
||||||
|
fd: BorrowedFd<'_>,
|
||||||
|
offset: usize,
|
||||||
|
page_size: usize,
|
||||||
|
byte_size: u32,
|
||||||
|
) -> Result<Self, MapError> {
|
||||||
|
// The producer pages have one page of metadata and then the data pages, all mapped
|
||||||
|
// read-only. Note that the length of the mapping includes the data pages twice as the
|
||||||
|
// kernel will map them two time consecutively to avoid special handling of entries that
|
||||||
|
// cross over the end of the ring buffer.
|
||||||
|
//
|
||||||
|
// The kernel diagram below shows the layout of the ring buffer. It references "meta pages",
|
||||||
|
// but we only map exactly one producer meta page read-only. The consumer meta page is mapped
|
||||||
|
// read-write elsewhere, and is taken into consideration via the offset parameter.
|
||||||
|
//
|
||||||
|
// From kernel/bpf/ringbuf.c[0]:
|
||||||
|
//
|
||||||
|
// Each data page is mapped twice to allow "virtual"
|
||||||
|
// continuous read of samples wrapping around the end of ring
|
||||||
|
// buffer area:
|
||||||
|
// ------------------------------------------------------
|
||||||
|
// | meta pages | real data pages | same data pages |
|
||||||
|
// ------------------------------------------------------
|
||||||
|
// | | 1 2 3 4 5 6 7 8 9 | 1 2 3 4 5 6 7 8 9 |
|
||||||
|
// ------------------------------------------------------
|
||||||
|
// | | TA DA | TA DA |
|
||||||
|
// ------------------------------------------------------
|
||||||
|
// ^^^^^^^
|
||||||
|
// |
|
||||||
|
// Here, no need to worry about special handling of wrapped-around
|
||||||
|
// data due to double-mapped data pages. This works both in kernel and
|
||||||
|
// when mmap()'ed in user-space, simplifying both kernel and
|
||||||
|
// user-space implementations significantly.
|
||||||
|
//
|
||||||
|
// [0]: https://github.com/torvalds/linux/blob/3f01e9fe/kernel/bpf/ringbuf.c#L108-L124
|
||||||
|
let len = page_size + 2 * usize::try_from(byte_size).unwrap();
|
||||||
|
let mmap = MMap::new(fd, len, PROT_READ, MAP_SHARED, offset.try_into().unwrap())?;
|
||||||
|
|
||||||
|
// byte_size is required to be a power of two multiple of page_size (which implicitly is a
|
||||||
|
// power of 2), so subtracting one will create a bitmask for values less than byte_size.
|
||||||
|
debug_assert!(byte_size.is_power_of_two());
|
||||||
|
let mask = byte_size - 1;
|
||||||
|
Ok(Self {
|
||||||
|
mmap,
|
||||||
|
data_offset: page_size,
|
||||||
|
pos_cache: 0,
|
||||||
|
mask,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn next<'a>(&'a mut self, consumer: &'a mut ConsumerPos) -> Option<RingBufItem<'a>> {
|
||||||
|
let Self {
|
||||||
|
ref mmap,
|
||||||
|
data_offset,
|
||||||
|
pos_cache,
|
||||||
|
mask,
|
||||||
|
} = self;
|
||||||
|
let pos = unsafe { mmap.ptr.cast().as_ref() };
|
||||||
|
let mmap_data = mmap.as_ref();
|
||||||
|
let data_pages = mmap_data.get(*data_offset..).unwrap_or_else(|| {
|
||||||
|
panic!(
|
||||||
|
"offset {} out of bounds, data len {}",
|
||||||
|
data_offset,
|
||||||
|
mmap_data.len()
|
||||||
|
)
|
||||||
|
});
|
||||||
|
while data_available(pos, pos_cache, consumer) {
|
||||||
|
match read_item(data_pages, *mask, consumer) {
|
||||||
|
Item::Busy => return None,
|
||||||
|
Item::Discard { len } => consumer.consume(len),
|
||||||
|
Item::Data(data) => return Some(RingBufItem { data, consumer }),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return None;
|
||||||
|
|
||||||
|
enum Item<'a> {
|
||||||
|
Busy,
|
||||||
|
Discard { len: usize },
|
||||||
|
Data(&'a [u8]),
|
||||||
|
}
|
||||||
|
|
||||||
|
fn data_available(
|
||||||
|
producer: &AtomicUsize,
|
||||||
|
cache: &mut usize,
|
||||||
|
consumer: &ConsumerPos,
|
||||||
|
) -> bool {
|
||||||
|
let ConsumerPos { pos: consumer, .. } = consumer;
|
||||||
|
if consumer == cache {
|
||||||
|
// This value is written using Release by the kernel [1], and should be read with
|
||||||
|
// Acquire to ensure that the prior writes to the entry header are visible.
|
||||||
|
//
|
||||||
|
// [1]: https://github.com/torvalds/linux/blob/eb26cbb1/kernel/bpf/ringbuf.c#L447-L448
|
||||||
|
*cache = producer.load(Ordering::Acquire);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Note that we don't compare the order of the values because the producer position may
|
||||||
|
// overflow u32 and wrap around to 0. Instead we just compare equality and assume that
|
||||||
|
// the consumer position is always logically less than the producer position.
|
||||||
|
//
|
||||||
|
// Note also that the kernel, at the time of writing [1], doesn't seem to handle this
|
||||||
|
// overflow correctly at all, and it's not clear that one can produce events after the
|
||||||
|
// producer position has wrapped around.
|
||||||
|
//
|
||||||
|
// [1]: https://github.com/torvalds/linux/blob/4b810bf0/kernel/bpf/ringbuf.c#L434-L440
|
||||||
|
consumer != cache
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_item<'data>(data: &'data [u8], mask: u32, pos: &ConsumerPos) -> Item<'data> {
|
||||||
|
let ConsumerPos { pos, .. } = pos;
|
||||||
|
let offset = pos & usize::try_from(mask).unwrap();
|
||||||
|
let must_get_data = |offset, len| {
|
||||||
|
data.get(offset..offset + len).unwrap_or_else(|| {
|
||||||
|
panic!("{:?} not in {:?}", offset..offset + len, 0..data.len())
|
||||||
|
})
|
||||||
|
};
|
||||||
|
let header_ptr =
|
||||||
|
must_get_data(offset, mem::size_of::<AtomicU32>()).as_ptr() as *const AtomicU32;
|
||||||
|
// Pair the kernel's SeqCst write (implies Release) [1] with an Acquire load. This
|
||||||
|
// ensures data written by the producer will be visible.
|
||||||
|
//
|
||||||
|
// [1]: https://github.com/torvalds/linux/blob/eb26cbb1/kernel/bpf/ringbuf.c#L488
|
||||||
|
let header = unsafe { &*header_ptr }.load(Ordering::Acquire);
|
||||||
|
if header & BPF_RINGBUF_BUSY_BIT != 0 {
|
||||||
|
Item::Busy
|
||||||
|
} else {
|
||||||
|
let len = usize::try_from(header & mask).unwrap();
|
||||||
|
if header & BPF_RINGBUF_DISCARD_BIT != 0 {
|
||||||
|
Item::Discard { len }
|
||||||
|
} else {
|
||||||
|
let data_offset = offset + usize::try_from(BPF_RINGBUF_HDR_SZ).unwrap();
|
||||||
|
let data = must_get_data(data_offset, len);
|
||||||
|
Item::Data(data)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MMap corresponds to a memory-mapped region.
|
||||||
|
//
|
||||||
|
// The data is unmapped in Drop.
|
||||||
|
struct MMap {
|
||||||
|
ptr: NonNull<c_void>,
|
||||||
|
len: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MMap {
|
||||||
|
fn new(
|
||||||
|
fd: BorrowedFd<'_>,
|
||||||
|
len: usize,
|
||||||
|
prot: c_int,
|
||||||
|
flags: c_int,
|
||||||
|
offset: off_t,
|
||||||
|
) -> Result<Self, MapError> {
|
||||||
|
match unsafe { mmap(ptr::null_mut(), len, prot, flags, fd, offset) } {
|
||||||
|
MAP_FAILED => Err(MapError::SyscallError(SyscallError {
|
||||||
|
call: "mmap",
|
||||||
|
io_error: io::Error::last_os_error(),
|
||||||
|
})),
|
||||||
|
ptr => Ok(Self {
|
||||||
|
ptr: ptr::NonNull::new(ptr).ok_or(
|
||||||
|
// This should never happen, but to be paranoid, and so we never need to talk
|
||||||
|
// about a null pointer, we check it anyway.
|
||||||
|
MapError::SyscallError(SyscallError {
|
||||||
|
call: "mmap",
|
||||||
|
io_error: io::Error::new(
|
||||||
|
io::ErrorKind::Other,
|
||||||
|
"mmap returned null pointer",
|
||||||
|
),
|
||||||
|
}),
|
||||||
|
)?,
|
||||||
|
len,
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AsRef<[u8]> for MMap {
|
||||||
|
fn as_ref(&self) -> &[u8] {
|
||||||
|
let Self { ptr, len } = self;
|
||||||
|
unsafe { slice::from_raw_parts(ptr.as_ptr().cast(), *len) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for MMap {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
let Self { ptr, len } = *self;
|
||||||
|
unsafe { munmap(ptr.as_ptr(), len) };
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,194 @@
|
|||||||
|
//! An array of available CPUs.
|
||||||
|
|
||||||
|
use std::{
|
||||||
|
borrow::{Borrow, BorrowMut},
|
||||||
|
num::NonZeroU32,
|
||||||
|
os::fd::{AsFd, AsRawFd},
|
||||||
|
};
|
||||||
|
|
||||||
|
use aya_obj::generated::bpf_cpumap_val;
|
||||||
|
|
||||||
|
use super::XdpMapError;
|
||||||
|
use crate::{
|
||||||
|
maps::{check_bounds, check_kv_size, IterableMap, MapData, MapError},
|
||||||
|
programs::ProgramFd,
|
||||||
|
sys::{bpf_map_lookup_elem, bpf_map_update_elem, SyscallError},
|
||||||
|
Pod, FEATURES,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// An array of available CPUs.
|
||||||
|
///
|
||||||
|
/// XDP programs can use this map to redirect packets to a target
|
||||||
|
/// CPU for processing.
|
||||||
|
///
|
||||||
|
/// # Minimum kernel version
|
||||||
|
///
|
||||||
|
/// The minimum kernel version required to use this feature is 4.15.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
/// ```no_run
|
||||||
|
/// # let elf_bytes = &[];
|
||||||
|
/// use aya::maps::xdp::CpuMap;
|
||||||
|
///
|
||||||
|
/// let ncpus = aya::util::nr_cpus().unwrap() as u32;
|
||||||
|
/// let mut bpf = aya::BpfLoader::new()
|
||||||
|
/// .set_max_entries("CPUS", ncpus)
|
||||||
|
/// .load(elf_bytes)
|
||||||
|
/// .unwrap();
|
||||||
|
/// let mut cpumap = CpuMap::try_from(bpf.map_mut("CPUS").unwrap())?;
|
||||||
|
/// let flags = 0;
|
||||||
|
/// let queue_size = 2048;
|
||||||
|
/// for i in 0..ncpus {
|
||||||
|
/// cpumap.set(i, queue_size, None, flags);
|
||||||
|
/// }
|
||||||
|
///
|
||||||
|
/// # Ok::<(), aya::BpfError>(())
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// # See also
|
||||||
|
///
|
||||||
|
/// Kernel documentation: <https://docs.kernel.org/next/bpf/map_cpumap.html>
|
||||||
|
#[doc(alias = "BPF_MAP_TYPE_CPUMAP")]
|
||||||
|
pub struct CpuMap<T> {
|
||||||
|
pub(crate) inner: T,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Borrow<MapData>> CpuMap<T> {
|
||||||
|
pub(crate) fn new(map: T) -> Result<Self, MapError> {
|
||||||
|
let data = map.borrow();
|
||||||
|
|
||||||
|
if FEATURES.cpumap_prog_id() {
|
||||||
|
check_kv_size::<u32, bpf_cpumap_val>(data)?;
|
||||||
|
} else {
|
||||||
|
check_kv_size::<u32, u32>(data)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Self { inner: map })
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the number of elements in the array.
|
||||||
|
///
|
||||||
|
/// This corresponds to the value of `bpf_map_def::max_entries` on the eBPF side.
|
||||||
|
pub fn len(&self) -> u32 {
|
||||||
|
self.inner.borrow().obj.max_entries()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the queue size and optional program for a given CPU index.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// Returns [`MapError::OutOfBounds`] if `cpu_index` is out of bounds,
|
||||||
|
/// [`MapError::SyscallError`] if `bpf_map_lookup_elem` fails.
|
||||||
|
pub fn get(&self, cpu_index: u32, flags: u64) -> Result<CpuMapValue, MapError> {
|
||||||
|
let data = self.inner.borrow();
|
||||||
|
check_bounds(data, cpu_index)?;
|
||||||
|
let fd = data.fd().as_fd();
|
||||||
|
|
||||||
|
let value = if FEATURES.cpumap_prog_id() {
|
||||||
|
bpf_map_lookup_elem::<_, bpf_cpumap_val>(fd, &cpu_index, flags).map(|value| {
|
||||||
|
value.map(|value| CpuMapValue {
|
||||||
|
queue_size: value.qsize,
|
||||||
|
// SAFETY: map writes use fd, map reads use id.
|
||||||
|
// https://github.com/torvalds/linux/blob/2dde18cd1d8fac735875f2e4987f11817cc0bc2c/include/uapi/linux/bpf.h#L6241
|
||||||
|
prog_id: NonZeroU32::new(unsafe { value.bpf_prog.id }),
|
||||||
|
})
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
bpf_map_lookup_elem::<_, u32>(fd, &cpu_index, flags).map(|value| {
|
||||||
|
value.map(|qsize| CpuMapValue {
|
||||||
|
queue_size: qsize,
|
||||||
|
prog_id: None,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
};
|
||||||
|
value
|
||||||
|
.map_err(|(_, io_error)| SyscallError {
|
||||||
|
call: "bpf_map_lookup_elem",
|
||||||
|
io_error,
|
||||||
|
})?
|
||||||
|
.ok_or(MapError::KeyNotFound)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An iterator over the elements of the map.
|
||||||
|
pub fn iter(&self) -> impl Iterator<Item = Result<CpuMapValue, MapError>> + '_ {
|
||||||
|
(0..self.len()).map(move |i| self.get(i, 0))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: BorrowMut<MapData>> CpuMap<T> {
|
||||||
|
/// Sets the queue size at the given CPU index, and optionally a chained program.
|
||||||
|
///
|
||||||
|
/// When sending the packet to the CPU at the given index, the kernel will queue up to
|
||||||
|
/// `queue_size` packets before dropping them.
|
||||||
|
///
|
||||||
|
/// Starting from Linux kernel 5.9, another XDP program can be passed in that will be run on the
|
||||||
|
/// target CPU, instead of the CPU that receives the packets. This allows to perform minimal
|
||||||
|
/// computations on CPUs that directly handle packets from a NIC's RX queues, and perform
|
||||||
|
/// possibly heavier ones in other, less busy CPUs.
|
||||||
|
///
|
||||||
|
/// The chained program must be loaded with the `BPF_XDP_CPUMAP` attach type. When using
|
||||||
|
/// `aya-ebpf`, that means XDP programs that specify the `map = "cpumap"` argument. See the
|
||||||
|
/// kernel-space `aya_ebpf::xdp` for more information.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// Returns [`MapError::OutOfBounds`] if `index` is out of bounds, [`MapError::SyscallError`]
|
||||||
|
/// if `bpf_map_update_elem` fails, [`XdpMapError::ChainedProgramNotSupported`] if the kernel
|
||||||
|
/// does not support chained programs and one is provided.
|
||||||
|
pub fn set(
|
||||||
|
&mut self,
|
||||||
|
cpu_index: u32,
|
||||||
|
queue_size: u32,
|
||||||
|
program: Option<&ProgramFd>,
|
||||||
|
flags: u64,
|
||||||
|
) -> Result<(), XdpMapError> {
|
||||||
|
let data = self.inner.borrow_mut();
|
||||||
|
check_bounds(data, cpu_index)?;
|
||||||
|
let fd = data.fd().as_fd();
|
||||||
|
|
||||||
|
let res = if FEATURES.cpumap_prog_id() {
|
||||||
|
let mut value = unsafe { std::mem::zeroed::<bpf_cpumap_val>() };
|
||||||
|
value.qsize = queue_size;
|
||||||
|
// Default is valid as the kernel will only consider fd > 0:
|
||||||
|
// https://github.com/torvalds/linux/blob/2dde18cd1d8fac735875f2e4987f11817cc0bc2c/kernel/bpf/cpumap.c#L466
|
||||||
|
value.bpf_prog.fd = program
|
||||||
|
.map(|prog| prog.as_fd().as_raw_fd())
|
||||||
|
.unwrap_or_default();
|
||||||
|
bpf_map_update_elem(fd, Some(&cpu_index), &value, flags)
|
||||||
|
} else {
|
||||||
|
if program.is_some() {
|
||||||
|
return Err(XdpMapError::ChainedProgramNotSupported);
|
||||||
|
}
|
||||||
|
bpf_map_update_elem(fd, Some(&cpu_index), &queue_size, flags)
|
||||||
|
};
|
||||||
|
|
||||||
|
res.map_err(|(_, io_error)| {
|
||||||
|
MapError::from(SyscallError {
|
||||||
|
call: "bpf_map_update_elem",
|
||||||
|
io_error,
|
||||||
|
})
|
||||||
|
})?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Borrow<MapData>> IterableMap<u32, CpuMapValue> for CpuMap<T> {
|
||||||
|
fn map(&self) -> &MapData {
|
||||||
|
self.inner.borrow()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get(&self, key: &u32) -> Result<CpuMapValue, MapError> {
|
||||||
|
self.get(*key, 0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe impl Pod for bpf_cpumap_val {}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug)]
|
||||||
|
/// The value of a CPU map.
|
||||||
|
pub struct CpuMapValue {
|
||||||
|
/// Size of the for the CPU.
|
||||||
|
pub queue_size: u32,
|
||||||
|
/// Chained XDP program ID.
|
||||||
|
pub prog_id: Option<NonZeroU32>,
|
||||||
|
}
|
@ -0,0 +1,186 @@
|
|||||||
|
//! An array of network devices.
|
||||||
|
|
||||||
|
use std::{
|
||||||
|
borrow::{Borrow, BorrowMut},
|
||||||
|
num::NonZeroU32,
|
||||||
|
os::fd::{AsFd, AsRawFd},
|
||||||
|
};
|
||||||
|
|
||||||
|
use aya_obj::generated::bpf_devmap_val;
|
||||||
|
|
||||||
|
use super::XdpMapError;
|
||||||
|
use crate::{
|
||||||
|
maps::{check_bounds, check_kv_size, IterableMap, MapData, MapError},
|
||||||
|
programs::ProgramFd,
|
||||||
|
sys::{bpf_map_lookup_elem, bpf_map_update_elem, SyscallError},
|
||||||
|
Pod, FEATURES,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// An array of network devices.
|
||||||
|
///
|
||||||
|
/// XDP programs can use this map to redirect to other network
|
||||||
|
/// devices.
|
||||||
|
///
|
||||||
|
/// # Minimum kernel version
|
||||||
|
///
|
||||||
|
/// The minimum kernel version required to use this feature is 4.14.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
/// ```no_run
|
||||||
|
/// # let mut bpf = aya::Bpf::load(&[])?;
|
||||||
|
/// use aya::maps::xdp::DevMap;
|
||||||
|
///
|
||||||
|
/// let mut devmap = DevMap::try_from(bpf.map_mut("IFACES").unwrap())?;
|
||||||
|
/// // Lookups at index 2 will redirect packets to interface with index 3 (e.g. eth1)
|
||||||
|
/// devmap.set(2, 3, None, 0);
|
||||||
|
///
|
||||||
|
/// # Ok::<(), aya::BpfError>(())
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// # See also
|
||||||
|
///
|
||||||
|
/// Kernel documentation: <https://docs.kernel.org/next/bpf/map_devmap.html>
|
||||||
|
#[doc(alias = "BPF_MAP_TYPE_DEVMAP")]
|
||||||
|
pub struct DevMap<T> {
|
||||||
|
pub(crate) inner: T,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Borrow<MapData>> DevMap<T> {
|
||||||
|
pub(crate) fn new(map: T) -> Result<Self, MapError> {
|
||||||
|
let data = map.borrow();
|
||||||
|
|
||||||
|
if FEATURES.devmap_prog_id() {
|
||||||
|
check_kv_size::<u32, bpf_devmap_val>(data)?;
|
||||||
|
} else {
|
||||||
|
check_kv_size::<u32, u32>(data)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Self { inner: map })
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the number of elements in the array.
|
||||||
|
///
|
||||||
|
/// This corresponds to the value of `bpf_map_def::max_entries` on the eBPF side.
|
||||||
|
pub fn len(&self) -> u32 {
|
||||||
|
self.inner.borrow().obj.max_entries()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the target interface index and optional program at a given index.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// Returns [`MapError::OutOfBounds`] if `index` is out of bounds, [`MapError::SyscallError`]
|
||||||
|
/// if `bpf_map_lookup_elem` fails.
|
||||||
|
pub fn get(&self, index: u32, flags: u64) -> Result<DevMapValue, MapError> {
|
||||||
|
let data = self.inner.borrow();
|
||||||
|
check_bounds(data, index)?;
|
||||||
|
let fd = data.fd().as_fd();
|
||||||
|
|
||||||
|
let value = if FEATURES.devmap_prog_id() {
|
||||||
|
bpf_map_lookup_elem::<_, bpf_devmap_val>(fd, &index, flags).map(|value| {
|
||||||
|
value.map(|value| DevMapValue {
|
||||||
|
if_index: value.ifindex,
|
||||||
|
// SAFETY: map writes use fd, map reads use id.
|
||||||
|
// https://github.com/torvalds/linux/blob/2dde18cd1d8fac735875f2e4987f11817cc0bc2c/include/uapi/linux/bpf.h#L6228
|
||||||
|
prog_id: NonZeroU32::new(unsafe { value.bpf_prog.id }),
|
||||||
|
})
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
bpf_map_lookup_elem::<_, u32>(fd, &index, flags).map(|value| {
|
||||||
|
value.map(|ifindex| DevMapValue {
|
||||||
|
if_index: ifindex,
|
||||||
|
prog_id: None,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
};
|
||||||
|
value
|
||||||
|
.map_err(|(_, io_error)| SyscallError {
|
||||||
|
call: "bpf_map_lookup_elem",
|
||||||
|
io_error,
|
||||||
|
})?
|
||||||
|
.ok_or(MapError::KeyNotFound)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An iterator over the elements of the array.
|
||||||
|
pub fn iter(&self) -> impl Iterator<Item = Result<DevMapValue, MapError>> + '_ {
|
||||||
|
(0..self.len()).map(move |i| self.get(i, 0))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: BorrowMut<MapData>> DevMap<T> {
    /// Sets the target interface index at index, and optionally a chained program.
    ///
    /// When redirecting using `index`, packets will be transmitted by the interface with
    /// `target_if_index`.
    ///
    /// Starting from Linux kernel 5.8, another XDP program can be passed in that will be run before
    /// actual transmission. It can be used to modify the packet before transmission with NIC
    /// specific data (MAC address update, checksum computations, etc) or other purposes.
    ///
    /// The chained program must be loaded with the `BPF_XDP_DEVMAP` attach type. When using
    /// `aya-ebpf`, that means XDP programs that specify the `map = "devmap"` argument. See the
    /// kernel-space `aya_ebpf::xdp` for more information.
    ///
    /// # Errors
    ///
    /// Returns [`MapError::OutOfBounds`] if `index` is out of bounds, [`MapError::SyscallError`]
    /// if `bpf_map_update_elem` fails, [`XdpMapError::ChainedProgramNotSupported`] if the kernel
    /// does not support chained programs and one is provided.
    pub fn set(
        &mut self,
        index: u32,
        target_if_index: u32,
        program: Option<&ProgramFd>,
        flags: u64,
    ) -> Result<(), XdpMapError> {
        let data = self.inner.borrow_mut();
        check_bounds(data, index)?;
        let fd = data.fd().as_fd();

        // Entry layout depends on kernel support: newer kernels take a full
        // `bpf_devmap_val` (and can chain a program), older ones just the ifindex.
        let res = if FEATURES.devmap_prog_id() {
            // SAFETY(review): all-zeroes appears to be a valid `bpf_devmap_val`
            // (bindgen-generated, integer/union fields) — confirm if bindings change.
            let mut value = unsafe { std::mem::zeroed::<bpf_devmap_val>() };
            value.ifindex = target_if_index;
            // Default is valid as the kernel will only consider fd > 0:
            // https://github.com/torvalds/linux/blob/2dde18cd1d8fac735875f2e4987f11817cc0bc2c/kernel/bpf/devmap.c#L866
            // https://github.com/torvalds/linux/blob/2dde18cd1d8fac735875f2e4987f11817cc0bc2c/kernel/bpf/devmap.c#L918
            value.bpf_prog.fd = program
                .map(|prog| prog.as_fd().as_raw_fd())
                .unwrap_or_default();
            bpf_map_update_elem(fd, Some(&index), &value, flags)
        } else {
            // Old kernels cannot run a chained program; reject rather than silently drop it.
            if program.is_some() {
                return Err(XdpMapError::ChainedProgramNotSupported);
            }
            bpf_map_update_elem(fd, Some(&index), &target_if_index, flags)
        };

        res.map_err(|(_, io_error)| {
            MapError::from(SyscallError {
                call: "bpf_map_update_elem",
                io_error,
            })
        })?;
        Ok(())
    }
}
|
||||||
|
|
||||||
|
impl<T: Borrow<MapData>> IterableMap<u32, DevMapValue> for DevMap<T> {
|
||||||
|
fn map(&self) -> &MapData {
|
||||||
|
self.inner.borrow()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get(&self, key: &u32) -> Result<DevMapValue, MapError> {
|
||||||
|
self.get(*key, 0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SAFETY(review): `Pod` asserts any byte pattern is a valid value. `bpf_devmap_val`
// is a bindgen-generated kernel struct of integer/union fields, for which this
// appears to hold — re-confirm whenever the generated bindings change.
unsafe impl Pod for bpf_devmap_val {}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug)]
/// The value of a device map.
///
/// Kernel-version-independent form of a devmap entry: on kernels without
/// devmap program support, `prog_id` is always `None`.
pub struct DevMapValue {
    /// Target interface index to redirect to.
    pub if_index: u32,
    /// Chained XDP program ID. `None` when no program is attached (or the
    /// kernel predates devmap program support).
    pub prog_id: Option<NonZeroU32>,
}
|
@ -0,0 +1,167 @@
|
|||||||
|
//! A hashmap of network devices.
|
||||||
|
|
||||||
|
use std::{
|
||||||
|
borrow::{Borrow, BorrowMut},
|
||||||
|
num::NonZeroU32,
|
||||||
|
os::fd::{AsFd, AsRawFd},
|
||||||
|
};
|
||||||
|
|
||||||
|
use aya_obj::generated::bpf_devmap_val;
|
||||||
|
|
||||||
|
use super::{dev_map::DevMapValue, XdpMapError};
|
||||||
|
use crate::{
|
||||||
|
maps::{check_kv_size, hash_map, IterableMap, MapData, MapError, MapIter, MapKeys},
|
||||||
|
programs::ProgramFd,
|
||||||
|
sys::{bpf_map_lookup_elem, SyscallError},
|
||||||
|
FEATURES,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// A hashmap of network devices.
///
/// XDP programs can use this map to redirect to other network
/// devices.
///
/// # Minimum kernel version
///
/// The minimum kernel version required to use this feature is 5.4.
///
/// # Examples
/// ```no_run
/// # let mut bpf = aya::Bpf::load(&[])?;
/// use aya::maps::xdp::DevMapHash;
///
/// let mut devmap = DevMapHash::try_from(bpf.map_mut("IFACES").unwrap())?;
/// // Lookups with key 2 will redirect packets to interface with index 3 (e.g. eth1)
/// devmap.insert(2, 3, None, 0);
///
/// # Ok::<(), aya::BpfError>(())
/// ```
///
/// # See also
///
/// Kernel documentation: <https://docs.kernel.org/next/bpf/map_devmap.html>
#[doc(alias = "BPF_MAP_TYPE_DEVMAP_HASH")]
pub struct DevMapHash<T> {
    // Map storage: `MapData` or a borrow of it (see the `Borrow`/`BorrowMut`
    // bounds on the impl blocks).
    pub(crate) inner: T,
}
|
||||||
|
|
||||||
|
impl<T: Borrow<MapData>> DevMapHash<T> {
|
||||||
|
pub(crate) fn new(map: T) -> Result<Self, MapError> {
|
||||||
|
let data = map.borrow();
|
||||||
|
|
||||||
|
if FEATURES.devmap_prog_id() {
|
||||||
|
check_kv_size::<u32, bpf_devmap_val>(data)?;
|
||||||
|
} else {
|
||||||
|
check_kv_size::<u32, u32>(data)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Self { inner: map })
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the target interface index and optional program for a given key.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// Returns [`MapError::SyscallError`] if `bpf_map_lookup_elem` fails.
|
||||||
|
pub fn get(&self, key: u32, flags: u64) -> Result<DevMapValue, MapError> {
|
||||||
|
let fd = self.inner.borrow().fd().as_fd();
|
||||||
|
|
||||||
|
let value = if FEATURES.devmap_prog_id() {
|
||||||
|
bpf_map_lookup_elem::<_, bpf_devmap_val>(fd, &key, flags).map(|value| {
|
||||||
|
value.map(|value| DevMapValue {
|
||||||
|
if_index: value.ifindex,
|
||||||
|
// SAFETY: map writes use fd, map reads use id.
|
||||||
|
// https://github.com/torvalds/linux/blob/2dde18cd1d8fac735875f2e4987f11817cc0bc2c/include/uapi/linux/bpf.h#L6228
|
||||||
|
prog_id: NonZeroU32::new(unsafe { value.bpf_prog.id }),
|
||||||
|
})
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
bpf_map_lookup_elem::<_, u32>(fd, &key, flags).map(|value| {
|
||||||
|
value.map(|ifindex| DevMapValue {
|
||||||
|
if_index: ifindex,
|
||||||
|
prog_id: None,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
};
|
||||||
|
value
|
||||||
|
.map_err(|(_, io_error)| SyscallError {
|
||||||
|
call: "bpf_map_lookup_elem",
|
||||||
|
io_error,
|
||||||
|
})?
|
||||||
|
.ok_or(MapError::KeyNotFound)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An iterator over the elements of the devmap in arbitrary order.
|
||||||
|
pub fn iter(&self) -> MapIter<'_, u32, DevMapValue, Self> {
|
||||||
|
MapIter::new(self)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An iterator visiting all keys in arbitrary order.
|
||||||
|
pub fn keys(&self) -> MapKeys<'_, u32> {
|
||||||
|
MapKeys::new(self.inner.borrow())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: BorrowMut<MapData>> DevMapHash<T> {
    /// Inserts an ifindex and optionally a chained program in the map.
    ///
    /// When redirecting using `key`, packets will be transmitted by the interface with `ifindex`.
    ///
    /// Starting from Linux kernel 5.8, another XDP program can be passed in that will be run before
    /// actual transmission. It can be used to modify the packet before transmission with NIC
    /// specific data (MAC address update, checksum computations, etc) or other purposes.
    ///
    /// The chained program must be loaded with the `BPF_XDP_DEVMAP` attach type. When using
    /// `aya-ebpf`, that means XDP programs that specify the `map = "devmap"` argument. See the
    /// kernel-space `aya_ebpf::xdp` for more information.
    ///
    /// # Errors
    ///
    /// Returns [`MapError::SyscallError`] if `bpf_map_update_elem` fails,
    /// [`XdpMapError::ChainedProgramNotSupported`] if the kernel does not support chained
    /// programs and one is provided.
    pub fn insert(
        &mut self,
        key: u32,
        target_if_index: u32,
        program: Option<&ProgramFd>,
        flags: u64,
    ) -> Result<(), XdpMapError> {
        // Entry layout depends on kernel support: newer kernels take a full
        // `bpf_devmap_val` (and can chain a program), older ones just the ifindex.
        if FEATURES.devmap_prog_id() {
            // SAFETY(review): all-zeroes appears to be a valid `bpf_devmap_val`
            // (bindgen-generated, integer/union fields) — confirm if bindings change.
            let mut value = unsafe { std::mem::zeroed::<bpf_devmap_val>() };
            value.ifindex = target_if_index;
            // Default is valid as the kernel will only consider fd > 0:
            // https://github.com/torvalds/linux/blob/2dde18cd1d8fac735875f2e4987f11817cc0bc2c/kernel/bpf/devmap.c#L866
            // https://github.com/torvalds/linux/blob/2dde18cd1d8fac735875f2e4987f11817cc0bc2c/kernel/bpf/devmap.c#L918
            value.bpf_prog.fd = program
                .map(|prog| prog.as_fd().as_raw_fd())
                .unwrap_or_default();
            hash_map::insert(self.inner.borrow_mut(), &key, &value, flags)?;
        } else {
            // Old kernels cannot run a chained program; reject rather than silently drop it.
            if program.is_some() {
                return Err(XdpMapError::ChainedProgramNotSupported);
            }
            hash_map::insert(self.inner.borrow_mut(), &key, &target_if_index, flags)?;
        }
        Ok(())
    }

    /// Removes a value from the map.
    ///
    /// # Errors
    ///
    /// Returns [`MapError::SyscallError`] if `bpf_map_delete_elem` fails.
    pub fn remove(&mut self, key: u32) -> Result<(), MapError> {
        hash_map::remove(self.inner.borrow_mut(), &key)
    }
}
|
||||||
|
|
||||||
|
impl<T: Borrow<MapData>> IterableMap<u32, DevMapValue> for DevMapHash<T> {
|
||||||
|
fn map(&self) -> &MapData {
|
||||||
|
self.inner.borrow()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get(&self, key: &u32) -> Result<DevMapValue, MapError> {
|
||||||
|
self.get(*key, 0)
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,25 @@
|
|||||||
|
//! XDP maps.
|
||||||
|
mod cpu_map;
|
||||||
|
mod dev_map;
|
||||||
|
mod dev_map_hash;
|
||||||
|
mod xsk_map;
|
||||||
|
|
||||||
|
pub use cpu_map::CpuMap;
|
||||||
|
pub use dev_map::DevMap;
|
||||||
|
pub use dev_map_hash::DevMapHash;
|
||||||
|
use thiserror::Error;
|
||||||
|
pub use xsk_map::XskMap;
|
||||||
|
|
||||||
|
use super::MapError;
|
||||||
|
|
||||||
|
#[derive(Error, Debug)]
/// Errors occurring from working with XDP maps.
pub enum XdpMapError {
    /// Chained programs are not supported.
    #[error("chained programs are not supported by the current kernel")]
    ChainedProgramNotSupported,

    /// Map operation failed.
    #[error(transparent)]
    MapError(#[from] MapError),
}
|
@ -0,0 +1,81 @@
|
|||||||
|
//! An array of AF_XDP sockets.
|
||||||
|
|
||||||
|
use std::{
|
||||||
|
borrow::{Borrow, BorrowMut},
|
||||||
|
os::fd::{AsFd, AsRawFd, RawFd},
|
||||||
|
};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
maps::{check_bounds, check_kv_size, MapData, MapError},
|
||||||
|
sys::{bpf_map_update_elem, SyscallError},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// An array of AF_XDP sockets.
///
/// XDP programs can use this map to redirect packets to a target
/// AF_XDP socket using the `XDP_REDIRECT` action.
///
/// # Minimum kernel version
///
/// The minimum kernel version required to use this feature is 4.18.
///
/// # Examples
/// ```no_run
/// # let mut bpf = aya::Bpf::load(&[])?;
/// # let socket_fd = 1;
/// use aya::maps::XskMap;
///
/// let mut xskmap = XskMap::try_from(bpf.map_mut("SOCKETS").unwrap())?;
/// // socket_fd is the RawFd of an AF_XDP socket
/// xskmap.set(0, socket_fd, 0);
/// # Ok::<(), aya::BpfError>(())
/// ```
///
/// # See also
///
/// Kernel documentation: <https://docs.kernel.org/next/bpf/map_xskmap.html>
#[doc(alias = "BPF_MAP_TYPE_XSKMAP")]
pub struct XskMap<T> {
    // Map storage: `MapData` or a borrow of it (see the `Borrow`/`BorrowMut`
    // bounds on the impl blocks).
    pub(crate) inner: T,
}
|
||||||
|
|
||||||
|
impl<T: Borrow<MapData>> XskMap<T> {
|
||||||
|
pub(crate) fn new(map: T) -> Result<Self, MapError> {
|
||||||
|
let data = map.borrow();
|
||||||
|
check_kv_size::<u32, RawFd>(data)?;
|
||||||
|
|
||||||
|
Ok(Self { inner: map })
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the number of elements in the array.
|
||||||
|
///
|
||||||
|
/// This corresponds to the value of `bpf_map_def::max_entries` on the eBPF side.
|
||||||
|
pub fn len(&self) -> u32 {
|
||||||
|
self.inner.borrow().obj.max_entries()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: BorrowMut<MapData>> XskMap<T> {
    /// Sets the `AF_XDP` socket at a given index.
    ///
    /// When redirecting a packet, the `AF_XDP` socket at `index` will receive the packet. Note
    /// that it will do so only if the socket is bound to the same queue the packet was received
    /// on.
    ///
    /// # Errors
    ///
    /// Returns [`MapError::OutOfBounds`] if `index` is out of bounds, [`MapError::SyscallError`]
    /// if `bpf_map_update_elem` fails.
    pub fn set(&mut self, index: u32, socket_fd: impl AsRawFd, flags: u64) -> Result<(), MapError> {
        let data = self.inner.borrow_mut();
        check_bounds(data, index)?;
        let fd = data.fd().as_fd();
        // The map stores the socket's raw fd; the kernel resolves it to the socket.
        bpf_map_update_elem(fd, Some(&index), &socket_fd.as_raw_fd(), flags).map_err(
            |(_, io_error)| SyscallError {
                call: "bpf_map_update_elem",
                io_error,
            },
        )?;
        Ok(())
    }
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue