From 3a5b28916385b35824bc2a05606808e5e8c1968a Mon Sep 17 00:00:00 2001 From: Alessandro Decina Date: Sat, 13 Mar 2021 09:10:24 +0000 Subject: [PATCH] aya: add PerCpuHashMap --- aya/src/maps/hash_map/mod.rs | 3 + aya/src/maps/hash_map/per_cpu_hash_map.rs | 181 ++++++++++++++++++++++ aya/src/maps/mod.rs | 121 +++++++++++++-- aya/src/sys/bpf.rs | 32 ++++ aya/src/util.rs | 4 + 5 files changed, 332 insertions(+), 9 deletions(-) create mode 100644 aya/src/maps/hash_map/per_cpu_hash_map.rs diff --git a/aya/src/maps/hash_map/mod.rs b/aya/src/maps/hash_map/mod.rs index 31c4ae23..dc895845 100644 --- a/aya/src/maps/hash_map/mod.rs +++ b/aya/src/maps/hash_map/mod.rs @@ -6,7 +6,10 @@ use crate::{ }; mod hash_map; +mod per_cpu_hash_map; + pub use hash_map::*; +pub use per_cpu_hash_map::*; pub(crate) fn check_kv_size(map: &Map) -> Result<(), MapError> { let size = mem::size_of::(); diff --git a/aya/src/maps/hash_map/per_cpu_hash_map.rs b/aya/src/maps/hash_map/per_cpu_hash_map.rs new file mode 100644 index 00000000..9381928e --- /dev/null +++ b/aya/src/maps/hash_map/per_cpu_hash_map.rs @@ -0,0 +1,181 @@ +//! Per-CPU hash map. +use std::{ + convert::TryFrom, + marker::PhantomData, + ops::{Deref, DerefMut}, + os::unix::io::RawFd, +}; + +use crate::{ + generated::bpf_map_type::BPF_MAP_TYPE_PERCPU_HASH, + maps::{ + hash_map, IterableMap, Map, MapError, MapIter, MapKeys, MapRef, MapRefMut, PerCpuValues, + }, + sys::{bpf_map_lookup_elem_per_cpu, bpf_map_update_elem_per_cpu}, + Pod, +}; + +/// Similar to [`HashMap`](crate::maps::HashMap) but each CPU holds a separate value for a given key. Typically used to +/// minimize lock contention in eBPF programs. 
+/// +/// # Example +/// +/// ```no_run +/// # let bpf = aya::Bpf::load(&[], None)?; +/// use aya::maps::PerCpuHashMap; +/// use std::convert::TryFrom; +/// +/// const CPU_IDS: u8 = 1; +/// const WAKEUPS: u8 = 2; +/// +/// let mut hm = PerCpuHashMap::<_, u8, u32>::try_from(bpf.map("COUNTERS")?)?; +/// let cpu_ids = unsafe { hm.get(&CPU_IDS, 0)?.unwrap() }; +/// let wakeups = unsafe { hm.get(&WAKEUPS, 0)?.unwrap() }; +/// for (cpu_id, wakeups) in cpu_ids.iter().zip(wakeups.iter()) { +/// println!("cpu {} woke up {} times", cpu_id, wakeups); +/// } +/// # Ok::<(), aya::BpfError>(()) +/// ``` +pub struct PerCpuHashMap, K: Pod, V: Pod> { + inner: T, + _k: PhantomData, + _v: PhantomData, +} + +impl, K: Pod, V: Pod> PerCpuHashMap { + pub(crate) fn new(map: T) -> Result, MapError> { + let map_type = map.obj.def.map_type; + + // validate the map definition + if map_type != BPF_MAP_TYPE_PERCPU_HASH as u32 { + return Err(MapError::InvalidMapType { + map_type: map_type as u32, + })?; + } + hash_map::check_kv_size::(&map)?; + let _ = map.fd_or_err()?; + + Ok(PerCpuHashMap { + inner: map, + _k: PhantomData, + _v: PhantomData, + }) + } + + /// Returns a slice of values - one for each CPU - associated with the key. + pub unsafe fn get(&self, key: &K, flags: u64) -> Result>, MapError> { + let fd = self.inner.deref().fd_or_err()?; + bpf_map_lookup_elem_per_cpu(fd, key, flags).map_err(|(code, io_error)| { + MapError::SyscallError { + call: "bpf_map_lookup_elem".to_owned(), + code, + io_error, + } + }) + } + + /// An iterator visiting all key-value pairs in arbitrary order. The + /// iterator item type is `Result<(K, PerCpuValues<V>), MapError>`. + pub unsafe fn iter(&self) -> MapIter<'_, K, PerCpuValues> { + MapIter::new(self) + } + + /// An iterator visiting all keys in arbitrary order. The iterator element + /// type is `Result<K, MapError>`. 
+ pub unsafe fn keys(&self) -> MapKeys<'_, K, PerCpuValues> { + MapKeys::new(self) + } +} + +impl, K: Pod, V: Pod> PerCpuHashMap { + /// Inserts a slice of values - one for each CPU - for the given key. + /// + /// # Example + /// + /// ```no_run + /// # #[derive(thiserror::Error, Debug)] + /// # enum Error { + /// # #[error(transparent)] + /// # IO(#[from] std::io::Error), + /// # #[error(transparent)] + /// # Map(#[from] aya::maps::MapError), + /// # #[error(transparent)] + /// # Bpf(#[from] aya::BpfError) + /// # } + /// # let bpf = aya::Bpf::load(&[], None)?; + /// use aya::maps::{PerCpuHashMap, PerCpuValues}; + /// use aya::util::nr_cpus; + /// use std::convert::TryFrom; + /// + /// const RETRIES: u8 = 1; + /// + /// let mut hm = PerCpuHashMap::<_, u8, u32>::try_from(bpf.map_mut("PER_CPU_STORAGE")?)?; + /// hm.insert( + /// RETRIES, + /// PerCpuValues::try_from(vec![3u32; nr_cpus()?])?, + /// 0, + /// )?; + /// # Ok::<(), Error>(()) + /// ``` + pub fn insert(&mut self, key: K, values: PerCpuValues, flags: u64) -> Result<(), MapError> { + let fd = self.inner.fd_or_err()?; + bpf_map_update_elem_per_cpu(fd, &key, &values, flags).map_err(|(code, io_error)| { + MapError::SyscallError { + call: "bpf_map_update_elem".to_owned(), + code, + io_error, + } + })?; + + Ok(()) + } + + /// Removes a key from the map. 
+ pub fn remove(&mut self, key: &K) -> Result<(), MapError> { + hash_map::remove(&mut self.inner, key) + } +} + +impl, K: Pod, V: Pod> IterableMap> + for PerCpuHashMap +{ + fn fd(&self) -> Result { + self.inner.deref().fd_or_err() + } + + unsafe fn get(&self, key: &K) -> Result>, MapError> { + PerCpuHashMap::get(self, key, 0) + } +} + +impl TryFrom for PerCpuHashMap { + type Error = MapError; + + fn try_from(a: MapRef) -> Result, MapError> { + PerCpuHashMap::new(a) + } +} + +impl TryFrom for PerCpuHashMap { + type Error = MapError; + + fn try_from(a: MapRefMut) -> Result, MapError> { + PerCpuHashMap::new(a) + } +} + +impl<'a, K: Pod, V: Pod> TryFrom<&'a Map> for PerCpuHashMap<&'a Map, K, V> { + type Error = MapError; + + fn try_from(a: &'a Map) -> Result, MapError> { + PerCpuHashMap::new(a) + } +} + +impl<'a, K: Pod, V: Pod> TryFrom<&'a mut Map> for PerCpuHashMap<&'a mut Map, K, V> { + type Error = MapError; + + fn try_from(a: &'a mut Map) -> Result, MapError> { + PerCpuHashMap::new(a) + } +} diff --git a/aya/src/maps/mod.rs b/aya/src/maps/mod.rs index d9a3830a..77d86a1d 100644 --- a/aya/src/maps/mod.rs +++ b/aya/src/maps/mod.rs @@ -29,13 +29,14 @@ //! //! The code above uses `HashMap`, but all the concrete map types implement the //! `TryFrom` trait. 
-use std::{convert::TryFrom, ffi::CString, io, os::unix::io::RawFd}; +use std::{convert::TryFrom, ffi::CString, io, mem, ops::Deref, os::unix::io::RawFd, ptr}; use thiserror::Error; use crate::{ generated::bpf_map_type, obj, sys::{bpf_create_map, bpf_map_get_next_key}, + util::nr_cpus, Pod, }; @@ -44,7 +45,7 @@ mod map_lock; pub mod perf; pub mod program_array; -pub use hash_map::HashMap; +pub use hash_map::{HashMap, PerCpuHashMap}; pub use map_lock::*; pub use perf::PerfEventArray; pub use program_array::ProgramArray; @@ -143,19 +144,19 @@ impl Map { } } -pub(crate) trait IterableMap { +pub(crate) trait IterableMap { fn fd(&self) -> Result; unsafe fn get(&self, key: &K) -> Result, MapError>; } /// Iterator returned by `map.keys()`. -pub struct MapKeys<'coll, K: Pod, V: Pod> { +pub struct MapKeys<'coll, K: Pod, V> { map: &'coll dyn IterableMap, err: bool, key: Option, } -impl<'coll, K: Pod, V: Pod> MapKeys<'coll, K, V> { +impl<'coll, K: Pod, V> MapKeys<'coll, K, V> { fn new(map: &'coll dyn IterableMap) -> MapKeys<'coll, K, V> { MapKeys { map, @@ -165,7 +166,7 @@ impl<'coll, K: Pod, V: Pod> MapKeys<'coll, K, V> { } } -impl Iterator for MapKeys<'_, K, V> { +impl Iterator for MapKeys<'_, K, V> { type Item = Result; fn next(&mut self) -> Option> { @@ -203,11 +204,11 @@ impl Iterator for MapKeys<'_, K, V> { } /// Iterator returned by `map.iter()`. 
-pub struct MapIter<'coll, K: Pod, V: Pod> { +pub struct MapIter<'coll, K: Pod, V> { inner: MapKeys<'coll, K, V>, } -impl<'coll, K: Pod, V: Pod> MapIter<'coll, K, V> { +impl<'coll, K: Pod, V> MapIter<'coll, K, V> { fn new(map: &'coll dyn IterableMap) -> MapIter<'coll, K, V> { MapIter { inner: MapKeys::new(map), @@ -215,7 +216,7 @@ impl<'coll, K: Pod, V: Pod> MapIter<'coll, K, V> { } } -impl Iterator for MapIter<'_, K, V> { +impl Iterator for MapIter<'_, K, V> { type Item = Result<(K, V), MapError>; fn next(&mut self) -> Option { @@ -278,6 +279,108 @@ impl TryFrom for bpf_map_type { }) } } +pub struct PerCpuKernelMem { + bytes: Vec, +} + +impl PerCpuKernelMem { + pub(crate) fn as_ptr(&self) -> *const u8 { + self.bytes.as_ptr() + } + + pub(crate) fn as_mut_ptr(&mut self) -> *mut u8 { + self.bytes.as_mut_ptr() + } +} + +/// A slice of per-CPU values. +/// +/// Used by maps that implement per-CPU storage like [`PerCpuHashMap`]. +/// +/// # Example +/// +/// ```no_run +/// # #[derive(thiserror::Error, Debug)] +/// # enum Error { +/// # #[error(transparent)] +/// # IO(#[from] std::io::Error), +/// # #[error(transparent)] +/// # Map(#[from] aya::maps::MapError), +/// # #[error(transparent)] +/// # Bpf(#[from] aya::BpfError) +/// # } +/// # let bpf = aya::Bpf::load(&[], None)?; +/// use aya::maps::PerCpuValues; +/// use aya::util::nr_cpus; +/// use std::convert::TryFrom; +/// +/// let values = PerCpuValues::try_from(vec![42u32; nr_cpus()?])?; +/// # Ok::<(), Error>(()) +/// ``` +#[derive(Debug)] +pub struct PerCpuValues { + values: Box<[T]>, +} + +impl TryFrom> for PerCpuValues { + type Error = io::Error; + + fn try_from(values: Vec) -> Result { + let nr_cpus = nr_cpus()?; + if values.len() != nr_cpus { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + format!("not enough values ({}), nr_cpus: {}", values.len(), nr_cpus), + )); + } + Ok(PerCpuValues { + values: values.into_boxed_slice(), + }) + } +} + +impl PerCpuValues { + pub(crate) fn alloc_kernel_mem() -> 
Result { + let value_size = mem::size_of::() + 7 & !7; + Ok(PerCpuKernelMem { + bytes: vec![0u8; nr_cpus()? * value_size], + }) + } + + pub(crate) unsafe fn from_kernel_mem(mem: PerCpuKernelMem) -> PerCpuValues { + let mem_ptr = mem.bytes.as_ptr() as usize; + let value_size = mem::size_of::() + 7 & !7; + let mut values = Vec::new(); + let mut offset = 0; + while offset < mem.bytes.len() { + values.push(ptr::read_unaligned((mem_ptr + offset) as *const _)); + offset += value_size; + } + + PerCpuValues { + values: values.into_boxed_slice(), + } + } + + pub(crate) fn into_kernel_mem(&self) -> Result { + let mut mem = PerCpuValues::::alloc_kernel_mem()?; + let mem_ptr = mem.as_mut_ptr() as usize; + let value_size = mem::size_of::() + 7 & !7; + for i in 0..self.values.len() { + unsafe { ptr::write_unaligned((mem_ptr + i * value_size) as *mut _, self.values[i]) }; + } + + Ok(mem) + } +} + +impl Deref for PerCpuValues { + type Target = Box<[T]>; + + fn deref(&self) -> &Self::Target { + &self.values + } +} #[cfg(test)] mod tests { diff --git a/aya/src/sys/bpf.rs b/aya/src/sys/bpf.rs index 3c1d4984..9fe07d97 100644 --- a/aya/src/sys/bpf.rs +++ b/aya/src/sys/bpf.rs @@ -12,6 +12,7 @@ use libc::{c_long, ENOENT}; use crate::{ bpf_map_def, generated::{bpf_attach_type, bpf_attr, bpf_cmd, bpf_insn, bpf_prog_type}, + maps::PerCpuValues, programs::VerifierLog, sys::SysResult, Pod, BPF_OBJ_NAME_LEN, @@ -100,6 +101,27 @@ pub(crate) fn bpf_map_lookup_and_delete_elem( lookup(fd, key, 0, bpf_cmd::BPF_MAP_LOOKUP_AND_DELETE_ELEM) } +pub(crate) fn bpf_map_lookup_elem_per_cpu( + fd: RawFd, + key: &K, + flags: u64, +) -> Result>, (c_long, io::Error)> { + let mut attr = unsafe { mem::zeroed::() }; + let mut mem = PerCpuValues::::alloc_kernel_mem().map_err(|io_error| (-1, io_error))?; + + let u = unsafe { &mut attr.__bindgen_anon_2 }; + u.map_fd = fd as u32; + u.key = key as *const _ as u64; + u.__bindgen_anon_1.value = mem.as_mut_ptr() as u64; + u.flags = flags; + + match 
sys_bpf(bpf_cmd::BPF_MAP_LOOKUP_ELEM, &attr) { + Ok(_) => Ok(Some(unsafe { PerCpuValues::from_kernel_mem(mem) })), + Err((_, io_error)) if io_error.raw_os_error() == Some(ENOENT) => Ok(None), + Err(e) => Err(e), + } +} + pub(crate) fn bpf_map_update_elem(fd: RawFd, key: &K, value: &V, flags: u64) -> SysResult { let mut attr = unsafe { mem::zeroed::() }; @@ -129,6 +151,16 @@ pub(crate) fn bpf_map_update_elem_ptr( sys_bpf(bpf_cmd::BPF_MAP_UPDATE_ELEM, &attr) } +pub(crate) fn bpf_map_update_elem_per_cpu( + fd: RawFd, + key: &K, + values: &PerCpuValues, + flags: u64, +) -> SysResult { + let mem = values.into_kernel_mem().map_err(|e| (-1, e))?; + bpf_map_update_elem_ptr(fd, key, mem.as_ptr(), flags) +} + pub(crate) fn bpf_map_delete_elem(fd: RawFd, key: &K) -> SysResult { let mut attr = unsafe { mem::zeroed::() }; diff --git a/aya/src/util.rs b/aya/src/util.rs index eb97410d..ef692886 100644 --- a/aya/src/util.rs +++ b/aya/src/util.rs @@ -15,6 +15,10 @@ pub fn online_cpus() -> Result, io::Error> { }) } +pub fn nr_cpus() -> Result { + Ok(possible_cpus()?.len()) +} + pub(crate) fn possible_cpus() -> Result, io::Error> { let data = fs::read_to_string(POSSIBLE_CPUS)?; parse_cpu_ranges(data.trim()).map_err(|_| {