aya: add PerCpuHashMap

pull/1/head
Alessandro Decina 4 years ago
parent d5098c9e57
commit 3a5b289163

@ -6,7 +6,10 @@ use crate::{
};
mod hash_map;
mod per_cpu_hash_map;
pub use hash_map::*;
pub use per_cpu_hash_map::*;
pub(crate) fn check_kv_size<K, V>(map: &Map) -> Result<(), MapError> {
let size = mem::size_of::<K>();

@ -0,0 +1,181 @@
//! Per-CPU hash map.
use std::{
convert::TryFrom,
marker::PhantomData,
ops::{Deref, DerefMut},
os::unix::io::RawFd,
};
use crate::{
generated::bpf_map_type::BPF_MAP_TYPE_PERCPU_HASH,
maps::{
hash_map, IterableMap, Map, MapError, MapIter, MapKeys, MapRef, MapRefMut, PerCpuValues,
},
sys::{bpf_map_lookup_elem_per_cpu, bpf_map_update_elem_per_cpu},
Pod,
};
/// Similar to [`HashMap`](crate::maps::HashMap) but each CPU holds a separate value for a given key. Typically used to
/// minimize lock contention in eBPF programs.
///
/// # Example
///
/// ```no_run
/// # let bpf = aya::Bpf::load(&[], None)?;
/// use aya::maps::PerCpuHashMap;
/// use std::convert::TryFrom;
///
/// const CPU_IDS: u8 = 1;
/// const WAKEUPS: u8 = 2;
///
/// let mut hm = PerCpuHashMap::<_, u8, u32>::try_from(bpf.map("COUNTERS")?)?;
/// let cpu_ids = unsafe { hm.get(&CPU_IDS, 0)?.unwrap() };
/// let wakeups = unsafe { hm.get(&WAKEUPS, 0)?.unwrap() };
/// for (cpu_id, wakeups) in cpu_ids.iter().zip(wakeups.iter()) {
///     println!("cpu {} woke up {} times", cpu_id, wakeups);
/// }
/// # Ok::<(), aya::BpfError>(())
/// ```
pub struct PerCpuHashMap<T: Deref<Target = Map>, K: Pod, V: Pod> {
    // Borrow-like handle to the underlying map; `T` lets the same type serve
    // both shared (`Deref`) and exclusive (`DerefMut`) access.
    inner: T,
    // Zero-sized markers binding the key and value types to this instance.
    _k: PhantomData<K>,
    _v: PhantomData<V>,
}
impl<T: Deref<Target = Map>, K: Pod, V: Pod> PerCpuHashMap<T, K, V> {
    /// Wraps `map`, validating that it is a `BPF_MAP_TYPE_PERCPU_HASH` map
    /// whose key/value sizes match `K`/`V` and that it has a valid fd.
    pub(crate) fn new(map: T) -> Result<PerCpuHashMap<T, K, V>, MapError> {
        let map_type = map.obj.def.map_type;

        // validate the map definition
        if map_type != BPF_MAP_TYPE_PERCPU_HASH as u32 {
            // map_type is already a u32, so no cast is needed; plain `return
            // Err(...)` instead of the redundant `Err(...)?` From-conversion
            return Err(MapError::InvalidMapType { map_type });
        }
        hash_map::check_kv_size::<K, V>(&map)?;
        // fail early if the map was never created in the kernel
        let _ = map.fd_or_err()?;

        Ok(PerCpuHashMap {
            inner: map,
            _k: PhantomData,
            _v: PhantomData,
        })
    }

    /// Returns the values - one for each CPU - associated with the key.
    ///
    /// # Errors
    ///
    /// Returns [`MapError::SyscallError`] if the underlying
    /// `bpf_map_lookup_elem` syscall fails.
    pub unsafe fn get(&self, key: &K, flags: u64) -> Result<Option<PerCpuValues<V>>, MapError> {
        let fd = self.inner.deref().fd_or_err()?;
        bpf_map_lookup_elem_per_cpu(fd, key, flags).map_err(|(code, io_error)| {
            MapError::SyscallError {
                call: "bpf_map_lookup_elem".to_owned(),
                code,
                io_error,
            }
        })
    }

    /// An iterator visiting all key-value pairs in arbitrary order. The
    /// iterator item type is `Result<(K, PerCpuValues<V>), MapError>`.
    pub unsafe fn iter(&self) -> MapIter<'_, K, PerCpuValues<V>> {
        MapIter::new(self)
    }

    /// An iterator visiting all keys in arbitrary order. The iterator element
    /// type is `Result<K, MapError>`.
    pub unsafe fn keys(&self) -> MapKeys<'_, K, PerCpuValues<V>> {
        MapKeys::new(self)
    }
}
impl<T: DerefMut<Target = Map>, K: Pod, V: Pod> PerCpuHashMap<T, K, V> {
    /// Inserts a slice of values - one for each CPU - for the given key.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # #[derive(thiserror::Error, Debug)]
    /// # enum Error {
    /// #     #[error(transparent)]
    /// #     IO(#[from] std::io::Error),
    /// #     #[error(transparent)]
    /// #     Map(#[from] aya::maps::MapError),
    /// #     #[error(transparent)]
    /// #     Bpf(#[from] aya::BpfError)
    /// # }
    /// # let bpf = aya::Bpf::load(&[], None)?;
    /// use aya::maps::{PerCpuHashMap, PerCpuValues};
    /// use aya::util::nr_cpus;
    /// use std::convert::TryFrom;
    ///
    /// const RETRIES: u8 = 1;
    ///
    /// let mut hm = PerCpuHashMap::<_, u8, u32>::try_from(bpf.map_mut("PER_CPU_STORAGE")?)?;
    /// hm.insert(
    ///     RETRIES,
    ///     PerCpuValues::try_from(vec![3u32; nr_cpus()?])?,
    ///     0,
    /// )?;
    /// # Ok::<(), Error>(())
    /// ```
    pub fn insert(&mut self, key: K, values: PerCpuValues<V>, flags: u64) -> Result<(), MapError> {
        let fd = self.inner.fd_or_err()?;
        // map the raw syscall result into the crate error type
        match bpf_map_update_elem_per_cpu(fd, &key, &values, flags) {
            Ok(_) => Ok(()),
            Err((code, io_error)) => Err(MapError::SyscallError {
                call: "bpf_map_update_elem".to_owned(),
                code,
                io_error,
            }),
        }
    }

    /// Removes a key from the map.
    pub fn remove(&mut self, key: &K) -> Result<(), MapError> {
        hash_map::remove(&mut self.inner, key)
    }
}
impl<T: Deref<Target = Map>, K: Pod, V: Pod> IterableMap<K, PerCpuValues<V>>
    for PerCpuHashMap<T, K, V>
{
    // File descriptor of the underlying map, used by the iterator machinery.
    fn fd(&self) -> Result<RawFd, MapError> {
        self.inner.deref().fd_or_err()
    }

    // Lookup hook used by `MapIter`; the fully qualified call selects the
    // inherent `get` (with flags 0) rather than this trait method.
    unsafe fn get(&self, key: &K) -> Result<Option<PerCpuValues<V>>, MapError> {
        PerCpuHashMap::get(self, key, 0)
    }
}
impl<K: Pod, V: Pod> TryFrom<MapRef> for PerCpuHashMap<MapRef, K, V> {
type Error = MapError;
fn try_from(a: MapRef) -> Result<PerCpuHashMap<MapRef, K, V>, MapError> {
PerCpuHashMap::new(a)
}
}
impl<K: Pod, V: Pod> TryFrom<MapRefMut> for PerCpuHashMap<MapRefMut, K, V> {
type Error = MapError;
fn try_from(a: MapRefMut) -> Result<PerCpuHashMap<MapRefMut, K, V>, MapError> {
PerCpuHashMap::new(a)
}
}
impl<'a, K: Pod, V: Pod> TryFrom<&'a Map> for PerCpuHashMap<&'a Map, K, V> {
type Error = MapError;
fn try_from(a: &'a Map) -> Result<PerCpuHashMap<&'a Map, K, V>, MapError> {
PerCpuHashMap::new(a)
}
}
impl<'a, K: Pod, V: Pod> TryFrom<&'a mut Map> for PerCpuHashMap<&'a mut Map, K, V> {
type Error = MapError;
fn try_from(a: &'a mut Map) -> Result<PerCpuHashMap<&'a mut Map, K, V>, MapError> {
PerCpuHashMap::new(a)
}
}

@ -29,13 +29,14 @@
//!
//! The code above uses `HashMap`, but all the concrete map types implement the
//! `TryFrom` trait.
use std::{convert::TryFrom, ffi::CString, io, os::unix::io::RawFd};
use std::{convert::TryFrom, ffi::CString, io, mem, ops::Deref, os::unix::io::RawFd, ptr};
use thiserror::Error;
use crate::{
generated::bpf_map_type,
obj,
sys::{bpf_create_map, bpf_map_get_next_key},
util::nr_cpus,
Pod,
};
@ -44,7 +45,7 @@ mod map_lock;
pub mod perf;
pub mod program_array;
pub use hash_map::HashMap;
pub use hash_map::{HashMap, PerCpuHashMap};
pub use map_lock::*;
pub use perf::PerfEventArray;
pub use program_array::ProgramArray;
@ -143,19 +144,19 @@ impl Map {
}
}
pub(crate) trait IterableMap<K: Pod, V: Pod> {
pub(crate) trait IterableMap<K: Pod, V> {
fn fd(&self) -> Result<RawFd, MapError>;
unsafe fn get(&self, key: &K) -> Result<Option<V>, MapError>;
}
/// Iterator returned by `map.keys()`.
pub struct MapKeys<'coll, K: Pod, V: Pod> {
pub struct MapKeys<'coll, K: Pod, V> {
map: &'coll dyn IterableMap<K, V>,
err: bool,
key: Option<K>,
}
impl<'coll, K: Pod, V: Pod> MapKeys<'coll, K, V> {
impl<'coll, K: Pod, V> MapKeys<'coll, K, V> {
fn new(map: &'coll dyn IterableMap<K, V>) -> MapKeys<'coll, K, V> {
MapKeys {
map,
@ -165,7 +166,7 @@ impl<'coll, K: Pod, V: Pod> MapKeys<'coll, K, V> {
}
}
impl<K: Pod, V: Pod> Iterator for MapKeys<'_, K, V> {
impl<K: Pod, V> Iterator for MapKeys<'_, K, V> {
type Item = Result<K, MapError>;
fn next(&mut self) -> Option<Result<K, MapError>> {
@ -203,11 +204,11 @@ impl<K: Pod, V: Pod> Iterator for MapKeys<'_, K, V> {
}
/// Iterator returned by `map.iter()`.
pub struct MapIter<'coll, K: Pod, V: Pod> {
pub struct MapIter<'coll, K: Pod, V> {
inner: MapKeys<'coll, K, V>,
}
impl<'coll, K: Pod, V: Pod> MapIter<'coll, K, V> {
impl<'coll, K: Pod, V> MapIter<'coll, K, V> {
fn new(map: &'coll dyn IterableMap<K, V>) -> MapIter<'coll, K, V> {
MapIter {
inner: MapKeys::new(map),
@ -215,7 +216,7 @@ impl<'coll, K: Pod, V: Pod> MapIter<'coll, K, V> {
}
}
impl<K: Pod, V: Pod> Iterator for MapIter<'_, K, V> {
impl<K: Pod, V> Iterator for MapIter<'_, K, V> {
type Item = Result<(K, V), MapError>;
fn next(&mut self) -> Option<Self::Item> {
@ -278,6 +279,108 @@ impl TryFrom<u32> for bpf_map_type {
})
}
}
/// Buffer exchanged with the kernel for per-CPU map values: one slot per
/// possible CPU, each rounded up to 8 bytes (see
/// `PerCpuValues::alloc_kernel_mem`).
pub struct PerCpuKernelMem {
    bytes: Vec<u8>,
}

impl PerCpuKernelMem {
    // Read-only pointer to the buffer, passed to update-style syscalls.
    pub(crate) fn as_ptr(&self) -> *const u8 {
        self.bytes.as_ptr()
    }

    // Mutable pointer to the buffer, written to by lookup-style syscalls.
    pub(crate) fn as_mut_ptr(&mut self) -> *mut u8 {
        self.bytes.as_mut_ptr()
    }
}
/// A slice of per-CPU values.
///
/// Used by maps that implement per-CPU storage like [`PerCpuHashMap`].
///
/// # Example
///
/// ```no_run
/// # #[derive(thiserror::Error, Debug)]
/// # enum Error {
/// #     #[error(transparent)]
/// #     IO(#[from] std::io::Error),
/// #     #[error(transparent)]
/// #     Map(#[from] aya::maps::MapError),
/// #     #[error(transparent)]
/// #     Bpf(#[from] aya::BpfError)
/// # }
/// # let bpf = aya::Bpf::load(&[], None)?;
/// use aya::maps::PerCpuValues;
/// use aya::util::nr_cpus;
/// use std::convert::TryFrom;
///
/// let values = PerCpuValues::try_from(vec![42u32; nr_cpus()?])?;
/// # Ok::<(), Error>(())
/// ```
#[derive(Debug)]
pub struct PerCpuValues<T: Pod> {
    // One value per possible CPU; length is validated against nr_cpus()
    // at construction (see the TryFrom<Vec<T>> impl).
    values: Box<[T]>,
}
impl<T: Pod> TryFrom<Vec<T>> for PerCpuValues<T> {
    type Error = io::Error;

    /// Builds a `PerCpuValues` from `values`, which must contain exactly one
    /// value per possible CPU.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidInput` error when `values.len()` differs from the
    /// number of possible CPUs — too few *or* too many.
    fn try_from(values: Vec<T>) -> Result<Self, Self::Error> {
        let nr_cpus = nr_cpus()?;
        if values.len() != nr_cpus {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                // the check is an exact-count check, so the message must not
                // claim the count is merely "not enough"
                format!(
                    "invalid number of values ({}), nr_cpus: {}",
                    values.len(),
                    nr_cpus
                ),
            ));
        }

        Ok(PerCpuValues {
            values: values.into_boxed_slice(),
        })
    }
}
impl<T: Pod> PerCpuValues<T> {
    /// Size of one kernel value slot: `size_of::<T>()` rounded up to the next
    /// multiple of 8. Extracted (with explicit parentheses) because the
    /// original expression `size + 7 & !7` was repeated three times and
    /// relied on `+` binding tighter than `&`.
    fn value_size() -> usize {
        (mem::size_of::<T>() + 7) & !7
    }

    /// Allocates a zeroed buffer with one slot per possible CPU, sized for
    /// the kernel's per-CPU lookup/update syscalls.
    pub(crate) fn alloc_kernel_mem() -> Result<PerCpuKernelMem, io::Error> {
        Ok(PerCpuKernelMem {
            bytes: vec![0u8; nr_cpus()? * Self::value_size()],
        })
    }

    /// Reads one `T` per slot out of kernel memory previously filled by a
    /// per-CPU lookup.
    ///
    /// # Safety
    ///
    /// Every slot in `mem` must hold a validly initialized `T`.
    pub(crate) unsafe fn from_kernel_mem(mem: PerCpuKernelMem) -> PerCpuValues<T> {
        let value_size = Self::value_size();
        // slot count is known up front, so reserve once instead of growing
        let mut values = Vec::with_capacity(mem.bytes.len() / value_size);
        let mut offset = 0;
        while offset < mem.bytes.len() {
            // slots are 8-byte aligned, not necessarily aligned for T
            values.push(ptr::read_unaligned(
                mem.bytes.as_ptr().add(offset) as *const T
            ));
            offset += value_size;
        }

        PerCpuValues {
            values: values.into_boxed_slice(),
        }
    }

    /// Serializes the values into a freshly allocated kernel buffer, one
    /// value per slot. (Note: takes `&self` despite the `into_` name, to
    /// keep the existing caller-facing signature.)
    pub(crate) fn into_kernel_mem(&self) -> Result<PerCpuKernelMem, io::Error> {
        let mut mem = PerCpuValues::<T>::alloc_kernel_mem()?;
        let mem_ptr = mem.as_mut_ptr();
        let value_size = Self::value_size();
        for (i, value) in self.values.iter().enumerate() {
            // SAFETY: the buffer holds one slot per possible CPU and
            // values.len() was validated to equal nr_cpus at construction,
            // so every write lands inside the allocation.
            unsafe { ptr::write_unaligned(mem_ptr.add(i * value_size) as *mut T, *value) };
        }

        Ok(mem)
    }
}
impl<T: Pod> Deref for PerCpuValues<T> {
    // NOTE(review): Target is Box<[T]> rather than [T]; callers get &Box<[T]>,
    // which still auto-derefs to &[T] at use sites.
    type Target = Box<[T]>;

    fn deref(&self) -> &Self::Target {
        &self.values
    }
}
#[cfg(test)]
mod tests {

@ -12,6 +12,7 @@ use libc::{c_long, ENOENT};
use crate::{
bpf_map_def,
generated::{bpf_attach_type, bpf_attr, bpf_cmd, bpf_insn, bpf_prog_type},
maps::PerCpuValues,
programs::VerifierLog,
sys::SysResult,
Pod, BPF_OBJ_NAME_LEN,
@ -100,6 +101,27 @@ pub(crate) fn bpf_map_lookup_and_delete_elem<K: Pod, V: Pod>(
lookup(fd, key, 0, bpf_cmd::BPF_MAP_LOOKUP_AND_DELETE_ELEM)
}
/// Per-CPU variant of `bpf_map_lookup_elem`: looks up `key` and, on success,
/// returns the per-CPU values the kernel wrote into a freshly allocated
/// buffer.
///
/// Returns `Ok(None)` when the key is absent (kernel reports `ENOENT`);
/// other syscall failures are returned as the raw `(code, errno)` pair.
/// Buffer allocation failure is surfaced with code `-1`.
pub(crate) fn bpf_map_lookup_elem_per_cpu<K: Pod, V: Pod>(
    fd: RawFd,
    key: &K,
    flags: u64,
) -> Result<Option<PerCpuValues<V>>, (c_long, io::Error)> {
    let mut attr = unsafe { mem::zeroed::<bpf_attr>() };
    // one slot per possible CPU; the kernel fills this on success
    let mut mem = PerCpuValues::<V>::alloc_kernel_mem().map_err(|io_error| (-1, io_error))?;

    // fill the map-element union of bpf_attr for BPF_MAP_LOOKUP_ELEM
    let u = unsafe { &mut attr.__bindgen_anon_2 };
    u.map_fd = fd as u32;
    u.key = key as *const _ as u64;
    u.__bindgen_anon_1.value = mem.as_mut_ptr() as u64;
    u.flags = flags;

    match sys_bpf(bpf_cmd::BPF_MAP_LOOKUP_ELEM, &attr) {
        Ok(_) => Ok(Some(unsafe { PerCpuValues::from_kernel_mem(mem) })),
        Err((_, io_error)) if io_error.raw_os_error() == Some(ENOENT) => Ok(None),
        Err(e) => Err(e),
    }
}
pub(crate) fn bpf_map_update_elem<K, V>(fd: RawFd, key: &K, value: &V, flags: u64) -> SysResult {
let mut attr = unsafe { mem::zeroed::<bpf_attr>() };
@ -129,6 +151,16 @@ pub(crate) fn bpf_map_update_elem_ptr<K, V>(
sys_bpf(bpf_cmd::BPF_MAP_UPDATE_ELEM, &attr)
}
/// Per-CPU variant of `bpf_map_update_elem`: serializes `values` into a
/// kernel-layout buffer and updates the element through the raw-pointer
/// update path. Allocation failure is reported with code -1.
pub(crate) fn bpf_map_update_elem_per_cpu<K, V: Pod>(
    fd: RawFd,
    key: &K,
    values: &PerCpuValues<V>,
    flags: u64,
) -> SysResult {
    let kernel_mem = values.into_kernel_mem().map_err(|io_error| (-1, io_error))?;
    bpf_map_update_elem_ptr(fd, key, kernel_mem.as_ptr(), flags)
}
pub(crate) fn bpf_map_delete_elem<K>(fd: RawFd, key: &K) -> SysResult {
let mut attr = unsafe { mem::zeroed::<bpf_attr>() };

@ -15,6 +15,10 @@ pub fn online_cpus() -> Result<Vec<u32>, io::Error> {
})
}
/// Number of possible CPUs on this host, taken from the kernel's
/// possible-CPU list (see `possible_cpus`).
pub fn nr_cpus() -> Result<usize, io::Error> {
    possible_cpus().map(|cpus| cpus.len())
}
pub(crate) fn possible_cpus() -> Result<Vec<u32>, io::Error> {
let data = fs::read_to_string(POSSIBLE_CPUS)?;
parse_cpu_ranges(data.trim()).map_err(|_| {

Loading…
Cancel
Save