From 1655df36c7e376d779476b7bf5ea21894f7acec6 Mon Sep 17 00:00:00 2001
From: Tyrone Wu <wudevelops@gmail.com>
Date: Mon, 28 Oct 2024 02:09:27 +0000
Subject: [PATCH] aya: add feature probing for map type

Add API that probes whether kernel supports a map type.
---
 aya/src/sys/bpf.rs                            |   5 +-
 aya/src/sys/feature_probe.rs                  | 202 +++++++++++++++++-
 aya/src/sys/mod.rs                            |   2 +-
 .../src/tests/feature_probe.rs                |  96 ++++++++-
 xtask/public-api/aya.txt                      |   1 +
 5 files changed, 299 insertions(+), 7 deletions(-)

diff --git a/aya/src/sys/bpf.rs b/aya/src/sys/bpf.rs
index 21386e1d..4e80a71a 100644
--- a/aya/src/sys/bpf.rs
+++ b/aya/src/sys/bpf.rs
@@ -681,7 +681,10 @@ pub(crate) fn bpf_load_btf(
 }
 
 // SAFETY: only use for bpf_cmd that return a new file descriptor on success.
-unsafe fn fd_sys_bpf(cmd: bpf_cmd, attr: &mut bpf_attr) -> io::Result<crate::MockableFd> {
+pub(super) unsafe fn fd_sys_bpf(
+    cmd: bpf_cmd,
+    attr: &mut bpf_attr,
+) -> io::Result<crate::MockableFd> {
     let fd = sys_bpf(cmd, attr)?;
     let fd = fd.try_into().map_err(|std::num::TryFromIntError { .. }| {
         io::Error::new(
diff --git a/aya/src/sys/feature_probe.rs b/aya/src/sys/feature_probe.rs
index f55b83fc..51eae01d 100644
--- a/aya/src/sys/feature_probe.rs
+++ b/aya/src/sys/feature_probe.rs
@@ -1,10 +1,20 @@
 //! Probes and identifies available eBPF features supported by the host kernel.
 
-use aya_obj::btf::{Btf, BtfKind};
-use libc::{E2BIG, EINVAL};
+use std::{mem, os::fd::AsRawFd as _};
 
-use super::{SyscallError, bpf_prog_load, with_trivial_prog};
-use crate::programs::{ProgramError, ProgramType};
+use aya_obj::{
+    btf::{Btf, BtfKind},
+    generated::{BPF_F_MMAPABLE, BPF_F_NO_PREALLOC, bpf_attr, bpf_cmd, bpf_map_type},
+};
+use libc::{E2BIG, EBADF, EINVAL};
+
+use super::{SyscallError, bpf_prog_load, fd_sys_bpf, with_trivial_prog};
+use crate::{
+    MockableFd,
+    maps::MapType,
+    programs::{ProgramError, ProgramType},
+    util::page_size,
+};
 
 /// Whether the host kernel supports the [`ProgramType`].
 ///
@@ -128,3 +138,187 @@ pub fn is_program_supported(program_type: ProgramType) -> Result<bool, ProgramEr
         _ => Err(error),
     }
 }
+
+/// Whether the host kernel supports the [`MapType`].
+///
+/// # Examples
+///
+/// ```no_run
+/// # use aya::{maps::MapType, sys::is_map_supported};
+/// #
+/// match is_map_supported(MapType::HashOfMaps) {
+///     Ok(true) => println!("hash_of_maps supported :)"),
+///     Ok(false) => println!("hash_of_maps not supported :("),
+///     Err(err) => println!("Uh oh! Unexpected error: {:?}", err),
+/// }
+/// ```
+///
+/// # Errors
+///
+/// Returns [`SyscallError`] if kernel probing fails with an unexpected error.
+///
+/// Note that certain errors are expected and handled internally; only
+/// unanticipated failures during probing will result in this error.
+pub fn is_map_supported(map_type: MapType) -> Result<bool, SyscallError> {
+    // Each `bpf_map_ops` struct has its own `.map_alloc()` & `.map_alloc_check()` that validate
+    // fields on map_create, so each type gets its own (key_size, value_size, max_entries).
+    let (key_size, value_size, max_entries) = match map_type {
+        MapType::Unspecified => return Ok(false),
+        MapType::Hash // https://elixir.bootlin.com/linux/v3.19/source/kernel/bpf/hashtab.c#L349
+        | MapType::PerCpuHash  // https://elixir.bootlin.com/linux/v4.6/source/kernel/bpf/hashtab.c#L726
+        | MapType::LruHash // https://elixir.bootlin.com/linux/v4.10/source/kernel/bpf/hashtab.c#L1032
+        | MapType::LruPerCpuHash // https://elixir.bootlin.com/linux/v4.10/source/kernel/bpf/hashtab.c#L1133
+            => (1, 1, 1),
+        MapType::Array // https://elixir.bootlin.com/linux/v3.19/source/kernel/bpf/arraymap.c#L138
+        | MapType::PerCpuArray // https://elixir.bootlin.com/linux/v4.6/source/kernel/bpf/arraymap.c#L283
+            => (4, 1, 1),
+        MapType::ProgramArray // https://elixir.bootlin.com/linux/v4.2/source/kernel/bpf/arraymap.c#L239
+        | MapType::PerfEventArray // https://elixir.bootlin.com/linux/v4.3/source/kernel/bpf/arraymap.c#L312
+        | MapType::CgroupArray // https://elixir.bootlin.com/linux/v4.8/source/kernel/bpf/arraymap.c#L562
+        | MapType::ArrayOfMaps // https://elixir.bootlin.com/linux/v4.12/source/kernel/bpf/arraymap.c#L595
+        | MapType::DevMap // https://elixir.bootlin.com/linux/v4.14/source/kernel/bpf/devmap.c#L360
+        | MapType::SockMap // https://elixir.bootlin.com/linux/v4.14/source/kernel/bpf/sockmap.c#L874
+        | MapType::CpuMap // https://elixir.bootlin.com/linux/v4.15/source/kernel/bpf/cpumap.c#L589
+        | MapType::XskMap // https://elixir.bootlin.com/linux/v4.18/source/kernel/bpf/xskmap.c#L224
+        | MapType::ReuseportSockArray // https://elixir.bootlin.com/linux/v4.20/source/kernel/bpf/reuseport_array.c#L357
+        | MapType::DevMapHash // https://elixir.bootlin.com/linux/v5.4/source/kernel/bpf/devmap.c#L713
+            => (4, 4, 1),
+        MapType::StackTrace // https://elixir.bootlin.com/linux/v4.6/source/kernel/bpf/stackmap.c#L272
+            => (4, 8, 1),
+        MapType::LpmTrie // https://elixir.bootlin.com/linux/v4.11/source/kernel/bpf/lpm_trie.c#L509
+            => (8, 1, 1),
+        MapType::HashOfMaps // https://elixir.bootlin.com/linux/v4.12/source/kernel/bpf/hashtab.c#L1301
+        | MapType::SockHash // https://elixir.bootlin.com/linux/v4.18/source/kernel/bpf/sockmap.c#L2507
+            => (1, 4, 1),
+        MapType::CgroupStorage // https://elixir.bootlin.com/linux/v4.19/source/kernel/bpf/local_storage.c#L246
+        | MapType::PerCpuCgroupStorage // https://elixir.bootlin.com/linux/v4.20/source/kernel/bpf/local_storage.c#L313
+            => (16, 1, 0),
+        MapType::Queue // https://elixir.bootlin.com/linux/v4.20/source/kernel/bpf/queue_stack_maps.c#L267
+        | MapType::Stack // https://elixir.bootlin.com/linux/v4.20/source/kernel/bpf/queue_stack_maps.c#L280
+        | MapType::BloomFilter // https://elixir.bootlin.com/linux/v5.16/source/kernel/bpf/bloom_filter.c#L193
+            => (0, 1, 1),
+        MapType::SkStorage // https://elixir.bootlin.com/linux/v5.2/source/net/core/bpf_sk_storage.c#L779
+        | MapType::InodeStorage // https://elixir.bootlin.com/linux/v5.10/source/kernel/bpf/bpf_inode_storage.c#L239
+        | MapType::TaskStorage // https://elixir.bootlin.com/linux/v5.11/source/kernel/bpf/bpf_task_storage.c#L285
+        | MapType::CgrpStorage // https://elixir.bootlin.com/linux/v6.2/source/kernel/bpf/bpf_cgrp_storage.c#L216
+            => (4, 1, 0),
+        MapType::StructOps // https://elixir.bootlin.com/linux/v5.6/source/kernel/bpf/bpf_struct_ops.c#L607
+            => (4, 0, 1),
+        MapType::RingBuf // https://elixir.bootlin.com/linux/v5.8/source/kernel/bpf/ringbuf.c#L296
+        | MapType::UserRingBuf // https://elixir.bootlin.com/linux/v6.1/source/kernel/bpf/ringbuf.c#L356
+        // `max_entries` is required to be a multiple of the kernel page size & a power of 2: https://elixir.bootlin.com/linux/v5.8/source/kernel/bpf/ringbuf.c#L160
+            => (0, 0, page_size() as u32),
+        MapType::Arena // https://elixir.bootlin.com/linux/v6.9/source/kernel/bpf/arena.c#L380
+            => (0, 0, 1),
+    };
+
+    // SAFETY: all-zero byte-pattern valid for `bpf_attr`
+    let mut attr = unsafe { mem::zeroed::<bpf_attr>() };
+    // SAFETY: union access
+    let u = unsafe { &mut attr.__bindgen_anon_1 };
+    u.map_type = map_type as u32;
+    u.key_size = key_size;
+    u.value_size = value_size;
+    u.max_entries = max_entries;
+
+    // Declared outside the match so the *_of_maps inner fd outlives the map_create call below.
+    let inner_map_fd: MockableFd;
+    match map_type {
+        // lpm_trie is required to not be pre-allocated: https://elixir.bootlin.com/linux/v4.11/source/kernel/bpf/lpm_trie.c#L419
+        MapType::LpmTrie => u.map_flags = BPF_F_NO_PREALLOC,
+        // For these types, we aim to intentionally trigger `EBADF` by supplying invalid btf attach
+        // data to verify the map type's existence. Otherwise, negative support will produce
+        // `EINVAL` instead.
+        MapType::SkStorage
+        | MapType::InodeStorage
+        | MapType::TaskStorage
+        | MapType::CgrpStorage => {
+            // These types are required to not be pre-allocated:
+            // - sk_storage: https://elixir.bootlin.com/linux/v5.2/source/net/core/bpf_sk_storage.c#L604
+            // - inode_storage: https://elixir.bootlin.com/linux/v5.10/source/kernel/bpf/bpf_local_storage.c#L525
+            // - task_storage: https://elixir.bootlin.com/linux/v5.11/source/kernel/bpf/bpf_local_storage.c#L527
+            // - cgrp_storage: https://elixir.bootlin.com/linux/v6.2/source/kernel/bpf/bpf_local_storage.c#L539
+            u.map_flags = BPF_F_NO_PREALLOC;
+            // Will trigger `EBADF` from `btf_get_by_fd()` https://elixir.bootlin.com/linux/v5.2/source/kernel/bpf/btf.c#L3428
+            u.btf_fd = u32::MAX;
+            u.btf_key_type_id = 1;
+            u.btf_value_type_id = 1;
+        }
+        MapType::ArrayOfMaps | MapType::HashOfMaps => {
+            // SAFETY: all-zero byte-pattern valid for `bpf_attr`
+            let mut attr_map = unsafe { mem::zeroed::<bpf_attr>() };
+            // SAFETY: union access
+            let u_map = unsafe { &mut attr_map.__bindgen_anon_1 };
+            u_map.map_type = bpf_map_type::BPF_MAP_TYPE_HASH as u32;
+            u_map.key_size = 1;
+            u_map.value_size = 1;
+            u_map.max_entries = 1;
+            // SAFETY: BPF_MAP_CREATE returns a new file descriptor.
+            inner_map_fd = unsafe { fd_sys_bpf(bpf_cmd::BPF_MAP_CREATE, &mut attr_map) }.map_err(
+                |io_error| SyscallError {
+                    call: "bpf_map_create",
+                    io_error,
+                },
+            )?;
+
+            u.inner_map_fd = inner_map_fd.as_raw_fd() as u32;
+        }
+        // We aim to intentionally trigger `ENOTSUPP` by setting an invalid, non-zero
+        // `btf_vmlinux_value_type_id`. Negative support produces `EINVAL` instead.
+        MapType::StructOps => u.btf_vmlinux_value_type_id = 1,
+        // arena is required to be mmapable: https://elixir.bootlin.com/linux/v6.9/source/kernel/bpf/arena.c#L103
+        MapType::Arena => u.map_flags = BPF_F_MMAPABLE,
+        _ => {}
+    }
+
+    // SAFETY: BPF_MAP_CREATE returns a new file descriptor.
+    let io_error = match unsafe { fd_sys_bpf(bpf_cmd::BPF_MAP_CREATE, &mut attr) } {
+        Ok(_) => return Ok(true),
+        Err(io_error) => io_error,
+    };
+
+    // sk_storage, struct_ops, inode_storage, task_storage, & cgrp_storage require further
+    // examination of the returned errno to verify support.
+    match io_error.raw_os_error() {
+        Some(EINVAL) => Ok(false),
+        // These types use fields that may not exist at the kernel's current version.
+        // Supplying `bpf_attr` fields unknown to the kernel triggers `E2BIG` from
+        // `bpf_check_uarg_tail_zero()` https://elixir.bootlin.com/linux/v4.18/source/kernel/bpf/syscall.c#L71.
+        Some(E2BIG)
+            if matches!(
+                map_type,
+                MapType::SkStorage
+                    | MapType::StructOps
+                    | MapType::InodeStorage
+                    | MapType::TaskStorage
+                    | MapType::CgrpStorage
+            ) =>
+        {
+            Ok(false)
+        }
+        // For these types, `EBADF` from `btf_get_by_fd()` https://elixir.bootlin.com/linux/v5.2/source/kernel/bpf/btf.c#L3428
+        // indicates that map_create advanced far enough in the validation to recognize the type
+        // before being rejected.
+        // Otherwise, negative support produces `EINVAL`, meaning it was immediately rejected.
+        Some(EBADF)
+            if matches!(
+                map_type,
+                MapType::SkStorage
+                    | MapType::InodeStorage
+                    | MapType::TaskStorage
+                    | MapType::CgrpStorage
+            ) =>
+        {
+            Ok(true)
+        }
+        // `ENOTSUPP` (errno 524) from `bpf_struct_ops_map_alloc()` https://elixir.bootlin.com/linux/v5.6/source/kernel/bpf/bpf_struct_ops.c#L557
+        // indicates that map_create advanced far enough in the validation to recognize the type
+        // before being rejected.
+        // Otherwise, negative support produces `EINVAL`, meaning it was immediately rejected.
+        Some(524) if map_type == MapType::StructOps => Ok(true),
+        _ => Err(SyscallError {
+            call: "bpf_map_create",
+            io_error,
+        }),
+    }
+}
diff --git a/aya/src/sys/mod.rs b/aya/src/sys/mod.rs
index f2cffdf4..c0d2c59c 100644
--- a/aya/src/sys/mod.rs
+++ b/aya/src/sys/mod.rs
@@ -18,7 +18,7 @@ use aya_obj::generated::{bpf_attr, bpf_cmd, perf_event_attr};
 pub(crate) use bpf::*;
 #[cfg(test)]
 pub(crate) use fake::*;
-pub use feature_probe::is_program_supported;
+pub use feature_probe::{is_map_supported, is_program_supported};
 #[doc(hidden)]
 pub use netlink::netlink_set_link_up;
 pub(crate) use netlink::*;
diff --git a/test/integration-test/src/tests/feature_probe.rs b/test/integration-test/src/tests/feature_probe.rs
index 282f148b..d0bb29e9 100644
--- a/test/integration-test/src/tests/feature_probe.rs
+++ b/test/integration-test/src/tests/feature_probe.rs
@@ -1,6 +1,12 @@
 //! Test feature probing against kernel version.
 
-use aya::{Btf, programs::ProgramType, sys::is_program_supported, util::KernelVersion};
+use aya::{
+    Btf,
+    maps::MapType,
+    programs::ProgramType,
+    sys::{is_map_supported, is_program_supported},
+    util::KernelVersion,
+};
 use procfs::kernel_config;
 
 use crate::utils::kernel_assert;
@@ -129,3 +135,91 @@ fn probe_supported_programs() {
     let kern_version = KernelVersion::new(6, 4, 0);
     kernel_assert!(is_supported!(ProgramType::Netfilter), kern_version);
 }
+
+#[test]
+fn probe_supported_maps() {
+    macro_rules! is_supported { // unwraps so an unexpected probe error fails the test
+        ($map_type:expr) => {
+            is_map_supported($map_type).unwrap()
+        };
+    }
+
+    let kern_version = KernelVersion::new(3, 19, 0);
+    kernel_assert!(is_supported!(MapType::Hash), kern_version);
+    kernel_assert!(is_supported!(MapType::Array), kern_version);
+
+    let kern_version = KernelVersion::new(4, 2, 0);
+    kernel_assert!(is_supported!(MapType::ProgramArray), kern_version);
+
+    let kern_version = KernelVersion::new(4, 3, 0);
+    kernel_assert!(is_supported!(MapType::PerfEventArray), kern_version);
+
+    let kern_version = KernelVersion::new(4, 6, 0);
+    kernel_assert!(is_supported!(MapType::PerCpuHash), kern_version);
+    kernel_assert!(is_supported!(MapType::PerCpuArray), kern_version);
+    kernel_assert!(is_supported!(MapType::StackTrace), kern_version);
+
+    let kern_version = KernelVersion::new(4, 8, 0);
+    kernel_assert!(is_supported!(MapType::CgroupArray), kern_version);
+
+    let kern_version = KernelVersion::new(4, 10, 0);
+    kernel_assert!(is_supported!(MapType::LruHash), kern_version);
+    kernel_assert!(is_supported!(MapType::LruPerCpuHash), kern_version);
+
+    let kern_version = KernelVersion::new(4, 11, 0);
+    kernel_assert!(is_supported!(MapType::LpmTrie), kern_version);
+
+    let kern_version = KernelVersion::new(4, 12, 0);
+    kernel_assert!(is_supported!(MapType::ArrayOfMaps), kern_version);
+    kernel_assert!(is_supported!(MapType::HashOfMaps), kern_version);
+
+    let kern_version = KernelVersion::new(4, 14, 0);
+    kernel_assert!(is_supported!(MapType::DevMap), kern_version);
+    kernel_assert!(is_supported!(MapType::SockMap), kern_version);
+
+    let kern_version = KernelVersion::new(4, 15, 0);
+    kernel_assert!(is_supported!(MapType::CpuMap), kern_version);
+
+    let kern_version = KernelVersion::new(4, 18, 0);
+    kernel_assert!(is_supported!(MapType::XskMap), kern_version);
+    kernel_assert!(is_supported!(MapType::SockHash), kern_version);
+
+    let kern_version = KernelVersion::new(4, 19, 0);
+    kernel_assert!(is_supported!(MapType::CgroupStorage), kern_version);
+    kernel_assert!(is_supported!(MapType::ReuseportSockArray), kern_version);
+
+    let kern_version = KernelVersion::new(4, 20, 0);
+    kernel_assert!(is_supported!(MapType::PerCpuCgroupStorage), kern_version);
+    kernel_assert!(is_supported!(MapType::Queue), kern_version);
+    kernel_assert!(is_supported!(MapType::Stack), kern_version);
+
+    let kern_version = KernelVersion::new(5, 2, 0);
+    kernel_assert!(is_supported!(MapType::SkStorage), kern_version);
+
+    let kern_version = KernelVersion::new(5, 4, 0);
+    kernel_assert!(is_supported!(MapType::DevMapHash), kern_version);
+
+    let kern_version = KernelVersion::new(5, 6, 0);
+    kernel_assert!(is_supported!(MapType::StructOps), kern_version);
+
+    let kern_version = KernelVersion::new(5, 8, 0);
+    kernel_assert!(is_supported!(MapType::RingBuf), kern_version);
+
+    let kern_version = KernelVersion::new(5, 10, 0);
+    kernel_assert!(is_supported!(MapType::InodeStorage), kern_version); // Requires a kernel built with `CONFIG_BPF_LSM=y`
+
+    let kern_version = KernelVersion::new(5, 11, 0);
+    kernel_assert!(is_supported!(MapType::TaskStorage), kern_version);
+
+    let kern_version = KernelVersion::new(5, 16, 0);
+    kernel_assert!(is_supported!(MapType::BloomFilter), kern_version);
+
+    let kern_version = KernelVersion::new(6, 1, 0);
+    kernel_assert!(is_supported!(MapType::UserRingBuf), kern_version);
+
+    let kern_version = KernelVersion::new(6, 2, 0);
+    kernel_assert!(is_supported!(MapType::CgrpStorage), kern_version);
+
+    let kern_version = KernelVersion::new(6, 9, 0);
+    kernel_assert!(is_supported!(MapType::Arena), kern_version);
+}
diff --git a/xtask/public-api/aya.txt b/xtask/public-api/aya.txt
index 2d62d08f..16e2a4ea 100644
--- a/xtask/public-api/aya.txt
+++ b/xtask/public-api/aya.txt
@@ -10076,6 +10076,7 @@ pub fn aya::sys::SyscallError::borrow_mut(&mut self) -> &mut T
 impl<T> core::convert::From<T> for aya::sys::SyscallError
 pub fn aya::sys::SyscallError::from(t: T) -> T
 pub fn aya::sys::enable_stats(stats_type: aya::sys::Stats) -> core::result::Result<std::os::fd::owned::OwnedFd, aya::sys::SyscallError>
+pub fn aya::sys::is_map_supported(map_type: aya::maps::MapType) -> core::result::Result<bool, aya::sys::SyscallError>
 pub fn aya::sys::is_program_supported(program_type: aya::programs::ProgramType) -> core::result::Result<bool, aya::programs::ProgramError>
 pub mod aya::util
 pub struct aya::util::KernelVersion