use std::{io, fs, fmt, os, path, process}; use io::Read; use fmt::Display; use os::fd::{AsFd, AsRawFd}; use os::unix::{fs::PermissionsExt, process::CommandExt}; use path::{Path, PathBuf}; use nix::sched::{clone, CloneCb, CloneFlags, setns}; use nix::sys::{signal::{kill, Signal}, wait::{waitpid, WaitPidFlag}}; use nix::unistd::{dup2, pivot_root, setgid, setgroups, sethostname, setuid, Gid, Pid, Uid, User}; use nix::mount::{mount, MntFlags, MsFlags, umount2}; use uuid; use toml; use serde::{Deserialize, Serialize}; use clap::Parser; use error::{Result, RockerError}; mod error; static WORKSPACE: &str = "/root/rocker"; static USER_NAME: &str = "rocker"; static INFO_FILE: &str = "info.toml"; static LOCK_FILE: &str = ".lock"; static mut STACK: [u8; 1024*1024*1] = [0; 1024*1024*1]; static CLONE_FLAG: i32 = 0b1101100000000100000000000000000; // CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWPID | CLONE_NEWIPC | CLONE_NEWNET; #[derive(Parser, Debug)] #[command(version, about, long_about = None)] struct RockerArgs { // --wait/--log --run /bin/bash --image busybox #[arg(long)] run: Option, #[arg(long)] image: Option, #[arg(long)] // --volume "/tmp/test1:tmp/test1,/tmp/test2:tmp/test2" volume: Option, #[arg(long)] // --env "a=1,b=2,c=3" env: Option, // --run /bin/bash --exec container_id #[arg(long)] exec: Option, #[arg(long)] log: bool, #[arg(long)] wait: bool, // --logs container_id #[arg(long)] logs: Option, // --ps #[arg(long)] ps: bool, // --psa #[arg(long)] psa: bool, // rm "container_id_1, container_id_2, container_id_3" #[arg(long)] rm: Option, // stop "container_id_1, container_id_2, container_id_3" #[arg(long)] stop: Option } /// 从images解压到volumes fn extend_image(image_name: &String) -> Result { // 源文件 let image_path = Path::new(WORKSPACE).join("images").join(image_name); if image_path.exists() == false { return Err(RockerError::from(io::Error::new(io::ErrorKind::NotFound, "未找到镜像"))); } let image_path_str = image_path.to_str().unwrap(); // 安全的unwrap // volumes只读层 let volume_path = Path::new(WORKSPACE).join("volumes").join(image_name); if volume_path.exists() { return Ok(volume_path); } else { create_dir(&volume_path, true)?; } let volume_path_str = volume_path.to_str().unwrap(); // 安全的unwrap // 解压缩 let out = process::Command::new("tar") .arg("-xvf") .arg(image_path_str) .arg("-C") .arg(volume_path_str) .output()?; let std_out = String::from_utf8_lossy(&out.stdout); let std_err = String::from_utf8_lossy(&out.stderr); if std_err.len() == 0 { println!("解压缩完毕: {std_out:?}"); Ok(volume_path) } else { // 删除 volume_path std::fs::remove_dir_all(volume_path)?; Err(RockerError::from(io::Error::new(io::ErrorKind::Other, format!("解压缩镜像失败: {std_err}")))) } } fn init_container_lock>(container_work_path:P) -> Result { use nix::sys::stat::Mode; use nix::fcntl::{OFlag, open}; let lock_path = container_work_path.as_ref().join(LOCK_FILE); let lock_path_str = lock_path.as_os_str(); let oflag = OFlag::O_RDWR | OFlag::O_CREAT; let mode = Mode::empty(); println!("{lock_path_str:?}"); Ok(open(lock_path_str, oflag, mode)?) } fn init_container_overlay>(volume_path: P, upper_path: P, merged_path: P) -> Result<()> { let lower_dir = volume_path.as_ref().to_string_lossy().to_string(); let upper_dir = upper_path.as_ref().to_string_lossy().to_string(); let merged_dir = merged_path.as_ref().to_string_lossy().to_string(); let dirs = format!("lowerdir={lower_dir},upperdir={upper_dir},workdir={merged_dir}"); println!("dirs: {dirs:?}"); let out = process::Command::new("mount") .arg("-t") .arg("overlay") .arg("overlay") .arg("-o") .arg(dirs) .arg(merged_dir) .output()?; // let std_out = String::from_utf8_lossy(&out.stdout); let std_err = String::from_utf8_lossy(&out.stderr); if std_err.len() == 0 { println!("容器文件系统创建完成"); } else { return Err(RockerError::from(io::Error::new(io::ErrorKind::Other, format!("容器文件系统创建失败: {std_err:?}")))); } Ok(()) } fn init_container_custom_volume>(container_merged_path: P, custom_volume_s: &String) -> Result<()> { for custom_volume in custom_volume_s.split(",") { let custom_volume_v = custom_volume.split(":").collect::>(); if custom_volume_v.len() < 2 { return Err(RockerError::OtherError(format!("volume 参数格式不正确: {custom_volume}"))); } let host_path = custom_volume_v[0]; let container_path_buf = { if custom_volume_v[1].starts_with("/") { container_merged_path.as_ref().join(&custom_volume_v[1][1..]) } else { container_merged_path.as_ref().join(&custom_volume_v[1]) } }; let container_path = container_path_buf.to_string_lossy().to_string(); // 创建宿主机和容器内的目录 create_dir(Path::new(host_path), true)?; create_dir(&container_path, true)?; // 绑定 let out = process::Command::new("mount") .arg("-o") .arg("bind") .arg(host_path) .arg(container_path) .output()?; // let std_out = String::from_utf8_lossy(&out.stdout); let std_err = String::from_utf8_lossy(&out.stderr); if std_err.len() == 0 { println!("创建自定义 volume: {custom_volume:?}"); } else { return Err(RockerError::OtherError(format!("创建volume失败: {std_err}"))) } } Ok(()) } fn init_container_env(env: Option<&String>) -> Result<()>{ for (k, _) in std::env::vars(){ std::env::remove_var(k); } if let Some(env) = env { let env_vec = if env.starts_with("./") || env.starts_with("/") { // 读取出路径指定的文件作为env let env_path = Path::new(env); let mut env_file = fs::File::open(env_path)?; let text = { let mut s = String::new(); env_file.read_to_string(&mut s)?; s }; text.lines().map(String::from).collect::>() } else { env.split(",").map(String::from).collect::>() }; for item_env in env_vec.iter() { let item_env_v = item_env.split("=").collect::>(); if item_env_v.len() == 2 { std::env::set_var(item_env_v[0], item_env_v[1]) } else { println!("env 格式不正确: {item_env}") } } } Ok(()) } fn init_container_pivot>(merged_path: P) -> Result<()> { // 在我们没有设置 chroot之前, 需要先把所有挂载点的传播类型改为 private, 避免进程中的系统调用污染全局 mount(None::<&str>, "/", None::<&str>, MsFlags::MS_PRIVATE | MsFlags::MS_REC, None::<&str>)?; // 修改overlayfs 为rootfs std::env::set_current_dir(merged_path)?; let pwd_path = std::env::current_dir()?; let pwd_str = pwd_path.to_string_lossy().to_string(); // 挂载bind mount(Some(pwd_str.as_str()), pwd_str.as_str(), Some("bind"), MsFlags::MS_BIND | MsFlags::MS_REC, Some(""))?; // 创建 rootfs/.pivot_root 目录用于存储 old_root let pivot_root_dir = format!("{pwd_str}/.pivot_root"); // 将系统rootfs切换到新的rootfs, 并设置权限 create_dir(&pivot_root_dir, true)?; pivot_root(pwd_str.as_str(), pivot_root_dir.as_str())?; // 修改当前进程工作目录(注意我们之前已经到rootfs内, 并且把根目录设置完毕了) std::env::set_current_dir("/")?; // 卸载 old_root, 并删除临时文件 umount2(".pivot_root", MntFlags::MNT_DETACH).unwrap(); std::fs::remove_dir(".pivot_root").unwrap(); Ok(()) } fn init_container_mount() -> Result<()> { // 挂载proc let mount_flags = MsFlags::MS_NODEV | MsFlags::MS_NOEXEC | MsFlags::MS_NOSUID; mount(Some("proc"), "/proc", Some("proc"), mount_flags, Some(""))?; // 挂载dev mount(Some("tmpfs"), "/dev", Some("tmpfs"), mount_flags, Some("mode=755"))?; Ok(()) } fn init_container_log(log: bool) -> Result<()> { let log_path = Path::new("logs"); create_dir(log_path, true)?; let log_fd = fs::File::create(log_path.join("log"))?; if log { unsafe { let log_fd_raw = log_fd.as_raw_fd(); dup2(log_fd_raw, 1)?; dup2(log_fd_raw, 2)?; } } Ok(()) } fn init_container_user(uid: Uid, gid: Gid) -> Result<()>{ setgid(gid)?; setgroups(&[gid])?; setuid(uid)?; Ok(()) } fn create_dir>(path: P, is_any:bool) -> Result<()> { fs::create_dir_all(&path)?; if is_any { fs::set_permissions(&path, PermissionsExt::from_mode(0o777))?; } Ok(()) } fn check_container_is_running(pid: &Pid, main_exe: &Path) -> Result { // 检查pid对应的exe是否和外部传过来的相同 let child_exe_s= format!("/proc/{pid}/exe"); let child_exe_path = Path::new(child_exe_s.as_str()); let target_child_exe_path = fs::read_link(child_exe_path)?; if target_child_exe_path != main_exe { return Ok(true); } Ok(false) } fn init_exec_ns(pid: i32) -> Result<()>{ // 把当前进程加入到指定pid的namespace for ns_name in vec!["ipc", "uts", "net", "pid", "mnt"] { let ns_path = format!("/proc/{pid}/ns/{ns_name}"); let ns_fild = fs::File::open(ns_path)?; setns(ns_fild.as_fd(), CloneFlags::from_bits_retain(0))? } Ok(()) } fn start(is_wait: bool, cb: CloneCb, clong_flags: CloneFlags) -> Result{ let main_exe = std::env::current_exe()?; unsafe { match clone(cb, STACK.as_mut_slice(), clong_flags, None) { Ok(child_pid) => { println!("clone ok: {child_pid:?}"); // check_container_is_running let mut cnt = 0; while let Ok(running) = check_container_is_running(&child_pid, &main_exe) { cnt += 1; std::thread::sleep(std::time::Duration::from_millis(10)); if running { break; } if cnt > 1000 { println!("{child_pid} 启动 超时"); break; } } // wait if is_wait { match waitpid(child_pid, Some(WaitPidFlag::WUNTRACED)) { Ok(status) => { println!("{child_pid:?} exit: {status:?}"); } Err(e) => { println!("{child_pid} wait err: {e}"); } } } Ok(child_pid.as_raw()) } Err(e) => { Err(RockerError::OtherError(format!("clone err: {e}"))) } } } } fn run_container(_container_id: &String, cmd: &String, args: &RockerArgs, volume_path: &PathBuf, is_exec: bool) -> Result { // 禁止同时wait和log if args.wait && args.log { return Err(RockerError::OtherError("--wait/--log 禁止同时使用".to_string())); } let clone_flags; let rocker_user_info = User::from_name(USER_NAME)?.ok_or(RockerError::OtherError(format!("没找到 用户: {USER_NAME}")))?; let rocker_uid = rocker_user_info.uid; let rocker_gid = rocker_user_info.gid; let _cb = if is_exec { let _cb = move || { let container_info = get_container_info(_container_id).unwrap(); init_exec_ns(container_info.pid).unwrap(); init_container_env(None).unwrap(); init_container_user(rocker_uid, rocker_gid).unwrap(); let cmd_vec = cmd.split(" ").collect::>(); let err = process::Command::new(cmd_vec[0]) .args(&cmd_vec[1..]) .exec(); println!("execv {cmd_vec:?}失败: {err:?}"); 0isize }; clone_flags = CloneFlags::empty(); Box::new(_cb) as CloneCb } else { // 初始化容器工作目录 let container_work_path = Path::new(WORKSPACE).join("containers").join(&_container_id); let container_upper_path = container_work_path.join("upper"); let container_merged_path = container_work_path.join("merged"); create_dir(&container_work_path, true)?; create_dir(&container_upper_path, true)?; create_dir(&container_merged_path, true)?; let _cb = move || { init_container_lock(&container_work_path).unwrap(); init_container_overlay(volume_path, &container_upper_path, &container_merged_path).unwrap(); if let Some(custom_volume) = &args.volume { init_container_custom_volume(&container_merged_path, custom_volume).unwrap(); } sethostname(USER_NAME).unwrap(); init_container_env(args.env.as_ref()).unwrap(); init_container_pivot(&container_merged_path).unwrap(); init_container_mount().unwrap(); init_container_log(args.log).unwrap(); init_container_user(rocker_uid, rocker_gid).unwrap(); let cmd_vec = cmd.split(" ").collect::>(); let err = process::Command::new(cmd_vec[0]) .args(&cmd_vec[1..]) .exec(); println!("execv {cmd_vec:?}失败: {err:?}"); 0isize }; clone_flags = CloneFlags::from_bits_truncate(CLONE_FLAG); Box::new(_cb) as CloneCb }; start(args.wait, _cb, clone_flags) } #[derive(Deserialize, Serialize, Debug, PartialEq)] enum ContainerStatus { READY, RUNNING, STOP, } impl Display for ContainerStatus { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::READY => write!(f, "😀"), Self::RUNNING => write!(f, "✅"), Self::STOP => write!(f, "❌"), } } } #[derive(Deserialize, Serialize, Debug)] struct ContainerInfo { id: String, pid: i32, run: String, // /bin/bash image: String, // busybox volume: String, // /root/tmp:/root/tmp,/root/tmp1:/root/tmp1 env: String, // a=1,b=2,c=3 或者 env文件路径 status: ContainerStatus, } impl Display for ContainerInfo { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let volume: String = self.volume.chars().take(20).collect(); let env: String = self.env.chars().take(20).collect(); write!(f, "\x1b[4m{:<10} {:<8} {:<10} {:<20} {:<20} {:<20} {:<10}\x1b[24m", self.id, self.pid, self.image, self.run, volume, env, &self.status) } } fn save_container_info(args: &RockerArgs, container_id: &String, pid: i32) -> Result<()> { let container_info_path = Path::new(WORKSPACE).join("containers").join(container_id).join(INFO_FILE); let container_info = ContainerInfo { id: container_id.clone(), pid: pid, run: args.run.as_ref().unwrap().clone(), image: args.image.as_ref().unwrap().clone(), volume: args.volume.clone().unwrap_or("".to_string()), env: args.env.clone().unwrap_or("".to_string()), status: ContainerStatus::READY, }; let toml_str = toml::to_string(&container_info)?; fs::write(container_info_path, toml_str)?; Ok(()) } fn get_container_info(container_id: &str) -> Result { let container_work_path = Path::new(WORKSPACE).join("containers").join(container_id); let container_info_path = container_work_path.join(INFO_FILE); let lock_path = container_work_path.join(LOCK_FILE); let info_str = fs::read_to_string(container_info_path)?; let mut container_info: ContainerInfo = toml::from_str(&info_str)?; // 判断是否正在运行, 首先得到该容器进程对应的所有的fd let proc_fd_path = Path::new("/proc").join(container_info.pid.to_string()).join("fd"); let is_running = if let Ok(fd_dir) = fs::read_dir(proc_fd_path) { fd_dir.filter_map(|p|p.ok()) .filter_map(|f| fs::read_link(f.path()).ok()) .any(|p|p == lock_path) } else { false }; if is_running { container_info.status = ContainerStatus::RUNNING; } else { container_info.status = ContainerStatus::STOP; } Ok(container_info) } fn get_all_container_info() -> Result> { let containers_path = Path::new(WORKSPACE).join("containers"); let all_containers_info = fs::read_dir(containers_path)? .map(|res| res.map(|e| e.file_name())) .filter_map(|p| p.ok()) .map(|f|f.to_string_lossy().to_string()) .filter_map(|s|get_container_info(s.as_str()).ok()) .collect::>(); Ok(all_containers_info) } /// 读取所有容器的状态 fn show_containers(is_show_all: bool) -> Result<()> { println!("{:<10} {:<8} {:<10} {:<20} {:<20} {:<20} {:<10}", "id", "pid", "image", "run", "volume", "env", "status"); for container_info in get_all_container_info()? { if is_show_all{ println!("{container_info}"); } else if container_info.status == ContainerStatus::RUNNING { println!("{container_info}"); } } Ok(()) } fn stop_container(containers_id: &str, is_remove: bool) -> Result<()> { if containers_id == "all" { for container_info in get_all_container_info()?{ stop_container(container_info.id.as_str(), is_remove)? } return Ok(()) } for container_id in containers_id.split(" ") { if let Ok(container_info) = get_container_info(container_id) { let container_work_path = Path::new(WORKSPACE).join("containers").join(container_id); let container_merged_path = container_work_path.join("merged"); println!("container_merged_path: {container_merged_path:?}"); // 正在运行中的需要 kill if container_info.status == ContainerStatus::RUNNING { let _ = kill(Pid::from_raw(container_info.pid), Signal::SIGTERM); let pid_path = Path::new("/proc").join(container_info.pid.to_string()); while pid_path.exists() { std::thread::sleep(std::time::Duration::from_millis(10)); } } // 卸载自定义挂载点 if container_info.volume != "" { container_info.volume .split(",") .filter_map(|v| v.split(":").last()) .map(|v| { if v.starts_with("/") { container_merged_path.join(&v[1..]).to_string_lossy().to_string() } else { container_merged_path.join(v).to_string_lossy().to_string() } }) .for_each(|s| { match umount2(s.as_str(), MntFlags::MNT_DETACH) { Ok(_) => println!("卸载自定卷{s}"), Err(e) => println!("卸载卷{s}失败: {e:?}"), } }); } // 卸载overlayfs match umount2(container_merged_path.to_str().unwrap(), MntFlags::MNT_DETACH) { Ok(_) => println!("卸载overlayfs卷"), Err(e) => println!("卸载overlayfs失败: {e:?}"), } println!("停止容器: {container_id:?}"); // 删除容器目录 if is_remove { match fs::remove_dir_all(container_work_path) { Ok(_) => println!("删除容器 {container_id} 成功"), Err(e) => println!("删除容器失败: {e:?}"), } } } else { println!("容器不存在: {container_id}") } } Ok(()) } fn main() -> Result<()>{ let args = RockerArgs::parse(); if let (Some(cmd), Some(image_name)) = (&args.run, &args.image) { // run let volume_path = extend_image(image_name)?; let container_id = uuid::Uuid::new_v4().to_string()[0..8].to_string(); let mut pid = -1; match run_container(&container_id,&cmd, &args, &volume_path, false) { Ok(child_pid) => { pid = child_pid; } Err(e) => { println!("run_container失败: {e}"); } } save_container_info(&args, &container_id, pid)?; // todo 无论出不错, 都要保存一个信息, 后面需要删除用清理 } else if args.ps || args.psa { // --ps show_containers(args.psa)? } else if let Some(containers_id) = &args.rm { // --rm stop_container(containers_id, true)?; } else if let Some(containers_id) = &args.stop { // --stop stop_container(containers_id, false)?; } else if let (Some(cmd), Some(container_id)) = (&args.run, &args.exec) { run_container(container_id, &cmd, &args, &Default::default(), true).unwrap(); } Ok(()) }