You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

610 lines
21 KiB
Rust

use std::{io, fs, fmt, os, path, process};
use io::Read;
use fmt::Display;
use os::fd::{AsFd, AsRawFd};
use os::unix::{fs::PermissionsExt, process::CommandExt};
use path::{Path, PathBuf};
use nix::sched::{clone, CloneCb, CloneFlags, setns};
use nix::sys::{signal::{kill, Signal}, wait::{waitpid, WaitPidFlag}};
use nix::unistd::{dup2, pivot_root, setgid, setgroups, sethostname, setuid, Gid, Pid, Uid, User};
use nix::mount::{mount, MntFlags, MsFlags, umount2};
use uuid;
use toml;
use serde::{Deserialize, Serialize};
use clap::Parser;
use error::{Result, RockerError};
mod error;
/// Root directory under which rocker keeps its `images`, `volumes` and `containers` trees.
static WORKSPACE: &str = "/root/rocker";
/// Account name looked up for the container user; also used as the container hostname.
static USER_NAME: &str = "rocker";
/// File name of the per-container metadata file.
static INFO_FILE: &str = "info.toml";
/// File name of the per-container lock file.
static LOCK_FILE: &str = ".lock";
/// Zero-initialised 1 MiB buffer handed to `clone` as the child's stack.
static mut STACK: [u8; 1 << 20] = [0; 1 << 20];
/// CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWPID | CLONE_NEWIPC | CLONE_NEWNET
static CLONE_FLAG: i32 = 0x6C02_0000;
// Command-line options of the rocker binary, parsed by clap.
// `main` picks one action from the options that are present, checked in this order:
// --run + --image (start a container), --ps / --psa (list), --rm, --stop, --run + --exec.
// Plain `//` comments are used on purpose: clap's derive turns `///` doc comments into help text.
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct RockerArgs {
// Command line to execute; combined with --image to start a container or with --exec to
// run inside an existing one.
// Example: --wait/--log --run /bin/bash --image busybox
#[arg(long)]
run: Option<String>,
// Name of the image archive, looked up under <WORKSPACE>/images.
#[arg(long)]
image: Option<String>,
// Comma-separated list of host_path:container_path bind mounts.
// Example: --volume "/tmp/test1:tmp/test1,/tmp/test2:tmp/test2"
#[arg(long)]
volume: Option<String>,
// Either a comma-separated KEY=VALUE list, or (when the value starts with "./" or "/")
// the path of a file holding one KEY=VALUE entry per line.
// Example: --env "a=1,b=2,c=3"
#[arg(long)]
env: Option<String>,
// Id of an existing container in which the --run command is executed.
// Example: --run /bin/bash --exec container_id
#[arg(long)]
exec: Option<String>,
// Redirect the container's stdout and stderr into its logs/log file.
// Cannot be combined with --wait.
#[arg(long)]
log: bool,
// Block until the container process changes state (waitpid). Cannot be combined with --log.
#[arg(long)]
wait: bool,
// Example: --logs container_id
// NOTE(review): nothing visible in this file reads this option yet.
#[arg(long)]
logs: Option<String>,
// List the running containers.
// Example: --ps
#[arg(long)]
ps: bool,
// List all containers, running or not.
// Example: --psa
#[arg(long)]
psa: bool,
// Stop the given containers and delete their directories. Separate ids with spaces,
// or pass "all" for every container.
// Example: --rm "container_id_1 container_id_2 container_id_3"
#[arg(long)]
rm: Option<String>,
// Stop the given containers but keep their directories. Separate ids with spaces,
// or pass "all" for every container.
// Example: --stop "container_id_1 container_id_2 container_id_3"
#[arg(long)]
stop: Option<String>
}
/// 从images解压到volumes
fn extend_image(image_name: &String) -> Result<PathBuf> {
// 源文件
let image_path = Path::new(WORKSPACE).join("images").join(image_name);
if image_path.exists() == false {
return Err(RockerError::from(io::Error::new(io::ErrorKind::NotFound, "未找到镜像")));
}
let image_path_str = image_path.to_str().unwrap(); // 安全的unwrap
// volumes只读层
let volume_path = Path::new(WORKSPACE).join("volumes").join(image_name);
if volume_path.exists() {
return Ok(volume_path);
} else {
create_dir(&volume_path, true)?;
}
let volume_path_str = volume_path.to_str().unwrap(); // 安全的unwrap
// 解压缩
let out = process::Command::new("tar")
.arg("-xvf")
.arg(image_path_str)
.arg("-C")
.arg(volume_path_str)
.output()?;
let std_out = String::from_utf8_lossy(&out.stdout);
let std_err = String::from_utf8_lossy(&out.stderr);
if std_err.len() == 0 {
println!("解压缩完毕: {std_out:?}");
Ok(volume_path)
} else {
// 删除 volume_path
std::fs::remove_dir_all(volume_path)?;
Err(RockerError::from(io::Error::new(io::ErrorKind::Other, format!("解压缩镜像失败: {std_err}"))))
}
}
/// Opens (creating it if necessary) the lock file inside `container_work_path` and returns
/// the raw file descriptor.
///
/// The descriptor is returned as a plain integer and is not closed here;
/// `get_container_info` treats a container as running while its process still has this
/// file open.
fn init_container_lock<P: AsRef<Path>>(container_work_path: P) -> Result<i32> {
    use nix::fcntl::{open, OFlag};
    use nix::sys::stat::Mode;
    let lock_file = container_work_path.as_ref().join(LOCK_FILE);
    println!("{:?}", lock_file.as_os_str());
    let fd = open(lock_file.as_os_str(), OFlag::O_CREAT | OFlag::O_RDWR, Mode::empty())?;
    Ok(fd)
}
/// Mounts the container's overlay filesystem on `merged_path` by running `mount -t overlay`.
///
/// * `volume_path` - read-only lower layer (the unpacked image).
/// * `upper_path`  - writable upper layer of this container.
/// * `merged_path` - mount point of the combined view.
///
/// # Errors
/// Returns an I/O error when `mount` cannot be started, or an `Other` I/O error carrying
/// mount's stderr when it exits unsuccessfully.
fn init_container_overlay<P: AsRef<Path>>(volume_path: P, upper_path: P, merged_path: P) -> Result<()> {
    let lower_dir = volume_path.as_ref().to_string_lossy();
    let upper_dir = upper_path.as_ref().to_string_lossy();
    let merged_dir = merged_path.as_ref().to_string_lossy();
    // NOTE(review): `workdir` is the same directory as the mount point. Overlayfs documents
    // workdir as a separate empty directory on the upper layer's filesystem - confirm that
    // this layout is intended before relying on it.
    let dirs = format!("lowerdir={lower_dir},upperdir={upper_dir},workdir={merged_dir}");
    println!("dirs: {dirs:?}");
    let out = process::Command::new("mount")
        .arg("-t")
        .arg("overlay")
        .arg("overlay")
        .arg("-o")
        .arg(&dirs)
        .arg(merged_path.as_ref())
        .output()?;
    // The exit status decides success; text on stderr alone does not mean the mount failed.
    if !out.status.success() {
        let std_err = String::from_utf8_lossy(&out.stderr);
        return Err(RockerError::from(io::Error::new(io::ErrorKind::Other, format!("容器文件系统创建失败: {std_err:?}"))));
    }
    println!("容器文件系统创建完成");
    Ok(())
}
/// Bind-mounts every user-supplied volume into the container's merged root.
///
/// `custom_volume_s` is a comma-separated list of `host_path:container_path` entries; only
/// the first two `:`-separated fields of an entry are used. Leading slashes of the container
/// path are dropped so that it is always resolved below `container_merged_path`. Both
/// directories are created when missing.
///
/// # Errors
/// Returns `RockerError::OtherError` for an entry without a `:` or when `mount` exits
/// unsuccessfully, and an I/O error when a directory cannot be created or `mount` cannot be
/// started. Processing stops at the first failing entry.
fn init_container_custom_volume<P: AsRef<Path>>(container_merged_path: P, custom_volume_s: &String) -> Result<()> {
    let merged_root = container_merged_path.as_ref();
    for custom_volume in custom_volume_s.split(',') {
        let mut fields = custom_volume.split(':');
        let (host_path, container_field) = match (fields.next(), fields.next()) {
            (Some(host), Some(container)) => (host, container),
            _ => return Err(RockerError::OtherError(format!("volume 参数格式不正确: {custom_volume}"))),
        };
        // `Path::join` replaces the base when given an absolute path, so every leading
        // slash has to go - otherwise "//x" would be mounted outside the container root.
        let container_path = merged_root.join(container_field.trim_start_matches('/'));
        // Create the directory on the host and inside the container.
        create_dir(host_path, true)?;
        create_dir(&container_path, true)?;
        // Bind the host directory onto the container directory.
        let out = process::Command::new("mount")
            .arg("-o")
            .arg("bind")
            .arg(host_path)
            .arg(&container_path)
            .output()?;
        // The exit status decides success; text on stderr alone does not mean failure.
        if !out.status.success() {
            let std_err = String::from_utf8_lossy(&out.stderr);
            return Err(RockerError::OtherError(format!("创建volume失败: {std_err}")));
        }
        println!("创建自定义 volume: {custom_volume:?}");
    }
    Ok(())
}
/// Replaces the process environment with the one requested for the container.
///
/// Every inherited variable is removed first. `env` is then interpreted either as the path
/// of a file with one `KEY=VALUE` entry per line (when it starts with `./` or `/`) or as a
/// comma-separated `KEY=VALUE` list. An entry is split at its first `=`, so values may
/// themselves contain `=`. Blank entries are skipped; entries without `=`, with an empty
/// key, or containing a NUL byte are reported on stdout and ignored.
///
/// # Errors
/// Returns an I/O error when the env file cannot be opened or read.
fn init_container_env(env: Option<&String>) -> Result<()> {
    // `vars_os` rather than `vars`: the latter panics on variables that are not valid Unicode.
    for (name, _) in std::env::vars_os() {
        std::env::remove_var(name);
    }
    let env = match env {
        Some(env) => env,
        None => return Ok(()),
    };
    let entries: Vec<String> = if env.starts_with("./") || env.starts_with('/') {
        // The value names a file that holds the environment.
        let mut env_file = fs::File::open(Path::new(env))?;
        let mut text = String::new();
        env_file.read_to_string(&mut text)?;
        text.lines().map(String::from).collect()
    } else {
        env.split(',').map(String::from).collect()
    };
    for entry in entries.iter().filter(|entry| !entry.trim().is_empty()) {
        match entry.split_once('=') {
            // `set_var` panics on an empty key or an embedded NUL, so those are rejected here.
            Some((key, value)) if !key.is_empty() && !entry.contains('\0') => {
                std::env::set_var(key, value)
            }
            _ => println!("env 格式不正确: {entry}"),
        }
    }
    Ok(())
}
/// Makes `merged_path` the root filesystem of the calling process via `pivot_root`.
///
/// Steps: mark every mount private, bind-mount the new root onto itself, pivot with the old
/// root parked in `<new root>/.pivot_root`, change into `/`, then detach and delete the
/// parked old root.
///
/// # Errors
/// Any failing mount, pivot, unmount or directory operation is returned to the caller
/// instead of panicking.
fn init_container_pivot<P: AsRef<Path>>(merged_path: P) -> Result<()> {
    // Before the root is switched, turn every mount point private (recursively) so that
    // mount changes made by this process do not propagate to the host.
    mount(None::<&str>, "/", None::<&str>, MsFlags::MS_PRIVATE | MsFlags::MS_REC, None::<&str>)?;
    // Enter the overlay directory that becomes the new root.
    std::env::set_current_dir(merged_path)?;
    let new_root = std::env::current_dir()?;
    // Bind-mount the new root onto itself. The path is passed as a `Path`, so a
    // non-UTF-8 directory name is not mangled.
    mount(Some(new_root.as_path()), new_root.as_path(), Some("bind"), MsFlags::MS_BIND | MsFlags::MS_REC, Some(""))?;
    // `<new root>/.pivot_root` receives the old root during the switch.
    let old_root = new_root.join(".pivot_root");
    create_dir(&old_root, true)?;
    pivot_root(new_root.as_path(), old_root.as_path())?;
    // The working directory must be moved to the new root explicitly.
    std::env::set_current_dir("/")?;
    // Detach the old root and remove its temporary mount point.
    umount2(".pivot_root", MntFlags::MNT_DETACH)?;
    std::fs::remove_dir(".pivot_root")?;
    Ok(())
}
/// Mounts a `proc` filesystem on `/proc` and a `tmpfs` (mode 755) on `/dev`, both with the
/// nodev, noexec and nosuid flags.
fn init_container_mount() -> Result<()> {
    let hardening = MsFlags::MS_NODEV | MsFlags::MS_NOEXEC | MsFlags::MS_NOSUID;
    // (filesystem type, which is also passed as the source name; mount point; mount data)
    let targets = [("proc", "/proc", ""), ("tmpfs", "/dev", "mode=755")];
    for (fs_kind, mount_point, options) in targets {
        mount(Some(fs_kind), mount_point, Some(fs_kind), hardening, Some(options))?;
    }
    Ok(())
}
/// Creates `logs/log` relative to the current directory and, when `log` is set, points
/// stdout (fd 1) and stderr (fd 2) at that file.
///
/// The directory and the file are created even when `log` is false.
fn init_container_log(log: bool) -> Result<()> {
    let log_dir = Path::new("logs");
    create_dir(log_dir, true)?;
    let log_file = fs::File::create(log_dir.join("log"))?;
    if !log {
        return Ok(());
    }
    let raw = log_file.as_raw_fd();
    // NOTE(review): kept in an `unsafe` block exactly like the original; whether `dup2`
    // needs it depends on the nix version in use.
    unsafe {
        dup2(raw, 1)?;
        dup2(raw, 2)?;
    }
    Ok(())
}
/// Switches the calling process to the given identity: primary group first, then the
/// supplementary group list (reduced to `gid` alone), and finally the user id.
fn init_container_user(uid: Uid, gid: Gid) -> Result<()> {
    let supplementary = [gid];
    setgid(gid)?;
    setgroups(&supplementary)?;
    Ok(setuid(uid)?)
}
/// Creates `path` together with any missing parents; when `is_any` is set the directory
/// itself is additionally given mode `0o777`.
fn create_dir<P: AsRef<Path>>(path: P, is_any: bool) -> Result<()> {
    let dir = path.as_ref();
    fs::create_dir_all(dir)?;
    if !is_any {
        return Ok(());
    }
    let open_to_all = fs::Permissions::from_mode(0o777);
    fs::set_permissions(dir, open_to_all)?;
    Ok(())
}
/// Reports whether the executable of process `pid` differs from `main_exe`.
///
/// Resolves the `/proc/<pid>/exe` link and returns `Ok(true)` when its target is not
/// `main_exe`, `Ok(false)` when it still is, and an error when the link cannot be read.
fn check_container_is_running(pid: &Pid, main_exe: &Path) -> Result<bool> {
    let exe_link = Path::new("/proc").join(pid.to_string()).join("exe");
    let current_exe = fs::read_link(exe_link)?;
    Ok(current_exe != main_exe)
}
fn init_exec_ns(pid: i32) -> Result<()>{
// 把当前进程加入到指定pid的namespace
for ns_name in vec!["ipc", "uts", "net", "pid", "mnt"] {
let ns_path = format!("/proc/{pid}/ns/{ns_name}");
let ns_fild = fs::File::open(ns_path)?;
setns(ns_fild.as_fd(), CloneFlags::from_bits_retain(0))? }
Ok(())
}
/// Clones a child that runs `cb` on the static `STACK`, waits until the child has replaced
/// its executable (or roughly 10 s have passed), optionally waits for the child, and returns
/// the child's pid.
///
/// * `is_wait`     - when true, block in `waitpid` until the child changes state.
/// * `cb`          - entry point executed by the child.
/// * `clong_flags` - flags handed to `clone`.
///
/// # Errors
/// Returns an I/O error when the current executable cannot be determined and
/// `RockerError::OtherError` when `clone` fails. A start-up timeout or a failing `waitpid`
/// is only reported on stdout.
fn start(is_wait: bool, cb: CloneCb, clong_flags: CloneFlags) -> Result<i32> {
    let main_exe = std::env::current_exe()?;
    // SAFETY: `STACK` is referenced nowhere else in this file and no threads are spawned
    // here, so this is the only live reference. The flag sets used by the callers do not
    // include CLONE_VM, so the child runs on its own copy of the buffer.
    let child_pid = unsafe {
        let stack: &mut [u8] = &mut *std::ptr::addr_of_mut!(STACK);
        clone(cb, stack, clong_flags, None)
    }
    .map_err(|e| RockerError::OtherError(format!("clone err: {e}")))?;
    println!("clone ok: {child_pid:?}");
    // Poll until the child has exec'ed something other than this binary. The loop also
    // ends when /proc/<pid>/exe can no longer be read (the child is already gone).
    let mut cnt = 0;
    while let Ok(running) = check_container_is_running(&child_pid, &main_exe) {
        if running {
            break;
        }
        cnt += 1;
        if cnt > 1000 {
            println!("{child_pid} 启动 超时");
            break;
        }
        std::thread::sleep(std::time::Duration::from_millis(10));
    }
    if is_wait {
        // The child was cloned without an exit signal, which makes it a "clone child": a
        // plain waitpid does not match it and fails with ECHILD. `__WALL` waits for
        // children of either kind.
        match waitpid(child_pid, Some(WaitPidFlag::WUNTRACED | WaitPidFlag::__WALL)) {
            Ok(status) => println!("{child_pid:?} exit: {status:?}"),
            Err(e) => println!("{child_pid} wait err: {e}"),
        }
    }
    Ok(child_pid.as_raw())
}
/// Replaces the current process with the whitespace-separated command line `cmd`.
///
/// Only returns when the exec failed (or `cmd` is empty); the failure is printed and a
/// non-zero status is returned for use as the child's exit code.
fn exec_command(cmd: &str) -> isize {
    let cmd_vec = cmd.split_whitespace().collect::<Vec<&str>>();
    match cmd_vec.split_first() {
        Some((program, rest)) => {
            let err = process::Command::new(program).args(rest).exec();
            println!("execv {cmd_vec:?}失败: {err:?}");
        }
        None => println!("execv {cmd_vec:?}失败: 命令为空"),
    }
    1
}

/// Starts `cmd` in a cloned child, either inside a brand-new container or inside the
/// namespaces of an existing one, and returns the child's pid.
///
/// * `_container_id` - id of the container to create, or to join when `is_exec` is true.
/// * `cmd`           - command line to execute, split on whitespace.
/// * `args`          - parsed command-line options (`wait`, `log`, `volume`, `env` are read).
/// * `volume_path`   - read-only image layer; only used when `is_exec` is false.
/// * `is_exec`       - true: join the namespaces of the existing container's process, clear
///   the environment and switch user; false: create the container directories, then in the
///   child take the lock, mount the overlay and custom volumes, set hostname and environment,
///   pivot the root, mount /proc and /dev, set up logging and switch user.
///
/// A failing setup step or exec inside the child is printed and makes the child exit with
/// status 1 instead of panicking or reporting success.
///
/// # Errors
/// Returns `RockerError::OtherError` when `--wait` and `--log` are combined or the `rocker`
/// user does not exist, plus any error from creating the container directories or from
/// `start`.
fn run_container(_container_id: &String, cmd: &String, args: &RockerArgs, volume_path: &PathBuf, is_exec: bool) -> Result<i32> {
    // --wait and --log must not be used together.
    if args.wait && args.log {
        return Err(RockerError::OtherError("--wait/--log 禁止同时使用".to_string()));
    }
    let rocker_user_info = User::from_name(USER_NAME)?
        .ok_or_else(|| RockerError::OtherError(format!("没找到 用户: {USER_NAME}")))?;
    let rocker_uid = rocker_user_info.uid;
    let rocker_gid = rocker_user_info.gid;
    let (cb, clone_flags) = if is_exec {
        let cb = move || {
            let prepare = || -> Result<()> {
                let container_info = get_container_info(_container_id)?;
                init_exec_ns(container_info.pid)?;
                init_container_env(None)?;
                init_container_user(rocker_uid, rocker_gid)?;
                Ok(())
            };
            match prepare() {
                Ok(()) => exec_command(cmd),
                Err(e) => {
                    println!("容器初始化失败: {e}");
                    1
                }
            }
        };
        (Box::new(cb) as CloneCb, CloneFlags::empty())
    } else {
        // Prepare the container's working directories before cloning.
        let container_work_path = Path::new(WORKSPACE).join("containers").join(_container_id);
        let container_upper_path = container_work_path.join("upper");
        let container_merged_path = container_work_path.join("merged");
        create_dir(&container_work_path, true)?;
        create_dir(&container_upper_path, true)?;
        create_dir(&container_merged_path, true)?;
        let cb = move || {
            let prepare = || -> Result<()> {
                // The returned descriptor is deliberately left open: it marks the
                // container as running for `get_container_info`.
                init_container_lock(&container_work_path)?;
                init_container_overlay(volume_path, &container_upper_path, &container_merged_path)?;
                if let Some(custom_volume) = &args.volume {
                    init_container_custom_volume(&container_merged_path, custom_volume)?;
                }
                sethostname(USER_NAME)?;
                init_container_env(args.env.as_ref())?;
                init_container_pivot(&container_merged_path)?;
                init_container_mount()?;
                init_container_log(args.log)?;
                init_container_user(rocker_uid, rocker_gid)?;
                Ok(())
            };
            match prepare() {
                Ok(()) => exec_command(cmd),
                Err(e) => {
                    println!("容器初始化失败: {e}");
                    1
                }
            }
        };
        (Box::new(cb) as CloneCb, CloneFlags::from_bits_truncate(CLONE_FLAG))
    };
    start(args.wait, cb, clone_flags)
}
/// Lifecycle state of a container as stored in, and reported from, its info file.
///
/// NOTE(review): with the derived serde impls the variant names themselves should be what
/// ends up in `info.toml` (serde's default for unit variants), so renaming a variant would
/// change the stored format - confirm before renaming.
#[derive(Deserialize, Serialize, Debug, PartialEq)]
enum ContainerStatus {
/// Value written by `save_container_info` when the record is first stored.
READY,
/// Set by `get_container_info` when the recorded process still has the lock file open.
RUNNING,
/// Set by `get_container_info` in every other case.
STOP,
}
/// Renders the status as a single emoji; width and alignment requested by the caller are
/// not applied.
impl Display for ContainerStatus {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let symbol = match self {
            Self::READY => "😀",
            Self::RUNNING => "✅",
            Self::STOP => "❌",
        };
        f.write_str(symbol)
    }
}
/// Metadata of one container, serialized as TOML into the container's `info.toml`.
#[derive(Deserialize, Serialize, Debug)]
struct ContainerInfo {
// Container id; also the name of its directory under <WORKSPACE>/containers.
id: String,
// Pid returned when the container was started; `main` stores -1 when starting failed.
pid: i32,
run: String, // command line, e.g. /bin/bash
image: String, // image name, e.g. busybox
volume: String, // e.g. /root/tmp:/root/tmp,/root/tmp1:/root/tmp1 (empty when unset)
env: String, // e.g. a=1,b=2,c=3, or the path of an env file (empty when unset)
// Stored as READY; `get_container_info` overwrites it with RUNNING or STOP on every read.
status: ContainerStatus,
}
/// Renders one underlined, column-aligned table row; `volume` and `env` are cut to their
/// first 20 characters.
impl Display for ContainerInfo {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let clip = |text: &str| text.chars().take(20).collect::<String>();
        let (volume, env) = (clip(&self.volume), clip(&self.env));
        write!(f, "\x1b[4m{:<10} {:<8} {:<10} {:<20} {:<20} {:<20} {:<10}\x1b[24m", self.id, self.pid, self.image, self.run, volume, env, &self.status)
    }
}
/// Writes the metadata of a freshly started container to
/// `<WORKSPACE>/containers/<container_id>/info.toml` with status `READY`.
///
/// Panics when `args.run` or `args.image` is `None`; a missing `volume` or `env` is stored
/// as an empty string.
fn save_container_info(args: &RockerArgs, container_id: &String, pid: i32) -> Result<()> {
    let record = ContainerInfo {
        id: container_id.to_owned(),
        pid,
        run: args.run.clone().unwrap(),
        image: args.image.clone().unwrap(),
        volume: args.volume.clone().unwrap_or_default(),
        env: args.env.clone().unwrap_or_default(),
        status: ContainerStatus::READY,
    };
    let serialized = toml::to_string(&record)?;
    let target = Path::new(WORKSPACE).join("containers").join(container_id).join(INFO_FILE);
    fs::write(target, serialized)?;
    Ok(())
}
/// Loads the stored metadata of `container_id` and fills in its current status.
///
/// The container counts as `RUNNING` when one of the entries in `/proc/<pid>/fd` of the
/// recorded pid links to the container's lock file; otherwise (including when that
/// directory cannot be read) it is `STOP`.
///
/// Fails when the info file cannot be read or parsed.
fn get_container_info(container_id: &str) -> Result<ContainerInfo> {
    let work_dir = Path::new(WORKSPACE).join("containers").join(container_id);
    let raw_info = fs::read_to_string(work_dir.join(INFO_FILE))?;
    let mut info: ContainerInfo = toml::from_str(&raw_info)?;
    let lock_file = work_dir.join(LOCK_FILE);
    // Inspect every descriptor the recorded process currently holds.
    let fd_dir = Path::new("/proc").join(info.pid.to_string()).join("fd");
    let holds_lock = match fs::read_dir(fd_dir) {
        Ok(entries) => entries
            .flatten()
            .any(|entry| matches!(fs::read_link(entry.path()), Ok(target) if target == lock_file)),
        Err(_) => false,
    };
    info.status = if holds_lock {
        ContainerStatus::RUNNING
    } else {
        ContainerStatus::STOP
    };
    Ok(info)
}
/// Collects the metadata of every entry under `<WORKSPACE>/containers`.
///
/// Entries that cannot be listed, or whose info cannot be loaded, are skipped silently;
/// only a failure to read the directory itself is returned as an error.
fn get_all_container_info() -> Result<Vec<ContainerInfo>> {
    let containers_dir = Path::new(WORKSPACE).join("containers");
    let mut found = Vec::new();
    for entry in fs::read_dir(containers_dir)? {
        let name = match entry {
            Ok(entry) => entry.file_name(),
            Err(_) => continue,
        };
        let id = name.to_string_lossy().to_string();
        if let Ok(info) = get_container_info(id.as_str()) {
            found.push(info);
        }
    }
    Ok(found)
}
/// Prints a table header followed by one row per container: every container when
/// `is_show_all` is set, otherwise only those whose status is `RUNNING`.
fn show_containers(is_show_all: bool) -> Result<()> {
    println!("{:<10} {:<8} {:<10} {:<20} {:<20} {:<20} {:<10}", "id", "pid", "image", "run", "volume", "env", "status");
    get_all_container_info()?
        .iter()
        .filter(|info| is_show_all || info.status == ContainerStatus::RUNNING)
        .for_each(|info| println!("{info}"));
    Ok(())
}
/// Polls until `pid_path` no longer exists, checking at most `attempts` times with 10 ms
/// pauses; returns whether the path is gone.
fn wait_for_exit(pid_path: &Path, attempts: u32) -> bool {
    for _ in 0..attempts {
        if !pid_path.exists() {
            return true;
        }
        std::thread::sleep(std::time::Duration::from_millis(10));
    }
    !pid_path.exists()
}

/// Stops the listed containers and optionally deletes their directories.
///
/// * `containers_id` - `"all"` for every known container, otherwise a list of container ids
///   separated by commas and/or whitespace.
/// * `is_remove`     - when true the container directory is deleted after stopping.
///
/// A running container receives SIGTERM; if its `/proc/<pid>` entry is still present after
/// about 5 s, SIGKILL is sent and another 5 s are granted, so a process that ignores SIGTERM
/// can no longer block this function forever. Afterwards the custom volumes and the overlay
/// mount are detached. Unknown ids and failing unmounts or deletions are reported on stdout
/// and do not abort the remaining work.
///
/// # Errors
/// Only a failure to enumerate the containers for `"all"` is returned.
fn stop_container(containers_id: &str, is_remove: bool) -> Result<()> {
    if containers_id == "all" {
        for container_info in get_all_container_info()? {
            stop_container(container_info.id.as_str(), is_remove)?;
        }
        return Ok(());
    }
    let ids = containers_id
        .split(|c: char| c == ',' || c.is_whitespace())
        .filter(|id| !id.is_empty());
    for container_id in ids {
        let container_info = match get_container_info(container_id) {
            Ok(info) => info,
            Err(_) => {
                println!("容器不存在: {container_id}");
                continue;
            }
        };
        let container_work_path = Path::new(WORKSPACE).join("containers").join(container_id);
        let container_merged_path = container_work_path.join("merged");
        println!("container_merged_path: {container_merged_path:?}");
        // A container that is still running has to be terminated first.
        if container_info.status == ContainerStatus::RUNNING {
            let pid = Pid::from_raw(container_info.pid);
            let pid_path = Path::new("/proc").join(container_info.pid.to_string());
            let _ = kill(pid, Signal::SIGTERM);
            if !wait_for_exit(&pid_path, 500) {
                println!("容器进程 {pid} 未响应 SIGTERM, 发送 SIGKILL");
                let _ = kill(pid, Signal::SIGKILL);
                if !wait_for_exit(&pid_path, 500) {
                    println!("容器进程 {pid} 仍未退出");
                }
            }
        }
        // Detach the custom volumes. The mount point is derived like the mount code does:
        // second `:`-separated field, resolved below the merged root.
        for spec in container_info.volume.split(',').filter(|spec| !spec.is_empty()) {
            let container_field = match spec.split(':').nth(1) {
                Some(field) => field,
                None => continue,
            };
            let mount_point = container_merged_path.join(container_field.trim_start_matches('/'));
            match umount2(mount_point.as_path(), MntFlags::MNT_DETACH) {
                Ok(_) => println!("卸载自定卷{}", mount_point.display()),
                Err(e) => println!("卸载卷{}失败: {e:?}", mount_point.display()),
            }
        }
        // Detach the overlay filesystem itself.
        match umount2(container_merged_path.as_path(), MntFlags::MNT_DETACH) {
            Ok(_) => println!("卸载overlayfs卷"),
            Err(e) => println!("卸载overlayfs失败: {e:?}"),
        }
        println!("停止容器: {container_id:?}");
        // Delete the container directory when requested.
        if is_remove {
            match fs::remove_dir_all(&container_work_path) {
                Ok(_) => println!("删除容器 {container_id} 成功"),
                Err(e) => println!("删除容器失败: {e:?}"),
            }
        }
    }
    Ok(())
}
fn main() -> Result<()>{
let args = RockerArgs::parse();
if let (Some(cmd), Some(image_name)) = (&args.run, &args.image) {
// run
let volume_path = extend_image(image_name)?;
let container_id = uuid::Uuid::new_v4().to_string()[0..8].to_string();
let mut pid = -1;
match run_container(&container_id,&cmd, &args, &volume_path, false) {
Ok(child_pid) => {
pid = child_pid;
}
Err(e) => {
println!("run_container失败: {e}");
}
}
save_container_info(&args, &container_id, pid)?; // todo 无论出不错, 都要保存一个信息, 后面需要删除用清理
} else if args.ps || args.psa {
// --ps
show_containers(args.psa)?
} else if let Some(containers_id) = &args.rm {
// --rm
stop_container(containers_id, true)?;
} else if let Some(containers_id) = &args.stop {
// --stop
stop_container(containers_id, false)?;
} else if let (Some(cmd), Some(container_id)) = (&args.run, &args.exec) {
run_container(container_id, &cmd, &args, &Default::default(), true).unwrap();
}
Ok(())
}