继上篇:踩坑记,如何使用ebpf-go,实战案例:拦截vfs_read函数获取文件名输出,本篇提供一个稍微复杂一点的案例供大家参考学习,这个案例涉及数据的存储、go和c数据的传递。
本案例来源于我们想实现一个功能然后做的技术调研:我们想监控虚拟机上所有挂载文件系统的目录的文件写操作,每个挂载的目录限制可使用50GB,超过这个大小就拒绝写入。
假如在某个虚拟机上挂载了两个目录:
- /user1
- /user2
这两个目录其实挂载的是同一个NFS文件系统,对应文件系统上的不同目录,整个文件系统的大小是1TB。但是我们想限制每个目录的存储上限为50GB,超过就不允许写入。
借助eBPF,我们可以给vfs_write写操作挂个hook逻辑,然后实现两部分功能:
- c程序hook vfs_write方法,并生成一个事件,这个事件记录了此次写文件的根目录, 然后go程序负责消费这个事件,获取根目录,然后在go程序中判断该目录是否写满,如果写满了,往ebpf map写入标志,标志这个目录禁止写。
- c程序获取go程序写入的标志,如果存在标志,则拒绝写操作。(调研结论:我们无法实现这一步,文章后面说明。)
c程序代码如下:
//go:build ignore
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
// dir_cfg is the value type stored in dir_allow_write_map: the per-directory
// settings that user space writes and the kprobe reads.
struct dir_cfg{
// Write-block flag; the kprobe treats the value 1 as "writes disabled".
u32 disable;
};
// Force emitting struct dir_cfg into the ELF.
const struct dir_cfg *unused_dir_cfg __attribute__((unused));
struct bpf_map_def SEC("maps") dir_allow_write_map = {
.type = BPF_MAP_TYPE_HASH,
.key_size = sizeof(u32),
.value_size = sizeof(struct dir_cfg),
.max_entries = 1024,
};
// write_event is the record that the kprobe submits to write_event_ringbuf.
struct write_event {
// Root directory name as copied from the dentry by bpf_probe_read_kernel_str.
char root_path[256];
// Value returned by bpf_probe_read_kernel_str for root_path (per the helper's
// documentation this count includes the trailing NUL).
u32 root_path_len;
};
// write_event_ringbuf is the ring buffer through which the kprobe hands
// struct write_event records to user space. For ring buffer maps max_entries
// is the buffer size in bytes (1 << 24 = 16 MiB).
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
__uint(max_entries, 1 << 24);
} write_event_ringbuf SEC(".maps");
// Force emitting struct write_event into the ELF: nothing else references the
// type by name in a declaration, so this unused pointer keeps its type
// information available to tooling (e.g. bpf2go's `-type write_event`).
const struct write_event *unused_write_event __attribute__((unused));
// kprobe_vfs_write runs on entry to vfs_write(). It resolves the top-level
// directory name of the file being written, looks up dir_allow_write_map by
// the hash of that name, and, unless the directory is flagged as disabled,
// publishes a struct write_event to write_event_ringbuf for user space.
//
// Always returns 0 except on the "disabled" path, which returns -1; note that
// the return value of a kprobe program does not stop the probed function.
SEC("kprobe/vfs_write")
int kprobe_vfs_write(struct pt_regs *ctx) {
    struct file *f = (struct file *)PT_REGS_PARM1(ctx);
    struct dentry *dentry = BPF_CORE_READ(&f->f_path, dentry);
    const u32 max = 20; // upper bound on the number of dentry levels walked
    u32 cnt = 1;
    char root_path[256];
    int root_path_len = 0;

    // Find the root directory: walk d_parent upwards and remember the most
    // recently seen name that is longer than one character. When the walk
    // ends, root_path holds the outermost such name.
    do {
        int name_len = BPF_CORE_READ(dentry, d_name.len);
        if (name_len <= 0) {
            break;
        }
        const unsigned char *filename;
        filename = BPF_CORE_READ(dentry, d_name.name);
        if (name_len > 1) { // skip one-character names such as "/"
            name_len = bpf_probe_read_kernel_str(root_path, sizeof(root_path), filename);
            root_path_len = name_len;
        }
        struct dentry *temp_dentry = BPF_CORE_READ(dentry, d_parent);
        // A dentry that is its own parent (or has none) ends the walk.
        if (temp_dentry == dentry || temp_dentry == NULL) {
            break;
        }
        dentry = temp_dentry;
    } while (cnt++ < max);

    // Nothing usable was read (no qualifying name, or the last read failed).
    if (root_path_len <= 0) {
        return 0;
    }

    // Compute the map key: a 31-multiplier polynomial hash over the bytes of
    // the NUL-terminated name.
    u32 key = 0;
    for (int i = 0; i < sizeof(root_path); i++) {
        char ch = root_path[i];
        if (ch == '\0') {
            break;
        }
        key = ch + key * 31;
    }

    // Read the flag written by user space.
    struct dir_cfg *value = bpf_map_lookup_elem(&dir_allow_write_map, &key);
    if (value && value->disable == 1) {
        bpf_printk("not allow write by root path = /%s\n", root_path);
        return -1;
    }

    // Send the event.
    struct write_event *event;
    event = bpf_ringbuf_reserve(&write_event_ringbuf, sizeof(struct write_event), 0);
    if (!event) {
        // Ring buffer full: drop this event.
        return 0;
    }
    event->root_path_len = root_path_len;
    // bpf_probe_read_kernel_str does not pad the buffer after the NUL, so
    // bytes past root_path_len are stale stack/previous-name data. Zero-fill
    // them instead of exporting them to user space.
    for (int i = 0; i < sizeof(root_path); i++) {
        event->root_path[i] = i < root_path_len ? root_path[i] : 0;
    }
    bpf_ringbuf_submit(event, 0);
    return 0;
}
// License string placed in the object's "license" section.
// NOTE(review): presumably required to be GPL-compatible for helpers such as
// bpf_printk — confirm against the kernel's helper documentation.
char __license[] SEC("license") = "Dual MIT/GPL";
- dir_allow_write_map:存储“目录是否禁止写”标志的映射,key为目录名字符串的hash值(而不是目录字符串本身)。value之所以是一个结构体,是因为除了禁止写标志之外,我们还想在value中携带一些其它配置。
- write_event_ringbuf: 一个事件环形队列,c程序写入事件,go程序消费事件。
go程序代码:
package main
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"github.com/cilium/ebpf/link"
"github.com/cilium/ebpf/ringbuf"
"github.com/cilium/ebpf/rlimit"
"golang.org/x/sys/unix"
"log"
"os"
"os/signal"
)
//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -type dir_cfg -type write_event -target amd64 vfstrace vfs-trace.c
// jdk8Hash returns the 32-bit polynomial hash h = 31*h + c of s, computed over
// the raw bytes of s so that it yields the same key as the eBPF program's
// `char`-based loop for any directory name, not only ASCII ones. (Ranging over
// the string would hash Unicode code points instead and diverge from the
// kernel side for multi-byte names.)
func jdk8Hash(s string) uint32 {
	var h uint32
	for i := 0; i < len(s); i++ {
		// Sign-extend each byte like a C `char` promoted to int.
		// NOTE(review): assumes `char` is signed for the BPF target
		// (clang's default) — confirm if the toolchain changes.
		h = 31*h + uint32(int32(int8(s[i])))
	}
	return h
}
// main loads the eBPF objects, attaches the kprobe to vfs_write, consumes
// write events from the ring buffer in a background goroutine, and exits when
// the process is interrupted.
func main() {
	// Remove resource limits for kernels <5.11.
	if err := rlimit.RemoveMemlock(); err != nil {
		log.Fatal("Removing memlock:", err)
	}

	// Load the compiled eBPF ELF and load it into the kernel.
	var objs vfstraceObjects
	if err := loadVfstraceObjects(&objs, nil); err != nil {
		log.Fatal("Loading eBPF objects:", err)
	}
	defer objs.Close()

	// Open a Kprobe at the entry point of vfs_write and attach the
	// pre-compiled program, so it runs each time the kernel enters that
	// function.
	kp, err := link.Kprobe("vfs_write", objs.KprobeVfsWrite, nil)
	if err != nil {
		log.Fatalf("opening kprobe: %s", err)
	}
	defer kp.Close()

	// Open a ringbuf reader from userspace RINGBUF map described in the
	// eBPF C program.
	rd, err := ringbuf.NewReader(objs.WriteEventRingbuf)
	if err != nil {
		log.Fatalf("opening ringbuf reader: %s", err)
	}
	defer rd.Close()

	// Consume events asynchronously.
	go func() {
		var event vfstraceWriteEvent
		for {
			record, err := rd.Read()
			if err != nil {
				if errors.Is(err, ringbuf.ErrClosed) {
					log.Println("Received signal, exiting..")
					return
				}
				log.Printf("reading from reader: %s", err)
				continue
			}

			// Parse the ringbuf event entry into a vfstraceWriteEvent.
			if err := binary.Read(bytes.NewBuffer(record.RawSample), binary.LittleEndian, &event); err != nil {
				log.Printf("parsing ringbuf event: %s", err)
				continue
			}

			// Clamp the reported length to the array size so that an
			// unexpected value cannot cause an out-of-range panic.
			n := int(event.RootPathLen)
			if n > len(event.RootPath) {
				n = len(event.RootPath)
			}
			rootPath := make([]byte, n)
			for i := range rootPath {
				rootPath[i] = byte(event.RootPath[i])
			}
			// ByteSliceToString cuts the name at its first NUL byte.
			rootPathStr := unix.ByteSliceToString(rootPath)
			log.Printf("write file root path is: /%s", rootPathStr)

			// ... implement the quota decision here. For now every
			// observed root directory is flagged as write-disabled.
			key := jdk8Hash(rootPathStr)
			value := &vfstraceDirCfg{
				Disable: uint32(1),
			}
			if err := objs.DirAllowWriteMap.Put(key, value); err != nil {
				fmt.Fprintf(os.Stderr, "updating dir_allow_write_map for /%s: %v\n", rootPathStr, err)
			}
		}
	}()

	// Exit the program when interrupted; the deferred Close calls then
	// detach the kprobe and release the eBPF objects.
	stop := make(chan os.Signal, 1)
	signal.Notify(stop, os.Interrupt)
	<-stop
	log.Print("Received signal, exiting..")
}
go.mod
module vfs-ebpf-trace
go 1.22.1
require github.com/cilium/ebpf v0.15.0
require (
golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 // indirect
golang.org/x/sys v0.15.0 // indirect
)
这个方案实际上是行不通的,因为我们无法在eBPF的kprobe程序中阻止(block)内核函数的执行:kprobe程序的返回值不会影响被探测函数。本篇分享的案例中,我们无论return -1
还是0还是1,都无法阻止vfs_write的执行。但发这个案例出来,是可以用来参考做其它事情的,例如要实现一些可观测性需求,就可以基于这个案例来改。
关于案例中c结构体和go结构体的映射,可以看这篇:ebpf-go c结构体和go结构体的映射