diff --git a/fact-ebpf/build.rs b/fact-ebpf/build.rs index b9c35887..48a32564 100644 --- a/fact-ebpf/build.rs +++ b/fact-ebpf/build.rs @@ -6,7 +6,23 @@ use std::{ }; fn compile_bpf(out_dir: &Path) -> anyhow::Result<()> { - let target_arch = format!("-D__TARGET_ARCH_{}", env::var("CARGO_CFG_TARGET_ARCH")?); + // Get the target architecture from Cargo + let cargo_arch = env::var("CARGO_CFG_TARGET_ARCH")?; + + // Map Cargo's architecture names to what bpf_tracing.h expects for PT_REGS macros: + // x86_64 -> x86, aarch64 -> arm64 + let bpf_arch = match cargo_arch.as_str() { + "x86_64" => "x86", + "aarch64" => "arm64", + other => other, + }; + + // Define both: + // - __TARGET_ARCH_<bpf_arch> for PT_REGS macros (e.g., __TARGET_ARCH_x86) + // - __TARGET_ARCH_<cargo_arch> for vmlinux.h selection (e.g., __TARGET_ARCH_x86_64) + let target_arch_bpf = format!("-D__TARGET_ARCH_{}", bpf_arch); + let target_arch_full = format!("-D__TARGET_ARCH_{}", cargo_arch); + let base_args = [ "-target", "bpf", @@ -15,7 +31,8 @@ fn compile_bpf(out_dir: &Path) -> anyhow::Result<()> { "-c", "-Wall", "-Werror", - &target_arch, + &target_arch_bpf, + &target_arch_full, ]; for name in ["main", "checks"] { diff --git a/fact-ebpf/src/bpf/bound_path.h b/fact-ebpf/src/bpf/bound_path.h index 7a2091f9..65e1bd8a 100644 --- a/fact-ebpf/src/bpf/bound_path.h +++ b/fact-ebpf/src/bpf/bound_path.h @@ -21,7 +21,7 @@ __always_inline static void path_write_char(char* p, unsigned int offset, char c *path_safe_access(p, offset) = c; } -__always_inline static struct bound_path_t* _path_read(struct path* path, bound_path_buffer_t key, bool use_bpf_d_path) { +__always_inline static struct bound_path_t* _path_read(const struct path* path, bound_path_buffer_t key, bool use_bpf_d_path) { struct bound_path_t* bound_path = get_bound_path(key); if (bound_path == NULL) { return NULL; @@ -38,15 +38,15 @@ __always_inline static struct bound_path_t* _path_read(struct path* path, bound_ return bound_path; } -__always_inline static struct 
bound_path_t* path_read_unchecked(struct path* path) { +__always_inline static struct bound_path_t* path_read_unchecked(const struct path* path) { return _path_read(path, BOUND_PATH_MAIN, true); } -__always_inline static struct bound_path_t* path_read(struct path* path) { +__always_inline static struct bound_path_t* path_read(const struct path* path) { return _path_read(path, BOUND_PATH_MAIN, path_hooks_support_bpf_d_path); } -__always_inline static struct bound_path_t* path_read_alt(struct path* path) { +__always_inline static struct bound_path_t* path_read_alt(const struct path* path) { return _path_read(path, BOUND_PATH_ALTERNATE, path_hooks_support_bpf_d_path); } @@ -76,7 +76,7 @@ __always_inline static enum path_append_status_t path_append_dentry(struct bound return 0; } -__always_inline static struct bound_path_t* _path_read_append_d_entry(struct path* dir, struct dentry* dentry, bound_path_buffer_t key) { +__always_inline static struct bound_path_t* _path_read_append_d_entry(const struct path* dir, struct dentry* dentry, bound_path_buffer_t key) { struct bound_path_t* path = _path_read(dir, key, path_hooks_support_bpf_d_path); if (path == NULL) { @@ -105,7 +105,7 @@ __always_inline static struct bound_path_t* _path_read_append_d_entry(struct pat * directory and a dentry to an element in said directory, this helper * provides a short way of resolving the full path in one call. */ -__always_inline static struct bound_path_t* path_read_append_d_entry(struct path* dir, struct dentry* dentry) { +__always_inline static struct bound_path_t* path_read_append_d_entry(const struct path* dir, struct dentry* dentry) { return _path_read_append_d_entry(dir, dentry, BOUND_PATH_MAIN); } @@ -116,6 +116,6 @@ __always_inline static struct bound_path_t* path_read_append_d_entry(struct path * so in an alternate buffer. Useful for operations that take more than * one path, like path_rename. 
*/ -__always_inline static struct bound_path_t* path_read_alt_append_d_entry(struct path* dir, struct dentry* dentry) { +__always_inline static struct bound_path_t* path_read_alt_append_d_entry(const struct path* dir, struct dentry* dentry) { return _path_read_append_d_entry(dir, dentry, BOUND_PATH_ALTERNATE); } diff --git a/fact-ebpf/src/bpf/d_path.h b/fact-ebpf/src/bpf/d_path.h index a922600e..6867a0dd 100644 --- a/fact-ebpf/src/bpf/d_path.h +++ b/fact-ebpf/src/bpf/d_path.h @@ -140,9 +140,9 @@ __always_inline static long __d_path(const struct path* path, char* buf, int buf return buflen - ctx.offset; } -__always_inline static long d_path(struct path* path, char* buf, int buflen, bool use_bpf_helper) { +__always_inline static long d_path(const struct path* path, char* buf, int buflen, bool use_bpf_helper) { if (use_bpf_helper) { - return bpf_d_path(path, buf, buflen); + return bpf_d_path((struct path*)path, buf, buflen); } return __d_path(path, buf, buflen); } diff --git a/fact-ebpf/src/bpf/events.h b/fact-ebpf/src/bpf/events.h index 26254778..77432cca 100644 --- a/fact-ebpf/src/bpf/events.h +++ b/fact-ebpf/src/bpf/events.h @@ -49,14 +49,30 @@ __always_inline static void submit_open_event(struct metrics_by_hook_t* m, file_activity_type_t event_type, const char filename[PATH_MAX], inode_key_t* inode, - inode_key_t* parent_inode) { + inode_key_t* parent_inode, + bool use_bpf_d_path) { + struct event_t* event = bpf_ringbuf_reserve(&rb, sizeof(struct event_t), 0); + if (event == NULL) { + m->ringbuffer_full++; + return; + } + + __submit_event(event, m, event_type, filename, inode, parent_inode, use_bpf_d_path); +} + +__always_inline static void submit_mkdir_event(struct metrics_by_hook_t* m, + const char dirname[PATH_MAX], + inode_key_t* inode, + inode_key_t* parent_inode) { struct event_t* event = bpf_ringbuf_reserve(&rb, sizeof(struct event_t), 0); if (event == NULL) { m->ringbuffer_full++; return; } - __submit_event(event, m, event_type, filename, inode, 
parent_inode, true); + // mkdir events from kprobes can't use bpf_d_path (no vfsmount context) + // and only send the directory name (userspace constructs full path from parent inode) + __submit_event(event, m, FILE_ACTIVITY_CREATION, dirname, inode, parent_inode, false); } __always_inline static void submit_unlink_event(struct metrics_by_hook_t* m, diff --git a/fact-ebpf/src/bpf/main.c b/fact-ebpf/src/bpf/main.c index b7c044f1..f22edb93 100644 --- a/fact-ebpf/src/bpf/main.c +++ b/fact-ebpf/src/bpf/main.c @@ -61,7 +61,7 @@ int BPF_PROG(trace_file_open, struct file* file) { goto ignored; } - submit_open_event(&m->file_open, event_type, path->path, inode_to_submit, &parent_key); + submit_open_event(&m->file_open, event_type, path->path, inode_to_submit, &parent_key, true); return 0; @@ -228,3 +228,106 @@ int BPF_PROG(trace_path_rename, struct path* old_dir, m->path_rename.error++; return 0; } + +// Map to store vfs_mkdir parameters from entry to exit +// (written by the kprobe entry handler, looked up and deleted by the kretprobe exit handler) +// Key: pid_tgid from bpf_get_current_pid_tgid() to handle concurrent calls +// +// Limitation: This assumes vfs_mkdir doesn't recurse (same thread calling +// vfs_mkdir before a previous call returns). If recursion occurs, nested +// calls would overwrite each other's parameters. In practice, vfs_mkdir at +// the VFS layer rarely recurses, making this acceptable for monitoring +// typical container/host filesystem operations. +struct vfs_mkdir_args_t { + struct inode* dir; + struct dentry* dentry; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1024); + __type(key, u64); // pid_tgid + __type(value, struct vfs_mkdir_args_t); +} vfs_mkdir_args SEC(".maps"); + +// Capture parameters at function entry +// We store dir and dentry in a map because they're in registers at entry +// but won't be accessible at exit (kretprobe). The pid_tgid key ensures +// each thread gets its own entry, allowing concurrent mkdir operations. 
+SEC("kprobe/vfs_mkdir") +int trace_vfs_mkdir_entry(struct pt_regs* ctx) { + u64 pid_tgid = bpf_get_current_pid_tgid(); + struct vfs_mkdir_args_t args = {0}; + + // vfs_mkdir(mnt_idmap, dir, dentry, mode) + args.dir = (struct inode*)PT_REGS_PARM2_CORE(ctx); + args.dentry = (struct dentry*)PT_REGS_PARM3_CORE(ctx); + + bpf_map_update_elem(&vfs_mkdir_args, &pid_tgid, &args, BPF_ANY); + + return 0; +} + +// Process at function exit. NOTE(review): the return value of vfs_mkdir is never read here, so failed calls are not filtered out - confirm this is intended. +SEC("kretprobe/vfs_mkdir") +int trace_vfs_mkdir(struct pt_regs* ctx) { + struct metrics_t* m = get_metrics(); + if (m == NULL) { + return 0; + } + + m->path_mkdir.total++; + + // Retrieve stored parameters + u64 pid_tgid = bpf_get_current_pid_tgid(); + struct vfs_mkdir_args_t* args = bpf_map_lookup_elem(&vfs_mkdir_args, &pid_tgid); + if (args == NULL) { + m->path_mkdir.error++; + return 0; + } + + struct inode* dir = args->dir; + struct dentry* dentry = args->dentry; + + // Get parent inode (dir parameter) + inode_key_t parent_key = inode_to_key(dir); + + // Get child inode from the created dentry + struct inode* child_inode = BPF_CORE_READ(dentry, d_inode); + inode_key_t child_key = inode_to_key(child_inode); + + // Construct path with just the directory name + // Userspace will use the parent inode to construct the full host_path + struct bound_path_t* bound_path = get_bound_path(BOUND_PATH_MAIN); + if (bound_path == NULL) { + bpf_printk("Failed to get bound_path buffer"); + m->path_mkdir.error++; + goto cleanup; + } + + bound_path->path[0] = '/'; + bound_path->len = 1; + + if (path_append_dentry(bound_path, dentry) != PATH_APPEND_SUCCESS) { + m->path_mkdir.error++; + goto cleanup; + } + + inode_key_t* child_to_submit = &child_key; + inode_monitored_t status = is_monitored(child_key, bound_path, &parent_key, &child_to_submit); + + if (status == PARENT_MONITORED) { + inode_add(&child_key); + } + + if (status == NOT_MONITORED) { + m->path_mkdir.ignored++; + goto cleanup; + } + + submit_mkdir_event(&m->path_mkdir, 
bound_path->path, child_to_submit, &parent_key); + +cleanup: + bpf_map_delete_elem(&vfs_mkdir_args, &pid_tgid); + return 0; +} diff --git a/fact-ebpf/src/bpf/types.h b/fact-ebpf/src/bpf/types.h index 55005c00..96fdc190 100644 --- a/fact-ebpf/src/bpf/types.h +++ b/fact-ebpf/src/bpf/types.h @@ -111,4 +111,5 @@ struct metrics_t { struct metrics_by_hook_t path_chmod; struct metrics_by_hook_t path_chown; struct metrics_by_hook_t path_rename; + struct metrics_by_hook_t path_mkdir; }; diff --git a/fact-ebpf/src/lib.rs b/fact-ebpf/src/lib.rs index bd84ee08..a551f572 100644 --- a/fact-ebpf/src/lib.rs +++ b/fact-ebpf/src/lib.rs @@ -125,6 +125,7 @@ impl metrics_t { m.path_chmod = m.path_chmod.accumulate(&other.path_chmod); m.path_chown = m.path_chown.accumulate(&other.path_chown); m.path_rename = m.path_rename.accumulate(&other.path_rename); + m.path_mkdir = m.path_mkdir.accumulate(&other.path_mkdir); m } } diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index 38fe269d..6b88dce9 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -4,7 +4,7 @@ use anyhow::{Context, bail}; use aya::{ Btf, Ebpf, maps::{HashMap, LpmTrie, MapData, PerCpuArray, RingBuf}, - programs::{Program, lsm::LsmLink}, + programs::{Program, kprobe::KProbeLink, lsm::LsmLink}, }; use checks::Checks; use globset::{Glob, GlobSet, GlobSetBuilder}; @@ -24,6 +24,13 @@ mod checks; const RINGBUFFER_NAME: &str = "rb"; +// Links are stored to keep BPF programs attached - they auto-detach on drop. +// Fields are prefixed with _ to indicate they're kept for Drop behavior, not direct access. 
+enum Link { + Lsm { _link: LsmLink }, + KProbe { _link: KProbeLink }, +} + pub struct Bpf { obj: Ebpf, @@ -34,7 +41,7 @@ pub struct Bpf { paths_globset: GlobSet, - links: Vec<LsmLink>, + links: Vec<Link>, } impl Bpf { @@ -178,6 +185,7 @@ impl Bpf { }; match prog { Program::Lsm(prog) => prog.load(hook, btf)?, + Program::KProbe(prog) => prog.load()?, u => unimplemented!("{u:?}"), } } @@ -190,12 +198,33 @@ impl Bpf { self.links = self .obj .programs_mut() - .map(|(_, prog)| match prog { - Program::Lsm(prog) => { - let link_id = prog.attach()?; - prog.take_link(link_id) + .map(|(name, prog)| -> anyhow::Result<Link> { + match prog { + Program::Lsm(prog) => { + let link_id = prog.attach()?; + Ok(Link::Lsm { + _link: prog.take_link(link_id)?, + }) + } + Program::KProbe(prog) => { + // Extract function name from program name + // trace_vfs_mkdir_entry -> vfs_mkdir + // trace_vfs_mkdir -> vfs_mkdir (kretprobe) + let func_name = if name.ends_with("_entry") { + name.strip_suffix("_entry") + .and_then(|s| s.strip_prefix("trace_")) + .unwrap_or(name) + } else { + name.strip_prefix("trace_").unwrap_or(name) + }; + + let link_id = prog.attach(func_name, 0)?; + Ok(Link::KProbe { + _link: prog.take_link(link_id)?, + }) + } + u => unimplemented!("{u:?}"), } - u => unimplemented!("{u:?}"), }) .collect::<Result<_, _>>()?; Ok(()) diff --git a/fact/src/event/mod.rs b/fact/src/event/mod.rs index 40bd317a..816ba920 100644 --- a/fact/src/event/mod.rs +++ b/fact/src/event/mod.rs @@ -220,6 +220,21 @@ impl Event { } } + /// Set the `filename` field of the event to the one provided. + /// + /// In the case of operations that involve two paths, like rename, + /// the 'new' filename will be set. 
+ pub fn set_filename(&mut self, filename: PathBuf) { + match &mut self.file { + FileData::Open(data) => data.filename = filename, + FileData::Creation(data) => data.filename = filename, + FileData::Unlink(data) => data.filename = filename, + FileData::Chmod(data) => data.inner.filename = filename, + FileData::Chown(data) => data.inner.filename = filename, + FileData::Rename(data) => data.new.filename = filename, + } + } + /// Determine if the event should be ignored. /// /// With wildcards, the kernel can only match on the inode and diff --git a/fact/src/host_scanner.rs b/fact/src/host_scanner.rs index 36cacdef..8375211d 100644 --- a/fact/src/host_scanner.rs +++ b/fact/src/host_scanner.rs @@ -197,27 +197,34 @@ impl HostScanner { /// We use the parent inode provided by the eBPF code /// to look up the parent directory's host path, then construct the full /// path by appending the new file's name. - fn handle_creation_event(&self, event: &Event) -> anyhow::Result<()> { - let inode = event.get_inode(); - let parent_inode = event.get_parent_inode(); - if self.get_host_path(Some(inode)).is_some() || parent_inode.empty() { + fn handle_creation_event(&self, event: &mut Event) -> anyhow::Result<()> { + let inode = *event.get_inode(); + let parent_inode = *event.get_parent_inode(); + + if self.get_host_path(Some(&inode)).is_some() || parent_inode.empty() { return Ok(()); } - if let Some(filename) = event.get_filename().file_name() - && let Some(parent_host_path) = self.get_host_path(Some(parent_inode)) - { - let host_path = parent_host_path.join(filename); - self.update_entry_with_inode(*inode, host_path) - .with_context(|| { - format!( - "Failed to add creation event entry for {}", - filename.display() - ) - })?; - } + let Some(filename) = event.get_filename().file_name() else { + return Ok(()); + }; + let filename = filename.to_os_string(); - Ok(()) + let Some(parent_host_path) = self.get_host_path(Some(&parent_inode)) else { + return Ok(()); + }; + + // Construct full 
path and update tracking + let host_path = parent_host_path.join(&filename); + event.set_filename(host_path.clone()); + + self.update_entry_with_inode(inode, host_path) + .with_context(|| { + format!( + "Failed to add creation event entry for {}", + filename.to_string_lossy() + ) + }) } /// Periodically notify the host scanner main task that a scan needs @@ -263,7 +270,7 @@ impl HostScanner { // Handle file creation events by adding new inodes to the map if event.is_creation() && - let Err(e) = self.handle_creation_event(&event) { + let Err(e) = self.handle_creation_event(&mut event) { warn!("Failed to handle creation event: {e}"); } diff --git a/fact/src/metrics/kernel_metrics.rs b/fact/src/metrics/kernel_metrics.rs index d1a3a242..a6fcb7e8 100644 --- a/fact/src/metrics/kernel_metrics.rs +++ b/fact/src/metrics/kernel_metrics.rs @@ -13,6 +13,7 @@ pub struct KernelMetrics { path_chmod: EventCounter, path_chown: EventCounter, path_rename: EventCounter, + path_mkdir: EventCounter, map: PerCpuArray, } @@ -43,12 +44,18 @@ impl KernelMetrics { "Events processed by the path_rename LSM hook", &[], // Labels are not needed since `collect` will add them all ); + let path_mkdir = EventCounter::new( + "kernel_path_mkdir_events", + "Events processed by the vfs_mkdir kprobe hook", + &[], // Labels are not needed since `collect` will add them all + ); file_open.register(reg); path_unlink.register(reg); path_chmod.register(reg); path_chown.register(reg); path_rename.register(reg); + path_mkdir.register(reg); KernelMetrics { file_open, @@ -56,6 +63,7 @@ impl KernelMetrics { path_chmod, path_chown, path_rename, + path_mkdir, map: kernel_metrics, } } @@ -105,6 +113,7 @@ impl KernelMetrics { KernelMetrics::refresh_labels(&self.path_chmod, &metrics.path_chmod); KernelMetrics::refresh_labels(&self.path_chown, &metrics.path_chown); KernelMetrics::refresh_labels(&self.path_rename, &metrics.path_rename); + KernelMetrics::refresh_labels(&self.path_mkdir, &metrics.path_mkdir); Ok(()) } diff 
--git a/tests/test_path_mkdir.py b/tests/test_path_mkdir.py new file mode 100644 index 00000000..5242894d --- /dev/null +++ b/tests/test_path_mkdir.py @@ -0,0 +1,69 @@ +import os + +import pytest + +from event import Event, EventType, Process + + +def test_mkdir_nested(monitored_dir, server): + """ + Tests that creating nested directories tracks all inodes correctly. + + Args: + monitored_dir: Temporary directory path for creating the test directory. + server: The server instance to communicate with. + """ + process = Process.from_proc() + + # Create nested directories + level1 = os.path.join(monitored_dir, 'level1') + level2 = os.path.join(level1, 'level2') + level3 = os.path.join(level2, 'level3') + + os.mkdir(level1) + os.mkdir(level2) + os.mkdir(level3) + + # Create a file in the deepest directory + test_file = os.path.join(level3, 'deep_file.txt') + with open(test_file, 'w') as f: + f.write('nested content') + + events = [ + Event(process=process, event_type=EventType.CREATION, + file=level1, host_path=level1), + Event(process=process, event_type=EventType.CREATION, + file=level2, host_path=level2), + Event(process=process, event_type=EventType.CREATION, + file=level3, host_path=level3), + Event(process=process, event_type=EventType.CREATION, + file=test_file, host_path=test_file), + ] + + server.wait_events(events) + + +def test_mkdir_ignored(monitored_dir, ignored_dir, server): + """ + Tests that directories created outside monitored paths are ignored. + + Args: + monitored_dir: Temporary directory path that is monitored. + ignored_dir: Temporary directory path that is not monitored. + server: The server instance to communicate with. 
+ """ + process = Process.from_proc() + + # Create directory in ignored path - should not be tracked + ignored_subdir = os.path.join(ignored_dir, 'ignored_subdir') + os.mkdir(ignored_subdir) + + # Create directory in monitored path - should be tracked + monitored_subdir = os.path.join(monitored_dir, 'monitored_subdir') + os.mkdir(monitored_subdir) + + # Only the monitored directory should generate an event + e = Event(process=process, event_type=EventType.CREATION, + file=monitored_subdir, host_path=monitored_subdir) + + server.wait_events([e])