diff --git a/Makefile b/Makefile index 447c26b..d095626 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,7 @@ BUILD_LIB_DIR := $(BUILD_DIR)/lib MMODE_APPS := cond coop cpubench echo hello mqueues mutex \ pipes pipes_small pipes_struct prodcons progress \ rtsched semaphore suspend test64 test_libc timer timer_kill \ - privilege_switch + privilege_switch trace_sched # Auto-detect: if building an M-mode app, enable CONFIG_PRIVILEGED ifneq ($(filter $(MAKECMDGOALS),$(MMODE_APPS)),) @@ -29,7 +29,7 @@ include arch/$(ARCH)/build.mk INC_DIRS += -I $(SRC_DIR)/include \ -I $(SRC_DIR)/include/lib -KERNEL_OBJS := timer.o mqueue.o pipe.o semaphore.o mutex.o logger.o error.o syscall.o task.o memprot.o main.o +KERNEL_OBJS := timer.o mqueue.o pipe.o semaphore.o mutex.o logger.o error.o syscall.o task.o memprot.o debug_trace.o main.o KERNEL_OBJS := $(addprefix $(BUILD_KERNEL_DIR)/,$(KERNEL_OBJS)) deps += $(KERNEL_OBJS:%.o=%.o.d) @@ -41,7 +41,7 @@ deps += $(LIB_OBJS:%.o=%.o.d) APPS := coop echo hello mqueues semaphore mutex cond \ pipes pipes_small pipes_struct prodcons progress \ rtsched suspend test64 timer timer_kill \ - cpubench test_libc umode privilege_switch pmp + cpubench test_libc umode privilege_switch pmp trace_sched # Output files for __link target IMAGE_BASE := $(BUILD_DIR)/image diff --git a/app/trace_sched.c b/app/trace_sched.c new file mode 100644 index 0000000..f663692 --- /dev/null +++ b/app/trace_sched.c @@ -0,0 +1,264 @@ +#include +#include +#include + +#if CONFIG_DEBUG_TRACE +typedef struct { + uint16_t worker_a; + uint16_t worker_b; + uint16_t verifier; +} trace_sched_ids_t; + +static int tests_passed; +static int tests_failed; +static volatile bool scenario_done; +static volatile bool worker_a_ready; +static volatile bool worker_b_ready; +static volatile bool worker_b_suspended; +static volatile bool worker_b_resumed; +static volatile bool worker_b_destroyed; +static trace_sched_ids_t ids; + +#define TEST_ASSERT(condition, description) \ + do { \ + if (condition) { \ + printf("PASS: %s\n", description); \ + tests_passed++; \ + } else { \ + printf("FAIL: %s\n", description); \ + tests_failed++; \ + } \ + } while (0) + +static int find_event_by_type(uint8_t type, + uint32_t p1, + uint32_t p2, + uint32_t start_index) +{ + uint32_t count = debug_trace_count(); + + for (uint32_t i = start_index; i < count; i++) { + debug_event_t event; + + if (debug_trace_get(i, &event) < 0) + break; + + if (event.event_type == type && event.param1 == p1 && + event.param2 == p2) + return (int) i; + } + + return -1; +} + +static int find_event_by_type_any(uint8_t type, uint32_t p1) +{ + uint32_t count = debug_trace_count(); + + for (uint32_t i = 0; i < count; i++) { + debug_event_t event; + + if (debug_trace_get(i, &event) < 0) + break; + + if (event.event_type == type && event.param1 == p1) + return (int) i; + } + + return -1; +} + +static bool assert_event(const char *name, bool ok) +{ + TEST_ASSERT(ok, name); + return ok; +} + +static void flood_yield_events(int iterations) +{ + for (int i = 0; i < iterations; i++) + mo_task_yield(); +} + +static void assert_monotonic_timestamps(void) +{ + debug_event_t prev; + debug_event_t curr; + uint32_t count = debug_trace_count(); + + if (count < 2) { + TEST_ASSERT(true, "trace timestamps are monotonic"); + return; + } + + for (uint32_t i = 1; i < count; i++) { + if (debug_trace_get(i - 1, &prev) < 0 || + debug_trace_get(i, &curr) < 0) { + TEST_ASSERT(false, "trace timestamps are monotonic"); + return; + } + + if (prev.timestamp > curr.timestamp) { + TEST_ASSERT(false, "trace timestamps are monotonic"); + return; + } + } + + TEST_ASSERT(true, "trace timestamps are monotonic"); +} + +static void worker_a(void) +{ + worker_a_ready = true; + while (!worker_b_ready) + mo_task_wfi(); + mo_task_yield(); + mo_task_delay(2); + mo_task_suspend(ids.worker_b); + worker_b_suspended = true; + scenario_done = true; + + while (!worker_b_destroyed) + mo_task_wfi(); + + for (;;) + mo_task_wfi(); +} + +static void worker_b(void) +{ + worker_b_ready = true; + while (!worker_a_ready) + mo_task_wfi(); + + while (!worker_b_suspended) + mo_task_wfi(); + + while (!worker_b_resumed) + mo_task_wfi(); + + while (!worker_b_destroyed) + mo_task_wfi(); + + for (;;) + mo_task_wfi(); +} + +static void verifier(void) +{ + uint32_t trace_count; + uint32_t trace_overwrites; + uint32_t total_events; + + while (!scenario_done) + mo_task_wfi(); + + if (mo_task_resume(ids.worker_b) == 0) + worker_b_resumed = true; + + if (mo_task_cancel(ids.worker_b) == 0) + worker_b_destroyed = true; + + int create_a = find_event_by_type(EVENT_TASK_CREATE, ids.worker_a, 4U, 0); + int create_b = find_event_by_type(EVENT_TASK_CREATE, ids.worker_b, 4U, 0); + int yield_a = find_event_by_type(EVENT_TASK_YIELD, ids.worker_a, 0U, 0); + int delay_a = find_event_by_type(EVENT_TASK_DELAY, 2U, 0U, 0); + int suspend_b = + find_event_by_type(EVENT_TASK_SUSPEND, ids.worker_b, 0U, 0U); + int resume_b = find_event_by_type(EVENT_TASK_RESUME, ids.worker_b, 0U, 0); + int destroy_b = find_event_by_type_any(EVENT_TASK_DESTROY, ids.worker_b); + + assert_event("CREATE worker_a prio 4", create_a >= 0); + assert_event("CREATE worker_b prio 4", create_b >= 0); + assert_event("YIELD worker_a", yield_a >= 0); + assert_event("DELAY worker_a ticks 2", delay_a >= 0); + assert_event("SUSPEND worker_b", suspend_b >= 0); + assert_event("RESUME worker_b", resume_b >= 0); + assert_event("DESTROY worker_b", destroy_b >= 0); + assert_event("CREATE events precede worker_a yield", + create_a >= 0 && create_b >= 0 && yield_a >= 0 && + create_a < yield_a && create_b < yield_a); + assert_event("worker_a yield precedes delay", + yield_a >= 0 && delay_a >= 0 && yield_a < delay_a); + assert_event("worker_a delay precedes worker_b suspend", + delay_a >= 0 && suspend_b >= 0 && delay_a < suspend_b); + assert_event("worker_b suspend precedes resume", + suspend_b >= 0 && resume_b >= 0 && suspend_b < resume_b); + assert_event("worker_b resume precedes destroy", + resume_b >= 0 && destroy_b >= 0 && resume_b < destroy_b); + + bool found_switch = false; + for (uint32_t i = 0; i < debug_trace_count(); i++) { + debug_event_t event; + + if (debug_trace_get(i, &event) < 0) + break; + + if (event.event_type == EVENT_TASK_SWITCH && + ((event.param1 == ids.worker_a && event.param2 == ids.worker_b) || + (event.param1 == ids.worker_b && event.param2 == ids.worker_a))) { + assert_event("SWITCH between worker_a and worker_b", true); + found_switch = true; + break; + } + } + if (!found_switch) + assert_event("SWITCH between worker_a and worker_b", false); + + flood_yield_events(DEBUG_EVENT_BUFFER_SIZE + 8); + TEST_ASSERT(debug_trace_overwrites() > 0, + "trace reports overwrite after wraparound flood"); + assert_monotonic_timestamps(); + + mo_logger_flush(); + printf("Tests passed: %d\n", tests_passed); + printf("Tests failed: %d\n", tests_failed); + printf("Overall: %s\n", tests_failed == 0 ? "PASS" : "FAIL"); + if (tests_failed == 0) { + trace_count = debug_trace_count(); + trace_overwrites = debug_trace_overwrites(); + total_events = trace_count + trace_overwrites; + + printf("Trace totals: count=%u overwrites=%u total_events=%u\n", + trace_count, trace_overwrites, total_events); + } + if (tests_failed != 0) + debug_dump_events(); + + *(volatile uint32_t *) 0x100000U = 0x5555U; + for (;;) + mo_task_wfi(); +} + +int32_t app_main(void) +{ + debug_clear_events(); + tests_passed = 0; + tests_failed = 0; + scenario_done = false; + worker_a_ready = false; + worker_b_ready = false; + worker_b_suspended = false; + worker_b_resumed = false; + worker_b_destroyed = false; + + ids.worker_a = mo_task_spawn(worker_a, DEFAULT_STACK_SIZE); + ids.worker_b = mo_task_spawn(worker_b, DEFAULT_STACK_SIZE); + ids.verifier = mo_task_spawn(verifier, DEFAULT_STACK_SIZE); + + mo_task_priority(ids.worker_a, TASK_PRIO_NORMAL); + mo_task_priority(ids.worker_b, TASK_PRIO_NORMAL); + mo_task_priority(ids.verifier, TASK_PRIO_IDLE); + + return 1; +} +#else +int32_t app_main(void) +{ + debug_clear_events(); + mo_logger_flush(); + printf("Tracing disabled build: PASS\n"); + *(volatile uint32_t *) 0x100000U = 0x5555U; + return 0; +} +#endif diff --git a/config.h b/config.h index 24547a3..cd12f78 100644 --- a/config.h +++ b/config.h @@ -4,3 +4,12 @@ #ifndef CONFIG_STACK_PROTECTION #define CONFIG_STACK_PROTECTION 1 /* Default: enabled for safety */ #endif + +/* Kernel event tracing configuration */ +#ifndef CONFIG_DEBUG_TRACE +#define CONFIG_DEBUG_TRACE 1 +#endif + +#ifndef DEBUG_EVENT_BUFFER_SIZE +#define DEBUG_EVENT_BUFFER_SIZE 256 +#endif diff --git a/include/sys/debug_trace.h b/include/sys/debug_trace.h new file mode 100644 index 0000000..291752a --- /dev/null +++ b/include/sys/debug_trace.h @@ -0,0 +1,73 @@ +#pragma once + +#include + +typedef enum { + EVENT_TASK_CREATE = 0, + EVENT_TASK_DESTROY, + EVENT_TASK_SWITCH, + EVENT_TASK_SUSPEND, + EVENT_TASK_RESUME, + EVENT_TASK_DELAY, + EVENT_TASK_YIELD, + EVENT_SEM_WAIT, + EVENT_SEM_POST, + EVENT_MUTEX_LOCK, + EVENT_MUTEX_UNLOCK, + EVENT_PIPE_READ, + EVENT_PIPE_WRITE, + EVENT_MQUEUE_SEND, + EVENT_MQUEUE_RECV, + EVENT_ISR_ENTER, + EVENT_ISR_EXIT, + EVENT_EXCEPTION, + EVENT_TYPE_MAX +} debug_event_type_t; + +typedef struct { + uint32_t timestamp; + uint8_t event_type; + uint8_t reserved; + uint16_t task_id; + uint32_t param1; + uint32_t param2; +} debug_event_t; + +#if CONFIG_DEBUG_TRACE +void debug_trace_event(uint8_t event_type, uint32_t param1, uint32_t param2); +void debug_dump_events(void); +void debug_clear_events(void); +uint32_t debug_trace_count(void); +uint32_t debug_trace_overwrites(void); +int32_t debug_trace_get(uint32_t index, debug_event_t *event); +#else +static inline void debug_trace_event(uint8_t event_type, + uint32_t param1, + uint32_t param2) +{ + (void) event_type; + (void) param1; + (void) param2; +} + +static inline void debug_dump_events(void) {} + +static inline void debug_clear_events(void) {} + +static inline uint32_t debug_trace_count(void) +{ + return 0U; +} + +static inline uint32_t debug_trace_overwrites(void) +{ + return 0U; +} + +static inline int32_t debug_trace_get(uint32_t index, debug_event_t *event) +{ + (void) index; + (void) event; + return -1; +} +#endif diff --git a/kernel/debug_trace.c b/kernel/debug_trace.c new file mode 100644 index 0000000..f1c00d4 --- /dev/null +++ b/kernel/debug_trace.c @@ -0,0 +1,159 @@ +#include +#include +#include + +#if CONFIG_DEBUG_TRACE + +static debug_event_t event_buffer[DEBUG_EVENT_BUFFER_SIZE]; +static uint32_t event_write_index; +static uint32_t event_count; +static uint32_t event_overwrites; + +typedef struct { + int32_t saved_mie; +} debug_trace_lock_t; + +static inline debug_trace_lock_t debug_trace_enter(void) +{ + debug_trace_lock_t lock = {.saved_mie = _di()}; + + return lock; +} + +static inline void debug_trace_leave(debug_trace_lock_t lock) +{ + hal_interrupt_set(lock.saved_mie); +} + +static const char *debug_trace_event_name(uint8_t event_type) +{ + static const char *const event_names[EVENT_TYPE_MAX] = { + [EVENT_TASK_CREATE] = "TASK_CREATE", + [EVENT_TASK_DESTROY] = "TASK_DESTROY", + [EVENT_TASK_SWITCH] = "TASK_SWITCH", + [EVENT_TASK_SUSPEND] = "TASK_SUSPEND", + [EVENT_TASK_RESUME] = "TASK_RESUME", + [EVENT_TASK_DELAY] = "TASK_DELAY", + [EVENT_TASK_YIELD] = "TASK_YIELD", + [EVENT_SEM_WAIT] = "SEM_WAIT", + [EVENT_SEM_POST] = "SEM_POST", + [EVENT_MUTEX_LOCK] = "MUTEX_LOCK", + [EVENT_MUTEX_UNLOCK] = "MUTEX_UNLOCK", + [EVENT_PIPE_READ] = "PIPE_READ", + [EVENT_PIPE_WRITE] = "PIPE_WRITE", + [EVENT_MQUEUE_SEND] = "MQUEUE_SEND", + [EVENT_MQUEUE_RECV] = "MQUEUE_RECV", + [EVENT_ISR_ENTER] = "ISR_ENTER", + [EVENT_ISR_EXIT] = "ISR_EXIT", + [EVENT_EXCEPTION] = "EXCEPTION", + }; + + if (event_type >= EVENT_TYPE_MAX) + return "UNKNOWN"; + + return event_names[event_type]; +} + +void debug_trace_event(uint8_t event_type, uint32_t param1, uint32_t param2) +{ + debug_event_t *event = &event_buffer[event_write_index]; + event->timestamp = kcb ? kcb->ticks : 0; + event->event_type = event_type; + event->task_id = (kcb && kcb->task_current && kcb->task_current->data) + ? ((tcb_t *) kcb->task_current->data)->id + : 0; + event->reserved = 0; + event->param1 = param1; + event->param2 = param2; + + event_write_index = (event_write_index + 1) % DEBUG_EVENT_BUFFER_SIZE; + if (event_count < DEBUG_EVENT_BUFFER_SIZE) { + event_count++; + } else { + event_overwrites++; + } +} + +void debug_dump_events(void) +{ + uint32_t count; + uint32_t overwrites; + debug_trace_lock_t lock = debug_trace_enter(); + + count = event_count; + overwrites = event_overwrites; + debug_trace_leave(lock); + + printf("debug trace: %u retained event%s, %u overwrite%s\n", count, + (count == 1) ? "" : "s", overwrites, (overwrites == 1) ? "" : "s"); + + for (uint32_t i = 0; i < count; i++) { + debug_event_t event; + + if (debug_trace_get(i, &event) < 0) + break; + + printf("[%u] %s ts=%u tid=%u p1=%u p2=%u\n", i, + debug_trace_event_name(event.event_type), event.timestamp, + event.task_id, event.param1, event.param2); + } +} + +void debug_clear_events(void) +{ + debug_trace_lock_t lock = debug_trace_enter(); + + event_write_index = 0; + event_count = 0; + event_overwrites = 0; + + debug_trace_leave(lock); +} + +uint32_t debug_trace_count(void) +{ + uint32_t count; + debug_trace_lock_t lock = debug_trace_enter(); + + count = event_count; + + debug_trace_leave(lock); + return count; +} + +uint32_t debug_trace_overwrites(void) +{ + uint32_t overwrites; + debug_trace_lock_t lock = debug_trace_enter(); + + overwrites = event_overwrites; + + debug_trace_leave(lock); + return overwrites; +} + +int32_t debug_trace_get(uint32_t index, debug_event_t *event) +{ + uint32_t count; + uint32_t start; + debug_trace_lock_t lock; + + if (!event) + return -1; + + lock = debug_trace_enter(); + + count = event_count; + if (index >= count) { + debug_trace_leave(lock); + return -1; + } + + start = (count < DEBUG_EVENT_BUFFER_SIZE) ? 0 : event_write_index; + *event = event_buffer[(start + index) % DEBUG_EVENT_BUFFER_SIZE]; + debug_trace_leave(lock); + + return 0; +} + +#endif diff --git a/kernel/task.c b/kernel/task.c index 286e83e..379173a 100644 --- a/kernel/task.c +++ b/kernel/task.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include "private/error.h" @@ -374,7 +375,12 @@ void panic(int32_t ecode) hal_panic(); } -/* Weak aliases for context switching functions. */ +/* Weak aliases for context switching functions. + * + * yield() is the internal scheduler primitive used by both explicit and + * implicit reschedule paths. EVENT_TASK_YIELD must stay above this layer so + * delay/block/suspend driven handoffs are not misreported as explicit yields. + */ void dispatch(void); void yield(void); void _dispatch(void) __attribute__((weak, alias("dispatch"))); @@ -692,6 +698,14 @@ void dispatch(void) next_task->state = TASK_RUNNING; next_task->time_slice = get_priority_timeslice(next_task->prio_level); +/* Record switches only in tracing-enabled builds so the disabled scheduler + * path keeps upstream behavior and pays no trace-only branch cost. + */ +#if CONFIG_DEBUG_TRACE + if (next_task != prev_task) + debug_trace_event(EVENT_TASK_SWITCH, prev_task->id, next_task->id); +#endif + /* Switch PMP configuration if tasks have different memory spaces */ pmp_switch_context(prev_task->mspace, next_task->mspace); @@ -949,6 +963,9 @@ static int32_t task_spawn_internal(void *task_entry, /* Add to cache and mark ready */ cache_task(tcb->id, tcb); sched_enqueue_task(tcb); +#if CONFIG_DEBUG_TRACE + debug_trace_event(EVENT_TASK_CREATE, tcb->id, tcb->prio_level); +#endif return tcb->id; } @@ -1003,6 +1020,9 @@ int32_t mo_task_cancel(uint16_t id) } tcb_t *tcb = node->data; +#if CONFIG_DEBUG_TRACE + uint16_t previous_state = tcb ? tcb->state : 0; +#endif if (!tcb || tcb->state == TASK_RUNNING) { CRITICAL_LEAVE(); return ERR_TASK_CANT_REMOVE; @@ -1020,6 +1040,9 @@ int32_t mo_task_cancel(uint16_t id) } } +#if CONFIG_DEBUG_TRACE + debug_trace_event(EVENT_TASK_DESTROY, id, previous_state); +#endif CRITICAL_LEAVE(); /* Free memory outside critical section */ @@ -1034,7 +1057,19 @@ int32_t mo_task_cancel(uint16_t id) void mo_task_yield(void) { - _yield(); + if (unlikely(!kcb || !kcb->task_current || !kcb->task_current->data)) + return; + + /* Trace only the explicit public API. Internal reschedule paths still + * funnel through yield()/_yield() but intentionally do not emit + * EVENT_TASK_YIELD. + */ +#if CONFIG_DEBUG_TRACE + NOSCHED_ENTER(); + debug_trace_event(EVENT_TASK_YIELD, mo_task_id(), 0); + NOSCHED_LEAVE(); +#endif + yield(); } void mo_task_delay(uint16_t ticks) @@ -1056,9 +1091,12 @@ void mo_task_delay(uint16_t ticks) /* Set delay and blocked state - scheduler will skip blocked tasks */ self->delay = ticks; self->state = TASK_BLOCKED; +#if CONFIG_DEBUG_TRACE + debug_trace_event(EVENT_TASK_DELAY, ticks, 0); +#endif NOSCHED_LEAVE(); - mo_task_yield(); + _yield(); } int32_t mo_task_suspend(uint16_t id) @@ -1082,11 +1120,13 @@ int32_t mo_task_suspend(uint16_t id) task->state = TASK_SUSPENDED; bool is_current = (kcb->task_current->data == task); - +#if CONFIG_DEBUG_TRACE + debug_trace_event(EVENT_TASK_SUSPEND, id, 0); +#endif CRITICAL_LEAVE(); if (is_current) - mo_task_yield(); + _yield(); return ERR_OK; } @@ -1111,7 +1151,9 @@ int32_t mo_task_resume(uint16_t id) /* mark as ready - scheduler will find it */ task->state = TASK_READY; - +#if CONFIG_DEBUG_TRACE + debug_trace_event(EVENT_TASK_RESUME, id, 0); +#endif CRITICAL_LEAVE(); return ERR_OK; } diff --git a/tools/qemu/Makefile b/tools/qemu/Makefile new file mode 100644 index 0000000..94095c9 --- /dev/null +++ b/tools/qemu/Makefile @@ -0,0 +1,25 @@ +PLUGIN := debug_trace_profiler.so +SRC := debug_trace_profiler.c + +QEMU_PLUGIN_INC ?= +CC ?= cc +PKG_CONFIG ?= pkg-config + +GLIB_CFLAGS := $(shell $(PKG_CONFIG) --cflags glib-2.0 2>/dev/null) + +CFLAGS ?= -O2 +CFLAGS += -fPIC -shared -fvisibility=hidden +ifneq ($(strip $(QEMU_PLUGIN_INC)),) + CFLAGS += -I$(QEMU_PLUGIN_INC) +endif +CFLAGS += $(GLIB_CFLAGS) + +.PHONY: all clean + +all: $(PLUGIN) + +$(PLUGIN): $(SRC) + $(CC) $(CFLAGS) -o $@ $< + +clean: + rm -f $(PLUGIN) diff --git a/tools/qemu/README.md b/tools/qemu/README.md new file mode 100644 index 0000000..cd0bf5a --- /dev/null +++ b/tools/qemu/README.md @@ -0,0 +1,133 @@ +# QEMU Trace Profiling + +This directory contains a QEMU TCG plugin used to profile the Linmo tracing hot path without modifying kernel behavior. + +## Scope + +The current plugin targets `debug_trace_event()` and reports: + +- `insn_count` +- `store_count` + +These numbers are intended for QEMU-based relative analysis only. They are not hardware-cycle measurements. + +## Files + +- `debug_trace_profiler.c` + - QEMU TCG plugin that counts guest instructions and stores within a target PC range +- `Makefile` + - builds `debug_trace_profiler.so` + +## Prerequisites + +- QEMU system emulator with plugin support +- `qemu-plugin.h` +- `cc` +- `pkg-config` +- `glib-2.0` development headers visible to `pkg-config` + +In the current environment, the plugin header is provided through: + +```bash +QEMU_PLUGIN_INC=/path/to/qemu/include +``` + +## Build + +From the Linmo repo root, export the environment first: + +```bash +export CROSS_COMPILE= +export QEMU_PLUGIN_INC=/path/to/qemu/include +export QEMU_BIN=$(command -v qemu-system-riscv32) +export TRACE_PLUGIN_SO="$(pwd)/tools/qemu/debug_trace_profiler.so" +``` + +Then build the plugin: + +```bash +make -C tools/qemu clean +make -C tools/qemu QEMU_PLUGIN_INC="$QEMU_PLUGIN_INC" +``` + +This produces: + +```text +tools/qemu/debug_trace_profiler.so +``` + +## Reproduction + +### 1. Build the target workload + +For the bounded scheduler-tracing workload: + +```bash +make trace_sched CROSS_COMPILE="$CROSS_COMPILE" +``` + +If your toolchain binaries are not already on `PATH`, export that before running the commands above. + +### 2. Resolve the `debug_trace_event()` address range + +```bash +"${CROSS_COMPILE}nm" -S --defined-only build/image.elf | \ + rg ' debug_trace_event$' +``` + +Example output: + +```text +80004a54 000000f0 T debug_trace_event +``` + +Interpret that as: + +- `start = 0x80004a54` +- `size = 0x000000f0` +- `end = 0x80004b44` + +### 3. Run QEMU with `icount` and the plugin + +```bash +"$QEMU_BIN" \ + -machine virt -nographic -bios none \ + -kernel build/image.elf \ + -icount shift=0,align=off,sleep=off \ + -plugin "$TRACE_PLUGIN_SO",start=0x80004a54,end=0x80004b44 +``` + +Expected bounded workload output includes: + +```text +Overall: PASS +Trace totals: count=256 overwrites=1090 total_events=1346 +debug_trace_profiler: start=0x80004a54 end=0x80004b44 insn_count=62795 store_count=17511 +``` + +## Interpreting the result + +The plugin itself reports only: + +- PC range +- total instruction count within that range +- total store count within that range + +To compute per-event values, use the workload diagnostics: + +```text +total_events = debug_trace_count() + debug_trace_overwrites() +``` + +With the example above: + +- `total_events = 1346` +- `instructions/event = 62795 / 1346 = 46.653` +- `stores/event = 17511 / 1346 = 13.010` + +## Notes + +- `QEMU_BIN` should resolve to a QEMU binary with plugin support. +- `TRACE_PLUGIN_SO` should remain an absolute path. +- The plugin is intended for deterministic QEMU-based profiling with `-icount`. +- This tooling characterizes the tracing hot path; it should not be used as proof of hardware-cycle timing. diff --git a/tools/qemu/debug_trace_profiler.c b/tools/qemu/debug_trace_profiler.c new file mode 100644 index 0000000..e33fdea --- /dev/null +++ b/tools/qemu/debug_trace_profiler.c @@ -0,0 +1,152 @@ +#include +#include +#include +#include +#include +#include +#include + +#include + +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; + +typedef struct { + /* Instrument the half-open guest PC range [start, end). + * For example, if: + * nm -S --defined-only build/image.elf + * prints: + * 80004a54 000000f0 T debug_trace_event + * then: + * start = 0x80004a54 + * size = 0x000000f0 + * end = start + size = 0x80004b44 + */ + uint64_t start; + uint64_t end; + uint64_t insn_count; + uint64_t store_count; +} profiler_state_t; + +static profiler_state_t g_state; + +static bool parse_u64_arg(const char *value, uint64_t *out) +{ + char *end = NULL; + size_t len; + + if (!value || value[0] == '\0' || value[0] == '+' || value[0] == '-') { + return false; + } + + len = strlen(value); + if (isspace((unsigned char) value[0]) || + isspace((unsigned char) value[len - 1])) { + return false; + } + + errno = 0; + uint64_t parsed = strtoull(value, &end, 0); + + if (errno == ERANGE || !end || end == value || *end != '\0') { + return false; + } + + *out = parsed; + return true; +} + +static void insn_exec_cb(unsigned int vcpu_index, void *userdata) +{ + (void) vcpu_index; + (void) userdata; + __atomic_fetch_add(&g_state.insn_count, 1, __ATOMIC_RELAXED); +} + +static void mem_exec_cb(unsigned int vcpu_index, + qemu_plugin_meminfo_t info, + uint64_t vaddr, + void *userdata) +{ + (void) vcpu_index; + (void) vaddr; + (void) userdata; + + if (qemu_plugin_mem_is_store(info)) { + __atomic_fetch_add(&g_state.store_count, 1, __ATOMIC_RELAXED); + } +} + +static void plugin_exit_cb(qemu_plugin_id_t id, void *userdata) +{ + (void) id; + (void) userdata; + + fprintf(stderr, + "debug_trace_profiler: start=0x%" PRIx64 " end=0x%" PRIx64 + " insn_count=%" PRIu64 " store_count=%" PRIu64 "\n", + g_state.start, g_state.end, + __atomic_load_n(&g_state.insn_count, __ATOMIC_RELAXED), + __atomic_load_n(&g_state.store_count, __ATOMIC_RELAXED)); +} + +static void tb_trans_cb(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) +{ + size_t n_insns = qemu_plugin_tb_n_insns(tb); + + for (size_t i = 0; i < n_insns; ++i) { + struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i); + uint64_t insn_pc = qemu_plugin_insn_vaddr(insn); + + if (insn_pc < g_state.start || insn_pc >= g_state.end) { + continue; + } + + qemu_plugin_register_vcpu_insn_exec_cb(insn, insn_exec_cb, + QEMU_PLUGIN_CB_NO_REGS, NULL); + qemu_plugin_register_vcpu_mem_cb( + insn, mem_exec_cb, QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_MEM_W, NULL); + } +} + +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, + const qemu_info_t *info, + int argc, + char **argv) +{ + bool have_start = false; + bool have_end = false; + + (void) info; + + for (int i = 0; i < argc; ++i) { + const char *arg = argv[i]; + const char *eq = arg ? strchr(arg, '=') : NULL; + size_t name_len; + + if (!eq) { + continue; + } + + name_len = (size_t) (eq - arg); + if (name_len == 5 && strncmp(arg, "start", name_len) == 0) { + have_start = parse_u64_arg(eq + 1, &g_state.start); + } else if (name_len == 3 && strncmp(arg, "end", name_len) == 0) { + have_end = parse_u64_arg(eq + 1, &g_state.end); + } + } + + if (!have_start || !have_end || g_state.start >= g_state.end) { + fprintf(stderr, + "debug_trace_profiler: expected start=0x... and end=0x...\n"); + return -1; + } + + /* start/end are resolved outside the plugin from the target ELF symbol + * table. end should be computed as start + size from `nm -S` output, so + * the plugin can filter instructions with start <= PC < end. + */ + + qemu_plugin_register_vcpu_tb_trans_cb(id, tb_trans_cb); + qemu_plugin_register_atexit_cb(id, plugin_exit_cb, NULL); + return 0; +}