diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 17443ce..38da336 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -97,6 +97,9 @@ jobs: - name: Test (vcan) if: steps.vcan.outputs.available == 'true' run: cargo llvm-cov --no-report nextest --all-features --run-ignored ignored-only + # Run clippy twice - once with the 1.89 MSRV, and once with the latest stable toolchain + - name: Clippy + run: cargo clippy --no-deps --all-targets --all-features - name: Coverage report run: | cargo llvm-cov report --cobertura --output-path coverage.xml @@ -106,9 +109,6 @@ jobs: PERCENT="$(echo "($RATE * 100)/1" | bc)" echo "PERCENT=$PERCENT" echo "COVERAGE_PERCENT=$PERCENT" >> $GITHUB_ENV - # Run clippy twice - once with the 1.89 MSRV, and once with the latest stable toolchain - - name: Clippy - run: cargo clippy --no-deps --all-targets --all-features - name: Update coverage badge uses: schneegans/dynamic-badges-action@v1.7.0 if: github.ref_name == github.event.repository.default_branch @@ -125,6 +125,20 @@ jobs: valColorRange: ${{ env.COVERAGE_PERCENT }} minColorRange: 40 maxColorRange: 65 + - name: Setup nightly toolchain (ASAN) + uses: dtolnay/rust-toolchain@master + with: + toolchain: nightly + components: rust-src + - name: Test (ASAN) + env: + RUSTFLAGS: -D warnings -Zsanitizer=address + run: cargo +nightly nextest run -Zbuild-std --target x86_64-unknown-linux-gnu --all-features --no-tests=warn + - name: Test (ASAN, vcan) + if: steps.vcan.outputs.available == 'true' + env: + RUSTFLAGS: -D warnings -Zsanitizer=address + run: cargo +nightly nextest run -Zbuild-std --target x86_64-unknown-linux-gnu --all-features --run-ignored ignored-only # Canary job: verifies vcan is available on the runner. 
Shows yellow when the # linux-modules-extra package drifts from the runner kernel version, which means the socketcan diff --git a/Cargo.toml b/Cargo.toml index bb161d9..33f546b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,9 +11,10 @@ version = "0.1.0-rc0" edition = "2024" license = "MIT" rust-version = "1.89" -description = "Opinionated CAN utils written in Rust" +description = "Opinionated CAN utilities written in Rust" [workspace.dependencies] +assert_cmd = { version = "2.2.0", features = ["color-auto"] } ctor = "0.6" eyre = "0.6" gungraun = "0.17" @@ -21,5 +22,6 @@ io-uring = "0.7" libc = "0.2" neli = "0.7" tabled = "0.18" +tempfile = "3.27.0" tracing = "0.1" tracing-subscriber = "0.3" diff --git a/README.md b/README.md index dec4706..4215200 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ ![release workflow](https://github.com/Notgnoshi/candemonium/actions/workflows/release.yml/badge.svg?event=push) ![code coverage](https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/Notgnoshi/55f3f6cae2abdc5d011d907624dfb883/raw/can-utils-rs-coverage.json) -Opinionated CAN utils written in Rust. +Opinionated CAN utilities written in Rust. ## Purpose @@ -16,3 +16,9 @@ constraints. A modern-ish Linux with io_uring and socketcan available. A ~4 core ~1GHz arm64 CPU with 1GB memory and 4+ J1939 CAN networks. 
+ +## Documentation + +* See [quickstart.md](/docs/developer/quickstart.md) for a developer quickstart +* See `docs/design/` for design documents +* See `docs/user/` for user documentation diff --git a/candumpr/Cargo.toml b/candumpr/Cargo.toml index 78554e3..08abbd4 100644 --- a/candumpr/Cargo.toml +++ b/candumpr/Cargo.toml @@ -13,12 +13,12 @@ ci = [] eyre.workspace = true io-uring.workspace = true libc.workspace = true +tracing.workspace = true [dev-dependencies] ctor.workspace = true gungraun.workspace = true tabled.workspace = true -tracing.workspace = true tracing-subscriber.workspace = true vcan-fixture = { path = "../vcan-fixture" } diff --git a/candumpr/benches/common/mod.rs b/candumpr/benches/common/mod.rs index b295715..fa4ed7e 100644 --- a/candumpr/benches/common/mod.rs +++ b/candumpr/benches/common/mod.rs @@ -1,6 +1,6 @@ use std::os::unix::io::{AsFd, BorrowedFd, OwnedFd}; use std::sync::Arc; -use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering}; use std::time::{Duration, Instant}; use candumpr::can::{self, CanFrame}; @@ -51,6 +51,38 @@ pub const BACKENDS: &[BackendDef] = &[ }, ]; +// --- Sequence checker --- + +fn frame_seq(frame: &CanFrame) -> u32 { + u32::from_le_bytes([frame.data[0], frame.data[1], frame.data[2], frame.data[3]]) +} + +struct SeqCheck { + expected: Vec, +} + +impl SeqCheck { + fn new(n: usize) -> Self { + Self { + expected: (0..n).map(|_| AtomicU32::new(0)).collect(), + } + } + + fn check(&self, idx: usize, frame: &CanFrame) { + let actual = frame_seq(frame); + let expected = self.expected[idx].load(Ordering::Relaxed); + if actual != expected { + tracing::warn!( + iface = idx, + received = actual, + expected = expected, + "out-of-sequence frame" + ); + } + self.expected[idx].store(actual.wrapping_add(1), Ordering::Relaxed); + } +} + // --- Backend run functions --- // // Single-threaded backends: wrap the backend's run() with getrusage_thread() before/after. 
@@ -58,12 +90,14 @@ pub const BACKENDS: &[BackendDef] = &[ // aggregate the deltas. fn run_dedicated(sockets: Vec, stop: Arc, count: &AtomicU64) -> (u64, Rusage) { + let seq = SeqCheck::new(sockets.len()); let backend = DedicatedRecv::new(sockets); let rusage = std::sync::Mutex::new(Rusage::default()); let total = backend .run_instrumented( stop, - &|_idx, _frame, _meta| { + &|idx, frame, _meta| { + seq.check(idx, frame); count.fetch_add(1, Ordering::Relaxed); }, &|_idx, inner| { @@ -78,10 +112,12 @@ fn run_dedicated(sockets: Vec, stop: Arc, count: &AtomicU64 } fn run_epoll(sockets: Vec, stop: Arc, count: &AtomicU64) -> (u64, Rusage) { + let seq = SeqCheck::new(sockets.len()); let mut backend = EpollRecv::new(sockets).unwrap(); let before = getrusage_thread(); let total = backend - .run(stop, &mut |_idx, _frame, _meta| { + .run(stop, &mut |idx, frame, _meta| { + seq.check(idx, frame); count.fetch_add(1, Ordering::Relaxed); }) .unwrap(); @@ -90,10 +126,12 @@ fn run_epoll(sockets: Vec, stop: Arc, count: &AtomicU64) -> } fn run_recvmmsg(sockets: Vec, stop: Arc, count: &AtomicU64) -> (u64, Rusage) { + let seq = SeqCheck::new(sockets.len()); let mut backend = RecvmmsgRecv::new(sockets).unwrap(); let before = getrusage_thread(); let total = backend - .run(stop, &mut |_idx, _frame, _meta| { + .run(stop, &mut |idx, frame, _meta| { + seq.check(idx, frame); count.fetch_add(1, Ordering::Relaxed); }) .unwrap(); @@ -102,10 +140,12 @@ fn run_recvmmsg(sockets: Vec, stop: Arc, count: &AtomicU64) } fn run_uring(sockets: Vec, stop: Arc, count: &AtomicU64) -> (u64, Rusage) { + let seq = SeqCheck::new(sockets.len()); let mut backend = UringRecv::new(sockets).unwrap(); let before = getrusage_thread(); let total = backend - .run(stop, &mut |_idx, _frame, _meta| { + .run(stop, &mut |idx, frame, _meta| { + seq.check(idx, frame); count.fetch_add(1, Ordering::Relaxed); }) .unwrap(); @@ -118,10 +158,12 @@ fn run_uring_multi( stop: Arc, count: &AtomicU64, ) -> (u64, Rusage) { + let seq = 
SeqCheck::new(sockets.len()); let mut backend = UringMultiRecv::new(sockets).unwrap(); let before = getrusage_thread(); let total = backend - .run(stop, &mut |_idx, _frame, _meta| { + .run(stop, &mut |idx, frame, _meta| { + seq.check(idx, frame); count.fetch_add(1, Ordering::Relaxed); }) .unwrap(); @@ -155,18 +197,23 @@ fn sender_loop( } frame_idx += 1; } + // Let the receiver drain in-flight frames before signaling stop. Several receivers use 100ms + // as a timeout to wake themselves up. This isn't a great design, but it's possible to drop + // frames, so I can't just say "run until all frames have been received". + std::thread::sleep(Duration::from_millis(110)); stop.store(true, Ordering::Relaxed); } fn make_frame(iface_idx: usize, frame_idx: u32) -> CanFrame { + let seq = frame_idx.to_le_bytes(); CanFrame::new( ((iface_idx as u32) << 8) | (frame_idx & 0xFF) | libc::CAN_EFF_FLAG, &[ + seq[0], + seq[1], + seq[2], + seq[3], iface_idx as u8, - frame_idx as u8, - 0xDE, - 0xAD, - 0xBE, 0xEF, 0xCA, 0xFE, diff --git a/candumpr/examples/dump.rs b/candumpr/examples/dump.rs new file mode 100644 index 0000000..382a63e --- /dev/null +++ b/candumpr/examples/dump.rs @@ -0,0 +1,72 @@ +//! Listen on CAN interfaces using the io_uring multishot backend and print received frames. +//! +//! Usage: uring_multi_dump [iface...] 
+ +use std::sync::Arc; +use std::sync::atomic::{AtomicBool, Ordering}; + +use candumpr::can::{self, CanFrame}; +use candumpr::recv::uring_multi::UringMultiRecv; + +fn main() -> std::io::Result<()> { + tracing_subscriber::fmt() + .with_writer(std::io::stderr) + .with_max_level(tracing::Level::DEBUG) + .init(); + + let ifaces: Vec = std::env::args().skip(1).collect(); + if ifaces.is_empty() { + eprintln!("usage: uring_multi_dump [iface...]"); + std::process::exit(1); + } + + let sockets: Vec<_> = ifaces + .iter() + .map(|name| can::open_can_raw(name)) + .collect::>()?; + + let mut backend = UringMultiRecv::new(sockets)?; + + let stop = Arc::new(AtomicBool::new(false)); + let stop2 = stop.clone(); + ctrlc(stop2); + + let total = backend.run(stop, &mut |idx, frame, _meta| { + print_frame(idx, frame); + })?; + + eprintln!("{total} frames received"); + Ok(()) +} + +fn print_frame(idx: usize, frame: &CanFrame) { + let id = frame.can_id & !libc::CAN_EFF_FLAG & !libc::CAN_RTR_FLAG & !libc::CAN_ERR_FLAG; + + print!("{idx} {id:08X} [{}]", frame.len); + for i in 0..frame.len as usize { + print!(" {:02X}", frame.data[i]); + } + println!(); +} + +/// Install a Ctrl-C handler that sets the stop flag. +fn ctrlc(stop: Arc) { + unsafe { + libc::signal( + libc::SIGINT, + signal_handler as *const () as libc::sighandler_t, + ); + } + // Leak the Arc into a raw pointer so the signal handler can access it. + STOP_FLAG.store(Arc::into_raw(stop) as *mut _, Ordering::Release); +} + +static STOP_FLAG: std::sync::atomic::AtomicPtr = + std::sync::atomic::AtomicPtr::new(std::ptr::null_mut()); + +extern "C" fn signal_handler(_sig: libc::c_int) { + let ptr = STOP_FLAG.load(Ordering::Acquire); + if !ptr.is_null() { + unsafe { &*ptr }.store(true, Ordering::Relaxed); + } +} diff --git a/candumpr/src/recv/uring_multi.rs b/candumpr/src/recv/uring_multi.rs index 9c2bafe..e1aaf27 100644 --- a/candumpr/src/recv/uring_multi.rs +++ b/candumpr/src/recv/uring_multi.rs @@ -5,10 +5,8 @@ //! 
Performance features: //! * SINGLE_ISSUER: skip internal synchronization (single-threaded use). //! * COOP_TASKRUN: prevent kernel from delivering task_work at arbitrary syscall boundaries. -//! * DEFER_TASKRUN: defer all completion processing to explicit submit_with_args calls. //! * Registered file descriptors: avoid per-operation fd lookup in the kernel. //! * Batched wakeups: submit_with_args(BATCH_SIZE) reduces wakeup frequency. -//! * Enlarged CQ ring: headroom for burst-induced multishot completions. //! //! Ancillary data: //! * Hardware timestamps (SCM_TIMESTAMPING) with software fallback. @@ -25,8 +23,10 @@ use io_uring::{IoUring, cqueue, opcode, types}; use crate::can::{self, CanFrame, FRAME_SIZE}; use crate::recv::{FrameMeta, Timestamp}; -/// Number of provided buffers in the ring. +/// Number of provided buffers (and CQ entries) in the ring. The CQ is sized to match so the +/// kernel can post one completion per buffer without overflow. Must be a power of two. const FRAMEBUF_COUNT: u16 = 256; +const _: () = assert!(FRAMEBUF_COUNT.is_power_of_two()); /// Buffer group ID for the provided buffer ring. io_uring supports multiple buffer rings /// identified by group ID; we only use one. @@ -37,10 +37,6 @@ const BGID: u16 = 0; /// when traffic is sparse. const BATCH_SIZE: usize = 4; -/// CQ ring size. With multishot recv, a single SQE can generate many CQEs in a burst. A larger CQ -/// ring prevents overflow (which terminates the multishot and forces resubmission). -const CQ_SIZE: u32 = 64; - /// Size of the `io_uring_recvmsg_out` header the kernel writes at the start of each provided /// buffer. This is a stable kernel ABI (4 x u32). 
const RECVMSG_OUT_HDR: usize = 16; @@ -96,8 +92,7 @@ impl UringMultiRecv { let ring = IoUring::builder() .setup_single_issuer() .setup_coop_taskrun() - .setup_defer_taskrun() - .setup_cqsize(CQ_SIZE) + .setup_cqsize(FRAMEBUF_COUNT as u32) .build(sq_size)?; // Register socket file descriptors so the kernel can skip per-op fd lookup. SQEs then use @@ -176,6 +171,10 @@ impl UringMultiRecv { let framebuf_base = self.framebuf_ring_ptr as *mut BufRingEntry; let mask = FRAMEBUF_COUNT - 1; + // Sockets whose multishot terminated but could not be resubmitted because the SQ was full. + // Retried at the top of each loop iteration after submit drains the SQ. + let mut pending_resubmit: Vec = Vec::new(); + // Template msghdr for RecvMsgMulti. The kernel uses msg_namelen and msg_controllen to // determine the layout within each provided buffer. Must remain at a stable address for // the lifetime of the multishot SQEs (i.e., until this function returns). @@ -202,10 +201,19 @@ impl UringMultiRecv { Err(e) => return Err(e), } + // Retry any multishot resubmissions that failed on a previous iteration because the + // SQ was full. The submit_with_args above drained the SQ, so there should be room now. + pending_resubmit.retain(|&idx| { + let entry = opcode::RecvMsgMulti::new(types::Fixed(idx as u32), &msghdr, BGID) + .build() + .user_data(idx as u64); + unsafe { self.ring.submission().push(&entry) }.is_err() + }); + // Drain CQEs into a stack buffer, then process. This avoids heap allocation while // releasing the borrow on the completion queue before we need to touch the submission // queue or buffer ring. 
- let mut cqe_buf = [(0u64, 0i32, 0u32); CQ_SIZE as usize]; + let mut cqe_buf = [(0u64, 0i32, 0u32); FRAMEBUF_COUNT as usize]; let mut cqe_count = 0; for cqe in self.ring.completion() { cqe_buf[cqe_count] = (cqe.user_data(), cqe.result(), cqe.flags()); @@ -216,9 +224,12 @@ impl UringMultiRecv { let idx = ud as usize; if result < 0 { - let err = std::io::Error::from_raw_os_error(-result); - if err.raw_os_error() != Some(libc::ECANCELED) { - return Err(err); + let err_code = -result; + // ECANCELED: normal shutdown (SQE cancelled). + // ENOBUFS: provided buffer ring exhausted; multishot terminated. The + // resubmission logic below will restart it once buffers are returned. + if err_code != libc::ECANCELED && err_code != libc::ENOBUFS { + return Err(std::io::Error::from_raw_os_error(err_code)); } } else if let Some(buf_id) = cqueue::buffer_select(flags) { let buf_offset = buf_id as usize * BUF_ENTRY_SIZE; @@ -244,7 +255,9 @@ impl UringMultiRecv { let entry = opcode::RecvMsgMulti::new(types::Fixed(idx as u32), &msghdr, BGID) .build() .user_data(ud); - unsafe { self.ring.submission().push(&entry) }.ok(); + if unsafe { self.ring.submission().push(&entry) }.is_err() { + pending_resubmit.push(idx); + } } } } diff --git a/docs/design/01-candumpr-ux.md b/docs/design/01-candumpr-ux.md deleted file mode 100644 index db1e937..0000000 --- a/docs/design/01-candumpr-ux.md +++ /dev/null @@ -1,386 +0,0 @@ -# candumpr UX - -## Status - -**DRAFT** - -## Scope - -This document defines the user-facing features, CLI interface, and configuration file format for -candumpr, a CAN bus logging tool. It does not cover internal implementation details. - -candumpr is an opinionated replacement for can-utils `candump`, focused on J1939 networks. It -prioritizes performance and multi-network support at the cost of broader CAN compatibility. - -A primary design goal is lossless capture: candumpr should never drop a CAN frame under normal -operating conditions, including during log file rotation. 
Every frame that the kernel delivers to -the socket should appear in the output. - -An additional convenience is to optionally send a J1939 address claim PGN request to ensure that the -CAN logs include address claims for every control function near the beginning of every log. - -## Features - -### Frame support - -* Only supports CAN with 29-bit extended (J1939) identifiers. -* CAN FD and CAN XL are not supported. -* Error frames are supported and logged alongside data frames. - -### Multi-interface logging - -* Supports logging from an arbitrary number of CAN interfaces simultaneously. -* Each interface can be independently configured with its own filters and settings. -* Interfaces can be specified on the CLI, in a TOML config file, or both. - -### Filtering - -Two filtering mechanisms are supported. Both can be used together. - -**candump-compatible mask filters** are specified per-interface using the same syntax as candump: - -* `id:mask` -- positive match (accept when `received_id & mask == id & mask`) -* `id~mask` -- inverse match (accept when `received_id & mask != id & mask`) -* `#error_mask` -- error frame class filter (see `linux/can/error.h`) - -All values are hexadecimal. Multiple filters are comma-separated after the interface name. Appending -`j` or `J` to the filter list switches that interface from OR to AND semantics (same as candump). - -**Convenience filters** provide a more ergonomic way to filter J1939 traffic. These are specified in -the TOML config file: - -* Filter by PGN (Parameter Group Number) -* Filter by source address -* Future work: filter by ISONAME + mask -* Toggle error frame capture on or off - -Convenience filters are compiled to socket-level `id:mask` filters internally. - -When no filters are specified, all traffic is accepted. 
- -#### Filter combination semantics - -When multiple filters are specified on the same interface (whether candump-style masks, convenience -filters, or both), they are combined with OR by default: a frame is accepted if it matches any -filter. - -To switch to AND semantics (a frame must match all filters): - -* On the CLI, append `j` to the candump-style filter list (e.g., `can0,...,j`) -* In the TOML config, set `filter_join = "and"` on the interface or in `[defaults]` - -Both map to the `CAN_RAW_JOIN_FILTERS` socket option. - -### Output formats - -candumpr supports multiple output formats, configurable per-interface: - -* **candump** (`.log`) -- default -- the can-utils `candump -L` log file format: - `(1345212884.318850) can0 18FECA00#0011223344556677` -* **candump-tty** (`.log`) -- the can-utils `candump` console format: - `can0 18FECA00 [8] 00 11 22 33 44 55 66 77` -* **ASC** (`.asc`) -- Vector ASCII logging format, compatible with CANalyzer/CANoe and other tools - that import ASC files. -* **PCAP** (`.pcap`) -- packet capture format, compatible with Wireshark and tcpdump. - -When compressed, an additional `.zst` suffix is appended (e.g., `.log.zst`, `.asc.zst`). - -### Timestamps - -Timestamp mode controls how frame timestamps are displayed in candump and candump-tty output -formats. ASC and PCAP use their native timestamp conventions and ignore this setting. - -* **absolute** -- seconds since epoch with fractional seconds -* **delta** -- time elapsed since the previous received frame -* **zero** -- time elapsed since the first received frame - -Hardware timestamps from the CAN controller are used automatically when available, falling back to -kernel software timestamps with a diagnostic warning. This requires no configuration. - -### Clock correctness - -candumpr is designed to start early in the boot cycle on IoT devices that may lack a persistent RTC. 
-On these devices, `CLOCK_REALTIME` can be invalid (near epoch) until NTP or another time source -synchronizes it. - -candumpr will provide options to control how it detects an invalid clock and what it does with -frames captured before the clock becomes valid. Detection methods include a heuristic (is the clock -before a reasonable threshold?) and waiting for a clock step event. Behaviors may include dropping -frames, queueing them in memory, using zero-based timestamps, inserting a marker, or rotating the -log file when the clock becomes valid. The available behaviors may depend on the output format. - -One strategy for clock correctness is to give each log file a strictly monotonic incrementing index. -Then at least you can tell the order of the files. candumpr should also attempt to detect and log -clock jumps to stderr so that they're less surprising if you have to reverse engineer what the clock -did by looking at strictly just the logs. - -This feature requires dedicated detailed design and is not fully specified here. - -### File logging and rotation - -When logging to files, each monitored interface writes to its own log file. This applies even when -using the `any` interface binding; frames are separated by their source interface, and `{interface}` -resolves to the actual interface name (e.g., `can0`), not `any`. - -* Log filenames are controlled by a format string with placeholders: - * `{interface}` -- the source interface name (e.g., `can0`) - * `{start-unix}` -- Unix seconds when the log file was opened (e.g., `1741868400`) - * `{start-iso}` -- ISO 8601 timestamp when the log file was opened, without colons (e.g., - `2026-03-13T120000Z`), since colons break rsync and some filesystems. - * Default format: `candumpr-{interface}-{start-unix}` (plus the appropriate file extension). -* The log directory path supports the same `{interface}` placeholder, allowing per-interface - directory organization (e.g., `/var/log/candumpr/{interface}/`). 
-* If the resolved file path (directory + name + extension) would be identical for two or more - interfaces, candumpr exits with a configuration error. Disambiguation can be achieved by including - `{interface}` in the filename or directory path, or by setting different `log_dir` values - per-interface. -* File rotation can be triggered by: - * A time interval (e.g., `1h`, `30m`) - * A file size threshold (e.g., `50MB`, `1GB`) - * The value is unambiguous: size units (`B`, `KB`, `MB`, `GB`) and time units (`s`, `m`, `h`, `d`) - do not overlap. Bare integers without a unit suffix are rejected. - * SIGHUP is always available for manual rotation regardless of the configured method. -* During rotation, no frames are lost. Buffered frames are flushed to the old file before the new - file begins. -* Completed log files are never partially written. Files are written to a temporary name and renamed - atomically on completion. -* ZSTD streaming compression is optionally applied during writing. -* Buffered output is flushed to disk periodically (configurable interval) to limit data loss on - unexpected power loss or crash. - -When not logging to files, output goes to stdout. - -### Log retention - -When logging to files, candumpr can automatically prune old log files to prevent unbounded disk -usage. - -* **max_total_size** -- maximum total size of all completed log files across all interfaces (e.g., - `10GB`). When exceeded, the oldest completed log files are deleted regardless of which interface - produced them. Retention is checked after each log rotation. - -### J1939 address claim - -On startup and after each log rotation, candumpr can optionally broadcast a J1939 Address Claim PGN -request. This causes all devices on the bus to re-announce their addresses, ensuring each log file -contains a complete picture of which source addresses are in use. 
- -### Statistics - -Per-interface statistics counters are maintained and can be reported: - -* Frame count (total and per-second) -* Byte count and estimated bitrate -* Dropped frame count (frames lost due to socket buffer overflow) - -Dropped frame monitoring is always enabled. - -### Socket configuration - -* The socket receive buffer size can be configured per-interface. The tool attempts `SO_RCVBUFFORCE` - first (requires `CAP_NET_ADMIN`) and falls back to `SO_RCVBUF`. - -### Device resilience - -* If a monitored CAN interface goes down, candumpr continues running and resumes logging when the - interface comes back up. This is the default and only behavior (unlike candump, which exits by - default). - -### Signal handling - -* **SIGHUP** -- trigger log file rotation -* **SIGTERM / SIGINT** -- graceful shutdown (flush buffers, finalize current log file) - -### Diagnostic logging - -Operational events are logged to stderr via `tracing`: - -* Dropped frames (socket buffer overflow) -* Bus-off state changes and recovery -* Network interface up/down events -* Startup and shutdown status -* Log file rotation events - -This keeps CAN data output (stdout or log files) clean, while ensuring operational issues are -visible. The log level can be set via `--log-level` on the CLI, `log_level` in the TOML config, or -the `CANDUMPR_LOG` environment variable (in `EnvFilter` format). The environment variable takes -precedence when set. - -### Display options (stdout only) - -When outputting to a TTY: - -* Color mode (`--color`): - * `never` -- no color or styling - * `network` -- each interface gets a distinct color applied to the entire line, to visually - distinguish traffic from different networks - * `highlight` -- use color and weight to improve readability: the interface name and timestamp are - colored, and data bytes alternate between bold and normal weight to make it easier to visually - parse byte boundaries -* TX/RX direction is always shown for each frame. 
- -## CLI interface - -``` -candumpr [OPTIONS] [INTERFACE[,FILTER]...] -``` - -### Positional arguments - -Interfaces are specified as positional arguments, optionally followed by comma-separated -candump-compatible filters. The special name `any` receives from all CAN interfaces (same as -candump): - -```sh -# Listen on all CAN interfaces that are up -candumpr any - -# No filters (accept all traffic on both interfaces) -candumpr can0 can1 - -# candump-compatible mask filters -candumpr can0,18FECA00:1FFFFFFF can1,18FEE500:1FFFFFFF - -# Inverse match -candumpr can0,18FECA00~1FFFFFFF - -# Error frame filter -candumpr can0,#FFFFFFFF - -# Join filters with AND semantics (must match all) -candumpr can0,18FECA00:1FFF0000,00000017:000000FF,j -``` - -### Options - -#### Configuration - -| Flag | Description | -| --------------------- | -------------------------------------------- | -| `-C, --config ` | Path to a TOML configuration file | -| `--log-level ` | Diagnostic log level (e.g., `info`, `debug`) | - -CLI flags apply globally to every interface. Per-interface configuration, filtering, file logging -options (directory, naming, rotation, compression, retention), and socket tuning require a TOML -config file. Interfaces specified on the CLI are merged with interfaces in the config file. 
- -#### Output format - -| Flag | Description | -| ------------------------ | ------------------------------------------------------------------ | -| `-f, --format ` | Output format: `candump`, `candump-tty`, `asc`, `pcap` | -| `-t, --timestamp ` | Timestamp mode: `absolute`, `delta`, `zero` (candump formats only) | -| `-c, --color ` | Color mode: `never`, `network`, `highlight` | - -#### File logging - -| Flag | Description | -| ---- | ------------------------------------------------------- | -| `-l` | Log to files in the current directory (default: stdout) | - -#### J1939 - -| Flag | Description | -| ----------------------- | -------------------------------------------------------- | -| `-A`, `--address-claim` | Send address claim request on startup and after rotation | - -#### Termination - -| Flag | Description | -| -------------------- | -------------------------------------------------------- | -| `-n, --count ` | Exit after receiving n frames | -| `-T, --timeout ` | Exit if no frames received within this many milliseconds | - -## TOML configuration file - -The `[defaults]` section provides default values for all interface settings. Individual -`[interfaces.]` sections can override any default. All fields are optional at every level. 
- -```toml -log_level = "info" # diagnostic log level for stderr output -# All logs together must stay below this limit -max_total_size = "10GB" - -[defaults] -# Output -format = "candump" # "candump" | "candump-tty" | "asc" | "pcap" -timestamp = "absolute" # "absolute" | "delta" | "zero" (candump formats only) -color = "highlight" # "never" | "network" | "highlight" - -# File logging -log_dir = "/var/log/candumpr" # supports {interface} placeholder -log_name = "candumpr-{interface}-{start-unix}" # placeholders: {interface}, {start-unix}, {start-iso} -rotate = "1h" # time or size based rotation -compress = "none" # "zstd" | "none" -zstd_level = 3 -flush_interval = "5s" - -# Filtering -error_frames = true -pgns = [] -source_addresses = [] -filter_join = "or" # "and" | "or" - -# Socket -recv_buffer = "2MB" - -# J1939 -address_claim = true - -# --- Per-interface overrides --- - -# Inherits all [defaults], overrides nothing: -[interfaces.can0] -# Overrides specific settings: -[interfaces.can1] -error_frames = false -pgns = [0xFECA, 0xFEE5] - -[interfaces.can2] -address_claim = false -source_addresses = [0x00, 0x17] -log_dir = "/var/log/candumpr/can2" - -# candump-compatible raw filters: -[interfaces.can3] -filters = ["18FECA00:1FFFFFFF", "18FEE500~1FFFF00"] - -# AND semantics for all filters on this interface: -[interfaces.can4] -pgns = [0xFECA] -source_addresses = [0x17] -filter_join = "and" -``` - -### Precedence - -Settings are resolved in this order, highest priority first: - -1. CLI flags -2. TOML `[interfaces.]` -3. TOML `[defaults]` -4. Built-in defaults - -For settings available on the CLI, CLI flags apply globally and override all other sources, -including per-interface TOML settings. For example, `--format pcap` forces that format on every -interface. Most settings are only available through the TOML config file. - -List-valued options (`pgns`, `source_addresses`, `filters`) are replaced wholesale at each -precedence level, not merged. 
For example, if `[defaults]` sets `pgns = [0xFECA, 0xFEE5]` and -`[interfaces.can0]` sets `pgns = [0xFECA]`, then `can0` uses only `[0xFECA]`. - -### Interface discovery - -Interfaces to monitor are the union of: - -* Interfaces named on the CLI -* Interfaces listed in `[interfaces]` in the config file - -The special name `any` is specified on the CLI only (`candumpr any`). It binds to all CAN -interfaces, including interfaces that come up after candumpr has started. Using `any` and named -interfaces together is a configuration error, since the `any` binding would duplicate frames from -explicitly-bound interfaces. When using `any`, settings come from `[defaults]` (and CLI flags). - -Even when using `any`, log files are written per source interface (not a single combined file). - -At least one interface must be specified. diff --git a/docs/design/01-testing-strategy.md b/docs/design/01-testing-strategy.md new file mode 100644 index 0000000..61f3462 --- /dev/null +++ b/docs/design/01-testing-strategy.md @@ -0,0 +1,70 @@ +# Testing strategy + +## Status + +**IMPLEMENTED** + +## Scope + +This document describes the mechanisms available for writing tests around the utilities in this +project that depend on Linux socketcan interfaces that require either real hardware or elevated +permissions to create. + +## Problem + +These utilities interact directly with CAN sockets. Testing requires CAN interfaces, but: + +* Real CAN hardware is not available in CI. +* Virtual CAN (vcan) interfaces require `CAP_NET_ADMIN` to create. +* vcan interfaces are system-global resources, so parallel tests using shared interfaces cause + interference. +* Tests must run in CI (GitHub Actions) and locally without requiring root. + +## Solution: user + network namespaces + +Each test process enters its own namespace using `unshare(CLONE_NEWUSER | CLONE_NEWNET)`. 
Inside the +namespace, the process has `CAP_NET_ADMIN` without real root privileges, vcan interfaces are private +and isolated, and everything is cleaned up when the process exits. See the +[vcan-fixture](/vcan-fixture/src/lib.rs) crate for the implementation. + +Depending on your system (Fedora 42 doesn't need the following, but Ubuntu 24.04 does), you may need +to disable the following apparmor setting: + +```sh +sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0 +``` + +Constraint: `unshare(CLONE_NEWUSER)` requires a single-threaded process. The Rust test harness is +multi-threaded, so namespace entry needs to happen in a `ctor` constructor before `main()`. + +```rust +#[ctor::ctor] +fn setup() { + tracing_subscriber::fmt() + .with_test_writer() + .init(); + vcan_fixture::enter_namespace(); +} +``` + +## CI + +Tests that require vcan use `#[cfg_attr(feature = "ci", ignore = "requires vcan")]`. In CI, +`--all-features` enables the `ci` feature, making them `#[ignore]`. They are then run as a separate +step gated on whether vcan setup succeeded. + +A separate canary job (`vcan-available`) with `continue-on-error: true` fails with a warning status +when the vcan module is unavailable on the runner, rather than silently skipping the tests. This +makes it visible in the PR workflow status when vcan isn't available, but doesn't prevent merging +for infrastructure reasons outside of my control (I've read about `linux-modules-extra` not always +matching the runner kernel version). + +See [lint.yml](/.github/workflows/lint.yml) for the implementation. 
+ +## Benchmarking + +There are additional utilities in the `vcan_fixture::bench` module for + +* Querying current thread and process resource usage +* Pinning the current process to N CPU cores +* Starting a PWM-like busyloop thread to approximate P% CPU usage over N threads diff --git a/docs/design/02-candumpr-filters.md b/docs/design/02-candumpr-filters.md deleted file mode 100644 index 264cafe..0000000 --- a/docs/design/02-candumpr-filters.md +++ /dev/null @@ -1,16 +0,0 @@ -# candumpr filter syntax and semantics - -## Status - -**TODO** - -## Scope - -This document specifies the filter syntax and semantics for candumpr, covering: - -* candump-compatible `id:mask` and `id~mask` filter syntax -* Error frame class filters (`#error_mask`) -* Convenience filters (PGN, source address) -* How convenience filters compile to kernel-level `CAN_RAW_FILTER` entries -* Filter combination semantics (OR vs AND, `CAN_RAW_JOIN_FILTERS`) -* Interaction between candump-style and convenience filters on the same interface diff --git a/docs/design/03-candumpr-clock-correctness.md b/docs/design/03-candumpr-clock-correctness.md deleted file mode 100644 index 1144fb4..0000000 --- a/docs/design/03-candumpr-clock-correctness.md +++ /dev/null @@ -1,16 +0,0 @@ -# candumpr clock correctness - -## Status - -**TODO** - -## Scope - -This document specifies how candumpr handles unreliable system clocks, covering: - -* Detection of an invalid `CLOCK_REALTIME` (heuristic threshold, clock step events) -* Behavior for frames captured before the clock becomes valid -* Monotonic file indexing to preserve ordering independent of wall clock -* Clock jump detection and diagnostic logging -* Interaction with log file rotation and timestamps -* Interaction with output formats (candump, ASC, PCAP) that embed timestamps diff --git a/docs/design/04-candumpr-architecture.md b/docs/design/04-candumpr-architecture.md deleted file mode 100644 index da4209c..0000000 --- a/docs/design/04-candumpr-architecture.md 
+++ /dev/null @@ -1,363 +0,0 @@ -# candumpr architecture - -## Status - -**TODO** - -## Scope - -This document specifies the internal architecture of candumpr, covering the threading model, I/O -strategy, and the mechanisms used to achieve lossless capture. It does not cover user-facing -features or CLI/config design (see [01-candumpr-ux](01-candumpr-ux.md)). - -## Target environment - -A modern-ish Linux with io_uring and socketcan available. A ~4 core ~1 GHz arm64 CPU with 1 GB -memory and 4+ J1939 CAN networks. - -## Design goal: never drop a frame - -TODO: Define what "never drop" means precisely. Kernel socket buffer overflow is the primary -mechanism for frame loss. Describe the end-to-end path from kernel socket buffer to flushed bytes on -disk, and identify every point where frames could be lost or delayed. - -## Design goal: lowest system impact - -TODO: I'm targeting using candumpr to log traffic from 4 500kbaud CAN networks on a 4 core system -responsible for other application concerns. The logging is not the purpose of the system, it's a -troubleshooting enabler. I'm after a solution with the minimal system performance impact. - -## Design goal: long-running CAN logging daemon to facilitate troubleshooting - -I want a long-running daemon to log all CAN traffic to facilitate future field issues. That means: - -* Address claim PGN requests upon startup and rotation -* Log rotation policy -* Log retention policy -* Configuration -* Usability with other tools (e.g., .pcap files with Wireshark) - -## Option 1: dedicated thread pairs - -One recv thread and one write thread per interface. The recv thread reads frames from the socket and -passes them to its paired write thread over a channel. The write thread handles formatting, -compression, and file I/O. - -TODO: Describe how io_uring fits in (recv side, write side, or both). Describe the channel type and -backpressure strategy. Describe how log rotation and SIGHUP are coordinated between the two threads. 
- -## Option 2: shared threads - -A small number of shared recv threads and shared write threads, rather than a dedicated pair per -interface. This may be a better fit for the target environment of 4 ARM cores with 4+ interfaces, -where dedicating 2 threads per interface would oversubscribe the CPU. - -TODO: Describe the multiplexing strategy (io_uring multishot recv, epoll, etc.). Describe how write -work is distributed. Describe how this interacts with per-interface file handles, rotation, and -compression state. - -## Back-of-the-napkin math - -### Frame rate - -A CAN 2.0B extended frame (29-bit ID) with an 8-byte payload uses the following bits on the wire, -assuming zero bitstuffing: - -| Field | Bits | -| -------------- | ------: | -| SOF | 1 | -| Base ID | 11 | -| SRR | 1 | -| IDE | 1 | -| Extended ID | 18 | -| RTR | 1 | -| r1 (reserved) | 1 | -| r0 (reserved) | 1 | -| DLC | 4 | -| Data (8 bytes) | 64 | -| CRC | 15 | -| CRC delimiter | 1 | -| ACK slot | 1 | -| ACK delimiter | 1 | -| EOF | 7 | -| IFS | 3 | -| **Total** | **131** | - -Reference: linux-can/can-utils -[canframelen.c](https://github.com/linux-can/can-utils/blob/master/canframelen.c) computes -`(eff ? 67 : 47) + len * 8` for the no-bitstuffing case (CFL_NO_BITSTUFFING). The worst-case -bitstuffing formula from -[canframelen.h](https://github.com/linux-can/can-utils/blob/master/canframelen.h) is `80 + 10 * len` -bits for extended frames. IFS (3 bits) is included in both formulas. - -Zero bitstuffing is the worst case for frame rate: fewer bits per frame means more frames per -second. J1939 always uses 29-bit extended IDs and 8-byte payloads. - -At 500 kbaud with zero bitstuffing: 500,000 / 131 = **3816 frames/sec per bus**. 
- -| Scenario | Frames/sec | -| ------------------------------- | ---------: | -| 1 bus, no bitstuffing | 3,816 | -| 1 bus, worst-case bitstuffing | 3,125 | -| 4 buses, no bitstuffing | 15,264 | -| 4 buses, worst-case bitstuffing | 12,500 | - -We'll proceed assuming 3,816 frames/sec, acknowledging that this would be 100% busload, which -doesn't happen in practice. - -### Per-frame recv cost - -The recv path for a single frame (ignoring formatting and write): - -1. The kernel delivers the frame to the socket buffer via interrupt. This cost is the same across - all backends. -2. The receiver wakes (or discovers a new CQE) and reads the frame. -3. The on_frame callback runs. - -The distinguishing cost is step 2: how many syscalls the receiver makes per frame, and how much -userspace work each backend does. - -| Approach | Syscalls/frame | Notes | -| ------------------------- | -------------: | -------------------------------------------------- | -| Dedicated (blocking read) | 1 | One `read()` per frame per thread | -| epoll + read | 1-2 | `epoll_wait` + `read`; drain loop adds EAGAIN read | -| epoll + recvmmsg | <1 | Batched reads reduce per-frame count | -| io_uring single-shot | ~1 | `submit_and_wait` both submits and collects | -| io_uring multishot | <1 | Multiple CQEs per `submit_and_wait`; no resubmit | - -For the multiplexed backends, frames arriving on multiple sockets within the same -`epoll_wait`/`submit_and_wait` window are serviced in a single wakeup. At 3816 fps per bus across 4 -buses, the mean inter-arrival across all sockets is ~65us, so overlapping arrivals are common. - -On a 1 GHz ARM64 processor, a syscall round trip (userspace to kernel and back) takes roughly 2-5us -depending on kernel mitigations and the specific operation. Using 3us as a rough estimate: - -| Approach | Est. 
recv CPU (4 buses, 3816 fps each) | Threads | -| ------------------ | -------------------------------------: | ------: | -| Dedicated | ~6% of one core | 4 | -| epoll + read | ~4-6% of one core | 1 | -| io_uring multishot | ~2% of one core | 1 | - -These cover only recv syscall overhead, excluding on_frame processing, scheduling, and cache -effects. - -### Context switches - -Each sleep/wake cycle is a voluntary context switch. The scheduling cost itself is small (~1-5 us), -but each wakeup pollutes L1/L2 caches, affecting co-resident applications. - -| Approach | Threads | Est. context switches/sec (4 buses) | -| ------------------------------------ | ------: | ----------------------------------: | -| Dedicated | 4 | Up to 15,264 | -| Multiplexed (one frame per wakeup) | 1 | Up to 15,264 | -| Multiplexed (multiple frames/wakeup) | 1 | Fewer; depends on arrival overlap | - -The dedicated backend also incurs involuntary context switches when its recv threads compete with -application threads for cores. - -### Plausible bottlenecks - -1. **Context switches and cache pollution.** At worst-case rates, the receiver wakes up to ~15,000 - times/sec. Each wakeup pollutes L1/L2 caches, affecting co-resident applications. This is likely - the dominant source of system impact, since the raw CPU cost for recv is small (2-6% of one - core). - -2. **Socket buffer overflow.** The CAN_RAW receive buffer capacity depends on `rmem_max` and sk_buff - overhead; on a typical system it may hold only a few hundred frames. At 3816 fps, even a buffer - of 200 frames fills in ~52 ms if the receiver stalls. Any write-path backpressure lasting longer - causes frame loss. - -3. **Thread oversubscription (dedicated only).** With 4 recv threads on a 4-core system, the recv - threads alone use all available cores before accounting for write threads or other application - work. Involuntary preemption increases and cache efficiency drops. - -4. 
**Write path stalls (out of scope for recv benchmarks).** The recv path must drain the socket - buffer faster than frames arrive, even when the write path stalls for disk I/O or log rotation. - The socket buffer depth sets the maximum tolerable stall. - - candumpr will use a dedicated write thread and another larger and growable frame queue to - mitigate this. This is an important choice, because we can't arbitrarily grow the recvbuf, - there's a maximum limit. - -## Benchmarking strategy - -### Goals - -The recv benchmarks answer three questions: - -1. **Multiplexed vs. dedicated.** Does a single-threaded multiplexed receiver have lower system - impact than one-thread-per-socket on a 4-core system with 4 CAN interfaces? - -2. **Which multiplexed backend.** Among epoll + read, epoll + recvmmsg, io_uring single-shot, and - io_uring multishot with provided buffers: which has the lowest per-frame overhead and the fewest - context switches? - -3. **Where to optimize.** What is the per-frame instruction cost, and where are the hot spots? - -### Test environment - -All benchmarks run on vcan interfaces inside an isolated user + network namespace (via -`unshare(2)`). This eliminates the need for root or hardware CAN interfaces. - -vcan delivers frames as fast as possible synchronously through the kernel's loopback path, with no -bus timing or contention. This is appropriate for measuring recv path overhead in isolation. Results -should be validated on the target hardware before making final decisions. - -### Benchmark A: per-frame instruction cost - -**Purpose.** Measure userspace instruction overhead per frame for each backend. Identify which code -paths dominate the per-frame cost and where optimization effort should focus. - -**Method.** Callgrind-based profiling via gungraun. - -**Setup.** - -1. Create 4 vcan interfaces. -2. Open one TX and one RX socket per interface. -3. Pre-send frames into the RX socket buffers. 
The number per interface is limited by the kernel - socket receive buffer (constrained by `rmem_max`, which cannot be raised inside a user - namespace). The benchmark should determine the usable capacity at runtime and fill to that limit. -4. Start the profiled region. -5. Run the backend to drain all frames. -6. End the profiled region. - -Pre-filling rather than concurrent sending ensures that the profiled region contains only recv work -and that the send cost is identical across backends. - -**Callback fairness.** The on_frame callback must have identical cost across all backends. The -dedicated backend runs multiple threads, so its counting mechanism needs to be thread-safe. Each -dedicated recv thread should count frames in a thread-local variable (not a shared atomic) to avoid -penalizing it with synchronization overhead that belongs to the test harness, not the backend. The -single-threaded backends should use the same local-variable approach. - -**Metrics.** - -* Instructions per frame -* L1 data cache miss rate -* Branch misprediction rate - -**Limitations.** Callgrind counts userspace instructions only. For io_uring backends, kernel-side -CQE processing and buffer ring management are not captured. Treat io_uring instruction counts as a -lower bound that excludes kernel work. - -### Benchmark B: steady-state system impact - -**Purpose.** Measure the receiver's CPU time and scheduling overhead under sustained load at -realistic CAN frame rates. - -**Method.** Concurrent senders and receiver. Collect per-thread resource usage for the receiver -only. - -**Setup.** - -1. Create vcan interfaces in an isolated namespace. -2. Spawn one sender thread per interface. Senders pace frames at the target rate using sleep-based - timing (`clock_nanosleep` with `TIMER_ABSTIME`). Do not use busy-spin pacing; it burns CPU and - contaminates resource measurements. -3. Run the backend under test on the receiver thread. -4. 
Collect resource usage via `getrusage(RUSAGE_THREAD)` on the receiver thread before and after the - run. For the dedicated backend, collect `RUSAGE_THREAD` from each sub-thread and aggregate. -5. A timer thread stops all threads after the run duration. - -**Metric isolation.** Using `RUSAGE_THREAD` rather than `RUSAGE_SELF` excludes sender threads, the -timer thread, and all other process-level overhead from the measurements. At realistic frame rates, -the receiver's CPU contribution is small and would be invisible in a process-wide measurement. - -**Test matrix.** - -| Parameter | Values | -| -------------------- | ---------------------------------------------- | -| Backends | dedicated, epoll, recvmmsg, uring, uring_multi | -| Interfaces | 1, 2, 4 | -| Rate (per interface) | 1000 fps, 2000 fps, 4000 fps | -| Duration | 8 seconds | -| Repetitions | 4, report median by receiver kernel time | -| Core constraint | Use `taskset -c 0-3` to limit to 4 cores | - -5 backends x 3 interface counts x 3 rates = 45 configurations. At 4 repetitions and 8 seconds per -run, a full sweep takes roughly 24 minutes. - -**Metrics (per run).** - -| Metric | Source | Purpose | -| ------------------------ | ------------------------- | ------------------------------ | -| Receiver user CPU (ms) | RUSAGE_THREAD `ru_utime` | Userspace processing cost | -| Receiver kernel CPU (ms) | RUSAGE_THREAD `ru_stime` | Kernel time for recv syscalls | -| Receiver voluntary csw | RUSAGE_THREAD `ru_nvcsw` | Sleep/wake frequency | -| Receiver involuntary csw | RUSAGE_THREAD `ru_nivcsw` | Preemption frequency | -| Frames sent | Sender counter | Confirms rate pacing accuracy | -| Frames received | Receiver counter | Confirms lossless capture | -| Frame loss % | (sent - recv) / sent | Must be 0% at all tested rates | - -**Rate pacing accuracy.** At 4000 fps per interface, the inter-frame interval is 250us. -`clock_nanosleep` with absolute timestamps should achieve this within a few microseconds of jitter. 
-Verify that the actual sent count matches the expected count (rate x duration) within 1%. - -### Benchmark C: recv under CPU contention - -**Purpose.** Determine which backend is most resilient to frame loss when the system is under CPU -pressure from other workloads. candumpr is an ancillary concern on the target system; the primary -application may consume most of the available CPU, and the recv backend must survive this without -dropping frames. - -**Method.** Run benchmark B's send/recv setup alongside a synthetic CPU load on the same cores. -Measure frame loss at different contention levels. - -**Setup.** - -1. Create 4 vcan interfaces in an isolated namespace. -2. Start a CPU load generator on the same cores as the benchmark. Use - `stress-ng --cpu 4 --cpu-load P --taskset 0-3` where P is the target load percentage. Each worker - duty-cycles between burning and sleeping to approximate P% utilization per core. -3. Run the send/recv harness from benchmark B (sleep-paced senders, receiver, timer) on the same - cores. -4. Collect the same per-thread metrics as benchmark B, plus frame loss. - -**Test matrix.** - -| Parameter | Values | -| --------------- | ---------------------------------------------- | -| Backends | dedicated, epoll, recvmmsg, uring, uring_multi | -| Interfaces | 4 | -| Rate | 4000 fps per interface | -| CPU contention | 75%, 95% | -| Duration | 8 seconds | -| Repetitions | 4, report median by frame loss % | -| Core constraint | `taskset -c 0-3` | - -5 backends x 2 contention levels = 10 configurations. At 4 repetitions and 8 seconds per run, a full -sweep takes roughly 6 minutes. 
- -**Metrics (per run).** - -| Metric | Source | Purpose | -| ------------------------ | ------------------------- | ------------------------------------ | -| Frame loss % | (sent - recv) / sent | Primary: resilience under contention | -| Receiver user CPU (ms) | RUSAGE_THREAD `ru_utime` | How much CPU the receiver got | -| Receiver kernel CPU (ms) | RUSAGE_THREAD `ru_stime` | Kernel time under contention | -| Receiver voluntary csw | RUSAGE_THREAD `ru_nvcsw` | Wakeup frequency under pressure | -| Receiver involuntary csw | RUSAGE_THREAD `ru_nivcsw` | How often the receiver was preempted | - -**What to look for.** At 75% contention, all backends should remain lossless (the receiver needs -only 2-6% of one core). At 95%, some backends may start dropping frames. The interesting result is -the relative degradation: a backend that degrades gradually (small loss %) is preferable to one that -collapses suddenly (large loss %). - -### Caveats - -* **vcan is not a real CAN bus.** There is no bus arbitration, no propagation delay, no error - frames, no bitstuffing, and no hardware interrupt path. These benchmarks measure the software recv - overhead only. -* **Callgrind and io_uring.** Instruction counts for io_uring backends undercount the true per-frame - cost because kernel-side ring processing is not instrumented. -* **x86_64 vs. ARM64.** Benchmarks run on a development workstation. Syscall costs, cache sizes, and - branch predictor behavior differ on the target ARM64 platform. Use these results for relative - comparison between backends, not as absolute predictions. - -It's technically possible to measure syscalls per thread with `perf_event_open` to setup a counter -for the `raw_syscalls:sys_enter` tracepoint using the `perf-event` crate, but this doesn't work well -inside the unshare user namespace without additional orchestration externally. 
- -## Open questions - -TODO diff --git a/docs/design/05-testing-strategy.md b/docs/design/05-testing-strategy.md deleted file mode 100644 index 6d2f09b..0000000 --- a/docs/design/05-testing-strategy.md +++ /dev/null @@ -1,87 +0,0 @@ -# Testing strategy - -## Status - -**DRAFT** - -## Scope - -This document specifies how candumpr (and other tools in this workspace) are tested, given that they -depend on Linux socketcan interfaces that require either real hardware or elevated permissions to -create. - -## Problem - -candumpr interacts directly with CAN sockets. Testing requires CAN interfaces, but: - -* Real CAN hardware is not available in CI. -* Virtual CAN (vcan) interfaces require `CAP_NET_ADMIN` to create. -* vcan interfaces are system-global resources, so parallel tests using shared interfaces cause - interference. -* Tests must run in CI (GitHub Actions) and locally without requiring root. - -## Solution: user + network namespaces - -Each test process enters its own isolated Linux network namespace using -`unshare(CLONE_NEWUSER | CLONE_NEWNET)`. Inside the namespace, the process has `CAP_NET_ADMIN` -without real root privileges, vcan interfaces are private and isolated, and everything is cleaned up -when the process exits. See the [vcan-fixture](../../vcan-fixture/) crate for the implementation. - -Constraint: `unshare(CLONE_NEWUSER)` requires a single-threaded process. The Rust test harness is -multi-threaded, so namespace entry happens in a `ctor` constructor before `main()`. - -## Test tiers - -### Unit tests - -No sockets, no namespaces. Config parsing, filter compilation, output formatting, filename template -expansion, duration/size parsing. - -### Integration tests - -Run inside user + network namespaces with vcan interfaces. Socket binding, filter application, -multi-interface capture, file rotation, ZSTD streaming, address claim, device resilience. - -### End-to-end tests - -Run the actual binary inside a network namespace. 
Launch candumpr, send frames with cangenr, verify -output files, signal handling, config file loading. - -## CI - -Tests that require vcan use `#[cfg_attr(feature = "ci", ignore = "requires vcan")]`. In CI, -`--all-features` enables the `ci` feature, making them `#[ignore]`. They are then run as a separate -step gated on whether vcan setup succeeded: - -A separate canary job (`vcan-available`) with `continue-on-error: true` shows yellow when the vcan -module is unavailable on the runner, rather than silently skipping the tests. - -See [lint.yml](/.github/workflows/lint.yml) for the implementation. - -## Benchmarking - -Benchmarks compare candumpr against candump on 4 vcan interfaces with J1939 traffic. - -### Metrics - -* **Frame loss** (primary): frames sent vs. frames in output -* **Throughput ceiling**: send rate at which frames start dropping -* **CPU usage**: total CPU time (user + system) -* **Memory usage**: peak RSS - -### Simulating the target environment - -The target is a ~4 core ~1 GHz ARM CPU. Use `taskset` to pin benchmarks to 4 cores: - -```sh -taskset -c 0-3 cargo bench -``` - -Core count is the important variable for comparing architecture options (dedicated thread pairs vs. -shared threads). Clock speed matters less for relative comparison. Final validation must happen on -real target hardware. - -### Acceptance criteria - -candumpr must not drop frames at the realistic J1939 rate (2000 frames/s per interface, 8000 -frames/s aggregate). At higher rates, candumpr should drop fewer frames than candump. diff --git a/docs/design/06-benchmarks.md b/docs/design/06-benchmarks.md deleted file mode 100644 index d5b85f8..0000000 --- a/docs/design/06-benchmarks.md +++ /dev/null @@ -1,200 +0,0 @@ -# Benchmark results - -The [04-candumpr-architecture.md](/docs/design/04-candumpr-architecture.md) design document proposes -three different benchmarks to compare receiver backends. 
- -# Benchmark A - pure CPU cost - -``` -recv_cost::recv_cost::dedicated run:setup_blocking() - Instructions: 454678|N/A (*********) - L1 Hits: 866202|N/A (*********) - LL Hits: 10504|N/A (*********) - RAM Hits: 175|N/A (*********) - Total read+write: 876881|N/A (*********) - Estimated Cycles: 924847|N/A (*********) -recv_cost::recv_cost::epoll run:setup_nonblocking() - Instructions: 519312|N/A (*********) - L1 Hits: 960184|N/A (*********) - LL Hits: 10182|N/A (*********) - RAM Hits: 53|N/A (*********) - Total read+write: 970419|N/A (*********) - Estimated Cycles: 1012949|N/A (*********) - Comparison with dedicated run:setup_blocking() - Instructions: 454678|519312 (-12.4461%) [-1.14215x] - L1 Hits: 866202|960184 (-9.78792%) [-1.10850x] - LL Hits: 10504|10182 (+3.16244%) [+1.03162x] - RAM Hits: 175|53 (+230.189%) [+3.30189x] - Total read+write: 876881|970419 (-9.63893%) [-1.10667x] - Estimated Cycles: 924847|1012949 (-8.69758%) [-1.09526x] -recv_cost::recv_cost::recvmmsg run:setup_nonblocking() - Instructions: 468571|N/A (*********) - L1 Hits: 882905|N/A (*********) - LL Hits: 10191|N/A (*********) - RAM Hits: 57|N/A (*********) - Total read+write: 893153|N/A (*********) - Estimated Cycles: 935855|N/A (*********) - Comparison with dedicated run:setup_blocking() - Instructions: 454678|468571 (-2.96497%) [-1.03056x] - L1 Hits: 866202|882905 (-1.89182%) [-1.01928x] - LL Hits: 10504|10191 (+3.07134%) [+1.03071x] - RAM Hits: 175|57 (+207.018%) [+3.07018x] - Total read+write: 876881|893153 (-1.82186%) [-1.01856x] - Estimated Cycles: 924847|935855 (-1.17625%) [-1.01190x] - Comparison with epoll run:setup_nonblocking() - Instructions: 519312|468571 (+10.8289%) [+1.10829x] - L1 Hits: 960184|882905 (+8.75281%) [+1.08753x] - LL Hits: 10182|10191 (-0.08831%) [-1.00088x] - RAM Hits: 53|57 (-7.01754%) [-1.07547x] - Total read+write: 970419|893153 (+8.65093%) [+1.08651x] - Estimated Cycles: 1012949|935855 (+8.23781%) [+1.08238x] -recv_cost::recv_cost::uring 
run:setup_nonblocking() - Instructions: 587770|N/A (*********) - L1 Hits: 1071803|N/A (*********) - LL Hits: 10210|N/A (*********) - RAM Hits: 119|N/A (*********) - Total read+write: 1082132|N/A (*********) - Estimated Cycles: 1127018|N/A (*********) - Comparison with dedicated run:setup_blocking() - Instructions: 454678|587770 (-22.6436%) [-1.29272x] - L1 Hits: 866202|1071803 (-19.1827%) [-1.23736x] - LL Hits: 10504|10210 (+2.87953%) [+1.02880x] - RAM Hits: 175|119 (+47.0588%) [+1.47059x] - Total read+write: 876881|1082132 (-18.9673%) [-1.23407x] - Estimated Cycles: 924847|1127018 (-17.9386%) [-1.21860x] - Comparison with epoll run:setup_nonblocking() - Instructions: 519312|587770 (-11.6471%) [-1.13182x] - L1 Hits: 960184|1071803 (-10.4141%) [-1.11625x] - LL Hits: 10182|10210 (-0.27424%) [-1.00275x] - RAM Hits: 53|119 (-55.4622%) [-2.24528x] - Total read+write: 970419|1082132 (-10.3234%) [-1.11512x] - Estimated Cycles: 1012949|1127018 (-10.1213%) [-1.11261x] - Comparison with recvmmsg run:setup_nonblocking() - Instructions: 468571|587770 (-20.2799%) [-1.25439x] - L1 Hits: 882905|1071803 (-17.6243%) [-1.21395x] - LL Hits: 10191|10210 (-0.18609%) [-1.00186x] - RAM Hits: 57|119 (-52.1008%) [-2.08772x] - Total read+write: 893153|1082132 (-17.4636%) [-1.21159x] - Estimated Cycles: 935855|1127018 (-16.9618%) [-1.20427x] -recv_cost::recv_cost::uring_multi run:setup_nonblocking() - Instructions: 628114|N/A (*********) - L1 Hits: 1145140|N/A (*********) - LL Hits: 11463|N/A (*********) - RAM Hits: 168|N/A (*********) - Total read+write: 1156771|N/A (*********) - Estimated Cycles: 1208335|N/A (*********) - Comparison with dedicated run:setup_blocking() - Instructions: 454678|628114 (-27.6122%) [-1.38145x] - L1 Hits: 866202|1145140 (-24.3584%) [-1.32202x] - LL Hits: 10504|11463 (-8.36605%) [-1.09130x] - RAM Hits: 175|168 (+4.16667%) [+1.04167x] - Total read+write: 876881|1156771 (-24.1958%) [-1.31919x] - Estimated Cycles: 924847|1208335 (-23.4610%) [-1.30652x] - Comparison 
with epoll run:setup_nonblocking() - Instructions: 519312|628114 (-17.3220%) [-1.20951x] - L1 Hits: 960184|1145140 (-16.1514%) [-1.19263x] - LL Hits: 10182|11463 (-11.1751%) [-1.12581x] - RAM Hits: 53|168 (-68.4524%) [-3.16981x] - Total read+write: 970419|1156771 (-16.1097%) [-1.19203x] - Estimated Cycles: 1012949|1208335 (-16.1699%) [-1.19289x] - Comparison with recvmmsg run:setup_nonblocking() - Instructions: 468571|628114 (-25.4003%) [-1.34049x] - L1 Hits: 882905|1145140 (-22.8998%) [-1.29701x] - LL Hits: 10191|11463 (-11.0966%) [-1.12482x] - RAM Hits: 57|168 (-66.0714%) [-2.94737x] - Total read+write: 893153|1156771 (-22.7891%) [-1.29515x] - Estimated Cycles: 935855|1208335 (-22.5500%) [-1.29116x] - Comparison with uring run:setup_nonblocking() - Instructions: 587770|628114 (-6.42304%) [-1.06864x] - L1 Hits: 1071803|1145140 (-6.40420%) [-1.06842x] - LL Hits: 10210|11463 (-10.9308%) [-1.12272x] - RAM Hits: 119|168 (-29.1667%) [-1.41176x] - Total read+write: 1082132|1156771 (-6.45236%) [-1.06897x] - Estimated Cycles: 1127018|1208335 (-6.72967%) [-1.07215x] -``` - -# Benchmark B - system impact - -| backend | ifaces | rate | sent | recv | lost | user_ms | sys_ms | vol_csw | invol_csw | -| ----------- | ------ | ---- | ----- | ----- | ---- | ------- | ------ | ------- | --------- | -| dedicated | 1 | 1000 | 5000 | 5000 | 0 | 6.1 | 0.0 | 5000 | 0 | -| dedicated | 1 | 2000 | 10000 | 10000 | 0 | 11.7 | 0.0 | 10000 | 0 | -| dedicated | 1 | 4000 | 20000 | 20000 | 0 | 22.7 | 0.0 | 19996 | 0 | -| dedicated | 2 | 1000 | 10000 | 10000 | 0 | 12.2 | 0.0 | 10000 | 0 | -| dedicated | 2 | 2000 | 20000 | 20000 | 0 | 18.1 | 4.8 | 19999 | 0 | -| dedicated | 2 | 4000 | 40000 | 40000 | 0 | 44.4 | 0.0 | 39997 | 0 | -| dedicated | 4 | 1000 | 20000 | 20000 | 0 | 22.0 | 0.0 | 19999 | 0 | -| dedicated | 4 | 2000 | 40000 | 40000 | 0 | 34.6 | 7.8 | 39993 | 12 | -| dedicated | 4 | 4000 | 80000 | 80000 | 0 | 66.9 | 22.5 | 79956 | 48 | -| epoll | 1 | 1000 | 5000 | 5000 | 0 | 3.9 | 3.9 | 4999 | 
0 | -| epoll | 1 | 2000 | 10000 | 10000 | 0 | 7.4 | 7.4 | 10000 | 0 | -| epoll | 1 | 4000 | 20000 | 20000 | 0 | 14.2 | 14.3 | 19999 | 0 | -| epoll | 2 | 1000 | 10000 | 9999 | 1 | 7.8 | 7.8 | 9865 | 0 | -| epoll | 2 | 2000 | 20000 | 19999 | 1 | 14.6 | 14.6 | 19871 | 1 | -| epoll | 2 | 4000 | 40000 | 39999 | 1 | 41.7 | 14.3 | 38664 | 1 | -| epoll | 4 | 1000 | 20000 | 19997 | 3 | 26.8 | 0.0 | 16407 | 1 | -| epoll | 4 | 2000 | 40000 | 39997 | 3 | 0.0 | 46.6 | 26749 | 62 | -| epoll | 4 | 4000 | 80000 | 79997 | 3 | 0.0 | 103.7 | 66257 | 18 | -| recvmmsg | 1 | 1000 | 5000 | 5000 | 0 | 0.0 | 7.9 | 5000 | 0 | -| recvmmsg | 1 | 2000 | 10000 | 10000 | 0 | 0.0 | 15.1 | 10000 | 0 | -| recvmmsg | 1 | 4000 | 20000 | 20000 | 0 | 0.0 | 28.7 | 19999 | 0 | -| recvmmsg | 2 | 1000 | 10000 | 9999 | 1 | 0.0 | 15.4 | 9896 | 0 | -| recvmmsg | 2 | 2000 | 20000 | 19999 | 1 | 0.0 | 29.6 | 19894 | 0 | -| recvmmsg | 2 | 4000 | 40000 | 39999 | 1 | 0.0 | 57.9 | 39893 | 0 | -| recvmmsg | 4 | 1000 | 20000 | 19997 | 3 | 0.0 | 26.2 | 15838 | 5 | -| recvmmsg | 4 | 2000 | 40000 | 39997 | 3 | 0.0 | 52.7 | 32025 | 7 | -| recvmmsg | 4 | 4000 | 80000 | 79997 | 3 | 0.0 | 101.0 | 63199 | 69 | -| uring | 1 | 1000 | 5000 | 5000 | 0 | 0.0 | 7.3 | 5048 | 0 | -| uring | 1 | 2000 | 10000 | 10000 | 0 | 0.0 | 14.0 | 10047 | 0 | -| uring | 1 | 4000 | 20000 | 20000 | 0 | 0.0 | 26.7 | 20046 | 1 | -| uring | 2 | 1000 | 10000 | 9999 | 1 | 0.0 | 14.2 | 9897 | 0 | -| uring | 2 | 2000 | 20000 | 19999 | 1 | 0.0 | 27.2 | 19924 | 0 | -| uring | 2 | 4000 | 40000 | 39999 | 1 | 7.6 | 44.8 | 39836 | 2 | -| uring | 4 | 1000 | 20000 | 19997 | 3 | 3.8 | 20.3 | 14763 | 10 | -| uring | 4 | 2000 | 40000 | 39997 | 3 | 8.1 | 42.2 | 33084 | 7 | -| uring | 4 | 4000 | 80000 | 79997 | 3 | 15.3 | 78.8 | 61615 | 43 | -| uring_multi | 1 | 1000 | 5000 | 5000 | 0 | 1.0 | 6.1 | 5000 | 0 | -| uring_multi | 1 | 2000 | 10000 | 10000 | 0 | 1.7 | 11.3 | 10000 | 0 | -| uring_multi | 1 | 4000 | 20000 | 20000 | 0 | 3.9 | 21.1 | 19997 | 0 | -| uring_multi | 
2 | 1000 | 10000 | 10000 | 0 | 1.3 | 7.0 | 5000 | 0 | -| uring_multi | 2 | 2000 | 20000 | 20000 | 0 | 2.5 | 13.4 | 9999 | 0 | -| uring_multi | 2 | 4000 | 40000 | 40000 | 0 | 4.8 | 25.6 | 19996 | 0 | -| uring_multi | 4 | 1000 | 20000 | 20000 | 0 | 1.8 | 9.4 | 5000 | 2 | -| uring_multi | 4 | 2000 | 40000 | 40000 | 0 | 13.6 | 9.5 | 9995 | 22 | -| uring_multi | 4 | 4000 | 80000 | 80000 | 0 | 35.1 | 8.6 | 19984 | 11 | - -# Benchmark C - system contention - -## 4 core ~75% utilization - -| backend | ifaces | rate | sent | recv | lost | user_ms | sys_ms | vol_csw | invol_csw | -| ----------- | ------ | ---- | ----- | ----- | ---- | ------- | ------ | ------- | --------- | -| dedicated | 4 | 4000 | 79991 | 79989 | 2 | 7.8 | 43.5 | 61858 | 169 | -| epoll | 4 | 4000 | 79997 | 79994 | 3 | 5.7 | 40.7 | 33651 | 327 | -| recvmmsg | 4 | 4000 | 79995 | 79994 | 1 | 6.3 | 40.3 | 34500 | 389 | -| uring | 4 | 4000 | 80000 | 79997 | 3 | 3.4 | 46.7 | 39036 | 284 | -| uring_multi | 4 | 4000 | 79993 | 79992 | 1 | 4.4 | 20.7 | 11021 | 110 | - -## 4 core ~90% utilization - -| backend | ifaces | rate | sent | recv | lost | user_ms | sys_ms | vol_csw | invol_csw | -| ----------- | ------ | ---- | ----- | ----- | ---- | ------- | ------ | ------- | --------- | -| dedicated | 4 | 4000 | 79991 | 79991 | 0 | 8.1 | 27.0 | 56873 | 81 | -| epoll | 4 | 4000 | 79993 | 79991 | 2 | 9.9 | 31.7 | 40314 | 150 | -| recvmmsg | 4 | 4000 | 79993 | 79991 | 2 | 7.1 | 33.7 | 39261 | 184 | -| uring | 4 | 4000 | 80000 | 79991 | 9 | 7.9 | 29.2 | 37673 | 115 | -| uring_multi | 4 | 4000 | 79993 | 79992 | 1 | 3.6 | 16.2 | 9232 | 64 | - -**NOTE:** Fewer involuntary context switches under higher CPU utilization is counter intuitive, but -correct. It means the receiver is being starved rather than interrupted. Compare the sys_ms kernel -CPU time. 
- -# Takeaways - -* The pure CPU cost of the receive backends don't matter hugely, because the dominant cost is the - syscalls and context switching -* The multiplex methods are all pretty close to each other in terms of results -* It appears all backends degrade nicely when the system is under high CPU load -* It doesn't look like I'm going to get absolutely no dropped frames -* Batching receives in the multishot backend dramatically reduces kernel CPU time and context - switches, moreso than the other multiplex backends, and even at high CPU load diff --git a/docs/design/candumpr/01-goals.md b/docs/design/candumpr/01-goals.md new file mode 100644 index 0000000..063a249 --- /dev/null +++ b/docs/design/candumpr/01-goals.md @@ -0,0 +1,69 @@ +# candumpr goals + +## Scope + +This document outlines the design goals and feature list of the candumpr utility. + +## Goals + +* A long-running logging daemon useful for troubleshooting events after-the-fact +* Controls for disk usage +* Controls for flash disk wear +* Minimal system performance impact from logging multiple interfaces +* Target low-spec Linux 6.1+ 4-core ~1GHz ARM CPUs with ~1GB memory +* Logs are not corrupted on power loss +* Frame drops due to the socket rcvbuf overflowing are minimized +* Is still useful for troubleshooting early on in the boot process, before the system clock is set +* Controls for system clock jumps + +## Features + +From these goals, we derive the following features + +* Multiple CAN networks logged from one process (performance, utility) +* Logs are rotated, compressed, and follow a retention policy (utility) +* Filename includes the start time of the log (utility) + * Needs further consideration together with system-clock jumps, especially early on in the boot + process. 
+* Address claim PGN requests can be optionally sent upon rotation (utility) +* Bus state changes are logged to stderr (utility) +* Utilize io_uring with multishot to batch receive across multiple interfaces (performance, + low-spec system) +* Streaming compression when writing to disk (performance, disk wear, disk usage) +* Partial frames are not written (corruption) +* Streaming compression does not require an epilogue at the tail to decompress the file (corruption) +* Writes are buffered (performance) +* Dedicated receive and write threads (minimize drops on a low-spec system) +* Multiple output formats are supported: can-utils candump, Vector ASC, PCAP (utility) +  * PCAP, as a binary format, is expected to have a lower disk usage, wear, and compression +    footprint than the can-utils ASCII format. This needs to be verified. + +Note: Many of the performance justifications for features are based on practical experience with +proprietary solutions I cannot share. So it looks like naive "but, performance!" handwaving, but it +_is_ based on experience. Additionally, some of the features exist to work around other constraints +(having a fixed small rmem_max, or low system specs). + +Note: CAN SKBs have a higher overhead than I originally imagined. It differs based on kernel version +and features, but the `recv_cost` benchmark uses `SK_MEMINFO_RMEM_ALLOC` and a probe frame to +calculate the size of each SKB as 960 bytes on my x64 Fedora 42 system. That's enough room for 220 +frames on my system. + +On low-spec systems I have worked on, that is not enough room to prevent frame drops when `write()` +calls sporadically block for multiple seconds. This is the primary motivation for offloading the +formatting, compression, and writing off onto a secondary thread. It's very likely that one thread +could handle the performance cost of everything, but blocking writes can, and do cause frame drops +on the real-world systems I'm writing this tool to support. 
+ +## Needs further design + +The goals around handling invalid system clocks need further thought. It's useful to save the start +timestamp in the filename when it's created. But if the system clock isn't known at that time, or if +it's 30,000 years in the future, what do we do? + +Additionally, how do we handle clock jumps in the middle of a log? + +A potentially useful feature is to include a monotonic file index in each filename so even if the +timestamp isn't known, we can tell what order messages were received in. + +Additionally, the candumpr process should log to stderr upon error, rotation, bus events, clock +jumps, etc. diff --git a/docs/design/candumpr/02-architecture.md b/docs/design/candumpr/02-architecture.md new file mode 100644 index 0000000..c83aba0 --- /dev/null +++ b/docs/design/candumpr/02-architecture.md @@ -0,0 +1,121 @@ +# candumpr architecture + +Status: **PROPOSAL** + +# Scope + +This document proposes the core data pipeline for candumpr. + +# Goals + +The baseline implementation that this proposal intends to improve upon is using one can-utils +`candump` process to do blocking receives for each logged network. On low-spec systems, this results +in a noticeable performance impact, which would be manageable, except that the logging on those +systems is ancillary to the application software those systems are primarily responsible for. + +Paraphrasing the goals from [01-goals.md](/docs/design/candumpr/01-goals.md), the overall goal for +candumpr is to reduce the system performance impact of using candump in this manner. + +# Proposed architecture + +The proposal is to use one shared receive thread that uses io_uring multishot to batch receives +across multiple networks. This reduces the number of syscalls per frame to less than one per frame. +This reduces the overall context switching cost when logging multiple networks. 
+ +As I intend to support systems with slow disks (`write()` syscalls that sporadically block for +multiple seconds), the receive thread is decoupled from the format + write thread, which also +services multiple networks. See [01-goals.md](/docs/design/candumpr/01-goals.md) for additional +background. + +Assume a worst-case throughput of 8x 500kbaud networks at 100% busload. That's 500KB/s of raw data, +plus some inflationary factor from the formatter (formatting as ASCII adds a constant scalar to the +throughput). This is well within the formatting, compression, and write capabilities of a single +thread. + +```mermaid +graph TD +    can0 & can1 & can2 & can3 --> io_uring + +    subgraph recv [recv thread] +        io_uring +    end + +    io_uring --> |spsc| formatter + +    subgraph write [write thread] +        subgraph formatter +            direction TD + +            can-utils-file +            can-utils-console +            vector-asc +            pcap +        end + +        formatter --> |"&[u8]"| Writer + +        subgraph writer +            Writer --> zstd & BufWriter & stdout +            zstd --> RotationHandler +            BufWriter --> RotationHandler +            RotationHandler --> file +        end +    end +``` + +## Receiver detail + +There are many ways in which a receiver thread or threads could be built using Linux syscalls: + +* candump-style blocking `read()` in a dedicated thread per interface +* `epoll()` and non-blocking `read()` to wake up and receive frames one-by-one when they arrive +* `epoll()` and non-blocking `recvmmsg()` to receive as many ready frames as possible on any wakeup +* `io_uring` singleshot - each SQE represents one `read()` - after reading from a socket, the `Recv` +  opcode is resubmitted. +* `io_uring` multishot - one SQE submitted for each socket with the `RecvMsgMulti` opcode and +  `submit_with_args(batch=4, timeout=100ms)` to wait until `batch` frames are ready to receive +  together from any interfaces. + +The multishot io_uring receiver strategy results in _significantly_ fewer syscalls and context +switches per ready CAN frame, resulting in overall better system performance and more graceful +degradation under contention. 
+ +The batch size could be significantly increased when logging to a file, but when logging to +`stdout`, we should use a lower batch size (like 4) to facilitate watching a live log. We cannot +infinitely increase the batch size - there's a tipping point at which if we increase it too far, we +run the risk of filling up the rcvbuf. A batch size of 32 or 64 seems like a reasonable upper +limit. + +## Formatter detail + +Use a Strategy design pattern to format the CAN frame into a bytearray to be written. The output is +a bytearray that may include multiple frames, and an indication of which interface the formatted +frames came from (so they can be written to the appropriate file). The bytearray only ever +includes full frames; a frame will never be split across multiple bytearrays. + +## Compression detail + +There are three approaches I've been able to find: + +1. Independent concatenated frames - periodically call `.finish()` on the zstd `Encoder`, and +   probably call `fsync()` as well + +   Output is decompressable with `zstd -d`. I think with large-ish frames (1MB?) the compression +   ratio might be good enough that the simplicity of this approach wins over the complexity of managing +   training dictionaries in a production context. + +2. Independent concatenated frames with a pretrained dictionary - train a dictionary on CAN data + +   Output is decompressable with `zstd -d -D can.dict`. It might be best to train a dictionary +   specific to each format? We would need to maintain and ship pre-trained dictionaries, and make +   them available to engineers troubleshooting CAN traffic. + +   I think it could be easy to add a configuration option to candumpr to provide your own zstd +   dictionary, in which case candumpr's own implementation doesn't bear the burden of the dictionary +   training, that's offloaded onto the consumer. + +3. Prefix-linked frames - persist the compressor state from previous frames when compressing the +   next. + +   Best compression ratio. 
Output is **not** decompressable with `zstd -d`, would need to implement + a custom decompressor, which I think eliminates this option from consideration. diff --git a/docs/design/candumpr/04-benchmarks.md b/docs/design/candumpr/04-benchmarks.md new file mode 100644 index 0000000..d5b171c --- /dev/null +++ b/docs/design/candumpr/04-benchmarks.md @@ -0,0 +1,233 @@ +# candumpr benchmarks + +candumpr isn't doing a lot of heavy-duty expensive _computation_. However, we've experienced +performance hits from running multiple instances of `candump` together on a low-spec system, so if +we want to build something with lower impact, we should measure and compare. + +The [02-architecture.md](/docs/design/candumpr/02-architecture.md) design document outlines several +basic strategies for receiving CAN frames from multiple networks at once. This document benchmarks +each using three different benchmarks. + +1. `recv_cost` - measure the userspace CPU cost of each implementation. + + This benchmark is the least valuable, as a receiver executing X% more or less instructions is + less impactful to the overall system performance than the amount of context switches and + user/kernel CPU time. +2. `recv_impact` - pin the benchmark to 4 cores and measure the following metrics for each + implementation: + 1. dropped frames + 2. user and kernel CPU time + 3. voluntary and involuntary context switches +3. `recv_contention` - execute the same benchmark as `recv_impact`, but spin 4 threads doing + PWM-style spinloops to hit 75% and 90% CPU usage in each thread. 
+ +These benchmarks can be run with + +```sh +cargo install gungraun-runner +cargo bench +``` + +# Results + +## recv_cost + +``` +recv_cost::recv_cost::dedicated run:setup_blocking() +  Instructions:              454664|N/A             (*********) +  L1 Hits:                   866130|N/A             (*********) +  LL Hits:                    10552|N/A             (*********) +  RAM Hits:                     169|N/A             (*********) +  Total read+write:          876851|N/A             (*********) +  Estimated Cycles:          924805|N/A             (*********) +recv_cost::recv_cost::epoll run:setup_nonblocking() +  Instructions:              519312|N/A             (*********) +  L1 Hits:                   960112|N/A             (*********) +  LL Hits:                    10255|N/A             (*********) +  RAM Hits:                      52|N/A             (*********) +  Total read+write:          970419|N/A             (*********) +  Estimated Cycles:         1013207|N/A             (*********) +  Comparison with dedicated run:setup_blocking() +  Instructions:              454664|519312          (-12.4488%) [-1.14219x] +  L1 Hits:                   866130|960112          (-9.78865%) [-1.10851x] +  LL Hits:                    10552|10255           (+2.89615%) [+1.02896x] +  RAM Hits:                     169|52              (+225.000%) [+3.25000x] +  Total read+write:          876851|970419          (-9.64202%) [-1.10671x] +  Estimated Cycles:          924805|1013207         (-8.72497%) [-1.09559x] +recv_cost::recv_cost::recvmmsg run:setup_nonblocking() +  Instructions:              468571|N/A             (*********) +  L1 Hits:                   882834|N/A             (*********) +  LL Hits:                    10262|N/A             (*********) +  RAM Hits:                      57|N/A             (*********) +  Total read+write:          893153|N/A             (*********) +  Estimated Cycles:          936139|N/A             (*********) +  Comparison with dedicated run:setup_blocking() +  Instructions:              454664|468571          (-2.96796%) [-1.03059x] +  L1 Hits:                   866130|882834          (-1.89209%) [-1.01929x] +  LL Hits:                    10552|10262           (+2.82596%) [+1.02826x] +  RAM Hits:                     169|57              (+196.491%) [+2.96491x] +  Total read+write:          876851|893153          (-1.82522%) [-1.01859x] +  Estimated Cycles:          924805|936139          (-1.21072%) [-1.01226x] +  Comparison with epoll run:setup_nonblocking() +  Instructions:              519312|468571          (+10.8289%) [+1.10829x] +  L1 Hits:                   960112|882834          (+8.75340%) [+1.08753x] +  LL Hits:                    10255|10262           (-0.06821%) [-1.00068x] +  RAM Hits:                      52|57              (-8.77193%) [-1.09615x] +  Total read+write:          970419|893153          (+8.65093%) [+1.08651x] +  Estimated Cycles: 
1013207|936139 (+8.23254%) [+1.08233x] +recv_cost::recv_cost::uring run:setup_nonblocking() + Instructions: 587770|N/A (*********) + L1 Hits: 1071728|N/A (*********) + LL Hits: 10285|N/A (*********) + RAM Hits: 119|N/A (*********) + Total read+write: 1082132|N/A (*********) + Estimated Cycles: 1127318|N/A (*********) + Comparison with dedicated run:setup_blocking() + Instructions: 454664|587770 (-22.6459%) [-1.29276x] + L1 Hits: 866130|1071728 (-19.1838%) [-1.23738x] + LL Hits: 10552|10285 (+2.59601%) [+1.02596x] + RAM Hits: 169|119 (+42.0168%) [+1.42017x] + Total read+write: 876851|1082132 (-18.9701%) [-1.23411x] + Estimated Cycles: 924805|1127318 (-17.9641%) [-1.21898x] + Comparison with epoll run:setup_nonblocking() + Instructions: 519312|587770 (-11.6471%) [-1.13182x] + L1 Hits: 960112|1071728 (-10.4146%) [-1.11625x] + LL Hits: 10255|10285 (-0.29169%) [-1.00293x] + RAM Hits: 52|119 (-56.3025%) [-2.28846x] + Total read+write: 970419|1082132 (-10.3234%) [-1.11512x] + Estimated Cycles: 1013207|1127318 (-10.1223%) [-1.11262x] + Comparison with recvmmsg run:setup_nonblocking() + Instructions: 468571|587770 (-20.2799%) [-1.25439x] + L1 Hits: 882834|1071728 (-17.6252%) [-1.21396x] + LL Hits: 10262|10285 (-0.22363%) [-1.00224x] + RAM Hits: 57|119 (-52.1008%) [-2.08772x] + Total read+write: 893153|1082132 (-17.4636%) [-1.21159x] + Estimated Cycles: 936139|1127318 (-16.9587%) [-1.20422x] +recv_cost::recv_cost::uring_multi run:setup_nonblocking() + Instructions: 686528|N/A (*********) + L1 Hits: 1265738|N/A (*********) + LL Hits: 11611|N/A (*********) + RAM Hits: 217|N/A (*********) + Total read+write: 1277566|N/A (*********) + Estimated Cycles: 1331388|N/A (*********) + Comparison with dedicated run:setup_blocking() + Instructions: 454664|686528 (-33.7734%) [-1.50997x] + L1 Hits: 866130|1265738 (-31.5711%) [-1.46137x] + LL Hits: 10552|11611 (-9.12066%) [-1.10036x] + RAM Hits: 169|217 (-22.1198%) [-1.28402x] + Total read+write: 876851|1277566 (-31.3655%) [-1.45699x] + 
Estimated Cycles: 924805|1331388 (-30.5383%) [-1.43964x] + Comparison with epoll run:setup_nonblocking() + Instructions: 519312|686528 (-24.3568%) [-1.32200x] + L1 Hits: 960112|1265738 (-24.1461%) [-1.31832x] + LL Hits: 10255|11611 (-11.6786%) [-1.13223x] + RAM Hits: 52|217 (-76.0369%) [-4.17308x] + Total read+write: 970419|1277566 (-24.0416%) [-1.31651x] + Estimated Cycles: 1013207|1331388 (-23.8984%) [-1.31403x] + Comparison with recvmmsg run:setup_nonblocking() + Instructions: 468571|686528 (-31.7477%) [-1.46515x] + L1 Hits: 882834|1265738 (-30.2514%) [-1.43372x] + LL Hits: 10262|11611 (-11.6183%) [-1.13146x] + RAM Hits: 57|217 (-73.7327%) [-3.80702x] + Total read+write: 893153|1277566 (-30.0895%) [-1.43040x] + Estimated Cycles: 936139|1331388 (-29.6870%) [-1.42221x] + Comparison with uring run:setup_nonblocking() + Instructions: 587770|686528 (-14.3851%) [-1.16802x] + L1 Hits: 1071728|1265738 (-15.3278%) [-1.18103x] + LL Hits: 10285|11611 (-11.4202%) [-1.12893x] + RAM Hits: 119|217 (-45.1613%) [-1.82353x] + Total read+write: 1082132|1277566 (-15.2974%) [-1.18060x] + Estimated Cycles: 1127318|1331388 (-15.3276%) [-1.18102x] +``` + +**NOTE:** the `uring_multi` benchmark is noticeably more expensive in terms of CPU cost than any of +the other receivers. + +**NOTE:** the callgrind counters only include userspace, not any kernelspace processing. 
+ +## 4-core recv_impact + +| backend | ifaces | rate | sent | recv | lost | user_ms | sys_ms | vol_csw | invol_csw | +| ----------- | ------ | ---- | ----- | ----- | ---- | ------- | ------ | ------- | --------- | +| dedicated | 1 | 1000 | 5000 | 5000 | 0 | 6.2 | 0.0 | 5002 | 0 | +| dedicated | 1 | 2000 | 10000 | 10000 | 0 | 12.0 | 0.0 | 10002 | 0 | +| dedicated | 1 | 4000 | 20000 | 20000 | 0 | 23.4 | 0.0 | 19992 | 1 | +| dedicated | 2 | 1000 | 10000 | 10000 | 0 | 12.7 | 0.0 | 10002 | 0 | +| dedicated | 2 | 2000 | 20000 | 20000 | 0 | 23.5 | 0.0 | 20002 | 0 | +| dedicated | 2 | 4000 | 40000 | 40000 | 0 | 45.4 | 0.0 | 39999 | 2 | +| dedicated | 4 | 1000 | 20000 | 20000 | 0 | 23.0 | 0.0 | 20007 | 8 | +| dedicated | 4 | 2000 | 40000 | 40000 | 0 | 27.6 | 17.6 | 39994 | 43 | +| dedicated | 4 | 4000 | 80000 | 80000 | 0 | 43.2 | 43.7 | 79976 | 74 | +| epoll | 1 | 1000 | 5000 | 5000 | 0 | 0.0 | 7.9 | 5002 | 0 | +| epoll | 1 | 2000 | 10000 | 10000 | 0 | 0.0 | 14.9 | 10002 | 0 | +| epoll | 1 | 4000 | 20000 | 20000 | 0 | 0.0 | 29.1 | 20000 | 1 | +| epoll | 2 | 1000 | 10000 | 10000 | 0 | 0.0 | 15.6 | 9877 | 0 | +| epoll | 2 | 2000 | 20000 | 20000 | 0 | 0.0 | 29.2 | 18685 | 2 | +| epoll | 2 | 4000 | 40000 | 40000 | 0 | 0.0 | 57.6 | 39354 | 1 | +| epoll | 4 | 1000 | 20000 | 20000 | 0 | 0.0 | 26.4 | 15649 | 0 | +| epoll | 4 | 2000 | 40000 | 40000 | 0 | 27.3 | 19.8 | 27296 | 79 | +| epoll | 4 | 4000 | 80000 | 80000 | 0 | 36.9 | 64.5 | 62625 | 38 | +| recvmmsg | 1 | 1000 | 5000 | 5000 | 0 | 1.5 | 6.5 | 5002 | 0 | +| recvmmsg | 1 | 2000 | 10000 | 10000 | 0 | 2.9 | 12.3 | 10002 | 0 | +| recvmmsg | 1 | 4000 | 20000 | 20000 | 0 | 4.9 | 24.3 | 19990 | 3 | +| recvmmsg | 2 | 1000 | 10000 | 10000 | 0 | 2.7 | 11.7 | 8714 | 0 | +| recvmmsg | 2 | 2000 | 20000 | 20000 | 0 | 5.4 | 23.6 | 19785 | 1 | +| recvmmsg | 2 | 4000 | 40000 | 40000 | 0 | 10.9 | 46.2 | 39655 | 1 | +| recvmmsg | 4 | 1000 | 20000 | 20000 | 0 | 5.0 | 21.9 | 16174 | 1 | +| recvmmsg | 4 | 2000 | 40000 | 40000 | 0 | 13.9 | 35.8 
| 30158 | 9 | +| recvmmsg | 4 | 4000 | 80000 | 80000 | 0 | 19.7 | 83.4 | 64193 | 84 | +| uring | 1 | 1000 | 5000 | 5000 | 0 | 1.5 | 6.1 | 5052 | 0 | +| uring | 1 | 2000 | 10000 | 10000 | 0 | 2.7 | 11.4 | 10051 | 0 | +| uring | 1 | 4000 | 20000 | 20000 | 0 | 5.5 | 20.9 | 20045 | 1 | +| uring | 2 | 1000 | 10000 | 10000 | 0 | 2.8 | 11.4 | 9870 | 0 | +| uring | 2 | 2000 | 20000 | 20000 | 0 | 4.8 | 19.8 | 17398 | 1 | +| uring | 2 | 4000 | 40000 | 40000 | 0 | 9.9 | 40.9 | 39676 | 3 | +| uring | 4 | 1000 | 20000 | 20000 | 0 | 4.8 | 19.7 | 15157 | 1 | +| uring | 4 | 2000 | 40000 | 40000 | 0 | 15.1 | 33.5 | 31504 | 11 | +| uring | 4 | 4000 | 80000 | 80000 | 0 | 10.5 | 85.0 | 63739 | 51 | +| uring_multi | 1 | 1000 | 5000 | 5000 | 0 | 0.0 | 6.1 | 5002 | 0 | +| uring_multi | 1 | 2000 | 10000 | 10000 | 0 | 0.0 | 11.9 | 10003 | 0 | +| uring_multi | 1 | 4000 | 20000 | 20000 | 0 | 1.9 | 20.7 | 19981 | 1 | +| uring_multi | 2 | 1000 | 10000 | 10000 | 0 | 2.1 | 9.3 | 9727 | 0 | +| uring_multi | 2 | 2000 | 20000 | 20000 | 0 | 4.1 | 18.2 | 19750 | 0 | +| uring_multi | 2 | 4000 | 40000 | 40000 | 0 | 7.9 | 34.5 | 39573 | 2 | +| uring_multi | 4 | 1000 | 20000 | 20000 | 0 | 3.9 | 17.0 | 15835 | 10 | +| uring_multi | 4 | 2000 | 40000 | 40000 | 0 | 6.8 | 33.1 | 28508 | 46 | +| uring_multi | 4 | 4000 | 80000 | 80000 | 0 | 13.8 | 67.9 | 64548 | 9 | + +## 4-core recv_contention + +### ~75% CPU + +| backend | ifaces | rate | sent | recv | lost | user_ms | sys_ms | vol_csw | invol_csw | +| ----------- | ------ | ---- | ----- | ----- | ---- | ------- | ------ | ------- | --------- | +| dedicated | 4 | 4000 | 80000 | 80000 | 0 | 10.0 | 40.9 | 58617 | 178 | +| epoll | 4 | 4000 | 79982 | 79982 | 0 | 9.2 | 38.8 | 34911 | 324 | +| recvmmsg | 4 | 4000 | 79999 | 79999 | 0 | 7.9 | 39.0 | 34242 | 329 | +| uring | 4 | 4000 | 79997 | 79997 | 0 | 6.9 | 36.0 | 29406 | 402 | +| uring_multi | 4 | 4000 | 79995 | 79995 | 0 | 5.3 | 34.9 | 36565 | 227 | + +### ~90% CPU + +| backend | ifaces | rate | sent | recv | 
lost | user_ms | sys_ms | vol_csw | invol_csw | +| ----------- | ------ | ---- | ----- | ----- | ---- | ------- | ------ | ------- | --------- | +| dedicated   | 4      | 4000 | 79998 | 79998 | 0    | 8.1     | 27.5   | 56204   | 94        | +| epoll       | 4      | 4000 | 79995 | 79995 | 0    | 7.7     | 35.3   | 38999   | 145       | +| recvmmsg    | 4      | 4000 | 80000 | 80000 | 0    | 3.3     | 37.8   | 38645   | 138       | +| uring       | 4      | 4000 | 79978 | 79978 | 0    | 6.4     | 31.9   | 35142   | 150       | +| uring_multi | 4      | 4000 | 80000 | 80000 | 0    | 3.4     | 27.9   | 35751   | 85        | + +**NOTE:** Fewer involuntary context switches under higher CPU utilization is counter intuitive, but +correct. It means the receiver is being starved rather than interrupted. Compare the sys_ms kernel +CPU time between 75% and 90% results. + +## Takeaways + +* The pure CPU cost of the receivers doesn't matter nearly as much as the number of syscalls and +  context switches. +* The multiplex methods (epoll, recvmmsg, and uring) are all pretty similar to each other. The +  uring_multi approach is significantly better than the rest due to batching receives. It's +  equivalent in cost if we set the batch size to 1. +* It appears all backends degrade nicely when the system is under high CPU load. +  * This is with a very cheap frame handler that exerts no backpressure +* It doesn't look like it's possible to guarantee no dropped frames diff --git a/docs/developer/quickstart.md b/docs/developer/quickstart.md new file mode 100644 index 0000000..b32ffae --- /dev/null +++ b/docs/developer/quickstart.md @@ -0,0 +1,98 @@ +# Developer quickstart + +## MSRV + +This is a Cargo virtual workspace project. All crates are versioned and released together. The MSRV +is Rust 1.89. + +The minimum supported target environment is Linux 6.1+ with a 4-core ~1GHz ARM CPU with ~1GB of +memory. Many of the design choices reflect the constraints of this environment. 
+ +## Build and lint + +Building and testing is the usual: + +```sh +cargo build +cargo clippy --all-targets +``` + +This project uses custom rustfmt options that make merge conflicts on module imports much +easier to resolve: + +```sh +cargo fmt -- --config group_imports=StdExternalCrate,imports_granularity=Module +``` + +There are examples you can run with `cargo run --example=dump`. You likely need to create at least +one vcan network on your development host: + +```sh +sudo ip link add dev can0 type vcan +sudo ip link set up can0 +``` + +## Tests + +Tests may be run either with `cargo test` or `cargo nextest`: + +```sh +cargo test +cargo nextest run +``` + +### Test fixtures + +There are test fixtures provided by the [vcan-fixture](/vcan-fixture/src/lib.rs) crate. This +provides several features: + +* `enter_namespace()` - enter a process namespace that allows creating vcan networks, which would +  otherwise require additional permissions outside the namespace. +* `VcanHarness::new(num)` - create a number of unique vcan interfaces - this is thread safe, and is +  intended for use in tests. +* `bench::getrusage_thread()` and `getrusage_self()` - get resource usages for the current thread or +  process. This measures user and system time, as well as context switches. Other resources could be +  added in the future. +* `bench::pin_to_cores(n)` - pin the current process to the first `n` CPU cores +* `bench::start_cpu_load(num, percent)` - starts `num` threads doing a PWM-like busyloop to hit +  `percent` CPU usage + +It's assumed that the local developer environment has the necessary vcan kernel module. In CI, we +attempt to install the vcan module, but can skip the vcan-dependent tests with a warning if it's not +available. + +### ASAN + +As this project uses quite a bit of `unsafe` Rust to interact with `libc`, it's important to run +with ASAN. 
You can do this with: + +```sh +# tests +RUSTFLAGS="$RUSTFLAGS -Zsanitizer=address" cargo +nightly nextest run -Zbuild-std --target x86_64-unknown-linux-gnu +# example +RUSTFLAGS="$RUSTFLAGS -Zsanitizer=address" cargo +nightly run -Zbuild-std --target x86_64-unknown-linux-gnu --example=dump +``` + +## Benchmarks + +This project includes several benchmarks. Some of them depend on +[gungraun](https://gungraun.github.io/gungraun/latest/html/index.html): + +```sh +cargo install gungraun-runner +cargo bench +``` + +## Release process + +This project isn't released to crates.io, but there is still a GitHub release workflow. +Here's the release checklist: + +* [ ] Use SemVer to pick an appropriate version number +* [ ] Edit the workspace [Cargo.toml](/Cargo.toml)'s `workspace.package.version` +* [ ] Ensure the [CHANGELOG.md](/CHANGELOG.md) has a heading for the new version +* [ ] Check the changelog entry. Did anything get forgotten? Is it formatted well? Spelling, +  phrasing, grammar, etc. +* [ ] Merge a PR including the Cargo.toml and CHANGELOG.md changes. 
+  * [ ] A Git tag will be generated +  * [ ] The contents of the CHANGELOG.md will be used to create a GitHub release diff --git a/docs/candumpr-configuration.md b/docs/user/candumpr-configuration.md similarity index 100% rename from docs/candumpr-configuration.md rename to docs/user/candumpr-configuration.md diff --git a/vcan-fixture/Cargo.toml b/vcan-fixture/Cargo.toml index f324eee..a645c2a 100644 --- a/vcan-fixture/Cargo.toml +++ b/vcan-fixture/Cargo.toml @@ -10,6 +10,7 @@ description = "Build vcan interfaces in isolated network namespaces" ci = [] [dependencies] +assert_cmd.workspace = true ctor.workspace = true eyre.workspace = true libc.workspace = true diff --git a/vcan-fixture/src/cmd.rs b/vcan-fixture/src/cmd.rs new file mode 100644 index 0000000..9889d31 --- /dev/null +++ b/vcan-fixture/src/cmd.rs @@ -0,0 +1,43 @@ +use std::process::Output; + +pub use assert_cmd::Command; + +pub trait CommandExt { +    /// Same as [Command::output] except with hooks to print stdout/stderr in failed tests +    fn captured_output(&mut self) -> std::io::Result<Output>; +} + +impl CommandExt for Command { +    fn captured_output(&mut self) -> std::io::Result<Output> { +        let output = self.output()?; + +        // libtest injects magic in print! macros to capture output in tests +        print!("{}", String::from_utf8_lossy(&output.stdout)); +        eprint!("{}", String::from_utf8_lossy(&output.stderr)); + +        Ok(output) +    } +} + +/// Get a command to run the given tool binary. +/// +/// Uses `CARGO_BIN_EXE_<name>` which cargo sets at compile time for integration tests in the same +/// crate as the binary. +/// +/// # Example +/// ```ignore +/// use vcan_fixture::cmd::{tool, CommandExt}; +/// +/// let output = tool!("candumpr") +///     .arg("--help") +///     .captured_output() +///     .unwrap(); +/// ``` +#[macro_export] +macro_rules! 
tool { + ($name:literal) => {{ + let mut cmd = $crate::Command::new(env!(concat!("CARGO_BIN_EXE_", $name))); + cmd.arg("--log-level=TRACE"); + cmd + }}; +} diff --git a/vcan-fixture/src/lib.rs b/vcan-fixture/src/lib.rs index 500ab91..5eed516 100644 --- a/vcan-fixture/src/lib.rs +++ b/vcan-fixture/src/lib.rs @@ -49,6 +49,7 @@ //! available by default. pub mod bench; +pub mod cmd; mod netlink; use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};