diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index a5f7f6f3f..1844c6875 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -64,6 +64,11 @@ jobs: # ping_test.go. Go test helpers are only in scope within the same directory, # so both files must reside in builtins/ping/. corpus_path: builtins/ping + - pkg: ./builtins/jq/ + name: jq + # jq fuzz tests live in builtins/jq/ alongside the rest of the jq tests + # for the same reason as ping: they share helpers (runScript, jqRun, etc.). + corpus_path: builtins/jq - pkg: ./interp/tests/ name: interp corpus_path: interp/tests diff --git a/SHELL_FEATURES.md b/SHELL_FEATURES.md index aaf919bd7..7c1d4a0c4 100644 --- a/SHELL_FEATURES.md +++ b/SHELL_FEATURES.md @@ -19,6 +19,7 @@ Blocked features are rejected before execution with exit code 2. - ✅ `ip [-o|-4|-6|--brief] addr|link [show] [dev IFNAME]` — show network interface addresses and link-layer info (read-only); write ops (`add`, `del`, `flush`, `set`), namespace ops (`netns`, `-n`), and batch mode (`-b`/`-B`/`--force`) are blocked - ✅ `ip route [show|list]` — show IPv4 routing table (Linux only; reads `/proc/net/route` directly via `os.Open`, bypassing `AllowedPaths`); at most 10 000 entries loaded; lines longer than 1 MiB abort parsing with an error (exit 1) - ✅ `ip route get ADDRESS` — show the route selected by longest-prefix-match for ADDRESS (Linux only); write ops (`add`, `del`, `flush`, `replace`, `change`, `save`, `restore`) are blocked; `-6` (IPv6 routing) is not supported +- ✅ `jq [-c|-r|-j|-n|-s|-R|-a|-S|-e] FILTER [FILE]...` — command-line JSON processor backed by the fastjq engine (zero-allocation, no fs/net I/O); per-source input capped at 64 MiB, JSON nesting capped at 256 levels, filter source capped at 64 KiB; `-C/--color-output`, `-M`, `-f/--from-file`, `--rawfile`, `--slurpfile`, `--arg`, `--argjson`, `--args`, `--jsonargs`, `--seq`, `--stream` are not supported - ✅ `sort [-rnhubfds] [-k KEYDEF] [-t SEP] 
[-c|-C] [FILE]...` — sort lines of text files; `-h`/`--human-numeric-sort` orders by SI suffix (none < K/k < M < G < T < P < E < Z < Y < R < Q) then by numeric value (single-letter suffixes only — `Ki`, `Mi`, etc. are not recognised); `-o`, `--compress-program`, and `-T` are rejected (filesystem write / exec) - ✅ `ss [-tuaxlans4689Hoehs] [OPTION]...` — display network socket statistics; reads kernel socket state directly via `os.Open` (bypassing `AllowedPaths`) from: Linux: `/proc/net/`; macOS: sysctl; Windows: iphlpapi.dll; `-F`/`--filter` (GTFOBins file-read), `-p`/`--processes` (PID disclosure), `-K`/`--kill`, `-E`/`--events`, and `-N`/`--net` are rejected - ✅ `ls [-1aAdFhlpRrSt] [--offset N] [--limit N] [FILE]...` — list directory contents; `--offset`/`--limit` are non-standard pagination flags (single-directory only, silently ignored with `-R` or multiple arguments, capped at 1,000 entries per call); offset operates on filesystem order (not sorted order) for O(n) memory diff --git a/analysis/symbols_builtins.go b/analysis/symbols_builtins.go index 31722bacd..803371e67 100644 --- a/analysis/symbols_builtins.go +++ b/analysis/symbols_builtins.go @@ -398,24 +398,61 @@ var builtinPerCommandSymbols = map[string][]string{ // Note: builtins/internal/procnetroute symbols are exempt from this allowlist // (internal packages are not checked by the builtinAllowedSymbols test). }, + "jq": { + "bufio.NewScanner", // 🟢 line-by-line scanner for --raw-input mode; no write or exec capability. + "bytes.Buffer", // 🟢 in-memory buffer for re-emitting JSON; no I/O side effects. + "bytes.Equal", // 🟢 byte-slice equality check; pure function, no I/O. + "bytes.NewReader", // 🟢 wraps a byte slice as an io.Reader for json.Decoder; pure in-memory. + "context.Context", // 🟢 deadline/cancellation plumbing; pure interface, no side effects. + "encoding/json.Decoder", // 🟢 streaming JSON token reader; pure in-memory operation, no I/O of its own. 
+ "encoding/json.Delim", // 🟢 token type for JSON delimiters; pure type, no I/O. + "encoding/json.Marshal", // 🟢 encodes a Go value as JSON bytes; pure in-memory, no I/O. + "encoding/json.NewDecoder", // 🟢 constructs a json.Decoder around an io.Reader; pure constructor. + "encoding/json.Number", // 🟢 raw-number type used to preserve numeric formatting through Token; pure type. + "encoding/json.RawMessage", // 🟢 deferred-decoding type for raw JSON value bytes; pure type. + "encoding/json.Unmarshal", // 🟢 decodes JSON bytes into a Go value; pure in-memory, no I/O. + "errors.Is", // 🟢 error comparison; pure function, no I/O. + "errors.New", // 🟢 creates a simple error value; pure function, no I/O. + "fmt.Errorf", // 🟢 error formatting; pure function, no I/O. + "fmt.Fprintf", // 🟠 formats and writes to a writer; we only ever target an in-memory bytes.Buffer here. + "github.com/brianfloersch/fastjq.Compile", // 🟢 compiles a jq filter expression into a *Program; pure CPU, no I/O, no recursion through user input. Engine is fuzz-hardened to never panic. + "github.com/brianfloersch/fastjq.Program", // 🟢 compiled jq program type; runs on []byte inputs only, no fs/net I/O. + "io.EOF", // 🟢 sentinel error value; pure constant. + "io.NopCloser", // 🟢 wraps a Reader with a no-op Close; no side effects. + "io.ReadCloser", // 🟢 interface type for read+close; no side effects. + "io.Reader", // 🟢 interface type for reading; no side effects. + "os.O_RDONLY", // 🟢 read-only file flag constant; cannot open files by itself. + "slices.SortFunc", // 🟢 sorts a slice with a comparison function; pure function, no I/O. + "unicode/utf8.DecodeRuneInString", // 🟢 decodes first UTF-8 rune from a string; pure function, no I/O. + }, } var builtinAllowedSymbols = []string{ - "bufio.NewScanner", // 🟢 line-by-line input reading (e.g. head, cat); no write or exec capability. - "bufio.Scanner", // 🟢 scanner type for buffered input reading; no write or exec capability. 
- "bufio.SplitFunc", // 🟢 type for custom scanner split functions; pure type, no I/O. - "bytes.Buffer", // 🟢 in-memory buffer to capture command output; no I/O side effects. - "bytes.Equal", // 🟢 compares two byte slices for equality; pure function, no I/O. - "bytes.IndexByte", // 🟢 finds a byte in a byte slice; pure function, no I/O. - "bytes.NewReader", // 🟢 wraps a byte slice as an io.Reader; pure in-memory, no I/O. - "context.Context", // 🟢 deadline/cancellation plumbing; pure interface, no side effects. - "context.WithTimeout", // 🟢 creates a child context with a deadline; no filesystem or network I/O itself. - "errors.As", // 🟢 error type assertion; pure function, no I/O. - "errors.Is", // 🟢 error comparison; pure function, no I/O. - "errors.New", // 🟢 creates a simple error value; pure function, no I/O. - "fmt.Errorf", // 🟢 error formatting; pure function, no I/O. - "fmt.Sprintf", // 🟢 string formatting; pure function, no I/O. + "bufio.NewScanner", // 🟢 line-by-line input reading (e.g. head, cat); no write or exec capability. + "bufio.Scanner", // 🟢 scanner type for buffered input reading; no write or exec capability. + "bufio.SplitFunc", // 🟢 type for custom scanner split functions; pure type, no I/O. + "bytes.Buffer", // 🟢 in-memory buffer to capture command output; no I/O side effects. + "bytes.Equal", // 🟢 compares two byte slices for equality; pure function, no I/O. + "bytes.IndexByte", // 🟢 finds a byte in a byte slice; pure function, no I/O. + "bytes.NewReader", // 🟢 wraps a byte slice as an io.Reader; pure in-memory, no I/O. + "context.Context", // 🟢 deadline/cancellation plumbing; pure interface, no side effects. + "context.WithTimeout", // 🟢 creates a child context with a deadline; no filesystem or network I/O itself. + "encoding/json.Decoder", // 🟢 streaming JSON token reader used by jq; pure in-memory, no I/O of its own. + "encoding/json.Delim", // 🟢 token type for JSON delimiters returned by Decoder.Token; pure type. 
+ "encoding/json.Marshal", // 🟢 encodes a Go value as JSON bytes; pure in-memory, no I/O. + "encoding/json.NewDecoder", // 🟢 constructs a json.Decoder around an io.Reader; pure constructor. + "encoding/json.Number", // 🟢 raw-number type used to preserve numeric formatting; pure type. + "encoding/json.RawMessage", // 🟢 deferred-decoding type for raw JSON value bytes; pure type. + "encoding/json.Unmarshal", // 🟢 decodes JSON bytes into a Go value; pure in-memory, no I/O. + "errors.As", // 🟢 error type assertion; pure function, no I/O. + "errors.Is", // 🟢 error comparison; pure function, no I/O. + "errors.New", // 🟢 creates a simple error value; pure function, no I/O. + "fmt.Errorf", // 🟢 error formatting; pure function, no I/O. + "fmt.Fprintf", // 🟠 formats and writes to a writer; only used in jq targeting an in-memory bytes.Buffer. + "fmt.Sprintf", // 🟢 string formatting; pure function, no I/O. "github.com/DataDog/rshell/internal/version.Version", // 🟢 build version string; read-only package-level variable, no I/O. + "github.com/brianfloersch/fastjq.Compile", // 🟢 compiles a jq filter expression to a *Program; pure CPU, no fs/net I/O. Engine is fuzz-hardened to never panic. + "github.com/brianfloersch/fastjq.Program", // 🟢 compiled jq program type; runs on []byte inputs only, no fs/net I/O. "github.com/prometheus-community/pro-bing.NewPinger", // 🔴 creates an ICMP pinger by resolving host; network I/O is the explicit purpose of the ping builtin. "github.com/prometheus-community/pro-bing.NoopLogger", // 🟢 no-op logger that discards pro-bing internal messages; no side effects. "github.com/prometheus-community/pro-bing.Packet", // 🟢 ICMP packet descriptor struct (received packet data); pure data type, no I/O. 
diff --git a/builtins/jq/builtin_jq_pentest_test.go b/builtins/jq/builtin_jq_pentest_test.go new file mode 100644 index 000000000..2b74b9522 --- /dev/null +++ b/builtins/jq/builtin_jq_pentest_test.go @@ -0,0 +1,317 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +// Pentest-style exploratory tests for the jq builtin. Run with: +// +// go test ./builtins/jq/ -run TestJqPentest -timeout 120s +package jq_test + +import ( + "context" + "os" + "path/filepath" + "runtime" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/DataDog/rshell/builtins/jq" + "github.com/DataDog/rshell/builtins/testutil" + "github.com/DataDog/rshell/interp" +) + +const pentestTimeout = 10 * time.Second + +func jqRunCtx(ctx context.Context, t *testing.T, script, dir string) (string, string, int) { + t.Helper() + return testutil.RunScriptCtx(ctx, t, script, dir, interp.AllowedPaths([]string{dir})) +} + +// withTimeout runs fn and fails the test if it does not return within +// pentestTimeout. Used to convert a hang into a clean failure. 
+func withTimeout(t *testing.T, fn func()) { + t.Helper() + done := make(chan struct{}) + go func() { + fn() + close(done) + }() + select { + case <-done: + case <-time.After(pentestTimeout): + t.Fatalf("operation did not complete within %s", pentestTimeout) + } +} + +// --- Flag injection / unknown flags --- + +func TestJqPentestRejectsUnknownLongFlag(t *testing.T) { + dir := t.TempDir() + _, stderr, code := jqRun(t, "jq --no-such-flag .", dir) + assert.Equal(t, 1, code) + assert.NotEqual(t, "", stderr) +} + +func TestJqPentestRejectsUnknownShortFlag(t *testing.T) { + dir := t.TempDir() + _, stderr, code := jqRun(t, "jq -X .", dir) + assert.Equal(t, 1, code) + assert.NotEqual(t, "", stderr) +} + +func TestJqPentestRejectsRawfile(t *testing.T) { + // --rawfile is a real jq flag we deliberately don't implement. + dir := t.TempDir() + _, stderr, code := jqRun(t, "jq --rawfile name file .", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "unknown flag") +} + +func TestJqPentestRejectsSlurpfile(t *testing.T) { + dir := t.TempDir() + _, stderr, code := jqRun(t, "jq --slurpfile name file .", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "unknown flag") +} + +func TestJqPentestRejectsArgFlags(t *testing.T) { + dir := t.TempDir() + for _, flag := range []string{"--arg", "--argjson", "--args", "--jsonargs"} { + _, stderr, code := jqRun(t, "jq "+flag+" .", dir) + assert.Equal(t, 1, code, "%s should be rejected", flag) + assert.Contains(t, stderr, "unknown flag") + } +} + +// `--` end-of-flags must allow a filename starting with `-`. +func TestJqPentestEndOfFlags(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "-weird.json", `{"a":1}`) + stdout, _, code := jqRun(t, "jq -- . -weird.json", dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, `"a"`) +} + +// --- Path edge cases --- + +func TestJqPentestNonexistentFile(t *testing.T) { + dir := t.TempDir() + _, stderr, code := jqRun(t, "jq . 
missing.json", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "missing.json") +} + +func TestJqPentestAccessDeniedAbsolutePath(t *testing.T) { + dir := t.TempDir() + other := t.TempDir() + writeFile(t, other, "x.json", `{"a":1}`) + abs := filepath.Join(other, "x.json") + _, stderr, code := runScript(t, "jq . "+abs, dir, interp.AllowedPaths([]string{dir})) + assert.Equal(t, 1, code) + assert.NotEqual(t, "", stderr) +} + +func TestJqPentestPathTraversalDenied(t *testing.T) { + // "../escape.json" must not escape the AllowedPaths sandbox. + dir := t.TempDir() + parent := filepath.Dir(dir) + require.NoError(t, os.WriteFile(filepath.Join(parent, "escape.json"), []byte(`{"a":1}`), 0644)) + defer os.Remove(filepath.Join(parent, "escape.json")) + _, stderr, code := runScript(t, "jq . ../escape.json", dir, interp.AllowedPaths([]string{dir})) + assert.Equal(t, 1, code) + assert.NotEqual(t, "", stderr) +} + +func TestJqPentestDirectoryAsFile(t *testing.T) { + dir := t.TempDir() + require.NoError(t, os.MkdirAll(filepath.Join(dir, "sub"), 0755)) + withTimeout(t, func() { + _, stderr, code := jqRun(t, "jq . sub", dir) + assert.Equal(t, 1, code) + assert.NotEqual(t, "", stderr) + }) +} + +// --- Special files (Unix only) --- + +func TestJqPentestDevNull(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("/dev/null path is Unix-specific") + } + dir := t.TempDir() + withTimeout(t, func() { + stdout, stderr, code := runScript(t, "jq -c . "+os.DevNull, dir, + interp.AllowedPaths([]string{dir, "/dev"})) + // /dev/null is empty → no output; either succeed cleanly or + // reject with a sandbox/permission error. Both are acceptable — + // the only real requirement is that it doesn't hang. + assert.True(t, code == 0 || code == 1, "code=%d stderr=%q", code, stderr) + if code == 0 { + assert.Equal(t, "", stdout) + } + }) +} + +// /dev/zero is unbounded; the per-source cap MUST trip the read in +// bounded time. Without the cap this test would never return. 
+func TestJqPentestDevZeroBoundedFail(t *testing.T) { + if runtime.GOOS != "linux" && runtime.GOOS != "darwin" { + t.Skip("/dev/zero is Unix-specific") + } + dir := t.TempDir() + ctx, cancel := context.WithTimeout(context.Background(), pentestTimeout) + defer cancel() + start := time.Now() + _, stderr, code := testutil.RunScriptCtx(ctx, t, "jq -c . /dev/zero", dir, + interp.AllowedPaths([]string{dir, "/dev"})) + elapsed := time.Since(start) + // Must fail (not hang) and must finish well before the 10 s test cap. + assert.NotEqual(t, 0, code, "jq /dev/zero should fail") + assert.NotEqual(t, "", stderr) + assert.Less(t, elapsed, 9*time.Second) +} + +func TestJqPentestDevZeroRawInputBoundedFail(t *testing.T) { + if runtime.GOOS != "linux" && runtime.GOOS != "darwin" { + t.Skip("/dev/zero is Unix-specific") + } + dir := t.TempDir() + ctx, cancel := context.WithTimeout(context.Background(), pentestTimeout) + defer cancel() + start := time.Now() + _, _, code := testutil.RunScriptCtx(ctx, t, "jq -R . /dev/zero", dir, + interp.AllowedPaths([]string{dir, "/dev"})) + elapsed := time.Since(start) + assert.NotEqual(t, 0, code) + assert.Less(t, elapsed, 9*time.Second) +} + +// --- Long-line / large-file edge cases --- + +// A single line one byte short of the cap must scan successfully. +func TestJqPentestRawInputLineCapMinus1(t *testing.T) { + dir := t.TempDir() + body := strings.Repeat("k", jq.MaxLineBytes-1) + writeFile(t, dir, "x.txt", body+"\n") + stdout, _, code := jqRun(t, "jq -R length x.txt", dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "1048575") +} + +// A line just past the cap must fail, not be silently truncated. +func TestJqPentestRawInputLineCapPlus1(t *testing.T) { + dir := t.TempDir() + body := strings.Repeat("k", jq.MaxLineBytes+1) + writeFile(t, dir, "x.txt", body+"\n") + _, _, code := jqRun(t, "jq -R . x.txt", dir) + assert.Equal(t, 1, code) +} + +// A large number of files in one invocation must not leak file descriptors. 
+func TestJqPentestManyFiles(t *testing.T) { + dir := t.TempDir() + const n = 200 + var args strings.Builder + for i := 0; i < n; i++ { + name := "f" + strings.Repeat("0", 4-len(itoa(i))) + itoa(i) + ".json" + writeFile(t, dir, name, "1") + args.WriteByte(' ') + args.WriteString(name) + } + stdout, _, code := jqRun(t, "jq -c ."+args.String(), dir) + assert.Equal(t, 0, code) + // 200 inputs × "1\n" each. + assert.Equal(t, strings.Repeat("1\n", n), stdout) +} + +func itoa(i int) string { + if i == 0 { + return "0" + } + var b [10]byte + pos := len(b) + for i > 0 { + pos-- + b[pos] = byte('0' + i%10) + i /= 10 + } + return string(b[pos:]) +} + +// --- Pathological filter / runtime errors --- + +// A filter that compiles but throws at runtime returns exit 1. +func TestJqPentestFilterRuntimeError(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", "1") + _, stderr, code := jqRun(t, `jq '. + "x"' x.json`, dir) + assert.Equal(t, 1, code) + assert.NotEqual(t, "", stderr) +} + +// Compiler must reject very large filters. +func TestJqPentestOverlongFilter(t *testing.T) { + dir := t.TempDir() + huge := strings.Repeat("0,", jq.MaxFilterBytes+10) + "0" + _, stderr, code := jqRun(t, "jq -n '"+huge+"'", dir) + assert.Equal(t, 2, code) + assert.Contains(t, stderr, "filter too large") +} + +// --- Context cancellation promptness --- + +func TestJqPentestPreCancelledReturnsFast(t *testing.T) { + dir := t.TempDir() + // Build a 100k-line stream — enough to take a noticeable time without cancel. + var sb strings.Builder + for i := 0; i < 100_000; i++ { + sb.WriteString(`{"i":0}` + "\n") + } + writeFile(t, dir, "x.json", sb.String()) + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + start := time.Now() + _, _, _ = jqRunCtx(ctx, t, "jq -c . x.json", dir) + assert.Less(t, time.Since(start), 2*time.Second) +} + +// --- Stdin edge cases --- + +// Multiple `-` operands are not specially supported but must not crash. 
+func TestJqPentestMultipleDashOperands(t *testing.T) { + dir := t.TempDir() + _, _, code := jqRun(t, `printf '1' | jq -c . - -`, dir) + // Either succeeds (each `-` re-reads same exhausted stdin) or fails + // cleanly. Only requirement: no panic, exit code 0 or 1. + assert.True(t, code == 0 || code == 1) +} + +// --- Depth cap in the re-emitter --- + +// A result nested deeper than the emitter's recursion bound must be +// rejected by the emitter rather than recurse unboundedly. +func TestJqPentestRecursiveDescentDeepBounded(t *testing.T) { + dir := t.TempDir() + // Construct nested array deeper than the emitter's depth bound. + var sb strings.Builder + for i := 0; i < 1000; i++ { + sb.WriteByte('[') + } + sb.WriteByte('1') + for i := 0; i < 1000; i++ { + sb.WriteByte(']') + } + writeFile(t, dir, "deep.json", sb.String()) + withTimeout(t, func() { + _, _, code := jqRun(t, "jq . deep.json", dir) + // fastjq itself can compute on this depth, but our re-emitter + // must refuse to render it past maxEmitDepth. + assert.Equal(t, 1, code) + }) +} diff --git a/builtins/jq/jq.go b/builtins/jq/jq.go new file mode 100644 index 000000000..7b157c6f6 --- /dev/null +++ b/builtins/jq/jq.go @@ -0,0 +1,864 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +// Package jq implements the jq builtin command. +// +// jq — command-line JSON processor +// +// Usage: jq [OPTION]... FILTER [FILE]... +// +// Apply FILTER to each JSON value parsed from FILE(s) (or standard input +// when no FILE is given) and print the results. When FILTER is omitted +// it defaults to the identity filter ".". +// +// The filter language and output formatting follow the jq manual at +// https://jqlang.org/manual/. 
The expression engine is the fastjq library +// (Go module github.com/brianfloersch/fastjq, hosted internally at +// github.com/DataDog/fastjq) — a zero-allocation, pure-CPU jq engine that +// operates directly on JSON bytes. fastjq does not access the filesystem +// or the network; the only I/O performed by this builtin is reading input +// documents through the shell's sandboxed callCtx.OpenFile. +// +// Accepted flags: +// +// -c, --compact-output Compact output (single line per value). +// -r, --raw-output Print JSON-string outputs without surrounding quotes. +// -j, --join-output Like -r, but no newline between outputs. +// -n, --null-input Do not read input; run filter once with input=null. +// -s, --slurp Read all inputs into one array; run filter once. +// -R, --raw-input Each input line is a JSON string. +// -a, --ascii-output Escape non-ASCII characters as \uXXXX. +// -S, --sort-keys Sort object fields by key in output. +// -e, --exit-status Set exit code based on truthiness of outputs. +// -h, --help Print this usage message and exit. +// +// Exit codes: +// +// 0 Success. +// 1 Runtime error (unknown flag, file not found, invalid JSON, runtime +// filter error, or with -e all outputs were null/false/absent). +// 2 Usage error (FILTER too large). +// 3 Compile error (FILTER could not be parsed). +// +// Line endings: +// +// In --raw-input mode, lines are split on LF. Embedded CR before LF is +// stripped (matching bufio.ScanLines and real jq). Lone CR (classic-Mac +// convention) is treated as part of the line, matching real jq behaviour. +// +// Memory safety: +// +// Per-source input is hard-capped at MaxStreamBytes (64 MiB). When --slurp +// is given the same cap applies to the aggregate input across all files. +// Each line in --raw-input mode is capped at MaxLineBytes (1 MiB). The +// FILTER expression itself is capped at MaxFilterBytes (64 KiB). 
The JSON +// re-emitter caps its recursion at maxEmitDepth (256) and its output size +// at MaxStreamBytes. All loops check ctx.Err() at every iteration to +// honour the shell's execution timeout and graceful cancellation. fastjq's +// own fuzz suite guarantees the engine never panics on any byte input. +package jq + +import ( + "bufio" + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "os" + "slices" + "unicode/utf8" + + "github.com/brianfloersch/fastjq" + + "github.com/DataDog/rshell/builtins" +) + +// Cmd is the jq builtin command descriptor. +var Cmd = builtins.Command{ + Name: "jq", + Description: "command-line JSON processor", + MakeFlags: registerFlags, +} + +// MaxStreamBytes caps the bytes read from any single input source (file +// or stdin). Larger inputs cause the command to fail with an error +// instead of allocating unbounded memory. +const MaxStreamBytes = 64 << 20 // 64 MiB + +// MaxLineBytes caps a single line in --raw-input mode. +const MaxLineBytes = 1 << 20 // 1 MiB + +// MaxFilterBytes caps the FILTER expression itself. +const MaxFilterBytes = 1 << 16 // 64 KiB + +const readChunk = 32 * 1024 + +// options holds the parsed flag state. 
+type options struct { + compact bool + rawOutput bool + joinOut bool + nullInput bool + slurp bool + rawInput bool + ascii bool + sortKeys bool + exitStat bool +} + +func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc { + help := fs.BoolP("help", "h", false, "print usage and exit") + compact := fs.BoolP("compact-output", "c", false, "compact instead of pretty-printed output") + raw := fs.BoolP("raw-output", "r", false, "output strings without JSON quoting") + join := fs.BoolP("join-output", "j", false, "like -r, but suppress trailing newlines") + nullIn := fs.BoolP("null-input", "n", false, "do not read input; use null instead") + slurp := fs.BoolP("slurp", "s", false, "read all inputs into an array; run filter once") + rawIn := fs.BoolP("raw-input", "R", false, "each line of input is a JSON string") + ascii := fs.BoolP("ascii-output", "a", false, "escape non-ASCII characters as \\uXXXX") + sortK := fs.BoolP("sort-keys", "S", false, "sort object keys in output") + exitSt := fs.BoolP("exit-status", "e", false, "set exit status based on output truthiness") + + return func(ctx context.Context, callCtx *builtins.CallContext, args []string) builtins.Result { + if *help { + callCtx.Out("Usage: jq [OPTION]... FILTER [FILE]...\n") + callCtx.Out("Apply FILTER to JSON input from FILE(s) or standard input.\n") + callCtx.Out("With no FILE, or when FILE is -, read standard input.\n\n") + fs.SetOutput(callCtx.Stdout) + fs.PrintDefaults() + return builtins.Result{} + } + + // Real jq defaults the filter to identity (".") when no FILTER is given. + filter := "." + var files []string + if len(args) > 0 { + filter = args[0] + files = args[1:] + } + + if len(filter) > MaxFilterBytes { + callCtx.Errf("jq: filter too large (%d bytes, max %d)\n", len(filter), MaxFilterBytes) + return builtins.Result{Code: 2} + } + + // -j implies -r per jq manual. 
+ opts := options{ + compact: *compact, + rawOutput: *raw || *join, + joinOut: *join, + nullInput: *nullIn, + slurp: *slurp, + rawInput: *rawIn, + ascii: *ascii, + sortKeys: *sortK, + exitStat: *exitSt, + } + + prog, err := fastjq.Compile(filter) + if err != nil { + callCtx.Errf("jq: compile error: %s\n", err.Error()) + return builtins.Result{Code: 3} + } + + st := &runState{ctx: ctx, callCtx: callCtx, opts: opts, prog: prog} + + if err := st.run(files); err != nil { + if builtins.IsBrokenPipe(err) { + // Match other builtins: silently terminate when the + // downstream consumer closed the pipe. + return st.finalResult() + } + if !errors.Is(err, errAlreadyReported) { + callCtx.Errf("jq: %s\n", err.Error()) + } + st.failed = true + } + + return st.finalResult() + } +} + +// errAlreadyReported is a sentinel returned by helpers that have already +// written a specific error message to stderr. The top-level handler does +// not double-print such errors. +var errAlreadyReported = errors.New("error already reported") + +// runState bundles the per-invocation execution context. +type runState struct { + ctx context.Context + callCtx *builtins.CallContext + opts options + prog *fastjq.Program + + // failed is true when at least one runtime error occurred. + failed bool + + // emittedTruthy reports whether at least one output was a value + // other than null/false. Used to compute -e exit status. + emittedTruthy bool + + // slurpTotal tracks the cumulative bytes already accumulated across + // the slurp helpers so we can short-circuit before exceeding the cap. + slurpTotal int64 +} + +func (s *runState) finalResult() builtins.Result { + // -e: exit 1 when no output produced or every output was null/false. + if s.failed || (s.opts.exitStat && !s.emittedTruthy) { + return builtins.Result{Code: 1} + } + return builtins.Result{} +} + +// run dispatches based on input mode flags. 
+func (s *runState) run(files []string) error { + if s.opts.nullInput { + return s.runOne([]byte("null")) + } + + if len(files) == 0 { + files = []string{"-"} + } + + if s.opts.slurp && s.opts.rawInput { + // Slurp + raw input: read all input as a single JSON string. + return s.processRawSlurpFiles(files) + } + if s.opts.slurp { + // Slurp: collect every JSON value from every file into one array. + return s.processJSONSlurpFiles(files) + } + if s.opts.rawInput { + // Raw input: each line is treated as a JSON string. + for _, f := range files { + if err := s.ctx.Err(); err != nil { + return err + } + if err := s.processRawLines(f); err != nil { + return err + } + } + return nil + } + // Default: stream JSON values from every file. + for _, f := range files { + if err := s.ctx.Err(); err != nil { + return err + } + if err := s.processJSONStream(f); err != nil { + return err + } + } + return nil +} + +// openSource returns a bounded reader for one input source. +// The returned closer must be called when the caller is finished. +func (s *runState) openSource(file string) (io.ReadCloser, error) { + if file == "-" { + if s.callCtx.Stdin == nil { + return io.NopCloser(bytes.NewReader(nil)), nil + } + return io.NopCloser(s.callCtx.Stdin), nil + } + rc, err := s.callCtx.OpenFile(s.ctx, file, os.O_RDONLY, 0) + if err != nil { + return nil, err + } + return rc, nil +} + +// readAllBounded reads from rc until EOF or until the cap is exceeded. +// It is the caller's responsibility to close rc. Returns errCapExceeded +// when the cap fires, so callers can distinguish "input too large" from +// generic I/O errors. +func readAllBounded(ctx context.Context, rc io.Reader, limit int64) ([]byte, error) { + buf := make([]byte, 0, 4096) + chunk := make([]byte, readChunk) + for { + if err := ctx.Err(); err != nil { + return nil, err + } + n, err := rc.Read(chunk) + if n > 0 { + if int64(len(buf)+n) > limit { + return nil, capExceededError(limit) + } + buf = append(buf, chunk[:n]...) 
+ } + if errors.Is(err, io.EOF) { + return buf, nil + } + if err != nil { + return nil, err + } + } +} + +// decodeJSONValues reads whitespace-separated JSON values from one source +// and invokes fn for each. Used by both the streaming and slurp paths. +// +// The byte cap applies per-source. When --slurp is in effect, the caller +// is responsible for tracking the aggregate budget across files via +// runState.slurpTotal. +func (s *runState) decodeJSONValues(file string, fn func(raw []byte) error) error { + rc, err := s.openSource(file) + if err != nil { + s.reportFileErr(file, err) + return errAlreadyReported + } + defer rc.Close() + + dec := json.NewDecoder(&byteCountReader{r: rc, max: MaxStreamBytes}) + dec.UseNumber() + for { + if err := s.ctx.Err(); err != nil { + return err + } + var raw json.RawMessage + if err := dec.Decode(&raw); err != nil { + if errors.Is(err, io.EOF) { + return nil + } + s.reportInputErr(file, err) + return errAlreadyReported + } + if err := fn(raw); err != nil { + return err + } + } +} + +// processJSONStream runs the filter once per JSON value in one source. +func (s *runState) processJSONStream(file string) error { + return s.decodeJSONValues(file, func(raw []byte) error { + return s.runOne([]byte(raw)) + }) +} + +// processJSONSlurpFiles reads every value from every file into one array, +// then runs the filter once. The aggregate input size across all files +// is capped at MaxStreamBytes so an attacker cannot supply N near-cap +// files to balloon transient memory. 
+func (s *runState) processJSONSlurpFiles(files []string) error { + var arr [][]byte + for _, file := range files { + if err := s.ctx.Err(); err != nil { + return err + } + err := s.decodeJSONValues(file, func(raw []byte) error { + cp := make([]byte, len(raw)) + copy(cp, raw) + s.slurpTotal += int64(len(cp)) + 1 // +1 for separator/bracket overhead + if s.slurpTotal > MaxStreamBytes { + return capExceededError(MaxStreamBytes) + } + arr = append(arr, cp) + return nil + }) + if err != nil { + if isCapExceeded(err) { + s.reportInputErr(file, err) + return errAlreadyReported + } + return err + } + } + var buf bytes.Buffer + buf.WriteByte('[') + for i, v := range arr { + if i > 0 { + buf.WriteByte(',') + } + buf.Write(v) + } + buf.WriteByte(']') + return s.runOne(buf.Bytes()) +} + +// processRawLines reads one line at a time, packages each as a JSON +// string, and runs the filter on it. +// +// Line splitting follows bufio.ScanLines: LF terminates a line and a +// preceding CR is stripped. Lone CR is treated as part of the line, +// matching real jq's behaviour. +func (s *runState) processRawLines(file string) error { + rc, err := s.openSource(file) + if err != nil { + s.reportFileErr(file, err) + return errAlreadyReported + } + defer rc.Close() + + sc := bufio.NewScanner(&byteCountReader{r: rc, max: MaxStreamBytes}) + sc.Buffer(make([]byte, 4096), MaxLineBytes) + for { + if err := s.ctx.Err(); err != nil { + return err + } + if !sc.Scan() { + break + } + encoded := encodeJSONString(sc.Bytes()) + if err := s.runOne(encoded); err != nil { + return err + } + } + if err := sc.Err(); err != nil { + s.reportInputErr(file, err) + return errAlreadyReported + } + return nil +} + +// processRawSlurpFiles reads all input bytes (across files) into one +// JSON string and runs the filter once. The aggregate cap is enforced +// during accumulation, not after, so over-large inputs short-circuit +// without ever fully materialising in memory. 
+func (s *runState) processRawSlurpFiles(files []string) error { + var combined bytes.Buffer + for _, file := range files { + if err := s.ctx.Err(); err != nil { + return err + } + rc, err := s.openSource(file) + if err != nil { + s.reportFileErr(file, err) + return errAlreadyReported + } + remaining := int64(MaxStreamBytes) - int64(combined.Len()) + data, rerr := readAllBounded(s.ctx, rc, remaining) + rc.Close() + if rerr != nil { + s.reportInputErr(file, rerr) + return errAlreadyReported + } + combined.Write(data) + } + encoded := encodeJSONString(combined.Bytes()) + return s.runOne(encoded) +} + +// runOne runs the compiled filter on input and writes all results. +func (s *runState) runOne(input []byte) error { + return s.prog.RunFunc(input, func(result []byte) error { + if err := s.ctx.Err(); err != nil { + return err + } + if !isNullOrFalse(result) { + s.emittedTruthy = true + } + return s.writeResult(result) + }) +} + +// writeResult formats one filter result according to the active flags +// and writes it to stdout. +func (s *runState) writeResult(raw []byte) error { + formatted, err := s.formatValue(raw) + if err != nil { + return err + } + if _, err := s.callCtx.Stdout.Write(formatted); err != nil { + return err + } + if !s.opts.joinOut { + if _, err := s.callCtx.Stdout.Write([]byte{'\n'}); err != nil { + return err + } + } + return nil +} + +// formatValue returns the bytes that represent one filter result. +// +// - With --raw-output (or --join-output) on a JSON-string result, the +// decoded string contents are returned (no quotes, escapes resolved). +// - Otherwise the result is re-emitted as JSON with the active +// compact / sort-keys / ascii options applied. +func (s *runState) formatValue(raw []byte) ([]byte, error) { + if s.opts.rawOutput && len(raw) > 0 && raw[0] == '"' { + var str string + if err := json.Unmarshal(raw, &str); err == nil { + return []byte(str), nil + } + // Fallthrough — not actually a string; emit as JSON. 
+ } + // Pure pass-through for the most common case (compact, no ascii, no + // sort-keys). fastjq already emits canonical compact JSON. + if s.opts.compact && !s.opts.ascii && !s.opts.sortKeys { + return raw, nil + } + return reformatJSON(s.ctx, raw, s.opts.compact, s.opts.ascii, s.opts.sortKeys) +} + +// reportFileErr writes an "open failed" style message. +func (s *runState) reportFileErr(file string, err error) { + name := file + if file == "-" { + name = "standard input" + } + s.callCtx.Errf("jq: %s: %s\n", name, s.callCtx.PortableErr(err)) +} + +// reportInputErr writes a "parse / read failed" style message. +func (s *runState) reportInputErr(file string, err error) { + name := file + if file == "-" { + name = "" + } + s.callCtx.Errf("jq: error reading %s: %s\n", name, err.Error()) +} + +// isNullOrFalse reports whether a fastjq output is the literal `null` +// or `false`. Used by --exit-status to compute exit code. +func isNullOrFalse(b []byte) bool { + return bytes.Equal(b, []byte("null")) || bytes.Equal(b, []byte("false")) +} + +// encodeJSONString produces the JSON-encoded form of an arbitrary byte +// slice (used for --raw-input). Invalid UTF-8 sequences are replaced +// with U+FFFD by Go's encoder, matching jq's behaviour for non-UTF-8 +// raw input. +func encodeJSONString(b []byte) []byte { + out, _ := json.Marshal(string(b)) + return out +} + +// byteCountReader wraps an io.Reader and enforces a hard byte cap. +// When the cap is hit, Read returns errCapExceeded and truncates n so +// the consumer never sees bytes beyond the cap (i.e. the consumer's +// internal buffer cannot exceed max bytes either). +type byteCountReader struct { + r io.Reader + n int64 + max int64 +} + +func (c *byteCountReader) Read(p []byte) (int, error) { + n, err := c.r.Read(p) + c.n += int64(n) + if c.n > c.max { + // Truncate so the caller never observes the overshoot bytes. 
+ over := c.n - c.max + c.n = c.max + n -= int(over) + if n < 0 { + n = 0 + } + return n, capExceededError(c.max) + } + return n, err +} + +// capExceededError builds the canonical "input exceeds N bytes" error. +// It wraps errCapExceeded so callers can identify the condition with +// errors.Is and produce a clearer user-facing message. +func capExceededError(max int64) error { + return fmt.Errorf("%w: input exceeds %d bytes", errCapExceeded, max) +} + +// errCapExceeded is the sentinel under capExceededError. Use isCapExceeded +// to test for it. +var errCapExceeded = errors.New("size cap exceeded") + +func isCapExceeded(err error) bool { + return errors.Is(err, errCapExceeded) +} + +// maxEmitDepth bounds the recursion of the JSON re-emitter. JSON nested +// deeper than this is rejected rather than risking a runaway stack. +const maxEmitDepth = 256 + +// errEmitDepth is returned when JSON nesting exceeds maxEmitDepth. +var errEmitDepth = errors.New("json nesting too deep") + +// reformatJSON re-emits raw with the active formatting options. raw +// must already be valid JSON (fastjq's output is always valid). The +// output is hard-capped at MaxStreamBytes; oversize results return an +// error rather than balloon memory. +func reformatJSON(ctx context.Context, raw []byte, compact, ascii, sortKeys bool) ([]byte, error) { + dec := json.NewDecoder(bytes.NewReader(raw)) + dec.UseNumber() + var out bytes.Buffer + out.Grow(len(raw)) + indent := " " + if compact { + indent = "" + } + e := &emitter{ + ctx: ctx, + out: &out, + sortKeys: sortKeys, + ascii: ascii, + indent: indent, + maxBytes: MaxStreamBytes, + } + if err := e.emit(dec, "", 0); err != nil { + return nil, err + } + return out.Bytes(), nil +} + +// emitter renders one parsed JSON value with the requested formatting. 
+type emitter struct { + ctx context.Context + out *bytes.Buffer + sortKeys bool + ascii bool + indent string // "" for compact, " " for pretty + maxBytes int // hard cap on out.Len(); 0 disables the check +} + +// guardWrites is called at the top of every container/value emit. It +// surfaces ctx.Err() and the output-size cap. +func (e *emitter) guardWrites() error { + if err := e.ctx.Err(); err != nil { + return err + } + if e.maxBytes > 0 && e.out.Len() > e.maxBytes { + return capExceededError(int64(e.maxBytes)) + } + return nil +} + +// emit renders one value pulled from dec. +// +// prefix is the indentation string for the line that contains the +// value's opening character. It is not written by emit itself; it is +// used by container emitters to indent child lines and to align the +// closing bracket/brace. depth bounds recursion. +func (e *emitter) emit(dec *json.Decoder, prefix string, depth int) error { + if err := e.guardWrites(); err != nil { + return err + } + if depth > maxEmitDepth { + return errEmitDepth + } + tok, err := dec.Token() + if err != nil { + return err + } + switch t := tok.(type) { + case json.Delim: + switch t { + case '[': + return e.emitArray(dec, prefix, depth+1) + case '{': + return e.emitObject(dec, prefix, depth+1) + default: + return fmt.Errorf("unexpected delim %q", t) + } + case json.Number: + e.out.WriteString(t.String()) + case string: + e.emitString(t) + case bool: + if t { + e.out.WriteString("true") + } else { + e.out.WriteString("false") + } + case nil: + e.out.WriteString("null") + default: + return fmt.Errorf("unexpected token type %T", tok) + } + return nil +} + +func (e *emitter) emitArray(dec *json.Decoder, prefix string, depth int) error { + e.out.WriteByte('[') + if !dec.More() { + if _, err := dec.Token(); err != nil { + return err + } + e.out.WriteByte(']') + return nil + } + childPrefix := prefix + e.indent + first := true + for dec.More() { + if err := e.guardWrites(); err != nil { + return err + } + if 
!first { + e.out.WriteByte(',') + } + first = false + if e.indent != "" { + e.out.WriteByte('\n') + e.out.WriteString(childPrefix) + } + if err := e.emit(dec, childPrefix, depth); err != nil { + return err + } + } + if _, err := dec.Token(); err != nil { + return err + } + if e.indent != "" { + e.out.WriteByte('\n') + e.out.WriteString(prefix) + } + e.out.WriteByte(']') + return nil +} + +func (e *emitter) emitObject(dec *json.Decoder, prefix string, depth int) error { + e.out.WriteByte('{') + if !dec.More() { + if _, err := dec.Token(); err != nil { + return err + } + e.out.WriteByte('}') + return nil + } + childPrefix := prefix + e.indent + + if e.sortKeys { + type kv struct { + k string + v []byte + } + var pairs []kv + for dec.More() { + if err := e.guardWrites(); err != nil { + return err + } + keyTok, err := dec.Token() + if err != nil { + return err + } + key, ok := keyTok.(string) + if !ok { + return fmt.Errorf("non-string object key") + } + var sub bytes.Buffer + subE := &emitter{ctx: e.ctx, out: &sub, sortKeys: e.sortKeys, ascii: e.ascii, indent: e.indent, maxBytes: e.maxBytes} + if err := subE.emit(dec, childPrefix, depth); err != nil { + return err + } + pairs = append(pairs, kv{k: key, v: sub.Bytes()}) + } + if _, err := dec.Token(); err != nil { + return err + } + slices.SortFunc(pairs, func(a, b kv) int { + switch { + case a.k < b.k: + return -1 + case a.k > b.k: + return 1 + default: + return 0 + } + }) + for i, p := range pairs { + if i > 0 { + e.out.WriteByte(',') + } + if e.indent != "" { + e.out.WriteByte('\n') + e.out.WriteString(childPrefix) + } + e.emitString(p.k) + e.out.WriteByte(':') + if e.indent != "" { + e.out.WriteByte(' ') + } + e.out.Write(p.v) + } + } else { + first := true + for dec.More() { + if err := e.guardWrites(); err != nil { + return err + } + keyTok, err := dec.Token() + if err != nil { + return err + } + key, ok := keyTok.(string) + if !ok { + return fmt.Errorf("non-string object key") + } + if !first { + 
e.out.WriteByte(',') + } + first = false + if e.indent != "" { + e.out.WriteByte('\n') + e.out.WriteString(childPrefix) + } + e.emitString(key) + e.out.WriteByte(':') + if e.indent != "" { + e.out.WriteByte(' ') + } + if err := e.emit(dec, childPrefix, depth); err != nil { + return err + } + } + if _, err := dec.Token(); err != nil { + return err + } + } + + if e.indent != "" { + e.out.WriteByte('\n') + e.out.WriteString(prefix) + } + e.out.WriteByte('}') + return nil +} + +// emitString writes s as a JSON string literal. With ascii=true, every +// rune outside the 7-bit range is emitted as a \uXXXX (or surrogate-pair) +// escape. Control characters are always escaped. +func (e *emitter) emitString(s string) { + e.out.WriteByte('"') + for i := 0; i < len(s); { + r, size := utf8.DecodeRuneInString(s[i:]) + switch r { + case '"': + e.out.WriteString(`\"`) + case '\\': + e.out.WriteString(`\\`) + case '\n': + e.out.WriteString(`\n`) + case '\r': + e.out.WriteString(`\r`) + case '\t': + e.out.WriteString(`\t`) + case '\b': + e.out.WriteString(`\b`) + case '\f': + e.out.WriteString(`\f`) + default: + switch { + case r < 0x20: + fmt.Fprintf(e.out, `\u%04x`, r) + case r < 0x80: + e.out.WriteByte(byte(r)) + case e.ascii: + if r > 0xFFFF { + rr := r - 0x10000 + hi := 0xD800 + (rr >> 10) + lo := 0xDC00 + (rr & 0x3FF) + fmt.Fprintf(e.out, `\u%04x\u%04x`, hi, lo) + } else { + fmt.Fprintf(e.out, `\u%04x`, r) + } + default: + e.out.WriteRune(r) + } + } + i += size + } + e.out.WriteByte('"') +} diff --git a/builtins/jq/jq_fuzz_test.go b/builtins/jq/jq_fuzz_test.go new file mode 100644 index 000000000..1e831bf6d --- /dev/null +++ b/builtins/jq/jq_fuzz_test.go @@ -0,0 +1,311 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. 
+ +package jq_test + +import ( + "bytes" + "context" + "os" + "path/filepath" + "strings" + "sync/atomic" + "testing" + "time" + "unicode/utf8" + + "github.com/DataDog/rshell/builtins/testutil" + "github.com/DataDog/rshell/interp" +) + +// cmdRunCtxFuzz runs a script with a per-iteration AllowedPath, used by +// every fuzz function in this file. Named to avoid clashing with helpers +// that may be defined elsewhere in the package. +func cmdRunCtxFuzz(ctx context.Context, t *testing.T, script, dir string) (string, string, int) { + t.Helper() + return testutil.RunScriptCtx(ctx, t, script, dir, interp.AllowedPaths([]string{dir})) +} + +// fuzzWrite writes input as the JSON file under dir and returns its path. +func fuzzWriteJSON(t *testing.T, dir string, input []byte) { + t.Helper() + if err := os.WriteFile(filepath.Join(dir, "input.json"), input, 0644); err != nil { + t.Fatal(err) + } +} + +// FuzzJqIdentity fuzzes the identity filter on arbitrary input. Verifies +// jq never panics and exits with one of the documented codes. +// +// Seed corpus combines: +// - Small JSON sanity values +// - Boundary inputs around our memory caps (4 KiB, 1 MiB) +// - Encoding edge cases (CRLF, null bytes, invalid UTF-8, BOM) +// - Adversarial inputs that exercise the multi-document streaming path +// - All distinct inputs from the unit tests +func FuzzJqIdentity(f *testing.F) { + // Source A — implementation edge cases. + f.Add([]byte(`null`)) + f.Add([]byte(`true`)) + f.Add([]byte(`false`)) + f.Add([]byte(`0`)) + f.Add([]byte(`-1`)) + f.Add([]byte(`3.14`)) + f.Add([]byte(`""`)) + f.Add([]byte(`"hello"`)) + f.Add([]byte(`{}`)) + f.Add([]byte(`[]`)) + f.Add([]byte(`{"a":1}`)) + f.Add([]byte(`{"a":1,"b":2}`)) + f.Add([]byte(`[1,2,3]`)) + f.Add([]byte(`{"nested":{"deep":{"value":42}}}`)) + // Boundary at 4 KiB scanner-init. 
+ f.Add(append(bytes.Repeat([]byte(" "), 4095), '"', '"')) + f.Add(append(bytes.Repeat([]byte(" "), 4096), '"', '"')) + f.Add(append(bytes.Repeat([]byte(" "), 4097), '"', '"')) + // Multi-document streams. + f.Add([]byte("1\n2\n3\n")) + f.Add([]byte("{}{}{}")) + f.Add([]byte(`{"a":1}{"b":2}`)) + // No trailing newline. + f.Add([]byte(`{"a":1}`)) + // BOM-prefixed input — fastjq strips this; json.Decoder rejects it. + f.Add([]byte("\xef\xbb\xbf{\"a\":1}")) + // Single byte. + f.Add([]byte(`1`)) + + // Source B — CVE-class inputs known to break naïve JSON tooling. + // Long strings near the per-doc boundary. + f.Add(append(append([]byte(`"`), bytes.Repeat([]byte("a"), 4095)...), '"')) + f.Add(append(append([]byte(`"`), bytes.Repeat([]byte("a"), 65535)...), '"')) + // Invalid UTF-8 inside a string. + f.Add(append(append([]byte(`"`), 0xed, 0xa0, 0x80), '"')) // surrogate half + f.Add(append(append([]byte(`"`), 0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf), '"')) + // CRLF line endings. + f.Add([]byte("{\"a\":1}\r\n{\"b\":2}\r\n")) + // Embedded null bytes (invalid in JSON outside strings; jq rejects). + f.Add([]byte("{\"a\":\x00}")) + // Embedded ANSI escape in string contents (terminal injection class). + f.Add([]byte("\"\\u001b[31mRED\\u001b[0m\"")) + // Binary-magic-byte payloads — jq must reject as invalid JSON. + f.Add([]byte{0x7f, 'E', 'L', 'F'}) + f.Add([]byte{'M', 'Z'}) + f.Add([]byte{'P', 'K', 0x03, 0x04}) + // Numeric overflow / boundary. + f.Add([]byte("9999999999999999999")) + f.Add([]byte("-9999999999999999999")) + f.Add([]byte("1e308")) + f.Add([]byte("1e1000")) + // Deeply nested array/object — tests recursion bounds. + f.Add(append(append(bytes.Repeat([]byte("["), 100), '0'), bytes.Repeat([]byte("]"), 100)...)) + + // Source C — every distinct value from the unit tests. 
+ f.Add([]byte(`{"a":1,"b":2}`)) + f.Add([]byte(`{"a":{"b":[1,2]}}`)) + f.Add([]byte(`{"a":[],"b":{}}`)) + f.Add([]byte(`{"name":"alice"}`)) + f.Add([]byte(`{"a":42,"b":true,"c":null}`)) + f.Add([]byte(`{"s":"a\tb\nc"}`)) + f.Add([]byte(`{"banana":2,"apple":1,"cherry":3}`)) + f.Add([]byte(`{"z":{"b":2,"a":1},"y":{"d":4,"c":3}}`)) + f.Add([]byte(`{"flag":false}`)) + f.Add([]byte(`"hello"`)) + f.Add([]byte(`"hi\nthere"`)) + f.Add([]byte(`{"s":"héllo"}`)) + f.Add([]byte(`{"s":"a😀b"}`)) + f.Add([]byte(`{"x":[]}`)) + f.Add([]byte(`{"a":9007199254740993}`)) + + baseDir := f.TempDir() + var counter atomic.Int64 + + f.Fuzz(func(t *testing.T, input []byte) { + if len(input) > 1<<20 { + return + } + dir, cleanup := testutil.FuzzIterDir(t, baseDir, &counter) + defer cleanup() + fuzzWriteJSON(t, dir, input) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + _, _, code := cmdRunCtxFuzz(ctx, t, "jq -c . input.json", dir) + if code != 0 && code != 1 { + t.Errorf("unexpected exit code %d", code) + } + }) +} + +// FuzzJqRawInput fuzzes -R (raw input mode) with arbitrary text bytes. +// Verifies the line scanner handles binary, CRLF, long lines, etc. +func FuzzJqRawInput(f *testing.F) { + f.Add([]byte("")) + f.Add([]byte("\n")) + f.Add([]byte("foo\n")) + f.Add([]byte("foo\nbar\nbaz\n")) + // CRLF. + f.Add([]byte("foo\r\nbar\r\n")) + // Lone CR. + f.Add([]byte("foo\rbar\r")) + // No trailing newline. + f.Add([]byte("hello")) + // Mixed terminators. + f.Add([]byte("a\nb\r\nc\rd")) + // Embedded null bytes. + f.Add([]byte("a\x00b\nc")) + // Invalid UTF-8 (Go encoder substitutes U+FFFD). + f.Add([]byte{0xff, 0xfe, '\n'}) + f.Add([]byte{0xed, 0xa0, 0x80, '\n'}) + // ANSI escapes. + f.Add([]byte("\x1b[31mRED\x1b[0m\n")) + // JSON-like content as raw text. + f.Add([]byte(`{"a":1}` + "\n")) + // Binary file headers. + f.Add([]byte{0x7f, 'E', 'L', 'F', '\n'}) + // Long line near the cap. 
+ f.Add(append(bytes.Repeat([]byte("k"), 4095), '\n')) + + baseDir := f.TempDir() + var counter atomic.Int64 + + f.Fuzz(func(t *testing.T, input []byte) { + if len(input) > 1<<20 { + return + } + dir, cleanup := testutil.FuzzIterDir(t, baseDir, &counter) + defer cleanup() + if err := os.WriteFile(filepath.Join(dir, "input.txt"), input, 0644); err != nil { + t.Fatal(err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + _, _, code := cmdRunCtxFuzz(ctx, t, "jq -R . input.txt", dir) + if code != 0 && code != 1 { + t.Errorf("unexpected exit code %d", code) + } + }) +} + +// FuzzJqSlurp fuzzes -s with arbitrary multi-document JSON input. +func FuzzJqSlurp(f *testing.F) { + f.Add([]byte("")) + f.Add([]byte("1")) + f.Add([]byte("1 2 3")) + f.Add([]byte("1\n2\n3")) + f.Add([]byte(`{"a":1}{"b":2}`)) + f.Add([]byte(`null null null`)) + f.Add([]byte(`{"x":[]}{"y":{}}`)) + f.Add([]byte("[1,2,3]\n[4,5,6]")) + f.Add([]byte(`""`)) + f.Add([]byte(`{}`)) + f.Add([]byte(`""""""""`)) // back-to-back empty strings + + baseDir := f.TempDir() + var counter atomic.Int64 + + f.Fuzz(func(t *testing.T, input []byte) { + if len(input) > 1<<20 { + return + } + dir, cleanup := testutil.FuzzIterDir(t, baseDir, &counter) + defer cleanup() + fuzzWriteJSON(t, dir, input) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + _, _, code := cmdRunCtxFuzz(ctx, t, "jq -c -s . input.json", dir) + if code != 0 && code != 1 { + t.Errorf("unexpected exit code %d", code) + } + }) +} + +// FuzzJqFilter fuzzes a wide variety of filter expressions on a fixed +// JSON input, covering the compile path of the fastjq engine. +func FuzzJqFilter(f *testing.F) { + // Source A — common jq filter shapes. 
+ f.Add(".") + f.Add(".a") + f.Add(".a.b.c") + f.Add(".[]") + f.Add(".[0]") + f.Add(".[-1]") + f.Add("..") + f.Add(`select(.a == 1)`) + f.Add(`select(.a > 0)`) + f.Add(`del(.a)`) + f.Add(`map(.x)`) + f.Add(`{name: .a}`) + f.Add(`[.a, .b]`) + f.Add(`. as $x | $x`) + f.Add(`if .a then 1 else 2 end`) + f.Add(`try .a catch "fail"`) + f.Add(`length`) + f.Add(`type`) + f.Add(`keys`) + f.Add(`keys_unsorted`) + f.Add(`to_entries`) + f.Add(`from_entries`) + f.Add(`add`) + // Source B — adversarial inputs. + f.Add("") + f.Add(" ") + f.Add(".[") + f.Add("|||||") + f.Add(`"unterminated`) + f.Add("\x00") + f.Add("\xed\xa0\x80") + // ReDoS-class regex inside test(). + f.Add(`test("(a+)+b")`) + f.Add(`test("a*a*a*b")`) + // Very long filter expression. + long := bytes.Repeat([]byte("."), 1000) + f.Add(string(long)) + // Mismatched parens. + f.Add(`(((((((`) + + baseDir := f.TempDir() + var counter atomic.Int64 + + f.Fuzz(func(t *testing.T, filter string) { + if len(filter) > 1<<14 { + return + } + // Skip filters that the shell parser would reject (invalid UTF-8, + // embedded NUL, etc.) — those errors aren't on the jq path under test. + if !utf8.ValidString(filter) || strings.ContainsRune(filter, 0) { + return + } + dir, cleanup := testutil.FuzzIterDir(t, baseDir, &counter) + defer cleanup() + fuzzWriteJSON(t, dir, []byte(`{"a":1,"b":2}`)) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + // Pass the filter as -n input to avoid shell-quoting interactions. + _, _, code := cmdRunCtxFuzz(ctx, t, "jq -c -n -- "+shquote(filter), dir) + // Filter compile/runtime errors return 1 or 3; success returns 0. + // Usage errors (filter too large, etc.) return 2. + if code != 0 && code != 1 && code != 2 && code != 3 { + t.Errorf("unexpected exit code %d", code) + } + }) +} + +// shquote single-quotes a string for the shell, escaping embedded quotes. 
+func shquote(s string) string { + var buf bytes.Buffer + buf.WriteByte('\'') + for _, c := range []byte(s) { + if c == '\'' { + buf.WriteString(`'\''`) + } else { + buf.WriteByte(c) + } + } + buf.WriteByte('\'') + return buf.String() +} diff --git a/builtins/jq/jq_gnu_compat_test.go b/builtins/jq/jq_gnu_compat_test.go new file mode 100644 index 000000000..743a67fb7 --- /dev/null +++ b/builtins/jq/jq_gnu_compat_test.go @@ -0,0 +1,247 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +// jq_gnu_compat_test.go asserts byte-for-byte output equivalence between +// our jq builtin and the upstream jq CLI for the cases most sensitive to +// formatting (pretty-print, sort-keys, ascii escapes, raw output, slurp, +// raw-input). The reference output strings were captured once from the +// real jq 1.7.1 binary and are embedded as literals so the test runs +// without any host jq present on CI. +package jq_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +// Reference outputs were captured by piping the documented input through +// jq 1.7.1 and `od -c`-inspecting the result. Each test repeats the exact +// input shape and asserts the captured bytes. + +// TestGNUCompatJqIdentityPretty — default pretty-print of a small object. +// +// jq invocation: printf '{"a":1,"b":2}' | jq . +// Expected output: "{\n \"a\": 1,\n \"b\": 2\n}\n" +func TestGNUCompatJqIdentityPretty(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"a":1,"b":2}`) + stdout, _, code := jqRun(t, "jq . x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "{\n \"a\": 1,\n \"b\": 2\n}\n", stdout) +} + +// TestGNUCompatJqCompactSimple — -c emits one line per output. +// +// jq invocation: printf '{"a":1,"b":2}' | jq -c . 
+// Expected output: "{\"a\":1,\"b\":2}\n" +func TestGNUCompatJqCompactSimple(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"a":1,"b":2}`) + stdout, _, code := jqRun(t, "jq -c . x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "{\"a\":1,\"b\":2}\n", stdout) +} + +// TestGNUCompatJqSortKeysCompact — -S sorts object keys lexicographically. +// +// jq invocation: printf '{"banana":2,"apple":1}' | jq -S -c . +// Expected output: "{\"apple\":1,\"banana\":2}\n" +func TestGNUCompatJqSortKeysCompact(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"banana":2,"apple":1}`) + stdout, _, code := jqRun(t, "jq -S -c . x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "{\"apple\":1,\"banana\":2}\n", stdout) +} + +// TestGNUCompatJqSortKeysPretty — -S in pretty mode sorts and indents. +// +// jq invocation: printf '{"banana":2,"apple":1}' | jq -S . +// Expected output: "{\n \"apple\": 1,\n \"banana\": 2\n}\n" +func TestGNUCompatJqSortKeysPretty(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"banana":2,"apple":1}`) + stdout, _, code := jqRun(t, "jq -S . x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "{\n \"apple\": 1,\n \"banana\": 2\n}\n", stdout) +} + +// TestGNUCompatJqRawStringEscapes — -r decodes JSON-string escape sequences. +// +// jq invocation: printf '"hi\\nthere"' | jq -r . +// Expected output: "hi\nthere\n" +func TestGNUCompatJqRawStringEscapes(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `"hi\nthere"`) + stdout, _, code := jqRun(t, "jq -r . x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "hi\nthere\n", stdout) +} + +// TestGNUCompatJqSlurp — -s collects multiple values into one array. +// +// jq invocation: printf '1 2 3' | jq -s -c . +// Expected output: "[1,2,3]\n" +func TestGNUCompatJqSlurp(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", "1 2 3") + stdout, _, code := jqRun(t, "jq -s -c . 
x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "[1,2,3]\n", stdout) +} + +// TestGNUCompatJqRawInput — -R wraps each line as a JSON string. +// +// jq invocation: printf 'foo\nbar\n' | jq -R . +// Expected output: "\"foo\"\n\"bar\"\n" +func TestGNUCompatJqRawInput(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.txt", "foo\nbar\n") + stdout, _, code := jqRun(t, "jq -R . x.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\"foo\"\n\"bar\"\n", stdout) +} + +// TestGNUCompatJqAsciiOutputBMP — -a escapes a non-ASCII BMP character. +// +// jq invocation: printf '{"s":"héllo"}' | jq -a -c .s +// Expected output: "\"h\\u00e9llo\"\n" +func TestGNUCompatJqAsciiOutputBMP(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"s":"héllo"}`) + stdout, _, code := jqRun(t, "jq -a -c .s x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\"h\\u00e9llo\"\n", stdout) +} + +// TestGNUCompatJqAsciiOutputSurrogate — -a emits surrogate pairs for +// supplementary-plane characters (U+1F600 = 😀). +// +// jq invocation: printf '{"s":"a😀b"}' | jq -a -c .s +// Expected output: "\"a\\ud83d\\ude00b\"\n" +func TestGNUCompatJqAsciiOutputSurrogate(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"s":"a😀b"}`) + stdout, _, code := jqRun(t, "jq -a -c .s x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\"a\\ud83d\\ude00b\"\n", stdout) +} + +// TestGNUCompatJqEmptyContainersInline — empty [] and {} stay on one line +// even in pretty mode. +// +// jq invocation: printf '{"x":[]}' | jq . +// Expected output: "{\n \"x\": []\n}\n" +func TestGNUCompatJqEmptyContainersInline(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"x":[]}`) + stdout, _, code := jqRun(t, "jq . x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "{\n \"x\": []\n}\n", stdout) +} + +// TestGNUCompatJqNestedPretty — multi-level pretty-print indentation. +// +// jq invocation: printf '{"a":{"b":[1,2]}}' | jq . 
+// Expected output: +// +// { +// "a": { +// "b": [ +// 1, +// 2 +// ] +// } +// } +func TestGNUCompatJqNestedPretty(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"a":{"b":[1,2]}}`) + stdout, _, code := jqRun(t, "jq . x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "{\n \"a\": {\n \"b\": [\n 1,\n 2\n ]\n }\n}\n", stdout) +} + +// TestGNUCompatJqNullInput — -n prints null when the filter is identity. +// +// jq invocation: jq -n . +// Expected output: "null\n" +func TestGNUCompatJqNullInput(t *testing.T) { + dir := t.TempDir() + stdout, _, code := jqRun(t, "jq -n .", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "null\n", stdout) +} + +// TestGNUCompatJqNullInputConstruct — -n with object construction. +// +// jq invocation: jq -n -c '{x: 1, y: 2}' +// Expected output: "{\"x\":1,\"y\":2}\n" +func TestGNUCompatJqNullInputConstruct(t *testing.T) { + dir := t.TempDir() + stdout, _, code := jqRun(t, "jq -n -c '{x: 1, y: 2}'", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "{\"x\":1,\"y\":2}\n", stdout) +} + +// TestGNUCompatJqMultiDoc — multiple JSON values are processed individually. +// +// jq invocation: printf '1\n2\n3\n' | jq -c . +// Expected output: "1\n2\n3\n" +func TestGNUCompatJqMultiDoc(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", "1\n2\n3\n") + stdout, _, code := jqRun(t, "jq -c . x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "1\n2\n3\n", stdout) +} + +// TestGNUCompatJqExitStatusNullExit1 — -e returns 1 on null-only output. +// +// jq invocation: printf '{}' | jq -e .missing +// Expected output: "null\n", exit code 1 +func TestGNUCompatJqExitStatusNullExit1(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", "{}") + stdout, _, code := jqRun(t, "jq -e .missing x.json", dir) + assert.Equal(t, 1, code) + assert.Equal(t, "null\n", stdout) +} + +// TestGNUCompatJqRawNumber — -r leaves a number unchanged (no quotes, no decode). 
+// +// jq invocation: printf '{"a":42}' | jq -r .a +// Expected output: "42\n" +func TestGNUCompatJqRawNumber(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"a":42}`) + stdout, _, code := jqRun(t, "jq -r .a x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "42\n", stdout) +} + +// TestGNUCompatJqRawBool — -r leaves a boolean unchanged. +// +// jq invocation: printf '{"b":true}' | jq -r .b +// Expected output: "true\n" +func TestGNUCompatJqRawBool(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"b":true}`) + stdout, _, code := jqRun(t, "jq -r .b x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "true\n", stdout) +} + +// TestGNUCompatJqRawNull — -r leaves null unchanged. +// +// jq invocation: printf '{"c":null}' | jq -r .c +// Expected output: "null\n" +func TestGNUCompatJqRawNull(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"c":null}`) + stdout, _, code := jqRun(t, "jq -r .c x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "null\n", stdout) +} diff --git a/builtins/jq/jq_hardening_test.go b/builtins/jq/jq_hardening_test.go new file mode 100644 index 000000000..d2907867c --- /dev/null +++ b/builtins/jq/jq_hardening_test.go @@ -0,0 +1,290 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package jq_test + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/DataDog/rshell/builtins/jq" + "github.com/DataDog/rshell/interp" +) + +// --- filter size cap --- + +func TestJqRejectsOverlongFilter(t *testing.T) { + dir := t.TempDir() + // Build a filter just over the cap. We embed it via stdin to dodge the + // shell's own arg-length limits. 
The filter is bytewise-valid jq so + // only its size (not its parseability) matters. + huge := strings.Repeat("0,", jq.MaxFilterBytes/2+10) + "0" + require.Greater(t, len(huge), jq.MaxFilterBytes) + scriptPath := filepath.Join(dir, "filter.txt") + require.NoError(t, os.WriteFile(scriptPath, []byte(huge), 0644)) + // We can't easily pass a filter > 64 KiB through the shell command line + // in tests; instead, exercise the cap directly via -n with a literal that + // stretches the filter past the limit. + _, stderr, code := jqRun(t, "jq -n '"+huge+"'", dir) + assert.Equal(t, 2, code) + assert.Contains(t, stderr, "filter too large") +} + +// --- input size caps --- + +func TestJqRejectsOversizedInputStream(t *testing.T) { + dir := t.TempDir() + // Generate a JSON document just over the per-source cap. A long string + // is the simplest payload (still valid JSON). + overshoot := jq.MaxStreamBytes + 1024 + body := strings.Repeat("a", overshoot) + content := `"` + body + `"` + writeFile(t, dir, "big.json", content) + _, stderr, code := jqRun(t, "jq -c . big.json", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "exceeds") +} + +func TestJqRejectsOversizedSlurpStream(t *testing.T) { + dir := t.TempDir() + // A long string just over MaxStreamBytes triggers the slurp-side + // LimitReader path before the array assembly cap. + body := strings.Repeat("z", jq.MaxStreamBytes+1024) + writeFile(t, dir, "big.json", `"`+body+`"`) + _, stderr, code := jqRun(t, "jq -s . big.json", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "exceeds") +} + +func TestJqRejectsOversizedRawSlurpStream(t *testing.T) { + dir := t.TempDir() + body := strings.Repeat("x", jq.MaxStreamBytes+1024) + writeFile(t, dir, "big.txt", body) + _, stderr, code := jqRun(t, "jq -s -R . 
big.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "exceeds") +} + +func TestJqRejectsOversizedRawInputLine(t *testing.T) { + dir := t.TempDir() + long := strings.Repeat("y", jq.MaxLineBytes+10) + writeFile(t, dir, "long.txt", long+"\n") + _, stderr, code := jqRun(t, "jq -R . long.txt", dir) + assert.Equal(t, 1, code) + assert.NotEqual(t, "", stderr) +} + +// A single oversized line in -R mode at the boundary (cap-1) succeeds. +func TestJqRawInputAcceptsLineAtCapMinus1(t *testing.T) { + dir := t.TempDir() + body := strings.Repeat("k", jq.MaxLineBytes-1) + writeFile(t, dir, "ok.txt", body+"\n") + stdout, _, code := jqRun(t, "jq -R . ok.txt", dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "k") +} + +// --- context cancellation --- + +func TestJqContextCancelDuringStream(t *testing.T) { + dir := t.TempDir() + // 200 small JSON documents — enough for a few decode iterations. + var sb strings.Builder + for i := 0; i < 200; i++ { + sb.WriteString(`{"i":0}` + "\n") + } + writeFile(t, dir, "stream.json", sb.String()) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + start := time.Now() + _, _, _ = runScriptCtx(ctx, t, "jq -c . stream.json", dir, interp.AllowedPaths([]string{dir})) + // Should complete within the 5s timeout regardless of cancellation; + // assert it finished well below the timeout to detect a runaway loop. + assert.Less(t, time.Since(start), 4*time.Second) +} + +// Deeply-nested JSON must hit the recursion bound, not the goroutine stack +// limit or the 30-second executor timeout. +func TestJqRejectsDeeplyNestedJSON(t *testing.T) { + dir := t.TempDir() + // 10 000 levels of nesting — well above maxEmitDepth (256). 
+ var sb strings.Builder + for i := 0; i < 10_000; i++ { + sb.WriteByte('[') + } + for i := 0; i < 10_000; i++ { + sb.WriteByte(']') + } + writeFile(t, dir, "deep.json", sb.String()) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + start := time.Now() + _, stderr, code := runScriptCtx(ctx, t, "jq . deep.json", dir, interp.AllowedPaths([]string{dir})) + assert.Less(t, time.Since(start), 4*time.Second, "deeply nested JSON should fail fast") + if code == 0 { + t.Errorf("expected nesting-too-deep error, got success; stderr=%q", stderr) + } +} + +// --- malformed UTF-8 handling --- + +func TestJqRawInputInvalidUTF8(t *testing.T) { + // jq -R must not panic on lines with invalid UTF-8; the bytes pass + // through Go's JSON encoder, which substitutes U+FFFD for invalid + // runes. + dir := t.TempDir() + bad := []byte{0xFF, 0xFE, 0xFD, '\n'} + require.NoError(t, os.WriteFile(filepath.Join(dir, "x.txt"), bad, 0644)) + stdout, _, code := jqRun(t, "jq -R . x.txt", dir) + assert.Equal(t, 0, code) + assert.NotEqual(t, "", stdout) +} + +// --- numeric edge cases (filter side, since fastjq emits canonical numbers) --- + +func TestJqLargeIntegerPassthrough(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"n":9007199254740993}`) + stdout, _, code := jqRun(t, "jq -c .n x.json", dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "9007199254740993") +} + +// --- empty input variants --- + +func TestJqEmptyStdin(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "empty.txt", "") + stdout, stderr, code := jqRun(t, "jq -c . < empty.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) + assert.Equal(t, "", stderr) +} + +// --- -e exit-status edge cases --- + +func TestJqExitStatusEmptyStreamWithE(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "empty.json", "") + _, _, code := jqRun(t, "jq -e . 
empty.json", dir) + assert.Equal(t, 1, code) +} + +// --- repeated invocations / fresh state per run --- + +// --- string escape coverage --- + +func TestJqStringEscapesAllSpecials(t *testing.T) { + dir := t.TempDir() + // Control + special chars: BS, FF, CR, TAB, NL, DQUOTE, BACKSLASH. + writeFile(t, dir, "x.json", `{"s":"\b\f\r\t\n\"\\"}`) + stdout, _, code := jqRun(t, "jq -c .s x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\"\\b\\f\\r\\t\\n\\\"\\\\\"\n", stdout) +} + +func TestJqStringEscapeGenericControl(t *testing.T) { + // Generic control char (BEL=0x07) goes through the \u00xx path on output. + // Input must use the \uXXXX escape: unescaped control bytes are not + // valid JSON per RFC 8259. + dir := t.TempDir() + writeFile(t, dir, "x.json", "{\"s\":\"\\u0007\"}") + stdout, _, code := jqRun(t, "jq -c .s x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\"\\u0007\"\n", stdout) +} + +// --- broken-pipe and large output paths --- + +func TestJqIteratorMassiveOutput(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `[0,1,2,3,4,5,6,7,8,9]`) + stdout, _, code := jqRun(t, "jq -c '.[]' x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n", stdout) +} + +// --- additional edge cases --- + +// JSON value with no trailing newline must still be processed. +func TestJqValueNoTrailingNewline(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"a":1}`) + stdout, _, code := jqRun(t, "jq -c . x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "{\"a\":1}\n", stdout) +} + +// Single-byte JSON value (just `1`). +func TestJqSingleByteValue(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", "1") + stdout, _, code := jqRun(t, "jq -c . x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "1\n", stdout) +} + +// Multiple back-to-back JSON values without separators. 
+func TestJqAdjacentValues(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"a":1}{"a":2}{"a":3}`) + stdout, _, code := jqRun(t, "jq -c . x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "{\"a\":1}\n{\"a\":2}\n{\"a\":3}\n", stdout) +} + +// Whitespace-only input produces no output and exits 0. +func TestJqWhitespaceOnly(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", " \n\t \n") + stdout, _, code := jqRun(t, "jq -c . x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +// Filter compiles but throws at runtime → exit 1, stderr message. +func TestJqRuntimeFilterError(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `1`) + _, stderr, code := jqRun(t, `jq -c '. + "y"' x.json`, dir) + assert.Equal(t, 1, code) + assert.NotEqual(t, "", stderr) +} + +// Slurp aggregate cap fires across multiple files even when each file +// individually fits under the cap. +func TestJqSlurpAggregateAcrossFiles(t *testing.T) { + dir := t.TempDir() + chunk := strings.Repeat("a", jq.MaxStreamBytes/3) + writeFile(t, dir, "f1.json", `"`+chunk+`"`) + writeFile(t, dir, "f2.json", `"`+chunk+`"`) + writeFile(t, dir, "f3.json", `"`+chunk+`"`) + writeFile(t, dir, "f4.json", `"`+chunk+`"`) + _, stderr, code := jqRun(t, "jq -s . f1.json f2.json f3.json f4.json", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "exceeds") +} + +func TestJqStateNotSharedBetweenRuns(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "a.json", `{"a":1}`) + writeFile(t, dir, "b.json", `{"flag":false}`) + // First call: -e on truthy → exit 0. + _, _, code1 := jqRun(t, "jq -e .a a.json", dir) + assert.Equal(t, 0, code1) + // Second call (separate runner): -e on falsy → exit 1. If state leaked, + // "emittedTruthy" from the first call would carry over. 
+ _, _, code2 := jqRun(t, "jq -e .flag b.json", dir) + assert.Equal(t, 1, code2) +} diff --git a/builtins/jq/jq_test.go b/builtins/jq/jq_test.go new file mode 100644 index 000000000..1d6927ce5 --- /dev/null +++ b/builtins/jq/jq_test.go @@ -0,0 +1,437 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package jq_test + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/DataDog/rshell/builtins/testutil" + "github.com/DataDog/rshell/interp" +) + +// runScript runs a shell script and returns stdout, stderr, and exit code. +func runScript(t *testing.T, script, dir string, opts ...interp.RunnerOption) (string, string, int) { + t.Helper() + return testutil.RunScript(t, script, dir, opts...) +} + +// runScriptCtx runs a shell script under a custom context. +func runScriptCtx(ctx context.Context, t *testing.T, script, dir string, opts ...interp.RunnerOption) (string, string, int) { + t.Helper() + return testutil.RunScriptCtx(ctx, t, script, dir, opts...) +} + +// jqRun runs jq with AllowedPaths set to dir. +func jqRun(t *testing.T, script, dir string) (string, string, int) { + t.Helper() + return runScript(t, script, dir, interp.AllowedPaths([]string{dir})) +} + +// writeFile creates a file in dir with the given content. +func writeFile(t *testing.T, dir, name, content string) string { + t.Helper() + require.NoError(t, os.WriteFile(filepath.Join(dir, name), []byte(content), 0644)) + return name +} + +// --- Default (pretty-print) --- + +func TestJqIdentityPretty(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"a":1,"b":2}`) + stdout, stderr, code := jqRun(t, "jq . 
x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stderr) + assert.Equal(t, "{\n \"a\": 1,\n \"b\": 2\n}\n", stdout) +} + +func TestJqIdentityNested(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"a":{"b":[1,2]}}`) + stdout, _, code := jqRun(t, "jq . x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "{\n \"a\": {\n \"b\": [\n 1,\n 2\n ]\n }\n}\n", stdout) +} + +func TestJqEmptyArrayAndObjectInline(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"a":[],"b":{}}`) + stdout, _, code := jqRun(t, "jq . x.json", dir) + assert.Equal(t, 0, code) + // Empty containers stay on one line, matching jq. + assert.Equal(t, "{\n \"a\": [],\n \"b\": {}\n}\n", stdout) +} + +// --- -c / --compact-output --- + +func TestJqCompact(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"a":1,"b":2}`) + stdout, _, code := jqRun(t, "jq -c . x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "{\"a\":1,\"b\":2}\n", stdout) +} + +func TestJqCompactLongForm(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"a":1}`) + stdout, _, code := jqRun(t, "jq --compact-output . 
x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "{\"a\":1}\n", stdout) +} + +// --- -r / --raw-output --- + +func TestJqRawString(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"name":"alice"}`) + stdout, _, code := jqRun(t, "jq -r .name x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "alice\n", stdout) +} + +func TestJqRawNonString(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"a":42,"b":true,"c":null}`) + stdout, _, code := jqRun(t, "jq -r .a x.json && jq -r .b x.json && jq -r .c x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "42\ntrue\nnull\n", stdout) +} + +func TestJqRawDecodesEscapes(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"s":"a\tb\nc"}`) + stdout, _, code := jqRun(t, "jq -r .s x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\tb\nc\n", stdout) +} + +// --- -j / --join-output --- + +func TestJqJoinOutput(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"a":"hello","b":"world"}`) + stdout, _, code := jqRun(t, "jq -j .a,.b x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "helloworld", stdout) +} + +func TestJqJoinImpliesRaw(t *testing.T) { + // -j must imply -r: a string output is decoded, no quotes. + dir := t.TempDir() + writeFile(t, dir, "x.json", `"hello"`) + stdout, _, code := jqRun(t, "jq -j . x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "hello", stdout) +} + +// --- -n / --null-input --- + +func TestJqNullInput(t *testing.T) { + dir := t.TempDir() + stdout, _, code := jqRun(t, "jq -n .", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "null\n", stdout) +} + +func TestJqNullInputLiteral(t *testing.T) { + dir := t.TempDir() + stdout, _, code := jqRun(t, `jq -n -c '{x: 1, y: 2}'`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "{\"x\":1,\"y\":2}\n", stdout) +} + +func TestJqNullInputIgnoresFiles(t *testing.T) { + // -n must not read the file argument, even if it is unreadable. 
+ dir := t.TempDir() + stdout, _, code := jqRun(t, "jq -n . nonexistent.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "null\n", stdout) +} + +// --- -s / --slurp --- + +func TestJqSlurp(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "stream.json", "1 2 3") + stdout, _, code := jqRun(t, "jq -c -s . stream.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "[1,2,3]\n", stdout) +} + +func TestJqSlurpMultipleFiles(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "a.json", "1") + writeFile(t, dir, "b.json", "2 3") + stdout, _, code := jqRun(t, "jq -c -s . a.json b.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "[1,2,3]\n", stdout) +} + +func TestJqSlurpEmpty(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "empty.json", "") + stdout, _, code := jqRun(t, "jq -c -s . empty.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "[]\n", stdout) +} + +// --- -R / --raw-input --- + +func TestJqRawInput(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "lines.txt", "foo\nbar\nbaz\n") + stdout, _, code := jqRun(t, "jq -R . lines.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\"foo\"\n\"bar\"\n\"baz\"\n", stdout) +} + +func TestJqRawInputEmpty(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "empty.txt", "") + stdout, _, code := jqRun(t, "jq -R . empty.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +func TestJqSlurpRawInput(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "text.txt", "hello world\n") + stdout, _, code := jqRun(t, "jq -s -R . text.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\"hello world\\n\"\n", stdout) +} + +// --- -S / --sort-keys --- + +func TestJqSortKeys(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"banana":2,"apple":1,"cherry":3}`) + stdout, _, code := jqRun(t, "jq -S -c . 
x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "{\"apple\":1,\"banana\":2,\"cherry\":3}\n", stdout) +} + +func TestJqSortKeysNested(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"z":{"b":2,"a":1},"y":{"d":4,"c":3}}`) + stdout, _, code := jqRun(t, "jq -S -c . x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "{\"y\":{\"c\":3,\"d\":4},\"z\":{\"a\":1,\"b\":2}}\n", stdout) +} + +func TestJqSortKeysPretty(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"b":2,"a":1}`) + stdout, _, code := jqRun(t, "jq -S . x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "{\n \"a\": 1,\n \"b\": 2\n}\n", stdout) +} + +// --- -a / --ascii-output --- + +func TestJqAsciiOutputBMP(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"s":"héllo"}`) + stdout, _, code := jqRun(t, "jq -a -c .s x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\"h\\u00e9llo\"\n", stdout) +} + +func TestJqAsciiOutputSurrogatePair(t *testing.T) { + // 😀 (U+1F600) requires a surrogate pair under -a. 
+ dir := t.TempDir() + writeFile(t, dir, "x.json", `{"s":"a😀b"}`) + stdout, _, code := jqRun(t, "jq -a -c .s x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\"a\\ud83d\\ude00b\"\n", stdout) +} + +// --- -e / --exit-status --- + +func TestJqExitStatusTruthy(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"a":1}`) + stdout, _, code := jqRun(t, "jq -e .a x.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "1\n", stdout) +} + +func TestJqExitStatusNullOnly(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"a":1}`) + stdout, _, code := jqRun(t, "jq -e .missing x.json", dir) + assert.Equal(t, 1, code) + assert.Equal(t, "null\n", stdout) +} + +func TestJqExitStatusFalseOnly(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"flag":false}`) + stdout, _, code := jqRun(t, "jq -e .flag x.json", dir) + assert.Equal(t, 1, code) + assert.Equal(t, "false\n", stdout) +} + +func TestJqExitStatusNoOutput(t *testing.T) { + // select() that filters everything out leaves no output → exit 1. + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"k":1}`) + _, _, code := jqRun(t, `jq -e 'select(.k == 999)' x.json`, dir) + assert.Equal(t, 1, code) +} + +func TestJqExitStatusMixedTruthy(t *testing.T) { + // At least one truthy output → exit 0 even with leading null. + dir := t.TempDir() + writeFile(t, dir, "x.json", "null\n42\n") + _, _, code := jqRun(t, "jq -c -e . 
x.json", dir) + assert.Equal(t, 0, code) +} + +// --- stdin --- + +func TestJqReadsFromStdin(t *testing.T) { + dir := t.TempDir() + stdout, _, code := jqRun(t, `printf '{"k":"v"}' | jq -c .k`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\"v\"\n", stdout) +} + +func TestJqDashIsStdin(t *testing.T) { + dir := t.TempDir() + stdout, _, code := jqRun(t, `printf '{"k":"v"}' | jq -c .k -`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\"v\"\n", stdout) +} + +// --- multi-file --- + +func TestJqMultipleFiles(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "a.json", "1\n") + writeFile(t, dir, "b.json", "2\n") + stdout, _, code := jqRun(t, "jq -c . a.json b.json", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "1\n2\n", stdout) +} + +// --- error cases --- + +// Bare `jq` with no args defaults to the identity filter (`.`), matching +// real jq. With no input on stdin, no output is produced and exit is 0. +func TestJqNoArgsDefaultsToIdentity(t *testing.T) { + dir := t.TempDir() + stdout, stderr, code := jqRun(t, `printf '{"a":1}' | jq`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stderr) + assert.Equal(t, "{\n \"a\": 1\n}\n", stdout) +} + +func TestJqCompileError(t *testing.T) { + dir := t.TempDir() + _, stderr, code := jqRun(t, "jq -n '...not-a-filter...'", dir) + assert.Equal(t, 3, code) + assert.Contains(t, stderr, "compile error") +} + +func TestJqMissingFile(t *testing.T) { + dir := t.TempDir() + _, stderr, code := jqRun(t, "jq . missing.json", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "missing.json") +} + +func TestJqInvalidJSON(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "bad.json", `{"unterminated":`) + _, stderr, code := jqRun(t, "jq . 
bad.json", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "error reading") +} + +func TestJqUnknownFlag(t *testing.T) { + dir := t.TempDir() + _, stderr, code := jqRun(t, "jq --no-such-flag .", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "unknown flag") +} + +func TestJqRejectsColorFlag(t *testing.T) { + dir := t.TempDir() + _, stderr, code := jqRun(t, "jq -C -n .", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "unknown shorthand flag") +} + +func TestJqRejectsFromFile(t *testing.T) { + // -f / --from-file is documented as not implemented in v1. + dir := t.TempDir() + _, stderr, code := jqRun(t, "jq -f filter.jq .", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "unknown shorthand flag") +} + +// --- --help --- + +func TestJqHelpFlagPrintsUsage(t *testing.T) { + dir := t.TempDir() + stdout, _, code := jqRun(t, "jq --help", dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "Usage: jq") + assert.Contains(t, stdout, "--null-input") +} + +// --- access control --- + +func TestJqAccessDeniedOutsideAllowedPaths(t *testing.T) { + dir := t.TempDir() + other := t.TempDir() + writeFile(t, other, "x.json", "1") + _, stderr, code := runScript(t, "jq . "+filepath.Join(other, "x.json"), dir, interp.AllowedPaths([]string{dir})) + assert.Equal(t, 1, code) + assert.NotEqual(t, "", stderr) +} + +// --- end-of-flags --- --- + +func TestJqEndOfFlagsTerminator(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "x.json", `{"a":1}`) + stdout, _, code := jqRun(t, "jq -- . x.json", dir) + assert.Equal(t, 0, code) + assert.True(t, strings.Contains(stdout, `"a"`)) +} + +// --- context cancellation --- + +func TestJqRespectsContextCancel(t *testing.T) { + // A pre-cancelled context must short-circuit promptly rather than + // process the entire 1000-document stream. 
+ dir := t.TempDir() + var sb strings.Builder + for i := 0; i < 1000; i++ { + sb.WriteString(`{"i":0,"v":"abc"}` + "\n") + } + writeFile(t, dir, "stream.json", sb.String()) + + ctx, cancel := context.WithCancel(context.Background()) + cancel() // pre-cancel + start := time.Now() + _, _, _ = runScriptCtx(ctx, t, "jq -c . stream.json", dir, interp.AllowedPaths([]string{dir})) + elapsed := time.Since(start) + // Pre-cancelled context: should return effectively instantly. We + // give a generous slack for the runner-bootstrap cost. + assert.Less(t, elapsed, 2*time.Second, "cancelled context took %s — expected <2s", elapsed) +} diff --git a/go.mod b/go.mod index 5aa4ee278..25c30cb37 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ toolchain go1.26.2 require ( github.com/DataDog/datadog-agent/pkg/fleet/installer v0.78.0 + github.com/brianfloersch/fastjq v0.0.0-00010101000000-000000000000 github.com/prometheus-community/pro-bing v0.8.0 github.com/spf13/cobra v1.10.2 github.com/spf13/pflag v1.0.10 @@ -40,3 +41,5 @@ require ( golang.org/x/sync v0.20.0 // indirect golang.org/x/time v0.14.0 // indirect ) + +replace github.com/brianfloersch/fastjq => ../../fastjq diff --git a/interp/register_builtins.go b/interp/register_builtins.go index d16f1b69e..9c502f977 100644 --- a/interp/register_builtins.go +++ b/interp/register_builtins.go @@ -21,6 +21,7 @@ import ( "github.com/DataDog/rshell/builtins/head" "github.com/DataDog/rshell/builtins/help" "github.com/DataDog/rshell/builtins/ip" + "github.com/DataDog/rshell/builtins/jq" "github.com/DataDog/rshell/builtins/ls" "github.com/DataDog/rshell/builtins/ping" printfcmd "github.com/DataDog/rshell/builtins/printf" @@ -55,6 +56,7 @@ func registerBuiltins() { head.Cmd, help.Cmd, ip.Cmd, + jq.Cmd, ls.Cmd, ping.Cmd, sortcmd.Cmd, diff --git a/tests/scenarios/cmd/help/restricted.yaml b/tests/scenarios/cmd/help/restricted.yaml index bc2e34ce5..f9310e592 100644 --- a/tests/scenarios/cmd/help/restricted.yaml +++ 
b/tests/scenarios/cmd/help/restricted.yaml @@ -6,12 +6,12 @@ input: help expect: stdout: |+ - rshell (dev) — 2 of 28 builtins enabled + rshell (dev) — 2 of 29 builtins enabled echo write arguments to stdout help display help for commands - Disabled builtins: [, break, cat, continue, cut, exit, false, find, grep, head, ip, ls, ping, + Disabled builtins: [, break, cat, continue, cut, exit, false, find, grep, head, ip, jq, ls, ping, printf, ps, sed, sort, ss, strings, tail, test, tr, true, uname, uniq, wc Run 'help ' for more information on a specific command. diff --git a/tests/scenarios/cmd/help/restricted_all_flag.yaml b/tests/scenarios/cmd/help/restricted_all_flag.yaml index b7b077c53..227302e21 100644 --- a/tests/scenarios/cmd/help/restricted_all_flag.yaml +++ b/tests/scenarios/cmd/help/restricted_all_flag.yaml @@ -6,7 +6,7 @@ input: help --all expect: stdout: |+ - rshell (dev) — 2 of 28 builtins enabled + rshell (dev) — 2 of 29 builtins enabled echo write arguments to stdout help display help for commands @@ -23,6 +23,7 @@ expect: grep print lines that match patterns head output the first part of files ip show network interface and routing information + jq command-line JSON processor ls list directory contents ping send ICMP echo requests to a network host printf format and print data diff --git a/tests/scenarios/cmd/help/unrestricted.yaml b/tests/scenarios/cmd/help/unrestricted.yaml index 3b2d164c1..49aab3b7e 100644 --- a/tests/scenarios/cmd/help/unrestricted.yaml +++ b/tests/scenarios/cmd/help/unrestricted.yaml @@ -5,7 +5,7 @@ input: help expect: stdout: |+ - rshell (dev) — All 28 builtins available + rshell (dev) — All 29 builtins available [ evaluate conditional expression break exit from a loop @@ -20,6 +20,7 @@ expect: head output the first part of files help display help for commands ip show network interface and routing information + jq command-line JSON processor ls list directory contents ping send ICMP echo requests to a network host printf format and 
print data diff --git a/tests/scenarios/cmd/help/unrestricted_all_flag.yaml b/tests/scenarios/cmd/help/unrestricted_all_flag.yaml index fc0b019a6..9397f8ed0 100644 --- a/tests/scenarios/cmd/help/unrestricted_all_flag.yaml +++ b/tests/scenarios/cmd/help/unrestricted_all_flag.yaml @@ -5,7 +5,7 @@ input: help --all expect: stdout: |+ - rshell (dev) — All 28 builtins available + rshell (dev) — All 29 builtins available [ evaluate conditional expression break exit from a loop @@ -20,6 +20,7 @@ expect: head output the first part of files help display help for commands ip show network interface and routing information + jq command-line JSON processor ls list directory contents ping send ICMP echo requests to a network host printf format and print data diff --git a/tests/scenarios/cmd/jq/errors/bad_filter.yaml b/tests/scenarios/cmd/jq/errors/bad_filter.yaml new file mode 100644 index 000000000..23d73cbcc --- /dev/null +++ b/tests/scenarios/cmd/jq/errors/bad_filter.yaml @@ -0,0 +1,9 @@ +# A syntactically invalid filter triggers a compile error (exit 3). +description: jq with an unparseable filter exits with the compile-error code 3. +skip_assert_against_bash: true +input: + script: |+ + jq -n '...not a valid filter...' +expect: + exit_code: 3 + stderr_contains: ["jq: compile error"] diff --git a/tests/scenarios/cmd/jq/errors/blocked_color.yaml b/tests/scenarios/cmd/jq/errors/blocked_color.yaml new file mode 100644 index 000000000..8aa8a9a0c --- /dev/null +++ b/tests/scenarios/cmd/jq/errors/blocked_color.yaml @@ -0,0 +1,9 @@ +# --color-output is not implemented; rejected with exit 1. +description: jq does not support --color-output / -C and rejects it. +skip_assert_against_bash: true +input: + script: |+ + jq -C -n . 
+expect: + exit_code: 1 + stderr_contains: ["unknown shorthand flag"] diff --git a/tests/scenarios/cmd/jq/errors/blocked_from_file.yaml b/tests/scenarios/cmd/jq/errors/blocked_from_file.yaml new file mode 100644 index 000000000..a4bf6ca68 --- /dev/null +++ b/tests/scenarios/cmd/jq/errors/blocked_from_file.yaml @@ -0,0 +1,9 @@ +# -f / --from-file is not implemented in v1; rejected with exit 1. +description: jq does not support -f / --from-file in v1. +skip_assert_against_bash: true +input: + script: |+ + jq -f filter.jq . +expect: + exit_code: 1 + stderr_contains: ["unknown shorthand flag"] diff --git a/tests/scenarios/cmd/jq/errors/invalid_json.yaml b/tests/scenarios/cmd/jq/errors/invalid_json.yaml new file mode 100644 index 000000000..64b6d414c --- /dev/null +++ b/tests/scenarios/cmd/jq/errors/invalid_json.yaml @@ -0,0 +1,15 @@ +# Invalid JSON input is a runtime error. +description: jq fails with exit 1 when input is not valid JSON. +skip_assert_against_bash: true +setup: + files: + - path: bad.json + content: |+ + {"unterminated": +input: + allowed_paths: ["$DIR"] + script: |+ + jq . bad.json +expect: + exit_code: 1 + stderr_contains: ["error reading"] diff --git a/tests/scenarios/cmd/jq/errors/missing_file.yaml b/tests/scenarios/cmd/jq/errors/missing_file.yaml new file mode 100644 index 000000000..39ddfbc4f --- /dev/null +++ b/tests/scenarios/cmd/jq/errors/missing_file.yaml @@ -0,0 +1,10 @@ +# Reading a missing file is a runtime error. +description: jq fails with exit 1 when the input file does not exist. +skip_assert_against_bash: true +input: + allowed_paths: ["$DIR"] + script: |+ + jq . nonexistent.json +expect: + exit_code: 1 + stderr_contains: ["jq: nonexistent.json"] diff --git a/tests/scenarios/cmd/jq/errors/unknown_flag.yaml b/tests/scenarios/cmd/jq/errors/unknown_flag.yaml new file mode 100644 index 000000000..1b985cf7b --- /dev/null +++ b/tests/scenarios/cmd/jq/errors/unknown_flag.yaml @@ -0,0 +1,9 @@ +# An unsupported flag is rejected with exit 1. 
+description: jq rejects unknown flags with a clear error. +skip_assert_against_bash: true +input: + script: |+ + jq --no-such-flag . +expect: + exit_code: 1 + stderr_contains: ["unknown flag"] diff --git a/tests/scenarios/cmd/jq/exit-status/false_only.yaml b/tests/scenarios/cmd/jq/exit-status/false_only.yaml new file mode 100644 index 000000000..cf6a679fb --- /dev/null +++ b/tests/scenarios/cmd/jq/exit-status/false_only.yaml @@ -0,0 +1,17 @@ +# -e returns 1 when every output is false. +description: jq -e exits 1 when only false was produced. +skip_assert_against_bash: true +setup: + files: + - path: data.json + content: |+ + {"flag":false} +input: + allowed_paths: ["$DIR"] + script: |+ + jq -e .flag data.json +expect: + stdout: |+ + false + stderr: "" + exit_code: 1 diff --git a/tests/scenarios/cmd/jq/exit-status/null_only.yaml b/tests/scenarios/cmd/jq/exit-status/null_only.yaml new file mode 100644 index 000000000..8874890ce --- /dev/null +++ b/tests/scenarios/cmd/jq/exit-status/null_only.yaml @@ -0,0 +1,17 @@ +# -e returns 1 when every output is null. +description: jq -e exits 1 when only null was produced. +skip_assert_against_bash: true +setup: + files: + - path: data.json + content: |+ + {"a":1} +input: + allowed_paths: ["$DIR"] + script: |+ + jq -e .missing data.json +expect: + stdout: |+ + null + stderr: "" + exit_code: 1 diff --git a/tests/scenarios/cmd/jq/exit-status/truthy.yaml b/tests/scenarios/cmd/jq/exit-status/truthy.yaml new file mode 100644 index 000000000..1c5fdd002 --- /dev/null +++ b/tests/scenarios/cmd/jq/exit-status/truthy.yaml @@ -0,0 +1,17 @@ +# -e returns 0 when at least one output is truthy. +description: jq -e exits 0 when output is a truthy value. 
+skip_assert_against_bash: true +setup: + files: + - path: data.json + content: |+ + {"a":1} +input: + allowed_paths: ["$DIR"] + script: |+ + jq -e .a data.json +expect: + stdout: |+ + 1 + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/jq/filters/compact.yaml b/tests/scenarios/cmd/jq/filters/compact.yaml new file mode 100644 index 000000000..9f282e5fb --- /dev/null +++ b/tests/scenarios/cmd/jq/filters/compact.yaml @@ -0,0 +1,17 @@ +# -c forces single-line JSON output. +description: jq -c emits each output as a single JSON line. +skip_assert_against_bash: true +setup: + files: + - path: data.json + content: |+ + {"a":1,"b":2} +input: + allowed_paths: ["$DIR"] + script: |+ + jq -c . data.json +expect: + stdout: |+ + {"a":1,"b":2} + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/jq/filters/field_access.yaml b/tests/scenarios/cmd/jq/filters/field_access.yaml new file mode 100644 index 000000000..86ee552e7 --- /dev/null +++ b/tests/scenarios/cmd/jq/filters/field_access.yaml @@ -0,0 +1,17 @@ +# Single-field access via .name on a flat object. +description: jq .name extracts a single field as a JSON string. +skip_assert_against_bash: true +setup: + files: + - path: data.json + content: |+ + {"name":"alice","age":30} +input: + allowed_paths: ["$DIR"] + script: |+ + jq .name data.json +expect: + stdout: |+ + "alice" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/jq/filters/identity.yaml b/tests/scenarios/cmd/jq/filters/identity.yaml new file mode 100644 index 000000000..5c1b20fa2 --- /dev/null +++ b/tests/scenarios/cmd/jq/filters/identity.yaml @@ -0,0 +1,20 @@ +# Identity filter on a small object pretty-prints with 2-space indent. +description: jq . pretty-prints input by default. +skip_assert_against_bash: true # bash has no jq builtin. +setup: + files: + - path: data.json + content: |+ + {"a":1,"b":2} +input: + allowed_paths: ["$DIR"] + script: |+ + jq . 
data.json +expect: + stdout: |+ + { + "a": 1, + "b": 2 + } + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/jq/filters/iterator.yaml b/tests/scenarios/cmd/jq/filters/iterator.yaml new file mode 100644 index 000000000..6fdc945a9 --- /dev/null +++ b/tests/scenarios/cmd/jq/filters/iterator.yaml @@ -0,0 +1,19 @@ +# Iterator .[] emits one value per array element. +description: jq '.[]' emits each array element on its own line. +skip_assert_against_bash: true +setup: + files: + - path: data.json + content: |+ + [1,2,3] +input: + allowed_paths: ["$DIR"] + script: |+ + jq '.[]' data.json +expect: + stdout: |+ + 1 + 2 + 3 + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/jq/filters/multidoc.yaml b/tests/scenarios/cmd/jq/filters/multidoc.yaml new file mode 100644 index 000000000..368a0e8f1 --- /dev/null +++ b/tests/scenarios/cmd/jq/filters/multidoc.yaml @@ -0,0 +1,21 @@ +# Multiple JSON documents in one source are processed sequentially. +description: jq processes whitespace-separated JSON documents one at a time. +skip_assert_against_bash: true +setup: + files: + - path: stream.json + content: |+ + 1 + 2 + 3 +input: + allowed_paths: ["$DIR"] + script: |+ + jq -c . stream.json +expect: + stdout: |+ + 1 + 2 + 3 + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/jq/filters/no_args_identity.yaml b/tests/scenarios/cmd/jq/filters/no_args_identity.yaml new file mode 100644 index 000000000..c6b17f6b0 --- /dev/null +++ b/tests/scenarios/cmd/jq/filters/no_args_identity.yaml @@ -0,0 +1,14 @@ +# Bare `jq` with no FILTER defaults to identity (matches real jq). +description: jq with no filter argument defaults to the identity filter. 
+skip_assert_against_bash: true +input: + script: |+ + printf '{"a":1,"b":2}' | jq +expect: + stdout: |+ + { + "a": 1, + "b": 2 + } + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/jq/filters/pipe_stdin.yaml b/tests/scenarios/cmd/jq/filters/pipe_stdin.yaml new file mode 100644 index 000000000..e43b4d822 --- /dev/null +++ b/tests/scenarios/cmd/jq/filters/pipe_stdin.yaml @@ -0,0 +1,11 @@ +# Filter reads from stdin when no FILE is given. +description: jq reads JSON from stdin when no file argument is supplied. +skip_assert_against_bash: true +input: + script: |+ + printf '{"k":"v"}' | jq -c '.k' +expect: + stdout: |+ + "v" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/jq/filters/select_filter.yaml b/tests/scenarios/cmd/jq/filters/select_filter.yaml new file mode 100644 index 000000000..12e85154b --- /dev/null +++ b/tests/scenarios/cmd/jq/filters/select_filter.yaml @@ -0,0 +1,19 @@ +# select() suppresses values whose predicate is false. +description: jq select(.level == "error") emits only matching documents. +skip_assert_against_bash: true +setup: + files: + - path: log.json + content: |+ + {"level":"info","msg":"hi"} + {"level":"error","msg":"boom"} + {"level":"info","msg":"bye"} +input: + allowed_paths: ["$DIR"] + script: |+ + jq -c 'select(.level == "error")' log.json +expect: + stdout: |+ + {"level":"error","msg":"boom"} + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/jq/hardening/ascii_output.yaml b/tests/scenarios/cmd/jq/hardening/ascii_output.yaml new file mode 100644 index 000000000..67d3d876c --- /dev/null +++ b/tests/scenarios/cmd/jq/hardening/ascii_output.yaml @@ -0,0 +1,16 @@ +# -a escapes non-ASCII as \uXXXX. +description: jq -a emits non-ASCII characters as \uXXXX escape sequences. 
+skip_assert_against_bash: true +setup: + files: + - path: data.json + content: |+ + {"greeting":"héllo"} +input: + allowed_paths: ["$DIR"] + script: |+ + jq -a -c .greeting data.json +expect: + stdout: "\"h\\u00e9llo\"\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/jq/hardening/empty_file.yaml b/tests/scenarios/cmd/jq/hardening/empty_file.yaml new file mode 100644 index 000000000..4b172afd9 --- /dev/null +++ b/tests/scenarios/cmd/jq/hardening/empty_file.yaml @@ -0,0 +1,15 @@ +# An empty file is not an error; jq simply produces no output. +description: jq on an empty file produces no output and exits 0. +skip_assert_against_bash: true +setup: + files: + - path: empty.json + content: "" +input: + allowed_paths: ["$DIR"] + script: |+ + jq . empty.json +expect: + stdout: "" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/jq/hardening/empty_stdin_with_e.yaml b/tests/scenarios/cmd/jq/hardening/empty_stdin_with_e.yaml new file mode 100644 index 000000000..5d0c25e19 --- /dev/null +++ b/tests/scenarios/cmd/jq/hardening/empty_stdin_with_e.yaml @@ -0,0 +1,15 @@ +# With -e and no output produced, exit must be 1. +description: jq -e on an empty stream returns exit 1 (no output produced). +skip_assert_against_bash: true +setup: + files: + - path: empty.json + content: "" +input: + allowed_paths: ["$DIR"] + script: |+ + jq -e . empty.json +expect: + stdout: "" + stderr: "" + exit_code: 1 diff --git a/tests/scenarios/cmd/jq/hardening/end_of_flags.yaml b/tests/scenarios/cmd/jq/hardening/end_of_flags.yaml new file mode 100644 index 000000000..2fba49105 --- /dev/null +++ b/tests/scenarios/cmd/jq/hardening/end_of_flags.yaml @@ -0,0 +1,19 @@ +# A literal "--" stops flag parsing so a filter starting with "-" works. +description: jq accepts -- to terminate flag parsing. +skip_assert_against_bash: true +setup: + files: + - path: data.json + content: |+ + {"a":1} +input: + allowed_paths: ["$DIR"] + script: |+ + jq -- . 
data.json +expect: + stdout: |+ + { + "a": 1 + } + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/jq/hardening/special_chars.yaml b/tests/scenarios/cmd/jq/hardening/special_chars.yaml new file mode 100644 index 000000000..358fc663e --- /dev/null +++ b/tests/scenarios/cmd/jq/hardening/special_chars.yaml @@ -0,0 +1,17 @@ +# Strings with embedded escapes round-trip correctly. +description: jq preserves embedded JSON-string escape sequences. +skip_assert_against_bash: true +setup: + files: + - path: data.json + content: |+ + {"s":"a\tb\nc"} +input: + allowed_paths: ["$DIR"] + script: |+ + jq -c .s data.json +expect: + stdout: |+ + "a\tb\nc" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/jq/help/help.yaml b/tests/scenarios/cmd/jq/help/help.yaml new file mode 100644 index 000000000..c4b8273ed --- /dev/null +++ b/tests/scenarios/cmd/jq/help/help.yaml @@ -0,0 +1,10 @@ +# Verifies --help prints usage to stdout with exit 0. +description: jq --help prints usage to stdout and exits 0. +skip_assert_against_bash: true # rshell builtin help differs from system jq. +input: + script: |+ + jq --help +expect: + stdout_contains: ["Usage: jq"] + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/jq/null-input/basic.yaml b/tests/scenarios/cmd/jq/null-input/basic.yaml new file mode 100644 index 000000000..76836a988 --- /dev/null +++ b/tests/scenarios/cmd/jq/null-input/basic.yaml @@ -0,0 +1,11 @@ +# -n runs the filter once with input fixed to null. +description: jq -n runs FILTER with null as the input. +skip_assert_against_bash: true +input: + script: |+ + jq -n '.' +expect: + stdout: |+ + null + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/jq/null-input/literal.yaml b/tests/scenarios/cmd/jq/null-input/literal.yaml new file mode 100644 index 000000000..f44ea3338 --- /dev/null +++ b/tests/scenarios/cmd/jq/null-input/literal.yaml @@ -0,0 +1,11 @@ +# -n with a literal expression yields the literal directly. 
+description: jq -n returns the literal expression value. +skip_assert_against_bash: true +input: + script: |+ + jq -n -c '{x: 1, y: 2}' +expect: + stdout: |+ + {"x":1,"y":2} + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/jq/raw/join_output.yaml b/tests/scenarios/cmd/jq/raw/join_output.yaml new file mode 100644 index 000000000..1a37aa4c2 --- /dev/null +++ b/tests/scenarios/cmd/jq/raw/join_output.yaml @@ -0,0 +1,16 @@ +# -j suppresses the trailing newline that -r normally writes. +description: jq -j writes raw strings with no separator between outputs. +skip_assert_against_bash: true +setup: + files: + - path: data.json + content: |+ + {"a":"hello","b":"world"} +input: + allowed_paths: ["$DIR"] + script: |+ + jq -j .a,.b data.json +expect: + stdout: "helloworld" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/jq/raw/non_string_unchanged.yaml b/tests/scenarios/cmd/jq/raw/non_string_unchanged.yaml new file mode 100644 index 000000000..70e7fac86 --- /dev/null +++ b/tests/scenarios/cmd/jq/raw/non_string_unchanged.yaml @@ -0,0 +1,21 @@ +# -r does not affect non-string outputs. +description: jq -r leaves number / boolean / null outputs unchanged. +skip_assert_against_bash: true +setup: + files: + - path: data.json + content: |+ + {"a":42,"b":true,"c":null} +input: + allowed_paths: ["$DIR"] + script: |+ + jq -r .a data.json + jq -r .b data.json + jq -r .c data.json +expect: + stdout: |+ + 42 + true + null + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/jq/raw/raw_input_quoted.yaml b/tests/scenarios/cmd/jq/raw/raw_input_quoted.yaml new file mode 100644 index 000000000..6f341677b --- /dev/null +++ b/tests/scenarios/cmd/jq/raw/raw_input_quoted.yaml @@ -0,0 +1,21 @@ +# -R turns each input line into a JSON-string input (parses correctly). +description: jq -R wraps each line as a JSON string output. 
+skip_assert_against_bash: true +setup: + files: + - path: lines.txt + content: |+ + foo + bar + baz +input: + allowed_paths: ["$DIR"] + script: |+ + jq -R . lines.txt +expect: + stdout: |+ + "foo" + "bar" + "baz" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/jq/raw/string_decoded.yaml b/tests/scenarios/cmd/jq/raw/string_decoded.yaml new file mode 100644 index 000000000..fb4b8ecbb --- /dev/null +++ b/tests/scenarios/cmd/jq/raw/string_decoded.yaml @@ -0,0 +1,17 @@ +# -r prints JSON-string outputs without surrounding quotes. +description: jq -r decodes JSON-string outputs and prints the contents raw. +skip_assert_against_bash: true +setup: + files: + - path: data.json + content: |+ + {"name":"alice"} +input: + allowed_paths: ["$DIR"] + script: |+ + jq -r .name data.json +expect: + stdout: |+ + alice + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/jq/slurp/array.yaml b/tests/scenarios/cmd/jq/slurp/array.yaml new file mode 100644 index 000000000..072f74f4e --- /dev/null +++ b/tests/scenarios/cmd/jq/slurp/array.yaml @@ -0,0 +1,17 @@ +# -s gathers all input values into a single array. +description: jq -s collects multi-document input into one array. +skip_assert_against_bash: true +setup: + files: + - path: stream.json + content: |+ + 1 2 3 +input: + allowed_paths: ["$DIR"] + script: |+ + jq -c -s . stream.json +expect: + stdout: |+ + [1,2,3] + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/jq/slurp/raw_string.yaml b/tests/scenarios/cmd/jq/slurp/raw_string.yaml new file mode 100644 index 000000000..ba90c502d --- /dev/null +++ b/tests/scenarios/cmd/jq/slurp/raw_string.yaml @@ -0,0 +1,17 @@ +# -s -R slurps the entire stream into a single JSON string. +description: jq -s -R reads the whole input as one JSON string. +skip_assert_against_bash: true +setup: + files: + - path: text.txt + content: |+ + hello world +input: + allowed_paths: ["$DIR"] + script: |+ + jq -s -R . 
text.txt +expect: + stdout: |+ + "hello world\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/jq/sort-keys/basic.yaml b/tests/scenarios/cmd/jq/sort-keys/basic.yaml new file mode 100644 index 000000000..e08f68ec7 --- /dev/null +++ b/tests/scenarios/cmd/jq/sort-keys/basic.yaml @@ -0,0 +1,17 @@ +# -S emits object keys in lexicographic order. +description: jq -S sorts object keys in output. +skip_assert_against_bash: true +setup: + files: + - path: data.json + content: |+ + {"banana":2,"apple":1,"cherry":3} +input: + allowed_paths: ["$DIR"] + script: |+ + jq -S -c . data.json +expect: + stdout: |+ + {"apple":1,"banana":2,"cherry":3} + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/jq/sort-keys/nested.yaml b/tests/scenarios/cmd/jq/sort-keys/nested.yaml new file mode 100644 index 000000000..d30c195a0 --- /dev/null +++ b/tests/scenarios/cmd/jq/sort-keys/nested.yaml @@ -0,0 +1,17 @@ +# -S sorts keys at every nesting level. +description: jq -S sorts keys recursively in nested objects. +skip_assert_against_bash: true +setup: + files: + - path: data.json + content: |+ + {"z":{"b":2,"a":1},"y":{"d":4,"c":3}} +input: + allowed_paths: ["$DIR"] + script: |+ + jq -S -c . data.json +expect: + stdout: |+ + {"y":{"c":3,"d":4},"z":{"a":1,"b":2}} + stderr: "" + exit_code: 0