diff --git a/CHANGELOG.md b/CHANGELOG.md index f2ed11c..5764654 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,60 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added + +- **HTTP body compression** — gzip on responses and inbound request + bodies, served identically across HTTP/1.1, HTTP/2 and HTTP/3. + Build flag: `--enable-http-compression` (default on; auto-detects + zlib-ng with system zlib as fallback). + + Five `HttpServerConfig` setters drive the policy and are frozen at + `HttpServer::__construct`: + - `setCompressionEnabled(bool)` — master switch (default `true`). + - `setCompressionLevel(int)` — zlib level 1..9 (default 6). + - `setCompressionMinSize(int)` — body-size threshold below which + responses stay identity (default 1 KiB; valid 0..16 MiB). + - `setCompressionMimeTypes(array)` — replaces the whitelist + wholesale (nginx semantics). Default ships the union of nginx + `gzip_types` and h2o text-only defaults. + - `setRequestMaxDecompressedSize(int)` — anti-zip-bomb cap on + decoded request bodies (default 10 MiB; 0 = no cap, must be + explicit). + + Per-response opt-out: `HttpResponse::setNoCompression()` overrides + every other rule. Use for endpoints combining secrets with + reflected user input (BREACH mitigation), pre-encoded payloads, + or anywhere the server must not wrap the body. + + Negotiation follows RFC 9110 §12.5.3 — q-values, `identity;q=0`, + `*;q=0` excludes identity unless an explicit identity entry + rescues it. Default when no `Accept-Encoding` header is sent + resolves to identity-only (matches nginx; safer than the strict + RFC reading). Skip rules: status 1xx/204/304, HEAD, Range + responses, handler-set `Content-Encoding`, MIME outside the + whitelist, body below the threshold. 
+ + Inbound: `Content-Encoding: gzip` (and the legacy `x-gzip` + alias) on requests is decoded transparently. `identity` is a + no-op. Unknown codings → 415; bomb-cap exceeded → 413; corrupt + inflate → 400. The handler observes the decoded body via + `HttpRequest::getBody()`. + + Streaming: when handlers call `HttpResponse::send($chunk)`, the + compressing wrapper transparently engages on first call (subject + to negotiation) and produces one downstream chunk per source + chunk — preserving framing efficiency on chunked H1 and H2 + DATA frames. + + Backend: `zlib-ng` is preferred at build time for ~2-4× higher + throughput at the same compression level; system `zlib` is the + drop-in fallback. Both share the same source via a thin + `zng_*` ↔ `*` macro layer. + + Issue [#8](https://github.com/true-async/server/issues/8). + ## [0.2.0] - 2026-05-04 ### Added diff --git a/CMakeLists.txt b/CMakeLists.txt index e4e2476..fc51c99 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,6 +111,52 @@ if(ENABLE_HTTP3) endif() endif() +# HTTP body compression (issue #8). Mirrors config.m4: prefer zlib-ng +# (faster API-compatible drop-in for zlib), fall back to system zlib. +# Fail-soft: if neither is found, HAVE_HTTP_COMPRESSION stays undefined +# and the compression sources are dropped from the build. 
+option(ENABLE_HTTP_COMPRESSION "Enable HTTP body compression (zlib-ng preferred, zlib fallback)" ON) +set(COMPRESSION_SOURCES "") +if(ENABLE_HTTP_COMPRESSION) + find_package(PkgConfig QUIET) + set(_compression_ok FALSE) + if(PkgConfig_FOUND) + pkg_check_modules(ZLIB_NG QUIET zlib-ng) + if(ZLIB_NG_FOUND) + add_compile_definitions(HAVE_ZLIB_NG=1 HAVE_HTTP_COMPRESSION=1) + message(STATUS " Compression: zlib-ng ${ZLIB_NG_VERSION}") + set(_compression_ok TRUE) + else() + pkg_check_modules(ZLIB_PC QUIET zlib) + if(ZLIB_PC_FOUND) + add_compile_definitions(HAVE_HTTP_COMPRESSION=1) + message(STATUS " Compression: zlib ${ZLIB_PC_VERSION} (zlib-ng not found)") + set(_compression_ok TRUE) + endif() + endif() + endif() + if(NOT _compression_ok) + find_package(ZLIB QUIET) + if(ZLIB_FOUND) + add_compile_definitions(HAVE_HTTP_COMPRESSION=1) + message(STATUS " Compression: zlib ${ZLIB_VERSION_STRING} (system)") + set(_compression_ok TRUE) + endif() + endif() + if(_compression_ok) + set(COMPRESSION_SOURCES + src/compression/http_compression.c + src/compression/http_compression_gzip.c + src/compression/http_compression_defaults.c + src/compression/http_compression_negotiate.c + src/compression/http_compression_response.c + src/compression/http_compression_request.c + ) + else() + message(STATUS " Compression: disabled (no zlib-ng or zlib found)") + endif() +endif() + # Source files - HTTP/1.1 parser set(HTTP1_SOURCES src/http1/http_parser.c @@ -143,6 +189,7 @@ set(ALL_SOURCES ${FORMAT_SOURCES} ${LOG_SOURCES} ${LLHTTP_SOURCES} + ${COMPRESSION_SOURCES} ) # Create library target (not for actual building, just for CodeQL analysis) diff --git a/README.md b/README.md index b920eb3..1c72230 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ PHP 8.6+ HTTP 1.1 | 2 | 3 TLS 1.2 | 1.3 + gzip via zlib-ng WebSocket gRPC Security Audited @@ -42,6 +43,7 @@ This means you can serve a REST API over HTTP/2, push real-time events over Serv | ✅ Ready | **Zero-copy architecture** | Minimal 
allocations on hot paths | | ✅ Ready | **HTTP/2** | Multiplexing, server push (via nghttp2) | | ✅ Ready | **HTTP/3 / QUIC** | UDP transport via ngtcp2 + nghttp3; OpenSSL 3.5 QUIC API | +| ✅ Ready | **Compression (gzip)** | Response gzip + inbound decode across H1/H2/H3 (zlib-ng / zlib). See [docs/COMPRESSION.md](docs/COMPRESSION.md). | | 📋 Planned | **WebSocket** | RFC 6455, upgrade from HTTP/1.1 and HTTP/2, full duplex | | 📋 Planned | **SSE (Server-Sent Events)** | RFC 8895, server-to-client event streaming | | 📋 Planned | **gRPC** | Built on HTTP/2, unary and streaming RPC | diff --git a/config.m4 b/config.m4 index b562c08..2099f23 100644 --- a/config.m4 +++ b/config.m4 @@ -48,6 +48,13 @@ PHP_ARG_WITH([nghttp3], [no], [no]) +PHP_ARG_ENABLE([http-compression], + [whether to enable HTTP body compression], + [AS_HELP_STRING([--enable-http-compression], + [Enable HTTP body compression (auto-detected; prefers zlib-ng, falls back to zlib; use --disable-http-compression to opt out)])], + [yes], + [no]) + PHP_ARG_ENABLE([tests], [whether to build tests], [AS_HELP_STRING([--enable-tests], @@ -309,6 +316,59 @@ if test "$PHP_HTTP_SERVER" != "no"; then fi fi + dnl HTTP body compression (issue #8). Default: auto-detect — enabled + dnl if zlib-ng or zlib is present. Prefers zlib-ng for ~2–4x throughput + dnl over stock zlib at the same compression level; falls back to system + dnl zlib so the build never blocks on a missing optional dependency. + dnl Fail-soft policy: if neither is found, emit a warning and leave + dnl HAVE_HTTP_COMPRESSION undefined (build completes, feature absent). 
+ if test "$PHP_HTTP_COMPRESSION" = "yes"; then + AC_PATH_PROG([PKG_CONFIG], [pkg-config], [no]) + _http_server_compression_ok=no + + AC_MSG_CHECKING([for zlib-ng]) + if test -x "$PKG_CONFIG" && "$PKG_CONFIG" --exists zlib-ng 2>/dev/null; then + ZLIB_NG_CFLAGS=`"$PKG_CONFIG" --cflags zlib-ng` + ZLIB_NG_LIBS=`"$PKG_CONFIG" --libs zlib-ng` + ZLIB_NG_VERSION=`"$PKG_CONFIG" --modversion zlib-ng` + AC_MSG_RESULT([yes (version $ZLIB_NG_VERSION)]) + PHP_EVAL_LIBLINE($ZLIB_NG_LIBS, TRUE_ASYNC_SERVER_SHARED_LIBADD) + PHP_EVAL_INCLINE($ZLIB_NG_CFLAGS) + AC_DEFINE([HAVE_ZLIB_NG], [1], [Whether zlib-ng is available]) + AC_DEFINE([HAVE_HTTP_COMPRESSION], [1], [Whether HTTP body compression is enabled]) + _http_server_compression_ok=yes + else + AC_MSG_RESULT([no]) + + AC_MSG_CHECKING([for zlib (fallback)]) + if test -x "$PKG_CONFIG" && "$PKG_CONFIG" --exists zlib 2>/dev/null; then + ZLIB_CFLAGS=`"$PKG_CONFIG" --cflags zlib` + ZLIB_LIBS=`"$PKG_CONFIG" --libs zlib` + ZLIB_VERSION=`"$PKG_CONFIG" --modversion zlib` + AC_MSG_RESULT([yes (version $ZLIB_VERSION)]) + PHP_EVAL_LIBLINE($ZLIB_LIBS, TRUE_ASYNC_SERVER_SHARED_LIBADD) + PHP_EVAL_INCLINE($ZLIB_CFLAGS) + AC_DEFINE([HAVE_HTTP_COMPRESSION], [1], [Whether HTTP body compression is enabled]) + _http_server_compression_ok=yes + else + dnl Last resort — many systems have libz.so without a .pc file. + AC_CHECK_LIB([z], [deflate], [ + PHP_ADD_LIBRARY([z], [1], [TRUE_ASYNC_SERVER_SHARED_LIBADD]) + AC_DEFINE([HAVE_HTTP_COMPRESSION], [1], [Whether HTTP body compression is enabled]) + _http_server_compression_ok=yes + AC_MSG_RESULT([yes (linked via -lz)]) + ], [ + AC_MSG_RESULT([no]) + ]) + fi + fi + + if test "$_http_server_compression_ok" != "yes"; then + AC_MSG_WARN([HTTP body compression disabled: neither zlib-ng nor zlib found. 
Install libzlib-ng-dev or zlib1g-dev to enable.]) + PHP_HTTP_COMPRESSION=no + fi + fi + dnl Unit tests support (CMocka) if test "$PHP_TESTS" = "yes"; then AC_CHECK_LIB(cmocka, _cmocka_run_group_tests, [ @@ -371,6 +431,21 @@ if test "$PHP_HTTP_SERVER" != "no"; then " fi + dnl Compression sources — gated by the same PHP_HTTP_COMPRESSION=yes + dnl set by the detection block above. Layout under src/compression/ + dnl mirrors src/http2/ and src/http3/ — additional codecs (Brotli, + dnl zstd) drop in here in phase 2 without touching the response path. + if test "$PHP_HTTP_COMPRESSION" = "yes"; then + http_server_sources="$http_server_sources + src/compression/http_compression.c + src/compression/http_compression_gzip.c + src/compression/http_compression_defaults.c + src/compression/http_compression_negotiate.c + src/compression/http_compression_response.c + src/compression/http_compression_request.c + " + fi + dnl HTTP/3 sources — gated by the same PHP_HTTP3=yes set by the detection dnl block above. Files appear in the build only when H3 detection dnl succeeded; no internal #ifdef wrap is needed. @@ -428,6 +503,10 @@ if test "$PHP_HTTP_SERVER" != "no"; then PHP_ADD_BUILD_DIR([$ext_builddir/src/http2]) fi + if test "$PHP_HTTP_COMPRESSION" = "yes"; then + PHP_ADD_BUILD_DIR([$ext_builddir/src/compression]) + fi + if test "$PHP_HTTP3" = "yes"; then PHP_ADD_BUILD_DIR([$ext_builddir/src/http3]) PHP_ADD_INCLUDE([$ext_srcdir/src/http3]) diff --git a/docs/COMPRESSION.md b/docs/COMPRESSION.md new file mode 100644 index 0000000..e505877 --- /dev/null +++ b/docs/COMPRESSION.md @@ -0,0 +1,151 @@ +# HTTP body compression + +Phase 1 — gzip on responses + inbound request decoding, served identically +across HTTP/1.1, HTTP/2 and HTTP/3. Issue +[#8](https://github.com/true-async/server/issues/8). + +## Build + +`--enable-http-compression` is on by default. 
The build prefers +`zlib-ng` (≈2-4× the throughput of stock zlib at the same compression +level) and falls back to system `zlib` if the former is not installed. +Pass `--disable-http-compression` to opt out entirely. + +```sh +./configure --enable-http-server --enable-http-compression # default +./configure --enable-http-server --disable-http-compression # off +``` + +## Configuration + +All five knobs live on `HttpServerConfig` and freeze at +`HttpServer::__construct` — same discipline as the other config setters. + +| Setter | Default | Range | +|---|---|---| +| `setCompressionEnabled(bool)` | `true` | — | +| `setCompressionLevel(int)` | `6` | 1..9 (zlib semantics) | +| `setCompressionMinSize(int)` | `1024` | 0..16 MiB | +| `setCompressionMimeTypes(array)` | text whitelist below | non-empty strings | +| `setRequestMaxDecompressedSize(int)` | `10485760` (10 MiB) | ≥ 0 (0 = no cap) | + +Default MIME whitelist (replaces wholesale on `setCompressionMimeTypes`): + +``` +application/javascript image/svg+xml text/javascript +application/json text/css text/plain +application/xml text/html text/xml +``` + +`getCompressionMimeTypes()` returns the live, materialised list — what +`var_dump($cfg->getCompressionMimeTypes())` shows is exactly the policy +the negotiation code applies. + +## Per-response opt-out + +```php +$response->setNoCompression(); +``` + +Overrides every other rule (Accept-Encoding negotiation, MIME match, +size threshold). Use on: + +- responses that combine secrets with reflected user input (BREACH + mitigation), +- pre-compressed payloads where the handler already set + `Content-Encoding`, +- diagnostic dumps you want to read off the wire as-is. + +## Negotiation + +Follows RFC 9110 §12.5.3 with two pragmatic deviations: + +1. **No `Accept-Encoding` header → identity only.** RFC permits any + coding in this case, but real-world clients without AE are usually + probes / scripts that may not handle gzip. Matches nginx. +2. 
**`identity;q=0` and `*;q=0` are honoured.** A `*;q=0` without a + later identity entry excludes identity, so the response goes out as + identity if there is no acceptable coding — the 406 path is not + taken; preference is to ship a working response. + +Skip rules — when **any** of these holds, the response stays identity: + +- request method is `HEAD` +- request carries a `Range` header +- response status ∈ `1xx, 204, 304` +- handler already set `Content-Encoding` +- response `Content-Type` is outside the whitelist +- response body is smaller than `compression_min_size` (buffered path + only — streaming bodies have unknown size) +- `setNoCompression()` was called on the response +- `compression_enabled` is false in the config + +When compression engages, the response gets: + +``` +Content-Encoding: gzip +Vary: Accept-Encoding (appended if Vary already exists) +``` + +`Content-Length` is recomputed for buffered responses; on streaming +responses (`HttpResponse::send`) it is dropped — chunked H1 and H2 +DATA framing carry length implicitly. + +## Inbound (request body) decoding + +`Content-Encoding: gzip` (and the legacy `x-gzip` alias) on incoming +requests is decoded transparently before the handler runs. Handlers +see `HttpRequest::getBody()` returning the decoded payload; the +`Content-Encoding` header on the request side is left intact for +diagnostic round-trip. + +| Outcome | HTTP status | +|---|---| +| Unknown coding (e.g. `br`, `deflate`) | 415 Unsupported Media Type | +| Decoded size exceeds `request_max_decompressed_size` | 413 Payload Too Large | +| Corrupt inflate stream | 400 Bad Request | +| `identity` or no `Content-Encoding` header | pass-through | + +## Streaming + +When handlers stream via `$response->send($chunk)`, the encoder is +installed transparently on the first call (subject to negotiation). 
+The wrapper accumulates compressed output across an entire encoder +iteration and ships it as a single underlying chunk — one chunked-H1 +size line, one H2 DATA frame per `send()` call, regardless of how many +internal deflate passes the encoder needed. + +`mark_ended()` (called by `$response->end()`) drains the gzip trailer +(CRC32 + ISIZE) into a final chunk before delegating to the underlying +ops. + +## Engine selection + +The build banner reports the chosen engine: + +``` +checking for zlib-ng... yes (version 2.1.0) +``` + +or + +``` +checking for zlib-ng... no +checking for zlib (fallback)... yes (version 1.3) +``` + +At runtime the engine is also visible via the +`http_compression_engine_name()` C symbol — `"zlib-ng"`, `"zlib"`, or +`"disabled"` when the feature is off. + +## What's not in scope (yet) + +Phase 2 will add Brotli (`br`) and zstd (`zstd`) backends through the +same `http_encoder_t` vtable; phase 3 covers pre-compressed static +assets (`*.gz` / `*.br` on disk, served via sendfile). Threadpool +offload for very large buffered bodies is gated on real-world latency +profiles — not added speculatively. + +Strict `deflate` is intentionally skipped: half the deployed clients +send raw deflate and the other half send zlib-wrapped deflate, and +neither side reliably negotiates which is which. Use gzip. diff --git a/include/compression/http_compression_defaults.h b/include/compression/http_compression_defaults.h new file mode 100644 index 0000000..d4a776b --- /dev/null +++ b/include/compression/http_compression_defaults.h @@ -0,0 +1,30 @@ +/* + * Default policy values for HTTP body compression. Lives in its own + * header so the response pipeline and the config-setter code share one + * source of truth — and policy tweaks (whitelist edits, level bump) are + * one-line diffs that don't touch the configuration plumbing. 
+ */ +#ifndef HTTP_COMPRESSION_DEFAULTS_H +#define HTTP_COMPRESSION_DEFAULTS_H + +#include +#include + +/* Knob defaults — units match the corresponding HttpServerConfig setter. */ +#define HTTP_COMPRESSION_DEFAULT_LEVEL 6 /* gzip default */ +#define HTTP_COMPRESSION_LEVEL_MIN 1 +#define HTTP_COMPRESSION_LEVEL_MAX 9 + +#define HTTP_COMPRESSION_DEFAULT_MIN_SIZE 1024u /* below this, overhead wins */ +#define HTTP_COMPRESSION_MIN_SIZE_MAX (16u * 1024u * 1024u) + +#define HTTP_COMPRESSION_DEFAULT_REQUEST_MAX_DECOMP (10u * 1024u * 1024u) /* 10 MiB */ + +/* NULL-terminated, lowercase, sorted whitelist of MIME `type/subtype`s + * worth gzipping by default. Matches the union of nginx `gzip_types` + * and h2o defaults — text + structured-data only, never binary. The + * setCompressionMimeTypes() setter REPLACES this list wholesale + * (nginx semantics), so users who want a delta need to re-list. */ +extern const char *const http_compression_default_mime_types[]; + +#endif /* HTTP_COMPRESSION_DEFAULTS_H */ diff --git a/include/compression/http_compression_negotiate.h b/include/compression/http_compression_negotiate.h new file mode 100644 index 0000000..1e66ecc --- /dev/null +++ b/include/compression/http_compression_negotiate.h @@ -0,0 +1,54 @@ +/* + * Accept-Encoding parsing and codec selection — pure C, no Zend deps, + * so unit tests can exercise the state machine without a PHP runtime. + * + * Phase-1 surface: distinguish gzip from identity, with q-values, the + * `*` wildcard, and `identity;q=0` semantics per RFC 9110 §12.5.3. + * Phase-2 codecs (Brotli, zstd) extend the result struct in place; the + * select() function walks them in preference order. + */ +#ifndef HTTP_COMPRESSION_NEGOTIATE_H +#define HTTP_COMPRESSION_NEGOTIATE_H + +#include +#include + +#include "compression/http_encoder.h" + +typedef struct { + bool gzip_acceptable; + bool identity_acceptable; +} http_accept_encoding_t; + +/* Initialise to the "no Accept-Encoding header was sent" default. 
We + * deliberately resolve this to identity-only (gzip rejected) rather + * than RFC 9110 §12.5.3's strict "any coding acceptable" — see the + * impl comment for the rationale (BREACH-safe-by-default + matching + * nginx). Distinct from parsing an empty header value, which also + * resolves to identity-only via parse() but for a different RFC reason. */ +void http_accept_encoding_init_default(http_accept_encoding_t *out); + +/* Parse a single Accept-Encoding header value. Multi-value headers + * (RFC: multiple Accept-Encoding lines collapse with `,`) should be + * concatenated by the caller before calling this. Tolerant of LWS, + * unknown codings (ignored), malformed q values (treated as q=1). + * len=0 → only identity acceptable (empty header semantics). */ +void http_accept_encoding_parse(const char *hdr, size_t len, + http_accept_encoding_t *out); + +/* Pick the best codec given the parsed prefs and what we have built in. + * HTTP_CODEC_GZIP — encode with gzip + * HTTP_CODEC_IDENTITY — send raw + * HTTP_CODEC__COUNT — sentinel: client refuses every coding we + * can offer (incl. identity). Caller should + * respond 406 Not Acceptable. */ +http_codec_id_t http_accept_encoding_select(const http_accept_encoding_t *ae); + +/* Strip MIME parameters (`;…`), trim, lowercase. Writes up to `dst_cap` + * bytes (no trailing NUL) and returns the normalised length, or 0 if + * the input normalises to empty. dst may equal src for in-place use. + * Caller-sized buffer: passing dst_cap >= ct_len always suffices. */ +size_t http_compression_mime_normalize(const char *ct, size_t ct_len, + char *dst, size_t dst_cap); + +#endif /* HTTP_COMPRESSION_NEGOTIATE_H */ diff --git a/include/compression/http_compression_request.h b/include/compression/http_compression_request.h new file mode 100644 index 0000000..206ad32 --- /dev/null +++ b/include/compression/http_compression_request.h @@ -0,0 +1,38 @@ +/* + * Inbound request body decoding (Content-Encoding: gzip from clients). 
+ * Phase 1: gzip only. Unknown codings → 415. Bomb-cap exceeded → 413. + * + * Caller owns the request struct; on success, req->body is replaced + * with the decoded zend_string and the original is released. The + * Content-Encoding header is left in place — callers that round-trip + * the request elsewhere keep the wire-truth intact; the decoded + * body is what handlers see, and that is what matters at the API. + */ +#ifndef HTTP_COMPRESSION_REQUEST_H +#define HTTP_COMPRESSION_REQUEST_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + HTTP_DECODE_OK = 0, /* no coding, identity, or successful inflate */ + HTTP_DECODE_UNKNOWN_CODING = 415, /* coding the server does not implement */ + HTTP_DECODE_TOO_LARGE = 413, /* exceeded request_max_decompressed_size */ + HTTP_DECODE_MALFORMED = 400, /* zlib reported corruption */ +} http_decode_status_t; + +typedef struct http_request_t http_request_t; +typedef struct _http_server_config_t http_server_config_t; + +/* Decode req->body in place. Returns one of HTTP_DECODE_*. The numeric + * value of every non-OK return is the HTTP status the caller should + * emit — keeps the call site free of mapping tables. */ +int http_compression_decode_request_body(http_request_t *req, + http_server_config_t *cfg); + +#ifdef __cplusplus +} +#endif + +#endif /* HTTP_COMPRESSION_REQUEST_H */ diff --git a/include/compression/http_compression_response.h b/include/compression/http_compression_response.h new file mode 100644 index 0000000..fda272e --- /dev/null +++ b/include/compression/http_compression_response.h @@ -0,0 +1,64 @@ +/* + * Response-side compression: state attached to HttpResponse, the + * buffered-apply hook, the streaming-ops wrapper, and the per-response + * opt-out flag. All decisions go through one decide() that combines + * request, response and config inputs — single source of truth. 
+ * + * Lifetime: the state struct is allocated lazily by http_compression_attach + * (called by each protocol dispatch right after install_stream_ops) and + * freed by http_compression_state_free at object dtor. + */ +#ifndef HTTP_COMPRESSION_RESPONSE_H +#define HTTP_COMPRESSION_RESPONSE_H + +#include +#include + +#include "compression/http_encoder.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Forward decls — kept here so callers don't need to include the full + * Zend / php_http_server.h in inline-light call sites. */ +typedef struct http_request_t http_request_t; +typedef struct _http_server_config_t http_server_config_t; +struct _zend_object; + +/* Allocate compression state on the response and remember the request + * + server config it was dispatched with. The request is held by a + * non-owning pointer — the dispatch ctx already keeps it alive for as + * long as the response zval is. No-op when compression is disabled in + * cfg (state stays NULL, hooks below cheaply early-return). */ +void http_compression_attach(struct _zend_object *response_obj, + http_request_t *request, + http_server_config_t *cfg); + +/* Free state attached above. Called from the response object's free_obj. */ +void http_compression_state_free(struct _zend_object *response_obj); + +/* Mark this response as ineligible for compression (BREACH-sensitive + * endpoints, handler-controlled binary blobs, etc.). Idempotent. */ +void http_compression_mark_no_compression(struct _zend_object *response_obj); + +/* Buffered hook: when the response is being serialised (called from + * http_response_format / format_parts), gzip the body in place and + * mutate headers (Content-Encoding, Vary, drop Content-Length). + * Idempotent — a second call is a no-op. Cheap fast path when state + * is NULL or decide() returns identity. 
*/ +void http_compression_apply_buffered(struct _zend_object *response_obj); + +/* Streaming hook: at the first HttpResponse::send() call, swap the + * installed stream_ops with a compressing wrapper if decide() says yes. + * Mutates response headers in the same shot so the H1 chunked emitter + * commits them with Content-Encoding already set. Cheap no-op when + * state is NULL or decide() returns identity. */ +void http_compression_maybe_install_stream_wrapper( + struct _zend_object *response_obj); + +#ifdef __cplusplus +} +#endif + +#endif /* HTTP_COMPRESSION_RESPONSE_H */ diff --git a/include/compression/http_encoder.h b/include/compression/http_encoder.h new file mode 100644 index 0000000..b069c60 --- /dev/null +++ b/include/compression/http_encoder.h @@ -0,0 +1,71 @@ +/* + * HTTP body compression — codec-agnostic encoder/decoder vtable. + * + * Phase 1 ships a single backend (gzip via zlib-ng, with system zlib as + * fallback). The vtable indirection is upfront so phase 2 codecs (Brotli, + * zstd) plug in without touching the response pipeline. See issue #8. + */ +#ifndef HTTP_ENCODER_H +#define HTTP_ENCODER_H + +#include +#include +#include + +typedef enum { + HTTP_CODEC_IDENTITY = 0, + HTTP_CODEC_GZIP, + /* HTTP_CODEC_BROTLI, HTTP_CODEC_ZSTD reserved for phase 2. */ + HTTP_CODEC__COUNT +} http_codec_id_t; + +typedef enum { + HTTP_ENC_OK = 0, /* progress made; caller may loop */ + HTTP_ENC_NEED_OUTPUT, /* output buffer full — drain and call again */ + HTTP_ENC_DONE, /* finish() flushed everything */ + HTTP_ENC_ERROR +} http_encoder_status_t; + +typedef struct http_encoder http_encoder_t; + +typedef struct http_encoder_vtable { + const char *name; + http_codec_id_t id; + + /* Allocate and initialise an encoder at the given level (1..9 for + * gzip; backends ignore the value when not applicable). Returns NULL + * on allocation/init failure. */ + http_encoder_t *(*create)(int level); + + /* Compress one chunk. 
The implementation must update *in_consumed + * and *out_produced even when returning NEED_OUTPUT, so callers can + * iterate on partial progress. Output buffer is caller-owned. */ + http_encoder_status_t (*write)(http_encoder_t *enc, + const void *in, size_t in_len, size_t *in_consumed, + void *out, size_t out_cap, size_t *out_produced); + + /* Flush trailing bytes / write the codec footer. May need to be + * called repeatedly with a refreshed output buffer until DONE. */ + http_encoder_status_t (*finish)(http_encoder_t *enc, + void *out, size_t out_cap, size_t *out_produced); + + void (*destroy)(http_encoder_t *enc); +} http_encoder_vtable_t; + +/* Common header. Backend-specific state follows in subclassed structs; + * callers never touch fields beyond ->vt. */ +struct http_encoder { + const http_encoder_vtable_t *vt; +}; + +/* Codec registry. Returns NULL for identity (no encoder needed) and + * when the codec is not compiled in. */ +const http_encoder_vtable_t *http_compression_lookup(http_codec_id_t id); + +/* Token for Content-Encoding / Accept-Encoding ("gzip", "identity"). */ +const char *http_compression_codec_token(http_codec_id_t id); + +/* Build-time identifier of the gzip engine: "zlib-ng", "zlib", or + * "disabled" when no compression backend was compiled in. + * Used in the build banner and in diagnostic logs. */ +const char *http_compression_engine_name(void); + +#endif /* HTTP_ENCODER_H */ diff --git a/include/php_http_server.h b/include/php_http_server.h index e9b5eda..13e1bc0 100644 --- a/include/php_http_server.h +++ b/include/php_http_server.h @@ -210,6 +210,26 @@ struct _http_server_config_t { uint32_t http3_peer_connection_budget; bool http3_alt_svc_enabled; + /* HTTP body compression (issue #8). Phase 1 ships gzip via zlib-ng. + * compression_enabled — master switch (default true). + * compression_level — 1..9 (zlib semantics; default 6). + * compression_min_size — body below this is left identity + * (overhead beats win on tiny bodies). 
+ * compression_mime_types — set of `type/subtype` strings (lowercase, + * stripped of params) eligible for compression. + * Materialised at object init from the + * default whitelist so getters always + * return the live policy. setter REPLACES + * wholesale (nginx semantics). + * request_max_decompressed_size — anti-zip-bomb cap on decoded request + * bodies. 0 = no cap (must be explicit). + */ + bool compression_enabled; + uint8_t compression_level; + size_t compression_min_size; + HashTable *compression_mime_types; + size_t request_max_decompressed_size; + /* Log + telemetry. log_severity is an http_log_severity_t int value * (0/5/9/13/17), set via setLogSeverity(LogSeverity). log_stream is * an IS_RESOURCE zval pointing at any @@ -415,6 +435,11 @@ void http_server_on_parse_error(http_server_object *server, int status_code); HashTable *http_server_get_protocol_handlers(http_server_object *server); zend_async_scope_t *http_server_get_scope (http_server_object *server); +/* Live HttpServerConfig the server was constructed with. The returned + * pointer is non-owning and stays valid for the server's lifetime — + * the config object's zval is held inside http_server_object. */ +http_server_config_t *http_server_get_config (http_server_object *server); + /* Embedded per-server log_state (PLAN_LOG.md). Long-lived structures * (http_connection_t, http3_connection_t, mp_processor_t) cache the * result at create time. Returns &http_log_state_default for NULL. */ diff --git a/src/compression/http_compression.c b/src/compression/http_compression.c new file mode 100644 index 0000000..6a9fbcb --- /dev/null +++ b/src/compression/http_compression.c @@ -0,0 +1,54 @@ +/* + * Codec registry and module-level helpers. + * + * Backends declare their vtable as `extern` and this TU plugs them into + * the registry. Lookup is the single API the response pipeline uses to + * obtain an encoder — phase-2 codecs slot in without touching callers. 
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "compression/http_encoder.h"
+
+#include
+
+/* Backend vtables. Each is provided in its own TU and only linked when
+ * the corresponding feature was detected. The registry below references
+ * them under the same #ifdef so the linker stays happy on partial builds. */
+#ifdef HAVE_HTTP_COMPRESSION
+extern const http_encoder_vtable_t http_compression_gzip_vt;
+#endif
+
+/* Registry lookup: the encoder vtable for `id`, or NULL when the codec
+ * needs no encoder (identity), is unknown, or is not part of this build. */
+const http_encoder_vtable_t *http_compression_lookup(http_codec_id_t id)
+{
+#ifdef HAVE_HTTP_COMPRESSION
+    if (id == HTTP_CODEC_GZIP) {
+        return &http_compression_gzip_vt;
+    }
+#else
+    (void)id; /* no backend compiled in — every id resolves to NULL */
+#endif
+    return NULL;
+}
+
+/* Wire token for `id` ("identity" / "gzip"); NULL for any other id. */
+const char *http_compression_codec_token(http_codec_id_t id)
+{
+    if (id == HTTP_CODEC_IDENTITY) {
+        return "identity";
+    }
+    if (id == HTTP_CODEC_GZIP) {
+        return "gzip";
+    }
+    return NULL;
+}
+
+/* Name of the gzip engine chosen at build time. HAVE_ZLIB_NG takes
+ * precedence; without either feature macro the answer is "disabled". */
+const char *http_compression_engine_name(void)
+{
+    const char *engine = "disabled";
+#if defined(HAVE_ZLIB_NG)
+    engine = "zlib-ng";
+#elif defined(HAVE_HTTP_COMPRESSION)
+    engine = "zlib";
+#endif
+    return engine;
+}
diff --git a/src/compression/http_compression_defaults.c b/src/compression/http_compression_defaults.c
new file mode 100644
index 0000000..afc9516
--- /dev/null
+++ b/src/compression/http_compression_defaults.c
@@ -0,0 +1,29 @@
+/*
+ * Default MIME whitelist for HTTP body compression.
+ *
+ * Kept in a dedicated TU so that policy edits land as a focused diff
+ * separate from negotiation logic. NULL-terminated, lowercase, sorted —
+ * loaders rely on the sentinel; sorting helps human review only.
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_HTTP_COMPRESSION + +#include "compression/http_compression_defaults.h" + +const char *const http_compression_default_mime_types[] = { + "application/javascript", + "application/json", + "application/xml", + "image/svg+xml", + "text/css", + "text/html", + "text/javascript", + "text/plain", + "text/xml", + NULL, +}; + +#endif /* HAVE_HTTP_COMPRESSION */ diff --git a/src/compression/http_compression_gzip.c b/src/compression/http_compression_gzip.c new file mode 100644 index 0000000..51e261d --- /dev/null +++ b/src/compression/http_compression_gzip.c @@ -0,0 +1,138 @@ +/* + * gzip backend — streaming deflate via zlib-ng (preferred) or zlib. + * + * windowBits=15+16 (bit 16 in zlib(-ng) selects the gzip wrapper: + * 10-byte header + CRC32 trailer instead of zlib's adler32 wrap). + * memLevel=8 is the documented default; level is the caller-provided + * 1..9 (clamped here so nothing in the pipeline has to validate). + * + * Two output-side conventions worth noting: + * - HTTP_ENC_NEED_OUTPUT means the caller must drain `*out_produced` + * bytes and call back with a fresh buffer. We return it both when + * write() runs out of output space AND when finish() can't fit the + * trailer in the supplied buffer. Production callers loop until + * DONE; the unit test does the same. + * - finish() returning HTTP_ENC_OK is impossible by design — every + * successful exit is either NEED_OUTPUT (more flushing required) + * or DONE (footer emitted, stream closed). 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_HTTP_COMPRESSION + +#include "compression/http_encoder.h" + +#include "php.h" /* emalloc / efree — unit tests provide a minimal Zend */ + +#ifdef HAVE_ZLIB_NG +# include +# define ZS zng_stream +# define ZS_DEFLATE_INIT2 zng_deflateInit2 +# define ZS_DEFLATE zng_deflate +# define ZS_DEFLATE_END zng_deflateEnd +#else +# include +# define ZS z_stream +# define ZS_DEFLATE_INIT2 deflateInit2 +# define ZS_DEFLATE deflate +# define ZS_DEFLATE_END deflateEnd +#endif + +typedef struct { + http_encoder_t base; + ZS stream; + bool stream_initialised; +} gzip_encoder_t; + +extern const http_encoder_vtable_t http_compression_gzip_vt; + +static http_encoder_t *gz_create(int level) +{ + if (level < 1) level = 1; + if (level > 9) level = 9; + + gzip_encoder_t *enc = ecalloc(1, sizeof(*enc)); + enc->base.vt = &http_compression_gzip_vt; + + /* windowBits = MAX_WBITS (15) + 16 → gzip wrapper. */ + int rc = ZS_DEFLATE_INIT2(&enc->stream, level, Z_DEFLATED, + MAX_WBITS + 16, 8, Z_DEFAULT_STRATEGY); + if (rc != Z_OK) { + efree(enc); + return NULL; + } + enc->stream_initialised = true; + return &enc->base; +} + +static http_encoder_status_t gz_write(http_encoder_t *base, + const void *in, size_t in_len, size_t *in_consumed, + void *out, size_t out_cap, size_t *out_produced) +{ + gzip_encoder_t *enc = (gzip_encoder_t *)base; + + /* zlib's API is `unsigned int avail_*` (zlib) or `size_t` (zlib-ng); + * cast to the local type to keep both paths compiling. 
*/ + enc->stream.next_in = (void *)(uintptr_t)in; + enc->stream.avail_in = (unsigned)in_len; + enc->stream.next_out = (unsigned char *)out; + enc->stream.avail_out = (unsigned)out_cap; + + int rc = ZS_DEFLATE(&enc->stream, Z_NO_FLUSH); + + if (in_consumed) *in_consumed = in_len - enc->stream.avail_in; + if (out_produced) *out_produced = out_cap - enc->stream.avail_out; + + if (rc != Z_OK) { + return HTTP_ENC_ERROR; + } + /* Output buffer filled before all input was consumed → caller drains. */ + if (enc->stream.avail_out == 0 && enc->stream.avail_in > 0) { + return HTTP_ENC_NEED_OUTPUT; + } + return HTTP_ENC_OK; +} + +static http_encoder_status_t gz_finish(http_encoder_t *base, + void *out, size_t out_cap, size_t *out_produced) +{ + gzip_encoder_t *enc = (gzip_encoder_t *)base; + + enc->stream.next_in = NULL; + enc->stream.avail_in = 0; + enc->stream.next_out = (unsigned char *)out; + enc->stream.avail_out = (unsigned)out_cap; + + int rc = ZS_DEFLATE(&enc->stream, Z_FINISH); + if (out_produced) *out_produced = out_cap - enc->stream.avail_out; + + if (rc == Z_STREAM_END) return HTTP_ENC_DONE; + /* Z_OK / Z_BUF_ERROR after Z_FINISH both mean "trailer didn't fit; + * give me more output". 
*/ + if (rc == Z_OK || rc == Z_BUF_ERROR) return HTTP_ENC_NEED_OUTPUT; + return HTTP_ENC_ERROR; +} + +static void gz_destroy(http_encoder_t *base) +{ + if (base == NULL) return; + gzip_encoder_t *enc = (gzip_encoder_t *)base; + if (enc->stream_initialised) { + (void)ZS_DEFLATE_END(&enc->stream); + enc->stream_initialised = false; + } + efree(enc); +} + +const http_encoder_vtable_t http_compression_gzip_vt = { + .name = "gzip", + .id = HTTP_CODEC_GZIP, + .create = gz_create, + .write = gz_write, + .finish = gz_finish, + .destroy = gz_destroy, +}; + +#endif /* HAVE_HTTP_COMPRESSION */ diff --git a/src/compression/http_compression_negotiate.c b/src/compression/http_compression_negotiate.c new file mode 100644 index 0000000..fda87de --- /dev/null +++ b/src/compression/http_compression_negotiate.c @@ -0,0 +1,184 @@ +/* + * Accept-Encoding parser + codec selector + MIME normaliser. + * Pure C — no Zend, no PHP. Exercised directly by unit tests. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_HTTP_COMPRESSION + +#include "compression/http_compression_negotiate.h" + +#include +#include + +/* ---- helpers ---------------------------------------------------------- */ + +/* Per-coding parse outcome. */ +typedef enum { Q_UNSEEN = 0, Q_OK, Q_REJECT } q_t; + +static inline char ascii_lower(char c) +{ + return (c >= 'A' && c <= 'Z') ? (char)(c - 'A' + 'a') : c; +} + +static bool ascii_eq_ci(const char *s, size_t slen, const char *lit) +{ + size_t llen = strlen(lit); + if (slen != llen) return false; + for (size_t i = 0; i < slen; i++) { + if (ascii_lower(s[i]) != lit[i]) return false; + } + return true; +} + +/* Is this q-value zero? Permissive: blank or malformed → non-zero (treat + * as q=1, RFC says servers MAY ignore malformed weight). We only need + * the binary "rejected vs accepted" decision. 
*/ +static bool q_is_zero(const char *s, size_t len) +{ + while (len > 0 && (s[0] == ' ' || s[0] == '\t')) { s++; len--; } + while (len > 0 && (s[len - 1] == ' ' || s[len - 1] == '\t')) len--; + if (len == 0) return false; + if (s[0] != '0') return false; + if (len == 1) return true; /* "0" */ + if (s[1] != '.') return false; /* "0xxx" — malformed → q=1 */ + for (size_t i = 2; i < len; i++) { + if (s[i] != '0') return false; /* any non-zero digit → not zero */ + } + return true; /* "0.", "0.0", "0.000" */ +} + +/* ---- public API ------------------------------------------------------- */ + +void http_accept_encoding_init_default(http_accept_encoding_t *out) +{ + /* "No Accept-Encoding header" → identity only. RFC 9110 §12.5.3 + * permits any coding, but real-world clients without AE are usually + * CLI tools / probes that may not handle gzip — and BREACH risk + * argues for opt-in over opt-out. nginx ships the same default. */ + out->gzip_acceptable = false; + out->identity_acceptable = true; +} + +void http_accept_encoding_parse(const char *hdr, size_t len, + http_accept_encoding_t *out) +{ + q_t q_gzip = Q_UNSEEN, q_identity = Q_UNSEEN, q_star = Q_UNSEEN; + + size_t i = 0; + while (i < len) { + /* Skip LWS and stray commas. */ + while (i < len && (hdr[i] == ' ' || hdr[i] == '\t' || hdr[i] == ',')) i++; + if (i >= len) break; + + size_t tok_start = i; + while (i < len && hdr[i] != ',') i++; + size_t tok_end = i; + while (tok_end > tok_start && + (hdr[tok_end - 1] == ' ' || hdr[tok_end - 1] == '\t')) { + tok_end--; + } + if (tok_end == tok_start) continue; + + /* Coding name terminates at `;`, ` ` or `\t`. 
*/ + size_t name_end = tok_start; + while (name_end < tok_end && + hdr[name_end] != ';' && + hdr[name_end] != ' ' && + hdr[name_end] != '\t') { + name_end++; + } + const char *name = hdr + tok_start; + size_t name_len = name_end - tok_start; + + bool found_q = false, qzero = false; + size_t pi = name_end; + while (pi < tok_end) { + while (pi < tok_end && + (hdr[pi] == ' ' || hdr[pi] == '\t' || hdr[pi] == ';')) { + pi++; + } + if (pi >= tok_end) break; + size_t param_start = pi; + while (pi < tok_end && hdr[pi] != ';') pi++; + size_t param_end = pi; + while (param_end > param_start && + (hdr[param_end - 1] == ' ' || hdr[param_end - 1] == '\t')) { + param_end--; + } + + if (param_end - param_start >= 2 && + ascii_lower(hdr[param_start]) == 'q' && + hdr[param_start + 1] == '=') { + found_q = true; + qzero = q_is_zero(hdr + param_start + 2, + param_end - param_start - 2); + /* Don't break — accept-ext params after q=… are legal + * but we ignore them; loop just falls through. */ + } + } + + q_t outcome = (found_q && qzero) ? Q_REJECT : Q_OK; + + if (ascii_eq_ci(name, name_len, "gzip")) q_gzip = outcome; + else if (ascii_eq_ci(name, name_len, "identity")) q_identity = outcome; + else if (name_len == 1 && name[0] == '*') q_star = outcome; + /* Unknown coding: ignored. Phase-2 backends extend the if-chain. */ + } + + /* Resolution rules per RFC 9110 §12.5.3: + * - explicit Q_OK wins; explicit Q_REJECT wins. + * - unseen coding falls back to `*`: Q_OK enables, Q_REJECT excludes, + * Q_UNSEEN leaves it disabled. + * - identity has a special rule: it is acceptable by default unless + * the header explicitly excludes it (`identity;q=0` or `*;q=0` + * without a more specific identity entry). An empty header value + * yields gzip=Q_UNSEEN, star=Q_UNSEEN, identity=Q_UNSEEN, which + * resolves to "identity only" — exactly the empty-header semantic. 
+ */ + out->gzip_acceptable = + (q_gzip == Q_OK) || + (q_gzip == Q_UNSEEN && q_star == Q_OK); + + out->identity_acceptable = + (q_identity == Q_OK) || + (q_identity == Q_UNSEEN && q_star != Q_REJECT); +} + +http_codec_id_t http_accept_encoding_select(const http_accept_encoding_t *ae) +{ + /* Phase-2 will preface gzip with brotli/zstd lookups in preference + * order. The single-codec phase-1 branch keeps the type stable. */ + if (ae->gzip_acceptable) { + return HTTP_CODEC_GZIP; + } + if (ae->identity_acceptable) { + return HTTP_CODEC_IDENTITY; + } + return HTTP_CODEC__COUNT; +} + +size_t http_compression_mime_normalize(const char *ct, size_t ct_len, + char *dst, size_t dst_cap) +{ + /* Trim leading whitespace. */ + while (ct_len > 0 && (ct[0] == ' ' || ct[0] == '\t')) { ct++; ct_len--; } + /* Stop at first `;` (parameters) or trailing whitespace. */ + size_t end = ct_len; + for (size_t i = 0; i < ct_len; i++) { + if (ct[i] == ';') { end = i; break; } + } + while (end > 0 && (ct[end - 1] == ' ' || ct[end - 1] == '\t')) end--; + + if (end == 0 || end > dst_cap) { + return 0; + } + for (size_t i = 0; i < end; i++) { + dst[i] = ascii_lower(ct[i]); + } + return end; +} + +#endif /* HAVE_HTTP_COMPRESSION */ diff --git a/src/compression/http_compression_request.c b/src/compression/http_compression_request.c new file mode 100644 index 0000000..05b4e51 --- /dev/null +++ b/src/compression/http_compression_request.c @@ -0,0 +1,144 @@ +/* + * Inbound request body decoder. Phase 1: gzip via zlib(-ng). + * + * Anti-bomb cap is hard-required: the read loop checks decoded size + * after every inflate() pass and aborts before realloc, so a + * Content-Length: 1MiB body that decodes to 10 GiB never reaches the + * limit's worth of memory on the heap. + * + * Output buffer growth is bounded: 4 KiB initial, doubling up to the + * cap. 
Doubling avoids quadratic copy cost on large payloads while + * staying well under what a malicious client could exploit (every + * doubling bumps memory by 2x — cap stops us at the configured ceiling + * regardless of how the input grows). + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_HTTP_COMPRESSION + +#include "php.h" +#include "php_http_server.h" +#include "http1/http_parser.h" +#include "compression/http_compression_request.h" + +#ifdef HAVE_ZLIB_NG +# include +# define ZS zng_stream +# define ZS_INFLATE_INIT2 zng_inflateInit2 +# define ZS_INFLATE zng_inflate +# define ZS_INFLATE_END zng_inflateEnd +#else +# include +# define ZS z_stream +# define ZS_INFLATE_INIT2 inflateInit2 +# define ZS_INFLATE inflate +# define ZS_INFLATE_END inflateEnd +#endif + +#include + +static int decode_gzip(http_request_t *req, size_t cap) +{ + if (req->body == NULL || ZSTR_LEN(req->body) == 0) { + return HTTP_DECODE_OK; /* nothing to decode */ + } + + ZS s; + memset(&s, 0, sizeof(s)); + /* windowBits 15+32: gzip wrapper with auto-detection (handles both + * gzip and zlib streams gracefully — robust against clients that + * mis-label deflate as gzip in the wild). */ + if (ZS_INFLATE_INIT2(&s, 15 + 32) != Z_OK) { + return HTTP_DECODE_MALFORMED; + } + + /* Output buffer. Initial 4 KiB, doubles on demand up to `cap`. */ + size_t out_cap = 4096; + if (cap > 0 && cap < out_cap) out_cap = cap; + zend_string *out = zend_string_alloc(out_cap, 0); + size_t produced = 0; + + s.next_in = (void *)(uintptr_t)ZSTR_VAL(req->body); + s.avail_in = (unsigned)ZSTR_LEN(req->body); + s.next_out = (unsigned char *)ZSTR_VAL(out); + s.avail_out = (unsigned)out_cap; + + int rc; + for (;;) { + rc = ZS_INFLATE(&s, Z_NO_FLUSH); + produced = out_cap - s.avail_out; + + if (rc == Z_STREAM_END) break; + if (rc != Z_OK) { + ZS_INFLATE_END(&s); + zend_string_release(out); + return HTTP_DECODE_MALFORMED; + } + /* Need more output. Cap-aware grow: never above `cap`. 
*/ + if (s.avail_out == 0) { + size_t new_cap = out_cap * 2; + if (cap > 0 && new_cap > cap) { + new_cap = cap; + } + if (new_cap == out_cap) { + /* Already at cap and inflate still wants room → bomb. */ + ZS_INFLATE_END(&s); + zend_string_release(out); + return HTTP_DECODE_TOO_LARGE; + } + zend_string *grown = zend_string_realloc(out, new_cap, 0); + out = grown; + s.next_out = (unsigned char *)ZSTR_VAL(out) + produced; + s.avail_out = (unsigned)(new_cap - produced); + out_cap = new_cap; + } + } + ZS_INFLATE_END(&s); + + /* Right-size + NUL-terminate. */ + if (produced != out_cap) { + out = zend_string_truncate(out, produced, 0); + } + ZSTR_VAL(out)[produced] = '\0'; + + zend_string_release(req->body); + req->body = out; + req->content_length = produced; + return HTTP_DECODE_OK; +} + +int http_compression_decode_request_body(http_request_t *req, + http_server_config_t *cfg) +{ + if (req == NULL || req->headers == NULL) return HTTP_DECODE_OK; + + zval *ce = zend_hash_str_find(req->headers, "content-encoding", 16); + if (ce == NULL || Z_TYPE_P(ce) != IS_STRING) return HTTP_DECODE_OK; + + const char *val = Z_STRVAL_P(ce); + size_t len = Z_STRLEN_P(ce); + while (len > 0 && (val[0] == ' ' || val[0] == '\t')) { val++; len--; } + while (len > 0 && (val[len - 1] == ' ' || val[len - 1] == '\t')) len--; + + if (len == 0 || + (len == 8 && zend_binary_strcasecmp(val, 8, "identity", 8) == 0)) { + return HTTP_DECODE_OK; + } + if (len == 4 && zend_binary_strcasecmp(val, 4, "gzip", 4) == 0) { + size_t cap = (cfg != NULL) ? cfg->request_max_decompressed_size : 0; + return decode_gzip(req, cap); + } + /* Aliases — RFC 9110 lists "x-gzip" as an obsolete synonym still + * found in older intermediaries. Decode the same way. */ + if (len == 6 && zend_binary_strcasecmp(val, 6, "x-gzip", 6) == 0) { + size_t cap = (cfg != NULL) ? 
cfg->request_max_decompressed_size : 0; + return decode_gzip(req, cap); + } + + return HTTP_DECODE_UNKNOWN_CODING; +} + +#endif /* HAVE_HTTP_COMPRESSION */ diff --git a/src/compression/http_compression_response.c b/src/compression/http_compression_response.c new file mode 100644 index 0000000..fa7dde5 --- /dev/null +++ b/src/compression/http_compression_response.c @@ -0,0 +1,594 @@ +/* + * Response-side compression plumbing — see header for the public surface. + * + * Two consumers, one decision function: + * - apply_buffered : called from http_response_format[/_parts]; rewrites + * smart_str body, mutates headers in place. The + * buffered path knows the body length up-front, so + * the size-threshold check is exact. + * - stream wrapper : on first send() we substitute the installed + * stream_ops with a compressing one. The wrapper's + * append_chunk feeds chunks through the encoder and + * forwards compressed slices to the underlying ops; + * mark_ended drains finish() before delegating. + * + * `decide()` is the single source of truth: it reads request headers, + * response headers, server config, and the opt-out flag, returning + * GZIP or IDENTITY. Both consumers call it; the buffered path also + * passes the known body length to bypass the streaming "unknown size" + * branch. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_HTTP_COMPRESSION + +#include "php.h" +#include "Zend/zend_smart_str.h" +#include "php_http_server.h" +#include "http1/http_parser.h" +#include "compression/http_compression_response.h" +#include "compression/http_compression_negotiate.h" +#include "compression/http_compression_defaults.h" + +#include + +/* ----- state struct + accessors -------------------------------------- */ + +typedef struct { + http_request_t *request; /* non-owning */ + http_server_config_t *cfg; /* non-owning */ + + bool no_compression; + bool applied; /* buffered: body already rewritten */ + + /* Streaming wrapper state. 
Populated by + * maybe_install_stream_wrapper; NULL on the buffered path. */ + const http_response_stream_ops_t *underlying_ops; + void *underlying_ctx; + http_encoder_t *encoder; + void *wrapper_ctx; /* ws_ctx_t — owned, freed at teardown */ + bool wrapper_installed; + bool wrapper_first_chunk; +} http_compression_state_t; + +/* http_response.c owns the response struct; we reach the field through + * a tiny accessor it exports for us. Using a void** keeps the response + * layout opaque outside the response TU. */ +extern void *http_response_get_compression_slot(zend_object *obj); +extern void http_response_set_compression_slot(zend_object *obj, void *p); +extern HashTable *http_response_get_headers(zend_object *obj); +extern int http_response_get_status(zend_object *obj); +extern const http_response_stream_ops_t * + http_response_get_stream_ops(zend_object *obj); +extern void *http_response_get_stream_ctx(zend_object *obj); +extern void http_response_replace_stream_ops(zend_object *obj, + const http_response_stream_ops_t *ops, + void *ctx); +extern smart_str *http_response_get_body_smart_str(zend_object *obj); + +static inline http_compression_state_t *state_of(zend_object *obj) +{ + return (http_compression_state_t *)http_response_get_compression_slot(obj); +} + +/* ----- header mutation helpers --------------------------------------- */ + +/* Direct HashTable insertion bypasses the user-facing setHeader guard + * (which blocks edits after committed=true). That's intentional — + * compression mutations happen *during* commit. zend_hash_str_update + * skips the key zend_string allocation (vs. zend_hash_update); only + * the value needs to be reified. 
*/ +static void put_header_string(HashTable *ht, const char *name, size_t name_len, + const char *value, size_t value_len) +{ + zval z; + ZVAL_STR(&z, zend_string_init(value, value_len, 0)); + zend_hash_str_update(ht, name, name_len, &z); +} + +static inline void delete_header(HashTable *ht, const char *name, size_t name_len) +{ + zend_hash_str_del(ht, name, name_len); +} + +static inline bool has_header(const HashTable *ht, const char *name, size_t name_len) +{ + return zend_hash_str_exists(ht, name, name_len); +} + +/* Append "Accept-Encoding" to an existing Vary, or set it fresh. */ +static void merge_vary_accept_encoding(HashTable *ht) +{ + static const char V[] = "vary"; + static const char AE[] = "Accept-Encoding"; + const size_t AE_LEN = sizeof(AE) - 1; + + zval *const existing = zend_hash_str_find(ht, V, sizeof(V) - 1); + if (existing == NULL) { + put_header_string(ht, V, sizeof(V) - 1, AE, AE_LEN); + return; + } + if (EXPECTED(Z_TYPE_P(existing) == IS_STRING)) { + const char *const cur = Z_STRVAL_P(existing); + const size_t cl = Z_STRLEN_P(existing); + /* Already mentions Accept-Encoding (case-insensitive needle)? + * Loop bound guarantees no read past the buffer end. */ + if (cl >= AE_LEN) { + for (size_t i = 0, stop = cl - AE_LEN + 1; i < stop; i++) { + if (strncasecmp(cur + i, AE, AE_LEN) == 0) return; + } + } + /* Build ", Accept-Encoding" in one allocation, store it. */ + zend_string *const merged = zend_string_alloc(cl + 2 + AE_LEN, 0); + memcpy(ZSTR_VAL(merged), cur, cl); + memcpy(ZSTR_VAL(merged) + cl, ", ", 2); + memcpy(ZSTR_VAL(merged) + cl + 2, AE, AE_LEN); + ZSTR_VAL(merged)[cl + 2 + AE_LEN] = '\0'; + zval z; + ZVAL_STR(&z, merged); + zend_hash_str_update(ht, V, sizeof(V) - 1, &z); + return; + } + /* IS_ARRAY: rare; just add a fresh entry — most clients dedup Vary. 
*/ + if (Z_TYPE_P(existing) == IS_ARRAY) { + zval z; + ZVAL_STR(&z, zend_string_init(AE, AE_LEN, 0)); + zend_hash_next_index_insert(Z_ARRVAL_P(existing), &z); + } +} + +static void mutate_headers_for_codec(HashTable *ht, http_codec_id_t codec) +{ + const char *tok = http_compression_codec_token(codec); + put_header_string(ht, "content-encoding", sizeof("content-encoding") - 1, + tok, strlen(tok)); + /* Content-Length is recomputed by emit_headers_block from the new + * body size on the buffered path; on streaming we don't know it, + * so we always strip whatever the handler set. */ + delete_header(ht, "content-length", sizeof("content-length") - 1); + merge_vary_accept_encoding(ht); +} + +/* ----- decide() ------------------------------------------------------- */ + +/* Look up a header value as a string. The H1 parser lowercases keys; + * H2/H3 pass lowercase via :pseudo handling — case-sensitive lookup + * suffices. Returns false on absence or non-string entry. */ +static bool request_header_value(const http_request_t *req, + const char *lower_name, size_t lower_len, + const char **out_val, size_t *out_len) +{ + if (UNEXPECTED(req == NULL || req->headers == NULL)) return false; + const zval *zv = zend_hash_str_find(req->headers, lower_name, lower_len); + if (zv == NULL || Z_TYPE_P(zv) != IS_STRING) return false; + *out_val = Z_STRVAL_P(zv); + *out_len = Z_STRLEN_P(zv); + return true; +} + +/* Cheap presence-only check — no zval read, no string copy. */ +static inline bool request_has_header(const http_request_t *req, + const char *lower_name, size_t lower_len) +{ + if (UNEXPECTED(req == NULL || req->headers == NULL)) return false; + return zend_hash_str_exists(req->headers, lower_name, lower_len); +} + +static inline bool method_is_head(const http_request_t *req) +{ + return req && req->method && + ZSTR_LEN(req->method) == 4 && + zend_binary_strcasecmp(ZSTR_VAL(req->method), 4, "HEAD", 4) == 0; +} + +/* Read the chosen content-type from the response's headers HT. 
The + * dispatch flow lowercases stored keys, so a case-sensitive lookup + * suffices. Returns 0 when the handler did not set one. */ +static size_t response_content_type(const HashTable *resp_headers, + char *buf, size_t buf_cap) +{ + const zval *zv = zend_hash_str_find(resp_headers, "content-type", 12); + if (zv == NULL || Z_TYPE_P(zv) != IS_STRING) return 0; + return http_compression_mime_normalize( + Z_STRVAL_P(zv), Z_STRLEN_P(zv), buf, buf_cap); +} + +/* size_hint==0 means "unknown / streaming" — skip the size-threshold + * check (we'd rather compress and risk a slight overhead than refuse + * compression on potentially huge streamed bodies). */ +static http_codec_id_t decide(http_compression_state_t *st, + zend_object *response_obj, + size_t size_hint) +{ + if (st == NULL || st->no_compression || st->cfg == NULL || + !st->cfg->compression_enabled) { + return HTTP_CODEC_IDENTITY; + } + + /* --- request side --- */ + if (method_is_head(st->request)) { + return HTTP_CODEC_IDENTITY; + } + /* Range responses are sliced; compressing them would corrupt the + * byte ranges the client asked for. */ + if (request_has_header(st->request, "range", 5)) { + return HTTP_CODEC_IDENTITY; + } + const char *ae_val = NULL; size_t ae_len = 0; + http_accept_encoding_t ae; + if (request_header_value(st->request, "accept-encoding", 15, &ae_val, &ae_len)) { + http_accept_encoding_parse(ae_val, ae_len, &ae); + } else { + http_accept_encoding_init_default(&ae); + } + http_codec_id_t chosen = http_accept_encoding_select(&ae); + if (chosen != HTTP_CODEC_GZIP) { + /* Either identity-only or fully unsatisfiable. The 406 path is + * not in scope here — the dispose code already commits identity + * by default. We just skip compression. 
*/ + return HTTP_CODEC_IDENTITY; + } + + /* --- response side --- */ + int status = http_response_get_status(response_obj); + if (status < 200 || status == 204 || status == 304) { + return HTTP_CODEC_IDENTITY; + } + + HashTable *resp_h = http_response_get_headers(response_obj); + if (resp_h && has_header(resp_h, "content-encoding", 16)) { + /* Handler already set its own coding (e.g. precompressed asset). + * Don't double-encode. */ + return HTTP_CODEC_IDENTITY; + } + + /* Body-size threshold is exact for buffered, skipped for streaming. */ + if (size_hint > 0 && size_hint < st->cfg->compression_min_size) { + return HTTP_CODEC_IDENTITY; + } + + /* MIME whitelist match. No content-type → assume non-text and skip + * (whitelist semantics: compress only what we explicitly know is safe). */ + char ct_buf[128]; + size_t ct_len = response_content_type(resp_h, ct_buf, sizeof(ct_buf)); + if (ct_len == 0) { + return HTTP_CODEC_IDENTITY; + } + if (st->cfg->compression_mime_types == NULL || + !zend_hash_str_exists(st->cfg->compression_mime_types, ct_buf, ct_len)) { + return HTTP_CODEC_IDENTITY; + } + + return HTTP_CODEC_GZIP; +} + +/* ----- attach / free / opt-out --------------------------------------- */ + +void http_compression_attach(zend_object *response_obj, + http_request_t *request, + http_server_config_t *cfg) +{ + if (response_obj == NULL || cfg == NULL) return; + if (!cfg->compression_enabled) return; + + http_compression_state_t *st = state_of(response_obj); + if (st == NULL) { + st = ecalloc(1, sizeof(*st)); + http_response_set_compression_slot(response_obj, st); + } + st->request = request; + st->cfg = cfg; +} + +void http_compression_state_free(zend_object *response_obj) +{ + http_compression_state_t *st = state_of(response_obj); + if (st == NULL) return; + if (st->encoder && st->encoder->vt && st->encoder->vt->destroy) { + st->encoder->vt->destroy(st->encoder); + } + if (st->wrapper_ctx) { + efree(st->wrapper_ctx); + } + efree(st); + 
http_response_set_compression_slot(response_obj, NULL); +} + +void http_compression_mark_no_compression(zend_object *response_obj) +{ + if (response_obj == NULL) return; + http_compression_state_t *st = state_of(response_obj); + if (st == NULL) { + /* Allocate even without attach so the flag persists if attach + * later discovers an enabled config (rare; safer than dropping). */ + st = ecalloc(1, sizeof(*st)); + http_response_set_compression_slot(response_obj, st); + } + st->no_compression = true; +} + +/* ----- buffered apply ------------------------------------------------- */ + +void http_compression_apply_buffered(zend_object *response_obj) +{ + http_compression_state_t *st = state_of(response_obj); + if (st == NULL || st->applied || st->wrapper_installed) { + /* st->wrapper_installed: streaming path — body not used. */ + return; + } + + smart_str *body = http_response_get_body_smart_str(response_obj); + size_t body_len = (body && body->s) ? ZSTR_LEN(body->s) : 0; + + http_codec_id_t codec = decide(st, response_obj, body_len); + st->applied = true; /* Whether or not we compress, never run twice. */ + + if (codec != HTTP_CODEC_GZIP || body_len == 0) { + return; + } + + const http_encoder_vtable_t *const vt = http_compression_lookup(codec); + if (UNEXPECTED(vt == NULL)) return; + http_encoder_t *const enc = vt->create((int)st->cfg->compression_level); + if (UNEXPECTED(enc == NULL)) return; + + /* Pre-size for the worst-case output: gzip overhead on text is + * <0.1% + 18-byte header/trailer; on already-compressed input deflate + * may swell by up to 0.015% + 5 bytes per 32 KiB block. body_len + 64 + * covers the common case in one allocation; NEED_OUTPUT tail-grows + * on the rare incompressible path. */ + smart_str out = {0}; + smart_str_alloc(&out, body_len + 64, 0); + + const unsigned char *const in = (const unsigned char *)ZSTR_VAL(body->s); + size_t fed = 0; + + /* write() drain. 
smart_str_alloc on NEED_OUTPUT instead of every + * iteration so the common single-pass case is one alloc. */ + while (fed < body_len) { + size_t avail = out.a - ZSTR_LEN(out.s); + if (UNEXPECTED(avail < 64)) { + smart_str_alloc(&out, 4096, 0); + avail = out.a - ZSTR_LEN(out.s); + } + size_t consumed = 0, produced = 0; + const http_encoder_status_t s = vt->write(enc, + in + fed, body_len - fed, &consumed, + ZSTR_VAL(out.s) + ZSTR_LEN(out.s), avail, &produced); + ZSTR_LEN(out.s) += produced; + fed += consumed; + if (UNEXPECTED(s == HTTP_ENC_ERROR)) { + vt->destroy(enc); + smart_str_free(&out); + return; /* Leave body as-is; identity wins. */ + } + /* HTTP_ENC_OK or NEED_OUTPUT: loop continues. */ + } + + /* finish() drain — emits gzip trailer (CRC32 + ISIZE). */ + for (;;) { + size_t avail = out.a - ZSTR_LEN(out.s); + if (UNEXPECTED(avail < 32)) { + smart_str_alloc(&out, 64, 0); + avail = out.a - ZSTR_LEN(out.s); + } + size_t produced = 0; + const http_encoder_status_t s = vt->finish(enc, + ZSTR_VAL(out.s) + ZSTR_LEN(out.s), avail, &produced); + ZSTR_LEN(out.s) += produced; + if (EXPECTED(s == HTTP_ENC_DONE)) break; + if (s == HTTP_ENC_NEED_OUTPUT) continue; + /* Error mid-finish — abort, keep original body. */ + vt->destroy(enc); + smart_str_free(&out); + return; + } + vt->destroy(enc); + smart_str_0(&out); + + /* Swap body — release old, transfer ownership of out's zend_string. */ + smart_str_free(body); + body->s = out.s; + body->a = out.a; + + mutate_headers_for_codec(http_response_get_headers(response_obj), codec); +} + +/* ----- streaming wrapper --------------------------------------------- */ + +/* Wrapper context. Stored as the stream_ctx of the compressing ops; + * holds a back-reference to the response (for header mutation on + * first chunk) and the original ops/ctx we delegate to. 
*/ +typedef struct { + zend_object *response_obj; + const http_response_stream_ops_t *underlying_ops; + void *underlying_ctx; + http_encoder_t *encoder; + bool first_chunk_done; +} ws_ctx_t; + +/* Hand off an accumulated zend_string to the underlying stream ops. + * One call → one underlying append_chunk → one downstream chunk on the + * wire. Compared with emitting per-loop slices, this trades a small + * temporary buffer for fewer protocol-level frames (H2 DATA / chunked + * size-line). zs is consumed; the underlying owns the refcount. */ +static int forward_compressed(ws_ctx_t *const w, zend_string *zs) +{ + if (UNEXPECTED(zs == NULL || ZSTR_LEN(zs) == 0)) { + if (zs) zend_string_release(zs); + return HTTP_STREAM_APPEND_OK; + } + return w->underlying_ops->append_chunk(w->underlying_ctx, zs); +} + +static int ws_append_chunk(void *ctx_opaque, zend_string *chunk) +{ + ws_ctx_t *const w = (ws_ctx_t *)ctx_opaque; + + if (UNEXPECTED(!w->first_chunk_done)) { + /* Header mutation deferred to first chunk: by now the handler + * has finalised setHeader/setStatusCode (committed=true was set + * by HttpResponse::send before we got here), and we know we + * are actually going to encode at least one byte. */ + mutate_headers_for_codec( + http_response_get_headers(w->response_obj), HTTP_CODEC_GZIP); + w->first_chunk_done = true; + } + + /* Accumulate compressed output across all encoder iterations into + * one zend_string, hand the whole thing to the underlying ops as a + * single chunk. Avoids one zend_string_init + one append_chunk + * round-trip per inner pass — relevant for chunked H1 (per-chunk + * size line + CRLF on the wire) and H2 (one DATA frame per call). */ + const unsigned char *const in = (const unsigned char *)ZSTR_VAL(chunk); + const size_t in_len = ZSTR_LEN(chunk); + smart_str out = {0}; + /* Pre-size: gzipped text is typically <50% of source. 
The estimate + * reduces realloc churn on the common case; finish() is not called + * here (mark_ended drains it) so 0-bytes-produced is also valid. */ + smart_str_alloc(&out, in_len + 32, 0); + + size_t fed = 0; + while (fed < in_len) { + size_t avail = out.a - ZSTR_LEN(out.s); + if (UNEXPECTED(avail < 64)) { + smart_str_alloc(&out, 4096, 0); + avail = out.a - ZSTR_LEN(out.s); + } + size_t consumed = 0, produced = 0; + const http_encoder_status_t s = w->encoder->vt->write(w->encoder, + in + fed, in_len - fed, &consumed, + ZSTR_VAL(out.s) + ZSTR_LEN(out.s), avail, &produced); + ZSTR_LEN(out.s) += produced; + fed += consumed; + if (UNEXPECTED(s == HTTP_ENC_ERROR)) { + smart_str_free(&out); + zend_string_release(chunk); + return HTTP_STREAM_APPEND_STREAM_DEAD; + } + /* HTTP_ENC_OK with all input consumed → loop exits. */ + } + zend_string_release(chunk); + + if (out.s == NULL || ZSTR_LEN(out.s) == 0) { + /* Encoder had nothing to flush yet (deflate buffers internally). */ + smart_str_free(&out); + return HTTP_STREAM_APPEND_OK; + } + smart_str_0(&out); + return forward_compressed(w, out.s); /* transfers ownership */ +} + +static void ws_mark_ended(void *ctx_opaque) +{ + ws_ctx_t *const w = (ws_ctx_t *)ctx_opaque; + /* Drain finish() into the underlying stream. If the handler never + * sent a single byte we still need to commit headers, encode the + * empty stream's footer (10-byte gzip header + CRC trailer), and + * tell the underlying side to terminate. */ + if (UNEXPECTED(!w->first_chunk_done)) { + mutate_headers_for_codec( + http_response_get_headers(w->response_obj), HTTP_CODEC_GZIP); + w->first_chunk_done = true; + } + + /* Same accumulator pattern as append_chunk: build the trailer (and + * any deflate-buffered bytes finish() emits) into one zend_string + * and ship as a single underlying chunk. 
*/ + smart_str out = {0}; + smart_str_alloc(&out, 64, 0); + for (;;) { + size_t avail = out.a - ZSTR_LEN(out.s); + if (UNEXPECTED(avail < 32)) { + smart_str_alloc(&out, 4096, 0); + avail = out.a - ZSTR_LEN(out.s); + } + size_t produced = 0; + const http_encoder_status_t s = w->encoder->vt->finish( + w->encoder, ZSTR_VAL(out.s) + ZSTR_LEN(out.s), avail, &produced); + ZSTR_LEN(out.s) += produced; + if (EXPECTED(s == HTTP_ENC_DONE)) break; + if (s == HTTP_ENC_NEED_OUTPUT) continue; + break; /* error — fall through, still close the underlying stream */ + } + if (out.s != NULL && ZSTR_LEN(out.s) > 0) { + smart_str_0(&out); + (void)forward_compressed(w, out.s); /* transfers ownership */ + } else { + smart_str_free(&out); + } + w->underlying_ops->mark_ended(w->underlying_ctx); +} + +static zend_async_event_t *ws_get_wait_event(void *ctx_opaque) +{ + ws_ctx_t *const w = (ws_ctx_t *)ctx_opaque; + return w->underlying_ops->get_wait_event(w->underlying_ctx); +} + +static const http_response_stream_ops_t compressing_stream_ops = { + .append_chunk = ws_append_chunk, + .mark_ended = ws_mark_ended, + .get_wait_event = ws_get_wait_event, +}; + +void http_compression_maybe_install_stream_wrapper(zend_object *response_obj) +{ + http_compression_state_t *st = state_of(response_obj); + if (st == NULL || st->wrapper_installed) return; + + /* Streaming path: size unknown → pass 0 to skip the threshold check. */ + if (decide(st, response_obj, 0) != HTTP_CODEC_GZIP) { + return; + } + + const http_encoder_vtable_t *vt = http_compression_lookup(HTTP_CODEC_GZIP); + if (vt == NULL) return; + http_encoder_t *enc = vt->create((int)st->cfg->compression_level); + if (enc == NULL) return; + + const http_response_stream_ops_t *under_ops = + http_response_get_stream_ops(response_obj); + void *under_ctx = http_response_get_stream_ctx(response_obj); + if (under_ops == NULL) { + /* No underlying ops to wrap — abort cleanly. 
*/ + vt->destroy(enc); + return; + } + + ws_ctx_t *w = ecalloc(1, sizeof(*w)); + w->response_obj = response_obj; + w->underlying_ops = under_ops; + w->underlying_ctx = under_ctx; + w->encoder = enc; + w->first_chunk_done = false; + + http_response_replace_stream_ops(response_obj, &compressing_stream_ops, w); + + /* Stash on state for cleanup; encoder destroy on object teardown. */ + st->encoder = enc; + st->underlying_ops = under_ops; + st->underlying_ctx = under_ctx; + st->wrapper_ctx = w; + st->wrapper_installed = true; +} + +#else /* HAVE_HTTP_COMPRESSION not defined: provide stubs so callers compile. */ + +#include +struct _zend_object; +struct http_request_t; +struct _http_server_config_t; + +void http_compression_attach(struct _zend_object *o, struct http_request_t *r, + struct _http_server_config_t *c) +{ (void)o; (void)r; (void)c; } +void http_compression_state_free(struct _zend_object *o) { (void)o; } +void http_compression_mark_no_compression(struct _zend_object *o) { (void)o; } +void http_compression_apply_buffered(struct _zend_object *o) { (void)o; } +void http_compression_maybe_install_stream_wrapper(struct _zend_object *o) { (void)o; } + +#endif /* HAVE_HTTP_COMPRESSION */ diff --git a/src/core/http_connection.c b/src/core/http_connection.c index 1179868..5b666bc 100644 --- a/src/core/http_connection.c +++ b/src/core/http_connection.c @@ -183,6 +183,7 @@ http_connection_t *http_connection_create(const php_socket_t socket_fd, zend_asy conn->counters = &http_server_counters_dummy; conn->view = &http_server_view_default; conn->log_state = &http_log_state_default; + conn->config = NULL; /* bound by http_server_bind_connection */ return conn; } @@ -1581,6 +1582,16 @@ static void http_connection_dispatch_request(http_connection_t *conn, http_reque http_response_install_stream_ops(Z_OBJ(ctx->response_zv), &h1_stream_ops, ctx); +#ifdef HAVE_HTTP_COMPRESSION + /* Attach compression state (issue #8). 
conn->config is cached at + * bind time so this hot path is a single load + null-check. */ + if (conn->config != NULL) { + extern void http_compression_attach(zend_object *, + http_request_t *, http_server_config_t *); + http_compression_attach(Z_OBJ(ctx->response_zv), req, conn->config); + } +#endif + conn->state = CONN_STATE_PROCESSING; zend_coroutine_t *coroutine = ZEND_ASYNC_NEW_COROUTINE(conn->scope); @@ -1639,6 +1650,26 @@ static void http_handler_coroutine_entry(void) req->start_ns = zend_hrtime(); } +#ifdef HAVE_HTTP_COMPRESSION + /* Request body decode (Content-Encoding: gzip in). Failures emit + * a canned error response and skip the handler. */ + if (req != NULL) { + extern int http_compression_decode_request_body( + http_request_t *, http_server_config_t *); + extern void http_response_set_error(zend_object *, int, const char *); + int dec = http_compression_decode_request_body(req, conn->config); + if (dec != 0) { + http_response_set_error(Z_OBJ(ctx->response_zv), dec, + dec == 415 ? "Unsupported Content-Encoding" : + dec == 413 ? "Payload Too Large after decompression" : + "Malformed compressed request body"); + http_server_count_request(conn->counters); + if (req && stamps) req->end_ns = zend_hrtime(); + return; /* Skip handler call; dispose emits the response. */ + } + } +#endif + zval params[2], retval; ZVAL_COPY_VALUE(&params[0], &ctx->request_zv); ZVAL_COPY_VALUE(&params[1], &ctx->response_zv); diff --git a/src/core/http_connection.h b/src/core/http_connection.h index a1dbc42..732d8bf 100644 --- a/src/core/http_connection.h +++ b/src/core/http_connection.h @@ -88,6 +88,12 @@ struct _http_connection_t { http_server_counters_t *counters; const http_server_view_t *view; + /* Live config pointer, cached at bind time. Same NULL-safety + * discipline as counters/view: re-pointed to NULL at server free, + * callers null-check before reading non-snapshot fields (HashTables, + * MIME whitelist) that don't fit into the lean view slice. 
*/ + http_server_config_t *config; + /* Sink for log emits originating from this connection. Set at * create time to &server->log_state (or &http_log_state_default * when unsupervised). http_server_free re-points back to default diff --git a/src/http2/http2_strategy.c b/src/http2/http2_strategy.c index e1df150..e7133af 100644 --- a/src/http2/http2_strategy.c +++ b/src/http2/http2_strategy.c @@ -132,6 +132,17 @@ static void http2_strategy_dispatch(struct http_request_t *const request, http_response_install_stream_ops(Z_OBJ(stream->response_zv), &h2_stream_ops, stream); +#ifdef HAVE_HTTP_COMPRESSION + /* Attach compression state (issue #8). Mirror of the H1 dispatch + * hook — uses conn->config cached at bind time. */ + if (self->conn->config != NULL) { + extern void http_compression_attach(zend_object *, + http_request_t *, http_server_config_t *); + http_compression_attach(Z_OBJ(stream->response_zv), + stream->request, self->conn->config); + } +#endif + /* Spawn the handler coroutine. extended_data is the STREAM, not * the connection — that's what makes multiplex safe: N * coroutines hold N distinct stream pointers, each pointing at @@ -204,6 +215,28 @@ static void http2_handler_coroutine_entry(void) stream->request->start_ns = zend_hrtime(); } +#ifdef HAVE_HTTP_COMPRESSION + /* Inbound Content-Encoding decode (issue #8). Mirror of the H1 + * handler-entry hook — produces a canned error response and skips + * the user handler when decoding fails. */ + if (stream->request != NULL) { + extern int http_compression_decode_request_body( + http_request_t *, http_server_config_t *); + extern void http_response_set_error(zend_object *, int, const char *); + int dec = http_compression_decode_request_body( + stream->request, conn->config); + if (dec != 0) { + http_response_set_error(Z_OBJ(stream->response_zv), dec, + dec == 415 ? "Unsupported Content-Encoding" : + dec == 413 ? 
"Payload Too Large after decompression" : + "Malformed compressed request body"); + http_server_count_request(conn->counters); + if (stamps) stream->request->end_ns = zend_hrtime(); + return; + } + } +#endif + zval params[2], retval; ZVAL_COPY_VALUE(&params[0], &stream->request_zv); ZVAL_COPY_VALUE(&params[1], &stream->response_zv); @@ -524,6 +557,17 @@ static bool http2_commit_stream_response(http_connection_t *const conn, return false; } +#ifdef HAVE_HTTP_COMPRESSION + /* H2 reads body via http_response_get_body() directly rather than + * http_response_format[/_parts], so the buffered apply hook lives + * here too — must run before the headers flatten so the mutated + * Content-Encoding/Vary ride the HEADERS frame. */ + { + extern void http_compression_apply_buffered(zend_object *); + http_compression_apply_buffered(response_obj); + } +#endif + /* Advertise H3 endpoint to H2 clients via Alt-Svc. * Same hook as H1; no-op when handler already set the header or * no H3 listener exists. Injected before the header-flatten so diff --git a/src/http3/http3_callbacks.c b/src/http3/http3_callbacks.c index 7fc4c9a..c3d4833 100644 --- a/src/http3/http3_callbacks.c +++ b/src/http3/http3_callbacks.c @@ -489,6 +489,20 @@ bool http3_stream_submit_response(http3_connection_t *c, zend_object *resp_obj = Z_OBJ(s->response_zv); +#ifdef HAVE_HTTP_COMPRESSION + /* H3 reads body via http_response_get_body_str() directly rather + * than http_response_format[/_parts], so the buffered apply hook + * runs here too — must precede the headers-flatten loop so the + * mutated Content-Encoding/Vary land in the HEADERS frame. The + * streaming path (`streaming==true`) is handled by the stream + * wrapper installed at first send(); the apply call is a cheap + * no-op there. */ + { + extern void http_compression_apply_buffered(zend_object *); + http_compression_apply_buffered(resp_obj); + } +#endif + /* :status must come first per RFC 9114 §4.3.1. 
Stringified into a * fixed scratch buffer so its lifetime matches the submit call. */ char status_buf[8]; diff --git a/src/http3/http3_dispatch.c b/src/http3/http3_dispatch.c index 9afa04b..46e12a8 100644 --- a/src/http3/http3_dispatch.c +++ b/src/http3/http3_dispatch.c @@ -106,6 +106,23 @@ void http3_stream_dispatch(http3_connection_t *c, http3_stream_t *s) http_response_install_stream_ops(Z_OBJ(s->response_zv), &h3_stream_ops, s); +#ifdef HAVE_HTTP_COMPRESSION + /* Attach compression state (issue #8). Server pointer comes from + * the listener — same pattern that http3_handler_coroutine uses + * for the request-sample bookkeeping. */ + { + extern void http_compression_attach(zend_object *, + http_request_t *, http_server_config_t *); + http_server_object *srv = + (http_server_object *)http3_listener_server_obj(c->listener); + http_server_config_t *cfg = http_server_get_config(srv); + if (cfg != NULL) { + http_compression_attach(Z_OBJ(s->response_zv), + s->request, cfg); + } + } +#endif + /* Spawn the per-stream handler coroutine. extended_data is the * STREAM (not the connection) — that's how N concurrent streams on * the same QUIC connection get N independent (request, response) @@ -157,6 +174,27 @@ static void h3_handler_coroutine_entry(void) } if (fcall == NULL) return; +#ifdef HAVE_HTTP_COMPRESSION + /* Inbound Content-Encoding decode (issue #8). Same shape as the + * H1/H2 handler entries. */ + if (s->request != NULL) { + extern int http_compression_decode_request_body( + http_request_t *, http_server_config_t *); + extern void http_response_set_error(zend_object *, int, const char *); + http_server_config_t *cfg = http_server_get_config(server); + int dec = http_compression_decode_request_body(s->request, cfg); + if (dec != 0) { + http_response_set_error(Z_OBJ(s->response_zv), dec, + dec == 415 ? "Unsupported Content-Encoding" : + dec == 413 ? 
"Payload Too Large after decompression" : + "Malformed compressed request body"); + http_server_count_request(s->conn->counters); + if (s->request != NULL && stamps) s->request->end_ns = zend_hrtime(); + return; + } + } +#endif + zval params[2], retval; ZVAL_COPY_VALUE(&params[0], &s->request_zv); ZVAL_COPY_VALUE(&params[1], &s->response_zv); diff --git a/src/http_response.c b/src/http_response.c index 89ab627..4019f62 100644 --- a/src/http_response.c +++ b/src/http_response.c @@ -94,6 +94,13 @@ typedef struct { bool committed; /* Response fully prepared for sending? */ bool streaming; /* send() has been called — setBody/setHeader now throw */ + /* Compression module state (issue #8). Opaque ptr — owned by the + * compression TU; allocated by http_compression_attach at dispatch + * and freed by http_compression_state_free at object dtor. NULL + * when compression is disabled or the response was created + * standalone (no dispatch). */ + void *compression_state; + zend_object std; } http_response_object; @@ -849,6 +856,16 @@ ZEND_METHOD(TrueAsync_HttpResponse, send) response->streaming = true; response->committed = true; response->headers_sent = true; +#ifdef HAVE_HTTP_COMPRESSION + /* Wrap stream_ops with a compressing one if Accept-Encoding + + * response state allow gzip. Mutates Content-Encoding/Vary on + * the response so the stream's underlying header-commit picks + * them up on the next line. */ + { + extern void http_compression_maybe_install_stream_wrapper(zend_object *); + http_compression_maybe_install_stream_wrapper(Z_OBJ_P(ZEND_THIS)); + } +#endif } /* Hand ownership of the chunk to the queue — the ops layer @@ -920,6 +937,23 @@ ZEND_METHOD(TrueAsync_HttpResponse, end) } /* }}} */ +/* {{{ proto HttpResponse::setNoCompression(): static + * + * Mark the response as ineligible for compression — overrides every + * other rule. 
Use on responses that mix secrets with reflected user + * input (BREACH mitigation), pre-compressed payloads, or anything the + * server should not wrap in Content-Encoding. Idempotent. */ +ZEND_METHOD(TrueAsync_HttpResponse, setNoCompression) +{ + ZEND_PARSE_PARAMETERS_NONE(); +#ifdef HAVE_HTTP_COMPRESSION + extern void http_compression_mark_no_compression(zend_object *obj); + http_compression_mark_no_compression(Z_OBJ_P(ZEND_THIS)); +#endif + RETURN_OBJ_COPY(Z_OBJ_P(ZEND_THIS)); +} +/* }}} */ + /* {{{ proto HttpResponse::isHeadersSent(): bool */ ZEND_METHOD(TrueAsync_HttpResponse, isHeadersSent) { @@ -953,6 +987,7 @@ static zend_object *http_response_create(zend_class_entry *ce) response->streaming = false; response->stream_ops = NULL; response->stream_ctx = NULL; + response->compression_state = NULL; response->socket_fd = SOCK_ERR; memset(&response->body, 0, sizeof(smart_str)); @@ -995,9 +1030,78 @@ static void http_response_free(zend_object *obj) smart_str_free(&response->body); +#ifdef HAVE_HTTP_COMPRESSION + /* Compression state is owned by the compression TU; reach in only + * via the dedicated free helper — keeps the response struct opaque + * to that side. NULL-safe. */ + { + extern void http_compression_state_free(zend_object *obj); + http_compression_state_free(obj); + } +#endif + zend_object_std_dtor(&response->std); } +/* ============================================================ + * Accessors used by the compression module (issue #8). Kept here + * so http_response_object stays private to this TU. 
+ * ============================================================ */ + +void *http_response_get_compression_slot(zend_object *obj) +{ + return http_response_from_obj(obj)->compression_state; +} + +void http_response_set_compression_slot(zend_object *obj, void *p) +{ + http_response_from_obj(obj)->compression_state = p; +} + +const http_response_stream_ops_t *http_response_get_stream_ops(zend_object *obj) +{ + return http_response_from_obj(obj)->stream_ops; +} + +void *http_response_get_stream_ctx(zend_object *obj) +{ + return http_response_from_obj(obj)->stream_ctx; +} + +void http_response_replace_stream_ops(zend_object *obj, + const http_response_stream_ops_t *ops, + void *ctx) +{ + http_response_object *r = http_response_from_obj(obj); + r->stream_ops = ops; + r->stream_ctx = ctx; +} + +smart_str *http_response_get_body_smart_str(zend_object *obj) +{ + return &http_response_from_obj(obj)->body; +} + +/* Internal "set canned error" — used when the dispatch layer rejects + * a request before the handler runs (e.g. Content-Encoding decode + * failure). Bypasses the PHP-facing setHeader/setStatusCode guards + * because nothing has been committed yet. The dispose path emits it + * exactly like a handler-built response. + * + * zend_hash_str_update avoids the per-call key zend_string allocation + * a normal setHeader would perform; only the value is reified. 
*/ +void http_response_set_error(zend_object *obj, int status, const char *message) +{ + http_response_object *r = http_response_from_obj(obj); + r->status_code = status; + zval ct_z; + ZVAL_STR(&ct_z, zend_string_init("text/plain; charset=utf-8", 25, 0)); + zend_hash_str_update(r->headers, "content-type", 12, &ct_z); + smart_str_free(&r->body); + smart_str_appends(&r->body, message); + smart_str_0(&r->body); +} + /* {{{ http_response_class_register */ void http_response_class_register(void) { @@ -1150,6 +1254,13 @@ void http_response_format_parts(zend_object *obj, http_response_object *response = http_response_from_obj(obj); smart_str result = {0}; +#ifdef HAVE_HTTP_COMPRESSION + { + extern void http_compression_apply_buffered(zend_object *); + http_compression_apply_buffered(obj); + } +#endif + smart_str_0(&response->body); const size_t body_len = response->body.s ? ZSTR_LEN(response->body.s) : 0; @@ -1175,6 +1286,13 @@ zend_string *http_response_format(zend_object *obj) http_response_object *response = http_response_from_obj(obj); smart_str result = {0}; +#ifdef HAVE_HTTP_COMPRESSION + { + extern void http_compression_apply_buffered(zend_object *); + http_compression_apply_buffered(obj); + } +#endif + smart_str_0(&response->body); const size_t body_len = response->body.s ? ZSTR_LEN(response->body.s) : 0; diff --git a/src/http_server_class.c b/src/http_server_class.c index 5b9fc86..51b3d02 100644 --- a/src/http_server_class.c +++ b/src/http_server_class.c @@ -743,6 +743,13 @@ zend_async_scope_t *http_server_get_scope(http_server_object *server) return server != NULL ? server->server_scope : NULL; } +http_server_config_t *http_server_get_config(http_server_object *server) +{ + if (server == NULL) return NULL; + if (Z_TYPE(server->config) != IS_OBJECT) return NULL; + return http_server_config_from_obj(Z_OBJ(server->config)); +} + http_log_state_t *http_server_get_log_state(http_server_object *server) { return server != NULL ? 
&server->log_state : &http_log_state_default; @@ -864,6 +871,14 @@ void http_server_bind_connection(http_server_object *server, real->counters = &server->counters; real->view = &server->view; real->log_state = &server->log_state; + /* Live config pointer for hot-path readers (compression, future + * inline accessors). Same NULL-on-server-free discipline as the + * other cached pointers. */ + if (Z_TYPE(server->config) == IS_OBJECT) { + real->config = http_server_config_from_obj(Z_OBJ(server->config)); + } else { + real->config = NULL; + } } /* Connection close hook. Drives active_connections-- and the hysteresis diff --git a/src/http_server_config.c b/src/http_server_config.c index f4b490b..0e31ccc 100644 --- a/src/http_server_config.c +++ b/src/http_server_config.c @@ -19,6 +19,9 @@ #include "php_streams.h" /* php_stream_from_zval_no_verify */ #include "php_http_server.h" #include "log/http_log.h" /* http_log_severity_ce */ +#ifdef HAVE_HTTP_COMPRESSION +#include "compression/http_compression_defaults.h" +#endif #include #include @@ -73,6 +76,17 @@ struct _http_server_shared_config_t { uint32_t http3_max_concurrent_streams; uint32_t http3_peer_connection_budget; + /* Compression — see http_server_config_t for semantics. The MIME + * whitelist is deep-copied to a persistent zend_string array so + * cross-thread LOAD can rebuild it without touching the source + * thread's HashTable. 
*/ + bool compression_enabled; + uint8_t compression_level; + size_t compression_min_size; + size_t request_max_decompressed_size; + zend_string **compression_mime_types; /* persistent strings */ + size_t compression_mime_count; + bool http2_enabled; bool websocket_enabled; bool protocol_detection_enabled; @@ -137,6 +151,56 @@ static void http_server_config_populate_from_shared( #define DEFAULT_HTTP3_PEER_BUDGET 16u #define HTTP3_PEER_BUDGET_MAX 4096u +#ifdef HAVE_HTTP_COMPRESSION +/* Compression knob defaults are sourced from + * include/compression/http_compression_defaults.h so policy and the + * HttpServerConfig setters share a single source of truth. */ + +/* Strip MIME parameters (`; charset=utf-8`), trim, lowercase. Returns + * an emalloc'd zend_string. NULL on empty/blank input — callers reject + * such entries. Done once at setter time so the per-request match path + * does no normalisation work. */ +static zend_string *http_compression_normalize_mime(const char *src, size_t len) +{ + while (len > 0 && (src[0] == ' ' || src[0] == '\t')) { src++; len--; } + size_t end = len; + for (size_t i = 0; i < len; i++) { + if (src[i] == ';') { end = i; break; } + } + while (end > 0 && (src[end - 1] == ' ' || src[end - 1] == '\t')) end--; + if (end == 0) return NULL; + + zend_string *out = zend_string_alloc(end, 0); + for (size_t i = 0; i < end; i++) { + char c = src[i]; + if (c >= 'A' && c <= 'Z') c = (char)(c - 'A' + 'a'); + ZSTR_VAL(out)[i] = c; + } + ZSTR_VAL(out)[end] = '\0'; + return out; +} + +/* Initialise an empty-but-allocated mime-types HashTable on the config. + * Set semantics: keys are lowercase mime strings, values are dummy + * IS_TRUE zvals so zend_hash_str_exists is the only lookup. */ +static void http_compression_mime_table_init(HashTable **dst) +{ + ALLOC_HASHTABLE(*dst); + zend_hash_init(*dst, 16, NULL, ZVAL_PTR_DTOR, 0); +} + +/* Populate dst from a NULL-terminated default whitelist. 
Idempotent: + * adding a key already present is a no-op for set semantics. */ +static void http_compression_mime_table_load_defaults(HashTable *dst) +{ + for (const char *const *p = http_compression_default_mime_types; *p != NULL; p++) { + zval one; + ZVAL_TRUE(&one); + zend_hash_str_update(dst, *p, strlen(*p), &one); + } +} +#endif /* HAVE_HTTP_COMPRESSION */ + /* Class entry */ zend_class_entry *http_server_config_ce; static zend_object_handlers http_server_config_handlers; @@ -1122,6 +1186,218 @@ ZEND_METHOD(TrueAsync_HttpServerConfig, isHttp3AltSvcEnabled) RETURN_BOOL(config->http3_alt_svc_enabled); } +/* ========================================================================== + * HTTP body compression knobs (issue #8). Editable until the config is + * locked — see HttpServer::__construct. The MIME whitelist setter + * REPLACES the list wholesale (nginx semantics): pass the full set of + * eligible types, not a delta against the defaults. + * ========================================================================== + */ + +/* {{{ proto HttpServerConfig::setCompressionEnabled(bool $enable): static */ +ZEND_METHOD(TrueAsync_HttpServerConfig, setCompressionEnabled) +{ + bool enable; + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_BOOL(enable) + ZEND_PARSE_PARAMETERS_END(); + + http_server_config_t *config = Z_HTTP_SERVER_CONFIG_P(ZEND_THIS); + if (config_check_locked(config)) return; + +#ifdef HAVE_HTTP_COMPRESSION + config->compression_enabled = enable; +#else + if (enable) { + zend_throw_exception(http_server_invalid_argument_exception_ce, + "HTTP body compression is not built into this extension " + "(rebuild with --enable-http-compression)", 0); + return; + } +#endif + RETURN_OBJ_COPY(Z_OBJ_P(ZEND_THIS)); +} +/* }}} */ + +ZEND_METHOD(TrueAsync_HttpServerConfig, isCompressionEnabled) +{ + ZEND_PARSE_PARAMETERS_NONE(); + http_server_config_t *config = Z_HTTP_SERVER_CONFIG_P(ZEND_THIS); + RETURN_BOOL(config->compression_enabled); +} + +/* {{{ proto 
HttpServerConfig::setCompressionLevel(int $level): static */ +ZEND_METHOD(TrueAsync_HttpServerConfig, setCompressionLevel) +{ + zend_long level; + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_LONG(level) + ZEND_PARSE_PARAMETERS_END(); + + http_server_config_t *config = Z_HTTP_SERVER_CONFIG_P(ZEND_THIS); + if (config_check_locked(config)) return; + +#ifdef HAVE_HTTP_COMPRESSION + if (level < HTTP_COMPRESSION_LEVEL_MIN || level > HTTP_COMPRESSION_LEVEL_MAX) { + zend_throw_exception_ex(http_server_invalid_argument_exception_ce, 0, + "Compression level must be between %d and %d", + HTTP_COMPRESSION_LEVEL_MIN, HTTP_COMPRESSION_LEVEL_MAX); + return; + } + config->compression_level = (uint8_t)level; +#else + (void)level; +#endif + RETURN_OBJ_COPY(Z_OBJ_P(ZEND_THIS)); +} +/* }}} */ + +ZEND_METHOD(TrueAsync_HttpServerConfig, getCompressionLevel) +{ + ZEND_PARSE_PARAMETERS_NONE(); + http_server_config_t *config = Z_HTTP_SERVER_CONFIG_P(ZEND_THIS); + RETURN_LONG(config->compression_level); +} + +/* {{{ proto HttpServerConfig::setCompressionMinSize(int $bytes): static */ +ZEND_METHOD(TrueAsync_HttpServerConfig, setCompressionMinSize) +{ + zend_long bytes; + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_LONG(bytes) + ZEND_PARSE_PARAMETERS_END(); + + http_server_config_t *config = Z_HTTP_SERVER_CONFIG_P(ZEND_THIS); + if (config_check_locked(config)) return; + +#ifdef HAVE_HTTP_COMPRESSION + if (bytes < 0 || (zend_ulong)bytes > HTTP_COMPRESSION_MIN_SIZE_MAX) { + zend_throw_exception_ex(http_server_invalid_argument_exception_ce, 0, + "Compression min-size must be in [0, %u]", + (unsigned)HTTP_COMPRESSION_MIN_SIZE_MAX); + return; + } + config->compression_min_size = (size_t)bytes; +#else + (void)bytes; +#endif + RETURN_OBJ_COPY(Z_OBJ_P(ZEND_THIS)); +} +/* }}} */ + +ZEND_METHOD(TrueAsync_HttpServerConfig, getCompressionMinSize) +{ + ZEND_PARSE_PARAMETERS_NONE(); + http_server_config_t *config = Z_HTTP_SERVER_CONFIG_P(ZEND_THIS); + RETURN_LONG((zend_long)config->compression_min_size); 
+} + +/* {{{ proto HttpServerConfig::setCompressionMimeTypes(array $types): static */ +ZEND_METHOD(TrueAsync_HttpServerConfig, setCompressionMimeTypes) +{ + HashTable *types; + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_ARRAY_HT(types) + ZEND_PARSE_PARAMETERS_END(); + + http_server_config_t *config = Z_HTTP_SERVER_CONFIG_P(ZEND_THIS); + if (config_check_locked(config)) return; + +#ifdef HAVE_HTTP_COMPRESSION + /* Pre-validate every entry into a staging HashTable so a malformed + * element doesn't leave us with a half-replaced policy. nginx + * semantics: this REPLACES the previous list (defaults included). */ + HashTable staged; + zend_hash_init(&staged, zend_hash_num_elements(types) + 1, NULL, ZVAL_PTR_DTOR, 0); + + zval *entry; + ZEND_HASH_FOREACH_VAL(types, entry) { + if (Z_TYPE_P(entry) != IS_STRING) { + zend_hash_destroy(&staged); + zend_throw_exception(http_server_invalid_argument_exception_ce, + "Compression MIME types must be strings", 0); + return; + } + zend_string *norm = http_compression_normalize_mime( + Z_STRVAL_P(entry), Z_STRLEN_P(entry)); + if (norm == NULL) { + zend_hash_destroy(&staged); + zend_throw_exception(http_server_invalid_argument_exception_ce, + "Compression MIME type is empty after stripping parameters", 0); + return; + } + zval one; + ZVAL_TRUE(&one); + zend_hash_update(&staged, norm, &one); + zend_string_release(norm); + } ZEND_HASH_FOREACH_END(); + + zend_hash_destroy(config->compression_mime_types); + /* Re-init in place — preserves the existing pointer that snapshots + * may already reference internally during config-lock. 
*/ + zend_hash_init(config->compression_mime_types, zend_hash_num_elements(&staged) + 1, + NULL, ZVAL_PTR_DTOR, 0); + zend_string *key; + ZEND_HASH_FOREACH_STR_KEY(&staged, key) { + if (key) { + zval one; + ZVAL_TRUE(&one); + zend_hash_update(config->compression_mime_types, key, &one); + } + } ZEND_HASH_FOREACH_END(); + zend_hash_destroy(&staged); +#else + (void)types; +#endif + RETURN_OBJ_COPY(Z_OBJ_P(ZEND_THIS)); +} +/* }}} */ + +ZEND_METHOD(TrueAsync_HttpServerConfig, getCompressionMimeTypes) +{ + ZEND_PARSE_PARAMETERS_NONE(); + http_server_config_t *config = Z_HTTP_SERVER_CONFIG_P(ZEND_THIS); + + array_init(return_value); + if (config->compression_mime_types) { + zend_string *key; + ZEND_HASH_FOREACH_STR_KEY(config->compression_mime_types, key) { + if (key) { + add_next_index_str(return_value, zend_string_copy(key)); + } + } ZEND_HASH_FOREACH_END(); + } +} + +/* {{{ proto HttpServerConfig::setRequestMaxDecompressedSize(int $bytes): static */ +ZEND_METHOD(TrueAsync_HttpServerConfig, setRequestMaxDecompressedSize) +{ + zend_long bytes; + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_LONG(bytes) + ZEND_PARSE_PARAMETERS_END(); + + http_server_config_t *config = Z_HTTP_SERVER_CONFIG_P(ZEND_THIS); + if (config_check_locked(config)) return; + + if (bytes < 0) { + zend_throw_exception(http_server_invalid_argument_exception_ce, + "Max decompressed request body size must be >= 0 " + "(0 = no cap, must be explicit)", 0); + return; + } + config->request_max_decompressed_size = (size_t)bytes; + RETURN_OBJ_COPY(Z_OBJ_P(ZEND_THIS)); +} +/* }}} */ + +ZEND_METHOD(TrueAsync_HttpServerConfig, getRequestMaxDecompressedSize) +{ + ZEND_PARSE_PARAMETERS_NONE(); + http_server_config_t *config = Z_HTTP_SERVER_CONFIG_P(ZEND_THIS); + RETURN_LONG((zend_long)config->request_max_decompressed_size); +} + /* {{{ proto HttpServerConfig::setWriteBufferSize(int $size): static */ ZEND_METHOD(TrueAsync_HttpServerConfig, setWriteBufferSize) { @@ -1599,6 +1875,21 @@ static zend_object 
*http_server_config_create(zend_class_entry *ce) config->telemetry_enabled = false; config->frozen = NULL; +#ifdef HAVE_HTTP_COMPRESSION + config->compression_enabled = true; + config->compression_level = HTTP_COMPRESSION_DEFAULT_LEVEL; + config->compression_min_size = HTTP_COMPRESSION_DEFAULT_MIN_SIZE; + config->request_max_decompressed_size = HTTP_COMPRESSION_DEFAULT_REQUEST_MAX_DECOMP; + http_compression_mime_table_init(&config->compression_mime_types); + http_compression_mime_table_load_defaults(config->compression_mime_types); +#else + config->compression_enabled = false; + config->compression_level = 0; + config->compression_min_size = 0; + config->request_max_decompressed_size = 0; + config->compression_mime_types = NULL; +#endif + zend_object_std_init(&config->std, ce); object_properties_init(&config->std, ce); config->std.handlers = &http_server_config_handlers; @@ -1638,6 +1929,12 @@ static void http_server_config_free(zend_object *obj) config->frozen = NULL; } + if (config->compression_mime_types) { + zend_hash_destroy(config->compression_mime_types); + FREE_HASHTABLE(config->compression_mime_types); + config->compression_mime_types = NULL; + } + zend_object_std_dtor(&config->std); } @@ -1686,6 +1983,27 @@ static http_server_shared_config_t *http_server_shared_config_freeze( shared->tls_enabled = src->tls_enabled; shared->auto_await_body = src->auto_await_body; + shared->compression_enabled = src->compression_enabled; + shared->compression_level = src->compression_level; + shared->compression_min_size = src->compression_min_size; + shared->request_max_decompressed_size = src->request_max_decompressed_size; + if (src->compression_mime_types && zend_hash_num_elements(src->compression_mime_types) > 0) { + size_t n = zend_hash_num_elements(src->compression_mime_types); + shared->compression_mime_types = pecalloc(n, sizeof(zend_string *), 1); + shared->compression_mime_count = n; + size_t i = 0; + zend_string *key; + 
ZEND_HASH_FOREACH_STR_KEY(src->compression_mime_types, key) { + if (key) { + shared->compression_mime_types[i] = zend_string_init( + ZSTR_VAL(key), ZSTR_LEN(key), 1); + GC_MAKE_PERSISTENT_LOCAL(shared->compression_mime_types[i]); + i++; + } + } ZEND_HASH_FOREACH_END(); + shared->compression_mime_count = i; + } + if (src->tls_cert_path) { shared->tls_cert_path = zend_string_init( ZSTR_VAL(src->tls_cert_path), ZSTR_LEN(src->tls_cert_path), 1); @@ -1754,6 +2072,15 @@ static void http_server_shared_config_release(http_server_shared_config_t *share zend_string_release_ex(shared->tls_key_path, 1); } + if (shared->compression_mime_types) { + for (size_t i = 0; i < shared->compression_mime_count; i++) { + if (shared->compression_mime_types[i]) { + zend_string_release_ex(shared->compression_mime_types[i], 1); + } + } + pefree(shared->compression_mime_types, 1); + } + pefree(shared, 1); } @@ -1791,6 +2118,24 @@ static void http_server_config_populate_from_shared( dst->tls_enabled = src->tls_enabled; dst->auto_await_body = src->auto_await_body; + dst->compression_enabled = src->compression_enabled; + dst->compression_level = src->compression_level; + dst->compression_min_size = src->compression_min_size; + dst->request_max_decompressed_size = src->request_max_decompressed_size; + if (dst->compression_mime_types) { + /* create_object pre-populated this with defaults; replace with the + * actual locked snapshot. 
*/ + zend_hash_clean(dst->compression_mime_types); + for (size_t i = 0; i < src->compression_mime_count; i++) { + zval one; + ZVAL_TRUE(&one); + zend_hash_str_update(dst->compression_mime_types, + ZSTR_VAL(src->compression_mime_types[i]), + ZSTR_LEN(src->compression_mime_types[i]), + &one); + } + } + if (src->tls_cert_path) { dst->tls_cert_path = zend_string_init( ZSTR_VAL(src->tls_cert_path), ZSTR_LEN(src->tls_cert_path), 0); diff --git a/stubs/HttpResponse.php b/stubs/HttpResponse.php index 53d57df..7b94199 100644 --- a/stubs/HttpResponse.php +++ b/stubs/HttpResponse.php @@ -176,6 +176,18 @@ public function write(string $data): static {} */ public function send(string $chunk): static {} + /** + * Mark this response as ineligible for compression. Overrides every + * other rule (Accept-Encoding negotiation, MIME whitelist, size + * threshold). Use for endpoints that combine secrets with reflected + * user input (BREACH mitigation), responses already bearing a + * Content-Encoding the handler set itself, or any payload the + * server must not wrap. Idempotent. + * + * @return static + */ + public function setNoCompression(): static {} + /** * Get current body content */ diff --git a/stubs/HttpResponse.php_arginfo.h b/stubs/HttpResponse.php_arginfo.h index d8e981f..2404880 100644 --- a/stubs/HttpResponse.php_arginfo.h +++ b/stubs/HttpResponse.php_arginfo.h @@ -123,6 +123,9 @@ ZEND_METHOD(TrueAsync_HttpResponse, send); /* send() has the same (string) → static signature as write(). 
*/ #define arginfo_class_TrueAsync_HttpResponse_send arginfo_class_TrueAsync_HttpResponse_write +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_TrueAsync_HttpResponse_setNoCompression, 0, 0, IS_STATIC, 0) +ZEND_END_ARG_INFO() +ZEND_METHOD(TrueAsync_HttpResponse, setNoCompression); ZEND_METHOD(TrueAsync_HttpResponse, getBody); ZEND_METHOD(TrueAsync_HttpResponse, setBody); ZEND_METHOD(TrueAsync_HttpResponse, getBodyStream); @@ -155,6 +158,7 @@ static const zend_function_entry class_TrueAsync_HttpResponse_methods[] = { ZEND_ME(TrueAsync_HttpResponse, getProtocolVersion, arginfo_class_TrueAsync_HttpResponse_getProtocolVersion, ZEND_ACC_PUBLIC) ZEND_ME(TrueAsync_HttpResponse, write, arginfo_class_TrueAsync_HttpResponse_write, ZEND_ACC_PUBLIC) ZEND_ME(TrueAsync_HttpResponse, send, arginfo_class_TrueAsync_HttpResponse_send, ZEND_ACC_PUBLIC) + ZEND_ME(TrueAsync_HttpResponse, setNoCompression, arginfo_class_TrueAsync_HttpResponse_setNoCompression, ZEND_ACC_PUBLIC) ZEND_ME(TrueAsync_HttpResponse, getBody, arginfo_class_TrueAsync_HttpResponse_getBody, ZEND_ACC_PUBLIC) ZEND_ME(TrueAsync_HttpResponse, setBody, arginfo_class_TrueAsync_HttpResponse_setBody, ZEND_ACC_PUBLIC) ZEND_ME(TrueAsync_HttpResponse, getBodyStream, arginfo_class_TrueAsync_HttpResponse_getBodyStream, ZEND_ACC_PUBLIC) diff --git a/stubs/HttpServerConfig.php b/stubs/HttpServerConfig.php index ae2868c..3dd1ecb 100644 --- a/stubs/HttpServerConfig.php +++ b/stubs/HttpServerConfig.php @@ -392,6 +392,89 @@ public function setHttp3AltSvcEnabled(bool $enable): static {} /** @return bool */ public function isHttp3AltSvcEnabled(): bool {} + // === HTTP body compression (issue #8) === + + /** + * Master switch for HTTP body compression. When true (default), responses + * served on H1/H2/H3 are compressed when the client advertises a + * supported encoding via Accept-Encoding and the response satisfies + * the policy filters (size, MIME, no Range, etc.). + * + * Default: true. 
When the extension is built without + * --enable-http-compression, only setCompressionEnabled(false) is + * accepted — passing true throws. + * + * @param bool $enable + * @return static + */ + public function setCompressionEnabled(bool $enable): static {} + + /** @return bool */ + public function isCompressionEnabled(): bool {} + + /** + * Compression level. zlib semantics: 1 = fastest/weakest, + * 9 = slowest/strongest, 6 = balanced default. + * + * Default: 6. Valid: 1..9. + * + * @param int $level + * @return static + */ + public function setCompressionLevel(int $level): static {} + + /** @return int */ + public function getCompressionLevel(): int {} + + /** + * Body-size threshold below which responses are left uncompressed + * (the encoding overhead beats any real-world win on tiny bodies). + * + * Default: 1024 (1 KiB). Valid: 0..16 MiB. + * + * @param int $bytes + * @return static + */ + public function setCompressionMinSize(int $bytes): static {} + + /** @return int */ + public function getCompressionMinSize(): int {} + + /** + * MIME-type whitelist eligible for compression. REPLACES the current + * list wholesale (nginx `gzip_types` semantics). Entries are + * normalised at setter time: parameters (`; charset=…`) stripped, + * whitespace trimmed, lowercased — so the per-request match is + * exact and zero-allocation. + * + * Default: ["application/javascript", "application/json", + * "application/xml", "image/svg+xml", "text/css", "text/html", + * "text/javascript", "text/plain", "text/xml"]. + * + * @param string[] $types + * @return static + */ + public function setCompressionMimeTypes(array $types): static {} + + /** @return string[] The materialised whitelist */ + public function getCompressionMimeTypes(): array {} + + /** + * Anti-zip-bomb cap on decoded request bodies (Content-Encoding: gzip + * inbound). Decoders abort and the request fails with 413 once the + * decompressed byte count exceeds this. 
0 disables the cap entirely + * (must be set explicitly — there is no implicit "unlimited" path). + * + * Default: 10485760 (10 MiB). + * + * @param int $bytes + * @return static + */ + public function setRequestMaxDecompressedSize(int $bytes): static {} + + /** @return int */ + public function getRequestMaxDecompressedSize(): int {} + // === Buffers === /** diff --git a/stubs/HttpServerConfig.php_arginfo.h b/stubs/HttpServerConfig.php_arginfo.h index d9174a6..ccecf10 100644 --- a/stubs/HttpServerConfig.php_arginfo.h +++ b/stubs/HttpServerConfig.php_arginfo.h @@ -108,6 +108,20 @@ ZEND_END_ARG_INFO() #define arginfo_class_TrueAsync_HttpServerConfig_setHttp3AltSvcEnabled arginfo_class_TrueAsync_HttpServerConfig_enableHttp2 #define arginfo_class_TrueAsync_HttpServerConfig_isHttp3AltSvcEnabled arginfo_class_TrueAsync_HttpServerConfig_isHttp2Enabled +/* HTTP body compression (issue #8) */ +#define arginfo_class_TrueAsync_HttpServerConfig_setCompressionEnabled arginfo_class_TrueAsync_HttpServerConfig_enableHttp2 +#define arginfo_class_TrueAsync_HttpServerConfig_isCompressionEnabled arginfo_class_TrueAsync_HttpServerConfig_isHttp2Enabled +#define arginfo_class_TrueAsync_HttpServerConfig_setCompressionLevel arginfo_class_TrueAsync_HttpServerConfig_setBacklog +#define arginfo_class_TrueAsync_HttpServerConfig_getCompressionLevel arginfo_class_TrueAsync_HttpServerConfig_getBacklog +#define arginfo_class_TrueAsync_HttpServerConfig_setCompressionMinSize arginfo_class_TrueAsync_HttpServerConfig_setBacklog +#define arginfo_class_TrueAsync_HttpServerConfig_getCompressionMinSize arginfo_class_TrueAsync_HttpServerConfig_getBacklog +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_TrueAsync_HttpServerConfig_setCompressionMimeTypes, 0, 1, IS_STATIC, 0) + ZEND_ARG_TYPE_INFO(0, types, IS_ARRAY, 0) +ZEND_END_ARG_INFO() +#define arginfo_class_TrueAsync_HttpServerConfig_getCompressionMimeTypes arginfo_class_TrueAsync_HttpServerConfig_getListeners +#define 
arginfo_class_TrueAsync_HttpServerConfig_setRequestMaxDecompressedSize arginfo_class_TrueAsync_HttpServerConfig_setBacklog +#define arginfo_class_TrueAsync_HttpServerConfig_getRequestMaxDecompressedSize arginfo_class_TrueAsync_HttpServerConfig_getBacklog + /* setWriteBufferSize */ #define arginfo_class_TrueAsync_HttpServerConfig_setWriteBufferSize arginfo_class_TrueAsync_HttpServerConfig_setBacklog @@ -227,6 +241,16 @@ ZEND_METHOD(TrueAsync_HttpServerConfig, setHttp3PeerConnectionBudget); ZEND_METHOD(TrueAsync_HttpServerConfig, getHttp3PeerConnectionBudget); ZEND_METHOD(TrueAsync_HttpServerConfig, setHttp3AltSvcEnabled); ZEND_METHOD(TrueAsync_HttpServerConfig, isHttp3AltSvcEnabled); +ZEND_METHOD(TrueAsync_HttpServerConfig, setCompressionEnabled); +ZEND_METHOD(TrueAsync_HttpServerConfig, isCompressionEnabled); +ZEND_METHOD(TrueAsync_HttpServerConfig, setCompressionLevel); +ZEND_METHOD(TrueAsync_HttpServerConfig, getCompressionLevel); +ZEND_METHOD(TrueAsync_HttpServerConfig, setCompressionMinSize); +ZEND_METHOD(TrueAsync_HttpServerConfig, getCompressionMinSize); +ZEND_METHOD(TrueAsync_HttpServerConfig, setCompressionMimeTypes); +ZEND_METHOD(TrueAsync_HttpServerConfig, getCompressionMimeTypes); +ZEND_METHOD(TrueAsync_HttpServerConfig, setRequestMaxDecompressedSize); +ZEND_METHOD(TrueAsync_HttpServerConfig, getRequestMaxDecompressedSize); ZEND_METHOD(TrueAsync_HttpServerConfig, setWriteBufferSize); ZEND_METHOD(TrueAsync_HttpServerConfig, getWriteBufferSize); ZEND_METHOD(TrueAsync_HttpServerConfig, enableHttp2); @@ -296,6 +320,16 @@ static const zend_function_entry class_TrueAsync_HttpServerConfig_methods[] = { ZEND_ME(TrueAsync_HttpServerConfig, getHttp3PeerConnectionBudget, arginfo_class_TrueAsync_HttpServerConfig_getHttp3PeerConnectionBudget, ZEND_ACC_PUBLIC) ZEND_ME(TrueAsync_HttpServerConfig, setHttp3AltSvcEnabled, arginfo_class_TrueAsync_HttpServerConfig_setHttp3AltSvcEnabled, ZEND_ACC_PUBLIC) ZEND_ME(TrueAsync_HttpServerConfig, isHttp3AltSvcEnabled, 
arginfo_class_TrueAsync_HttpServerConfig_isHttp3AltSvcEnabled, ZEND_ACC_PUBLIC) + ZEND_ME(TrueAsync_HttpServerConfig, setCompressionEnabled, arginfo_class_TrueAsync_HttpServerConfig_setCompressionEnabled, ZEND_ACC_PUBLIC) + ZEND_ME(TrueAsync_HttpServerConfig, isCompressionEnabled, arginfo_class_TrueAsync_HttpServerConfig_isCompressionEnabled, ZEND_ACC_PUBLIC) + ZEND_ME(TrueAsync_HttpServerConfig, setCompressionLevel, arginfo_class_TrueAsync_HttpServerConfig_setCompressionLevel, ZEND_ACC_PUBLIC) + ZEND_ME(TrueAsync_HttpServerConfig, getCompressionLevel, arginfo_class_TrueAsync_HttpServerConfig_getCompressionLevel, ZEND_ACC_PUBLIC) + ZEND_ME(TrueAsync_HttpServerConfig, setCompressionMinSize, arginfo_class_TrueAsync_HttpServerConfig_setCompressionMinSize, ZEND_ACC_PUBLIC) + ZEND_ME(TrueAsync_HttpServerConfig, getCompressionMinSize, arginfo_class_TrueAsync_HttpServerConfig_getCompressionMinSize, ZEND_ACC_PUBLIC) + ZEND_ME(TrueAsync_HttpServerConfig, setCompressionMimeTypes, arginfo_class_TrueAsync_HttpServerConfig_setCompressionMimeTypes, ZEND_ACC_PUBLIC) + ZEND_ME(TrueAsync_HttpServerConfig, getCompressionMimeTypes, arginfo_class_TrueAsync_HttpServerConfig_getCompressionMimeTypes, ZEND_ACC_PUBLIC) + ZEND_ME(TrueAsync_HttpServerConfig, setRequestMaxDecompressedSize, arginfo_class_TrueAsync_HttpServerConfig_setRequestMaxDecompressedSize, ZEND_ACC_PUBLIC) + ZEND_ME(TrueAsync_HttpServerConfig, getRequestMaxDecompressedSize, arginfo_class_TrueAsync_HttpServerConfig_getRequestMaxDecompressedSize, ZEND_ACC_PUBLIC) ZEND_ME(TrueAsync_HttpServerConfig, setWriteBufferSize, arginfo_class_TrueAsync_HttpServerConfig_setWriteBufferSize, ZEND_ACC_PUBLIC) ZEND_ME(TrueAsync_HttpServerConfig, getWriteBufferSize, arginfo_class_TrueAsync_HttpServerConfig_getWriteBufferSize, ZEND_ACC_PUBLIC) ZEND_ME(TrueAsync_HttpServerConfig, enableHttp2, arginfo_class_TrueAsync_HttpServerConfig_enableHttp2, ZEND_ACC_PUBLIC) diff --git a/tests/phpt/server/compression/001-config-setters.phpt 
b/tests/phpt/server/compression/001-config-setters.phpt new file mode 100644 index 0000000..3d677fa --- /dev/null +++ b/tests/phpt/server/compression/001-config-setters.phpt @@ -0,0 +1,103 @@ +--TEST-- +HttpServerConfig: compression setter validation, defaults, locked-config guard (#8) +--EXTENSIONS-- +true_async_server +true_async +--SKIPIF-- + +--FILE-- +isCompressionEnabled(), "\n"; +echo "level=", $c->getCompressionLevel(), "\n"; +echo "minSize=", $c->getCompressionMinSize(), "\n"; +echo "reqMax=", $c->getRequestMaxDecompressedSize(), "\n"; + +// Default whitelist materialised so getter shows the live policy. +$mt = $c->getCompressionMimeTypes(); +sort($mt); +echo "mime[default]=", implode(",", $mt), "\n"; + +// === valid sets persist + chain === +$c->setCompressionEnabled(false) + ->setCompressionLevel(9) + ->setCompressionMinSize(2048) + ->setRequestMaxDecompressedSize(4 * 1024 * 1024); +echo "set enabled=", (int)$c->isCompressionEnabled(), "\n"; +echo "set level=", $c->getCompressionLevel(), "\n"; +echo "set min=", $c->getCompressionMinSize(), "\n"; +echo "set reqMax=", $c->getRequestMaxDecompressedSize(), "\n"; + +// === reqMax=0 is allowed (must be explicit "no cap") === +$c->setRequestMaxDecompressedSize(0); +echo "reqMax=0 accepted=", $c->getRequestMaxDecompressedSize() === 0 ? 
1 : 0, "\n"; + +// === MIME setter REPLACES wholesale, normalises (lowercase, strip params, trim) === +$c->setCompressionMimeTypes([ + 'TEXT/HTML', // case-fold + ' application/json ; charset=utf-8 ', // trim + strip param + 'application/json', // dedup vs the above after normalisation +]); +$mt = $c->getCompressionMimeTypes(); +sort($mt); +echo "mime[set]=", implode(",", $mt), "\n"; + +// === negative / out-of-range rejection === +function expect_reject(callable $fn, string $label): void { + try { $fn(); echo "$label ACCEPTED\n"; } + catch (Throwable $e) { echo "$label rejected\n"; } +} +expect_reject(fn() => $c->setCompressionLevel(0), "level=0"); +expect_reject(fn() => $c->setCompressionLevel(10), "level=10"); +expect_reject(fn() => $c->setCompressionLevel(-1), "level<0"); +expect_reject(fn() => $c->setCompressionMinSize(-1), "min<0"); +expect_reject(fn() => $c->setCompressionMinSize(64*1024*1024), "min>16MiB"); +expect_reject(fn() => $c->setRequestMaxDecompressedSize(-1), "reqMax<0"); +expect_reject(fn() => $c->setCompressionMimeTypes([123]), "mime not string"); +expect_reject(fn() => $c->setCompressionMimeTypes([' ;x']), "mime empty after strip"); + +// === locked-config guard via HttpServer ctor === +$c2 = new HttpServerConfig(); +$c2->addListener('127.0.0.1', 19998); +$srv = new HttpServer($c2); +expect_reject(fn() => $c2->setCompressionEnabled(false), "enabled locked"); +expect_reject(fn() => $c2->setCompressionLevel(3), "level locked"); +expect_reject(fn() => $c2->setCompressionMinSize(8192), "min locked"); +expect_reject(fn() => $c2->setCompressionMimeTypes(['text/html']),"mime locked"); +expect_reject(fn() => $c2->setRequestMaxDecompressedSize(1024), "reqMax locked"); + +echo "Done\n"; +?> +--EXPECT-- +enabled=1 +level=6 +minSize=1024 +reqMax=10485760 +mime[default]=application/javascript,application/json,application/xml,image/svg+xml,text/css,text/html,text/javascript,text/plain,text/xml +set enabled=0 +set level=9 +set min=2048 +set reqMax=4194304 
+reqMax=0 accepted=1 +mime[set]=application/json,text/html +level=0 rejected +level=10 rejected +level<0 rejected +min<0 rejected +min>16MiB rejected +reqMax<0 rejected +mime not string rejected +mime empty after strip rejected +enabled locked rejected +level locked rejected +min locked rejected +mime locked rejected +reqMax locked rejected +Done diff --git a/tests/phpt/server/compression/010-h1-buffered-gzip.phpt b/tests/phpt/server/compression/010-h1-buffered-gzip.phpt new file mode 100644 index 0000000..36e7322 --- /dev/null +++ b/tests/phpt/server/compression/010-h1-buffered-gzip.phpt @@ -0,0 +1,113 @@ +--TEST-- +Compression H1 buffered: gzip when Accept-Encoding: gzip + whitelisted MIME (#8) +--EXTENSIONS-- +true_async_server +true_async +--SKIPIF-- + +--FILE-- +addListener('127.0.0.1', $port) + ->setReadTimeout(5) + ->setWriteTimeout(5); + +$server = new HttpServer($config); + +$server->addHttpHandler(function ($req, $resp) { + $resp->setHeader('Content-Type', 'text/html; charset=utf-8') + ->setBody(str_repeat("Hello, gzip!\n", 200)) /* > 1024 byte threshold */ + ->end(); +}); + +/* Issue a raw H1 request with Accept-Encoding: gzip and parse the + * response head. We don't need a full HTTP client — we only need the + * status line, headers, and the gzipped body. */ +function fetch(string $host, int $port, string $accept_encoding): array { + $fp = stream_socket_client("tcp://$host:$port", $errno, $errstr, 2); + stream_set_timeout($fp, 2); + fwrite($fp, + "GET / HTTP/1.1\r\n" + . "Host: x\r\n" + . ($accept_encoding !== '' ? "Accept-Encoding: $accept_encoding\r\n" : '') + . 
"Connection: close\r\n\r\n"); + $raw = ''; + while (!feof($fp)) { + $c = fread($fp, 8192); + if ($c === '' || $c === false) break; + $raw .= $c; + } + fclose($fp); + [$head, $body] = explode("\r\n\r\n", $raw, 2); + $lines = explode("\r\n", $head); + $status = array_shift($lines); + $headers = []; + foreach ($lines as $l) { + if (strpos($l, ':') === false) continue; + [$k, $v] = explode(':', $l, 2); + $headers[strtolower(trim($k))] = trim($v); + } + return [$status, $headers, $body]; +} + +$client = spawn(function () use ($port, $server) { + delay(20); + + /* 1. Accept-Encoding: gzip → compressed */ + [$status, $h, $body] = fetch('127.0.0.1', $port, 'gzip'); + echo "case A status: $status\n"; + echo "case A content-encoding: ", $h['content-encoding'] ?? '', "\n"; + echo "case A vary: ", $h['vary'] ?? '', "\n"; + echo "case A is-gzip-magic: ", (substr($body, 0, 2) === "\x1f\x8b") ? 1 : 0, "\n"; + /* Pipe the gzipped body through gunzip(1) — no ext/zlib needed. */ + $proc = proc_open(['gunzip'], [ + 0 => ['pipe', 'r'], + 1 => ['pipe', 'w'], + 2 => ['pipe', 'w'], + ], $pipes); + fwrite($pipes[0], $body); + fclose($pipes[0]); + $decoded = stream_get_contents($pipes[1]); + fclose($pipes[1]); fclose($pipes[2]); + proc_close($proc); + echo "case A round-trip: ", ($decoded === str_repeat("Hello, gzip!\n", 200)) ? "ok" : "MISMATCH", "\n"; + + /* 2. No Accept-Encoding (default header semantics: identity only) */ + [, $h, $body] = fetch('127.0.0.1', $port, ''); + echo "case B content-encoding: ", $h['content-encoding'] ?? '', "\n"; + echo "case B body-len: ", strlen($body), "\n"; + + /* 3. Accept-Encoding: gzip;q=0 → identity */ + [, $h, $body] = fetch('127.0.0.1', $port, 'gzip;q=0'); + echo "case C content-encoding: ", $h['content-encoding'] ?? 
'', "\n"; + + delay(50); + $server->stop(); +}); + +$server->start(); +await($client); + +echo "Done\n"; +?> +--EXPECT-- +case A status: HTTP/1.1 200 OK +case A content-encoding: gzip +case A vary: Accept-Encoding +case A is-gzip-magic: 1 +case A round-trip: ok +case B content-encoding: +case B body-len: 2600 +case C content-encoding: +Done diff --git a/tests/phpt/server/compression/011-h1-buffered-skips.phpt b/tests/phpt/server/compression/011-h1-buffered-skips.phpt new file mode 100644 index 0000000..c5baae6 --- /dev/null +++ b/tests/phpt/server/compression/011-h1-buffered-skips.phpt @@ -0,0 +1,123 @@ +--TEST-- +Compression H1 buffered: skip rules (HEAD, Range, MIME, threshold, opt-out, handler CE) (#8) +--EXTENSIONS-- +true_async_server +true_async +--SKIPIF-- + +--FILE-- +addListener('127.0.0.1', $port) + ->setReadTimeout(5) + ->setWriteTimeout(5); + +$server = new HttpServer($config); + +$body = str_repeat("Hello, gzip!\n", 200); /* 2600 bytes */ + +$server->addHttpHandler(function ($req, $resp) use ($body) { + $path = $req->getPath(); + if ($path === '/png') { + /* Non-whitelisted MIME — should not be compressed even with AE: gzip. */ + $resp->setHeader('Content-Type', 'image/png')->setBody($body)->end(); + } elseif ($path === '/small') { + /* Below 1024-byte threshold. */ + $resp->setHeader('Content-Type', 'text/html')->setBody('hi')->end(); + } elseif ($path === '/optout') { + $resp->setHeader('Content-Type', 'text/html') + ->setNoCompression() + ->setBody($body) + ->end(); + } elseif ($path === '/preencoded') { + /* Handler already set Content-Encoding — server must not double-encode. 
*/ + $resp->setHeader('Content-Type', 'text/html') + ->setHeader('Content-Encoding', 'br') + ->setBody($body) + ->end(); + } elseif ($path === '/204') { + $resp->setStatusCode(204)->end(); + } else { + $resp->setHeader('Content-Type', 'text/html')->setBody($body)->end(); + } +}); + +function fetch(string $port, string $path, array $req_headers): array { + $fp = stream_socket_client("tcp://127.0.0.1:$port", $errno, $errstr, 2); + stream_set_timeout($fp, 2); + [$method, $resource] = (str_starts_with($path, 'HEAD ')) + ? ['HEAD', substr($path, 5)] : ['GET', $path]; + $hdr = "$method $resource HTTP/1.1\r\nHost: x\r\nConnection: close\r\n"; + foreach ($req_headers as $k => $v) $hdr .= "$k: $v\r\n"; + fwrite($fp, $hdr . "\r\n"); + $raw = ''; + while (!feof($fp)) { + $c = fread($fp, 8192); + if ($c === '' || $c === false) break; + $raw .= $c; + } + fclose($fp); + @[$head, $body] = explode("\r\n\r\n", $raw, 2); + $lines = explode("\r\n", $head); + array_shift($lines); + $headers = []; + foreach ($lines as $l) { + if (strpos($l, ':') === false) continue; + [$k, $v] = explode(':', $l, 2); + $headers[strtolower(trim($k))] = trim($v); + } + return [$headers, $body ?? '']; +} + +$client = spawn(function () use ($port, $server) { + delay(20); + + [$h] = fetch($port, '/png', ['Accept-Encoding' => 'gzip']); + echo "png CE: ", $h['content-encoding'] ?? '', "\n"; + + [$h] = fetch($port, '/small', ['Accept-Encoding' => 'gzip']); + echo "small CE: ", $h['content-encoding'] ?? '', "\n"; + + [$h] = fetch($port, '/optout', ['Accept-Encoding' => 'gzip']); + echo "optout CE: ", $h['content-encoding'] ?? '', "\n"; + + [$h] = fetch($port, '/preencoded', ['Accept-Encoding' => 'gzip']); + echo "preencoded CE: ", $h['content-encoding'] ?? '', "\n"; + + [$h, $b] = fetch($port, 'HEAD /', ['Accept-Encoding' => 'gzip']); + echo "HEAD CE: ", $h['content-encoding'] ?? 
'', "\n"; + + [$h] = fetch($port, '/', ['Accept-Encoding' => 'gzip', 'Range' => 'bytes=0-99']); + echo "range CE: ", $h['content-encoding'] ?? '', "\n"; + + [$h] = fetch($port, '/204', ['Accept-Encoding' => 'gzip']); + echo "204 CE: ", $h['content-encoding'] ?? '', "\n"; + + delay(50); + $server->stop(); +}); + +$server->start(); +await($client); + +echo "Done\n"; +?> +--EXPECT-- +png CE: +small CE: +optout CE: +preencoded CE: br +HEAD CE: +range CE: +204 CE: +Done diff --git a/tests/phpt/server/compression/012-h1-streaming-gzip.phpt b/tests/phpt/server/compression/012-h1-streaming-gzip.phpt new file mode 100644 index 0000000..d4dfc8b --- /dev/null +++ b/tests/phpt/server/compression/012-h1-streaming-gzip.phpt @@ -0,0 +1,120 @@ +--TEST-- +Compression H1 streaming: chunked + gzip round-trip (#8) +--EXTENSIONS-- +true_async_server +true_async +--SKIPIF-- + +--FILE-- +addListener('127.0.0.1', $port) + ->setReadTimeout(5) + ->setWriteTimeout(5); + +$server = new HttpServer($config); + +/* Streaming handler emits the same payload over four send() chunks + + * an end() finaliser. Compression wrapper must produce a single valid + * gzip stream regardless of chunk boundaries. */ +$payload = str_repeat("Hello, streaming gzip!\n", 100); + +$server->addHttpHandler(function ($req, $resp) use ($payload) { + $resp->setHeader('Content-Type', 'text/html'); + $q = strlen($payload) / 4; + $resp->send(substr($payload, 0, $q)); + $resp->send(substr($payload, $q, $q)); + $resp->send(substr($payload, 2*$q, $q)); + $resp->end(substr($payload, 3*$q)); +}); + +/* Read the full response from the wire — server signals EOF via + * the terminator zero-chunk + connection close. */ +function fetch_chunked(string $port): array { + $fp = stream_socket_client("tcp://127.0.0.1:$port", $errno, $errstr, 2); + stream_set_timeout($fp, 2); + fwrite($fp, + "GET / HTTP/1.1\r\n" + . "Host: x\r\n" + . "Accept-Encoding: gzip\r\n" + . 
"Connection: close\r\n\r\n"); + $raw = ''; + while (!feof($fp)) { + $c = fread($fp, 8192); + if ($c === '' || $c === false) break; + $raw .= $c; + } + fclose($fp); + [$head, $rest] = explode("\r\n\r\n", $raw, 2); + $lines = explode("\r\n", $head); + array_shift($lines); + $headers = []; + foreach ($lines as $l) { + if (strpos($l, ':') === false) continue; + [$k, $v] = explode(':', $l, 2); + $headers[strtolower(trim($k))] = trim($v); + } + /* Decode chunked transfer encoding — sequence of "\r\n\r\n" + * with a terminating "0\r\n\r\n". */ + $body = ''; + $i = 0; + while ($i < strlen($rest)) { + $eol = strpos($rest, "\r\n", $i); + if ($eol === false) break; + $size = hexdec(substr($rest, $i, $eol - $i)); + $i = $eol + 2; + if ($size === 0) break; + $body .= substr($rest, $i, $size); + $i += $size + 2; /* trailing \r\n after chunk */ + } + return [$headers, $body]; +} + +$client = spawn(function () use ($port, $server, $payload) { + delay(20); + + [$h, $body] = fetch_chunked($port); + echo "transfer-encoding: ", $h['transfer-encoding'] ?? '', "\n"; + echo "content-encoding: ", $h['content-encoding'] ?? '', "\n"; + echo "vary: ", $h['vary'] ?? '', "\n"; + echo "content-length: ", $h['content-length'] ?? '', "\n"; + echo "is-gzip-magic: ", (substr($body, 0, 2) === "\x1f\x8b") ? 1 : 0, "\n"; + + /* Pipe through gunzip to recover the original payload. */ + $proc = proc_open(['gunzip'], [ + 0 => ['pipe', 'r'], 1 => ['pipe', 'w'], 2 => ['pipe', 'w'], + ], $pipes); + fwrite($pipes[0], $body); + fclose($pipes[0]); + $decoded = stream_get_contents($pipes[1]); + fclose($pipes[1]); fclose($pipes[2]); + proc_close($proc); + echo "round-trip: ", ($decoded === $payload) ? "ok" : "MISMATCH (got " . strlen($decoded) . " bytes vs expected " . strlen($payload) . 
")", "\n"; + + delay(50); + $server->stop(); +}); + +$server->start(); +await($client); +echo "Done\n"; +?> +--EXPECT-- +transfer-encoding: chunked +content-encoding: gzip +vary: Accept-Encoding +content-length: +is-gzip-magic: 1 +round-trip: ok +Done diff --git a/tests/phpt/server/compression/020-h2-buffered-gzip.phpt b/tests/phpt/server/compression/020-h2-buffered-gzip.phpt new file mode 100644 index 0000000..9f653e9 --- /dev/null +++ b/tests/phpt/server/compression/020-h2-buffered-gzip.phpt @@ -0,0 +1,76 @@ +--TEST-- +Compression H2: buffered gzip + identity skip + per-stream MIME match (#8) +--EXTENSIONS-- +true_async_server +true_async +--SKIPIF-- + true]); +?> +--FILE-- +addListener('127.0.0.1', $port) + ->setReadTimeout(5) + ->setWriteTimeout(5); + +$server = new HttpServer($config); + +$body = str_repeat("Hello, h2 gzip!\n", 200); /* > 1024 byte threshold */ + +$server->addHttpHandler(function ($req, $resp) use ($body) { + if ($req->getPath() === '/png') { + $resp->setHeader('Content-Type', 'image/png')->setBody($body); + } else { + $resp->setHeader('Content-Type', 'text/html')->setBody($body); + } +}); + +$client = spawn(function () use ($port, $server) { + usleep(30000); + + /* curl over h2 with --compressed → server should gzip the text/html + * response and curl gunzips before display. We verify by piping + * curl output through gunzip ourselves to see the response head + * (which carries content-encoding: gzip). */ + $cmd = sprintf( + 'curl --http2-prior-knowledge -sS -i --max-time 3 ' + . '-H "Accept-Encoding: gzip" ' + . 'http://127.0.0.1:%d/ | head -c 4096', + $port); + $out = shell_exec($cmd); + /* Header section ends at \r\n\r\n. */ + $hdr = explode("\r\n\r\n", $out, 2)[0] ?? ''; + echo "html has CE-gzip: ", (stripos($hdr, "content-encoding: gzip") !== false) ? 1 : 0, "\n"; + echo "html has Vary: ", (stripos($hdr, "vary: ") !== false) ? 1 : 0, "\n"; + + /* PNG path: must NOT be compressed. 
*/ + $cmd = sprintf( + 'curl --http2-prior-knowledge -sS -i --max-time 3 ' + . '-H "Accept-Encoding: gzip" ' + . 'http://127.0.0.1:%d/png | head -c 4096', + $port); + $out = shell_exec($cmd); + $hdr = explode("\r\n\r\n", $out, 2)[0] ?? ''; + echo "png has CE-gzip: ", (stripos($hdr, "content-encoding: gzip") !== false) ? 1 : 0, "\n"; + + $server->stop(); +}); + +$server->start(); +await($client); +echo "Done\n"; +?> +--EXPECT-- +html has CE-gzip: 1 +html has Vary: 1 +png has CE-gzip: 0 +Done diff --git a/tests/phpt/server/compression/030-h1-request-gzip-in.phpt b/tests/phpt/server/compression/030-h1-request-gzip-in.phpt new file mode 100644 index 0000000..7c92a54 --- /dev/null +++ b/tests/phpt/server/compression/030-h1-request-gzip-in.phpt @@ -0,0 +1,111 @@ +--TEST-- +Compression: gzipped request body decoded; bomb cap → 413; unknown coding → 415 (#8) +--EXTENSIONS-- +true_async_server +true_async +--SKIPIF-- + +--FILE-- +addListener('127.0.0.1', $port) + ->setReadTimeout(5) + ->setWriteTimeout(5) + ->setRequestMaxDecompressedSize(64 * 1024); /* 64 KiB anti-bomb */ + +$server = new HttpServer($config); + +$server->addHttpHandler(function ($req, $resp) { + /* Echo the body length the handler observed — proves the parser + * delivered decoded bytes, not the gzip envelope. */ + $resp->setHeader('Content-Type', 'text/plain') + ->setBody('len=' . strlen($req->getBody())) + ->end(); +}); + +/* Build a gzipped payload via gzip(1) so we don't depend on ext/zlib. 
*/ +function gzip_string(string $s): string { + $proc = proc_open(['gzip', '-c'], [ + 0 => ['pipe', 'r'], 1 => ['pipe', 'w'], 2 => ['pipe', 'w'], + ], $pipes); + fwrite($pipes[0], $s); + fclose($pipes[0]); + $out = stream_get_contents($pipes[1]); + fclose($pipes[1]); fclose($pipes[2]); + proc_close($proc); + return $out; +} + +function post(string $port, string $body, string $content_encoding): array { + $fp = stream_socket_client("tcp://127.0.0.1:$port", $errno, $errstr, 2); + stream_set_timeout($fp, 2); + $req = "POST /echo HTTP/1.1\r\nHost: x\r\n" + . "Content-Length: " . strlen($body) . "\r\n" + . ($content_encoding !== '' ? "Content-Encoding: $content_encoding\r\n" : '') + . "Connection: close\r\n\r\n"; + fwrite($fp, $req . $body); + $raw = ''; + while (!feof($fp)) { + $c = fread($fp, 8192); + if ($c === '' || $c === false) break; + $raw .= $c; + } + fclose($fp); + [$head, $body_out] = explode("\r\n\r\n", $raw, 2) + ['', '']; + $status = (int)(explode(' ', $head)[1] ?? 0); + return [$status, $body_out]; +} + +$client = spawn(function () use ($port, $server) { + delay(20); + + /* 1. gzipped 1 KiB payload — handler must see decoded bytes. */ + $payload = str_repeat("A", 1024); + $gz = gzip_string($payload); + [$status, $body] = post($port, $gz, 'gzip'); + echo "gzip status: $status\n"; + echo "gzip body: $body\n"; + + /* 2. bomb: 200 KiB of 'A' compresses to ~200 bytes; cap is 64 KiB + * → decoder must reject with 413. */ + $bomb = str_repeat("A", 200 * 1024); + $gzbomb = gzip_string($bomb); + [$status,] = post($port, $gzbomb, 'gzip'); + echo "bomb status: $status\n"; + + /* 3. unknown coding → 415. */ + [$status,] = post($port, 'whatever', 'br'); + echo "unknown status: $status\n"; + + /* 4. identity coding → no-op decode, handler sees raw body. 
*/ + [$status, $body] = post($port, "hello", 'identity'); + echo "identity status: $status\n"; + echo "identity body: $body\n"; + + delay(50); + $server->stop(); +}); + +$server->start(); +await($client); +echo "Done\n"; +?> +--EXPECT-- +gzip status: 200 +gzip body: len=1024 +bomb status: 413 +unknown status: 415 +identity status: 200 +identity body: len=5 +Done diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index df73b0c..1e4d6c6 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -333,5 +333,96 @@ if(NGHTTP2_FOUND) endif() endif() +# Compression negotiation tests (issue #8). The negotiate TU is pure +# C with no Zend deps so the unit test compiles standalone — no PHP +# headers, no llhttp. Other compression sources (registry, gzip backend, +# defaults) require HAVE_HTTP_COMPRESSION at compile time; we set it +# directly here rather than re-running the pkg-config probe so the test +# build stays decoupled from the main feature flag. +add_executable(test_compression_negotiate + compression/test_negotiate.c + ${CMAKE_SOURCE_DIR}/../src/compression/http_compression_negotiate.c +) + +target_compile_definitions(test_compression_negotiate PRIVATE HAVE_HTTP_COMPRESSION=1) + +target_include_directories(test_compression_negotiate PRIVATE + ${CMAKE_SOURCE_DIR}/../include + ${CMAKE_SOURCE_DIR}/../src +) + +target_link_libraries(test_compression_negotiate + test_common + ${CMOCKA_LIBRARIES} +) + +if(UNIX) + target_link_libraries(test_compression_negotiate m pthread) +endif() + +add_test(NAME CompressionNegotiate COMMAND test_compression_negotiate) + +# Compression gzip-encoder round-trip tests. Pulls in real zlib(-ng) +# at link time and depends on the PHP runtime (the encoder uses +# emalloc/efree). pkg-config probe mirrors the main build's preference +# for zlib-ng with zlib fallback. 
+find_package(PkgConfig QUIET) +set(_compr_test_ok FALSE) +if(PkgConfig_FOUND) + pkg_check_modules(ZLIB_NG_TEST QUIET zlib-ng) + if(ZLIB_NG_TEST_FOUND) + set(_compr_test_libs ${ZLIB_NG_TEST_LIBRARIES}) + set(_compr_test_incs ${ZLIB_NG_TEST_INCLUDE_DIRS}) + set(_compr_test_def HAVE_ZLIB_NG=1) + set(_compr_test_ok TRUE) + else() + pkg_check_modules(ZLIB_TEST QUIET zlib) + if(ZLIB_TEST_FOUND) + set(_compr_test_libs ${ZLIB_TEST_LIBRARIES}) + set(_compr_test_incs ${ZLIB_TEST_INCLUDE_DIRS}) + set(_compr_test_def "") + set(_compr_test_ok TRUE) + endif() + endif() +endif() +if(NOT _compr_test_ok) + find_package(ZLIB QUIET) + if(ZLIB_FOUND) + set(_compr_test_libs ZLIB::ZLIB) + set(_compr_test_incs "") + set(_compr_test_def "") + set(_compr_test_ok TRUE) + endif() +endif() +if(_compr_test_ok) + add_executable(test_compression_gzip + compression/test_gzip_encoder.c + ${CMAKE_SOURCE_DIR}/../src/compression/http_compression.c + ${CMAKE_SOURCE_DIR}/../src/compression/http_compression_gzip.c + ) + target_compile_definitions(test_compression_gzip PRIVATE + HAVE_HTTP_COMPRESSION=1 ${_compr_test_def}) + target_include_directories(test_compression_gzip PRIVATE + ${CMAKE_SOURCE_DIR}/../include + ${CMAKE_SOURCE_DIR}/../src + ${_compr_test_incs} + ) + target_link_libraries(test_compression_gzip + test_common + ${CMOCKA_LIBRARIES} + ${_compr_test_libs} + ${CMAKE_DL_LIBS} + ) + if(UNIX) + target_link_libraries(test_compression_gzip m pthread rt) + endif() + add_test(NAME CompressionGzip COMMAND test_compression_gzip) + if(UNIX AND NOT APPLE) + set_tests_properties(CompressionGzip PROPERTIES + ENVIRONMENT "LD_LIBRARY_PATH=${PHP_PREFIX}/lib:$ENV{LD_LIBRARY_PATH}" + ) + endif() +endif() + # Install test binaries (optional) # install(TARGETS test_http1_parser test_http1_parser_edge_cases test_multipart_parser DESTINATION bin/tests) diff --git a/tests/unit/compression/test_gzip_encoder.c b/tests/unit/compression/test_gzip_encoder.c new file mode 100644 index 0000000..cbec413 --- /dev/null +++ 
b/tests/unit/compression/test_gzip_encoder.c @@ -0,0 +1,210 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) TrueAsync | + +----------------------------------------------------------------------+ + | Licensed under the Apache License, Version 2.0 | + +----------------------------------------------------------------------+ +*/ + +/* + * gzip backend round-trip tests. We compress with the production + * encoder, then inflate with the same zlib(-ng) the encoder was + * linked against and assert byte-equality. That covers: + * - small one-shot bodies fitting in a single write+finish + * - large bodies that cross the encoder's internal output buffer + * - tiny output buffers that force NEED_OUTPUT looping + * - empty body (header + trailer only — exercises finish-only path) + */ + +#include +#include +#include +#include +#include +#include + +#include "common/php_sapi_test.h" +#include "compression/http_encoder.h" /* NOTE(review): the six system #include directives above, and the two inside the zlib selection block below, have lost their header names in this copy of the patch; they must be restored before the file can compile */ + +#ifdef HAVE_ZLIB_NG /* map the ZS_* names onto the zng_-prefixed zlib-ng API, or onto plain zlib otherwise */ +# include +# define ZS zng_stream +# define ZS_INFLATE_INIT2 zng_inflateInit2 +# define ZS_INFLATE zng_inflate +# define ZS_INFLATE_END zng_inflateEnd +#else +# include +# define ZS z_stream +# define ZS_INFLATE_INIT2 inflateInit2 +# define ZS_INFLATE inflate +# define ZS_INFLATE_END inflateEnd +#endif + +/* Uncompress the gzip stream in[0..in_len) into a freshly allocated (malloc) buffer that is grown as needed. Returns the buffer and stores the decoded size in *out_len; caller frees. Any inflate status other than Z_OK or Z_STREAM_END fails the running test. */ +static unsigned char *gunzip(const unsigned char *in, size_t in_len, size_t *out_len) +{ + ZS s; + memset(&s, 0, sizeof(s)); + /* windowBits 15+32: 15 selects the maximum 32 KiB window; adding 32 makes inflate auto-detect a zlib or gzip header. 
*/ + assert_int_equal(ZS_INFLATE_INIT2(&s, 15 + 32), Z_OK); + + size_t cap = in_len * 4 + 64; /* initial size guess; doubled in the loop whenever inflate fills it */ + unsigned char *out = malloc(cap); /* NOTE(review): malloc and realloc results are not checked in this helper */ + size_t produced = 0; + + s.next_in = (void *)(uintptr_t)in; /* the cast strips const from the input pointer for the stream field */ + s.avail_in = (unsigned)in_len; + + for (;;) { + s.next_out = out + produced; + s.avail_out = (unsigned)(cap - produced); + int rc = ZS_INFLATE(&s, Z_NO_FLUSH); + produced = cap - s.avail_out; /* total bytes decoded so far */ + if (rc == Z_STREAM_END) break; + assert_int_equal(rc, Z_OK); + if (s.avail_out == 0) { + cap *= 2; + out = realloc(out, cap); + } + } + ZS_INFLATE_END(&s); + *out_len = produced; + return out; +} + +/* Drive the production gzip encoder (created with level 6) through write+finish, handing it an out_chunk-byte output buffer on every call + * and accumulating the output into a freshly allocated byte vector. Stores the compressed size in *out_len; caller frees the result. */ +static unsigned char *gzip_via_encoder(const unsigned char *in, size_t in_len, + size_t out_chunk, size_t *out_len) +{ + const http_encoder_vtable_t *vt = http_compression_lookup(HTTP_CODEC_GZIP); + assert_non_null(vt); + http_encoder_t *enc = vt->create(6); + assert_non_null(enc); + + size_t cap = in_len + 64; + unsigned char *out = malloc(cap); + size_t produced = 0; + unsigned char *chunk = malloc(out_chunk); + + /* feed input: append whatever each write() produced and advance by what it consumed. NOTE(review): the loop assumes write() makes progress; if it kept reporting consumed == 0 the loop would not terminate. */ + size_t fed = 0; + while (fed < in_len) { + size_t consumed = 0, written = 0; + http_encoder_status_t st = vt->write(enc, + in + fed, in_len - fed, &consumed, + chunk, out_chunk, &written); + assert_true(st == HTTP_ENC_OK || st == HTTP_ENC_NEED_OUTPUT); + if (produced + written > cap) { + cap = (produced + written) * 2; + out = realloc(out, cap); + } + memcpy(out + produced, chunk, written); + produced += written; + fed += consumed; + } + + /* drain finish: keep calling finish() until it reports HTTP_ENC_DONE; the only other status accepted is HTTP_ENC_NEED_OUTPUT */ + for (;;) { + size_t written = 0; + http_encoder_status_t st = vt->finish(enc, chunk, out_chunk, &written); + if (produced + written > cap) { + cap = (produced + written) * 2; + out = realloc(out, cap); + } + memcpy(out + produced, chunk, written); + produced += written; + if (st == HTTP_ENC_DONE) break; + assert_int_equal(st, HTTP_ENC_NEED_OUTPUT); + } + + vt->destroy(enc); 
+ free(chunk); + *out_len = produced; + return out; +} + +/* Round-trip helper: gzip the input through the production encoder using out_chunk-byte output buffers, check the gzip magic, gunzip the result and assert it is byte-identical to the input. */ static void roundtrip_assert(const unsigned char *in, size_t in_len, size_t out_chunk) +{ + size_t gz_len = 0; + unsigned char *gz = gzip_via_encoder(in, in_len, out_chunk, &gz_len); + /* gzip frame must start with magic 1f 8b. */ + assert_true(gz_len >= 2); + assert_int_equal(gz[0], 0x1f); + assert_int_equal(gz[1], 0x8b); + + size_t back_len = 0; + unsigned char *back = gunzip(gz, gz_len, &back_len); + assert_int_equal(back_len, in_len); + if (in_len > 0) assert_memory_equal(back, in, in_len); /* lengths already matched above; the guard skips the byte comparison for the empty-body case */ + + free(gz); + free(back); +} + +static void test_short_text(void **state) +{ + (void)state; + const char *msg = "Hello, gzip!"; + roundtrip_assert((const unsigned char *)msg, strlen(msg), 4096); +} + +static void test_empty_body(void **state) +{ + (void)state; + roundtrip_assert((const unsigned char *)"", 0, 4096); /* zero input bytes: the feed loop in the helper is skipped, so all output comes from finish() */ +} + +static void test_large_body_crosses_chunks(void **state) +{ + (void)state; + /* 256 KiB of mixed-entropy text — large enough to cross the + * encoder's internal state several times, repetitive enough that + * deflate has something to compress. */ + size_t n = 256 * 1024; + unsigned char *buf = malloc(n); + for (size_t i = 0; i < n; i++) { + buf[i] = (unsigned char)('A' + (i * 13 + (i >> 5)) % 26); + } + roundtrip_assert(buf, n, 4096); + free(buf); +} + +static void test_tiny_output_buffer_forces_loop(void **state) +{ + (void)state; + /* A 16-byte output buffer is smaller than the gzip framing alone (10-byte header plus 8-byte trailer, RFC 1952), so + * the compressed stream cannot be produced in a single call and the NEED_OUTPUT loops must run more than once. */ + const char *msg = + "the quick brown fox jumps over the lazy dog. " + "the quick brown fox jumps over the lazy dog. 
" + "the quick brown fox jumps over the lazy dog."; + roundtrip_assert((const unsigned char *)msg, strlen(msg), 16); +} + +static void test_create_clamps_level(void **state) +{ + (void)state; + const http_encoder_vtable_t *vt = http_compression_lookup(HTTP_CODEC_GZIP); + /* Out-of-range levels must not crash — we clamp internally. The test only checks that create() still returns an encoder for 0, 10 and -1; it does not observe the clamped level. NOTE(review): vt is dereferenced here without the assert_non_null(vt) that gzip_via_encoder performs. */ + http_encoder_t *e0 = vt->create(0); assert_non_null(e0); vt->destroy(e0); + http_encoder_t *e10 = vt->create(10); assert_non_null(e10); vt->destroy(e10); + http_encoder_t *eN = vt->create(-1); assert_non_null(eN); vt->destroy(eN); +} + +int main(void) +{ + if (php_test_runtime_init() != 0) return 1; /* bail out with exit status 1 if the PHP test runtime cannot be initialised */ + + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_short_text), + cmocka_unit_test(test_empty_body), + cmocka_unit_test(test_large_body_crosses_chunks), + cmocka_unit_test(test_tiny_output_buffer_forces_loop), + cmocka_unit_test(test_create_clamps_level), + }; + int rc = cmocka_run_group_tests(tests, NULL, NULL); /* no group-level setup or teardown */ + + php_test_runtime_shutdown(); + return rc; /* the cmocka result is reported only after the runtime has been shut down */ +} diff --git a/tests/unit/compression/test_negotiate.c b/tests/unit/compression/test_negotiate.c new file mode 100644 index 0000000..a68c313 --- /dev/null +++ b/tests/unit/compression/test_negotiate.c @@ -0,0 +1,243 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) TrueAsync | + +----------------------------------------------------------------------+ + | Licensed under the Apache License, Version 2.0 | + +----------------------------------------------------------------------+ +*/ + +#include +#include +#include +#include +#include + +#include "compression/http_compression_negotiate.h" /* NOTE(review): the five system #include directives above have lost their header names in this copy of the patch */ + +/* Convenience: parse a literal C string and inspect the result. 
*/ +#define PARSE(lit, ae) do { \ + http_accept_encoding_parse((lit), sizeof(lit) - 1, &(ae)); \ +} while (0) /* sizeof(lit) - 1 drops the terminating NUL, so lit has to be a string literal (an array), not a char pointer */ + +/* ---- Accept-Encoding parser ------------------------------------------ */ + +static void test_default_is_identity_only(void **state) +{ + (void)state; + /* No header sent → conservative default: only identity. The init + * helper deliberately diverges from RFC 9110's "any coding + * acceptable" — see helper docstring for the rationale. */ + http_accept_encoding_t ae; + http_accept_encoding_init_default(&ae); + assert_false(ae.gzip_acceptable); + assert_true(ae.identity_acceptable); + assert_int_equal(http_accept_encoding_select(&ae), HTTP_CODEC_IDENTITY); +} + +static void test_empty_header_means_identity_only(void **state) +{ + (void)state; + http_accept_encoding_t ae; + /* RFC: empty header value → no content coding wanted. */ + PARSE("", ae); + assert_false(ae.gzip_acceptable); + assert_true(ae.identity_acceptable); + assert_int_equal(http_accept_encoding_select(&ae), HTTP_CODEC_IDENTITY); +} + +static void test_explicit_gzip(void **state) +{ + (void)state; + http_accept_encoding_t ae; + PARSE("gzip", ae); + assert_true(ae.gzip_acceptable); + /* Identity unseen, star unseen → identity stays acceptable by default. 
*/ + assert_true(ae.identity_acceptable); + assert_int_equal(http_accept_encoding_select(&ae), HTTP_CODEC_GZIP); +} + +static void test_gzip_q_zero(void **state) +{ + (void)state; + http_accept_encoding_t ae; + PARSE("gzip;q=0", ae); + assert_false(ae.gzip_acceptable); + assert_true(ae.identity_acceptable); + assert_int_equal(http_accept_encoding_select(&ae), HTTP_CODEC_IDENTITY); +} + +static void test_gzip_q_zero_with_decimals(void **state) +{ + (void)state; + http_accept_encoding_t ae; + PARSE("gzip;q=0.000", ae); /* every spelling of zero must reject gzip, whatever the number of decimals */ + assert_false(ae.gzip_acceptable); + PARSE("gzip;q=0.0", ae); + assert_false(ae.gzip_acceptable); +} + +static void test_gzip_q_almost_zero_is_acceptable(void **state) +{ + (void)state; + http_accept_encoding_t ae; + /* q=0.001 is non-zero; treat as accepted. */ + PARSE("gzip;q=0.001", ae); + assert_true(ae.gzip_acceptable); +} + +static void test_identity_q_zero(void **state) +{ + (void)state; + http_accept_encoding_t ae; + PARSE("gzip, identity;q=0", ae); + assert_true(ae.gzip_acceptable); + assert_false(ae.identity_acceptable); + assert_int_equal(http_accept_encoding_select(&ae), HTTP_CODEC_GZIP); /* identity is refused, so gzip is the only candidate */ +} + +static void test_star_enables_gzip(void **state) +{ + (void)state; + http_accept_encoding_t ae; + PARSE("*", ae); + assert_true(ae.gzip_acceptable); + assert_true(ae.identity_acceptable); +} + +static void test_star_q_zero_excludes_identity(void **state) +{ + (void)state; + http_accept_encoding_t ae; + /* RFC: *;q=0 without an identity entry excludes identity too. 
*/ + PARSE("*;q=0", ae); + assert_false(ae.gzip_acceptable); + assert_false(ae.identity_acceptable); + assert_int_equal(http_accept_encoding_select(&ae), HTTP_CODEC__COUNT); /* nothing is acceptable; the test expects select() to return HTTP_CODEC__COUNT in that case (presumably the no-codec sentinel) */ +} + +static void test_star_q_zero_but_identity_kept(void **state) +{ + (void)state; + http_accept_encoding_t ae; + PARSE("*;q=0, identity", ae); + assert_false(ae.gzip_acceptable); + assert_true(ae.identity_acceptable); + assert_int_equal(http_accept_encoding_select(&ae), HTTP_CODEC_IDENTITY); +} + +static void test_explicit_overrides_star(void **state) +{ + (void)state; + http_accept_encoding_t ae; + /* gzip explicitly rejected even though * accepts everything. */ + PARSE("*, gzip;q=0", ae); + assert_false(ae.gzip_acceptable); + assert_true(ae.identity_acceptable); +} + +static void test_unknown_codings_ignored(void **state) +{ + (void)state; + http_accept_encoding_t ae; + PARSE("br, zstd, gzip", ae); /* br and zstd precede gzip and must not stop it from being recognised */ + assert_true(ae.gzip_acceptable); +} + +static void test_case_insensitive_coding_and_q(void **state) +{ + (void)state; + http_accept_encoding_t ae; + PARSE("GZIP;Q=0.5", ae); + assert_true(ae.gzip_acceptable); +} + +static void test_lots_of_whitespace(void **state) +{ + (void)state; + http_accept_encoding_t ae; + PARSE(" \t gzip ; q=0.9 , identity ; q=0 ", ae); + assert_true(ae.gzip_acceptable); + assert_false(ae.identity_acceptable); +} + +static void test_extra_params_ignored(void **state) +{ + (void)state; + http_accept_encoding_t ae; + /* Accept-ext-style params after q= are not part of the Accept-Encoding grammar (RFC 9110 section 12.5.3 allows only a weight), but the parser is expected to tolerate them and must not let them + * confuse the q-zero check. 
*/ + PARSE("gzip;q=0.8;something=else", ae); + assert_true(ae.gzip_acceptable); +} + +static void test_malformed_q_treated_as_one(void **state) +{ + (void)state; + http_accept_encoding_t ae; + PARSE("gzip;q=banana", ae); /* an unparsable q-value must not turn into a rejection */ + assert_true(ae.gzip_acceptable); +} + +/* ---- MIME normaliser -------------------------------------------------- */ + +static void test_mime_normalize_simple(void **state) +{ + (void)state; + char buf[64]; + size_t n = http_compression_mime_normalize("text/html", 9, buf, sizeof(buf)); + assert_int_equal(n, 9); + assert_memory_equal(buf, "text/html", 9); +} + +static void test_mime_normalize_strip_params(void **state) +{ + (void)state; + char buf[64]; + size_t n = http_compression_mime_normalize( + " Application/JSON ; charset=utf-8 ", 35, buf, sizeof(buf)); + assert_int_equal(n, 16); + assert_memory_equal(buf, "application/json", 16); /* expects surrounding whitespace trimmed, the type lower-cased and the charset parameter removed. NOTE(review): the literal as shown here is 34 bytes while 35 is passed; whitespace inside it was probably collapsed in this copy, confirm against the original file. */ +} + +static void test_mime_normalize_only_params(void **state) +{ + (void)state; + char buf[64]; + size_t n = http_compression_mime_normalize("; charset=utf-8", 15, buf, sizeof(buf)); + assert_int_equal(n, 0); /* nothing precedes the semicolon, so the normalised type is empty */ +} + +static void test_mime_normalize_buffer_too_small(void **state) +{ + (void)state; + char buf[4]; + size_t n = http_compression_mime_normalize("text/html", 9, buf, sizeof(buf)); + assert_int_equal(n, 0); /* a 4-byte buffer cannot hold the 9-byte type; the test expects 0 rather than a truncated result */ +} + +int main(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_default_is_identity_only), + cmocka_unit_test(test_empty_header_means_identity_only), + cmocka_unit_test(test_explicit_gzip), + cmocka_unit_test(test_gzip_q_zero), + cmocka_unit_test(test_gzip_q_zero_with_decimals), + cmocka_unit_test(test_gzip_q_almost_zero_is_acceptable), + cmocka_unit_test(test_identity_q_zero), + cmocka_unit_test(test_star_enables_gzip), + cmocka_unit_test(test_star_q_zero_excludes_identity), + cmocka_unit_test(test_star_q_zero_but_identity_kept), + cmocka_unit_test(test_explicit_overrides_star), + cmocka_unit_test(test_unknown_codings_ignored), 
+ cmocka_unit_test(test_case_insensitive_coding_and_q), + cmocka_unit_test(test_lots_of_whitespace), + cmocka_unit_test(test_extra_params_ignored), + cmocka_unit_test(test_malformed_q_treated_as_one), + cmocka_unit_test(test_mime_normalize_simple), + cmocka_unit_test(test_mime_normalize_strip_params), + cmocka_unit_test(test_mime_normalize_only_params), + cmocka_unit_test(test_mime_normalize_buffer_too_small), + }; + return cmocka_run_group_tests(tests, NULL, NULL); /* no group-level setup or teardown */ +}