From 5f87ce30d6302a01675836cd799ab91ccf020bbd Mon Sep 17 00:00:00 2001 From: zhixiangli Date: Wed, 29 Apr 2026 01:19:44 +0000 Subject: [PATCH 1/2] perf: use _DummyListBuffer in test_reads.py to avoid GIL contention --- .../time_based/reads/test_reads.py | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py index 7db13520ed07..ae1cc74d5af3 100644 --- a/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py +++ b/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py @@ -170,6 +170,24 @@ def _download_time_based_json(client, filename, params): ) +# _DummyListBuffer is used instead of io.BytesIO to avoid GIL contention +# during profiling. io.BytesIO.write() holds the GIL while copying data, +# which introduces significant noise and bottlenecks in performance tests +# with high concurrency or large data transfers. +# This buffer simply collects chunks in a list and tracks the total size. +class _DummyListBuffer: + def __init__(self): + self.chunks = [] + self.size = 0 + + def write(self, data): + self.chunks.append(data) + self.size += len(data) + + def getvalue(self): + return b"".join(self.chunks) + + async def _download_time_based_async(client, filename, params): mrd = AsyncMultiRangeDownloader(client, params.bucket_name, filename) await mrd.open() @@ -197,17 +215,17 @@ async def _worker_coro(): offset = random.randint( 0, params.file_size_bytes - params.chunk_size_bytes ) - ranges.append((offset, params.chunk_size_bytes, BytesIO())) + ranges.append((offset, params.chunk_size_bytes, _DummyListBuffer())) else: # seq for _ in range(params.num_ranges): - ranges.append((offset, params.chunk_size_bytes, BytesIO())) + ranges.append((offset, params.chunk_size_bytes, _DummyListBuffer())) offset += params.chunk_size_bytes if offset + params.chunk_size_bytes > params.file_size_bytes: offset = 0 # Reset offset if end of file is reached await mrd.download_ranges(ranges) - bytes_in_buffers = sum(r[2].getbuffer().nbytes for r in ranges) + bytes_in_buffers = sum(r[2].size for r in ranges) assert bytes_in_buffers == params.chunk_size_bytes * params.num_ranges if not is_warming_up: From 97622766e4db67dd520dabba1dc518d386e52d37 Mon Sep 17 00:00:00 2001 From: zhixiangli Date: Mon, 4 May 2026 12:43:58 +0000 Subject: [PATCH 2/2] resolve conflicts --- .../perf/microbenchmarks/time_based/reads/test_reads.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py index ae1cc74d5af3..359ddfadf863 100644 --- a/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py +++ b/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py @@ -19,9 +19,7 @@ import os import random import time -from io import BytesIO from typing import List, NamedTuple, Optional - import pytest import tests.perf.microbenchmarks.time_based.reads.config as config @@ -182,7 +180,9 @@ def __init__(self): def write(self, data): self.chunks.append(data) - self.size += len(data) + nbytes = len(data) + self.size += nbytes + return nbytes def getvalue(self): return b"".join(self.chunks)