Skip to content

Commit 25e9d6c

Browse files
committed
Fix stride alignment for SIMD performance and decoder correctness
Adds 32-byte aligned stride support for framebuffers to enable SIMD optimizations (AVX/SSE), and fixes stride-related bugs in the WebP and GIF decoders that were causing data corruption.

Changes:
- opencv.cpp/hpp: Add opencv_mat_create_from_data_with_stride() to support custom strides
- opencv.go: Update NewFramebuffer() and resizeMat() to use 32-byte aligned strides
- webp.cpp: Fix decoder to use the Mat's actual stride instead of an intermediate buffer
- giflib.cpp: Fix decoder and encoder to copy frame data row by row, respecting stride

Fixes thumbhash test failures where stride mismatches were corrupting pixel data.
1 parent c441e3b commit 25e9d6c

File tree

5 files changed

+57
-18
lines changed

5 files changed

+57
-18
lines changed

giflib.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -484,7 +484,12 @@ static bool giflib_decoder_render_frame(giflib_decoder d, GraphicsControlBlock*
484484

485485
// Save current frame content before drawing new frame
486486
if (d->have_read_first_frame) {
487-
memcpy(d->prev_frame_bgra.data(), cvMat->data, buf_width * buf_height * BYTES_PER_PIXEL);
487+
// Copy row by row to handle stride correctly
488+
for (int y = 0; y < buf_height; y++) {
489+
memcpy(d->prev_frame_bgra.data() + y * buf_width * BYTES_PER_PIXEL,
490+
cvMat->data + y * cvMat->step,
491+
buf_width * BYTES_PER_PIXEL);
492+
}
488493
}
489494

490495
// Draw the new frame
@@ -1083,7 +1088,12 @@ static bool giflib_encoder_render_frame(giflib_encoder e,
10831088
}
10841089

10851090
// XXX change this if we do partial frames (only copy over some)
1086-
memcpy(e->prev_frame_bgra, frame->data, 4 * e->gif->SWidth * e->gif->SHeight);
1091+
// Copy row by row to handle stride correctly
1092+
for (int y = 0; y < e->gif->SHeight; y++) {
1093+
memcpy(e->prev_frame_bgra + y * e->gif->SWidth * 4,
1094+
frame->data + y * frame->step,
1095+
e->gif->SWidth * 4);
1096+
}
10871097

10881098
e->prev_frame_color_map = color_map;
10891099
e->prev_frame_disposal = gcb.DisposalMode;

opencv.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,17 @@ opencv_mat opencv_mat_create_from_data(int width, int height, int type, void* da
2929
return mat;
3030
}
3131

32+
opencv_mat opencv_mat_create_from_data_with_stride(int width, int height, int type, void* data, size_t data_len, size_t step)
33+
{
34+
size_t min_size = step * height;
35+
if (min_size > data_len) {
36+
return NULL;
37+
}
38+
auto mat = new cv::Mat(height, width, type, data, step);
39+
mat->datalimit = (uint8_t*)data + data_len;
40+
return mat;
41+
}
42+
3243
opencv_mat opencv_mat_create_empty_from_data(int length, void* data)
3344
{
3445
// this is slightly sketchy - what we're going to do is build a 1x0 matrix

opencv.go

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,9 +192,17 @@ func (h *ImageHeader) ContentLength() int {
192192
}
193193

194194
// NewFramebuffer creates a backing store for a pixel frame buffer with the specified dimensions.
195+
// The buffer is allocated with 32-byte aligned strides for optimal SIMD performance.
195196
func NewFramebuffer(width, height int) *Framebuffer {
197+
// Calculate aligned stride (32-byte aligned for SIMD performance)
198+
// 8 pixels * 4 bytes = 32 bytes
199+
stride := width * 4
200+
if width%8 != 0 {
201+
alignedWidth := width + 8 - (width % 8)
202+
stride = alignedWidth * 4
203+
}
196204
return &Framebuffer{
197-
buf: make([]byte, width*height*4),
205+
buf: make([]byte, stride*height),
198206
mat: nil,
199207
}
200208
}
@@ -243,7 +251,17 @@ func (f *Framebuffer) resizeMat(width, height int, pixelType PixelType) error {
243251
if pixelType.Depth() > 8 {
244252
pixelType = PixelType(C.opencv_type_convert_depth(C.int(pixelType), C.CV_8U))
245253
}
246-
newMat := C.opencv_mat_create_from_data(C.int(width), C.int(height), C.int(pixelType), unsafe.Pointer(&f.buf[0]), C.size_t(len(f.buf)))
254+
255+
// Calculate aligned stride (32-byte aligned for SIMD performance)
256+
// For BGRA (4 channels): 8 pixels * 4 bytes = 32 bytes
257+
stride := width * pixelType.Channels()
258+
alignmentPixels := 32 / pixelType.Channels() // 8 for 4-channel, 16 for 2-channel, etc.
259+
if alignmentPixels > 0 && width%alignmentPixels != 0 {
260+
alignedWidth := width + alignmentPixels - (width % alignmentPixels)
261+
stride = alignedWidth * pixelType.Channels()
262+
}
263+
264+
newMat := C.opencv_mat_create_from_data_with_stride(C.int(width), C.int(height), C.int(pixelType), unsafe.Pointer(&f.buf[0]), C.size_t(len(f.buf)), C.size_t(stride))
247265
if newMat == nil {
248266
return ErrBufTooSmall
249267
}

opencv.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,12 @@ opencv_mat opencv_mat_create_from_data(int width,
9898
int type,
9999
void* data,
100100
size_t data_len);
101+
opencv_mat opencv_mat_create_from_data_with_stride(int width,
102+
int height,
103+
int type,
104+
void* data,
105+
size_t data_len,
106+
size_t step);
101107
opencv_mat opencv_mat_create_empty_from_data(int length, void* data);
102108
bool opencv_mat_set_row_stride(opencv_mat mat, size_t stride);
103109
void opencv_mat_release(opencv_mat mat);

webp.cpp

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -322,41 +322,35 @@ bool webp_decoder_decode(const webp_decoder d, opencv_mat mat)
322322
auto cvMat = static_cast<cv::Mat*>(mat);
323323
cvMat->create(features.height, features.width, webp_decoder_get_pixel_type(d));
324324

325-
// Recalculate row size based on the new dimensions
326-
int row_size = cvMat->cols * cvMat->elemSize();
327-
328325
// Store frame properties for future use
329326
d->prev_frame_delay_time = frame.duration;
330327
d->prev_frame_x_offset = frame.x_offset;
331328
d->prev_frame_y_offset = frame.y_offset;
332329
d->prev_frame_dispose = frame.dispose_method;
333330
d->prev_frame_blend = frame.blend_method;
334331

335-
// Decode the frame
332+
// Decode the frame directly into the Mat using its actual stride
333+
// This ensures proper handling of any row alignment/padding
336334
uint8_t* res = nullptr;
337335
switch (webp_decoder_get_pixel_type(d)) {
338336
case CV_8UC4:
339337
res = WebPDecodeBGRAInto(frame.bitstream.bytes,
340338
frame.bitstream.size,
341-
d->decode_buffer,
342-
d->decode_buffer_size,
343-
row_size);
339+
cvMat->data,
340+
cvMat->rows * cvMat->step,
341+
cvMat->step);
344342
break;
345343
case CV_8UC3:
346344
res = WebPDecodeBGRInto(frame.bitstream.bytes,
347345
frame.bitstream.size,
348-
d->decode_buffer,
349-
d->decode_buffer_size,
350-
row_size);
346+
cvMat->data,
347+
cvMat->rows * cvMat->step,
348+
cvMat->step);
351349
break;
352350
default:
353351
return false;
354352
}
355353

356-
if (res) {
357-
memcpy(cvMat->data, d->decode_buffer, cvMat->total() * cvMat->elemSize());
358-
}
359-
360354
WebPDataClear(&frame.bitstream);
361355
return res != nullptr;
362356
}

0 commit comments

Comments
 (0)