Skip to content

Commit c6efeb7

Browse files
committed
add support for video-to-animated-image conversion with frame sampling
Enables extraction of multiple frames from video sources (MP4, MOV, WEBM) at configurable sample intervals to produce animated image outputs (animated WebP, GIF). Introduces VideoFrameSampleInterval option to control frame sampling rate and extends decoder with VideoDecoder interface for multi-frame extraction. Also improves memory alignment handling for better SIMD performance.
1 parent c441e3b commit c6efeb7

File tree

11 files changed

+853
-66
lines changed

11 files changed

+853
-66
lines changed

avcodec.cpp

Lines changed: 189 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,13 @@ struct avcodec_decoder_struct {
4949
AVCodecContext* codec;
5050
AVIOContext* avio;
5151
int video_stream_index;
52+
53+
// Multi-frame extraction state
54+
float frame_sample_interval; // Interval between frames in seconds
55+
double next_frame_time; // Next frame time to extract
56+
double last_extracted_pts; // Last extracted frame PTS
57+
int frame_delay_ms; // Delay for current frame in milliseconds
58+
bool multi_frame_mode; // Whether we're extracting multiple frames
5259
};
5360

5461
static int avcodec_decoder_read_callback(void* d_void, uint8_t* buf, int buf_size)
@@ -154,7 +161,9 @@ bool avcodec_decoder_is_streamable(const opencv_mat mat)
154161
return false;
155162
}
156163

157-
avcodec_decoder avcodec_decoder_create(const opencv_mat buf, const bool hevc_enabled, const bool av1_enabled)
164+
avcodec_decoder avcodec_decoder_create(const opencv_mat buf,
165+
const bool hevc_enabled,
166+
const bool av1_enabled)
158167
{
159168
avcodec_decoder d = new struct avcodec_decoder_struct();
160169
memset(d, 0, sizeof(struct avcodec_decoder_struct));
@@ -286,7 +295,7 @@ int avcodec_decoder_get_icc(const avcodec_decoder d, void* dest, size_t dest_len
286295
if (!d || !d->codec) {
287296
return -1;
288297
}
289-
298+
290299
const uint8_t* profile_data = avcodec_get_icc_profile(d->codec->color_primaries, profile_size);
291300

292301
if (profile_size > dest_len) {
@@ -413,7 +422,7 @@ const char* avcodec_decoder_get_video_codec(const avcodec_decoder d)
413422
if (!d || !d->codec) {
414423
return "Unknown";
415424
}
416-
425+
417426
switch (d->codec->codec_id) {
418427
case AV_CODEC_ID_H264:
419428
return "H264";
@@ -437,7 +446,7 @@ const char* avcodec_decoder_get_audio_codec(const avcodec_decoder d)
437446
if (!d || !d->container) {
438447
return "Unknown";
439448
}
440-
449+
441450
for (unsigned int i = 0; i < d->container->nb_streams; i++) {
442451
AVStream* stream = d->container->streams[i];
443452
if (stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
@@ -457,7 +466,7 @@ const char* avcodec_decoder_get_audio_codec(const avcodec_decoder d)
457466
}
458467
}
459468
}
460-
469+
461470
return "Unknown";
462471
}
463472

@@ -472,40 +481,59 @@ bool avcodec_decoder_has_subtitles(const avcodec_decoder d)
472481
return false;
473482
}
474483

475-
static int avcodec_decoder_copy_frame(const avcodec_decoder d, opencv_mat mat, AVFrame* frame)
484+
static int avcodec_decoder_convert_frame(const avcodec_decoder d, opencv_mat mat, AVFrame* frame)
476485
{
477486
if (!d || !d->codec || !d->codec->codec || !mat || !frame) {
478487
return -1;
479488
}
480-
489+
481490
auto cvMat = static_cast<cv::Mat*>(mat);
482491
if (!cvMat) {
483492
return -1;
484493
}
485494

486-
int res = avcodec_receive_frame(d->codec, frame);
487-
if (res >= 0) {
488-
// Calculate the step size based on the cv::Mat's width
489-
int stepSize =
490-
4 * cvMat->cols; // Assuming the cv::Mat is in BGRA format, which has 4 channels
491-
if (cvMat->cols % 32 != 0) {
492-
int width = cvMat->cols + 32 - (cvMat->cols % 32);
493-
stepSize = 4 * width;
495+
// Check if rotation/transformation is needed
496+
CVImageOrientation orientation = (CVImageOrientation)avcodec_decoder_get_orientation(d);
497+
bool needs_transformation = (orientation != CV_IMAGE_ORIENTATION_TL);
498+
bool dimensions_swapped = (orientation == CV_IMAGE_ORIENTATION_RT || orientation == CV_IMAGE_ORIENTATION_LB);
499+
500+
int res = 0;
501+
cv::Mat tempMat;
502+
cv::Mat* decodeDst = cvMat;
503+
504+
// If transformation is needed, decode to a temporary buffer with raw dimensions
505+
if (needs_transformation) {
506+
// For 90/270 rotation, create temp Mat with swapped dimensions
507+
// For other transformations, use same dimensions as output
508+
if (dimensions_swapped) {
509+
tempMat = cv::Mat(frame->height, frame->width, CV_8UC4);
510+
} else {
511+
tempMat = cv::Mat(cvMat->rows, cvMat->cols, CV_8UC4);
494512
}
495-
if (!opencv_mat_set_row_stride(mat, stepSize)) {
513+
decodeDst = &tempMat;
514+
}
515+
516+
{
517+
// Use the decode destination's actual step (stride)
518+
int stepSize = decodeDst->step;
519+
520+
// Validate that the stride and height are within the allocated buffer bounds
521+
size_t required_size = stepSize * decodeDst->rows;
522+
size_t available_size =
523+
(decodeDst->datalimit && decodeDst->data) ? (decodeDst->datalimit - decodeDst->data) : 0;
524+
if (available_size > 0 && required_size > available_size) {
496525
return -1;
497526
}
498527

499-
// Create SwsContext for converting the frame format and scaling
528+
// Create SwsContext for converting the frame format
500529
struct SwsContext* sws =
501530
sws_getContext(frame->width,
502531
frame->height,
503532
(AVPixelFormat)(frame->format), // Source dimensions and format
504-
cvMat->cols,
505-
cvMat->rows,
506-
AV_PIX_FMT_BGRA, // Destination dimensions and format
507-
SWS_BILINEAR, // Specify the scaling algorithm; you can choose another
508-
// according to your needs
533+
decodeDst->cols,
534+
decodeDst->rows,
535+
AV_PIX_FMT_BGRA, // Destination format
536+
SWS_BILINEAR,
509537
NULL,
510538
NULL,
511539
NULL);
@@ -541,9 +569,8 @@ static int avcodec_decoder_copy_frame(const avcodec_decoder d, opencv_mat mat, A
541569
sws_setColorspaceDetails(sws, inv_table, srcRange, table, 1, 0, 1 << 16, 1 << 16);
542570

543571
// The linesizes and data pointers for the destination
544-
int dstLinesizes[4];
545-
av_image_fill_linesizes(dstLinesizes, AV_PIX_FMT_BGRA, stepSize / 4);
546-
uint8_t* dstData[4] = {cvMat->data, NULL, NULL, NULL};
572+
int dstLinesizes[4] = {stepSize, 0, 0, 0};
573+
uint8_t* dstData[4] = {decodeDst->data, NULL, NULL, NULL};
547574

548575
// Perform the scaling and format conversion
549576
sws_scale(sws, frame->data, frame->linesize, 0, frame->height, dstData, dstLinesizes);
@@ -552,6 +579,28 @@ static int avcodec_decoder_copy_frame(const avcodec_decoder d, opencv_mat mat, A
552579
sws_freeContext(sws);
553580
}
554581

582+
// Apply orientation transformation if needed
583+
if (needs_transformation) {
584+
cv::OrientationTransform(int(orientation), tempMat);
585+
586+
// Verify dimensions match after transformation
587+
if (tempMat.cols != cvMat->cols || tempMat.rows != cvMat->rows) {
588+
return -1;
589+
}
590+
591+
// Copy the transformed image to the output Mat, respecting stride
592+
opencv_mat_copy_with_stride(&tempMat, cvMat);
593+
}
594+
595+
return res;
596+
}
597+
598+
static int avcodec_decoder_copy_frame(const avcodec_decoder d, opencv_mat mat, AVFrame* frame)
599+
{
600+
int res = avcodec_receive_frame(d->codec, frame);
601+
if (res >= 0) {
602+
return avcodec_decoder_convert_frame(d, mat, frame);
603+
}
555604
return res;
556605
}
557606

@@ -573,12 +622,106 @@ static int avcodec_decoder_decode_packet(const avcodec_decoder d, opencv_mat mat
573622
return res;
574623
}
575624

576-
bool avcodec_decoder_decode(const avcodec_decoder d, opencv_mat mat)
625+
// Helper function to check if a frame should be extracted and process it
626+
// Returns true if the frame was processed and should be returned
627+
static bool avcodec_decoder_process_sampled_frame(avcodec_decoder d,
628+
opencv_mat mat,
629+
AVFrame* frame,
630+
AVStream* video_stream,
631+
int* out_result)
632+
{
633+
double frame_time = -1.0;
634+
if (frame->pts != AV_NOPTS_VALUE) {
635+
frame_time = frame->pts * av_q2d(video_stream->time_base);
636+
}
637+
638+
// Check if this frame should be extracted based on sampling interval
639+
if (frame_time >= 0 && frame_time >= d->next_frame_time) {
640+
// Calculate frame delay for animation
641+
if (d->last_extracted_pts >= 0) {
642+
double delay_seconds = frame_time - d->last_extracted_pts;
643+
int delay_ms = (int)(delay_seconds * 1000.0);
644+
// Validate delay is reasonable (between 1ms and 60 seconds)
645+
// Use sample interval if delay is out of bounds
646+
if (delay_ms > 0 && delay_ms <= 60000) {
647+
d->frame_delay_ms = delay_ms;
648+
} else {
649+
d->frame_delay_ms = (int)(d->frame_sample_interval * 1000.0);
650+
}
651+
} else {
652+
d->frame_delay_ms = (int)(d->frame_sample_interval * 1000.0);
653+
}
654+
655+
d->last_extracted_pts = frame_time;
656+
d->next_frame_time = frame_time + d->frame_sample_interval;
657+
658+
// Convert frame to output mat
659+
*out_result = avcodec_decoder_convert_frame(d, mat, frame);
660+
return true;
661+
}
662+
663+
return false;
664+
}
665+
666+
bool avcodec_decoder_decode(avcodec_decoder d, opencv_mat mat)
577667
{
578668
if (!d || !d->container || !d->codec || !mat) {
579669
return false;
580670
}
671+
581672
AVPacket packet;
673+
AVStream* video_stream = d->container->streams[d->video_stream_index];
674+
675+
// If we're in multi-frame mode, we need to sample frames based on time
676+
if (d->multi_frame_mode) {
677+
AVFrame* frame = av_frame_alloc();
678+
if (!frame) {
679+
return false;
680+
}
681+
682+
while (true) {
683+
int res = av_read_frame(d->container, &packet);
684+
if (res < 0) {
685+
// Reached EOF - flush remaining frames from the decoder
686+
avcodec_send_packet(d->codec, NULL);
687+
while (avcodec_receive_frame(d->codec, frame) == 0) {
688+
int convert_result;
689+
if (avcodec_decoder_process_sampled_frame(d, mat, frame, video_stream, &convert_result)) {
690+
av_frame_free(&frame);
691+
return (convert_result >= 0);
692+
}
693+
av_frame_unref(frame);
694+
}
695+
696+
// No more frames available
697+
av_frame_free(&frame);
698+
return false;
699+
}
700+
701+
if (packet.stream_index != d->video_stream_index) {
702+
av_packet_unref(&packet);
703+
continue;
704+
}
705+
706+
res = avcodec_send_packet(d->codec, &packet);
707+
av_packet_unref(&packet);
708+
709+
if (res < 0) {
710+
continue;
711+
}
712+
713+
while (avcodec_receive_frame(d->codec, frame) == 0) {
714+
int convert_result;
715+
if (avcodec_decoder_process_sampled_frame(d, mat, frame, video_stream, &convert_result)) {
716+
av_frame_free(&frame);
717+
return (convert_result >= 0);
718+
}
719+
av_frame_unref(frame);
720+
}
721+
}
722+
}
723+
724+
// Single-frame mode: just decode the first video frame
582725
bool done = false;
583726
bool success = false;
584727
while (!done) {
@@ -601,6 +744,26 @@ bool avcodec_decoder_decode(const avcodec_decoder d, opencv_mat mat)
601744
return success;
602745
}
603746

747+
// Configures time-based frame sampling on a decoder and resets the sampling
// state. Multi-frame mode is switched on only when `interval_seconds` is
// greater than zero; a null decoder is ignored.
void avcodec_decoder_set_frame_sample_interval(avcodec_decoder d, float interval_seconds)
{
    if (d == nullptr) {
        return;
    }

    const bool sampling_enabled = interval_seconds > 0.0;

    d->multi_frame_mode = sampling_enabled;
    d->frame_sample_interval = interval_seconds;

    // Restart sampling: first eligible frame time is 0, no frame has been
    // extracted yet (negative sentinel), and no delay has been computed.
    d->frame_delay_ms = 0;
    d->last_extracted_pts = -1.0;
    d->next_frame_time = 0.0;
}
758+
759+
int avcodec_decoder_get_frame_delay_ms(const avcodec_decoder d)
760+
{
761+
if (!d) {
762+
return 0;
763+
}
764+
return d->frame_delay_ms;
765+
}
766+
604767
void avcodec_decoder_release(avcodec_decoder d)
605768
{
606769
if (d->codec) {

0 commit comments

Comments
 (0)