Skip to content

Commit d265aee

Browse files
authored
Revert "feat: Support for video-to-animated-image conversion with frame sampling (#273)" (#280)
This reverts commit 686cbce.
1 parent 5c35536 commit d265aee

File tree

11 files changed

+66
-853
lines changed

11 files changed

+66
-853
lines changed

avcodec.cpp

Lines changed: 26 additions & 189 deletions
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,6 @@ struct avcodec_decoder_struct {
4949
AVCodecContext* codec;
5050
AVIOContext* avio;
5151
int video_stream_index;
52-
53-
// Multi-frame extraction state
54-
int frame_sample_interval_ms; // Interval between frames in milliseconds
55-
double next_frame_time; // Next frame time to extract
56-
double last_extracted_pts; // Last extracted frame PTS
57-
int frame_delay_ms; // Delay for current frame in milliseconds
58-
bool multi_frame_mode; // Whether we're extracting multiple frames
5952
};
6053

6154
static int avcodec_decoder_read_callback(void* d_void, uint8_t* buf, int buf_size)
@@ -161,9 +154,7 @@ bool avcodec_decoder_is_streamable(const opencv_mat mat)
161154
return false;
162155
}
163156

164-
avcodec_decoder avcodec_decoder_create(const opencv_mat buf,
165-
const bool hevc_enabled,
166-
const bool av1_enabled)
157+
avcodec_decoder avcodec_decoder_create(const opencv_mat buf, const bool hevc_enabled, const bool av1_enabled)
167158
{
168159
avcodec_decoder d = new struct avcodec_decoder_struct();
169160
memset(d, 0, sizeof(struct avcodec_decoder_struct));
@@ -295,7 +286,7 @@ int avcodec_decoder_get_icc(const avcodec_decoder d, void* dest, size_t dest_len
295286
if (!d || !d->codec) {
296287
return -1;
297288
}
298-
289+
299290
const uint8_t* profile_data = avcodec_get_icc_profile(d->codec->color_primaries, profile_size);
300291

301292
if (profile_size > dest_len) {
@@ -422,7 +413,7 @@ const char* avcodec_decoder_get_video_codec(const avcodec_decoder d)
422413
if (!d || !d->codec) {
423414
return "Unknown";
424415
}
425-
416+
426417
switch (d->codec->codec_id) {
427418
case AV_CODEC_ID_H264:
428419
return "H264";
@@ -446,7 +437,7 @@ const char* avcodec_decoder_get_audio_codec(const avcodec_decoder d)
446437
if (!d || !d->container) {
447438
return "Unknown";
448439
}
449-
440+
450441
for (unsigned int i = 0; i < d->container->nb_streams; i++) {
451442
AVStream* stream = d->container->streams[i];
452443
if (stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
@@ -466,7 +457,7 @@ const char* avcodec_decoder_get_audio_codec(const avcodec_decoder d)
466457
}
467458
}
468459
}
469-
460+
470461
return "Unknown";
471462
}
472463

@@ -481,59 +472,40 @@ bool avcodec_decoder_has_subtitles(const avcodec_decoder d)
481472
return false;
482473
}
483474

484-
static int avcodec_decoder_convert_frame(const avcodec_decoder d, opencv_mat mat, AVFrame* frame)
475+
static int avcodec_decoder_copy_frame(const avcodec_decoder d, opencv_mat mat, AVFrame* frame)
485476
{
486477
if (!d || !d->codec || !d->codec->codec || !mat || !frame) {
487478
return -1;
488479
}
489-
480+
490481
auto cvMat = static_cast<cv::Mat*>(mat);
491482
if (!cvMat) {
492483
return -1;
493484
}
494485

495-
// Check if rotation/transformation is needed
496-
CVImageOrientation orientation = (CVImageOrientation)avcodec_decoder_get_orientation(d);
497-
bool needs_transformation = (orientation != CV_IMAGE_ORIENTATION_TL);
498-
bool dimensions_swapped = (orientation == CV_IMAGE_ORIENTATION_RT || orientation == CV_IMAGE_ORIENTATION_LB);
499-
500-
int res = 0;
501-
cv::Mat tempMat;
502-
cv::Mat* decodeDst = cvMat;
503-
504-
// If transformation is needed, decode to a temporary buffer with raw dimensions
505-
if (needs_transformation) {
506-
// For 90/270 rotation, create temp Mat with swapped dimensions
507-
// For other transformations, use same dimensions as output
508-
if (dimensions_swapped) {
509-
tempMat = cv::Mat(frame->height, frame->width, CV_8UC4);
510-
} else {
511-
tempMat = cv::Mat(cvMat->rows, cvMat->cols, CV_8UC4);
486+
int res = avcodec_receive_frame(d->codec, frame);
487+
if (res >= 0) {
488+
// Calculate the step size based on the cv::Mat's width
489+
int stepSize =
490+
4 * cvMat->cols; // Assuming the cv::Mat is in BGRA format, which has 4 channels
491+
if (cvMat->cols % 32 != 0) {
492+
int width = cvMat->cols + 32 - (cvMat->cols % 32);
493+
stepSize = 4 * width;
512494
}
513-
decodeDst = &tempMat;
514-
}
515-
516-
{
517-
// Use the decode destination's actual step (stride)
518-
int stepSize = decodeDst->step;
519-
520-
// Validate that the stride and height are within the allocated buffer bounds
521-
size_t required_size = stepSize * decodeDst->rows;
522-
size_t available_size =
523-
(decodeDst->datalimit && decodeDst->data) ? (decodeDst->datalimit - decodeDst->data) : 0;
524-
if (available_size > 0 && required_size > available_size) {
495+
if (!opencv_mat_set_row_stride(mat, stepSize)) {
525496
return -1;
526497
}
527498

528-
// Create SwsContext for converting the frame format
499+
// Create SwsContext for converting the frame format and scaling
529500
struct SwsContext* sws =
530501
sws_getContext(frame->width,
531502
frame->height,
532503
(AVPixelFormat)(frame->format), // Source dimensions and format
533-
decodeDst->cols,
534-
decodeDst->rows,
535-
AV_PIX_FMT_BGRA, // Destination format
536-
SWS_BILINEAR,
504+
cvMat->cols,
505+
cvMat->rows,
506+
AV_PIX_FMT_BGRA, // Destination dimensions and format
507+
SWS_BILINEAR, // Specify the scaling algorithm; you can choose another
508+
// according to your needs
537509
NULL,
538510
NULL,
539511
NULL);
@@ -569,8 +541,9 @@ static int avcodec_decoder_convert_frame(const avcodec_decoder d, opencv_mat mat
569541
sws_setColorspaceDetails(sws, inv_table, srcRange, table, 1, 0, 1 << 16, 1 << 16);
570542

571543
// The linesizes and data pointers for the destination
572-
int dstLinesizes[4] = {stepSize, 0, 0, 0};
573-
uint8_t* dstData[4] = {decodeDst->data, NULL, NULL, NULL};
544+
int dstLinesizes[4];
545+
av_image_fill_linesizes(dstLinesizes, AV_PIX_FMT_BGRA, stepSize / 4);
546+
uint8_t* dstData[4] = {cvMat->data, NULL, NULL, NULL};
574547

575548
// Perform the scaling and format conversion
576549
sws_scale(sws, frame->data, frame->linesize, 0, frame->height, dstData, dstLinesizes);
@@ -579,28 +552,6 @@ static int avcodec_decoder_convert_frame(const avcodec_decoder d, opencv_mat mat
579552
sws_freeContext(sws);
580553
}
581554

582-
// Apply orientation transformation if needed
583-
if (needs_transformation) {
584-
cv::OrientationTransform(int(orientation), tempMat);
585-
586-
// Verify dimensions match after transformation
587-
if (tempMat.cols != cvMat->cols || tempMat.rows != cvMat->rows) {
588-
return -1;
589-
}
590-
591-
// Copy the transformed image to the output Mat, respecting stride
592-
opencv_mat_copy_with_stride(&tempMat, cvMat);
593-
}
594-
595-
return res;
596-
}
597-
598-
static int avcodec_decoder_copy_frame(const avcodec_decoder d, opencv_mat mat, AVFrame* frame)
599-
{
600-
int res = avcodec_receive_frame(d->codec, frame);
601-
if (res >= 0) {
602-
return avcodec_decoder_convert_frame(d, mat, frame);
603-
}
604555
return res;
605556
}
606557

@@ -622,106 +573,12 @@ static int avcodec_decoder_decode_packet(const avcodec_decoder d, opencv_mat mat
622573
return res;
623574
}
624575

625-
// Helper function to check if a frame should be extracted and process it
626-
// Returns true if the frame was processed and should be returned
627-
static bool avcodec_decoder_process_sampled_frame(avcodec_decoder d,
628-
opencv_mat mat,
629-
AVFrame* frame,
630-
AVStream* video_stream,
631-
int* out_result)
632-
{
633-
double frame_time = -1.0;
634-
if (frame->pts != AV_NOPTS_VALUE) {
635-
frame_time = frame->pts * av_q2d(video_stream->time_base);
636-
}
637-
638-
// Check if this frame should be extracted based on sampling interval
639-
if (frame_time >= 0 && frame_time >= d->next_frame_time) {
640-
// Calculate frame delay for animation
641-
if (d->last_extracted_pts >= 0) {
642-
double delay_seconds = frame_time - d->last_extracted_pts;
643-
int delay_ms = (int)(delay_seconds * 1000.0);
644-
// Validate delay is reasonable (between 1ms and 60 seconds)
645-
// Use sample interval if delay is out of bounds
646-
if (delay_ms > 0 && delay_ms <= 60000) {
647-
d->frame_delay_ms = delay_ms;
648-
} else {
649-
d->frame_delay_ms = (int)(d->frame_sample_interval_ms);
650-
}
651-
} else {
652-
d->frame_delay_ms = (int)(d->frame_sample_interval_ms);
653-
}
654-
655-
d->last_extracted_pts = frame_time;
656-
d->next_frame_time = frame_time + (d->frame_sample_interval_ms / 1000.0);
657-
658-
// Convert frame to output mat
659-
*out_result = avcodec_decoder_convert_frame(d, mat, frame);
660-
return true;
661-
}
662-
663-
return false;
664-
}
665-
666-
bool avcodec_decoder_decode(avcodec_decoder d, opencv_mat mat)
576+
bool avcodec_decoder_decode(const avcodec_decoder d, opencv_mat mat)
667577
{
668578
if (!d || !d->container || !d->codec || !mat) {
669579
return false;
670580
}
671-
672581
AVPacket packet;
673-
AVStream* video_stream = d->container->streams[d->video_stream_index];
674-
675-
// If we're in multi-frame mode, we need to sample frames based on time
676-
if (d->multi_frame_mode) {
677-
AVFrame* frame = av_frame_alloc();
678-
if (!frame) {
679-
return false;
680-
}
681-
682-
while (true) {
683-
int res = av_read_frame(d->container, &packet);
684-
if (res < 0) {
685-
// Reached EOF - flush remaining frames from the decoder
686-
avcodec_send_packet(d->codec, NULL);
687-
while (avcodec_receive_frame(d->codec, frame) == 0) {
688-
int convert_result;
689-
if (avcodec_decoder_process_sampled_frame(d, mat, frame, video_stream, &convert_result)) {
690-
av_frame_free(&frame);
691-
return (convert_result >= 0);
692-
}
693-
av_frame_unref(frame);
694-
}
695-
696-
// No more frames available
697-
av_frame_free(&frame);
698-
return false;
699-
}
700-
701-
if (packet.stream_index != d->video_stream_index) {
702-
av_packet_unref(&packet);
703-
continue;
704-
}
705-
706-
res = avcodec_send_packet(d->codec, &packet);
707-
av_packet_unref(&packet);
708-
709-
if (res < 0) {
710-
continue;
711-
}
712-
713-
while (avcodec_receive_frame(d->codec, frame) == 0) {
714-
int convert_result;
715-
if (avcodec_decoder_process_sampled_frame(d, mat, frame, video_stream, &convert_result)) {
716-
av_frame_free(&frame);
717-
return (convert_result >= 0);
718-
}
719-
av_frame_unref(frame);
720-
}
721-
}
722-
}
723-
724-
// Single-frame mode: just decode the first video frame
725582
bool done = false;
726583
bool success = false;
727584
while (!done) {
@@ -744,26 +601,6 @@ bool avcodec_decoder_decode(avcodec_decoder d, opencv_mat mat)
744601
return success;
745602
}
746603

747-
void avcodec_decoder_set_frame_sample_interval_ms(avcodec_decoder d, int frame_sample_interval_ms)
748-
{
749-
if (!d) {
750-
return;
751-
}
752-
d->frame_sample_interval_ms = frame_sample_interval_ms;
753-
d->next_frame_time = 0.0;
754-
d->last_extracted_pts = -1.0;
755-
d->frame_delay_ms = 0;
756-
d->multi_frame_mode = (frame_sample_interval_ms > 0);
757-
}
758-
759-
int avcodec_decoder_get_frame_delay_ms(const avcodec_decoder d)
760-
{
761-
if (!d) {
762-
return 0;
763-
}
764-
return d->frame_delay_ms;
765-
}
766-
767604
void avcodec_decoder_release(avcodec_decoder d)
768605
{
769606
if (d->codec) {

0 commit comments

Comments
 (0)