@@ -49,6 +49,13 @@ struct avcodec_decoder_struct {
4949 AVCodecContext* codec;
5050 AVIOContext* avio;
5151 int video_stream_index;
52+
53+ // Multi-frame extraction state
54+ float frame_sample_interval; // Interval between frames in seconds
55+ double next_frame_time; // Next frame time to extract
56+ double last_extracted_pts; // Last extracted frame PTS
57+ int frame_delay_ms; // Delay for current frame in milliseconds
58+ bool multi_frame_mode; // Whether we're extracting multiple frames
5259};
5360
5461static int avcodec_decoder_read_callback (void * d_void, uint8_t * buf, int buf_size)
@@ -154,7 +161,9 @@ bool avcodec_decoder_is_streamable(const opencv_mat mat)
154161 return false ;
155162}
156163
157- avcodec_decoder avcodec_decoder_create (const opencv_mat buf, const bool hevc_enabled, const bool av1_enabled)
164+ avcodec_decoder avcodec_decoder_create (const opencv_mat buf,
165+ const bool hevc_enabled,
166+ const bool av1_enabled)
158167{
159168 avcodec_decoder d = new struct avcodec_decoder_struct ();
160169 memset (d, 0 , sizeof (struct avcodec_decoder_struct ));
@@ -286,7 +295,7 @@ int avcodec_decoder_get_icc(const avcodec_decoder d, void* dest, size_t dest_len
286295 if (!d || !d->codec ) {
287296 return -1 ;
288297 }
289-
298+
290299 const uint8_t * profile_data = avcodec_get_icc_profile (d->codec ->color_primaries , profile_size);
291300
292301 if (profile_size > dest_len) {
@@ -413,7 +422,7 @@ const char* avcodec_decoder_get_video_codec(const avcodec_decoder d)
413422 if (!d || !d->codec ) {
414423 return " Unknown" ;
415424 }
416-
425+
417426 switch (d->codec ->codec_id ) {
418427 case AV_CODEC_ID_H264:
419428 return " H264" ;
@@ -437,7 +446,7 @@ const char* avcodec_decoder_get_audio_codec(const avcodec_decoder d)
437446 if (!d || !d->container ) {
438447 return " Unknown" ;
439448 }
440-
449+
441450 for (unsigned int i = 0 ; i < d->container ->nb_streams ; i++) {
442451 AVStream* stream = d->container ->streams [i];
443452 if (stream->codecpar ->codec_type == AVMEDIA_TYPE_AUDIO) {
@@ -457,7 +466,7 @@ const char* avcodec_decoder_get_audio_codec(const avcodec_decoder d)
457466 }
458467 }
459468 }
460-
469+
461470 return " Unknown" ;
462471}
463472
@@ -472,40 +481,59 @@ bool avcodec_decoder_has_subtitles(const avcodec_decoder d)
472481 return false ;
473482}
474483
475- static int avcodec_decoder_copy_frame (const avcodec_decoder d, opencv_mat mat, AVFrame* frame)
484+ static int avcodec_decoder_convert_frame (const avcodec_decoder d, opencv_mat mat, AVFrame* frame)
476485{
477486 if (!d || !d->codec || !d->codec ->codec || !mat || !frame) {
478487 return -1 ;
479488 }
480-
489+
481490 auto cvMat = static_cast <cv::Mat*>(mat);
482491 if (!cvMat) {
483492 return -1 ;
484493 }
485494
486- int res = avcodec_receive_frame (d->codec , frame);
487- if (res >= 0 ) {
488- // Calculate the step size based on the cv::Mat's width
489- int stepSize =
490- 4 * cvMat->cols ; // Assuming the cv::Mat is in BGRA format, which has 4 channels
491- if (cvMat->cols % 32 != 0 ) {
492- int width = cvMat->cols + 32 - (cvMat->cols % 32 );
493- stepSize = 4 * width;
495+ // Check if rotation/transformation is needed
496+ CVImageOrientation orientation = (CVImageOrientation)avcodec_decoder_get_orientation (d);
497+ bool needs_transformation = (orientation != CV_IMAGE_ORIENTATION_TL);
498+ bool dimensions_swapped = (orientation == CV_IMAGE_ORIENTATION_RT || orientation == CV_IMAGE_ORIENTATION_LB);
499+
500+ int res = 0 ;
501+ cv::Mat tempMat;
502+ cv::Mat* decodeDst = cvMat;
503+
504+ // If transformation is needed, decode to a temporary buffer with raw dimensions
505+ if (needs_transformation) {
506+ // For 90/270 rotation, create temp Mat with swapped dimensions
507+ // For other transformations, use same dimensions as output
508+ if (dimensions_swapped) {
509+ tempMat = cv::Mat (frame->height , frame->width , CV_8UC4);
510+ } else {
511+ tempMat = cv::Mat (cvMat->rows , cvMat->cols , CV_8UC4);
494512 }
495- if (!opencv_mat_set_row_stride (mat, stepSize)) {
513+ decodeDst = &tempMat;
514+ }
515+
516+ {
517+ // Use the decode destination's actual step (stride)
518+ int stepSize = decodeDst->step ;
519+
520+ // Validate that the stride and height are within the allocated buffer bounds
521+ size_t required_size = stepSize * decodeDst->rows ;
522+ size_t available_size =
523+ (decodeDst->datalimit && decodeDst->data ) ? (decodeDst->datalimit - decodeDst->data ) : 0 ;
524+ if (available_size > 0 && required_size > available_size) {
496525 return -1 ;
497526 }
498527
499- // Create SwsContext for converting the frame format and scaling
528+ // Create SwsContext for converting the frame format
500529 struct SwsContext * sws =
501530 sws_getContext (frame->width ,
502531 frame->height ,
503532 (AVPixelFormat)(frame->format ), // Source dimensions and format
504- cvMat->cols ,
505- cvMat->rows ,
506- AV_PIX_FMT_BGRA, // Destination dimensions and format
507- SWS_BILINEAR, // Specify the scaling algorithm; you can choose another
508- // according to your needs
533+ decodeDst->cols ,
534+ decodeDst->rows ,
535+ AV_PIX_FMT_BGRA, // Destination format
536+ SWS_BILINEAR,
509537 NULL ,
510538 NULL ,
511539 NULL );
@@ -541,9 +569,8 @@ static int avcodec_decoder_copy_frame(const avcodec_decoder d, opencv_mat mat, A
541569 sws_setColorspaceDetails (sws, inv_table, srcRange, table, 1 , 0 , 1 << 16 , 1 << 16 );
542570
543571 // The linesizes and data pointers for the destination
544- int dstLinesizes[4 ];
545- av_image_fill_linesizes (dstLinesizes, AV_PIX_FMT_BGRA, stepSize / 4 );
546- uint8_t * dstData[4 ] = {cvMat->data , NULL , NULL , NULL };
572+ int dstLinesizes[4 ] = {stepSize, 0 , 0 , 0 };
573+ uint8_t * dstData[4 ] = {decodeDst->data , NULL , NULL , NULL };
547574
548575 // Perform the scaling and format conversion
549576 sws_scale (sws, frame->data , frame->linesize , 0 , frame->height , dstData, dstLinesizes);
@@ -552,6 +579,28 @@ static int avcodec_decoder_copy_frame(const avcodec_decoder d, opencv_mat mat, A
552579 sws_freeContext (sws);
553580 }
554581
582+ // Apply orientation transformation if needed
583+ if (needs_transformation) {
584+ cv::OrientationTransform (int (orientation), tempMat);
585+
586+ // Verify dimensions match after transformation
587+ if (tempMat.cols != cvMat->cols || tempMat.rows != cvMat->rows ) {
588+ return -1 ;
589+ }
590+
591+ // Copy the transformed image to the output Mat, respecting stride
592+ opencv_mat_copy_with_stride (&tempMat, cvMat);
593+ }
594+
595+ return res;
596+ }
597+
598+ static int avcodec_decoder_copy_frame (const avcodec_decoder d, opencv_mat mat, AVFrame* frame)
599+ {
600+ int res = avcodec_receive_frame (d->codec , frame);
601+ if (res >= 0 ) {
602+ return avcodec_decoder_convert_frame (d, mat, frame);
603+ }
555604 return res;
556605}
557606
@@ -573,12 +622,106 @@ static int avcodec_decoder_decode_packet(const avcodec_decoder d, opencv_mat mat
573622 return res;
574623}
575624
576- bool avcodec_decoder_decode (const avcodec_decoder d, opencv_mat mat)
625+ // Helper function to check if a frame should be extracted and process it
626+ // Returns true if the frame was processed and should be returned
627+ static bool avcodec_decoder_process_sampled_frame (avcodec_decoder d,
628+ opencv_mat mat,
629+ AVFrame* frame,
630+ AVStream* video_stream,
631+ int * out_result)
632+ {
633+ double frame_time = -1.0 ;
634+ if (frame->pts != AV_NOPTS_VALUE) {
635+ frame_time = frame->pts * av_q2d (video_stream->time_base );
636+ }
637+
638+ // Check if this frame should be extracted based on sampling interval
639+ if (frame_time >= 0 && frame_time >= d->next_frame_time ) {
640+ // Calculate frame delay for animation
641+ if (d->last_extracted_pts >= 0 ) {
642+ double delay_seconds = frame_time - d->last_extracted_pts ;
643+ int delay_ms = (int )(delay_seconds * 1000.0 );
644+ // Validate delay is reasonable (between 1ms and 60 seconds)
645+ // Use sample interval if delay is out of bounds
646+ if (delay_ms > 0 && delay_ms <= 60000 ) {
647+ d->frame_delay_ms = delay_ms;
648+ } else {
649+ d->frame_delay_ms = (int )(d->frame_sample_interval * 1000.0 );
650+ }
651+ } else {
652+ d->frame_delay_ms = (int )(d->frame_sample_interval * 1000.0 );
653+ }
654+
655+ d->last_extracted_pts = frame_time;
656+ d->next_frame_time = frame_time + d->frame_sample_interval ;
657+
658+ // Convert frame to output mat
659+ *out_result = avcodec_decoder_convert_frame (d, mat, frame);
660+ return true ;
661+ }
662+
663+ return false ;
664+ }
665+
666+ bool avcodec_decoder_decode (avcodec_decoder d, opencv_mat mat)
577667{
578668 if (!d || !d->container || !d->codec || !mat) {
579669 return false ;
580670 }
671+
581672 AVPacket packet;
673+ AVStream* video_stream = d->container ->streams [d->video_stream_index ];
674+
675+ // If we're in multi-frame mode, we need to sample frames based on time
676+ if (d->multi_frame_mode ) {
677+ AVFrame* frame = av_frame_alloc ();
678+ if (!frame) {
679+ return false ;
680+ }
681+
682+ while (true ) {
683+ int res = av_read_frame (d->container , &packet);
684+ if (res < 0 ) {
685+ // Reached EOF - flush remaining frames from the decoder
686+ avcodec_send_packet (d->codec , NULL );
687+ while (avcodec_receive_frame (d->codec , frame) == 0 ) {
688+ int convert_result;
689+ if (avcodec_decoder_process_sampled_frame (d, mat, frame, video_stream, &convert_result)) {
690+ av_frame_free (&frame);
691+ return (convert_result >= 0 );
692+ }
693+ av_frame_unref (frame);
694+ }
695+
696+ // No more frames available
697+ av_frame_free (&frame);
698+ return false ;
699+ }
700+
701+ if (packet.stream_index != d->video_stream_index ) {
702+ av_packet_unref (&packet);
703+ continue ;
704+ }
705+
706+ res = avcodec_send_packet (d->codec , &packet);
707+ av_packet_unref (&packet);
708+
709+ if (res < 0 ) {
710+ continue ;
711+ }
712+
713+ while (avcodec_receive_frame (d->codec , frame) == 0 ) {
714+ int convert_result;
715+ if (avcodec_decoder_process_sampled_frame (d, mat, frame, video_stream, &convert_result)) {
716+ av_frame_free (&frame);
717+ return (convert_result >= 0 );
718+ }
719+ av_frame_unref (frame);
720+ }
721+ }
722+ }
723+
724+ // Single-frame mode: just decode the first video frame
582725 bool done = false ;
583726 bool success = false ;
584727 while (!done) {
@@ -601,6 +744,26 @@ bool avcodec_decoder_decode(const avcodec_decoder d, opencv_mat mat)
601744 return success;
602745}
603746
747+ void avcodec_decoder_set_frame_sample_interval (avcodec_decoder d, float interval_seconds)
748+ {
749+ if (!d) {
750+ return ;
751+ }
752+ d->frame_sample_interval = interval_seconds;
753+ d->next_frame_time = 0.0 ;
754+ d->last_extracted_pts = -1.0 ;
755+ d->frame_delay_ms = 0 ;
756+ d->multi_frame_mode = (interval_seconds > 0.0 );
757+ }
758+
759+ int avcodec_decoder_get_frame_delay_ms (const avcodec_decoder d)
760+ {
761+ if (!d) {
762+ return 0 ;
763+ }
764+ return d->frame_delay_ms ;
765+ }
766+
604767void avcodec_decoder_release (avcodec_decoder d)
605768{
606769 if (d->codec ) {
0 commit comments