@@ -49,13 +49,6 @@ struct avcodec_decoder_struct {
4949 AVCodecContext* codec;
5050 AVIOContext* avio;
5151 int video_stream_index;
52-
53- // Multi-frame extraction state
54- int frame_sample_interval_ms; // Interval between frames in milliseconds
55- double next_frame_time; // Next frame time to extract
56- double last_extracted_pts; // Last extracted frame PTS
57- int frame_delay_ms; // Delay for current frame in milliseconds
58- bool multi_frame_mode; // Whether we're extracting multiple frames
5952};
6053
6154static int avcodec_decoder_read_callback (void * d_void, uint8_t * buf, int buf_size)
@@ -161,9 +154,7 @@ bool avcodec_decoder_is_streamable(const opencv_mat mat)
161154 return false ;
162155}
163156
164- avcodec_decoder avcodec_decoder_create (const opencv_mat buf,
165- const bool hevc_enabled,
166- const bool av1_enabled)
157+ avcodec_decoder avcodec_decoder_create (const opencv_mat buf, const bool hevc_enabled, const bool av1_enabled)
167158{
168159 avcodec_decoder d = new struct avcodec_decoder_struct ();
169160 memset (d, 0 , sizeof (struct avcodec_decoder_struct ));
@@ -295,7 +286,7 @@ int avcodec_decoder_get_icc(const avcodec_decoder d, void* dest, size_t dest_len
295286 if (!d || !d->codec ) {
296287 return -1 ;
297288 }
298-
289+
299290 const uint8_t * profile_data = avcodec_get_icc_profile (d->codec ->color_primaries , profile_size);
300291
301292 if (profile_size > dest_len) {
@@ -422,7 +413,7 @@ const char* avcodec_decoder_get_video_codec(const avcodec_decoder d)
422413 if (!d || !d->codec ) {
423414 return " Unknown" ;
424415 }
425-
416+
426417 switch (d->codec ->codec_id ) {
427418 case AV_CODEC_ID_H264:
428419 return " H264" ;
@@ -446,7 +437,7 @@ const char* avcodec_decoder_get_audio_codec(const avcodec_decoder d)
446437 if (!d || !d->container ) {
447438 return " Unknown" ;
448439 }
449-
440+
450441 for (unsigned int i = 0 ; i < d->container ->nb_streams ; i++) {
451442 AVStream* stream = d->container ->streams [i];
452443 if (stream->codecpar ->codec_type == AVMEDIA_TYPE_AUDIO) {
@@ -466,7 +457,7 @@ const char* avcodec_decoder_get_audio_codec(const avcodec_decoder d)
466457 }
467458 }
468459 }
469-
460+
470461 return " Unknown" ;
471462}
472463
@@ -481,59 +472,40 @@ bool avcodec_decoder_has_subtitles(const avcodec_decoder d)
481472 return false ;
482473}
483474
484- static int avcodec_decoder_convert_frame (const avcodec_decoder d, opencv_mat mat, AVFrame* frame)
475+ static int avcodec_decoder_copy_frame (const avcodec_decoder d, opencv_mat mat, AVFrame* frame)
485476{
486477 if (!d || !d->codec || !d->codec ->codec || !mat || !frame) {
487478 return -1 ;
488479 }
489-
480+
490481 auto cvMat = static_cast <cv::Mat*>(mat);
491482 if (!cvMat) {
492483 return -1 ;
493484 }
494485
495- // Check if rotation/transformation is needed
496- CVImageOrientation orientation = (CVImageOrientation)avcodec_decoder_get_orientation (d);
497- bool needs_transformation = (orientation != CV_IMAGE_ORIENTATION_TL);
498- bool dimensions_swapped = (orientation == CV_IMAGE_ORIENTATION_RT || orientation == CV_IMAGE_ORIENTATION_LB);
499-
500- int res = 0 ;
501- cv::Mat tempMat;
502- cv::Mat* decodeDst = cvMat;
503-
504- // If transformation is needed, decode to a temporary buffer with raw dimensions
505- if (needs_transformation) {
506- // For 90/270 rotation, create temp Mat with swapped dimensions
507- // For other transformations, use same dimensions as output
508- if (dimensions_swapped) {
509- tempMat = cv::Mat (frame->height , frame->width , CV_8UC4);
510- } else {
511- tempMat = cv::Mat (cvMat->rows , cvMat->cols , CV_8UC4);
486+ int res = avcodec_receive_frame (d->codec , frame);
487+ if (res >= 0 ) {
488+ // Calculate the step size based on the cv::Mat's width
489+ int stepSize =
490+ 4 * cvMat->cols ; // Assuming the cv::Mat is in BGRA format, which has 4 channels
491+ if (cvMat->cols % 32 != 0 ) {
492+ int width = cvMat->cols + 32 - (cvMat->cols % 32 );
493+ stepSize = 4 * width;
512494 }
513- decodeDst = &tempMat;
514- }
515-
516- {
517- // Use the decode destination's actual step (stride)
518- int stepSize = decodeDst->step ;
519-
520- // Validate that the stride and height are within the allocated buffer bounds
521- size_t required_size = stepSize * decodeDst->rows ;
522- size_t available_size =
523- (decodeDst->datalimit && decodeDst->data ) ? (decodeDst->datalimit - decodeDst->data ) : 0 ;
524- if (available_size > 0 && required_size > available_size) {
495+ if (!opencv_mat_set_row_stride (mat, stepSize)) {
525496 return -1 ;
526497 }
527498
528- // Create SwsContext for converting the frame format
499+ // Create SwsContext for converting the frame format and scaling
529500 struct SwsContext * sws =
530501 sws_getContext (frame->width ,
531502 frame->height ,
532503 (AVPixelFormat)(frame->format ), // Source dimensions and format
533- decodeDst->cols ,
534- decodeDst->rows ,
535- AV_PIX_FMT_BGRA, // Destination format
536- SWS_BILINEAR,
504+ cvMat->cols ,
505+ cvMat->rows ,
506+ AV_PIX_FMT_BGRA, // Destination dimensions and format
507+ SWS_BILINEAR, // Specify the scaling algorithm; you can choose another
508+ // according to your needs
537509 NULL ,
538510 NULL ,
539511 NULL );
@@ -569,8 +541,9 @@ static int avcodec_decoder_convert_frame(const avcodec_decoder d, opencv_mat mat
569541 sws_setColorspaceDetails (sws, inv_table, srcRange, table, 1 , 0 , 1 << 16 , 1 << 16 );
570542
571543 // The linesizes and data pointers for the destination
572- int dstLinesizes[4 ] = {stepSize, 0 , 0 , 0 };
573- uint8_t * dstData[4 ] = {decodeDst->data , NULL , NULL , NULL };
544+ int dstLinesizes[4 ];
545+ av_image_fill_linesizes (dstLinesizes, AV_PIX_FMT_BGRA, stepSize / 4 );
546+ uint8_t * dstData[4 ] = {cvMat->data , NULL , NULL , NULL };
574547
575548 // Perform the scaling and format conversion
576549 sws_scale (sws, frame->data , frame->linesize , 0 , frame->height , dstData, dstLinesizes);
@@ -579,28 +552,6 @@ static int avcodec_decoder_convert_frame(const avcodec_decoder d, opencv_mat mat
579552 sws_freeContext (sws);
580553 }
581554
582- // Apply orientation transformation if needed
583- if (needs_transformation) {
584- cv::OrientationTransform (int (orientation), tempMat);
585-
586- // Verify dimensions match after transformation
587- if (tempMat.cols != cvMat->cols || tempMat.rows != cvMat->rows ) {
588- return -1 ;
589- }
590-
591- // Copy the transformed image to the output Mat, respecting stride
592- opencv_mat_copy_with_stride (&tempMat, cvMat);
593- }
594-
595- return res;
596- }
597-
598- static int avcodec_decoder_copy_frame (const avcodec_decoder d, opencv_mat mat, AVFrame* frame)
599- {
600- int res = avcodec_receive_frame (d->codec , frame);
601- if (res >= 0 ) {
602- return avcodec_decoder_convert_frame (d, mat, frame);
603- }
604555 return res;
605556}
606557
@@ -622,106 +573,12 @@ static int avcodec_decoder_decode_packet(const avcodec_decoder d, opencv_mat mat
622573 return res;
623574}
624575
625- // Helper function to check if a frame should be extracted and process it
626- // Returns true if the frame was processed and should be returned
627- static bool avcodec_decoder_process_sampled_frame (avcodec_decoder d,
628- opencv_mat mat,
629- AVFrame* frame,
630- AVStream* video_stream,
631- int * out_result)
632- {
633- double frame_time = -1.0 ;
634- if (frame->pts != AV_NOPTS_VALUE) {
635- frame_time = frame->pts * av_q2d (video_stream->time_base );
636- }
637-
638- // Check if this frame should be extracted based on sampling interval
639- if (frame_time >= 0 && frame_time >= d->next_frame_time ) {
640- // Calculate frame delay for animation
641- if (d->last_extracted_pts >= 0 ) {
642- double delay_seconds = frame_time - d->last_extracted_pts ;
643- int delay_ms = (int )(delay_seconds * 1000.0 );
644- // Validate delay is reasonable (between 1ms and 60 seconds)
645- // Use sample interval if delay is out of bounds
646- if (delay_ms > 0 && delay_ms <= 60000 ) {
647- d->frame_delay_ms = delay_ms;
648- } else {
649- d->frame_delay_ms = (int )(d->frame_sample_interval_ms );
650- }
651- } else {
652- d->frame_delay_ms = (int )(d->frame_sample_interval_ms );
653- }
654-
655- d->last_extracted_pts = frame_time;
656- d->next_frame_time = frame_time + (d->frame_sample_interval_ms / 1000.0 );
657-
658- // Convert frame to output mat
659- *out_result = avcodec_decoder_convert_frame (d, mat, frame);
660- return true ;
661- }
662-
663- return false ;
664- }
665-
666- bool avcodec_decoder_decode (avcodec_decoder d, opencv_mat mat)
576+ bool avcodec_decoder_decode (const avcodec_decoder d, opencv_mat mat)
667577{
668578 if (!d || !d->container || !d->codec || !mat) {
669579 return false ;
670580 }
671-
672581 AVPacket packet;
673- AVStream* video_stream = d->container ->streams [d->video_stream_index ];
674-
675- // If we're in multi-frame mode, we need to sample frames based on time
676- if (d->multi_frame_mode ) {
677- AVFrame* frame = av_frame_alloc ();
678- if (!frame) {
679- return false ;
680- }
681-
682- while (true ) {
683- int res = av_read_frame (d->container , &packet);
684- if (res < 0 ) {
685- // Reached EOF - flush remaining frames from the decoder
686- avcodec_send_packet (d->codec , NULL );
687- while (avcodec_receive_frame (d->codec , frame) == 0 ) {
688- int convert_result;
689- if (avcodec_decoder_process_sampled_frame (d, mat, frame, video_stream, &convert_result)) {
690- av_frame_free (&frame);
691- return (convert_result >= 0 );
692- }
693- av_frame_unref (frame);
694- }
695-
696- // No more frames available
697- av_frame_free (&frame);
698- return false ;
699- }
700-
701- if (packet.stream_index != d->video_stream_index ) {
702- av_packet_unref (&packet);
703- continue ;
704- }
705-
706- res = avcodec_send_packet (d->codec , &packet);
707- av_packet_unref (&packet);
708-
709- if (res < 0 ) {
710- continue ;
711- }
712-
713- while (avcodec_receive_frame (d->codec , frame) == 0 ) {
714- int convert_result;
715- if (avcodec_decoder_process_sampled_frame (d, mat, frame, video_stream, &convert_result)) {
716- av_frame_free (&frame);
717- return (convert_result >= 0 );
718- }
719- av_frame_unref (frame);
720- }
721- }
722- }
723-
724- // Single-frame mode: just decode the first video frame
725582 bool done = false ;
726583 bool success = false ;
727584 while (!done) {
@@ -744,26 +601,6 @@ bool avcodec_decoder_decode(avcodec_decoder d, opencv_mat mat)
744601 return success;
745602}
746603
747- void avcodec_decoder_set_frame_sample_interval_ms (avcodec_decoder d, int frame_sample_interval_ms)
748- {
749- if (!d) {
750- return ;
751- }
752- d->frame_sample_interval_ms = frame_sample_interval_ms;
753- d->next_frame_time = 0.0 ;
754- d->last_extracted_pts = -1.0 ;
755- d->frame_delay_ms = 0 ;
756- d->multi_frame_mode = (frame_sample_interval_ms > 0 );
757- }
758-
759- int avcodec_decoder_get_frame_delay_ms (const avcodec_decoder d)
760- {
761- if (!d) {
762- return 0 ;
763- }
764- return d->frame_delay_ms ;
765- }
766-
767604void avcodec_decoder_release (avcodec_decoder d)
768605{
769606 if (d->codec ) {
0 commit comments