From 3f8781db69cb49475f59808b60e6f3e66ffbca39 Mon Sep 17 00:00:00 2001 From: Ivan Savenko Date: Fri, 3 May 2024 22:22:57 +0300 Subject: [PATCH] Fixed sound extraction, slightly better approach to ffmpeg --- client/CMT.cpp | 11 -- client/media/CVideoHandler.cpp | 233 ++++++++++++++++++++++----------- client/media/CVideoHandler.h | 24 ++-- client/media/IVideoPlayer.h | 6 +- client/widgets/VideoWidget.cpp | 4 +- 5 files changed, 176 insertions(+), 102 deletions(-) diff --git a/client/CMT.cpp b/client/CMT.cpp index 859da2dce..d3aeaed0a 100644 --- a/client/CMT.cpp +++ b/client/CMT.cpp @@ -396,20 +396,9 @@ int main(int argc, char * argv[]) //plays intro, ends when intro is over or button has been pressed (handles events) void playIntro() { - auto audioData = CCS->videoh->getAudio(VideoPath::builtin("3DOLOGO.SMK")); - int sound = CCS->soundh->playSound(audioData); if(CCS->videoh->playIntroVideo(VideoPath::builtin("3DOLOGO.SMK"))) - { - audioData = CCS->videoh->getAudio(VideoPath::builtin("NWCLOGO.SMK")); - sound = CCS->soundh->playSound(audioData); if (CCS->videoh->playIntroVideo(VideoPath::builtin("NWCLOGO.SMK"))) - { - audioData = CCS->videoh->getAudio(VideoPath::builtin("H3INTRO.SMK")); - sound = CCS->soundh->playSound(audioData); CCS->videoh->playIntroVideo(VideoPath::builtin("H3INTRO.SMK")); - } - } - CCS->soundh->stopSound(sound); } static void mainLoop() diff --git a/client/media/CVideoHandler.cpp b/client/media/CVideoHandler.cpp index 7ea3c0a2c..ec664ae5b 100644 --- a/client/media/CVideoHandler.cpp +++ b/client/media/CVideoHandler.cpp @@ -12,6 +12,9 @@ #ifndef DISABLE_VIDEO +#include "ISoundPlayer.h" + +#include "../CGameInfo.h" #include "../CMT.h" #include "../CPlayerInterface.h" #include "../eventsSDL/InputHandler.h" @@ -81,12 +84,16 @@ static std::unique_ptr findVideoData(const VideoPath & videoToOpen void CVideoInstance::open(const VideoPath & videoToOpen) { - state.videoData = findVideoData(videoToOpen); + input = findVideoData(videoToOpen); +} +void CVideoInstance::openContext(FFMpegStreamState & state) +{ static const int BUFFER_SIZE = 4096; + input->seek(0); auto * buffer = static_cast(av_malloc(BUFFER_SIZE)); // will be freed by ffmpeg - state.context = avio_alloc_context(buffer, BUFFER_SIZE, 0, state.videoData.get(), lodRead, nullptr, lodSeek); + state.context = avio_alloc_context(buffer, BUFFER_SIZE, 0, input.get(), lodRead, nullptr, lodSeek); state.formatContext = avformat_alloc_context(); state.formatContext->pb = state.context; @@ -101,52 +108,55 @@ void CVideoInstance::open(const VideoPath & videoToOpen) if(avfopen < 0) throwFFmpegError(findStreamInfo); - - for(int i = 0; i < state.formatContext->nb_streams; i++) - { - if(state.formatContext->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO && video.streamIndex == -1) - { - openStream(video, i); - } - - if(state.formatContext->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO && audio.streamIndex == -1) - openStream(audio, i); - } } -void CVideoInstance::openStream(FFMpegStreamState & streamState, int streamIndex) +void CVideoInstance::openCodec(FFMpegStreamState & state, int streamIndex) { - streamState.streamIndex = streamIndex; + state.streamIndex = streamIndex; // Find the decoder for the stream - streamState.codec = avcodec_find_decoder(state.formatContext->streams[streamIndex]->codecpar->codec_id); + state.codec = avcodec_find_decoder(state.formatContext->streams[streamIndex]->codecpar->codec_id); - if(streamState.codec == nullptr) + if(state.codec == nullptr) throw std::runtime_error("Unsupported codec"); - streamState.codecContext = avcodec_alloc_context3(streamState.codec); - if(streamState.codecContext == nullptr) + state.codecContext = avcodec_alloc_context3(state.codec); + if(state.codecContext == nullptr) throw std::runtime_error("Failed to create codec context"); // Get a pointer to the codec context for the video stream - int ret = avcodec_parameters_to_context(streamState.codecContext, state.formatContext->streams[streamIndex]->codecpar); + int ret = avcodec_parameters_to_context(state.codecContext, state.formatContext->streams[streamIndex]->codecpar); if(ret < 0) { //We cannot get codec from parameters - avcodec_free_context(&streamState.codecContext); + avcodec_free_context(&state.codecContext); throwFFmpegError(ret); } // Open codec - ret = avcodec_open2(streamState.codecContext, streamState.codec, nullptr); + ret = avcodec_open2(state.codecContext, state.codec, nullptr); if(ret < 0) { // Could not open codec - streamState.codec = nullptr; + state.codec = nullptr; throwFFmpegError(ret); } } +void CVideoInstance::openVideo() +{ + openContext(video); + + for(int i = 0; i < video.formatContext->nb_streams; i++) + { + if(video.formatContext->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) + { + openCodec(video, i); + return; + } + } +} + void CVideoInstance::prepareOutput(bool scaleToScreenSize, bool useTextureOutput) { if (video.streamIndex == -1) @@ -204,7 +214,7 @@ bool CVideoInstance::nextFrame() for(;;) { - int ret = av_read_frame(state.formatContext, &packet); + int ret = av_read_frame(video.formatContext, &packet); if(ret < 0) { if(ret == AVERROR_EOF) @@ -218,11 +228,11 @@ bool CVideoInstance::nextFrame() // Decode video frame int rc = avcodec_send_packet(video.codecContext, &packet); if(rc < 0) - throwFFmpegError(ret); + throwFFmpegError(rc); rc = avcodec_receive_frame(video.codecContext, output.frame); if(rc < 0) - throwFFmpegError(ret); + throwFFmpegError(rc); uint8_t * data[4] = {}; int linesize[4] = {}; @@ -276,22 +286,25 @@ void CVideoInstance::close() SDL_DestroyTexture(output.textureRGB); SDL_FreeSurface(output.surface); + closeState(video); +} + +void CVideoInstance::closeState(FFMpegStreamState & streamState) +{ // state.videoStream.codec??? // state.audioStream.codec??? avcodec_close(video.codecContext); avcodec_free_context(&video.codecContext); - avcodec_close(audio.codecContext); - avcodec_free_context(&audio.codecContext); + avcodec_close(video.codecContext); + avcodec_free_context(&video.codecContext); - avformat_close_input(&state.formatContext); - av_free(state.context); + avformat_close_input(&video.formatContext); + av_free(video.context); output = FFMpegVideoOutput(); video = FFMpegStreamState(); - audio = FFMpegStreamState(); - state = FFMpegFileState(); } CVideoInstance::~CVideoInstance() @@ -328,7 +341,7 @@ void CVideoInstance::tick(uint32_t msPassed) # else auto packet_duration = frame->duration; # endif - double frameEndTime = (output.frame->pts + packet_duration) * av_q2d(state.formatContext->streams[video.streamIndex]->time_base); + double frameEndTime = (output.frame->pts + packet_duration) * av_q2d(video.formatContext->streams[video.streamIndex]->time_base); output.frameTime += msPassed / 1000.0; if(output.frameTime >= frameEndTime) @@ -338,44 +351,118 @@ void CVideoInstance::tick(uint32_t msPassed) } } -# if 0 - -std::pair, si64> CVideoPlayer::getAudio(const VideoPath & videoToOpen) +static int32_t sampleSizeBytes(int audioFormat) { - std::pair, si64> dat(std::make_pair(nullptr, 0)); - - FFMpegFileState audio; - openVideoFile(audio, videoToOpen); - - if (audio.audioStream.streamIndex < 0) + switch (audioFormat) { - closeVideoFile(audio); - return { nullptr, 0}; + case AV_SAMPLE_FMT_U8: ///< unsigned 8 bits + case AV_SAMPLE_FMT_U8P: ///< unsigned 8 bits, planar + return 1; + case AV_SAMPLE_FMT_S16: ///< signed 16 bits + case AV_SAMPLE_FMT_S16P: ///< signed 16 bits, planar + return 2; + case AV_SAMPLE_FMT_S32: ///< signed 32 bits + case AV_SAMPLE_FMT_S32P: ///< signed 32 bits, planar + case AV_SAMPLE_FMT_FLT: ///< float + case AV_SAMPLE_FMT_FLTP: ///< float, planar + return 4; + case AV_SAMPLE_FMT_DBL: ///< double + case AV_SAMPLE_FMT_DBLP: ///< double, planar + case AV_SAMPLE_FMT_S64: ///< signed 64 bits + case AV_SAMPLE_FMT_S64P: ///< signed 64 bits, planar + return 8; + } + throw std::runtime_error("Invalid audio format"); +} + +static int32_t sampleWavType(int audioFormat) +{ + switch (audioFormat) + { + case AV_SAMPLE_FMT_U8: ///< unsigned 8 bits + case AV_SAMPLE_FMT_U8P: ///< unsigned 8 bits, planar + case AV_SAMPLE_FMT_S16: ///< signed 16 bits + case AV_SAMPLE_FMT_S16P: ///< signed 16 bits, planar + case AV_SAMPLE_FMT_S32: ///< signed 32 bits + case AV_SAMPLE_FMT_S32P: ///< signed 32 bits, planar + case AV_SAMPLE_FMT_S64: ///< signed 64 bits + case AV_SAMPLE_FMT_S64P: ///< signed 64 bits, planar + return 1; // PCM + + case AV_SAMPLE_FMT_FLT: ///< float + case AV_SAMPLE_FMT_FLTP: ///< float, planar + case AV_SAMPLE_FMT_DBL: ///< double + case AV_SAMPLE_FMT_DBLP: ///< double, planar + return 3; // IEEE float + } + throw std::runtime_error("Invalid audio format"); +} + +void CVideoInstance::playAudio() +{ + FFMpegStreamState audio; + + openContext(audio); + + for(int i = 0; i < audio.formatContext->nb_streams; i++) + { + if(audio.formatContext->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) + { + openCodec(audio, i); + break; + } } - // Open codec + std::pair, si64> dat(std::make_pair(nullptr, 0)); + + if (audio.streamIndex < 0) + return; // nothing to play + + const auto * codecpar = audio.formatContext->streams[audio.streamIndex]->codecpar; AVFrame *frameAudio = av_frame_alloc(); - + AVFrame *frameVideo = av_frame_alloc(); AVPacket packet; std::vector samples; + int32_t sampleSize = sampleSizeBytes(codecpar->format); + + samples.reserve(44100 * 5); // arbitrary 5-second buffer + while (av_read_frame(audio.formatContext, &packet) >= 0) { - if(packet.stream_index == audio.audioStream.streamIndex) + if (packet.stream_index == video.streamIndex) { - int rc = avcodec_send_packet(audio.audioStream.codecContext, &packet); - if (rc >= 0) - packet.size = 0; - rc = avcodec_receive_frame(audio.audioStream.codecContext, frameAudio); - int bytesToRead = (frameAudio->nb_samples * 2 * (audio.formatContext->streams[audio.audioStream.streamIndex]->codecpar->bits_per_coded_sample / 8)); - if (rc >= 0) - for (int s = 0; s < bytesToRead; s += sizeof(ui8)) - { - ui8 value; - memcpy(&value, &frameAudio->data[0][s], sizeof(ui8)); - samples.push_back(value); - } + // Decode video frame + int rc = avcodec_send_packet(video.codecContext, &packet); + if(rc < 0) + throwFFmpegError(rc); + + rc = avcodec_receive_frame(video.codecContext, frameVideo); + if(rc < 0) + throwFFmpegError(rc); + } + + if(packet.stream_index == audio.streamIndex) + { + int rc = avcodec_send_packet(audio.codecContext, &packet); + + if(rc < 0) + throwFFmpegError(rc); + + for (;;) + { + rc = avcodec_receive_frame(audio.codecContext, frameAudio); + if (rc == AVERROR(EAGAIN)) + break; + + if(rc < 0) + throwFFmpegError(rc); + + int bytesToRead = frameAudio->nb_samples * 2 * sampleSize; + + samples.insert(samples.end(), frameAudio->data[0], frameAudio->data[0] + bytesToRead); + } } av_packet_unref(&packet); } @@ -391,16 +478,19 @@ std::pair, si64> CVideoPlayer::getAudio(const VideoPath ui32 SamplesPerSec = 22050; ui32 bytesPerSec = 22050 * 2; ui16 blockAlign = 2; - ui16 bitsPerSample = 16; + ui16 bitsPerSample = 32; ui8 Subchunk2ID[4] = {'d', 'a', 't', 'a'}; ui32 Subchunk2Size; } wav_hdr; wav_hdr wav; wav.ChunkSize = samples.size() + sizeof(wav_hdr) - 8; - wav.Subchunk2Size = samples.size() + sizeof(wav_hdr) - 44; - wav.SamplesPerSec = audio.formatContext->streams[audio.audioStream.streamIndex]->codecpar->sample_rate; - wav.bitsPerSample = audio.formatContext->streams[audio.audioStream.streamIndex]->codecpar->bits_per_coded_sample; + wav.AudioFormat = sampleWavType(codecpar->format); + wav.NumOfChan = codecpar->channels; + wav.SamplesPerSec = codecpar->sample_rate; + wav.bytesPerSec = codecpar->sample_rate * sampleSize; + wav.bitsPerSample = sampleSize * 8; + wav.Subchunk2Size = samples.size() + sizeof(wav_hdr) - 44; auto wavPtr = reinterpret_cast(&wav); dat = std::make_pair(std::make_unique(samples.size() + sizeof(wav_hdr)), samples.size() + sizeof(wav_hdr)); @@ -410,18 +500,17 @@ std::pair, si64> CVideoPlayer::getAudio(const VideoPath if (frameAudio) av_frame_free(&frameAudio); - closeVideoFile(audio); - - return dat; + CCS->soundh->playSound(dat); + closeState(audio); } -# endif - bool CVideoPlayer::openAndPlayVideoImpl(const VideoPath & name, const Point & position, bool useOverlay, bool scale, bool stopOnKey) { CVideoInstance instance; instance.open(name); + instance.playAudio(); + instance.openVideo(); instance.prepareOutput(scale, useOverlay); auto lastTimePoint = boost::chrono::steady_clock::now(); @@ -460,7 +549,7 @@ bool CVideoPlayer::openAndPlayVideoImpl(const VideoPath & name, const Point & po #endif // Framerate delay - double targetFrameTimeSeconds = packet_duration * av_q2d(instance.state.formatContext->streams[instance.video.streamIndex]->time_base); + double targetFrameTimeSeconds = packet_duration * av_q2d(instance.video.formatContext->streams[instance.video.streamIndex]->time_base); auto targetFrameTime = boost::chrono::milliseconds(static_cast(1000 * (targetFrameTimeSeconds))); auto timePointAfterPresent = boost::chrono::steady_clock::now(); @@ -489,14 +578,10 @@ std::unique_ptr CVideoPlayer::open(const VideoPath & name, bool auto result = std::make_unique(); result->open(name); + result->openVideo(); result->prepareOutput(scaleToScreen, false); return result; } -std::pair, si64> CVideoPlayer::getAudio(const VideoPath & videoToOpen) -{ - return {nullptr, 0}; -} - #endif diff --git a/client/media/CVideoHandler.h b/client/media/CVideoHandler.h index 9a578260c..bfb8e865b 100644 --- a/client/media/CVideoHandler.h +++ b/client/media/CVideoHandler.h @@ -29,16 +29,12 @@ VCMI_LIB_NAMESPACE_END struct FFMpegStreamState { - int streamIndex = -1; - const AVCodec * codec = nullptr; - AVCodecContext * codecContext = nullptr; -}; - -struct FFMpegFileState -{ - std::unique_ptr videoData; AVIOContext * context = nullptr; AVFormatContext * formatContext = nullptr; + + const AVCodec * codec = nullptr; + AVCodecContext * codecContext = nullptr; + int streamIndex = -1; }; struct FFMpegVideoOutput @@ -59,16 +55,20 @@ class CVideoInstance final : public IVideoInstance { friend class CVideoPlayer; - FFMpegFileState state; + std::unique_ptr input; + FFMpegStreamState video; - FFMpegStreamState audio; FFMpegVideoOutput output; void open(const VideoPath & fname); - void openStream(FFMpegStreamState & streamState, int streamIndex); + void openContext(FFMpegStreamState & streamState); + void openCodec(FFMpegStreamState & streamState, int streamIndex); + void openVideo(); void prepareOutput(bool scaleToScreenSize, bool useTextureOutput); + bool nextFrame(); void close(); + void closeState(FFMpegStreamState & streamState); public: ~CVideoInstance(); @@ -78,6 +78,7 @@ public: void show(const Point & position, Canvas & canvas) final; void tick(uint32_t msPassed) final; + void playAudio() final; }; class CVideoPlayer final : public IVideoPlayer @@ -89,7 +90,6 @@ public: bool playIntroVideo(const VideoPath & name) final; void playSpellbookAnimation(const VideoPath & name, const Point & position) final; std::unique_ptr open(const VideoPath & name, bool scaleToScreen) final; - std::pair, si64> getAudio(const VideoPath & videoToOpen) final; }; #endif diff --git a/client/media/IVideoPlayer.h b/client/media/IVideoPlayer.h index 4df144f4c..a0c1e6dc7 100644 --- a/client/media/IVideoPlayer.h +++ b/client/media/IVideoPlayer.h @@ -32,6 +32,9 @@ public: /// Advances video playback by specified duration virtual void tick(uint32_t msPassed) = 0; + /// Attempts to start audio playback from video, if any exists + virtual void playAudio() = 0; + virtual ~IVideoInstance() = default; }; @@ -47,8 +50,5 @@ public: /// Load video from specified path. Returns nullptr on failure virtual std::unique_ptr open(const VideoPath & name, bool scaleToScreen) = 0; - /// Extracts audio data from provided video in wav format. Return nullptr on failure - virtual std::pair, si64> getAudio(const VideoPath & videoToOpen) = 0; - virtual ~IVideoPlayer() = default; }; diff --git a/client/widgets/VideoWidget.cpp b/client/widgets/VideoWidget.cpp index 380c4f467..76627be5e 100644 --- a/client/widgets/VideoWidget.cpp +++ b/client/widgets/VideoWidget.cpp @@ -44,8 +44,8 @@ void VideoWidget::show(Canvas & to) void VideoWidget::activate() { - auto audioData = CCS->videoh->getAudio(current); - videoSoundHandle = CCS->soundh->playSound(audioData, -1); + if(videoInstance) + videoInstance->playAudio(); if(videoSoundHandle != -1) {