From e497a5137310ccabd3fbdce592db295314202df9 Mon Sep 17 00:00:00 2001 From: Ivan Savenko Date: Tue, 1 Oct 2024 15:25:58 +0000 Subject: [PATCH] Use swresample (part of FFmpeg) to de-planarize audio into format that can be consumed by SDL --- CMakeLists.txt | 6 +-- client/media/CVideoHandler.cpp | 77 ++++++++++++++++++++++++++-------- 2 files changed, 60 insertions(+), 23 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a9918c0a3..31bd3b8f1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -486,11 +486,7 @@ if(NOT FORCE_BUNDLED_MINIZIP) endif() if (ENABLE_CLIENT) - set(FFMPEG_COMPONENTS avutil swscale avformat avcodec) - if(APPLE_IOS AND NOT USING_CONAN) - list(APPEND FFMPEG_COMPONENTS swresample) - endif() - find_package(ffmpeg COMPONENTS ${FFMPEG_COMPONENTS}) + find_package(ffmpeg COMPONENTS avutil swscale avformat avcodec swresample) find_package(SDL2 REQUIRED) find_package(SDL2_image REQUIRED) diff --git a/client/media/CVideoHandler.cpp b/client/media/CVideoHandler.cpp index 51e17ff10..9428c7428 100644 --- a/client/media/CVideoHandler.cpp +++ b/client/media/CVideoHandler.cpp @@ -33,7 +33,9 @@ extern "C" { #include #include #include +#include #include +#include } // Define a set of functions to read data @@ -501,32 +503,71 @@ std::pair, si64> CAudioInstance::extractAudio(const Vide int numChannels = codecpar->ch_layout.nb_channels; #endif - samples.reserve(44100 * 5); // arbitrary 5-second buffer + samples.reserve(44100 * 5); // arbitrary 5-second buffer to reduce reallocations - for (;;) + if (formatProperties.isPlanar && numChannels > 1) { - decodeNextFrame(); - const AVFrame * frame = getCurrentFrame(); + // Format is 'planar', which is not supported by wav / SDL + // Use swresample part of ffmpeg to deplanarize audio into format supported by wav / SDL - if (!frame) - break; + auto sourceFormat = static_cast(codecpar->format); + auto targetFormat = av_get_alt_sample_fmt(sourceFormat, false); - int samplesToRead = frame->nb_samples * numChannels; - int bytesToRead = samplesToRead * formatProperties.sampleSizeBytes; + SwrContext * swr_ctx = swr_alloc(); - if (formatProperties.isPlanar && numChannels > 1) +#if (LIBAVUTIL_VERSION_MAJOR < 58) + av_opt_set_channel_layout(swr_ctx, "in_chlayout", codecpar->channel_layout, 0); + av_opt_set_channel_layout(swr_ctx, "out_chlayout", codecpar->channel_layout, 0); +#else + av_opt_set_chlayout(swr_ctx, "in_chlayout", &codecpar->ch_layout, 0); + av_opt_set_chlayout(swr_ctx, "out_chlayout", &codecpar->ch_layout, 0); +#endif + av_opt_set_int(swr_ctx, "in_sample_rate", codecpar->sample_rate, 0); + av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", sourceFormat, 0); + av_opt_set_int(swr_ctx, "out_sample_rate", codecpar->sample_rate, 0); + av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", targetFormat, 0); + + int initResult = swr_init(swr_ctx); + if (initResult < 0) + throwFFmpegError(initResult); + + std::vector frameSamplesBuffer; + for (;;) { - // Workaround for lack of resampler - // Currently, ffmpeg on conan systems is built without sws resampler - // Because of that, and because wav format does not supports 'planar' formats from ffmpeg - // we need to de-planarize it and convert to "normal" (non-planar / interleaved) stream - samples.reserve(samples.size() + bytesToRead); - for (int sm = 0; sm < frame->nb_samples; ++sm) - for (int ch = 0; ch < numChannels; ++ch) - samples.insert(samples.end(), frame->data[ch] + sm * formatProperties.sampleSizeBytes, frame->data[ch] + (sm+1) * formatProperties.sampleSizeBytes ); + decodeNextFrame(); + const AVFrame * frame = getCurrentFrame(); + + if (!frame) + break; + + size_t samplesToRead = frame->nb_samples * numChannels; + size_t bytesToRead = samplesToRead * formatProperties.sampleSizeBytes; + frameSamplesBuffer.resize(std::max(frameSamplesBuffer.size(), bytesToRead)); + uint8_t * frameSamplesPtr = frameSamplesBuffer.data(); + + int result = swr_convert(swr_ctx, &frameSamplesPtr, frame->nb_samples, (const uint8_t **)frame->data, frame->nb_samples); + + if (result < 0) + throwFFmpegError(result); + + size_t samplesToCopy = result * numChannels; + size_t bytesToCopy = samplesToCopy * formatProperties.sampleSizeBytes; + samples.insert(samples.end(), frameSamplesBuffer.begin(), frameSamplesBuffer.begin() + bytesToCopy); } - else + swr_free(&swr_ctx); + } + else + { + for (;;) { + decodeNextFrame(); + const AVFrame * frame = getCurrentFrame(); + + if (!frame) + break; + + size_t samplesToRead = frame->nb_samples * numChannels; + size_t bytesToRead = samplesToRead * formatProperties.sampleSizeBytes; samples.insert(samples.end(), frame->data[0], frame->data[0] + bytesToRead); } }