1 From 8ccad6937177b1b92e40ab8f4447ea27bac009a7 Mon Sep 17 00:00:00 2001
2 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Lalinsk=C3=BD?= <lalinsky@gmail.com>
3 Date: Fri, 4 Nov 2022 21:47:38 +0100
4 Subject: [PATCH] Use FFmpeg 5.x (#120)
6 * Use FFmpeg 5.1.2 for CI builds
8 * Build on Ubuntu 20.04
10 * Upgrade code to FFmpeg 5.x APIs
12 * Only set FFmpeg include dirs if building tools
18 .github/workflows/build.yml | 6 +-
19 CMakeLists.txt | 16 --
20 package/build.sh | 4 +-
21 src/audio/ffmpeg_audio_processor.h | 2 -
22 src/audio/ffmpeg_audio_processor_avresample.h | 72 -------
23 src/audio/ffmpeg_audio_processor_swresample.h | 18 +-
24 src/audio/ffmpeg_audio_reader.h | 197 +++++++++---------
25 tests/CMakeLists.txt | 6 +
26 8 files changed, 122 insertions(+), 199 deletions(-)
27 delete mode 100644 src/audio/ffmpeg_audio_processor_avresample.h
29 diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
30 index 92761d9..baf67b7 100644
31 --- a/.github/workflows/build.yml
32 +++ b/.github/workflows/build.yml
37 - runs-on: ubuntu-18.04
38 + runs-on: ubuntu-20.04
42 @@ -50,7 +50,7 @@ jobs:
46 - runs-on: ubuntu-18.04
47 + runs-on: ubuntu-20.04
51 @@ -71,7 +71,7 @@ jobs:
55 - runs-on: ubuntu-18.04
56 + runs-on: ubuntu-20.04
60 diff --git a/CMakeLists.txt b/CMakeLists.txt
61 index f8d6a32..4da2405 100644
64 @@ -84,9 +84,6 @@ find_package(FFmpeg)
66 cmake_push_check_state(RESET)
67 set(CMAKE_REQUIRED_LIBRARIES ${FFMPEG_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT} -lm)
68 - check_function_exists(av_packet_unref HAVE_AV_PACKET_UNREF)
69 - check_function_exists(av_frame_alloc HAVE_AV_FRAME_ALLOC)
70 - check_function_exists(av_frame_free HAVE_AV_FRAME_FREE)
71 cmake_pop_check_state()
74 @@ -163,14 +160,11 @@ message(STATUS "Using ${FFT_LIB} for FFT calculations")
75 if(NOT AUDIO_PROCESSOR_LIB)
76 if(FFMPEG_LIBSWRESAMPLE_FOUND)
77 set(AUDIO_PROCESSOR_LIB "swresample")
78 - elseif(FFMPEG_LIBAVRESAMPLE_FOUND)
79 - set(AUDIO_PROCESSOR_LIB "avresample")
83 if(AUDIO_PROCESSOR_LIB STREQUAL "swresample")
84 if(FFMPEG_LIBSWRESAMPLE_FOUND)
85 - set(USE_AVRESAMPLE OFF)
86 set(USE_SWRESAMPLE ON)
87 set(AUDIO_PROCESSOR_LIBRARIES ${FFMPEG_LIBSWRESAMPLE_LIBRARIES})
88 set(AUDIO_PROCESSOR_INCLUDE_DIRS ${FFMPEG_LIBSWRESAMPLE_INCLUDE_DIRS})
89 @@ -178,16 +172,6 @@ if(AUDIO_PROCESSOR_LIB STREQUAL "swresample")
90 message(FATAL_ERROR "Selected ${AUDIO_PROCESSOR_LIB} for audio processing, but the library is not found")
92 message(STATUS "Using ${AUDIO_PROCESSOR_LIB} for audio conversion")
93 -elseif(AUDIO_PROCESSOR_LIB STREQUAL "avresample")
94 - if(FFMPEG_LIBAVRESAMPLE_FOUND)
95 - set(USE_AVRESAMPLE ON)
96 - set(USE_SWRESAMPLE OFF)
97 - set(AUDIO_PROCESSOR_LIBRARIES ${FFMPEG_LIBAVRESAMPLE_LIBRARIES})
98 - set(AUDIO_PROCESSOR_INCLUDE_DIRS ${FFMPEG_LIBAVRESAMPLE_INCLUDE_DIRS})
100 - message(FATAL_ERROR "Selected ${AUDIO_PROCESSOR_LIB} for audio processing, but the library is not found")
102 - message(STATUS "Using ${AUDIO_PROCESSOR_LIB} for audio conversion")
104 message(STATUS "Building without audio conversion support, please install FFmpeg with libswresample")
106 diff --git a/package/build.sh b/package/build.sh
107 index da631ae..b41d36e 100755
108 --- a/package/build.sh
109 +++ b/package/build.sh
110 @@ -7,8 +7,8 @@ set -eux
112 BASE_DIR=$(cd $(dirname $0)/.. && pwd)
114 -FFMPEG_VERSION=4.4.1
115 -FFMPEG_BUILD_TAG=v4.4.1-1
116 +FFMPEG_VERSION=5.1.2
117 +FFMPEG_BUILD_TAG=v${FFMPEG_VERSION}-1
119 TMP_BUILD_DIR=$BASE_DIR/$(mktemp -d build.XXXXXXXX)
120 trap 'rm -rf $TMP_BUILD_DIR' EXIT
121 diff --git a/src/audio/ffmpeg_audio_processor.h b/src/audio/ffmpeg_audio_processor.h
122 index 7628fc7..39f4f6d 100644
123 --- a/src/audio/ffmpeg_audio_processor.h
124 +++ b/src/audio/ffmpeg_audio_processor.h
127 #if defined(USE_SWRESAMPLE)
128 #include "audio/ffmpeg_audio_processor_swresample.h"
129 -#elif defined(USE_AVRESAMPLE)
130 -#include "audio/ffmpeg_audio_processor_avresample.h"
132 #error "no audio processing library"
134 diff --git a/src/audio/ffmpeg_audio_processor_avresample.h b/src/audio/ffmpeg_audio_processor_avresample.h
135 deleted file mode 100644
136 index bd85f92..0000000
137 --- a/src/audio/ffmpeg_audio_processor_avresample.h
140 -// Copyright (C) 2016 Lukas Lalinsky
141 -// Distributed under the MIT license, see the LICENSE file for details.
143 -#ifndef CHROMAPRINT_AUDIO_FFMPEG_AUDIO_PROCESSOR_AVRESAMPLE_H_
144 -#define CHROMAPRINT_AUDIO_FFMPEG_AUDIO_PROCESSOR_AVRESAMPLE_H_
147 -#include <libavresample/avresample.h>
150 -namespace chromaprint {
152 -class FFmpegAudioProcessor {
154 - FFmpegAudioProcessor() {
155 - m_resample_ctx = avresample_alloc_context();
158 - ~FFmpegAudioProcessor() {
159 - avresample_free(&m_resample_ctx);
162 - void SetCompatibleMode() {
163 - av_opt_set_int(m_resample_ctx, "filter_size", 16, 0);
164 - av_opt_set_int(m_resample_ctx, "phase_shift", 8, 0);
165 - av_opt_set_int(m_resample_ctx, "linear_interp", 1, 0);
166 - av_opt_set_double(m_resample_ctx, "cutoff", 0.8, 0);
169 - void SetInputChannelLayout(int64_t channel_layout) {
170 - av_opt_set_int(m_resample_ctx, "in_channel_layout", channel_layout, 0);
173 - void SetInputSampleFormat(AVSampleFormat sample_format) {
174 - av_opt_set_int(m_resample_ctx, "in_sample_fmt", sample_format, 0);
177 - void SetInputSampleRate(int sample_rate) {
178 - av_opt_set_int(m_resample_ctx, "in_sample_rate", sample_rate, 0);
181 - void SetOutputChannelLayout(int64_t channel_layout) {
182 - av_opt_set_int(m_resample_ctx, "out_channel_layout", channel_layout, 0);
185 - void SetOutputSampleFormat(AVSampleFormat sample_format) {
186 - av_opt_set_int(m_resample_ctx, "out_sample_fmt", sample_format, 0);
189 - void SetOutputSampleRate(int sample_rate) {
190 - av_opt_set_int(m_resample_ctx, "out_sample_fmt", sample_rate, 0);
194 - return avresample_open(m_resample_ctx);
197 - int Convert(uint8_t **out, int out_count, const uint8_t **in, int in_count) {
198 - return avresample_convert(m_resample_ctx, out, 0, out_count, (uint8_t **) in, 0, in_count);
201 - int Flush(uint8_t **out, int out_count) {
202 - return avresample_read(m_resample_ctx, out, out_count);
206 - AVAudioResampleContext *m_resample_ctx = nullptr;
209 -}; // namespace chromaprint
212 diff --git a/src/audio/ffmpeg_audio_processor_swresample.h b/src/audio/ffmpeg_audio_processor_swresample.h
213 index b86266b..b1d4bea 100644
214 --- a/src/audio/ffmpeg_audio_processor_swresample.h
215 +++ b/src/audio/ffmpeg_audio_processor_swresample.h
216 @@ -28,30 +28,28 @@ class FFmpegAudioProcessor {
217 av_opt_set_double(m_swr_ctx, "cutoff", 0.8, 0);
220 - void SetInputChannelLayout(int64_t channel_layout) {
221 - av_opt_set_int(m_swr_ctx, "icl", channel_layout, 0);
222 - av_opt_set_int(m_swr_ctx, "ich", av_get_channel_layout_nb_channels(channel_layout), 0);
223 + void SetInputChannelLayout(AVChannelLayout *channel_layout) {
224 + av_opt_set_chlayout(m_swr_ctx, "in_chlayout", channel_layout, 0); // pass the whole layout: u.mask is only meaningful for AV_CHANNEL_ORDER_NATIVE, and "in_channel_layout" is deprecated
227 void SetInputSampleFormat(AVSampleFormat sample_format) {
228 - av_opt_set_int(m_swr_ctx, "isf", sample_format, 0);
229 + av_opt_set_sample_fmt(m_swr_ctx, "in_sample_fmt", sample_format, 0);
232 void SetInputSampleRate(int sample_rate) {
233 - av_opt_set_int(m_swr_ctx, "isr", sample_rate, 0);
234 + av_opt_set_int(m_swr_ctx, "in_sample_rate", sample_rate, 0);
237 - void SetOutputChannelLayout(int64_t channel_layout) {
238 - av_opt_set_int(m_swr_ctx, "ocl", channel_layout, 0);
239 - av_opt_set_int(m_swr_ctx, "och", av_get_channel_layout_nb_channels(channel_layout), 0);
240 + void SetOutputChannelLayout(AVChannelLayout *channel_layout) {
241 + av_opt_set_chlayout(m_swr_ctx, "out_chlayout", channel_layout, 0); // pass the whole layout: u.mask is only meaningful for AV_CHANNEL_ORDER_NATIVE, and "out_channel_layout" is deprecated
244 void SetOutputSampleFormat(AVSampleFormat sample_format) {
245 - av_opt_set_int(m_swr_ctx, "osf", sample_format, 0);
246 + av_opt_set_sample_fmt(m_swr_ctx, "out_sample_fmt", sample_format, 0);
249 void SetOutputSampleRate(int sample_rate) {
250 - av_opt_set_int(m_swr_ctx, "osr", sample_rate, 0);
251 + av_opt_set_int(m_swr_ctx, "out_sample_rate", sample_rate, 0);
255 diff --git a/src/audio/ffmpeg_audio_reader.h b/src/audio/ffmpeg_audio_reader.h
256 index 5550164..1c6b346 100644
257 --- a/src/audio/ffmpeg_audio_reader.h
258 +++ b/src/audio/ffmpeg_audio_reader.h
259 @@ -62,7 +62,7 @@ class FFmpegAudioReader {
260 bool Read(const int16_t **data, size_t *size);
262 bool IsOpen() const { return m_opened; }
263 - bool IsFinished() const { return m_finished && !m_got_frame; }
264 + bool IsFinished() const { return !m_has_more_packets && !m_has_more_frames; }
266 std::string GetError() const { return m_error; }
267 int GetErrorCode() const { return m_error_code; }
268 @@ -74,20 +74,19 @@ class FFmpegAudioReader {
269 uint8_t *m_convert_buffer[1] = { nullptr };
270 int m_convert_buffer_nb_samples = 0;
272 - AVInputFormat *m_input_fmt = nullptr;
273 + const AVInputFormat *m_input_fmt = nullptr;
274 AVDictionary *m_input_opts = nullptr;
276 AVFormatContext *m_format_ctx = nullptr;
277 AVCodecContext *m_codec_ctx = nullptr;
278 - AVFrame *m_frame = nullptr;
279 int m_stream_index = -1;
281 int m_error_code = 0;
282 - bool m_finished = false;
283 bool m_opened = false;
284 - int m_got_frame = 0;
286 - AVPacket m_packet0;
287 + bool m_has_more_packets = true;
288 + bool m_has_more_frames = true;
289 + AVPacket *m_packet = nullptr;
290 + AVFrame *m_frame = nullptr;
292 int m_output_sample_rate = 0;
293 int m_output_channels = 0;
294 @@ -98,19 +97,12 @@ class FFmpegAudioReader {
296 inline FFmpegAudioReader::FFmpegAudioReader() {
297 av_log_set_level(AV_LOG_QUIET);
299 - av_init_packet(&m_packet);
300 - m_packet.data = nullptr;
303 - m_packet0 = m_packet;
306 inline FFmpegAudioReader::~FFmpegAudioReader() {
308 av_dict_free(&m_input_opts);
309 av_freep(&m_convert_buffer[0]);
310 - av_packet_unref(&m_packet0);
313 inline bool FFmpegAudioReader::SetInputFormat(const char *name) {
314 @@ -135,11 +127,10 @@ inline bool FFmpegAudioReader::Open(const std::string &file_name) {
318 - av_init_packet(&m_packet);
319 - m_packet.data = nullptr;
322 - m_packet0 = m_packet;
323 + m_packet = av_packet_alloc();
328 ret = avformat_open_input(&m_format_ctx, file_name.c_str(), m_input_fmt, &m_input_opts);
330 @@ -153,26 +144,31 @@ inline bool FFmpegAudioReader::Open(const std::string &file_name) {
335 + const AVCodec *codec;
336 ret = av_find_best_stream(m_format_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);
338 SetError("Could not find any audio stream in the file", ret);
341 m_stream_index = ret;
342 + auto stream = m_format_ctx->streams[m_stream_index];
344 - m_codec_ctx = m_format_ctx->streams[m_stream_index]->codec;
345 + m_codec_ctx = avcodec_alloc_context3(codec);
346 m_codec_ctx->request_sample_fmt = AV_SAMPLE_FMT_S16;
348 + ret = avcodec_parameters_to_context(m_codec_ctx, stream->codecpar);
350 + SetError("Could not copy the stream parameters", ret);
354 ret = avcodec_open2(m_codec_ctx, codec, nullptr);
356 SetError("Could not open the codec", ret);
360 - if (!m_codec_ctx->channel_layout) {
361 - m_codec_ctx->channel_layout = av_get_default_channel_layout(m_codec_ctx->channels);
363 + av_dump_format(m_format_ctx, 0, file_name.c_str(), 0); // label the dump with the real input instead of a placeholder string
365 m_frame = av_frame_alloc();
367 @@ -183,19 +179,23 @@ inline bool FFmpegAudioReader::Open(const std::string &file_name) {
368 m_output_sample_rate = m_codec_ctx->sample_rate;
371 - if (!m_output_channels) {
372 - m_output_channels = m_codec_ctx->channels;
373 + AVChannelLayout output_channel_layout;
374 + if (m_output_channels) {
375 + av_channel_layout_default(&output_channel_layout, m_output_channels);
377 + m_output_channels = m_codec_ctx->ch_layout.nb_channels;
378 + av_channel_layout_default(&output_channel_layout, m_output_channels);
381 - if (m_codec_ctx->sample_fmt != AV_SAMPLE_FMT_S16 || m_codec_ctx->channels != m_output_channels || m_codec_ctx->sample_rate != m_output_sample_rate) {
382 + if (m_codec_ctx->sample_fmt != AV_SAMPLE_FMT_S16 || m_codec_ctx->ch_layout.nb_channels != m_output_channels || m_codec_ctx->sample_rate != m_output_sample_rate) {
383 m_converter.reset(new FFmpegAudioProcessor());
384 m_converter->SetCompatibleMode();
385 m_converter->SetInputSampleFormat(m_codec_ctx->sample_fmt);
386 m_converter->SetInputSampleRate(m_codec_ctx->sample_rate);
387 - m_converter->SetInputChannelLayout(m_codec_ctx->channel_layout);
388 + m_converter->SetInputChannelLayout(&(m_codec_ctx->ch_layout));
389 m_converter->SetOutputSampleFormat(AV_SAMPLE_FMT_S16);
390 m_converter->SetOutputSampleRate(m_output_sample_rate);
391 - m_converter->SetOutputChannelLayout(av_get_default_channel_layout(m_output_channels));
392 + m_converter->SetOutputChannelLayout(&output_channel_layout);
393 auto ret = m_converter->Init();
395 SetError("Could not create an audio converter instance", ret);
396 @@ -203,10 +203,11 @@ inline bool FFmpegAudioReader::Open(const std::string &file_name) {
400 + av_channel_layout_uninit(&output_channel_layout);
403 - m_finished = false;
406 + m_has_more_packets = true;
407 + m_has_more_frames = true;
411 @@ -214,6 +215,7 @@ inline bool FFmpegAudioReader::Open(const std::string &file_name) {
413 inline void FFmpegAudioReader::Close() {
414 av_frame_free(&m_frame);
415 + av_packet_free(&m_packet);
419 @@ -252,91 +254,98 @@ inline bool FFmpegAudioReader::Read(const int16_t **data, size_t *size) {
427 + bool needs_packet = false;
429 - while (m_packet.size <= 0) {
430 - av_packet_unref(&m_packet0);
431 - av_init_packet(&m_packet);
432 - m_packet.data = nullptr;
434 - ret = av_read_frame(m_format_ctx, &m_packet);
435 + while (needs_packet && m_packet->size == 0) {
436 + ret = av_read_frame(m_format_ctx, m_packet);
438 if (ret == AVERROR_EOF) {
440 + needs_packet = false;
441 + m_has_more_packets = false;
445 + SetError("Error reading from the audio source", ret);
448 + if (m_packet->stream_index == m_stream_index) {
449 + needs_packet = false;
451 + av_packet_unref(m_packet);
455 + if (m_packet->size != 0) {
456 + ret = avcodec_send_packet(m_codec_ctx, m_packet);
458 + if (ret != AVERROR(EAGAIN)) {
459 SetError("Error reading from the audio source", ret);
463 - m_packet0 = m_packet;
464 - if (m_packet.stream_index != m_stream_index) {
465 - m_packet.data = nullptr;
469 + av_packet_unref(m_packet);
473 - ret = avcodec_decode_audio4(m_codec_ctx, m_frame, &m_got_frame, &m_packet);
474 + ret = avcodec_receive_frame(m_codec_ctx, m_frame);
476 - if (m_decode_error) {
477 - SetError("Error decoding audio frame", m_decode_error);
479 + if (ret == AVERROR_EOF) {
480 + m_has_more_frames = false;
481 + } else if (ret == AVERROR(EAGAIN)) {
482 + if (m_has_more_packets) {
483 + needs_packet = true;
486 + m_has_more_frames = false;
489 - m_decode_error = ret;
490 - m_packet.data = nullptr;
493 + SetError("Error decoding the audio source", ret);
500 - m_decode_error = 0;
502 - const int decoded = std::min(ret, m_packet.size);
503 - m_packet.data += decoded;
504 - m_packet.size -= decoded;
508 - if (m_frame->nb_samples > m_convert_buffer_nb_samples) {
510 - av_freep(&m_convert_buffer[0]);
511 - m_convert_buffer_nb_samples = std::max(1024 * 8, m_frame->nb_samples);
512 - ret = av_samples_alloc(m_convert_buffer, &linsize, m_codec_ctx->channels, m_convert_buffer_nb_samples, AV_SAMPLE_FMT_S16, 1);
514 - SetError("Couldn't allocate audio converter buffer", ret);
515 + if (m_frame->nb_samples > 0) {
517 + if (m_frame->nb_samples > m_convert_buffer_nb_samples) {
519 + av_freep(&m_convert_buffer[0]);
520 + m_convert_buffer_nb_samples = std::max(1024 * 8, m_frame->nb_samples);
521 + ret = av_samples_alloc(m_convert_buffer, &linsize, m_codec_ctx->ch_layout.nb_channels, m_convert_buffer_nb_samples, AV_SAMPLE_FMT_S16, 1);
523 + SetError("Couldn't allocate audio converter buffer", ret);
527 + auto nb_samples = m_converter->Convert(m_convert_buffer, m_convert_buffer_nb_samples, (const uint8_t **) m_frame->data, m_frame->nb_samples);
528 + if (nb_samples < 0) {
529 + SetError("Couldn't convert audio", nb_samples); // the converter's error code is in nb_samples; ret still holds an earlier call's status
533 - auto nb_samples = m_converter->Convert(m_convert_buffer, m_convert_buffer_nb_samples, (const uint8_t **) m_frame->data, m_frame->nb_samples);
534 - if (nb_samples < 0) {
535 - SetError("Couldn't convert audio", ret);
538 - *data = (const int16_t *) m_convert_buffer[0];
539 - *size = nb_samples;
541 - *data = (const int16_t *) m_frame->data[0];
542 - *size = m_frame->nb_samples;
545 - if (m_finished && m_converter) {
546 - auto nb_samples = m_converter->Flush(m_convert_buffer, m_convert_buffer_nb_samples);
547 - if (nb_samples < 0) {
548 - SetError("Couldn't convert audio", ret);
550 - } else if (nb_samples > 0) {
552 *data = (const int16_t *) m_convert_buffer[0];
555 + *data = (const int16_t *) m_frame->data[0];
556 + *size = m_frame->nb_samples;
560 + if (IsFinished()) {
561 + auto nb_samples = m_converter->Flush(m_convert_buffer, m_convert_buffer_nb_samples);
562 + if (nb_samples < 0) {
563 + SetError("Couldn't convert audio", nb_samples); // the flush error code is in nb_samples; ret still holds an earlier call's status
565 + } else if (nb_samples > 0) {
566 + *data = (const int16_t *) m_convert_buffer[0];
567 + *size = nb_samples;
579 inline void FFmpegAudioReader::SetError(const char *message, int errnum) {
580 diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
581 index a2b517b..123e643 100644
582 --- a/tests/CMakeLists.txt
583 +++ b/tests/CMakeLists.txt
584 @@ -38,6 +38,12 @@ set(SRCS
587 set(SRCS ${SRCS} ../src/audio/ffmpeg_audio_reader_test.cpp)
588 + include_directories(
589 + ${FFMPEG_LIBAVFORMAT_INCLUDE_DIRS}
590 + ${FFMPEG_LIBAVCODEC_INCLUDE_DIRS}
591 + ${FFMPEG_LIBAVUTIL_INCLUDE_DIRS}
592 + ${AUDIO_PROCESSOR_INCLUDE_DIRS}
594 link_libraries(fpcalc_libs)