pw-cat: Rework encoded audio support to use libavformat to parse frames

2025-12-20 08:56:56 -05:00 · 2023-02-22 21:04:49 +01:00 · 2023-02-22 21:04:49 +01:00 · dc161fc6af
commit dc161fc6af
parent 580a3d9872
2 changed files with 75 additions and 72 deletions
--- a/src/tools/meson.build
+++ b/src/tools/meson.build
@ -55,6 +55,7 @@ if get_option('pw-cat').allowed() and sndfile_dep.found()
    'pw-midiplay',
    'pw-midirecord',
    'pw-dsdplay',
+    'pw-encplay',
  ]

  executable('pw-cat',
--- a/src/tools/pw-cat.c
+++ b/src/tools/pw-cat.c
@ -143,11 +143,13 @@ struct data {
 	} dsf;

 #ifdef HAVE_PW_CAT_FFMPEG_INTEGRATION
-	FILE *encoded_file;
-	AVFormatContext *fmt_context;
-	AVStream *astream;
-	AVCodecContext *ctx;
-	enum AVSampleFormat sfmt;
+	struct {
+		AVFormatContext *format_context;
+		AVStream *audio_stream;
+		AVPacket *packet;
+		int stream_index;
+		int64_t accumulated_excess_playtime;
+	} encoded;
 #endif
 };

@ -234,22 +236,27 @@ static int sf_playback_fill_f64(struct data *d, void *dest, unsigned int n_frame
 #ifdef HAVE_PW_CAT_FFMPEG_INTEGRATION
 static int encoded_playback_fill(struct data *d, void *dest, unsigned int n_frames)
 {
-	int ret, size = 0;
-	uint8_t buffer[16384];
+	AVPacket *packet = d->encoded.packet;
+	int ret;

-	ret = fread(buffer, 1, SPA_MIN(n_frames, sizeof(buffer)), d->encoded_file);
-	if (ret > 0) {
-		memcpy(dest, buffer, ret);
-		size = ret;
+	while (true) {
+		if ((ret = av_read_frame(d->encoded.format_context, packet) < 0))
+			break;
+
+		if (packet->stream_index == d->encoded.stream_index)
+			break;
 	}
-	return (int)size;
+
+	memcpy(dest, packet->data, packet->size);
+
+	return packet->size;
 }

-static int avcodec_ctx_to_info(struct data *data, AVCodecContext *ctx, struct spa_audio_info *info)
+static int av_codec_params_to_audio_info(struct data *data, AVCodecParameters *codec_params, struct spa_audio_info *info)
 {
 	int32_t profile;

-	switch (ctx->codec_id) {
+	switch (codec_params->codec_id) {
 	case AV_CODEC_ID_VORBIS:
 		info->media_subtype = SPA_MEDIA_SUBTYPE_vorbis;
 		info->info.vorbis.rate = data->rate;
@ -273,7 +280,7 @@ static int avcodec_ctx_to_info(struct data *data, AVCodecContext *ctx, struct sp
 	case AV_CODEC_ID_WMAVOICE:
 	case AV_CODEC_ID_WMALOSSLESS:
 		info->media_subtype = SPA_MEDIA_SUBTYPE_wma;
-		switch (ctx->codec_tag) {
+		switch (codec_params->codec_tag) {
 		/* TODO see if these hex constants can be replaced by named constants from FFmpeg */
 		case 0x161:
 			profile = SPA_AUDIO_WMA_PROFILE_WMA9;
@ -297,7 +304,7 @@ static int avcodec_ctx_to_info(struct data *data, AVCodecContext *ctx, struct sp
 		info->info.wma.rate = data->rate;
 		info->info.wma.channels = data->channels;
 		info->info.wma.bitrate = data->bitrate;
-		info->info.wma.block_align = ctx->block_align;
+		info->info.wma.block_align = codec_params->block_align;
 		info->info.wma.profile = profile;
 		break;
 	case AV_CODEC_ID_FLAC:
@ -937,7 +944,7 @@ static void show_usage(const char *name, bool is_error)
 		     "  -m, --midi                            Midi mode\n"
 		     "  -d, --dsd                             DSD mode\n"
 #ifdef HAVE_PW_CAT_FFMPEG_INTEGRATION
-		     "  -o, --encoded			      Encoded mode\n"
+		     "  -o, --encoded                         Encoded mode\n"
 #endif
 		     "\n"), fp);
 	}
@ -1232,6 +1239,8 @@ static int setup_encodedfile(struct data *data)
 	int ret;
 	int bits_per_sample;
 	int num_channels;
+	unsigned int stream_index;
+	const AVCodecParameters *codecpar;
 	char path[256] = { 0 };

 	/* We do not support record with encoded media */
@ -1242,72 +1251,63 @@ static int setup_encodedfile(struct data *data)
 	strcpy(path, "file:");
 	strcat(path, data->filename);

-	data->fmt_context = NULL;
-	ret = avformat_open_input(&data->fmt_context, path, NULL, NULL);
-	if (ret < 0) {
-		fprintf(stderr, "Failed to open input\n");
+	data->encoded.format_context = NULL;
+	if ((ret = avformat_open_input(&data->encoded.format_context, path, NULL, NULL)) < 0) {
+		fprintf(stderr, "Failed to open input: %s\n", av_err2str(ret));
 		return -EINVAL;
 	}

-	avformat_find_stream_info (data->fmt_context, NULL);
-
-	data->ctx = avcodec_alloc_context3(NULL);
-	if (!data->ctx) {
-		fprintf(stderr, "Could not allocate audio codec context\n");
-		avformat_close_input(&data->fmt_context);
+	if ((ret = avformat_find_stream_info(data->encoded.format_context, NULL)) < 0) {
+		fprintf(stderr, "Could not find stream info: %s\n", av_err2str(ret));
 		return -EINVAL;
 	}

-	// We expect only one stream with audio
-	data->astream = data->fmt_context->streams[0];
-	avcodec_parameters_to_context (data->ctx, data->astream->codecpar);
-
-	if (data->ctx->codec_type != AVMEDIA_TYPE_AUDIO) {
-		fprintf(stderr, "Not an audio file\n");
-		avformat_close_input(&data->fmt_context);
+	data->encoded.audio_stream = NULL;
+	for (stream_index = 0; stream_index < data->encoded.format_context->nb_streams; ++stream_index) {
+		AVStream *stream = data->encoded.format_context->streams[stream_index];
+		codecpar = stream->codecpar;
+		if (codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
+			if (data->verbose) {
+				fprintf(stderr, "Stream #%u in media is an audio stream with codec \"%s\"\n",
+				        stream_index, avcodec_get_name(codecpar->codec_id));
+			}
+			data->encoded.audio_stream = stream;
+			data->encoded.stream_index = stream_index;
+			break;
+		}
+	}
+	if (data->encoded.audio_stream == NULL) {
+		fprintf(stderr, "Could not find audio stream in media\n");
 		return -EINVAL;
 	}

-	printf("Number of streams: %d Codec id: %x\n", data->fmt_context->nb_streams,
-			data->ctx->codec_id);
+	data->encoded.packet = av_packet_alloc();

 	/* FFmpeg 5.1 (which contains libavcodec 59.37.100) introduced
 	 * a new channel layout API and deprecated the old one. */
 #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(59, 37, 100)
-	num_channels = data->ctx->ch_layout.nb_channels;
+	num_channels = codecpar->ch_layout.nb_channels;
 #else
-	num_channels = data->ctx->channels;
+	num_channels = codecpar->channels;
 #endif

-	data->rate = data->ctx->sample_rate;
+	data->rate = codecpar->sample_rate;
 	data->channels = num_channels;
-	data->sfmt = data->ctx->sample_fmt;
-	data->stride = 1; // Don't care
+	/* Stride is not relevant for encoded audio. Set it to 1 to make sure
+	 * the code in on_process() performs correct calculations. */
+	data->stride = 1;

-	bits_per_sample = av_get_bits_per_sample(data->ctx->codec_id);
+	bits_per_sample = av_get_bits_per_sample(codecpar->codec_id);
 	data->bitrate = bits_per_sample ?
-		data->ctx->sample_rate * num_channels * bits_per_sample : data->ctx->bit_rate;
+		data->rate * num_channels * bits_per_sample : codecpar->bit_rate;

 	data->spa_format = SPA_AUDIO_FORMAT_ENCODED;
-	data->fill = playback_fill_fn(data->spa_format);
+	data->fill = encoded_playback_fill;

-	if (data->verbose)
-		printf("Opened file \"%s\" sample format %08x channels:%d rate:%d bitrate: %d\n",
-				data->filename, data->ctx->sample_fmt, data->channels,
-				data->rate, data->bitrate);
-
-	if (data->fill == NULL) {
-		fprintf(stderr, "Unhandled encoded format %d\n", data->spa_format);
-		avformat_close_input(&data->fmt_context);
-		return -EINVAL;
-	}
-
-	avformat_close_input(&data->fmt_context);
-
-	data->encoded_file = fopen(data->filename, "rb");
-	if (!data->encoded_file) {
-		fprintf(stderr, "Failed to open file\n");
-		return -EINVAL;
+	if (data->verbose) {
+		printf("Opened file \"%s\" with encoded audio; channels:%d rate:%d bitrate: %d time units %d/%d\n",
+		       data->filename, data->channels, data->rate, data->bitrate,
+		       data->encoded.audio_stream->time_base.num, data->encoded.audio_stream->time_base.den);
 	}

 	return 0;
@ -1525,6 +1525,11 @@ int main(int argc, char *argv[])
 	} else if (spa_streq(prog, "pw-dsdplay")) {
 		data.mode = mode_playback;
 		data.data_type = TYPE_DSD;
+#ifdef HAVE_PW_CAT_FFMPEG_INTEGRATION
+	} else if (spa_streq(prog, "pw-encplay")) {
+		data.mode = mode_playback;
+		data.data_type = TYPE_ENCODED;
+#endif
 	} else
 		data.mode = mode_none;

@ -1790,13 +1795,9 @@ int main(int argc, char *argv[])
 		spa_zero(info);
 		info.media_type = SPA_MEDIA_TYPE_audio;

-		ret = avcodec_ctx_to_info(&data, data.ctx, &info);
-		if (ret < 0) {
-			if (data.encoded_file) {
-				fclose(data.encoded_file);
-			}
+		ret = av_codec_params_to_audio_info(&data, data.encoded.audio_stream->codecpar, &info);
+		if (ret < 0)
 			goto error_bad_file;
-		}
 		params[0] = spa_format_audio_build(&b, SPA_PARAM_EnumFormat, &info);
 		break;
 	}
@ -1890,11 +1891,6 @@ int main(int argc, char *argv[])
 	/* and wait while we let things run */
 	pw_main_loop_run(data.loop);

-#ifdef HAVE_PW_CAT_FFMPEG_INTEGRATION
-	if (data.encoded_file)
-		fclose(data.encoded_file);
-#endif
-
 	/* we're returning OK only if got to the point to drain */
 	if (data.drained)
 		exit_code = EXIT_SUCCESS;
@ -1919,6 +1915,12 @@ error_no_main_loop:
 		sf_close(data.file);
 	if (data.midi.file)
 		midi_file_close(data.midi.file);
+#ifdef HAVE_PW_CAT_FFMPEG_INTEGRATION
+	if (data.encoded.packet)
+		av_packet_free(&data.encoded.packet);
+	if (data.encoded.format_context)
+		avformat_close_input(&data.encoded.format_context);
+#endif
 	pw_deinit();
 	return exit_code;