diff --git a/src/modules/meson.build b/src/modules/meson.build index 5c9052389..bbdc1a018 100644 --- a/src/modules/meson.build +++ b/src/modules/meson.build @@ -597,7 +597,10 @@ summary({'zeroconf-discover': build_module_zeroconf_discover}, bool_yn: true, se # (by avoiding build script code duplication), create a static library # that contains that common code. pipewire_module_rtp_common_lib = static_library('pipewire-module-rtp-common-lib', - [ 'module-rtp/stream.c' ], + [ 'module-rtp/stream.c', + 'module-rtp/jitter-buffer.c', + 'module-rtp/audio-codec.c', + 'module-rtp/opus-codec.c' ], include_directories : [configinc], install : false, dependencies : [mathlib, dl_lib, rt_lib, pipewire_dep, opus_dep], diff --git a/src/modules/module-raop-sink.c b/src/modules/module-raop-sink.c index af84ba3eb..ce8bdb2ce 100644 --- a/src/modules/module-raop-sink.c +++ b/src/modules/module-raop-sink.c @@ -149,7 +149,7 @@ PW_LOG_TOPIC(mod_topic, "mod." NAME); #define MAX_PORT_RETRY 128 -#define RAOP_FORMAT "S16LE" +#define RAOP_FORMAT DEFAULT_RAOP_AUDIO_FORMAT #define RAOP_STRIDE (2*DEFAULT_CHANNELS) #define RAOP_RATE 44100 #define RAOP_LATENCY_MS 250 diff --git a/src/modules/module-rtp-session.c b/src/modules/module-rtp-session.c index 4374f64b5..ef7b3cf90 100644 --- a/src/modules/module-rtp-session.c +++ b/src/modules/module-rtp-session.c @@ -140,7 +140,8 @@ PW_LOG_TOPIC(mod_topic, "mod." 
NAME); "( sess.min-ptime= ) " \ "( sess.max-ptime= ) " \ "( sess.media= ) " \ - "( audio.format= ) " \ + "( audio.format= ) " \ "( audio.rate= ) " \ "( audio.channels= ) "\ "( audio.position= ) " \ @@ -1624,7 +1625,7 @@ int pipewire__module_init(struct pw_impl_module *module, const char *args) if (spa_streq(str, "audio")) { struct spa_dict_item items[] = { - { "audio.format", DEFAULT_FORMAT }, + { "audio.format", DEFAULT_RAW_AUDIO_FORMAT }, { "audio.rate", SPA_STRINGIFY(DEFAULT_RATE) }, { "audio.channels", SPA_STRINGIFY(DEFAULT_CHANNELS) }, { "audio.position", DEFAULT_POSITION } }; @@ -1632,6 +1633,7 @@ int pipewire__module_init(struct pw_impl_module *module, const char *args) } else if (spa_streq(str, "opus")) { struct spa_dict_item items[] = { + { "audio.format", DEFAULT_OPUS_AUDIO_FORMAT }, { "audio.rate", SPA_STRINGIFY(DEFAULT_RATE) }, { "audio.channels", SPA_STRINGIFY(DEFAULT_CHANNELS) }, { "audio.position", DEFAULT_POSITION } }; diff --git a/src/modules/module-rtp-sink.c b/src/modules/module-rtp-sink.c index e0d34482b..8f045e71d 100644 --- a/src/modules/module-rtp-sink.c +++ b/src/modules/module-rtp-sink.c @@ -199,7 +199,8 @@ PW_LOG_TOPIC(mod_topic, "mod." NAME); "( sess.min-ptime= ) " \ "( sess.max-ptime= ) " \ "( sess.media= ) " \ - "( audio.format= ) " \ + "( audio.format= ) " \ "( audio.rate= ) " \ "( audio.channels= ) " \ "( audio.position= ) " \ diff --git a/src/modules/module-rtp-source.c b/src/modules/module-rtp-source.c index 332d126d5..6cce0b1c7 100644 --- a/src/modules/module-rtp-source.c +++ b/src/modules/module-rtp-source.c @@ -174,7 +174,8 @@ PW_LOG_TOPIC(mod_topic, "mod." 
NAME); "( sess.latency.msec= ) "\ "( sess.ignore-ssrc= ) "\ "( sess.media= ) " \ - "( audio.format= ) " \ + "( audio.format= ) " \ "( audio.rate= ) " \ "( audio.channels= ) " \ "( audio.position= ) " \ @@ -311,9 +312,24 @@ on_rtp_io(void *data, int fd, uint32_t mask) current_time = get_time_ns(impl); if (mask & SPA_IO_IN) { - if ((len = recvfrom(fd, impl->buffer, impl->buffer_size, 0, (struct sockaddr *)(&recvaddr), &recvaddr_len)) < 0) + if ((len = recvfrom(fd, impl->buffer, impl->buffer_size, +#ifdef __linux__ + /* Use this Linux specific feature to get the actual size of the + * packet, even if it was truncated due to it being larger than + * the buffer size. The code below uses this to detect packets + * that exceed the MTU size. Truncated packets are unusable. + * Especially if an audio codec is in use, the partial absence + * of data can lead to a corrupted state. */ + MSG_TRUNC, +#else + 0, +#endif + (struct sockaddr *)(&recvaddr), &recvaddr_len)) < 0) goto receive_error; + if (SPA_UNLIKELY((size_t)len > impl->buffer_size)) + goto packet_larger_than_mtu; + /* Filter the packets to exclude those with source addresses * that do not match the expected one. Only used with unicast. 
* (The bind() call in make_socket takes care of only @@ -373,6 +389,12 @@ short_packet: pw_log_warn("(%d suppressed) short packet of len %zd received", suppressed, len); return; +packet_larger_than_mtu: + if ((suppressed = spa_ratelimit_test(&impl->rate_limit, current_time)) >= 0) + pw_log_warn("(%d suppressed) packet received that is larger than " + "the configured MTU (%zu bytes)", + suppressed, impl->buffer_size); + return; } static int rejoin_igmp_group(struct spa_loop *loop, bool async, uint32_t seq, diff --git a/src/modules/module-rtp/audio-codec.c b/src/modules/module-rtp/audio-codec.c new file mode 100644 index 000000000..e32387bbb --- /dev/null +++ b/src/modules/module-rtp/audio-codec.c @@ -0,0 +1,14 @@ +/* PipeWire */ +/* SPDX-FileCopyrightText: Copyright © 2026 Carlos Rafael Giani */ +/* SPDX-License-Identifier: MIT */ + +#include "audio-codec.h" + +const char * rtp_audio_codec_type_name(enum rtp_audio_codec_type type) +{ + switch (type) { + case RTP_AUDIO_CODEC_TYPE_ENCODER: return "encoder"; + case RTP_AUDIO_CODEC_TYPE_DECODER: return "decoder"; + default: return ""; + } +} diff --git a/src/modules/module-rtp/audio-codec.h b/src/modules/module-rtp/audio-codec.h new file mode 100644 index 000000000..f608f11bc --- /dev/null +++ b/src/modules/module-rtp/audio-codec.h @@ -0,0 +1,209 @@ +/* PipeWire */ +/* SPDX-FileCopyrightText: Copyright © 2026 Carlos Rafael Giani */ +/* SPDX-License-Identifier: MIT */ + +#ifndef PIPEWIRE_RTP_AUDIO_CODEC_H +#define PIPEWIRE_RTP_AUDIO_CODEC_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include +#include + +enum rtp_audio_codec_type { + RTP_AUDIO_CODEC_TYPE_ENCODER, + RTP_AUDIO_CODEC_TYPE_DECODER, +}; + +struct rtp_audio_codec_context { + void *handle; + struct spa_audio_info audio_info; + size_t samples_per_frame; + size_t max_encoded_frame_size; + size_t stride; + enum rtp_audio_codec_type type; + uint8_t *output_buffer; +}; + +struct rtp_audio_codec { + /* Initializes the audio codec. 
+ * + * The codec initialization will be stored in the context. + * + * Initializing an already initialized codec leads to undefined behavior. + * + * context must point to an rtp_audio_codec_context struct instance. + * Said instance will be filled with states for the initialized codec. + * Any already present values will be overwritten. + * + * stream_info must point to an spa_audio_info that contains a raw audio + * format that the codec shall en/decode from/to. + * + * type specifies whether the codec shall be initialized as an + * en- or a decoder. + * + * samples_per_frame specifies how many samples a frame covers. + * "Samples" is meant in the RTP sense; that is, it specifies how + * many samples are there in one channel. This means that this + * value does not depend on the channel count in stream_info. + * + * max_encoded_data_size defines the maximum allowed size in bytes for + * encoded frames. The encode() function then is guaranteed to not + * produce a frame larger than this (it may produce a frame that is + * smaller than that though). + * + * stride specifies the number of bytes per one sample. This value + * _is_ depending on the channel count in stream_info. That is, + * stride = (number of channels x bytes per sample). + * + * codec_props contains extra, codec specific properties, like the + * encoding quality. These properties are optional, and this argument + * can be set to NULL. + * + * Returns 0 in case of success, and a negative errno in case of an error. + * An error will roll back any partial initialization. */ + int (*init)(struct rtp_audio_codec_context *context, struct spa_audio_info *stream_info, + enum rtp_audio_codec_type type, uint32_t samples_per_frame, + size_t max_encoded_data_size, size_t stride, struct pw_properties *codec_props); + + /* Shuts down a previously initialized codec. + * + * If the codec was not initialized, or was already shut down, + * this is a no-op. 
+ * + * context must point to a valid rtp_audio_codec_context struct instance. */ + void (*shutdown)(struct rtp_audio_codec_context *context); + + /* Resets the codec state. + * + * Use this if for example a frame is corrupted and could not be decoded + * to reset to an initial state. This is recommended, since in such cases, + * the codec might not be able to cleanly decode data if previous states + * are retained. + * + * reason is an optional string for logging. It helps indicating in the + * logs the reason for the codec reset. If set to NULL, no reason is logged. + * + * context must point to a valid rtp_audio_codec_context struct instance + * that was initialized. */ + void (*reset)(struct rtp_audio_codec_context *context, const char *reason); + + /* Return the en- or decoder delay. + * + * The delay is given in samples. Whether this returns the en- or the + * decoder delay depends on type that was passed to init(). The delay + * is stored in the value pointed to by the delay pointer. + * + * IMPORTANT: This delay must not vary for the duration of the existence + * of the context. Once the context is created, the delay must be fixed. + * + * context must point to a valid rtp_audio_codec_context struct instance + * that was initialized. + * + * delay must be a valid pointer. + * + * Returns 0 in case of success, and a negative errno in case of an error. + * + * In case of an error, the value pointed to by the delay pointer is + * undefined. An error means that the codec cannot be used anymore and + * must be shut down. */ + int (*get_delay)(struct rtp_audio_codec_context *context, size_t *delay); + + /* Encodes a set of samples to a codec frame. + * + * This function only works if the type during initialization + * was RTP_AUDIO_CODEC_TYPE_ENCODER. If it isn't, this function's + * behavior is undefined. + * + * context must point to a valid rtp_audio_codec_context struct instance + * that was initialized. 
+ * + * in_samples must point to a memory block containing at least the amount + * of samples that was specified when the codec was initialized. + * + * out_encoded_data must point to a pointer that shall be set to refer to + * an internal buffer that contains the encoded data. out_encoded_data_size + * must point to a size_t value that shall be set to the size of the encoded + * data in bytes. + * + * Returns 0 in case of success, and a negative errno in case of an error. + * + * In case of an error, the values out_encoded_data and out_encoded_data_size + * are set to are undefined. An error means that the codec cannot be used + * anymore and must be shut down. */ + int (*encode)(struct rtp_audio_codec_context *context, const uint8_t *in_samples, + uint8_t **out_encoded_data, size_t *out_encoded_data_size); + + /* Decodes a codec frame to a set of samples. + * + * Codec frames must be fed into the decoder in order. + * + * This function only works if the type during initialization + * was RTP_AUDIO_CODEC_TYPE_DECODER. If it isn't, this function's + * behavior is undefined. + * + * context must point to a valid rtp_audio_codec_context struct instance + * that was initialized. + * + * in_encoded_data must point to a memory block containing the codec frame. + * in_encoded_data_size must be set to the size of the codec frame in bytes. + * + * out_samples must point to a pointer that shall be set refer to an + * internal buffer that contains the decoded samples. out_num_samples + * must point to a size_t value that shall contain how many samples + * were decoded. That amount's maximum possible value is the number + * of samples per frame specified during initialization. The codec is + * allowed to produce less samples than that, but not more than that. + * + * Returns 0 in case of success, and a negative errno in case of an error. + * + * In case of an error, the values out_samples and out_num_samples + * are set to are undefined. 
An error means that the codec cannot be used + * anymore and must be shut down. */ + int (*decode)(struct rtp_audio_codec_context *context, const uint8_t *in_encoded_data, + size_t in_encoded_data_size, uint8_t **out_samples, size_t *out_num_samples); + + /* Applies PLC to cover a lost frame. + * + * This only works properly if non-missing frames that preceded the + * lost frame were decoded in order. + * + * context must point to a valid rtp_audio_codec_context struct instance + * that was initialized. + * + * out_samples must point to a pointer that shall be set refer to an + * internal buffer that contains the PLC samples. out_num_samples + * must point to a size_t value that shall contain how many PLC samples + * were generated. That amount's maximum possible value is the number + * of samples per frame specified during initialization. The codec is + * allowed to produce less samples than that, but not more than that. + * + * Returns 0 in case of success, and a negative errno in case of an error. + * + * In case of an error, the values out_samples and out_num_samples + * are set to are undefined. An error means that the codec cannot be used + * anymore and must be shut down. */ + int (*apply_plc)(struct rtp_audio_codec_context *context, + uint8_t **out_samples, size_t *out_num_samples); + + /* Gets a human-readable name of this codec. + * + * This is meant for logging purposes. */ + const char * (*get_name)(void); +}; + +/* Returns a human-readable string for the given audio codec type. + * + * This is meant for logging purposes. 
*/ +const char * rtp_audio_codec_type_name(enum rtp_audio_codec_type type); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* PIPEWIRE_RTP_AUDIO_CODEC_H */ diff --git a/src/modules/module-rtp/audio.c b/src/modules/module-rtp/audio.c index 5021b223b..a42eddaf7 100644 --- a/src/modules/module-rtp/audio.c +++ b/src/modules/module-rtp/audio.c @@ -119,12 +119,13 @@ static void rtp_audio_process_playback(void *data) uint32_t clock_rate = impl->io_position->clock.rate.denom; /* Translate the clock position to an RTP timestamp and - * shift it to compensate for device delay and ASRC delay. - * The device delay is scaled along with the clock position, - * since both are expressed in clock sample units, while - * pwt.buffered is expressed in stream time. */ + * shift it to compensate for device delay, decoder delay, + * and ASRC delay. The device delay is scaled along with + * the clock position, since both are expressed in clock + * sample units, while pwt.buffered is expressed in + * stream time. */ timestamp = scale_u64(impl->io_position->clock.position + device_delay, - impl->rate, clock_rate) + pwt.buffered; + impl->rate, clock_rate) + pwt.buffered + impl->codec_delay; spa_ringbuffer_read_update(&impl->ring, timestamp); avail = spa_ringbuffer_get_read_index(&impl->ring, &read_index); } else { @@ -217,15 +218,34 @@ static void rtp_audio_process_playback(void *data) avail = spa_ringbuffer_get_read_index(&impl->ring, ×tamp); - /* Reduce target buffer by the delay amount to start playback sooner. - * This compensates for the delay to the device. */ - if (SPA_UNLIKELY(impl->target_buffer < device_delay)) { - pw_log_error("Delay to device (%" PRIu32 ") is higher than " - "the target buffer size (%" PRIu32 ")", device_delay, - impl->target_buffer); + if (impl->io_position) { + uint32_t clock_rate = impl->io_position->clock.rate.denom; + /* Device delay is reported in clock rate units. 
If this does not + * match the RTP rate, the device delay must be transformed first. */ + device_delay = scale_u64(device_delay, impl->rate, clock_rate); + } + + /* Reduce target buffer by the decoder and device delay amount to + * start playback sooner. This compensates for the delay to the + * device and for the decoder delay. */ + + if (SPA_UNLIKELY(impl->target_buffer < (impl->codec_delay + device_delay))) { + if (impl->target_buffer < device_delay) { + pw_log_error("Delay to device (%" PRIu32 ") is higher than " + "the target buffer size (%" PRIu32 ")", device_delay, + impl->target_buffer); + } else if (impl->target_buffer < impl->codec_delay) { + pw_log_error("Decoder delay (%zu) is higher than the " + "target buffer size (%" PRIu32 ")", impl->codec_delay, + impl->target_buffer); + } else { + pw_log_error("The combined decoder delay (%zu) and device delay " + "(%" PRIu32 ") are higher than the target buffer size (%" PRIu32 ")", + impl->codec_delay, device_delay, impl->target_buffer); + } target_buffer = 0; } else { - target_buffer = impl->target_buffer - device_delay; + target_buffer = impl->target_buffer - impl->codec_delay - device_delay; } if (avail < (int32_t)wanted) { @@ -323,50 +343,325 @@ static void rtp_audio_process_playback(void *data) pw_stream_queue_buffer(impl->stream, buf); } +static int process_received_samples(struct impl *impl, uint8_t *samples, uint32_t num_samples, + uint16_t seqnum, uint32_t timestamp, uint32_t ts_offset); + +static int on_jitter_buffer_output_rtp_packet(void *context, const uint8_t *packet_data, size_t packet_size, + size_t header_size, uint32_t timestamp, uint16_t seqnum) +{ + int ret; + size_t payload_size; + uint8_t *decoded_samples; + size_t num_decoded_samples; + struct impl *impl = context; + + payload_size = packet_size - header_size; + + ret = impl->audio_codec->decode(&(impl->audio_codec_context), packet_data + header_size, + payload_size, &decoded_samples, &num_decoded_samples); + if (SPA_UNLIKELY(ret < 0)) { + 
pw_log_error("could not decode audio from packet with seqnum %" PRIu16 ": %s", + seqnum, spa_strerror(ret)); + return ret; + } + + pw_log_trace("got packet with seqnum %" PRIu16 " from jitter buffer; decoding " + "yielded %zu samples", seqnum, num_decoded_samples); + + return process_received_samples(impl, decoded_samples, num_decoded_samples, seqnum, + timestamp, impl->target_buffer); +} + +static void reset_rtp_audio_codec(struct impl *impl, const char *reason); + +static int on_jitter_buffer_signal_lost_packets(void *context, uint16_t seqnum_of_first_lost_packet, + size_t num_lost_packets, bool open_ended) +{ + int ret; + uint8_t *plc_samples; + size_t num_plc_samples; + uint32_t timestamp; + struct impl *impl = context; + size_t i; + + /* Don't apply PLC if sync is not established. Playback is in sync when + * there is a steady supply of data going on and the timestamps have been + * okay thus far. When sync is lost, there is an audible discontinuity + * that requires a reset of the ringbuffer contents along with the stats + * that keep playback in sync, so applying PLC then makes no sense. */ + if (!impl->have_sync) + return 0; + + // TODO apply fadeout if open_ended == true + + pw_log_info("Jitter buffer signals lost packet(s); packet loss sequence starts at " + "seqnum %" PRIu16 ", sequence length %zu, %sopen ended", seqnum_of_first_lost_packet, + num_lost_packets, open_ended ? "" : "not "); + + for (i = 0; i < num_lost_packets; ++i) { + uint16_t seqnum = (seqnum_of_first_lost_packet + i); + + ret = impl->audio_codec->apply_plc(&(impl->audio_codec_context), &plc_samples, &num_plc_samples); + if (SPA_UNLIKELY(ret < 0)) { + pw_log_error("could not apply PLC: %s", spa_strerror(ret)); + return ret; + } + + spa_ringbuffer_get_write_index(&impl->ring, &timestamp); + + /* PLC generates sample data to cover the lost packet, but it does + * not generate timestamps. 
Since there is no other information available + * at this stage for reconstructing timestamps, just use the current + * ringbuffer write index. That write index is actually the ideal current + * timestamp in direct timestamp mode - that is, if sync is perfect, then + * that ideal timestamp perfectly matches the RTP timestamps (when they + * are shifted by target_buffer). See process_received_samples() for + * how these are used and compared to calculate a skew. */ + + pw_log_trace("jitter buffer signaled loss of packet with seqnum %" PRIu16 "; PLC " + "yielded %zu samples; using RTP timestamp %" PRIu32, seqnum, num_plc_samples, + timestamp); + + /* The write index has the target_buffer factored in. (See for example the + * !have_sync cases in process_received_samples().) Therefore, it is + * important to ensure that the timestamp we got from the ringbuffer write + * index is _not_ shifted by target_buffer again. For this reason, pass + * 0 as the ts_offset. */ + ret = process_received_samples(impl, plc_samples, num_plc_samples, seqnum, timestamp, 0); + if (SPA_UNLIKELY(ret < 0)) { + pw_log_error("could not process PLC samples: %s", spa_strerror(ret)); + return ret; + } + } + + if (open_ended) + reset_rtp_audio_codec(impl, "open ended packet loss"); + + return 0; +} + +static int setup_rtp_audio_codec(struct impl *impl, const struct rtp_audio_codec *audio_codec, + struct pw_properties *props) +{ + int ret; + enum rtp_audio_codec_type audio_codec_type; + + /* No audio codec -> PCM is to be transmitted directly. + * Nothing to set up then. */ + if (audio_codec == NULL) + return 0; + + spa_assert(impl->psamples > 0); + + /* A jitter buffer is only needed in the output direction, + * since that is the direction source nodes (= receivers) use. 
*/ + if (impl->direction == PW_DIRECTION_OUTPUT) { + struct rtp_jitter_buffer_params params; + uint32_t jitter_buffer_length_perc; + size_t max_num_packets_in_ringbuffer; + + if (props != NULL) { + jitter_buffer_length_perc = pw_properties_get_uint32(props, + "jitter.buffer.length.perc", 50); + } else { + jitter_buffer_length_perc = 50; + } + + /* Round up the number of packets, since even a packet with just 1 sample + * actually inside is considered a full packet in the ringbuffer. */ + max_num_packets_in_ringbuffer = (impl->target_buffer + (impl->psamples - 1)) / impl->psamples; + /* Get the number of slots out of the number of packets that fit in the + * jitter buffer, using the jitter_buffer_length_perc percentage. At least + * 1 slot is necessary though, so limit the output to a minimum of 1. */ + params.num_slots = SPA_MAX(max_num_packets_in_ringbuffer * jitter_buffer_length_perc / 100, 1u); + pw_log_debug("ringbuffer can hold up to %zu packets worth of data; " + "jitter buffer length is set to cover %" PRIu32 "%% of " + "the ring buffer -> num slots: %zu", max_num_packets_in_ringbuffer, + jitter_buffer_length_perc, params.num_slots); + + params.max_packet_size = impl->mtu; + params.packet_duration = scale_u64(impl->psamples, SPA_NSEC_PER_SEC, impl->rate); + params.loop = impl->data_loop; + params.context = impl; + params.output_rtp_packet = on_jitter_buffer_output_rtp_packet; + params.signal_lost_packets = on_jitter_buffer_signal_lost_packets; + + ret = rtp_jitter_buffer_init(&(impl->jitter_buffer), ¶ms); + if (SPA_UNLIKELY(ret < 0)) { + pw_log_error("could not setup jitter buffer: %s", spa_strerror(ret)); + return ret; + } + + audio_codec_type = RTP_AUDIO_CODEC_TYPE_DECODER; + } else { + audio_codec_type = RTP_AUDIO_CODEC_TYPE_ENCODER; + /* This is a buffer used when data that is to be encoded wraps + * around the ring buffer. The encoder needs the data in + * contiguous form, so the wrapped data must be copied into + * this buffer first in such cases. 
*/ + impl->audio_encoder_staging_buffer = malloc(impl->psamples * impl->stride); + } + + impl->audio_codec = audio_codec; + + ret = impl->audio_codec->init(&(impl->audio_codec_context), &impl->stream_info, + audio_codec_type, impl->psamples, impl->payload_size, impl->stride, props); + if (SPA_UNLIKELY(ret < 0)) { + pw_log_error("could not setup audio codec: %s", spa_strerror(ret)); + return ret; + } + + /* Since a stream always either only receives or only sends, one single + * quantity is used for both directions. This means that when this stream + * sends, the audio_codec is configured to encode, and thus, codec_delay + * then is the encoder delay. Consequently, if the stream receives, and + * the audio_codec decodes, codec_delay is the decoder delay. */ + ret = impl->audio_codec->get_delay(&(impl->audio_codec_context), &(impl->codec_delay)); + if (SPA_UNLIKELY(ret < 0)) { + pw_log_error("could not get audio codec delay: %s", spa_strerror(ret)); + return ret; + } + + /* In the output direction (which is what RTP source nodes use), the + * codec_delay is the decoder delay. That delay is applied by either + * shifting timestamps forward (in the direct timestamp mode) or by + * subtracting from target_buffer (in the constant latency mode). + * That is, the decoder delay is treated as part of the specified + * session latency. This ensures that RTP source nodes are in sync + * even in the (unlikely) case that they use different decoders with + * different decoder latencies. But, it is then important to check + * that the decoder delay is smaller than the buffer size. If this + * is not the case, then the constant latency mode would use negative + * buffer sizes, and the direct timestamp mode would always shift + * timestamps past the buffer size. 
*/ + if ((impl->direction == PW_DIRECTION_OUTPUT) && + SPA_UNLIKELY(impl->codec_delay > impl->target_buffer)) { + pw_log_error("decoder delay (%zu samples) is larger than buffer size " + "(%" PRIu32 " samples)", impl->codec_delay, impl->target_buffer); + return -EINVAL; + } + + pw_log_info("initialized %s %s with %" PRIu32 " samples per packet; " + "codec delay: %zu samples", impl->audio_codec->get_name(), + rtp_audio_codec_type_name(audio_codec_type), impl->psamples, + impl->codec_delay); + + return 0; +} + +static void teardown_rtp_audio_codec(struct impl *impl) +{ + if ((impl->audio_codec != NULL) && (impl->audio_codec_context.handle != NULL)) { + rtp_jitter_buffer_shutdown(&(impl->jitter_buffer)); + + impl->audio_codec->shutdown(&(impl->audio_codec_context)); + impl->audio_codec_context.handle = NULL; + } + + free(impl->audio_encoder_staging_buffer); + impl->audio_encoder_staging_buffer = NULL; +} + +static void reset_rtp_audio_codec(struct impl *impl, const char *reason) +{ + if (impl->audio_codec != NULL) + impl->audio_codec->reset(&(impl->audio_codec_context), reason); +} + static int rtp_audio_receive(struct impl *impl, uint8_t *buffer, ssize_t len, ssize_t hlen, uint64_t current_time) { + int ret; struct rtp_header *hdr; - ssize_t plen; - uint16_t seq; - uint32_t timestamp, samples, write, expected_write; - uint32_t stride = impl->stride; - int32_t filled; + uint16_t seqnum; + uint32_t timestamp; hdr = (struct rtp_header*)buffer; + seqnum = ntohs(hdr->sequence_number); + timestamp = ntohl(hdr->timestamp) - impl->ts_offset; - seq = ntohs(hdr->sequence_number); - if (impl->have_seq && impl->seq != seq) { - pw_log_info("unexpected seq (%d != %d) SSRC:%u", - seq, impl->seq, impl->ssrc); - /* No need to resynchronize here. If packets arrive out of - * order, then they are still written in order into the ring - * buffer, since they are written according to where the - * RTP timestamp points to. 
*/ + if (impl->have_seq && (impl->next_expected_incoming_seq >= 0) && + (impl->next_expected_incoming_seq != seqnum)) { + pw_log_info("packet arrived out of order: expected/actual packet seq: %" + PRIu16"/%" PRIu16 " SSRC: %" PRIu32 " timestamp: %" PRIu32, + impl->next_expected_incoming_seq, seqnum, impl->ssrc, + timestamp); } + + /* Note that setting the last_recv_timestamp might not be correct + * in cases where the jitter buffer switches to hold-back mode. That's + * because in such cases, the jitter buffer will output packets in + * bursts, and it might do so at a time that is significantly ahead + * of the current_time. However - the jitter buffer switches to that + * mode when it detects packet losses. last_recv_timestamp is used + * for in-flight data calculations to smoothen out DLL adjustments + * when the RTP source is running in constant delay mode. These + * calculations assume steady packet transmission and a reliable + * network - and in such a network, neither packet reordering nor + * packet losses occur. Thus, it is still okay to set this timestamp + * here instead of in process_received_samples(), because when packets + * are lost / reordered, those calculations fall apart anyway. */ + impl->last_recv_timestamp = current_time; + + impl->next_expected_incoming_seq = (seqnum + 1) & 65535; + + /* If execution reaches this point, then the packet might be out of order, + * but still arrived in time. If the jitter buffer is initialized, it + * will take care of reordering packets. If it is not initialized, then + * there is no need to reorder the packets and depayload the data. The + * packet contents can be directly written into the ring buffer + * according to where their RTP timestamps point to, so even if they + * come in out of order, they ultimately end up in the ring buffer in + * the right locations. 
This can be done when raw PCM data is + * transmitted, but not when the data is encoded - with encoded data, + * the jitter buffer is necessary, since audio codecs typically require + * encoded frames to arrive in order. */ + + if (rtp_jitter_buffer_is_initialized(&(impl->jitter_buffer))) { + ret = rtp_jitter_buffer_insert_packet(&(impl->jitter_buffer), buffer, len, + hlen, timestamp, seqnum); + if (SPA_UNLIKELY(ret < 0)) { + pw_log_error("could not insert packet into jitter buffer: %s", spa_strerror(ret)); + return ret; + } + } else { + ssize_t plen = len - hlen; + ret = process_received_samples(impl, &buffer[hlen], plen / impl->stride, + seqnum, timestamp, impl->target_buffer); + if (SPA_UNLIKELY(ret < 0)) { + pw_log_error("could not process received samples: %s", spa_strerror(ret)); + return ret; + } + } + + return 0; +} + +static int process_received_samples(struct impl *impl, uint8_t *samples, + uint32_t num_samples, uint16_t seq, uint32_t timestamp, uint32_t ts_offset) +{ + uint32_t write, expected_write; + int32_t filled; + uint32_t stride = impl->stride; + impl->seq = seq + 1; impl->have_seq = true; - timestamp = ntohl(hdr->timestamp) - impl->ts_offset; - impl->receiving = true; - impl->last_recv_timestamp = current_time; - - plen = len - hlen; - samples = plen / stride; filled = spa_ringbuffer_get_write_index(&impl->ring, &expected_write); - /* we always write to timestamp + delay */ - write = timestamp + impl->target_buffer; + write = timestamp + ts_offset; if (!impl->have_sync) { pw_log_info("sync to timestamp:%u seq:%u ts_offset:%u SSRC:%u target:%u direct:%u", timestamp, seq, impl->ts_offset, impl->ssrc, impl->target_buffer, impl->direct_timestamp); - /* we read from timestamp, keeping target_buffer of data - * in the ringbuffer. */ + /* Synchronize by setting the read and write indices such that + * the read index is ahead of the write index by exactly the + * ringbuffer length, and the write index equals the timestamp + * of the current RTP packet. 
*/ impl->ring.readindex = timestamp; impl->ring.writeindex = write; filled = impl->target_buffer; @@ -374,26 +669,34 @@ static int rtp_audio_receive(struct impl *impl, uint8_t *buffer, ssize_t len, spa_dll_init(&impl->dll); spa_dll_set_bw(&impl->dll, SPA_DLL_BW_MIN, 128, impl->rate); memset(impl->buffer, 0, impl->buffer_size); + + reset_rtp_audio_codec(impl, "(re)resynchronization"); + impl->have_sync = true; } else if (expected_write != write) { pw_log_debug("unexpected write (%u != %u)", write, expected_write); } - /* Write overrun only makes sense in constant delay mode. See the - * RTP source module documentation and the rtp_audio_process_playback() - * code for an explanation why. */ - if (!impl->direct_timestamp && (filled + samples > impl->buffer_size / stride)) { - pw_log_debug("receiver write overrun %u + %u > %u", filled, samples, + if (!impl->direct_timestamp && (filled + num_samples > impl->buffer_size / stride)) { + /* In constant delay mode, the goal is to keep the buffer fill + * level at a fixed level. If it goes above or below that, rate + * matching is used in rtp_audio_process_playback() to drive + * the fill level to that target value. If however the write side + * (that is, this function here) reaches a write overrun, it cannot + * insert any more samples and rely on that rate matching to + * compensate (there's no more room in the ringbuffer). A hard + * resync is needed in such a case. */ + pw_log_debug("receiver write overrun %u + %u > %u", filled, num_samples, impl->buffer_size / stride); impl->have_sync = false; } else { - pw_log_trace("got samples:%u", samples); + pw_log_trace("got %" PRIu32 " samples", num_samples); spa_ringbuffer_write_data(&impl->ring, impl->buffer, impl->actual_max_buffer_size, ((uint64_t)write * stride) % impl->actual_max_buffer_size, - &buffer[hlen], (samples * stride)); + samples, (num_samples * stride)); /* Only update the write index if data was actually _appended_. 
* If packets arrived out of order, then it may be that parts @@ -426,7 +729,7 @@ static int rtp_audio_receive(struct impl *impl, uint8_t *buffer, ssize_t len, * In unsigned arithmetic, if write + samples exceeds UINT32_MAX, * it wraps around to a smaller value. We detect this by checking * if new_write < write (which can only happen on overflow). */ - const uint32_t new_write = write + samples; + const uint32_t new_write = write + num_samples; const bool wrapped_around = new_write < write; /* Determine if new_write is ahead of expected_write. @@ -469,6 +772,8 @@ static void rtp_audio_send_packets(struct impl *impl, uint32_t timestamp, uint32 struct iovec iov[3]; struct rtp_header header; uint32_t stride; + bool do_send = true; + int ret; stride = impl->stride; @@ -498,12 +803,84 @@ static void rtp_audio_send_packets(struct impl *impl, uint32_t timestamp, uint32 ((uint64_t)timestamp * stride) % impl->actual_max_buffer_size, &iov[1], tosend * stride); - pw_log_trace_fp("sending %d packet:%d ts_offset:%d timestamp:%u (%f s)", - tosend, num, impl->ts_offset, timestamp, + pw_log_trace_fp("sending %d packet:%d seq: %" PRIu16 " ts_offset:%d timestamp:%u (%f s)", + tosend, num, seq, impl->ts_offset, timestamp, (double)timestamp * impl->io_position->clock.rate.num / impl->io_position->clock.rate.denom); - rtp_stream_call_send_packet(impl, iov, 3); + if (impl->audio_codec != NULL) { + /* In here, the ring buffer that iov points to is redirected + * to be encoded, and the encoder's output is then set as the + * new iov content. */ + + uint8_t *samples_to_encode = NULL; + uint8_t *encoded_data = NULL; + size_t encoded_data_size = 0; + + /* The audio codec expects one contiguous data block with impl->psamples + * samples. In case of a ring buffer wrap around, copy the audio data + * into the staging buffer, since the encoder cannot handle the wrap + * around on its own. 
And if there are fewer than psamples samples, + * copy what's available into the staging buffer and zero-stuff the rest + * of its space to be able to give the encoder the required amount of + * data to encode. */ + spa_assert((iov[1].iov_len + iov[2].iov_len) == (tosend * stride)); + if (iov[2].iov_len != 0) { + /* Copy the audio data, taking wrap around into account. */ + memcpy(impl->audio_encoder_staging_buffer, iov[1].iov_base, iov[1].iov_len); + memcpy(impl->audio_encoder_staging_buffer + iov[1].iov_len, iov[2].iov_base, iov[2].iov_len); + /* Zero-pad the unused trailing portions of the buffer. */ + if ((uint32_t)tosend < impl->psamples) { + memset(impl->audio_encoder_staging_buffer + iov[1].iov_len + iov[2].iov_len, + 0, (impl->psamples - tosend) * stride); + } + + samples_to_encode = impl->audio_encoder_staging_buffer; + } else if ((uint32_t)tosend < impl->psamples) { + /* Copy the audio data. */ + memcpy(impl->audio_encoder_staging_buffer, iov[1].iov_base, iov[1].iov_len); + /* Zero-pad the unused trailing portions of the buffer. */ + memset(impl->audio_encoder_staging_buffer + iov[1].iov_len, + 0, (impl->psamples - tosend) * stride); + + samples_to_encode = impl->audio_encoder_staging_buffer; + } else { + /* No wrap around happening, and no zero padding necessary. + * The audio data can be directly fed into the encoder. */ + samples_to_encode = iov[1].iov_base; + } + + ret = impl->audio_codec->encode(&(impl->audio_codec_context), + samples_to_encode, &encoded_data, &encoded_data_size); + + if (SPA_LIKELY(ret == 0)) { + /* Tweak iov to get the actual RTP payload from that single staging + * buffer instead of from the ring buffer directly. (The iov_base + * value of iov[2] is still set to encoded_data, even though its + * iov_len is 0, since it is not sure if setting its iov_base + * pointer to NULL is valid. iov_len 0 _is_ valid, and causes + * POSIX calls to ignore that iovec item. 
*/ + iov[1].iov_base = encoded_data; + iov[1].iov_len = encoded_data_size; + iov[2].iov_base = encoded_data; + iov[2].iov_len = 0; + do_send = true; + } else { + /* Normally, an encode() error would require shutting down the + * encoder. However, this is not feasible here, since this code + * runs in the data loop thread. Furthermore, if the errors keep + * occurring, this would lead to a constantly reinitializing + * encoder. There is no discernible way to communicate the error + * to the application either. Thus, in case of an error, skip + * the send, and log it. */ + pw_log_error("could not encode audio for packet with seqnum %" PRIu16 ": %s", + seq, spa_strerror(ret)); + do_send = false; + } + } + + if (SPA_LIKELY(do_send)) + rtp_stream_call_send_packet(impl, iov, 3); seq++; first = false; diff --git a/src/modules/module-rtp/jitter-buffer.c b/src/modules/module-rtp/jitter-buffer.c new file mode 100644 index 000000000..34a8fc34f --- /dev/null +++ b/src/modules/module-rtp/jitter-buffer.c @@ -0,0 +1,1057 @@ +/* PipeWire */ +/* SPDX-FileCopyrightText: Copyright © 2026 Carlos Rafael Giani */ +/* SPDX-License-Identifier: MIT */ + +#include +#include +#include +#include + +#include + +#include +#include + +#include + +#include +#include + +PW_LOG_TOPIC_EXTERN(mod_topic); +#define PW_LOG_TOPIC_DEFAULT mod_topic + +/* RTP jitter buffer design overview + * + * NOTE: For basic information about what the jitter buffer does and how it + * is used, read through the rtp_jitter_buffer struct documentation first. + * + * The jitter buffer consists of slots and the valid seqnum window. Slots + * are abstract entities, and exist as items in the slots array. This array + * contains num_slots items. Valid slots contain elements; these are packets + * or gaps. The slot element_type is then set accordingly either to + * RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET or RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP. + * (See the rtp_jitter_buffer struct documentation for what "valid" means.) 
+ * If a slot contains a packet, then the packet data is stored in the + * packet_buffer, as described in the rtp_jitter_buffer struct documentation. + * + * A "slot index" goes from 0 to (num_slots-1) and is used for accessing + * the slot array. Sequence numbers are mapped to slot indices through modulo + * arithmetic: + * + * slot_index = seqnum mod num_slots + * + * This works reliably because the valid seqnum window establishes the + * range of valid sequence numbers, and because that window can at most + * be of size num_slots. Thus, even if a much older or much newer packet + * (that is, a packet with much smaller or larger seqnum than the ones + * encountered thus far) arrives, there is no danger of that modulo + * arithmetic incorrectly aliasing slots, since the valid seqnum window + * will take care of validating the seqnums first. + * + * In RTP, since sequence numbers are unsigned 16-bit integers, the integer + * wrap around needs to be addressed. Packet sequence number 65535 can be + * reached in real world scenarios depending on the packet duration. For + * example, if a packet covers 1 ms, then after ~66 seconds, sequence + * number 65535 will be reached. For this reason, the valid seqnum window's + * start and length quantities use uint32_t as type - it factors out the + * wrap-around in certain calculations, which makes them easier. + * + * The wrap around behavior also makes it non-trivial to calculate sequence + * number deltas correctly. For this reason, the calculate_seqnum_delta() + * utility function is used for seqnum deltas. + * + * In regular mode, incoming packets all have had the expected monotonically + * incrementing sequence number. (A sequence number wrap around technically + * means that they do not monotonically increment, but this is omitted here, + * because the wrap around is a numerical limitation, and it is handled + * as a continuation of a packet seqnum increment.)
In this mode, the packet + * data, packet size, header size, timestamp are forwarded immediately to the + * output_rtp_packet() function pointer without copying the packet data. This + * improves performance during regular mode, which is the most common mode + * the jitter buffer will be in, unless the network quality is very poor. The + * slots and the valid seqnum window have no meaning in this mode. + * + * When the jitter buffer expects seqnum X, but sees seqnum X-N (that is, + * an older seqnum), the incoming packet with seqnum X-N is dropped, and + * regular mode continues. If the packet's seqnum instead is X+N, it means + * that there is a gap. For example, if the last packet had seqnum 200, + * the jitter buffer expects the next packet to have seqnum 201. If the + * next packet instead has seqnum 202, then there is a gap at 201. The + * jitter buffer switches to hold-back mode, and establishes a valid seqnum + * window that starts at 201 and is of length 2. That is, it covers the gap + * and the new packet. The gap is registered at slot (201 % num_slots), the + * packet is inserted in slot (202 % num_slots). Suppose that num_slots is 10. + * Then, the gap will be registered at slot 201 % 10 = 1, and the packet 202 + * will be stored at slot 202 % 10 = 2. To be more specific, item #1 in the + * slots array will have its type set to RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP, + * and item #2 will have its type set to RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET. + * The size of packet 202 will be stored in item #2 in the slots array. + * (Item #1 in the slots array will not have a packet size set, since a gap + * has no "size" in bytes.) + * Packet 202's data will be copied into packet_buffer, at location + * (max_packet_size * 2), since the slot index is 2. That way, slot 1 is set + * to contain a gap at seqnum 201, and slot 2 is set to contain a packet 202 + * with the specified packet data, packet size, header size, timestamp. 
+ * + * In hold-back mode, every time a packet is inserted, the jitter buffer will + * check if the oldest N valid slots contain packets (and no gaps). The "oldest" + * slot is the one whose associated sequence number equals the value of + * valid_seqnum_window_start_seqnum. The second oldest slot's associated sequence + * number equals (valid_seqnum_window_start_seqnum+1) etc. If the oldest N slots + * indeed contain purely packets and no gaps (that is, N > 0), the jitter buffer + * will output the packets from those N oldest slots, in order (the packet from + * the oldest slot is output first, then comes the packet from the second oldest + * slot etc.). Then, the valid_seqnum_window_start_seqnum value is shifted forwards + * by N, since those packets from the oldest N slots are no longer held back. + * Accordingly, the valid_seqnum_window_length value is decremented by N. If there + * are still gaps, it means there are packets behind the gaps. In such a case, + * valid_seqnum_window_length will be nonzero even after decrementing it. But if + * this output step did emit all of the remaining held back packets, it implies + * that there is nothing left in the slots, nothing is being held back, so the + * jitter buffer switches back to regular mode. + * + * When a packet comes in whose sequence number is older than the start of the + * valid seqnum window, it is dropped, just like in the regular mode. If however + * the sequence number goes past the window, it effectively extends the window. + * Suppose that the window starts at seqnum 200, and is of length 5. This means + * that currently, all seqnums from 200 to 204 (both inclusive) are valid. But + * then, a packet with seqnum 209 comes in. This requires extending the window + * from length 5 to length 10, such that it covers all seqnums from 200 to 209. + * This step of course can introduce gaps. 
In this example, a new gap is added + * that starts at seqnum 205 and is of length 4, since the gap extends all the + * way to seqnum 208. The four slots that are associated with seqnums 205, 206, + * 207, 208 are all set to contain gaps. + * + * However, the window cannot be extended indefinitely. If it is extended past + * num_slots, it is referred to as "overextended". Such a window cannot remain like + * this, because there are not enough slots to store all the elements it covers. + * In such a case, the jitter buffer will try to shift the window forward. If + * this shift amount is small enough, then the newest N of the currently valid + * slots remain in the window, and the oldest (num_slots-N) slots need to be + * drained. To continue with the earlier example, suppose that num_slots is 8. + * This means that packet 209 would overextend the window by 2 slots. The + * jitter buffer then shifts the window such that it starts at 202 and is of + * length 8 (that is, the num_slots amount). Slots associated with seqnums that + * go from 202 to 204 remain within the window, but slots associated with seqnums + * 200 and 201 are not, and thus need to be drained. + * + * It is possible that the window is shifted so far ahead that _none_ of the + * currently valid slots remain in the window. In such a case, the window is + * reset to contain the newly arrived packet and to be of length 1. Since this + * is a case in which (as explained above), the jitter buffer sees that the + * oldest N packets (N being 1 in this case) have no gaps in between them, this + * means that the newly arrived packet is immediately output. And since after + * that, no held-back packets remain, the jitter buffer switches back to + * regular mode. + * + * When slots are drained, it means that the jitter buffer looks at the elements + * inside them, and calls output_rtp_packet() or signal_lost_packets(). Draining + * does aggregate gaps to avoid calling signal_lost_packets() often. 
Only valid + * slots (that is, slots whose associated sequence numbers lie within the valid + * seqnum window) are drained. + * For example, suppose that within the window, there is a gap at seqnum 201, + * a packet at seqnum 202, another one at seqnum 203, a gap that starts at seqnum + * 204 and goes until seqnum 208, and then a final packet at 209. Draining will + * cause the following sequence of calls (in this order): + * + * 1. signal_lost_packets() , with gap at seqnum 201, of length 1 + * 2. output_rtp_packet() , with data of packet at seqnum 202 + * 3. output_rtp_packet() , with data of packet at seqnum 203 + * 4. signal_lost_packets() , with gap at seqnum 204, of length 5 + * 5. output_rtp_packet() , with data of packet at seqnum 209 + * + * A partial drain drains only the oldest N slots in the window, and held-back + * packets remain. A full drain drains all slots in the entire window - + * nothing remains. In the full drain case, the jitter buffer switches back + * to regular mode afterwards. + * + * A full drain is automatically done if the hold-back mode persists for some + * time. This is a case where there is a gap, but the associated packet does + * not arrive in time (or not at all). The timeout_timer exists for this purpose; + * if it expires, it automatically does a full drain on the jitter buffer. + * That timer is reset when the window is shifted due to overextension, and + * is disabled when switching back to regular mode. + * + * In cases where draining is done because of a window overextension, the jitter + * buffer will signal packet loss after the drain if there is a gap in between + * where the window used to be and where it now is after the shift. If for example + * the window previously started at seqnum 200 with length 4, and now starts at + * seqnum 210 with length 10, there is a gap from 204 to 209 (both inclusive). + * The jitter buffer will announce that via signal_lost_packets().
If however + * that gap is larger than num_slots, then the jitter buffer calls that function + * pointer with open_ended set to true. Such an "open ended gap" is a gap that + * is actually larger than num_slots, but is truncated to that length, to prevent + * PLC measures from having to produce an excessive amount of data. open_ended + * being set to true allows the signal_lost_packets() callback to apply additional + * measures, such as a fadeout, to cleanly terminate its PLC for the open gap. This + * might be necessary if PLC measures otherwise never reach silence on their own. + * + * These mechanisms allow for partial outputs in cases where there are multiple + * gaps in the slots, and also switch back to regular mode immediately once + * everything has been output or the jitter buffer is fully drained. + * + * Packet reordering is done implicitly, since when inserting packets, it is done + * so according to their seqnum, but packet output is done in order of their + * storage in the slots (the "oldest" slots, that is, slots associated with the + * oldest valid seqnums within the window are output first). + * + * Note that slots that contain gaps are not explicitly marked as containing gaps. + * Instead, all items in the slots array have their element type initially set to + * RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP. Later, when packets are inserted, these + * types are overwritten with RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET. And, when + * packets are output, the associated slot array items have their types set back + * to RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP. This is because gaps are fundamentally + * implicit - they do not exist as their own entities (unlike packets).
*/ + +static int arm_timeout_timer(struct rtp_jitter_buffer *jitter_buffer); +static void disarm_timeout_timer(struct rtp_jitter_buffer *jitter_buffer); +static void on_timeout_expiration(void *data, uint64_t expirations); + +static inline size_t seqnum_to_slot_index(struct rtp_jitter_buffer *jitter_buffer, + uint16_t seqnum); + +static void store_packet(struct rtp_jitter_buffer *jitter_buffer, + const uint8_t *packet_data, size_t packet_size, size_t header_size, + uint32_t timestamp, uint16_t seqnum, bool check_for_duplicates); + +static int do_drain(struct rtp_jitter_buffer *jitter_buffer, + size_t num_oldest_slots_to_drain); + +int rtp_jitter_buffer_init(struct rtp_jitter_buffer *jitter_buffer, struct rtp_jitter_buffer_params *params) +{ + int ret = 0; + size_t idx; + + spa_assert(jitter_buffer != NULL); + spa_assert(!jitter_buffer->initialized); + spa_assert(params != NULL); + spa_assert(params->num_slots > 0); + spa_assert(params->max_packet_size > 0); + spa_assert(params->packet_duration > 0); + spa_assert(params->loop != NULL); + spa_assert(params->output_rtp_packet != NULL); + spa_assert(params->signal_lost_packets != NULL); + + spa_memzero(jitter_buffer, sizeof(struct rtp_jitter_buffer)); + memcpy(&(jitter_buffer->params), params, sizeof(struct rtp_jitter_buffer_params)); + + /* Set the flag here already to make sure that in case + * of errors, rtp_jitter_buffer_shutdown() correctly + * cleans up any partial initialization. 
*/ + jitter_buffer->initialized = true; + + jitter_buffer->slots = calloc(params->num_slots, sizeof(struct rtp_jitter_buffer_slot)); + if (jitter_buffer->slots == NULL) { + ret = -ENOMEM; + pw_log_error("Could not allocate memory for slots array: %m"); + goto error; + } + + jitter_buffer->packet_buffer = calloc(params->num_slots, params->max_packet_size); + if (jitter_buffer->packet_buffer == NULL) { + ret = -ENOMEM; + pw_log_error("Could not allocate memory for packet buffer: %m"); + goto error; + } + + for (idx = 0; idx < jitter_buffer->params.num_slots; ++idx) + jitter_buffer->slots[idx].element_type = RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP; + + jitter_buffer->hold_back_mode = false; + jitter_buffer->last_seqnum = -1; + + /* Synchronize loop access. See the spa_loop_methods + * documentation; the add_source() documentation states: + * "Must be called from the loop's own thread." By extension, + * this also applies to pw_loop_add_timer(). */ + pw_loop_lock(jitter_buffer->params.loop); + jitter_buffer->timeout_timer = pw_loop_add_timer(params->loop, on_timeout_expiration, + jitter_buffer); + pw_loop_unlock(jitter_buffer->params.loop); + + if (jitter_buffer->timeout_timer == NULL) { + ret = -errno; + pw_log_error("Could not create timeout timer: %m"); + goto error; + } + + pw_log_info("Initialized RTP jitter buffer: num slots: %zu; max packet size: %zu bytes; " + "packet duration: %" PRIu64 " ns; total capacity duration: %" PRIu64 " ns", + params->num_slots, params->max_packet_size, params->packet_duration, + params->packet_duration * params->num_slots); + +finish: + return ret; + +error: + rtp_jitter_buffer_shutdown(jitter_buffer); + goto finish; +} + +void rtp_jitter_buffer_shutdown(struct rtp_jitter_buffer *jitter_buffer) +{ + spa_assert(jitter_buffer != NULL); + + if (!jitter_buffer->initialized) + return; + + if (jitter_buffer->timeout_timer != NULL) { + /* Synchronize loop access. 
See the spa_loop_methods + * documentation; the remove_source() documentation states: + * "Must be called from the loop's own thread." By extension, + * this also applies to pw_loop_destroy_source(). */ + pw_loop_lock(jitter_buffer->params.loop); + /* Note that this also internally detaches the source from the + * loop, and thus, any previously queued timeout callbacks are + * no longer invoked once this call ends. + * (For more, see the detach_source(), loop_destroy_source(), + * and loop_iterate() functions in spa/plugins/support/loop.c , + * particularly how they handle ep[i].data . Also see how + * remove_from_poll() - called by loop_destroy_source() - and + * loop_iterate() handle remove_count .) */ + pw_loop_destroy_source(jitter_buffer->params.loop, jitter_buffer->timeout_timer); + pw_loop_unlock(jitter_buffer->params.loop); + } + + free(jitter_buffer->slots); + free(jitter_buffer->packet_buffer); + + jitter_buffer->initialized = false; + + pw_log_info("RTP jitter buffer shut down"); +} + +int rtp_jitter_buffer_insert_packet(struct rtp_jitter_buffer *jitter_buffer, + const uint8_t *packet_data, size_t packet_size, + size_t header_size, uint32_t timestamp, uint16_t seqnum) +{ + struct rtp_jitter_buffer_params *params; + int ret = 0; + size_t slot_index; + int16_t seqnum_delta; + + spa_assert(jitter_buffer != NULL); + spa_assert(jitter_buffer->initialized); + spa_assert(packet_data != NULL); + spa_assert(header_size <= packet_size); + + params = &(jitter_buffer->params); + + spa_assert(packet_size <= params->max_packet_size); + + pw_log_trace("Got packet with size %zu and seqnum %" PRIu16 "; valid seqnum window: " + "start seqnum / length: %" PRIu32 " / %" PRIu32, packet_size, seqnum, + jitter_buffer->valid_seqnum_window_start_seqnum, + jitter_buffer->valid_seqnum_window_length); + + if (jitter_buffer->hold_back_mode) { + size_t idx = 0; + size_t num_oldest_slots_with_packets; + + spa_assert(jitter_buffer->last_seqnum >= 0); + + seqnum_delta = 
calculate_seqnum_delta(jitter_buffer->valid_seqnum_window_start_seqnum, seqnum); + + if (seqnum_delta < 0) { + /* In hold-back mode, sequence numbers at or after the sequence + * number stored in valid_seqnum_window_start_seqnum are of interest. + * Sequence numbers older than that are stale and to be discarded. + * See the overview at the top of this file for details. */ + pw_log_info("Dropping packet with stale or duplicate sequence number %" + PRId32, seqnum); + goto finish; + } else if ((size_t)seqnum_delta < params->num_slots) { + /* Packet is not stale, and fits within the capacity of the jitter + * buffer, that is, its sequence number is not too far ahead of the + * valid_seqnum_window_start_seqnum. Insert it into the buffer + * according to its sequence number. + * + * In case its sequence number is further ahead than the current + * valid seqnum window (but still fits within the jitter buffer + * capacity), it means that this packet actually extends the window + * when inserted into the buffer, and it does so this way: + * + * window_length = MAX(window_length, seqnum - window_start_seqnum + 1) + * + * (seqnum - window_start_seqnum) is stored in seqnum_delta, so this + * becomes: + * + * window_length = MAX(window_length, seqnum_delta + 1) + * + * (The +1 is there because the length stores a size-like quantity, + * so it must be added to avoid an off-by-one error.) + * + * Also see the overview at the top. */ + + jitter_buffer->valid_seqnum_window_length = SPA_MAX( + jitter_buffer->valid_seqnum_window_length, ((size_t)seqnum_delta) + 1); + + store_packet(jitter_buffer, packet_data, packet_size, header_size, + timestamp, seqnum, true); + } else { + /* The packet's sequence number is beyond the valid seqnum window, + * and trying to extend the window to encompass that packet would + * overextend it, that is, its length would exceed num_slots. + * + * (See the overview at the top.) + * + * Check by how much the window would be overextended. 
If the + * overextension is less than the valid seqnum window length, then + * some of the currently valid slots would remain valid after shifting + * the window. Specifically, the last (valid_seqnum_window_length - + * overextension_amount) slots remain valid, while the ones before that + * need to be drained. The result is a valid seqnum window that has been + * shifted forward by valid_seqnum_window_length, with a length that + * equals the num_slots amount. + * + * seqnum_delta+1 equals the amount of slots from the start of the + * window to the new packet (+1 since the new packet itself is included). + * Since by now, we know that the window would be overextended, this + * implies that (seqnum_delta+1) > num_slots. + * + * The overextension amount therefore is: + * + * overextension_amount = (seqnum_delta+1) - num_slots + * + * This is also the number of oldest slots that need to be drained at the + * start of the valid seqnum window before shifting it. If encompassing + * the new packet increases the window by overextension_amount, draining + * those oldest slots decreases it again by overextension_amount, resulting + * in a window length equaling num_slots. + * + * If however the overextension is equal to or larger than the valid seqnum + * window length, then none of the slots within the window remain valid. + * It therefore makes no sense then to try to keep them around, so the + * jitter buffer is fully drained before inserting the new packet. Also, + * this means that afterwards, the slot that contains the new packet is + * the only remaining one, that is, the window length is 1. 
*/ + + size_t overextension_amount; + bool window_fully_invalid; + size_t num_oldest_slots_to_drain; + bool open_ended = false; + + overextension_amount = ((size_t)seqnum_delta) + 1 - params->num_slots; + window_fully_invalid = (overextension_amount >= jitter_buffer->valid_seqnum_window_length); + num_oldest_slots_to_drain = SPA_MIN(overextension_amount, + jitter_buffer->valid_seqnum_window_length); + + pw_log_debug("Packet with seqnum %" PRIu16 " exceeds capacity of jitter buffer " + "in hold-back mode; window shift amount: %zu window fully invalid: %d " + "num oldest slots to drain: %zu", seqnum, overextension_amount, + window_fully_invalid, num_oldest_slots_to_drain); + + if (num_oldest_slots_to_drain > 0) { + ret = do_drain(jitter_buffer, num_oldest_slots_to_drain); + if (SPA_UNLIKELY(ret < 0)) { + ret = -EIO; + goto finish; + } + } + + /* If the window was fully invalidated (= drained), insert a packet loss + * signal to allow callers to append some sort of PLC / fadeout to the + * drained data. This is helpful for avoiding hard cutoffs in the output. */ + if (window_fully_invalid) { + ssize_t gap_length; + uint16_t one_past_last_valid_seqnum; + + one_past_last_valid_seqnum = jitter_buffer->valid_seqnum_window_start_seqnum + + jitter_buffer->valid_seqnum_window_length; + gap_length = calculate_seqnum_delta(one_past_last_valid_seqnum, seqnum); + if (SPA_UNLIKELY(gap_length < 0)) { + /* This would indicate a serious error in the calculations, + * so log it accordingly. 
*/ + pw_log_error("Negative packet loss amount %zd detected; valid seqnum " + "window start seqnum / length: %" PRIu32 " / %" PRIu32 "; " + "using packet loss amount 0 instead", + gap_length, jitter_buffer->valid_seqnum_window_start_seqnum, + jitter_buffer->valid_seqnum_window_length); + gap_length = 0; + } + + /* If the gap exceeds the capacity of the jitter buffer, then limit the + * packet loss amount and consider the gap open ended to avoid cases where + * callers otherwise would have to conceal an excessive amount of packet loss. */ + if ((size_t)gap_length > params->num_slots) { + open_ended = true; + gap_length = params->num_slots; + } + + if (gap_length > 0) { + if (open_ended) { + pw_log_debug("Signaling packet loss, starting at seqnum %" + PRIu16 ", gap length %zd, open ended", + one_past_last_valid_seqnum, gap_length); + } else { + pw_log_debug("Signaling packet loss, starting at seqnum %" + PRIu16 ", gap length %zd, not open ended", + one_past_last_valid_seqnum, gap_length); + } + if ((ret = params->signal_lost_packets(params->context, + one_past_last_valid_seqnum, (size_t)gap_length, open_ended)) != 0) { + pw_log_error("Could not signal lost RTP packet: %s", spa_strerror(ret)); + goto finish; + } + } + } + + /* As explained in the overview, a shift that fully invalidates + * the window is a different case than one that retains entries + * from the old window. */ + if (window_fully_invalid) { + jitter_buffer->valid_seqnum_window_start_seqnum = seqnum; + jitter_buffer->valid_seqnum_window_length = 1; + } else { + /* If the window is not fully invalid, then figure out + * its new start_seqnum by moving backwards. A not fully + * invalid window is of length num_slots (see the overview + * for why), and ends at the sequence number of the new + * packet. Therefore, it starts at (seqnum +1 - num_slots). + * (+1 since the new packet itself also has to be factored in.) 
*/ + jitter_buffer->valid_seqnum_window_start_seqnum = seqnum + 1 - params->num_slots; + jitter_buffer->valid_seqnum_window_length = params->num_slots; + } + pw_log_debug("Reset valid seqnum window to start at seqnum %" PRIu32 " and be of length %" + PRIu32, jitter_buffer->valid_seqnum_window_start_seqnum, + jitter_buffer->valid_seqnum_window_length); + + /* Now write this new packet into the packet buffer to hold + * it back until the gap before it is filled. */ + store_packet(jitter_buffer, packet_data, packet_size, header_size, + timestamp, seqnum, false); + + /* NOTE: This assumes that spa_system_timerfd_settime() behaves just + * like timerfd_settime() in the sense that it clears any timer + * expirations that haven't been noticed yet. In other words, even + * if the timer expired already, rearming by calling + * spa_system_timerfd_settime() is assumed to implicitly wipe + * these prior expirations. + * + * This is precisely what timerfd_settime() does. From its manpage: + * + * > If the timer has already expired one or more times ***since its + * > settings were last modified using timerfd_settime()***, or since + * > the last successful read(2), then the buffer given to read(2) + * > returns an unsigned 8-byte integer (uint64_t) containing the + * > number of expirations that have occurred. + * + * And this is important to make sure that, should the timer expire while + * we are here, it does not lead to an immediate jitter buffer draining. + */ + if (ret == 0) { + pw_log_debug("Rearming timeout timer as part of resetting hold-back mode"); + ret = arm_timeout_timer(jitter_buffer); + if (SPA_UNLIKELY(ret < 0)) { + pw_log_error("Could not arm timer: %s", spa_strerror(ret)); + ret = -EINVAL; + } + } else { + pw_log_warn("Not rearming timeout timer due to earlier error while draining"); + } + + /* Next, let the logic below check the contents of the new, + * updated valid seqnum window. 
*/ + } + + /* After inserting the packet, go through the valid window and check if + * the oldest N slots contain packets without gaps in between them. First, + * N (= num_oldest_slots_with_packets) has to be determined. Iterate over + * the slots, starting at the valid seqnum window start, until either, + * a slot with a gap inside is encountered, or the end of the window is + * reached. That way, the number of oldest N held back packets that present + * in an uninterrupted sequence in the oldest N slots and thus can be output + * is determined. */ + for (idx = 0; idx < jitter_buffer->valid_seqnum_window_length; ++idx) { + uint16_t item_seqnum = jitter_buffer->valid_seqnum_window_start_seqnum + idx; + slot_index = seqnum_to_slot_index(jitter_buffer, item_seqnum); + if (jitter_buffer->slots[slot_index].element_type != + RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET) + break; + } + num_oldest_slots_with_packets = idx; + + if (num_oldest_slots_with_packets > 0) { + pw_log_debug("The first %zu slots in valid seqnum window all contain " + "packets - these packets can be output immediately", + num_oldest_slots_with_packets); + + /* Now output the packets. */ + for (idx = 0; idx < num_oldest_slots_with_packets; ++idx) { + struct rtp_jitter_buffer_slot *slot; + uint16_t item_seqnum; + const uint8_t *packet_data_to_output; + + /* This implicitly does the sequence number wrap-around + * by storing the value in uint16_t. 
*/ + item_seqnum = jitter_buffer->valid_seqnum_window_start_seqnum + idx; + + slot_index = seqnum_to_slot_index(jitter_buffer, item_seqnum); + slot = &(jitter_buffer->slots[slot_index]); + + packet_data_to_output = jitter_buffer->packet_buffer + + params->max_packet_size * slot_index; + + ret = params->output_rtp_packet(params->context, packet_data_to_output, + slot->packet_size, slot->header_size, slot->timestamp, item_seqnum); + if (SPA_UNLIKELY(ret < 0)) { + pw_log_error("Could not output RTP packet: %s", spa_strerror(ret)); + ret = -EIO; + goto finish; + } + + slot->element_type = RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP; + } + + /* Since num_oldest_slots_with_packets is the result of a for-loop that counts up + * to valid_seqnum_window_length, this assertion should always hold. */ + spa_assert(jitter_buffer->valid_seqnum_window_length >= num_oldest_slots_with_packets); + + /* Move the window start further to move past the slots whose packets + * were just output. This also reduces the valid seqnum window length. */ + jitter_buffer->valid_seqnum_window_start_seqnum += num_oldest_slots_with_packets; + jitter_buffer->valid_seqnum_window_length -= num_oldest_slots_with_packets; + + pw_log_trace("%zu packet(s) output in hold-back mode; valid seqnum window " + "is now: start seqnum / length: %" PRIu32 " / %" PRIu32, + num_oldest_slots_with_packets, + jitter_buffer->valid_seqnum_window_start_seqnum, + jitter_buffer->valid_seqnum_window_length); + + /* If the valid seqnum window length has become is zero, it means that + * all held-back packets have been output, and no gaps remain. + * This in turn means that we are done - all previously missing packets + * have been received and have been output in order of their sequence + * numbers. Switch back to regular mode and set the seqnum of the last + * output packet the last_seqnum. 
*/ + if (jitter_buffer->valid_seqnum_window_length == 0) { + jitter_buffer->hold_back_mode = false; + /* Don't set this to the newly arrived packet's seqnum directly. + * Hold-back mode is active, which means that packets previously + * arrived out of order - so, the new packet too might not have + * arrived in order. */ + jitter_buffer->last_seqnum = jitter_buffer->valid_seqnum_window_start_seqnum - 1; + /* Also cancel the timeout timer to not trigger an erroneous drain. */ + disarm_timeout_timer(jitter_buffer); + pw_log_debug("No more packets held back; switched back to regular mode, " + "last seqnum set to %" PRId32, jitter_buffer->last_seqnum); + } + } + } else { + /* Here, we are in regular mode. */ + + /* If a preceding sequence number is known, use it to detect gaps. For + * example, last sequence number is 400, this packet's sequence number + * is 402 -> there is a gap at sequence number 401. This can happen due + * to packet loss and out-of-order packet transmission. + * + * (Also see the overview at the top.) */ + if (jitter_buffer->last_seqnum >= 0) { + seqnum_delta = calculate_seqnum_delta(jitter_buffer->last_seqnum, seqnum); + + if (seqnum_delta <= 0) { + /* In the regular mode, it is implied that all preceding + * packets were in order of the sequence numbers, and that + * any missing packets have been also announced in-order. + * Therefore, if at this point, the delta is <= 0, it means + * that a packet with a sequence number equal to or lower + * than the previous packet's sequence number arrived + * (taking possible wraparound into account). This packet + * may be a duplicate, and is unusable, so drop it. */ + + pw_log_info("Dropping packet with stale sequence number %" PRId32, seqnum); + goto finish; + } else if (seqnum_delta > 1) { + /* If the delta is >1, it indicates a gap. This can happen + * due to out-of-order packets or due to packet loss. 
+ * Activate hold-back mode: Hold back the received packet + * and any further ones until there are consecutive + * sequences of packets available, which can be output. + * And, write information about that packet and the gap into + * the associated slots. + * Also arm the timeout timer in case the missing packets + * do not arrive in time. */ + + jitter_buffer->hold_back_mode = true; + /* Set the valid seqnum window start to the sequence number that + * was actually expected as the one that follows the last packet. + * Since this place was reached, it implies that the current packet's + * sequence number does not equal to that one, and thus, the packet + * we actually expected is still missing. This is the oldest packet + * we expect here, since packets with a sequence number preceding + * that packet's have been processed already at this point. */ + jitter_buffer->valid_seqnum_window_start_seqnum = + ((uint16_t)jitter_buffer->last_seqnum) + 1; + /* The current valid seqnum window length equals the seqnum_delta. This + * is because the window goes from valid_seqnum_window_start_seqnum to + * the last slot with a packet in the valid seqnum window (both inclusive). + * For example, if valid_seqnum_window_start_seqnum is initially set + * to 301 here, and the current packet has sequence number 305, it + * means that the window goes from 301 to 305, so it consists of + * 5 slots (the first 4 contain the gap from 301 to 304, and the fifth + * slot contains packet 305). 
+ * + * seqnum_delta equals: + * + * (this current packet's seqnum - last_seqnum) + * + * Substituting in the seqnum_delta formula, we get: + * + * valid_seqnum_window_start_seqnum = last_seqnum + 1 + * -> last_seqnum = valid_seqnum_window_start_seqnum - 1 + * + * and: + * + * seqnum_delta = + * (this current packet's seqnum - last_seqnum) = + * (this current packet's seqnum - (valid_seqnum_window_start_seqnum - 1)) = + * (this current packet's seqnum - valid_seqnum_window_start_seqnum + 1) + * + * which is exactly the formula needed to compute a size or length + * from A to B (or, in this case, from valid_seqnum_window_start_seqnum + * to the packet's seqnum). */ + jitter_buffer->valid_seqnum_window_length = seqnum_delta; + + pw_log_debug("Gap detected starting at seqnum %" PRIu32 ", length %" + PRId16 "; switched to hold-back mode", + jitter_buffer->valid_seqnum_window_start_seqnum, + seqnum_delta - 1); + + if (SPA_UNLIKELY(jitter_buffer->valid_seqnum_window_length > params->num_slots)) { + /* It is possible that the sudden gap that leads to the + * activation of the hold-back mode is so big that it + * immediately overextends the valid seqnum window. + * However, unlike in the hold-back cases handled above, + * here, nothing is held back yet, so there are no slots + * that can remain valid. Therefore, this behaves just like + * in the window_fully_invalid == true case further above. + * + * In most cases, the gap is way larger than the number of + * slots. There is a corner case though, and that is when + * the window length equals (num_slots + 1). Keep in mind + * that the window includes the gap _and_ the new packet - + * hence the +1. From this it follows that if the window + * length equals num_slots+1, it means that the actual + * gap is num_slots in length. This is essentially the + * largest possible gap that still isn't open ended. For + * this reason, a check is made to identify this corner case. 
+ * + * In either case, the correct, uninterrupted, in-order + * packets that precede this gap are followed by proper PLC. + * Should the gap be open ended, PLC can be augmented to + * include a fadeout for example. + * + * And, since an overextended window is invalid, and in this + * case, no held-back packets are presents, the valid seqnum + * window is adjusted to only contain the current packet. + * + * After signaling, the jitter buffer switches back to regular + * mode. This is because after the large gap, nothing is held + * back anymore - there is no reason for staying in the + * hold-back mode. */ + + bool open_ended = jitter_buffer->valid_seqnum_window_length > + (params->num_slots + 1); + + pw_log_debug("This gap immediately overextends the valid seqnum " + "window; signaling packet loss, starting at seqnum %" + PRIu16 ", gap length %zd, %sopen ended", + jitter_buffer->valid_seqnum_window_start_seqnum, + params->num_slots, open_ended ? "" : "not "); + + if ((ret = params->signal_lost_packets(params->context, + jitter_buffer->valid_seqnum_window_start_seqnum, + params->num_slots, open_ended)) != 0) { + pw_log_error("Could not signal lost RTP packet: %s", spa_strerror(ret)); + goto finish; + } + + pw_log_debug("Switching back to regular mode and emitting packet " + "after the large gap"); + + jitter_buffer->hold_back_mode = false; + ret = params->output_rtp_packet(params->context, packet_data, + packet_size, header_size, timestamp, seqnum); + jitter_buffer->last_seqnum = seqnum; + goto finish; + + } + + store_packet(jitter_buffer, packet_data, packet_size, header_size, + timestamp, seqnum, false); + + ret = arm_timeout_timer(jitter_buffer); + if (SPA_UNLIKELY(ret < 0)) { + pw_log_error("Could not arm timer: %s", spa_strerror(ret)); + ret = -EINVAL; + } + + goto finish; + } + } + + /* If hold-back mode is not active, just forward the packet. 
*/ + ret = params->output_rtp_packet(params->context, packet_data, packet_size, + header_size, timestamp, seqnum); + + jitter_buffer->last_seqnum = seqnum; + } + +finish: + return ret; +} + +int rtp_jitter_buffer_drain(struct rtp_jitter_buffer *jitter_buffer) +{ + int ret; + + spa_assert(jitter_buffer != NULL); + spa_assert(jitter_buffer->initialized); + + if (!jitter_buffer->hold_back_mode) + return 0; /* regular mode: nothing is held back, so there is nothing to drain */ + + ret = do_drain(jitter_buffer, jitter_buffer->valid_seqnum_window_length); /* drain the entire valid seqnum window */ + + /* Not calling signal_lost_packets with open_ended = true here, since + * that one is meant for cases where mid-stream, the hold-back mode was + * activated, and a sudden large sequence jump occurred. This function + * here however is called externally. The state reset below is done even if do_drain() reported an error; that error code is still returned to the caller. */ + + jitter_buffer->hold_back_mode = false; + jitter_buffer->last_seqnum = -1; /* -1 = no last packet known */ + pw_log_debug("Switching back to regular mode after explicit drain"); + + /* Disarm any potentially ongoing timer since the + * jitter buffer just switched back to regular mode. */ + disarm_timeout_timer(jitter_buffer); + + return ret; +} + +void rtp_jitter_buffer_flush(struct rtp_jitter_buffer *jitter_buffer) +{ + size_t idx; + + spa_assert(jitter_buffer != NULL); + spa_assert(jitter_buffer->initialized); + + jitter_buffer->hold_back_mode = false; + jitter_buffer->last_seqnum = -1; /* -1 = no last packet known */ + pw_log_debug("Switching back to regular mode after flush"); + + for (idx = 0; idx < jitter_buffer->params.num_slots; ++idx) + jitter_buffer->slots[idx].element_type = RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP; /* mark every slot empty; held-back packets are dropped without being output or signaled */ + + /* Disarm any potentially ongoing timer since the + * jitter buffer just switched back to regular mode. 
*/ + disarm_timeout_timer(jitter_buffer); +} + +static int set_timeout(struct rtp_jitter_buffer *jitter_buffer, uint64_t timeout) +{ + struct itimerspec ts; + + ts.it_value.tv_sec = timeout / SPA_NSEC_PER_SEC; /* timeout is given in nanoseconds: whole seconds ... */ + ts.it_value.tv_nsec = timeout % SPA_NSEC_PER_SEC; /* ... plus the nanosecond remainder */ + ts.it_interval.tv_sec = 0; /* zero interval; presumably a one-shot timer, as with timerfd_settime() - confirm */ + ts.it_interval.tv_nsec = 0; + + return spa_system_timerfd_settime(jitter_buffer->params.loop->system, + jitter_buffer->timeout_timer->fd, 0, &ts, NULL); +} + +static int arm_timeout_timer(struct rtp_jitter_buffer *jitter_buffer) +{ + struct rtp_jitter_buffer_params *params = &(jitter_buffer->params); + + /* Set the timeout to expire at the total duration + * covered by the packet buffer's capacity, that is, num_slots times packet_duration. */ + return set_timeout(jitter_buffer, + params->num_slots * params->packet_duration); +} + +static void disarm_timeout_timer(struct rtp_jitter_buffer *jitter_buffer) +{ + set_timeout(jitter_buffer, 0); /* a zero timeout is used to disarm (presumably matching timerfd_settime(), where an all-zero it_value stops the timer); the result is ignored */ +} + +static void on_timeout_expiration(void *data, uint64_t expirations) +{ + struct rtp_jitter_buffer *jitter_buffer = data; /* the expirations argument is not used */ + + pw_log_info("Timeout timer expired; draining jitter buffer"); + do_drain(jitter_buffer, jitter_buffer->valid_seqnum_window_length); /* NOTE(review): the return value is discarded, so errors from the output / loss callbacks during a timer-driven drain go unreported */ + jitter_buffer->hold_back_mode = false; /* back to regular mode */ + jitter_buffer->last_seqnum = -1; /* -1 = no last packet known */ +} + +static void store_packet(struct rtp_jitter_buffer *jitter_buffer, const uint8_t *packet_data, + size_t packet_size, size_t header_size, uint32_t timestamp, + uint16_t seqnum, bool check_for_duplicates) +{ + size_t slot_index; + uint8_t *dest; + struct rtp_jitter_buffer_params *params = &(jitter_buffer->params); + struct rtp_jitter_buffer_slot *slot; + + slot_index = seqnum_to_slot_index(jitter_buffer, seqnum); + slot = &(jitter_buffer->slots[slot_index]); + + if (check_for_duplicates) { + if (SPA_UNLIKELY(slot->element_type == RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET)) { + pw_log_debug("Packet with seqnum %" PRIu16 " has already been inserted; " + "this is a duplicate; dropping", seqnum); + return; /* the copy that is already stored is kept */ + } + } + + dest = jitter_buffer->packet_buffer + 
params->max_packet_size * slot_index; + memcpy(dest, packet_data, packet_size); /* NOTE(review): packet_size is not checked against max_packet_size here - confirm that callers validate it */ + + slot->element_type = RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET; + slot->packet_size = packet_size; + slot->header_size = header_size; + slot->timestamp = timestamp; +} + +static inline size_t seqnum_to_slot_index(struct rtp_jitter_buffer *jitter_buffer, + uint16_t seqnum) +{ + return ((size_t)seqnum) % jitter_buffer->params.num_slots; /* NOTE(review): consecutive seqnums map to consecutive slots across the 65535 -> 0 wrap-around only if num_slots divides 65536; verify this is enforced where num_slots is set */ +} + +static int do_drain(struct rtp_jitter_buffer *jitter_buffer, + size_t num_oldest_slots_to_drain) +{ + /* Important: This intentionally does not reset valid_seqnum_window_length + * or valid_seqnum_window_start_seqnum. That is up to the caller, since + * this function is called in several different situations that require + * different handling of these states. */ + + int ret = 0; + size_t idx; + int32_t first_drained_packet_seqnum = -1; /* -1 = no run of drained packets is currently being tracked (used for logging only) */ + uint16_t first_lost_packet_seqnum = 0; /* only meaningful while tracking_lost_packets is true */ + bool tracking_lost_packets = false; + struct rtp_jitter_buffer_params *params = &(jitter_buffer->params); + + spa_assert(num_oldest_slots_to_drain <= jitter_buffer->valid_seqnum_window_length); + + if (num_oldest_slots_to_drain == 0) + return 0; + + if (num_oldest_slots_to_drain == jitter_buffer->valid_seqnum_window_length) { + pw_log_debug("Draining all slots in the entire valid seqnum window"); + } else { + pw_log_debug("Draining the first (= oldest) %zu slots in the valid seqnum window; " + "window start seqnum / length: %" PRIu32 " / %" PRIu32, + num_oldest_slots_to_drain, + jitter_buffer->valid_seqnum_window_start_seqnum, + jitter_buffer->valid_seqnum_window_length); + } + + for (idx = 0; idx < num_oldest_slots_to_drain; ++idx) { + uint16_t seqnum = jitter_buffer->valid_seqnum_window_start_seqnum + idx; /* storing in uint16_t implicitly does the seqnum wrap-around */ + size_t slot_index = seqnum_to_slot_index(jitter_buffer, seqnum); + struct rtp_jitter_buffer_slot *slot = &(jitter_buffer->slots[slot_index]); + + if (slot->element_type == RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET) { + const uint8_t *packet_data; + + if 
(tracking_lost_packets) { + int16_t seqnum_delta = calculate_seqnum_delta( + first_lost_packet_seqnum, seqnum); + + tracking_lost_packets = false; /* this packet ends the current run of lost packets; seqnum_delta is the length of that run */ + + if (SPA_LIKELY(seqnum_delta > 0)) { + pw_log_debug("Signaling packet loss sequence, starting at " + "seqnum %" PRIu16 ", gap length %zu, not open ended", + first_lost_packet_seqnum, (size_t)seqnum_delta); + if ((ret = params->signal_lost_packets(params->context, + first_lost_packet_seqnum, (size_t)seqnum_delta, false)) < 0) { + goto finish; + } + } else { + pw_log_error("Negative delta %" PRId16" between first and last " + "seqnum in lost seqnum range %" PRIu16 " - %" PRIu16 + " encountered while draining", seqnum_delta, + first_lost_packet_seqnum, seqnum); + } + } + + packet_data = jitter_buffer->packet_buffer + params->max_packet_size * slot_index; + + if ((ret = params->output_rtp_packet(params->context, packet_data, + slot->packet_size, slot->header_size, slot->timestamp, seqnum)) < 0) + goto finish; + + slot->element_type = RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP; /* the packet was output, so the slot is empty again */ + + if (first_drained_packet_seqnum < 0) + first_drained_packet_seqnum = seqnum; /* start of a new run of drained packets (for logging) */ + } else { + /* If a packet is not available, consider it lost. + * Do not announce it right away - instead, switch + * to a tracking mode, and keep iterating over the + * table. That way, it becomes possible to announce + * entire spans of lost packets in one go, as a gap + * with multiple packets, which can be more efficient + * for PLC. For example, when 10 packets in a row are + * lost, that would then announce a gap that is 10 + * packets long instead of announcing 10 times that + * a packet was lost. 
*/ + if (!tracking_lost_packets) { + first_lost_packet_seqnum = seqnum; + tracking_lost_packets = true; + + if (first_drained_packet_seqnum >= 0) { + uint16_t last_drained_packet_seqnum = + jitter_buffer->valid_seqnum_window_start_seqnum + (idx - 1); + pw_log_debug("Drained packet(s) with seqnums %" PRId32 " - %" PRIu16, + first_drained_packet_seqnum, last_drained_packet_seqnum); + first_drained_packet_seqnum = -1; + } + } + } + } + + if (first_drained_packet_seqnum >= 0) { + uint16_t last_drained_packet_seqnum = jitter_buffer->valid_seqnum_window_start_seqnum + (idx - 1); + pw_log_debug("Drained packet(s) with seqnums %" PRId32 " - %" PRIu16, first_drained_packet_seqnum, + last_drained_packet_seqnum); + } + + if (tracking_lost_packets) { + /* This can happen when the valid seqnum window is only partially + * drained, that is, num_oldest_slots_to_drain < valid_seqnum_window_length. */ + + uint16_t last_lost_packet_seqnum; + int16_t seqnum_delta; + + last_lost_packet_seqnum = jitter_buffer->valid_seqnum_window_start_seqnum + (idx - 1); + seqnum_delta = calculate_seqnum_delta(first_lost_packet_seqnum, last_lost_packet_seqnum) + 1; /* +1 because both ends of the lost range are inclusive */ + + spa_assert(seqnum_delta >= 0); + + pw_log_debug("Signaling final packet loss sequence, starting at " + "seqnum %" PRIu16 ", gap length %zu, not open ended", + first_lost_packet_seqnum, (size_t)seqnum_delta); + + /* This final gap is always signaled as a non-open-ended loss, + * since its size is exactly defined. Open-ended packet losses + * happen in cases where the packet loss results in a gap that + * is too large for any packet loss concealment mechanism to + * fully cover (or its end not even known, so PLC is not fully + * applicable). 
*/ + if ((ret = params->signal_lost_packets(params->context, + first_lost_packet_seqnum, (size_t)seqnum_delta, false)) < 0) { + goto finish; + } + } + +finish: + return ret; +} + diff --git a/src/modules/module-rtp/jitter-buffer.h b/src/modules/module-rtp/jitter-buffer.h new file mode 100644 index 000000000..408eaf567 --- /dev/null +++ b/src/modules/module-rtp/jitter-buffer.h @@ -0,0 +1,317 @@ +/* PipeWire */ +/* SPDX-FileCopyrightText: Copyright © 2026 Carlos Rafael Giani */ +/* SPDX-License-Identifier: MIT */ + +#ifndef PIPEWIRE_RTP_JITTER_BUFFER_H +#define PIPEWIRE_RTP_JITTER_BUFFER_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +struct rtp_jitter_buffer_params { + /* How many slots the jitter buffer shall have. This defines + * the capacity of the jitter buffer. Must be at least 1. + * For a more detailed explanation what slots are, see + * further below. */ + size_t num_slots; + /* Maximum size of the packets, in bytes. This must include + * the RTP header bytes. Must be at least 1. */ + size_t max_packet_size; + /* Duration of each packet, in nanoseconds. + * Must be at least 1. */ + uint64_t packet_duration; + /* PipeWire loop, used for setting up a timeout timer. + * This needs to be a loop that runs in the same thread + * as this jitter buffer. + * This must be set to a valid pointer. */ + struct pw_loop *loop; + /* User-defined context that is passed to the function pointers. */ + void *context; + + /* Function pointer called when the jitter buffer decides to output + * an RTP packet. In regular mode, this is immediately invoked when + * rtp_jitter_buffer_insert_packet() is called. No data is copied + * then; this function pointer is directly passed the packet_data and + * packet_size argument values that rtp_jitter_buffer_insert_packet() + * was called with. 
In hold-back mode, this is called when the jitter + * buffer is able to output previously held back packets in order + * (for example, because a preceding packet just now arrived), or + * when the jitter buffer gets drained. packet_data points to the + * bytes of the RTP packet that is output, and packet_size contains + * the size of the packet in bytes. + * + * Should this function experience an error, it returns a negated + * errno, and 0 if it succeeds. In case of an error, if the overall + * logic can continue, it is recommended to fully reset the jitter + * buffer by calling rtp_jitter_buffer_flush(). + * + * This must be set to a valid pointer. */ + int (*output_rtp_packet)(void *context, const uint8_t *packet_data, size_t packet_size, + size_t header_size, uint32_t timestamp, uint16_t seqnum); + /* Function pointer called when the jitter buffer detected one or + * more lost packets in a row. For example, if the last received packet + * had sequence number 16, and then, the jitter buffer received packet + * with sequence number 26, then 27, 28, etc., it eventually will + * detect that packets 17 through 25 are lost, and call this, setting + * seqnum_of_first_lost_packet to 17, and gap_length to 9. The last + * argument is set to false if the packet loss is detected mid stream + * when the gap is small enough that measures like PLC can reasonably + * cover all lost packets. If the detected gap is too large, open_ended + * will be set to true. Implementations must then interpret the value of + * gap_length as a maximum; whatever PLC measures the implementation + * uses must not produce output larger than gap_length. This is a + * safety measure to avoid cases where PLC would have to produce an + * excessive amount of data. + * + * When open_ended is true, implementations should also apply a fade + * out at the end of the produced PLC content to avoid a potential + * hard cutoff at the end. 
+ * + * Should this function experience an error, it returns a negated + * errno, and 0 if it succeeds. In case of an error, if the overall + * logic can continue, it is recommended to fully reset the jitter + * buffer by calling rtp_jitter_buffer_flush(). + * + * This must be set to a valid pointer. */ + int (*signal_lost_packets)(void *context, uint16_t seqnum_of_first_lost_packet, + size_t gap_length, bool open_ended); +}; + +enum rtp_jitter_buffer_element_type { + RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET, /* the slot holds a stored packet */ + RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP /* the slot holds no packet */ +}; + +/* Slot structure. A slot stores an element (a packet or + * a gap). Slots outside of the valid seqnum window have + * undefined contents. See the "slots" array documentation + * in the rtp_jitter_buffer structure below. */ +struct rtp_jitter_buffer_slot { + enum rtp_jitter_buffer_element_type element_type; + /* This is only used when the element_type is + * RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET. See + * the packet_buffer documentation below for more. */ + size_t packet_size; + /* Size of the RTP header. This must be less than + * or equal to packet_size. The packet data that + * is stored in packet_buffer includes this header. */ + size_t header_size; + /* RTP timestamp. */ + uint32_t timestamp; +}; + +struct rtp_jitter_buffer { + /* Copy of the params passed to rtp_jitter_buffer_init(). */ + struct rtp_jitter_buffer_params params; + + /* Set to true by rtp_jitter_buffer_init(), and set to false + * by rtp_jitter_buffer_shutdown(). */ + bool initialized; + + /* Array of slots. The size of this array equals num_slots. + * Only slots that are within the valid seqnum window have + * valid values in this array. For example, if the slot at + * array index 3 corresponds to sequence number 400, but + * the valid seqnum window starts at 402, then the value + * in this array at index 3 has no meaning. Not used when + * regular mode is active. 
*/ + struct rtp_jitter_buffer_slot *slots; + + /* Buffer for storing packets in hold-back mode. The size + * of this buffer is max_packet_size*num_slots. To access + * a packet, do it this way: + * + * packet_data = &(packet_buffer[max_packet_size * index]); + * + * Doing this is technically less space efficient if the actual + * packet size is smaller than max_packet_size, but it makes + * buffer management and packet addressing much easier, and + * avoids costly reallocations. The packet_size field in the + * slot structure specifies how many bytes at that location + * actually make up the packet. For example, if the slot at + * index 5 in the slots array has its element_type value set + * to RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET, then at location + * &(packet_buffer[max_packet_size * 5]), valid packet data + * can be found. And, starting from that location, the number + * of bytes the packet there is made of equals the packet_size + * value of the slot at index 5. + * + * Only the locations that correspond to valid slots with + * a packet as element contain valid data. For example, if the + * slot at index 3 corresponds to sequence number 400, but the + * valid seqnum window starts at 402, then the data at location + * &(packet_buffer[max_packet_size * 3]) has no meaning. Also, + * if the slot does lie within the valid window, but the type + * of the contained element is RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP, + * then the associated location in this packet buffer is + * meaningless. Not used when regular mode is active. */ + uint8_t *packet_buffer; + + /* If true, hold-back mode is active. */ + bool hold_back_mode; + /* Sequence number of the last packet observed while in + * regular mode. If no last packet is known (at startup for + * example), this is set to -1. + * Not used when hold-back mode is active. */ + int32_t last_seqnum; + /* Timer that, when it times out, drains all slots and switches + * back to regular mode. 
This is armed when hold-back mode is + * activated, and when it is reset. + * Not used when regular mode is active. */ + struct spa_source *timeout_timer; + /* These define the valid seqnum window. Packet sequence numbers + * in the [start_seqnum .. (start_seqnum + window_length - 1)] + * range (both start and end of it inclusive) are within this + * window, and are considered valid. A sequence number below the + * window's start_seqnum is considered stale. A sequence number + * beyond the end of the window extends the window; the window + * length is increased such that this new sequence number is at + * the very end of the window, that is, the length is extended + * such that new_seqnum == (start_seqnum + window_length - 1). + * + * However, if this overextends the window (meaning that the + * window length would be greater than the number of slots), + * the logic in rtp_jitter_buffer_insert_packet() will adjust + * the window's start_seqnum and length. + * + * If hold-back mode is off, these two values have no meaning. + * + * This window is necessary for the following: + * + * 1. It allows for detecting and dropping old, stale packets + * by looking at their sequence numbers. + * 2. It detects when currently held-back packets need to be + * drained, that is, the gap that precedes them is not getting + * filled in time. This happens when packets are lost. + * 3. It defines what slots are valid. Only those slots that are + * associated with sequence numbers that fit inside this + * window are valid. Slots outside of the window have neither + * defined nor meaningful content (that is, packet size, + * packet buffer data, or slot element type). + * + * Note that even though valid_seqnum_window_start_seqnum is a + * sequence number, it is a uint32_t instead of a uint16_t, for + * easier arithmetic. 
*/ + uint32_t valid_seqnum_window_start_seqnum; + uint32_t valid_seqnum_window_length; +}; + +/* Initializes an uninitialized rtp_jitter_buffer instance with the given params. 
+ * + * See rtp_jitter_buffer_params for details about the input parameters. + * + * params must be a valid pointer. An internal copy of the rtp_jitter_buffer_params + * instance that pointer refers to will be made, so the caller does not have to + * keep that instance around for the duration of the jitter buffer's lifetime. + * + * The jitter buffer must not already be initialized. + * + * Returns 0 if initialization was successful, nonzero in case of an error. + * If an error happens, this automatically calls rtp_jitter_buffer_shutdown(). */ +int rtp_jitter_buffer_init(struct rtp_jitter_buffer *jitter_buffer, struct rtp_jitter_buffer_params *params); + +/* Shuts down a previously initialized jitter buffer. + * + * jitter_buffer must be a valid pointer. + * + * Calling this on an uninitialized jitter buffer results in a no-op. */ +void rtp_jitter_buffer_shutdown(struct rtp_jitter_buffer *jitter_buffer); + +/* Returns true if the jitter buffer is initialized. */ +static inline bool rtp_jitter_buffer_is_initialized(struct rtp_jitter_buffer *jitter_buffer) +{ + return jitter_buffer->initialized; +} + +/* Inserts an RTP packet into the jitter buffer. + * + * Depending on the sequence number of the RTP packet, the jitter buffer + * will immediately output that packet, or hold it back inside a slot + * for purposes of reordering and packet loss detection. (A held-back packet's + * data is copied into the packet buffer.) The packet order is defined by + * the RTP sequence number. (16-bit unsigned int RTP sequence wrap-around + * is handled properly.) + * + * If for example packets with sequence numbers 1, 4, 3, 5 arrived (in this + * incorrect order), packet 1 will have been output immediately, but packets + * 4, 3, 5 will have been held back (due to the gap at 2 and the incorrect + * order of packets). If later, packet 2 arrives, the gap at 2 is filled, + * and the jitter buffer will then output all packets from 2 to 5, in the + * correct order. 
If however packet 2 never arrives, but packets past + * packet 5 keep arriving, then eventually, packet 2 will be considered + * lost (causing signal_lost_packets() to be called), and the held-back packets + * 4, 3, 5 will be drained (and output via output_rtp_packet(), in correct + * order 3, 4, 5). + * + * jitter_buffer and packet_data must be valid pointers. packet_size must + * be the size of the memory block pointed to by packet_data. + * + * header_size is the size of the RTP header, and must be less than or equal + * to packet_size. timestamp is the RTP timestamp. + * + * Note that while the values of header_size, timestamp, seqnum could be + * parsed from the packet header, this is not done by this function for + * efficiency reasons. Parsing those may require byte swapping to handle + * endianness, and computing the header size may depend on whether the + * header has extensions or not. These computations and processes are done + * once, by the caller, and passed to this call via the arguments. + * + * Returns 0 if the call was successful, nonzero in case of an error. + * If an error happens, consider the jitter buffer as no longer usable + * (it can only be shut down then). */ +int rtp_jitter_buffer_insert_packet(struct rtp_jitter_buffer *jitter_buffer, + const uint8_t *packet_data, size_t packet_size, + size_t header_size, uint32_t timestamp, uint16_t seqnum); + +/* Drains all held-back packets and reports packet loss based on those packets. + * + * If the jitter buffer is currently holding back packets due to some packets + * having been added out of order previously, or because gaps were detected, this + * drains them, causing the output_rtp_packet() and signal_lost_packets() function + * pointers from the params passed to rtp_jitter_buffer_init() to be called. 
If + * for example packets with sequence numbers 1, 4, 3, 5 arrived (in this incorrect + * order), packet 1 will have been output immediately, but packets 4, 3, 5 will + * have been held back (due to the gap at 2 and the incorrect order of packets). + * If this function is then called, the jitter buffer will (in this order) call + * signal_lost_packets() to signal the loss of packet 2, and then call + * output_rtp_packet() to output packets 3, 4, 5 (in that corrected order). + * + * After draining, the jitter buffer will be back in regular mode. + * + * If no packets were added, or if the jitter buffer is in regular mode, + * this does nothing. + * + * The jitter buffer also has an internal timer that is activated as soon + * as gaps and out-of-order packets are detected. If that timer expires, + * the jitter buffer will automatically drain itself. If however all gaps + * are filled in time, the timer is deactivated, and no automatic drain occurs. + * + * jitter_buffer must be a valid pointer. + * + * Returns 0 if the call was successful, nonzero in case of an error. + * If an error happens, consider the jitter buffer as no longer usable + * (it can only be shut down then). */ +int rtp_jitter_buffer_drain(struct rtp_jitter_buffer *jitter_buffer); + +/* Flushes all held-back packets. + * + * Unlike draining, this does not call any function pointers. Instead, it + * flushes any and all held-back packets (meaning, these packets are all + * gone, and _not_ reported as lost via signal_lost_packets()), and + * resets the jitter buffer back to the regular mode. + * + * This is useful for performing a hard reset on the jitter buffer. + * + * jitter_buffer must be a valid pointer. 
*/ +void rtp_jitter_buffer_flush(struct rtp_jitter_buffer *jitter_buffer); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* PIPEWIRE_RTP_JITTER_BUFFER_H */ diff --git a/src/modules/module-rtp/opus-codec.c b/src/modules/module-rtp/opus-codec.c new file mode 100644 index 000000000..f4a5bcccf --- /dev/null +++ b/src/modules/module-rtp/opus-codec.c @@ -0,0 +1,625 @@ +/* PipeWire */ +/* SPDX-FileCopyrightText: Copyright © 2026 Carlos Rafael Giani */ +/* SPDX-License-Identifier: MIT */ + +#include "config.h" + +#ifdef HAVE_OPUS + +#include +#include + +#include "opus-codec.h" + +#include +#include + +#include + +#include +#include + +PW_LOG_TOPIC_EXTERN(mod_topic); +#define PW_LOG_TOPIC_DEFAULT mod_topic + +static int opus_error_to_neg_errno(int opus_error) +{ + switch (opus_error) { + case OPUS_OK: return 0; + case OPUS_BAD_ARG: return -EINVAL; + case OPUS_BUFFER_TOO_SMALL: return -ENOBUFS; + case OPUS_INTERNAL_ERROR: return -EIO; + case OPUS_INVALID_PACKET: return -EBADMSG; + case OPUS_UNIMPLEMENTED: return -ENOTSUP; + case OPUS_INVALID_STATE: return -EBADF; + case OPUS_ALLOC_FAIL: return -ENOMEM; + default: return -EIO; + } +} + +static void rtp_opus_codec_shutdown(struct rtp_audio_codec_context *context); + +static int rtp_opus_codec_init(struct rtp_audio_codec_context *context, + struct spa_audio_info *stream_info, + enum rtp_audio_codec_type type, uint32_t samples_per_frame, + size_t max_encoded_frame_size, size_t stride, + struct pw_properties *codec_props) +{ + int ret = 0; + int opus_ret = OPUS_OK; + uint32_t i; + unsigned char mapping[255]; + uint64_t us_per_frame; + const char *codec_type_name = ""; + size_t output_buffer_size = 0; + + spa_assert(stream_info != NULL); + spa_assert(context != NULL); + spa_assert(samples_per_frame > 0); + spa_assert(max_encoded_frame_size > 0); + spa_assert(stride > 0); + + spa_memzero(context, sizeof(struct rtp_audio_codec_context)); + + /* Opus supports: + * + * - 1 to 255 channels + * - 8, 12, 16, 24, 48 kHz 
sample rates + * - 16-bit signed integer and 32-bit float point as sample format + * (with native endianness) + * - 2.5, 5, 10, 20, 40, 60ms frame sizes + * + * Check that the parameters satisfy these constraints. */ + + if ((stream_info->info.raw.channels == 0) || (stream_info->info.raw.channels > 255)) { + pw_log_error("Opus cannot handle %" PRIu32 " channel(s); valid channel count range: 1-255", + stream_info->info.raw.channels); + ret = -EINVAL; + goto error; + } + + switch (stream_info->info.raw.rate) { + case 8000: + case 12000: + case 16000: + case 24000: + case 48000: + break; + default: + pw_log_error("unsupported sample rate of %" PRIu32 " Hz; supported sample rates: " + "8000, 12000, 16000, 24000, 48000", stream_info->info.raw.rate); + ret = -EINVAL; + goto error; + } + + switch (stream_info->info.raw.format) { + case SPA_AUDIO_FORMAT_S16: + case SPA_AUDIO_FORMAT_F32: + break; + default: + pw_log_error("unsupported sample format %s; Opus requires 16-bit signed integer " + "or 32-bit floating point samples", + spa_type_audio_format_to_short_name(stream_info->info.raw.format)); + ret = -EINVAL; + goto error; + } + + us_per_frame = (uint64_t)samples_per_frame * 1000000 / stream_info->info.raw.rate; + + switch (us_per_frame) { + case 2500: + case 5000: + case 10000: + case 20000: + case 40000: + case 60000: + break; + default: + pw_log_error("unsupported frame length: %" PRIu32 " samples (%.1f ms)", + samples_per_frame, (double)us_per_frame / 1000.0); + ret = -EINVAL; + goto error; + } + + /* Setup the context. */ + + context->audio_info = *stream_info; + context->samples_per_frame = samples_per_frame; + context->max_encoded_frame_size = max_encoded_frame_size; + context->stride = stride; + context->type = type; + + /* TODO: Currently, we use a simple 1:1 channel mapping. Also, coupled + * streams are not used at the moment. 
This is a limitation of the current + * Opus integration, in part because it is unclear how to communicate + * the channel mapping and coupled streams from the en- to the decoder. + * One possibility would be to enforce a (de-facto) standard channel + * mapping, like the Vorbis channel mapping. */ + + for (i = 0; i < stream_info->info.raw.channels; i++) + mapping[i] = i; + + switch (type) { + case RTP_AUDIO_CODEC_TYPE_ENCODER: { + static const opus_int32 DEFAULT_COMPLEXITY = 10; + + bool use_computed_max_bitrate = true; + opus_int32 props_bitrate = -1; + opus_int32 computed_max_bitrate; + opus_int32 complexity = DEFAULT_COMPLEXITY; + bool restricted_lowdelay = false; + bool in_band_fec = false; + int packet_loss_percentage = 0; + opus_int32 signal_type = OPUS_AUTO; + const char *signal_type_str = "auto"; + + if (codec_props != NULL) { + const char *prop_str; + + complexity = pw_properties_get_int32(codec_props, "opus.encoder.complexity", complexity); + props_bitrate = pw_properties_get_int32(codec_props, "opus.encoder.bitrate", props_bitrate); + restricted_lowdelay = pw_properties_get_bool(codec_props, "opus.encoder.restricted-lowdelay", + restricted_lowdelay); + in_band_fec = pw_properties_get_bool(codec_props, "opus.encoder.inband-fec", in_band_fec); + packet_loss_percentage = pw_properties_get_int32(codec_props, "opus.encoder.packet-loss-percentage", + packet_loss_percentage); + + if ((packet_loss_percentage < 0) || (packet_loss_percentage > 100)) { + pw_log_error("invalid packet loss percentage %d (valid range: 0-100)", packet_loss_percentage); + ret = -EINVAL; + goto error; + } + + prop_str = pw_properties_get(codec_props, "opus.encoder.signal-type"); + if (prop_str != NULL) { + /* Adopt the configured value, so that it (and not the "auto" + * default) is what gets validated here and logged later on. */ + signal_type_str = prop_str; + + if (spa_streq(signal_type_str, "auto")) + signal_type = OPUS_AUTO; + else if (spa_streq(signal_type_str, "voice")) + signal_type = OPUS_SIGNAL_VOICE; + else if (spa_streq(signal_type_str, "music")) + signal_type = OPUS_SIGNAL_MUSIC; + else { + pw_log_error("unsupported Opus encoder
signal type \"%s\"", signal_type_str); + ret = -EINVAL; + goto error; + } + } + } + + context->handle = opus_multistream_encoder_create( + stream_info->info.raw.rate, + stream_info->info.raw.channels, + stream_info->info.raw.channels, + 0, + mapping, + restricted_lowdelay ? OPUS_APPLICATION_RESTRICTED_LOWDELAY : OPUS_APPLICATION_AUDIO, + &opus_ret); + + if (opus_ret != OPUS_OK) + goto error_while_creating; + + /* Some CTL may actually not be implemented. It is safe to ignore these. */ + #define SET_OPUS_ENCODER_CTL(CTL) \ + do { \ + opus_ret = opus_multistream_encoder_ctl(context->handle, CTL); \ + switch (opus_ret) { \ + case OPUS_OK: \ + break; \ + case OPUS_UNIMPLEMENTED: \ + pw_log_debug("could not set encoder CTL %s since it is " \ + "not implemented; ignoring", #CTL); \ + break; \ + default: \ + pw_log_error("error while setting encoder CTL %s: %s", \ + #CTL, opus_strerror(opus_ret)); \ + ret = opus_error_to_neg_errno(opus_ret); \ + goto error; \ + } \ + } while (0) + + /* Use the maximum encoded frame size as the size + * for the output buffer, since that one will be + * used as the destination for the encoder. */ + output_buffer_size = max_encoded_frame_size; + + /* Hardcode encoder CTLs to ensure consistent and deterministic + * encoding behavior, even if the defaults change across + * libopus versions. */ + + /* Counterintuitively, CBR is not actually useful for RTP. RFC 7587 + * section 3.1.2 documents this. This applies to the generic audio + * use case here as well. The opus_multistream_encode() function + * (and its float variant) accept an argument that hard-limits the + * maximum encoded output size, so there is no possibility of Opus + * frames exceeding the maximum RTP payload size. Do constrain VBR + * though to not experience excessive variability. 
*/ + SET_OPUS_ENCODER_CTL(OPUS_SET_VBR(1)); + SET_OPUS_ENCODER_CTL(OPUS_SET_VBR_CONSTRAINT(1)); + /* This is a niche feature that is useful for avoiding audio configuration + * switching in software stacks. Signals are forcibly converted to mono or + * stereo, depending on the parameter. Not needed here: channel configuration + * is determined by the PipeWire stream format, not the codec. */ + SET_OPUS_ENCODER_CTL(OPUS_SET_FORCE_CHANNELS(OPUS_AUTO)); + /* Allow the encoder to automatically select the appropriate bandwidth, and + * set the upper bound to the maximum to enable unconstrained selection. */ + SET_OPUS_ENCODER_CTL(OPUS_SET_MAX_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND)); + SET_OPUS_ENCODER_CTL(OPUS_SET_BANDWIDTH(OPUS_AUTO)); + /* DTX is a SILK-layer feature, and not useful for generic audio. It + * is there for detecting silence / low-energy periods during speech. */ + SET_OPUS_ENCODER_CTL(OPUS_SET_DTX(0)); + /* This is a hint for the encoder about the actual depth of the source + * signal. For example, if 32-bit floating point audio is the input, but + * the actual audio signal has a noise floor that resembles that of + * 16-bit audio, then setting this to 16 is appropriate. But such + * signal details are not known here, so set this to the maximum. + * + * Also, from https://www.opus-codec.org/docs/opus_api-1.6/group__opus__encoderctls.html#gaa23940eb477ff617edc14b8d66e104c0 : + * > When using opus_encode() instead of opus_encode_float(), or when libopus is compiled + * > for fixed-point, the encoder uses the minimum of the value set here and the value 16. */ + SET_OPUS_ENCODER_CTL(OPUS_SET_LSB_DEPTH(24)); + /* Disabling prediction hurts encoder efficiency substantially, and + * is only really useful for debugging and testing purposes. Keep + * prediction on. 
*/ + SET_OPUS_ENCODER_CTL(OPUS_SET_PREDICTION_DISABLED(0)); +#ifdef OPUS_SET_DRED_DURATION_REQUEST + /* DRED is a new deep-learning-based redundancy mechanism that embeds up + * to one second of recovery data. It is not used here, since, at this point, + * it only works with SILK, though maybe one day it will work with the CELT + * layer as well. */ + SET_OPUS_ENCODER_CTL(OPUS_SET_DRED_DURATION(0)); +#endif +#ifdef OPUS_SET_QEXT_REQUEST + /* Quality extensions are a new feature that requires very high bitrates. + * See: https://datatracker.ietf.org/doc/draft-ietf-mlcodec-opus-scalable-quality-extension/ + * Disabled due to its experimental nature and need for very high bitrates. */ + SET_OPUS_ENCODER_CTL(OPUS_SET_QEXT(0)); +#endif + + /* Set the bitrate. */ + + /* Estimate the "max computed bitrate" out of the max_encoded_frame_size + * that make the most use of the available space. The valid bitrate range + is 500 to 512000 bits/s, so clamp the estimation to that range. */ + computed_max_bitrate = ((uint64_t)max_encoded_frame_size) * 8 * SPA_USEC_PER_SEC / us_per_frame; + computed_max_bitrate = SPA_CLAMP(computed_max_bitrate, 500, 512000); + + pw_log_debug("computed a bitrate of %" PRId32 " bits/s for Opus encoding based " + "on the max allowed encoded frame size of %zu bytes", + (int32_t)computed_max_bitrate, max_encoded_frame_size); + + if (props_bitrate >= 0) { + pw_log_info("got bitrate %" PRId32 " from properties", (int32_t)props_bitrate); + + if (props_bitrate < 500) { + pw_log_warn("bitrate %" PRId32 " from properties is invalid (must " + "be at least 500 bits/s); setting computed bitrate instead", + (int32_t)props_bitrate); + } else if (props_bitrate > computed_max_bitrate) { + pw_log_warn("bitrate %" PRId32 " from properties exceeds computed " + "max bitrate; setting computed max bitrate instead", + (int32_t)props_bitrate); + } else { + use_computed_max_bitrate = false; + } + } + + if (use_computed_max_bitrate) { + pw_log_info("setting computed max 
bitrate of %" PRId32 " bits/s", + (int32_t)computed_max_bitrate); + SET_OPUS_ENCODER_CTL(OPUS_SET_BITRATE(computed_max_bitrate)); + } else { + pw_log_info("setting bitrate of %" PRId32 " bits/s from opus.encoder.bitrate property", + (int32_t)props_bitrate); + SET_OPUS_ENCODER_CTL(OPUS_SET_BITRATE(props_bitrate)); + } + + /* Set the encoding complexity. */ + + if ((complexity < 0) || (complexity > 10)) { + pw_log_warn("complexity %" PRId32 " is invalid; choosing %" PRId32 " as default", + (int32_t)complexity, (int32_t)DEFAULT_COMPLEXITY); + complexity = DEFAULT_COMPLEXITY; + } + SET_OPUS_ENCODER_CTL(OPUS_SET_COMPLEXITY(complexity)); + pw_log_info("setting encoding complexity %" PRId32, (int32_t)complexity); + + /* Set the in-band FEC. */ + + SET_OPUS_ENCODER_CTL(OPUS_SET_INBAND_FEC(in_band_fec ? 1 : 0)); + SET_OPUS_ENCODER_CTL(OPUS_SET_PACKET_LOSS_PERC(packet_loss_percentage)); + pw_log_info("%s in-band FEC; packet loss percentage: %d", + in_band_fec ? "enabling" : "disabling", packet_loss_percentage); + + /* Set the signal type. */ + + SET_OPUS_ENCODER_CTL(OPUS_SET_SIGNAL(signal_type)); + pw_log_info("setting signal type \"%s\"", signal_type_str); + + #undef SET_OPUS_ENCODER_CTL + + break; + } + case RTP_AUDIO_CODEC_TYPE_DECODER: + context->handle = opus_multistream_decoder_create( + stream_info->info.raw.rate, + stream_info->info.raw.channels, + stream_info->info.raw.channels, + 0, + mapping, + &opus_ret); + + if (opus_ret != OPUS_OK) + goto error_while_creating; + + /* Use the space a fully decoded frame needs as + * the output buffer size, since that one will be + * used as the destination for the decoder. 
*/ + output_buffer_size = samples_per_frame * stride; + break; + default: + pw_log_error("unsupported audio codec type %d", (int)(type)); + ret = -EINVAL; + goto error; + } + + codec_type_name = rtp_audio_codec_type_name(type); + + pw_log_info("Opus %s created; samples per frame: %" PRIu32 "; max encoded frame data size: %zu; " + "output buffer size: %zu; ms per frame: %.1f; sample format: %s", codec_type_name, + samples_per_frame, max_encoded_frame_size, output_buffer_size, (double)us_per_frame / 1000.0, + spa_type_audio_format_to_short_name(stream_info->info.raw.format)); + + context->output_buffer = malloc(output_buffer_size); + if (context->output_buffer == NULL) { + pw_log_error("could not allocate %s output buffer", codec_type_name); + ret = -ENOMEM; + goto error; + } + + return 0; + +error: + rtp_opus_codec_shutdown(context); + return ret; + +error_while_creating: + context->handle = NULL; + /* This label is only jumped to from inside the switch above, that is, + * before codec_type_name is assigned. Look the name up from the type + * instead, so that the message does not contain an empty codec name. */ + pw_log_error("error while creating %s: %s", rtp_audio_codec_type_name(type), opus_strerror(opus_ret)); + ret = opus_error_to_neg_errno(opus_ret); + goto error; +} + +static void rtp_opus_codec_shutdown(struct rtp_audio_codec_context *context) +{ + spa_assert(context != NULL); + + if (context->handle != NULL) { + if (context->type == RTP_AUDIO_CODEC_TYPE_ENCODER) + opus_multistream_encoder_destroy(context->handle); + else + opus_multistream_decoder_destroy(context->handle); + context->handle = NULL; + + pw_log_info("Opus %s destroyed", rtp_audio_codec_type_name(context->type)); + } + + if (context->output_buffer != NULL) { + free(context->output_buffer); + context->output_buffer = NULL; + pw_log_debug("%s output buffer freed", rtp_audio_codec_type_name(context->type)); + } +} + +static void rtp_opus_codec_reset(struct rtp_audio_codec_context *context, char const *reason) +{ + spa_assert(context != NULL); + + if (context->type == RTP_AUDIO_CODEC_TYPE_ENCODER) + opus_multistream_encoder_ctl(context->handle, OPUS_RESET_STATE); + else + opus_multistream_decoder_ctl(context->handle,
OPUS_RESET_STATE); + + if (reason != NULL) + pw_log_info("Opus %s reset, reason: %s", rtp_audio_codec_type_name(context->type), reason); + else + pw_log_info("Opus %s reset (no reason given)", rtp_audio_codec_type_name(context->type)); +} + +static int rtp_opus_codec_get_delay(struct rtp_audio_codec_context *context, size_t *delay) +{ + int ret; + opus_int32 sample_rate = 0; + opus_int32 decoder_delay = 0; + + spa_assert(context != NULL); + spa_assert(delay != NULL); + + /* The Opus specification requires that all decoder implementations + * have the exact same delay. Encoders can vary, however. libopus + * combines the en- and decoder delay into a "lookahead value", + * which is accessible from the encoder (but not the decoder). + * Since for the RTP transmissions, it is beneficial to handle + * en- and decoder delay separately, extract the encoder delay out + * of the lookahead value if this is an encoder (by subtracting + * the decoder delay from the lookahead), and if this is a + * decoder, just return the decoder delay. + * + * The decoder delay is fixed to 2.5 ms according to RFC 6716, + * so this code needs to convert this to samples. */ + + if (context->type == RTP_AUDIO_CODEC_TYPE_ENCODER) + ret = opus_multistream_encoder_ctl(context->handle, OPUS_GET_SAMPLE_RATE(&sample_rate)); + else + ret = opus_multistream_decoder_ctl(context->handle, OPUS_GET_SAMPLE_RATE(&sample_rate)); + + if (ret != OPUS_OK) { + pw_log_error("could not get %s sample rate: %s", + rtp_audio_codec_type_name(context->type), opus_strerror(ret)); + return opus_error_to_neg_errno(ret); + } + + /* Convert the fixed 2.5 ms decoder delay to samples based + * on the en/decoder's sample rate. Define 2.5ms as 2500us + * here to do the calculation purely with integers. 
*/ + decoder_delay = 2500LL * sample_rate / SPA_USEC_PER_SEC; + + if (context->type == RTP_AUDIO_CODEC_TYPE_ENCODER) { + opus_int32 lookahead = 0; + + ret = opus_multistream_encoder_ctl(context->handle, OPUS_GET_LOOKAHEAD(&lookahead)); + if (ret != OPUS_OK) { + pw_log_error("could not get encoder lookahead: %s", opus_strerror(ret)); + return opus_error_to_neg_errno(ret); + } + + if (SPA_UNLIKELY(lookahead < decoder_delay)) { + pw_log_error("lookahead %" PRId32 " is smaller than decoder delay %" PRId32, + (int32_t)lookahead, (int32_t)decoder_delay); + return -EINVAL; + } + + *delay = lookahead - decoder_delay; + return 0; + } else { + *delay = decoder_delay; + return 0; + } +} + +static int rtp_opus_codec_encode(struct rtp_audio_codec_context *context, const uint8_t *in_samples, + uint8_t **out_encoded_data, size_t *out_encoded_data_size) +{ + int ret; + + spa_assert(context != NULL); + spa_assert(in_samples != NULL); + spa_assert(out_encoded_data != NULL); + spa_assert(out_encoded_data_size != NULL); + + switch (context->audio_info.info.raw.format) { + case SPA_AUDIO_FORMAT_S16: + ret = opus_multistream_encode(context->handle, (const opus_int16 *)in_samples, + context->samples_per_frame, context->output_buffer, context->max_encoded_frame_size); + break; + case SPA_AUDIO_FORMAT_F32: + ret = opus_multistream_encode_float(context->handle, (const float *)in_samples, + context->samples_per_frame, context->output_buffer, context->max_encoded_frame_size); + break; + default: + return -EINVAL; + } + + if (ret >= 0) { + *out_encoded_data = context->output_buffer; + *out_encoded_data_size = ret; + pw_log_trace("encoded %zu samples to %zu bytes", context->samples_per_frame, + *out_encoded_data_size); + return 0; + } else { + pw_log_error("error while encoding audio: %s", opus_strerror(ret)); + return -EIO; + } +} + +static int rtp_opus_codec_decode(struct rtp_audio_codec_context *context, const uint8_t *in_encoded_data, + size_t in_encoded_data_size, uint8_t **out_samples, 
size_t *out_num_samples) +{ + int ret; + + spa_assert(context != NULL); + spa_assert(in_encoded_data != NULL); + spa_assert(in_encoded_data_size > 0); + spa_assert(out_samples != NULL); + spa_assert(out_num_samples != NULL); + + switch (context->audio_info.info.raw.format) { + case SPA_AUDIO_FORMAT_S16: + ret = opus_multistream_decode(context->handle, in_encoded_data, in_encoded_data_size, + (opus_int16 *)(context->output_buffer), context->samples_per_frame, 0); + break; + case SPA_AUDIO_FORMAT_F32: + ret = opus_multistream_decode_float(context->handle, in_encoded_data, in_encoded_data_size, + (float *)(context->output_buffer), context->samples_per_frame, 0); + break; + default: + return -EINVAL; + } + + if (ret >= 0) { + *out_samples = context->output_buffer; + *out_num_samples = ret; + pw_log_trace("decoded %zu bytes to %zu samples", in_encoded_data_size, *out_num_samples); + return 0; + } else { + pw_log_error("error while decoding audio: %s", opus_strerror(ret)); + return -EIO; + } +} + +static int rtp_opus_codec_apply_plc(struct rtp_audio_codec_context *context, + uint8_t **out_samples, size_t *out_num_samples) +{ + int ret; + + spa_assert(context != NULL); + spa_assert(out_samples != NULL); + spa_assert(out_num_samples != NULL); + + /* PLC is applied by "decoding" from a nullpointer. 
See: + * https://www.opus-codec.org/docs/opus_api-1.6/group__opus__multistream.html#gaa4b89541efe01970cf52e4a336db3ad0 */ + + switch (context->audio_info.info.raw.format) { + case SPA_AUDIO_FORMAT_S16: + ret = opus_multistream_decode(context->handle, NULL, 0, + (opus_int16 *)(context->output_buffer), context->samples_per_frame, 0); + break; + case SPA_AUDIO_FORMAT_F32: + ret = opus_multistream_decode_float(context->handle, NULL, 0, + (float *)(context->output_buffer), context->samples_per_frame, 0); + break; + default: + return -EINVAL; + } + + if (ret >= 0) { + *out_samples = context->output_buffer; + *out_num_samples = ret; + pw_log_debug("generated %zu PLC samples", *out_num_samples); + return 0; + } else { + pw_log_error("error while applying PLC: %s", opus_strerror(ret)); + return -EIO; + } +} + +static const char * rtp_opus_codec_get_name(void) +{ + return "Opus"; +} + +const struct rtp_audio_codec* get_rtp_opus_codec(void) +{ + static const struct rtp_audio_codec codec = { + .init = rtp_opus_codec_init, + .shutdown = rtp_opus_codec_shutdown, + .reset = rtp_opus_codec_reset, + .get_delay = rtp_opus_codec_get_delay, + .encode = rtp_opus_codec_encode, + .decode = rtp_opus_codec_decode, + .apply_plc = rtp_opus_codec_apply_plc, + .get_name = rtp_opus_codec_get_name, + }; + + return &codec; +} + +#else + +#include + +const struct rtp_audio_codec* get_rtp_opus_codec(void) +{ + return NULL; +} + +#endif diff --git a/src/modules/module-rtp/opus-codec.h b/src/modules/module-rtp/opus-codec.h new file mode 100644 index 000000000..1192e3c57 --- /dev/null +++ b/src/modules/module-rtp/opus-codec.h @@ -0,0 +1,20 @@ +/* PipeWire */ +/* SPDX-FileCopyrightText: Copyright © 2026 Carlos Rafael Giani */ +/* SPDX-License-Identifier: MIT */ + +#ifndef PIPEWIRE_RTP_OPUS_CODEC_H +#define PIPEWIRE_RTP_OPUS_CODEC_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "audio-codec.h" + +const struct rtp_audio_codec* get_rtp_opus_codec(void); + +#ifdef __cplusplus +} /* extern "C" */ 
+#endif + +#endif /* PIPEWIRE_RTP_OPUS_CODEC_H */ diff --git a/src/modules/module-rtp/opus.c b/src/modules/module-rtp/opus.c deleted file mode 100644 index c25fc17f8..000000000 --- a/src/modules/module-rtp/opus.c +++ /dev/null @@ -1,359 +0,0 @@ -/* PipeWire */ -/* SPDX-FileCopyrightText: Copyright © 2023 Wim Taymans */ -/* SPDX-License-Identifier: MIT */ - -#ifdef HAVE_OPUS - -#include -#include - -/* TODO: Direct timestamp mode here may require a rework. See audio.c for a reference. - * Also check out the usage of actual_max_buffer_size in audio.c. */ - -static void rtp_opus_process_playback(void *data) -{ - struct impl *impl = data; - struct pw_buffer *buf; - struct spa_data *d; - uint32_t wanted, timestamp, target_buffer, stride, maxsize; - int32_t avail; - - if ((buf = pw_stream_dequeue_buffer(impl->stream)) == NULL) { - pw_log_info("Out of stream buffers: %m"); - return; - } - d = buf->buffer->datas; - - stride = impl->stride; - - maxsize = d[0].maxsize / stride; - wanted = buf->requested ? SPA_MIN(buf->requested, maxsize) : maxsize; - - if (impl->io_position && impl->direct_timestamp) { - /* in direct mode, read directly from the timestamp index, - * because sender and receiver are in sync, this would keep - * target_buffer of samples available. 
*/ - spa_ringbuffer_read_update(&impl->ring, - impl->io_position->clock.position); - } - avail = spa_ringbuffer_get_read_index(&impl->ring, ×tamp); - - target_buffer = impl->target_buffer; - - if (avail < (int32_t)wanted) { - enum spa_log_level level; - memset(d[0].data, 0, wanted * stride); - if (impl->have_sync) { - impl->have_sync = false; - level = SPA_LOG_LEVEL_WARN; - } else { - level = SPA_LOG_LEVEL_DEBUG; - } - pw_log(level, "underrun %d/%u < %u", - avail, target_buffer, wanted); - } else { - double error, corr; - if (impl->first) { - if ((uint32_t)avail > target_buffer) { - uint32_t skip = avail - target_buffer; - pw_log_debug("first: avail:%d skip:%u target:%u", - avail, skip, target_buffer); - timestamp += skip; - avail = target_buffer; - } - impl->first = false; - } else if (avail > (int32_t)SPA_MIN(target_buffer * 8, impl->buffer_size2 / stride)) { - pw_log_warn("overrun %u > %u", avail, target_buffer * 8); - timestamp += avail - target_buffer; - avail = target_buffer; - } - if (!impl->direct_timestamp) { - /* when not using direct timestamp and clocks are not - * in sync, try to adjust our playback rate to keep the - * requested target_buffer bytes in the ringbuffer */ - error = (double)target_buffer - (double)avail; - error = SPA_CLAMPD(error, -impl->max_error, impl->max_error); - - corr = spa_dll_update(&impl->dll, error); - - pw_log_trace("avail:%u target:%u error:%f corr:%f", avail, - target_buffer, error, corr); - - pw_stream_set_rate(impl->stream, 1.0 / corr); - } - spa_ringbuffer_read_data(&impl->ring, - impl->buffer, - impl->buffer_size2, - (timestamp * stride) & impl->buffer_mask2, - d[0].data, wanted * stride); - - timestamp += wanted; - spa_ringbuffer_read_update(&impl->ring, timestamp); - } - d[0].chunk->offset = 0; - d[0].chunk->size = wanted * stride; - d[0].chunk->stride = stride; - d[0].chunk->flags = 0; - buf->size = wanted; - - pw_stream_queue_buffer(impl->stream, buf); -} - -static int rtp_opus_receive(struct impl *impl, uint8_t 
*buffer, ssize_t len, - ssize_t hlen, uint64_t current_time) -{ - struct rtp_header *hdr; - ssize_t plen; - uint16_t seq; - uint32_t timestamp, samples, write, expected_write; - uint32_t stride = impl->stride; - OpusMSDecoder *dec = impl->stream_data; - int32_t filled; - int res; - - hdr = (struct rtp_header*)buffer; - - seq = ntohs(hdr->sequence_number); - if (impl->have_seq && impl->seq != seq) { - pw_log_info("unexpected seq (%d != %d) SSRC:%u", - seq, impl->seq, impl->ssrc); - impl->have_sync = false; - } - impl->seq = seq + 1; - impl->have_seq = true; - - timestamp = ntohl(hdr->timestamp) - impl->ts_offset; - - impl->receiving = true; - - plen = len - hlen; - - filled = spa_ringbuffer_get_write_index(&impl->ring, &expected_write); - - /* we always write to timestamp + delay */ - write = timestamp + impl->target_buffer; - - if (!impl->have_sync) { - pw_log_info("sync to timestamp:%u seq:%u ts_offset:%u SSRC:%u target:%u direct:%u", - timestamp, seq, impl->ts_offset, impl->ssrc, - impl->target_buffer, impl->direct_timestamp); - - /* we read from timestamp, keeping target_buffer of data - * in the ringbuffer. 
*/ - impl->ring.readindex = timestamp; - impl->ring.writeindex = write; - filled = impl->target_buffer; - - spa_dll_init(&impl->dll); - spa_dll_set_bw(&impl->dll, SPA_DLL_BW_MIN, 128, impl->rate); - memset(impl->buffer, 0, impl->buffer_size); - impl->have_sync = true; - } else if (expected_write != write) { - pw_log_debug("unexpected write (%u != %u)", - write, expected_write); - } - - if (filled + 2880 > (int32_t)(impl->buffer_size2 / stride)) { - pw_log_debug("capture overrun %u + %d > %u", filled, 2880, - impl->buffer_size2 / stride); - impl->have_sync = false; - } else { - uint32_t index = (write * stride) & impl->buffer_mask2, end; - - res = opus_multistream_decode_float(dec, - &buffer[hlen], plen, - (float*)&impl->buffer[index], 2880, - 0); - - end = index + (res * stride); - /* fold to the lower part of the ringbuffer when overflow */ - if (end > impl->buffer_size2) - memmove(impl->buffer, &impl->buffer[impl->buffer_size2], end - impl->buffer_size2); - - pw_log_info("receiving %zd len:%d timestamp:%d %u", plen, res, timestamp, index); - samples = res; - - write += samples; - spa_ringbuffer_write_update(&impl->ring, write); - } - return 0; -} - -static void rtp_opus_flush_packets(struct impl *impl) -{ - int32_t avail, tosend; - uint32_t stride, timestamp, offset; - uint8_t out[1280]; - struct iovec iov[2]; - struct rtp_header header; - OpusMSEncoder *enc = impl->stream_data; - int res = 0; - - avail = spa_ringbuffer_get_read_index(&impl->ring, ×tamp); - tosend = impl->psamples; - - if (avail < tosend) - return; - - stride = impl->stride; - - spa_zero(header); - header.v = 2; - header.pt = impl->payload; - header.ssrc = htonl(impl->ssrc); - - iov[0].iov_base = &header; - iov[0].iov_len = sizeof(header); - iov[1].iov_base = out; - iov[1].iov_len = 0; - - offset = 0; - while (avail >= tosend) { - header.sequence_number = htons(impl->seq); - header.timestamp = htonl(impl->ts_offset + timestamp); - - res = opus_multistream_encode_float(enc, - (const 
float*)&impl->buffer[offset * stride], tosend, - out, sizeof(out)); - - pw_log_trace("sending %d len:%d timestamp:%d", tosend, res, timestamp); - iov[1].iov_len = res; - - rtp_stream_call_send_packet(impl, iov, 2); - - impl->seq++; - timestamp += tosend; - offset += tosend; - avail -= tosend; - } - - pw_log_trace("move %d offset:%d", avail, offset); - memmove(impl->buffer, &impl->buffer[offset * stride], avail * stride); - - spa_ringbuffer_read_update(&impl->ring, timestamp); -} - -static void rtp_opus_process_capture(void *data) -{ - struct impl *impl = data; - struct pw_buffer *buf; - struct spa_data *d; - uint32_t offs, size, timestamp, expected_timestamp, stride; - int32_t filled, wanted; - - if ((buf = pw_stream_dequeue_buffer(impl->stream)) == NULL) { - pw_log_info("Out of stream buffers: %m"); - return; - } - d = buf->buffer->datas; - - offs = SPA_MIN(d[0].chunk->offset, d[0].maxsize); - size = SPA_MIN(d[0].chunk->size, d[0].maxsize - offs); - stride = impl->stride; - wanted = size / stride; - - filled = spa_ringbuffer_get_write_index(&impl->ring, &expected_timestamp); - - if (SPA_LIKELY(impl->io_position)) { - uint32_t rate = impl->io_position->clock.rate.denom; - timestamp = impl->io_position->clock.position * impl->rate / rate; - } else - timestamp = expected_timestamp; - - if (!impl->have_sync) { - pw_log_info("sync to timestamp:%u seq:%u ts_offset:%u SSRC:%u", - timestamp, impl->seq, impl->ts_offset, impl->ssrc); - impl->ring.readindex = impl->ring.writeindex = expected_timestamp = timestamp; - memset(impl->buffer, 0, impl->buffer_size); - impl->have_sync = true; - } else { - if (SPA_ABS((int32_t)expected_timestamp - (int32_t)timestamp) > 32) { - pw_log_warn("expected %u != timestamp %u", expected_timestamp, timestamp); - impl->have_sync = false; - } else if (filled + wanted > (int32_t)(impl->buffer_size / stride)) { - pw_log_warn("overrun %u + %u > %u", filled, wanted, impl->buffer_size / stride); - impl->have_sync = false; - } - } - - 
spa_ringbuffer_write_data(&impl->ring, - impl->buffer, - impl->buffer_size, - (filled * stride) & impl->buffer_mask, - SPA_PTROFF(d[0].data, offs, void), wanted * stride); - expected_timestamp += wanted; - spa_ringbuffer_write_update(&impl->ring, expected_timestamp); - - pw_stream_queue_buffer(impl->stream, buf); - - rtp_opus_flush_packets(impl); -} - -static void rtp_opus_deinit(struct impl *impl, enum spa_direction direction) -{ - if (impl->stream_data) { - if (direction == SPA_DIRECTION_INPUT) - opus_multistream_encoder_destroy(impl->stream_data); - else - opus_multistream_decoder_destroy(impl->stream_data); - } -} - -static int rtp_opus_init(struct impl *impl, enum spa_direction direction) -{ - int err; - unsigned char mapping[255]; - uint32_t i; - - if (impl->info.info.opus.channels > 255) - return -EINVAL; - - if (impl->psamples >= 2880) - impl->psamples = 2880; - else if (impl->psamples >= 1920) - impl->psamples = 1920; - else if (impl->psamples >= 960) - impl->psamples = 960; - else if (impl->psamples >= 480) - impl->psamples = 480; - else if (impl->psamples >= 240) - impl->psamples = 240; - else - impl->psamples = 120; - - for (i = 0; i < impl->info.info.opus.channels; i++) - mapping[i] = i; - - impl->deinit = rtp_opus_deinit; - impl->receive_rtp = rtp_opus_receive; - if (direction == SPA_DIRECTION_INPUT) { - impl->stream_events.process = rtp_opus_process_capture; - - impl->stream_data = opus_multistream_encoder_create( - impl->info.info.opus.rate, - impl->info.info.opus.channels, - impl->info.info.opus.channels, 0, - mapping, - OPUS_APPLICATION_AUDIO, - &err); - } - else { - impl->stream_events.process = rtp_opus_process_playback; - - impl->stream_data = opus_multistream_decoder_create( - impl->info.info.opus.rate, - impl->info.info.opus.channels, - impl->info.info.opus.channels, 0, - mapping, - &err); - } - if (!impl->stream_data) - pw_log_error("opus error: %d", err); - return impl->stream_data ? 
0 : err; -} -#else -static int rtp_opus_init(struct impl *impl, enum spa_direction direction) -{ - return -ENOTSUP; -} -#endif diff --git a/src/modules/module-rtp/stream.c b/src/modules/module-rtp/stream.c index c61950978..7ead41d0e 100644 --- a/src/modules/module-rtp/stream.c +++ b/src/modules/module-rtp/stream.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +28,9 @@ #include #include #include +#include +#include +#include PW_LOG_TOPIC_EXTERN(mod_topic); #define PW_LOG_TOPIC_DEFAULT mod_topic @@ -105,7 +109,6 @@ struct impl { const struct rtp_format_info *rtp_format_info; enum spa_direction direction; - void *stream_data; uint32_t rate; uint32_t stride; @@ -113,6 +116,15 @@ struct impl { uint8_t payload; uint32_t ssrc; uint16_t seq; + /* This is used for pre-checking for received packets that arrive + * out of order. Depending on the audio type, a jitter buffer + * may be used intermediately, and then, the seq field above + * will be set _after_ the jitter buffer reorders packets, + * so it cannot be used for the pre-checking purpose. Thus, + * this separate seq field is required. + * It is int32_t to be able to store -1 as an indicator that + * no expected seqnum is set yet. */ + int32_t next_expected_incoming_seq; unsigned fixed_ssrc:1; unsigned have_ssrc:1; unsigned ignore_ssrc:1; @@ -187,6 +199,19 @@ struct impl { void (*deinit)(struct impl *impl, enum spa_direction direction); int (*resend_packets)(struct impl *impl, uint16_t seq, uint16_t num); + struct rtp_jitter_buffer jitter_buffer; + const struct rtp_audio_codec *audio_codec; + struct rtp_audio_codec_context audio_codec_context; + /* This buffer is needed in case the data that is to be encoded is wrapped + * around the ring buffer border. In such a case, the two halves have to + * be copied and merged into this buffer, since audio codecs expect one + * contiguous input memory block as the data to encode. 
*/ + uint8_t *audio_encoder_staging_buffer; + + /* Delay of the audio codec. Depending on how the audio codec is configured, + * this is either the decoder delay or the encoder delay. In samples. */ + size_t codec_delay; + /* * pw_filter where the filter would be driven at the PTP clock * rate with RTP sink being driven at the sink driver clock rate @@ -311,7 +336,6 @@ static int do_finish_stopping_state(struct spa_loop *loop, bool async, uint32_t #include "module-rtp/audio.c" #include "module-rtp/midi.c" -#include "module-rtp/opus.c" struct rtp_format_info { uint32_t media_subtype; @@ -407,6 +431,11 @@ static int stream_start(struct impl *impl) pw_log_error("error while closing leftover connection: %s", spa_strerror(res)); } + impl->next_expected_incoming_seq = -1; + + if (impl->audio_codec != NULL) + reset_rtp_audio_codec(impl, "starting new stream"); + impl->reset_ringbuffer(impl); res = 0; @@ -519,23 +548,18 @@ static void on_stream_state_changed(void *d, enum pw_stream_state old, } } -static void update_latency_params(struct impl *impl) +static void fill_latency_params(struct impl *impl, struct spa_pod_builder *b, + const struct spa_pod **params, uint32_t *n_params) { - uint32_t n_params = 0; - const struct spa_pod *params[2]; - uint8_t buffer[1024]; - struct spa_pod_builder b; struct spa_latency_info main_latency; - spa_pod_builder_init(&b, buffer, sizeof(buffer)); - /* main_latency is the latency in the direction indicated by impl->direction. - * In RTP streams, this consists solely of the process latency. (In theory, - * PipeWire SPA nodes could have additional latencies on top of the process - * latency, but this is not the case here.) The other direction is already - * handled by pw_stream. + * In RTP streams, this consists of the process latency. In the INPUT direction + * (which is what sinks use), the encoder delay is also part of main_latency. 
+ * The full latency params also include latency in the other direction - + * this is already handled by pw_stream. * - * The main_latncy is passed as updated SPA_PARAM_Latency params to the stream. + * The main_latency is passed as updated SPA_PARAM_Latency params to the stream. * That way, the stream always gets information of latency for _both_ directions; * the direction indicated by impl->direction is covered by main_latency, and * the opposite direction is already taken care of by the default pw_stream @@ -547,10 +571,27 @@ static void update_latency_params(struct impl *impl) main_latency = SPA_LATENCY_INFO(impl->direction); spa_process_latency_info_add(&impl->process_latency, &main_latency); - params[n_params++] = spa_latency_build(&b, SPA_PARAM_Latency, &main_latency); - params[n_params++] = spa_process_latency_build(&b, SPA_PARAM_ProcessLatency, - &impl->process_latency); + if (impl->direction == PW_DIRECTION_INPUT) { + int64_t codec_delay_ns = (int64_t)(impl->codec_delay) * SPA_NSEC_PER_SEC / impl->rate; + main_latency.min_ns += codec_delay_ns; + main_latency.max_ns += codec_delay_ns; + } + params[(*n_params)++] = spa_latency_build(b, SPA_PARAM_Latency, &main_latency); + params[(*n_params)++] = spa_process_latency_build(b, SPA_PARAM_ProcessLatency, + &impl->process_latency); +} + +static void update_latency_params(struct impl *impl) +{ + const struct spa_pod *params[2]; + uint32_t n_params; + uint8_t buffer[1024]; + struct spa_pod_builder b; + + n_params = 0; + spa_pod_builder_init(&b, buffer, sizeof(buffer)); + fill_latency_params(impl, &b, params, &n_params); pw_stream_update_params(impl->stream, params, n_params); } @@ -609,11 +650,12 @@ static const struct rtp_format_info *find_rtp_pcm_audio_format_info(const struct return NULL; } -static int parse_audio_info(const struct pw_properties *props, struct spa_audio_info_raw *info) +static int parse_audio_info(const struct pw_properties *props, struct spa_audio_info_raw *info, + const char *default_format) 
{ return spa_audio_info_raw_init_dict_keys(info, &SPA_DICT_ITEMS( - SPA_DICT_ITEM(SPA_KEY_AUDIO_FORMAT, DEFAULT_FORMAT), + SPA_DICT_ITEM(SPA_KEY_AUDIO_FORMAT, default_format), SPA_DICT_ITEM(SPA_KEY_AUDIO_RATE, SPA_STRINGIFY(DEFAULT_RATE)), SPA_DICT_ITEM(SPA_KEY_AUDIO_POSITION, DEFAULT_POSITION)), &props->dict, @@ -660,6 +702,8 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core, float latency_msec; int res; bool process_latency_from_sess; + uint32_t audio_codec_type = 0; + const char *default_audio_format = NULL; impl = calloc(1, sizeof(*impl)); if (impl == NULL) { @@ -690,11 +734,15 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core, impl->info.media_type = SPA_MEDIA_TYPE_audio; impl->info.media_subtype = SPA_MEDIA_SUBTYPE_raw; impl->payload = 127; + audio_codec_type = SPA_MEDIA_SUBTYPE_raw; + default_audio_format = DEFAULT_RAW_AUDIO_FORMAT; } else if (spa_streq(str, "raop")) { impl->info.media_type = SPA_MEDIA_TYPE_audio; impl->info.media_subtype = SPA_MEDIA_SUBTYPE_raw; impl->payload = 0x60; + audio_codec_type = SPA_MEDIA_SUBTYPE_raw; + default_audio_format = DEFAULT_RAOP_AUDIO_FORMAT; } else if (spa_streq(str, "midi")) { impl->info.media_type = SPA_MEDIA_TYPE_application; @@ -702,10 +750,14 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core, impl->payload = 0x61; } #ifdef HAVE_OPUS + /* The "opus" sess.media type is actually raw audio that + * is encoded with Opus before sending it out as RTP. 
*/ else if (spa_streq(str, "opus")) { impl->info.media_type = SPA_MEDIA_TYPE_audio; - impl->info.media_subtype = SPA_MEDIA_SUBTYPE_opus; + impl->info.media_subtype = SPA_MEDIA_SUBTYPE_raw; impl->payload = 127; + audio_codec_type = SPA_MEDIA_SUBTYPE_opus; + default_audio_format = DEFAULT_OPUS_AUDIO_FORMAT; } #endif else { @@ -716,22 +768,61 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core, switch (impl->info.media_subtype) { case SPA_MEDIA_SUBTYPE_raw: - if ((res = parse_audio_info(props, &impl->info.info.raw)) < 0) { + if ((res = parse_audio_info(props, &impl->info.info.raw, default_audio_format)) < 0) { pw_log_error("can't parse format: %s", spa_strerror(res)); goto out; } impl->stream_info = impl->info; - impl->rtp_format_info = find_rtp_pcm_audio_format_info(&impl->info); - if (impl->rtp_format_info == NULL) { - pw_log_error("unsupported audio format:%d (%s) channels:%d", + impl->rate = impl->stream_info.info.raw.rate; + + /* Pick the RTP information and stride values suitable for + * the specified codec type. If the codec type is set to + * SPA_MEDIA_SUBTYPE_raw, then no special encoding is done, + * and PCM samples are transmitted directly over RTP. 
*/ + switch (audio_codec_type) { + case SPA_MEDIA_SUBTYPE_raw: + impl->rtp_format_info = find_rtp_pcm_audio_format_info(&impl->info); + if (impl->rtp_format_info == NULL) { + pw_log_error("unsupported audio format:%d (%s) channels:%d", impl->stream_info.info.raw.format, spa_type_audio_format_to_short_name(impl->stream_info.info.raw.format), impl->stream_info.info.raw.channels); + res = -EINVAL; + goto out; + } + impl->stride = impl->rtp_format_info->size * impl->stream_info.info.raw.channels; + pw_log_info("configured raw PCM RTP payload: MIME: %s format: %s rate: %" + PRIu32 " stride: %" PRIu32, impl->rtp_format_info->mime, + spa_type_audio_format_to_short_name(impl->rtp_format_info->format), + impl->rate, impl->stride); + break; + case SPA_MEDIA_SUBTYPE_opus: + impl->rtp_format_info = &rtp_opus_format_info; + switch (impl->stream_info.info.raw.format) { + case SPA_AUDIO_FORMAT_S16: + impl->stride = 2 * impl->stream_info.info.raw.channels; + break; + case SPA_AUDIO_FORMAT_F32: + impl->stride = 4 * impl->stream_info.info.raw.channels; + break; + default: + pw_log_error("unsupported raw audio format for encoding to Opus:%d (%s)", + impl->stream_info.info.raw.format, + spa_type_audio_format_to_short_name(impl->stream_info.info.raw.format)); + res = -EINVAL; + goto out; + } + pw_log_info("configured Opus RTP payload: format: %s rate: %" PRIu32 " stride: %" + PRIu32, spa_type_audio_format_to_short_name(impl->stream_info.info.raw.format), + impl->rate, impl->stride); + break; + default: + pw_log_error("unsupported audio encoding:%d (%s)", audio_codec_type, + spa_type_to_short_name(audio_codec_type, + spa_type_media_subtype, "")); res = -EINVAL; goto out; } - impl->stride = impl->rtp_format_info->size * impl->stream_info.info.raw.channels; - impl->rate = impl->stream_info.info.raw.rate; break; case SPA_MEDIA_SUBTYPE_control: impl->stream_info = impl->info; @@ -741,21 +832,8 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core, impl->rate = 
pw_properties_get_uint32(props, "midi.rate", 10000); if (impl->rate == 0) impl->rate = 10000; - break; - case SPA_MEDIA_SUBTYPE_opus: - impl->stream_info.media_type = SPA_MEDIA_TYPE_audio; - impl->stream_info.media_subtype = SPA_MEDIA_SUBTYPE_raw; - if ((res = parse_audio_info(props, &impl->stream_info.info.raw)) < 0) { - pw_log_error("can't parse format: %s", spa_strerror(res)); - goto out; - } - impl->stream_info.info.raw.format = SPA_AUDIO_FORMAT_F32; - impl->info.info.opus.rate = impl->stream_info.info.raw.rate; - impl->info.info.opus.channels = impl->stream_info.info.raw.channels; - - impl->rtp_format_info = &rtp_opus_format_info; - impl->stride = impl->rtp_format_info->size * impl->stream_info.info.raw.channels; - impl->rate = impl->stream_info.info.raw.rate; + pw_log_info("configured MIDI RTP payload: rate: %" PRIu32 " stride: %" PRIu32, + impl->rate, impl->stride); break; default: spa_assert_not_reached(); @@ -845,6 +923,7 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core, impl->payload_size = impl->mtu - impl->header_size; impl->seq = pw_rand32(); + impl->next_expected_incoming_seq = -1; str = pw_properties_get(props, "sess.min-ptime"); if (!spa_atof(str, &min_ptime)) @@ -954,6 +1033,26 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core, spa_dll_set_bw(&impl->dll, SPA_DLL_BW_MIN, 128, impl->rate); impl->corr = 1.0; + switch (impl->info.media_subtype) { + case SPA_MEDIA_SUBTYPE_raw: + switch (audio_codec_type) { + case SPA_MEDIA_SUBTYPE_opus: + res = setup_rtp_audio_codec(impl, get_rtp_opus_codec(), props); + break; + default: + res = 0; + break; + } + + if (SPA_UNLIKELY(res < 0)) + goto out; + + break; + + default: + break; + } + impl->stream = pw_stream_new(core, "rtp-session", spa_steal_ptr(props)); if (impl->stream == NULL) { res = -errno; @@ -980,12 +1079,6 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core, SPA_FORMAT_mediaSubtype, SPA_POD_Id(SPA_MEDIA_SUBTYPE_control)); rtp_midi_init(impl, direction); break; - case 
SPA_MEDIA_SUBTYPE_opus: - params[n_params++] = spa_format_audio_build(&b, - SPA_PARAM_EnumFormat, &impl->stream_info); - flags |= PW_STREAM_FLAG_AUTOCONNECT; - rtp_opus_init(impl, direction); - break; default: res = -EINVAL; goto out; @@ -998,24 +1091,20 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core, * quantity in turn is subjected to constraint checks (see above), it is * possible that the _actual_ session latency no longer equals the value * of sess.latency.msec by the time this location is reached. To take into - * account these constraint adjustments, convert back the impl->target_buffer - * to nanoseconds, and use that as the process latency. + * account these constraint adjustments, fill_latency_params() converts + * back the impl->target_buffer to nanoseconds, and uses that as the + * process latency. * - * Then, just like how update_latency_params() does it, construct the - * SPA_PARAM_Latency and SPA_PARAM_ProcessLatency params to let the new + * Then, just like in update_latency_params(), the SPA_PARAM_Latency + * and SPA_PARAM_ProcessLatency params are constructed to let the new * pw_stream know of these latency figures right from the start. 
*/ - struct spa_latency_info latency; - impl->process_latency.ns = (int64_t)(impl->target_buffer * 1e9 / impl->rate); - pw_log_debug("set process latency to %" PRId64 " based on sess.latency.msec " - "value %f", impl->process_latency.ns, latency_msec); + pw_log_debug("set process latency to %" PRId64 " ns based on sess.latency.msec " + "value %f ms (= %" PRIu32 " samples)", impl->process_latency.ns, + latency_msec, impl->target_buffer); - latency = SPA_LATENCY_INFO(impl->direction); - spa_process_latency_info_add(&(impl->process_latency), &latency); - params[n_params++] = spa_latency_build(&b, SPA_PARAM_Latency, &latency); - params[n_params++] = spa_process_latency_build(&b, SPA_PARAM_ProcessLatency, - &(impl->process_latency)); + fill_latency_params(impl, &b, params, &n_params); } pw_stream_add_listener(impl->stream, @@ -1040,6 +1129,7 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core, return (struct rtp_stream*)impl; out: + teardown_rtp_audio_codec(impl); pw_properties_free(props); if (impl) { if (impl->stream) @@ -1058,6 +1148,8 @@ void rtp_stream_destroy(struct rtp_stream *s) rtp_stream_emit_destroy(impl); + teardown_rtp_audio_codec(impl); + if (impl->deinit) impl->deinit(impl, impl->direction); @@ -1142,7 +1234,17 @@ unexpected_ssrc: int rtp_stream_resend_packets(struct rtp_stream *s, uint16_t seq, uint16_t num) { struct impl *impl = (struct impl*)s; - if (impl->resend_packets) + /* Resending only works with raw data, since codecs usually + * have internal state that is updated after encoding a + * packet. This then means that resend attempts would not + * yield the exact same packet, and this can corrupt the + * state of the decoder in a receiver. + * To support resending with codecs, the last N packets + * have to be cached somehow. Given the fact that, thus + * far, only the RAOP sink resends packets, and RAOP only + * supports raw PCM, it is currently easier to just + * disable retransmissions when a codec is in use. 
*/ + if (impl->resend_packets && (impl->audio_codec == NULL)) return impl->resend_packets(impl, seq, num); else return -ENOTSUP; diff --git a/src/modules/module-rtp/stream.h b/src/modules/module-rtp/stream.h index 37d041b06..0cf44eda7 100644 --- a/src/modules/module-rtp/stream.h +++ b/src/modules/module-rtp/stream.h @@ -11,7 +11,9 @@ extern "C" { struct rtp_stream; -#define DEFAULT_FORMAT "S16BE" +#define DEFAULT_RAW_AUDIO_FORMAT "S16BE" +#define DEFAULT_RAOP_AUDIO_FORMAT "S16LE" +#define DEFAULT_OPUS_AUDIO_FORMAT "F32" #define DEFAULT_RATE 48000 #define DEFAULT_CHANNELS 2 #define DEFAULT_POSITION "[ FL FR ]" diff --git a/test/meson.build b/test/meson.build index a54e059b9..6982720dd 100644 --- a/test/meson.build +++ b/test/meson.build @@ -124,6 +124,17 @@ test('test-spa', link_with: pwtest_lib) ) +test('test-module-rtp-common-lib', + executable('test-module-rtp-common-lib', + 'modules/module-rtp/test-jitter-buffer.c', + include_directories : [ + pwtest_inc, + include_directories('../src/modules'), + ], + dependencies: [ spa_dep, pipewire_module_rtp_common_dep ], + link_with: [pwtest_lib]) +) + openal_info = find_program('openal-info', required: false) if openal_info.found() cdata.set_quoted('OPENAL_INFO_PATH', openal_info.full_path()) diff --git a/test/modules/module-rtp/test-jitter-buffer.c b/test/modules/module-rtp/test-jitter-buffer.c new file mode 100644 index 000000000..ae4943643 --- /dev/null +++ b/test/modules/module-rtp/test-jitter-buffer.c @@ -0,0 +1,1259 @@ +/* PipeWire */ +/* SPDX-FileCopyrightText: Copyright © 2026 Carlos Rafael Giani */ +/* SPDX-License-Identifier: MIT */ + +#include +#include +#include +#include + +#include "config.h" + +#include +#include +#include + +#include "pwtest.h" + +PW_LOG_TOPIC(mod_topic, "test.rtp-jitter-buffer"); +#define PW_LOG_TOPIC_DEFAULT mod_topic + +enum test_event_type { + TEST_EVENT_OUTPUT_PACKET, + TEST_EVENT_LOST_PACKETS, +}; + +struct test_event { + enum test_event_type type; + union { + struct { + uint16_t 
seqnum; + } output; + struct { + uint16_t first_seqnum; + size_t count; + bool open_ended; + } lost; + }; +}; + +#define MAX_TEST_EVENTS 256 +#define MAX_TEST_PACKET_SIZE 2048 +#define TEST_PACKET_SIZE 128 +#define TEST_HEADER_SIZE 16 +#define TEST_TIMESTAMP 123456 +#define TEST_PACKET_DURATION (10 * SPA_NSEC_PER_MSEC) + +struct test_context { + struct pw_loop *loop; + struct pw_main_loop *main_loop; + struct rtp_jitter_buffer jitter_buffer; + + struct test_event events[MAX_TEST_EVENTS]; + size_t num_events; + + uint8_t packet_bytes[MAX_TEST_PACKET_SIZE]; +}; + +static void send_packet(struct test_context *test_context, uint16_t seqnum) +{ + /* Create a simulated RTP packet. Only write the sequence number + * into its header. The rest (SSRC, CSRC, payload type etc.) are + * of no interest to the jitter buffer - it only cares about the + * sequence number. */ + struct rtp_header *header = (struct rtp_header *)(test_context->packet_bytes); + header->sequence_number = htons(seqnum); + int ret = rtp_jitter_buffer_insert_packet(&(test_context->jitter_buffer), + test_context->packet_bytes, (TEST_PACKET_SIZE), (TEST_HEADER_SIZE), (TEST_TIMESTAMP), seqnum); + assert(ret == 0); +} + +static int test_output_rtp_packet(void *context, const uint8_t *packet_data, size_t packet_size, + size_t header_size, uint32_t timestamp, uint16_t seqnum) +{ + struct test_context *test_context = context; + struct rtp_header *header = (struct rtp_header *)packet_data; + + assert(test_context->num_events < MAX_TEST_EVENTS); + + /* Check that this function is not simply passed + * the value of params.max_packet_size, and that + * the other values (header size, timestamp) + * are correct as well. 
*/ + pwtest_int_eq(packet_size, (size_t)(TEST_PACKET_SIZE)); + pwtest_int_eq(header_size, (size_t)(TEST_HEADER_SIZE)); + pwtest_int_eq(timestamp, (size_t)(TEST_TIMESTAMP)); + + /* Compare the seqnum that is given by the caller + * with the seqnum in the RTP header to verify that + * the packet data is correctly associated with the + * information from the function arguments. */ + pwtest_int_eq(seqnum, ntohs(header->sequence_number)); + + test_context->events[test_context->num_events].type = TEST_EVENT_OUTPUT_PACKET; + test_context->events[test_context->num_events].output.seqnum = seqnum; + pw_log_debug("Output RTP packet with seqnum %" PRIu16, test_context->events[test_context->num_events].output.seqnum); + test_context->num_events++; + + return 0; +} + +static int test_signal_lost_packets(void *context, uint16_t seq_of_first_lost_packet, + size_t num_lost_packets, bool open_ended) +{ + struct test_context *test_context = context; + + assert(test_context->num_events < MAX_TEST_EVENTS); + + test_context->events[test_context->num_events].type = TEST_EVENT_LOST_PACKETS; + test_context->events[test_context->num_events].lost.first_seqnum = seq_of_first_lost_packet; + test_context->events[test_context->num_events].lost.count = num_lost_packets; + test_context->events[test_context->num_events].lost.open_ended = open_ended; + test_context->num_events++; + + return 0; +} + +static void setup_test_context(struct test_context *test_context, size_t num_slots) +{ + struct rtp_jitter_buffer_params params; + + assert(test_context != NULL); + + spa_memzero(test_context, sizeof(struct test_context)); + + pw_init(0, NULL); + + test_context->main_loop = pw_main_loop_new(NULL); + assert(test_context->main_loop != NULL); + test_context->loop = pw_main_loop_get_loop(test_context->main_loop); + + memset(&params, 0, sizeof(params)); + params.num_slots = num_slots; + /* Set the maximum packet size to a value higher than TEST_PACKET_SIZE + * to be able to check in test_output_rtp_packet() that 
that function + * does not simply get the max_packet_size value as the packet size, + * but the _actual_ packet size. (Also see test_output_rtp_packet().) */ + params.max_packet_size = MAX_TEST_PACKET_SIZE; + params.packet_duration = TEST_PACKET_DURATION; + params.loop = test_context->loop; + params.context = test_context; + params.output_rtp_packet = test_output_rtp_packet; + params.signal_lost_packets = test_signal_lost_packets; + + int ret = rtp_jitter_buffer_init(&(test_context->jitter_buffer), &params); + assert(ret == 0); +} + +static void teardown_test_context(struct test_context *test_context) +{ + assert(test_context != NULL); + + rtp_jitter_buffer_shutdown(&(test_context->jitter_buffer)); + if (test_context->main_loop != NULL) + pw_main_loop_destroy(test_context->main_loop); + pw_deinit(); +} + +#define SHIFT_TEST_EVENTS() \ + do { \ + memmove( \ + &(test_context.events[0]), \ + &(test_context.events[1]), \ + (test_context.num_events - 1) * sizeof(struct test_event)); \ + test_context.num_events--; \ + } while (0) + +#define CHECK_LOST_PACKET_EVENT(FIRST_SEQNUM, COUNT, OPEN_ENDED) \ + do { \ + pwtest_int_ge(test_context.num_events, 1u); \ + pwtest_int_eq((int)(test_context.events[0].type), TEST_EVENT_LOST_PACKETS); \ + pwtest_int_eq(test_context.events[0].lost.first_seqnum, (FIRST_SEQNUM)); \ + pwtest_int_eq(test_context.events[0].lost.count, (size_t)(COUNT)); \ + pwtest_int_eq(test_context.events[0].lost.open_ended, (OPEN_ENDED)); \ + SHIFT_TEST_EVENTS(); \ + } while (0) + +#define CHECK_OUTPUT_PACKET_EVENT(SEQNUM) \ + do { \ + pwtest_int_ge(test_context.num_events, 1u); \ + pwtest_int_eq((int)(test_context.events[0].type), TEST_EVENT_OUTPUT_PACKET); \ + pwtest_int_eq(test_context.events[0].output.seqnum, (SEQNUM)); \ + SHIFT_TEST_EVENTS(); \ + } while (0) + +PWTEST(rtp_jitter_buffer_test_consecutive_packets) +{ + /* Simple test with packets that are passed to the jitter buffer + * in order, with no gaps. 
Immediate output is expected, since + * the jitter buffer will be in regular mode. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Send packets 100, 101, 102, 103, 104 in order. + * All 5 should be immediately output, and the + * hold-back mode should remain disabled. */ + for (uint16_t i = 0; i < 5; i++) { + uint16_t seqnum = 100 + i; + send_packet(&test_context, seqnum); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + } + + pwtest_int_eq(test_context.num_events, 5u); + for (uint16_t i = 0; i < 5; i++) { + uint16_t seqnum = 100 + i; + CHECK_OUTPUT_PACKET_EVENT(seqnum); + } + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_simple_reordering) +{ + /* Check that simple out-of-order packet arrival is handled properly. + * There should be no gaps signaled, and the packets should be output + * in order. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Send 100, 101 in order. */ + send_packet(&test_context, 100); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 101); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_OUTPUT_PACKET_EVENT(100); + CHECK_OUTPUT_PACKET_EVENT(101); + + /* Send 103. A gap at 102 is produced -> jitter buffer enables hold-back mode. + * No output takes place just yet, since 103 is held back. + * The valid seqnum window starts at 102 and ends at packet 103. */ + send_packet(&test_context, 103); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 102u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 2u); + pwtest_int_eq(test_context.num_events, 0u); + + /* Send 102 to simulate out-of-order arrival. 
This fills the gap + * at 102 (implying that it is not signaled), and should cause + * 102 and 103 to be output (in order) and the hold-back mode + * to be disabled again. */ + send_packet(&test_context, 102); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_OUTPUT_PACKET_EVENT(102); + CHECK_OUTPUT_PACKET_EVENT(103); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_partial_output) +{ + /* Test that partial output is done correctly when some + * gaps are filled. (Partial means that only part of the + * held-back packets are output.) */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Establish regular mode with packet 400. */ + send_packet(&test_context, 400); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(400); + + /* Send in packet 402 to produce a gap at 401 and cause the + * jitter buffer to enter hold-back mode. */ + send_packet(&test_context, 402); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 401u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 2u); + pwtest_int_eq(test_context.num_events, 0u); + + /* Send in packets 404 and 405. This keeps the gap at 401, adds + * a gap at 403, and keeps the jitter buffer in hold-back mode. */ + send_packet(&test_context, 404); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 405); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + + /* Send in packet 401, which fills the gap at 401. This allows + * the jitter buffer to output packets 401 and 402. But since + * another gap exists at 403, hold-back mode remains enabled. 
*/ + send_packet(&test_context, 401); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_OUTPUT_PACKET_EVENT(401); + CHECK_OUTPUT_PACKET_EVENT(402); + + /* Send in packet 403, which fills the gap at 403. This allows + * the jitter buffer to output packets 403, 404, 405. Those were + * the remaining held-back packets, so hold-back mode should be + * turned off now. */ + send_packet(&test_context, 403); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 3u); + CHECK_OUTPUT_PACKET_EVENT(403); + CHECK_OUTPUT_PACKET_EVENT(404); + CHECK_OUTPUT_PACKET_EVENT(405); + + /* Verify that regular mode is working properly by sending + * in packet 406. */ + send_packet(&test_context, 406); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(406); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_explicit_drain_in_regular_mode) +{ + /* Test what happens when explicitly draining the jitter buffer + * while in regular mode. Draining should be a no-op in this mode. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Establish regular mode with packets 200 and 201. */ + send_packet(&test_context, 200); + send_packet(&test_context, 201); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_OUTPUT_PACKET_EVENT(200); + CHECK_OUTPUT_PACKET_EVENT(201); + + /* Drain, and then check the outcome. Check that it was a no-op. 
*/ + int ret = rtp_jitter_buffer_drain(&(test_context.jitter_buffer)); + assert(ret == 0); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + pwtest_int_eq(test_context.jitter_buffer.last_seqnum, 201); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_explicit_drain_in_hold_back_mode) +{ + /* Test what happens when explicitly draining the jitter buffer + * while in hold-back mode. Missing packets should be signaled + * as lost packets by this. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Establish regular mode with packet 200. */ + send_packet(&test_context, 200); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(200); + + /* Send in packets 202 and 205 to produce gap at 201, 203, 204 + * and cause the jitter buffer to enter hold-back mode. */ + send_packet(&test_context, 202); + send_packet(&test_context, 205); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + + /* Drain explicitly. This should output the following (in this order): + * + * - 1 lost packet, starting at seqnum 201, not open-ended + * - 1 packet output with seqnum 202 + * - 2 lost packets, starting at seqnum 203, not open-ended + * - 1 packet output with seqnum 205 + * + * This should also set the jitter buffer back to regular mode. 
+ * The last_seqnum should be -1, since after explicit drain, + * the jitter buffer has no idea what packets will come next.*/ + int ret = rtp_jitter_buffer_drain(&(test_context.jitter_buffer)); + assert(ret == 0); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 4u); + pwtest_int_eq(test_context.jitter_buffer.last_seqnum, -1); + CHECK_LOST_PACKET_EVENT(201, 1u, false); + CHECK_OUTPUT_PACKET_EVENT(202); + CHECK_LOST_PACKET_EVENT(203, 2u, false); + CHECK_OUTPUT_PACKET_EVENT(205); + + /* Verify that regular mode is working properly by sending + * in packet 700. Since after draining, the last_seqnum is + * -1, a discontinuity in the sequence numbers is okay. */ + send_packet(&test_context, 700); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(700); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_explicit_drain_coalesced_loss) +{ + /* Test that a contiguous set of lost packets is coalesced + * into a single lost-packet signal. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Establish regular mode with packet 50. */ + send_packet(&test_context, 50); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(50); + + /* Send in packet 54 to produce gap at 51, 52, 53 and + * cause the jitter buffer to enter hold-back mode. */ + send_packet(&test_context, 54); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + + /* Drain the jitter buffer. The packets 51, 52, 53 are + * now considered lost, and should be reported as such. 
*/ + int ret = rtp_jitter_buffer_drain(&(test_context.jitter_buffer)); + assert(ret == 0); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + pwtest_int_eq(test_context.jitter_buffer.last_seqnum, -1); + CHECK_LOST_PACKET_EVENT(51, 3u, false); + CHECK_OUTPUT_PACKET_EVENT(54); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_explicit_drain_with_seqnum_wraparound) +{ + /* Test what happens when explicitly draining the jitter + * buffer while in hold-back mode and with sequence numbers + * wrapping around. Missing packets should be signaled as + * lost packets by this. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Establish regular mode with packet 65533. */ + send_packet(&test_context, 65533); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(65533); + + /* Send in packets 65535 and 2 to produce gap at 65534, 0, 1 + * and cause the jitter buffer to enter hold-back mode. */ + send_packet(&test_context, 65535); + send_packet(&test_context, 2); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + + /* Drain explicitly. This should output the following (in this order): + * + * - 1 lost packet, starting at seqnum 65534, not open-ended + * - 1 packet output with seqnum 65535 + * - 2 lost packets, starting at seqnum 0, not open-ended + * - 1 packet output with seqnum 2 + * + * This should also set the jitter buffer back to regular mode. 
+ * The last_seqnum should be -1, since after explicit drain, + * the jitter buffer has no idea what packets will come next.*/ + int ret = rtp_jitter_buffer_drain(&(test_context.jitter_buffer)); + assert(ret == 0); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 4u); + pwtest_int_eq(test_context.jitter_buffer.last_seqnum, -1); + CHECK_LOST_PACKET_EVENT(65534, 1u, false); + CHECK_OUTPUT_PACKET_EVENT(65535); + CHECK_LOST_PACKET_EVENT(0, 2u, false); + CHECK_OUTPUT_PACKET_EVENT(2); + + /* Verify that regular mode is working properly by sending + * in packet 700. Since after draining, the last_seqnum is + * -1, a discontinuity in the sequence numbers is okay. */ + send_packet(&test_context, 700); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(700); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_stale_packets_in_regular_mode) +{ + /* Test what happens when stale and old packets are sent into + * the jitter buffer in regular mode. They should be dropped + * without influencing the behavior of the jitter buffer. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Establish regular mode with packets 100 and 101. */ + send_packet(&test_context, 100); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 101); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_OUTPUT_PACKET_EVENT(100); + CHECK_OUTPUT_PACKET_EVENT(101); + + /* Send in packet 101. Since a packet 101 was already seen, + * this is a stale packet, and needs to be dropped. */ + send_packet(&test_context, 101); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + + /* Send in packet 99. 
Since packets 100 and 101 were already seen, + * this is an old packet, and needs to be dropped. */ + send_packet(&test_context, 99); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + + /* Verify that regular mode is working properly by sending + * in packet 102. */ + send_packet(&test_context, 102); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(102); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_stale_packets_in_hold_back_mode) +{ + /* Test what happens when stale and old packets are sent into + * the jitter buffer in hold-back mode. They should be dropped + * without influencing the behavior of the jitter buffer. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Establish hold-back mode with packets 300 and 302. + * Hold-back mode gets active because of the gap at 301. */ + send_packet(&test_context, 300); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 302); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(300); + + /* Send in packet 299. Since packets 300 and 302 were already seen, + * this is an old packet, and needs to be dropped. */ + send_packet(&test_context, 299); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + + /* Send in packet 300. Since a packet 300 was already seen, + * this is a stale packet, and needs to be dropped. */ + send_packet(&test_context, 300); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + + /* Send in packet 302. This is another stale packet. 
The + * difference to the packet 300 check above is that the + * packet 302 that was previously observed is held back, + * and was not output thus far. */ + send_packet(&test_context, 302); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + + /* Send in packet 301 to test that switching back + * to regular mode still works properly. */ + send_packet(&test_context, 301); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_OUTPUT_PACKET_EVENT(301); + CHECK_OUTPUT_PACKET_EVENT(302); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_flush) +{ + /* Test the flush functionality. This should discard any held-back + * packets, without emitting them, and the jitter buffer should + * be back in regular mode afterwards. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Establish hold-back mode with packets 500 and 502. + * Hold-back mode gets active because of the gap at 501. */ + send_packet(&test_context, 500); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 502); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(500); + + rtp_jitter_buffer_flush(&(test_context.jitter_buffer)); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + pwtest_int_eq(test_context.jitter_buffer.last_seqnum, -1); + + /* Verify that regular mode is working properly by sending + * in packet 700. Since after flushing, the last_seqnum is + * -1, a discontinuity in the sequence numbers is okay. 
*/ + send_packet(&test_context, 700); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(700); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_seqnum_wraparound_regular) +{ + /* Check that in regular mode, output of in-sequence packets + * works properly even when a sequence number wrap-around occurs. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + send_packet(&test_context, 65534); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 65535); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 0); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 1); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 4u); + CHECK_OUTPUT_PACKET_EVENT(65534); + CHECK_OUTPUT_PACKET_EVENT(65535); + CHECK_OUTPUT_PACKET_EVENT(0); + CHECK_OUTPUT_PACKET_EVENT(1); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_seqnum_wraparound_with_reordering) +{ + /* Check that in hold-back mode, output of in-sequence packets + * works properly even when a sequence number wrap-around occurs. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Send packets 65534 and 65535 in order. */ + send_packet(&test_context, 65534); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 65535); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_OUTPUT_PACKET_EVENT(65534); + CHECK_OUTPUT_PACKET_EVENT(65535); + + /* Send in packet 1, causing a gap at 0. 
*/ + send_packet(&test_context, 1); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + + /* Fill the gap by sending in packet 0, then check that + * packets 0 and 1 were now output in order. */ + send_packet(&test_context, 0); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_OUTPUT_PACKET_EVENT(0); + CHECK_OUTPUT_PACKET_EVENT(1); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_overextension_single_gap_no_end_gap) +{ + /* Check what happens when hold-back mode is active, the + * valid seqnum window's maximum length is reached, and then, + * a packet with a sequence number that is one past the window + * range is added. This new packet would overextend the window, + * so the window is shifted forwards. However, it is only + * overextended by 1, so only the oldest slot in the window + * needs to be drained. In this case, that oldest slot contains + * the gap at the very beginning of the window. Also, since + * aside from that gap, there are no other ones, and the new + * packet (the one that overextends the window) comes directly + * after the last packet in the valid seqnum window, the + * jitter buffer will have no gaps left to take care of, so + * all held back packets can be output. + * + * This simulates cases where one packet is lost among + * a string of packets that all arrive in order. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Produce a sequence of packets with a gap in them. Start at 100, + * skip 101, then go all the way to 110. + * + * First, packet 100 will immediately be output. Then, packet 102 + * will enable hold-back mode (due to the gap at 101). The valid + * seqnum window then starts at 101, and extends all the way to 110. + * 110-101+1 = 10, which equals the max num packets of the jitter + * buffer here. 
In other words, after this, the jitter buffer valid + * range is as large as it can maximally be. */ + send_packet(&test_context, 100); + for (uint16_t i = 102; i <= 110; i++) { + send_packet(&test_context, i); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + } + pwtest_int_eq(test_context.num_events, 1u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 10u); + CHECK_OUTPUT_PACKET_EVENT(100); + + /* Now insert packet 111. This would overextend the window, so the + * jitter buffer has to shift the window and drain the oldest slots + * that are no longer part of the shifted window. Since packet 111 + * would overextend the window by 1, it means that the one oldest + * slot is drained. That oldest slot actually is the gap at 101. + * Since that gap was drained (resulting in a packet loss signal + * at seqnum 101 of length 1), only packets remain in the valid + * seqnum window, no gaps anymore, so the jitter buffer immediately + * outputs all of them, in order. */ + send_packet(&test_context, 111); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 11u); + CHECK_LOST_PACKET_EVENT(101, 1u, false); + for (uint16_t i = 102; i <= 111; i++) + CHECK_OUTPUT_PACKET_EVENT(i); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_overextension_multiple_gaps_no_end_gap) +{ + /* Check what happens when hold-back mode is active, the + * valid seqnum window's maximum length is reached, and then, + * a packet with a sequence number that is one past the window + * range is added. This new packet would overextend the window, + * so the window is shifted forwards. However, it is only + * overextended by 1, so only the oldest slot in the window + * needs to be drained. In this case, that oldest slot contains + * the gap at the very beginning of the window. Since there + * are more gaps present, the hold-back mode is not left. 
+ * + * This simulates cases where more than one packet is lost + * among a string of packets that all arrive in order. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Produce a sequence of packets with gaps in them. Start at 100, + * skip 101, 102 and 104, and go all the way to 110. + * + * In the hold-back mode that results from this, the valid range + * then starts at 101, and extends all the way to 110. 110-101+1 = 10, + * which equals the max num packets of the jitter buffer here. In + * other words, after this, the jitter buffer valid range is as large + * as it can maximally be. */ + send_packet(&test_context, 100); + send_packet(&test_context, 103); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + for (uint16_t i = 105; i <= 110; i++) { + send_packet(&test_context, i); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + } + pwtest_int_eq(test_context.num_events, 1u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 10u); + CHECK_OUTPUT_PACKET_EVENT(100); + + /* Now insert packet 111. This would overextend the window, so the + * jitter buffer has to shift the window and drain the oldest slots + * that are no longer part of the shifted window. Since packet 111 + * would overextend the window by 1, it means that the one oldest + * slot is drained. But, at 102, there is also a gap, and 102 is now + * the new start of the valid seqnum window, so the jitter buffer + * cannot output any packets yet. */ + send_packet(&test_context, 111); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 102u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 10u); + CHECK_LOST_PACKET_EVENT(101, 1u, false); + + /* To see that the behavior remains as expected, fill the gap at 102.
+ * Since 102 is the very beginning of the valid seqnum window, and there + * is a packet at 103, the jitter buffer can now output 102 and 103. + * Also, the valid seqnum window shrinks accordingly by 2, its length + * becoming 8 and its start seqnum becoming 104. */ + send_packet(&test_context, 102); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 104u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 8u); + CHECK_OUTPUT_PACKET_EVENT(102); + CHECK_OUTPUT_PACKET_EVENT(103); + + /* Finally, send in packet 104. By now, 104 is the start of the valid + * packet window, and a gap is there. Since this is the last gap in + * the jitter buffer, once it is filled, all packets can be output. */ + send_packet(&test_context, 104); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 8u); + for (uint16_t i = 104; i <= 111; i++) + CHECK_OUTPUT_PACKET_EVENT(i); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_overextension_after_partial_output) +{ + /* Check what happens when first, in hold-back mode, a partial + * drain happens, and then, the valid seqnum window is overextended. */ + + struct test_context test_context; + + setup_test_context(&test_context, 5); + + /* Add a packet 100, which is output immediately, since the + * jitter buffer is in regular mode. */ + send_packet(&test_context, 100); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(100); + + /* Now add packet 102. Since there is a gap at 101, hold-back + * mode is enabled. The valid seqnum window starts at 101, + * and is of length 2. 
*/ + send_packet(&test_context, 102); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 101u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 2u); + pwtest_int_eq(test_context.num_events, 0u); + + /* Packets 103 to 105 are inserted. This fills the window to + * capacity, since now, it has been extended, and goes from + * 101 to 105. That is, it starts at 101, and is of length 5 + * (which equals the jitter buffer capacity). */ + send_packet(&test_context, 103); + send_packet(&test_context, 104); + send_packet(&test_context, 105); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 101u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 5u); + pwtest_int_eq(test_context.num_events, 0u); + + /* Now add packet 101. This fills the gap. All 5 packets + * can be output, and the jitter buffer returns to the regular mode. */ + send_packet(&test_context, 101); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 5u); + CHECK_OUTPUT_PACKET_EVENT(101); + CHECK_OUTPUT_PACKET_EVENT(102); + CHECK_OUTPUT_PACKET_EVENT(103); + CHECK_OUTPUT_PACKET_EVENT(104); + CHECK_OUTPUT_PACKET_EVENT(105); + + /* Re-enter the hold-back mode by adding packet 107 and + * intentionally leaving out packet 106. The valid seqnum + * window now starts at 106, and is of length 2. */ + send_packet(&test_context, 107); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 106u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 2u); + pwtest_int_eq(test_context.num_events, 0u); + + /* Packets 108 to 110 are inserted. This fills the window to + * capacity, since now, it has been extended, and goes from + * 106 to 110.
That is, it starts at 106, and is of length 5 + * (which equals the jitter buffer capacity). */ + send_packet(&test_context, 108); + send_packet(&test_context, 109); + send_packet(&test_context, 110); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 106u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 5u); + pwtest_int_eq(test_context.num_events, 0u); + + /* Packet 111 is added. This overextends the window, since it would + * now go from 106 to 111. That is a length of 111-106+1 = 6, which + * is beyond the capacity (5). + * + * The overextension is still low enough that most of the window + * contents can be reused. In fact, only the oldest slot (the one + * containing the gap at 106) needs to be drained by signaling it + * as a packet 106 loss. + * + * Once packet 106 is signaled as lost, and the corresponding slot + * is drained, the leftovers are all packets, no gaps, so all packets + * from 107 to 111 are output. + * + * By combining this with multiple partial drains above, it is verified + * that valid_seqnum_window_start_seqnum updates (which happen during + * partial drains) do not break the overextension handling. */ + send_packet(&test_context, 111); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 6u); + CHECK_LOST_PACKET_EVENT(106, 1u, false); + CHECK_OUTPUT_PACKET_EVENT(107); + CHECK_OUTPUT_PACKET_EVENT(108); + CHECK_OUTPUT_PACKET_EVENT(109); + CHECK_OUTPUT_PACKET_EVENT(110); + CHECK_OUTPUT_PACKET_EVENT(111); + + /* Verify regular mode recovery.
*/ + send_packet(&test_context, 112); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(112); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_immediate_overextension_after_regular_mode) +{ + /* Check what happens when a gap causes the jitter buffer to switch + * to the hold-back mode, but that gap is so large that it immediately + * overextends the valid seqnum window. The jitter buffer should + * instantly recognize the immediate overextension and signal an open + * ended packet loss event. It does not stay in the hold-back mode, + * since there is nothing to hold back in that case. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Send 100, 101 in order. */ + send_packet(&test_context, 100); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 101); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_OUTPUT_PACKET_EVENT(100); + CHECK_OUTPUT_PACKET_EVENT(101); + + /* Send 200. A massive gap of far more than 10 packets is produced + * -> jitter buffer signals an open ended gap, but stays in regular mode. */ + send_packet(&test_context, 200); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_LOST_PACKET_EVENT(102, 10u, true); + CHECK_OUTPUT_PACKET_EVENT(200); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_immediate_overextension_after_regular_mode_threshold_open_closed_gap) +{ + /* This is similar to rtp_jitter_buffer_test_immediate_overextension_after_regular_mode, + * but checks for a corner case. That is: If the gap length equals + * the number of slots, then the gap should not be reported as open. + * + * Test this by producing such a gap.
Then further verify by repeating + * the test, but with a gap that is 1 packet larger than the number of + * slots. The first round should report a closed gap of a size equal + * to the number of slots. The second round should report an open gap. */ + + /* First round. */ + { + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Send 10, 11 in order. */ + send_packet(&test_context, 10); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 11); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_OUTPUT_PACKET_EVENT(10); + CHECK_OUTPUT_PACKET_EVENT(11); + + /* Send 22. A gap of exactly 10 packets (= the number of slots) + * is produced -> jitter buffer signals a closed gap of size + * equal to the number of slots. */ + send_packet(&test_context, 22); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_LOST_PACKET_EVENT(12, 10u, false); + CHECK_OUTPUT_PACKET_EVENT(22); + + teardown_test_context(&test_context); + } + + /* Second round. */ + { + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Send 10, 11 in order. */ + send_packet(&test_context, 10); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 11); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_OUTPUT_PACKET_EVENT(10); + CHECK_OUTPUT_PACKET_EVENT(11); + + /* Send 23. A gap of exactly 11 packets (= 1 past the number + * of slots) is produced -> jitter buffer signals an open + * ended gap of size equal to the number of slots.
*/ + send_packet(&test_context, 23); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_LOST_PACKET_EVENT(12, 10u, true); + CHECK_OUTPUT_PACKET_EVENT(23); + + teardown_test_context(&test_context); + } + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_full_window_invalidation_non_open_ended_gap) +{ + /* Check what happens when hold-back mode is active, the + * valid seqnum window's maximum length is reached, and then, + * a packet with a sequence number that is far enough to + * overextend the window past its current length. This means + * that the shifting method (verified in earlier tests above) + * won't work - the window is shifted completely past its + * current range, so none of those slots remain valid, + * and must all be drained. Furthermore, it means that between + * the last seqnum of the old window and the first seqnum of + * the new window, there is a gap. The jitter buffer is expected + * to do the following: + * + * 1. Drain the entire current valid seqnum window + * 2. Reset the window to only contain the seqnum of the new packet + * 3. Signal the gap between the old and the new window + * + * Here, the window is shifted far enough that none of the + * original content can be retained, but not so far that + * the gap between the old and new windows becomes too large + * to fully cover via PLC. As a result, that gap is signaled + * as packet loss, but as a non-open-ended one. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Establish hold-back mode with packets 10 and 12. + * Hold-back mode gets active because of the gap at 11. */ + send_packet(&test_context, 10); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 12); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(10); + + /* Send in packet 22. 
This would overextend the window. Shifting + * the current window moves it past packet 12, so the jitter + * buffer must be fully drained. Since afterwards, there is + * nothing left in the jitter buffer other than the new packet, + * the valid seqnum window length becomes 1, and starts at 22. + * This means that there are no gaps left, so the contents + * (in this case, just the packet 22) can be output immediately. + * Also, the gap between the old window and the new window goes + * from seqnum 13 (one past the end of the old window) to seqnum + * 21 (one before the new packet 22). 21-13+1 = 9, which is + * less than the jitter buffer capacity (which is 10), so that + * gap is announced as non-open-ended packet loss. */ + send_packet(&test_context, 22); + pwtest_int_eq(test_context.num_events, 4u); + CHECK_LOST_PACKET_EVENT(11, 1u, false); + CHECK_OUTPUT_PACKET_EVENT(12); + CHECK_LOST_PACKET_EVENT(13, 9u, false); + CHECK_OUTPUT_PACKET_EVENT(22); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_full_window_invalidation_open_ended_gap) +{ + /* Check what happens when hold-back mode is active, the + * valid seqnum window's maximum length is reached, and then, + * a packet with a sequence number that is far enough to + * overextend the window past its current length. This means + * that the shifting method (verified in earlier tests above) + * won't work - the window is shifted completely past its + * current range, so none of those slots remain valid, + * and must all be drained. Furthermore, it means that between + * the last seqnum of the old window and the first seqnum of + * the new window, there is a gap. The jitter buffer is expected + * to do the following: + * + * 1. Drain the entire current valid seqnum window + * 2. Reset the window to only contain the seqnum of the new packet + * 3. 
Signal the gap between the old and the new window + * + * Here, the window is shifted far enough that none of the + * original content can be retained, and that the gap + * between the old and new windows becomes too large + * to fully cover via PLC. As a result, that gap is signaled + * as packet loss, but as an open-ended one. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Establish hold-back mode with packets 10 and 12. + * Hold-back mode gets active because of the gap at 11. */ + send_packet(&test_context, 10); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 12); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(10); + + /* Send in packet 400. This would overextend the window. Shifting + * the current window moves it past packet 12, so the jitter + * buffer must be fully drained. Since afterwards, there is + * nothing left in the jitter buffer other than the new packet, + * the valid seqnum window length becomes 1, and starts at 400. + * This means that there are no gaps left, so the contents + * (in this case, just the packet 400) can be output immediately. + * Also, the gap between the old window and the new window goes + * from seqnum 13 (one past the end of the old window) to seqnum + * 399 (one before the new packet 400). 399-13+1 = 387, which is + * far beyond the jitter buffer capacity (which is 10). That gap + * is then signaled as an open ended packet loss with maximum + * length 10, meaning that any PLC/fadeout measure must not + * exceed the length of 10 packets. (In non-open-ended signals, + * the length instead specifies the exact length of the gap.) + * This is done to avoid excessive PLC/fadeout calculations, + * like in this case, where it otherwise would force PLC for + * 387 packets.
Callers are encouraged to apply fadeout as well + * to not have a hard cutoff after the maximum (10 packets here).*/ + send_packet(&test_context, 400); + pwtest_int_eq(test_context.num_events, 4u); + CHECK_LOST_PACKET_EVENT(11, 1u, false); + CHECK_OUTPUT_PACKET_EVENT(12); + CHECK_LOST_PACKET_EVENT(13, 10u, true); + CHECK_OUTPUT_PACKET_EVENT(400); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_timeout_drain) +{ + /* Check what happens when hold-back mode is enabled and + * the gaps are not filled in time. It is expected that the + * jitter buffer's timeout expires and forcibly drains + * its contents. */ + + struct test_context test_context; + struct timespec ts; + + setup_test_context(&test_context, 10); + + /* Establish hold-back mode with packets 60 and 62. + * Hold-back mode gets active because of the gap at 61. */ + send_packet(&test_context, 60); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 62); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(60); + + /* The jitter buffer's timeout timer is configured to expire + * when the total duration of its capacity passes after the + * hold-back mode was enabled. In this test, capacity is 10 + * packets, and each packet covers 10ms, then the total duration + * is 10*10ms = 100 ms, and that will also be the timeout of + * that timer, and the gap that was detected earlier will have + * armed that timer. Sleep for 50ms longer than its timeout + * duration to make sure it expires and thus provokes the + * draining of the jitter buffer. */ + ts.tv_sec = 0; + ts.tv_nsec = 10 * TEST_PACKET_DURATION + 50 * SPA_NSEC_PER_MSEC; + nanosleep(&ts, NULL); + + /* Iterate the loop to process the timer expiration. 
*/ + pw_loop_enter(test_context.loop); + pw_loop_iterate(test_context.loop, 0); + pw_loop_leave(test_context.loop); + + /* After draining, the jitter buffer should be back to regular + * mode, just as if rtp_jitter_buffer_drain() had been called. */ + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + pwtest_int_eq(test_context.jitter_buffer.last_seqnum, -1); + CHECK_LOST_PACKET_EVENT(61, 1u, false); + CHECK_OUTPUT_PACKET_EVENT(62); + + /* Verify that regular mode is working properly by sending + * in packet 700. Since after draining, the last_seqnum is + * -1, a discontinuity in the sequence numbers is okay. */ + send_packet(&test_context, 700); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(700); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST_SUITE(pw_module_rtp_common_lib) +{ + pwtest_add(rtp_jitter_buffer_test_consecutive_packets, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_simple_reordering, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_partial_output, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_explicit_drain_in_regular_mode, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_explicit_drain_in_hold_back_mode, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_explicit_drain_coalesced_loss, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_explicit_drain_with_seqnum_wraparound, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_stale_packets_in_regular_mode, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_stale_packets_in_hold_back_mode, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_flush, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_seqnum_wraparound_regular, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_seqnum_wraparound_with_reordering, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_overextension_single_gap_no_end_gap, PWTEST_NOARG); + 
pwtest_add(rtp_jitter_buffer_test_overextension_multiple_gaps_no_end_gap, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_overextension_after_partial_output, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_immediate_overextension_after_regular_mode, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_full_window_invalidation_non_open_ended_gap, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_immediate_overextension_after_regular_mode_threshold_open_closed_gap, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_full_window_invalidation_open_ended_gap, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_timeout_drain, PWTEST_NOARG); + + return PWTEST_PASS; +}