diff --git a/src/modules/meson.build b/src/modules/meson.build
index 5c9052389..bbdc1a018 100644
--- a/src/modules/meson.build
+++ b/src/modules/meson.build
@@ -597,7 +597,10 @@ summary({'zeroconf-discover': build_module_zeroconf_discover}, bool_yn: true, se
 # (by avoiding build script code duplication), create a static library
 # that contains that common code.
 pipewire_module_rtp_common_lib = static_library('pipewire-module-rtp-common-lib',
-  [ 'module-rtp/stream.c' ],
+  [ 'module-rtp/stream.c',
+    'module-rtp/jitter-buffer.c',
+    'module-rtp/audio-codec.c',
+    'module-rtp/opus-codec.c' ],
   include_directories : [configinc],
   install : false,
   dependencies : [mathlib, dl_lib, rt_lib, pipewire_dep, opus_dep],
diff --git a/src/modules/module-raop-sink.c b/src/modules/module-raop-sink.c
index af84ba3eb..ce8bdb2ce 100644
--- a/src/modules/module-raop-sink.c
+++ b/src/modules/module-raop-sink.c
@@ -149,7 +149,7 @@ PW_LOG_TOPIC(mod_topic, "mod." NAME);
 
 #define MAX_PORT_RETRY		128
 
-#define RAOP_FORMAT		"S16LE"
+#define RAOP_FORMAT		DEFAULT_RAOP_AUDIO_FORMAT
 #define RAOP_STRIDE		(2*DEFAULT_CHANNELS)
 #define RAOP_RATE		44100
 #define RAOP_LATENCY_MS		250
diff --git a/src/modules/module-rtp-session.c b/src/modules/module-rtp-session.c
index 4374f64b5..ef7b3cf90 100644
--- a/src/modules/module-rtp-session.c
+++ b/src/modules/module-rtp-session.c
@@ -140,7 +140,8 @@ PW_LOG_TOPIC(mod_topic, "mod." NAME);
 		"( sess.min-ptime=<minimum packet time in milliseconds, default:2> ) "		\
 		"( sess.max-ptime=<maximum packet time in milliseconds, default:20> ) "		\
 		"( sess.media=<string, the media type audio|midi|opus, default midi> ) "	\
-		"( audio.format=<format, default:"DEFAULT_FORMAT"> ) "				\
+		"( audio.format=<format, default:"DEFAULT_RAW_AUDIO_FORMAT " for media type audio, "	\
+		DEFAULT_OPUS_AUDIO_FORMAT " for media type opus, not used for MIDI> ) "			\
 		"( audio.rate=<sample rate, default:"SPA_STRINGIFY(DEFAULT_RATE)"> ) "		\
 		"( audio.channels=<number of channels, default:"SPA_STRINGIFY(DEFAULT_CHANNELS)"> ) "\
 		"( audio.position=<channel map, default:"DEFAULT_POSITION"> ) "			\
@@ -1624,7 +1625,7 @@ int pipewire__module_init(struct pw_impl_module *module, const char *args)
 
 	if (spa_streq(str, "audio")) {
 		struct spa_dict_item items[] = {
-			{ "audio.format", DEFAULT_FORMAT },
+			{ "audio.format", DEFAULT_RAW_AUDIO_FORMAT },
 			{ "audio.rate", SPA_STRINGIFY(DEFAULT_RATE) },
 			{ "audio.channels", SPA_STRINGIFY(DEFAULT_CHANNELS) },
 			{ "audio.position", DEFAULT_POSITION } };
@@ -1632,6 +1633,7 @@ int pipewire__module_init(struct pw_impl_module *module, const char *args)
 	}
 	else if (spa_streq(str, "opus")) {
 		struct spa_dict_item items[] = {
+			{ "audio.format", DEFAULT_OPUS_AUDIO_FORMAT },
 			{ "audio.rate", SPA_STRINGIFY(DEFAULT_RATE) },
 			{ "audio.channels", SPA_STRINGIFY(DEFAULT_CHANNELS) },
 			{ "audio.position", DEFAULT_POSITION } };
diff --git a/src/modules/module-rtp-sink.c b/src/modules/module-rtp-sink.c
index e0d34482b..8f045e71d 100644
--- a/src/modules/module-rtp-sink.c
+++ b/src/modules/module-rtp-sink.c
@@ -199,7 +199,8 @@ PW_LOG_TOPIC(mod_topic, "mod." NAME);
 		"( sess.min-ptime=<minimum packet time in milliseconds, default:2> ) "			\
 		"( sess.max-ptime=<maximum packet time in milliseconds, default:20> ) "			\
 		"( sess.media=<string, the media type audio|midi|opus, default audio> ) "		\
-		"( audio.format=<format, default:"DEFAULT_FORMAT"> ) "					\
+		"( audio.format=<format, default:"DEFAULT_RAW_AUDIO_FORMAT " for media type audio, "	\
+		DEFAULT_OPUS_AUDIO_FORMAT " for media type opus, not used for MIDI> ) "			\
 		"( audio.rate=<sample rate, default:"SPA_STRINGIFY(DEFAULT_RATE)"> ) "			\
 		"( audio.channels=<number of channels, default:"SPA_STRINGIFY(DEFAULT_CHANNELS)"> ) "	\
 		"( audio.position=<channel map, default:"DEFAULT_POSITION"> ) "				\
diff --git a/src/modules/module-rtp-source.c b/src/modules/module-rtp-source.c
index 332d126d5..6cce0b1c7 100644
--- a/src/modules/module-rtp-source.c
+++ b/src/modules/module-rtp-source.c
@@ -174,7 +174,8 @@ PW_LOG_TOPIC(mod_topic, "mod." NAME);
 		"( sess.latency.msec=<target network latency, default "SPA_STRINGIFY(DEFAULT_SESS_LATENCY)"> ) "\
 		"( sess.ignore-ssrc=<to ignore SSRC, default false> ) "\
  		"( sess.media=<string, the media type audio|midi|opus, default audio> ) "			\
-		"( audio.format=<format, default:"DEFAULT_FORMAT"> ) "						\
+		"( audio.format=<format, default:"DEFAULT_RAW_AUDIO_FORMAT " for media type audio, "		\
+		DEFAULT_OPUS_AUDIO_FORMAT " for media type opus, not used for MIDI> ) "				\
 		"( audio.rate=<sample rate, default:"SPA_STRINGIFY(DEFAULT_RATE)"> ) "				\
 		"( audio.channels=<number of channels, default:"SPA_STRINGIFY(DEFAULT_CHANNELS)"> ) "		\
 		"( audio.position=<channel map, default:"DEFAULT_POSITION"> ) "					\
@@ -311,9 +312,24 @@ on_rtp_io(void *data, int fd, uint32_t mask)
 	current_time = get_time_ns(impl);
 
 	if (mask & SPA_IO_IN) {
-		if ((len = recvfrom(fd, impl->buffer, impl->buffer_size, 0, (struct sockaddr *)(&recvaddr), &recvaddr_len)) < 0)
+		if ((len = recvfrom(fd, impl->buffer, impl->buffer_size,
+#ifdef __linux__
+				    /* Use this Linux specific feature to get the actual size of the
+				     * packet, even if it was truncated due to it being larger than
+				     * the buffer size. The code below uses this to detect packets
+				     * that exceed the MTU size. Truncated packets are unusable.
+				     * Especially if an audio codec is in use, the partial absence
+				     * of data can lead to a corrupted state. */
+				    MSG_TRUNC,
+#else
+				    0,
+#endif
+				    (struct sockaddr *)(&recvaddr), &recvaddr_len)) < 0)
 			goto receive_error;
 
+		if (SPA_UNLIKELY((size_t)len > impl->buffer_size))
+			goto packet_larger_than_mtu;
+
 		/* Filter the packets to exclude those with source addresses
 		 * that do not match the expected one. Only used with unicast.
 		 * (The bind() call in make_socket takes care of only
@@ -373,6 +389,12 @@ short_packet:
 		pw_log_warn("(%d suppressed) short packet of len %zd received",
 				suppressed, len);
 	return;
+packet_larger_than_mtu:
+	if ((suppressed = spa_ratelimit_test(&impl->rate_limit, current_time)) >= 0)
+		pw_log_warn("(%d suppressed) packet received that is larger than "
+				"the configured MTU (%zu bytes)",
+				suppressed, impl->buffer_size);
+	return;
 }
 
 static int rejoin_igmp_group(struct spa_loop *loop, bool async, uint32_t seq,
diff --git a/src/modules/module-rtp/audio-codec.c b/src/modules/module-rtp/audio-codec.c
new file mode 100644
index 000000000..e32387bbb
--- /dev/null
+++ b/src/modules/module-rtp/audio-codec.c
@@ -0,0 +1,14 @@
+/* PipeWire */
+/* SPDX-FileCopyrightText: Copyright © 2026 Carlos Rafael Giani */
+/* SPDX-License-Identifier: MIT */
+
+#include "audio-codec.h"
+
+const char * rtp_audio_codec_type_name(enum rtp_audio_codec_type type)
+{
+	/* Map each known codec role to its log label; anything else is unknown. */
+	if (type == RTP_AUDIO_CODEC_TYPE_ENCODER)
+		return "encoder";
+
+	return (type == RTP_AUDIO_CODEC_TYPE_DECODER) ? "decoder" : "<unknown>";
+}
diff --git a/src/modules/module-rtp/audio-codec.h b/src/modules/module-rtp/audio-codec.h
new file mode 100644
index 000000000..f608f11bc
--- /dev/null
+++ b/src/modules/module-rtp/audio-codec.h
@@ -0,0 +1,209 @@
+/* PipeWire */
+/* SPDX-FileCopyrightText: Copyright © 2026 Carlos Rafael Giani */
+/* SPDX-License-Identifier: MIT */
+
+#ifndef PIPEWIRE_RTP_AUDIO_CODEC_H
+#define PIPEWIRE_RTP_AUDIO_CODEC_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+#include <spa/param/audio/format.h>
+#include <spa/utils/defs.h>
+#include <pipewire/properties.h>
+
+enum rtp_audio_codec_type {	/* role a codec is initialized for; passed to rtp_audio_codec.init() */
+	RTP_AUDIO_CODEC_TYPE_ENCODER,	/* codec is initialized as an encoder */
+	RTP_AUDIO_CODEC_TYPE_DECODER,	/* codec is initialized as a decoder */
+};
+
+struct rtp_audio_codec_context {	/* state of one initialized codec; filled in by rtp_audio_codec.init() */
+	void *handle;	/* NOTE(review): presumably opaque codec-private state - confirm against the codec implementations */
+	struct spa_audio_info audio_info;	/* NOTE(review): presumably the raw format given to init() as stream_info - confirm */
+	size_t samples_per_frame;	/* NOTE(review): presumably init()'s samples_per_frame (counted per channel) - confirm */
+	size_t max_encoded_frame_size;	/* NOTE(review): presumably init()'s max_encoded_data_size, in bytes - confirm */
+	size_t stride;	/* NOTE(review): presumably init()'s stride (channels x bytes per sample) - confirm */
+	enum rtp_audio_codec_type type;	/* NOTE(review): presumably the en-/decoder role given to init() - confirm */
+	uint8_t *output_buffer;	/* NOTE(review): presumably the internal buffer that encode()/decode()/apply_plc() results refer to - confirm */
+};
+
+struct rtp_audio_codec {
+	/* Initializes the audio codec.
+	 *
+	 * The codec initialization will be stored in the context.
+	 *
+	 * Initializing an already initialized codec leads to undefined behavior.
+	 *
+	 * context must point to an rtp_audio_codec_context struct instance.
+	 * Said instance will be filled with states for the initialized codec.
+	 * Any already present values will be overwritten.
+	 *
+	 * stream_info must point to an spa_audio_info that contains a raw audio
+	 * format that the codec shall en/decode from/to.
+	 *
+	 * type specifies whether the codec shall be initialized as an
+	 * en- or a decoder.
+	 *
+	 * samples_per_frame specifies how many samples a frame covers.
+	 * "Samples" is meant in the RTP sense; that is, it specifies how
+	 * many samples are there in one channel. This means that this
+	 * value does not depend on the channel count in stream_info.
+	 *
+	 * max_encoded_data_size defines the maximum allowed size in bytes for
+	 * encoded frames. The encode() function then is guaranteed to not
+	 * produce a frame larger than this (it may produce a frame that is
+	 * smaller than that though).
+	 *
+	 * stride specifies the number of bytes per one sample. This value
+	 * _is_ depending on the channel count in stream_info. That is,
+	 * stride = (number of channels x bytes per sample).
+	 *
+	 * codec_props contains extra, codec specific properties, like the
+	 * encoding quality. These properties are optional, and this argument
+	 * can be set to NULL.
+	 *
+	 * Returns 0 in case of success, and a negative errno in case of an error.
+	 * An error will roll back any partial initialization. */
+	int (*init)(struct rtp_audio_codec_context *context, struct spa_audio_info *stream_info,
+		enum rtp_audio_codec_type type, uint32_t samples_per_frame,
+		size_t max_encoded_data_size, size_t stride, struct pw_properties *codec_props);
+
+	/* Shuts down a previously initialized codec.
+	 *
+	 * If the codec was not initialized, or was already shut down,
+	 * this is a no-op.
+	 *
+	 * context must point to a valid rtp_audio_codec_context struct instance. */
+	void (*shutdown)(struct rtp_audio_codec_context *context);
+
+	/* Resets the codec state.
+	 *
+	 * Use this if for example a frame is corrupted and could not be decoded
+	 * to reset to an initial state. This is recommended, since in such cases,
+	 * the codec might not be able to cleanly decode data if previous states
+	 * are retained.
+	 *
+	 * reason is an optional string for logging. It helps indicating in the
+	 * logs the reason for the codec reset. If set to NULL, no reason is logged.
+	 *
+	 * context must point to a valid rtp_audio_codec_context struct instance
+	 * that was initialized. */
+	void (*reset)(struct rtp_audio_codec_context *context, const char *reason);
+
+	/* Return the en- or decoder delay.
+	 *
+	 * The delay is given in samples. Whether this returns the en- or the
+	 * decoder delay depends on type that was passed to init(). The delay
+	 * is stored in the value pointed to by the delay pointer.
+	 *
+	 * IMPORTANT: This delay must not vary for the duration of the existence
+	 * of the context. Once the context is created, the delay must be fixed.
+	 *
+	 * context must point to a valid rtp_audio_codec_context struct instance
+	 * that was initialized.
+	 *
+	 * delay must be a valid pointer.
+	 *
+	 * Returns 0 in case of success, and a negative errno in case of an error.
+	 *
+	 * In case of an error, the value pointed to by the delay pointer is
+	 * undefined. An error means that the codec cannot be used anymore and
+	 * must be shut down. */
+	int (*get_delay)(struct rtp_audio_codec_context *context, size_t *delay);
+
+	/* Encodes a set of samples to a codec frame.
+	 *
+	 * This function only works if the type during initialization
+	 * was RTP_AUDIO_CODEC_TYPE_ENCODER. If it isn't, this function's
+	 * behavior is undefined.
+	 *
+	 * context must point to a valid rtp_audio_codec_context struct instance
+	 * that was initialized.
+	 *
+	 * in_samples must point to a memory block containing at least the amount
+	 * of samples that was specified when the codec was initialized.
+	 *
+	 * out_encoded_data must point to a pointer that shall be set to refer to
+	 * an internal buffer that contains the encoded data. out_encoded_data_size
+	 * must point to a size_t value that shall be set to the size of the encoded
+	 * data in bytes.
+	 *
+	 * Returns 0 in case of success, and a negative errno in case of an error.
+	 *
+	 * In case of an error, the values out_encoded_data and out_encoded_data_size
+	 * are set to are undefined. An error means that the codec cannot be used
+	 * anymore and must be shut down. */
+	int (*encode)(struct rtp_audio_codec_context *context, const uint8_t *in_samples,
+		uint8_t **out_encoded_data, size_t *out_encoded_data_size);
+
+	/* Decodes a codec frame to a set of samples.
+	 *
+	 * Codec frames must be fed into the decoder in order.
+	 *
+	 * This function only works if the type during initialization
+	 * was RTP_AUDIO_CODEC_TYPE_DECODER. If it isn't, this function's
+	 * behavior is undefined.
+	 *
+	 * context must point to a valid rtp_audio_codec_context struct instance
+	 * that was initialized.
+	 *
+	 * in_encoded_data must point to a memory block containing the codec frame.
+	 * in_encoded_data_size must be set to the size of the codec frame in bytes.
+	 *
+	 * out_samples must point to a pointer that shall be set to refer to an
+	 * internal buffer that contains the decoded samples. out_num_samples
+	 * must point to a size_t value that shall contain how many samples
+	 * were decoded. That amount's maximum possible value is the number
+	 * of samples per frame specified during initialization. The codec is
+	 * allowed to produce less samples than that, but not more than that.
+	 *
+	 * Returns 0 in case of success, and a negative errno in case of an error.
+	 *
+	 * In case of an error, the values out_samples and out_num_samples
+	 * are set to are undefined. An error means that the codec cannot be used
+	 * anymore and must be shut down. */
+	int (*decode)(struct rtp_audio_codec_context *context, const uint8_t *in_encoded_data,
+		size_t in_encoded_data_size, uint8_t **out_samples, size_t *out_num_samples);
+
+	/* Applies PLC to cover a lost frame.
+	 *
+	 * This only works properly if non-missing frames that preceded the
+	 * lost frame were decoded in order.
+	 *
+	 * context must point to a valid rtp_audio_codec_context struct instance
+	 * that was initialized.
+	 *
+	 * out_samples must point to a pointer that shall be set to refer to an
+	 * internal buffer that contains the PLC samples. out_num_samples
+	 * must point to a size_t value that shall contain how many PLC samples
+	 * were generated. That amount's maximum possible value is the number
+	 * of samples per frame specified during initialization. The codec is
+	 * allowed to produce less samples than that, but not more than that.
+	 *
+	 * Returns 0 in case of success, and a negative errno in case of an error.
+	 *
+	 * In case of an error, the values out_samples and out_num_samples
+	 * are set to are undefined. An error means that the codec cannot be used
+	 * anymore and must be shut down. */
+	int (*apply_plc)(struct rtp_audio_codec_context *context,
+		uint8_t **out_samples, size_t *out_num_samples);
+
+	/* Gets a human-readable name of this codec.
+	 *
+	 * This is meant for logging purposes. */
+	const char * (*get_name)(void);
+};
+
+/* Returns a human-readable string for the given audio codec type.
+ *
+ * This is meant for logging purposes. */
+const char * rtp_audio_codec_type_name(enum rtp_audio_codec_type type);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif /* PIPEWIRE_RTP_AUDIO_CODEC_H */
diff --git a/src/modules/module-rtp/audio.c b/src/modules/module-rtp/audio.c
index 5021b223b..a42eddaf7 100644
--- a/src/modules/module-rtp/audio.c
+++ b/src/modules/module-rtp/audio.c
@@ -119,12 +119,13 @@ static void rtp_audio_process_playback(void *data)
 			uint32_t clock_rate = impl->io_position->clock.rate.denom;
 
 			/* Translate the clock position to an RTP timestamp and
-			 * shift it to compensate for device delay and ASRC delay.
-			 * The device delay is scaled along with the clock position,
-			 * since both are expressed in clock sample units, while
-			 * pwt.buffered is expressed in stream time. */
+			 * shift it to compensate for device delay, decoder delay,
+			 * and ASRC delay. The device delay is scaled along with
+			 * the clock position, since both are expressed in clock
+			 * sample units, while pwt.buffered is expressed in
+			 * stream time. */
 			timestamp = scale_u64(impl->io_position->clock.position + device_delay,
-					      impl->rate, clock_rate) + pwt.buffered;
+					      impl->rate, clock_rate) + pwt.buffered + impl->codec_delay;
 			spa_ringbuffer_read_update(&impl->ring, timestamp);
 			avail = spa_ringbuffer_get_read_index(&impl->ring, &read_index);
 		} else {
@@ -217,15 +218,34 @@ static void rtp_audio_process_playback(void *data)
 
 		avail = spa_ringbuffer_get_read_index(&impl->ring, &timestamp);
 
-		/* Reduce target buffer by the delay amount to start playback sooner.
-		 * This compensates for the delay to the device. */
-		if (SPA_UNLIKELY(impl->target_buffer < device_delay)) {
-			pw_log_error("Delay to device (%" PRIu32 ") is higher than "
-				"the target buffer size (%" PRIu32 ")", device_delay,
-				impl->target_buffer);
+		if (impl->io_position) {
+			uint32_t clock_rate = impl->io_position->clock.rate.denom;
+			/* Device delay is reported in clock rate units. If this does not
+			 * match the RTP rate, the device delay must be transformed first. */
+			device_delay = scale_u64(device_delay, impl->rate, clock_rate);
+		}
+
+		/* Reduce target buffer by the decoder and device delay amount to
+		 * start playback sooner. This compensates for the delay to the
+		 * device and for the decoder delay. */
+
+		if (SPA_UNLIKELY(impl->target_buffer < (impl->codec_delay + device_delay))) {
+			if (impl->target_buffer < device_delay) {
+				pw_log_error("Delay to device (%" PRIu32 ") is higher than "
+					"the target buffer size (%" PRIu32 ")", device_delay,
+					impl->target_buffer);
+			} else if (impl->target_buffer < impl->codec_delay) {
+				pw_log_error("Decoder delay (%zu) is higher than the "
+					"target buffer size (%" PRIu32 ")", impl->codec_delay,
+					impl->target_buffer);
+			} else {
+				pw_log_error("The combined decoder delay (%zu) and device delay "
+					"(%" PRIu32 ") are higher than the target buffer size (%" PRIu32 ")",
+					impl->codec_delay, device_delay, impl->target_buffer);
+			}
 			target_buffer = 0;
 		} else {
-			target_buffer = impl->target_buffer - device_delay;
+			target_buffer = impl->target_buffer - impl->codec_delay - device_delay;
 		}
 
 		if (avail < (int32_t)wanted) {
@@ -323,50 +343,325 @@ static void rtp_audio_process_playback(void *data)
 	pw_stream_queue_buffer(impl->stream, buf);
 }
 
+static int process_received_samples(struct impl *impl, uint8_t *samples, uint32_t num_samples,
+	uint16_t seqnum, uint32_t timestamp, uint32_t ts_offset);
+
+static int on_jitter_buffer_output_rtp_packet(void *context, const uint8_t *packet_data, size_t packet_size,
+					      size_t header_size, uint32_t timestamp, uint16_t seqnum)
+{
+	int ret;
+	size_t payload_size;
+	uint8_t *decoded_samples;
+	size_t num_decoded_samples;
+	struct impl *impl = context;	/* the impl that setup_rtp_audio_codec() registers as params.context */
+	/* Jitter buffer output callback: decodes the packet payload and passes the samples to process_received_samples(). */
+	payload_size = packet_size - header_size;	/* NOTE(review): wraps around if header_size > packet_size - confirm the jitter buffer rules that out */
+
+	ret = impl->audio_codec->decode(&(impl->audio_codec_context), packet_data + header_size,
+		payload_size, &decoded_samples, &num_decoded_samples);
+	if (SPA_UNLIKELY(ret < 0)) {
+		pw_log_error("could not decode audio from packet with seqnum %" PRIu16 ": %s",
+			seqnum, spa_strerror(ret));
+		return ret;
+	}
+
+	pw_log_trace("got packet with seqnum %" PRIu16 " from jitter buffer; decoding "
+		     "yielded %zu samples", seqnum, num_decoded_samples);
+
+	return process_received_samples(impl, decoded_samples, num_decoded_samples, seqnum,
+					timestamp, impl->target_buffer);	/* write position becomes timestamp + target_buffer */
+}
+
+static void reset_rtp_audio_codec(struct impl *impl, const char *reason);
+
+static int on_jitter_buffer_signal_lost_packets(void *context, uint16_t seqnum_of_first_lost_packet,
+						size_t num_lost_packets, bool open_ended)
+{
+	int ret;
+	uint8_t *plc_samples;
+	size_t num_plc_samples;
+	uint32_t timestamp;
+	struct impl *impl = context;
+	size_t i;
+	/* Jitter buffer loss callback: generates PLC samples for each lost packet and writes them at the ring write index. */
+	/* Don't apply PLC if sync is not established. Playback is in sync when
+	 * there is a steady supply of data going on and the timestamps have been
+	 * okay thus far. When sync is lost, there is an audible discontinuity
+	 * that requires a reset of the ringbuffer contents along with the stats
+	 * that keep playback in sync, so applying PLC then makes no sense. */
+	if (!impl->have_sync)
+		return 0;
+
+	// TODO apply fadeout if open_ended == true
+
+	pw_log_info("Jitter buffer signals lost packet(s); packet loss sequence starts at "
+		    "seqnum %" PRIu16 ", sequence length %zu, %sopen ended", seqnum_of_first_lost_packet,
+		    num_lost_packets, open_ended ? "" : "not ");
+
+	for (i = 0; i < num_lost_packets; ++i) {
+		uint16_t seqnum = (seqnum_of_first_lost_packet + i);
+
+		ret = impl->audio_codec->apply_plc(&(impl->audio_codec_context), &plc_samples, &num_plc_samples);
+		if (SPA_UNLIKELY(ret < 0)) {
+			pw_log_error("could not apply PLC: %s", spa_strerror(ret));
+			return ret;
+		}
+
+		spa_ringbuffer_get_write_index(&impl->ring, &timestamp);
+
+		/* PLC generates sample data to cover the lost packet, but it does
+		 * not generate timestamps. Since there is no other information available
+		 * at this stage for reconstructing timestamps, just use the current
+		 * ringbuffer write index. That write index is actually the ideal current
+		 * timestamp in direct timestamp mode - that is, if sync is perfect, then
+		 * that ideal timestamp perfectly matches the RTP timestamps (when they
+		 * are shifted by target_buffer). See process_received_samples() for
+		 * how these are used and compared to calculate a skew. */
+
+		pw_log_trace("jitter buffer signaled loss of packet with seqnum %" PRIu16 "; PLC "
+		     "yielded %zu samples; using RTP timestamp %" PRIu32, seqnum, num_plc_samples,
+		     timestamp);
+
+		/* The write index has the target_buffer factored in. (See for example the
+		 * !have_sync cases in process_received_samples().) Therefore, it is
+		 * important to ensure that the timestamp we got from the ringbuffer write
+		 * index is _not_ shifted by target_buffer again. For this reason, pass
+		 * 0 as the ts_offset. */
+		ret = process_received_samples(impl, plc_samples, num_plc_samples, seqnum, timestamp, 0);
+		if (SPA_UNLIKELY(ret < 0)) {
+			pw_log_error("could not process PLC samples: %s", spa_strerror(ret));
+			return ret;
+		}
+	}
+
+	if (open_ended)
+		reset_rtp_audio_codec(impl, "open ended packet loss");
+
+	return 0;
+}
+
+static int setup_rtp_audio_codec(struct impl *impl, const struct rtp_audio_codec *audio_codec,
+				 struct pw_properties *props)
+{
+	int ret;
+	enum rtp_audio_codec_type audio_codec_type;
+
+	/* No audio codec -> PCM is to be transmitted directly.
+	 * Nothing to set up then. */
+	if (audio_codec == NULL)
+		return 0;
+
+	spa_assert(impl->psamples > 0);
+
+	/* A jitter buffer is only needed in the output direction,
+	 * since that is the direction source nodes (= receivers) use. */
+	if (impl->direction == PW_DIRECTION_OUTPUT) {
+		struct rtp_jitter_buffer_params params;
+		uint32_t jitter_buffer_length_perc;
+		size_t max_num_packets_in_ringbuffer;
+
+		if (props != NULL) {
+			jitter_buffer_length_perc = pw_properties_get_uint32(props,
+				"jitter.buffer.length.perc", 50);
+		} else {
+			jitter_buffer_length_perc = 50;
+		}
+
+		/* Round up the number of packets, since even a packet with just 1 sample
+		 * actually inside is considered a full packet in the ringbuffer. */
+		max_num_packets_in_ringbuffer = (impl->target_buffer + (impl->psamples - 1)) / impl->psamples;
+		/* Get the number of slots out of the number of packets that fit in the
+		 * jitter buffer, using the jitter_buffer_length_perc percentage. At least
+		 * 1 slot is necessary though, so limit the output to a minimum of 1. */
+		params.num_slots = SPA_MAX(max_num_packets_in_ringbuffer * jitter_buffer_length_perc / 100, 1u);
+		pw_log_debug("ringbuffer can hold up to %zu packets worth of data; "
+			     "jitter buffer length is set to cover %" PRIu32 "%% of "
+			     "the ring buffer -> num slots: %zu", max_num_packets_in_ringbuffer,
+			     jitter_buffer_length_perc, params.num_slots);
+
+		params.max_packet_size = impl->mtu;
+		params.packet_duration = scale_u64(impl->psamples, SPA_NSEC_PER_SEC, impl->rate);
+		params.loop = impl->data_loop;
+		params.context = impl;
+		params.output_rtp_packet = on_jitter_buffer_output_rtp_packet;
+		params.signal_lost_packets = on_jitter_buffer_signal_lost_packets;
+
+		ret = rtp_jitter_buffer_init(&(impl->jitter_buffer), &params);
+		if (SPA_UNLIKELY(ret < 0)) {
+			pw_log_error("could not setup jitter buffer: %s", spa_strerror(ret));
+			return ret;
+		}
+
+		audio_codec_type = RTP_AUDIO_CODEC_TYPE_DECODER;
+	} else {
+		audio_codec_type = RTP_AUDIO_CODEC_TYPE_ENCODER;
+		/* The encoder needs contiguous input, so data that wraps around the
+		 * ring buffer is first copied into this staging buffer. */
+		impl->audio_encoder_staging_buffer = malloc(impl->psamples * impl->stride);
+		if (impl->audio_encoder_staging_buffer == NULL)
+			return -ENOMEM;
+	}
+
+	impl->audio_codec = audio_codec;
+
+	ret = impl->audio_codec->init(&(impl->audio_codec_context), &impl->stream_info,
+		audio_codec_type, impl->psamples, impl->payload_size, impl->stride, props);
+	if (SPA_UNLIKELY(ret < 0)) {
+		pw_log_error("could not setup audio codec: %s", spa_strerror(ret));
+		return ret;
+	}
+
+	/* Since a stream always either only receives or only sends, one single
+	 * quantity is used for both directions. This means that when this stream
+	 * sends, the audio_codec is configured to encode, and thus, codec_delay
+	 * then is the encoder delay. Consequently, if the stream receives, and
+	 * the audio_codec decodes, codec_delay is the decoder delay. */
+	ret = impl->audio_codec->get_delay(&(impl->audio_codec_context), &(impl->codec_delay));
+	if (SPA_UNLIKELY(ret < 0)) {
+		pw_log_error("could not get audio codec delay: %s", spa_strerror(ret));
+		return ret;
+	}
+
+	/* In the output direction (which is what RTP source nodes use), the
+	 * codec_delay is the decoder delay. That delay is applied by either
+	 * shifting timestamps forward (in the direct timestamp mode) or by
+	 * subtracting from target_buffer (in the constant latency mode).
+	 * That is, the decoder delay is treated as part of the specified
+	 * session latency. This ensures that RTP source nodes are in sync
+	 * even in the (unlikely) case that they use different decoders with
+	 * different decoder latencies. But, it is then important to check
+	 * that the decoder delay is smaller than the buffer size. If this
+	 * is not the case, then the constant latency mode would use negative
+	 * buffer sizes, and the direct timestamp mode would always shift
+	 * timestamps past the buffer size. */
+	if ((impl->direction == PW_DIRECTION_OUTPUT) &&
+	    SPA_UNLIKELY(impl->codec_delay > impl->target_buffer)) {
+		pw_log_error("decoder delay (%zu samples) is larger than buffer size "
+			     "(%" PRIu32 " samples)", impl->codec_delay, impl->target_buffer);
+		return -EINVAL;
+	}
+
+	pw_log_info("initialized %s %s with %" PRIu32 " samples per packet; "
+		    "codec delay: %zu samples", impl->audio_codec->get_name(),
+		    rtp_audio_codec_type_name(audio_codec_type), impl->psamples,
+		    impl->codec_delay);
+
+	return 0;
+}
+
+static void teardown_rtp_audio_codec(struct impl *impl)
+{
+	/* Jitter buffer setup precedes codec init; release it even when no codec handle exists. */
+	if (rtp_jitter_buffer_is_initialized(&(impl->jitter_buffer)))
+		rtp_jitter_buffer_shutdown(&(impl->jitter_buffer));
+	if ((impl->audio_codec != NULL) && (impl->audio_codec_context.handle != NULL)) {
+		impl->audio_codec->shutdown(&(impl->audio_codec_context));
+		impl->audio_codec_context.handle = NULL;
+	}
+	free(impl->audio_encoder_staging_buffer);
+	impl->audio_encoder_staging_buffer = NULL;
+}
+
+static void reset_rtp_audio_codec(struct impl *impl, const char *reason)
+{	/* Forwards a reset request, with a reason string for logging, to the stream's audio codec if one is set. */
+	if (impl->audio_codec != NULL)	/* no codec set (raw PCM path): nothing to reset */
+		impl->audio_codec->reset(&(impl->audio_codec_context), reason);
+}
+
 static int rtp_audio_receive(struct impl *impl, uint8_t *buffer, ssize_t len,
 				ssize_t hlen, uint64_t current_time)
 {
+	int ret;
 	struct rtp_header *hdr;
-	ssize_t plen;
-	uint16_t seq;
-	uint32_t timestamp, samples, write, expected_write;
-	uint32_t stride = impl->stride;
-	int32_t filled;
+	uint16_t seqnum;
+	uint32_t timestamp;
 
 	hdr = (struct rtp_header*)buffer;
+	seqnum = ntohs(hdr->sequence_number);
+	timestamp = ntohl(hdr->timestamp) - impl->ts_offset;
 
-	seq = ntohs(hdr->sequence_number);
-	if (impl->have_seq && impl->seq != seq) {
-		pw_log_info("unexpected seq (%d != %d) SSRC:%u",
-				seq, impl->seq, impl->ssrc);
-		/* No need to resynchronize here. If packets arrive out of
-		 * order, then they are still written in order into the ring
-		 * buffer, since they are written according to where the
-		 * RTP timestamp points to. */
+	if (impl->have_seq && (impl->next_expected_incoming_seq >= 0) &&
+	    (impl->next_expected_incoming_seq != seqnum)) {
+		pw_log_info("packet arrived out of order: expected/actual packet seq: %"
+			    PRIu16"/%" PRIu16 " SSRC: %" PRIu32 " timestamp: %" PRIu32,
+			    impl->next_expected_incoming_seq, seqnum, impl->ssrc,
+			    timestamp);
 	}
+
+	/* Note that setting the last_recv_timestamp might not be correct
+	 * in cases where the jitter buffer switches to hold-back mode. That's
+	 * because in such cases, the jitter buffer will output packets in
+	 * bursts, and it might do so at a time that is significantly ahead
+	 * of the current_time. However - the jitter buffer switches to that
+	 * mode when it detects packet losses. last_recv_timestamp is used
+	 * for in-flight data calculations to smoothen out DLL adjustments
+	 * when the RTP source is running in constant delay mode. These
+	 * calculations assume steady packet transmission and a reliable
+	 * network - and in such a network, neither packet reordering nor
+	 * packet losses occur. Thus, it is still okay to set this timestamp
+	 * here instead of in process_received_samples(), because when packets
+	 * are lost / reordered, those calculations fall apart anyway. */
+	impl->last_recv_timestamp = current_time;
+
+	impl->next_expected_incoming_seq = (seqnum + 1) & 65535;
+
+	/* If execution reaches this point, then the packet might be out of order,
+	 * but still arrived in time. If the jitter buffer is initialized, it
+	 * will take care of reordering packets. If it is not initialized, then
+	 * there is no need to reorder the packets and depayload the data. The
+	 * packet contents can be directly written into the ring buffer
+	 * according to where their RTP timestamps point to, so even if they
+	 * come in out of order, they ultimately end up in the ring buffer in
+	 * the right locations. This can be done when raw PCM data is
+	 * transmitted, but not when the data is encoded - with encoded data,
+	 * the jitter buffer is necessary, since audio codecs typically require
+	 * encoded frames to arrive in order. */
+
+	if (rtp_jitter_buffer_is_initialized(&(impl->jitter_buffer))) {
+		ret = rtp_jitter_buffer_insert_packet(&(impl->jitter_buffer), buffer, len,
+						      hlen, timestamp, seqnum);
+		if (SPA_UNLIKELY(ret < 0)) {
+			pw_log_error("could not insert packet into jitter buffer: %s", spa_strerror(ret));
+			return ret;
+		}
+	} else {
+		ssize_t plen = len - hlen;
+		ret = process_received_samples(impl, &buffer[hlen], plen / impl->stride,
+			seqnum, timestamp, impl->target_buffer);
+		if (SPA_UNLIKELY(ret < 0)) {
+			pw_log_error("could not process received samples: %s", spa_strerror(ret));
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int process_received_samples(struct impl *impl, uint8_t *samples,
+	uint32_t num_samples, uint16_t seq, uint32_t timestamp, uint32_t ts_offset)
+{
+	uint32_t write, expected_write;
+	int32_t filled;
+	uint32_t stride = impl->stride;
+
 	impl->seq = seq + 1;
 	impl->have_seq = true;
 
-	timestamp = ntohl(hdr->timestamp) - impl->ts_offset;
-
 	impl->receiving = true;
-	impl->last_recv_timestamp = current_time;
-
-	plen = len - hlen;
-	samples = plen / stride;
 
 	filled = spa_ringbuffer_get_write_index(&impl->ring, &expected_write);
 
-	/* we always write to timestamp + delay */
-	write = timestamp + impl->target_buffer;
+	write = timestamp + ts_offset;
 
 	if (!impl->have_sync) {
 		pw_log_info("sync to timestamp:%u seq:%u ts_offset:%u SSRC:%u target:%u direct:%u",
 				timestamp, seq, impl->ts_offset, impl->ssrc,
 				impl->target_buffer, impl->direct_timestamp);
 
-		/* we read from timestamp, keeping target_buffer of data
-		 * in the ringbuffer. */
+		/* Synchronize by setting the read and write indices such that
+		 * the write index is ahead of the read index by exactly
+		 * ts_offset (target_buffer for regular packets), and the read
+		 * index equals the timestamp of the current RTP packet. */
 		impl->ring.readindex = timestamp;
 		impl->ring.writeindex = write;
 		filled = impl->target_buffer;
@@ -374,26 +669,34 @@ static int rtp_audio_receive(struct impl *impl, uint8_t *buffer, ssize_t len,
 		spa_dll_init(&impl->dll);
 		spa_dll_set_bw(&impl->dll, SPA_DLL_BW_MIN, 128, impl->rate);
 		memset(impl->buffer, 0, impl->buffer_size);
+
+		reset_rtp_audio_codec(impl, "(re)resynchronization");
+
 		impl->have_sync = true;
 	} else if (expected_write != write) {
 		pw_log_debug("unexpected write (%u != %u)",
 				write, expected_write);
 	}
 
-	/* Write overrun only makes sense in constant delay mode. See the
-	 * RTP source module documentation and the rtp_audio_process_playback()
-	 * code for an explanation why. */
-	if (!impl->direct_timestamp && (filled + samples > impl->buffer_size / stride)) {
-		pw_log_debug("receiver write overrun %u + %u > %u", filled, samples,
+	if (!impl->direct_timestamp && (filled + num_samples > impl->buffer_size / stride)) {
+		/* In constant delay mode, the goal is to keep the buffer fill
+		 * level at a fixed level. If it goes above or below that, rate
+		 * matching is used in rtp_audio_process_playback() to drive
+		 * the fill level to that target value. If however the write side
+		 * (that is, this function here) reaches a write overrun, it cannot
+		 * insert any more samples and rely on that rate matching to
+		 * compensate (there's no more room in the ringbuffer). A hard
+		 * resync is needed in such a case. */
+		pw_log_debug("receiver write overrun %u + %u > %u", filled, num_samples,
 				impl->buffer_size / stride);
 		impl->have_sync = false;
 	} else {
-		pw_log_trace("got samples:%u", samples);
+		pw_log_trace("got %" PRIu32 " samples", num_samples);
 		spa_ringbuffer_write_data(&impl->ring,
 				impl->buffer,
 				impl->actual_max_buffer_size,
 				((uint64_t)write * stride) % impl->actual_max_buffer_size,
-				&buffer[hlen], (samples * stride));
+				samples, (num_samples * stride));
 
 		/* Only update the write index if data was actually _appended_.
 		 * If packets arrived out of order, then it may be that parts
@@ -426,7 +729,7 @@ static int rtp_audio_receive(struct impl *impl, uint8_t *buffer, ssize_t len,
 		 * In unsigned arithmetic, if write + samples exceeds UINT32_MAX,
 		 * it wraps around to a smaller value. We detect this by checking
 		 * if new_write < write (which can only happen on overflow). */
-		const uint32_t new_write = write + samples;
+		const uint32_t new_write = write + num_samples;
 		const bool wrapped_around = new_write < write;
 
 		/* Determine if new_write is ahead of expected_write.
@@ -469,6 +772,8 @@ static void rtp_audio_send_packets(struct impl *impl, uint32_t timestamp, uint32
 	struct iovec iov[3];
 	struct rtp_header header;
 	uint32_t stride;
+	bool do_send = true;
+	int ret;
 
 	stride = impl->stride;
 
@@ -498,12 +803,84 @@ static void rtp_audio_send_packets(struct impl *impl, uint32_t timestamp, uint32
 			((uint64_t)timestamp * stride) % impl->actual_max_buffer_size,
 			&iov[1], tosend * stride);
 
-		pw_log_trace_fp("sending %d packet:%d ts_offset:%d timestamp:%u (%f s)",
-				tosend, num, impl->ts_offset, timestamp,
+		pw_log_trace_fp("sending %d packet:%d seq: %" PRIu16 " ts_offset:%d timestamp:%u (%f s)",
+				tosend, num, seq, impl->ts_offset, timestamp,
 				(double)timestamp * impl->io_position->clock.rate.num /
 				impl->io_position->clock.rate.denom);
 
-		rtp_stream_call_send_packet(impl, iov, 3);
+		if (impl->audio_codec != NULL) {
+			/* In here, the ring buffer that iov points to is redirected
+			 * to be encoded, and the encoder's output is then set as the
+			 * new iov content. */
+
+			uint8_t *samples_to_encode = NULL;
+			uint8_t *encoded_data = NULL;
+			size_t encoded_data_size = 0;
+
+			/* The audio codec expects one contiguous data block with impl->psamples
+			 * samples. In case of a ring buffer wrap around, copy the audio data
+			 * into the staging buffer, since the encoder cannot handle the wrap
+			 * around on its own. And if there are fewer than psamples samples,
+			 * copy what's available into the staging buffer and zero-stuff the rest
+			 * of its space to be able to give the encoder the required amount of
+			 * data to encode. */
+			spa_assert((iov[1].iov_len + iov[2].iov_len) == (tosend * stride));
+			if (iov[2].iov_len != 0) {
+				/* Copy the audio data, taking wrap around into account. */
+				memcpy(impl->audio_encoder_staging_buffer, iov[1].iov_base, iov[1].iov_len);
+				memcpy(impl->audio_encoder_staging_buffer + iov[1].iov_len, iov[2].iov_base, iov[2].iov_len);
+				/* Zero-pad the unused trailing portions of the buffer. */
+				if ((uint32_t)tosend < impl->psamples) {
+					memset(impl->audio_encoder_staging_buffer + iov[1].iov_len + iov[2].iov_len,
+					       0, (impl->psamples - tosend) * stride);
+				}
+
+				samples_to_encode = impl->audio_encoder_staging_buffer;
+			} else if ((uint32_t)tosend < impl->psamples) {
+				/* Copy the audio data. */
+				memcpy(impl->audio_encoder_staging_buffer, iov[1].iov_base, iov[1].iov_len);
+				/* Zero-pad the unused trailing portions of the buffer. */
+				memset(impl->audio_encoder_staging_buffer + iov[1].iov_len,
+				       0, (impl->psamples - tosend) * stride);
+
+				samples_to_encode = impl->audio_encoder_staging_buffer;
+			} else {
+				/* No wrap around happening, and no zero padding necessary.
+				 * The audio data can be directly fed into the encoder. */
+				samples_to_encode = iov[1].iov_base;
+			}
+
+			ret = impl->audio_codec->encode(&(impl->audio_codec_context),
+				samples_to_encode, &encoded_data, &encoded_data_size);
+
+			if (SPA_LIKELY(ret == 0)) {
+				/* Tweak iov to get the actual RTP payload from the encoder's
+				 * output instead of from the ring buffer directly. (The iov_base
+				 * value of iov[2] is still set to encoded_data, even though its
+				 * iov_len is 0, since it is unclear whether setting its iov_base
+				 * pointer to NULL is valid. iov_len 0 _is_ valid, and causes
+				 * POSIX calls to ignore that iovec item.) */
+				iov[1].iov_base = encoded_data;
+				iov[1].iov_len = encoded_data_size;
+				iov[2].iov_base = encoded_data;
+				iov[2].iov_len = 0;
+				do_send = true;
+			} else {
+				/* Normally, an encode() error would require shutting down the
+				 * encoder. However, this is not feasible here, since this code
+				 * runs in the data loop thread. Furthermore, if the errors keep
+				 * occurring, this would lead to a constantly reinitializing
+				 * encoder. There is no discernible way to communicate the error
+				 * to the application either. Thus, in case of an error, skip
+				 * the send, and log it. */
+				pw_log_error("could not encode audio for packet with seqnum %" PRIu16 ": %s",
+					     seq, spa_strerror(ret));
+				do_send = false;
+			}
+		}
+
+		if (SPA_LIKELY(do_send))
+			rtp_stream_call_send_packet(impl, iov, 3);
 
 		seq++;
 		first = false;
diff --git a/src/modules/module-rtp/jitter-buffer.c b/src/modules/module-rtp/jitter-buffer.c
new file mode 100644
index 000000000..34a8fc34f
--- /dev/null
+++ b/src/modules/module-rtp/jitter-buffer.c
@@ -0,0 +1,1057 @@
+/* PipeWire */
+/* SPDX-FileCopyrightText: Copyright © 2026 Carlos Rafael Giani */
+/* SPDX-License-Identifier: MIT */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <limits.h>
+#include <string.h>
+
+#include <arpa/inet.h>
+
+#include <spa/utils/defs.h>
+#include <spa/utils/result.h>
+
+#include <pipewire/log.h>
+
+#include <module-rtp/rtp.h>
+#include <module-rtp/jitter-buffer.h>
+
+PW_LOG_TOPIC_EXTERN(mod_topic);
+#define PW_LOG_TOPIC_DEFAULT mod_topic
+
+/* RTP jitter buffer design overview
+ *
+ * NOTE: For basic information about what the jitter buffer does and how it
+ * is used, read through the rtp_jitter_buffer struct documentation first.
+ *
+ * The jitter buffer consists of slots and the valid seqnum window. Slots
+ * are abstract entities, and exist as items in the slots array. This array
+ * contains num_slots items. Valid slots contain elements; these are packets
+ * or gaps. The slot element_type is then set accordingly either to
+ * RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET or RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP.
+ * (See the rtp_jitter_buffer struct documentation for what "valid" means.)
+ * If a slot contains a packet, then the packet data is stored in the
+ * packet_buffer, as described in the rtp_jitter_buffer struct documentation.
+ *
+ * A "slot index" goes from 0 to (num_slots-1) and is used for accessing
+ * the slot array. Slot indices are mapped to sequence numbers through modulo
+ * arithmetic:
+ *
+ *   slot_index = seqnum mod num_slots
+ *
+ * This works reliably because the valid seqnum window establishes the
+ * range of valid sequence numbers, and because that window can at most
+ * be of size num_slots. Thus, even if a much older or much newer packet
+ * (that is, a packet with much smaller or larger seqnum than the ones
+ * encountered thus far) arrives, there is no danger of that modulo
+ * arithmetic incorrectly aliasing slots, since the valid seqnum window
+ * will take care of validating the seqnums first.
+ *
+ * In RTP, since sequence numbers are unsigned 16-bit integers, the integer
+ * wrap around needs to be addressed. Packet sequence number 65535 can be
+ * reached in real world scenarios depending on the packet duration. For
+ * example, if a packet covers 1 ms, then after ~66 seconds, sequence
+ * number 65535 will be reached. For this reason, the valid seqnum window's
+ * start and length quantities use uint32_t as type - it factors out the
+ * wrap-around in certain calculations, which make them easier.
+ *
+ * The wrap around behavior also makes it non-trivial to calculate sequence
+ * number deltas correctly. For this reason, the calculate_seqnum_delta()
+ * utility function is used for seqnum deltas.
+ *
+ * In regular mode, incoming packets all have had the expected monotonically
+ * incrementing sequence number. (A sequence number wrap around technically
+ * means that they do not monotonically increment, but this is omitted here,
+ * because the wrap around is a numerical limitation, and it is handled
+ * as a continuation of a packet seqnum increment.) In this mode, the packet
+ * data, packet size, header size, timestamp are forwarded immediately to the
+ * output_rtp_packet() function pointer without copying the packet data. This
+ * improves performance during regular mode, which is the most common mode
+ * the jitter buffer will be in, unless the network quality is very poor. The
+ * slots and the valid seqnum window have no meaning in this mode.
+ *
+ * When the jitter buffer expects seqnum X, but sees seqnum X-N (that is,
+ * an older seqnum), the incoming packet with seqnum X-N is dropped, and
+ * regular mode continues. If the packet's seqnum instead is X+N, it means
+ * that there is a gap. For example, if the last packet had seqnum 200,
+ * the jitter buffer expects the next packet to have seqnum 201. If the
+ * next packet instead has seqnum 202, then there is a gap at 201. The
+ * jitter buffer switches to hold-back mode, and establishes a valid seqnum
+ * window that starts at 201 and is of length 2. That is, it covers the gap
+ * and the new packet. The gap is registered at slot (201 % num_slots), the
+ * packet is inserted in slot (202 % num_slots). Suppose that num_slots is 10.
+ * Then, the gap will be registered at slot 201 % 10 = 1, and the packet 202
+ * will be stored at slot 202 % 10 = 2. To be more specific, item #1 in the
+ * slots array will have its type set to RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP,
+ * and item #2 will have its type set to RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET.
+ * The size of packet 202 will be stored in item #2 in the slots array.
+ * (Item #1 in the slots array will not have a packet size set, since a gap
+ * has no "size" in bytes.)
+ * Packet 202's data will be copied into packet_buffer, at location
+ * (max_packet_size * 2), since the slot index is 2. That way, slot 1 is set
+ * to contain a gap at seqnum 201, and slot 2 is set to contain a packet 202
+ * with the specified packet data, packet size, header size, timestamp.
+ *
+ * In hold-back mode, every time a packet is inserted, the jitter buffer will
+ * check if the oldest N valid slots contain packets (and no gaps). The "oldest"
+ * slot is the one whose associated sequence number equals the value of
+ * valid_seqnum_window_start_seqnum. The second oldest slot's associated sequence
+ * number equals (valid_seqnum_window_start_seqnum+1) etc. If the oldest N slots
+ * indeed contain purely packets and no gaps (that is, N > 0), the jitter buffer
+ * will output the packets from those N oldest slots, in order (the packet from
+ * the oldest slot is output first, then comes the packet from the second oldest
+ * slot etc.). Then, the valid_seqnum_window_start_seqnum value is shifted forwards
+ * by N, since those packets from the oldest N slots are no longer held back.
+ * Accordingly, the valid_seqnum_window_length value is decremented by N. If there
+ * are still gaps, it means there are packets behind the gaps. In such a case,
+ * valid_seqnum_window_length will be nonzero even after decrementing it. But if
+ * this output step did emit all of the remaining held back packets, it implies
+ * that there is nothing left in the slots, nothing is being held back, so the
+ * jitter buffer switches back to regular mode.
+ *
+ * When a packet comes in whose sequence number is older than the start of the
+ * valid seqnum window, it is dropped, just like in the regular mode. If however
+ * the sequence number goes past the window, it effectively extends the window.
+ * Suppose that the window starts at seqnum 200, and is of length 5. This means
+ * that currently, all seqnums from 200 to 204 (both inclusive) are valid. But
+ * then, a packet with seqnum 209 comes in. This requires extending the window
+ * from length 5 to length 10, such that it covers all seqnums from 200 to 209.
+ * This step of course can introduce gaps. In this example, a new gap is added
+ * that starts at seqnum 205 and is of length 4, since the gap extends all the
+ * way to seqnum 208. The four slots that are associated with seqnums 205, 206,
+ * 207, 208 are all set to contain gaps.
+ *
+ * However, the window cannot be extended indefinitely. If it is extended past
+ * num_slots, it is referred to as "overextended". Such a window cannot remain like
+ * this, because there are not enough slots to store all the elements it covers.
+ * In such a case, the jitter buffer will try to shift the window forward. If
+ * this shift amount is small enough, then the newest N of the currently valid
+ * slots remain in the window, and the oldest (window length - N) slots need to
+ * be drained. To continue with the earlier example, suppose that num_slots is 8.
+ * This means that packet 209 would overextend the window by 2 slots. The
+ * jitter buffer then shifts the window such that it starts at 202 and is of
+ * length 8 (that is, the num_slots amount). Slots associated with seqnums that
+ * go from 202 to 204 remain within the window, but slots associated with seqnums
+ * 200 and 201 do not, and thus need to be drained.
+ *
+ * It is possible that the window is shifted so far ahead that _none_ of the
+ * currently valid slots remain in the window. In such a case, the window is
+ * reset to contain the newly arrived packet and to be of length 1. Since this
+ * is a case in which (as explained above), the jitter buffer sees that the
+ * oldest N packets (N being 1 in this case) have no gaps in between them, this
+ * means that the newly arrived packet is immediately output. And since after
+ * that, no held-back packets remain, the jitter buffer switches back to
+ * regular mode.
+ *
+ * When slots are drained, it means that the jitter buffer looks at the elements
+ * inside them, and calls output_rtp_packet() or signal_lost_packets(). Draining
+ * does aggregate gaps to avoid calling signal_lost_packets() often. Only valid
+ * slots (that is, slots whose associated sequence numbers lie within the valid
+ * seqnum window) are drained.
+ * For example, suppose that within the window, there is a gap at seqnum 201,
+ * a packet at seqnum 202, another one at seqnum 203, a gap that starts at seqnum
+ * 204 and goes until seqnum 208, and then a final packet at 209. Draining will
+ * cause the following sequence of calls (in this order):
+ *
+ * 1. signal_lost_packets(), with gap at seqnum 201, of length 1
+ * 2. output_rtp_packet(), with data of packet at seqnum 202
+ * 3. output_rtp_packet(), with data of packet at seqnum 203
+ * 4. signal_lost_packets(), with gap at seqnum 204, of length 5
+ * 5. output_rtp_packet(), with data of packet at seqnum 209
+ *
+ * A partial drain drains only the oldest N slots in the window, and held-back
+ * packets remain. A full drain drains all slots in the entire window -
+ * nothing remains. In the full drain case, the jitter buffer switches back
+ * to regular mode afterwards.
+ *
+ * A full drain is automatically done if the hold-back mode persists for some
+ * time. This is a case where there is a gap, but the associated packet does
+ * not arrive in time (or not at all). The timeout_timer exists for this purpose;
+ * if it expires, it automatically does a full drain on the jitter buffer.
+ * That timer is reset when the window is shifted due to overextension, and
+ * is disabled when switching back to regular mode.
+ *
+ * In cases where draining is done because of a window overextension, the jitter
+ * buffer will signal packet loss after the drain if there is a gap in between
+ * where the window used to be and where it now is after the shift. If for example
+ * the window previously started at seqnum 200 with length 4, and now starts at
+ * seqnum 210 with length 10, there is a gap from 204 to 209 (both inclusive).
+ * The jitter buffer will announce that via signal_lost_packets(). If however
+ * that gap is larger than num_slots, then the jitter buffer calls that function
+ * pointer with open_ended set to true. Such an "open ended gap" is a gap that
+ * is actually larger than num_slots, but is truncated to that length, to prevent
+ * PLC measures from having to produce an excessive amount of data. open_ended
+ * being set to true allows the signal_lost_packets() callback to apply additional
+ * measures, such as a fadeout, to cleanly terminate its PLC for the open gap. This
+ * might be necessary if PLC measures otherwise never reach silence on their own.
+ *
+ * These mechanisms allow for partial outputs in cases where there are multiple
+ * gaps in the slots, and also switch back to regular mode immediately once
+ * everything has been output or the jitter buffer is fully drained.
+ *
+ * Packet reordering is done implicitly, since when inserting packets, it is done
+ * so according to their seqnum, but packet output is done in order of their
+ * storage in the slots (the "oldest" slots, that is, slots associated with the
+ * oldest valid seqnums within the window are output first).
+ *
+ * Note that slots that contain gaps are not explicitly marked as containing gaps.
+ * Instead, all items in the slots array have their element type initially set to
+ * RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP. Later, when packets are inserted, these
+ * types are overwritten with RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET. And, when
+ * packets are output, the associated slot array items have their types set back
+ * to RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP. This is because gaps are fundamentally
+ * implicit - they do not exist as their own entities (unlike packets). */
+
+static int arm_timeout_timer(struct rtp_jitter_buffer *jitter_buffer);
+static void disarm_timeout_timer(struct rtp_jitter_buffer *jitter_buffer);
+static void on_timeout_expiration(void *data, uint64_t expirations);
+
+static inline size_t seqnum_to_slot_index(struct rtp_jitter_buffer *jitter_buffer,
+					  uint16_t seqnum);
+
+static void store_packet(struct rtp_jitter_buffer *jitter_buffer,
+	const uint8_t *packet_data, size_t packet_size, size_t header_size,
+	uint32_t timestamp, uint16_t seqnum, bool check_for_duplicates);
+
+static int do_drain(struct rtp_jitter_buffer *jitter_buffer,
+		    size_t num_oldest_slots_to_drain);
+
+int rtp_jitter_buffer_init(struct rtp_jitter_buffer *jitter_buffer, struct rtp_jitter_buffer_params *params)
+{
+	int ret = 0;
+	size_t idx;
+	/* Sets up slots, packet buffer and timeout timer. Returns 0 on success, a negative errno value on failure. */
+	spa_assert(jitter_buffer != NULL);
+	spa_assert(!jitter_buffer->initialized);
+	spa_assert(params != NULL);
+	spa_assert(params->num_slots > 0);
+	spa_assert(params->max_packet_size > 0);
+	spa_assert(params->packet_duration > 0);
+	spa_assert(params->loop != NULL);
+	spa_assert(params->output_rtp_packet != NULL);
+	spa_assert(params->signal_lost_packets != NULL);
+
+	spa_memzero(jitter_buffer, sizeof(struct rtp_jitter_buffer));
+	memcpy(&(jitter_buffer->params), params, sizeof(struct rtp_jitter_buffer_params));
+
+	/* Mark as initialized before acquiring any resources, so that
+	 * the error path below can call rtp_jitter_buffer_shutdown()
+	 * (which is a no-op without this flag) to undo a partial setup. */
+	jitter_buffer->initialized = true;
+
+	jitter_buffer->slots = calloc(params->num_slots, sizeof(struct rtp_jitter_buffer_slot));
+	if (jitter_buffer->slots == NULL) {
+		ret = -ENOMEM;
+		pw_log_error("Could not allocate memory for slots array: %m");
+		goto error;
+	}
+
+	jitter_buffer->packet_buffer = calloc(params->num_slots, params->max_packet_size);
+	if (jitter_buffer->packet_buffer == NULL) {
+		ret = -ENOMEM;
+		pw_log_error("Could not allocate memory for packet buffer: %m");
+		goto error;
+	}
+	/* Gaps are implicit: every slot starts out as a gap until a packet is stored in it. */
+	for (idx = 0; idx < jitter_buffer->params.num_slots; ++idx)
+		jitter_buffer->slots[idx].element_type = RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP;
+
+	jitter_buffer->hold_back_mode = false;
+	jitter_buffer->last_seqnum = -1; /* negative: presumably "no packet seen yet" - TODO confirm */
+
+	/* Synchronize loop access. See the spa_loop_methods
+	 * documentation; the add_source() documentation states:
+	 * "Must be called from the loop's own thread." By extension,
+	 * this also applies to pw_loop_add_timer(). */
+	pw_loop_lock(jitter_buffer->params.loop);
+	jitter_buffer->timeout_timer = pw_loop_add_timer(params->loop, on_timeout_expiration,
+		jitter_buffer);
+	pw_loop_unlock(jitter_buffer->params.loop);
+	/* NOTE(review): errno is read only after pw_loop_unlock() - confirm that unlocking cannot clobber it. */
+	if (jitter_buffer->timeout_timer == NULL) {
+		ret = -errno;
+		pw_log_error("Could not create timeout timer: %m");
+		goto error;
+	}
+
+	pw_log_info("Initialized RTP jitter buffer: num slots: %zu; max packet size: %zu bytes; "
+		    "packet duration: %" PRIu64 " ns; total capacity duration: %" PRIu64 " ns",
+		    params->num_slots, params->max_packet_size, params->packet_duration,
+		    params->packet_duration * params->num_slots);
+
+finish:
+	return ret;
+
+error:
+	rtp_jitter_buffer_shutdown(jitter_buffer);
+	goto finish;
+}
+
+void rtp_jitter_buffer_shutdown(struct rtp_jitter_buffer *jitter_buffer)
+{
+	spa_assert(jitter_buffer != NULL);
+	/* Releases timer and buffers; a no-op unless rtp_jitter_buffer_init() set the initialized flag. */
+	if (!jitter_buffer->initialized)
+		return;
+
+	if (jitter_buffer->timeout_timer != NULL) {
+		/* Synchronize loop access. See the spa_loop_methods
+		 * documentation; the remove_source() documentation states:
+		 * "Must be called from the loop's own thread." By extension,
+		 * this also applies to pw_loop_destroy_source(). */
+		pw_loop_lock(jitter_buffer->params.loop);
+		/* Note that this also internally detaches the source from the
+		 * loop, and thus, any previously queued timeout callbacks are
+		 * no longer invoked once this call ends.
+		 * (For more, see the detach_source(), loop_destroy_source(),
+		 * and loop_iterate() functions in spa/plugins/support/loop.c ,
+		 * particularly how they handle ep[i].data . Also see how
+		 * remove_from_poll() - called by loop_destroy_source() - and
+		 * loop_iterate() handle remove_count .) */
+		pw_loop_destroy_source(jitter_buffer->params.loop, jitter_buffer->timeout_timer);
+		pw_loop_unlock(jitter_buffer->params.loop);
+	}
+	/* free(NULL) is a no-op, so a partially initialized jitter buffer is handled here as well. */
+	free(jitter_buffer->slots);
+	free(jitter_buffer->packet_buffer);
+
+	jitter_buffer->initialized = false; /* freed pointers are not reset; this flag guards against reuse */
+
+	pw_log_info("RTP jitter buffer shut down");
+}
+
+int rtp_jitter_buffer_insert_packet(struct rtp_jitter_buffer *jitter_buffer,
+				    const uint8_t *packet_data, size_t packet_size,
+				    size_t header_size, uint32_t timestamp, uint16_t seqnum)
+{
+	struct rtp_jitter_buffer_params *params;
+	int ret = 0;
+	size_t slot_index;
+	int16_t seqnum_delta;
+
+	spa_assert(jitter_buffer != NULL);
+	spa_assert(jitter_buffer->initialized);
+	spa_assert(packet_data != NULL);
+	spa_assert(header_size <= packet_size);
+
+	params = &(jitter_buffer->params);
+
+	spa_assert(packet_size <= params->max_packet_size);
+
+	pw_log_trace("Got packet with size %zu and seqnum %" PRIu16 "; valid seqnum window: "
+		     "start seqnum / length: %" PRIu32 " / %" PRIu32, packet_size, seqnum,
+		     jitter_buffer->valid_seqnum_window_start_seqnum,
+		     jitter_buffer->valid_seqnum_window_length);
+
+	if (jitter_buffer->hold_back_mode) {
+		size_t idx = 0;
+		size_t num_oldest_slots_with_packets;
+
+		spa_assert(jitter_buffer->last_seqnum >= 0);
+
+		seqnum_delta = calculate_seqnum_delta(jitter_buffer->valid_seqnum_window_start_seqnum, seqnum);
+
+		if (seqnum_delta < 0) {
+			/* In hold-back mode, sequence numbers at or after the sequence
+			 * number stored in valid_seqnum_window_start_seqnum are of interest.
+			 * Sequence numbers older than that are stale and to be discarded.
+			 * See the overview at the top of this file for details. */
+			pw_log_info("Dropping packet with stale or duplicate sequence number %"
+				PRId32, seqnum);
+			goto finish;
+		} else if ((size_t)seqnum_delta < params->num_slots) {
+			/* Packet is not stale, and fits within the capacity of the jitter
+			 * buffer, that is, its sequence number is not too far ahead of the
+			 * valid_seqnum_window_start_seqnum. Insert it into the buffer
+			 * according to its sequence number.
+			 *
+			 * In case its sequence number is further ahead than the current
+			 * valid seqnum window (but still fits within the jitter buffer
+			 * capacity), it means that this packet actually extends the window
+			 * when inserted into the buffer, and it does so this way:
+			 *
+			 * window_length = MAX(window_length, seqnum - window_start_seqnum + 1)
+			 *
+			 * (seqnum - window_start_seqnum) is stored in seqnum_delta, so this
+			 * becomes:
+			 *
+			 * window_length = MAX(window_length, seqnum_delta + 1)
+			 *
+			 * (The +1 is there because the length stores a size-like quantity,
+			 * so it must be added to avoid an off-by-one error.)
+			 *
+			 * Also see the overview at the top. */
+
+			jitter_buffer->valid_seqnum_window_length = SPA_MAX(
+				jitter_buffer->valid_seqnum_window_length, ((size_t)seqnum_delta) + 1);
+
+			store_packet(jitter_buffer, packet_data, packet_size, header_size,
+				     timestamp, seqnum, true);
+		} else {
+			/* The packet's sequence number is beyond the valid seqnum window,
+			 * and trying to extend the window to encompass that packet would
+			 * overextend it, that is, its length would exceed num_slots.
+			 *
+			 * (See the overview at the top.)
+			 *
+			 * Check by how much the window would be overextended. If the
+			 * overextension is less than the valid seqnum window length, then
+			 * some of the currently valid slots would remain valid after shifting
+			 * the window. Specifically, the last (valid_seqnum_window_length -
+			 * overextension_amount) slots remain valid, while the ones before that
+			 * need to be drained. The result is a valid seqnum window that has been
+			 * shifted forward by valid_seqnum_window_length, with a length that
+			 * equals the num_slots amount.
+			 *
+			 * seqnum_delta+1 equals the amount of slots from the start of the
+			 * window to the new packet (+1 since the new packet itself is included).
+			 * Since by now, we know that the window would be overextended, this
+			 * implies that (seqnum_delta+1) > num_slots.
+			 *
+			 * The overextension amount therefore is:
+			 *
+			 *   overextension_amount = (seqnum_delta+1) - num_slots
+			 *
+			 * This is also the number of oldest slots that need to be drained at the
+			 * start of the valid seqnum window before shifting it. If encompassing
+			 * the new packet increases the window by overextension_amount, draining
+			 * those oldest slots decreases it again by overextension_amount, resulting
+			 * in a window length equaling num_slots.
+			 *
+			 * If however the overextension is equal to or larger than the valid seqnum
+			 * window length, then none of the slots within the window remain valid.
+			 * It therefore makes no sense then to try to keep them around, so the
+			 * jitter buffer is fully drained before inserting the new packet. Also,
+			 * this means that afterwards, the slot that contains the new packet is
+			 * the only remaining one, that is, the window length is 1. */
+
+			size_t overextension_amount;
+			bool window_fully_invalid;
+			size_t num_oldest_slots_to_drain;
+			bool open_ended = false;
+
+			overextension_amount = ((size_t)seqnum_delta) + 1 - params->num_slots;
+			window_fully_invalid = (overextension_amount >= jitter_buffer->valid_seqnum_window_length);
+			num_oldest_slots_to_drain = SPA_MIN(overextension_amount,
+				                            jitter_buffer->valid_seqnum_window_length);
+
+			pw_log_debug("Packet with seqnum %" PRIu16 " exceeds capacity of jitter buffer "
+				     "in hold-back mode; window shift amount: %zu window fully invalid: %d "
+				     "num oldest slots to drain: %zu", seqnum, overextension_amount,
+				     window_fully_invalid, num_oldest_slots_to_drain);
+
+			if (num_oldest_slots_to_drain > 0) {
+				ret = do_drain(jitter_buffer, num_oldest_slots_to_drain);
+				if (SPA_UNLIKELY(ret < 0)) {
+					ret = -EIO;
+					goto finish;
+				}
+			}
+
+			/* If the window was fully invalidated (= drained), insert a packet loss
+			 * signal to allow callers to append some sort of PLC / fadeout to the
+			 * drained data. This is helpful for avoiding hard cutoffs in the output. */
+			if (window_fully_invalid) {
+				ssize_t gap_length;
+				uint16_t one_past_last_valid_seqnum;
+
+				one_past_last_valid_seqnum = jitter_buffer->valid_seqnum_window_start_seqnum +
+							     jitter_buffer->valid_seqnum_window_length;
+				gap_length = calculate_seqnum_delta(one_past_last_valid_seqnum, seqnum);
+				if (SPA_UNLIKELY(gap_length < 0)) {
+					/* This would indicate a serious error in the calculations,
+					 * so log it accordingly. */
+					pw_log_error("Negative packet loss amount %zd detected; valid seqnum "
+						     "window start seqnum / length: %" PRIu32 " / %" PRIu32 "; "
+						     "using packet loss amount 0 instead",
+						     gap_length, jitter_buffer->valid_seqnum_window_start_seqnum,
+						     jitter_buffer->valid_seqnum_window_length);
+					gap_length = 0;
+				}
+
+				/* If the gap exceeds the capacity of the jitter buffer, then limit the
+				 * packet loss amount and consider the gap open ended to avoid cases where
+				 * callers otherwise would have to conceal an excessive amount of packet loss. */
+				if ((size_t)gap_length > params->num_slots) {
+					open_ended = true;
+					gap_length = params->num_slots;
+				}
+
+				if (gap_length > 0) {
+					if (open_ended) {
+						pw_log_debug("Signaling packet loss, starting at seqnum %"
+							     PRIu16 ", gap length %zd, open ended",
+							     one_past_last_valid_seqnum, gap_length);
+					} else {
+						pw_log_debug("Signaling packet loss, starting at seqnum %"
+							     PRIu16 ", gap length %zd, not open ended",
+							     one_past_last_valid_seqnum, gap_length);
+					}
+					if ((ret = params->signal_lost_packets(params->context,
+						one_past_last_valid_seqnum, (size_t)gap_length, open_ended)) != 0) {
+						pw_log_error("Could not signal lost RTP packet: %s", spa_strerror(ret));
+						goto finish;
+					}
+				}
+			}
+
+			/* As explained in the overview, a shift that fully invalidates
+			 * the window is a different case than one that retains entries
+			 * from the old window. */
+			if (window_fully_invalid) {
+				jitter_buffer->valid_seqnum_window_start_seqnum = seqnum;
+				jitter_buffer->valid_seqnum_window_length = 1;
+			} else {
+				/* If the window is not fully invalid, then figure out
+				 * its new start_seqnum by moving backwards. A not fully
+				 * invalid window is of length num_slots (see the overview
+				 * for why), and ends at the sequence number of the new
+				 * packet. Therefore, it starts at (seqnum +1 - num_slots).
+				 * (+1 since the new packet itself also has to be factored in.) */
+				jitter_buffer->valid_seqnum_window_start_seqnum = seqnum + 1 - params->num_slots;
+				jitter_buffer->valid_seqnum_window_length = params->num_slots;
+			}
+			pw_log_debug("Reset valid seqnum window to start at seqnum %" PRIu32 " and be of length %"
+				PRIu32, jitter_buffer->valid_seqnum_window_start_seqnum,
+				jitter_buffer->valid_seqnum_window_length);
+
+			/* Now write this new packet into the packet buffer to hold
+			 * it back until the gap before it is filled. */
+			store_packet(jitter_buffer, packet_data, packet_size, header_size,
+				     timestamp, seqnum, false);
+
+			/* NOTE: This assumes that spa_system_timerfd_settime() behaves just
+			 * like timerfd_settime() in the sense that it clears any timer
+			 * expirations that haven't been noticed yet. In other words, even
+			 * if the timer expired already, rearming by calling
+			 * spa_system_timerfd_settime() is assumed to implicitly wipe
+			 * these prior expirations.
+			 *
+			 * This is precisely what timerfd_settime() does. From its manpage:
+			 *
+			 * > If the timer has already expired one or more times ***since its
+			 * > settings were last modified using timerfd_settime()***, or since
+			 * > the last successful read(2), then the buffer given to read(2)
+			 * > returns an unsigned 8-byte integer (uint64_t) containing the
+			 * > number of expirations that have occurred.
+			 *
+			 * And this is important to make sure that, should the timer expire while
+			 * we are here, it does not lead to an immediate jitter buffer draining.
+			 */
+			if (ret == 0) {
+				pw_log_debug("Rearming timeout timer as part of resetting hold-back mode");
+				ret = arm_timeout_timer(jitter_buffer);
+				if (SPA_UNLIKELY(ret < 0)) {
+					pw_log_error("Could not arm timer: %s", spa_strerror(ret));
+					ret = -EINVAL;
+				}
+			} else {
+				pw_log_warn("Not rearming timeout timer due to earlier error while draining");
+			}
+
+			/* Next, let the logic below check the contents of the new,
+			 * updated valid seqnum window. */
+		}
+
+		/* After inserting the packet, go through the valid window and check if
+		 * the oldest N slots contain packets without gaps in between them. First,
+		 * N (= num_oldest_slots_with_packets) has to be determined. Iterate over
+		 * the slots, starting at the valid seqnum window start, until either,
+		 * a slot with a gap inside is encountered, or the end of the window is
+		 * reached. That way, the number of oldest N held back packets that present
+		 * in an uninterrupted sequence in the oldest N slots and thus can be output
+		 * is determined. */
+		for (idx = 0; idx < jitter_buffer->valid_seqnum_window_length; ++idx) {
+			uint16_t item_seqnum = jitter_buffer->valid_seqnum_window_start_seqnum + idx;
+			slot_index = seqnum_to_slot_index(jitter_buffer, item_seqnum);
+			if (jitter_buffer->slots[slot_index].element_type !=
+			    RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET)
+				break;
+		}
+		num_oldest_slots_with_packets = idx;
+
+		if (num_oldest_slots_with_packets > 0) {
+			pw_log_debug("The first %zu slots in valid seqnum window all contain "
+				     "packets - these packets can be output immediately",
+				     num_oldest_slots_with_packets);
+
+			/* Now output the packets. */
+			for (idx = 0; idx < num_oldest_slots_with_packets; ++idx) {
+				struct rtp_jitter_buffer_slot *slot;
+				uint16_t item_seqnum;
+				const uint8_t *packet_data_to_output;
+
+				/* This implicitly does the sequence number wrap-around
+				 * by storing the value in uint16_t. */
+				item_seqnum = jitter_buffer->valid_seqnum_window_start_seqnum + idx;
+
+				slot_index = seqnum_to_slot_index(jitter_buffer, item_seqnum);
+				slot = &(jitter_buffer->slots[slot_index]);
+
+				packet_data_to_output = jitter_buffer->packet_buffer +
+					params->max_packet_size * slot_index;
+
+				ret = params->output_rtp_packet(params->context, packet_data_to_output,
+					slot->packet_size, slot->header_size, slot->timestamp, item_seqnum);
+				if (SPA_UNLIKELY(ret < 0)) {
+					pw_log_error("Could not output RTP packet: %s", spa_strerror(ret));
+					ret = -EIO;
+					goto finish;
+				}
+
+				slot->element_type = RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP;
+			}
+
+			/* Since num_oldest_slots_with_packets is the result of a for-loop that counts up
+			 * to valid_seqnum_window_length, this assertion should always hold. */
+			spa_assert(jitter_buffer->valid_seqnum_window_length >= num_oldest_slots_with_packets);
+
+			/* Move the window start further to move past the slots whose packets
+			 * were just output. This also reduces the valid seqnum window length. */
+			jitter_buffer->valid_seqnum_window_start_seqnum += num_oldest_slots_with_packets;
+			jitter_buffer->valid_seqnum_window_length -= num_oldest_slots_with_packets;
+
+			pw_log_trace("%zu packet(s) output in hold-back mode; valid seqnum window "
+				     "is now: start seqnum / length: %" PRIu32 " / %" PRIu32,
+				     num_oldest_slots_with_packets,
+				     jitter_buffer->valid_seqnum_window_start_seqnum,
+				     jitter_buffer->valid_seqnum_window_length);
+
+			/* If the valid seqnum window length has become is zero, it means that
+			 * all held-back packets have been output, and no gaps remain.
+			 * This in turn means that we are done - all previously missing packets
+			 * have been received and have been output in order of their sequence
+			 * numbers. Switch back to regular mode and set the seqnum of the last
+			 * output packet the last_seqnum. */
+			if (jitter_buffer->valid_seqnum_window_length == 0) {
+				jitter_buffer->hold_back_mode = false;
+				/* Don't set this to the newly arrived packet's seqnum directly.
+				 * Hold-back mode is active, which means that packets previously
+				 * arrived out of order - so, the new packet too might not have
+				 * arrived in order. */
+				jitter_buffer->last_seqnum = jitter_buffer->valid_seqnum_window_start_seqnum - 1;
+				/* Also cancel the timeout timer to not trigger an erroneous drain. */
+				disarm_timeout_timer(jitter_buffer);
+				pw_log_debug("No more packets held back; switched back to regular mode, "
+					     "last seqnum set to %" PRId32, jitter_buffer->last_seqnum);
+			}
+		}
+	} else {
+		/* Here, we are in regular mode. */
+
+		/* If a preceding sequence number is known, use it to detect gaps. For
+		 * example, last sequence number is 400, this packet's sequence number
+		 * is 402 -> there is a gap at sequence number 401. This can happen due
+		 * to packet loss and out-of-order packet transmission.
+		 *
+		 * (Also see the overview at the top.) */
+		if (jitter_buffer->last_seqnum >= 0) {
+			seqnum_delta = calculate_seqnum_delta(jitter_buffer->last_seqnum, seqnum);
+
+			if (seqnum_delta <= 0) {
+				/* In the regular mode, it is implied that all preceding
+				 * packets were in order of the sequence numbers, and that
+				 * any missing packets have been also announced in-order.
+				 * Therefore, if at this point, the delta is <= 0, it means
+				 * that a packet with a sequence number equal to or lower
+				 * than the previous packet's sequence number arrived
+				 * (taking possible wraparound into account). This packet
+				 * may be a duplicate, and is unusable, so drop it. */
+
+				pw_log_info("Dropping packet with stale sequence number %" PRId32, seqnum);
+				goto finish;
+			} else if (seqnum_delta > 1) {
+				/* If the delta is >1, it indicates a gap. This can happen
+				 * due to out-of-order packets or due to packet loss.
+				 * Activate hold-back mode: Hold back the received packet
+				 * and any further ones until there are consecutive
+				 * sequences of packets available, which can be output.
+				 * And, write information about that packet and the gap into
+				 * the associated slots.
+				 * Also arm the timeout timer in case the missing packets
+				 * do not arrive in time. */
+
+				jitter_buffer->hold_back_mode = true;
+				/* Set the valid seqnum window start to the sequence number that
+				 * was actually expected as the one that follows the last packet.
+				 * Since this place was reached, it implies that the current packet's
+				 * sequence number does not equal to that one, and thus, the packet
+				 * we actually expected is still missing. This is the oldest packet
+				 * we expect here, since packets with a sequence number preceding
+				 * that packet's have been processed already at this point. */
+				jitter_buffer->valid_seqnum_window_start_seqnum =
+					((uint16_t)jitter_buffer->last_seqnum) + 1;
+				/* The current valid seqnum window length equals the seqnum_delta. This
+				 * is because the window goes from valid_seqnum_window_start_seqnum to
+				 * the last slot with a packet in the valid seqnum window (both inclusive).
+				 * For example, if valid_seqnum_window_start_seqnum is initially set
+				 * to 301 here, and the current packet has sequence number 305, it
+				 * means that the window goes from 301 to 305, so it consists of
+				 * 5 slots (the first 4 contain the gap from 301 to 304, and the fifth
+				 * slot contains packet 305).
+				 *
+				 * seqnum_delta equals:
+				 *
+				 *   (this current packet's seqnum - last_seqnum)
+				 *
+				 * Substituting in the seqnum_delta formula, we get:
+				 *
+				 *   valid_seqnum_window_start_seqnum = last_seqnum + 1
+				 *   -> last_seqnum = valid_seqnum_window_start_seqnum - 1
+				 *
+				 * and:
+				 *
+				 *   seqnum_delta =
+				 *   (this current packet's seqnum - last_seqnum) =
+				 *   (this current packet's seqnum - (valid_seqnum_window_start_seqnum - 1)) =
+				 *   (this current packet's seqnum - valid_seqnum_window_start_seqnum + 1)
+				 *
+				 * which is exactly the formula needed to compute a size or length
+				 * from A to B (or, in this case, from valid_seqnum_window_start_seqnum
+				 * to the packet's seqnum). */
+				jitter_buffer->valid_seqnum_window_length = seqnum_delta;
+
+				pw_log_debug("Gap detected starting at seqnum %" PRIu32 ", length %"
+					     PRId16 "; switched to hold-back mode",
+					     jitter_buffer->valid_seqnum_window_start_seqnum,
+					     seqnum_delta - 1);
+
+				if (SPA_UNLIKELY(jitter_buffer->valid_seqnum_window_length > params->num_slots)) {
+					/* It is possible that the sudden gap that leads to the
+					 * activation of the hold-back mode is so big that it
+					 * immediately overextends the valid seqnum window.
+					 * However, unlike in the hold-back cases handled above,
+					 * here, nothing is held back yet, so there are no slots
+					 * that can remain valid. Therefore, this behaves just like
+					 * in the window_fully_invalid == true case further above.
+					 *
+					 * In most cases, the gap is way larger than the number of
+					 * slots. There is a corner case though, and that is when
+					 * the window length equals (num_slots + 1). Keep in mind
+					 * that the window includes the gap _and_ the new packet -
+					 * hence the +1. From this it follows that if the window
+					 * length equals num_slots+1, it means that the actual
+					 * gap is num_slots in length. This is essentially the
+					 * largest possible gap that still isn't open ended. For
+					 * this reason, a check is made to identify this corner case.
+					 *
+					 * In either case, the correct, uninterrupted, in-order
+					 * packets that precede this gap are followed by proper PLC.
+					 * Should the gap be open ended, PLC can be augmented to
+					 * include a fadeout for example.
+					 *
+					 * And, since an overextended window is invalid, and in this
+					 * case, no held-back packets are presents, the valid seqnum
+					 * window is adjusted to only contain the current packet.
+					 *
+					 * After signaling, the jitter buffer switches back to regular
+					 * mode. This is because after the large gap, nothing is held
+					 * back anymore - there is no reason for staying in the
+					 * hold-back mode. */
+
+					bool open_ended = jitter_buffer->valid_seqnum_window_length >
+							  (params->num_slots + 1);
+
+					pw_log_debug("This gap immediately overextends the valid seqnum "
+						     "window; signaling packet loss, starting at seqnum %"
+						     PRIu16 ", gap length %zd, %sopen ended",
+						     jitter_buffer->valid_seqnum_window_start_seqnum,
+						     params->num_slots, open_ended ? "" : "not ");
+
+					if ((ret = params->signal_lost_packets(params->context,
+						jitter_buffer->valid_seqnum_window_start_seqnum,
+						params->num_slots, open_ended)) != 0) {
+						pw_log_error("Could not signal lost RTP packet: %s", spa_strerror(ret));
+						goto finish;
+					}
+
+					pw_log_debug("Switching back to regular mode and emitting packet "
+						     "after the large gap");
+
+					jitter_buffer->hold_back_mode = false;
+					ret = params->output_rtp_packet(params->context, packet_data,
+						packet_size, header_size, timestamp, seqnum);
+					jitter_buffer->last_seqnum = seqnum;
+					goto finish;
+
+				}
+
+				store_packet(jitter_buffer, packet_data, packet_size, header_size,
+					     timestamp, seqnum, false);
+
+				ret = arm_timeout_timer(jitter_buffer);
+				if (SPA_UNLIKELY(ret < 0)) {
+					pw_log_error("Could not arm timer: %s", spa_strerror(ret));
+					ret = -EINVAL;
+				}
+
+				goto finish;
+			}
+		}
+
+		/* If hold-back mode is not active, just forward the packet. */
+		ret = params->output_rtp_packet(params->context, packet_data, packet_size,
+						header_size, timestamp, seqnum);
+
+		jitter_buffer->last_seqnum = seqnum;
+	}
+
+finish:
+	return ret;
+}
+
+/* Explicitly drains everything that is currently held back and returns to
+ * regular mode. Does nothing (and returns 0) if hold-back mode is not active.
+ * Otherwise returns whatever do_drain() returned. */
+int rtp_jitter_buffer_drain(struct rtp_jitter_buffer *jitter_buffer)
+{
+	int drain_result = 0;
+
+	spa_assert(jitter_buffer != NULL);
+	spa_assert(jitter_buffer->initialized);
+
+	/* Packets are only ever held back while hold-back mode is
+	 * active, so in regular mode there is nothing to drain. */
+	if (jitter_buffer->hold_back_mode) {
+		drain_result = do_drain(jitter_buffer,
+					jitter_buffer->valid_seqnum_window_length);
+
+		/* No open-ended packet loss is signaled at this point. Open-ended
+		 * losses are reserved for large sequence number jumps that occur
+		 * mid-stream while holding back packets, whereas this drain is
+		 * requested from the outside. */
+
+		/* The mode switch happens regardless of the drain result. */
+		jitter_buffer->last_seqnum = -1;
+		jitter_buffer->hold_back_mode = false;
+		pw_log_debug("Switching back to regular mode after explicit drain");
+
+		/* Regular mode does not use the timeout timer,
+		 * so make sure it cannot fire anymore. */
+		disarm_timeout_timer(jitter_buffer);
+	}
+
+	return drain_result;
+}
+
+/* Discards all held-back packets without outputting them and without
+ * signaling any packet loss, then returns to regular mode with no known
+ * last sequence number. */
+void rtp_jitter_buffer_flush(struct rtp_jitter_buffer *jitter_buffer)
+{
+	size_t remaining_slots;
+
+	spa_assert(jitter_buffer != NULL);
+	spa_assert(jitter_buffer->initialized);
+
+	jitter_buffer->last_seqnum = -1;
+	jitter_buffer->hold_back_mode = false;
+	pw_log_debug("Switching back to regular mode after flush");
+
+	/* Mark every slot as a gap. The order in which the slots
+	 * are visited does not matter, so count downwards. */
+	remaining_slots = jitter_buffer->params.num_slots;
+	while (remaining_slots > 0) {
+		--remaining_slots;
+		jitter_buffer->slots[remaining_slots].element_type =
+			RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP;
+	}
+
+	/* The timeout timer is only meaningful in hold-back mode,
+	 * which was just left, so stop it. */
+	disarm_timeout_timer(jitter_buffer);
+}
+
+/* Programs the timeout timer as a one-shot timer (no repeat interval) that
+ * expires after `timeout` nanoseconds. disarm_timeout_timer() passes 0 here
+ * to stop the timer. Returns the result of spa_system_timerfd_settime(). */
+static int set_timeout(struct rtp_jitter_buffer *jitter_buffer, uint64_t timeout)
+{
+	/* Split the nanosecond count into whole seconds plus the
+	 * nanosecond remainder; the interval stays zero (one-shot). */
+	struct itimerspec timer_spec = {
+		.it_interval = { .tv_sec = 0, .tv_nsec = 0 },
+		.it_value = {
+			.tv_sec = timeout / SPA_NSEC_PER_SEC,
+			.tv_nsec = timeout % SPA_NSEC_PER_SEC
+		}
+	};
+
+	return spa_system_timerfd_settime(jitter_buffer->params.loop->system,
+					  jitter_buffer->timeout_timer->fd,
+					  0, &timer_spec, NULL);
+}
+
+/* Arms the timeout timer. Returns the result of set_timeout(). */
+static int arm_timeout_timer(struct rtp_jitter_buffer *jitter_buffer)
+{
+	/* The timeout equals the playtime that fits into the packet
+	 * buffer when all of its slots are in use: the number of
+	 * slots times the duration of one packet (in nanoseconds). */
+	uint64_t capacity_duration = jitter_buffer->params.num_slots *
+				     jitter_buffer->params.packet_duration;
+
+	return set_timeout(jitter_buffer, capacity_duration);
+}
+
+/* Stops the timeout timer by programming a timeout of 0.
+ *
+ * This function cannot report errors to its callers, but a timer that
+ * fails to stop would later cause an unexpected drain, so a failure
+ * is at least logged instead of being silently discarded. */
+static void disarm_timeout_timer(struct rtp_jitter_buffer *jitter_buffer)
+{
+	int ret = set_timeout(jitter_buffer, 0);
+
+	if (SPA_UNLIKELY(ret < 0))
+		pw_log_warn("Could not disarm timeout timer: %s", spa_strerror(ret));
+}
+
+/* Invoked when the timeout timer expires (presumably registered as the
+ * callback of timeout_timer - the registration is not part of this
+ * function). Drains the entire valid seqnum window and switches back to
+ * regular mode.
+ *
+ * data is the rtp_jitter_buffer instance. expirations is not used. */
+static void on_timeout_expiration(void *data, uint64_t expirations)
+{
+	struct rtp_jitter_buffer *jitter_buffer = data;
+	int ret;
+
+	/* The valid seqnum window is only meaningful in hold-back mode.
+	 * Draining based on stale window values in regular mode would
+	 * signal bogus packet losses and discard last_seqnum, so ignore
+	 * expirations that arrive while regular mode is active (same
+	 * guard as in rtp_jitter_buffer_drain()). */
+	if (SPA_UNLIKELY(!jitter_buffer->hold_back_mode)) {
+		pw_log_debug("Timeout timer expired while in regular mode; ignoring");
+		return;
+	}
+
+	pw_log_info("Timeout timer expired; draining jitter buffer");
+	ret = do_drain(jitter_buffer, jitter_buffer->valid_seqnum_window_length);
+	if (SPA_UNLIKELY(ret < 0)) {
+		/* There is no caller the error could be returned to,
+		 * so make it at least visible in the log. */
+		pw_log_error("Could not drain jitter buffer after timeout: %s",
+			     spa_strerror(ret));
+	}
+
+	/* Switch back to regular mode even if draining failed; there
+	 * is no known last sequence number after a drain. */
+	jitter_buffer->hold_back_mode = false;
+	jitter_buffer->last_seqnum = -1;
+}
+
+/* Copies a packet into the slot that is associated with its sequence number
+ * and marks that slot as containing a packet.
+ *
+ * packet_data / packet_size: the bytes of the packet and their amount.
+ * header_size, timestamp:    stored alongside the packet in the slot.
+ * seqnum:                    selects the slot (see seqnum_to_slot_index()).
+ * check_for_duplicates:      if true, and the slot already contains a packet,
+ *                            the new packet is treated as a duplicate and
+ *                            dropped; the slot is left untouched.
+ *
+ * Each slot has room for exactly max_packet_size bytes in the packet buffer.
+ * A packet that is larger than that is dropped and its slot is marked as a
+ * gap, since copying it would write past the slot's storage. */
+static void store_packet(struct rtp_jitter_buffer *jitter_buffer, const uint8_t *packet_data,
+			 size_t packet_size, size_t header_size, uint32_t timestamp,
+			 uint16_t seqnum, bool check_for_duplicates)
+{
+	size_t slot_index;
+	uint8_t *dest;
+	struct rtp_jitter_buffer_params *params = &(jitter_buffer->params);
+	struct rtp_jitter_buffer_slot *slot;
+
+	slot_index = seqnum_to_slot_index(jitter_buffer, seqnum);
+	slot = &(jitter_buffer->slots[slot_index]);
+
+	/* The duplicate check comes first so that an oversized
+	 * duplicate cannot invalidate an already stored packet. */
+	if (check_for_duplicates) {
+		if (SPA_UNLIKELY(slot->element_type == RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET)) {
+			pw_log_debug("Packet with seqnum %" PRIu16 " has already been inserted; "
+				     "this is a duplicate; dropping", seqnum);
+			return;
+		}
+	}
+
+	if (SPA_UNLIKELY(packet_size > params->max_packet_size)) {
+		pw_log_warn("Packet with seqnum %" PRIu16 " is too large (%zu bytes; "
+			    "maximum is %zu bytes); dropping", seqnum, packet_size,
+			    params->max_packet_size);
+		/* Make sure stale slot contents are not mistaken for this packet. */
+		slot->element_type = RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP;
+		return;
+	}
+
+	dest = jitter_buffer->packet_buffer + params->max_packet_size * slot_index;
+	memcpy(dest, packet_data, packet_size);
+
+	slot->element_type = RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET;
+	slot->packet_size = packet_size;
+	slot->header_size = header_size;
+	slot->timestamp = timestamp;
+}
+
+/* Maps a sequence number to the index of its slot: the sequence number
+ * modulo the number of slots. The same index is used for the slots array
+ * and for addressing the packet buffer. */
+static inline size_t seqnum_to_slot_index(struct rtp_jitter_buffer *jitter_buffer,
+					  uint16_t seqnum)
+{
+	const size_t slot_count = jitter_buffer->params.num_slots;
+	const size_t widened_seqnum = seqnum;
+
+	return widened_seqnum % slot_count;
+}
+
+/* Drains the oldest num_oldest_slots_to_drain slots of the valid seqnum window.
+ *
+ * The slots are visited in sequence number order, starting at the window
+ * start. A slot that contains a packet is passed to output_rtp_packet and is
+ * then marked as a gap. Consecutive slots without a packet are collected into
+ * one span and reported with a single signal_lost_packets call (always with
+ * open_ended = false), either when the next packet is reached or at the end
+ * of the drained range.
+ *
+ * num_oldest_slots_to_drain must not exceed valid_seqnum_window_length.
+ *
+ * Returns 0 on success (also if there is nothing to drain). If one of the
+ * callbacks returns a negative value, draining stops immediately and that
+ * value is returned. */
+static int do_drain(struct rtp_jitter_buffer *jitter_buffer,
+		    size_t num_oldest_slots_to_drain)
+{
+	/* Important: This intentionally does not reset valid_seqnum_window_length
+	 * or valid_seqnum_window_start_seqnum. That is up to the caller, since
+	 * this function is called in several different situations that require
+	 * different handling of these states. */
+
+	int ret = 0;
+	size_t idx;
+	/* First seqnum of the current run of drained packets; -1 if no
+	 * run is in progress. Only used for logging. */
+	int32_t first_drained_packet_seqnum = -1;
+	uint16_t first_lost_packet_seqnum = 0;
+	bool tracking_lost_packets = false;
+	struct rtp_jitter_buffer_params *params = &(jitter_buffer->params);
+
+	spa_assert(num_oldest_slots_to_drain <= jitter_buffer->valid_seqnum_window_length);
+
+	if (num_oldest_slots_to_drain == 0)
+		return 0;
+
+	if (num_oldest_slots_to_drain == jitter_buffer->valid_seqnum_window_length) {
+		pw_log_debug("Draining all slots in the entire valid seqnum window");
+	} else {
+		pw_log_debug("Draining the first (= oldest) %zu slots in the valid seqnum window; "
+			     "window start seqnum / length: %" PRIu32 " / %" PRIu32,
+			     num_oldest_slots_to_drain,
+			     jitter_buffer->valid_seqnum_window_start_seqnum,
+			     jitter_buffer->valid_seqnum_window_length);
+	}
+
+	for (idx = 0; idx < num_oldest_slots_to_drain; ++idx) {
+		/* Storing the sum in a uint16_t implicitly
+		 * performs the sequence number wrap-around. */
+		uint16_t seqnum = jitter_buffer->valid_seqnum_window_start_seqnum + idx;
+		size_t slot_index = seqnum_to_slot_index(jitter_buffer, seqnum);
+		struct rtp_jitter_buffer_slot *slot = &(jitter_buffer->slots[slot_index]);
+
+		if (slot->element_type == RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET) {
+			const uint8_t *packet_data;
+
+			if (tracking_lost_packets) {
+				/* This packet ends a span of lost packets. The span
+				 * covers first_lost_packet_seqnum up to (but not
+				 * including) seqnum, so the delta is its length. */
+				int16_t seqnum_delta = calculate_seqnum_delta(
+					first_lost_packet_seqnum, seqnum);
+
+				tracking_lost_packets = false;
+
+				if (SPA_LIKELY(seqnum_delta > 0)) {
+					pw_log_debug("Signaling packet loss sequence, starting at "
+						     "seqnum %" PRIu16 ", gap length %zu, not open ended",
+						     first_lost_packet_seqnum, (size_t)seqnum_delta);
+					if ((ret = params->signal_lost_packets(params->context,
+						first_lost_packet_seqnum, (size_t)seqnum_delta, false)) < 0) {
+						goto finish;
+					}
+				} else {
+					pw_log_error("Negative delta %" PRId16" between first and last "
+						     "seqnum in lost seqnum range %" PRIu16 " - %" PRIu16
+						     " encountered while draining", seqnum_delta,
+						     first_lost_packet_seqnum, seqnum);
+				}
+			}
+
+			packet_data = jitter_buffer->packet_buffer + params->max_packet_size * slot_index;
+
+			if ((ret = params->output_rtp_packet(params->context, packet_data,
+				slot->packet_size, slot->header_size, slot->timestamp, seqnum)) < 0)
+				goto finish;
+
+			/* The packet was output; its slot no longer holds one. */
+			slot->element_type = RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP;
+
+			if (first_drained_packet_seqnum < 0)
+				first_drained_packet_seqnum = seqnum;
+		} else {
+			/* If a packet is not available, consider it lost.
+			 * Do not announce it right away - instead, switch
+			 * to a tracking mode, and keep iterating over the
+			 * table. That way, it becomes possible to announce
+			 * entire spans of lost packets in one go, as a gap
+			 * with multiple packets, which can be more efficient
+			 * for PLC. For example, when 10 packets in a row are
+			 * lost, that would then announce a gap that is 10
+			 * packets long instead of announcing 10 times that
+			 * a packet was lost. */
+			if (!tracking_lost_packets) {
+				first_lost_packet_seqnum = seqnum;
+				tracking_lost_packets = true;
+
+				if (first_drained_packet_seqnum >= 0) {
+					/* The slot before this one held the last
+					 * packet of the run that just ended. */
+					uint16_t last_drained_packet_seqnum =
+						jitter_buffer->valid_seqnum_window_start_seqnum + (idx - 1);
+					pw_log_debug("Drained packet(s) with seqnums %" PRId32 " - %" PRIu16,
+						     first_drained_packet_seqnum, last_drained_packet_seqnum);
+					first_drained_packet_seqnum = -1;
+				}
+			}
+		}
+	}
+
+	if (first_drained_packet_seqnum >= 0) {
+		uint16_t last_drained_packet_seqnum = jitter_buffer->valid_seqnum_window_start_seqnum + (idx - 1);
+		pw_log_debug("Drained packet(s) with seqnums %" PRId32 " - %" PRIu16, first_drained_packet_seqnum,
+			     last_drained_packet_seqnum);
+	}
+
+	if (tracking_lost_packets) {
+		/* The drained range ended with a slot that has no packet. This
+		 * can happen when the valid seqnum window is only partially
+		 * drained, that is, num_oldest_slots_to_drain < valid_seqnum_window_length. */
+
+		uint16_t last_lost_packet_seqnum;
+		int16_t seqnum_delta;
+
+		/* Unlike in the loop above, both ends of the span are
+		 * inclusive here, hence the +1 to get the span length. */
+		last_lost_packet_seqnum = jitter_buffer->valid_seqnum_window_start_seqnum + (idx - 1);
+		seqnum_delta = calculate_seqnum_delta(first_lost_packet_seqnum, last_lost_packet_seqnum) + 1;
+
+		spa_assert(seqnum_delta >= 0);
+
+		pw_log_debug("Signaling final packet loss sequence, starting at "
+			     "seqnum %" PRIu16 ", gap length %zu, not open ended",
+			     first_lost_packet_seqnum, (size_t)seqnum_delta);
+
+		/* This final gap is always signaled as a non-open-ended loss,
+		 * since its size is exactly defined. Open-ended packet losses
+		 * happen in cases where the packet loss results in a gap that
+		 * is too large for any packet loss concealment mechanism to
+		 * fully cover (or its end not even known, so PLC is not fully
+		 * applicable). */
+		if ((ret = params->signal_lost_packets(params->context,
+			first_lost_packet_seqnum, (size_t)seqnum_delta, false)) < 0) {
+			goto finish;
+		}
+	}
+
+finish:
+	return ret;
+}
+
diff --git a/src/modules/module-rtp/jitter-buffer.h b/src/modules/module-rtp/jitter-buffer.h
new file mode 100644
index 000000000..408eaf567
--- /dev/null
+++ b/src/modules/module-rtp/jitter-buffer.h
@@ -0,0 +1,317 @@
+/* PipeWire */
+/* SPDX-FileCopyrightText: Copyright © 2026 Carlos Rafael Giani */
+/* SPDX-License-Identifier: MIT */
+
+#ifndef PIPEWIRE_RTP_JITTER_BUFFER_H
+#define PIPEWIRE_RTP_JITTER_BUFFER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <spa/utils/ringbuffer.h>
+#include <pipewire/loop.h>
+
+/* Parameters for setting up a jitter buffer. These are passed to
+ * rtp_jitter_buffer_init(), which stores a copy of them. */
+struct rtp_jitter_buffer_params {
+	/* How many slots the jitter buffer shall have. This defines
+	 * the capacity of the jitter buffer. Must be at least 1.
+	 * For a more detailed explanation what slots are, see
+	 * further below. */
+	size_t num_slots;
+	/* Maximum size of the packets, in bytes. This must include
+	 * the RTP header bytes. Must be at least 1. */
+	size_t max_packet_size;
+	/* Duration of each packet, in nanoseconds.
+	 * Must be at least 1. */
+	uint64_t packet_duration;
+	/* PipeWire loop, used for setting up a timeout timer.
+	 * This needs to be a loop that runs in the same thread
+	 * as this jitter buffer.
+	 * This must be set to a valid pointer. */
+	struct pw_loop *loop;
+	/* User-defined context that is passed to the function pointers. */
+	void *context;
+
+	/* Function pointer called when the jitter buffer decides to output
+	 * an RTP packet. In regular mode, this is immediately invoked when
+	 * rtp_jitter_buffer_insert_packet() is called. No data is copied
+	 * then; this function pointer is directly passed the packet_data and
+	 * packet_size argument values that rtp_jitter_buffer_insert_packet()
+	 * was called with. In hold-back mode, this is called when the jitter
+	 * buffer is able to output previously held back packets in order
+	 * (for example, because a preceding packet just now arrived), or
+	 * when the jitter buffer gets drained. packet_data points to the
+	 * bytes of the RTP packet that is output, and packet_size contains
+	 * the size of the packet in bytes. header_size, timestamp and seqnum
+	 * are the RTP header size, the RTP timestamp and the sequence
+	 * number of that packet.
+	 *
+	 * Should this function experience an error, it returns a negated
+	 * errno, and 0 if it succeeds. In case of an error, if the overall
+	 * logic can continue, it is recommended to fully reset the jitter
+	 * buffer by calling rtp_jitter_buffer_flush().
+	 *
+	 * This must be set to a valid pointer. */
+	int (*output_rtp_packet)(void *context, const uint8_t *packet_data, size_t packet_size,
+				 size_t header_size, uint32_t timestamp, uint16_t seqnum);
+	/* Function pointer called when the jitter buffer detected one or
+	 * more lost packets in a row. For example, if the last received packet
+	 * had sequence number 16, and then, the jitter buffer received packet
+	 * with sequence number 26, then 27, 28, etc., it eventually will
+	 * detect that packets 17 through 25 are lost, and call this, setting
+	 * seqnum_of_first_lost_packet to 17, and gap_length to 9. The last
+	 * argument is set to false if the packet loss is detected mid stream
+	 * when the gap is small enough that measures like PLC can reasonably
+	 * cover all lost packets. If the detected gap is too large, open_ended
+	 * will be set to true. Implementations must then interpret the value of
+	 * gap_length as a maximum; whatever PLC measures the implementation
+	 * uses must not produce output larger than gap_length. This is a
+	 * safety measure to avoid cases where PLC would have to produce an
+	 * excessive amount of data.
+	 *
+	 * When open_ended is true, implementations should also apply a fade
+	 * out at the end of the produced PLC content to avoid a potential
+	 * hard cutoff at the end.
+	 *
+	 * Should this function experience an error, it returns a negated
+	 * errno, and 0 if it succeeds. In case of an error, if the overall
+	 * logic can continue, it is recommended to fully reset the jitter
+	 * buffer by calling rtp_jitter_buffer_flush().
+	 *
+	 * This must be set to a valid pointer. */
+	int (*signal_lost_packets)(void *context, uint16_t seqnum_of_first_lost_packet,
+		size_t gap_length, bool open_ended);
+};
+
+/* Kind of element that a slot contains. */
+enum rtp_jitter_buffer_element_type {
+	/* The slot contains a held-back packet; its bytes are stored
+	 * at the slot's location in the packet buffer. */
+	RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET,
+	/* The slot contains no packet. This is the case for packets
+	 * that have not arrived, and for slots whose packet was
+	 * already output. */
+	RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP
+};
+
+/* Slot structure. A slot stores an element (a packet or
+ * a gap). Slots outside of the valid seqnum window have
+ * undefined contents. See the "slots" array documentation
+ * in the rtp_jitter_buffer structure below. */
+struct rtp_jitter_buffer_slot {
+	/* Whether this slot contains a packet or a gap. */
+	enum rtp_jitter_buffer_element_type element_type;
+	/* Size of the stored packet, in bytes. This is only
+	 * used when the element_type is
+	 * RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET. See
+	 * the packet_buffer documentation below for more. */
+	size_t packet_size;
+	/* Size of the RTP header. This must be less than
+	 * or equal to packet_size. The packet data that
+	 * is stored in packet_buffer includes this header. */
+	size_t header_size;
+	/* RTP timestamp. */
+	uint32_t timestamp;
+};
+
+struct rtp_jitter_buffer {
+	/* Copy of the params passed to rtp_jitter_buffer_init(). */
+	struct rtp_jitter_buffer_params params;
+
+	/* Set to true by rtp_jitter_buffer_init(), and set to false
+	 * by rtp_jitter_buffer_shutdown(). */
+	bool initialized;
+
+	/* Array of slots. The size of this array equals num_slots.
+	 * Only slots that are within the valid seqnum window have
+	 * valid values in this array. For example, if the slot at
+	 * array index 3 corresponds to sequence number 400, but
+	 * the valid seqnum window starts at 402, then the value
+	 * in this array at index 3 has no meaning. Not used when
+	 * regular mode is active. */
+	struct rtp_jitter_buffer_slot *slots;
+
+	/* Buffer for storing packets in hold-back mode. The size
+	 * of this buffer is max_packet_size*num_slots. To access
+	 * a packet, do it this way:
+	 *
+	 * packet_data = &(packet_buffer[max_packet_size * index]);
+	 *
+	 * Doing this is technically less space efficient if the actual
+	 * packet size is smaller than max_packet_size, but it makes
+	 * buffer management and packet addressing much easier, and
+	 * avoids costly reallocations. The packet_size field in the
+	 * slot structure specifies how many bytes at that location
+	 * actually make up the packet. FOr example, if the slot at
+	 * index 5 in the slots array has its element_type value set
+	 * to RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET, then at location
+	 * &(packet_buffer[max_packet_size * 5]), valid packet data
+	 * can be found. And, starting from that location, the amount
+	 * of bytes the packet there is made of equals the packet_size
+	 * value of the slot at index 5.
+	 *
+	 * Only the locations that correspond to valid slots with
+	 * a packet as element contain valid data. For example, if the
+	 * slot at index 3 corresponds to sequence number 400, but the
+	 * valid seqnum window starts at 402, then the data at location
+	 * &(packet_buffer[max_packet_size * 3]) has no meaning. Also,
+	 * if the slot does lie within the valid window, but the type
+	 * of the contained element is RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP,
+	 * then the associated location in this packert buffer is
+	 * meaningless. Not used when regular mode is active. */
+	uint8_t *packet_buffer;
+
+	/* If true, hold-back mode is active. */
+	bool hold_back_mode;
+	/* Sequence number of the last packet observed while in
+	 * regular mode. If no last packet is known (at startup for
+	 * example), this is set to -1.
+	 * Not used when hold-back mode is active. */
+	int32_t last_seqnum;
+	/* Timer that, when it times out, drains all slots and switches
+	 * back to regular mode. This is armed when hold-back mode is
+	 * activated, and when it is reset.
+	 * Not used when regular mode is active. */
+	struct spa_source *timeout_timer;
+	/* These define the valid seqnum window. Packet sequence numbers
+	 * in the [start_seqnum .. (start_seqnum + window_length - 1)]
+	 * range (both start and end of it inclusive) are within this
+	 * window, and are considered valid. A sequence number below the
+	 * window's start_seqnum is considered stale. A sequence number
+	 * beyond the end of the window extends the window; the window
+	 * length is increased such that this new sequence number is at
+	 * the very end of the window, that is, the length is extended
+	 * such that new_seqnum == (start_seqnum + window_length - 1)
+	 *
+	 * However, if this overextends the window (meaning that the
+	 * window length would be greater than the number of slots),
+	 * the logic in rtp_jitter_buffer_insert_packet() will adjust
+	 * the window's start_seqnum and length.
+	 *
+	 * If hold-back mode is off, these two values have no meaning.
+	 *
+	 * This window is necessary for the following:
+	 *
+	 * 1. It allows for detecting and dropping old, stale packets
+	 *    by looking at their sequence numbers.
+	 * 2. It detects when currently held-back packets need to be
+	 *    drained, that is, the gap that precedes them is not getting
+	 *    filled in time. This happens when packets are lost.
+	 * 3. It defines what slots are valid. Only those slots that are
+	 *    associated with sequence numbers that fit inside this
+	 *    window are valid. Slots outside of the window have no
+	 *    defined nor meaningful content (that is, packet size,
+	 *    packet buffer data, or slot element type).
+	 *
+	 * Note that even though valid_seqnum_window_start_seqnum is a
+	 * sequence number, it is a uint32_t instead of a uint16_t, for
+	 * easier arithmetic. */
+	uint32_t valid_seqnum_window_start_seqnum;
+	uint32_t valid_seqnum_window_length;
+};
+
+/* Initializes an uninitialized rtp_jitter_buffer instance with the given params.
+ *
+ * See rtp_jitter_buffer_params for details about the input parameters.
+ *
+ * params must be a valid pointer. An internal copy of the rtp_jitter_buffer_params
+ * instance that pointer refers to will be made, so the caller does not have to
+ * keep that instance around for the duration of the jitter buffer's lifetime.
+ *
+ * The jitter buffer must not be already initialized.
+ *
+ * Returns 0 if initialization was successful, nonzero in case of an error.
+ * If an error happens, this automatically calls rtp_jitter_buffer_shutdown(). */
+int rtp_jitter_buffer_init(struct rtp_jitter_buffer *jitter_buffer, struct rtp_jitter_buffer_params *params);
+
+/* Shuts down a previously initialized jitter buffer.
+ *
+ * jitter_buffer must be a valid pointer.
+ *
+ * Calling this on an uninitialized jitter buffer results in a no-op. */
+void rtp_jitter_buffer_shutdown(struct rtp_jitter_buffer *jitter_buffer);
+
+/* Returns true if the jitter buffer is initialized, as recorded in its "initialized" field. */
+static inline bool rtp_jitter_buffer_is_initialized(struct rtp_jitter_buffer *jitter_buffer)
+{
+	return jitter_buffer->initialized;  /* jitter_buffer must be valid; it is dereferenced unchecked */
+}
+
+/* Inserts an RTP packet into the jitter buffer.
+ *
+ * Depending on the sequence number of the RTP packet, the jitter buffer
+ * will immediately output that packet, or hold it back inside a slot
+ * for purposes of reordering and packet loss detection. (The packet's data
+ * will be copied into the packet buffer.) The packet order is defined by
+ * the RTP sequence number. (16-bit unsigned int RTP sequence wrap-around
+ * is handled properly.)
+ *
+ * If for example packets with sequence numbers 1, 4, 3, 5 arrived (in this
+ * incorrect order), packet 1 will have been output immediately, but packets
+ * 4, 3, 5 will have been held back (due to the gap at 2 and the incorrect
+ * order of packets). If later, packet 2 arrives, the gap at 2 is filled,
+ * and the jitter buffer will then output all packets from 2 to 5, in the
+ * correct order. If however packet 2 never arrives, but packets past
+ * packet 5 keep arriving, then eventually, packet 2 will be considered
+ * lost (causing signal_lost_packets() to be called), and the held-back packets
+ * 4, 3, 5 will be drained (and output via output_rtp_packet(), in correct
+ * order 3, 4, 5).
+ *
+ * jitter_buffer and packet_data must be valid pointers. packet_size must
+ * be the size of the memory block pointed to by packet_data.
+ *
+ * header_size is the size of the RTP header, and must be less than or equal
+ * to packet_size. timestamp is the RTP timestamp.
+ *
+ * Note that while the values of header_size, timestamp, seqnum could be
+ * parsed from the packet header, this is not done by this function for
+ * efficiency reasons. Parsing those may require byte swapping to handle
+ * endianness, and computing the header size may depend on whether the
+ * header has extensions or not. These computations and processes are done
+ * once, by the caller, and passed to this call via the arguments.
+ *
+ * Returns 0 if the call was successful, nonzero in case of an error.
+ * If an error happens, consider the jitter buffer as no longer usable
+ * (it can only be shut down then). */
+int rtp_jitter_buffer_insert_packet(struct rtp_jitter_buffer *jitter_buffer,
+				    const uint8_t *packet_data, size_t packet_size,
+				    size_t header_size, uint32_t timestamp, uint16_t seqnum);
+
+/* Drains all held-back packets and reports packet loss based on those packets.
+ *
+ * If the jitter buffer is currently holding back packets due to some packets
+ * having been added out of order previously, or because gaps were detected, this
+ * drains them, causing the output_rtp_packet() and signal_lost_packets() function
+ * pointers from the params passed to rtp_jitter_buffer_init() to be called. If
+ * for example packets with sequence numbers 1, 4, 3, 5 arrived (in this incorrect
+ * order), packet 1 will have been output immediately, but packets 4, 3, 5 will
+ * have been held back (due to the gap at 2 and the incorrect order of packets).
+ * If then, this function is called, the jitter buffer will (in this order) call
+ * signal_lost_packets() to signal the loss of packet 2, and then call
+ * output_rtp_packet() to output packets 3, 4, 5 (in that corrected order).
+ *
+ * After draining, the jitter buffer will be back in regular mode.
+ *
+ * If no packets were added, or if the jitter buffer is in regular mode,
+ * this does nothing.
+ *
+ * The jitter buffer also has an internal timer that is activated as soon
+ * as gaps and out of order packets are detected. If that timer expires,
+ * the jitter buffer will automatically drain itself. If however all gaps
+ * are filled in time, the timer is deactivated, and no automatic drain occurs.
+ *
+ * jitter_buffer must be a valid pointer.
+ *
+ * Returns 0 if the call was successful, nonzero in case of an error.
+ * If an error happens, consider the jitter buffer as no longer usable
+ * (it can only be shut down then). */
+int rtp_jitter_buffer_drain(struct rtp_jitter_buffer *jitter_buffer);
+
+/* Flushes all held-back packets.
+ *
+ * Unlike draining, this does not call any function pointers. Instead, it
+ * flushes any and all held-back packets (meaning, these packets are all
+ * gone, and _not_ reported as lost via signal_lost_packets()), and
+ * resets the jitter buffer back to the regular mode.
+ *
+ * This is useful for performing a hard reset on the jitter buffer.
+ *
+ * jitter_buffer must be a valid pointer. */
+void rtp_jitter_buffer_flush(struct rtp_jitter_buffer *jitter_buffer);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif /* PIPEWIRE_RTP_JITTER_BUFFER_H */
diff --git a/src/modules/module-rtp/opus-codec.c b/src/modules/module-rtp/opus-codec.c
new file mode 100644
index 000000000..f4a5bcccf
--- /dev/null
+++ b/src/modules/module-rtp/opus-codec.c
@@ -0,0 +1,625 @@
+/* PipeWire */
+/* SPDX-FileCopyrightText: Copyright © 2026 Carlos Rafael Giani */
+/* SPDX-License-Identifier: MIT */
+
+#include "config.h"
+
+#ifdef HAVE_OPUS
+
+#include <errno.h>
+#include <inttypes.h>
+
+#include "opus-codec.h"
+
+#include <spa/param/audio/raw-types.h>
+#include <spa/utils/defs.h>
+
+#include <pipewire/log.h>
+
+#include <opus/opus.h>
+#include <opus/opus_multistream.h>
+
+PW_LOG_TOPIC_EXTERN(mod_topic);
+#define PW_LOG_TOPIC_DEFAULT mod_topic
+
+static int opus_error_to_neg_errno(int opus_error)
+{
+	/* Map a libopus status code to 0 (OPUS_OK) or a negative errno value. */
+	if (opus_error == OPUS_OK) return 0;
+	if (opus_error == OPUS_BAD_ARG) return -EINVAL;
+	if (opus_error == OPUS_BUFFER_TOO_SMALL) return -ENOBUFS;
+	if (opus_error == OPUS_INVALID_PACKET) return -EBADMSG;
+	if (opus_error == OPUS_UNIMPLEMENTED) return -ENOTSUP;
+	if (opus_error == OPUS_INVALID_STATE) return -EBADF;
+	if (opus_error == OPUS_ALLOC_FAIL) return -ENOMEM;
+
+	/* OPUS_INTERNAL_ERROR and any unrecognized code are reported as I/O errors. */
+	return -EIO;
+}
+
+static void rtp_opus_codec_shutdown(struct rtp_audio_codec_context *context);
+
+/* Initializes context as an Opus multistream encoder or decoder, depending on type.
+ * Checks channel count, sample rate, sample format and frame duration against what
+ * Opus supports, creates the libopus handle, applies the "opus.encoder.*" properties
+ * (encoder only; codec_props may be NULL) and allocates the output buffer. Returns 0
+ * on success or a negative errno; on error, rtp_opus_codec_shutdown() cleans up. */
+static int rtp_opus_codec_init(struct rtp_audio_codec_context *context,
+	                       struct spa_audio_info *stream_info,
+			       enum rtp_audio_codec_type type, uint32_t samples_per_frame,
+			       size_t max_encoded_frame_size, size_t stride,
+			       struct pw_properties *codec_props)
+{
+	int ret = 0;
+	int opus_ret = OPUS_OK;
+	uint32_t i;
+	unsigned char mapping[255];
+	uint64_t us_per_frame;
+	const char *codec_type_name = "";
+	size_t output_buffer_size = 0;
+
+	spa_assert(stream_info != NULL);
+	spa_assert(context != NULL);
+	spa_assert(samples_per_frame > 0);
+	spa_assert(max_encoded_frame_size > 0);
+	spa_assert(stride > 0);
+
+	spa_memzero(context, sizeof(struct rtp_audio_codec_context));
+
+	/* Opus supports:
+	 *
+	 * - 1 to 255 channels
+	 * - 8, 12, 16, 24, 48 kHz sample rates
+	 * - 16-bit signed integer and 32-bit float point as sample format
+	 *   (with native endianness)
+	 * - 2.5, 5, 10, 20, 40, 60ms frame sizes
+	 *
+	 * Check that the parameters satisfy these constraints. */
+
+	if ((stream_info->info.raw.channels == 0) || (stream_info->info.raw.channels > 255)) {
+		pw_log_error("Opus cannot handle %" PRIu32 " channel(s); valid channel count range: 1-255",
+			stream_info->info.raw.channels);
+		ret = -EINVAL;
+		goto error;
+	}
+
+	switch (stream_info->info.raw.rate) {
+	case 8000:
+	case 12000:
+	case 16000:
+	case 24000:
+	case 48000:
+		break;
+	default:
+		pw_log_error("unsupported sample rate of %" PRIu32 " Hz; supported sample rates: "
+			     "8000, 12000, 16000, 24000, 48000", stream_info->info.raw.rate);
+		ret = -EINVAL;
+		goto error;
+	}
+
+	switch (stream_info->info.raw.format) {
+	case SPA_AUDIO_FORMAT_S16:
+	case SPA_AUDIO_FORMAT_F32:
+		break;
+	default:
+		pw_log_error("unsupported sample format %s; Opus requires 16-bit signed integer "
+			     "or 32-bit floating point samples",
+			     spa_type_audio_format_to_short_name(stream_info->info.raw.format));
+		ret = -EINVAL;
+		goto error;
+	}
+
+	us_per_frame = (uint64_t)samples_per_frame * 1000000 / stream_info->info.raw.rate;
+
+	switch (us_per_frame) {
+	case 2500:
+	case 5000:
+	case 10000:
+	case 20000:
+	case 40000:
+	case 60000:
+		break;
+	default:
+		pw_log_error("unsupported frame length: %" PRIu32 " samples (%.1f ms)",
+			samples_per_frame, (double)us_per_frame / 1000.0);
+		ret = -EINVAL;
+		goto error;
+	}
+
+	/* Setup the context. */
+	context->audio_info = *stream_info;
+	context->samples_per_frame = samples_per_frame;
+	context->max_encoded_frame_size = max_encoded_frame_size;
+	context->stride = stride;
+	context->type = type;
+
+	/* TODO: Currently, we use a simple 1:1 channel mapping. Also, coupled
+	 * streams are not used at the moment. This is a limitation of the current
+	 * Opus integration, in part because it is unclear how to communicate
+	 * the channel mapping and coupled streams from the en- to the decoder.
+	 * One possibility would be to enforce a (de-facto) standard channel
+	 * mapping, like the Vorbis channel mapping. */
+
+	for (i = 0; i < stream_info->info.raw.channels; i++)
+		mapping[i] = i;
+
+	switch (type) {
+	case RTP_AUDIO_CODEC_TYPE_ENCODER: {
+		static const opus_int32 DEFAULT_COMPLEXITY = 10;
+
+		bool use_computed_max_bitrate = true;
+		opus_int32 props_bitrate = -1;
+		opus_int32 computed_max_bitrate;
+		opus_int32 complexity = DEFAULT_COMPLEXITY;
+		bool restricted_lowdelay = false;
+		bool in_band_fec = false;
+		int packet_loss_percentage = 0;
+		opus_int32 signal_type = OPUS_AUTO;
+		const char *signal_type_str = "auto";
+
+		if (codec_props != NULL) {
+			const char *prop_str;
+
+			complexity = pw_properties_get_int32(codec_props, "opus.encoder.complexity", complexity);
+			props_bitrate = pw_properties_get_int32(codec_props, "opus.encoder.bitrate", props_bitrate);
+			restricted_lowdelay = pw_properties_get_bool(codec_props, "opus.encoder.restricted-lowdelay",
+								     restricted_lowdelay);
+			in_band_fec = pw_properties_get_bool(codec_props, "opus.encoder.inband-fec", in_band_fec);
+			packet_loss_percentage = pw_properties_get_int32(codec_props, "opus.encoder.packet-loss-percentage",
+									 packet_loss_percentage);
+
+			if ((packet_loss_percentage < 0) || (packet_loss_percentage > 100)) {
+				pw_log_error("invalid packet loss percentage %d (valid range: 0-100)", packet_loss_percentage);
+				ret = -EINVAL;
+				goto error;
+			}
+
+			prop_str = pw_properties_get(codec_props, "opus.encoder.signal-type");
+			if (prop_str != NULL) {
+				/* "auto" keeps the OPUS_AUTO default; the string is kept for logging. */
+				signal_type_str = prop_str;
+				if (spa_streq(prop_str, "voice"))
+					signal_type = OPUS_SIGNAL_VOICE;
+				else if (spa_streq(prop_str, "music"))
+					signal_type = OPUS_SIGNAL_MUSIC;
+				else if (!spa_streq(prop_str, "auto")) {
+					pw_log_error("unsupported Opus encoder signal type \"%s\"", prop_str);
+					ret = -EINVAL;
+					goto error;
+				}
+			}
+		}
+
+		context->handle = opus_multistream_encoder_create(
+			stream_info->info.raw.rate,
+			stream_info->info.raw.channels,
+			stream_info->info.raw.channels,
+			0,
+			mapping,
+			restricted_lowdelay ? OPUS_APPLICATION_RESTRICTED_LOWDELAY : OPUS_APPLICATION_AUDIO,
+			&opus_ret);
+
+		if (opus_ret != OPUS_OK)
+			goto error_while_creating;
+
+		/* Some CTL may actually not be implemented. It is safe to ignore these. */
+		#define SET_OPUS_ENCODER_CTL(CTL) \
+			do { \
+				opus_ret = opus_multistream_encoder_ctl(context->handle, CTL); \
+				switch (opus_ret) { \
+				case OPUS_OK: \
+					break; \
+				case OPUS_UNIMPLEMENTED: \
+					pw_log_debug("could not set encoder CTL %s since it is " \
+						     "not implemented; ignoring", #CTL); \
+					break; \
+				default: \
+					pw_log_error("error while setting encoder CTL %s: %s", \
+						     #CTL, opus_strerror(opus_ret)); \
+					ret = opus_error_to_neg_errno(opus_ret); \
+					goto error; \
+				} \
+			} while (0)
+
+		/* Use the maximum encoded frame size as the size
+		 * for the output buffer, since that one will be
+		 * used as the destination for the encoder. */
+		output_buffer_size = max_encoded_frame_size;
+
+		/* Hardcode encoder CTLs to ensure consistent and deterministic
+		 * encoding behavior, even if the defaults change across
+		 * libopus versions. */
+
+		/* Counterintuitively, CBR is not actually useful for RTP. RFC 7587
+		 * section 3.1.2 documents this. This applies to the generic audio
+		 * use case here as well. The opus_multistream_encode() function
+		 * (and its float variant) accept an argument that hard-limits the
+		 * maximum encoded output size, so there is no possibility of Opus
+		 * frames exceeding the maximum RTP payload size. Do constrain VBR
+		 * though to not experience excessive variability. */
+		SET_OPUS_ENCODER_CTL(OPUS_SET_VBR(1));
+		SET_OPUS_ENCODER_CTL(OPUS_SET_VBR_CONSTRAINT(1));
+		/* This is a niche feature that is useful for avoiding audio configuration
+		 * switching in software stacks. Signals are forcibly converted to mono or
+		 * stereo, depending on the parameter. Not needed here: channel configuration
+		 * is determined by the PipeWire stream format, not the codec. */
+		SET_OPUS_ENCODER_CTL(OPUS_SET_FORCE_CHANNELS(OPUS_AUTO));
+		/* Allow the encoder to automatically select the appropriate bandwidth, and
+		 * set the upper bound to the maximum to enable unconstrained selection. */
+		SET_OPUS_ENCODER_CTL(OPUS_SET_MAX_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND));
+		SET_OPUS_ENCODER_CTL(OPUS_SET_BANDWIDTH(OPUS_AUTO));
+		/* DTX is a SILK-layer feature, and not useful for generic audio. It
+		 * is there for detecting silence / low-energy periods during speech. */
+		SET_OPUS_ENCODER_CTL(OPUS_SET_DTX(0));
+		/* This is a hint for the encoder about the actual depth of the source
+		 * signal. For example, if 32-bit floating point audio is the input, but
+		 * the actual audio signal has a noise floor that resembles that of
+		 * 16-bit audio, then setting this to 16 is appropriate. But such
+		 * signal details are not known here, so set this to the maximum.
+		 *
+		 * Also, from https://www.opus-codec.org/docs/opus_api-1.6/group__opus__encoderctls.html#gaa23940eb477ff617edc14b8d66e104c0 :
+		 * > When using opus_encode() instead of opus_encode_float(), or when libopus is compiled
+		 * > for fixed-point, the encoder uses the minimum of the value set here and the value 16. */
+		SET_OPUS_ENCODER_CTL(OPUS_SET_LSB_DEPTH(24));
+		/* Disabling prediction hurts encoder efficiency substantially, and
+		 * is only really useful for debugging and testing purposes. Keep
+		 * prediction on. */
+		SET_OPUS_ENCODER_CTL(OPUS_SET_PREDICTION_DISABLED(0));
+#ifdef OPUS_SET_DRED_DURATION_REQUEST
+		/* DRED is a new deep-learning-based redundancy mechanism that embeds up
+		 * to one second of recovery data. It is not used here, since, at this point,
+		 * it only works with SILK, though maybe one day it will work with the CELT
+		 * layer as well. */
+		SET_OPUS_ENCODER_CTL(OPUS_SET_DRED_DURATION(0));
+#endif
+#ifdef OPUS_SET_QEXT_REQUEST
+		/* Quality extensions are a new feature that requires very high bitrates.
+		 * See: https://datatracker.ietf.org/doc/draft-ietf-mlcodec-opus-scalable-quality-extension/
+		 * Disabled due to its experimental nature and need for very high bitrates. */
+		SET_OPUS_ENCODER_CTL(OPUS_SET_QEXT(0));
+#endif
+
+		/* Set the bitrate. Estimate the "max computed bitrate" out of the
+		 * max_encoded_frame_size that makes the most use of the available space.
+		 * The valid bitrate range is 500 to 512000 bits/s, so clamp the estimation
+		 * to that range (in 64 bits, before narrowing to opus_int32). */
+		computed_max_bitrate = (opus_int32)SPA_CLAMP(((uint64_t)max_encoded_frame_size) * 8 *
+			SPA_USEC_PER_SEC / us_per_frame, UINT64_C(500), UINT64_C(512000));
+
+		pw_log_debug("computed a bitrate of %" PRId32 " bits/s for Opus encoding based "
+			     "on the max allowed encoded frame size of %zu bytes",
+			     (int32_t)computed_max_bitrate, max_encoded_frame_size);
+
+		if (props_bitrate >= 0) {
+			pw_log_info("got bitrate %" PRId32 " from properties", (int32_t)props_bitrate);
+
+			if (props_bitrate < 500) {
+				pw_log_warn("bitrate %" PRId32 " from properties is invalid (must "
+					    "be at least 500 bits/s); setting computed bitrate instead",
+					    (int32_t)props_bitrate);
+			} else if (props_bitrate > computed_max_bitrate) {
+				pw_log_warn("bitrate %" PRId32 " from properties exceeds computed "
+					    "max bitrate; setting computed max bitrate instead",
+					    (int32_t)props_bitrate);
+			} else {
+				use_computed_max_bitrate = false;
+			}
+		}
+
+		if (use_computed_max_bitrate) {
+			pw_log_info("setting computed max bitrate of %" PRId32 " bits/s",
+				    (int32_t)computed_max_bitrate);
+			SET_OPUS_ENCODER_CTL(OPUS_SET_BITRATE(computed_max_bitrate));
+		} else {
+			pw_log_info("setting bitrate of %" PRId32 " bits/s from opus.encoder.bitrate property",
+				    (int32_t)props_bitrate);
+			SET_OPUS_ENCODER_CTL(OPUS_SET_BITRATE(props_bitrate));
+		}
+
+		/* Set the encoding complexity. */
+		if ((complexity < 0) || (complexity > 10)) {
+			pw_log_warn("complexity %" PRId32 " is invalid; choosing %" PRId32 " as default",
+				(int32_t)complexity, (int32_t)DEFAULT_COMPLEXITY);
+			complexity = DEFAULT_COMPLEXITY;
+		}
+		SET_OPUS_ENCODER_CTL(OPUS_SET_COMPLEXITY(complexity));
+		pw_log_info("setting encoding complexity %" PRId32, (int32_t)complexity);
+
+		/* Set the in-band FEC. */
+		SET_OPUS_ENCODER_CTL(OPUS_SET_INBAND_FEC(in_band_fec ? 1 : 0));
+		SET_OPUS_ENCODER_CTL(OPUS_SET_PACKET_LOSS_PERC(packet_loss_percentage));
+		pw_log_info("%s in-band FEC; packet loss percentage: %d",
+			    in_band_fec ? "enabling" : "disabling", packet_loss_percentage);
+
+		/* Set the signal type. */
+		SET_OPUS_ENCODER_CTL(OPUS_SET_SIGNAL(signal_type));
+		pw_log_info("setting signal type \"%s\"", signal_type_str);
+
+		#undef SET_OPUS_ENCODER_CTL
+
+		break;
+	}
+	case RTP_AUDIO_CODEC_TYPE_DECODER:
+		context->handle = opus_multistream_decoder_create(
+			stream_info->info.raw.rate,
+			stream_info->info.raw.channels,
+			stream_info->info.raw.channels,
+			0,
+			mapping,
+			&opus_ret);
+
+		if (opus_ret != OPUS_OK)
+			goto error_while_creating;
+
+		/* Use the space a fully decoded frame needs as
+		 * the output buffer size, since that one will be
+		 * used as the destination for the decoder. */
+		output_buffer_size = samples_per_frame * stride;
+		break;
+	default:
+		pw_log_error("unsupported audio codec type %d", (int)(type));
+		ret = -EINVAL;
+		goto error;
+	}
+
+	codec_type_name = rtp_audio_codec_type_name(type);
+
+	pw_log_info("Opus %s created; samples per frame: %" PRIu32 "; max encoded frame data size: %zu; "
+		    "output buffer size: %zu; ms per frame: %.1f; sample format: %s", codec_type_name,
+		    samples_per_frame, max_encoded_frame_size, output_buffer_size, (double)us_per_frame / 1000.0,
+		    spa_type_audio_format_to_short_name(stream_info->info.raw.format));
+
+	context->output_buffer = malloc(output_buffer_size);
+	if (context->output_buffer == NULL) {
+		pw_log_error("could not allocate %s output buffer", codec_type_name);
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	return 0;
+
+error:
+	rtp_opus_codec_shutdown(context);
+	return ret;
+
+error_while_creating:
+	context->handle = NULL;
+	pw_log_error("error while creating %s: %s", rtp_audio_codec_type_name(type), opus_strerror(opus_ret));
+	ret = opus_error_to_neg_errno(opus_ret);
+	goto error;
+}
+
+static void rtp_opus_codec_shutdown(struct rtp_audio_codec_context *context)
+{
+	/* Releases the libopus handle and the output buffer, whichever of them
+	 * exist, and clears both pointers so a repeated call does nothing. */
+	spa_assert(context != NULL);
+
+	if (context->handle) {
+		if (context->type != RTP_AUDIO_CODEC_TYPE_ENCODER)
+			opus_multistream_decoder_destroy(context->handle);
+		else
+			opus_multistream_encoder_destroy(context->handle);
+		context->handle = NULL;
+		pw_log_info("Opus %s destroyed", rtp_audio_codec_type_name(context->type));
+	}
+	if (context->output_buffer) {
+		free(context->output_buffer);
+		context->output_buffer = NULL;
+		pw_log_debug("%s output buffer freed", rtp_audio_codec_type_name(context->type));
+	}
+}
+
+static void rtp_opus_codec_reset(struct rtp_audio_codec_context *context, char const *reason)
+{
+	/* Resets the en-/decoder state via OPUS_RESET_STATE (CTL result is not checked). */
+	spa_assert(context != NULL);
+
+	if (context->type != RTP_AUDIO_CODEC_TYPE_ENCODER)
+		opus_multistream_decoder_ctl(context->handle, OPUS_RESET_STATE);
+	else
+		opus_multistream_encoder_ctl(context->handle, OPUS_RESET_STATE);
+	if (reason == NULL)
+		pw_log_info("Opus %s reset (no reason given)", rtp_audio_codec_type_name(context->type));
+	else
+		pw_log_info("Opus %s reset, reason: %s", rtp_audio_codec_type_name(context->type), reason);
+}
+
+/* Reports the codec delay in samples through *delay.
+ *
+ * For a decoder this is the fixed decoder delay; for an encoder it is the
+ * encoder-only part of the lookahead. Returns 0 or a negative errno. */
+static int rtp_opus_codec_get_delay(struct rtp_audio_codec_context *context, size_t *delay)
+{
+	bool is_encoder;
+	int opus_ret;
+	opus_int32 rate = 0;
+	opus_int32 dec_delay_samples;
+	opus_int32 enc_lookahead = 0;
+
+	spa_assert(context != NULL);
+	spa_assert(delay != NULL);
+
+	is_encoder = (context->type == RTP_AUDIO_CODEC_TYPE_ENCODER);
+
+	/* libopus only exposes a combined "lookahead" value (encoder plus
+	 * decoder delay), and only through the encoder. RTP handling wants
+	 * the two delays separately, so:
+	 *
+	 * - a decoder reports the decoder delay, which RFC 6716 fixes at
+	 *   2.5 ms for every decoder implementation;
+	 * - an encoder reports its lookahead minus that decoder delay.
+	 *
+	 * Either way the 2.5 ms must be converted to samples, which needs
+	 * the en-/decoder's sample rate. */
+	opus_ret = is_encoder
+		? opus_multistream_encoder_ctl(context->handle, OPUS_GET_SAMPLE_RATE(&rate))
+		: opus_multistream_decoder_ctl(context->handle, OPUS_GET_SAMPLE_RATE(&rate));
+	if (opus_ret != OPUS_OK) {
+		pw_log_error("could not get %s sample rate: %s",
+			     rtp_audio_codec_type_name(context->type), opus_strerror(opus_ret));
+		return opus_error_to_neg_errno(opus_ret);
+	}
+
+	/* 2.5 ms expressed as 2500 us keeps this an integer computation. */
+	dec_delay_samples = 2500LL * rate / SPA_USEC_PER_SEC;
+
+	if (!is_encoder) {
+		*delay = dec_delay_samples;
+		return 0;
+	}
+
+	opus_ret = opus_multistream_encoder_ctl(context->handle, OPUS_GET_LOOKAHEAD(&enc_lookahead));
+	if (opus_ret != OPUS_OK) {
+		pw_log_error("could not get encoder lookahead: %s", opus_strerror(opus_ret));
+		return opus_error_to_neg_errno(opus_ret);
+	}
+
+	/* A lookahead below the decoder delay would make the difference negative. */
+	if (SPA_UNLIKELY(enc_lookahead < dec_delay_samples)) {
+		pw_log_error("lookahead %" PRId32 " is smaller than decoder delay %" PRId32,
+			     (int32_t)enc_lookahead, (int32_t)dec_delay_samples);
+		return -EINVAL;
+	}
+
+	*delay = enc_lookahead - dec_delay_samples;
+	return 0;
+}
+
+static int rtp_opus_codec_encode(struct rtp_audio_codec_context *context, const uint8_t *in_samples,
+	uint8_t **out_encoded_data, size_t *out_encoded_data_size)
+{
+	int num_bytes;
+
+	spa_assert(context != NULL);
+	spa_assert(in_samples != NULL);
+	spa_assert(out_encoded_data != NULL);
+	spa_assert(out_encoded_data_size != NULL);
+
+	/* Pick the libopus entry point matching the sample format; output goes to
+	 * the context's output buffer, capped at max_encoded_frame_size bytes. */
+	if (context->audio_info.info.raw.format == SPA_AUDIO_FORMAT_S16) {
+		num_bytes = opus_multistream_encode(context->handle, (const opus_int16 *)in_samples,
+			context->samples_per_frame, context->output_buffer, context->max_encoded_frame_size);
+	} else if (context->audio_info.info.raw.format == SPA_AUDIO_FORMAT_F32) {
+		num_bytes = opus_multistream_encode_float(context->handle, (const float *)in_samples,
+			context->samples_per_frame, context->output_buffer, context->max_encoded_frame_size);
+	} else {
+		return -EINVAL;
+	}
+
+	if (num_bytes < 0) {
+		pw_log_error("error while encoding audio: %s", opus_strerror(num_bytes));
+		return -EIO;
+	}
+
+	*out_encoded_data = context->output_buffer;
+	*out_encoded_data_size = num_bytes;
+	pw_log_trace("encoded %zu samples to %zu bytes", context->samples_per_frame,
+		     *out_encoded_data_size);
+	return 0;
+}
+
+static int rtp_opus_codec_decode(struct rtp_audio_codec_context *context, const uint8_t *in_encoded_data,
+	size_t in_encoded_data_size, uint8_t **out_samples, size_t *out_num_samples)
+{
+	int num_samples;
+
+	spa_assert(context != NULL);
+	spa_assert(in_encoded_data != NULL);
+	spa_assert(in_encoded_data_size > 0);
+	spa_assert(out_samples != NULL);
+	spa_assert(out_num_samples != NULL);
+
+	/* Decode into the context's output buffer, with samples_per_frame as
+	 * the frame size limit, using the call that matches the sample format. */
+	if (context->audio_info.info.raw.format == SPA_AUDIO_FORMAT_S16) {
+		num_samples = opus_multistream_decode(context->handle, in_encoded_data, in_encoded_data_size,
+			(opus_int16 *)(context->output_buffer), context->samples_per_frame, 0);
+	} else if (context->audio_info.info.raw.format == SPA_AUDIO_FORMAT_F32) {
+		num_samples = opus_multistream_decode_float(context->handle, in_encoded_data, in_encoded_data_size,
+			(float *)(context->output_buffer), context->samples_per_frame, 0);
+	} else {
+		return -EINVAL;
+	}
+
+	/* A negative result is passed to opus_strerror() for the log message. */
+	if (num_samples < 0) {
+		pw_log_error("error while decoding audio: %s", opus_strerror(num_samples));
+		return -EIO;
+	}
+
+	*out_samples = context->output_buffer;
+	*out_num_samples = num_samples;
+	pw_log_trace("decoded %zu bytes to %zu samples", in_encoded_data_size, *out_num_samples);
+	return 0;
+}
+
+/* Produces concealment audio in place of a lost packet; the samples are
+ * written to the context's output buffer and returned via out_samples. */
+static int rtp_opus_codec_apply_plc(struct rtp_audio_codec_context *context,
+	uint8_t **out_samples, size_t *out_num_samples)
+{
+	int num_samples;
+
+	spa_assert(context != NULL);
+	spa_assert(out_samples != NULL);
+	spa_assert(out_num_samples != NULL);
+
+	/* Packet loss concealment is requested from libopus by calling the
+	 * decode function with a NULL packet of length 0. See:
+	 * https://www.opus-codec.org/docs/opus_api-1.6/group__opus__multistream.html#gaa4b89541efe01970cf52e4a336db3ad0 */
+	if (context->audio_info.info.raw.format == SPA_AUDIO_FORMAT_S16) {
+		num_samples = opus_multistream_decode(context->handle, NULL, 0,
+			(opus_int16 *)(context->output_buffer), context->samples_per_frame, 0);
+	} else if (context->audio_info.info.raw.format == SPA_AUDIO_FORMAT_F32) {
+		num_samples = opus_multistream_decode_float(context->handle, NULL, 0,
+			(float *)(context->output_buffer), context->samples_per_frame, 0);
+	} else {
+		return -EINVAL;
+	}
+
+	/* A negative result is passed to opus_strerror() for the log message. */
+	if (num_samples < 0) {
+		pw_log_error("error while applying PLC: %s", opus_strerror(num_samples));
+		return -EIO;
+	}
+
+	*out_samples = context->output_buffer;
+	*out_num_samples = num_samples;
+	pw_log_debug("generated %zu PLC samples", *out_num_samples);
+	return 0;
+}
+
+static const char * rtp_opus_codec_get_name(void)
+{
+	return "Opus";  /* fixed name string; a literal, so the caller must not free it */
+}
+
+const struct rtp_audio_codec* get_rtp_opus_codec(void)
+{
+	static const struct rtp_audio_codec codec = {  /* function table wiring up the Opus callbacks above */
+		.init = rtp_opus_codec_init,
+		.shutdown = rtp_opus_codec_shutdown,
+		.reset = rtp_opus_codec_reset,
+		.get_delay = rtp_opus_codec_get_delay,
+		.encode = rtp_opus_codec_encode,
+		.decode = rtp_opus_codec_decode,
+		.apply_plc = rtp_opus_codec_apply_plc,
+		.get_name = rtp_opus_codec_get_name,
+	};
+	/* The table has static storage, so the returned pointer stays valid after this call. */
+	return &codec;
+}
+
+#else
+
+#include <stddef.h>
+
+const struct rtp_audio_codec* get_rtp_opus_codec(void)
+{
+	return NULL;  /* HAVE_OPUS is not defined in this branch: no Opus codec is available */
+}
+
+#endif
diff --git a/src/modules/module-rtp/opus-codec.h b/src/modules/module-rtp/opus-codec.h
new file mode 100644
index 000000000..1192e3c57
--- /dev/null
+++ b/src/modules/module-rtp/opus-codec.h
@@ -0,0 +1,20 @@
+/* PipeWire */
+/* SPDX-FileCopyrightText: Copyright © 2026 Carlos Rafael Giani */
+/* SPDX-License-Identifier: MIT */
+
+#ifndef PIPEWIRE_RTP_OPUS_CODEC_H
+#define PIPEWIRE_RTP_OPUS_CODEC_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "audio-codec.h"
+/* Returns the Opus codec's function table, or NULL if built without HAVE_OPUS. */
+const struct rtp_audio_codec* get_rtp_opus_codec(void);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif /* PIPEWIRE_RTP_OPUS_CODEC_H */
diff --git a/src/modules/module-rtp/opus.c b/src/modules/module-rtp/opus.c
deleted file mode 100644
index c25fc17f8..000000000
--- a/src/modules/module-rtp/opus.c
+++ /dev/null
@@ -1,359 +0,0 @@
-/* PipeWire */
-/* SPDX-FileCopyrightText: Copyright © 2023 Wim Taymans <wim.taymans@gmail.com> */
-/* SPDX-License-Identifier: MIT */
-
-#ifdef HAVE_OPUS
-
-#include <opus/opus.h>
-#include <opus/opus_multistream.h>
-
-/* TODO: Direct timestamp mode here may require a rework. See audio.c for a reference.
- * Also check out the usage of actual_max_buffer_size in audio.c. */
-
-static void rtp_opus_process_playback(void *data)
-{
-	struct impl *impl = data;
-	struct pw_buffer *buf;
-	struct spa_data *d;
-	uint32_t wanted, timestamp, target_buffer, stride, maxsize;
-	int32_t avail;
-
-	if ((buf = pw_stream_dequeue_buffer(impl->stream)) == NULL) {
-		pw_log_info("Out of stream buffers: %m");
-		return;
-	}
-	d = buf->buffer->datas;
-
-	stride = impl->stride;
-
-	maxsize = d[0].maxsize / stride;
-	wanted = buf->requested ? SPA_MIN(buf->requested, maxsize) : maxsize;
-
-	if (impl->io_position && impl->direct_timestamp) {
-		/* in direct mode, read directly from the timestamp index,
-		 * because sender and receiver are in sync, this would keep
-		 * target_buffer of samples available. */
-		spa_ringbuffer_read_update(&impl->ring,
-				impl->io_position->clock.position);
-	}
-	avail = spa_ringbuffer_get_read_index(&impl->ring, &timestamp);
-
-	target_buffer = impl->target_buffer;
-
-	if (avail < (int32_t)wanted) {
-		enum spa_log_level level;
-		memset(d[0].data, 0, wanted * stride);
-		if (impl->have_sync) {
-			impl->have_sync = false;
-			level = SPA_LOG_LEVEL_WARN;
-		} else {
-			level = SPA_LOG_LEVEL_DEBUG;
-		}
-		pw_log(level, "underrun %d/%u < %u",
-					avail, target_buffer, wanted);
-	} else {
-		double error, corr;
-		if (impl->first) {
-			if ((uint32_t)avail > target_buffer) {
-				uint32_t skip = avail - target_buffer;
-				pw_log_debug("first: avail:%d skip:%u target:%u",
-							avail, skip, target_buffer);
-				timestamp += skip;
-				avail = target_buffer;
-			}
-			impl->first = false;
-		} else if (avail > (int32_t)SPA_MIN(target_buffer * 8, impl->buffer_size2 / stride)) {
-			pw_log_warn("overrun %u > %u", avail, target_buffer * 8);
-			timestamp += avail - target_buffer;
-			avail = target_buffer;
-		}
-		if (!impl->direct_timestamp) {
-			/* when not using direct timestamp and clocks are not
-			 * in sync, try to adjust our playback rate to keep the
-			 * requested target_buffer bytes in the ringbuffer */
-			error = (double)target_buffer - (double)avail;
-			error = SPA_CLAMPD(error, -impl->max_error, impl->max_error);
-
-			corr = spa_dll_update(&impl->dll, error);
-
-			pw_log_trace("avail:%u target:%u error:%f corr:%f", avail,
-					target_buffer, error, corr);
-
-			pw_stream_set_rate(impl->stream, 1.0 / corr);
-		}
-		spa_ringbuffer_read_data(&impl->ring,
-				impl->buffer,
-				impl->buffer_size2,
-				(timestamp * stride) & impl->buffer_mask2,
-				d[0].data, wanted * stride);
-
-		timestamp += wanted;
-		spa_ringbuffer_read_update(&impl->ring, timestamp);
-	}
-	d[0].chunk->offset = 0;
-	d[0].chunk->size = wanted * stride;
-	d[0].chunk->stride = stride;
-	d[0].chunk->flags = 0;
-	buf->size = wanted;
-
-	pw_stream_queue_buffer(impl->stream, buf);
-}
-
-static int rtp_opus_receive(struct impl *impl, uint8_t *buffer, ssize_t len,
-			ssize_t hlen, uint64_t current_time)
-{
-	struct rtp_header *hdr;
-	ssize_t plen;
-	uint16_t seq;
-	uint32_t timestamp, samples, write, expected_write;
-	uint32_t stride = impl->stride;
-	OpusMSDecoder *dec = impl->stream_data;
-	int32_t filled;
-	int res;
-
-	hdr = (struct rtp_header*)buffer;
-
-	seq = ntohs(hdr->sequence_number);
-	if (impl->have_seq && impl->seq != seq) {
-		pw_log_info("unexpected seq (%d != %d) SSRC:%u",
-				seq, impl->seq, impl->ssrc);
-		impl->have_sync = false;
-	}
-	impl->seq = seq + 1;
-	impl->have_seq = true;
-
-	timestamp = ntohl(hdr->timestamp) - impl->ts_offset;
-
-	impl->receiving = true;
-
-	plen = len - hlen;
-
-	filled = spa_ringbuffer_get_write_index(&impl->ring, &expected_write);
-
-	/* we always write to timestamp + delay */
-	write = timestamp + impl->target_buffer;
-
-	if (!impl->have_sync) {
-		pw_log_info("sync to timestamp:%u seq:%u ts_offset:%u SSRC:%u target:%u direct:%u",
-				timestamp, seq, impl->ts_offset, impl->ssrc,
-				impl->target_buffer, impl->direct_timestamp);
-
-		/* we read from timestamp, keeping target_buffer of data
-		 * in the ringbuffer. */
-		impl->ring.readindex = timestamp;
-		impl->ring.writeindex = write;
-		filled = impl->target_buffer;
-
-		spa_dll_init(&impl->dll);
-		spa_dll_set_bw(&impl->dll, SPA_DLL_BW_MIN, 128, impl->rate);
-		memset(impl->buffer, 0, impl->buffer_size);
-		impl->have_sync = true;
-	} else if (expected_write != write) {
-		pw_log_debug("unexpected write (%u != %u)",
-				write, expected_write);
-	}
-
-	if (filled + 2880 > (int32_t)(impl->buffer_size2 / stride)) {
-		pw_log_debug("capture overrun %u + %d > %u", filled, 2880,
-				impl->buffer_size2 / stride);
-		impl->have_sync = false;
-	} else {
-		uint32_t index = (write * stride) & impl->buffer_mask2, end;
-
-		res = opus_multistream_decode_float(dec,
-				&buffer[hlen], plen,
-				(float*)&impl->buffer[index], 2880,
-				0);
-
-		end = index + (res * stride);
-		/* fold to the lower part of the ringbuffer when overflow */
-		if (end > impl->buffer_size2)
-			memmove(impl->buffer, &impl->buffer[impl->buffer_size2], end - impl->buffer_size2);
-
-		pw_log_info("receiving %zd len:%d timestamp:%d %u", plen, res, timestamp, index);
-		samples = res;
-
-		write += samples;
-		spa_ringbuffer_write_update(&impl->ring, write);
-	}
-	return 0;
-}
-
-static void rtp_opus_flush_packets(struct impl *impl)
-{
-	int32_t avail, tosend;
-	uint32_t stride, timestamp, offset;
-	uint8_t out[1280];
-	struct iovec iov[2];
-	struct rtp_header header;
-	OpusMSEncoder *enc = impl->stream_data;
-	int res = 0;
-
-	avail = spa_ringbuffer_get_read_index(&impl->ring, &timestamp);
-	tosend = impl->psamples;
-
-	if (avail < tosend)
-		return;
-
-	stride = impl->stride;
-
-	spa_zero(header);
-	header.v = 2;
-	header.pt = impl->payload;
-	header.ssrc = htonl(impl->ssrc);
-
-	iov[0].iov_base = &header;
-	iov[0].iov_len = sizeof(header);
-	iov[1].iov_base = out;
-	iov[1].iov_len = 0;
-
-	offset = 0;
-	while (avail >= tosend) {
-		header.sequence_number = htons(impl->seq);
-		header.timestamp = htonl(impl->ts_offset + timestamp);
-
-		res = opus_multistream_encode_float(enc,
-				(const float*)&impl->buffer[offset * stride], tosend,
-				out, sizeof(out));
-
-		pw_log_trace("sending %d len:%d timestamp:%d", tosend, res, timestamp);
-		iov[1].iov_len = res;
-
-		rtp_stream_call_send_packet(impl, iov, 2);
-
-		impl->seq++;
-		timestamp += tosend;
-		offset += tosend;
-		avail -= tosend;
-	}
-
-	pw_log_trace("move %d offset:%d", avail, offset);
-	memmove(impl->buffer, &impl->buffer[offset * stride], avail * stride);
-
-	spa_ringbuffer_read_update(&impl->ring, timestamp);
-}
-
-static void rtp_opus_process_capture(void *data)
-{
-	struct impl *impl = data;
-	struct pw_buffer *buf;
-	struct spa_data *d;
-	uint32_t offs, size, timestamp, expected_timestamp, stride;
-	int32_t filled, wanted;
-
-	if ((buf = pw_stream_dequeue_buffer(impl->stream)) == NULL) {
-		pw_log_info("Out of stream buffers: %m");
-		return;
-	}
-	d = buf->buffer->datas;
-
-	offs = SPA_MIN(d[0].chunk->offset, d[0].maxsize);
-	size = SPA_MIN(d[0].chunk->size, d[0].maxsize - offs);
-	stride = impl->stride;
-	wanted = size / stride;
-
-	filled = spa_ringbuffer_get_write_index(&impl->ring, &expected_timestamp);
-
-	if (SPA_LIKELY(impl->io_position)) {
-		uint32_t rate = impl->io_position->clock.rate.denom;
-		timestamp = impl->io_position->clock.position * impl->rate / rate;
-	} else
-		timestamp = expected_timestamp;
-
-	if (!impl->have_sync) {
-		pw_log_info("sync to timestamp:%u seq:%u ts_offset:%u SSRC:%u",
-				timestamp, impl->seq, impl->ts_offset, impl->ssrc);
-		impl->ring.readindex = impl->ring.writeindex = expected_timestamp = timestamp;
-		memset(impl->buffer, 0, impl->buffer_size);
-		impl->have_sync = true;
-	} else {
-		if (SPA_ABS((int32_t)expected_timestamp - (int32_t)timestamp) > 32) {
-			pw_log_warn("expected %u != timestamp %u", expected_timestamp, timestamp);
-			impl->have_sync = false;
-		} else if (filled + wanted > (int32_t)(impl->buffer_size / stride)) {
-			pw_log_warn("overrun %u + %u > %u", filled, wanted, impl->buffer_size / stride);
-			impl->have_sync = false;
-		}
-	}
-
-	spa_ringbuffer_write_data(&impl->ring,
-			impl->buffer,
-			impl->buffer_size,
-			(filled * stride) & impl->buffer_mask,
-			SPA_PTROFF(d[0].data, offs, void), wanted * stride);
-	expected_timestamp += wanted;
-	spa_ringbuffer_write_update(&impl->ring, expected_timestamp);
-
-	pw_stream_queue_buffer(impl->stream, buf);
-
-	rtp_opus_flush_packets(impl);
-}
-
-static void rtp_opus_deinit(struct impl *impl, enum spa_direction direction)
-{
-	if (impl->stream_data) {
-		if (direction == SPA_DIRECTION_INPUT)
-			opus_multistream_encoder_destroy(impl->stream_data);
-		else
-			opus_multistream_decoder_destroy(impl->stream_data);
-	}
-}
-
-static int rtp_opus_init(struct impl *impl, enum spa_direction direction)
-{
-	int err;
-	unsigned char mapping[255];
-	uint32_t i;
-
-	if (impl->info.info.opus.channels > 255)
-		return -EINVAL;
-
-	if (impl->psamples >= 2880)
-		impl->psamples = 2880;
-	else if (impl->psamples >= 1920)
-		impl->psamples = 1920;
-	else if (impl->psamples >= 960)
-		impl->psamples = 960;
-	else if (impl->psamples >= 480)
-		impl->psamples = 480;
-	else if (impl->psamples >= 240)
-		impl->psamples = 240;
-	else
-		impl->psamples = 120;
-
-	for (i = 0; i < impl->info.info.opus.channels; i++)
-		mapping[i] = i;
-
-	impl->deinit = rtp_opus_deinit;
-	impl->receive_rtp = rtp_opus_receive;
-	if (direction == SPA_DIRECTION_INPUT) {
-		impl->stream_events.process = rtp_opus_process_capture;
-
-		impl->stream_data = opus_multistream_encoder_create(
-			impl->info.info.opus.rate,
-			impl->info.info.opus.channels,
-			impl->info.info.opus.channels, 0,
-			mapping,
-			OPUS_APPLICATION_AUDIO,
-			&err);
-	}
-	else {
-		impl->stream_events.process = rtp_opus_process_playback;
-
-		impl->stream_data = opus_multistream_decoder_create(
-			impl->info.info.opus.rate,
-			impl->info.info.opus.channels,
-			impl->info.info.opus.channels, 0,
-			mapping,
-			&err);
-	}
-	if (!impl->stream_data)
-		pw_log_error("opus error: %d", err);
-	return impl->stream_data ? 0 : err;
-}
-#else
-static int rtp_opus_init(struct impl *impl, enum spa_direction direction)
-{
-	return -ENOTSUP;
-}
-#endif
diff --git a/src/modules/module-rtp/stream.c b/src/modules/module-rtp/stream.c
index c61950978..7ead41d0e 100644
--- a/src/modules/module-rtp/stream.c
+++ b/src/modules/module-rtp/stream.c
@@ -12,6 +12,7 @@
 #include <spa/utils/json.h>
 #include <spa/utils/ringbuffer.h>
 #include <spa/utils/dll.h>
+#include <spa/utils/defs.h>
 #include <spa/param/audio/format-utils.h>
 #include <spa/param/audio/raw-json.h>
 #include <spa/param/latency-utils.h>
@@ -27,6 +28,9 @@
 #include <module-rtp/rtp.h>
 #include <module-rtp/stream.h>
 #include <module-rtp/apple-midi.h>
+#include <module-rtp/jitter-buffer.h>
+#include <module-rtp/audio-codec.h>
+#include <module-rtp/opus-codec.h>
 
 PW_LOG_TOPIC_EXTERN(mod_topic);
 #define PW_LOG_TOPIC_DEFAULT mod_topic
@@ -105,7 +109,6 @@ struct impl {
 	const struct rtp_format_info *rtp_format_info;
 
 	enum spa_direction direction;
-	void *stream_data;
 
 	uint32_t rate;
 	uint32_t stride;
@@ -113,6 +116,15 @@ struct impl {
 	uint8_t payload;
 	uint32_t ssrc;
 	uint16_t seq;
+	/* This is used for pre-checking for received packets that arrive
+	 * out of order. Depending on the audio type, a jitter buffer
+	 * may be used intermediately, and then, the seq field above
+	 * will be set _after_ the jitter buffer reorders packets,
+	 * so it cannot be used for the pre-checking purpose. Thus,
+	 * this separate seq field is required.
+	 * It is int32_t to be able to store -1 as an indicator that
+	 * no expected seqnum is set yet. */
+	int32_t next_expected_incoming_seq;
 	unsigned fixed_ssrc:1;
 	unsigned have_ssrc:1;
 	unsigned ignore_ssrc:1;
@@ -187,6 +199,19 @@ struct impl {
 	void (*deinit)(struct impl *impl, enum spa_direction direction);
 	int (*resend_packets)(struct impl *impl, uint16_t seq, uint16_t num);
 
+	struct rtp_jitter_buffer jitter_buffer;
+	const struct rtp_audio_codec *audio_codec;
+	struct rtp_audio_codec_context audio_codec_context;
+	/* This buffer is needed in case the data that is to be encoded is wrapped
+	 * around the ring buffer border. In such a case, the two halves have to
+	 * be copied and merged into this buffer, since audio codecs expect one
+	 * contiguous input memory block as the data to encode. */
+	uint8_t *audio_encoder_staging_buffer;
+
+	/* Delay of the audio codec. Depending on how the audio codec is configured,
+	 * this is either the decoder delay or the encoder delay. In samples. */
+	size_t codec_delay;
+
 	/*
 	 * pw_filter where the filter would be driven at the PTP clock
 	 * rate with RTP sink being driven at the sink driver clock rate
@@ -311,7 +336,6 @@ static int do_finish_stopping_state(struct spa_loop *loop, bool async, uint32_t
 
 #include "module-rtp/audio.c"
 #include "module-rtp/midi.c"
-#include "module-rtp/opus.c"
 
 struct rtp_format_info {
 	uint32_t media_subtype;
@@ -407,6 +431,11 @@ static int stream_start(struct impl *impl)
 		pw_log_error("error while closing leftover connection: %s", spa_strerror(res));
 	}
 
+	impl->next_expected_incoming_seq = -1;
+
+	if (impl->audio_codec != NULL)
+		reset_rtp_audio_codec(impl, "starting new stream");
+
 	impl->reset_ringbuffer(impl);
 
 	res = 0;
@@ -519,23 +548,18 @@ static void on_stream_state_changed(void *d, enum pw_stream_state old,
 	}
 }
 
-static void update_latency_params(struct impl *impl)
+static void fill_latency_params(struct impl *impl, struct spa_pod_builder *b,
+				const struct spa_pod **params, uint32_t *n_params)
 {
-	uint32_t n_params = 0;
-	const struct spa_pod *params[2];
-	uint8_t buffer[1024];
-	struct spa_pod_builder b;
 	struct spa_latency_info main_latency;
 
-	spa_pod_builder_init(&b, buffer, sizeof(buffer));
-
 	/* main_latency is the latency in the direction indicated by impl->direction.
-	 * In RTP streams, this consists solely of the process latency. (In theory,
-	 * PipeWire SPA nodes could have additional latencies on top of the process
-	 * latency, but this is not the case here.) The other direction is already
-	 * handled by pw_stream.
+	 * In RTP streams, this consists of the process latency. In the INPUT direction
+	 * (which is what sinks use), the encoder delay is also part of main_latency.
+	 * The full latency params also include latency in the other direction -
+	 * this is already handled by pw_stream.
 	 *
-	 * The main_latncy is passed as updated SPA_PARAM_Latency params to the stream.
+	 * The main_latency is passed as updated SPA_PARAM_Latency params to the stream.
 	 * That way, the stream always gets information of latency for _both_ directions;
 	 * the direction indicated by impl->direction is covered by main_latency, and
 	 * the opposite direction is already taken care of by the default pw_stream
@@ -547,10 +571,27 @@ static void update_latency_params(struct impl *impl)
 	main_latency = SPA_LATENCY_INFO(impl->direction);
 	spa_process_latency_info_add(&impl->process_latency, &main_latency);
 
-	params[n_params++] = spa_latency_build(&b, SPA_PARAM_Latency, &main_latency);
-	params[n_params++] = spa_process_latency_build(&b, SPA_PARAM_ProcessLatency,
-							&impl->process_latency);
+	if (impl->direction == PW_DIRECTION_INPUT) {
+		int64_t codec_delay_ns = (int64_t)(impl->codec_delay) * SPA_NSEC_PER_SEC / impl->rate;
+		main_latency.min_ns += codec_delay_ns;
+		main_latency.max_ns += codec_delay_ns;
+	}
 
+	params[(*n_params)++] = spa_latency_build(b, SPA_PARAM_Latency, &main_latency);
+	params[(*n_params)++] = spa_process_latency_build(b, SPA_PARAM_ProcessLatency,
+							  &impl->process_latency);
+}
+
+static void update_latency_params(struct impl *impl)
+{
+	const struct spa_pod *params[2];
+	uint32_t n_params;
+	uint8_t buffer[1024];
+	struct spa_pod_builder b;
+
+	n_params = 0;
+	spa_pod_builder_init(&b, buffer, sizeof(buffer));
+	fill_latency_params(impl, &b, params, &n_params);
 	pw_stream_update_params(impl->stream, params, n_params);
 }
 
@@ -609,11 +650,12 @@ static const struct rtp_format_info *find_rtp_pcm_audio_format_info(const struct
 	return NULL;
 }
 
-static int parse_audio_info(const struct pw_properties *props, struct spa_audio_info_raw *info)
+static int parse_audio_info(const struct pw_properties *props, struct spa_audio_info_raw *info,
+			    const char *default_format)
 {
 	return spa_audio_info_raw_init_dict_keys(info,
 			&SPA_DICT_ITEMS(
-				 SPA_DICT_ITEM(SPA_KEY_AUDIO_FORMAT, DEFAULT_FORMAT),
+				 SPA_DICT_ITEM(SPA_KEY_AUDIO_FORMAT, default_format),
 				 SPA_DICT_ITEM(SPA_KEY_AUDIO_RATE, SPA_STRINGIFY(DEFAULT_RATE)),
 				 SPA_DICT_ITEM(SPA_KEY_AUDIO_POSITION, DEFAULT_POSITION)),
 			&props->dict,
@@ -660,6 +702,8 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core,
 	float latency_msec;
 	int res;
 	bool process_latency_from_sess;
+	uint32_t audio_codec_type = 0;
+	const char *default_audio_format = NULL;
 
 	impl = calloc(1, sizeof(*impl));
 	if (impl == NULL) {
@@ -690,11 +734,15 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core,
 		impl->info.media_type = SPA_MEDIA_TYPE_audio;
 		impl->info.media_subtype = SPA_MEDIA_SUBTYPE_raw;
 		impl->payload = 127;
+		audio_codec_type = SPA_MEDIA_SUBTYPE_raw;
+		default_audio_format = DEFAULT_RAW_AUDIO_FORMAT;
 	}
 	else if (spa_streq(str, "raop")) {
 		impl->info.media_type = SPA_MEDIA_TYPE_audio;
 		impl->info.media_subtype = SPA_MEDIA_SUBTYPE_raw;
 		impl->payload = 0x60;
+		audio_codec_type = SPA_MEDIA_SUBTYPE_raw;
+		default_audio_format = DEFAULT_RAOP_AUDIO_FORMAT;
 	}
 	else if (spa_streq(str, "midi")) {
 		impl->info.media_type = SPA_MEDIA_TYPE_application;
@@ -702,10 +750,14 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core,
 		impl->payload = 0x61;
 	}
 #ifdef HAVE_OPUS
+	/* The "opus" sess.media type is actually raw audio that
+	 * is encoded with Opus before sending it out as RTP. */
 	else if (spa_streq(str, "opus")) {
 		impl->info.media_type = SPA_MEDIA_TYPE_audio;
-		impl->info.media_subtype = SPA_MEDIA_SUBTYPE_opus;
+		impl->info.media_subtype = SPA_MEDIA_SUBTYPE_raw;
 		impl->payload = 127;
+		audio_codec_type = SPA_MEDIA_SUBTYPE_opus;
+		default_audio_format = DEFAULT_OPUS_AUDIO_FORMAT;
 	}
 #endif
 	else {
@@ -716,22 +768,61 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core,
 
 	switch (impl->info.media_subtype) {
 	case SPA_MEDIA_SUBTYPE_raw:
-		if ((res = parse_audio_info(props, &impl->info.info.raw)) < 0) {
+		if ((res = parse_audio_info(props, &impl->info.info.raw, default_audio_format)) < 0) {
 			pw_log_error("can't parse format: %s", spa_strerror(res));
 			goto out;
 		}
 		impl->stream_info = impl->info;
-		impl->rtp_format_info = find_rtp_pcm_audio_format_info(&impl->info);
-		if (impl->rtp_format_info == NULL) {
-			pw_log_error("unsupported audio format:%d (%s) channels:%d",
+		impl->rate = impl->stream_info.info.raw.rate;
+
+		/* Pick the RTP information and stride values suitable for
+		 * the specified codec type. If the codec type is set to
+		 * SPA_MEDIA_SUBTYPE_raw, then no special encoding is done,
+		 * and PCM samples are transmitted directly over RTP. */
+		switch (audio_codec_type) {
+		case SPA_MEDIA_SUBTYPE_raw:
+			impl->rtp_format_info = find_rtp_pcm_audio_format_info(&impl->info);
+			if (impl->rtp_format_info == NULL) {
+				pw_log_error("unsupported audio format:%d (%s) channels:%d",
 					impl->stream_info.info.raw.format,
 					spa_type_audio_format_to_short_name(impl->stream_info.info.raw.format),
 					impl->stream_info.info.raw.channels);
+				res = -EINVAL;
+				goto out;
+			}
+			impl->stride = impl->rtp_format_info->size * impl->stream_info.info.raw.channels;
+			pw_log_info("configured raw PCM RTP payload: MIME: %s format: %s rate: %"
+				PRIu32 " stride: %" PRIu32, impl->rtp_format_info->mime,
+				spa_type_audio_format_to_short_name(impl->rtp_format_info->format),
+				impl->rate, impl->stride);
+			break;
+		case SPA_MEDIA_SUBTYPE_opus:
+			impl->rtp_format_info = &rtp_opus_format_info;
+			switch (impl->stream_info.info.raw.format) {
+			case SPA_AUDIO_FORMAT_S16:
+				impl->stride = 2 * impl->stream_info.info.raw.channels;
+				break;
+			case SPA_AUDIO_FORMAT_F32:
+				impl->stride = 4 * impl->stream_info.info.raw.channels;
+				break;
+			default:
+				pw_log_error("unsupported raw audio format for encoding to Opus:%d (%s)",
+					impl->stream_info.info.raw.format,
+					spa_type_audio_format_to_short_name(impl->stream_info.info.raw.format));
+				res = -EINVAL;
+				goto out;
+			}
+			pw_log_info("configured Opus RTP payload: format: %s rate: %" PRIu32 " stride: %"
+				PRIu32, spa_type_audio_format_to_short_name(impl->stream_info.info.raw.format),
+				impl->rate, impl->stride);
+			break;
+		default:
+			pw_log_error("unsupported audio encoding:%d (%s)", audio_codec_type,
+				spa_type_to_short_name(audio_codec_type,
+					spa_type_media_subtype, "<unknown>"));
 			res = -EINVAL;
 			goto out;
 		}
-		impl->stride = impl->rtp_format_info->size * impl->stream_info.info.raw.channels;
-		impl->rate = impl->stream_info.info.raw.rate;
 		break;
 	case SPA_MEDIA_SUBTYPE_control:
 		impl->stream_info = impl->info;
@@ -741,21 +832,8 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core,
 		impl->rate = pw_properties_get_uint32(props, "midi.rate", 10000);
 		if (impl->rate == 0)
 			impl->rate = 10000;
-		break;
-	case SPA_MEDIA_SUBTYPE_opus:
-		impl->stream_info.media_type = SPA_MEDIA_TYPE_audio;
-		impl->stream_info.media_subtype = SPA_MEDIA_SUBTYPE_raw;
-		if ((res = parse_audio_info(props, &impl->stream_info.info.raw)) < 0) {
-			pw_log_error("can't parse format: %s", spa_strerror(res));
-			goto out;
-		}
-		impl->stream_info.info.raw.format = SPA_AUDIO_FORMAT_F32;
-		impl->info.info.opus.rate = impl->stream_info.info.raw.rate;
-		impl->info.info.opus.channels = impl->stream_info.info.raw.channels;
-
-		impl->rtp_format_info = &rtp_opus_format_info;
-		impl->stride = impl->rtp_format_info->size * impl->stream_info.info.raw.channels;
-		impl->rate = impl->stream_info.info.raw.rate;
+		pw_log_info("configured MIDI RTP payload: rate: %" PRIu32 " stride: %" PRIu32,
+			impl->rate, impl->stride);
 		break;
 	default:
 		spa_assert_not_reached();
@@ -845,6 +923,7 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core,
 	impl->payload_size = impl->mtu - impl->header_size;
 
 	impl->seq = pw_rand32();
+	impl->next_expected_incoming_seq = -1;
 
 	str = pw_properties_get(props, "sess.min-ptime");
 	if (!spa_atof(str, &min_ptime))
@@ -954,6 +1033,26 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core,
 	spa_dll_set_bw(&impl->dll, SPA_DLL_BW_MIN, 128, impl->rate);
 	impl->corr = 1.0;
 
+	switch (impl->info.media_subtype) {
+	case SPA_MEDIA_SUBTYPE_raw:
+		switch (audio_codec_type) {
+		case SPA_MEDIA_SUBTYPE_opus:
+			res = setup_rtp_audio_codec(impl, get_rtp_opus_codec(), props);
+			break;
+		default:
+			res = 0;
+			break;
+		}
+
+		if (SPA_UNLIKELY(res < 0))
+			goto out;
+
+		break;
+
+	default:
+		break;
+	}
+
 	impl->stream = pw_stream_new(core, "rtp-session", spa_steal_ptr(props));
 	if (impl->stream == NULL) {
 		res = -errno;
@@ -980,12 +1079,6 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core,
                                 SPA_FORMAT_mediaSubtype,        SPA_POD_Id(SPA_MEDIA_SUBTYPE_control));
 		rtp_midi_init(impl, direction);
 		break;
-	case SPA_MEDIA_SUBTYPE_opus:
-		params[n_params++] = spa_format_audio_build(&b,
-				SPA_PARAM_EnumFormat, &impl->stream_info);
-		flags |= PW_STREAM_FLAG_AUTOCONNECT;
-		rtp_opus_init(impl, direction);
-		break;
 	default:
 		res = -EINVAL;
 		goto out;
@@ -998,24 +1091,20 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core,
 		 * quantity in turn is subjected to constraint checks (see above), it is
 		 * possible that the _actual_ session latency no longer equals the value
 		 * of sess.latency.msec by the time this location is reached. To take into
-		 * account these constraint adjustments, convert back the impl->target_buffer
-		 * to nanoseconds, and use that as the process latency.
+		 * account these constraint adjustments, fill_latency_params() converts
+		 * back the impl->target_buffer to nanoseconds, and uses that as the
+		 * process latency.
 		 *
-		 * Then, just like how update_latency_params() does it, construct the
-		 * SPA_PARAM_Latency and SPA_PARAM_ProcessLatency params to let the new
+		 * Then, just like in update_latency_params(), the SPA_PARAM_Latency
+		 * and SPA_PARAM_ProcessLatency params are constructed to let the new
 		 * pw_stream know of these latency figures right from the start. */
 
-		struct spa_latency_info latency;
-
 		impl->process_latency.ns = (int64_t)(impl->target_buffer * 1e9 / impl->rate);
-		pw_log_debug("set process latency to %" PRId64 " based on sess.latency.msec "
-			"value %f", impl->process_latency.ns, latency_msec);
+		pw_log_debug("set process latency to %" PRId64 " ns based on sess.latency.msec "
+			"value %f ms (= %" PRIu32 " samples)", impl->process_latency.ns,
+			latency_msec, impl->target_buffer);
 
-		latency = SPA_LATENCY_INFO(impl->direction);
-		spa_process_latency_info_add(&(impl->process_latency), &latency);
-		params[n_params++] = spa_latency_build(&b, SPA_PARAM_Latency, &latency);
-		params[n_params++] = spa_process_latency_build(&b, SPA_PARAM_ProcessLatency,
-								&(impl->process_latency));
+		fill_latency_params(impl, &b, params, &n_params);
 	}
 
 	pw_stream_add_listener(impl->stream,
@@ -1040,6 +1129,7 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core,
 
 	return (struct rtp_stream*)impl;
 out:
 	pw_properties_free(props);
 	if (impl) {
+		teardown_rtp_audio_codec(impl);
 		if (impl->stream)
@@ -1058,6 +1148,8 @@ void rtp_stream_destroy(struct rtp_stream *s)
 
 	rtp_stream_emit_destroy(impl);
 
+	teardown_rtp_audio_codec(impl);
+
 	if (impl->deinit)
 		impl->deinit(impl, impl->direction);
 
@@ -1142,7 +1234,17 @@ unexpected_ssrc:
 int rtp_stream_resend_packets(struct rtp_stream *s, uint16_t seq, uint16_t num)
 {
 	struct impl *impl = (struct impl*)s;
-	if (impl->resend_packets)
+	/* Resending only works with raw data, since codecs usually
+	 * have internal state that is updated after encoding a
+	 * packet. This then means that resend attempts would not
+	 * yield the exact same packet, and this can corrupt the
+	 * state of the decoder in a receiver.
+	 * To support resending with codecs, the last N packets
+	 * have to be cached somehow. Given the fact that, thus
+	 * far, only the RAOP sink resends packets, and RAOP only
+	 * supports raw PCM, it is currently easier to just
+	 * disable retransmissions when a codec is in use. */
+	if (impl->resend_packets && (impl->audio_codec == NULL))
 		return impl->resend_packets(impl, seq, num);
 	else
 		return -ENOTSUP;
diff --git a/src/modules/module-rtp/stream.h b/src/modules/module-rtp/stream.h
index 37d041b06..0cf44eda7 100644
--- a/src/modules/module-rtp/stream.h
+++ b/src/modules/module-rtp/stream.h
@@ -11,7 +11,9 @@ extern "C" {
 
 struct rtp_stream;
 
-#define DEFAULT_FORMAT		"S16BE"
+#define DEFAULT_RAW_AUDIO_FORMAT  "S16BE"
+#define DEFAULT_RAOP_AUDIO_FORMAT "S16LE"
+#define DEFAULT_OPUS_AUDIO_FORMAT "F32"
 #define DEFAULT_RATE		48000
 #define DEFAULT_CHANNELS	2
 #define DEFAULT_POSITION	"[ FL FR ]"
diff --git a/test/meson.build b/test/meson.build
index a54e059b9..6982720dd 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -124,6 +124,17 @@ test('test-spa',
                link_with: pwtest_lib)
 )
 
+test('test-module-rtp-common-lib',
+    executable('test-module-rtp-common-lib',
+               'modules/module-rtp/test-jitter-buffer.c',
+               include_directories : [
+                 pwtest_inc,
+                 include_directories('../src/modules'),
+                 ],
+               dependencies: [ spa_dep, pipewire_module_rtp_common_dep ],
+               link_with: [pwtest_lib])
+)
+
 openal_info = find_program('openal-info', required: false)
 if openal_info.found()
     cdata.set_quoted('OPENAL_INFO_PATH', openal_info.full_path())
diff --git a/test/modules/module-rtp/test-jitter-buffer.c b/test/modules/module-rtp/test-jitter-buffer.c
new file mode 100644
index 000000000..ae4943643
--- /dev/null
+++ b/test/modules/module-rtp/test-jitter-buffer.c
@@ -0,0 +1,1259 @@
+/* PipeWire */
+/* SPDX-FileCopyrightText: Copyright © 2026 Carlos Rafael Giani */
+/* SPDX-License-Identifier: MIT */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <arpa/inet.h>
+
+#include "config.h"
+
+#include <pipewire/pipewire.h>
+#include <module-rtp/rtp.h>
+#include <module-rtp/jitter-buffer.h>
+
+#include "pwtest.h"
+
+PW_LOG_TOPIC(mod_topic, "test.rtp-jitter-buffer");
+#define PW_LOG_TOPIC_DEFAULT mod_topic
+
+enum test_event_type {
+	TEST_EVENT_OUTPUT_PACKET,
+	TEST_EVENT_LOST_PACKETS,
+};
+
+struct test_event {
+	enum test_event_type type;
+	union {
+		struct {
+			uint16_t seqnum;
+		} output;
+		struct {
+			uint16_t first_seqnum;
+			size_t count;
+			bool open_ended;
+		} lost;
+	};
+};
+
+#define MAX_TEST_EVENTS 256
+#define MAX_TEST_PACKET_SIZE 2048
+#define TEST_PACKET_SIZE 128
+#define TEST_HEADER_SIZE 16
+#define TEST_TIMESTAMP 123456
+#define TEST_PACKET_DURATION (10 * SPA_NSEC_PER_MSEC)
+
+struct test_context {
+	struct pw_loop *loop;
+	struct pw_main_loop *main_loop;
+	struct rtp_jitter_buffer jitter_buffer;
+
+	struct test_event events[MAX_TEST_EVENTS];
+	size_t num_events;
+
+	uint8_t packet_bytes[MAX_TEST_PACKET_SIZE];
+};
+
+static void send_packet(struct test_context *test_context, uint16_t seqnum)
+{
+	/* Create a simulated RTP packet. Only write the sequence number
+	 * into its header. The rest (SSRC, CSRC, payload type etc.) are
+	 * of no interest to the jitter buffer - it only cares about the
+	 * sequence number. */
+	struct rtp_header *header = (struct rtp_header *)(test_context->packet_bytes);
+	header->sequence_number = htons(seqnum);
+	int ret = rtp_jitter_buffer_insert_packet(&(test_context->jitter_buffer),
+		test_context->packet_bytes, (TEST_PACKET_SIZE), (TEST_HEADER_SIZE), (TEST_TIMESTAMP), seqnum);
+	assert(ret == 0);
+}
+
+static int test_output_rtp_packet(void *context, const uint8_t *packet_data, size_t packet_size,
+	size_t header_size, uint32_t timestamp, uint16_t seqnum)
+{
+	struct test_context *test_context = context;
+	const struct rtp_header *header = (const struct rtp_header *)packet_data;
+
+	assert(test_context->num_events < MAX_TEST_EVENTS);
+
+	/* Output callback of the jitter buffer under test. Check
+	 * that packet_size is the actual packet size and not just
+	 * params.max_packet_size, and that the other values
+	 * (header size, timestamp) are correct as well. */
+	pwtest_int_eq(packet_size, (size_t)(TEST_PACKET_SIZE));
+	pwtest_int_eq(header_size, (size_t)(TEST_HEADER_SIZE));
+	pwtest_int_eq(timestamp, (uint32_t)(TEST_TIMESTAMP));
+
+	/* Compare the seqnum that is given by the caller
+	 * with the seqnum in the RTP header to verify that
+	 * the packet data is correctly associated with the
+	 * information from the function arguments. */
+	pwtest_int_eq(seqnum, ntohs(header->sequence_number));
+
+	test_context->events[test_context->num_events].type = TEST_EVENT_OUTPUT_PACKET;
+	test_context->events[test_context->num_events].output.seqnum = seqnum;
+	pw_log_debug("Output RTP packet with seqnum %" PRIu16, test_context->events[test_context->num_events].output.seqnum);
+	test_context->num_events++;
+
+	return 0;
+}
+
+static int test_signal_lost_packets(void *context, uint16_t seq_of_first_lost_packet,
+	size_t num_lost_packets, bool open_ended)
+{
+	struct test_context *test_context = context;
+
+	assert(test_context->num_events < MAX_TEST_EVENTS);
+
+	test_context->events[test_context->num_events].type = TEST_EVENT_LOST_PACKETS;
+	test_context->events[test_context->num_events].lost.first_seqnum = seq_of_first_lost_packet;
+	test_context->events[test_context->num_events].lost.count = num_lost_packets;
+	test_context->events[test_context->num_events].lost.open_ended = open_ended;
+	test_context->num_events++;
+
+	return 0;
+}
+
+static void setup_test_context(struct test_context *test_context, size_t num_slots)
+{
+	struct rtp_jitter_buffer_params params;
+
+	assert(test_context != NULL);
+
+	spa_memzero(test_context, sizeof(struct test_context));
+
+	pw_init(0, NULL);
+
+	test_context->main_loop = pw_main_loop_new(NULL);
+	assert(test_context->main_loop != NULL);
+	test_context->loop = pw_main_loop_get_loop(test_context->main_loop);
+
+	memset(&params, 0, sizeof(params));
+	params.num_slots = num_slots;
+	/* Set the maximum packet size to a value higher than TEST_PACKET_SIZE
+	 * to be able to check in test_output_rtp_packet() that that function
+	 * does not simply get the max_packet_size value as the packet size,
+	 * but the _actual_ packet size. (Also see test_output_rtp_packet().) */
+	params.max_packet_size = MAX_TEST_PACKET_SIZE;
+	params.packet_duration = TEST_PACKET_DURATION;
+	params.loop = test_context->loop;
+	params.context = test_context;
+	params.output_rtp_packet = test_output_rtp_packet;
+	params.signal_lost_packets = test_signal_lost_packets;
+
+	int ret = rtp_jitter_buffer_init(&(test_context->jitter_buffer), &params);
+	assert(ret == 0);
+}
+
+static void teardown_test_context(struct test_context *test_context)
+{
+	assert(test_context != NULL);
+
+	rtp_jitter_buffer_shutdown(&(test_context->jitter_buffer));
+	if (test_context->main_loop != NULL)
+		pw_main_loop_destroy(test_context->main_loop);
+	pw_deinit();
+}
+
+#define SHIFT_TEST_EVENTS() \
+	do { \
+		memmove( \
+			&(test_context.events[0]), \
+			&(test_context.events[1]), \
+			(test_context.num_events - 1) * sizeof(struct test_event)); \
+		test_context.num_events--; \
+	} while (0)
+
+#define CHECK_LOST_PACKET_EVENT(FIRST_SEQNUM, COUNT, OPEN_ENDED) \
+	do { \
+		pwtest_int_ge(test_context.num_events, 1u); \
+		pwtest_int_eq((int)(test_context.events[0].type), TEST_EVENT_LOST_PACKETS); \
+		pwtest_int_eq(test_context.events[0].lost.first_seqnum, (FIRST_SEQNUM)); \
+		pwtest_int_eq(test_context.events[0].lost.count, (size_t)(COUNT)); \
+		pwtest_int_eq(test_context.events[0].lost.open_ended, (OPEN_ENDED)); \
+		SHIFT_TEST_EVENTS(); \
+	} while (0)
+
+#define CHECK_OUTPUT_PACKET_EVENT(SEQNUM) \
+	do { \
+		pwtest_int_ge(test_context.num_events, 1u); \
+		pwtest_int_eq((int)(test_context.events[0].type), TEST_EVENT_OUTPUT_PACKET); \
+		pwtest_int_eq(test_context.events[0].output.seqnum, (SEQNUM)); \
+		SHIFT_TEST_EVENTS(); \
+	} while (0)
+
+PWTEST(rtp_jitter_buffer_test_consecutive_packets)
+{
+	/* Simple test with packets that are passed to the jitter buffer
+	 * in order, with no gaps. Immediate output is expected, since
+	 * the jitter buffer will be in regular mode. */
+
+	struct test_context test_context;
+
+	setup_test_context(&test_context, 10);
+
+	/* Send packets 100, 101, 102, 103, 104 in order.
+	 * All 5 should be immediately output, and the
+	 * hold-back mode should remain disabled. */
+	for (uint16_t i = 0; i < 5; i++) {
+		uint16_t seqnum = 100 + i;
+		send_packet(&test_context, seqnum);
+		pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	}
+
+	pwtest_int_eq(test_context.num_events, 5u);
+	for (uint16_t i = 0; i < 5; i++) {
+		uint16_t seqnum = 100 + i;
+		CHECK_OUTPUT_PACKET_EVENT(seqnum);
+	}
+
+	teardown_test_context(&test_context);
+
+	return PWTEST_PASS;
+}
+
+PWTEST(rtp_jitter_buffer_test_simple_reordering)
+{
+	/* Check that simple out-of-order packet arrival is handled properly.
+	 * There should be no gaps signaled, and the packets should be output
+	 * in order. */
+
+	struct test_context test_context;
+
+	setup_test_context(&test_context, 10);
+
+	/* Send 100, 101 in order. */
+	send_packet(&test_context, 100);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	send_packet(&test_context, 101);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 2u);
+	CHECK_OUTPUT_PACKET_EVENT(100);
+	CHECK_OUTPUT_PACKET_EVENT(101);
+
+	/* Send 103. A gap at 102 is produced -> jitter buffer enables hold-back mode.
+	 * No output takes place just yet, since 103 is held back.
+	 * The valid seqnum window starts at 102 and ends at packet 103. */
+	send_packet(&test_context, 103);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 102u);
+	pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 2u);
+	pwtest_int_eq(test_context.num_events, 0u);
+
+	/* Send 102 to simulate out-of-order arrival. This fills the gap
+	 * at 102 (implying that it is not signaled), and should cause
+	 * 102 and 103 to be output (in order) and the hold-back mode
+	 * to be disabled again. */
+	send_packet(&test_context, 102);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 2u);
+	CHECK_OUTPUT_PACKET_EVENT(102);
+	CHECK_OUTPUT_PACKET_EVENT(103);
+
+	teardown_test_context(&test_context);
+
+	return PWTEST_PASS;
+}
+
+PWTEST(rtp_jitter_buffer_test_partial_output)
+{
+	/* Test that partial output is done correctly when some
+	 * gaps are filled. (Partial means that only part of the
+	 * held-back packets are output.) */
+
+	struct test_context test_context;
+
+	setup_test_context(&test_context, 10);
+
+	/* Establish regular mode with packet 400. */
+	send_packet(&test_context, 400);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 1u);
+	CHECK_OUTPUT_PACKET_EVENT(400);
+
+	/* Send in packet 402 to produce a gap at 401 and cause the
+	 * jitter buffer to enter hold-back mode. */
+	send_packet(&test_context, 402);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 401u);
+	pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 2u);
+	pwtest_int_eq(test_context.num_events, 0u);
+
+	/* Send in packets 404 and 405. This keeps the gap at 401, adds
+	 * a gap at 403, and keeps the jitter buffer in hold-back mode. */
+	send_packet(&test_context, 404);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	send_packet(&test_context, 405);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 0u);
+
+	/* Send in packet 401, which fills the gap at 401. This allows
+	 * the jitter buffer to output packets 401 and 402. But since
+	 * another gap exists at 403, hold-back mode remains enabled. */
+	send_packet(&test_context, 401);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 2u);
+	CHECK_OUTPUT_PACKET_EVENT(401);
+	CHECK_OUTPUT_PACKET_EVENT(402);
+
+	/* Send in packet 403, which fills the gap at 403. This allows
+	 * the jitter buffer to output packets 403, 404, 405. Those were
+	 * the remaining held-back packets, so hold-back mode should be
+	 * turned off now. */
+	send_packet(&test_context, 403);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 3u);
+	CHECK_OUTPUT_PACKET_EVENT(403);
+	CHECK_OUTPUT_PACKET_EVENT(404);
+	CHECK_OUTPUT_PACKET_EVENT(405);
+
+	/* Verify that regular mode is working properly by sending
+	 * in packet 406. */
+	send_packet(&test_context, 406);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 1u);
+	CHECK_OUTPUT_PACKET_EVENT(406);
+
+	teardown_test_context(&test_context);
+
+	return PWTEST_PASS;
+}
+
+PWTEST(rtp_jitter_buffer_test_explicit_drain_in_regular_mode)
+{
+	/* Test what happens when explicitly draining the jitter buffer
+	 * while in regular mode. Draining should be a no-op in this mode. */
+
+	struct test_context test_context;
+
+	setup_test_context(&test_context, 10);
+
+	/* Establish regular mode with packets 200 and 201. */
+	send_packet(&test_context, 200);
+	send_packet(&test_context, 201);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 2u);
+	CHECK_OUTPUT_PACKET_EVENT(200);
+	CHECK_OUTPUT_PACKET_EVENT(201);
+
+	/* Drain, and then check the outcome. Check that it was a no-op. */
+	int ret = rtp_jitter_buffer_drain(&(test_context.jitter_buffer));
+	assert(ret == 0);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 0u);
+	pwtest_int_eq(test_context.jitter_buffer.last_seqnum, 201);
+
+	teardown_test_context(&test_context);
+
+	return PWTEST_PASS;
+}
+
+PWTEST(rtp_jitter_buffer_test_explicit_drain_in_hold_back_mode)
+{
+	/* Test what happens when explicitly draining the jitter buffer
+	 * while in hold-back mode. Missing packets should be signaled
+	 * as lost packets by this. */
+
+	struct test_context test_context;
+
+	setup_test_context(&test_context, 10);
+
+	/* Establish regular mode with packet 200. */
+	send_packet(&test_context, 200);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 1u);
+	CHECK_OUTPUT_PACKET_EVENT(200);
+
+	/* Send in packets 202 and 205 to produce gaps at 201, 203, 204
+	 * and cause the jitter buffer to enter hold-back mode. */
+	send_packet(&test_context, 202);
+	send_packet(&test_context, 205);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 0u);
+
+	/* Drain explicitly. This should output the following (in this order):
+	 *
+	 * - 1 lost packet, starting at seqnum 201, not open-ended
+	 * - 1 packet output with seqnum 202
+	 * - 2 lost packets, starting at seqnum 203, not open-ended
+	 * - 1 packet output with seqnum 205
+	 *
+	 * This should also set the jitter buffer back to regular mode.
+	 * The last_seqnum should be -1, since after explicit drain,
+	 * the jitter buffer has no idea what packets will come next. */
+	int ret = rtp_jitter_buffer_drain(&(test_context.jitter_buffer));
+	assert(ret == 0);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 4u);
+	pwtest_int_eq(test_context.jitter_buffer.last_seqnum, -1);
+	CHECK_LOST_PACKET_EVENT(201, 1u, false);
+	CHECK_OUTPUT_PACKET_EVENT(202);
+	CHECK_LOST_PACKET_EVENT(203, 2u, false);
+	CHECK_OUTPUT_PACKET_EVENT(205);
+
+	/* Verify that regular mode is working properly by sending
+	 * in packet 700. Since after draining, the last_seqnum is
+	 * -1, a discontinuity in the sequence numbers is okay. */
+	send_packet(&test_context, 700);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 1u);
+	CHECK_OUTPUT_PACKET_EVENT(700);
+
+	teardown_test_context(&test_context);
+
+	return PWTEST_PASS;
+}
+
+PWTEST(rtp_jitter_buffer_test_explicit_drain_coalesced_loss)
+{
+	/* Test that a contiguous set of lost packets is coalesced
+	 * into a single lost-packets signal. */
+
+	struct test_context test_context;
+
+	setup_test_context(&test_context, 10);
+
+	/* Establish regular mode with packet 50. */
+	send_packet(&test_context, 50);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 1u);
+	CHECK_OUTPUT_PACKET_EVENT(50);
+
+	/* Send in packet 54 to produce a gap at 51, 52, 53 and
+	 * cause the jitter buffer to enter hold-back mode. */
+	send_packet(&test_context, 54);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 0u);
+
+	/* Drain the jitter buffer. The packets 51, 52, 53 are
+	 * now considered lost, and should be reported as such. */
+	int ret = rtp_jitter_buffer_drain(&(test_context.jitter_buffer));
+	assert(ret == 0);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 2u);
+	pwtest_int_eq(test_context.jitter_buffer.last_seqnum, -1);
+	CHECK_LOST_PACKET_EVENT(51, 3u, false);
+	CHECK_OUTPUT_PACKET_EVENT(54);
+
+	teardown_test_context(&test_context);
+
+	return PWTEST_PASS;
+}
+
+PWTEST(rtp_jitter_buffer_test_explicit_drain_with_seqnum_wraparound)
+{
+	/* Test what happens when explicitly draining the jitter
+	 * buffer while in hold-back mode and with sequence numbers
+	 * wrapping around. Missing packets should be signaled as
+	 * lost packets by this. */
+
+	struct test_context test_context;
+
+	setup_test_context(&test_context, 10);
+
+	/* Establish regular mode with packet 65533. */
+	send_packet(&test_context, 65533);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 1u);
+	CHECK_OUTPUT_PACKET_EVENT(65533);
+
+	/* Send in packets 65535 and 2 to produce gaps at 65534, 0, 1
+	 * and cause the jitter buffer to enter hold-back mode. */
+	send_packet(&test_context, 65535);
+	send_packet(&test_context, 2);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 0u);
+
+	/* Drain explicitly. This should output the following (in this order):
+	 *
+	 * - 1 lost packet, starting at seqnum 65534, not open-ended
+	 * - 1 packet output with seqnum 65535
+	 * - 2 lost packets, starting at seqnum 0, not open-ended
+	 * - 1 packet output with seqnum 2
+	 *
+	 * This should also set the jitter buffer back to regular mode.
+	 * The last_seqnum should be -1, since after explicit drain,
+	 * the jitter buffer has no idea what packets will come next. */
+	int ret = rtp_jitter_buffer_drain(&(test_context.jitter_buffer));
+	assert(ret == 0);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 4u);
+	pwtest_int_eq(test_context.jitter_buffer.last_seqnum, -1);
+	CHECK_LOST_PACKET_EVENT(65534, 1u, false);
+	CHECK_OUTPUT_PACKET_EVENT(65535);
+	CHECK_LOST_PACKET_EVENT(0, 2u, false);
+	CHECK_OUTPUT_PACKET_EVENT(2);
+
+	/* Verify that regular mode is working properly by sending
+	 * in packet 700. Since after draining, the last_seqnum is
+	 * -1, a discontinuity in the sequence numbers is okay. */
+	send_packet(&test_context, 700);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 1u);
+	CHECK_OUTPUT_PACKET_EVENT(700);
+
+	teardown_test_context(&test_context);
+
+	return PWTEST_PASS;
+}
+
+PWTEST(rtp_jitter_buffer_test_stale_packets_in_regular_mode)
+{
+	/* Test what happens when stale and old packets are sent into
+	 * the jitter buffer in regular mode. They should be dropped
+	 * without influencing the behavior of the jitter buffer. */
+
+	struct test_context test_context;
+
+	setup_test_context(&test_context, 10);
+
+	/* Establish regular mode with packets 100 and 101. */
+	send_packet(&test_context, 100);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	send_packet(&test_context, 101);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 2u);
+	CHECK_OUTPUT_PACKET_EVENT(100);
+	CHECK_OUTPUT_PACKET_EVENT(101);
+
+	/* Send in packet 101. Since a packet 101 was already seen,
+	 * this is a stale packet, and needs to be dropped. */
+	send_packet(&test_context, 101);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 0u);
+
+	/* Send in packet 99. Since packets 100 and 101 were already seen,
+	 * this is an old packet, and needs to be dropped. */
+	send_packet(&test_context, 99);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 0u);
+
+	/* Verify that regular mode is working properly by sending
+	 * in packet 102. */
+	send_packet(&test_context, 102);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 1u);
+	CHECK_OUTPUT_PACKET_EVENT(102);
+
+	teardown_test_context(&test_context);
+
+	return PWTEST_PASS;
+}
+
+PWTEST(rtp_jitter_buffer_test_stale_packets_in_hold_back_mode)
+{
+	/* Test what happens when stale and old packets are sent into
+	 * the jitter buffer in hold-back mode. They should be dropped
+	 * without influencing the behavior of the jitter buffer. */
+
+	struct test_context test_context;
+
+	setup_test_context(&test_context, 10);
+
+	/* Establish hold-back mode with packets 300 and 302.
+	 * Hold-back mode gets active because of the gap at 301. */
+	send_packet(&test_context, 300);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	send_packet(&test_context, 302);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 1u);
+	CHECK_OUTPUT_PACKET_EVENT(300);
+
+	/* Send in packet 299. Since packets 300 and 302 were already seen,
+	 * this is an old packet, and needs to be dropped. */
+	send_packet(&test_context, 299);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 0u);
+
+	/* Send in packet 300. Since a packet 300 was already seen,
+	 * this is a stale packet, and needs to be dropped. */
+	send_packet(&test_context, 300);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 0u);
+
+	/* Send in packet 302. This is another stale packet. The
+	 * difference to the packet 300 check above is that the
+	 * packet 302 that was previously observed is held back,
+	 * and was not output thus far. */
+	send_packet(&test_context, 302);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 0u);
+
+	/* Send in packet 301 to test that switching back
+	 * to regular mode still works properly. */
+	send_packet(&test_context, 301);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 2u);
+	CHECK_OUTPUT_PACKET_EVENT(301);
+	CHECK_OUTPUT_PACKET_EVENT(302);
+
+	teardown_test_context(&test_context);
+
+	return PWTEST_PASS;
+}
+
+PWTEST(rtp_jitter_buffer_test_flush)
+{
+	/* Test the flush functionality. This should discard any held-back
+	 * packets, without emitting them, and the jitter buffer should
+	 * be back in regular mode afterwards. */
+
+	struct test_context test_context;
+
+	setup_test_context(&test_context, 10);
+
+	/* Establish hold-back mode with packets 500 and 502.
+	 * Hold-back mode gets active because of the gap at 501. */
+	send_packet(&test_context, 500);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	send_packet(&test_context, 502);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 1u);
+	CHECK_OUTPUT_PACKET_EVENT(500);
+
+	rtp_jitter_buffer_flush(&(test_context.jitter_buffer));
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 0u);
+	pwtest_int_eq(test_context.jitter_buffer.last_seqnum, -1);
+
+	/* Verify that regular mode is working properly by sending
+	 * in packet 700. Since after flushing, the last_seqnum is
+	 * -1, a discontinuity in the sequence numbers is okay. */
+	send_packet(&test_context, 700);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 1u);
+	CHECK_OUTPUT_PACKET_EVENT(700);
+
+	teardown_test_context(&test_context);
+
+	return PWTEST_PASS;
+}
+
+PWTEST(rtp_jitter_buffer_test_seqnum_wraparound_regular)
+{
+	/* Check that in regular mode, output of in-sequence packets
+	 * works properly even when a sequence number wrap-around occurs. */
+
+	struct test_context test_context;
+
+	setup_test_context(&test_context, 10);
+
+	send_packet(&test_context, 65534);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	send_packet(&test_context, 65535);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	send_packet(&test_context, 0);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	send_packet(&test_context, 1);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 4u);
+	CHECK_OUTPUT_PACKET_EVENT(65534);
+	CHECK_OUTPUT_PACKET_EVENT(65535);
+	CHECK_OUTPUT_PACKET_EVENT(0);
+	CHECK_OUTPUT_PACKET_EVENT(1);
+
+	teardown_test_context(&test_context);
+
+	return PWTEST_PASS;
+}
+
+PWTEST(rtp_jitter_buffer_test_seqnum_wraparound_with_reordering)
+{
+	/* Check that in hold-back mode, output of in-sequence packets
+	 * works properly even when a sequence number wrap-around occurs. */
+
+	struct test_context test_context;
+
+	setup_test_context(&test_context, 10);
+
+	/* Send packets 65534 and 65535 in order. */
+	send_packet(&test_context, 65534);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	send_packet(&test_context, 65535);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 2u);
+	CHECK_OUTPUT_PACKET_EVENT(65534);
+	CHECK_OUTPUT_PACKET_EVENT(65535);
+
+	/* Send in packet 1, causing a gap at 0. */
+	send_packet(&test_context, 1);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 0u);
+
+	/* Fill the gap by sending in packet 0, then check that
+	 * packets 0 and 1 were now output in order. */
+	send_packet(&test_context, 0);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 2u);
+	CHECK_OUTPUT_PACKET_EVENT(0);
+	CHECK_OUTPUT_PACKET_EVENT(1);
+
+	teardown_test_context(&test_context);
+
+	return PWTEST_PASS;
+}
+
+PWTEST(rtp_jitter_buffer_test_overextension_single_gap_no_end_gap)
+{
+	/* Check what happens when hold-back mode is active, the
+	 * valid seqnum window's maximum length is reached, and then,
+	 * a packet with a sequence number that is one past the window
+	 * range is added. This new packet would overextend the window,
+	 * so the window is shifted forwards. However, it is only
+	 * overextended by 1, so only the oldest slot in the window
+	 * needs to be drained. In this case, that oldest slot contains
+	 * the gap at the very beginning of the window. Also, since
+	 * aside from that gap, there are no other ones, and the new
+	 * packet (the one that overextends the window) comes directly
+	 * after the last packet in the valid seqnum window, the
+	 * jitter buffer will have no gaps left to take care of, so
+	 * all held back packets can be output.
+	 *
+	 * This simulates cases where one packet is lost among
+	 * a string of packets that all arrive in order. */
+
+	struct test_context test_context;
+
+	setup_test_context(&test_context, 10);
+
+	/* Produce a sequence of packets with a gap in them. Start at 100,
+	 * skip 101, then go all the way to 110.
+	 *
+	 * First, packet 100 will immediately be output. Then, packet 102
+	 * will enable hold-back mode (due to the gap at 101). The valid
+	 * seqnum window then starts at 101, and extends all the way to 110.
+	 * 110-101+1 = 10, which equals the max num packets of the jitter
+	 * buffer here. In other words, after this, the jitter buffer valid
+	 * range is as large as it can maximally be. */
+	send_packet(&test_context, 100);
+	for (uint16_t i = 102; i <= 110; i++) {
+		send_packet(&test_context, i);
+		pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	}
+	pwtest_int_eq(test_context.num_events, 1u);
+	pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 10u);
+	CHECK_OUTPUT_PACKET_EVENT(100);
+
+	/* Now insert packet 111. This would overextend the window, so the
+	 * jitter buffer has to shift the window and drain the oldest slots
+	 * that are no longer part of the shifted window. Since packet 111
+	 * would overextend the window by 1, it means that the one oldest
+	 * slot is drained. That oldest slot actually is the gap at 101.
+	 * Since that gap was drained (resulting in a packet loss signal
+	 * at seqnum 101 of length 1), only packets remain in the valid
+	 * seqnum window, no gaps anymore, so the jitter buffer immediately
+	 * outputs all of them, in order. */
+	send_packet(&test_context, 111);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 11u);
+	CHECK_LOST_PACKET_EVENT(101, 1u, false);
+	for (uint16_t i = 102; i <= 111; i++)
+		CHECK_OUTPUT_PACKET_EVENT(i);
+
+	teardown_test_context(&test_context);
+
+	return PWTEST_PASS;
+}
+
+PWTEST(rtp_jitter_buffer_test_overextension_multiple_gaps_no_end_gap)
+{
+	/* Check what happens when hold-back mode is active, the
+	 * valid seqnum window's maximum length is reached, and then,
+	 * a packet with a sequence number that is one past the window
+	 * range is added. This new packet would overextend the window,
+	 * so the window is shifted forwards. However, it is only
+	 * overextended by 1, so only the oldest slot in the window
+	 * needs to be drained. In this case, that oldest slot contains
+	 * the gap at the very beginning of the window. Since there
+	 * are more gaps present, the hold-back mode is not left.
+	 *
+	 * This simulates cases where more than one packet is lost
+	 * among a string of packets that all arrive in order. */
+
+	struct test_context test_context;
+
+	setup_test_context(&test_context, 10);
+
+	/* Produce a sequence of packets with gaps in them. Start at 100,
+	 * skip 101, 102, and 104, and go all the way to 110.
+	 *
+	 * In the hold-back mode that results from this, the valid range
+	 * then starts at 101, and extends all the way to 110. 110-101+1 = 10,
+	 * which equals the max num packets of the jitter buffer here. In
+	 * other words, after this, the jitter buffer valid range is as large
+	 * as it can maximally be. */
+	send_packet(&test_context, 100);
+	send_packet(&test_context, 103);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	for (uint16_t i = 105; i <= 110; i++) {
+		send_packet(&test_context, i);
+		pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	}
+	pwtest_int_eq(test_context.num_events, 1u);
+	pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 10u);
+	CHECK_OUTPUT_PACKET_EVENT(100);
+
+	/* Now insert packet 111. This would overextend the window, so the
+	 * jitter buffer has to shift the window and drain the oldest slots
+	 * that are no longer part of the shifted window. Since packet 111
+	 * would overextend the window by 1, it means that the one oldest
+	 * slot is drained. But, at 102, there is also a gap, and 102 is now
+	 * the new start of the valid seqnum window, so the jitter buffer
+	 * cannot output any packets yet. */
+	send_packet(&test_context, 111);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 1u);
+	pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 102u);
+	pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 10u);
+	CHECK_LOST_PACKET_EVENT(101, 1u, false);
+
+	/* To see that the behavior remains as expected, fill the gap at 102.
+	 * Since 102 is the very beginning of the valid seqnum window, and there
+	 * is a packet at 103, the jitter buffer can now output 102 and 103.
+	 * Also, the valid seqnum window shrinks accordingly by 2, its length
+	 * becoming 8 and its start seqnum becoming 104. */
+	send_packet(&test_context, 102);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 2u);
+	pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 104u);
+	pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 8u);
+	CHECK_OUTPUT_PACKET_EVENT(102);
+	CHECK_OUTPUT_PACKET_EVENT(103);
+
+	/* Finally, send in packet 104. By now, 104 is the start of the valid
+	 * seqnum window, and a gap is there. Since this is the last gap in
+	 * the jitter buffer, once it is filled, all packets can be output. */
+	send_packet(&test_context, 104);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 8u);
+	for (uint16_t i = 104; i <= 111; i++)
+		CHECK_OUTPUT_PACKET_EVENT(i);
+
+	teardown_test_context(&test_context);
+
+	return PWTEST_PASS;
+}
+
+PWTEST(rtp_jitter_buffer_test_overextension_after_partial_output)
+{
+	/* Check what happens when first, in hold-back mode, a partial
+	 * drain happens, and then, the valid seqnum window is overextended. */
+
+	struct test_context test_context;
+
+	setup_test_context(&test_context, 5);
+
+	/* Add a packet 100, which is output immediately, since the
+	 * jitter buffer is in regular mode. */
+	send_packet(&test_context, 100);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 1u);
+	CHECK_OUTPUT_PACKET_EVENT(100);
+
+	/* Now add packet 102. Since there is a gap at 101, hold-back
+	 * mode is enabled. The valid seqnum window starts at 101,
+	 * and is of length 2. */
+	send_packet(&test_context, 102);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 101u);
+	pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 2u);
+	pwtest_int_eq(test_context.num_events, 0u);
+
+	/* Packets 103 to 105 are inserted. This fills the window to
+	 * capacity, since now, it has been extended, and goes from
+	 * 101 to 105. That is, it starts at 101, and is of length 5
+	 * (which equals the jitter buffer capacity). */
+	send_packet(&test_context, 103);
+	send_packet(&test_context, 104);
+	send_packet(&test_context, 105);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 101u);
+	pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 5u);
+	pwtest_int_eq(test_context.num_events, 0u);
+
+	/* Now add packet 101. This fills the gap. All 5 packets
+	 * can be output, and the jitter buffer returns to the regular mode. */
+	send_packet(&test_context, 101);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 5u);
+	CHECK_OUTPUT_PACKET_EVENT(101);
+	CHECK_OUTPUT_PACKET_EVENT(102);
+	CHECK_OUTPUT_PACKET_EVENT(103);
+	CHECK_OUTPUT_PACKET_EVENT(104);
+	CHECK_OUTPUT_PACKET_EVENT(105);
+
+	/* Re-enter the hold-back mode by adding packet 107 and
+	 * intentionally leaving out packet 106. The valid seqnum
+	 * window now starts at 106, and is of length 2. */
+	send_packet(&test_context, 107);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 106u);
+	pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 2u);
+	pwtest_int_eq(test_context.num_events, 0u);
+
+	/* Packets 108 to 110 are inserted. This fills the window to
+	 * capacity, since now, it has been extended, and goes from
+	 * 106 to 110. That is, it starts at 106, and is of length 5
+	 * (which equals the jitter buffer capacity). */
+	send_packet(&test_context, 108);
+	send_packet(&test_context, 109);
+	send_packet(&test_context, 110);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 106u);
+	pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 5u);
+	pwtest_int_eq(test_context.num_events, 0u);
+
+	/* Packet 111 is added. This overextends the window, since it would
+	 * now go from 106 to 111. That is a length of 111-106+1 = 6, which
+	 * is beyond the capacity (5).
+	 *
+	 * The overextension is still low enough that most of the window
+	 * contents can be reused. In fact, only the oldest slot (the one
+	 * containing the gap at 106) needs to be drained by signaling it
+	 * as a packet 106 loss.
+	 *
+	 * Once packet 106 is signaled as lost, and the corresponding slot
+	 * is drained, the leftovers are all packets, no gaps, so all packets
+	 * from 107 to 111 are output.
+	 *
+	 * By combining this with multiple partial drains above, it is verified
+	 * that valid_seqnum_window_start_seqnum updates (which happen during
+	 * partial drains) do not break the overextension handling. */
+	send_packet(&test_context, 111);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 6u);
+	CHECK_LOST_PACKET_EVENT(106, 1u, false);
+	CHECK_OUTPUT_PACKET_EVENT(107);
+	CHECK_OUTPUT_PACKET_EVENT(108);
+	CHECK_OUTPUT_PACKET_EVENT(109);
+	CHECK_OUTPUT_PACKET_EVENT(110);
+	CHECK_OUTPUT_PACKET_EVENT(111);
+
+	/* Verify regular mode recovery. */
+	send_packet(&test_context, 112);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 1u);
+	CHECK_OUTPUT_PACKET_EVENT(112);
+
+	teardown_test_context(&test_context);
+
+	return PWTEST_PASS;
+}
+
+PWTEST(rtp_jitter_buffer_test_immediate_overextension_after_regular_mode)
+{
+	/* Check what happens when a gap causes the jitter buffer to switch
+	 * to the hold-back mode, but that gap is so large that it immediately
+	 * overextends the valid seqnum window. The jitter buffer should
+	 * instantly recognize the immediate overextension and signal an open
+	 * ended packet loss event. It does not stay in the hold-back mode,
+	 * since there is nothing to hold back in that case. */
+
+	struct test_context test_context;
+
+	setup_test_context(&test_context, 10);
+
+	/* Send 100, 101 in order. */
+	send_packet(&test_context, 100);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	send_packet(&test_context, 101);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 2u);
+	CHECK_OUTPUT_PACKET_EVENT(100);
+	CHECK_OUTPUT_PACKET_EVENT(101);
+
+	/* Send 200. A massive gap of far more than 10 packets is produced
+	 * -> jitter buffer signals an open ended gap, but stays in regular mode. */
+	send_packet(&test_context, 200);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 2u);
+	CHECK_LOST_PACKET_EVENT(102, 10u, true);
+	CHECK_OUTPUT_PACKET_EVENT(200);
+
+	teardown_test_context(&test_context);
+
+	return PWTEST_PASS;
+}
+
+PWTEST(rtp_jitter_buffer_test_immediate_overextension_after_regular_mode_threshold_open_closed_gap)
+{
+	/* This is similar to rtp_jitter_buffer_test_immediate_overextension_after_regular_mode,
+	 * but checks for a corner case. That is: If the gap length equals
+	 * the number of slots, then the gap should not be reported as open.
+	 *
+	 * Test this by producing such a gap. Then further verify by repeating
+	 * the test, but by a gap that is 1 packet larger than the number of
+	 * slots. The first round should report a closed gap of a size equal
+	 * to the number of slots. The second round should report an open gap. */
+
+	/* First round. */
+	{
+		struct test_context test_context;
+
+		setup_test_context(&test_context, 10);
+
+		/* Send 10, 11 in order. */
+		send_packet(&test_context, 10);
+		pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+		send_packet(&test_context, 11);
+		pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+		pwtest_int_eq(test_context.num_events, 2u);
+		CHECK_OUTPUT_PACKET_EVENT(10);
+		CHECK_OUTPUT_PACKET_EVENT(11);
+
+		/* Send 22. A gap of exactly 10 packets (= the number of slots)
+		 * is produced -> jitter buffer signals a closed gap of size
+		 * equal to the number of slots. */
+		send_packet(&test_context, 22);
+		pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+		pwtest_int_eq(test_context.num_events, 2u);
+		CHECK_LOST_PACKET_EVENT(12, 10u, false);
+		CHECK_OUTPUT_PACKET_EVENT(22);
+
+		teardown_test_context(&test_context);
+	}
+
+	/* Second round. */
+	{
+		struct test_context test_context;
+
+		setup_test_context(&test_context, 10);
+
+		/* Send 10, 11 in order. */
+		send_packet(&test_context, 10);
+		pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+		send_packet(&test_context, 11);
+		pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+		pwtest_int_eq(test_context.num_events, 2u);
+		CHECK_OUTPUT_PACKET_EVENT(10);
+		CHECK_OUTPUT_PACKET_EVENT(11);
+
+		/* Send 23. A gap of exactly 11 packets (= 1 past the number
+		 * of slots) is produced -> jitter buffer signals an open
+		 * ended gap of size equal to the number of slots. */
+		send_packet(&test_context, 23);
+		pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+		pwtest_int_eq(test_context.num_events, 2u);
+		CHECK_LOST_PACKET_EVENT(12, 10u, true);
+		CHECK_OUTPUT_PACKET_EVENT(23);
+
+		teardown_test_context(&test_context);
+	}
+
+	return PWTEST_PASS;
+}
+
+PWTEST(rtp_jitter_buffer_test_full_window_invalidation_non_open_ended_gap)
+{
+	/* Check what happens when hold-back mode is active, the
+	 * valid seqnum window's maximum length is reached, and then,
+	 * a packet arrives whose sequence number is far enough to
+	 * overextend the window past its current length. This means
+	 * that the shifting method (verified in earlier tests above)
+	 * won't work - the window is shifted completely past its
+	 * current range, so none of those slots remain valid,
+	 * and must all be drained. Furthermore, it means that between
+	 * the last seqnum of the old window and the first seqnum of
+	 * the new window, there is a gap. The jitter buffer is expected
+	 * to do the following:
+	 *
+	 * 1. Drain the entire current valid seqnum window
+	 * 2. Reset the window to only contain the seqnum of the new packet
+	 * 3. Signal the gap between the old and the new window
+	 *
+	 * Here, the window is shifted far enough that none of the
+	 * original content can be retained, but not so far that
+	 * the gap between the old and new windows becomes too large
+	 * to fully cover via PLC. As a result, that gap is signaled
+	 * as packet loss, but as a non-open-ended one. */
+
+	struct test_context test_context;
+
+	setup_test_context(&test_context, 10);
+
+	/* Establish hold-back mode with packets 10 and 12.
+	 * Hold-back mode gets active because of the gap at 11. */
+	send_packet(&test_context, 10);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	send_packet(&test_context, 12);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 1u);
+	CHECK_OUTPUT_PACKET_EVENT(10);
+
+	/* Send in packet 22. This would overextend the window. Shifting
+	 * the current window moves it past packet 12, so the jitter
+	 * buffer must be fully drained. Since afterwards, there is
+	 * nothing left in the jitter buffer other than the new packet,
+	 * the valid seqnum window length becomes 1, and starts at 22.
+	 * This means that there are no gaps left, so the contents
+	 * (in this case, just the packet 22) can be output immediately.
+	 * Also, the gap between the old window and the new window goes
+	 * from seqnum 13 (one past the end of the old window) to seqnum
+	 * 21 (one before the new packet 22). 21-13+1 = 9, which is
+	 * less than the jitter buffer capacity (which is 10), so that
+	 * gap is announced as non-open-ended packet loss. */
+	send_packet(&test_context, 22);
+	pwtest_int_eq(test_context.num_events, 4u);
+	CHECK_LOST_PACKET_EVENT(11, 1u, false);
+	CHECK_OUTPUT_PACKET_EVENT(12);
+	CHECK_LOST_PACKET_EVENT(13, 9u, false);
+	CHECK_OUTPUT_PACKET_EVENT(22);
+
+	teardown_test_context(&test_context);
+
+	return PWTEST_PASS;
+}
+
+PWTEST(rtp_jitter_buffer_test_full_window_invalidation_open_ended_gap)
+{
+	/* Check what happens when hold-back mode is active, the
+	 * valid seqnum window's maximum length is reached, and then,
+	 * a packet arrives whose sequence number is far enough to
+	 * overextend the window past its current length. This means
+	 * that the shifting method (verified in earlier tests above)
+	 * won't work - the window is shifted completely past its
+	 * current range, so none of those slots remain valid,
+	 * and must all be drained. Furthermore, it means that between
+	 * the last seqnum of the old window and the first seqnum of
+	 * the new window, there is a gap. The jitter buffer is expected
+	 * to do the following:
+	 *
+	 * 1. Drain the entire current valid seqnum window
+	 * 2. Reset the window to only contain the seqnum of the new packet
+	 * 3. Signal the gap between the old and the new window
+	 *
+	 * Here, the window is shifted far enough that none of the
+	 * original content can be retained, and that the gap
+	 * between the old and new windows becomes too large
+	 * to fully cover via PLC. As a result, that gap is signaled
+	 * as packet loss, this time as an open-ended one. */
+
+	struct test_context test_context;
+
+	setup_test_context(&test_context, 10);
+
+	/* Establish hold-back mode with packets 10 and 12.
+	 * Hold-back mode gets active because of the gap at 11. */
+	send_packet(&test_context, 10);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	send_packet(&test_context, 12);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 1u);
+	CHECK_OUTPUT_PACKET_EVENT(10);
+
+	/* Send in packet 400. This would overextend the window. Shifting
+	 * the current window moves it past packet 12, so the jitter
+	 * buffer must be fully drained. Since afterwards, there is
+	 * nothing left in the jitter buffer other than the new packet,
+	 * the valid seqnum window length becomes 1, and starts at 400.
+	 * This means that there are no gaps left, so the contents
+	 * (in this case, just the packet 400) can be output immediately.
+	 * Also, the gap between the old window and the new window goes
+	 * from seqnum 13 (one past the end of the old window) to seqnum
+	 * 399 (one before the new packet 400). 399-13+1 = 387, which is
+	 * far beyond the jitter buffer capacity (which is 10). That gap
+	 * is then signaled as an open ended packet loss with maximum
+	 * length 10, meaning that any PLC/fadeout measure must not
+	 * exceed the length of 10 packets. (In non-open-ended signals,
+	 * the length instead specifies the exact length of the gap.)
+	 * This is done to avoid excessive PLC/fadeout calculations,
+	 * like in this case, where it otherwise would force PLC for
+	 * 387 packets. Callers are encouraged to apply fadeout as well
+	 * to not have a hard cutoff after the maximum (10 packets here). */
+	send_packet(&test_context, 400);
+	pwtest_int_eq(test_context.num_events, 4u);
+	CHECK_LOST_PACKET_EVENT(11, 1u, false);
+	CHECK_OUTPUT_PACKET_EVENT(12);
+	CHECK_LOST_PACKET_EVENT(13, 10u, true);
+	CHECK_OUTPUT_PACKET_EVENT(400);
+
+	teardown_test_context(&test_context);
+
+	return PWTEST_PASS;
+}
+
+PWTEST(rtp_jitter_buffer_test_timeout_drain)
+{
+	/* Check what happens when hold-back mode is enabled and
+	 * the gaps are not filled in time. It is expected that the
+	 * jitter buffer's timeout expires and forcibly drains
+	 * its contents. */
+
+	struct test_context test_context;
+	struct timespec ts;
+
+	setup_test_context(&test_context, 10);
+
+	/* Establish hold-back mode with packets 60 and 62.
+	 * Hold-back mode gets active because of the gap at 61. */
+	send_packet(&test_context, 60);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	send_packet(&test_context, 62);
+	pwtest_bool_true(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 1u);
+	CHECK_OUTPUT_PACKET_EVENT(60);
+
+	/* The jitter buffer's timeout timer is configured to expire
+	 * when the total duration of its capacity passes after the
+	 * hold-back mode was enabled. In this test, capacity is 10
+	 * packets, and each packet covers 10ms, so the total duration
+	 * is 10*10ms = 100 ms. That is also the timer's timeout, and
+	 * the gap detected earlier will have armed that timer. Sleep
+	 * 50ms longer than that timeout to make sure it expires and
+	 * thus provokes the draining of the jitter buffer.
+	 * NOTE(review): nanosleep() result is unchecked; a signal could cut the sleep short. */
+	ts.tv_sec = 0;
+	ts.tv_nsec = 10 * TEST_PACKET_DURATION + 50 * SPA_NSEC_PER_MSEC;
+	nanosleep(&ts, NULL);
+
+	/* Iterate the loop to process the timer expiration. */
+	pw_loop_enter(test_context.loop);
+	pw_loop_iterate(test_context.loop, 0);
+	pw_loop_leave(test_context.loop);
+
+	/* After draining, the jitter buffer should be back to regular
+	 * mode, just as if rtp_jitter_buffer_drain() had been called. */
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 2u);
+	pwtest_int_eq(test_context.jitter_buffer.last_seqnum, -1);
+	CHECK_LOST_PACKET_EVENT(61, 1u, false);
+	CHECK_OUTPUT_PACKET_EVENT(62);
+
+	/* Verify that regular mode is working properly by sending
+	 * in packet 700. Since after draining, the last_seqnum is
+	 * -1, a discontinuity in the sequence numbers is okay. */
+	send_packet(&test_context, 700);
+	pwtest_bool_false(test_context.jitter_buffer.hold_back_mode);
+	pwtest_int_eq(test_context.num_events, 1u);
+	CHECK_OUTPUT_PACKET_EVENT(700);
+
+	teardown_test_context(&test_context);
+
+	return PWTEST_PASS;
+}
+
+PWTEST_SUITE(pw_module_rtp_common_lib)
+{	/* Register each RTP jitter buffer test case with this suite. */
+	pwtest_add(rtp_jitter_buffer_test_consecutive_packets, PWTEST_NOARG);
+	pwtest_add(rtp_jitter_buffer_test_simple_reordering, PWTEST_NOARG);
+	pwtest_add(rtp_jitter_buffer_test_partial_output, PWTEST_NOARG);
+	pwtest_add(rtp_jitter_buffer_test_explicit_drain_in_regular_mode, PWTEST_NOARG);
+	pwtest_add(rtp_jitter_buffer_test_explicit_drain_in_hold_back_mode, PWTEST_NOARG);
+	pwtest_add(rtp_jitter_buffer_test_explicit_drain_coalesced_loss, PWTEST_NOARG);
+	pwtest_add(rtp_jitter_buffer_test_explicit_drain_with_seqnum_wraparound, PWTEST_NOARG);
+	pwtest_add(rtp_jitter_buffer_test_stale_packets_in_regular_mode, PWTEST_NOARG);
+	pwtest_add(rtp_jitter_buffer_test_stale_packets_in_hold_back_mode, PWTEST_NOARG);
+	pwtest_add(rtp_jitter_buffer_test_flush, PWTEST_NOARG);
+	pwtest_add(rtp_jitter_buffer_test_seqnum_wraparound_regular, PWTEST_NOARG);
+	pwtest_add(rtp_jitter_buffer_test_seqnum_wraparound_with_reordering, PWTEST_NOARG);
+	pwtest_add(rtp_jitter_buffer_test_overextension_single_gap_no_end_gap, PWTEST_NOARG);
+	pwtest_add(rtp_jitter_buffer_test_overextension_multiple_gaps_no_end_gap, PWTEST_NOARG);
+	pwtest_add(rtp_jitter_buffer_test_overextension_after_partial_output, PWTEST_NOARG);
+	pwtest_add(rtp_jitter_buffer_test_immediate_overextension_after_regular_mode, PWTEST_NOARG);
+	pwtest_add(rtp_jitter_buffer_test_full_window_invalidation_non_open_ended_gap, PWTEST_NOARG);
+	pwtest_add(rtp_jitter_buffer_test_immediate_overextension_after_regular_mode_threshold_open_closed_gap, PWTEST_NOARG);
+	pwtest_add(rtp_jitter_buffer_test_full_window_invalidation_open_ended_gap, PWTEST_NOARG);
+	pwtest_add(rtp_jitter_buffer_test_timeout_drain, PWTEST_NOARG);
+
+	return PWTEST_PASS;
+}