From 11775850e5875536caf84be214f1807933f640d8 Mon Sep 17 00:00:00 2001 From: Carlos Rafael Giani Date: Fri, 19 Jun 2026 19:18:33 +0200 Subject: [PATCH 1/5] module-rtp: Cleanup default raw / raop formats This places the default formats into a single place, which makes it easier to keep track of them. --- src/modules/module-raop-sink.c | 2 +- src/modules/module-rtp-session.c | 4 ++-- src/modules/module-rtp-sink.c | 2 +- src/modules/module-rtp-source.c | 2 +- src/modules/module-rtp/stream.c | 2 +- src/modules/module-rtp/stream.h | 3 ++- 6 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/modules/module-raop-sink.c b/src/modules/module-raop-sink.c index af84ba3eb..ce8bdb2ce 100644 --- a/src/modules/module-raop-sink.c +++ b/src/modules/module-raop-sink.c @@ -149,7 +149,7 @@ PW_LOG_TOPIC(mod_topic, "mod." NAME); #define MAX_PORT_RETRY 128 -#define RAOP_FORMAT "S16LE" +#define RAOP_FORMAT DEFAULT_RAOP_AUDIO_FORMAT #define RAOP_STRIDE (2*DEFAULT_CHANNELS) #define RAOP_RATE 44100 #define RAOP_LATENCY_MS 250 diff --git a/src/modules/module-rtp-session.c b/src/modules/module-rtp-session.c index 4374f64b5..525ab75bc 100644 --- a/src/modules/module-rtp-session.c +++ b/src/modules/module-rtp-session.c @@ -140,7 +140,7 @@ PW_LOG_TOPIC(mod_topic, "mod." 
NAME); "( sess.min-ptime= ) " \ "( sess.max-ptime= ) " \ "( sess.media= ) " \ - "( audio.format= ) " \ + "( audio.format= ) " \ "( audio.rate= ) " \ "( audio.channels= ) "\ "( audio.position= ) " \ @@ -1624,7 +1624,7 @@ int pipewire__module_init(struct pw_impl_module *module, const char *args) if (spa_streq(str, "audio")) { struct spa_dict_item items[] = { - { "audio.format", DEFAULT_FORMAT }, + { "audio.format", DEFAULT_RAW_AUDIO_FORMAT }, { "audio.rate", SPA_STRINGIFY(DEFAULT_RATE) }, { "audio.channels", SPA_STRINGIFY(DEFAULT_CHANNELS) }, { "audio.position", DEFAULT_POSITION } }; diff --git a/src/modules/module-rtp-sink.c b/src/modules/module-rtp-sink.c index e0d34482b..5f8c3e3d7 100644 --- a/src/modules/module-rtp-sink.c +++ b/src/modules/module-rtp-sink.c @@ -199,7 +199,7 @@ PW_LOG_TOPIC(mod_topic, "mod." NAME); "( sess.min-ptime= ) " \ "( sess.max-ptime= ) " \ "( sess.media= ) " \ - "( audio.format= ) " \ + "( audio.format= ) " \ "( audio.rate= ) " \ "( audio.channels= ) " \ "( audio.position= ) " \ diff --git a/src/modules/module-rtp-source.c b/src/modules/module-rtp-source.c index 332d126d5..f294d2c8e 100644 --- a/src/modules/module-rtp-source.c +++ b/src/modules/module-rtp-source.c @@ -174,7 +174,7 @@ PW_LOG_TOPIC(mod_topic, "mod." 
NAME); "( sess.latency.msec= ) "\ "( sess.ignore-ssrc= ) "\ "( sess.media= ) " \ - "( audio.format= ) " \ + "( audio.format= ) " \ "( audio.rate= ) " \ "( audio.channels= ) " \ "( audio.position= ) " \ diff --git a/src/modules/module-rtp/stream.c b/src/modules/module-rtp/stream.c index c61950978..5bbc43abc 100644 --- a/src/modules/module-rtp/stream.c +++ b/src/modules/module-rtp/stream.c @@ -613,7 +613,7 @@ static int parse_audio_info(const struct pw_properties *props, struct spa_audio_ { return spa_audio_info_raw_init_dict_keys(info, &SPA_DICT_ITEMS( - SPA_DICT_ITEM(SPA_KEY_AUDIO_FORMAT, DEFAULT_FORMAT), + SPA_DICT_ITEM(SPA_KEY_AUDIO_FORMAT, DEFAULT_RAW_AUDIO_FORMAT), SPA_DICT_ITEM(SPA_KEY_AUDIO_RATE, SPA_STRINGIFY(DEFAULT_RATE)), SPA_DICT_ITEM(SPA_KEY_AUDIO_POSITION, DEFAULT_POSITION)), &props->dict, diff --git a/src/modules/module-rtp/stream.h b/src/modules/module-rtp/stream.h index 37d041b06..94ceb3650 100644 --- a/src/modules/module-rtp/stream.h +++ b/src/modules/module-rtp/stream.h @@ -11,7 +11,8 @@ extern "C" { struct rtp_stream; -#define DEFAULT_FORMAT "S16BE" +#define DEFAULT_RAW_AUDIO_FORMAT "S16BE" +#define DEFAULT_RAOP_AUDIO_FORMAT "S16LE" #define DEFAULT_RATE 48000 #define DEFAULT_CHANNELS 2 #define DEFAULT_POSITION "[ FL FR ]" From 470a758e2a1ac0153dc9cc53d9259e29a2493161 Mon Sep 17 00:00:00 2001 From: Carlos Rafael Giani Date: Wed, 24 Jun 2026 10:35:48 +0200 Subject: [PATCH 2/5] module-rtp: Correctly scale device_delay in constant latency mode --- src/modules/module-rtp/audio.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/modules/module-rtp/audio.c b/src/modules/module-rtp/audio.c index 5021b223b..20f58fde6 100644 --- a/src/modules/module-rtp/audio.c +++ b/src/modules/module-rtp/audio.c @@ -217,6 +217,13 @@ static void rtp_audio_process_playback(void *data) avail = spa_ringbuffer_get_read_index(&impl->ring, &timestamp); + if (impl->io_position) { + uint32_t clock_rate = impl->io_position->clock.rate.denom; + /* Device delay is reported in
clock rate units. If this does not + * match the RTP rate, the device delay must be transformed first. */ + device_delay = scale_u64(device_delay, impl->rate, clock_rate); + } + /* Reduce target buffer by the delay amount to start playback sooner. * This compensates for the delay to the device. */ if (SPA_UNLIKELY(impl->target_buffer < device_delay)) { From f17b9b3ce679240b0557a9eac428e746bfcc3b7f Mon Sep 17 00:00:00 2001 From: Carlos Rafael Giani Date: Wed, 24 Jun 2026 13:56:27 +0200 Subject: [PATCH 3/5] module-rtp-source: Detect and drop packets that are larger than the MTU --- src/modules/module-rtp-source.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/modules/module-rtp-source.c b/src/modules/module-rtp-source.c index f294d2c8e..651c7c1be 100644 --- a/src/modules/module-rtp-source.c +++ b/src/modules/module-rtp-source.c @@ -311,9 +311,22 @@ on_rtp_io(void *data, int fd, uint32_t mask) current_time = get_time_ns(impl); if (mask & SPA_IO_IN) { - if ((len = recvfrom(fd, impl->buffer, impl->buffer_size, 0, (struct sockaddr *)(&recvaddr), &recvaddr_len)) < 0) + if ((len = recvfrom(fd, impl->buffer, impl->buffer_size, +#ifdef __linux__ + /* Use this Linux specific feature to get the actual size of the + * packet, even if it was truncated due to it being larger than + * the buffer size. The code below uses this to detect packets + * that exceed the MTU size. */ + MSG_TRUNC, +#else + 0, +#endif + (struct sockaddr *)(&recvaddr), &recvaddr_len)) < 0) goto receive_error; + if (SPA_UNLIKELY((size_t)len > impl->buffer_size)) + goto packet_larger_than_mtu; + /* Filter the packets to exclude those with source addresses * that do not match the expected one. Only used with unicast. 
* (The bind() call in make_socket takes care of only @@ -373,6 +386,12 @@ short_packet: pw_log_warn("(%d suppressed) short packet of len %zd received", suppressed, len); return; +packet_larger_than_mtu: + if ((suppressed = spa_ratelimit_test(&impl->rate_limit, current_time)) >= 0) + pw_log_warn("(%d suppressed) packet received that is larger than " + "the configured MTU (%zu bytes)", + suppressed, impl->buffer_size); + return; } static int rejoin_igmp_group(struct spa_loop *loop, bool async, uint32_t seq, From 31bb82e11643c2ef686cd55e44eec9071e4a3ebd Mon Sep 17 00:00:00 2001 From: Carlos Rafael Giani Date: Fri, 19 Jun 2026 17:38:18 +0200 Subject: [PATCH 4/5] module-rtp: Add RTP jitter buffer This new data structure is useful for reordering incoming packets if they arrive out-of-order. Many audio codecs require frames and/or packets to be processed in sequence order due to inter-frame dependencies, so reordering is critical for such encoded data. It also detects lost packets and reports those in sequence with received packets (crucial for proper PLC), and detects and drops late and duplicate packets. --- src/modules/meson.build | 3 +- src/modules/module-rtp/jitter-buffer.c | 1057 +++++++++++++++ src/modules/module-rtp/jitter-buffer.h | 317 +++++ test/meson.build | 11 + test/modules/module-rtp/test-jitter-buffer.c | 1259 ++++++++++++++++++ 5 files changed, 2646 insertions(+), 1 deletion(-) create mode 100644 src/modules/module-rtp/jitter-buffer.c create mode 100644 src/modules/module-rtp/jitter-buffer.h create mode 100644 test/modules/module-rtp/test-jitter-buffer.c diff --git a/src/modules/meson.build b/src/modules/meson.build index 6bd108e95..2a2b52678 100644 --- a/src/modules/meson.build +++ b/src/modules/meson.build @@ -595,7 +595,8 @@ summary({'zeroconf-discover': build_module_zeroconf_discover}, bool_yn: true, se # (by avoiding build script code duplication), create a static library # that contains that common code. 
pipewire_module_rtp_common_lib = static_library('pipewire-module-rtp-common-lib', - [ 'module-rtp/stream.c' ], + [ 'module-rtp/stream.c', + 'module-rtp/jitter-buffer.c' ], include_directories : [configinc], install : false, dependencies : [mathlib, dl_lib, rt_lib, pipewire_dep, opus_dep], diff --git a/src/modules/module-rtp/jitter-buffer.c b/src/modules/module-rtp/jitter-buffer.c new file mode 100644 index 000000000..34a8fc34f --- /dev/null +++ b/src/modules/module-rtp/jitter-buffer.c @@ -0,0 +1,1057 @@ +/* PipeWire */ +/* SPDX-FileCopyrightText: Copyright © 2026 Carlos Rafael Giani */ +/* SPDX-License-Identifier: MIT */ + +#include +#include +#include +#include + +#include + +#include +#include + +#include + +#include +#include + +PW_LOG_TOPIC_EXTERN(mod_topic); +#define PW_LOG_TOPIC_DEFAULT mod_topic + +/* RTP jitter buffer design overview + * + * NOTE: For basic information about what the jitter buffer does and how it + * is used, read through the rtp_jitter_buffer struct documentation first. + * + * The jitter buffer consists of slots and the valid seqnum window. Slots + * are abstract entities, and exist as items in the slots array. This array + * contains num_slots items. Valid slots contain elements; these are packets + * or gaps. The slot element_type is then set accordingly either to + * RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET or RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP. + * (See the rtp_jitter_buffer struct documentation for what "valid" means.) + * If a slot contains a packet, then the packet data is stored in the + * packet_buffer, as described in the rtp_jitter_buffer struct documentation, + * + * A "slot index" goes from 0 to (num_slots-1) and is used for accessing + * the slot array. 
Slot indices are mapped to sequence numbers through modulo + * arithmetic: + * + * slot_index = seqnum mod num_slots + * + * This works reliably because the valid seqnum window establishes the + * range of valid sequence numbers, and because that window can at most + * be of size num_slots. Thus, even if a much older or much newer packet + * (that is, a packet with much smaller or larger seqnum than the ones + * encountered thus far) arrives, there is no danger of that modulo + * arithmetic incorrectly aliasing slots, since the valid seqnum window + * will take care of validating the seqnums first. + * + * In RTP, since sequence numbers are unsigned 16-bit integers, the integer + * wrap around needs to be addressed. Packet sequence number 65535 can be + * reached in real world scenarios depending on the packet duration. For + * example, if a packet covers 1 ms, then after ~66 seconds, sequence + * number 65535 will be reached. For this reason, the valid seqnum window's + * start and length quantities use uint32_t as type - it factors out the + * wrap-around in certain calculations, which makes them easier. + * + * The wrap around behavior also makes it non-trivial to calculate sequence + * number deltas correctly. For this reason, the calculate_seqnum_delta() + * utility function is used for seqnum deltas. + * + * In regular mode, incoming packets all have had the expected monotonically + * incrementing sequence number. (A sequence number wrap around technically + * means that they do not monotonically increment, but this is omitted here, + * because the wrap around is a numerical limitation, and it is handled + * as a continuation of a packet seqnum increment.) In this mode, the packet + * data, packet size, header size, and timestamp are forwarded immediately to the + * output_rtp_packet() function pointer without copying the packet data.
This + * improves performance during regular mode, which is the most common mode + * the jitter buffer will be in, unless the network quality is very poor. The + * slots and the valid seqnum window have no meaning in this mode. + * + * When the jitter buffer expects seqnum X, but sees seqnum X-N (that is, + * an older seqnum), the incoming packet with seqnum X-N is dropped, and + * regular mode continues. If the packet's seqnum instead is X+N, it means + * that there is a gap. For example, if the last packet had seqnum 200, + * the jitter buffer expects the next packet to have seqnum 201. If the + * next packet instead has seqnum 202, then there is a gap at 201. The + * jitter buffer switches to hold-back mode, and establishes a valid seqnum + * window that starts at 201 and is of length 2. That is, it covers the gap + * and the new packet. The gap is registered at slot (201 % num_slots), the + * packet is inserted in slot (202 % num_slots). Suppose that num_slots is 10. + * Then, the gap will be registered at slot 201 % 10 = 1, and the packet 202 + * will be stored at slot 202 % 10 = 2. To be more specific, item #1 in the + * slots array will have its type set to RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP, + * and item #2 will have its type set to RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET. + * The size of packet 202 will be stored in item #2 in the slots array. + * (Item #1 in the slots array will not have a packet size set, since a gap + * has no "size" in bytes.) + * Packet 202's data will be copied into packet_buffer, at location + * (max_packet_size * 2), since the slot index is 2. That way, slot 1 is set + * to contain a gap at seqnum 201, and slot 2 is set to contain a packet 202 + * with the specified packet data, packet size, header size, timestamp. + * + * In hold-back mode, every time a packet is inserted, the jitter buffer will + * check if the oldest N valid slots contain packets (and no gaps). 
The "oldest" + * slot is the one whose associated sequence number equals the value of + * valid_seqnum_window_start_seqnum. The second oldest slot's associated sequence + * number equals (valid_seqnum_window_start_seqnum+1) etc. If the oldest N slots + * indeed contain purely packets and no gaps (that is, N > 0), the jitter buffer + * will output the packets from those N oldest slots, in order (the packet from + * the oldest slot is output first, then comes the packet from the second oldest + * slot etc.). Then, the valid_seqnum_window_start_seqnum value is shifted forwards + * by N, since those packets from the oldest N slots are no longer held back. + * Accordingly, the valid_seqnum_window_length value is decremented by N. If there + * are still gaps, it means there are packets behind the gaps. In such a case, + * valid_seqnum_window_length will be nonzero even after decrementing it. But if + * this output step did emit all of the remaining held back packets, it implies + * that there is nothing left in the slots, nothing is being held back, so the + * jitter buffer switches back to regular mode. + * + * When a packet comes in whose sequence number is older than the start of the + * valid seqnum window, it is dropped, just like in the regular mode. If however + * the sequence number goes past the window, it effectively extends the window. + * Suppose that the window starts at seqnum 200, and is of length 5. This means + * that currently, all seqnums from 200 to 204 (both inclusive) are valid. But + * then, a packet with seqnum 209 comes in. This requires extending the window + * from length 5 to length 10, such that it covers all seqnums from 200 to 209. + * This step of course can introduce gaps. In this example, a new gap is added + * that starts at seqnum 205 and is of length 4, since the gap extends all the + * way to seqnum 208. The four slots that are associated with seqnums 205, 206, + * 207, 208 are all set to contain gaps. 
+ * + * However, the window cannot be extended indefinitely. If it is extended past + * num_slots, it is referred to as "overextended". Such a window cannot remain like + * this, because there are not enough slots to store all the elements it covers. + * In such a case, the jitter buffer will try to shift the window forward. If + * this shift amount is small enough, then the newest N of the currently valid + * slots remain in the window, and the oldest (valid_seqnum_window_length-N) slots need to be + * drained. To continue with the earlier example, suppose that num_slots is 8. + * This means that packet 209 would overextend the window by 2 slots. The + * jitter buffer then shifts the window such that it starts at 202 and is of + * length 8 (that is, the num_slots amount). Slots associated with seqnums that + * go from 202 to 204 remain within the window, but slots associated with seqnums + * 200 and 201 do not, and thus need to be drained. + * + * It is possible that the window is shifted so far ahead that _none_ of the + * currently valid slots remain in the window. In such a case, the window is + * reset to contain the newly arrived packet and to be of length 1. Since this + * is a case in which (as explained above) the jitter buffer sees that the + * oldest N packets (N being 1 in this case) have no gaps in between them, this + * means that the newly arrived packet is immediately output. And since after + * that, no held-back packets remain, the jitter buffer switches back to + * regular mode. + * + * When slots are drained, it means that the jitter buffer looks at the elements + * inside them, and calls output_rtp_packet() or signal_lost_packets(). Draining + * does aggregate gaps to avoid calling signal_lost_packets() often. Only valid + * slots (that is, slots whose associated sequence numbers lie within the valid + * seqnum window) are drained.
+ * For example, suppose that within the window, there is a gap at seqnum 201, + * a packet at seqnum 202, another one at seqnum 203, a gap that starts at seqnum + * 204 and goes until seqnum 208, and then a final packet at 209. Draining will + * cause the following sequence of calls (in this order): + * + * 1. signal_lost_packets() , with gap at seqnum 201, of length 1 + * 2. output_rtp_packet() , with data of packet at seqnum 202 + * 3. output_rtp_packet() , with data of packet at seqnum 203 + * 4. signal_lost_packets() , with gap at seqnum 204, of length 5 + * 5. output_rtp_packet() , with data of packet at seqnum 209 + * + * A partial drain drains only the oldest N slots in the window, and held-back + * packets remain. A full drain drains all slots in the entire window - + * nothing remains. In the full drain case, the jitter buffer switches back + * to regular mode afterwards. + * + * A full drain is automatically done if the hold-back mode persists for some + * time. This is a case where there is a gap, but the associated packet does + * not arrive in time (or not at all). The timeout_timer exists for this purpose; + * if it expires, it automatically does a full drain on the jitter buffer. + * That timer is reset when the window is shifted due to overextension, and + * is disabled when switching back to regular mode. + * + * In cases where draining is done because of a window overextension, the jitter + * buffer will signal packet loss after the drain if there is a gap in between + * where the window used to be and where it now is after the shift. If for example + * the window previously started at seqnum 200 with length 4, and now starts at + * seqnum 210 with length 10, there is a gap from 204 to 209 (both inclusive). + * The jitter buffer will announce that via signal_lost_packets(). If however + * that gap is larger than num_slots, then the jitter buffer calls that function + * pointer with open_ended set to true.
Such an "open ended gap" is a gap that + * is actually larger than num_slots, but is truncated to that length, to prevent + * PLC measures from having to produce an excessive amount of data. open_ended + * being set to true allows the signal_lost_packets() callback to apply additional + * measures, such as a fadeout, to cleanly terminate its PLC for the open gap. This + * might be necessary if PLC measures otherwise never reach silence on their own. + * + * These mechanisms allow for partial outputs in cases where there are multiple + * gaps in the slots, and also switch back to regular mode immediately once + * everything has been output or the jitter buffer is fully drained. + * + * Packet reordering is done implicitly, since when inserting packets, it is done + * so according to their seqnum, but packet output is done in order of their + * storage in the slots (the "oldest" slots, that is, slots associated with the + * oldest valid seqnums within the window, are output first). + * + * Note that slots that contain gaps are not explicitly marked as containing gaps. + * Instead, all items in the slots array have their element type initially set to + * RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP. Later, when packets are inserted, these + * types are overwritten with RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET. And, when + * packets are output, the associated slot array items have their types set back + * to RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP. This is because gaps are fundamentally + * implicit - they do not exist as their own entities (unlike packets).
*/ + +static int arm_timeout_timer(struct rtp_jitter_buffer *jitter_buffer); +static void disarm_timeout_timer(struct rtp_jitter_buffer *jitter_buffer); +static void on_timeout_expiration(void *data, uint64_t expirations); + +static inline size_t seqnum_to_slot_index(struct rtp_jitter_buffer *jitter_buffer, + uint16_t seqnum); + +static void store_packet(struct rtp_jitter_buffer *jitter_buffer, + const uint8_t *packet_data, size_t packet_size, size_t header_size, + uint32_t timestamp, uint16_t seqnum, bool check_for_duplicates); + +static int do_drain(struct rtp_jitter_buffer *jitter_buffer, + size_t num_oldest_slots_to_drain); + +int rtp_jitter_buffer_init(struct rtp_jitter_buffer *jitter_buffer, struct rtp_jitter_buffer_params *params) +{ + int ret = 0; + size_t idx; + + spa_assert(jitter_buffer != NULL); + spa_assert(!jitter_buffer->initialized); + spa_assert(params != NULL); + spa_assert(params->num_slots > 0); + spa_assert(params->max_packet_size > 0); + spa_assert(params->packet_duration > 0); + spa_assert(params->loop != NULL); + spa_assert(params->output_rtp_packet != NULL); + spa_assert(params->signal_lost_packets != NULL); + + spa_memzero(jitter_buffer, sizeof(struct rtp_jitter_buffer)); + memcpy(&(jitter_buffer->params), params, sizeof(struct rtp_jitter_buffer_params)); + + /* Set the flag here already to make sure that in case + * of errors, rtp_jitter_buffer_shutdown() correctly + * cleans up any partial initialization. 
*/ + jitter_buffer->initialized = true; + + jitter_buffer->slots = calloc(params->num_slots, sizeof(struct rtp_jitter_buffer_slot)); + if (jitter_buffer->slots == NULL) { + ret = -ENOMEM; + pw_log_error("Could not allocate memory for slots array: %m"); + goto error; + } + + jitter_buffer->packet_buffer = calloc(params->num_slots, params->max_packet_size); + if (jitter_buffer->packet_buffer == NULL) { + ret = -ENOMEM; + pw_log_error("Could not allocate memory for packet buffer: %m"); + goto error; + } + + for (idx = 0; idx < jitter_buffer->params.num_slots; ++idx) + jitter_buffer->slots[idx].element_type = RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP; + + jitter_buffer->hold_back_mode = false; + jitter_buffer->last_seqnum = -1; + + /* Synchronize loop access. See the spa_loop_methods + * documentation; the add_source() documentation states: + * "Must be called from the loop's own thread." By extension, + * this also applies to pw_loop_add_timer(). */ + pw_loop_lock(jitter_buffer->params.loop); + jitter_buffer->timeout_timer = pw_loop_add_timer(params->loop, on_timeout_expiration, + jitter_buffer); + pw_loop_unlock(jitter_buffer->params.loop); + + if (jitter_buffer->timeout_timer == NULL) { + ret = -errno; + pw_log_error("Could not create timeout timer: %m"); + goto error; + } + + pw_log_info("Initialized RTP jitter buffer: num slots: %zu; max packet size: %zu bytes; " + "packet duration: %" PRIu64 " ns; total capacity duration: %" PRIu64 " ns", + params->num_slots, params->max_packet_size, params->packet_duration, + params->packet_duration * params->num_slots); + +finish: + return ret; + +error: + rtp_jitter_buffer_shutdown(jitter_buffer); + goto finish; +} + +void rtp_jitter_buffer_shutdown(struct rtp_jitter_buffer *jitter_buffer) +{ + spa_assert(jitter_buffer != NULL); + + if (!jitter_buffer->initialized) + return; + + if (jitter_buffer->timeout_timer != NULL) { + /* Synchronize loop access. 
See the spa_loop_methods + * documentation; the remove_source() documentation states: + * "Must be called from the loop's own thread." By extension, + * this also applies to pw_loop_destroy_source(). */ + pw_loop_lock(jitter_buffer->params.loop); + /* Note that this also internally detaches the source from the + * loop, and thus, any previously queued timeout callbacks are + * no longer invoked once this call ends. + * (For more, see the detach_source(), loop_destroy_source(), + * and loop_iterate() functions in spa/plugins/support/loop.c , + * particularly how they handle ep[i].data . Also see how + * remove_from_poll() - called by loop_destroy_source() - and + * loop_iterate() handle remove_count .) */ + pw_loop_destroy_source(jitter_buffer->params.loop, jitter_buffer->timeout_timer); + pw_loop_unlock(jitter_buffer->params.loop); + } + + free(jitter_buffer->slots); + free(jitter_buffer->packet_buffer); + + jitter_buffer->initialized = false; + + pw_log_info("RTP jitter buffer shut down"); +} + +int rtp_jitter_buffer_insert_packet(struct rtp_jitter_buffer *jitter_buffer, + const uint8_t *packet_data, size_t packet_size, + size_t header_size, uint32_t timestamp, uint16_t seqnum) +{ + struct rtp_jitter_buffer_params *params; + int ret = 0; + size_t slot_index; + int16_t seqnum_delta; + + spa_assert(jitter_buffer != NULL); + spa_assert(jitter_buffer->initialized); + spa_assert(packet_data != NULL); + spa_assert(header_size <= packet_size); + + params = &(jitter_buffer->params); + + spa_assert(packet_size <= params->max_packet_size); + + pw_log_trace("Got packet with size %zu and seqnum %" PRIu16 "; valid seqnum window: " + "start seqnum / length: %" PRIu32 " / %" PRIu32, packet_size, seqnum, + jitter_buffer->valid_seqnum_window_start_seqnum, + jitter_buffer->valid_seqnum_window_length); + + if (jitter_buffer->hold_back_mode) { + size_t idx = 0; + size_t num_oldest_slots_with_packets; + + spa_assert(jitter_buffer->last_seqnum >= 0); + + seqnum_delta = 
calculate_seqnum_delta(jitter_buffer->valid_seqnum_window_start_seqnum, seqnum); + + if (seqnum_delta < 0) { + /* In hold-back mode, sequence numbers at or after the sequence + * number stored in valid_seqnum_window_start_seqnum are of interest. + * Sequence numbers older than that are stale and to be discarded. + * See the overview at the top of this file for details. */ + pw_log_info("Dropping packet with stale or duplicate sequence number %" + PRId32, seqnum); + goto finish; + } else if ((size_t)seqnum_delta < params->num_slots) { + /* Packet is not stale, and fits within the capacity of the jitter + * buffer, that is, its sequence number is not too far ahead of the + * valid_seqnum_window_start_seqnum. Insert it into the buffer + * according to its sequence number. + * + * In case its sequence number is further ahead than the current + * valid seqnum window (but still fits within the jitter buffer + * capacity), it means that this packet actually extends the window + * when inserted into the buffer, and it does so this way: + * + * window_length = MAX(window_length, seqnum - window_start_seqnum + 1) + * + * (seqnum - window_start_seqnum) is stored in seqnum_delta, so this + * becomes: + * + * window_length = MAX(window_length, seqnum_delta + 1) + * + * (The +1 is there because the length stores a size-like quantity, + * so it must be added to avoid an off-by-one error.) + * + * Also see the overview at the top. */ + + jitter_buffer->valid_seqnum_window_length = SPA_MAX( + jitter_buffer->valid_seqnum_window_length, ((size_t)seqnum_delta) + 1); + + store_packet(jitter_buffer, packet_data, packet_size, header_size, + timestamp, seqnum, true); + } else { + /* The packet's sequence number is beyond the valid seqnum window, + * and trying to extend the window to encompass that packet would + * overextend it, that is, its length would exceed num_slots. + * + * (See the overview at the top.) + * + * Check by how much the window would be overextended. 
If the + * overextension is less than the valid seqnum window length, then + * some of the currently valid slots would remain valid after shifting + * the window. Specifically, the last (valid_seqnum_window_length - + * overextension_amount) slots remain valid, while the ones before that + * need to be drained. The result is a valid seqnum window that has been + * shifted forward by overextension_amount, with a length that + * equals the num_slots amount. + * + * seqnum_delta+1 equals the number of slots from the start of the + * window to the new packet (+1 since the new packet itself is included). + * Since by now, we know that the window would be overextended, this + * implies that (seqnum_delta+1) > num_slots. + * + * The overextension amount therefore is: + * + * overextension_amount = (seqnum_delta+1) - num_slots + * + * This is also the number of oldest slots that need to be drained at the + * start of the valid seqnum window before shifting it. If encompassing + * the new packet increases the window by overextension_amount, draining + * those oldest slots decreases it again by overextension_amount, resulting + * in a window length equaling num_slots. + * + * If however the overextension is equal to or larger than the valid seqnum + * window length, then none of the slots within the window remain valid. + * It therefore makes no sense then to try to keep them around, so the + * jitter buffer is fully drained before inserting the new packet. Also, + * this means that afterwards, the slot that contains the new packet is + * the only remaining one, that is, the window length is 1.
*/ + + size_t overextension_amount; + bool window_fully_invalid; + size_t num_oldest_slots_to_drain; + bool open_ended = false; + + overextension_amount = ((size_t)seqnum_delta) + 1 - params->num_slots; + window_fully_invalid = (overextension_amount >= jitter_buffer->valid_seqnum_window_length); + num_oldest_slots_to_drain = SPA_MIN(overextension_amount, + jitter_buffer->valid_seqnum_window_length); + + pw_log_debug("Packet with seqnum %" PRIu16 " exceeds capacity of jitter buffer " + "in hold-back mode; window shift amount: %zu window fully invalid: %d " + "num oldest slots to drain: %zu", seqnum, overextension_amount, + window_fully_invalid, num_oldest_slots_to_drain); + + if (num_oldest_slots_to_drain > 0) { + ret = do_drain(jitter_buffer, num_oldest_slots_to_drain); + if (SPA_UNLIKELY(ret < 0)) { + ret = -EIO; + goto finish; + } + } + + /* If the window was fully invalidated (= drained), insert a packet loss + * signal to allow callers to append some sort of PLC / fadeout to the + * drained data. This is helpful for avoiding hard cutoffs in the output. */ + if (window_fully_invalid) { + ssize_t gap_length; + uint16_t one_past_last_valid_seqnum; + + one_past_last_valid_seqnum = jitter_buffer->valid_seqnum_window_start_seqnum + + jitter_buffer->valid_seqnum_window_length; + gap_length = calculate_seqnum_delta(one_past_last_valid_seqnum, seqnum); + if (SPA_UNLIKELY(gap_length < 0)) { + /* This would indicate a serious error in the calculations, + * so log it accordingly. 
*/ + pw_log_error("Negative packet loss amount %zd detected; valid seqnum " + "window start seqnum / length: %" PRIu32 " / %" PRIu32 "; " + "using packet loss amount 0 instead", + gap_length, jitter_buffer->valid_seqnum_window_start_seqnum, + jitter_buffer->valid_seqnum_window_length); + gap_length = 0; + } + + /* If the gap exceeds the capacity of the jitter buffer, then limit the + * packet loss amount and consider the gap open ended to avoid cases where + * callers otherwise would have to conceal an excessive amount of packet loss. */ + if ((size_t)gap_length > params->num_slots) { + open_ended = true; + gap_length = params->num_slots; + } + + if (gap_length > 0) { + if (open_ended) { + pw_log_debug("Signaling packet loss, starting at seqnum %" + PRIu16 ", gap length %zd, open ended", + one_past_last_valid_seqnum, gap_length); + } else { + pw_log_debug("Signaling packet loss, starting at seqnum %" + PRIu16 ", gap length %zd, not open ended", + one_past_last_valid_seqnum, gap_length); + } + if ((ret = params->signal_lost_packets(params->context, + one_past_last_valid_seqnum, (size_t)gap_length, open_ended)) != 0) { + pw_log_error("Could not signal lost RTP packet: %s", spa_strerror(ret)); + goto finish; + } + } + } + + /* As explained in the overview, a shift that fully invalidates + * the window is a different case than one that retains entries + * from the old window. */ + if (window_fully_invalid) { + jitter_buffer->valid_seqnum_window_start_seqnum = seqnum; + jitter_buffer->valid_seqnum_window_length = 1; + } else { + /* If the window is not fully invalid, then figure out + * its new start_seqnum by moving backwards. A not fully + * invalid window is of length num_slots (see the overview + * for why), and ends at the sequence number of the new + * packet. Therefore, it starts at (seqnum +1 - num_slots). + * (+1 since the new packet itself also has to be factored in.) 
*/ + jitter_buffer->valid_seqnum_window_start_seqnum = seqnum + 1 - params->num_slots; + jitter_buffer->valid_seqnum_window_length = params->num_slots; + } + pw_log_debug("Reset valid seqnum window to start at seqnum %" PRIu32 " and be of length %" + PRIu32, jitter_buffer->valid_seqnum_window_start_seqnum, + jitter_buffer->valid_seqnum_window_length); + + /* Now write this new packet into the packet buffer to hold + * it back until the gap before it is filled. */ + store_packet(jitter_buffer, packet_data, packet_size, header_size, + timestamp, seqnum, false); + + /* NOTE: This assumes that spa_system_timerfd_settime() behaves just + * like timerfd_settime() in the sense that it clears any timer + * expirations that haven't been noticed yet. In other words, even + * if the timer expired already, rearming by calling + * spa_system_timerfd_settime() is assumed to implicitly wipe + * these prior expirations. + * + * This is precisely what timerfd_settime() does. From its manpage: + * + * > If the timer has already expired one or more times ***since its + * > settings were last modified using timerfd_settime()***, or since + * > the last successful read(2), then the buffer given to read(2) + * > returns an unsigned 8-byte integer (uint64_t) containing the + * > number of expirations that have occurred. + * + * And this is important to make sure that, should the timer expire while + * we are here, it does not lead to an immediate jitter buffer draining. + */ + if (ret == 0) { + pw_log_debug("Rearming timeout timer as part of resetting hold-back mode"); + ret = arm_timeout_timer(jitter_buffer); + if (SPA_UNLIKELY(ret < 0)) { + pw_log_error("Could not arm timer: %s", spa_strerror(ret)); + ret = -EINVAL; + } + } else { + pw_log_warn("Not rearming timeout timer due to earlier error while draining"); + } + + /* Next, let the logic below check the contents of the new, + * updated valid seqnum window. 
*/ + } + + /* After inserting the packet, go through the valid window and check if + * the oldest N slots contain packets without gaps in between them. First, + * N (= num_oldest_slots_with_packets) has to be determined. Iterate over + * the slots, starting at the valid seqnum window start, until either, + * a slot with a gap inside is encountered, or the end of the window is + * reached. That way, the number of oldest N held back packets that present + * in an uninterrupted sequence in the oldest N slots and thus can be output + * is determined. */ + for (idx = 0; idx < jitter_buffer->valid_seqnum_window_length; ++idx) { + uint16_t item_seqnum = jitter_buffer->valid_seqnum_window_start_seqnum + idx; + slot_index = seqnum_to_slot_index(jitter_buffer, item_seqnum); + if (jitter_buffer->slots[slot_index].element_type != + RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET) + break; + } + num_oldest_slots_with_packets = idx; + + if (num_oldest_slots_with_packets > 0) { + pw_log_debug("The first %zu slots in valid seqnum window all contain " + "packets - these packets can be output immediately", + num_oldest_slots_with_packets); + + /* Now output the packets. */ + for (idx = 0; idx < num_oldest_slots_with_packets; ++idx) { + struct rtp_jitter_buffer_slot *slot; + uint16_t item_seqnum; + const uint8_t *packet_data_to_output; + + /* This implicitly does the sequence number wrap-around + * by storing the value in uint16_t. 
*/ + item_seqnum = jitter_buffer->valid_seqnum_window_start_seqnum + idx; + + slot_index = seqnum_to_slot_index(jitter_buffer, item_seqnum); + slot = &(jitter_buffer->slots[slot_index]); + + packet_data_to_output = jitter_buffer->packet_buffer + + params->max_packet_size * slot_index; + + ret = params->output_rtp_packet(params->context, packet_data_to_output, + slot->packet_size, slot->header_size, slot->timestamp, item_seqnum); + if (SPA_UNLIKELY(ret < 0)) { + pw_log_error("Could not output RTP packet: %s", spa_strerror(ret)); + ret = -EIO; + goto finish; + } + + slot->element_type = RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP; + } + + /* Since num_oldest_slots_with_packets is the result of a for-loop that counts up + * to valid_seqnum_window_length, this assertion should always hold. */ + spa_assert(jitter_buffer->valid_seqnum_window_length >= num_oldest_slots_with_packets); + + /* Move the window start further to move past the slots whose packets + * were just output. This also reduces the valid seqnum window length. */ + jitter_buffer->valid_seqnum_window_start_seqnum += num_oldest_slots_with_packets; + jitter_buffer->valid_seqnum_window_length -= num_oldest_slots_with_packets; + + pw_log_trace("%zu packet(s) output in hold-back mode; valid seqnum window " + "is now: start seqnum / length: %" PRIu32 " / %" PRIu32, + num_oldest_slots_with_packets, + jitter_buffer->valid_seqnum_window_start_seqnum, + jitter_buffer->valid_seqnum_window_length); + + /* If the valid seqnum window length has become is zero, it means that + * all held-back packets have been output, and no gaps remain. + * This in turn means that we are done - all previously missing packets + * have been received and have been output in order of their sequence + * numbers. Switch back to regular mode and set the seqnum of the last + * output packet the last_seqnum. 
*/ + if (jitter_buffer->valid_seqnum_window_length == 0) { + jitter_buffer->hold_back_mode = false; + /* Don't set this to the newly arrived packet's seqnum directly. + * Hold-back mode is active, which means that packets previously + * arrived out of order - so, the new packet too might not have + * arrived in order. */ + jitter_buffer->last_seqnum = jitter_buffer->valid_seqnum_window_start_seqnum - 1; + /* Also cancel the timeout timer to not trigger an erroneous drain. */ + disarm_timeout_timer(jitter_buffer); + pw_log_debug("No more packets held back; switched back to regular mode, " + "last seqnum set to %" PRId32, jitter_buffer->last_seqnum); + } + } + } else { + /* Here, we are in regular mode. */ + + /* If a preceding sequence number is known, use it to detect gaps. For + * example, last sequence number is 400, this packet's sequence number + * is 402 -> there is a gap at sequence number 401. This can happen due + * to packet loss and out-of-order packet transmission. + * + * (Also see the overview at the top.) */ + if (jitter_buffer->last_seqnum >= 0) { + seqnum_delta = calculate_seqnum_delta(jitter_buffer->last_seqnum, seqnum); + + if (seqnum_delta <= 0) { + /* In the regular mode, it is implied that all preceding + * packets were in order of the sequence numbers, and that + * any missing packets have been also announced in-order. + * Therefore, if at this point, the delta is <= 0, it means + * that a packet with a sequence number equal to or lower + * than the previous packet's sequence number arrived + * (taking possible wraparound into account). This packet + * may be a duplicate, and is unusable, so drop it. */ + + pw_log_info("Dropping packet with stale sequence number %" PRId32, seqnum); + goto finish; + } else if (seqnum_delta > 1) { + /* If the delta is >1, it indicates a gap. This can happen + * due to out-of-order packets or due to packet loss. 
+ * Activate hold-back mode: Hold back the received packet + * and any further ones until there are consecutive + * sequences of packets available, which can be output. + * And, write information about that packet and the gap into + * the associated slots. + * Also arm the timeout timer in case the missing packets + * do not arrive in time. */ + + jitter_buffer->hold_back_mode = true; + /* Set the valid seqnum window start to the sequence number that + * was actually expected as the one that follows the last packet. + * Since this place was reached, it implies that the current packet's + * sequence number does not equal to that one, and thus, the packet + * we actually expected is still missing. This is the oldest packet + * we expect here, since packets with a sequence number preceding + * that packet's have been processed already at this point. */ + jitter_buffer->valid_seqnum_window_start_seqnum = + ((uint16_t)jitter_buffer->last_seqnum) + 1; + /* The current valid seqnum window length equals the seqnum_delta. This + * is because the window goes from valid_seqnum_window_start_seqnum to + * the last slot with a packet in the valid seqnum window (both inclusive). + * For example, if valid_seqnum_window_start_seqnum is initially set + * to 301 here, and the current packet has sequence number 305, it + * means that the window goes from 301 to 305, so it consists of + * 5 slots (the first 4 contain the gap from 301 to 304, and the fifth + * slot contains packet 305). 
+ * + * seqnum_delta equals: + * + * (this current packet's seqnum - last_seqnum) + * + * Substituting in the seqnum_delta formula, we get: + * + * valid_seqnum_window_start_seqnum = last_seqnum + 1 + * -> last_seqnum = valid_seqnum_window_start_seqnum - 1 + * + * and: + * + * seqnum_delta = + * (this current packet's seqnum - last_seqnum) = + * (this current packet's seqnum - (valid_seqnum_window_start_seqnum - 1)) = + * (this current packet's seqnum - valid_seqnum_window_start_seqnum + 1) + * + * which is exactly the formula needed to compute a size or length + * from A to B (or, in this case, from valid_seqnum_window_start_seqnum + * to the packet's seqnum). */ + jitter_buffer->valid_seqnum_window_length = seqnum_delta; + + pw_log_debug("Gap detected starting at seqnum %" PRIu32 ", length %" + PRId16 "; switched to hold-back mode", + jitter_buffer->valid_seqnum_window_start_seqnum, + seqnum_delta - 1); + + if (SPA_UNLIKELY(jitter_buffer->valid_seqnum_window_length > params->num_slots)) { + /* It is possible that the sudden gap that leads to the + * activation of the hold-back mode is so big that it + * immediately overextends the valid seqnum window. + * However, unlike in the hold-back cases handled above, + * here, nothing is held back yet, so there are no slots + * that can remain valid. Therefore, this behaves just like + * in the window_fully_invalid == true case further above. + * + * In most cases, the gap is way larger than the number of + * slots. There is a corner case though, and that is when + * the window length equals (num_slots + 1). Keep in mind + * that the window includes the gap _and_ the new packet - + * hence the +1. From this it follows that if the window + * length equals num_slots+1, it means that the actual + * gap is num_slots in length. This is essentially the + * largest possible gap that still isn't open ended. For + * this reason, a check is made to identify this corner case. 
+ * + * In either case, the correct, uninterrupted, in-order + * packets that precede this gap are followed by proper PLC. + * Should the gap be open ended, PLC can be augmented to + * include a fadeout for example. + * + * And, since an overextended window is invalid, and in this + * case, no held-back packets are presents, the valid seqnum + * window is adjusted to only contain the current packet. + * + * After signaling, the jitter buffer switches back to regular + * mode. This is because after the large gap, nothing is held + * back anymore - there is no reason for staying in the + * hold-back mode. */ + + bool open_ended = jitter_buffer->valid_seqnum_window_length > + (params->num_slots + 1); + + pw_log_debug("This gap immediately overextends the valid seqnum " + "window; signaling packet loss, starting at seqnum %" + PRIu16 ", gap length %zd, %sopen ended", + jitter_buffer->valid_seqnum_window_start_seqnum, + params->num_slots, open_ended ? "" : "not "); + + if ((ret = params->signal_lost_packets(params->context, + jitter_buffer->valid_seqnum_window_start_seqnum, + params->num_slots, open_ended)) != 0) { + pw_log_error("Could not signal lost RTP packet: %s", spa_strerror(ret)); + goto finish; + } + + pw_log_debug("Switching back to regular mode and emitting packet " + "after the large gap"); + + jitter_buffer->hold_back_mode = false; + ret = params->output_rtp_packet(params->context, packet_data, + packet_size, header_size, timestamp, seqnum); + jitter_buffer->last_seqnum = seqnum; + goto finish; + + } + + store_packet(jitter_buffer, packet_data, packet_size, header_size, + timestamp, seqnum, false); + + ret = arm_timeout_timer(jitter_buffer); + if (SPA_UNLIKELY(ret < 0)) { + pw_log_error("Could not arm timer: %s", spa_strerror(ret)); + ret = -EINVAL; + } + + goto finish; + } + } + + /* If hold-back mode is not active, just forward the packet. 
*/ + ret = params->output_rtp_packet(params->context, packet_data, packet_size, + header_size, timestamp, seqnum); + + jitter_buffer->last_seqnum = seqnum; + } + +finish: + return ret; +} + +int rtp_jitter_buffer_drain(struct rtp_jitter_buffer *jitter_buffer) +{ + int ret; + + spa_assert(jitter_buffer != NULL); + spa_assert(jitter_buffer->initialized); + + if (!jitter_buffer->hold_back_mode) + return 0; + + ret = do_drain(jitter_buffer, jitter_buffer->valid_seqnum_window_length); + + /* Not calling signal_lost_packets with open_ended = true here, since + * that one is meant for cases where mid-stream, the hold-back mode was + * activated, and a sudden large sequence jump occurred. This function + * here however is called externally. */ + + jitter_buffer->hold_back_mode = false; + jitter_buffer->last_seqnum = -1; + pw_log_debug("Switching back to regular mode after explicit drain"); + + /* Disarm any potentially ongoing timer since the + * jitter buffer just switched back to regular mode. */ + disarm_timeout_timer(jitter_buffer); + + return ret; +} + +void rtp_jitter_buffer_flush(struct rtp_jitter_buffer *jitter_buffer) +{ + size_t idx; + + spa_assert(jitter_buffer != NULL); + spa_assert(jitter_buffer->initialized); + + jitter_buffer->hold_back_mode = false; + jitter_buffer->last_seqnum = -1; + pw_log_debug("Switching back to regular mode after flush"); + + for (idx = 0; idx < jitter_buffer->params.num_slots; ++idx) + jitter_buffer->slots[idx].element_type = RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP; + + /* Disarm any potentially ongoing timer since the + * jitter buffer just switched back to regular mode. 
*/ + disarm_timeout_timer(jitter_buffer); +} + +static int set_timeout(struct rtp_jitter_buffer *jitter_buffer, uint64_t timeout) +{ + struct itimerspec ts; + + ts.it_value.tv_sec = timeout / SPA_NSEC_PER_SEC; + ts.it_value.tv_nsec = timeout % SPA_NSEC_PER_SEC; + ts.it_interval.tv_sec = 0; + ts.it_interval.tv_nsec = 0; + + return spa_system_timerfd_settime(jitter_buffer->params.loop->system, + jitter_buffer->timeout_timer->fd, 0, &ts, NULL); +} + +static int arm_timeout_timer(struct rtp_jitter_buffer *jitter_buffer) +{ + struct rtp_jitter_buffer_params *params = &(jitter_buffer->params); + + /* Set the timeout to expire at the total duration + * covered by the packet buffer's capacity. */ + return set_timeout(jitter_buffer, + params->num_slots * params->packet_duration); +} + +static void disarm_timeout_timer(struct rtp_jitter_buffer *jitter_buffer) +{ + set_timeout(jitter_buffer, 0); +} + +static void on_timeout_expiration(void *data, uint64_t expirations) +{ + struct rtp_jitter_buffer *jitter_buffer = data; + + pw_log_info("Timeout timer expired; draining jitter buffer"); + do_drain(jitter_buffer, jitter_buffer->valid_seqnum_window_length); + jitter_buffer->hold_back_mode = false; + jitter_buffer->last_seqnum = -1; +} + +static void store_packet(struct rtp_jitter_buffer *jitter_buffer, const uint8_t *packet_data, + size_t packet_size, size_t header_size, uint32_t timestamp, + uint16_t seqnum, bool check_for_duplicates) +{ + size_t slot_index; + uint8_t *dest; + struct rtp_jitter_buffer_params *params = &(jitter_buffer->params); + struct rtp_jitter_buffer_slot *slot; + + slot_index = seqnum_to_slot_index(jitter_buffer, seqnum); + slot = &(jitter_buffer->slots[slot_index]); + + if (check_for_duplicates) { + if (SPA_UNLIKELY(slot->element_type == RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET)) { + pw_log_debug("Packet with seqnum %" PRIu16 " has already been inserted; " + "this is a duplicate; dropping", seqnum); + return; + } + } + + dest = jitter_buffer->packet_buffer + 
params->max_packet_size * slot_index; + memcpy(dest, packet_data, packet_size); + + slot->element_type = RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET; + slot->packet_size = packet_size; + slot->header_size = header_size; + slot->timestamp = timestamp; +} + +static inline size_t seqnum_to_slot_index(struct rtp_jitter_buffer *jitter_buffer, + uint16_t seqnum) +{ + return ((size_t)seqnum) % jitter_buffer->params.num_slots; +} + +static int do_drain(struct rtp_jitter_buffer *jitter_buffer, + size_t num_oldest_slots_to_drain) +{ + /* Important: This intentionally does not reset valid_seqnum_window_length + * or valid_seqnum_window_start_seqnum. That is up to the caller, since + * this function is called in several different situations that require + * different handling of these states. */ + + int ret = 0; + size_t idx; + int32_t first_drained_packet_seqnum = -1; + uint16_t first_lost_packet_seqnum = 0; + bool tracking_lost_packets = false; + struct rtp_jitter_buffer_params *params = &(jitter_buffer->params); + + spa_assert(num_oldest_slots_to_drain <= jitter_buffer->valid_seqnum_window_length); + + if (num_oldest_slots_to_drain == 0) + return 0; + + if (num_oldest_slots_to_drain == jitter_buffer->valid_seqnum_window_length) { + pw_log_debug("Draining all slots in the entire valid seqnum window"); + } else { + pw_log_debug("Draining the first (= oldest) %zu slots in the valid seqnum window; " + "window start seqnum / length: %" PRIu32 " / %" PRIu32, + num_oldest_slots_to_drain, + jitter_buffer->valid_seqnum_window_start_seqnum, + jitter_buffer->valid_seqnum_window_length); + } + + for (idx = 0; idx < num_oldest_slots_to_drain; ++idx) { + uint16_t seqnum = jitter_buffer->valid_seqnum_window_start_seqnum + idx; + size_t slot_index = seqnum_to_slot_index(jitter_buffer, seqnum); + struct rtp_jitter_buffer_slot *slot = &(jitter_buffer->slots[slot_index]); + + if (slot->element_type == RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET) { + const uint8_t *packet_data; + + if 
(tracking_lost_packets) { + int16_t seqnum_delta = calculate_seqnum_delta( + first_lost_packet_seqnum, seqnum); + + tracking_lost_packets = false; + + if (SPA_LIKELY(seqnum_delta > 0)) { + pw_log_debug("Signaling packet loss sequence, starting at " + "seqnum %" PRIu16 ", gap length %zu, not open ended", + first_lost_packet_seqnum, (size_t)seqnum_delta); + if ((ret = params->signal_lost_packets(params->context, + first_lost_packet_seqnum, (size_t)seqnum_delta, false)) < 0) { + goto finish; + } + } else { + pw_log_error("Negative delta %" PRId16" between first and last " + "seqnum in lost seqnum range %" PRIu16 " - %" PRIu16 + "encountered while draining", seqnum_delta, + first_lost_packet_seqnum, seqnum); + } + } + + packet_data = jitter_buffer->packet_buffer + params->max_packet_size * slot_index; + + if ((ret = params->output_rtp_packet(params->context, packet_data, + slot->packet_size, slot->header_size, slot->timestamp, seqnum)) < 0) + goto finish; + + slot->element_type = RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP; + + if (first_drained_packet_seqnum < 0) + first_drained_packet_seqnum = seqnum; + } else { + /* If a packet is not available, consider it lost. + * Do not announce it right away - instead, switch + * to a tracking mode, and keep iterating over the + * table. That way, it becomes possible to announce + * entire spans of lost packets in one go, as a gap + * with multiple packets, which can be more efficient + * for PLC. For example, when 10 packets in a row are + * lost, that would then announce a gap that is 10 + * packets long instead of announcing 10 times that + * a packet was lost. 
*/ + if (!tracking_lost_packets) { + first_lost_packet_seqnum = seqnum; + tracking_lost_packets = true; + + if (first_drained_packet_seqnum >= 0) { + uint16_t last_drained_packet_seqnum = + jitter_buffer->valid_seqnum_window_start_seqnum + (idx - 1); + pw_log_debug("Drained packet(s) with seqnums %" PRId32 " - %" PRIu16, + first_drained_packet_seqnum, last_drained_packet_seqnum); + first_drained_packet_seqnum = -1; + } + } + } + } + + if (first_drained_packet_seqnum >= 0) { + uint16_t last_drained_packet_seqnum = jitter_buffer->valid_seqnum_window_start_seqnum + (idx - 1); + pw_log_debug("Drained packet(s) with seqnums %" PRId32 " - %" PRIu16, first_drained_packet_seqnum, + last_drained_packet_seqnum); + } + + if (tracking_lost_packets) { + /* This can happen when the valid seqnum window is only partially + * drained, that is, num_oldest_slots_to_drain <= valid_seqnum_window_length. */ + + uint16_t last_lost_packet_seqnum; + int16_t seqnum_delta; + + last_lost_packet_seqnum = jitter_buffer->valid_seqnum_window_start_seqnum + (idx - 1); + seqnum_delta = calculate_seqnum_delta(first_lost_packet_seqnum, last_lost_packet_seqnum) + 1; + + spa_assert(seqnum_delta >= 0); + + pw_log_debug("Signaling final packet loss sequence, starting at " + "seqnum %" PRIu16 ", gap length %zu, not open ended", + first_lost_packet_seqnum, (size_t)seqnum_delta); + + /* This final gap is always signaled as a non-open-ended loss, + * since its size is exactly defined. Open-ended packet losses + * happen in cases where the packet loss results in a gap that + * is too large for any packet loss concealment mechanism to + * fully cover (or its end not even known, so PLC is not fully + * applicable). 
*/ + if ((ret = params->signal_lost_packets(params->context, + first_lost_packet_seqnum, (size_t)seqnum_delta, false)) < 0) { + goto finish; + } + } + +finish: + return ret; +} + diff --git a/src/modules/module-rtp/jitter-buffer.h b/src/modules/module-rtp/jitter-buffer.h new file mode 100644 index 000000000..408eaf567 --- /dev/null +++ b/src/modules/module-rtp/jitter-buffer.h @@ -0,0 +1,317 @@ +/* PipeWire */ +/* SPDX-FileCopyrightText: Copyright © 2026 Carlos Rafael Giani */ +/* SPDX-License-Identifier: MIT */ + +#ifndef PIPEWIRE_RTP_JITTER_BUFFER_H +#define PIPEWIRE_RTP_JITTER_BUFFER_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +struct rtp_jitter_buffer_params { + /* How many slots the jitter buffer shall have. This defines + * the capacity of the jitter buffer. Must be at least 1. + * For a more detailed explanation what slots are, see + * further below. */ + size_t num_slots; + /* Maximum size of the packets, in bytes. This must include + * the RTP header bytes. Must be at least 1. */ + size_t max_packet_size; + /* Duration of each packet, in nanoseconds. + * Must be at least 1. */ + uint64_t packet_duration; + /* PipeWire loop, used for setting up a timeout timer. + * This needs to be a loop that runs in the same thread + * as this jitter buffer. + * This must be set to a valid pointer. */ + struct pw_loop *loop; + /* User-defined context that is passed to the function pointers. */ + void *context; + + /* Function pointer called when the jitter buffer decides to output + * an RTP packet. In regular mode, this is immediately invoked when + * rtp_jitter_buffer_insert_packet() is called. No data is copied + * then; this function pointer is directly passed the packet_data and + * packet_size argument values that rtp_jitter_buffer_insert_packet() + * was called with. 
In hold-back mode, this is called when the jitter + * buffer is able to output previously held back packets in order + * (for example, because a preceding packet just now arrived), or + * when the jitter buffer gets drained. packet_data points to the + * bytes of the RTP packet that is output, and packet_size contains + * the size of the packet in bytes. + * + * Should this function experience an error, it returns a negated + * errno, and 0 if it succeeds. In case of an error, if the overall + * logic can continue, it is recommended to fully reset the jitter + * buffer by calling rtp_jitter_buffer_flush(). + * + * This must be set to a valid pointer. */ + int (*output_rtp_packet)(void *context, const uint8_t *packet_data, size_t packet_size, + size_t header_size, uint32_t timestamp, uint16_t seqnum); + /* Function pointer called when the jitter buffer detected one or + * more lost packets in a row. For example, if the last received packet + * had sequence number 16, and then, the jitter buffer received packet + * with sequence number 26, then 27, 28, etc., it eventually will + * detect that packets 17 through 25 are lost, and call this, setting + * seqnum_of_first_lost_packet to 17, and gap_length to 9. The last + * argument is set to false if the packet loss is detected mid stream + * when the gap is small enough that measures like PLC can reasonably + * cover all lost packets. If the detected gap is too large, open_ended + * will be set to true. Implementations must then interpret the value of + * gap_length as a maximum; whatever PLC measures the implementation + * uses must not produce output larger than gap_length. This is a + * safety measure to avoid cases where PLC would have to produce an + * excessive amount of data. + * + * When open_ended is true, implementations should also apply a fade + * out at the end of the produced PLC content to avoid a potential + * hard cutoff at the end.
+ * + * Should this function experience an error, it returns a negated + * errno, and 0 if it succeeds. In case of an error, if the overall + * logic can continue, it is recommended to fully reset the jitter + * buffer by calling rtp_jitter_buffer_flush(). + * + * This must be set to a valid pointer. */ + int (*signal_lost_packets)(void *context, uint16_t seqnum_of_first_lost_packet, + size_t gap_length, bool open_ended); +}; + +enum rtp_jitter_buffer_element_type { + RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET, + RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP +}; + +/* Slot structure. A slot stores an element (a packet or + * a gap). Slots outside of the valid seqnum window have + * undefined contents. See the "slots" array documentation + * in the rtp_jitter_buffer structure below. */ +struct rtp_jitter_buffer_slot { + enum rtp_jitter_buffer_element_type element_type; + /* This is only used when the element_type is + * RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET. See + * the packet_buffer documentation below for more. */ + size_t packet_size; + /* Size of the RTP header. This must be less than + * or equal to packet_size. The packet data that + * is stored in packet_buffer includes this header. */ + size_t header_size; + /* RTP timestamp. */ + uint32_t timestamp; +}; + +struct rtp_jitter_buffer { + /* Copy of the params passed to rtp_jitter_buffer_init(). */ + struct rtp_jitter_buffer_params params; + + /* Set to true by rtp_jitter_buffer_init(), and set to false + * by rtp_jitter_buffer_shutdown(). */ + bool initialized; + + /* Array of slots. The size of this array equals num_slots. + * Only slots that are within the valid seqnum window have + * valid values in this array. For example, if the slot at + * array index 3 corresponds to sequence number 400, but + * the valid seqnum window starts at 402, then the value + * in this array at index 3 has no meaning. Not used when + * regular mode is active.
*/ + struct rtp_jitter_buffer_slot *slots; + + /* Buffer for storing packets in hold-back mode. The size + * of this buffer is max_packet_size*num_slots. To access + * a packet, do it this way: + * + * packet_data = &(packet_buffer[max_packet_size * index]); + * + * Doing this is technically less space efficient if the actual + * packet size is smaller than max_packet_size, but it makes + * buffer management and packet addressing much easier, and + * avoids costly reallocations. The packet_size field in the + * slot structure specifies how many bytes at that location + * actually make up the packet. For example, if the slot at + * index 5 in the slots array has its element_type value set + * to RTP_JITTER_BUFFER_ELEMENT_TYPE_PACKET, then at location + * &(packet_buffer[max_packet_size * 5]), valid packet data + * can be found. And, starting from that location, the amount + * of bytes the packet there is made of equals the packet_size + * value of the slot at index 5. + * + * Only the locations that correspond to valid slots with + * a packet as element contain valid data. For example, if the + * slot at index 3 corresponds to sequence number 400, but the + * valid seqnum window starts at 402, then the data at location + * &(packet_buffer[max_packet_size * 3]) has no meaning. Also, + * if the slot does lie within the valid window, but the type + * of the contained element is RTP_JITTER_BUFFER_ELEMENT_TYPE_GAP, + * then the associated location in this packet buffer is + * meaningless. Not used when regular mode is active. */ + uint8_t *packet_buffer; + + /* If true, hold-back mode is active. */ + bool hold_back_mode; + /* Sequence number of the last packet observed while in + * regular mode. If no last packet is known (at startup for + * example), this is set to -1. + * Not used when hold-back mode is active. */ + int32_t last_seqnum; + /* Timer that, when it times out, drains all slots and switches + * back to regular mode.
This is armed when hold-back mode is + * activated, and when it is reset. + * Not used when regular mode is active. */ + struct spa_source *timeout_timer; + /* These define the valid seqnum window. Packet sequence numbers + * in the [start_seqnum .. (start_seqnum + window_length - 1)] + * range (both start and end of it inclusive) are within this + * window, and are considered valid. A sequence number below the + * window's start_seqnum is considered stale. A sequence number + * beyond the end of the window extends the window; the window + * length is increased such that this new sequence number is at + * the very end of the window, that is, the length is extended + * such that new_seqnum == (start_seqnum + window_length - 1) + * + * However, if this overextends the window (meaning that the + * window length would be greater than the number of slots), + * the logic in rtp_jitter_buffer_insert_packet() will adjust + * the window's start_seqnum and length. + * + * If hold-back mode is off, these two values have no meaning. + * + * This window is necessary for the following: + * + * 1. It allows for detecting and dropping old, stale packets + * by looking at their sequence numbers. + * 2. It detects when currently held-back packets need to be + * drained, that is, the gap that precedes them is not getting + * filled in time. This happens when packets are lost. + * 3. It defines what slots are valid. Only those slots that are + * associated with sequence numbers that fit inside this + * window are valid. Slots outside of the window have no + * defined nor meaningful content (that is, packet size, + * packet buffer data, or slot element type). + * + * Note that even though valid_seqnum_window_start_seqnum is a + * sequence number, it is a uint32_t instead of a uint16_t, for + * easier arithmetics. */ + uint32_t valid_seqnum_window_start_seqnum; + uint32_t valid_seqnum_window_length; +}; + +/* Initializes an uninitialized rtp_jitter_buffer instance with the given params. 
+ * + * See rtp_jitter_buffer_params for details about the input parameters. + * + * params must be a valid pointer. An internal copy of the rtp_jitter_buffer_params + * instance that pointer refers to will be made, so the caller does not have to + * keep that instance around for the duration of the jitter buffer's lifetime. + * + * The jitter buffer must not be already initialized. + * + * Returns 0 if initialization was successful, nonzero in case of an error. + * If an error happens, this automatically calls rtp_jitter_buffer_shutdown(). */ +int rtp_jitter_buffer_init(struct rtp_jitter_buffer *jitter_buffer, struct rtp_jitter_buffer_params *params); + +/* Shuts down a previously initialized jitter buffer. + * + * jitter_buffer must be a valid pointer. + * + * Calling this on an uninitialized jitter buffer results in a no-op. */ +void rtp_jitter_buffer_shutdown(struct rtp_jitter_buffer *jitter_buffer); + +/* Returns true if the jitter buffer is initialized. */ +static inline bool rtp_jitter_buffer_is_initialized(struct rtp_jitter_buffer *jitter_buffer) +{ + return jitter_buffer->initialized; +} + +/* Inserts an RTP packet into the jitter buffer. + * + * Depending on the sequence number of the RTP packet, the jitter buffer + * will immediately output that packet, or hold it back inside a slot + * for purposes of reordering and packet loss detection. (The packet's data + * will be copied into the packet buffer.) The packet order is defined by + * the RTP sequence number. (16-bit unsigned int RTP sequence wrap-around + * is handled properly.) + * + * If for example packets with sequence numbers 1, 4, 3, 5 arrived (in this + * incorrect order), packet 1 will have been output immediately, but packets + * 4, 3, 5 will have been held back (due to the gap at 2 and the incorrect + * order of packets). If later, packet 2 arrives, the gap at 2 is filled, + * and the jitter buffer will then output all packets from 2 to 5, in the + * correct order. 
If however packet 2 never arrives, but packets past + * packet 5 keep arriving, then eventually, packet 2 will be considered + * lost (causing signal_lost_packets() to be called), and the held-back packets + * 4, 3, 5 will be drained (and output via output_rtp_packet(), in correct + * order 3, 4, 5). + * + * jitter_buffer and packet_data must be valid pointers. packet_size must + * be the size of the memory block pointed to by packet_data. + * + * header_size is the size of the RTP header, and must be less than or equal + * to packet_size. timestamp is the RTP timestamp. + * + * Note that while the values of header_size, timestamp, seqnum could be + * parsed from the packet header, this is not done by this function for + * efficiency reasons. Parsing those may require byte swapping to handle + * endianness, and computing the header size may depend on whether the + * header has extensions or not. These computations and processes are done + * once, by the caller, and passed to this call via the arguments. + * + * Returns 0 if the call was successful, nonzero in case of an error. + * If an error happens, consider the jitter buffer as no longer usable + * (it can only be shut down then). */ +int rtp_jitter_buffer_insert_packet(struct rtp_jitter_buffer *jitter_buffer, + const uint8_t *packet_data, size_t packet_size, + size_t header_size, uint32_t timestamp, uint16_t seqnum); + +/* Drains all held-back packets and reports packet loss based on those packets. + * + * If the jitter buffer is currently holding back packets due to some packets + * having been added out of order previously, or because gaps were detected, this + * drains them, causing the output_rtp_packet() and signal_lost_packets() function + * pointers from the params passed to rtp_jitter_buffer_init() to be called.
If + * for example packets with sequence numbers 1, 4, 3, 5 arrived (in this incorrect + * order), packet 1 will have been output immediately, but packets 4, 3, 5 will + * have been held back (due to the gap at 2 and the incorrect order of packets). + * If then, this function is called, the jitter buffer will (in this order) call + * signal_lost_packets() to signal the loss of packet 2, and then call + * output_rtp_packet() to output packets 3, 4, 5 (in that corrected order). + * + * After draining, the jitter buffer will be back in regular mode. + * + * If no packets were added, or if the jitter buffer is in regular mode, + * this does nothing. + * + * The jitter buffer also has an internal timer that is activated as soon + * as gaps and out of order packets are detected. If that timer expires, + * the jitter buffer will automatically drain itself. If however all gaps + * are filled in time, the timer is deactivated, and no automatic drain occurs. + * + * jitter_buffer must be a valid pointer. + * + * Returns 0 if the call was successful, nonzero in case of an error. + * If an error happens, consider the jitter buffer as no longer usable + * (it can only be shut down then). */ +int rtp_jitter_buffer_drain(struct rtp_jitter_buffer *jitter_buffer); + +/* Flushes all held-back packets. + * + * Unlike draining, this does not call any function pointers. Instead, it + * flushes any and all held-back packets (meaning, these packets are all + * gone, and _not_ reported as lost via signal_lost_packets()), and + * resets the jitter buffer back to the regular mode. + * + * This is useful for performing a hard reset on the jitter buffer. + * + * jitter_buffer must be a valid pointer. 
*/ +void rtp_jitter_buffer_flush(struct rtp_jitter_buffer *jitter_buffer); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* PIPEWIRE_RTP_JITTER_BUFFER_H */ diff --git a/test/meson.build b/test/meson.build index a54e059b9..6982720dd 100644 --- a/test/meson.build +++ b/test/meson.build @@ -124,6 +124,17 @@ test('test-spa', link_with: pwtest_lib) ) +test('test-module-rtp-common-lib', + executable('test-module-rtp-common-lib', + 'modules/module-rtp/test-jitter-buffer.c', + include_directories : [ + pwtest_inc, + include_directories('../src/modules'), + ], + dependencies: [ spa_dep, pipewire_module_rtp_common_dep ], + link_with: [pwtest_lib]) +) + openal_info = find_program('openal-info', required: false) if openal_info.found() cdata.set_quoted('OPENAL_INFO_PATH', openal_info.full_path()) diff --git a/test/modules/module-rtp/test-jitter-buffer.c b/test/modules/module-rtp/test-jitter-buffer.c new file mode 100644 index 000000000..ae4943643 --- /dev/null +++ b/test/modules/module-rtp/test-jitter-buffer.c @@ -0,0 +1,1259 @@ +/* PipeWire */ +/* SPDX-FileCopyrightText: Copyright © 2026 Carlos Rafael Giani */ +/* SPDX-License-Identifier: MIT */ + +#include +#include +#include +#include + +#include "config.h" + +#include +#include +#include + +#include "pwtest.h" + +PW_LOG_TOPIC(mod_topic, "test.rtp-jitter-buffer"); +#define PW_LOG_TOPIC_DEFAULT mod_topic + +enum test_event_type { + TEST_EVENT_OUTPUT_PACKET, + TEST_EVENT_LOST_PACKETS, +}; + +struct test_event { + enum test_event_type type; + union { + struct { + uint16_t seqnum; + } output; + struct { + uint16_t first_seqnum; + size_t count; + bool open_ended; + } lost; + }; +}; + +#define MAX_TEST_EVENTS 256 +#define MAX_TEST_PACKET_SIZE 2048 +#define TEST_PACKET_SIZE 128 +#define TEST_HEADER_SIZE 16 +#define TEST_TIMESTAMP 123456 +#define TEST_PACKET_DURATION (10 * SPA_NSEC_PER_MSEC) + +struct test_context { + struct pw_loop *loop; + struct pw_main_loop *main_loop; + struct rtp_jitter_buffer jitter_buffer; + + 
struct test_event events[MAX_TEST_EVENTS]; + size_t num_events; + + uint8_t packet_bytes[MAX_TEST_PACKET_SIZE]; +}; + +static void send_packet(struct test_context *test_context, uint16_t seqnum) +{ + /* Create a simulated RTP packet. Only write the sequence number + * into its header. The rest (SSRC, CSRC, payload type etc.) are + * of no interest to the jitter buffer - it only cares about the + * sequence number. */ + struct rtp_header *header = (struct rtp_header *)(test_context->packet_bytes); + header->sequence_number = htons(seqnum); + int ret = rtp_jitter_buffer_insert_packet(&(test_context->jitter_buffer), + test_context->packet_bytes, (TEST_PACKET_SIZE), (TEST_HEADER_SIZE), (TEST_TIMESTAMP), seqnum); + assert(ret == 0); +} + +static int test_output_rtp_packet(void *context, const uint8_t *packet_data, size_t packet_size, + size_t header_size, uint32_t timestamp, uint16_t seqnum) +{ + struct test_context *test_context = context; + struct rtp_header *header = (struct rtp_header *)packet_data; + + assert(test_context->num_events < MAX_TEST_EVENTS); + + /* Check that this function is not simply passed + * the value of params.max_packet_size, and that + * the other values (header size, timestamp) + * are correct as well. */ + pwtest_int_eq(packet_size, (size_t)(TEST_PACKET_SIZE)); + pwtest_int_eq(header_size, (size_t)(TEST_HEADER_SIZE)); + pwtest_int_eq(timestamp, (size_t)(TEST_TIMESTAMP)); + + /* Compare the seqnum that is given by the caller + * with the seqnum in the RTP header to verify that + * the packet data is correctly associated with the + * information from the function arguments. 
*/ + pwtest_int_eq(seqnum, ntohs(header->sequence_number)); + + test_context->events[test_context->num_events].type = TEST_EVENT_OUTPUT_PACKET; + test_context->events[test_context->num_events].output.seqnum = seqnum; + pw_log_debug("Output RTP packet with seqnum %" PRIu16, test_context->events[test_context->num_events].output.seqnum); + test_context->num_events++; + + return 0; +} + +static int test_signal_lost_packets(void *context, uint16_t seq_of_first_lost_packet, + size_t num_lost_packets, bool open_ended) +{ + struct test_context *test_context = context; + + assert(test_context->num_events < MAX_TEST_EVENTS); + + test_context->events[test_context->num_events].type = TEST_EVENT_LOST_PACKETS; + test_context->events[test_context->num_events].lost.first_seqnum = seq_of_first_lost_packet; + test_context->events[test_context->num_events].lost.count = num_lost_packets; + test_context->events[test_context->num_events].lost.open_ended = open_ended; + test_context->num_events++; + + return 0; +} + +static void setup_test_context(struct test_context *test_context, size_t num_slots) +{ + struct rtp_jitter_buffer_params params; + + assert(test_context != NULL); + + spa_memzero(test_context, sizeof(struct test_context)); + + pw_init(0, NULL); + + test_context->main_loop = pw_main_loop_new(NULL); + assert(test_context->main_loop != NULL); + test_context->loop = pw_main_loop_get_loop(test_context->main_loop); + + memset(&params, 0, sizeof(params)); + params.num_slots = num_slots; + /* Set the maximum packet size to a value higher than TEST_PACKET_SIZE + * to be able to check in test_output_rtp_packet() that that function + * does not simply get the max_packet_size value as the packet size, + * but the _actual_ packet size. (Also see test_output_rtp_packet().)
*/ + params.max_packet_size = MAX_TEST_PACKET_SIZE; + params.packet_duration = TEST_PACKET_DURATION; + params.loop = test_context->loop; + params.context = test_context; + params.output_rtp_packet = test_output_rtp_packet; + params.signal_lost_packets = test_signal_lost_packets; + + int ret = rtp_jitter_buffer_init(&(test_context->jitter_buffer), &params); + assert(ret == 0); +} + +static void teardown_test_context(struct test_context *test_context) +{ + assert(test_context != NULL); + + rtp_jitter_buffer_shutdown(&(test_context->jitter_buffer)); + if (test_context->main_loop != NULL) + pw_main_loop_destroy(test_context->main_loop); + pw_deinit(); +} + +#define SHIFT_TEST_EVENTS() \ + do { \ + memmove( \ + &(test_context.events[0]), \ + &(test_context.events[1]), \ + (test_context.num_events - 1) * sizeof(struct test_event)); \ + test_context.num_events--; \ + } while (0) + +#define CHECK_LOST_PACKET_EVENT(FIRST_SEQNUM, COUNT, OPEN_ENDED) \ + do { \ + pwtest_int_ge(test_context.num_events, 1u); \ + pwtest_int_eq((int)(test_context.events[0].type), TEST_EVENT_LOST_PACKETS); \ + pwtest_int_eq(test_context.events[0].lost.first_seqnum, (FIRST_SEQNUM)); \ + pwtest_int_eq(test_context.events[0].lost.count, (size_t)(COUNT)); \ + pwtest_int_eq(test_context.events[0].lost.open_ended, (OPEN_ENDED)); \ + SHIFT_TEST_EVENTS(); \ + } while (0) + +#define CHECK_OUTPUT_PACKET_EVENT(SEQNUM) \ + do { \ + pwtest_int_ge(test_context.num_events, 1u); \ + pwtest_int_eq((int)(test_context.events[0].type), TEST_EVENT_OUTPUT_PACKET); \ + pwtest_int_eq(test_context.events[0].output.seqnum, (SEQNUM)); \ + SHIFT_TEST_EVENTS(); \ + } while (0) + +PWTEST(rtp_jitter_buffer_test_consecutive_packets) +{ + /* Simple test with packets that are passed to the jitter buffer + * in order, with no gaps. Immediate output is expected, since + * the jitter buffer will be in regular mode.
*/ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Send packets 100, 101, 102, 103, 104 in order. + * All 5 should be immediately output, and the + * hold-back mode should remain disabled. */ + for (uint16_t i = 0; i < 5; i++) { + uint16_t seqnum = 100 + i; + send_packet(&test_context, seqnum); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + } + + pwtest_int_eq(test_context.num_events, 5u); + for (uint16_t i = 0; i < 5; i++) { + uint16_t seqnum = 100 + i; + CHECK_OUTPUT_PACKET_EVENT(seqnum); + } + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_simple_reordering) +{ + /* Check that simple out-of-order packet arrival is handled properly. + * There should be no gaps signaled, and the packets should be output + * in order. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Send 100, 101 in order. */ + send_packet(&test_context, 100); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 101); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_OUTPUT_PACKET_EVENT(100); + CHECK_OUTPUT_PACKET_EVENT(101); + + /* Send 103. A gap at 102 is produced -> jitter buffer enables hold-back mode. + * No output takes place just yet, since 103 is held back. + * The valid seqnum window starts at 102 and ends at packet 103. */ + send_packet(&test_context, 103); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 102u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 2u); + pwtest_int_eq(test_context.num_events, 0u); + + /* Send 102 to simulate out-of-order arrival. 
This fills the gap + * at 102 (implying that it is not signaled), and should cause + * 102 and 103 to be output (in order) and the hold-back mode + * to be disabled again. */ + send_packet(&test_context, 102); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_OUTPUT_PACKET_EVENT(102); + CHECK_OUTPUT_PACKET_EVENT(103); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_partial_output) +{ + /* Test that partial output is done correctly when some + * gaps are filled. (Partial means that only part of the + * held-back packets are output.) */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Establish regular mode with packet 400. */ + send_packet(&test_context, 400); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(400); + + /* Send in packet 402 to produce a gap at 401 and cause the + * jitter buffer to enter hold-back mode. */ + send_packet(&test_context, 402); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 401u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 2u); + pwtest_int_eq(test_context.num_events, 0u); + + /* Send in packets 404 and 405. This keeps the gap at 401, adds + * a gap at 403, and keeps the jitter buffer in hold-back mode. */ + send_packet(&test_context, 404); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 405); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + + /* Send in packet 401, which fills the gap at 401. This allows + * the jitter buffer to output packets 401 and 402. But since + * another gap exists at 403, hold-back mode remains enabled. 
*/ + send_packet(&test_context, 401); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_OUTPUT_PACKET_EVENT(401); + CHECK_OUTPUT_PACKET_EVENT(402); + + /* Send in packet 403, which fills the gap at 403. This allows + * the jitter buffer to output packets 403, 404, 405. Those were + * the remaining held-back packets, so hold-back mode should be + * turned off now. */ + send_packet(&test_context, 403); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 3u); + CHECK_OUTPUT_PACKET_EVENT(403); + CHECK_OUTPUT_PACKET_EVENT(404); + CHECK_OUTPUT_PACKET_EVENT(405); + + /* Verify that regular mode is working properly by sending + * in packet 406. */ + send_packet(&test_context, 406); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(406); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_explicit_drain_in_regular_mode) +{ + /* Test what happens when explicitly draining the jitter buffer + * while in regular mode. Draining should be a no-op in this mode. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Establish regular mode with packets 200 and 201. */ + send_packet(&test_context, 200); + send_packet(&test_context, 201); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_OUTPUT_PACKET_EVENT(200); + CHECK_OUTPUT_PACKET_EVENT(201); + + /* Drain, and then check the outcome. Check that it was a no-op. 
*/ + int ret = rtp_jitter_buffer_drain(&(test_context.jitter_buffer)); + assert(ret == 0); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + pwtest_int_eq(test_context.jitter_buffer.last_seqnum, 201); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_explicit_drain_in_hold_back_mode) +{ + /* Test what happens when explicitly draining the jitter buffer + * while in hold-back mode. Missing packets should be signaled + * as lost packets by this. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Establish regular mode with packet 200. */ + send_packet(&test_context, 200); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(200); + + /* Send in packets 202 and 205 to produce gap at 201, 203, 204 + * and cause the jitter buffer to enter hold-back mode. */ + send_packet(&test_context, 202); + send_packet(&test_context, 205); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + + /* Drain explicitly. This should output the following (in this order): + * + * - 1 lost packet, starting at seqnum 201, not open-ended + * - 1 packet output with seqnum 202 + * - 2 lost packets, starting at seqnum 203, not open-ended + * - 1 packet output with seqnum 205 + * + * This should also set the jitter buffer back to regular mode. 
+ * The last_seqnum should be -1, since after explicit drain, + * the jitter buffer has no idea what packets will come next. */ + int ret = rtp_jitter_buffer_drain(&(test_context.jitter_buffer)); + assert(ret == 0); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 4u); + pwtest_int_eq(test_context.jitter_buffer.last_seqnum, -1); + CHECK_LOST_PACKET_EVENT(201, 1u, false); + CHECK_OUTPUT_PACKET_EVENT(202); + CHECK_LOST_PACKET_EVENT(203, 2u, false); + CHECK_OUTPUT_PACKET_EVENT(205); + + /* Verify that regular mode is working properly by sending + * in packet 700. Since after draining, the last_seqnum is + * -1, a discontinuity in the sequence numbers is okay. */ + send_packet(&test_context, 700); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(700); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_explicit_drain_coalesced_loss) +{ + /* Test that a contiguous set of lost packets is coalesced + * into one lost packet signal. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Establish regular mode with packet 50. */ + send_packet(&test_context, 50); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(50); + + /* Send in packet 54 to produce a gap at 51, 52, 53 and + * cause the jitter buffer to enter hold-back mode. */ + send_packet(&test_context, 54); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + + /* Drain the jitter buffer. The packets 51, 52, 53 are + * now considered lost, and should be reported as such.
*/ + int ret = rtp_jitter_buffer_drain(&(test_context.jitter_buffer)); + assert(ret == 0); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + pwtest_int_eq(test_context.jitter_buffer.last_seqnum, -1); + CHECK_LOST_PACKET_EVENT(51, 3u, false); + CHECK_OUTPUT_PACKET_EVENT(54); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_explicit_drain_with_seqnum_wraparound) +{ + /* Test what happens when explicitly draining the jitter + * buffer while in hold-back mode and with sequence numbers + * wrapping around. Missing packets should be signaled as + * lost packets by this. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Establish regular mode with packet 65533. */ + send_packet(&test_context, 65533); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(65533); + + /* Send in packets 65535 and 2 to produce gap at 65534, 0, 1 + * and cause the jitter buffer to enter hold-back mode. */ + send_packet(&test_context, 65535); + send_packet(&test_context, 2); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + + /* Drain explicitly. This should output the following (in this order): + * + * - 1 lost packet, starting at seqnum 65534, not open-ended + * - 1 packet output with seqnum 65535 + * - 2 lost packets, starting at seqnum 0, not open-ended + * - 1 packet output with seqnum 2 + * + * This should also set the jitter buffer back to regular mode. 
+ * The last_seqnum should be -1, since after explicit drain, + * the jitter buffer has no idea what packets will come next.*/ + int ret = rtp_jitter_buffer_drain(&(test_context.jitter_buffer)); + assert(ret == 0); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 4u); + pwtest_int_eq(test_context.jitter_buffer.last_seqnum, -1); + CHECK_LOST_PACKET_EVENT(65534, 1u, false); + CHECK_OUTPUT_PACKET_EVENT(65535); + CHECK_LOST_PACKET_EVENT(0, 2u, false); + CHECK_OUTPUT_PACKET_EVENT(2); + + /* Verify that regular mode is working properly by sending + * in packet 700. Since after draining, the last_seqnum is + * -1, a discontinuity in the sequence numbers is okay. */ + send_packet(&test_context, 700); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(700); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_stale_packets_in_regular_mode) +{ + /* Test what happens when stale and old packets are sent into + * the jitter buffer in regular mode. They should be dropped + * without influencing the behavior of the jitter buffer. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Establish regular mode with packets 100 and 101. */ + send_packet(&test_context, 100); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 101); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_OUTPUT_PACKET_EVENT(100); + CHECK_OUTPUT_PACKET_EVENT(101); + + /* Send in packet 101. Since a packet 101 was already seen, + * this is a stale packet, and needs to be dropped. */ + send_packet(&test_context, 101); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + + /* Send in packet 99. 
Since packets 100 and 101 were already seen, + * this is an old packet, and needs to be dropped. */ + send_packet(&test_context, 99); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + + /* Verify that regular mode is working properly by sending + * in packet 102. */ + send_packet(&test_context, 102); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(102); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_stale_packets_in_hold_back_mode) +{ + /* Test what happens when stale and old packets are sent into + * the jitter buffer in hold-back mode. They should be dropped + * without influencing the behavior of the jitter buffer. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Establish hold-back mode with packets 300 and 302. + * Hold-back mode gets active because of the gap at 301. */ + send_packet(&test_context, 300); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 302); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(300); + + /* Send in packet 299. Since packets 300 and 302 were already seen, + * this is an old packet, and needs to be dropped. */ + send_packet(&test_context, 299); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + + /* Send in packet 300. Since a packet 300 was already seen, + * this is a stale packet, and needs to be dropped. */ + send_packet(&test_context, 300); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + + /* Send in packet 302. This is another stale packet. 
The + * difference to the packet 300 check above is that the + * packet 302 that was previously observed is held back, + * and was not output thus far. */ + send_packet(&test_context, 302); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + + /* Send in packet 301 to test that switching back + * to regular mode still works properly. */ + send_packet(&test_context, 301); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_OUTPUT_PACKET_EVENT(301); + CHECK_OUTPUT_PACKET_EVENT(302); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_flush) +{ + /* Test the flush functionality. This should discard any held-back + * packets, without emitting them, and the jitter buffer should + * be back in regular mode afterwards. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Establish hold-back mode with packets 500 and 502. + * Hold-back mode gets active because of the gap at 501. */ + send_packet(&test_context, 500); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 502); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(500); + + rtp_jitter_buffer_flush(&(test_context.jitter_buffer)); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + pwtest_int_eq(test_context.jitter_buffer.last_seqnum, -1); + + /* Verify that regular mode is working properly by sending + * in packet 700. Since after flushing, the last_seqnum is + * -1, a discontinuity in the sequence numbers is okay. 
*/ + send_packet(&test_context, 700); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(700); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_seqnum_wraparound_regular) +{ + /* Check that in regular mode, output of in-sequence packets + * works properly even when a sequence number wrap-around occurs. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + send_packet(&test_context, 65534); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 65535); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 0); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 1); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 4u); + CHECK_OUTPUT_PACKET_EVENT(65534); + CHECK_OUTPUT_PACKET_EVENT(65535); + CHECK_OUTPUT_PACKET_EVENT(0); + CHECK_OUTPUT_PACKET_EVENT(1); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_seqnum_wraparound_with_reordering) +{ + /* Check that in hold-back mode, output of in-sequence packets + * works properly even when a sequence number wrap-around occurs. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Send packets 65534 and 65535 in order. */ + send_packet(&test_context, 65534); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 65535); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_OUTPUT_PACKET_EVENT(65534); + CHECK_OUTPUT_PACKET_EVENT(65535); + + /* Send in packet 1, causing a gap at 0. 
*/ + send_packet(&test_context, 1); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 0u); + + /* Fill the gap by sending in packet 0, then check that + * packets 0 and 1 were now output in order. */ + send_packet(&test_context, 0); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_OUTPUT_PACKET_EVENT(0); + CHECK_OUTPUT_PACKET_EVENT(1); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_overextension_single_gap_no_end_gap) +{ + /* Check what happens when hold-back mode is active, the + * valid seqnum window's maximum length is reached, and then, + * a packet with a sequence number that is one past the window + * range is added. This new packet would overextend the window, + * so the window is shifted forwards. However, it is only + * overextended by 1, so only the oldest slot in the window + * needs to be drained. In this case, that oldest slot contains + * the gap at the very beginning of the window. Also, since + * aside from that gap, there are no other ones, and the new + * packet (the one that overextends the window) comes directly + * after the last packet in the valid seqnum window, the + * jitter buffer will have no gaps left to take care of, so + * all held back packets can be output. + * + * This simulates cases where one packet is lost among + * a string of packets that all arrive in order. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Produce a sequence of packets with a gap in them. Start at 100, + * skip 101, then go all the way to 110. + * + * First, packet 100 will immediately be output. Then, packet 102 + * will enable hold-back mode (due to the gap at 101). The valid + * seqnum window then starts at 101, and extends all the way to 110. + * 110-101+1 = 10, which equals the max num packets of the jitter + * buffer here. 
In other words, after this, the jitter buffer valid + * range is as large as it can maximally be. */ + send_packet(&test_context, 100); + for (uint16_t i = 102; i <= 110; i++) { + send_packet(&test_context, i); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + } + pwtest_int_eq(test_context.num_events, 1u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 10u); + CHECK_OUTPUT_PACKET_EVENT(100); + + /* Now insert packet 111. This would overextend the window, so the + * jitter buffer has to shift the window and drain the oldest slots + * that are no longer part of the shifted window. Since packet 111 + * would overextend the window by 1, it means that the one oldest + * slot is drained. That oldest slot actually is the gap at 101. + * Since that gap was drained (resulting in a packet loss signal + * at seqnum 101 of length 1), only packets remain in the valid + * seqnum window, no gaps anymore, so the jitter buffer immediately + * outputs all of them, in order. */ + send_packet(&test_context, 111); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 11u); + CHECK_LOST_PACKET_EVENT(101, 1u, false); + for (uint16_t i = 102; i <= 111; i++) + CHECK_OUTPUT_PACKET_EVENT(i); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_overextension_multiple_gaps_no_end_gap) +{ + /* Check what happens when hold-back mode is active, the + * valid seqnum window's maximum length is reached, and then, + * a packet with a sequence number that is one past the window + * range is added. This new packet would overextend the window, + * so the window is shifted forwards. However, it is only + * overextended by 1, so only the oldest slot in the window + * needs to be drained. In this case, that oldest slot contains + * the gap at the very beginning of the window. Since there + * are more gaps present, the hold-back mode is not left. 
+ * + * This simulates cases where more than one packet is lost + * among a string of packets that all arrive in order. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Produce a sequence of packets with gaps in them. Start at 100, + * skip 101, 102, and 104, and go all the way to 110. + * + * In the hold-back mode that results from this, the valid range + * then starts at 101, and extends all the way to 110. 110-101+1 = 10, + * which equals the max num packets of the jitter buffer here. In + * other words, after this, the jitter buffer valid range is as large + * as it can maximally be. */ + send_packet(&test_context, 100); + send_packet(&test_context, 103); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + for (uint16_t i = 105; i <= 110; i++) { + send_packet(&test_context, i); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + } + pwtest_int_eq(test_context.num_events, 1u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 10u); + CHECK_OUTPUT_PACKET_EVENT(100); + + /* Now insert packet 111. This would overextend the window, so the + * jitter buffer has to shift the window and drain the oldest slots + * that are no longer part of the shifted window. Since packet 111 + * would overextend the window by 1, it means that the one oldest + * slot is drained. But, at 102, there is also a gap, and 102 is now + * the new start of the valid seqnum window, so the jitter buffer + * cannot output any packets yet. */ + send_packet(&test_context, 111); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 102u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 10u); + CHECK_LOST_PACKET_EVENT(101, 1u, false); + + /* To see that the behavior remains as expected, fill the gap at 102. 
+ * Since 102 is the very beginning of the valid seqnum window, and there + * is a packet at 103, the jitter buffer can now output 102 and 103. + * Also, the valid seqnum window shrinks accordingly by 2, its length + * becoming 8 and its start seqnum becoming 104. */ + send_packet(&test_context, 102); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 104u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 8u); + CHECK_OUTPUT_PACKET_EVENT(102); + CHECK_OUTPUT_PACKET_EVENT(103); + + /* Finally, send in packet 104. By now, 104 is the start of the valid + * packet window, and a gap is there. Since this is the last gap in + * the jitter buffer, once it is filled, all packets can be output. */ + send_packet(&test_context, 104); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 8u); + for (uint16_t i = 104; i <= 111; i++) + CHECK_OUTPUT_PACKET_EVENT(i); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_overextension_after_partial_output) +{ + /* Check what happens when first, in hold-back mode, a partial + * drain happens, and then, the valid seqnum window is overextended. */ + + struct test_context test_context; + + setup_test_context(&test_context, 5); + + /* Add a packet 100, which is output immediately, since the + * jitter buffer is in regular mode. */ + send_packet(&test_context, 100); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(100); + + /* Now add packet 102. Since there is a gap at 101, hold-back + * mode is enabled. The valid seqnum window starts at 101, + * and is of length 2. 
*/ + send_packet(&test_context, 102); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 101u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 2u); + pwtest_int_eq(test_context.num_events, 0u); + + /* Packets 103 to 105 are inserted. This fills the window to + * capacity, since now, it has been extended, and goes from + * 101 to 105. That is, it starts at 101, and is of length 5 + * which equals the jitter buffer capacity). */ + send_packet(&test_context, 103); + send_packet(&test_context, 104); + send_packet(&test_context, 105); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 101u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 5u); + pwtest_int_eq(test_context.num_events, 0u); + + /* Now add packet 101. This fills the gap. All 5 packets + * can be output, and the jitter buffer returns to the regular mode. */ + send_packet(&test_context, 101); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 5u); + CHECK_OUTPUT_PACKET_EVENT(101); + CHECK_OUTPUT_PACKET_EVENT(102); + CHECK_OUTPUT_PACKET_EVENT(103); + CHECK_OUTPUT_PACKET_EVENT(104); + CHECK_OUTPUT_PACKET_EVENT(105); + + /* Re-enter the hold-back mode by adding packet 107 and + * intentionally leaving out packet 106. The valid seqnum + * window now starts at 106, and is of length 2. */ + send_packet(&test_context, 107); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 106u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 2u); + pwtest_int_eq(test_context.num_events, 0u); + + /* Packets 108 to 110 are inserted. This fills the window to + * capacity, since now, it has been extended, and goes from + * 106 to 110. 
That is, it starts at 106, and is of length 5 + * which equals the jitter buffer capacity). */ + send_packet(&test_context, 108); + send_packet(&test_context, 109); + send_packet(&test_context, 110); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_start_seqnum, 106u); + pwtest_int_eq(test_context.jitter_buffer.valid_seqnum_window_length, 5u); + pwtest_int_eq(test_context.num_events, 0u); + + /* Packet 111 is added. This overextends the window, since it would + * now go from 106 to 111. That is a length of 111-106+1 = 6, which + * is beyond the capacity (5). + * + * The overextension is still low enough that most of the window + * contents can be reused. In fact, only the oldest slot (the one + * containing the gap at 106) needs to be drained by signaling it + * as a packet 106 loss. + * + * Once packet 106 is signaled as lost, and the corresponding slot + * is drained, the leftovers are all packets, no gaps, so all packets + * from 107 to 111 are output. + * + * By combining this with multiple partial drains above, it is verified + * that valid_seqnum_window_start_seqnum updates (which happen during + * partial drains) do not break the overextension handling. */ + send_packet(&test_context, 111); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 6u); + CHECK_LOST_PACKET_EVENT(106, 1u, false); + CHECK_OUTPUT_PACKET_EVENT(107); + CHECK_OUTPUT_PACKET_EVENT(108); + CHECK_OUTPUT_PACKET_EVENT(109); + CHECK_OUTPUT_PACKET_EVENT(110); + CHECK_OUTPUT_PACKET_EVENT(111); + + /* Verify regular mode recovery. 
*/ + send_packet(&test_context, 112); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(112); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_immediate_overextension_after_regular_mode) +{ + /* Check what happens when a gap causes the jitter buffer to switch + * to the hold-back mode, but that gap is so large that it immediately + * overextends the valid seqnum window. The jitter buffer should + * instantly recognize the immediate overextension and signal an open + * ended packet loss event. It does not stay in the hold-back mode, + * since there is nothing to hold back in that case. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Send 100, 101 in order. */ + send_packet(&test_context, 100); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 101); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_OUTPUT_PACKET_EVENT(100); + CHECK_OUTPUT_PACKET_EVENT(101); + + /* Send 200. A massive gap of far more than 10 packets is produced + * -> jitter buffer signals an open ended gap, but stays in regular mode. */ + send_packet(&test_context, 200); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_LOST_PACKET_EVENT(102, 10u, true); + CHECK_OUTPUT_PACKET_EVENT(200); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_immediate_overextension_after_regular_mode_threshold_open_closed_gap) +{ + /* This is similar to rtp_jitter_buffer_test_immediate_overextension_after_regular_mode, + * but checks for a corner case. That is: If the gap length equals + * the number of slots, then the gap should not be reported as open. + * + * Test this by producing such a gap. 
Then further verify by repeating + * the test, but with a gap that is 1 packet larger than the number of + * slots. The first round should report a closed gap of a size equal + * to the number of slots. The second round should report an open gap. */ + + /* First round. */ + { + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Send 10, 11 in order. */ + send_packet(&test_context, 10); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 11); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_OUTPUT_PACKET_EVENT(10); + CHECK_OUTPUT_PACKET_EVENT(11); + + /* Send 22. A gap of exactly 10 packets (= the number of slots) + * is produced -> jitter buffer signals a closed gap of size + * equal to the number of slots. */ + send_packet(&test_context, 22); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_LOST_PACKET_EVENT(12, 10u, false); + CHECK_OUTPUT_PACKET_EVENT(22); + + teardown_test_context(&test_context); + } + + /* Second round. */ + { + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Send 10, 11 in order. */ + send_packet(&test_context, 10); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 11); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_OUTPUT_PACKET_EVENT(10); + CHECK_OUTPUT_PACKET_EVENT(11); + + /* Send 23. A gap of exactly 11 packets (= 1 past the number + * of slots) is produced -> jitter buffer signals an open + * ended gap of size equal to the number of slots. 
*/ + send_packet(&test_context, 23); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + CHECK_LOST_PACKET_EVENT(12, 10u, true); + CHECK_OUTPUT_PACKET_EVENT(23); + + teardown_test_context(&test_context); + } + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_full_window_invalidation_non_open_ended_gap) +{ + /* Check what happens when hold-back mode is active, the + * valid seqnum window's maximum length is reached, and then, + * a packet with a sequence number that is far enough to + * overextend the window past its current length. This means + * that the shifting method (verified in earlier tests above) + * won't work - the window is shifted completely past its + * current range, so none of those slots remain valid, + * and must all be drained. Furthermore, it means that between + * the last seqnum of the old window and the first seqnum of + * the new window, there is a gap. The jitter buffer is expected + * to do the following: + * + * 1. Drain the entire current valid seqnum window + * 2. Reset the window to only contain the seqnum of the new packet + * 3. Signal the gap between the old and the new window + * + * Here, the window is shifted far enough that none of the + * original content can be retained, but not so far that + * the gap between the old and new windows becomes too large + * to fully cover via PLC. As a result, that gap is signaled + * as packet loss, but as a non-open-ended one. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Establish hold-back mode with packets 10 and 12. + * Hold-back mode gets active because of the gap at 11. */ + send_packet(&test_context, 10); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 12); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(10); + + /* Send in packet 22. 
This would overextend the window. Shifting + * the current window moves it past packet 12, so the jitter + * buffer must be fully drained. Since afterwards, there is + * nothing left in the jitter buffer other than the new packet, + * the valid seqnum window length becomes 1, and starts at 22. + * This means that there are no gaps left, so the contents + * (in this case, just the packet 22) can be output immediately. + * Also, the gap between the old window and the new window goes + * from seqnum 13 (one past the end of the old window) to seqnum + * 21 (one before the new packet 22). 21-13+1 = 9, which is + * less than the jitter buffer capacity (which is 10), so that + * gap is announced as non-open-ended packet loss. */ + send_packet(&test_context, 22); + pwtest_int_eq(test_context.num_events, 4u); + CHECK_LOST_PACKET_EVENT(11, 1u, false); + CHECK_OUTPUT_PACKET_EVENT(12); + CHECK_LOST_PACKET_EVENT(13, 9u, false); + CHECK_OUTPUT_PACKET_EVENT(22); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_full_window_invalidation_open_ended_gap) +{ + /* Check what happens when hold-back mode is active, the + * valid seqnum window's maximum length is reached, and then, + * a packet with a sequence number that is far enough to + * overextend the window past its current length. This means + * that the shifting method (verified in earlier tests above) + * won't work - the window is shifted completely past its + * current range, so none of those slots remain valid, + * and must all be drained. Furthermore, it means that between + * the last seqnum of the old window and the first seqnum of + * the new window, there is a gap. The jitter buffer is expected + * to do the following: + * + * 1. Drain the entire current valid seqnum window + * 2. Reset the window to only contain the seqnum of the new packet + * 3. 
Signal the gap between the old and the new window + * + * Here, the window is shifted far enough that none of the + * original content can be retained, and that the gap + * between the old and new windows becomes too large + * to fully cover via PLC. As a result, that gap is signaled + * as packet loss, but as an open-ended one. */ + + struct test_context test_context; + + setup_test_context(&test_context, 10); + + /* Establish hold-back mode with packets 10 and 12. + * Hold-back mode gets active because of the gap at 11. */ + send_packet(&test_context, 10); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 12); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(10); + + /* Send in packet 400. This would overextend the window. Shifting + * the current window moves it past packet 12, so the jitter + * buffer must be fully drained. Since afterwards, there is + * nothing left in the jitter buffer other than the new packet, + * the valid seqnum window length becomes 1, and starts at 400. + * This means that there are no gaps left, so the contents + * (in this case, just the packet 400) can be output immediately. + * Also, the gap between the old window and the new window goes + * from seqnum 13 (one past the end of the old window) to seqnum + * 399 (one before the new packet 400). 399-13+1 = 387, which is + * far beyond the jitter buffer capacity (which is 10). That gap + * is then signaled as an open ended packet loss with maximum + * length 10, meaning that any PLC/fadeout measure must not + * exceed the length of 10 packets. (In non-open-ended signals, + * the length instead specifies the exact length of the gap.) + * This is done to avoid excessive PLC/fadeout calculations, + * like in this case, where it otherwise would force PLC for + * 387 packets. 
Callers are encouraged to apply fadeout as well + * to not have a hard cutoff after the maximum (10 packets here).*/ + send_packet(&test_context, 400); + pwtest_int_eq(test_context.num_events, 4u); + CHECK_LOST_PACKET_EVENT(11, 1u, false); + CHECK_OUTPUT_PACKET_EVENT(12); + CHECK_LOST_PACKET_EVENT(13, 10u, true); + CHECK_OUTPUT_PACKET_EVENT(400); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST(rtp_jitter_buffer_test_timeout_drain) +{ + /* Check what happens when hold-back mode is enabled and + * the gaps are not filled in time. It is expected that the + * jitter buffer's timeout expires and forcibly drains + * its contents. */ + + struct test_context test_context; + struct timespec ts; + + setup_test_context(&test_context, 10); + + /* Establish hold-back mode with packets 60 and 62. + * Hold-back mode gets active because of the gap at 61. */ + send_packet(&test_context, 60); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + send_packet(&test_context, 62); + pwtest_bool_true(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(60); + + /* The jitter buffer's timeout timer is configured to expire + * when the total duration of its capacity passes after the + * hold-back mode was enabled. In this test, capacity is 10 + * packets, and each packet covers 10ms, then the total duration + * is 10*10ms = 100 ms, and that will also be the timeout of + * that timer, and the gap that was detected earlier will have + * armed that timer. Sleep for 50ms longer than its timeout + * duration to make sure it expires and thus provokes the + * draining of the jitter buffer. */ + ts.tv_sec = 0; + ts.tv_nsec = 10 * TEST_PACKET_DURATION + 50 * SPA_NSEC_PER_MSEC; + nanosleep(&ts, NULL); + + /* Iterate the loop to process the timer expiration. 
*/ + pw_loop_enter(test_context.loop); + pw_loop_iterate(test_context.loop, 0); + pw_loop_leave(test_context.loop); + + /* After draining, the jitter buffer should be back to regular + * mode, just as if rtp_jitter_buffer_drain() had been called. */ + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 2u); + pwtest_int_eq(test_context.jitter_buffer.last_seqnum, -1); + CHECK_LOST_PACKET_EVENT(61, 1u, false); + CHECK_OUTPUT_PACKET_EVENT(62); + + /* Verify that regular mode is working properly by sending + * in packet 700. Since after draining, the last_seqnum is + * -1, a discontinuity in the sequence numbers is okay. */ + send_packet(&test_context, 700); + pwtest_bool_false(test_context.jitter_buffer.hold_back_mode); + pwtest_int_eq(test_context.num_events, 1u); + CHECK_OUTPUT_PACKET_EVENT(700); + + teardown_test_context(&test_context); + + return PWTEST_PASS; +} + +PWTEST_SUITE(pw_module_rtp_common_lib) +{ + pwtest_add(rtp_jitter_buffer_test_consecutive_packets, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_simple_reordering, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_partial_output, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_explicit_drain_in_regular_mode, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_explicit_drain_in_hold_back_mode, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_explicit_drain_coalesced_loss, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_explicit_drain_with_seqnum_wraparound, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_stale_packets_in_regular_mode, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_stale_packets_in_hold_back_mode, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_flush, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_seqnum_wraparound_regular, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_seqnum_wraparound_with_reordering, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_overextension_single_gap_no_end_gap, PWTEST_NOARG); + 
pwtest_add(rtp_jitter_buffer_test_overextension_multiple_gaps_no_end_gap, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_overextension_after_partial_output, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_immediate_overextension_after_regular_mode, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_full_window_invalidation_non_open_ended_gap, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_immediate_overextension_after_regular_mode_threshold_open_closed_gap, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_full_window_invalidation_open_ended_gap, PWTEST_NOARG); + pwtest_add(rtp_jitter_buffer_test_timeout_drain, PWTEST_NOARG); + + return PWTEST_PASS; +} From 6b524fd596e8ad4e77f209eeb14c8a822117bba0 Mon Sep 17 00:00:00 2001 From: Carlos Rafael Giani Date: Wed, 24 Jun 2026 18:48:36 +0200 Subject: [PATCH 5/5] module-rtp: Add audio codec support to audio.c and replace opus.c with it Opus was integrated as a completely separate code path to the PCM audio processing found in audio.c. This is actually not ideal, since the only part that actually is Opus specific is the part that en- and decodes from and to PCM. The rest is 1:1 the same PCM handling. For this reason, it is much better to instead add audio codec support to audio.c, meaning that the code in there can now encode PCM audio right before sending it out as RTP, and decode incoming packets to PCM right before actually processing the decoded audio data. This significantly modifies how stream.c initializes the PCM audio path, since the audio codec feature is new. It now treats the Opus subtype as an audio codec selector instead of a selector for an entirely alternate code path (like how MIDI integration remains entirely separate). Since audio codecs usually require their frames to be decoded in order, this also integrates the RTP jitter buffer in the RTP module. Opus is now integrated as such a codec in audio.c. 
When it is selected, incoming packets in rtp_audio_receive() are first inserted into the jitter buffer. That buffer then outputs packets in order, and then, these packets are decoded to PCM. The rest of the processing chain goes as usual. A similar route is used for when the jitter buffer signals packet loss to be able to apply PLC. For encoding, it is similar (except that no jitter buffer is involved); in rtp_audio_flush_packets(), when Opus is active, the PCM data is rerouted to be fed to Opus for encoding, and the Opus output is then placed into the iovec array instead of the original PCM. This also improves overall Opus support; it supports S16 PCM data in addition to F32 data, correctly checks the ptime, sample rate etc. for Opus compatibility, computes an ideal bitrate, allows for manual bitrate selection and encoding complexity adjustment (via the new stream properties "opus.encoder.bitrate" and "opus.encoder.complexity"), sets several other Opus CTLs to fixed values, supports the Opus restricted-lowdelay mode (sacrifices Speech code paths for lower latency, enabled by setting the "opus.encoder.restricted-lowdelay" stream property to true), and also uses Opus' PLC in case of packet loss. The audio codec interface is designed such that adding other codecs in the future is easily doable. New integrations need to implement the function pointers found in the rtp_audio_codec structure, and expose an instance of such a custom rtp_audio_codec structure instance (see the get_rtp_opus_codec() implementation for an example). 
--- src/modules/meson.build | 4 +- src/modules/module-rtp-session.c | 4 +- src/modules/module-rtp-sink.c | 3 +- src/modules/module-rtp-source.c | 7 +- src/modules/module-rtp/audio-codec.c | 14 + src/modules/module-rtp/audio-codec.h | 209 +++++++++ src/modules/module-rtp/audio.c | 462 ++++++++++++++++++-- src/modules/module-rtp/opus-codec.c | 625 +++++++++++++++++++++++++++ src/modules/module-rtp/opus-codec.h | 20 + src/modules/module-rtp/opus.c | 359 --------------- src/modules/module-rtp/stream.c | 224 +++++++--- src/modules/module-rtp/stream.h | 1 + 12 files changed, 1461 insertions(+), 471 deletions(-) create mode 100644 src/modules/module-rtp/audio-codec.c create mode 100644 src/modules/module-rtp/audio-codec.h create mode 100644 src/modules/module-rtp/opus-codec.c create mode 100644 src/modules/module-rtp/opus-codec.h delete mode 100644 src/modules/module-rtp/opus.c diff --git a/src/modules/meson.build b/src/modules/meson.build index 2a2b52678..7e4f68b76 100644 --- a/src/modules/meson.build +++ b/src/modules/meson.build @@ -596,7 +596,9 @@ summary({'zeroconf-discover': build_module_zeroconf_discover}, bool_yn: true, se # that contains that common code. pipewire_module_rtp_common_lib = static_library('pipewire-module-rtp-common-lib', [ 'module-rtp/stream.c', - 'module-rtp/jitter-buffer.c' ], + 'module-rtp/jitter-buffer.c', + 'module-rtp/audio-codec.c', + 'module-rtp/opus-codec.c' ], include_directories : [configinc], install : false, dependencies : [mathlib, dl_lib, rt_lib, pipewire_dep, opus_dep], diff --git a/src/modules/module-rtp-session.c b/src/modules/module-rtp-session.c index 525ab75bc..ef7b3cf90 100644 --- a/src/modules/module-rtp-session.c +++ b/src/modules/module-rtp-session.c @@ -140,7 +140,8 @@ PW_LOG_TOPIC(mod_topic, "mod." 
NAME); "( sess.min-ptime= ) " \ "( sess.max-ptime= ) " \ "( sess.media= ) " \ - "( audio.format= ) " \ + "( audio.format= ) " \ "( audio.rate= ) " \ "( audio.channels= ) "\ "( audio.position= ) " \ @@ -1632,6 +1633,7 @@ int pipewire__module_init(struct pw_impl_module *module, const char *args) } else if (spa_streq(str, "opus")) { struct spa_dict_item items[] = { + { "audio.format", DEFAULT_OPUS_AUDIO_FORMAT }, { "audio.rate", SPA_STRINGIFY(DEFAULT_RATE) }, { "audio.channels", SPA_STRINGIFY(DEFAULT_CHANNELS) }, { "audio.position", DEFAULT_POSITION } }; diff --git a/src/modules/module-rtp-sink.c b/src/modules/module-rtp-sink.c index 5f8c3e3d7..8f045e71d 100644 --- a/src/modules/module-rtp-sink.c +++ b/src/modules/module-rtp-sink.c @@ -199,7 +199,8 @@ PW_LOG_TOPIC(mod_topic, "mod." NAME); "( sess.min-ptime= ) " \ "( sess.max-ptime= ) " \ "( sess.media= ) " \ - "( audio.format= ) " \ + "( audio.format= ) " \ "( audio.rate= ) " \ "( audio.channels= ) " \ "( audio.position= ) " \ diff --git a/src/modules/module-rtp-source.c b/src/modules/module-rtp-source.c index 651c7c1be..6cce0b1c7 100644 --- a/src/modules/module-rtp-source.c +++ b/src/modules/module-rtp-source.c @@ -174,7 +174,8 @@ PW_LOG_TOPIC(mod_topic, "mod." NAME); "( sess.latency.msec= ) "\ "( sess.ignore-ssrc= ) "\ "( sess.media= ) " \ - "( audio.format= ) " \ + "( audio.format= ) " \ "( audio.rate= ) " \ "( audio.channels= ) " \ "( audio.position= ) " \ @@ -316,7 +317,9 @@ on_rtp_io(void *data, int fd, uint32_t mask) /* Use this Linux specific feature to get the actual size of the * packet, even if it was truncated due to it being larger than * the buffer size. The code below uses this to detect packets - * that exceed the MTU size. */ + * that exceed the MTU size. Truncated packets are unusable. + * Especially if an audio codec is in use, the partial absence + * of data can lead to a corrupted state. 
*/ MSG_TRUNC, #else 0, diff --git a/src/modules/module-rtp/audio-codec.c b/src/modules/module-rtp/audio-codec.c new file mode 100644 index 000000000..e32387bbb --- /dev/null +++ b/src/modules/module-rtp/audio-codec.c @@ -0,0 +1,14 @@ +/* PipeWire */ +/* SPDX-FileCopyrightText: Copyright © 2026 Carlos Rafael Giani */ +/* SPDX-License-Identifier: MIT */ + +#include "audio-codec.h" + +const char * rtp_audio_codec_type_name(enum rtp_audio_codec_type type) +{ + switch (type) { + case RTP_AUDIO_CODEC_TYPE_ENCODER: return "encoder"; + case RTP_AUDIO_CODEC_TYPE_DECODER: return "decoder"; + default: return ""; + } +} diff --git a/src/modules/module-rtp/audio-codec.h b/src/modules/module-rtp/audio-codec.h new file mode 100644 index 000000000..f608f11bc --- /dev/null +++ b/src/modules/module-rtp/audio-codec.h @@ -0,0 +1,209 @@ +/* PipeWire */ +/* SPDX-FileCopyrightText: Copyright © 2026 Carlos Rafael Giani */ +/* SPDX-License-Identifier: MIT */ + +#ifndef PIPEWIRE_RTP_AUDIO_CODEC_H +#define PIPEWIRE_RTP_AUDIO_CODEC_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include +#include + +enum rtp_audio_codec_type { + RTP_AUDIO_CODEC_TYPE_ENCODER, + RTP_AUDIO_CODEC_TYPE_DECODER, +}; + +struct rtp_audio_codec_context { + void *handle; + struct spa_audio_info audio_info; + size_t samples_per_frame; + size_t max_encoded_frame_size; + size_t stride; + enum rtp_audio_codec_type type; + uint8_t *output_buffer; +}; + +struct rtp_audio_codec { + /* Initializes the audio codec. + * + * The codec initialization will be stored in the context. + * + * Initializing an already initialized codec leads to undefined behavior. + * + * context must point to an rtp_audio_codec_context struct instance. + * Said instance will be filled with states for the initialized codec. + * Any already present values will be overwritten. + * + * stream_info must point to an spa_audio_info that contains a raw audio + * format that the codec shall en/decode from/to. 
+ * + * type specifies whether the codec shall be initialized as an + * en- or a decoder. + * + * samples_per_frame specifies how many samples a frame covers. + * "Samples" is meant in the RTP sense; that is, it specifies how + * many samples are there in one channel. This means that this + * value does not depend on the channel count in stream_info. + * + * max_encoded_data_size defines the maximum allowed size in bytes for + * encoded frames. The encode() function then is guaranteed to not + * produce a frame larger than this (it may produce a frame that is + * smaller than that though). + * + * stride specifies the number of bytes per one sample. This value + * _is_ depending on the channel count in stream_info. That is, + * stride = (number of channels x bytes per sample). + * + * codec_props contains extra, codec specific properties, like the + * encoding quality. These properties are optional, and this argument + * can be set to NULL. + * + * Returns 0 in case of success, and a negative errno in case of an error. + * An error will roll back any partial initialization. */ + int (*init)(struct rtp_audio_codec_context *context, struct spa_audio_info *stream_info, + enum rtp_audio_codec_type type, uint32_t samples_per_frame, + size_t max_encoded_data_size, size_t stride, struct pw_properties *codec_props); + + /* Shuts down a previously initialized codec. + * + * If the codec was not initialized, or was already shut down, + * this is a no-op. + * + * context must point to a valid rtp_audio_codec_context struct instance. */ + void (*shutdown)(struct rtp_audio_codec_context *context); + + /* Resets the codec state. + * + * Use this if for example a frame is corrupted and could not be decoded + * to reset to an initial state. This is recommended, since in such cases, + * the codec might not be able to cleanly decode data if previous states + * are retained. + * + * reason is an optional string for logging. 
It helps to indicate in the + * logs the reason for the codec reset. If set to NULL, no reason is logged. + * + * context must point to a valid rtp_audio_codec_context struct instance + * that was initialized. */ + void (*reset)(struct rtp_audio_codec_context *context, const char *reason); + + /* Returns the en- or decoder delay. + * + * The delay is given in samples. Whether this returns the en- or the + * decoder delay depends on the type that was passed to init(). The delay + * is stored in the value pointed to by the delay pointer. + * + * IMPORTANT: This delay must not vary for the duration of the existence + * of the context. Once the context is created, the delay must be fixed. + * + * context must point to a valid rtp_audio_codec_context struct instance + * that was initialized. + * + * delay must be a valid pointer. + * + * Returns 0 in case of success, and a negative errno in case of an error. + * + * In case of an error, the value pointed to by the delay pointer is + * undefined. An error means that the codec cannot be used anymore and + * must be shut down. */ + int (*get_delay)(struct rtp_audio_codec_context *context, size_t *delay); + + /* Encodes a set of samples to a codec frame. + * + * This function only works if the type during initialization + * was RTP_AUDIO_CODEC_TYPE_ENCODER. If it isn't, this function's + * behavior is undefined. + * + * context must point to a valid rtp_audio_codec_context struct instance + * that was initialized. + * + * in_samples must point to a memory block containing at least the amount + * of samples that was specified when the codec was initialized. + * + * out_encoded_data must point to a pointer that shall be set to refer to + * an internal buffer that contains the encoded data. out_encoded_data_size + * must point to a size_t value that shall be set to the size of the encoded + * data in bytes. + * + * Returns 0 in case of success, and a negative errno in case of an error.
+ * + * In case of an error, the values out_encoded_data and out_encoded_data_size + * are set to are undefined. An error means that the codec cannot be used + * anymore and must be shut down. */ + int (*encode)(struct rtp_audio_codec_context *context, const uint8_t *in_samples, + uint8_t **out_encoded_data, size_t *out_encoded_data_size); + + /* Decodes a codec frame to a set of samples. + * + * Codec frames must be fed into the decoder in order. + * + * This function only works if the type during initialization + * was RTP_AUDIO_CODEC_TYPE_DECODER. If it isn't, this function's + * behavior is undefined. + * + * context must point to a valid rtp_audio_codec_context struct instance + * that was initialized. + * + * in_encoded_data must point to a memory block containing the codec frame. + * in_encoded_data_size must be set to the size of the codec frame in bytes. + * + * out_samples must point to a pointer that shall be set to refer to an + * internal buffer that contains the decoded samples. out_num_samples + * must point to a size_t value that shall contain how many samples + * were decoded. That amount's maximum possible value is the number + * of samples per frame specified during initialization. The codec is + * allowed to produce fewer samples than that, but not more than that. + * + * Returns 0 in case of success, and a negative errno in case of an error. + * + * In case of an error, the values out_samples and out_num_samples + * are set to are undefined. An error means that the codec cannot be used + * anymore and must be shut down. */ + int (*decode)(struct rtp_audio_codec_context *context, const uint8_t *in_encoded_data, + size_t in_encoded_data_size, uint8_t **out_samples, size_t *out_num_samples); + + /* Applies PLC to cover a lost frame. + * + * This only works properly if non-missing frames that preceded the + * lost frame were decoded in order. + * + * context must point to a valid rtp_audio_codec_context struct instance + * that was initialized.
+ * + * out_samples must point to a pointer that shall be set to refer to an + * internal buffer that contains the PLC samples. out_num_samples + * must point to a size_t value that shall contain how many PLC samples + * were generated. That amount's maximum possible value is the number + * of samples per frame specified during initialization. The codec is + * allowed to produce fewer samples than that, but not more than that. + * + * Returns 0 in case of success, and a negative errno in case of an error. + * + * In case of an error, the values out_samples and out_num_samples + * are set to are undefined. An error means that the codec cannot be used + * anymore and must be shut down. */ + int (*apply_plc)(struct rtp_audio_codec_context *context, + uint8_t **out_samples, size_t *out_num_samples); + + /* Gets a human-readable name of this codec. + * + * This is meant for logging purposes. */ + const char * (*get_name)(void); +}; + +/* Returns a human-readable string for the given audio codec type. + * + * This is meant for logging purposes. */ +const char * rtp_audio_codec_type_name(enum rtp_audio_codec_type type); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* PIPEWIRE_RTP_AUDIO_CODEC_H */ diff --git a/src/modules/module-rtp/audio.c b/src/modules/module-rtp/audio.c index 20f58fde6..a42eddaf7 100644 --- a/src/modules/module-rtp/audio.c +++ b/src/modules/module-rtp/audio.c @@ -119,12 +119,13 @@ static void rtp_audio_process_playback(void *data) uint32_t clock_rate = impl->io_position->clock.rate.denom; /* Translate the clock position to an RTP timestamp and - * shift it to compensate for device delay and ASRC delay. - * The device delay is scaled along with the clock position, - * since both are expressed in clock sample units, while - * pwt.buffered is expressed in stream time. */ + * shift it to compensate for device delay, decoder delay, + * and ASRC delay.
The device delay is scaled along with + * the clock position, since both are expressed in clock + * sample units, while pwt.buffered is expressed in + * stream time. */ timestamp = scale_u64(impl->io_position->clock.position + device_delay, - impl->rate, clock_rate) + pwt.buffered; + impl->rate, clock_rate) + pwt.buffered + impl->codec_delay; spa_ringbuffer_read_update(&impl->ring, timestamp); avail = spa_ringbuffer_get_read_index(&impl->ring, &read_index); } else { @@ -224,15 +225,27 @@ static void rtp_audio_process_playback(void *data) device_delay = scale_u64(device_delay, impl->rate, clock_rate); } - /* Reduce target buffer by the delay amount to start playback sooner. - * This compensates for the delay to the device. */ - if (SPA_UNLIKELY(impl->target_buffer < device_delay)) { - pw_log_error("Delay to device (%" PRIu32 ") is higher than " - "the target buffer size (%" PRIu32 ")", device_delay, - impl->target_buffer); + /* Reduce target buffer by the decoder and device delay amount to + * start playback sooner. This compensates for the delay to the + * device and for the decoder delay. 
*/ + + if (SPA_UNLIKELY(impl->target_buffer < (impl->codec_delay + device_delay))) { + if (impl->target_buffer < device_delay) { + pw_log_error("Delay to device (%" PRIu32 ") is higher than " + "the target buffer size (%" PRIu32 ")", device_delay, + impl->target_buffer); + } else if (impl->target_buffer < impl->codec_delay) { + pw_log_error("Decoder delay (%zu) is higher than the " + "target buffer size (%" PRIu32 ")", impl->codec_delay, + impl->target_buffer); + } else { + pw_log_error("The combined decoder delay (%zu) and device delay " + "(%" PRIu32 ") are higher than the target buffer size (%" PRIu32 ")", + impl->codec_delay, device_delay, impl->target_buffer); + } target_buffer = 0; } else { - target_buffer = impl->target_buffer - device_delay; + target_buffer = impl->target_buffer - impl->codec_delay - device_delay; } if (avail < (int32_t)wanted) { @@ -330,50 +343,325 @@ static void rtp_audio_process_playback(void *data) pw_stream_queue_buffer(impl->stream, buf); } +static int process_received_samples(struct impl *impl, uint8_t *samples, uint32_t num_samples, + uint16_t seqnum, uint32_t timestamp, uint32_t ts_offset); + +static int on_jitter_buffer_output_rtp_packet(void *context, const uint8_t *packet_data, size_t packet_size, + size_t header_size, uint32_t timestamp, uint16_t seqnum) +{ + int ret; + size_t payload_size; + uint8_t *decoded_samples; + size_t num_decoded_samples; + struct impl *impl = context; + + payload_size = packet_size - header_size; + + ret = impl->audio_codec->decode(&(impl->audio_codec_context), packet_data + header_size, + payload_size, &decoded_samples, &num_decoded_samples); + if (SPA_UNLIKELY(ret < 0)) { + pw_log_error("could not decode audio from packet with seqnum %" PRIu16 ": %s", + seqnum, spa_strerror(ret)); + return ret; + } + + pw_log_trace("got packet with seqnum %" PRIu16 " from jitter buffer; decoding " + "yielded %zu samples", seqnum, num_decoded_samples); + + return process_received_samples(impl, decoded_samples,
num_decoded_samples, seqnum, + timestamp, impl->target_buffer); +} + +static void reset_rtp_audio_codec(struct impl *impl, const char *reason); + +static int on_jitter_buffer_signal_lost_packets(void *context, uint16_t seqnum_of_first_lost_packet, + size_t num_lost_packets, bool open_ended) +{ + int ret; + uint8_t *plc_samples; + size_t num_plc_samples; + uint32_t timestamp; + struct impl *impl = context; + size_t i; + + /* Don't apply PLC if sync is not established. Playback is in sync when + * there is a steady supply of data going on and the timestamps have been + * okay thus far. When sync is lost, there is an audible discontinuity + * that requires a reset of the ringbuffer contents along with the stats + * that keep playback in sync, so applying PLC then makes no sense. */ + if (!impl->have_sync) + return 0; + + // TODO apply fadeout if open_ended == true + + pw_log_info("Jitter buffer signals lost packet(s); packet loss sequence starts at " + "seqnum %" PRIu16 ", sequence length %zu, %sopen ended", seqnum_of_first_lost_packet, + num_lost_packets, open_ended ? "" : "not "); + + for (i = 0; i < num_lost_packets; ++i) { + uint16_t seqnum = (seqnum_of_first_lost_packet + i); + + ret = impl->audio_codec->apply_plc(&(impl->audio_codec_context), &plc_samples, &num_plc_samples); + if (SPA_UNLIKELY(ret < 0)) { + pw_log_error("could not apply PLC: %s", spa_strerror(ret)); + return ret; + } + + spa_ringbuffer_get_write_index(&impl->ring, &timestamp); + + /* PLC generates sample data to cover the lost packet, but it does + * not generate timestamps. Since there is no other information available + * at this stage for reconstructing timestamps, just use the current + * ringbuffer write index. That write index is actually the ideal current + * timestamp in direct timestamp mode - that is, if sync is perfect, then + * that ideal timestamp perfectly matches the RTP timestamps (when they + * are shifted by target_buffer).
See process_received_samples() for + * how these are used and compared to calculate a skew. */ + + pw_log_trace("jitter buffer signaled loss of packet with seqnum %" PRIu16 "; PLC " + "yielded %zu samples; using RTP timestamp %" PRIu32, seqnum, num_plc_samples, + timestamp); + + /* The write index has the target_buffer factored in. (See for example the + * !have_sync cases in process_received_samples().) Therefore, it is + * important to ensure that the timestamp we got from the ringbuffer write + * index is _not_ shifted by target_buffer again. For this reason, pass + * 0 as the ts_offset. */ + ret = process_received_samples(impl, plc_samples, num_plc_samples, seqnum, timestamp, 0); + if (SPA_UNLIKELY(ret < 0)) { + pw_log_error("could not process PLC samples: %s", spa_strerror(ret)); + return ret; + } + } + + if (open_ended) + reset_rtp_audio_codec(impl, "open ended packet loss"); + + return 0; +} + +static int setup_rtp_audio_codec(struct impl *impl, const struct rtp_audio_codec *audio_codec, + struct pw_properties *props) +{ + int ret; + enum rtp_audio_codec_type audio_codec_type; + + /* No audio codec -> PCM is to be transmitted directly. + * Nothing to set up then. */ + if (audio_codec == NULL) + return 0; + + spa_assert(impl->psamples > 0); + + /* A jitter buffer is only needed in the output direction, + * since that is the direction source nodes (= receivers) use. */ + if (impl->direction == PW_DIRECTION_OUTPUT) { + struct rtp_jitter_buffer_params params; + uint32_t jitter_buffer_length_perc; + size_t max_num_packets_in_ringbuffer; + + if (props != NULL) { + jitter_buffer_length_perc = pw_properties_get_uint32(props, + "jitter.buffer.length.perc", 50); + } else { + jitter_buffer_length_perc = 50; + } + + /* Round up the number of packets, since even a packet with just 1 sample + * actually inside is considered a full packet in the ringbuffer. 
*/ + max_num_packets_in_ringbuffer = (impl->target_buffer + (impl->psamples - 1)) / impl->psamples; + /* Get the number of slots out of the number of packets that fit in the + * jitter buffer, using the jitter_buffer_length_perc percentage. At least + * 1 slot is necessary though, so limit the output to a minimum of 1. */ + params.num_slots = SPA_MAX(max_num_packets_in_ringbuffer * jitter_buffer_length_perc / 100, 1u); + pw_log_debug("ringbuffer can hold up to %zu packets worth of data; " + "jitter buffer length is set to cover %" PRIu32 "%% of " + "the ring buffer -> num slots: %zu", max_num_packets_in_ringbuffer, + jitter_buffer_length_perc, params.num_slots); + + params.max_packet_size = impl->mtu; + params.packet_duration = scale_u64(impl->psamples, SPA_NSEC_PER_SEC, impl->rate); + params.loop = impl->data_loop; + params.context = impl; + params.output_rtp_packet = on_jitter_buffer_output_rtp_packet; + params.signal_lost_packets = on_jitter_buffer_signal_lost_packets; + + ret = rtp_jitter_buffer_init(&(impl->jitter_buffer), &params); + if (SPA_UNLIKELY(ret < 0)) { + pw_log_error("could not setup jitter buffer: %s", spa_strerror(ret)); + return ret; + } + + audio_codec_type = RTP_AUDIO_CODEC_TYPE_DECODER; + } else { + audio_codec_type = RTP_AUDIO_CODEC_TYPE_ENCODER; + /* This is a buffer used when data that is to be encoded wraps + * around the ring buffer. The encoder needs the data in + * contiguous form, so the wrapped data must be copied into + * this buffer first in such cases.
*/ + impl->audio_encoder_staging_buffer = malloc(impl->psamples * impl->stride); + } + + impl->audio_codec = audio_codec; + + ret = impl->audio_codec->init(&(impl->audio_codec_context), &impl->stream_info, + audio_codec_type, impl->psamples, impl->payload_size, impl->stride, props); + if (SPA_UNLIKELY(ret < 0)) { + pw_log_error("could not setup audio codec: %s", spa_strerror(ret)); + return ret; + } + + /* Since a stream always either only receives or only sends, one single + * quantity is used for both directions. This means that when this stream + * sends, the audio_codec is configured to encode, and thus, codec_delay + * then is the encoder delay. Consequently, if the stream receives, and + * the audio_codec decodes, codec_delay is the decoder delay. */ + ret = impl->audio_codec->get_delay(&(impl->audio_codec_context), &(impl->codec_delay)); + if (SPA_UNLIKELY(ret < 0)) { + pw_log_error("could not get audio codec delay: %s", spa_strerror(ret)); + return ret; + } + + /* In the output direction (which is what RTP source nodes use), the + * codec_delay is the decoder delay. That delay is applied by either + * shifting timestamps forward (in the direct timestamp mode) or by + * subtracting from target_buffer (in the constant latency mode). + * That is, the decoder delay is treated as part of the specified + * session latency. This ensures that RTP source nodes are in sync + * even in the (unlikely) case that they use different decoders with + * different decoder latencies. But, it is then important to check + * that the decoder delay is smaller than the buffer size. If this + * is not the case, then the constant latency mode would use negative + * buffer sizes, and the direct timestamp mode would always shift + * timestamps past the buffer size. 
*/ + if ((impl->direction == PW_DIRECTION_OUTPUT) && + SPA_UNLIKELY(impl->codec_delay > impl->target_buffer)) { + pw_log_error("decoder delay (%zu samples) is larger than buffer size " + "(%" PRIu32 " samples)", impl->codec_delay, impl->target_buffer); + return -EINVAL; + } + + pw_log_info("initialized %s %s with %" PRIu32 " samples per packet; " + "codec delay: %zu samples", impl->audio_codec->get_name(), + rtp_audio_codec_type_name(audio_codec_type), impl->psamples, + impl->codec_delay); + + return 0; +} + +static void teardown_rtp_audio_codec(struct impl *impl) +{ + if ((impl->audio_codec != NULL) && (impl->audio_codec_context.handle != NULL)) { + rtp_jitter_buffer_shutdown(&(impl->jitter_buffer)); + + impl->audio_codec->shutdown(&(impl->audio_codec_context)); + impl->audio_codec_context.handle = NULL; + } + + free(impl->audio_encoder_staging_buffer); + impl->audio_encoder_staging_buffer = NULL; +} + +static void reset_rtp_audio_codec(struct impl *impl, const char *reason) +{ + if (impl->audio_codec != NULL) + impl->audio_codec->reset(&(impl->audio_codec_context), reason); +} + static int rtp_audio_receive(struct impl *impl, uint8_t *buffer, ssize_t len, ssize_t hlen, uint64_t current_time) { + int ret; struct rtp_header *hdr; - ssize_t plen; - uint16_t seq; - uint32_t timestamp, samples, write, expected_write; - uint32_t stride = impl->stride; - int32_t filled; + uint16_t seqnum; + uint32_t timestamp; hdr = (struct rtp_header*)buffer; + seqnum = ntohs(hdr->sequence_number); + timestamp = ntohl(hdr->timestamp) - impl->ts_offset; - seq = ntohs(hdr->sequence_number); - if (impl->have_seq && impl->seq != seq) { - pw_log_info("unexpected seq (%d != %d) SSRC:%u", - seq, impl->seq, impl->ssrc); - /* No need to resynchronize here. If packets arrive out of - * order, then they are still written in order into the ring - * buffer, since they are written according to where the - * RTP timestamp points to. 
*/ + if (impl->have_seq && (impl->next_expected_incoming_seq >= 0) && + (impl->next_expected_incoming_seq != seqnum)) { + pw_log_info("packet arrived out of order: expected/actual packet seq: %" + PRIu16"/%" PRIu16 " SSRC: %" PRIu32 " timestamp: %" PRIu32, + impl->next_expected_incoming_seq, seqnum, impl->ssrc, + timestamp); } + + /* Note that setting the last_recv_timestamp might not be correct + * in cases where the jitter buffer switches to hold-back mode. That's + * because in such cases, the jitter buffer will output packets in + * bursts, and it might do so at a time that is significantly ahead + * of the current_time. However - the jitter buffer switches to that + * mode when it detects packet losses. last_recv_timestamp is used + * for in-flight data calculations to smoothen out DLL adjustments + * when the RTP source is running in constant delay mode. These + * calculations assume steady packet transmission and a reliable + * network - and in such a network, neither packet reordering nor + * packet losses occur. Thus, it is still okay to set this timestamp + * here instead of in process_received_samples(), because when packets + * are lost / reordered, those calculations fall apart anyway. */ + impl->last_recv_timestamp = current_time; + + impl->next_expected_incoming_seq = (seqnum + 1) & 65535; + + /* If execution reaches this point, then the packet might be out of order, + * but still arrived in time. If the jitter buffer is initialized, it + * will take care of reordering packets. If it is not initialized, then + * there is no need to reorder the packets and depayload the data. The + * packet contents can be directly written into the ring buffer + * according to where their RTP timestamps point to, so even if they + * come in out of order, they ultimately end up in the ring buffer in + * the right locations. 
This can be done when raw PCM data is + * transmitted, but not when the data is encoded - with encoded data, + * the jitter buffer is necessary, since audio codecs typically require + * encoded frames to arrive in order. */ + + if (rtp_jitter_buffer_is_initialized(&(impl->jitter_buffer))) { + ret = rtp_jitter_buffer_insert_packet(&(impl->jitter_buffer), buffer, len, + hlen, timestamp, seqnum); + if (SPA_UNLIKELY(ret < 0)) { + pw_log_error("could not insert packet into jitter buffer: %s", spa_strerror(ret)); + return ret; + } + } else { + ssize_t plen = len - hlen; + ret = process_received_samples(impl, &buffer[hlen], plen / impl->stride, + seqnum, timestamp, impl->target_buffer); + if (SPA_UNLIKELY(ret < 0)) { + pw_log_error("could not process received samples: %s", spa_strerror(ret)); + return ret; + } + } + + return 0; +} + +static int process_received_samples(struct impl *impl, uint8_t *samples, + uint32_t num_samples, uint16_t seq, uint32_t timestamp, uint32_t ts_offset) +{ + uint32_t write, expected_write; + int32_t filled; + uint32_t stride = impl->stride; + impl->seq = seq + 1; impl->have_seq = true; - timestamp = ntohl(hdr->timestamp) - impl->ts_offset; - impl->receiving = true; - impl->last_recv_timestamp = current_time; - - plen = len - hlen; - samples = plen / stride; filled = spa_ringbuffer_get_write_index(&impl->ring, &expected_write); - /* we always write to timestamp + delay */ - write = timestamp + impl->target_buffer; + write = timestamp + ts_offset; if (!impl->have_sync) { pw_log_info("sync to timestamp:%u seq:%u ts_offset:%u SSRC:%u target:%u direct:%u", timestamp, seq, impl->ts_offset, impl->ssrc, impl->target_buffer, impl->direct_timestamp); - /* we read from timestamp, keeping target_buffer of data - * in the ringbuffer. */ + /* Synchronize by setting the read and write indices such that + * the write index is ahead of the read index by exactly + * ts_offset samples, and the read index equals the timestamp + * of the current RTP packet.
*/ impl->ring.readindex = timestamp; impl->ring.writeindex = write; filled = impl->target_buffer; @@ -381,26 +669,34 @@ static int rtp_audio_receive(struct impl *impl, uint8_t *buffer, ssize_t len, spa_dll_init(&impl->dll); spa_dll_set_bw(&impl->dll, SPA_DLL_BW_MIN, 128, impl->rate); memset(impl->buffer, 0, impl->buffer_size); + + reset_rtp_audio_codec(impl, "(re)resynchronization"); + impl->have_sync = true; } else if (expected_write != write) { pw_log_debug("unexpected write (%u != %u)", write, expected_write); } - /* Write overrun only makes sense in constant delay mode. See the - * RTP source module documentation and the rtp_audio_process_playback() - * code for an explanation why. */ - if (!impl->direct_timestamp && (filled + samples > impl->buffer_size / stride)) { - pw_log_debug("receiver write overrun %u + %u > %u", filled, samples, + if (!impl->direct_timestamp && (filled + num_samples > impl->buffer_size / stride)) { + /* In constant delay mode, the goal is to keep the buffer fill + * level at a fixed level. If it goes above or below that, rate + * matching is used in rtp_audio_process_playback() to drive + * the fill level to that target value. If however the write side + * (that is, this function here) reaches a write overrun, it cannot + * insert any more samples and rely on that rate matching to + * compensate (there's no more room in the ringbuffer). A hard + * resync is needed in such a case. */ + pw_log_debug("receiver write overrun %u + %u > %u", filled, num_samples, impl->buffer_size / stride); impl->have_sync = false; } else { - pw_log_trace("got samples:%u", samples); + pw_log_trace("got %" PRIu32 " samples", num_samples); spa_ringbuffer_write_data(&impl->ring, impl->buffer, impl->actual_max_buffer_size, ((uint64_t)write * stride) % impl->actual_max_buffer_size, - &buffer[hlen], (samples * stride)); + samples, (num_samples * stride)); /* Only update the write index if data was actually _appended_. 
* If packets arrived out of order, then it may be that parts @@ -433,7 +729,7 @@ static int rtp_audio_receive(struct impl *impl, uint8_t *buffer, ssize_t len, * In unsigned arithmetic, if write + samples exceeds UINT32_MAX, * it wraps around to a smaller value. We detect this by checking * if new_write < write (which can only happen on overflow). */ - const uint32_t new_write = write + samples; + const uint32_t new_write = write + num_samples; const bool wrapped_around = new_write < write; /* Determine if new_write is ahead of expected_write. @@ -476,6 +772,8 @@ static void rtp_audio_send_packets(struct impl *impl, uint32_t timestamp, uint32 struct iovec iov[3]; struct rtp_header header; uint32_t stride; + bool do_send = true; + int ret; stride = impl->stride; @@ -505,12 +803,84 @@ static void rtp_audio_send_packets(struct impl *impl, uint32_t timestamp, uint32 ((uint64_t)timestamp * stride) % impl->actual_max_buffer_size, &iov[1], tosend * stride); - pw_log_trace_fp("sending %d packet:%d ts_offset:%d timestamp:%u (%f s)", - tosend, num, impl->ts_offset, timestamp, + pw_log_trace_fp("sending %d packet:%d seq: %" PRIu16 " ts_offset:%d timestamp:%u (%f s)", + tosend, num, seq, impl->ts_offset, timestamp, (double)timestamp * impl->io_position->clock.rate.num / impl->io_position->clock.rate.denom); - rtp_stream_call_send_packet(impl, iov, 3); + if (impl->audio_codec != NULL) { + /* In here, the ring buffer that iov points to is redirected + * to be encoded, and the encoder's output is then set as the + * new iov content. */ + + uint8_t *samples_to_encode = NULL; + uint8_t *encoded_data = NULL; + size_t encoded_data_size = 0; + + /* The audio codec expects one contiguous data block with impl->psamples + * samples. In case of a ring buffer wrap around, copy the audio data + * into the staging buffer, since the encoder cannot handle the wrap + * around on its own. 
And if there are fewer than psamples samples, + * copy what's available into the staging buffer and zero-stuff the rest + * of its space to be able to give the encoder the required amount of + * data to encode. */ + spa_assert((iov[1].iov_len + iov[2].iov_len) == (tosend * stride)); + if (iov[2].iov_len != 0) { + /* Copy the audio data, taking wrap around into account. */ + memcpy(impl->audio_encoder_staging_buffer, iov[1].iov_base, iov[1].iov_len); + memcpy(impl->audio_encoder_staging_buffer + iov[1].iov_len, iov[2].iov_base, iov[2].iov_len); + /* Zero-pad the unused trailing portions of the buffer. */ + if ((uint32_t)tosend < impl->psamples) { + memset(impl->audio_encoder_staging_buffer + iov[1].iov_len + iov[2].iov_len, + 0, (impl->psamples - tosend) * stride); + } + + samples_to_encode = impl->audio_encoder_staging_buffer; + } else if ((uint32_t)tosend < impl->psamples) { + /* Copy the audio data. */ + memcpy(impl->audio_encoder_staging_buffer, iov[1].iov_base, iov[1].iov_len); + /* Zero-pad the unused trailing portions of the buffer. */ + memset(impl->audio_encoder_staging_buffer + iov[1].iov_len, + 0, (impl->psamples - tosend) * stride); + + samples_to_encode = impl->audio_encoder_staging_buffer; + } else { + /* No wrap around happening, and no zero padding necessary. + * The audio data can be directly fed into the encoder. */ + samples_to_encode = iov[1].iov_base; + } + + ret = impl->audio_codec->encode(&(impl->audio_codec_context), + samples_to_encode, &encoded_data, &encoded_data_size); + + if (SPA_LIKELY(ret == 0)) { + /* Tweak iov to get the actual RTP payload from the encoder's output + * buffer instead of from the ring buffer directly. (The iov_base + * value of iov[2] is still set to encoded_data, even though its + * iov_len is 0, since it is unclear whether setting its iov_base + * pointer to NULL is valid. iov_len 0 _is_ valid, and causes + * POSIX calls to ignore that iovec item.)
*/ + iov[1].iov_base = encoded_data; + iov[1].iov_len = encoded_data_size; + iov[2].iov_base = encoded_data; + iov[2].iov_len = 0; + do_send = true; + } else { + /* Normally, an encode() error would require shutting down the + * encoder. However, this is not feasible here, since this code + * runs in the data loop thread. Furthermore, if the errors keep + * occurring, this would lead to a constantly reinitializing + * encoder. There is no discernible way to communicate the error + * to the application either. Thus, in case of an error, skip + * the send, and log it. */ + pw_log_error("could not encode audio for packet with seqnum %" PRIu16 ": %s", + seq, spa_strerror(ret)); + do_send = false; + } + } + + if (SPA_LIKELY(do_send)) + rtp_stream_call_send_packet(impl, iov, 3); seq++; first = false; diff --git a/src/modules/module-rtp/opus-codec.c b/src/modules/module-rtp/opus-codec.c new file mode 100644 index 000000000..f4a5bcccf --- /dev/null +++ b/src/modules/module-rtp/opus-codec.c @@ -0,0 +1,625 @@ +/* PipeWire */ +/* SPDX-FileCopyrightText: Copyright © 2026 Carlos Rafael Giani */ +/* SPDX-License-Identifier: MIT */ + +#include "config.h" + +#ifdef HAVE_OPUS + +#include +#include + +#include "opus-codec.h" + +#include +#include + +#include + +#include +#include + +PW_LOG_TOPIC_EXTERN(mod_topic); +#define PW_LOG_TOPIC_DEFAULT mod_topic + +static int opus_error_to_neg_errno(int opus_error) +{ + switch (opus_error) { + case OPUS_OK: return 0; + case OPUS_BAD_ARG: return -EINVAL; + case OPUS_BUFFER_TOO_SMALL: return -ENOBUFS; + case OPUS_INTERNAL_ERROR: return -EIO; + case OPUS_INVALID_PACKET: return -EBADMSG; + case OPUS_UNIMPLEMENTED: return -ENOTSUP; + case OPUS_INVALID_STATE: return -EBADF; + case OPUS_ALLOC_FAIL: return -ENOMEM; + default: return -EIO; + } +} + +static void rtp_opus_codec_shutdown(struct rtp_audio_codec_context *context); + +static int rtp_opus_codec_init(struct rtp_audio_codec_context *context, + struct spa_audio_info *stream_info, + enum 
rtp_audio_codec_type type, uint32_t samples_per_frame, + size_t max_encoded_frame_size, size_t stride, + struct pw_properties *codec_props) +{ + int ret = 0; + int opus_ret = OPUS_OK; + uint32_t i; + unsigned char mapping[255]; + uint64_t us_per_frame; + const char *codec_type_name = ""; + size_t output_buffer_size = 0; + + spa_assert(stream_info != NULL); + spa_assert(context != NULL); + spa_assert(samples_per_frame > 0); + spa_assert(max_encoded_frame_size > 0); + spa_assert(stride > 0); + + spa_memzero(context, sizeof(struct rtp_audio_codec_context)); + + /* Opus supports: + * + * - 1 to 255 channels + * - 8, 12, 16, 24, 48 kHz sample rates + * - 16-bit signed integer and 32-bit float point as sample format + * (with native endianness) + * - 2.5, 5, 10, 20, 40, 60ms frame sizes + * + * Check that the parameters satisfy these constraints. */ + + if ((stream_info->info.raw.channels == 0) || (stream_info->info.raw.channels > 255)) { + pw_log_error("Opus cannot handle %" PRIu32 " channel(s); valid channel count range: 1-255", + stream_info->info.raw.channels); + ret = -EINVAL; + goto error; + } + + switch (stream_info->info.raw.rate) { + case 8000: + case 12000: + case 16000: + case 24000: + case 48000: + break; + default: + pw_log_error("unsupported sample rate of %" PRIu32 " Hz; supported sample rates: " + "8000, 12000, 16000, 24000, 48000", stream_info->info.raw.rate); + ret = -EINVAL; + goto error; + } + + switch (stream_info->info.raw.format) { + case SPA_AUDIO_FORMAT_S16: + case SPA_AUDIO_FORMAT_F32: + break; + default: + pw_log_error("unsupported sample format %s; Opus requires 16-bit signed integer " + "or 32-bit floating point samples", + spa_type_audio_format_to_short_name(stream_info->info.raw.format)); + ret = -EINVAL; + goto error; + } + + us_per_frame = (uint64_t)samples_per_frame * 1000000 / stream_info->info.raw.rate; + + switch (us_per_frame) { + case 2500: + case 5000: + case 10000: + case 20000: + case 40000: + case 60000: + break; + default: + 
pw_log_error("unsupported frame length: %" PRIu32 " samples (%.1f ms)", + samples_per_frame, (double)us_per_frame / 1000.0); + ret = -EINVAL; + goto error; + } + + /* Setup the context. */ + + context->audio_info = *stream_info; + context->samples_per_frame = samples_per_frame; + context->max_encoded_frame_size = max_encoded_frame_size; + context->stride = stride; + context->type = type; + + /* TODO: Currently, we use a simple 1:1 channel mapping. Also, coupled + * streams are not used at the moment. This is a limitation of the current + * Opus integration, in part because it is unclear how to communicate + * the channel mapping and coupled streams from the en- to the decoder. + * One possibility would be to enforce a (de-facto) standard channel + * mapping, like the Vorbis channel mapping. */ + + for (i = 0; i < stream_info->info.raw.channels; i++) + mapping[i] = i; + + switch (type) { + case RTP_AUDIO_CODEC_TYPE_ENCODER: { + static const opus_int32 DEFAULT_COMPLEXITY = 10; + + bool use_computed_max_bitrate = true; + opus_int32 props_bitrate = -1; + opus_int32 computed_max_bitrate; + opus_int32 complexity = DEFAULT_COMPLEXITY; + bool restricted_lowdelay = false; + bool in_band_fec = false; + int packet_loss_percentage = 0; + opus_int32 signal_type = OPUS_AUTO; + const char *signal_type_str = "auto"; + + if (codec_props != NULL) { + const char *prop_str; + + complexity = pw_properties_get_int32(codec_props, "opus.encoder.complexity", complexity); + props_bitrate = pw_properties_get_int32(codec_props, "opus.encoder.bitrate", props_bitrate); + restricted_lowdelay = pw_properties_get_bool(codec_props, "opus.encoder.restricted-lowdelay", + restricted_lowdelay); + in_band_fec = pw_properties_get_bool(codec_props, "opus.encoder.inband-fec", in_band_fec); + packet_loss_percentage = pw_properties_get_int32(codec_props, "opus.encoder.packet-loss-percentage", + packet_loss_percentage); + + if ((packet_loss_percentage < 0) || (packet_loss_percentage > 100)) { + 
pw_log_error("invalid packet loss percentage %d (valid range: 0-100)", packet_loss_percentage); + ret = -EINVAL; + goto error; + } + + prop_str = pw_properties_get(codec_props, "opus.encoder.signal-type"); + if (prop_str != NULL) { + if (spa_streq(signal_type_str, "auto")) + signal_type = OPUS_AUTO; + else if (spa_streq(signal_type_str, "voice")) + signal_type = OPUS_SIGNAL_VOICE; + else if (spa_streq(signal_type_str, "music")) + signal_type = OPUS_SIGNAL_MUSIC; + else { + pw_log_error("unsupported Opus encoder signal type \"%s\"", signal_type_str); + ret = -EINVAL; + goto error; + } + } + } + + context->handle = opus_multistream_encoder_create( + stream_info->info.raw.rate, + stream_info->info.raw.channels, + stream_info->info.raw.channels, + 0, + mapping, + restricted_lowdelay ? OPUS_APPLICATION_RESTRICTED_LOWDELAY : OPUS_APPLICATION_AUDIO, + &opus_ret); + + if (opus_ret != OPUS_OK) + goto error_while_creating; + + /* Some CTL may actually not be implemented. It is safe to ignore these. */ + #define SET_OPUS_ENCODER_CTL(CTL) \ + do { \ + opus_ret = opus_multistream_encoder_ctl(context->handle, CTL); \ + switch (opus_ret) { \ + case OPUS_OK: \ + break; \ + case OPUS_UNIMPLEMENTED: \ + pw_log_debug("could not set encoder CTL %s since it is " \ + "not implemented; ignoring", #CTL); \ + break; \ + default: \ + pw_log_error("error while setting encoder CTL %s: %s", \ + #CTL, opus_strerror(opus_ret)); \ + ret = opus_error_to_neg_errno(opus_ret); \ + goto error; \ + } \ + } while (0) + + /* Use the maximum encoded frame size as the size + * for the output buffer, since that one will be + * used as the destination for the encoder. */ + output_buffer_size = max_encoded_frame_size; + + /* Hardcode encoder CTLs to ensure consistent and deterministic + * encoding behavior, even if the defaults change across + * libopus versions. */ + + /* Counterintuitively, CBR is not actually useful for RTP. RFC 7587 + * section 3.1.2 documents this. 
This applies to the generic audio + * use case here as well. The opus_multistream_encode() function + * (and its float variant) accept an argument that hard-limits the + * maximum encoded output size, so there is no possibility of Opus + * frames exceeding the maximum RTP payload size. Do constrain VBR + * though to not experience excessive variability. */ + SET_OPUS_ENCODER_CTL(OPUS_SET_VBR(1)); + SET_OPUS_ENCODER_CTL(OPUS_SET_VBR_CONSTRAINT(1)); + /* This is a niche feature that is useful for avoiding audio configuration + * switching in software stacks. Signals are forcibly converted to mono or + * stereo, depending on the parameter. Not needed here: channel configuration + * is determined by the PipeWire stream format, not the codec. */ + SET_OPUS_ENCODER_CTL(OPUS_SET_FORCE_CHANNELS(OPUS_AUTO)); + /* Allow the encoder to automatically select the appropriate bandwidth, and + * set the upper bound to the maximum to enable unconstrained selection. */ + SET_OPUS_ENCODER_CTL(OPUS_SET_MAX_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND)); + SET_OPUS_ENCODER_CTL(OPUS_SET_BANDWIDTH(OPUS_AUTO)); + /* DTX is a SILK-layer feature, and not useful for generic audio. It + * is there for detecting silence / low-energy periods during speech. */ + SET_OPUS_ENCODER_CTL(OPUS_SET_DTX(0)); + /* This is a hint for the encoder about the actual depth of the source + * signal. For example, if 32-bit floating point audio is the input, but + * the actual audio signal has a noise floor that resembles that of + * 16-bit audio, then setting this to 16 is appropriate. But such + * signal details are not known here, so set this to the maximum. + * + * Also, from https://www.opus-codec.org/docs/opus_api-1.6/group__opus__encoderctls.html#gaa23940eb477ff617edc14b8d66e104c0 : + * > When using opus_encode() instead of opus_encode_float(), or when libopus is compiled + * > for fixed-point, the encoder uses the minimum of the value set here and the value 16. 
*/ + SET_OPUS_ENCODER_CTL(OPUS_SET_LSB_DEPTH(24)); + /* Disabling prediction hurts encoder efficiency substantially, and + * is only really useful for debugging and testing purposes. Keep + * prediction on. */ + SET_OPUS_ENCODER_CTL(OPUS_SET_PREDICTION_DISABLED(0)); +#ifdef OPUS_SET_DRED_DURATION_REQUEST + /* DRED is a new deep-learning-based redundancy mechanism that embeds up + * to one second of recovery data. It is not used here, since, at this point, + * it only works with SILK, though maybe one day it will work with the CELT + * layer as well. */ + SET_OPUS_ENCODER_CTL(OPUS_SET_DRED_DURATION(0)); +#endif +#ifdef OPUS_SET_QEXT_REQUEST + /* Quality extensions are a new feature that requires very high bitrates. + * See: https://datatracker.ietf.org/doc/draft-ietf-mlcodec-opus-scalable-quality-extension/ + * Disabled due to its experimental nature and need for very high bitrates. */ + SET_OPUS_ENCODER_CTL(OPUS_SET_QEXT(0)); +#endif + + /* Set the bitrate. */ + + /* Estimate the "max computed bitrate" out of the max_encoded_frame_size + * that make the most use of the available space. The valid bitrate range + is 500 to 512000 bits/s, so clamp the estimation to that range. 
*/ + computed_max_bitrate = ((uint64_t)max_encoded_frame_size) * 8 * SPA_USEC_PER_SEC / us_per_frame; + computed_max_bitrate = SPA_CLAMP(computed_max_bitrate, 500, 512000); + + pw_log_debug("computed a bitrate of %" PRId32 " bits/s for Opus encoding based " + "on the max allowed encoded frame size of %zu bytes", + (int32_t)computed_max_bitrate, max_encoded_frame_size); + + if (props_bitrate >= 0) { + pw_log_info("got bitrate %" PRId32 " from properties", (int32_t)props_bitrate); + + if (props_bitrate < 500) { + pw_log_warn("bitrate %" PRId32 " from properties is invalid (must " + "be at least 500 bits/s); setting computed bitrate instead", + (int32_t)props_bitrate); + } else if (props_bitrate > computed_max_bitrate) { + pw_log_warn("bitrate %" PRId32 " from properties exceeds computed " + "max bitrate; setting computed max bitrate instead", + (int32_t)props_bitrate); + } else { + use_computed_max_bitrate = false; + } + } + + if (use_computed_max_bitrate) { + pw_log_info("setting computed max bitrate of %" PRId32 " bits/s", + (int32_t)computed_max_bitrate); + SET_OPUS_ENCODER_CTL(OPUS_SET_BITRATE(computed_max_bitrate)); + } else { + pw_log_info("setting bitrate of %" PRId32 " bits/s from opus.encoder.bitrate property", + (int32_t)props_bitrate); + SET_OPUS_ENCODER_CTL(OPUS_SET_BITRATE(props_bitrate)); + } + + /* Set the encoding complexity. */ + + if ((complexity < 0) || (complexity > 10)) { + pw_log_warn("complexity %" PRId32 " is invalid; choosing %" PRId32 " as default", + (int32_t)complexity, (int32_t)DEFAULT_COMPLEXITY); + complexity = DEFAULT_COMPLEXITY; + } + SET_OPUS_ENCODER_CTL(OPUS_SET_COMPLEXITY(complexity)); + pw_log_info("setting encoding complexity %" PRId32, (int32_t)complexity); + + /* Set the in-band FEC. */ + + SET_OPUS_ENCODER_CTL(OPUS_SET_INBAND_FEC(in_band_fec ? 1 : 0)); + SET_OPUS_ENCODER_CTL(OPUS_SET_PACKET_LOSS_PERC(packet_loss_percentage)); + pw_log_info("%s in-band FEC; packet loss percentage: %d", + in_band_fec ? 
"enabling" : "disabling", packet_loss_percentage); + + /* Set the signal type. */ + + SET_OPUS_ENCODER_CTL(OPUS_SET_SIGNAL(signal_type)); + pw_log_info("setting signal type \"%s\"", signal_type_str); + + #undef SET_OPUS_ENCODER_CTL + + break; + } + case RTP_AUDIO_CODEC_TYPE_DECODER: + context->handle = opus_multistream_decoder_create( + stream_info->info.raw.rate, + stream_info->info.raw.channels, + stream_info->info.raw.channels, + 0, + mapping, + &opus_ret); + + if (opus_ret != OPUS_OK) + goto error_while_creating; + + /* Use the space a fully decoded frame needs as + * the output buffer size, since that one will be + * used as the destination for the decoder. */ + output_buffer_size = samples_per_frame * stride; + break; + default: + pw_log_error("unsupported audio codec type %d", (int)(type)); + ret = -EINVAL; + goto error; + } + + codec_type_name = rtp_audio_codec_type_name(type); + + pw_log_info("Opus %s created; samples per frame: %" PRIu32 "; max encoded frame data size: %zu; " + "output buffer size: %zu; ms per frame: %.1f; sample format: %s", codec_type_name, + samples_per_frame, max_encoded_frame_size, output_buffer_size, (double)us_per_frame / 1000.0, + spa_type_audio_format_to_short_name(stream_info->info.raw.format)); + + context->output_buffer = malloc(output_buffer_size); + if (context->output_buffer == NULL) { + pw_log_error("could not allocate %s output buffer", codec_type_name); + ret = -ENOMEM; + goto error; + } + + return 0; + +error: + rtp_opus_codec_shutdown(context); + return ret; + +error_while_creating: + context->handle = NULL; + pw_log_error("error while creating %s: %s", codec_type_name, opus_strerror(opus_ret)); + ret = opus_error_to_neg_errno(opus_ret); + goto error; +} + +static void rtp_opus_codec_shutdown(struct rtp_audio_codec_context *context) +{ + spa_assert(context != NULL); + + if (context->handle != NULL) { + if (context->type == RTP_AUDIO_CODEC_TYPE_ENCODER) + opus_multistream_encoder_destroy(context->handle); + else + 
opus_multistream_decoder_destroy(context->handle); + context->handle = NULL; + + pw_log_info("Opus %s destroyed", rtp_audio_codec_type_name(context->type)); + } + + if (context->output_buffer != NULL) { + free(context->output_buffer); + context->output_buffer = NULL; + pw_log_debug("%s output buffer freed", rtp_audio_codec_type_name(context->type)); + } +} + +static void rtp_opus_codec_reset(struct rtp_audio_codec_context *context, char const *reason) +{ + spa_assert(context != NULL); + + if (context->type == RTP_AUDIO_CODEC_TYPE_ENCODER) + opus_multistream_encoder_ctl(context->handle, OPUS_RESET_STATE); + else + opus_multistream_decoder_ctl(context->handle, OPUS_RESET_STATE); + + if (reason != NULL) + pw_log_info("Opus %s reset, reason: %s", rtp_audio_codec_type_name(context->type), reason); + else + pw_log_info("Opus %s reset (no reason given)", rtp_audio_codec_type_name(context->type)); +} + +static int rtp_opus_codec_get_delay(struct rtp_audio_codec_context *context, size_t *delay) +{ + int ret; + opus_int32 sample_rate = 0; + opus_int32 decoder_delay = 0; + + spa_assert(context != NULL); + spa_assert(delay != NULL); + + /* The Opus specification requires that all decoder implementations + * have the exact same delay. Encoders can vary, however. libopus + * combines the en- and decoder delay into a "lookahead value", + * which is accessible from the encoder (but not the decoder). + * Since for the RTP transmissions, it is beneficial to handle + * en- and decoder delay separately, extract the encoder delay out + * of the lookahead value if this is an encoder (by subtracting + * the decoder delay from the lookahead), and if this is a + * decoder, just return the decoder delay. + * + * The decoder delay is fixed to 2.5 ms according to RFC 6716, + * so this code needs to convert this to samples. 
*/ + + if (context->type == RTP_AUDIO_CODEC_TYPE_ENCODER) + ret = opus_multistream_encoder_ctl(context->handle, OPUS_GET_SAMPLE_RATE(&sample_rate)); + else + ret = opus_multistream_decoder_ctl(context->handle, OPUS_GET_SAMPLE_RATE(&sample_rate)); + + if (ret != OPUS_OK) { + pw_log_error("could not get %s sample rate: %s", + rtp_audio_codec_type_name(context->type), opus_strerror(ret)); + return opus_error_to_neg_errno(ret); + } + + /* Convert the fixed 2.5 ms decoder delay to samples based + * on the en/decoder's sample rate. Define 2.5ms as 2500us + * here to do the calculation purely with integers. */ + decoder_delay = 2500LL * sample_rate / SPA_USEC_PER_SEC; + + if (context->type == RTP_AUDIO_CODEC_TYPE_ENCODER) { + opus_int32 lookahead = 0; + + ret = opus_multistream_encoder_ctl(context->handle, OPUS_GET_LOOKAHEAD(&lookahead)); + if (ret != OPUS_OK) { + pw_log_error("could not get encoder lookahead: %s", opus_strerror(ret)); + return opus_error_to_neg_errno(ret); + } + + if (SPA_UNLIKELY(lookahead < decoder_delay)) { + pw_log_error("lookahead %" PRId32 " is smaller than decoder delay %" PRId32, + (int32_t)lookahead, (int32_t)decoder_delay); + return -EINVAL; + } + + *delay = lookahead - decoder_delay; + return 0; + } else { + *delay = decoder_delay; + return 0; + } +} + +static int rtp_opus_codec_encode(struct rtp_audio_codec_context *context, const uint8_t *in_samples, + uint8_t **out_encoded_data, size_t *out_encoded_data_size) +{ + int ret; + + spa_assert(context != NULL); + spa_assert(in_samples != NULL); + spa_assert(out_encoded_data != NULL); + spa_assert(out_encoded_data_size != NULL); + + switch (context->audio_info.info.raw.format) { + case SPA_AUDIO_FORMAT_S16: + ret = opus_multistream_encode(context->handle, (const opus_int16 *)in_samples, + context->samples_per_frame, context->output_buffer, context->max_encoded_frame_size); + break; + case SPA_AUDIO_FORMAT_F32: + ret = opus_multistream_encode_float(context->handle, (const float *)in_samples, + 
context->samples_per_frame, context->output_buffer, context->max_encoded_frame_size); + break; + default: + return -EINVAL; + } + + if (ret >= 0) { + *out_encoded_data = context->output_buffer; + *out_encoded_data_size = ret; + pw_log_trace("encoded %zu samples to %zu bytes", context->samples_per_frame, + *out_encoded_data_size); + return 0; + } else { + pw_log_error("error while encoding audio: %s", opus_strerror(ret)); + return -EIO; + } +} + +static int rtp_opus_codec_decode(struct rtp_audio_codec_context *context, const uint8_t *in_encoded_data, + size_t in_encoded_data_size, uint8_t **out_samples, size_t *out_num_samples) +{ + int ret; + + spa_assert(context != NULL); + spa_assert(in_encoded_data != NULL); + spa_assert(in_encoded_data_size > 0); + spa_assert(out_samples != NULL); + spa_assert(out_num_samples != NULL); + + switch (context->audio_info.info.raw.format) { + case SPA_AUDIO_FORMAT_S16: + ret = opus_multistream_decode(context->handle, in_encoded_data, in_encoded_data_size, + (opus_int16 *)(context->output_buffer), context->samples_per_frame, 0); + break; + case SPA_AUDIO_FORMAT_F32: + ret = opus_multistream_decode_float(context->handle, in_encoded_data, in_encoded_data_size, + (float *)(context->output_buffer), context->samples_per_frame, 0); + break; + default: + return -EINVAL; + } + + if (ret >= 0) { + *out_samples = context->output_buffer; + *out_num_samples = ret; + pw_log_trace("decoded %zu bytes to %zu samples", in_encoded_data_size, *out_num_samples); + return 0; + } else { + pw_log_error("error while decoding audio: %s", opus_strerror(ret)); + return -EIO; + } +} + +static int rtp_opus_codec_apply_plc(struct rtp_audio_codec_context *context, + uint8_t **out_samples, size_t *out_num_samples) +{ + int ret; + + spa_assert(context != NULL); + spa_assert(out_samples != NULL); + spa_assert(out_num_samples != NULL); + + /* PLC is applied by "decoding" from a nullpointer. 
See: + * https://www.opus-codec.org/docs/opus_api-1.6/group__opus__multistream.html#gaa4b89541efe01970cf52e4a336db3ad0 */ + + switch (context->audio_info.info.raw.format) { + case SPA_AUDIO_FORMAT_S16: + ret = opus_multistream_decode(context->handle, NULL, 0, + (opus_int16 *)(context->output_buffer), context->samples_per_frame, 0); + break; + case SPA_AUDIO_FORMAT_F32: + ret = opus_multistream_decode_float(context->handle, NULL, 0, + (float *)(context->output_buffer), context->samples_per_frame, 0); + break; + default: + return -EINVAL; + } + + if (ret >= 0) { + *out_samples = context->output_buffer; + *out_num_samples = ret; + pw_log_debug("generated %zu PLC samples", *out_num_samples); + return 0; + } else { + pw_log_error("error while applying PLC: %s", opus_strerror(ret)); + return -EIO; + } +} + +static const char * rtp_opus_codec_get_name(void) +{ + return "Opus"; +} + +const struct rtp_audio_codec* get_rtp_opus_codec(void) +{ + static const struct rtp_audio_codec codec = { + .init = rtp_opus_codec_init, + .shutdown = rtp_opus_codec_shutdown, + .reset = rtp_opus_codec_reset, + .get_delay = rtp_opus_codec_get_delay, + .encode = rtp_opus_codec_encode, + .decode = rtp_opus_codec_decode, + .apply_plc = rtp_opus_codec_apply_plc, + .get_name = rtp_opus_codec_get_name, + }; + + return &codec; +} + +#else + +#include + +const struct rtp_audio_codec* get_rtp_opus_codec(void) +{ + return NULL; +} + +#endif diff --git a/src/modules/module-rtp/opus-codec.h b/src/modules/module-rtp/opus-codec.h new file mode 100644 index 000000000..1192e3c57 --- /dev/null +++ b/src/modules/module-rtp/opus-codec.h @@ -0,0 +1,20 @@ +/* PipeWire */ +/* SPDX-FileCopyrightText: Copyright © 2026 Carlos Rafael Giani */ +/* SPDX-License-Identifier: MIT */ + +#ifndef PIPEWIRE_RTP_OPUS_CODEC_H +#define PIPEWIRE_RTP_OPUS_CODEC_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "audio-codec.h" + +const struct rtp_audio_codec* get_rtp_opus_codec(void); + +#ifdef __cplusplus +} /* extern "C" */ 
+#endif + +#endif /* PIPEWIRE_RTP_OPUS_CODEC_H */ diff --git a/src/modules/module-rtp/opus.c b/src/modules/module-rtp/opus.c deleted file mode 100644 index c25fc17f8..000000000 --- a/src/modules/module-rtp/opus.c +++ /dev/null @@ -1,359 +0,0 @@ -/* PipeWire */ -/* SPDX-FileCopyrightText: Copyright © 2023 Wim Taymans */ -/* SPDX-License-Identifier: MIT */ - -#ifdef HAVE_OPUS - -#include -#include - -/* TODO: Direct timestamp mode here may require a rework. See audio.c for a reference. - * Also check out the usage of actual_max_buffer_size in audio.c. */ - -static void rtp_opus_process_playback(void *data) -{ - struct impl *impl = data; - struct pw_buffer *buf; - struct spa_data *d; - uint32_t wanted, timestamp, target_buffer, stride, maxsize; - int32_t avail; - - if ((buf = pw_stream_dequeue_buffer(impl->stream)) == NULL) { - pw_log_info("Out of stream buffers: %m"); - return; - } - d = buf->buffer->datas; - - stride = impl->stride; - - maxsize = d[0].maxsize / stride; - wanted = buf->requested ? SPA_MIN(buf->requested, maxsize) : maxsize; - - if (impl->io_position && impl->direct_timestamp) { - /* in direct mode, read directly from the timestamp index, - * because sender and receiver are in sync, this would keep - * target_buffer of samples available. 
*/ - spa_ringbuffer_read_update(&impl->ring, - impl->io_position->clock.position); - } - avail = spa_ringbuffer_get_read_index(&impl->ring, &timestamp); - - target_buffer = impl->target_buffer; - - if (avail < (int32_t)wanted) { - enum spa_log_level level; - memset(d[0].data, 0, wanted * stride); - if (impl->have_sync) { - impl->have_sync = false; - level = SPA_LOG_LEVEL_WARN; - } else { - level = SPA_LOG_LEVEL_DEBUG; - } - pw_log(level, "underrun %d/%u < %u", - avail, target_buffer, wanted); - } else { - double error, corr; - if (impl->first) { - if ((uint32_t)avail > target_buffer) { - uint32_t skip = avail - target_buffer; - pw_log_debug("first: avail:%d skip:%u target:%u", - avail, skip, target_buffer); - timestamp += skip; - avail = target_buffer; - } - impl->first = false; - } else if (avail > (int32_t)SPA_MIN(target_buffer * 8, impl->buffer_size2 / stride)) { - pw_log_warn("overrun %u > %u", avail, target_buffer * 8); - timestamp += avail - target_buffer; - avail = target_buffer; - } - if (!impl->direct_timestamp) { - /* when not using direct timestamp and clocks are not - * in sync, try to adjust our playback rate to keep the - * requested target_buffer bytes in the ringbuffer */ - error = (double)target_buffer - (double)avail; - error = SPA_CLAMPD(error, -impl->max_error, impl->max_error); - - corr = spa_dll_update(&impl->dll, error); - - pw_log_trace("avail:%u target:%u error:%f corr:%f", avail, - target_buffer, error, corr); - - pw_stream_set_rate(impl->stream, 1.0 / corr); - } - spa_ringbuffer_read_data(&impl->ring, - impl->buffer, - impl->buffer_size2, - (timestamp * stride) & impl->buffer_mask2, - d[0].data, wanted * stride); - - timestamp += wanted; - spa_ringbuffer_read_update(&impl->ring, timestamp); - } - d[0].chunk->offset = 0; - d[0].chunk->size = wanted * stride; - d[0].chunk->stride = stride; - d[0].chunk->flags = 0; - buf->size = wanted; - - pw_stream_queue_buffer(impl->stream, buf); -} - -static int rtp_opus_receive(struct impl *impl, uint8_t 
*buffer, ssize_t len, - ssize_t hlen, uint64_t current_time) -{ - struct rtp_header *hdr; - ssize_t plen; - uint16_t seq; - uint32_t timestamp, samples, write, expected_write; - uint32_t stride = impl->stride; - OpusMSDecoder *dec = impl->stream_data; - int32_t filled; - int res; - - hdr = (struct rtp_header*)buffer; - - seq = ntohs(hdr->sequence_number); - if (impl->have_seq && impl->seq != seq) { - pw_log_info("unexpected seq (%d != %d) SSRC:%u", - seq, impl->seq, impl->ssrc); - impl->have_sync = false; - } - impl->seq = seq + 1; - impl->have_seq = true; - - timestamp = ntohl(hdr->timestamp) - impl->ts_offset; - - impl->receiving = true; - - plen = len - hlen; - - filled = spa_ringbuffer_get_write_index(&impl->ring, &expected_write); - - /* we always write to timestamp + delay */ - write = timestamp + impl->target_buffer; - - if (!impl->have_sync) { - pw_log_info("sync to timestamp:%u seq:%u ts_offset:%u SSRC:%u target:%u direct:%u", - timestamp, seq, impl->ts_offset, impl->ssrc, - impl->target_buffer, impl->direct_timestamp); - - /* we read from timestamp, keeping target_buffer of data - * in the ringbuffer. 
*/ - impl->ring.readindex = timestamp; - impl->ring.writeindex = write; - filled = impl->target_buffer; - - spa_dll_init(&impl->dll); - spa_dll_set_bw(&impl->dll, SPA_DLL_BW_MIN, 128, impl->rate); - memset(impl->buffer, 0, impl->buffer_size); - impl->have_sync = true; - } else if (expected_write != write) { - pw_log_debug("unexpected write (%u != %u)", - write, expected_write); - } - - if (filled + 2880 > (int32_t)(impl->buffer_size2 / stride)) { - pw_log_debug("capture overrun %u + %d > %u", filled, 2880, - impl->buffer_size2 / stride); - impl->have_sync = false; - } else { - uint32_t index = (write * stride) & impl->buffer_mask2, end; - - res = opus_multistream_decode_float(dec, - &buffer[hlen], plen, - (float*)&impl->buffer[index], 2880, - 0); - - end = index + (res * stride); - /* fold to the lower part of the ringbuffer when overflow */ - if (end > impl->buffer_size2) - memmove(impl->buffer, &impl->buffer[impl->buffer_size2], end - impl->buffer_size2); - - pw_log_info("receiving %zd len:%d timestamp:%d %u", plen, res, timestamp, index); - samples = res; - - write += samples; - spa_ringbuffer_write_update(&impl->ring, write); - } - return 0; -} - -static void rtp_opus_flush_packets(struct impl *impl) -{ - int32_t avail, tosend; - uint32_t stride, timestamp, offset; - uint8_t out[1280]; - struct iovec iov[2]; - struct rtp_header header; - OpusMSEncoder *enc = impl->stream_data; - int res = 0; - - avail = spa_ringbuffer_get_read_index(&impl->ring, &timestamp); - tosend = impl->psamples; - - if (avail < tosend) - return; - - stride = impl->stride; - - spa_zero(header); - header.v = 2; - header.pt = impl->payload; - header.ssrc = htonl(impl->ssrc); - - iov[0].iov_base = &header; - iov[0].iov_len = sizeof(header); - iov[1].iov_base = out; - iov[1].iov_len = 0; - - offset = 0; - while (avail >= tosend) { - header.sequence_number = htons(impl->seq); - header.timestamp = htonl(impl->ts_offset + timestamp); - - res = opus_multistream_encode_float(enc, - (const 
float*)&impl->buffer[offset * stride], tosend, - out, sizeof(out)); - - pw_log_trace("sending %d len:%d timestamp:%d", tosend, res, timestamp); - iov[1].iov_len = res; - - rtp_stream_call_send_packet(impl, iov, 2); - - impl->seq++; - timestamp += tosend; - offset += tosend; - avail -= tosend; - } - - pw_log_trace("move %d offset:%d", avail, offset); - memmove(impl->buffer, &impl->buffer[offset * stride], avail * stride); - - spa_ringbuffer_read_update(&impl->ring, timestamp); -} - -static void rtp_opus_process_capture(void *data) -{ - struct impl *impl = data; - struct pw_buffer *buf; - struct spa_data *d; - uint32_t offs, size, timestamp, expected_timestamp, stride; - int32_t filled, wanted; - - if ((buf = pw_stream_dequeue_buffer(impl->stream)) == NULL) { - pw_log_info("Out of stream buffers: %m"); - return; - } - d = buf->buffer->datas; - - offs = SPA_MIN(d[0].chunk->offset, d[0].maxsize); - size = SPA_MIN(d[0].chunk->size, d[0].maxsize - offs); - stride = impl->stride; - wanted = size / stride; - - filled = spa_ringbuffer_get_write_index(&impl->ring, &expected_timestamp); - - if (SPA_LIKELY(impl->io_position)) { - uint32_t rate = impl->io_position->clock.rate.denom; - timestamp = impl->io_position->clock.position * impl->rate / rate; - } else - timestamp = expected_timestamp; - - if (!impl->have_sync) { - pw_log_info("sync to timestamp:%u seq:%u ts_offset:%u SSRC:%u", - timestamp, impl->seq, impl->ts_offset, impl->ssrc); - impl->ring.readindex = impl->ring.writeindex = expected_timestamp = timestamp; - memset(impl->buffer, 0, impl->buffer_size); - impl->have_sync = true; - } else { - if (SPA_ABS((int32_t)expected_timestamp - (int32_t)timestamp) > 32) { - pw_log_warn("expected %u != timestamp %u", expected_timestamp, timestamp); - impl->have_sync = false; - } else if (filled + wanted > (int32_t)(impl->buffer_size / stride)) { - pw_log_warn("overrun %u + %u > %u", filled, wanted, impl->buffer_size / stride); - impl->have_sync = false; - } - } - - 
spa_ringbuffer_write_data(&impl->ring, - impl->buffer, - impl->buffer_size, - (filled * stride) & impl->buffer_mask, - SPA_PTROFF(d[0].data, offs, void), wanted * stride); - expected_timestamp += wanted; - spa_ringbuffer_write_update(&impl->ring, expected_timestamp); - - pw_stream_queue_buffer(impl->stream, buf); - - rtp_opus_flush_packets(impl); -} - -static void rtp_opus_deinit(struct impl *impl, enum spa_direction direction) -{ - if (impl->stream_data) { - if (direction == SPA_DIRECTION_INPUT) - opus_multistream_encoder_destroy(impl->stream_data); - else - opus_multistream_decoder_destroy(impl->stream_data); - } -} - -static int rtp_opus_init(struct impl *impl, enum spa_direction direction) -{ - int err; - unsigned char mapping[255]; - uint32_t i; - - if (impl->info.info.opus.channels > 255) - return -EINVAL; - - if (impl->psamples >= 2880) - impl->psamples = 2880; - else if (impl->psamples >= 1920) - impl->psamples = 1920; - else if (impl->psamples >= 960) - impl->psamples = 960; - else if (impl->psamples >= 480) - impl->psamples = 480; - else if (impl->psamples >= 240) - impl->psamples = 240; - else - impl->psamples = 120; - - for (i = 0; i < impl->info.info.opus.channels; i++) - mapping[i] = i; - - impl->deinit = rtp_opus_deinit; - impl->receive_rtp = rtp_opus_receive; - if (direction == SPA_DIRECTION_INPUT) { - impl->stream_events.process = rtp_opus_process_capture; - - impl->stream_data = opus_multistream_encoder_create( - impl->info.info.opus.rate, - impl->info.info.opus.channels, - impl->info.info.opus.channels, 0, - mapping, - OPUS_APPLICATION_AUDIO, - &err); - } - else { - impl->stream_events.process = rtp_opus_process_playback; - - impl->stream_data = opus_multistream_decoder_create( - impl->info.info.opus.rate, - impl->info.info.opus.channels, - impl->info.info.opus.channels, 0, - mapping, - &err); - } - if (!impl->stream_data) - pw_log_error("opus error: %d", err); - return impl->stream_data ? 
0 : err; -} -#else -static int rtp_opus_init(struct impl *impl, enum spa_direction direction) -{ - return -ENOTSUP; -} -#endif diff --git a/src/modules/module-rtp/stream.c b/src/modules/module-rtp/stream.c index 5bbc43abc..7ead41d0e 100644 --- a/src/modules/module-rtp/stream.c +++ b/src/modules/module-rtp/stream.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +28,9 @@ #include #include #include +#include +#include +#include PW_LOG_TOPIC_EXTERN(mod_topic); #define PW_LOG_TOPIC_DEFAULT mod_topic @@ -105,7 +109,6 @@ struct impl { const struct rtp_format_info *rtp_format_info; enum spa_direction direction; - void *stream_data; uint32_t rate; uint32_t stride; @@ -113,6 +116,15 @@ struct impl { uint8_t payload; uint32_t ssrc; uint16_t seq; + /* This is used for pre-checking for received packets that arrive + * out of order. Depending on the audio type, a jitter buffer + * may be used intermediately, and then, the seq field above + * will be set _after_ the jitter buffer reorders packets, + * so it cannot be used for the pre-checking purpose. Thus, + * this separate seq field is required. + * It is int32_t to be able to store -1 as an indicator that + * no expected seqnum is set yet. */ + int32_t next_expected_incoming_seq; unsigned fixed_ssrc:1; unsigned have_ssrc:1; unsigned ignore_ssrc:1; @@ -187,6 +199,19 @@ struct impl { void (*deinit)(struct impl *impl, enum spa_direction direction); int (*resend_packets)(struct impl *impl, uint16_t seq, uint16_t num); + struct rtp_jitter_buffer jitter_buffer; + const struct rtp_audio_codec *audio_codec; + struct rtp_audio_codec_context audio_codec_context; + /* This buffer is needed in case the data that is to be encoded is wrapped + * around the ring buffer border. In such a case, the two halves have to + * be copied and merged into this buffer, since audio codecs expect one + * contiguous input memory block as the data to encode. 
*/ + uint8_t *audio_encoder_staging_buffer; + + /* Delay of the audio codec. Depending on how the audio codec is configured, + * this is either the decoder delay or the encoder delay. In samples. */ + size_t codec_delay; + /* * pw_filter where the filter would be driven at the PTP clock * rate with RTP sink being driven at the sink driver clock rate @@ -311,7 +336,6 @@ static int do_finish_stopping_state(struct spa_loop *loop, bool async, uint32_t #include "module-rtp/audio.c" #include "module-rtp/midi.c" -#include "module-rtp/opus.c" struct rtp_format_info { uint32_t media_subtype; @@ -407,6 +431,11 @@ static int stream_start(struct impl *impl) pw_log_error("error while closing leftover connection: %s", spa_strerror(res)); } + impl->next_expected_incoming_seq = -1; + + if (impl->audio_codec != NULL) + reset_rtp_audio_codec(impl, "starting new stream"); + impl->reset_ringbuffer(impl); res = 0; @@ -519,23 +548,18 @@ static void on_stream_state_changed(void *d, enum pw_stream_state old, } } -static void update_latency_params(struct impl *impl) +static void fill_latency_params(struct impl *impl, struct spa_pod_builder *b, + const struct spa_pod **params, uint32_t *n_params) { - uint32_t n_params = 0; - const struct spa_pod *params[2]; - uint8_t buffer[1024]; - struct spa_pod_builder b; struct spa_latency_info main_latency; - spa_pod_builder_init(&b, buffer, sizeof(buffer)); - /* main_latency is the latency in the direction indicated by impl->direction. - * In RTP streams, this consists solely of the process latency. (In theory, - * PipeWire SPA nodes could have additional latencies on top of the process - * latency, but this is not the case here.) The other direction is already - * handled by pw_stream. + * In RTP streams, this consists of the process latency. In the INPUT direction + * (which is what sinks use), the encoder delay is also part of main_latency. 
+ * The full latency params also include latency in the other direction - + * this is already handled by pw_stream. * - * The main_latncy is passed as updated SPA_PARAM_Latency params to the stream. + * The main_latency is passed as updated SPA_PARAM_Latency params to the stream. * That way, the stream always gets information of latency for _both_ directions; * the direction indicated by impl->direction is covered by main_latency, and * the opposite direction is already taken care of by the default pw_stream @@ -547,10 +571,27 @@ static void update_latency_params(struct impl *impl) main_latency = SPA_LATENCY_INFO(impl->direction); spa_process_latency_info_add(&impl->process_latency, &main_latency); - params[n_params++] = spa_latency_build(&b, SPA_PARAM_Latency, &main_latency); - params[n_params++] = spa_process_latency_build(&b, SPA_PARAM_ProcessLatency, - &impl->process_latency); + if (impl->direction == PW_DIRECTION_INPUT) { + int64_t codec_delay_ns = (int64_t)(impl->codec_delay) * SPA_NSEC_PER_SEC / impl->rate; + main_latency.min_ns += codec_delay_ns; + main_latency.max_ns += codec_delay_ns; + } + params[(*n_params)++] = spa_latency_build(b, SPA_PARAM_Latency, &main_latency); + params[(*n_params)++] = spa_process_latency_build(b, SPA_PARAM_ProcessLatency, + &impl->process_latency); +} + +static void update_latency_params(struct impl *impl) +{ + const struct spa_pod *params[2]; + uint32_t n_params; + uint8_t buffer[1024]; + struct spa_pod_builder b; + + n_params = 0; + spa_pod_builder_init(&b, buffer, sizeof(buffer)); + fill_latency_params(impl, &b, params, &n_params); pw_stream_update_params(impl->stream, params, n_params); } @@ -609,11 +650,12 @@ static const struct rtp_format_info *find_rtp_pcm_audio_format_info(const struct return NULL; } -static int parse_audio_info(const struct pw_properties *props, struct spa_audio_info_raw *info) +static int parse_audio_info(const struct pw_properties *props, struct spa_audio_info_raw *info, + const char *default_format) 
{ return spa_audio_info_raw_init_dict_keys(info, &SPA_DICT_ITEMS( - SPA_DICT_ITEM(SPA_KEY_AUDIO_FORMAT, DEFAULT_RAW_AUDIO_FORMAT), + SPA_DICT_ITEM(SPA_KEY_AUDIO_FORMAT, default_format), SPA_DICT_ITEM(SPA_KEY_AUDIO_RATE, SPA_STRINGIFY(DEFAULT_RATE)), SPA_DICT_ITEM(SPA_KEY_AUDIO_POSITION, DEFAULT_POSITION)), &props->dict, @@ -660,6 +702,8 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core, float latency_msec; int res; bool process_latency_from_sess; + uint32_t audio_codec_type = 0; + const char *default_audio_format = NULL; impl = calloc(1, sizeof(*impl)); if (impl == NULL) { @@ -690,11 +734,15 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core, impl->info.media_type = SPA_MEDIA_TYPE_audio; impl->info.media_subtype = SPA_MEDIA_SUBTYPE_raw; impl->payload = 127; + audio_codec_type = SPA_MEDIA_SUBTYPE_raw; + default_audio_format = DEFAULT_RAW_AUDIO_FORMAT; } else if (spa_streq(str, "raop")) { impl->info.media_type = SPA_MEDIA_TYPE_audio; impl->info.media_subtype = SPA_MEDIA_SUBTYPE_raw; impl->payload = 0x60; + audio_codec_type = SPA_MEDIA_SUBTYPE_raw; + default_audio_format = DEFAULT_RAOP_AUDIO_FORMAT; } else if (spa_streq(str, "midi")) { impl->info.media_type = SPA_MEDIA_TYPE_application; @@ -702,10 +750,14 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core, impl->payload = 0x61; } #ifdef HAVE_OPUS + /* The "opus" sess.media type is actually raw audio that + * is encoded with Opus before sending it out as RTP. 
*/ else if (spa_streq(str, "opus")) { impl->info.media_type = SPA_MEDIA_TYPE_audio; - impl->info.media_subtype = SPA_MEDIA_SUBTYPE_opus; + impl->info.media_subtype = SPA_MEDIA_SUBTYPE_raw; impl->payload = 127; + audio_codec_type = SPA_MEDIA_SUBTYPE_opus; + default_audio_format = DEFAULT_OPUS_AUDIO_FORMAT; } #endif else { @@ -716,22 +768,61 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core, switch (impl->info.media_subtype) { case SPA_MEDIA_SUBTYPE_raw: - if ((res = parse_audio_info(props, &impl->info.info.raw)) < 0) { + if ((res = parse_audio_info(props, &impl->info.info.raw, default_audio_format)) < 0) { pw_log_error("can't parse format: %s", spa_strerror(res)); goto out; } impl->stream_info = impl->info; - impl->rtp_format_info = find_rtp_pcm_audio_format_info(&impl->info); - if (impl->rtp_format_info == NULL) { - pw_log_error("unsupported audio format:%d (%s) channels:%d", + impl->rate = impl->stream_info.info.raw.rate; + + /* Pick the RTP information and stride values suitable for + * the specified codec type. If the codec type is set to + * SPA_MEDIA_SUBTYPE_raw, then no special encoding is done, + * and PCM samples are transmitted directly over RTP. 
*/ + switch (audio_codec_type) { + case SPA_MEDIA_SUBTYPE_raw: + impl->rtp_format_info = find_rtp_pcm_audio_format_info(&impl->info); + if (impl->rtp_format_info == NULL) { + pw_log_error("unsupported audio format:%d (%s) channels:%d", impl->stream_info.info.raw.format, spa_type_audio_format_to_short_name(impl->stream_info.info.raw.format), impl->stream_info.info.raw.channels); + res = -EINVAL; + goto out; + } + impl->stride = impl->rtp_format_info->size * impl->stream_info.info.raw.channels; + pw_log_info("configured raw PCM RTP payload: MIME: %s format: %s rate: %" + PRIu32 " stride: %" PRIu32, impl->rtp_format_info->mime, + spa_type_audio_format_to_short_name(impl->rtp_format_info->format), + impl->rate, impl->stride); + break; + case SPA_MEDIA_SUBTYPE_opus: + impl->rtp_format_info = &rtp_opus_format_info; + switch (impl->stream_info.info.raw.format) { + case SPA_AUDIO_FORMAT_S16: + impl->stride = 2 * impl->stream_info.info.raw.channels; + break; + case SPA_AUDIO_FORMAT_F32: + impl->stride = 4 * impl->stream_info.info.raw.channels; + break; + default: + pw_log_error("unsupported raw audio format for encoding to Opus:%d (%s)", + impl->stream_info.info.raw.format, + spa_type_audio_format_to_short_name(impl->stream_info.info.raw.format)); + res = -EINVAL; + goto out; + } + pw_log_info("configured Opus RTP payload: format: %s rate: %" PRIu32 " stride: %" + PRIu32, spa_type_audio_format_to_short_name(impl->stream_info.info.raw.format), + impl->rate, impl->stride); + break; + default: + pw_log_error("unsupported audio encoding:%d (%s)", audio_codec_type, + spa_type_to_short_name(audio_codec_type, + spa_type_media_subtype, "")); res = -EINVAL; goto out; } - impl->stride = impl->rtp_format_info->size * impl->stream_info.info.raw.channels; - impl->rate = impl->stream_info.info.raw.rate; break; case SPA_MEDIA_SUBTYPE_control: impl->stream_info = impl->info; @@ -741,21 +832,8 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core, impl->rate = 
pw_properties_get_uint32(props, "midi.rate", 10000); if (impl->rate == 0) impl->rate = 10000; - break; - case SPA_MEDIA_SUBTYPE_opus: - impl->stream_info.media_type = SPA_MEDIA_TYPE_audio; - impl->stream_info.media_subtype = SPA_MEDIA_SUBTYPE_raw; - if ((res = parse_audio_info(props, &impl->stream_info.info.raw)) < 0) { - pw_log_error("can't parse format: %s", spa_strerror(res)); - goto out; - } - impl->stream_info.info.raw.format = SPA_AUDIO_FORMAT_F32; - impl->info.info.opus.rate = impl->stream_info.info.raw.rate; - impl->info.info.opus.channels = impl->stream_info.info.raw.channels; - - impl->rtp_format_info = &rtp_opus_format_info; - impl->stride = impl->rtp_format_info->size * impl->stream_info.info.raw.channels; - impl->rate = impl->stream_info.info.raw.rate; + pw_log_info("configured MIDI RTP payload: rate: %" PRIu32 " stride: %" PRIu32, + impl->rate, impl->stride); break; default: spa_assert_not_reached(); @@ -845,6 +923,7 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core, impl->payload_size = impl->mtu - impl->header_size; impl->seq = pw_rand32(); + impl->next_expected_incoming_seq = -1; str = pw_properties_get(props, "sess.min-ptime"); if (!spa_atof(str, &min_ptime)) @@ -954,6 +1033,26 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core, spa_dll_set_bw(&impl->dll, SPA_DLL_BW_MIN, 128, impl->rate); impl->corr = 1.0; + switch (impl->info.media_subtype) { + case SPA_MEDIA_SUBTYPE_raw: + switch (audio_codec_type) { + case SPA_MEDIA_SUBTYPE_opus: + res = setup_rtp_audio_codec(impl, get_rtp_opus_codec(), props); + break; + default: + res = 0; + break; + } + + if (SPA_UNLIKELY(res < 0)) + goto out; + + break; + + default: + break; + } + impl->stream = pw_stream_new(core, "rtp-session", spa_steal_ptr(props)); if (impl->stream == NULL) { res = -errno; @@ -980,12 +1079,6 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core, SPA_FORMAT_mediaSubtype, SPA_POD_Id(SPA_MEDIA_SUBTYPE_control)); rtp_midi_init(impl, direction); break; - case 
SPA_MEDIA_SUBTYPE_opus: - params[n_params++] = spa_format_audio_build(&b, - SPA_PARAM_EnumFormat, &impl->stream_info); - flags |= PW_STREAM_FLAG_AUTOCONNECT; - rtp_opus_init(impl, direction); - break; default: res = -EINVAL; goto out; @@ -998,24 +1091,20 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core, * quantity in turn is subjected to constraint checks (see above), it is * possible that the _actual_ session latency no longer equals the value * of sess.latency.msec by the time this location is reached. To take into - * account these constraint adjustments, convert back the impl->target_buffer - * to nanoseconds, and use that as the process latency. + * account these constraint adjustments, fill_latency_params() converts + * back the impl->target_buffer to nanoseconds, and uses that as the + * process latency. * - * Then, just like how update_latency_params() does it, construct the - * SPA_PARAM_Latency and SPA_PARAM_ProcessLatency params to let the new + * Then, just like in update_latency_params(), the SPA_PARAM_Latency + * and SPA_PARAM_ProcessLatency params are constructed to let the new * pw_stream know of these latency figures right from the start. 
*/ - struct spa_latency_info latency; - impl->process_latency.ns = (int64_t)(impl->target_buffer * 1e9 / impl->rate); - pw_log_debug("set process latency to %" PRId64 " based on sess.latency.msec " - "value %f", impl->process_latency.ns, latency_msec); + pw_log_debug("set process latency to %" PRId64 " ns based on sess.latency.msec " + "value %f ms (= %" PRIu32 " samples)", impl->process_latency.ns, + latency_msec, impl->target_buffer); - latency = SPA_LATENCY_INFO(impl->direction); - spa_process_latency_info_add(&(impl->process_latency), &latency); - params[n_params++] = spa_latency_build(&b, SPA_PARAM_Latency, &latency); - params[n_params++] = spa_process_latency_build(&b, SPA_PARAM_ProcessLatency, - &(impl->process_latency)); + fill_latency_params(impl, &b, params, &n_params); } pw_stream_add_listener(impl->stream, @@ -1040,6 +1129,7 @@ struct rtp_stream *rtp_stream_new(struct pw_core *core, return (struct rtp_stream*)impl; out: + teardown_rtp_audio_codec(impl); pw_properties_free(props); if (impl) { if (impl->stream) @@ -1058,6 +1148,8 @@ void rtp_stream_destroy(struct rtp_stream *s) rtp_stream_emit_destroy(impl); + teardown_rtp_audio_codec(impl); + if (impl->deinit) impl->deinit(impl, impl->direction); @@ -1142,7 +1234,17 @@ unexpected_ssrc: int rtp_stream_resend_packets(struct rtp_stream *s, uint16_t seq, uint16_t num) { struct impl *impl = (struct impl*)s; - if (impl->resend_packets) + /* Resending only works with raw data, since codecs usually + * have internal state that is updated after encoding a + * packet. This then means that resend attempts would not + * yield the exact same packet, and this can corrupt the + * state of the decoder in a receiver. + * To support resending with codecs, the last N packets + * have to be cached somehow. Given the fact that, thus + * far, only the RAOP sink resends packets, and RAOP only + * supports raw PCM, it is currently easier to just + * disable retransmissions when a codec is in use. 
*/ + if (impl->resend_packets && (impl->audio_codec == NULL)) return impl->resend_packets(impl, seq, num); else return -ENOTSUP; diff --git a/src/modules/module-rtp/stream.h b/src/modules/module-rtp/stream.h index 94ceb3650..0cf44eda7 100644 --- a/src/modules/module-rtp/stream.h +++ b/src/modules/module-rtp/stream.h @@ -13,6 +13,7 @@ struct rtp_stream; #define DEFAULT_RAW_AUDIO_FORMAT "S16BE" #define DEFAULT_RAOP_AUDIO_FORMAT "S16LE" +#define DEFAULT_OPUS_AUDIO_FORMAT "F32" #define DEFAULT_RATE 48000 #define DEFAULT_CHANNELS 2 #define DEFAULT_POSITION "[ FL FR ]"