bluez5: media-source: fix off-by-one cycle in rate matching

The rate matching filter assumes buffer level for cycle j+1 is

    buffer(j+1) = buffer(j) + recv(j) - corr(j+1) * duration

but what we are actually doing is instead

    buffer(j+1) = buffer(j) + recv(j) - corr(j-1) * duration

because the correction factor that is computed is not used for the next
cycle, but the one following that. Although the filter is still stable
in theory the extra lag causes oscillations to be damped less.

Fix by using the computed correction factor for the next cycle, as
there's no reason why we'd like to have more lag in rate matching.

This then changes c(j-1) -> c(j) in the assumptions, which turns out to
fix the situation. Fix the filter derivation to match.  The filter
coefficients stay as they were, and they are actually exactly correct
also for short averaging times.

In practice, it is observed that ISO RX with quantum 4096 converges to
stable rate, whereas previously the matching retained small
oscillations.
This commit is contained in:
Pauli Virtanen 2025-12-22 15:29:16 +02:00 committed by Wim Taymans
parent c4812af436
commit de34ce606f
3 changed files with 68 additions and 41 deletions

View file

@ -87,6 +87,7 @@ struct spa_bt_decode_buffer
double rate_diff;
int32_t delay;
int32_t delay_frac;
uint32_t prev_samples;
double level;
@ -265,8 +266,7 @@ static inline void spa_bt_decode_buffer_recover(struct spa_bt_decode_buffer *thi
spa_log_debug(this->log, "%p recover level:%f", this, this->level);
}
static inline void spa_bt_decode_buffer_process(struct spa_bt_decode_buffer *this, uint32_t samples, int64_t duration_ns,
double rate_diff, int64_t next_nsec, int32_t delay, int32_t delay_frac)
static inline void spa_bt_decode_buffer_process(struct spa_bt_decode_buffer *this, uint32_t samples, int64_t duration_ns)
{
const uint32_t data_size = samples * this->frame_size;
const int32_t packet_size = SPA_CLAMP(this->packet_size.max, 0, INT32_MAX/8);
@ -274,15 +274,7 @@ static inline void spa_bt_decode_buffer_process(struct spa_bt_decode_buffer *thi
uint32_t avail;
double level;
this->rate_diff = rate_diff;
this->next_nsec = next_nsec;
this->delay = delay;
this->delay_frac = delay_frac;
/* The fractional delay is given at the start of current cycle. Make it relative
* to next_nsec used for the level calculations.
*/
this->delay_frac += (int32_t)(1e9 * samples - duration_ns * this->rate * this->rate_diff);
this->prev_samples = samples;
if (SPA_UNLIKELY(duration_ns != this->duration_ns)) {
this->duration_ns = duration_ns;
@ -332,6 +324,11 @@ static inline void spa_bt_decode_buffer_process(struct spa_bt_decode_buffer *thi
this->pos += samples;
this->corr = spa_bt_rate_control_update(&this->ctl,
level * SPA_NSEC_PER_SEC / this->rate,
((double)target + 0.5/this->rate) * SPA_NSEC_PER_SEC / this->rate,
duration_ns, this->avg_period, this->rate_diff_max);
enum spa_log_level log_level = (this->pos > this->rate) ? SPA_LOG_LEVEL_DEBUG : SPA_LOG_LEVEL_TRACE;
if (SPA_UNLIKELY(spa_log_level_topic_enabled(this->log, SPA_LOG_TOPIC_DEFAULT, log_level))) {
spa_log_lev(this->log, log_level,
@ -346,11 +343,6 @@ static inline void spa_bt_decode_buffer_process(struct spa_bt_decode_buffer *thi
this->pos = 0;
}
this->corr = spa_bt_rate_control_update(&this->ctl,
level * SPA_NSEC_PER_SEC / this->rate,
((double)target + 0.5/this->rate) * SPA_NSEC_PER_SEC / this->rate,
duration_ns, this->avg_period, this->rate_diff_max);
spa_bt_decode_buffer_get_read(this, &avail);
if (avail < data_size) {
spa_log_debug(this->log, "%p underrun samples:%d", this,
@ -360,6 +352,27 @@ static inline void spa_bt_decode_buffer_process(struct spa_bt_decode_buffer *thi
}
}
static inline void spa_bt_decode_buffer_set_next(struct spa_bt_decode_buffer *this, double rate_diff, int64_t next_nsec,
int32_t delay, int32_t delay_frac, bool delay_at_start)
{
/* Called after spa_bt_decode_buffer_process() on the same cycle to update
* next_nsec & rate_diff values.
*/
this->rate_diff = rate_diff;
this->next_nsec = next_nsec;
this->delay = delay;
this->delay_frac = delay_frac;
/* If fractional delay is given at the start of current cycle, make it relative to
* next_nsec used for the level calculations.
*/
if (delay_at_start)
this->delay_frac += (int32_t)(1e9 * this->prev_samples - this->duration_ns * this->rate * this->rate_diff);
/* Recalculate this->level */
spa_bt_decode_buffer_write_packet(this, 0, 0);
}
struct spa_bt_recvmsg_data {
struct spa_log *log;
struct spa_system *data_system;

View file

@ -872,6 +872,13 @@ static void media_on_timeout(struct spa_source *source)
spa_log_trace(this->log, "%p: io:%d->%d status:%d", this, io_status, port->io->status, status);
}
/* Use any updated rate correction already for the next cycle */
this->next_time = (uint64_t)(now_time + duration * SPA_NSEC_PER_SEC / port->buffer.corr / rate);
if (SPA_LIKELY(this->clock)) {
this->clock->rate_diff = port->buffer.corr;
this->clock->next_nsec = this->next_time;
}
spa_node_call_ready(&this->callbacks, SPA_STATUS_HAVE_DATA);
set_timeout(this, this->next_time);
@ -1807,13 +1814,9 @@ static void process_buffering(struct impl *this)
if (plc)
spa_bt_decode_buffer_recover(&port->buffer);
setup_matching(this);
spa_bt_decode_buffer_process(&port->buffer, samples, duration_ns);
spa_bt_decode_buffer_process(&port->buffer, samples, duration_ns,
this->position ? this->position->clock.rate_diff : 1.0,
this->position ? this->position->clock.next_nsec : 0,
this->resampling ? this->port.rate_match->delay : 0,
this->resampling ? this->port.rate_match->delay_frac : 0);
setup_matching(this);
/* copy data to buffers */
if (!spa_list_is_empty(&port->free)) {
@ -1935,6 +1938,7 @@ static int impl_node_process(void *object)
struct impl *this = object;
struct port *port;
struct spa_io_buffers *io;
int ret;
spa_return_val_if_fail(this != NULL, -EINVAL);
@ -1959,9 +1963,19 @@ static int impl_node_process(void *object)
/* Follower produces buffers here, driver in timeout */
if (this->following)
return produce_buffer(this);
ret = produce_buffer(this);
else
return SPA_STATUS_OK;
ret = SPA_STATUS_OK;
/* Update decode buffer vs. next wakeup timing */
spa_bt_decode_buffer_set_next(&port->buffer,
this->position ? this->position->clock.rate_diff : 1.0,
this->position ? this->position->clock.next_nsec : 0,
this->resampling ? this->port.rate_match->delay : 0,
this->resampling ? this->port.rate_match->delay_frac : 0,
this->following);
return ret;
}
static const struct spa_node_methods impl_node = {

View file

@ -94,11 +94,14 @@ static inline bool spa_bt_ptp_valid(struct spa_bt_ptp *p)
* The deviation from the buffer level target evolves as
*
* delta(j) = level(j) - target
* delta(j+1) = delta(j) + r(j) - c(j+1)
* delta(j+1) = delta(j) + r(j) - c(j)
*
* where r is samples received in one duration, and c corrected rate
* (samples per duration).
*
* Note that the rate correction calculated on *previous* cycle is what affects the
* current one.
*
* The rate correction is in general determined by linear filter f
*
* c(j+1) = c(j) + \sum_{k=0}^\infty delta(j - k) f(k)
@ -111,7 +114,7 @@ static inline bool spa_bt_ptp_valid(struct spa_bt_ptp *p)
*
* delta(z) = G(z) r(z)
* c(z) = F(z) delta(z)
* G(z) = (z - 1) / [(z - 1)^2 + z f(z)]
* G(z) = (z - 1) / [(z - 1)^2 + f(z)]
* F(z) = f(z) / (z - 1)
*
* We now want: poles of G(z) must be in |z|<1 for stability, F(z)
@ -119,23 +122,22 @@ static inline bool spa_bt_ptp_valid(struct spa_bt_ptp *p)
*
* To satisfy the conditions, take
*
* (z - 1)^2 + z f(z) = p(z) / q(z)
* (z - 1)^2 + f(z) = p(z) / q(z)
*
* where p(z) is polynomial with leading term z^n with wanted root
* structure, and q(z) is any polynomial with leading term z^{n-2}.
* This guarantees f(z) is causal, and G(z) = (z-1) q(z) / p(z).
* where p(z) / q(z) are polynomials such that p(z)/q(z) ~ z^2 - 2 z + O(1)
* in 1/z expansion. This guarantees f(z) is causal, and G(z) = (z-1) q(z) / p(z).
* We can choose p(z) and q(z) to improve low-pass properties of F(z).
*
* Simplest choice is p(z)=(z-x)^2 and q(z)=1, but that gives flat
* Simplest choice is p(z)=(z-1)^2 and q(z)=1, but that does not supress
* high frequency response in F(z). Better choice is p(z) = (z-u)*(z-v)*(z-w)
* and q(z) = z - r. To make F(z) better lowpass, one can cancel
* a resulting 1/z pole in F(z) by setting r=u*v*w. Then,
* and q(z) = z - r. Causality requires r = u + v + w - 2.
* Then,
*
* G(z) = (z - u*v*w)*(z - 1) / [(z - u)*(z - v)*(z - w)]
* F(z) = (a z + b - a) / (z - 1) * H(z)
* H(z) = beta / (z - 1 + beta)
* beta = 1 - u*v*w
* a = [(1-u) + (1-v) + (1-w) - beta] / beta
* beta = 3 - u - v - w
* a = [u*v + u*w + v*w - u - v - w + beta] / beta
* b = (1-u)*(1-v)*(1-w) / beta
*
* which corresponds to iteration for c(j):
@ -147,8 +149,10 @@ static inline bool spa_bt_ptp_valid(struct spa_bt_ptp *p)
* which gives the low-pass property for c(j).
*
* The simplest filter is obtained by putting the poles at
* u=v=w=(1-beta)**(1/3). Since beta << 1, computing the root
* can be avoided by expanding in series.
* u=v=w=(1 - beta/3). Then a=beta/3 and b=beta^2/27
*
* The same filter is obtained if one uses c(j+1) instead of c(j)
* in the starting point and takes limit beta -> 0.
*
* Overshoot in impulse response could be reduced by moving one of the
* poles closer to z=1, but this increases the step response time.
@ -169,12 +173,8 @@ static inline double spa_bt_rate_control_update(struct spa_bt_rate_control *this
double target, double duration, double period, double rate_diff_max)
{
/*
* u = (1 - beta)^(1/3)
* x = a / beta
* y = b / beta
* a = (2 + u) * (1 - u)^2 / beta
* b = (1 - u)^3 / beta
* beta -> 0
*/
const double beta = SPA_CLAMP(duration / period, 0, 0.5);
const double x = 1.0/3;