module-equalizer-sink: trying new buffering strategies

2026-07-03 00:06:49 -04:00 · 2009-07-18 01:00:35 -07:00 · 2009-07-18 01:00:35 -07:00 · cf8331a0da
commit cf8331a0da
parent d4fe5bfce9
2 changed files with 160 additions and 145 deletions
--- a/src/modules/module-equalizer-sink.c
+++ b/src/modules/module-equalizer-sink.c
@ -81,9 +81,10 @@ struct userdata {
              * the latency of the filter, calculated from window_size
              * based on constraints of COLA and window function
              */
    size_t overlap_size;//window_size-R
    size_t samples_gathered;
    size_t n_buffered_output;
    size_t max_output;
    size_t target_samples;
    float *H;//frequency response filter (magnitude based)
    float *W;//windowing function (time domain)
    float *work_buffer,**input,**overlap_accum,**output_buffer;
@ -91,7 +92,8 @@ struct userdata {
    fftwf_plan forward_plan,inverse_plan;
    //size_t samplings;
-    pa_memblockq *memblockq;
+    pa_memchunk conv_buffer;
    pa_memblockq *rendered_q;
 };
 static const char* const valid_modargs[] = {
@ -186,12 +188,14 @@ static int sink_process_msg(pa_msgobject *o, int code, void *data, int64_t offse
        case PA_SINK_MESSAGE_GET_LATENCY: {
            pa_usec_t usec = 0;
            pa_sample_spec *ss=&u->sink->sample_spec;
            size_t fs=pa_frame_size(ss);
            /* Get the latency of the master sink */
            if (PA_MSGOBJECT(u->master)->process_msg(PA_MSGOBJECT(u->master), PA_SINK_MESSAGE_GET_LATENCY, &usec, 0, NULL) < 0)
                usec = 0;
-            usec+=pa_bytes_to_usec(u->n_buffered_output*pa_frame_size(ss),ss);
+            usec+=pa_bytes_to_usec(u->samples_gathered*fs,ss);
            usec += pa_bytes_to_usec(pa_memblockq_get_length(u->rendered_q), ss);
            /* Add the latency internal to our sink input on top */
            usec += pa_bytes_to_usec(pa_memblockq_get_length(u->sink_input->thread_info.render_memblockq), &u->master->sample_spec);
            *((pa_usec_t*) data) = usec;
@ -227,7 +231,7 @@ static void sink_request_rewind(pa_sink *s) {
    pa_assert_se(u = s->userdata);
    /* Just hand this one over to the master sink */
-    pa_sink_input_request_rewind(u->sink_input, s->thread_info.rewind_nbytes + pa_memblockq_get_length(u->memblockq), TRUE, FALSE, FALSE);
+    pa_sink_input_request_rewind(u->sink_input, s->thread_info.rewind_nbytes + pa_memblockq_get_length(u->rendered_q), TRUE, FALSE, FALSE);
 }
 /* Called from I/O thread context */
@ -246,116 +250,92 @@ static void sink_update_requested_latency(pa_sink *s) {
 /* Called from I/O thread context */
 static int sink_input_pop_cb(pa_sink_input *i, size_t nbytes, pa_memchunk *chunk) {
    struct userdata *u;
    float *src, *dst;
    pa_memchunk tchunk;
    pa_sink_input_assert_ref(i);
    pa_assert(chunk);
    pa_assert_se(u = i->userdata);
    pa_assert_se(u->sink);
-    size_t fs = pa_frame_size(&(u->sink->sample_spec));
+    size_t fs=pa_frame_size(&(u->sink->sample_spec));
    size_t ss=pa_sample_size(&(u->sink->sample_spec));
    size_t fe = fs/ss;
    size_t samples_requested=nbytes/fs;
    pa_memchunk tchunk;
    chunk->memblock=NULL;
    size_t buffered_samples=pa_memblockq_get_length(u->rendered_q)/fs;
    if (!u->sink || !PA_SINK_IS_OPENED(u->sink->thread_info.state))
        return -1;
-    //collect the minimum number of samples
+    pa_log("start output-buffered %ld, input-buffered %ld",buffered_samples,u->samples_gathered);
-    //TODO figure out a better way of buffering the needed
+    //collect samples
-    //number of samples, this doesn't seem to work correctly
+    size_t buffered_remaining=pa_memblockq_get_length(u->rendered_q)/fs;
-    //most of the itme
+    size_t buffer_missing=pa_memblockq_missing(u->rendered_q)/fs;
-    if(u->samples_gathered < u->R){
+    size_t desired_samples=(buffer_missing>=u->R)*PA_MIN(u->target_samples-u->samples_gathered,buffer_missing);
-        //render some new fragments to our memblockq
+    if(desired_samples>0){
-        size_t desired_samples=PA_MIN(u->R-u->samples_gathered,u->max_output);
+        u->conv_buffer.index=0;
-        while (pa_memblockq_peek(u->memblockq, &tchunk) < 0) {
+        //if we still had buffered output, 
-            pa_memchunk nchunk;
+        //or can gather any more in the buffer
        //politely request (optimistic)
        if(buffered_samples>=samples_requested ||
            (u->samples_gathered/u->R)*u->R>=samples_requested){
            u->conv_buffer.length=desired_samples*fs;
            pa_log("trying to buffer %ld samples",desired_samples);
            pa_sink_render_into(u->sink, &u->conv_buffer);
        }else{//we need it now! force it
            //TODO: minimum amount or the whole buffer better?
            desired_samples=u->R-u->samples_gathered%u->R;
            u->conv_buffer.length=desired_samples*fs;
            pa_log("force-buffer %ld samples",desired_samples);
            pa_sink_render_into_full(u->sink, &u->conv_buffer);
            pa_assert_se(u->conv_buffer.length==desired_samples*fs);
        }
        size_t n_samples=u->conv_buffer.length/fs;
        float *src;
        pa_log("received %ld samples",n_samples);
-            pa_sink_render(u->sink, desired_samples*fs, &nchunk);
+        pa_assert_se(n_samples<=u->target_samples-u->samples_gathered);
-            pa_memblockq_push(u->memblockq, &nchunk);
+        src = (float*) ((uint8_t*) pa_memblock_acquire(u->conv_buffer.memblock) + u->conv_buffer.index);
            pa_memblock_unref(nchunk.memblock);
        }
        if(tchunk.length/fs!=desired_samples){
            pa_log("got %ld samples, asked for %ld",tchunk.length/fs,desired_samples);
        }
        size_t n_samples=PA_MIN(tchunk.length/fs,u->R-u->samples_gathered);
        pa_assert_se(n_samples<=u->R-u->samples_gathered);
        src = (float*) ((uint8_t*) pa_memblock_acquire(tchunk.memblock) + tchunk.index);
        for (size_t c=0;c<u->channels;c++) {
-            pa_sample_clamp(PA_SAMPLE_FLOAT32NE,u->input[c]+(u->window_size-u->R)+u->samples_gathered,sizeof(float), src+c, fs, n_samples);
+            //buffer with an offset after the overlap from previous
            //iterations
            pa_assert_se(
                u->input[c]+u->overlap_size+u->samples_gathered+n_samples<=u->input[c]+u->target_samples+u->overlap_size
            );
            pa_sample_clamp(PA_SAMPLE_FLOAT32NE,u->input[c]+u->overlap_size+u->samples_gathered,sizeof(float), src+c, fs, n_samples);
        }
        u->samples_gathered+=n_samples;
-        pa_memblock_release(tchunk.memblock);
+        pa_memblock_release(u->conv_buffer.memblock);
        pa_memblock_unref(tchunk.memblock);
    }
-    //output any buffered outputs first
+    //pa_assert_se(u->samples_gathered>=u->R);
    if(u->n_buffered_output>0){
        //pa_log("outputing %ld buffered samples",u->n_buffered_output);
        chunk->index = 0;
        size_t n_outputable=PA_MIN(u->n_buffered_output,u->max_output);
        chunk->length = n_outputable*fs;
        chunk->memblock = pa_memblock_new(i->sink->core->mempool, chunk->length);
        pa_memblockq_drop(u->memblockq, chunk->length);
        dst = (float*) pa_memblock_acquire(chunk->memblock);
        for(size_t j=0;j<u->channels;++j){
            pa_sample_clamp(PA_SAMPLE_FLOAT32NE, dst+j, fs, u->output_buffer[j], sizeof(float),n_outputable);
            memmove(u->output_buffer[j],u->output_buffer[j]+n_outputable,(u->n_buffered_output-n_outputable)*sizeof(float));
        }
        u->n_buffered_output-=n_outputable;
        pa_memblock_release(chunk->memblock);
        return 0;
    }
    pa_assert_se(u->n_buffered_output==0);
    if(u->samples_gathered<u->R){
        return -1;
    }
    //IT should be this guy if we're buffering like how its supposed to
    //size_t n_outputable=PA_MIN(u->window_size-u->R,u->max_output);
    //This one takes into account the actual data gathered but then the dsp
    //stuff is wrong when the buffer "underruns"
    size_t n_outputable=PA_MIN(u->R,u->max_output)*(u->R==u->samples_gathered);
    chunk->index=0;
    chunk->length=n_outputable*fs;
    chunk->memblock = pa_memblock_new(i->sink->core->mempool, chunk->length);
    pa_memblockq_drop(u->memblockq, chunk->length);
    dst = (float*) pa_memblock_acquire(chunk->memblock);
    pa_assert_se(u->fft_size>=u->window_size);
    pa_assert_se(u->R<u->window_size);
-    size_t sample_rem=u->R-n_outputable;
+    //process every complete block on hand
    while(u->samples_gathered>=u->R&&buffer_missing>=u->R){
        float *dst;
        //pa_log("iter gathered: %ld",u->samples_gathered);
        tchunk.index=0;
        tchunk.length=u->R*fs;
        tchunk.memblock=pa_memblock_new(u->core->mempool,tchunk.length);
        //pa_memblockq_drop(u->rendered_q, tchunk.length);
        pa_assert_se(tchunk.length==u->R*fs);
        dst=(float*)pa_memblock_acquire(tchunk.memblock);
        //use a linear-phase sliding STFT and overlap-add method (for each channel)
        for (size_t c=0;c<u->channels;c++) {
-        ////zero padd the data
+            //zero padd the data
        //memset(u->work_buffer,0,u->fft_size*sizeof(float));
            memset(u->work_buffer+u->window_size,0,(u->fft_size-u->window_size)*sizeof(float));
-        ////window the data
+            //window the data
            for(size_t j=0;j<u->window_size;++j){
                u->work_buffer[j]=u->W[j]*u->input[c][j];
            }
            //Processing is done here!
            //do fft
        //char fname[1024];
        //if(u->samplings==200){
        //    pa_assert_se(0);
        //}
        //this iterations input
        //sprintf(fname,"/home/jason/input%ld-%ld.txt",u->samplings+1,c);
        //array_out(fname,u->input[c]+(u->window_size-u->R),u->R);
            fftwf_execute_dft_r2c(u->forward_plan,u->work_buffer,u->output_window);
            //perform filtering
            for(size_t j=0;j<u->fft_size/2+1;++j){
                u->output_window[j][0]*=u->H[j];
                u->output_window[j][1]*=u->H[j];
            }
-        ////inverse fft
+            //inverse fft
            fftwf_execute_dft_c2r(u->inverse_plan,u->output_window,u->work_buffer);
        //the output for the previous iteration's input
        //sprintf(fname,"/home/jason/output%ld-%ld.txt",u->samplings,c);
        //array_out(fname,u->work_buffer,u->window_size);
            ////debug: tests overlaping add
            ////and negates ALL PREVIOUS processing
            ////yields a perfect reconstruction if COLA is held
@ -366,33 +346,63 @@ static int sink_input_pop_cb(pa_sink_input *i, size_t nbytes, pa_memchunk *chunk
            //overlap add and preserve overlap component from this window (linear phase)
            for(size_t j=0;j<u->R;++j){
                u->work_buffer[j]+=u->overlap_accum[c][j];
-            u->overlap_accum[c][j]=u->work_buffer[u->window_size-u->R+j];
+                u->overlap_accum[c][j]=u->work_buffer[u->overlap_size+j];
            }
        /*
            //debug: tests if basic buffering works
-        //shouldn't modify the signal AT ALL
+            //shouldn't modify the signal AT ALL (beyond roundoff)
            for(size_t j=0;j<u->window_size;++j){
                u->work_buffer[j]=u->input[c][j];
            }
        */
-        //preseve the needed input for the next windows overlap
+            //preseve the needed input for the next window's overlap
            memmove(u->input[c],u->input[c]+u->R,
-            (u->window_size-u->R)*sizeof(float)
+                (u->samples_gathered+u->overlap_size-u->R)*sizeof(float)
            );
            //output the samples that are outputable now
-        pa_sample_clamp(PA_SAMPLE_FLOAT32NE, dst+c, fs, u->work_buffer, sizeof(float),n_outputable);
+            pa_sample_clamp(PA_SAMPLE_FLOAT32NE, dst+c, fs, u->work_buffer, sizeof(float),u->R);
        //buffer the rest of them
        memcpy(u->output_buffer[c]+u->n_buffered_output,u->work_buffer+n_outputable,sample_rem*sizeof(float));
        }
-    //u->samplings++;
+        pa_memblock_release(tchunk.memblock);
-    u->n_buffered_output+=sample_rem;
+        pa_memblockq_push(u->rendered_q, &tchunk);
-    u->samples_gathered=0;
+        pa_memblock_unref(tchunk.memblock);
-end:
+        u->samples_gathered-=u->R;
        buffer_missing-=u->R;
    }
    //deque from renderq and output
    //pa_memblockq_set_prebuf(u->rendered_q,samples_requested*fs);
    pa_assert_se(pa_memblockq_peek(u->rendered_q,&tchunk)>=0);
    if(tchunk.length>=nbytes){
        *chunk=tchunk;
        chunk->length=samples_requested*fs;
        pa_memblock_ref(chunk->memblock);
        pa_memblock_unref(tchunk.memblock);
        pa_memblockq_drop(u->rendered_q, chunk->length);
    }else{
        size_t copied=0;
        chunk->length=nbytes;
        chunk->memblock=pa_memblock_new(u->core->mempool,chunk->length);
        uint8_t *dst=(uint8_t*)pa_memblock_acquire(chunk->memblock);
        do{
            size_t l=PA_MIN(tchunk.length-tchunk.index,nbytes-copied);
            uint8_t *src=(((uint8_t*)pa_memblock_acquire(tchunk.memblock))+tchunk.index);
            memmove(dst+copied,src,l);
            copied+=l;
            pa_memblock_release(tchunk.memblock);
            pa_memblock_unref(tchunk.memblock);
            pa_memblockq_drop(u->rendered_q,l);
            if(copied<nbytes){
                if(pa_memblockq_get_length(u->rendered_q)==0){
                    chunk->length=copied;
                    break;
                }
                pa_memblockq_peek(u->rendered_q,&tchunk);
            }
        }while(copied<nbytes);
        pa_memblock_release(chunk->memblock);
    }
    pa_assert_se(chunk->memblock);
    pa_log("output requested %ld, gave %ld",nbytes/fs,chunk->length/fs);
    //pa_log("end pop");
    return 0;
 }
@ -410,20 +420,19 @@ static void sink_input_process_rewind_cb(pa_sink_input *i, size_t nbytes) {
    if (u->sink->thread_info.rewind_nbytes > 0) {
        size_t max_rewrite;
-        max_rewrite = nbytes + pa_memblockq_get_length(u->memblockq);
+        max_rewrite = nbytes + pa_memblockq_get_length(u->rendered_q);
        amount = PA_MIN(u->sink->thread_info.rewind_nbytes, max_rewrite);
        u->sink->thread_info.rewind_nbytes = 0;
        if (amount > 0) {
-            pa_memblockq_seek(u->memblockq, - (int64_t) amount, PA_SEEK_RELATIVE, TRUE);
+            pa_memblockq_seek(u->rendered_q, - (int64_t) amount, PA_SEEK_RELATIVE, TRUE);
            pa_log_debug("Resetting equalizer");
            u->n_buffered_output=0;
            u->samples_gathered=0;
        }
    }
    pa_sink_process_rewind(u->sink, amount);
-    pa_memblockq_rewind(u->memblockq, nbytes);
+    pa_memblockq_rewind(u->rendered_q, nbytes);
 }
 /* Called from I/O thread context */
@ -436,7 +445,7 @@ static void sink_input_update_max_rewind_cb(pa_sink_input *i, size_t nbytes) {
    if (!u->sink || !PA_SINK_IS_LINKED(u->sink->thread_info.state))
        return;
-    pa_memblockq_set_maxrewind(u->memblockq, nbytes);
+    pa_memblockq_set_maxrewind(u->rendered_q, nbytes);
    pa_sink_set_max_rewind_within_thread(u->sink, nbytes);
 }
@ -582,17 +591,20 @@ int pa__init(pa_module*m) {
    u->master = master;
    u->sink = NULL;
    u->sink_input = NULL;
    u->memblockq = pa_memblockq_new(0, MEMBLOCKQ_MAXLENGTH, 0, fs, 1, 1, 0, NULL);
    //u->samplings=0;
    u->channels=ss.channels;
    u->fft_size=pow(2,ceil(log(ss.rate)/log(2)));
    pa_log("fft size: %ld",u->fft_size);
    u->window_size=7999;
    u->R=(u->window_size+1)/2;
    u->overlap_size=u->window_size-u->R;
    u->target_samples=5*u->R;
    u->samples_gathered=0;
    u->n_buffered_output=0;
    u->max_output=pa_frame_align(pa_mempool_block_size_max(m->core->mempool), &ss)/pa_frame_size(&ss);
    u->rendered_q = pa_memblockq_new(0, MEMBLOCKQ_MAXLENGTH,u->target_samples*fs, fs, fs, 0, 0, NULL);
    u->conv_buffer.memblock=pa_memblock_new(u->core->mempool,u->target_samples*fs);
    u->H=(float*) fftwf_malloc((u->fft_size/2+1)*sizeof(float));
    u->W=(float*) fftwf_malloc((u->window_size)*sizeof(float));
    u->work_buffer=(float*) fftwf_malloc(u->fft_size*sizeof(float));
@ -600,9 +612,9 @@ int pa__init(pa_module*m) {
    u->overlap_accum=(float **)malloc(sizeof(float *)*u->channels);
    u->output_buffer=(float **)malloc(sizeof(float *)*u->channels);
    for(size_t c=0;c<u->channels;++c){
-        u->input[c]=(float*) fftwf_malloc(u->window_size*sizeof(float));
+        u->input[c]=(float*) fftwf_malloc((u->target_samples+u->overlap_size)*sizeof(float));
        pa_assert_se(u->input[c]);
-        memset(u->input[c],0,u->window_size*sizeof(float));
+        memset(u->input[c],0,(u->target_samples+u->overlap_size)*sizeof(float));
        pa_assert_se(u->input[c]);
        u->overlap_accum[c]=(float*) fftwf_malloc(u->R*sizeof(float));
        pa_assert_se(u->overlap_accum[c]);
@ -780,8 +792,11 @@ void pa__done(pa_module*m) {
        pa_sink_input_unref(u->sink_input);
    }
-    if (u->memblockq)
+    if(u->conv_buffer.memblock)
-        pa_memblockq_free(u->memblockq);
+        pa_memblock_unref(u->conv_buffer.memblock);
    if (u->rendered_q)
        pa_memblockq_free(u->rendered_q);
    fftwf_destroy_plan(u->inverse_plan);
    fftwf_destroy_plan(u->forward_plan);
--- a/src/pulsecore/memblock.c
+++ b/src/pulsecore/memblock.c
@ -54,7 +54,7 @@
 * stored in SHM and our OS does not commit the memory before we use
 * it for the first time. */
 #define PA_MEMPOOL_SLOTS_MAX 1024
-#define PA_MEMPOOL_SLOT_SIZE (64*1024)
+#define PA_MEMPOOL_SLOT_SIZE (128*1024)
 #define PA_MEMEXPORT_SLOTS_MAX 128