// // Command-list based filter-running code. // // Copyright (c) 2002-2003 Jim Peters . This // file is released under the GNU Lesser General Public License // (LGPL) version 2.1 as published by the Free Software // Foundation. See the file COPYING_LIB for details, or visit // . // // This version of the filter-running code is based on getting // the filter to go as fast as possible with a pre-compiled // routine, but without flattening the filter structure. This // gives greater accuracy than the combined filter. The result // is mostly faster than the combined filter (tested on ix86 with // gcc -O6), except where the combined filter gets a big // advantage from flattening the filter list. This code is also // portable (unlike the JIT option). // typedef struct Run { int magic; // Magic: 0x64966325 int buf_size; // Length of working buffer required in doubles double *coef; // Coefficient list char *cmd; // Command list } Run; typedef struct RunBuf { double *coef; char *cmd; int mov_cnt; // Number of bytes to memmove double buf[1]; // is resized in fid_run_newbuf() } RunBuf; // // Filter processing routine. This is designed to avoid too many // branches, and references are very localized in the code, // keeping the optimizer from trying to store and remember old // values. // // // Step commands: // 0 END // 1 IIR coefficient (1+0) // 2 2x IIR coefficient (2+0) // 3 3x IIR coefficient (3+0) // 4 4Nx IIR coefficient (4N+0) // 5 FIR coefficient (0+1) // 6 2x FIR coefficient (0+2) // 7 3x FIR coefficient (0+3) // 8 4Nx FIR coefficient (0+4N) // 9 IIR+FIR coefficients (1+1) // 10 2x IIR+FIR coefficients (2+2) // 11 3x IIR+FIR coefficients (3+3) // 12 4Nx IIR+FIR coefficients (4N+4N) // 13 End-stage, pure IIR, assume no FIR done at all (1+0) // 14 End-stage with just FIR coeff (0+2) // 15 End-stage with IIR+FIR coeff (1+2) // 16 IIR + pure-IIR endstage (2+0) // 17 FIR + FIR end-stage (0+3) // 18 IIR+FIR + IIR+FIR end-stage (2+3) // 19 Nx (IIR + pure-IIR endstage) (2+0) // 20 Nx (FIR + FIR end-stage) (0+3) // 21 Nx (IIR+FIR + IIR+FIR end-stage) (2+3) // 22 Gain coefficient (0+1) // // Most filters are made up of 2x2 IIR/FIR pairs, which means a // list of command 18 bytes. The other big job would be long FIR // filters. These have to be handled with a list of 7,6,5 // commands, plus a 13 command. // typedef unsigned char uchar; static double filter_step(void *fbuf, double iir) { double *coef= ((RunBuf*)fbuf)->coef; char *cmd= ((RunBuf*)fbuf)->cmd; double *buf= &((RunBuf*)fbuf)->buf[0]; char ch; double fir= 0; double tmp= buf[0]; int cnt; // Using a memmove first is faster on gcc -O6 / ix86 than moving // the values whilst working through the buffers. memmove(buf, buf+1, ((RunBuf*)fbuf)->mov_cnt); #define IIR \ iir -= *coef++ * tmp; \ tmp= *buf++; #define FIR \ fir += *coef++ * tmp; \ tmp= *buf++; #define BOTH \ iir -= *coef++ * tmp; \ fir += *coef++ * tmp; \ tmp= *buf++; #define ENDIIR \ iir -= *coef++ * tmp; \ tmp= *buf++; \ buf[-1]= iir; #define ENDFIR \ fir += *coef++ * tmp; \ tmp= *buf++; \ buf[-1]= iir; \ iir= fir + *coef++ * iir; \ fir= 0 #define ENDBOTH \ iir -= *coef++ * tmp; \ fir += *coef++ * tmp; \ tmp= *buf++; \ buf[-1]= iir; \ iir= fir + *coef++ * iir; \ fir= 0 #define GAIN \ iir *= *coef++ while ((ch= *cmd++)) switch (ch) { case 1: IIR; break; case 2: IIR; IIR; break; case 3: IIR; IIR; IIR; break; case 4: cnt= *cmd++; do { IIR; IIR; IIR; IIR; } while (--cnt > 0); break; case 5: FIR; break; case 6: FIR; FIR; break; case 7: FIR; FIR; FIR; break; case 8: cnt= *cmd++; do { FIR; FIR; FIR; FIR; } while (--cnt > 0); break; case 9: BOTH; break; case 10: BOTH; BOTH; break; case 11: BOTH; BOTH; BOTH; break; case 12: cnt= *cmd++; do { BOTH; BOTH; BOTH; BOTH; } while (--cnt > 0); break; case 13: ENDIIR; break; case 14: ENDFIR; break; case 15: ENDBOTH; break; case 16: IIR; ENDIIR; break; case 17: FIR; ENDFIR; break; case 18: BOTH; ENDBOTH; break; case 19: cnt= *cmd++; do { IIR; ENDIIR; } while (--cnt > 0); break; case 20: cnt= *cmd++; do { FIR; ENDFIR; } while (--cnt > 0); break; case 21: cnt= *cmd++; do { BOTH; ENDBOTH; } while (--cnt > 0); break; case 22: GAIN; break; } #undef IIR #undef FIR #undef BOTH #undef ENDIIR #undef ENDFIR #undef ENDBOTH #undef GAIN return iir; } // // Create an instance of a filter, ready to run. This returns a // void* handle, and a function to call to execute the filter. // Working buffers for the filter instances must be allocated // separately using fid_run_newbuf(). This allows many // simultaneous instances of the filter to be run. // // The sub-filters are executed in the precise order that they // are given. This may lead to some inefficiency. Normally when // an IIR filter is followed by an FIR filter, the buffers can be // shared. However, if the sub-filters are not in IIR/FIR pairs, // then extra memory accesses are required. // // In any case, factors are extracted from IIR filters (so that // the first coefficient is 1), and single-element FIR filters // are merged into the global gain factor, and are ignored. // // The returned handle must be released using fid_run_free(). // void * fid_run_new(FidFilter *filt, double (**funcpp)(void *,double)) { int buf_size= 0; uchar *cp, prev; FidFilter *ff; double *dp; double gain= 1.0; int a; double *coef_tmp; uchar *cmd_tmp; int coef_cnt, coef_max; int cmd_cnt, cmd_max; int filt_cnt= 0; Run *rr; for (ff= filt; ff->len; ff= FFNEXT(ff)) filt_cnt += ff->len; // Allocate worst-case sizes for temporary arrays coef_tmp= ALLOC_ARR(coef_max= filt_cnt + 1, double); cmd_tmp= (uchar*)ALLOC_ARR(cmd_max= filt_cnt + 4, char); dp= coef_tmp; cp= cmd_tmp; prev= 0; // Generate command and coefficient lists while (filt->len) { int n_fir = 0; int n_iir = 0; int cnt; double *iir, *fir; double adj; if (filt->typ == 'F' && filt->len == 1) { gain *= filt->val[0]; filt= FFNEXT(filt); continue; } if (filt->typ == 'F') { iir= 0; fir= filt->val; n_fir= filt->len; filt= FFNEXT(filt); } else if (filt->typ == 'I') { iir= filt->val; n_iir= filt->len; fir= 0; filt= FFNEXT(filt); while (filt->typ == 'F' && filt->len == 1) { gain *= filt->val[0]; filt= FFNEXT(filt); } if (filt->typ == 'F') { fir= filt->val; n_fir= filt->len; filt= FFNEXT(filt); } } else error("Internal error: fid_run_new can only handle IIR + FIR types"); // Okay, we now have an IIR/FIR pair to process, possibly with // n_iir or n_fir == 0 if one half is missing cnt= n_iir > n_fir ? n_iir : n_fir; buf_size += cnt-1; if (n_iir) { adj= 1.0 / iir[0]; gain *= adj; } if (n_fir == 3 && n_iir == 3) { if (prev == 18) { cp[-1]= prev= 21; *cp++= 2; } else if (prev == 21) { cp[-1]++; } else *cp++= prev= 18; *dp++= iir[2]*adj; *dp++= fir[2]; *dp++= iir[1]*adj; *dp++= fir[1]; *dp++= fir[0]; } else if (n_fir == 3 && n_iir == 0) { if (prev == 17) { cp[-1]= prev= 20; *cp++= 2; } else if (prev == 20) { cp[-1]++; } else *cp++= prev= 17; *dp++= fir[2]; *dp++= fir[1]; *dp++= fir[0]; } else if (n_fir == 0 && n_iir == 3) { if (prev == 16) { cp[-1]= prev= 19; *cp++= 2; } else if (prev == 19) { cp[-1]++; } else *cp++= prev= 16; *dp++= iir[2]*adj; *dp++= iir[1]*adj; } else { prev= 0; // Just cancel 'prev' as we only use it for 16-18,19-21 if (cnt > n_fir) { a= 0; while (cnt > n_fir && cnt > 2) { *dp++= iir[--cnt] * adj; a++; } while (a >= 4) { int nn= a/4; if (nn > 255) nn= 255; *cp++= 4; *cp++= nn; a -= nn*4; } if (a) *cp++= a; } if (cnt > n_iir) { a= 0; while (cnt > n_iir && cnt > 2) { *dp++= fir[--cnt]; a++; } while (a >= 4) { int nn= a/4; if (nn > 255) nn= 255; *cp++= 8; *cp++= nn; a -= nn*4; } if (a) *cp++= 4+a; } a= 0; while (cnt > 2) { cnt--; a++; *dp++= iir[cnt]*adj; *dp++= fir[cnt]; } while (a >= 4) { int nn= a/4; if (nn > 255) nn= 255; *cp++= 12; *cp++= nn; a -= nn*4; } if (a) *cp++= 8+a; if (!n_fir) { *cp++= 13; *dp++= iir[1]; } else if (!n_iir) { *cp++= 14; *dp++= fir[1]; *dp++= fir[0]; } else { *cp++= 15; *dp++= iir[1]; *dp++= fir[1]; *dp++= fir[0]; } } } if (gain != 1.0) { *cp++= 22; *dp++= gain; } *cp++= 0; // Sanity checks coef_cnt= dp-coef_tmp; cmd_cnt= cp-cmd_tmp; if (coef_cnt > coef_max || cmd_cnt > cmd_max) error("fid_run_new internal error; arrays exceeded"); // Allocate the final Run structure to return rr= (Run*)Alloc(sizeof(Run) + coef_cnt*sizeof(double) + cmd_cnt*sizeof(char)); rr->magic= 0x64966325; rr->buf_size= buf_size; rr->coef= (double*)(rr+1); rr->cmd= (char*)(rr->coef + coef_cnt); memcpy(rr->coef, coef_tmp, coef_cnt*sizeof(double)); memcpy(rr->cmd, cmd_tmp, cmd_cnt*sizeof(char)); //DEBUG { //DEBUG int a; //DEBUG for (cp= cmd_tmp; *cp; cp++) printf("%d ", *cp); //DEBUG printf("\n"); //DEBUG //for (a= 0; amagic != 0x64966325) error("Bad handle passed to fid_run_newbuf()"); siz= rr->buf_size > 0 ? rr->buf_size - 1 : 0; // Fist element is part of sizeof(RunBuf) rb= (RunBuf*)Alloc(sizeof(RunBuf) + siz * sizeof(double)); rb->coef= rr->coef; rb->cmd= rr->cmd; rb->mov_cnt= siz * sizeof(double); // rb->buf[] already zerod return rb; } // // Find the space required for a filter buffer // int fid_run_bufsize(void *run) { Run *rr= (Run*)run; int siz; if (rr->magic != 0x64966325) error("Bad handle passed to fid_run_bufsize()"); siz= rr->buf_size ? rr->buf_size : 1; // Minimum one element to avoid problems return sizeof(RunBuf) + siz * sizeof(double); } // // Initialise a filter buffer allocated separately. Usually // fid_run_newbuf() is easier, but in the case where filter // buffers must be allocated as part of a larger structure, a // call to fid_run_newbuf can be replaced with a call to // fid_run_bufsize() to get the space required, and then a call // to fid_run_initbuf() once it has been allocated. // void fid_run_initbuf(void *run, void *buf) { Run *rr= (Run*)run; RunBuf *rb= (RunBuf*)buf; int siz; if (rr->magic != 0x64966325) error("Bad handle passed to fid_run_initbuf()"); siz= rr->buf_size ? rr->buf_size : 1; // Minimum one element to avoid problems rb->coef= rr->coef; rb->cmd= rr->cmd; rb->mov_cnt= (siz-1) * sizeof(double); memset(rb->buf, 0, rb->mov_cnt + sizeof(double)); } // // Reinitialise an instance of the filter, allowing it to start // afresh. It assumes that the buffer was correctly initialised // previously, either through a call to fid_run_newbuf() or // fid_run_initbuf(). // void fid_run_zapbuf(void *buf) { RunBuf *rb= (RunBuf*)buf; memset(rb->buf, 0, rb->mov_cnt + sizeof(double)); } // // Delete an instance // void fid_run_freebuf(void *runbuf) { free(runbuf); } // // Delete the filter // void fid_run_free(void *run) { free(run); } // END //