2 Copyright (c) 2003-2004, Mark Borgerding
3 Copyright (c) 2005-2007, Jean-Marc Valin
7 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
9 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
11 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
13 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
21 #include "_kiss_fft_guts.h"
23 #include "os_support.h"
25 /* The guts header contains all the multiplication and addition macros that are defined for
26 fixed or floating point complex numbers. It also delares the kf_ internal functions.
32 const kiss_fft_cfg st,
43 kiss_fft_cpx * Fout_beg = Fout;
46 Fout = Fout_beg + i*mm;
51 /* Almost the same as the code path below, except that we divide the input by two
52 (while keeping the best accuracy possible) */
54 tr = SHR32(SUB32(MULT16_16(Fout2->r , tw1->r),MULT16_16(Fout2->i , tw1->i)), 1);
55 ti = SHR32(ADD32(MULT16_16(Fout2->i , tw1->r),MULT16_16(Fout2->r , tw1->i)), 1);
57 Fout2->r = PSHR32(SUB32(SHL32(EXTEND32(Fout->r), 14), tr), 15);
58 Fout2->i = PSHR32(SUB32(SHL32(EXTEND32(Fout->i), 14), ti), 15);
59 Fout->r = PSHR32(ADD32(SHL32(EXTEND32(Fout->r), 14), tr), 15);
60 Fout->i = PSHR32(ADD32(SHL32(EXTEND32(Fout->i), 14), ti), 15);
67 kiss_fft_cpx * Fout_beg = Fout;
70 Fout = Fout_beg + i*mm;
75 C_MUL (t, *Fout2 , *tw1);
77 C_SUB( *Fout2 , *Fout , t );
89 const kiss_fft_cfg st,
95 kiss_fft_cpx *tw1,*tw2,*tw3;
96 kiss_fft_cpx scratch[6];
103 kiss_fft_cpx * Fout_beg = Fout;
106 Fout = Fout_beg + i*mm;
107 tw3 = tw2 = tw1 = st->twiddles;
110 C_MUL(scratch[0],Fout[m] , *tw1 );
111 C_MUL(scratch[1],Fout[m2] , *tw2 );
112 C_MUL(scratch[2],Fout[m3] , *tw3 );
114 C_SUB( scratch[5] , *Fout, scratch[1] );
115 C_ADDTO(*Fout, scratch[1]);
116 C_ADD( scratch[3] , scratch[0] , scratch[2] );
117 C_SUB( scratch[4] , scratch[0] , scratch[2] );
118 C_SUB( Fout[m2], *Fout, scratch[3] );
122 C_ADDTO( *Fout , scratch[3] );
124 Fout[m].r = scratch[5].r - scratch[4].i;
125 Fout[m].i = scratch[5].i + scratch[4].r;
126 Fout[m3].r = scratch[5].r + scratch[4].i;
127 Fout[m3].i = scratch[5].i - scratch[4].r;
133 kiss_fft_cpx * Fout_beg = Fout;
136 Fout = Fout_beg + i*mm;
137 tw3 = tw2 = tw1 = st->twiddles;
140 C_MUL4(scratch[0],Fout[m] , *tw1 );
141 C_MUL4(scratch[1],Fout[m2] , *tw2 );
142 C_MUL4(scratch[2],Fout[m3] , *tw3 );
144 Fout->r = PSHR16(Fout->r, 2);
145 Fout->i = PSHR16(Fout->i, 2);
146 C_SUB( scratch[5] , *Fout, scratch[1] );
147 C_ADDTO(*Fout, scratch[1]);
148 C_ADD( scratch[3] , scratch[0] , scratch[2] );
149 C_SUB( scratch[4] , scratch[0] , scratch[2] );
150 Fout[m2].r = PSHR16(Fout[m2].r, 2);
151 Fout[m2].i = PSHR16(Fout[m2].i, 2);
152 C_SUB( Fout[m2], *Fout, scratch[3] );
156 C_ADDTO( *Fout , scratch[3] );
158 Fout[m].r = scratch[5].r + scratch[4].i;
159 Fout[m].i = scratch[5].i - scratch[4].r;
160 Fout[m3].r = scratch[5].r - scratch[4].i;
161 Fout[m3].i = scratch[5].i + scratch[4].r;
168 static void kf_bfly3(
170 const size_t fstride,
171 const kiss_fft_cfg st,
176 const size_t m2 = 2*m;
177 kiss_fft_cpx *tw1,*tw2;
178 kiss_fft_cpx scratch[5];
180 epi3 = st->twiddles[fstride*m];
182 tw1=tw2=st->twiddles;
186 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
189 C_MUL(scratch[1],Fout[m] , *tw1);
190 C_MUL(scratch[2],Fout[m2] , *tw2);
192 C_ADD(scratch[3],scratch[1],scratch[2]);
193 C_SUB(scratch[0],scratch[1],scratch[2]);
197 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
198 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
200 C_MULBYSCALAR( scratch[0] , epi3.i );
202 C_ADDTO(*Fout,scratch[3]);
204 Fout[m2].r = Fout[m].r + scratch[0].i;
205 Fout[m2].i = Fout[m].i - scratch[0].r;
207 Fout[m].r -= scratch[0].i;
208 Fout[m].i += scratch[0].r;
214 static void kf_bfly5(
216 const size_t fstride,
217 const kiss_fft_cfg st,
221 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
223 kiss_fft_cpx scratch[13];
224 kiss_fft_cpx * twiddles = st->twiddles;
227 ya = twiddles[fstride*m];
228 yb = twiddles[fstride*2*m];
237 for ( u=0; u<m; ++u ) {
239 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
243 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
244 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
245 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
246 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
248 C_ADD( scratch[7],scratch[1],scratch[4]);
249 C_SUB( scratch[10],scratch[1],scratch[4]);
250 C_ADD( scratch[8],scratch[2],scratch[3]);
251 C_SUB( scratch[9],scratch[2],scratch[3]);
253 Fout0->r += scratch[7].r + scratch[8].r;
254 Fout0->i += scratch[7].i + scratch[8].i;
256 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
257 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
259 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
260 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
262 C_SUB(*Fout1,scratch[5],scratch[6]);
263 C_ADD(*Fout4,scratch[5],scratch[6]);
265 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
266 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
267 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
268 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
270 C_ADD(*Fout2,scratch[11],scratch[12]);
271 C_SUB(*Fout3,scratch[11],scratch[12]);
273 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
277 /* perform the butterfly for one stage of a mixed radix FFT */
278 static void kf_bfly_generic(
280 const size_t fstride,
281 const kiss_fft_cfg st,
287 kiss_fft_cpx * twiddles = st->twiddles;
289 kiss_fft_cpx scratchbuf[17];
290 int Norig = st->nfft;
292 /*CHECKBUF(scratchbuf,nscratchbuf,p);*/
294 speex_fatal("KissFFT: max radix supported is 17");
296 for ( u=0; u<m; ++u ) {
298 for ( q1=0 ; q1<p ; ++q1 ) {
299 scratchbuf[q1] = Fout[ k ];
301 C_FIXDIV(scratchbuf[q1],p);
307 for ( q1=0 ; q1<p ; ++q1 ) {
309 Fout[ k ] = scratchbuf[0];
311 twidx += fstride * k;
312 if (twidx>=Norig) twidx-=Norig;
313 C_MUL(t,scratchbuf[q] , twiddles[twidx] );
314 C_ADDTO( Fout[ k ] ,t);
324 const kiss_fft_cpx * f,
325 const size_t fstride,
328 const kiss_fft_cfg st
331 const int p=*factors++; /* the radix */
332 const int m=*factors++; /* stage's fft length/p */
334 /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
341 f += fstride*in_stride;
347 kf_shuffle( Fout , f, fstride*p, in_stride, factors,st);
348 f += fstride*in_stride;
357 const kiss_fft_cpx * f,
358 const size_t fstride,
361 const kiss_fft_cfg st,
368 kiss_fft_cpx * Fout_beg=Fout;
369 const int p=*factors++; /* the radix */
370 const int m=*factors++; /* stage's fft length/p */
372 /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
379 f += fstride*in_stride;
385 kf_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m);
386 f += fstride*in_stride;
394 case 2: kf_bfly2(Fout,fstride,st,m); break;
395 case 3: kf_bfly3(Fout,fstride,st,m); break;
396 case 4: kf_bfly4(Fout,fstride,st,m); break;
397 case 5: kf_bfly5(Fout,fstride,st,m); break;
398 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
401 /*printf ("fft %d %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N, m2);*/
407 Fout = Fout_beg+i*m2;
408 const kiss_fft_cpx * f2 = f+i*s2;
412 f2 += fstride*in_stride;
416 kf_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m);
423 case 2: kf_bfly2(Fout,fstride,st,m, N, m2); break;
424 case 3: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly3(Fout,fstride,st,m);} break;
425 case 4: kf_bfly4(Fout,fstride,st,m, N, m2); break;
426 case 5: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly5(Fout,fstride,st,m);} break;
427 default: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly_generic(Fout,fstride,st,m,p);} break;
432 /* facbuf is populated by p1,m1,p2,m2, ...
437 void kf_factor(int n,int * facbuf)
441 /*factor out powers of 4, powers of 2, then any remaining primes */
445 case 4: p = 2; break;
446 case 2: p = 3; break;
447 default: p += 2; break;
449 if (p>32000 || (spx_int32_t)p*(spx_int32_t)p > n)
450 p = n; /* no more factors, skip to end */
459 * User-callable function to allocate all necessary storage space for the fft.
461 * The return value is a contiguous block of memory, allocated with malloc. As such,
462 * It can be freed with free(), rather than a kiss_fft-specific function.
464 kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
466 kiss_fft_cfg st=NULL;
467 size_t memneeded = sizeof(struct kiss_fft_state)
468 + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
470 if ( lenmem==NULL ) {
471 st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
473 if (mem != NULL && *lenmem >= memneeded)
474 st = (kiss_fft_cfg)mem;
480 st->inverse = inverse_fft;
482 for (i=0;i<nfft;++i) {
483 spx_word32_t phase = i;
486 kf_cexp2(st->twiddles+i, DIV32(SHL32(phase,17),nfft));
489 for (i=0;i<nfft;++i) {
490 const double pi=3.14159265358979323846264338327;
491 double phase = ( -2*pi /nfft ) * i;
494 kf_cexp(st->twiddles+i, phase );
497 kf_factor(nfft,st->factors);
505 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
509 speex_fatal("In-place FFT not supported");
510 /*CHECKBUF(tmpbuf,ntmpbuf,st->nfft);
511 kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
512 SPEEX_MOVE(fout,tmpbuf,st->nfft);*/
514 kf_shuffle( fout, fin, 1,in_stride, st->factors,st);
515 kf_work( fout, fin, 1,in_stride, st->factors,st, 1, in_stride, 1);
519 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
521 kiss_fft_stride(cfg,fin,fout,1);