1 /********************************************************************
3 * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 *
9 * by the XIPHOPHORUS Company http://www.xiph.org/ *
11 ********************************************************************
13 function: *unnormalized* fft transform
14 last mod: $Id: smallft.c,v 1.19 2003/10/08 05:12:37 jm Exp $
16 ********************************************************************/
18 /* FFT implementation from OggSquish, minus cosine transforms,
19 * minus all but radix 2/4 case. In Vorbis we only need this
22 * To do more than just power-of-two sized vectors, see the full
23 * version I wrote for NetLib.
25 * Note that the packing is a little strange; rather than the FFT r/i
26 * packing following R_0, I_n, R_1, I_1, R_2, I_2 ... R_n-1, I_n-1,
27 * it follows R_0, R_1, I_1, R_2, I_2 ... R_n-1, I_n-1, I_n like the
38 #include "os_support.h"
40 static void drfti1(int n, float *wa, int *ifac){
41 static int ntryh[4] = { 4,2,3,5 };
42 static float tpi = 6.28318530717958648f;
43 float arg,argh,argld,fi;
46 int ld, ii, ip, is, nq, nr;
86 for (k1=0;k1<nfm1;k1++){
98 for (ii=2;ii<ido;ii+=2){
110 static void fdrffti(int n, float *wsave, int *ifac){
113 drfti1(n, wsave+n, ifac);
116 static void dradf2(int ido,int l1,float *cc,float *ch,float *wa1){
119 int t0,t1,t2,t3,t4,t5,t6;
125 ch[t1<<1]=cc[t1]+cc[t2];
126 ch[(t1<<1)+t3-1]=cc[t1]-cc[t2];
146 tr2=wa1[i-2]*cc[t3-1]+wa1[i-1]*cc[t3];
147 ti2=wa1[i-2]*cc[t3]-wa1[i-1]*cc[t3-1];
150 ch[t6-1]=cc[t5-1]+tr2;
151 ch[t4-1]=cc[t5-1]-tr2;
171 static void dradf4(int ido,int l1,float *cc,float *ch,float *wa1,
172 float *wa2,float *wa3){
173 static float hsqt2 = .70710678118654752f;
174 int i,k,t0,t1,t2,t3,t4,t5,t6;
175 float ci2,ci3,ci4,cr2,cr3,cr4,ti1,ti2,ti3,ti4,tr1,tr2,tr3,tr4;
187 ch[t5=t3<<2]=tr1+tr2;
188 ch[(ido<<2)+t5-1]=tr2-tr1;
189 ch[(t5+=(ido<<1))-1]=cc[t3]-cc[t4];
190 ch[t5]=cc[t2]-cc[t1];
213 cr2=wa1[i-2]*cc[t3-1]+wa1[i-1]*cc[t3];
214 ci2=wa1[i-2]*cc[t3]-wa1[i-1]*cc[t3-1];
216 cr3=wa2[i-2]*cc[t3-1]+wa2[i-1]*cc[t3];
217 ci3=wa2[i-2]*cc[t3]-wa2[i-1]*cc[t3-1];
219 cr4=wa3[i-2]*cc[t3-1]+wa3[i-1]*cc[t3];
220 ci4=wa3[i-2]*cc[t3]-wa3[i-1]*cc[t3-1];
250 t2=(t1=t0+ido-1)+(t0<<1);
257 ti1=-hsqt2*(cc[t1]+cc[t2]);
258 tr1=hsqt2*(cc[t1]-cc[t2]);
260 ch[t4-1]=tr1+cc[t6-1];
261 ch[t4+t5-1]=cc[t6-1]-tr1;
263 ch[t4]=ti1-cc[t1+t0];
264 ch[t4+t5]=ti1+cc[t1+t0];
273 static void dradfg(int ido,int ip,int l1,int idl1,float *cc,float *c1,
274 float *c2,float *ch,float *ch2,float *wa){
276 static float tpi=6.283185307179586f;
277 int idij,ipph,i,j,k,l,ic,ik,is;
278 int t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10;
279 float dc2,ai1,ai2,ar1,ar2,ds2;
281 float dcp,arg,dsp,ar1h,ar2h;
295 for(ik=0;ik<idl1;ik++)ch2[ik]=c2[ik];
321 ch[t3-1]=wa[idij-1]*c1[t3-1]+wa[idij]*c1[t3];
322 ch[t3]=wa[idij-1]*c1[t3]-wa[idij]*c1[t3-1];
338 ch[t3-1]=wa[idij-1]*c1[t3-1]+wa[idij]*c1[t3];
339 ch[t3]=wa[idij-1]*c1[t3]-wa[idij]*c1[t3-1];
362 c1[t5-1]=ch[t5-1]+ch[t6-1];
363 c1[t6-1]=ch[t5]-ch[t6];
364 c1[t5]=ch[t5]+ch[t6];
365 c1[t6]=ch[t6-1]-ch[t5-1];
381 c1[t5-1]=ch[t5-1]+ch[t6-1];
382 c1[t6-1]=ch[t5]-ch[t6];
383 c1[t5]=ch[t5]+ch[t6];
384 c1[t6]=ch[t6-1]-ch[t5-1];
393 for(ik=0;ik<idl1;ik++)c2[ik]=ch2[ik];
405 c1[t3]=ch[t3]+ch[t4];
406 c1[t4]=ch[t4]-ch[t3];
418 ar1h=dcp*ar1-dsp*ai1;
426 for(ik=0;ik<idl1;ik++){
427 ch2[t4++]=c2[ik]+ar1*c2[t7++];
428 ch2[t5++]=ai1*c2[t6++];
442 ar2h=dc2*ar2-ds2*ai2;
450 for(ik=0;ik<idl1;ik++){
451 ch2[t6++]+=ar2*c2[t8++];
452 ch2[t7++]+=ai2*c2[t9++];
461 for(ik=0;ik<idl1;ik++)ch2[ik]+=c2[t2++];
471 for(i=0;i<ido;i++)cc[t4++]=ch[t3++];
532 cc[i+t7-1]=ch[i+t8-1]+ch[i+t9-1];
533 cc[ic+t6-1]=ch[i+t8-1]-ch[i+t9-1];
534 cc[i+t7]=ch[i+t8]+ch[i+t9];
535 cc[ic+t6]=ch[i+t9]-ch[i+t8];
562 cc[t7-1]=ch[t8-1]+ch[t9-1];
563 cc[t6-1]=ch[t8-1]-ch[t9-1];
564 cc[t7]=ch[t8]+ch[t9];
565 cc[t6]=ch[t9]-ch[t8];
575 static void drftf1(int n,float *c,float *ch,float *wa,int *ifac){
578 int ip,iw,ido,idl1,ix2,ix3;
585 for(k1=0;k1<nf;k1++){
599 dradf4(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1);
601 dradf4(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1);
608 dradf2(ido,l1,c,ch,wa+iw-1);
612 dradf2(ido,l1,ch,c,wa+iw-1);
619 dradfg(ido,ip,l1,idl1,c,c,c,ch,ch,wa+iw-1);
624 dradfg(ido,ip,l1,idl1,ch,ch,ch,c,c,wa+iw-1);
633 for(i=0;i<n;i++)c[i]=ch[i];
636 static void dradb2(int ido,int l1,float *cc,float *ch,float *wa1){
637 int i,k,t0,t1,t2,t3,t4,t5,t6;
646 ch[t1]=cc[t2]+cc[t3+t2];
647 ch[t1+t0]=cc[t2]-cc[t3+t2];
665 ch[t3-1]=cc[t4-1]+cc[t5-1];
666 tr2=cc[t4-1]-cc[t5-1];
667 ch[t3]=cc[t4]-cc[t5];
669 ch[t6-1]=wa1[i-2]*tr2-wa1[i-1]*ti2;
670 ch[t6]=wa1[i-2]*ti2+wa1[i-1]*tr2;
681 ch[t1]=cc[t2]+cc[t2];
682 ch[t1+t0]=-(cc[t2+1]+cc[t2+1]);
688 static void dradb3(int ido,int l1,float *cc,float *ch,float *wa1,
690 static float taur = -.5f;
691 static float taui = .8660254037844386f;
692 int i,k,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10;
693 float ci2,ci3,di2,di3,cr2,cr3,dr2,dr3,ti2,tr2;
702 tr2=cc[t3-1]+cc[t3-1];
703 cr2=cc[t5]+(taur*tr2);
705 ci3=taui*(cc[t3]+cc[t3]);
730 tr2=cc[t5-1]+cc[t6-1];
731 cr2=cc[t7-1]+(taur*tr2);
732 ch[t8-1]=cc[t7-1]+tr2;
734 ci2=cc[t7]+(taur*ti2);
736 cr3=taui*(cc[t5-1]-cc[t6-1]);
737 ci3=taui*(cc[t5]+cc[t6]);
742 ch[t9-1]=wa1[i-2]*dr2-wa1[i-1]*di2;
743 ch[t9]=wa1[i-2]*di2+wa1[i-1]*dr2;
744 ch[t10-1]=wa2[i-2]*dr3-wa2[i-1]*di3;
745 ch[t10]=wa2[i-2]*di3+wa2[i-1]*dr3;
751 static void dradb4(int ido,int l1,float *cc,float *ch,float *wa1,
752 float *wa2,float *wa3){
753 static float sqrt2=1.414213562373095f;
754 int i,k,t0,t1,t2,t3,t4,t5,t6,t7,t8;
755 float ci2,ci3,ci4,cr2,cr3,cr4,ti1,ti2,ti3,ti4,tr1,tr2,tr3,tr4;
765 tr3=cc[t4-1]+cc[t4-1];
767 tr1=cc[t3]-cc[(t4+=t6)-1];
782 t5=(t4=(t3=(t2=t1<<2)+t6))+t6;
794 tr1=cc[t2-1]-cc[t5-1];
795 tr2=cc[t2-1]+cc[t5-1];
796 ti4=cc[t3-1]-cc[t4-1];
797 tr3=cc[t3-1]+cc[t4-1];
807 ch[(t8=t7+t0)-1]=wa1[i-2]*cr2-wa1[i-1]*ci2;
808 ch[t8]=wa1[i-2]*ci2+wa1[i-1]*cr2;
809 ch[(t8+=t0)-1]=wa2[i-2]*cr3-wa2[i-1]*ci3;
810 ch[t8]=wa2[i-2]*ci3+wa2[i-1]*cr3;
811 ch[(t8+=t0)-1]=wa3[i-2]*cr4-wa3[i-1]*ci4;
812 ch[t8]=wa3[i-2]*ci4+wa3[i-1]*cr4;
817 if(ido%2 == 1)return;
829 tr1=cc[t1-1]-cc[t4-1];
830 tr2=cc[t1-1]+cc[t4-1];
832 ch[t5+=t0]=sqrt2*(tr1-ti1);
834 ch[t5+=t0]=-sqrt2*(tr1+ti1);
842 static void dradbg(int ido,int ip,int l1,int idl1,float *cc,float *c1,
843 float *c2,float *ch,float *ch2,float *wa){
844 static float tpi=6.283185307179586f;
845 int idij,ipph,i,j,k,l,ik,is,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,
847 float dc2,ai1,ai2,ar1,ar2,ds2;
849 float dcp,arg,dsp,ar1h,ar2h;
901 ch[t3]=cc[t6-1]+cc[t6-1];
902 ch[t4]=cc[t6]+cc[t6];
910 if (ido == 1)goto L116;
934 ch[t5-1]=cc[t9-1]+cc[t11-1];
935 ch[t6-1]=cc[t9-1]-cc[t11-1];
936 ch[t5]=cc[t9]-cc[t11];
937 ch[t6]=cc[t9]+cc[t11];
968 ch[t5-1]=cc[t11-1]+cc[t12-1];
969 ch[t6-1]=cc[t11-1]-cc[t12-1];
970 ch[t5]=cc[t11]-cc[t12];
971 ch[t6]=cc[t11]+cc[t12];
990 ar1h=dcp*ar1-dsp*ai1;
998 for(ik=0;ik<idl1;ik++){
999 c2[t4++]=ch2[t6++]+ar1*ch2[t7++];
1000 c2[t5++]=ai1*ch2[t8++];
1009 for(j=2;j<ipph;j++){
1012 ar2h=dc2*ar2-ds2*ai2;
1013 ai2=dc2*ai2+ds2*ar2;
1019 for(ik=0;ik<idl1;ik++){
1020 c2[t4++]+=ar2*ch2[t11++];
1021 c2[t5++]+=ai2*ch2[t12++];
1027 for(j=1;j<ipph;j++){
1030 for(ik=0;ik<idl1;ik++)ch2[ik]+=ch2[t2++];
1035 for(j=1;j<ipph;j++){
1041 ch[t3]=c1[t3]-c1[t4];
1042 ch[t4]=c1[t3]+c1[t4];
1048 if(ido==1)goto L132;
1049 if(nbd<l1)goto L128;
1053 for(j=1;j<ipph;j++){
1061 for(i=2;i<ido;i+=2){
1064 ch[t5-1]=c1[t5-1]-c1[t6];
1065 ch[t6-1]=c1[t5-1]+c1[t6];
1066 ch[t5]=c1[t5]+c1[t6-1];
1067 ch[t6]=c1[t5]-c1[t6-1];
1078 for(j=1;j<ipph;j++){
1083 for(i=2;i<ido;i+=2){
1089 ch[t5-1]=c1[t5-1]-c1[t6];
1090 ch[t6-1]=c1[t5-1]+c1[t6];
1091 ch[t5]=c1[t5]+c1[t6-1];
1092 ch[t6]=c1[t5]-c1[t6-1];
1102 for(ik=0;ik<idl1;ik++)c2[ik]=ch2[ik];
1113 if(nbd>l1)goto L139;
1122 for(i=2;i<ido;i+=2){
1127 c1[t3-1]=wa[idij-1]*ch[t3-1]-wa[idij]*ch[t3];
1128 c1[t3]=wa[idij-1]*ch[t3]+wa[idij]*ch[t3-1];
1145 for(i=2;i<ido;i+=2){
1148 c1[t3-1]=wa[idij-1]*ch[t3-1]-wa[idij]*ch[t3];
1149 c1[t3]=wa[idij-1]*ch[t3]+wa[idij]*ch[t3-1];
1156 static void drftb1(int n, float *c, float *ch, float *wa, int *ifac){
1159 int nf,ip,iw,ix2,ix3,ido,idl1;
1166 for(k1=0;k1<nf;k1++){
1176 dradb4(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1);
1178 dradb4(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1);
1186 dradb2(ido,l1,ch,c,wa+iw-1);
1188 dradb2(ido,l1,c,ch,wa+iw-1);
1197 dradb3(ido,l1,ch,c,wa+iw-1,wa+ix2-1);
1199 dradb3(ido,l1,c,ch,wa+iw-1,wa+ix2-1);
1204 /* The radix five case can be translated later..... */
1205 /* if(ip!=5)goto L112;
1211 dradb5(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1,wa+ix4-1);
1213 dradb5(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1,wa+ix4-1);
1219 dradbg(ido,ip,l1,idl1,ch,ch,ch,c,c,wa+iw-1);
1221 dradbg(ido,ip,l1,idl1,c,c,c,ch,ch,wa+iw-1);
1231 for(i=0;i<n;i++)c[i]=ch[i];
1234 void spx_drft_forward(struct drft_lookup *l,float *data){
1236 drftf1(l->n,data,l->trigcache,l->trigcache+l->n,l->splitcache);
1239 void spx_drft_backward(struct drft_lookup *l,float *data){
1241 drftb1(l->n,data,l->trigcache,l->trigcache+l->n,l->splitcache);
1244 void spx_drft_init(struct drft_lookup *l,int n)
1247 l->trigcache=(float*)speex_alloc(3*n*sizeof(*l->trigcache));
1248 l->splitcache=(int*)speex_alloc(32*sizeof(*l->splitcache));
1249 fdrffti(n, l->trigcache, l->splitcache);
1252 void spx_drft_clear(struct drft_lookup *l)
1257 speex_free(l->trigcache);
1259 speex_free(l->splitcache);