Newsgroups: sci.math.num-analysis,sci.math,comp.lang.c,comp.lang.c++,comp.dsp
Path: usc!howland.reston.ans.net!newsserver.jvnc.net!darwin.sura.net!sgiblab!sono!mayer
From: mayer@acuson.com (Ron Mayer)
Subject: another SUMMARY: FFT in C (Best?,Fastest?,probably not) (part 2 of 2)
Message-ID: <1993Mar19.034348.16124@acuson.com>
Keywords: My code
Lines: 900
Reply-To: mayer@acuson.com ()
Organization: ACUSON, Mountain View, CA
References: <pm860605.125.732416789@longs.LANCE.ColoState.Edu>
Date: Fri, 19 Mar 1993 03:43:48 GMT
Xref: usc sci.math.num-analysis:7476 sci.math:41880 comp.lang.c:67195 comp.lang.c++:39620 comp.dsp:5667


This is a shar file containing the FFT routines described in my
previous posting.  (My own code, and a Duhamel-Hollman FFT for comparison.)

I would really really appreciate it if someone would tell me if anyone
finds errors in my code or if it fails to compile.  I would also be
interested in timing stats on various different architectures.

    Thanks,
    Ron Mayer
    mayer@acuson.com


#! /bin/sh
# To unbundle, remove everything above this line and "sh" the file
echo Makefile 1>&2
cat <<'========= End of Makefile =========' | sed 's/^>//' >Makefile
>all: time_duhamel time_mayer time_r_mayer test_mayer test_duhamel
>
>
>
>time_duhamel: time_duhamel.c fft_duhamel.c
>	cc -O4 -o time_duhamel  time_duhamel.c fft_duhamel.c -lm
>
>time_mayer: time_mayer.c fft_mayer.c
>	cc -O4 -o time_mayer  time_mayer.c fft_mayer.c
>
>time_r_mayer: time_r_mayer.c fft_mayer.c
>	cc -O4 -o time_r_mayer  time_r_mayer.c fft_mayer.c
>
>
>
>test_duhamel: test_duhamel.c fft_duhamel.c
>	cc -O4 -o test_duhamel  test_duhamel.c fft_duhamel.c -lm
>
>test_mayer: test_mayer.c fft_mayer.c
>	cc -O4 -o test_mayer  test_mayer.c fft_mayer.c
>
========= End of Makefile =========
echo README 1>&2
cat <<'========= End of README =========' | sed 's/^>//' >README
>
>This archive includes the following files
>    fft_mayer.c    = my fft routines
>    trigtbl.h      = A fast trig function generator
>    time_mayer.c   = a test program for my fft/ifft
>    time_r_mayer.c = a test program for my real valued fft/ifft
>
>    fft_duhamel.c  = "a duhamel-holman split radix dif fft"
>    time_duhamel.c = a test program the above fft
>
>    test_mayer.c   = tests the fft
>    test_duhamel.c = tests the fft
>
>    summary.sparc  = results of many different ffts using gcc on a sparc
>    compare_ffts   = script to run time_* for a variety of sizes
>
>    Makefile       = creates the test programs
>
>The test programs created are
>    time_mayer   [optional-number-of-points]
>    time_duhamel [optional-number-of-points]
>which should run each fft and ifft a bunch of times and display the results
>and
>    test_mayer
>    test_duhamal
>which should output the same results and confirm that both work correctly.
>
>
>
>Please see fft_mayer.c for copyright info.
>
========= End of README =========
echo compare_ffts 1>&2
cat <<'========= End of compare_ffts =========' | sed 's/^>//' >compare_ffts
>#!/bin/sh
>
>time_duhamel 4
>time_mayer   4
>time_r_mayer 4
>
>time_duhamel 128
>time_mayer   128
>time_r_mayer 128
>
>time_duhamel 32768
>time_mayer   32768
>time_r_mayer 32768
========= End of compare_ffts =========
echo fft_duhamel.c 1>&2
cat <<'========= End of fft_duhamel.c =========' | sed 's/^>//' >fft_duhamel.c
>/*
>** by: Dave Edelblute, edelblut@cod.nosc.mil, 05 Jan 1993
>** Modified: R. Mayer to work with my benchmark routines.
>*/
>
>#include <stdio.h>
>#include <math.h>
>
>#ifndef PI
>#define PI  3.14159265358979323846
>#endif
>
>/*
>       A Duhamel-Hollman split-radix dif fft
>       Ref: Electronics Letters, Jan. 5, 1984
>       Complex input and output data in arrays x and y
>       Length is n.
>*/
>
>int fft( x, y, np )
>double x[2]; 
>double y[2]; 
>int np ;
>{
>	double *px,*py;
>	int i,j,k,m,n,i0,i1,i2,i3,is,id,n1,n2,n4 ;
>	double  a,e,a3,cc1,ss1,cc3,ss3,r1,r2,s1,s2,s3,xt ;
>	px = x - 1; 
>	py = y - 1;
>	i = 2; 
>	m = 1; 
>	while (i < np) { 
>		i = i+i; 
>		m = m+1; 
>	};
>	n = i; 
>	if (n != np) {
>		for (i = np+1; i <= n; i++)  { 
>			*(px + i) = 0.0; 
>			*(py + i) = 0.0; 
>		};
>		printf("\nuse %d point fft",n); 
>	}
>
>	n2 = n+n;
>	for (k = 1;  k <= m-1; k++ ) {
>		n2 = n2 / 2; 
>		n4 = n2 / 4; 
>		e = 2.0 * PI / n2; 
>		a = 0.0;
>		for (j = 1; j<= n4 ; j++) {
>			a3 = 3.0*a; 
>			cc1 = cos(a); 
>			ss1 = sin(a);
>			cc3 = cos(a3); 
>			ss3 = sin(a3); 
>			a = j*e; 
>			is = j; 
>			id = 2*n2;
>			while ( is < n ) {
>				for (i0 = is; i0 <= n-1; i0 = i0 + id) {
>					i1 = i0 + n4; 
>					i2 = i1 + n4; 
>					i3 = i2 + n4;
>					r1 = *(px+i0) - *(px+i2);
>					*(px+i0) = *(px+i0) + *(px+i2);
>					r2 = *(px+i1) - *(px+i3);
>					*(px+i1) = *(px+i1) + *(px+i3);
>					s1 = *(py+i0) - *(py+i2);
>					*(py+i0) = *(py+i0) + *(py+i2);
>					s2 = *(py+i1) - *(py+i3);
>					*(py+i1) = *(py+i1) + *(py+i3);
>					s3 = r1 - s2; 
>					r1 = r1 + s2; 
>					s2 = r2 - s1; 
>					r2 = r2 + s1;
>					*(px+i2) = r1*cc1 - s2*ss1;
>					*(py+i2) = -s2*cc1 - r1*ss1;
>					*(px+i3) = s3*cc3 + r2*ss3;
>					*(py+i3) = r2*cc3 - s3*ss3;
>				}
>				is = 2*id - n2 + j; 
>				id = 4*id;
>			}
>		}
>	}
>
>/*
>---------------------Last stage, length=2 butterfly---------------------
>*/
>	is = 1; 
>	id = 4;
>	while ( is < n) {
>		for (i0 = is; i0 <= n; i0 = i0 + id) {
>			i1 = i0 + 1; 
>			r1 = *(px+i0);
>			*(px+i0) = r1 + *(px+i1);
>			*(px+i1) = r1 - *(px+i1);
>			r1 = *(py+i0);
>			*(py+i0) = r1 + *(py+i1);
>			*(py+i1) = r1 - *(py+i1);
>		}
>		is = 2*id - 1; 
>		id = 4 * id; 
>	}
>
>/*
>--------------------------Bit reverse counter
>*/
>	j = 1; 
>	n1 = n - 1;
>	for (i = 1; i <= n1; i++) {
>		if (i < j) {
>			xt = *(px+j);
>			*(px+j) = *(px+i); 
>			*(px+i) = xt;
>			xt = *(py+j); 
>			*(py+j) = *(py+i);
>			*(py+i) = xt;
>		}
>		k = n / 2; 
>		while (k < j) { 
>			j = j - k; 
>			k = k / 2; 
>		}
>		j = j + k;
>	}
>
>	/*
>  for (i = 1; i<=16; i++) printf("%d  %g   %gn",i,*(px+i),(py+i));
>*/
>
>	return(n);
>
>}
========= End of fft_duhamel.c =========
echo fft_mayer.c 1>&2
cat <<'========= End of fft_mayer.c =========' | sed 's/^>//' >fft_mayer.c
>/*
>** FFT and FHT routines
>**  Copyright 1988, 1993; Ron Mayer
>**  
>**  fht(fz,n);
>**      Does a hartley transform of "n" points in the array "fz".
>**  fft(n,real,imag)
>**      Does a fourier transform of "n" points of the "real" and
>**      "imag" arrays.
>**  ifft(n,real,imag)
>**      Does an inverse fourier transform of "n" points of the "real"
>**      and "imag" arrays.
>**  realfft(n,real)
>**      Does a real-valued fourier transform of "n" points of the
>**      "real" and "imag" arrays.  The real part of the transform ends
>**      up in the first half of the array and the imaginary part of the
>**      transform ends up in the second half of the array.
>**  realifft(n,real)
>**      The inverse of the realfft() routine above.
>**      
>**      
>** NOTE: This routine uses at least 2 patented algorithms, and may be
>**       under the restrictions of a bunch of different organizations.
>**       Although I wrote it completely myself; it is kind of a derivative
>**       of a routine I once authored and released under the GPL, so it
>**       may fall under the free software foundation's restrictions;
>**       it was worked on as a Stanford Univ project, so they claim
>**       some rights to it; it was further optimized at work here, so
>**       I think this company claims parts of it.  The patents are
>**       held by R. Bracewell (the FHT algorithm) and O. Buneman (the
>**       trig generator), both at Stanford Univ.
>**       If it were up to me, I'd say go do whatever you want with it;
>**       but it would be polite to give credit to the following people
>**       if you use this anywhere:
>**           Euler     - probable inventor of the fourier transform.
>**           Gauss     - probable inventor of the FFT.
>**           Hartley   - probable inventor of the hartley transform.
>**           Buneman   - for a really cool trig generator
>**           Mayer(me) - for authoring this particular version and
>**                       including all the optimizations in one package.
>**       Thanks,
>**       Ron Mayer; mayer@acuson.com
>**
>*/
>
>
>
>#define GOOD_TRIG
>#include "trigtbl.h"
>char fht_version[] = "Brcwl-Hrtly-Ron-dbld";
>
>#define SQRT2_2   0.70710678118654752440084436210484
>#define SQRT2   2*0.70710678118654752440084436210484
>fht(fz,n)
>int n;
>REAL *fz;
>{
> REAL a,b;
> REAL c1,s1,s2,c2,s3,c3,s4,c4;
> REAL f0,g0,f1,g1,f2,g2,f3,g3;
> int i,k,k1,k2,k3,k4,kx;
> REAL *fi,*fn,*gi;
> TRIG_VARS;
>
> for (k1=1,k2=0;k1<n;k1++)
>    {
>     REAL a;
>     for (k=n>>1; (!((k2^=k)&k)); k>>=1);
>     if (k1>k2)
>	{
>	     a=fz[k1];fz[k1]=fz[k2];fz[k2]=a;
>	}
>    }
> for ( k=0 ; (1<<k)<n ; k++ );
> k  &= 1;
> if (k==0)
>    {
>	 for (fi=fz,fn=fz+n;fi<fn;fi+=4)
>	    {
>	     REAL f0,f1,f2,f3;
>	     f1     = fi[0 ]-fi[1 ];
>	     f0     = fi[0 ]+fi[1 ];
>	     f3     = fi[2 ]-fi[3 ];
>	     f2     = fi[2 ]+fi[3 ];
>	     fi[2 ] = (f0-f2);	
>	     fi[0 ] = (f0+f2);
>	     fi[3 ] = (f1-f3);	
>	     fi[1 ] = (f1+f3);
>	    }
>    }
> else
>    {
>	 for (fi=fz,fn=fz+n,gi=fi+1;fi<fn;fi+=8,gi+=8)
>	    {
>	     REAL s1,c1,s2,c2,s3,c3,s4,c4,g0,f0,f1,g1,f2,g2,f3,g3;
>	     c1     = fi[0 ] - gi[0 ];
>	     s1     = fi[0 ] + gi[0 ];
>	     c2     = fi[2 ] - gi[2 ];
>	     s2     = fi[2 ] + gi[2 ];
>	     c3     = fi[4 ] - gi[4 ];
>	     s3     = fi[4 ] + gi[4 ];
>	     c4     = fi[6 ] - gi[6 ];
>	     s4     = fi[6 ] + gi[6 ];
>	     f1     = (s1 - s2);	
>	     f0     = (s1 + s2);
>	     g1     = (c1 - c2);	
>	     g0     = (c1 + c2);
>	     f3     = (s3 - s4);	
>	     f2     = (s3 + s4);
>	     g3     = SQRT2*c4;		
>	     g2     = SQRT2*c3;
>	     fi[4 ] = f0 - f2;
>	     fi[0 ] = f0 + f2;
>	     fi[6 ] = f1 - f3;
>	     fi[2 ] = f1 + f3;
>	     gi[4 ] = g0 - g2;
>	     gi[0 ] = g0 + g2;
>	     gi[6 ] = g1 - g3;
>	     gi[2 ] = g1 + g3;
>	    }
>    }
> if (n<16) return;
>
> do
>    {
>     REAL s1,c1;
>     k  += 2;
>     k1  = 1  << k;
>     k2  = k1 << 1;
>     k4  = k2 << 1;
>     k3  = k2 + k1;
>     kx  = k1 >> 1;
>	 fi  = fz;
>	 gi  = fi + kx;
>	 fn  = fz + n;
>	 do
>	    {
>	     REAL g0,f0,f1,g1,f2,g2,f3,g3;
>	     f1      = fi[0 ] - fi[k1];
>	     f0      = fi[0 ] + fi[k1];
>	     f3      = fi[k2] - fi[k3];
>	     f2      = fi[k2] + fi[k3];
>	     fi[k2]  = f0	  - f2;
>	     fi[0 ]  = f0	  + f2;
>	     fi[k3]  = f1	  - f3;
>	     fi[k1]  = f1	  + f3;
>	     g1      = gi[0 ] - gi[k1];
>	     g0      = gi[0 ] + gi[k1];
>	     g3      = SQRT2  * gi[k3];
>	     g2      = SQRT2  * gi[k2];
>	     gi[k2]  = g0	  - g2;
>	     gi[0 ]  = g0	  + g2;
>	     gi[k3]  = g1	  - g3;
>	     gi[k1]  = g1	  + g3;
>	     gi     += k4;
>	     fi     += k4;
>	    } while (fi<fn);
>     TRIG_INIT(k,c1,s1);
>     for (i=1;i<kx;i++)
>        {
>	 REAL c2,s2;
>         TRIG_NEXT(k,c1,s1);
>         c2 = c1*c1 - s1*s1;
>         s2 = 2*(c1*s1);
>	     fn = fz + n;
>	     fi = fz +i;
>	     gi = fz +k1-i;
>	     do
>		{
>		 REAL a,b,g0,f0,f1,g1,f2,g2,f3,g3;
>		 b       = s2*fi[k1] - c2*gi[k1];
>		 a       = c2*fi[k1] + s2*gi[k1];
>		 f1      = fi[0 ]    - a;
>		 f0      = fi[0 ]    + a;
>		 g1      = gi[0 ]    - b;
>		 g0      = gi[0 ]    + b;
>		 b       = s2*fi[k3] - c2*gi[k3];
>		 a       = c2*fi[k3] + s2*gi[k3];
>		 f3      = fi[k2]    - a;
>		 f2      = fi[k2]    + a;
>		 g3      = gi[k2]    - b;
>		 g2      = gi[k2]    + b;
>		 b       = s1*f2     - c1*g3;
>		 a       = c1*f2     + s1*g3;
>		 fi[k2]  = f0        - a;
>		 fi[0 ]  = f0        + a;
>		 gi[k3]  = g1        - b;
>		 gi[k1]  = g1        + b;
>		 b       = c1*g2     - s1*f3;
>		 a       = s1*g2     + c1*f3;
>		 gi[k2]  = g0        - a;
>		 gi[0 ]  = g0        + a;
>		 fi[k3]  = f1        - b;
>		 fi[k1]  = f1        + b;
>		 gi     += k4;
>		 fi     += k4;
>		} while (fi<fn);
>        }
>     TRIG_RESET(k,c1,s1);
>    } while (k4<n);
>}
>
>
>ifft(n,real,imag)
>int n;
>double *real,*imag;
>{
> double a,b,c,d;
> double q,r,s,t;
> int i,j,k;
> fht(real,n);
> fht(imag,n);
> for (i=1,j=n-1,k=n/2;i<k;i++,j--) {
>  a = real[i]; b = real[j];  q=a+b; r=a-b;
>  c = imag[i]; d = imag[j];  s=c+d; t=c-d;
>  imag[i] = (s+r)*0.5;  imag[j] = (s-r)*0.5;
>  real[i] = (q-t)*0.5;  real[j] = (q+t)*0.5;
> }
>}
>
>realfft(n,real)
>int n;
>double *real;
>{
> double a,b,c,d;
> int i,j,k;
> fht(real,n);
> for (i=1,j=n-1,k=n/2;i<k;i++,j--) {
>  a = real[i];
>  b = real[j];
>  real[j] = (a-b)*0.5;
>  real[i] = (a+b)*0.5;
> }
>}
>
>fft(n,real,imag)
>int n;
>double *real,*imag;
>{
> double a,b,c,d;
> double q,r,s,t;
> int i,j,k;
> for (i=1,j=n-1,k=n/2;i<k;i++,j--) {
>  a = real[i]; b = real[j];  q=a+b; r=a-b;
>  c = imag[i]; d = imag[j];  s=c+d; t=c-d;
>  real[i] = (q+t)*.5; real[j] = (q-t)*.5;
>  imag[i] = (s-r)*.5; imag[j] = (s+r)*.5;
> }
> fht(real,n);
> fht(imag,n);
>}
>
>realifft(n,real)
>int n;
>double *real;
>{
> double a,b,c,d;
> int i,j,k;
> for (i=1,j=n-1,k=n/2;i<k;i++,j--) {
>  a = real[i];
>  b = real[j];
>  real[j] = (a-b);
>  real[i] = (a+b);
> }
> fht(real,n);
>}
========= End of fft_mayer.c =========
echo summary.sparc 1>&2
cat <<'========= End of summary.sparc =========' | sed 's/^>//' >summary.sparc
>
>This file contains a benchmark results of a number of popular FFT
>algorithms.  The algorithms compared are:
>
>    FFT-numrec
>        The FFT from numerical recipies, converted to double precision
>    FFT-duhamel
>        A "duhamel-holman split radix fft" from "electronics letters,
>        jan. 5, 1994", coded by Dave Edelblute, edelblut@cod.nosc.mil
>    FFT-wang
>        Singleton's arbitrary-radix FFT translated to C and coded by
>        John Wang, wang@acuson.com
>    FFT-mayer
>        An original FFT by Ron Mayer (mayer@acuson.com)
>    real-FFT-numrec
>        The real valued FFT from numerical recipies, converted to
>        double precision.
>    real-FFT-mayer
>        An original real valued FFT by Ron Mayer (mayer@acuson.com)
>
>I compiled each of the programs using gcc 2.0 with the -O4 flag on a
>Sun Sparc 1; and timed (using the "clock()" function in SunOS) a
>number of iterations of forward and reverse transforms of a known data
>set.  At the end of the iterations of forward and reverse transforms I
>compared the data with the original to check for accumulated errors.
>
>algorithm                  # of       # of    time                     errors
>  used                   iterations   points
>n=4
>FFT-numrec                (16386       4):   4466488 CPU us ;ssq errors 0.0
>FFT-duhamel               (16386       4):   2016586 CPU us ;ssq errors 0.0
>FFT-wang                  (16386       4):   3299868 CPU us ;ssq errors 0.0
>FFT-mayer                 (16386       4):   1333280 CPU us ;ssq errors 0.0
>real-FFT-numrec           (16386       4):   3133208 CPU us ;ssq errors 0.0
>real-FFT-mayer            (16386       4):    666640 CPU us ;ssq errors 0.0
>n=128
>FFT-numrec                (514       128):   3883178 CPU us ;ssq errors 4.1e-21
>FFT-duhamel               (514       128):   6349746 CPU us ;ssq errors 8.6e-22
>FFT-wang                  (514       128):   3866512 CPU us ;ssq errors 1.5e-09
>FFT-mayer                 (514       128):   2999880 CPU us ;ssq errors 6.9e-22
>real-FFT-numrec           (514       128):   2333240 CPU us ;ssq errors 4.1e-21
>real-FFT-mayer            (514       128):   1433276 CPU us ;ssq errors 6.9e-22
>n=2048
>FFT-numrec                (34       2048):   5733104 CPU us ;ssq errors 8.6e-19
>FFT-duhamel               (34       2048):   8849646 CPU us ;ssq errors 3.2e-20
>FFT-wang                  (34       2048):   5783102 CPU us ;ssq errors 2.2e-08
>FFT-mayer                 (34       2048):   4649814 CPU us ;ssq errors 9.4e-20
>real-FFT-numrec           (34       2048):   3116542 CPU us ;ssq errors 1.6e-18
>real-FFT-mayer            (34       2048):   2183246 CPU us ;ssq errors 9.4e-20
>n=32768
>FFT-numrec                (4       32768):  18732584 CPU us ;ssq errors 1.5e-16
>FFT-duhamel               (4       32768):  22632428 CPU us ;ssq errors 3.7e-18
>FFT-wang                  (4       32768):  16299348 CPU us ;ssq errors 1.1e-06
>FFT-mayer                 (4       32768):  13849446 CPU us ;ssq errors 1.2e-17
>real-FFT-numrec           (4       32768):   9999600 CPU us ;ssq errors 1.9e-16
>real-FFT-mayer            (4       32768):   6716398 CPU us ;ssq errors 1.2e-17
========= End of summary.sparc =========
echo test_duhamel.c 1>&2
cat <<'========= End of test_duhamel.c =========' | sed 's/^>//' >test_duhamel.c
>/*
>** FFT tester
>**      Loads random numbers into complex array, ffts it, and shows result.
>**	Beats me what the output should be; but any two different ffts
>**      should return the same result.
>*/
>
>#define REAL double
>
> REAL real[140001];
> REAL imag[140001];
>
>main(argc,argv)
>int argc;
>char **argv;
>{
> int i;
> for (i=0;i<256;i++)   {real[i]=random()%10;  imag[i]=random()%10; }
> fft(real, imag, 256);
> for (i=0;i<10;i++) {printf("%g,%g ",real[i],imag[i]);}
> printf("\n");
>}
========= End of test_duhamel.c =========
echo test_mayer.c 1>&2
cat <<'========= End of test_mayer.c =========' | sed 's/^>//' >test_mayer.c
>/*
>** FFT tester
>**      Loads random numbers into complex array, ffts it, and shows result.
>**	Beats me what the output should be; but any two different ffts
>**      should return the same result.
>*/
>
>#define REAL double
>
> REAL real[140001];
> REAL imag[140001];
>
>main(argc,argv)
>int argc;
>char **argv;
>{
> int i;
> for (i=0;i<256;i++)   {real[i]=random()%10;  imag[i]=random()%10; }
> fft(256, real, imag);
> for (i=0;i<10;i++) {printf("%g,%g ",real[i],imag[i]);}
> printf("\n");
>}
========= End of test_mayer.c =========
echo time_duhamel.c 1>&2
cat <<'========= End of time_duhamel.c =========' | sed 's/^>//' >time_duhamel.c
>#define REAL double
>
> REAL real[140001];
> REAL imag[140001];
>
>main(argc,argv)
>int argc;
>char **argv;
>{
> int num=0,lop,i,k,j;
> long cycles;
> REAL ssq=0;
> REAL scale;
>int status;
> if (argc>1) num=atoi(argv[1]);
> if (num==0) num=256;
> for (i=0;i<num;i++)
>    {
>     real[i]=i;
>     imag[i]=0;
>    }
> scale = 1.0/num;
> lop = 2+256*256/num;
> printf("FFT-duhamel               (%-6d %6d):",lop,num);
> cycles=clock();
> for (k=0;k<lop;k++)
>     {
>      fft(real, imag, num);
>      fft(real, imag, num);
>      for (j=0;j<num*2;j++)  {real[j]*=scale;imag[j]*=scale;}
>     }
> printf("%10d CPU us ;",clock());
>
> for (ssq=0,i=0;i<num;i++)
>    ssq+=(real[i]-i)*(real[i]-i);
> printf("ssq errors %#.2g\n",ssq);
>
>}
>
>/*
>**  NOTE:
>**      No inverse transform was included with this package; I ran the forward
>**      transform instead, and as long as I end up running it a multiple of
>**      four times and scale correctly I can still check the accumulated
>**      errors.  I assume an inverse transform would be roughly as fast as a
>**      forward transform for these purposes.
>*/
========= End of time_duhamel.c =========
echo time_mayer.c 1>&2
cat <<'========= End of time_mayer.c =========' | sed 's/^>//' >time_mayer.c
>#define REAL double
>
> REAL real[140001];
> REAL imag[140001];
>
>main(argc,argv)
>int argc;
>char **argv;
>{
> int num=0,lop,i,k,j;
> long cycles;
> REAL ssq=0;
> REAL scale;
>int status;
> if (argc>1) num=atoi(argv[1]);
> if (num==0) num=256;
> for (i=0;i<num;i++)
>    {
>     real[i]=i;
>     imag[i]=0;
>    }
> scale = 1.0/num;
> lop = 2+256*256/num;
> printf("FFT-mayer                 (%-6d %6d):",lop,num);
> cycles=clock();
> for (k=0;k<lop;k++)
>     {
>      fft(num, real, imag);
>      ifft(num, real, imag);
>      for (j=0;j<num*2;j++)  {real[j]*=scale;imag[j]*=scale;}
>     }
> printf("%10d CPU us ;",clock());
>
> for (ssq=0,i=0;i<num;i++)
>    ssq+=(real[i]-i)*(real[i]-i);
> printf("ssq errors %#.2g\n",ssq);
>
>}
========= End of time_mayer.c =========
echo time_r_mayer.c 1>&2
cat <<'========= End of time_r_mayer.c =========' | sed 's/^>//' >time_r_mayer.c
>#define REAL double
>
> REAL real[140001];
> REAL imag[140001];
>
>main(argc,argv)
>int argc;
>char **argv;
>{
> int num=0,lop,i,k,j;
> long cycles;
> REAL ssq=0;
> REAL scale;
>int status;
> if (argc>1) num=atoi(argv[1]);
> if (num==0) num=256;
> for (i=0;i<num;i++)
>    {
>     real[i]=i;
>     imag[i]=0;
>    }
> scale = 1.0/num;
> lop = 2+256*256/num;
> printf("real-FFT-mayer            (%-6d %6d):",lop,num);
> cycles=clock();
> for (k=0;k<lop;k++)
>     {
>      realfft(num, real);
>      realifft(num, real);
>      for (j=0;j<num*2;j++)  {real[j]*=scale;}
>     }
> printf("%10d CPU us ;",clock());
>
> for (ssq=0,i=0;i<num;i++)
>    ssq+=(real[i]-i)*(real[i]-i);
> printf("ssq errors %#.2g\n",ssq);
>
>}
========= End of time_r_mayer.c =========
echo trigtbl.h 1>&2
cat <<'========= End of trigtbl.h =========' | sed 's/^>//' >trigtbl.h
>/*
>** Please only distribute this with it's associated FHT routine.
>** This algorithm is apparently patented(!) and the code copyrighted. 
>** See the comment with the fht routine for more info.
>**   -Thanks,
>**   Ron Mayer
>*/
>
>#ifdef REAL
>#else
>#define REAL double
>#endif
>
>#ifdef GOOD_TRIG
>#else
>#define FAST_TRIG
>#endif
>
>#if defined(GOOD_TRIG)
>#define FHT_SWAP(a,b,t) {(t)=(a);(a)=(b);(b)=(t);}
>#define TRIG_VARS					         \
>      int t_lam=0;
>#define TRIG_INIT(k,c,s)					 \
>     {								 \
>      int i;							 \
>      for (i=2 ; i<=k ; i++)					 \
>          {coswrk[i]=costab[i];sinwrk[i]=sintab[i];}		 \
>      t_lam = 0;					         \
>      c = 1;							 \
>      s = 0;							 \
>     }
>#define TRIG_NEXT(k,c,s)					 \
>     {								 \
>	 int i,j;	                                         \
>         (t_lam)++;	  					 \
>         for (i=0 ; !((1<<i)&t_lam) ; i++);			 \
>         i = k-i;						 \
>         s = sinwrk[i];						 \
>         c = coswrk[i];						 \
>         if (i>1)   						 \
>            {	    						 \
>             for (j=k-i+2 ; (1<<j)&t_lam ; j++);		 \
>             j	       = k - j;					 \
>             sinwrk[i] = halsec[i] * (sinwrk[i-1] + sinwrk[j]);  \
>             coswrk[i] = halsec[i] * (coswrk[i-1] + coswrk[j]);  \
>            }                                                    \
>     }
>#define TRIG_RESET(k,c,s)
>#endif
>
>#if defined(FAST_TRIG)
>#define TRIG_VARS					 \
>      REAL t_c,t_s;
>#define TRIG_INIT(k,c,s)				 \
>    {				        		 \
>     t_c  = costab[k];				         \
>     t_s  = sintab[k];				         \
>     c    = 1;				    		 \
>     s    = 0;				    		 \
>    }
>#define TRIG_NEXT(k,c,s)				 \
>    {                                                    \
>     REAL t = c;                                         \
>     c   = t*t_c - s*t_s;				 \
>     s   = t*t_s + s*t_c;				 \
>    }
>#define TRIG_RESET(k,c,s)
>#endif
>
>static REAL halsec[20]=
>    {
>     0,
>     0,
>     .54119610014619698439972320536638942006107206337801,
>     .50979557910415916894193980398784391368261849190893,
>     .50241928618815570551167011928012092247859337193963,
>     .50060299823519630134550410676638239611758632599591,
>     .50015063602065098821477101271097658495974913010340,
>     .50003765191554772296778139077905492847503165398345,
>     .50000941253588775676512870469186533538523133757983,
>     .50000235310628608051401267171204408939326297376426,
>     .50000058827484117879868526730916804925780637276181,
>     .50000014706860214875463798283871198206179118093251,
>     .50000003676714377807315864400643020315103490883972,
>     .50000000919178552207366560348853455333939112569380,
>     .50000000229794635411562887767906868558991922348920,
>     .50000000057448658687873302235147272458812263401372
>    };
>static REAL costab[20]=
>    {
>     .00000000000000000000000000000000000000000000000000,
>     .70710678118654752440084436210484903928483593768847,
>     .92387953251128675612818318939678828682241662586364,
>     .98078528040323044912618223613423903697393373089333,
>     .99518472667219688624483695310947992157547486872985,
>     .99879545620517239271477160475910069444320361470461,
>     .99969881869620422011576564966617219685006108125772,
>     .99992470183914454092164649119638322435060646880221,
>     .99998117528260114265699043772856771617391725094433,
>     .99999529380957617151158012570011989955298763362218,
>     .99999882345170190992902571017152601904826792288976,
>     .99999970586288221916022821773876567711626389934930,
>     .99999992646571785114473148070738785694820115568892,
>     .99999998161642929380834691540290971450507605124278,
>     .99999999540410731289097193313960614895889430318945,
>     .99999999885102682756267330779455410840053741619428
>    };
>static REAL sintab[20]=
>    {
>     1.0000000000000000000000000000000000000000000000000,
>     .70710678118654752440084436210484903928483593768846,
>     .38268343236508977172845998403039886676134456248561,
>     .19509032201612826784828486847702224092769161775195,
>     .09801714032956060199419556388864184586113667316749,
>     .04906767432741801425495497694268265831474536302574,
>     .02454122852291228803173452945928292506546611923944,
>     .01227153828571992607940826195100321214037231959176,
>     .00613588464915447535964023459037258091705788631738,
>     .00306795676296597627014536549091984251894461021344,
>     .00153398018628476561230369715026407907995486457522,
>     .00076699031874270452693856835794857664314091945205,
>     .00038349518757139558907246168118138126339502603495,
>     .00019174759731070330743990956198900093346887403385,
>     .00009587379909597734587051721097647635118706561284,
>     .00004793689960306688454900399049465887274686668768
>    };
>static REAL coswrk[20]=
>    {
>     .00000000000000000000000000000000000000000000000000,
>     .70710678118654752440084436210484903928483593768847,
>     .92387953251128675612818318939678828682241662586364,
>     .98078528040323044912618223613423903697393373089333,
>     .99518472667219688624483695310947992157547486872985,
>     .99879545620517239271477160475910069444320361470461,
>     .99969881869620422011576564966617219685006108125772,
>     .99992470183914454092164649119638322435060646880221,
>     .99998117528260114265699043772856771617391725094433,
>     .99999529380957617151158012570011989955298763362218,
>     .99999882345170190992902571017152601904826792288976,
>     .99999970586288221916022821773876567711626389934930,
>     .99999992646571785114473148070738785694820115568892,
>     .99999998161642929380834691540290971450507605124278,
>     .99999999540410731289097193313960614895889430318945,
>     .99999999885102682756267330779455410840053741619428
>    };
>static REAL sinwrk[20]=
>    {
>     1.0000000000000000000000000000000000000000000000000,
>     .70710678118654752440084436210484903928483593768846,
>     .38268343236508977172845998403039886676134456248561,
>     .19509032201612826784828486847702224092769161775195,
>     .09801714032956060199419556388864184586113667316749,
>     .04906767432741801425495497694268265831474536302574,
>     .02454122852291228803173452945928292506546611923944,
>     .01227153828571992607940826195100321214037231959176,
>     .00613588464915447535964023459037258091705788631738,
>     .00306795676296597627014536549091984251894461021344,
>     .00153398018628476561230369715026407907995486457522,
>     .00076699031874270452693856835794857664314091945205,
>     .00038349518757139558907246168118138126339502603495,
>     .00019174759731070330743990956198900093346887403385,
>     .00009587379909597734587051721097647635118706561284,
>     .00004793689960306688454900399049465887274686668768
>    };
========= End of trigtbl.h =========
#  End of shell archive
exit 0