694 lines
17 KiB
C++
694 lines
17 KiB
C++
/*
|
|
|
|
ESP32 FFT
|
|
=========
|
|
|
|
This provides a vanilla radix-2 FFT implementation and a test example.
|
|
|
|
Author
|
|
------
|
|
|
|
This code was written by [Robin Scheibler](http://www.robinscheibler.org) during rainy days in October 2017.
|
|
|
|
License
|
|
-------
|
|
|
|
Copyright (c) 2017 Robin Scheibler
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
in the Software without restriction, including without limitation the rights
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in all
|
|
copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
SOFTWARE.
|
|
|
|
*/
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <math.h>
|
|
#include <complex.h>
|
|
|
|
#include "fft.h"
|
|
|
|
#define TWO_PI 6.28318530
|
|
#define USE_SPLIT_RADIX 1
|
|
#define LARGE_BASE_CASE 1
|
|
|
|
fft_config_t *fft_init(int size, fft_type_t type, fft_direction_t direction, float *input, float *output)
|
|
{
|
|
/*
|
|
* Prepare an FFT of correct size and types.
|
|
*
|
|
* If no input or output buffers are provided, they will be allocated.
|
|
*/
|
|
int k,m;
|
|
|
|
fft_config_t *config = (fft_config_t *)malloc(sizeof(fft_config_t));
|
|
|
|
// Check if the size is a power of two
|
|
if ((size & (size-1)) != 0) // tests if size is a power of two
|
|
return NULL;
|
|
|
|
// start configuration
|
|
config->flags = 0;
|
|
config->type = type;
|
|
config->direction = direction;
|
|
config->size = size;
|
|
|
|
// Allocate and precompute twiddle factors
|
|
config->twiddle_factors = (float *)malloc(2 * config->size * sizeof(float));
|
|
|
|
float two_pi_by_n = TWO_PI / config->size;
|
|
|
|
for (k = 0, m = 0 ; k < config->size ; k++, m+=2)
|
|
{
|
|
config->twiddle_factors[m] = cosf(two_pi_by_n * k); // real
|
|
config->twiddle_factors[m+1] = sinf(two_pi_by_n * k); // imag
|
|
}
|
|
|
|
// Allocate input buffer
|
|
if (input != NULL)
|
|
config->input = input;
|
|
else
|
|
{
|
|
if (config->type == FFT_REAL)
|
|
config->input = (float *)malloc(config->size * sizeof(float));
|
|
else if (config->type == FFT_COMPLEX)
|
|
config->input = (float *)malloc(2 * config->size * sizeof(float));
|
|
|
|
config->flags |= FFT_OWN_INPUT_MEM;
|
|
}
|
|
|
|
if (config->input == NULL)
|
|
return NULL;
|
|
|
|
// Allocate output buffer
|
|
if (output != NULL)
|
|
config->output = output;
|
|
else
|
|
{
|
|
if (config->type == FFT_REAL)
|
|
config->output = (float *)malloc(config->size * sizeof(float));
|
|
else if (config->type == FFT_COMPLEX)
|
|
config->output = (float *)malloc(2 * config->size * sizeof(float));
|
|
|
|
config->flags |= FFT_OWN_OUTPUT_MEM;
|
|
}
|
|
|
|
if (config->output == NULL)
|
|
return NULL;
|
|
|
|
return config;
|
|
}
|
|
|
|
void fft_destroy(fft_config_t *config)
|
|
{
|
|
if (config->flags & FFT_OWN_INPUT_MEM)
|
|
free(config->input);
|
|
|
|
if (config->flags & FFT_OWN_OUTPUT_MEM)
|
|
free(config->output);
|
|
|
|
free(config->twiddle_factors);
|
|
free(config);
|
|
}
|
|
|
|
void fft_execute(fft_config_t *config)
|
|
{
|
|
if (config->type == FFT_REAL && config->direction == FFT_FORWARD)
|
|
rfft(config->input, config->output, config->twiddle_factors, config->size);
|
|
else if (config->type == FFT_REAL && config->direction == FFT_BACKWARD)
|
|
irfft(config->input, config->output, config->twiddle_factors, config->size);
|
|
else if (config->type == FFT_COMPLEX && config->direction == FFT_FORWARD)
|
|
fft(config->input, config->output, config->twiddle_factors, config->size);
|
|
else if (config->type == FFT_COMPLEX && config->direction == FFT_BACKWARD)
|
|
ifft(config->input, config->output, config->twiddle_factors, config->size);
|
|
}
|
|
|
|
void fft(float *input, float *output, float *twiddle_factors, int n)
|
|
{
|
|
/*
|
|
* Forward fast Fourier transform
|
|
* DIT, radix-2, out-of-place implementation
|
|
*
|
|
* Parameters
|
|
* ----------
|
|
* input (float *)
|
|
* The input array containing the complex samples with
|
|
* real/imaginary parts interleaved [Re(x0), Im(x0), ..., Re(x_n-1), Im(x_n-1)]
|
|
* output (float *)
|
|
* The output array containing the complex samples with
|
|
* real/imaginary parts interleaved [Re(x0), Im(x0), ..., Re(x_n-1), Im(x_n-1)]
|
|
* n (int)
|
|
* The FFT size, should be a power of 2
|
|
*/
|
|
|
|
#if USE_SPLIT_RADIX
|
|
split_radix_fft(input, output, n, 2, twiddle_factors, 2);
|
|
#else
|
|
fft_primitive(input, output, n, 2, twiddle_factors, 2);
|
|
#endif
|
|
}
|
|
|
|
void ifft(float *input, float *output, float *twiddle_factors, int n)
|
|
{
|
|
/*
|
|
* Inverse fast Fourier transform
|
|
* DIT, radix-2, out-of-place implementation
|
|
*
|
|
* Parameters
|
|
* ----------
|
|
* input (float *)
|
|
* The input array containing the complex samples with
|
|
* real/imaginary parts interleaved [Re(x0), Im(x0), ..., Re(x_n-1), Im(x_n-1)]
|
|
* output (float *)
|
|
* The output array containing the complex samples with
|
|
* real/imaginary parts interleaved [Re(x0), Im(x0), ..., Re(x_n-1), Im(x_n-1)]
|
|
* n (int)
|
|
* The FFT size, should be a power of 2
|
|
*/
|
|
ifft_primitive(input, output, n, 2, twiddle_factors, 2);
|
|
}
|
|
|
|
void rfft(float *x, float *y, float *twiddle_factors, int n)
|
|
{
|
|
|
|
// This code uses the two-for-the-price-of-one strategy
|
|
#if USE_SPLIT_RADIX
|
|
split_radix_fft(x, y, n / 2, 2, twiddle_factors, 4);
|
|
#else
|
|
fft_primitive(x, y, n / 2, 2, twiddle_factors, 4);
|
|
#endif
|
|
|
|
// Now apply post processing to recover positive
|
|
// frequencies of the real FFT
|
|
float t = y[0];
|
|
y[0] = t + y[1]; // DC coefficient
|
|
y[1] = t - y[1]; // Center coefficient
|
|
|
|
// Apply post processing to quarter element
|
|
// this boils down to taking complex conjugate
|
|
y[n/2+1] = -y[n/2+1];
|
|
|
|
// Now process all the other frequencies
|
|
int k;
|
|
for (k = 2 ; k < n / 2 ; k += 2)
|
|
{
|
|
float xer, xei, xor_t, xoi, c, s, tr, ti;
|
|
|
|
c = twiddle_factors[k];
|
|
s = twiddle_factors[k+1];
|
|
|
|
// even half coefficient
|
|
xer = 0.5 * (y[k] + y[n-k]);
|
|
xei = 0.5 * (y[k+1] - y[n-k+1]);
|
|
|
|
// odd half coefficient
|
|
xor_t = 0.5 * (y[k+1] + y[n-k+1]);
|
|
xoi = - 0.5 * (y[k] - y[n-k]);
|
|
|
|
tr = c * xor_t + s * xoi;
|
|
ti = -s * xor_t + c * xoi;
|
|
|
|
y[k] = xer + tr;
|
|
y[k+1] = xei + ti;
|
|
|
|
y[n-k] = xer - tr;
|
|
y[n-k+1] = -(xei - ti);
|
|
}
|
|
}
|
|
|
|
void irfft(float *x, float *y, float *twiddle_factors, int n)
|
|
{
|
|
/*
|
|
* Destroys content of input vector
|
|
*/
|
|
int k;
|
|
|
|
// Here we need to apply a pre-processing first
|
|
float t = x[0];
|
|
x[0] = 0.5 * (t + x[1]);
|
|
x[1] = 0.5 * (t - x[1]);
|
|
|
|
x[n/2+1] = -x[n/2+1];
|
|
|
|
for (k = 2 ; k < n / 2 ; k += 2)
|
|
{
|
|
float xer, xei, xor_t, xoi, c, s, tr, ti;
|
|
|
|
c = twiddle_factors[k];
|
|
s = twiddle_factors[k+1];
|
|
|
|
xer = 0.5 * (x[k] + x[n-k]);
|
|
tr = 0.5 * (x[k] - x[n-k]);
|
|
|
|
xei = 0.5 * (x[k+1] - x[n-k+1]);
|
|
ti = 0.5 * (x[k+1] + x[n-k+1]);
|
|
|
|
xor_t = c * tr - s * ti;
|
|
xoi = s * tr + c * ti;
|
|
|
|
x[k] = xer - xoi;
|
|
x[k+1] = xor_t + xei;
|
|
|
|
x[n-k] = xer + xoi;
|
|
x[n-k+1] = xor_t - xei;
|
|
}
|
|
|
|
ifft_primitive(x, y, n / 2, 2, twiddle_factors, 4);
|
|
}
|
|
|
|
void fft_primitive(float *x, float *y, int n, int stride, float *twiddle_factors, int tw_stride)
|
|
{
|
|
/*
|
|
* This code will compute the FFT of the input vector x
|
|
*
|
|
* The input data is assumed to be real/imag interleaved
|
|
*
|
|
* The size n should be a power of two
|
|
*
|
|
* y is an output buffer of size 2n to accomodate for complex numbers
|
|
*
|
|
* Forward fast Fourier transform
|
|
* DIT, radix-2, out-of-place implementation
|
|
*
|
|
* For a complex FFT, call first stage as:
|
|
* fft(x, y, n, 2, 2);
|
|
*
|
|
* Parameters
|
|
* ----------
|
|
* x (float *)
|
|
* The input array containing the complex samples with
|
|
* real/imaginary parts interleaved [Re(x0), Im(x0), ..., Re(x_n-1), Im(x_n-1)]
|
|
* y (float *)
|
|
* The output array containing the complex samples with
|
|
* real/imaginary parts interleaved [Re(x0), Im(x0), ..., Re(x_n-1), Im(x_n-1)]
|
|
* n (int)
|
|
* The FFT size, should be a power of 2
|
|
* stride (int)
|
|
* The number of elements to skip between two successive samples
|
|
* tw_stride (int)
|
|
* The number of elements to skip between two successive twiddle factors
|
|
*/
|
|
int k;
|
|
float t;
|
|
|
|
#if LARGE_BASE_CASE
|
|
// End condition, stop at n=8 to avoid one trivial recursion
|
|
if (n == 8)
|
|
{
|
|
fft8(x, stride, y, 2);
|
|
return;
|
|
}
|
|
#else
|
|
// End condition, stop at n=2 to avoid one trivial recursion
|
|
if (n == 2)
|
|
{
|
|
y[0] = x[0] + x[stride];
|
|
y[1] = x[1] + x[stride + 1];
|
|
y[2] = x[0] - x[stride];
|
|
y[3] = x[1] - x[stride + 1];
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
// Recursion -- Decimation In Time algorithm
|
|
fft_primitive(x, y, n / 2, 2 * stride, twiddle_factors, 2 * tw_stride); // even half
|
|
fft_primitive(x + stride, y+n, n / 2, 2 * stride, twiddle_factors, 2 * tw_stride); // odd half
|
|
|
|
// Stitch back together
|
|
|
|
// We can a few multiplications in the first step
|
|
t = y[0];
|
|
y[0] = t + y[n];
|
|
y[n] = t - y[n];
|
|
|
|
t = y[1];
|
|
y[1] = t + y[n+1];
|
|
y[n+1] = t - y[n+1];
|
|
|
|
for (k = 1 ; k < n / 2 ; k++)
|
|
{
|
|
float x1r, x1i, x2r, x2i, c, s;
|
|
c = twiddle_factors[k * tw_stride];
|
|
s = twiddle_factors[k * tw_stride + 1];
|
|
|
|
x1r = y[2 * k];
|
|
x1i = y[2 * k + 1];
|
|
x2r = c * y[n + 2 * k] + s * y[n + 2 * k + 1];
|
|
x2i = -s * y[n + 2 * k] + c * y[n + 2 * k + 1];
|
|
|
|
y[2 * k] = x1r + x2r;
|
|
y[2 * k + 1] = x1i + x2i;
|
|
|
|
y[n + 2 * k] = x1r - x2r;
|
|
y[n + 2 * k + 1] = x1i - x2i;
|
|
}
|
|
|
|
}
|
|
|
|
void split_radix_fft(float *x, float *y, int n, int stride, float *twiddle_factors, int tw_stride)
|
|
{
|
|
/*
|
|
* This code will compute the FFT of the input vector x
|
|
*
|
|
* The input data is assumed to be real/imag interleaved
|
|
*
|
|
* The size n should be a power of two
|
|
*
|
|
* y is an output buffer of size 2n to accomodate for complex numbers
|
|
*
|
|
* Forward fast Fourier transform
|
|
* Split-Radix
|
|
* DIT, radix-2, out-of-place implementation
|
|
*
|
|
* For a complex FFT, call first stage as:
|
|
* fft(x, y, n, 2, 2);
|
|
*
|
|
* Parameters
|
|
* ----------
|
|
* x (float *)
|
|
* The input array containing the complex samples with
|
|
* real/imaginary parts interleaved [Re(x0), Im(x0), ..., Re(x_n-1), Im(x_n-1)]
|
|
* y (float *)
|
|
* The output array containing the complex samples with
|
|
* real/imaginary parts interleaved [Re(x0), Im(x0), ..., Re(x_n-1), Im(x_n-1)]
|
|
* n (int)
|
|
* The FFT size, should be a power of 2
|
|
* stride (int)
|
|
* The number of elements to skip between two successive samples
|
|
* twiddle_factors (float *)
|
|
* The array of twiddle factors
|
|
* tw_stride (int)
|
|
* The number of elements to skip between two successive twiddle factors
|
|
*/
|
|
int k;
|
|
|
|
#if LARGE_BASE_CASE
|
|
// End condition, stop at n=2 to avoid one trivial recursion
|
|
if (n == 8)
|
|
{
|
|
fft8(x, stride, y, 2);
|
|
return;
|
|
}
|
|
else if (n == 4)
|
|
{
|
|
fft4(x, stride, y, 2);
|
|
return;
|
|
}
|
|
#else
|
|
// End condition, stop at n=2 to avoid one trivial recursion
|
|
if (n == 2)
|
|
{
|
|
y[0] = x[0] + x[stride];
|
|
y[1] = x[1] + x[stride + 1];
|
|
y[2] = x[0] - x[stride];
|
|
y[3] = x[1] - x[stride + 1];
|
|
return;
|
|
}
|
|
else if (n == 1)
|
|
{
|
|
y[0] = x[0];
|
|
y[1] = x[1];
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
// Recursion -- Decimation In Time algorithm
|
|
split_radix_fft(x, y, n / 2, 2 * stride, twiddle_factors, 2 * tw_stride);
|
|
split_radix_fft(x + stride, y + n, n / 4, 4 * stride, twiddle_factors, 4 * tw_stride);
|
|
split_radix_fft(x + 3 * stride, y + n + n / 2, n / 4, 4 * stride, twiddle_factors, 4 * tw_stride);
|
|
|
|
// Stitch together the output
|
|
float u1r, u1i, u2r, u2i, x1r, x1i, x2r, x2i;
|
|
float t;
|
|
|
|
// We can save a few multiplications in the first step
|
|
u1r = y[0];
|
|
u1i = y[1];
|
|
u2r = y[n / 2];
|
|
u2i = y[n / 2 + 1];
|
|
|
|
x1r = y[n];
|
|
x1i = y[n + 1];
|
|
x2r = y[n / 2 + n];
|
|
x2i = y[n / 2 + n + 1];
|
|
|
|
t = x1r + x2r;
|
|
y[0] = u1r + t;
|
|
y[n] = u1r - t;
|
|
|
|
t = x1i + x2i;
|
|
y[1] = u1i + t;
|
|
y[n + 1] = u1i - t;
|
|
|
|
t = x2i - x1i;
|
|
y[n / 2] = u2r - t;
|
|
y[n + n / 2] = u2r + t;
|
|
|
|
t = x1r - x2r;
|
|
y[n / 2 + 1] = u2i - t;
|
|
y[n + n / 2 + 1] = u2i + t;
|
|
|
|
for (k = 1 ; k < n / 4 ; k++)
|
|
{
|
|
float u1r, u1i, u2r, u2i, x1r, x1i, x2r, x2i, c1, s1, c2, s2;
|
|
c1 = twiddle_factors[k * tw_stride];
|
|
s1 = twiddle_factors[k * tw_stride + 1];
|
|
c2 = twiddle_factors[3 * k * tw_stride];
|
|
s2 = twiddle_factors[3 * k * tw_stride + 1];
|
|
|
|
u1r = y[2 * k];
|
|
u1i = y[2 * k + 1];
|
|
u2r = y[2 * k + n / 2];
|
|
u2i = y[2 * k + n / 2 + 1];
|
|
|
|
x1r = c1 * y[n + 2 * k] + s1 * y[n + 2 * k + 1];
|
|
x1i = -s1 * y[n + 2 * k] + c1 * y[n + 2 * k + 1];
|
|
x2r = c2 * y[n / 2 + n + 2 * k] + s2 * y[n / 2 + n + 2 * k + 1];
|
|
x2i = -s2 * y[n / 2 + n + 2 * k] + c2 * y[n / 2 + n + 2 * k + 1];
|
|
|
|
t = x1r + x2r;
|
|
y[2 * k] = u1r + t;
|
|
y[2 * k + n] = u1r - t;
|
|
|
|
t = x1i + x2i;
|
|
y[2 * k + 1] = u1i + t;
|
|
y[2 * k + n + 1] = u1i - t;
|
|
|
|
t = x2i - x1i;
|
|
y[2 * k + n / 2] = u2r - t;
|
|
y[2 * k + n + n / 2] = u2r + t;
|
|
|
|
t = x1r - x2r;
|
|
y[2 * k + n / 2 + 1] = u2i - t;
|
|
y[2 * k + n + n / 2 + 1] = u2i + t;
|
|
}
|
|
|
|
}
|
|
|
|
|
|
void ifft_primitive(float *input, float *output, int n, int stride, float *twiddle_factors, int tw_stride)
|
|
{
|
|
|
|
#if USE_SPLIT_RADIX
|
|
split_radix_fft(input, output, n, stride, twiddle_factors, tw_stride);
|
|
#else
|
|
fft_primitive(input, output, n, stride, twiddle_factors, tw_stride);
|
|
#endif
|
|
|
|
int ks;
|
|
|
|
int ns = n * stride;
|
|
|
|
// reverse all coefficients from 1 to n / 2 - 1
|
|
for (ks = stride ; ks < ns / 2 ; ks += stride)
|
|
{
|
|
float t;
|
|
|
|
t = output[ks];
|
|
output[ks] = output[ns-ks];
|
|
output[ns-ks] = t;
|
|
|
|
t = output[ks+1];
|
|
output[ks+1] = output[ns-ks+1];
|
|
output[ns-ks+1] = t;
|
|
}
|
|
|
|
// Apply normalization
|
|
float norm = 1. / n;
|
|
for (ks = 0 ; ks < ns ; ks += stride)
|
|
{
|
|
output[ks] *= norm;
|
|
output[ks+1] *= norm;
|
|
}
|
|
|
|
}
|
|
|
|
inline void fft8(float *input, int stride_in, float *output, int stride_out)
|
|
{
|
|
/*
|
|
* Unrolled implementation of FFT8 for a little more performance
|
|
*/
|
|
float a0r, a1r, a2r, a3r, a4r, a5r, a6r, a7r;
|
|
float a0i, a1i, a2i, a3i, a4i, a5i, a6i, a7i;
|
|
float b0r, b1r, b2r, b3r, b4r, b5r, b6r, b7r;
|
|
float b0i, b1i, b2i, b3i, b4i, b5i, b6i, b7i;
|
|
float t;
|
|
float sin_pi_4 = 0.7071067812;
|
|
|
|
a0r = input[0];
|
|
a0i = input[1];
|
|
a1r = input[stride_in];
|
|
a1i = input[stride_in+1];
|
|
a2r = input[2*stride_in];
|
|
a2i = input[2*stride_in+1];
|
|
a3r = input[3*stride_in];
|
|
a3i = input[3*stride_in+1];
|
|
a4r = input[4*stride_in];
|
|
a4i = input[4*stride_in+1];
|
|
a5r = input[5*stride_in];
|
|
a5i = input[5*stride_in+1];
|
|
a6r = input[6*stride_in];
|
|
a6i = input[6*stride_in+1];
|
|
a7r = input[7*stride_in];
|
|
a7i = input[7*stride_in+1];
|
|
|
|
// Stage 1
|
|
|
|
b0r = a0r + a4r;
|
|
b0i = a0i + a4i;
|
|
|
|
b1r = a1r + a5r;
|
|
b1i = a1i + a5i;
|
|
|
|
b2r = a2r + a6r;
|
|
b2i = a2i + a6i;
|
|
|
|
b3r = a3r + a7r;
|
|
b3i = a3i + a7i;
|
|
|
|
b4r = a0r - a4r;
|
|
b4i = a0i - a4i;
|
|
|
|
b5r = a1r - a5r;
|
|
b5i = a1i - a5i;
|
|
// W_8^1 = 1/sqrt(2) - j / sqrt(2)
|
|
t = b5r + b5i;
|
|
b5i = (b5i - b5r) * sin_pi_4;
|
|
b5r = t * sin_pi_4;
|
|
|
|
// W_8^2 = -j
|
|
b6r = a2i - a6i;
|
|
b6i = a6r - a2r;
|
|
|
|
b7r = a3r - a7r;
|
|
b7i = a3i - a7i;
|
|
// W_8^3 = -1 / sqrt(2) + j / sqrt(2)
|
|
t = sin_pi_4 * (b7i - b7r);
|
|
b7i = - (b7r + b7i) * sin_pi_4;
|
|
b7r = t;
|
|
|
|
// Stage 2
|
|
|
|
a0r = b0r + b2r;
|
|
a0i = b0i + b2i;
|
|
|
|
a1r = b1r + b3r;
|
|
a1i = b1i + b3i;
|
|
|
|
a2r = b0r - b2r;
|
|
a2i = b0i - b2i;
|
|
|
|
// * j
|
|
a3r = b1i - b3i;
|
|
a3i = b3r - b1r;
|
|
|
|
a4r = b4r + b6r;
|
|
a4i = b4i + b6i;
|
|
|
|
a5r = b5r + b7r;
|
|
a5i = b5i + b7i;
|
|
|
|
a6r = b4r - b6r;
|
|
a6i = b4i - b6i;
|
|
|
|
// * j
|
|
a7r = b5i - b7i;
|
|
a7i = b7r - b5r;
|
|
|
|
// Stage 3
|
|
|
|
// X[0]
|
|
output[0] = a0r + a1r;
|
|
output[1] = a0i + a1i;
|
|
|
|
// X[4]
|
|
output[4*stride_out] = a0r - a1r;
|
|
output[4*stride_out+1] = a0i - a1i;
|
|
|
|
// X[2]
|
|
output[2*stride_out] = a2r + a3r;
|
|
output[2*stride_out+1] = a2i + a3i;
|
|
|
|
// X[6]
|
|
output[6*stride_out] = a2r - a3r;
|
|
output[6*stride_out+1] = a2i - a3i;
|
|
|
|
// X[1]
|
|
output[stride_out] = a4r + a5r;
|
|
output[stride_out+1] = a4i + a5i;
|
|
|
|
// X[5]
|
|
output[5*stride_out] = a4r - a5r;
|
|
output[5*stride_out+1] = a4i - a5i;
|
|
|
|
// X[3]
|
|
output[3*stride_out] = a6r + a7r;
|
|
output[3*stride_out+1] = a6i + a7i;
|
|
|
|
// X[7]
|
|
output[7*stride_out] = a6r - a7r;
|
|
output[7*stride_out+1] = a6i - a7i;
|
|
|
|
}
|
|
|
|
inline void fft4(float *input, int stride_in, float *output, int stride_out)
|
|
{
|
|
/*
|
|
* Unrolled implementation of FFT4 for a little more performance
|
|
*/
|
|
float t1, t2;
|
|
|
|
t1 = input[0] + input[2*stride_in];
|
|
t2 = input[stride_in] + input[3*stride_in];
|
|
output[0] = t1 + t2;
|
|
output[2*stride_out] = t1 - t2;
|
|
|
|
t1 = input[1] + input[2*stride_in+1];
|
|
t2 = input[stride_in+1] + input[3*stride_in+1];
|
|
output[1] = t1 + t2;
|
|
output[2*stride_out+1] = t1 - t2;
|
|
|
|
t1 = input[0] - input[2*stride_in];
|
|
t2 = input[stride_in+1] - input[3*stride_in+1];
|
|
output[stride_out] = t1 + t2;
|
|
output[3*stride_out] = t1 - t2;
|
|
|
|
t1 = input[1] - input[2*stride_in+1];
|
|
t2 = input[3*stride_in] - input[stride_in];
|
|
output[stride_out+1] = t1 + t2;
|
|
output[3*stride_out+1] = t1 - t2;
|
|
}
|