/*
 *  imdct_kni.c
 *
 *  Copyright (C) Aaron Holtzman - May 1999
 *
 *  This file is part of ac3dec, a free Dolby AC-3 stream decoder.
 *
 *  ac3dec is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *
 *  ac3dec is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with GNU Make; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *
 */

#ifdef __i386__

/*
 * NOTE(review): the header names of the four system includes below were
 * missing from this copy of the file.  <inttypes.h>, <stdio.h> and <math.h>
 * are required by the code (uint32_t, fprintf/stderr, cos/sin/M_PI);
 * <stdlib.h> is a best guess for the fourth -- confirm against the upstream
 * source.  mm_accel() and MM_ACCEL_X86_MMXEXT are assumed to be declared by
 * one of the project headers.
 */
#include <inttypes.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "ac3.h"
#include "ac3_internal.h"

#include "downmix.h"
#include "imdct_kni.h"
#include "srfft.h"

#define N 512

/*
 * Twiddle-factor table, filled once by imdct_init_kni().
 * Holds 128 groups of four floats laid out as { c, -s, -s, -c }, where
 * c = cos(2*pi*(8*i+1)/(8*N)) * scale and s is the matching sine.
 * Aligned to 16 bytes; presumably so the assembly helpers can use aligned
 * 128-bit loads -- TODO confirm against the helper implementations.
 */
static float xcos_sin_sse[128 * 4] __attribute__((aligned(16)));

/* Dispatch pointers and shared data defined elsewhere in the decoder. */
extern void (*imdct_do_512) (float data[], float delay[]);
extern void (*imdct_do_512_nol) (float data[], float delay[]);
extern void (*fft_64p) (complex_t *);

extern const int pm128[];
extern float window[];
extern complex_t buf[128];

extern void fft_64p_kni (complex_t *);
extern void fft_128p_kni (complex_t *);

static void imdct_do_512_kni (float data[], float delay[]);
static void imdct_do_512_nol_kni (float data[], float delay[]);


/*
 * Install the KNI/SSE IMDCT implementation if the CPU supports it.
 *
 * Queries mm_accel(); when the MM_ACCEL_X86_MMXEXT bit is set, the function
 * logs a message on stderr, points imdct_do_512, imdct_do_512_nol and
 * fft_64p at the *_kni routines, and fills xcos_sin_sse.
 *
 * Returns 0 when the accelerated routines were installed, -1 otherwise
 * (in which case no global state is touched).
 */
int imdct_init_kni (void)
{
    /* NOTE(review): the reason for this particular scale value is not
     * visible in this file; it is folded into every table entry. */
    const float scale = 255.99609372;
    uint32_t accel = mm_accel ();
    int i;

    /*
     * NOTE(review): the flag tested is MMXEXT while the log message and the
     * routine names refer to SSE/KNI -- confirm that this flag implies SSE
     * support on every CPU this build targets.
     */
    if (!(accel & MM_ACCEL_X86_MMXEXT)) {
        return -1;
    }

    fprintf (stderr, "Using SSE for IMDCT\n");

    imdct_do_512 = imdct_do_512_kni;
    imdct_do_512_nol = imdct_do_512_nol_kni;
    fft_64p = fft_64p_kni;

    for (i = 0; i < 128; i++) {
        /* Same expression as before, evaluated once instead of twice. */
        const double angle = 2.0f * M_PI * (8 * i + 1) / (8 * N);
        const float xcos_i = cos (angle) * scale;
        const float xsin_i = sin (angle) * scale;
        float *entry = &xcos_sin_sse[i * 4];

        entry[0] = xcos_i;
        entry[1] = -xsin_i;
        entry[2] = -xsin_i;
        entry[3] = -xcos_i;
    }

    return 0;
}


/*
 * 512-sample IMDCT built from the KNI helper routines.
 *
 * Runs, in order: the pre-IFFT twiddle (reading `data` through the pm128
 * table into `buf`), fft_128p_kni on `buf`, the post-IFFT twiddle on `buf`,
 * and finally imdct512_window_delay_kni with `buf`, `data`, `window` and
 * `delay`.  The helpers are defined elsewhere; which of `data` and `delay`
 * they write is not visible from this file.
 */
static void imdct_do_512_kni (float data[], float delay[])
{
    imdct512_pre_ifft_twiddle_kni (pm128, buf, data, xcos_sin_sse);
    fft_128p_kni (buf);
    imdct512_post_ifft_twiddle_kni (buf, xcos_sin_sse);
    imdct512_window_delay_kni (buf, data, window, delay);
}


/*
 * Same pipeline as imdct_do_512_kni(), except that the final stage is
 * imdct512_window_delay_nol_kni.  ("nol" presumably stands for
 * "no overlap" -- TODO confirm against the helper implementation.)
 */
static void imdct_do_512_nol_kni (float data[], float delay[])
{
    imdct512_pre_ifft_twiddle_kni (pm128, buf, data, xcos_sin_sse);
    fft_128p_kni (buf);
    imdct512_post_ifft_twiddle_kni (buf, xcos_sin_sse);
    imdct512_window_delay_nol_kni (buf, data, window, delay);
}

#endif