1
0
mirror of https://github.com/VDR4Arch/vdr.git synced 2023-10-10 13:36:52 +02:00

Improvements from Matjaz Thaler

This commit is contained in:
Klaus Schmidinger 2001-08-09 11:41:39 +02:00
parent 85a0277910
commit c50dc5e888
47 changed files with 4454 additions and 1727 deletions

View File

@ -1,10 +1,16 @@
# #
# Makefile for 'ac3dec' # Makefile for 'ac3dec'
# #
# $Id: Makefile 1.1 2001/08/02 13:18:08 kls Exp $ # $Id: Makefile 1.2 2001/08/09 11:41:39 kls Exp $
#OBJS = coeff.o decode.o exponent.o rematrix.o bit_allocate.o crc.o dither.o\
# imdct.o sanity_check.o bitstream.o debug.o downmix.o parse.o stats.o\
# downmix_c.o imdct.o
OBJS = bit_allocate.o bitstream.o coeff.o cpu_accel.o crc.o debug.o decode.o\
dither.o downmix.o downmix_c.o exponent.o imdct.o imdct_c.o imdct_kni.o\
parse.o rematrix.o sanity_check.o srfft.o srfft_kni_c.o stats.o
OBJS = coeff.o decode.o exponent.o rematrix.o bit_allocate.o crc.o dither.o\
imdct.o sanity_check.o bitstream.o debug.o downmix.o parse.o stats.o
DEFINES += -DDOLBY_SURROUND DEFINES += -DDOLBY_SURROUND
@ -16,7 +22,7 @@ libac3.a: $(OBJS)
# Implicit rules: # Implicit rules:
%.o: %.c %.o: %.c
gcc -g -O2 -Wall -m486 -c $(DEFINES) $< gcc -g -O2 -Wall -m486 -I./ -c $(DEFINES) $<
clean: clean:
rm -f *~ libac3.a $(OBJS) rm -f *~ libac3.a $(OBJS)

View File

@ -22,39 +22,37 @@
* *
*/ */
#ifndef AARONS_TYPES #define AC3_BUFFER_SIZE (6*1024*16)
#define AARONS_TYPES
typedef unsigned long long uint_64;
typedef unsigned int uint_32;
typedef unsigned short uint_16;
typedef unsigned char uint_8;
typedef signed long long sint_64; #ifndef __AC3_H__
typedef signed int sint_32; #define __AC3_H__
typedef signed short sint_16;
typedef signed char sint_8; #ifdef __OMS__
#include <oms/plugin/output_audio.h>
#else
//#include "audio_out.h"
#endif #endif
#define AC3_DOLBY_SURR_ENABLE 0x1 #include <inttypes.h>
#define AC3_3DNOW_ENABLE 0x2
#define AC3_MMX_ENABLE 0x4
#define AC3_ALTIVEC_ENABLE 0x8
typedef struct ac3_config_s #define AC3_DOLBY_SURR_ENABLE (1<<0)
{ #define AC3_ALTIVEC_ENABLE (1<<1)
//Bit flags that enable various things
uint_32 flags; typedef struct ac3_config_s {
//Callback that points the decoder to new stream data // Bit flags that enable various things
void (*fill_buffer_callback)(uint_8 **, uint_8 **); uint32_t flags;
//Number of discrete channels in final output (for downmixing) // Number of discrete channels in final output (for downmixing)
uint_16 num_output_ch; uint16_t num_output_ch;
//Which channel of a dual mono stream to select // Which channel of a dual mono stream to select
uint_16 dual_mono_ch_sel; uint16_t dual_mono_ch_sel;
} ac3_config_t; } ac3_config_t;
void ac3_init(ac3_config_t *); void ac3dec_init (void);
uint_32 ac3_decode_data(uint_8 *data_start,uint_8 *data_end, int ac3reset, int *input_pointer, int *output_pointer, char *ac3_data);
#ifdef __OMS__
size_t ac3dec_decode_data (plugin_output_audio_t *output, uint8_t *data_start, uint8_t *data_end);
#else
size_t ac3dec_decode_data (uint8_t *data_start ,uint8_t *data_end, int ac3reset, int *input_pointer, int *output_pointer, char *ac3_data);
#endif
#endif

View File

@ -25,6 +25,8 @@
#define inline #define inline
#endif #endif
#include <setjmp.h>
/* Exponent strategy constants */ /* Exponent strategy constants */
#define EXP_REUSE (0) #define EXP_REUSE (0)
#define EXP_D15 (1) #define EXP_D15 (1)
@ -43,7 +45,8 @@ typedef float stream_samples_t[6][256];
/* global config structure */ /* global config structure */
extern ac3_config_t ac3_config; extern ac3_config_t ac3_config;
/* global error flag */ /* global error flag */
extern uint_32 error_flag; extern jmp_buf error_jmp_mark;
#define HANDLE_ERROR() longjmp (error_jmp_mark, -1)
/* Everything you wanted to know about band structure */ /* Everything you wanted to know about band structure */
/* /*
@ -72,273 +75,279 @@ extern uint_32 error_flag;
typedef struct syncinfo_s typedef struct syncinfo_s
{ {
uint_32 magic; uint32_t magic;
/* Sync word == 0x0B77 */ /* Sync word == 0x0B77 */
uint_16 syncword; uint16_t syncword;
/* crc for the first 5/8 of the sync block */ /* crc for the first 5/8 of the sync block */
/* uint_16 crc1; */ /* uint16_t crc1; */
/* Stream Sampling Rate (kHz) 0 = 48, 1 = 44.1, 2 = 32, 3 = reserved */ /* Stream Sampling Rate (kHz) 0 = 48, 1 = 44.1, 2 = 32, 3 = reserved */
uint_16 fscod; uint16_t fscod;
/* Frame size code */ /* Frame size code */
uint_16 frmsizecod; uint16_t frmsizecod;
/* Information not in the AC-3 bitstream, but derived */ /* Information not in the AC-3 bitstream, but derived */
/* Frame size in 16 bit words */ /* Frame size in 16 bit words */
uint_16 frame_size; uint16_t frame_size;
/* Bit rate in kilobits */ /* Bit rate in kilobits */
uint_16 bit_rate; uint16_t bit_rate;
/* sampling rate in hertz */ /* sampling rate in hertz */
uint_32 sampling_rate; uint32_t sampling_rate;
} syncinfo_t; } syncinfo_t;
typedef struct bsi_s typedef struct bsi_s
{ {
uint_32 magic; uint32_t magic;
/* Bit stream identification == 0x8 */ /* Bit stream identification == 0x8 */
uint_16 bsid; uint16_t bsid;
/* Bit stream mode */ /* Bit stream mode */
uint_16 bsmod; uint16_t bsmod;
/* Audio coding mode */ /* Audio coding mode */
uint_16 acmod; uint16_t acmod;
/* If we're using the centre channel then */ /* If we're using the centre channel then */
/* centre mix level */ /* centre mix level */
uint_16 cmixlev; uint16_t cmixlev;
/* If we're using the surround channel then */ /* If we're using the surround channel then */
/* surround mix level */ /* surround mix level */
uint_16 surmixlev; uint16_t surmixlev;
/* If we're in 2/0 mode then */ /* If we're in 2/0 mode then */
/* Dolby surround mix level - NOT USED - */ /* Dolby surround mix level - NOT USED - */
uint_16 dsurmod; uint16_t dsurmod;
/* Low frequency effects on */ /* Low frequency effects on */
uint_16 lfeon; uint16_t lfeon;
/* Dialogue Normalization level */ /* Dialogue Normalization level */
uint_16 dialnorm; uint16_t dialnorm;
/* Compression exists */ /* Compression exists */
uint_16 compre; uint16_t compre;
/* Compression level */ /* Compression level */
uint_16 compr; uint16_t compr;
/* Language code exists */ /* Language code exists */
uint_16 langcode; uint16_t langcode;
/* Language code */ /* Language code */
uint_16 langcod; uint16_t langcod;
/* Audio production info exists*/ /* Audio production info exists*/
uint_16 audprodie; uint16_t audprodie;
uint_16 mixlevel; uint16_t mixlevel;
uint_16 roomtyp; uint16_t roomtyp;
/* If we're in dual mono mode (acmod == 0) then extra stuff */ /* If we're in dual mono mode (acmod == 0) then extra stuff */
uint_16 dialnorm2; uint16_t dialnorm2;
uint_16 compr2e; uint16_t compr2e;
uint_16 compr2; uint16_t compr2;
uint_16 langcod2e; uint16_t langcod2e;
uint_16 langcod2; uint16_t langcod2;
uint_16 audprodi2e; uint16_t audprodi2e;
uint_16 mixlevel2; uint16_t mixlevel2;
uint_16 roomtyp2; uint16_t roomtyp2;
/* Copyright bit */ /* Copyright bit */
uint_16 copyrightb; uint16_t copyrightb;
/* Original bit */ /* Original bit */
uint_16 origbs; uint16_t origbs;
/* Timecode 1 exists */ /* Timecode 1 exists */
uint_16 timecod1e; uint16_t timecod1e;
/* Timecode 1 */ /* Timecode 1 */
uint_16 timecod1; uint16_t timecod1;
/* Timecode 2 exists */ /* Timecode 2 exists */
uint_16 timecod2e; uint16_t timecod2e;
/* Timecode 2 */ /* Timecode 2 */
uint_16 timecod2; uint16_t timecod2;
/* Additional bit stream info exists */ /* Additional bit stream info exists */
uint_16 addbsie; uint16_t addbsie;
/* Additional bit stream length - 1 (in bytes) */ /* Additional bit stream length - 1 (in bytes) */
uint_16 addbsil; uint16_t addbsil;
/* Additional bit stream information (max 64 bytes) */ /* Additional bit stream information (max 64 bytes) */
uint_8 addbsi[64]; uint8_t addbsi[64];
/* Information not in the AC-3 bitstream, but derived */ /* Information not in the AC-3 bitstream, but derived */
/* Number of channels (excluding LFE) /* Number of channels (excluding LFE)
* Derived from acmod */ * Derived from acmod */
uint_16 nfchans; uint16_t nfchans;
} bsi_t; } bsi_t;
/* more pain */ /* more pain */
typedef struct audblk_s typedef struct audblk_s
{ {
uint_32 magic1; uint32_t magic1;
/* block switch bit indexed by channel num */ /* block switch bit indexed by channel num */
uint_16 blksw[5]; uint16_t blksw[5];
/* dither enable bit indexed by channel num */ /* dither enable bit indexed by channel num */
uint_16 dithflag[5]; uint16_t dithflag[5];
/* dynamic range gain exists */ /* dynamic range gain exists */
uint_16 dynrnge; uint16_t dynrnge;
/* dynamic range gain */ /* dynamic range gain */
uint_16 dynrng; uint16_t dynrng;
/* if acmod==0 then */ /* if acmod==0 then */
/* dynamic range 2 gain exists */ /* dynamic range 2 gain exists */
uint_16 dynrng2e; uint16_t dynrng2e;
/* dynamic range 2 gain */ /* dynamic range 2 gain */
uint_16 dynrng2; uint16_t dynrng2;
/* coupling strategy exists */ /* coupling strategy exists */
uint_16 cplstre; uint16_t cplstre;
/* coupling in use */ /* coupling in use */
uint_16 cplinu; uint16_t cplinu;
/* channel coupled */ /* channel coupled */
uint_16 chincpl[5]; uint16_t chincpl[5];
/* if acmod==2 then */ /* if acmod==2 then */
/* Phase flags in use */ /* Phase flags in use */
uint_16 phsflginu; uint16_t phsflginu;
/* coupling begin frequency code */ /* coupling begin frequency code */
uint_16 cplbegf; uint16_t cplbegf;
/* coupling end frequency code */ /* coupling end frequency code */
uint_16 cplendf; uint16_t cplendf;
/* coupling band structure bits */ /* coupling band structure bits */
uint_16 cplbndstrc[18]; uint16_t cplbndstrc[18];
/* Do coupling co-ords exist for this channel? */ /* Do coupling co-ords exist for this channel? */
uint_16 cplcoe[5]; uint16_t cplcoe[5];
/* Master coupling co-ordinate */ /* Master coupling co-ordinate */
uint_16 mstrcplco[5]; uint16_t mstrcplco[5];
/* Per coupling band coupling co-ordinates */ /* Per coupling band coupling co-ordinates */
uint_16 cplcoexp[5][18]; uint16_t cplcoexp[5][18];
uint_16 cplcomant[5][18]; uint16_t cplcomant[5][18];
/* Phase flags for dual mono */ /* Phase flags for dual mono */
uint_16 phsflg[18]; uint16_t phsflg[18];
/* Is there a rematrixing strategy */ /* Is there a rematrixing strategy */
uint_16 rematstr; uint16_t rematstr;
/* Rematrixing bits */ /* Rematrixing bits */
uint_16 rematflg[4]; uint16_t rematflg[4];
/* Coupling exponent strategy */ /* Coupling exponent strategy */
uint_16 cplexpstr; uint16_t cplexpstr;
/* Exponent strategy for full bandwidth channels */ /* Exponent strategy for full bandwidth channels */
uint_16 chexpstr[5]; uint16_t chexpstr[5];
/* Exponent strategy for lfe channel */ /* Exponent strategy for lfe channel */
uint_16 lfeexpstr; uint16_t lfeexpstr;
/* Channel bandwidth for independent channels */ /* Channel bandwidth for independent channels */
uint_16 chbwcod[5]; uint16_t chbwcod[5];
/* The absolute coupling exponent */ /* The absolute coupling exponent */
uint_16 cplabsexp; uint16_t cplabsexp;
/* Coupling channel exponents (D15 mode gives 18 * 12 /3 encoded exponents */ /* Coupling channel exponents (D15 mode gives 18 * 12 /3 encoded exponents */
uint_16 cplexps[18 * 12 / 3]; uint16_t cplexps[18 * 12 / 3];
/* Sanity checking constant */ /* Sanity checking constant */
uint_32 magic2; uint32_t magic2;
/* fbw channel exponents */ /* fbw channel exponents */
uint_16 exps[5][252 / 3]; uint16_t exps[5][252 / 3];
/* channel gain range */ /* channel gain range */
uint_16 gainrng[5]; uint16_t gainrng[5];
/* low frequency exponents */ /* low frequency exponents */
uint_16 lfeexps[3]; uint16_t lfeexps[3];
/* Bit allocation info */ /* Bit allocation info */
uint_16 baie; uint16_t baie;
/* Slow decay code */ /* Slow decay code */
uint_16 sdcycod; uint16_t sdcycod;
/* Fast decay code */ /* Fast decay code */
uint_16 fdcycod; uint16_t fdcycod;
/* Slow gain code */ /* Slow gain code */
uint_16 sgaincod; uint16_t sgaincod;
/* dB per bit code */ /* dB per bit code */
uint_16 dbpbcod; uint16_t dbpbcod;
/* masking floor code */ /* masking floor code */
uint_16 floorcod; uint16_t floorcod;
/* SNR offset info */ /* SNR offset info */
uint_16 snroffste; uint16_t snroffste;
/* coarse SNR offset */ /* coarse SNR offset */
uint_16 csnroffst; uint16_t csnroffst;
/* coupling fine SNR offset */ /* coupling fine SNR offset */
uint_16 cplfsnroffst; uint16_t cplfsnroffst;
/* coupling fast gain code */ /* coupling fast gain code */
uint_16 cplfgaincod; uint16_t cplfgaincod;
/* fbw fine SNR offset */ /* fbw fine SNR offset */
uint_16 fsnroffst[5]; uint16_t fsnroffst[5];
/* fbw fast gain code */ /* fbw fast gain code */
uint_16 fgaincod[5]; uint16_t fgaincod[5];
/* lfe fine SNR offset */ /* lfe fine SNR offset */
uint_16 lfefsnroffst; uint16_t lfefsnroffst;
/* lfe fast gain code */ /* lfe fast gain code */
uint_16 lfefgaincod; uint16_t lfefgaincod;
/* Coupling leak info */ /* Coupling leak info */
uint_16 cplleake; uint16_t cplleake;
/* coupling fast leak initialization */ /* coupling fast leak initialization */
uint_16 cplfleak; uint16_t cplfleak;
/* coupling slow leak initialization */ /* coupling slow leak initialization */
uint_16 cplsleak; uint16_t cplsleak;
/* delta bit allocation info */ /* delta bit allocation info */
uint_16 deltbaie; uint16_t deltbaie;
/* coupling delta bit allocation exists */ /* coupling delta bit allocation exists */
uint_16 cpldeltbae; uint16_t cpldeltbae;
/* fbw delta bit allocation exists */ /* fbw delta bit allocation exists */
uint_16 deltbae[5]; uint16_t deltbae[5];
/* number of cpl delta bit segments */ /* number of cpl delta bit segments */
uint_16 cpldeltnseg; uint16_t cpldeltnseg;
/* coupling delta bit allocation offset */ /* coupling delta bit allocation offset */
uint_16 cpldeltoffst[8]; uint16_t cpldeltoffst[8];
/* coupling delta bit allocation length */ /* coupling delta bit allocation length */
uint_16 cpldeltlen[8]; uint16_t cpldeltlen[8];
/* coupling delta bit allocation length */ /* coupling delta bit allocation length */
uint_16 cpldeltba[8]; uint16_t cpldeltba[8];
/* number of delta bit segments */ /* number of delta bit segments */
uint_16 deltnseg[5]; uint16_t deltnseg[5];
/* fbw delta bit allocation offset */ /* fbw delta bit allocation offset */
uint_16 deltoffst[5][8]; uint16_t deltoffst[5][8];
/* fbw delta bit allocation length */ /* fbw delta bit allocation length */
uint_16 deltlen[5][8]; uint16_t deltlen[5][8];
/* fbw delta bit allocation length */ /* fbw delta bit allocation length */
uint_16 deltba[5][8]; uint16_t deltba[5][8];
/* skip length exists */ /* skip length exists */
uint_16 skiple; uint16_t skiple;
/* skip length */ /* skip length */
uint_16 skipl; uint16_t skipl;
//Removed Feb 2000 -ah //Removed Feb 2000 -ah
//added Jul 2000 ++dent
/* channel mantissas */ /* channel mantissas */
//uint_16 chmant[5][256]; uint16_t chmant[5][256];
/* coupling mantissas */ /* coupling mantissas */
uint_16 cplmant[256]; // uint16_t cplmant[256];
//Added Jun 2000 -MaXX
/* coupling floats */
float cpl_flt[ 256 ];
//Removed Feb 2000 -ah //Removed Feb 2000 -ah
//added Jul 2000 ++dent
/* coupling mantissas */ /* coupling mantissas */
//uint_16 lfemant[7]; uint16_t lfemant[7];
/* -- Information not in the bitstream, but derived thereof -- */ /* -- Information not in the bitstream, but derived thereof -- */
/* Number of coupling sub-bands */ /* Number of coupling sub-bands */
uint_16 ncplsubnd; uint16_t ncplsubnd;
/* Number of combined coupling sub-bands /* Number of combined coupling sub-bands
* Derived from ncplsubnd and cplbndstrc */ * Derived from ncplsubnd and cplbndstrc */
uint_16 ncplbnd; uint16_t ncplbnd;
/* Number of exponent groups by channel /* Number of exponent groups by channel
* Derived from strmant, endmant */ * Derived from strmant, endmant */
uint_16 nchgrps[5]; uint16_t nchgrps[5];
/* Number of coupling exponent groups /* Number of coupling exponent groups
* Derived from cplbegf, cplendf, cplexpstr */ * Derived from cplbegf, cplendf, cplexpstr */
uint_16 ncplgrps; uint16_t ncplgrps;
/* End mantissa numbers of fbw channels */ /* End mantissa numbers of fbw channels */
uint_16 endmant[5]; uint16_t endmant[5];
/* Start and end mantissa numbers for the coupling channel */ /* Start and end mantissa numbers for the coupling channel */
uint_16 cplstrtmant; uint16_t cplstrtmant;
uint_16 cplendmant; uint16_t cplendmant;
/* Decoded exponent info */ /* Decoded exponent info */
uint_16 fbw_exp[5][256]; uint16_t fbw_exp[5][256];
uint_16 cpl_exp[256]; uint16_t cpl_exp[256];
uint_16 lfe_exp[7]; uint16_t lfe_exp[7];
/* Bit allocation pointer results */ /* Bit allocation pointer results */
uint_16 fbw_bap[5][256]; uint16_t fbw_bap[5][256];
uint_16 cpl_bap[256]; uint16_t cpl_bap[256];
uint_16 lfe_bap[7]; uint16_t lfe_bap[7];
uint_32 magic3; uint32_t magic3;
} audblk_t; } audblk_t;

View File

@ -28,46 +28,46 @@
static inline sint_16 logadd(sint_16 a,sint_16 b); static inline int16_t logadd(int16_t a,int16_t b);
static sint_16 calc_lowcomp(sint_16 a,sint_16 b0,sint_16 b1,sint_16 bin); static int16_t calc_lowcomp(int16_t a,int16_t b0,int16_t b1,int16_t bin);
static inline uint_16 min(sint_16 a,sint_16 b); static inline uint16_t min(int16_t a,int16_t b);
static inline uint_16 max(sint_16 a,sint_16 b); static inline uint16_t max(int16_t a,int16_t b);
static void ba_compute_psd(sint_16 start, sint_16 end, sint_16 exps[], static void ba_compute_psd(int16_t start, int16_t end, int16_t exps[],
sint_16 psd[], sint_16 bndpsd[]); int16_t psd[], int16_t bndpsd[]);
static void ba_compute_excitation(sint_16 start, sint_16 end,sint_16 fgain, static void ba_compute_excitation(int16_t start, int16_t end,int16_t fgain,
sint_16 fastleak, sint_16 slowleak, sint_16 is_lfe, sint_16 bndpsd[], int16_t fastleak, int16_t slowleak, int16_t is_lfe, int16_t bndpsd[],
sint_16 excite[]); int16_t excite[]);
static void ba_compute_mask(sint_16 start, sint_16 end, uint_16 fscod, static void ba_compute_mask(int16_t start, int16_t end, uint16_t fscod,
uint_16 deltbae, uint_16 deltnseg, uint_16 deltoffst[], uint_16 deltba[], uint16_t deltbae, uint16_t deltnseg, uint16_t deltoffst[], uint16_t deltba[],
uint_16 deltlen[], sint_16 excite[], sint_16 mask[]); uint16_t deltlen[], int16_t excite[], int16_t mask[]);
static void ba_compute_bap(sint_16 start, sint_16 end, sint_16 snroffset, static void ba_compute_bap(int16_t start, int16_t end, int16_t snroffset,
sint_16 psd[], sint_16 mask[], sint_16 bap[]); int16_t psd[], int16_t mask[], int16_t bap[]);
/* Misc LUTs for bit allocation process */ /* Misc LUTs for bit allocation process */
static sint_16 slowdec[] = { 0x0f, 0x11, 0x13, 0x15 }; static int16_t slowdec[] = { 0x0f, 0x11, 0x13, 0x15 };
static sint_16 fastdec[] = { 0x3f, 0x53, 0x67, 0x7b }; static int16_t fastdec[] = { 0x3f, 0x53, 0x67, 0x7b };
static sint_16 slowgain[] = { 0x540, 0x4d8, 0x478, 0x410 }; static int16_t slowgain[] = { 0x540, 0x4d8, 0x478, 0x410 };
static sint_16 dbpbtab[] = { 0x000, 0x700, 0x900, 0xb00 }; static int16_t dbpbtab[] = { 0x000, 0x700, 0x900, 0xb00 };
static uint_16 floortab[] = { 0x2f0, 0x2b0, 0x270, 0x230, 0x1f0, 0x170, 0x0f0, 0xf800 }; static uint16_t floortab[] = { 0x2f0, 0x2b0, 0x270, 0x230, 0x1f0, 0x170, 0x0f0, 0xf800 };
static sint_16 fastgain[] = { 0x080, 0x100, 0x180, 0x200, 0x280, 0x300, 0x380, 0x400 }; static int16_t fastgain[] = { 0x080, 0x100, 0x180, 0x200, 0x280, 0x300, 0x380, 0x400 };
static sint_16 bndtab[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, static int16_t bndtab[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31,
34, 37, 40, 43, 46, 49, 55, 61, 67, 73, 34, 37, 40, 43, 46, 49, 55, 61, 67, 73,
79, 85, 97, 109, 121, 133, 157, 181, 205, 229 }; 79, 85, 97, 109, 121, 133, 157, 181, 205, 229 };
static sint_16 bndsz[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, static int16_t bndsz[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3,
3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6,
6, 12, 12, 12, 12, 24, 24, 24, 24, 24 }; 6, 12, 12, 12, 12, 24, 24, 24, 24, 24 };
static sint_16 masktab[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, static int16_t masktab[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 28, 28, 29, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 28, 28, 29,
29, 29, 30, 30, 30, 31, 31, 31, 32, 32, 32, 33, 33, 33, 34, 34, 29, 29, 30, 30, 30, 31, 31, 31, 32, 32, 32, 33, 33, 33, 34, 34,
34, 35, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36, 37, 37, 37, 34, 35, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36, 37, 37, 37,
@ -85,7 +85,7 @@ static sint_16 masktab[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 0, 0, 0 }; 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 0, 0, 0 };
static sint_16 latab[] = { 0x0040, 0x003f, 0x003e, 0x003d, 0x003c, 0x003b, 0x003a, 0x0039, static int16_t latab[] = { 0x0040, 0x003f, 0x003e, 0x003d, 0x003c, 0x003b, 0x003a, 0x0039,
0x0038, 0x0037, 0x0036, 0x0035, 0x0034, 0x0034, 0x0033, 0x0032, 0x0038, 0x0037, 0x0036, 0x0035, 0x0034, 0x0034, 0x0033, 0x0032,
0x0031, 0x0030, 0x002f, 0x002f, 0x002e, 0x002d, 0x002c, 0x002c, 0x0031, 0x0030, 0x002f, 0x002f, 0x002e, 0x002d, 0x002c, 0x002c,
0x002b, 0x002a, 0x0029, 0x0029, 0x0028, 0x0027, 0x0026, 0x0026, 0x002b, 0x002a, 0x0029, 0x0029, 0x0028, 0x0027, 0x0026, 0x0026,
@ -119,7 +119,7 @@ static sint_16 latab[] = { 0x0040, 0x003f, 0x003e, 0x003d, 0x003c, 0x003b, 0x003
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000}; 0x0000, 0x0000, 0x0000, 0x0000};
static sint_16 hth[][50] = {{ 0x04d0, 0x04d0, 0x0440, 0x0400, 0x03e0, 0x03c0, 0x03b0, 0x03b0, static int16_t hth[][50] = {{ 0x04d0, 0x04d0, 0x0440, 0x0400, 0x03e0, 0x03c0, 0x03b0, 0x03b0,
0x03a0, 0x03a0, 0x03a0, 0x03a0, 0x03a0, 0x0390, 0x0390, 0x0390, 0x03a0, 0x03a0, 0x03a0, 0x03a0, 0x03a0, 0x0390, 0x0390, 0x0390,
0x0380, 0x0380, 0x0370, 0x0370, 0x0360, 0x0360, 0x0350, 0x0350, 0x0380, 0x0380, 0x0370, 0x0370, 0x0360, 0x0360, 0x0350, 0x0350,
0x0340, 0x0340, 0x0330, 0x0320, 0x0310, 0x0300, 0x02f0, 0x02f0, 0x0340, 0x0340, 0x0330, 0x0320, 0x0310, 0x0300, 0x02f0, 0x02f0,
@ -144,38 +144,50 @@ static sint_16 hth[][50] = {{ 0x04d0, 0x04d0, 0x0440, 0x0400, 0x03e0, 0x03c0, 0x
0x0450, 0x04e0 }}; 0x0450, 0x04e0 }};
static sint_16 baptab[] = { 0, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 6, static int16_t baptab[] = { 0, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 6,
6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 10, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 10,
10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 14, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 14,
14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15 }; 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15 };
static sint_16 sdecay; static int16_t sdecay;
static sint_16 fdecay; static int16_t fdecay;
static sint_16 sgain; static int16_t sgain;
static sint_16 dbknee; static int16_t dbknee;
static sint_16 floor; static int16_t floor;
static sint_16 psd[256]; static int16_t psd[256];
static sint_16 bndpsd[256]; static int16_t bndpsd[256];
static sint_16 excite[256]; static int16_t excite[256];
static sint_16 mask[256]; static int16_t mask[256];
static inline uint_16
max(sint_16 a,sint_16 b) /**
*
**/
static inline uint16_t max(int16_t a,int16_t b)
{ {
return (a > b ? a : b); return (a > b ? a : b);
} }
static inline uint_16
min(sint_16 a,sint_16 b) /**
*
**/
static inline uint16_t min(int16_t a,int16_t b)
{ {
return (a < b ? a : b); return (a < b ? a : b);
} }
static inline sint_16
logadd(sint_16 a,sint_16 b) /**
*
**/
static inline int16_t logadd(int16_t a,int16_t b)
{ {
sint_16 c; int16_t c;
sint_16 address; int16_t address;
c = a - b; c = a - b;
address = min((abs(c) >> 1), 255); address = min((abs(c) >> 1), 255);
@ -187,15 +199,19 @@ logadd(sint_16 a,sint_16 b)
} }
void bit_allocate(uint_16 fscod, bsi_t *bsi, audblk_t *audblk) /**
*
**/
void bit_allocate(uint16_t fscod, bsi_t *bsi, audblk_t *audblk)
{ {
uint_16 i; uint16_t i;
sint_16 fgain; int16_t fgain;
sint_16 snroffset; int16_t snroffset;
sint_16 start; int16_t start;
sint_16 end; int16_t end;
sint_16 fastleak; int16_t fastleak;
sint_16 slowleak; int16_t slowleak;
/* Only perform bit_allocation if the exponents have changed or we /* Only perform bit_allocation if the exponents have changed or we
* have new sideband information */ * have new sideband information */
@ -217,11 +233,10 @@ void bit_allocate(uint_16 fscod, bsi_t *bsi, audblk_t *audblk)
if(!audblk->csnroffst && !audblk->fsnroffst[0] && if(!audblk->csnroffst && !audblk->fsnroffst[0] &&
!audblk->fsnroffst[1] && !audblk->fsnroffst[2] && !audblk->fsnroffst[1] && !audblk->fsnroffst[2] &&
!audblk->fsnroffst[3] && !audblk->fsnroffst[4] && !audblk->fsnroffst[3] && !audblk->fsnroffst[4] &&
!audblk->cplfsnroffst && !audblk->lfefsnroffst) !audblk->cplfsnroffst && !audblk->lfefsnroffst) {
{ memset(audblk->fbw_bap,0,sizeof(uint16_t) * 256 * 5);
memset(audblk->fbw_bap,0,sizeof(uint_16) * 256 * 5); memset(audblk->cpl_bap,0,sizeof(uint16_t) * 256);
memset(audblk->cpl_bap,0,sizeof(uint_16) * 256); memset(audblk->lfe_bap,0,sizeof(uint16_t) * 7);
memset(audblk->lfe_bap,0,sizeof(uint_16) * 7);
return; return;
} }
@ -245,8 +260,7 @@ void bit_allocate(uint_16 fscod, bsi_t *bsi, audblk_t *audblk)
ba_compute_bap(start, end, snroffset, psd, mask, audblk->fbw_bap[i]); ba_compute_bap(start, end, snroffset, psd, mask, audblk->fbw_bap[i]);
} }
if(audblk->cplinu) if(audblk->cplinu) {
{
start = audblk->cplstrtmant; start = audblk->cplstrtmant;
end = audblk->cplendmant; end = audblk->cplendmant;
fgain = fastgain[audblk->cplfgaincod]; fgain = fastgain[audblk->cplfgaincod];
@ -264,8 +278,7 @@ void bit_allocate(uint_16 fscod, bsi_t *bsi, audblk_t *audblk)
ba_compute_bap(start, end, snroffset, psd, mask, audblk->cpl_bap); ba_compute_bap(start, end, snroffset, psd, mask, audblk->cpl_bap);
} }
if(bsi->lfeon) if(bsi->lfeon) {
{
start = 0; start = 0;
end = 7; end = 7;
fgain = fastgain[audblk->lfefgaincod]; fgain = fastgain[audblk->lfefgaincod];
@ -285,15 +298,18 @@ void bit_allocate(uint_16 fscod, bsi_t *bsi, audblk_t *audblk)
} }
static void ba_compute_psd(sint_16 start, sint_16 end, sint_16 exps[], /**
sint_16 psd[], sint_16 bndpsd[]) *
**/
static void ba_compute_psd(int16_t start, int16_t end, int16_t exps[],
int16_t psd[], int16_t bndpsd[])
{ {
int bin,i,j,k; int bin,i,j,k;
sint_16 lastbin = 0; int16_t lastbin = 0;
/* Map the exponents into dBs */ /* Map the exponents into dBs */
for (bin=start; bin<end; bin++) for (bin=start; bin<end; bin++) {
{
psd[bin] = (3072 - (exps[bin] << 7)); psd[bin] = (3072 - (exps[bin] << 7));
} }
@ -301,14 +317,12 @@ static void ba_compute_psd(sint_16 start, sint_16 end, sint_16 exps[],
j = start; j = start;
k = masktab[start]; k = masktab[start];
do do {
{
lastbin = min(bndtab[k] + bndsz[k], end); lastbin = min(bndtab[k] + bndsz[k], end);
bndpsd[k] = psd[j]; bndpsd[k] = psd[j];
j++; j++;
for (i = j; i < lastbin; i++) for (i = j; i < lastbin; i++) {
{
bndpsd[k] = logadd(bndpsd[k], psd[j]); bndpsd[k] = logadd(bndpsd[k], psd[j]);
j++; j++;
} }
@ -317,49 +331,49 @@ static void ba_compute_psd(sint_16 start, sint_16 end, sint_16 exps[],
} while (end > lastbin); } while (end > lastbin);
} }
static void ba_compute_excitation(sint_16 start, sint_16 end,sint_16 fgain,
sint_16 fastleak, sint_16 slowleak, sint_16 is_lfe, sint_16 bndpsd[], /**
sint_16 excite[]) *
**/
static void ba_compute_excitation(int16_t start, int16_t end,int16_t fgain,
int16_t fastleak, int16_t slowleak, int16_t is_lfe, int16_t bndpsd[],
int16_t excite[])
{ {
int bin; int bin;
sint_16 bndstrt; int16_t bndstrt;
sint_16 bndend; int16_t bndend;
sint_16 lowcomp = 0; int16_t lowcomp = 0;
sint_16 begin = 0; int16_t begin = 0;
/* Compute excitation function */ /* Compute excitation function */
bndstrt = masktab[start]; bndstrt = masktab[start];
bndend = masktab[end - 1] + 1; bndend = masktab[end - 1] + 1;
if (bndstrt == 0) /* For fbw and lfe channels */ if (bndstrt == 0) { /* For fbw and lfe channels */
{
lowcomp = calc_lowcomp(lowcomp, bndpsd[0], bndpsd[1], 0); lowcomp = calc_lowcomp(lowcomp, bndpsd[0], bndpsd[1], 0);
excite[0] = bndpsd[0] - fgain - lowcomp; excite[0] = bndpsd[0] - fgain - lowcomp;
lowcomp = calc_lowcomp(lowcomp, bndpsd[1], bndpsd[2], 1); lowcomp = calc_lowcomp(lowcomp, bndpsd[1], bndpsd[2], 1);
excite[1] = bndpsd[1] - fgain - lowcomp; excite[1] = bndpsd[1] - fgain - lowcomp;
begin = 7 ; begin = 7 ;
/* Note: Do not call calc_lowcomp() for the last band of the lfe channel, (bin = 6) */ // Note: Do not call calc_lowcomp() for the last band of the lfe channel,(bin=6)
for (bin = 2; bin < 7; bin++) for (bin = 2; bin < 7; bin++) {
{
if (!(is_lfe && (bin == 6))) if (!(is_lfe && (bin == 6)))
lowcomp = calc_lowcomp(lowcomp, bndpsd[bin], bndpsd[bin+1], bin); lowcomp = calc_lowcomp(lowcomp, bndpsd[bin], bndpsd[bin+1], bin);
fastleak = bndpsd[bin] - fgain; fastleak = bndpsd[bin] - fgain;
slowleak = bndpsd[bin] - sgain; slowleak = bndpsd[bin] - sgain;
excite[bin] = fastleak - lowcomp; excite[bin] = fastleak - lowcomp;
if (!(is_lfe && (bin == 6))) if (!(is_lfe && (bin == 6))) {
{ if (bndpsd[bin] <= bndpsd[bin+1]) {
if (bndpsd[bin] <= bndpsd[bin+1])
{
begin = bin + 1 ; begin = bin + 1 ;
break; break;
} }
} }
} }
for (bin = begin; bin < min(bndend, 22); bin++) for (bin = begin; bin < min(bndend, 22); bin++) {
{
if (!(is_lfe && (bin == 6))) if (!(is_lfe && (bin == 6)))
lowcomp = calc_lowcomp(lowcomp, bndpsd[bin], bndpsd[bin+1], bin); lowcomp = calc_lowcomp(lowcomp, bndpsd[bin], bndpsd[bin+1], bin);
fastleak -= fdecay ; fastleak -= fdecay ;
@ -371,12 +385,9 @@ static void ba_compute_excitation(sint_16 start, sint_16 end,sint_16 fgain,
begin = 22; begin = 22;
} }
else /* For coupling channel */ else /* For coupling channel */
{
begin = bndstrt; begin = bndstrt;
}
for (bin = begin; bin < bndend; bin++) for (bin = begin; bin < bndend; bin++) {
{
fastleak -= fdecay; fastleak -= fdecay;
fastleak = max(fastleak, bndpsd[bin] - fgain); fastleak = max(fastleak, bndpsd[bin] - fgain);
slowleak -= sdecay; slowleak -= sdecay;
@ -385,49 +396,47 @@ static void ba_compute_excitation(sint_16 start, sint_16 end,sint_16 fgain,
} }
} }
static void ba_compute_mask(sint_16 start, sint_16 end, uint_16 fscod,
uint_16 deltbae, uint_16 deltnseg, uint_16 deltoffst[], uint_16 deltba[], /**
uint_16 deltlen[], sint_16 excite[], sint_16 mask[]) *
**/
static void ba_compute_mask(int16_t start, int16_t end, uint16_t fscod,
uint16_t deltbae, uint16_t deltnseg, uint16_t deltoffst[], uint16_t deltba[],
uint16_t deltlen[], int16_t excite[], int16_t mask[])
{ {
int bin,k; int bin,k;
sint_16 bndstrt; int16_t bndstrt;
sint_16 bndend; int16_t bndend;
sint_16 delta; int16_t delta;
bndstrt = masktab[start]; bndstrt = masktab[start];
bndend = masktab[end - 1] + 1; bndend = masktab[end - 1] + 1;
/* Compute the masking curve */ /* Compute the masking curve */
for (bin = bndstrt; bin < bndend; bin++) for (bin = bndstrt; bin < bndend; bin++) {
{ if (bndpsd[bin] < dbknee) {
if (bndpsd[bin] < dbknee)
{
excite[bin] += ((dbknee - bndpsd[bin]) >> 2); excite[bin] += ((dbknee - bndpsd[bin]) >> 2);
} }
mask[bin] = max(excite[bin], hth[fscod][bin]); mask[bin] = max(excite[bin], hth[fscod][bin]);
} }
/* Perform delta bit modulation if necessary */ /* Perform delta bit modulation if necessary */
if ((deltbae == DELTA_BIT_REUSE) || (deltbae == DELTA_BIT_NEW)) if ((deltbae == DELTA_BIT_REUSE) || (deltbae == DELTA_BIT_NEW)) {
{ int16_t band = 0;
sint_16 band = 0; int16_t seg = 0;
sint_16 seg = 0;
for (seg = 0; seg < deltnseg+1; seg++) for (seg = 0; seg < deltnseg+1; seg++) {
{
band += deltoffst[seg]; band += deltoffst[seg];
if (deltba[seg] >= 4)
{ if (deltba[seg] >= 4) {
delta = (deltba[seg] - 3) << 7; delta = (deltba[seg] - 3) << 7;
} } else {
else
{
delta = (deltba[seg] - 4) << 7; delta = (deltba[seg] - 4) << 7;
} }
for (k = 0; k < deltlen[seg]; k++) for (k = 0; k < deltlen[seg]; k++) {
{
mask[band] += delta; mask[band] += delta;
band++; band++;
} }
@ -435,19 +444,23 @@ static void ba_compute_mask(sint_16 start, sint_16 end, uint_16 fscod,
} }
} }
static void ba_compute_bap(sint_16 start, sint_16 end, sint_16 snroffset,
sint_16 psd[], sint_16 mask[], sint_16 bap[]) /**
*
**/
static void ba_compute_bap(int16_t start, int16_t end, int16_t snroffset,
int16_t psd[], int16_t mask[], int16_t bap[])
{ {
int i,j,k; int i,j,k;
sint_16 lastbin = 0; int16_t lastbin = 0;
sint_16 address = 0; int16_t address = 0;
/* Compute the bit allocation pointer for each bin */ /* Compute the bit allocation pointer for each bin */
i = start; i = start;
j = masktab[start]; j = masktab[start];
do do {
{
lastbin = min(bndtab[j] + bndsz[j], end); lastbin = min(bndtab[j] + bndsz[j], end);
mask[j] -= snroffset; mask[j] -= snroffset;
mask[j] -= floor; mask[j] -= floor;
@ -457,8 +470,7 @@ static void ba_compute_bap(sint_16 start, sint_16 end, sint_16 snroffset,
mask[j] &= 0x1fe0; mask[j] &= 0x1fe0;
mask[j] += floor; mask[j] += floor;
for (k = i; k < lastbin; k++) for (k = i; k < lastbin; k++) {
{
address = (psd[i] - mask[j]) >> 5; address = (psd[i] - mask[j]) >> 5;
address = min(63, max(0, address)); address = min(63, max(0, address));
bap[i] = baptab[address]; bap[i] = baptab[address];
@ -468,25 +480,25 @@ static void ba_compute_bap(sint_16 start, sint_16 end, sint_16 snroffset,
} while (end > lastbin); } while (end > lastbin);
} }
static sint_16
calc_lowcomp(sint_16 a,sint_16 b0,sint_16 b1,sint_16 bin) /**
*
**/
static int16_t calc_lowcomp (int16_t a, int16_t b0, int16_t b1, int16_t bin)
{ {
if (bin < 7) if (bin < 7) {
{
if ((b0 + 256) == b1) if ((b0 + 256) == b1)
a = 384; a = 384;
else if (b0 > b1) else if (b0 > b1)
a = max(0, a - 64); a = max(0, a - 64);
} } else if (bin < 20) {
else if (bin < 20)
{
if ((b0 + 256) == b1) if ((b0 + 256) == b1)
a = 320; a = 320;
else if (b0 > b1) else if (b0 > b1)
a = max(0, a - 64) ; a = max(0, a - 64) ;
} } else
else
a = max(0, a - 128); a = max(0, a - 128);
return(a); return(a);

View File

@ -21,4 +21,4 @@
* *
*/ */
void bit_allocate(uint_16 fscod, bsi_t *bsi, audblk_t *audblk); void bit_allocate(uint16_t fscod, bsi_t *bsi, audblk_t *audblk);

View File

@ -23,54 +23,57 @@
#include <stdlib.h> #include <stdlib.h>
#include <stdio.h> #include <stdio.h>
#include <inttypes.h>
#include <bswap.h>
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "ac3.h" #include "ac3.h"
#include "ac3_internal.h" #include "ac3_internal.h"
#include "bitstream.h" #include "bitstream.h"
uint_8 *buffer_start = 0; uint32_t bits_left = 0;
uint_32 bits_left = 0; uint64_t current_word;
uint_32 current_word; uint64_t *buffer_start = 0;
static inline void
bitstream_fill_current() static inline uint64_t getdword (void)
{ {
current_word = *((uint_32*)buffer_start)++; return be2me_64 (*buffer_start++);
current_word = swab32(current_word);
} }
//
// The fast paths for _get is in the
// bitstream.h header file so it can be inlined.
//
// The "bottom half" of this routine is suffixed _bh
//
// -ah
//
uint_32 static inline void bitstream_fill_current (void)
bitstream_get_bh(uint_32 num_bits)
{ {
uint_32 result; //current_word = bswap_64 (*buffer_start++);
current_word = getdword ();
}
uint32_t bitstream_get_bh (uint32_t num_bits)
{
uint32_t result;
num_bits -= bits_left; num_bits -= bits_left;
result = (current_word << (32 - bits_left)) >> (32 - bits_left); result = (current_word << (64 - bits_left)) >> (64 - bits_left);
bitstream_fill_current(); bitstream_fill_current();
if(num_bits != 0) if(num_bits != 0)
result = (result << num_bits) | (current_word >> (32 - num_bits)); result = (result << num_bits) | (current_word >> (64 - num_bits));
bits_left = 32 - num_bits; bits_left = 64 - num_bits;
return result; return result;
} }
void
bitstream_init(uint_8 *start) void bitstream_init (uint8_t *start)
{ {
//initialize the start of the buffer //initialize the start of the buffer
buffer_start = start; buffer_start = (uint64_t *) start;
bits_left = 0; bits_left = 0;
} }

View File

@ -21,56 +21,24 @@
* *
*/ */
//My new and improved vego-matic endian swapping routine
//(stolen from the kernel)
#ifdef WORDS_BIGENDIAN
# define swab32(x) (x) #include <inttypes.h>
#else extern uint32_t bits_left;
extern uint64_t current_word;
# if defined (__i386__) void bitstream_init(uint8_t *start);
inline uint32_t bitstream_get_bh(uint32_t num_bits);
# define swab32(x) __i386_swab32(x) static inline uint32_t bitstream_get (uint32_t num_bits)
static inline const uint_32 __i386_swab32(uint_32 x)
{
__asm__("bswap %0" : "=r" (x) : "0" (x));
return x;
}
# else
# define swab32(x)\
((((uint_8*)&x)[0] << 24) | (((uint_8*)&x)[1] << 16) | \
(((uint_8*)&x)[2] << 8) | (((uint_8*)&x)[3]))
# endif
#endif
extern uint_32 bits_left;
extern uint_32 current_word;
void bitstream_init(uint_8 *start);
uint_8 bitstream_get_byte(void);
uint_8 *bitstream_get_buffer_start(void);
void bitstream_buffer_frame(uint_32 frame_size);
uint_32 bitstream_get_bh(uint_32 num_bits);
static inline uint_32
bitstream_get(uint_32 num_bits)
{ {
uint_32 result; uint32_t result;
if(num_bits < bits_left) if (num_bits < bits_left) {
{ result = (current_word << (64 - bits_left)) >> (64 - num_bits);
result = (current_word << (32 - bits_left)) >> (32 - num_bits);
bits_left -= num_bits; bits_left -= num_bits;
return result; return result;
} }
return bitstream_get_bh(num_bits); return bitstream_get_bh (num_bits);
} }

80
ac3dec/bswap.h Normal file
View File

@ -0,0 +1,80 @@
/*****
*
* This file is part of the OMS program.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*****/
#ifndef __BSWAP_H__
#define __BSWAP_H__
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#ifdef HAVE_BYTESWAP_H
#include <byteswap.h>
#else
#include <inttypes.h>
#ifdef WORDS_BIGENDIAN
// FIXME these need to actually swap ;)
#define bswap_16(x) (x)
#define bswap_32(x) (x)
#define bswap_64(x) (x)
#else
// This is wrong, 'cannot take address of ...'
#define bswap_16(x) ((((uint8_t*)&x)[2] << 8) \
| (((uint8_t*)&x)[3]))
// code from bits/byteswap.h (C) 1997, 1998 Free Software Foundation, Inc.
#define bswap_32(x) \
((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \
(((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24))
#define bswap_64(x) \
(__extension__ \
({ union { __extension__ unsigned long long int __ll; \
unsigned long int __l[2]; } __w, __r; \
__w.__ll = (x); \
__r.__l[0] = bswap_32 (__w.__l[1]); \
__r.__l[1] = bswap_32 (__w.__l[0]); \
__r.__ll; }))
#endif
#endif
// be2me ... BigEndian to MachineEndian
// le2me ... LittleEndian to MachineEndian
#ifdef WORDS_BIGENDIAN
#define be2me_16(x) (x)
#define be2me_32(x) (x)
#define be2me_64(x) (x)
#define le2me_16(x) bswap_16(x)
#define le2me_32(x) bswap_32(x)
#define le2me_64(x) bswap_64(x)
#else
#define be2me_16(x) bswap_16(x)
#define be2me_32(x) bswap_32(x)
#define be2me_64(x) bswap_64(x)
#define le2me_16(x) (x)
#define le2me_32(x) (x)
#define le2me_64(x) (x)
#endif
#endif

29
ac3dec/cmplx.h Normal file
View File

@ -0,0 +1,29 @@
/*****
*
* This file is part of the OMS program.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*****/
#ifndef __COMPLEX_H__
#define __COMPLEX_H__
typedef struct complex {
float re;
float im;
} complex_t;
#endif

View File

@ -28,51 +28,183 @@
#include "ac3_internal.h" #include "ac3_internal.h"
#include "decode.h"
#include "bitstream.h" #include "bitstream.h"
#include "dither.h" #include "dither.h"
#include "coeff.h" #include "coeff.h"
// //
//Lookup tables of 0.15 two's complement quantization values //Lookup tables of 0.15 two's complement quantization values
// //
static const uint_16 q_1[3] = #define Q0 ((-2 << 15) / 3.0)
#define Q1 (0)
#define Q2 ((2 << 15) / 3.0)
static const float q_1_0[ 32 ] =
{ {
( -2 << 15)/3, 0,( 2 << 15)/3 Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,
Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,
Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,
0,0,0,0,0
}; };
static const uint_16 q_2[5] = static const float q_1_1[ 32 ] =
{ {
( -4 << 15)/5,( -2 << 15)/5, 0, Q0,Q0,Q0,Q1,Q1,Q1,Q2,Q2,Q2,
( 2 << 15)/5,( 4 << 15)/5 Q0,Q0,Q0,Q1,Q1,Q1,Q2,Q2,Q2,
Q0,Q0,Q0,Q1,Q1,Q1,Q2,Q2,Q2,
0,0,0,0,0
}; };
static const uint_16 q_3[7] = static const float q_1_2[ 32 ] =
{ {
( -6 << 15)/7,( -4 << 15)/7,( -2 << 15)/7, 0, Q0,Q1,Q2,Q0,Q1,Q2,Q0,Q1,Q2,
( 2 << 15)/7,( 4 << 15)/7,( 6 << 15)/7 Q0,Q1,Q2,Q0,Q1,Q2,Q0,Q1,Q2,
Q0,Q1,Q2,Q0,Q1,Q2,Q0,Q1,Q2,
0,0,0,0,0
}; };
static const uint_16 q_4[11] = #undef Q0
#undef Q1
#undef Q2
#define Q0 ((-4 << 15) / 5.0)
#define Q1 ((-2 << 15) / 5.0)
#define Q2 (0)
#define Q3 ((2 << 15) / 5.0)
#define Q4 ((4 << 15) / 5.0)
static const float q_2_0[ 128 ] =
{ {
(-10 << 15)/11,(-8 << 15)/11,(-6 << 15)/11, ( -4 << 15)/11,(-2 << 15)/11, 0, Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,
( 2 << 15)/11,( 4 << 15)/11,( 6 << 15)/11, ( 8 << 15)/11,(10 << 15)/11 Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,
Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,
Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,
Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,
Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,
Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,
Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,
Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,
Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,
0,0,0
}; };
static const uint_16 q_5[15] = static const float q_2_1[ 128 ] =
{ {
(-14 << 15)/15,(-12 << 15)/15,(-10 << 15)/15, Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,
( -8 << 15)/15,( -6 << 15)/15,( -4 << 15)/15, Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,
( -2 << 15)/15, 0 ,( 2 << 15)/15, Q4,Q4,Q4,Q4,Q4,Q0,Q0,Q0,Q0,Q0,
( 4 << 15)/15,( 6 << 15)/15,( 8 << 15)/15, Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,
( 10 << 15)/15,( 12 << 15)/15,( 14 << 15)/15 Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,
Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,
Q4,Q4,Q4,Q4,Q4,Q0,Q0,Q0,Q0,Q0,
Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,
Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,
Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,
Q4,Q4,Q4,Q4,Q4,0,0,0
};
static const float q_2_2[ 128 ] =
{
Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
Q0,Q1,Q2,Q3,Q4,0,0,0
};
#undef Q0
#undef Q1
#undef Q2
#undef Q3
#undef Q4
static const float q_3[7] =
{
(-6 << 15)/7.0, (-4 << 15)/7.0, (-2 << 15)/7.0, 0.0,
( 2 << 15)/7.0, ( 4 << 15)/7.0, ( 6 << 15)/7.0
};
#define Q0 ((-10 << 15) / 11.0)
#define Q1 ((-8 << 15) / 11.0)
#define Q2 ((-6 << 15) / 11.0)
#define Q3 ((-4 << 15) / 11.0)
#define Q4 ((-2 << 15) / 11.0)
#define Q5 (0)
#define Q6 ((2 << 15) / 11.0)
#define Q7 ((4 << 15) / 11.0)
#define Q8 ((6 << 15) / 11.0)
#define Q9 ((8 << 15) / 11.0)
#define QA ((10 << 15) / 11.0)
static const float q_4_0[ 128 ] =
{
Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0,
Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1,
Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2,
Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3,
Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4,
Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5,
Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6,
Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7,
Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8,
Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9,
QA, QA, QA, QA, QA, QA, QA, QA, QA, QA, QA,
0, 0, 0, 0, 0, 0, 0
};
static const float q_4_1[ 128 ] =
{
Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
0, 0, 0, 0, 0, 0, 0
};
#undef Q0
#undef Q1
#undef Q2
#undef Q3
#undef Q4
#undef Q5
#undef Q6
#undef Q7
#undef Q8
#undef Q9
#undef QA
static const float q_5[15] =
{
(-14 << 15)/15.0,(-12 << 15)/15.0,(-10 << 15)/15.0,
( -8 << 15)/15.0,( -6 << 15)/15.0,( -4 << 15)/15.0,
( -2 << 15)/15.0, 0.0 ,( 2 << 15)/15.0,
( 4 << 15)/15.0,( 6 << 15)/15.0,( 8 << 15)/15.0,
( 10 << 15)/15.0,( 12 << 15)/15.0,( 14 << 15)/15.0
}; };
// //
// Scale factors for convert_to_float // Scale factors for convert_to_float
// //
static const uint_32 u32_scale_factors[25] = static const uint32_t u32_scale_factors[25] =
{ {
0x38000000, //2 ^ -(0 + 15) 0x38000000, //2 ^ -(0 + 15)
0x37800000, //2 ^ -(1 + 15) 0x37800000, //2 ^ -(1 + 15)
@ -104,230 +236,179 @@ static const uint_32 u32_scale_factors[25] =
static float *scale_factor = (float*)u32_scale_factors; static float *scale_factor = (float*)u32_scale_factors;
//These store the persistent state of the packed mantissas //These store the persistent state of the packed mantissas
static uint_16 m_1[3]; static float q_1[2];
static uint_16 m_2[3]; static float q_2[2];
static uint_16 m_4[2]; static float q_4[1];
static uint_16 m_1_pointer; static int32_t q_1_pointer;
static uint_16 m_2_pointer; static int32_t q_2_pointer;
static uint_16 m_4_pointer; static int32_t q_4_pointer;
static float __inline__
coeff_get_float(uint16_t bap, uint16_t dithflag, uint16_t exp);
//Conversion from bap to number of bits in the mantissas //Conversion from bap to number of bits in the mantissas
//zeros account for cases 0,1,2,4 which are special cased //zeros account for cases 0,1,2,4 which are special cased
static uint_16 qnttztab[16] = { 0, 0, 0, 3, 0 , 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16}; static uint16_t qnttztab[16] =
static void coeff_reset(void);
static sint_16 coeff_get_mantissa(uint_16 bap, uint_16 dithflag);
static void coeff_uncouple_ch(float samples[],bsi_t *bsi,audblk_t *audblk,uint_32 ch);
//
// Convert a 0.15 fixed point number into IEEE single
// precision floating point and scale by 2^-exp
//
static inline float
convert_to_float(uint_16 exp, sint_16 mantissa)
{ {
float x; 0, 0, 0, 3,
0, 4, 5, 6,
7, 8, 9, 10,
11, 12, 14, 16
};
//the scale by 2^-15 is built into the scale factor table static void coeff_reset(void);
x = mantissa * scale_factor[exp]; static float coeff_get_float(uint16_t bap, uint16_t dithflag, uint16_t exp);
static void coeff_uncouple_ch(float samples[],bsi_t *bsi,audblk_t *audblk,uint32_t ch);
return x; void coeff_unpack(bsi_t *bsi, audblk_t *audblk, stream_samples_t samples)
}
void
coeff_unpack(bsi_t *bsi, audblk_t *audblk, stream_samples_t samples)
{ {
uint_16 i,j; uint16_t i,j;
uint_32 done_cpl = 0; uint32_t done_cpl = 0;
sint_16 mantissa;
coeff_reset(); coeff_reset();
for(i=0; i< bsi->nfchans; i++) for(i=0; i< bsi->nfchans; i++) {
{
for(j=0; j < audblk->endmant[i]; j++) for(j=0; j < audblk->endmant[i]; j++)
{ samples[i][j] = coeff_get_float(audblk->fbw_bap[i][j], audblk->dithflag[i], audblk->fbw_exp[i][j]);
mantissa = coeff_get_mantissa(audblk->fbw_bap[i][j],audblk->dithflag[i]);
samples[i][j] = convert_to_float(audblk->fbw_exp[i][j],mantissa);
}
if(audblk->cplinu && audblk->chincpl[i] && !(done_cpl)) if(audblk->cplinu && audblk->chincpl[i] && !(done_cpl)) {
{
// ncplmant is equal to 12 * ncplsubnd // ncplmant is equal to 12 * ncplsubnd
// Don't dither coupling channel until channel separation so that // Don't dither coupling channel until channel
// interchannel noise is uncorrelated // separation so that interchannel noise is uncorrelated
for(j=audblk->cplstrtmant; j < audblk->cplendmant; j++) for(j=audblk->cplstrtmant; j < audblk->cplendmant; j++)
audblk->cplmant[j] = coeff_get_mantissa(audblk->cpl_bap[j],0); audblk->cpl_flt[j] = coeff_get_float(audblk->cpl_bap[j],0, audblk->cpl_exp[j]);
done_cpl = 1; done_cpl = 1;
} }
} }
//uncouple the channel if necessary //uncouple the channel if necessary
if(audblk->cplinu) if(audblk->cplinu) {
{ for(i=0; i< bsi->nfchans; i++) {
for(i=0; i< bsi->nfchans; i++)
{
if(audblk->chincpl[i]) if(audblk->chincpl[i])
coeff_uncouple_ch(samples[i],bsi,audblk,i); coeff_uncouple_ch(samples[i],bsi,audblk,i);
} }
} }
if(bsi->lfeon) if(bsi->lfeon) {
{
// There are always 7 mantissas for lfe, no dither for lfe // There are always 7 mantissas for lfe, no dither for lfe
for(j=0; j < 7 ; j++) for(j=0; j < 7 ; j++)
{ samples[5][j] = coeff_get_float(audblk->lfe_bap[j], 0, audblk->lfe_exp[j]);
mantissa = coeff_get_mantissa(audblk->lfe_bap[j],0);
samples[5][j] = convert_to_float(audblk->lfe_exp[j],mantissa);
}
} }
} }
//
//Fetch a mantissa from the bitstream /**
// * Fetch a float from the bitstream
//The mantissa returned is a signed 0.15 fixed point number **/
//
static sint_16 static float inline coeff_get_float (uint16_t bap, uint16_t dithflag, uint16_t exp)
coeff_get_mantissa(uint_16 bap, uint_16 dithflag)
{ {
uint_16 mantissa; uint16_t dummy = 0;
uint_16 group_code;
//If the bap is 0-5 then we have special cases to take care of //If the bap is 0-5 then we have special cases to take care of
switch(bap) switch(bap) {
{
case 0: case 0:
if(dithflag) if(dithflag)
mantissa = dither_gen(); return (dither_gen() * scale_factor[exp]);
else
mantissa = 0; return 0.0;
break;
case 1: case 1:
if(m_1_pointer > 2) if (q_1_pointer >= 0)
{ return(q_1[q_1_pointer--] * scale_factor[exp]);
group_code = bitstream_get(5);
if(group_code > 26) if ((dummy = bitstream_get (5)) > 26)
goto error; goto error;
q_1[1] = q_1_1[dummy];
q_1[0] = q_1_2[dummy];
q_1_pointer = 1;
return (q_1_0[dummy] * scale_factor[exp]);
m_1[0] = group_code / 9;
m_1[1] = (group_code % 9) / 3;
m_1[2] = (group_code % 9) % 3;
m_1_pointer = 0;
}
mantissa = m_1[m_1_pointer++];
mantissa = q_1[mantissa];
break;
case 2: case 2:
if(q_2_pointer >= 0)
return (q_2[q_2_pointer--] * scale_factor[exp]);
if(m_2_pointer > 2) if ((dummy = bitstream_get (7)) > 124)
{ goto error;
group_code = bitstream_get(7);
if(group_code > 124) q_2[1] = q_2_1[dummy];
goto error; q_2[0] = q_2_2[dummy];
q_2_pointer = 1;
m_2[0] = group_code / 25; return (q_2_0[dummy] * scale_factor[exp]);
m_2[1] = (group_code % 25) / 5 ;
m_2[2] = (group_code % 25) % 5 ;
m_2_pointer = 0;
}
mantissa = m_2[m_2_pointer++];
mantissa = q_2[mantissa];
break;
case 3: case 3:
mantissa = bitstream_get(3); if ((dummy = bitstream_get (3)) > 6)
if(mantissa > 6)
goto error; goto error;
mantissa = q_3[mantissa]; return (q_3[dummy] * scale_factor[exp]);
break;
case 4: case 4:
if(m_4_pointer > 1) if(q_4_pointer >= 0)
{ return (q_4[q_4_pointer--] * scale_factor[exp]);
group_code = bitstream_get(7);
if(group_code > 120) if ((dummy = bitstream_get (7)) > 120)
goto error;
m_4[0] = group_code / 11;
m_4[1] = group_code % 11;
m_4_pointer = 0;
}
mantissa = m_4[m_4_pointer++];
mantissa = q_4[mantissa];
break;
case 5:
mantissa = bitstream_get(4);
if(mantissa > 14)
goto error; goto error;
mantissa = q_5[mantissa]; q_4[0] = q_4_1[dummy];
break; q_4_pointer = 0;
return (q_4_0[dummy] * scale_factor[exp]);
case 5:
if ((dummy = bitstream_get (4)) > 14)
goto error;
return (q_5[dummy] * scale_factor[exp]);
default: default:
mantissa = bitstream_get(qnttztab[bap]); dummy = bitstream_get(qnttztab[bap]);
mantissa <<= 16 - qnttztab[bap]; dummy <<= 16 - qnttztab[bap];
return ((int16_t)dummy * scale_factor[exp]);
} }
return mantissa;
error: error:
if(!error_flag) #ifdef DEBUG
fprintf(stderr,"** Invalid mantissa - skipping frame **\n"); fprintf(stderr,"** Invalid mantissa - skipping frame **\n");
error_flag = 1; #endif
HANDLE_ERROR();
return 0;
} }
//
// Reset the mantissa state /**
// * Reset the mantissa state
static void **/
coeff_reset(void)
static void coeff_reset(void)
{ {
m_1[2] = m_1[1] = m_1[0] = 0; q_1_pointer = q_2_pointer = q_4_pointer = -1;
m_2[2] = m_2[1] = m_2[0] = 0;
m_4[1] = m_4[0] = 0;
m_1_pointer = m_2_pointer = m_4_pointer = 3;
} }
//
// Uncouple the coupling channel into a fbw channel /**
// * Uncouple the coupling channel into a fbw channel
static void **/
coeff_uncouple_ch(float samples[],bsi_t *bsi,audblk_t *audblk,uint_32 ch)
static void coeff_uncouple_ch (float samples[],bsi_t *bsi,audblk_t *audblk,uint32_t ch)
{ {
uint_32 bnd = 0; uint32_t bnd = 0;
uint_32 sub_bnd = 0; uint32_t sub_bnd = 0;
uint_32 i,j; uint32_t i,j;
float cpl_coord = 1.0; float cpl_coord = 1.0;
uint_32 cpl_exp_tmp; uint32_t cpl_exp_tmp;
uint_32 cpl_mant_tmp; uint32_t cpl_mant_tmp;
sint_16 mantissa;
for (i=audblk->cplstrtmant;i<audblk->cplendmant;) {
for(i=audblk->cplstrtmant;i<audblk->cplendmant;) if (!audblk->cplbndstrc[sub_bnd++]) {
{
if(!audblk->cplbndstrc[sub_bnd++])
{
cpl_exp_tmp = audblk->cplcoexp[ch][bnd] + 3 * audblk->mstrcplco[ch]; cpl_exp_tmp = audblk->cplcoexp[ch][bnd] + 3 * audblk->mstrcplco[ch];
if(audblk->cplcoexp[ch][bnd] == 15) if (audblk->cplcoexp[ch][bnd] == 15)
cpl_mant_tmp = (audblk->cplcomant[ch][bnd]) << 11; cpl_mant_tmp = (audblk->cplcomant[ch][bnd]) << 11;
else else
cpl_mant_tmp = ((0x10) | audblk->cplcomant[ch][bnd]) << 10; cpl_mant_tmp = ((0x10) | audblk->cplcomant[ch][bnd]) << 10;
cpl_coord = convert_to_float(cpl_exp_tmp,cpl_mant_tmp) * 8.0f; cpl_coord = (cpl_mant_tmp * scale_factor[cpl_exp_tmp]) * 8.0f;
//Invert the phase for the right channel if necessary //Invert the phase for the right channel if necessary
if(bsi->acmod == 0x2 && audblk->phsflginu && ch == 1 && audblk->phsflg[bnd]) if(bsi->acmod == 0x2 && audblk->phsflginu && ch == 1 && audblk->phsflg[bnd])
@ -336,16 +417,13 @@ coeff_uncouple_ch(float samples[],bsi_t *bsi,audblk_t *audblk,uint_32 ch)
bnd++; bnd++;
} }
for(j=0;j < 12; j++) for(j=0;j < 12; j++) {
{ // Get new dither values for each channel if necessary,
//Get new dither values for each channel if necessary, so // so the channels are uncorrelated
//the channels are uncorrelated if(audblk->dithflag[ch] && !audblk->cpl_bap[i])
if(audblk->dithflag[ch] && audblk->cpl_bap[i] == 0) samples[i] = cpl_coord * (dither_gen() * scale_factor[audblk->cpl_exp[i]]);
mantissa = dither_gen();
else else
mantissa = audblk->cplmant[i]; samples[i] = cpl_coord * audblk->cpl_flt[i];
samples[i] = cpl_coord * convert_to_float(audblk->cpl_exp[i],mantissa);
i++; i++;
} }

129
ac3dec/cpu_accel.c Normal file
View File

@ -0,0 +1,129 @@
/*****
*
* This file is part of the OMS program.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*****/
#include <inttypes.h>
#include "mm_accel.h"
//#ifdef __i386__
#if 0
#ifdef __PIC__
#define cpuid(op, eax, ebx, ecx, edx) \
__asm__ __volatile__ ("pushl %%ebx\n\t" \
"cpuid\n\t" \
"movl %%ebx, %%esi\n\t" \
"popl %%ebx" \
: "=a" (eax), \
"=S" (ebx), \
"=c" (ecx), \
"=d" (edx) \
: "a" (op) \
: "cc")
#else
#define cpuid(op, eax, ebx, ecx, edx) \
__asm__("cpuid" \
: "=a" (eax), \
"=b" (ebx), \
"=c" (ecx), \
"=d" (edx) \
: "a" (op) \
: "cc")
#endif
static inline int has_cpuid ()
{
return 1;
/*
uint32_t eax, ebx;
asm ("pushfl\n\t"
"popl %0\n\t"
"movl %0,%1\n\t"
"xorl $0x200000,%0\n\t"
"pushl %0\n\t"
"popfl\n\t"
"pushfl\n\t"
"popl %0"
: "=a" (eax),
"=b" (ebx)
:
: "cc");
return (eax != ebx);
*/
}
static uint32_t x86_accel (void)
{
uint32_t eax, ebx, ecx, edx;
int AMD;
uint32_t caps;
if (!has_cpuid ()) // no cpuid
return 0;
cpuid (0x00000000, eax, ebx, ecx, edx);
if (!eax) // vendor string only
return 0;
AMD = (ebx == 0x68747541) && (ecx == 0x444d4163) && (edx == 0x69746e65);
cpuid (0x00000001, eax, ebx, ecx, edx);
if (! (edx & 0x00800000)) // no MMX
return 0;
caps = OMS_ACCEL_X86_MMX;
if (edx & 0x02000000) // SSE - identical to AMD MMX extensions
caps = OMS_ACCEL_X86_MMX | OMS_ACCEL_X86_MMXEXT;
cpuid (0x80000000, eax, ebx, ecx, edx);
if (eax < 0x80000001) // no extended capabilities
return caps;
cpuid (0x80000001, eax, ebx, ecx, edx);
if (edx & 0x80000000)
caps |= OMS_ACCEL_X86_3DNOW;
if (AMD && (edx & 0x00400000)) // AMD MMX extensions
caps |= OMS_ACCEL_X86_MMXEXT;
return caps;
}
#endif
uint32_t mm_accel (void)
{
//#ifdef __i386__
#if 0
static int got_accel = 0;
static uint32_t accel;
if (!got_accel) {
got_accel = 1;
accel = x86_accel ();
}
return accel;
#else
return 0;
#endif
}

View File

@ -30,7 +30,7 @@
#include "crc.h" #include "crc.h"
static const uint_16 crc_lut[256] = static const uint16_t crc_lut[256] =
{ {
0x0000,0x8005,0x800f,0x000a,0x801b,0x001e,0x0014,0x8011, 0x0000,0x8005,0x800f,0x000a,0x801b,0x001e,0x0014,0x8011,
0x8033,0x0036,0x003c,0x8039,0x0028,0x802d,0x8027,0x0022, 0x8033,0x0036,0x003c,0x8039,0x0028,0x802d,0x8027,0x0022,
@ -66,31 +66,31 @@ static const uint_16 crc_lut[256] =
0x8213,0x0216,0x021c,0x8219,0x0208,0x820d,0x8207,0x0202 0x8213,0x0216,0x021c,0x8219,0x0208,0x820d,0x8207,0x0202
}; };
static uint_16 state; static uint16_t state;
void
crc_init(void) void crc_init(void)
{ {
state = 0; state = 0;
} }
inline void crc_process_byte(uint_8 data) inline void crc_process_byte (uint8_t data)
{ {
state = crc_lut[data ^ (state>>8)] ^ (state<<8); state = crc_lut[data ^ (state>>8)] ^ (state<<8);
} }
void
crc_process_frame(uint_8 *data,uint_32 num_bytes)
{
uint_32 i;
for(i=0;i<num_bytes;i++) void crc_process_frame (uint8_t *data,uint32_t num_bytes)
crc_process_byte(data[i]); {
uint32_t i;
for(i=0; i<num_bytes; i++)
crc_process_byte (data[i]);
} }
int
crc_validate(void) int crc_validate(void)
{ {
return(state == 0); return (state == 0);
} }

View File

@ -23,5 +23,5 @@
int crc_validate(void); int crc_validate(void);
void crc_init(void); void crc_init(void);
void crc_process_byte(uint_8 data); void crc_process_byte(uint8_t data);
void crc_process_frame(uint_8 *data,uint_32 num_bytes); void crc_process_frame(uint8_t *data,uint32_t num_bytes);

View File

@ -31,18 +31,8 @@ static int debug_level = -1;
// We could potentially have multiple levels of debug info // We could potentially have multiple levels of debug info
int debug_is_on(void) int debug_is_on(void)
{ {
char *env_var; if(debug_level < 0) {
debug_level = getenv ("AC3_DEBUG") ? 1 : 0;
if(debug_level < 0)
{
env_var = getenv("AC3_DEBUG");
if (env_var)
{
debug_level = 1;
}
else
debug_level = 0;
} }
return debug_level; return debug_level;
@ -52,7 +42,7 @@ int debug_is_on(void)
#ifndef __GNUC__ #ifndef __GNUC__
void dprintf(char fmt[],...) void dprintf(char fmt[],...)
{ {
int foo = 0; return;
} }
#endif #endif

View File

@ -22,16 +22,20 @@
* *
*/ */
int debug_is_on(void); int debug_is_on (void);
#ifdef __GNUC__ #ifdef __GNUC__
#define dprintf(format,args...)\ #ifdef DEBUG
#define dprintf(args...)\
{\ {\
if (debug_is_on())\ if (debug_is_on())\
{\ {\
fprintf(stderr,format,## args);\ fprintf(stderr, args);\
}\ }\
} }
#else #else
#define dprintf(args...) { };
#endif
#else
void dprintf(char fmt[],...); void dprintf(char fmt[],...);
#endif #endif

View File

@ -3,9 +3,6 @@
* *
* Copyright (C) Aaron Holtzman - May 1999 * Copyright (C) Aaron Holtzman - May 1999
* *
* Added support for DVB-s PCI card by:
* Matjaz Thaler <matjaz.thaler@rd.iskraemeco.si> - November 2000
*
* This file is part of ac3dec, a free Dolby AC-3 stream decoder. * This file is part of ac3dec, a free Dolby AC-3 stream decoder.
* *
* ac3dec is free software; you can redistribute it and/or modify * ac3dec is free software; you can redistribute it and/or modify
@ -22,6 +19,17 @@
* along with GNU Make; see the file COPYING. If not, write to * along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* *
*------------------------------------------------------------
*
* Thomas Mirlacher <dent@cosy.sbg.ac.at>
* added OMS support
* 11 Jan 2001
* Thomas Mirlacher <dent@cosy.sbg.ac.at>
* faster error response using jmp functions
*
* 9 Aug 2001
* Matjaz Thaler <matjaz.thaler@rd.iskraemeco.si>
* Added support for DVB-s PCI card
* *
*/ */
@ -34,14 +42,17 @@
#include <errno.h> #include <errno.h>
#include <string.h> #include <string.h>
#include <sys/time.h> #include <sys/time.h>
#include <unistd.h>
#include <sys/types.h> #ifdef __OMS__
#include <sys/stat.h> #include <oms/oms.h>
#include <fcntl.h> #include <oms/plugin/output_audio.h>
#endif
#include "ac3.h" #include "ac3.h"
#include "ac3_internal.h" #include "ac3_internal.h"
#include "bitstream.h" #include "bitstream.h"
#include "downmix.h"
#include "srfft.h"
#include "imdct.h" #include "imdct.h"
#include "exponent.h" #include "exponent.h"
#include "coeff.h" #include "coeff.h"
@ -51,96 +62,91 @@
#include "stats.h" #include "stats.h"
#include "rematrix.h" #include "rematrix.h"
#include "sanity_check.h" #include "sanity_check.h"
#include "downmix.h"
#include "debug.h" #include "debug.h"
#ifndef __OMS__
#define AC3_BUFFER_SIZE (6*1024*16) //#include "audio_out.h"
#endif
//our global config structure //our global config structure
ac3_config_t ac3_config; ac3_config_t ac3_config;
uint_32 error_flag = 0;
static audblk_t audblk; static audblk_t audblk;
static bsi_t bsi; static bsi_t bsi;
static syncinfo_t syncinfo; static syncinfo_t syncinfo;
static uint_32 frame_count = 0; #ifndef __OMS__
//static uint_32 is_output_initialized = 0; static uint32_t done_banner;
#endif
static uint32_t is_output_initialized = 0;
//the floating point samples for one audblk //the floating point samples for one audblk
static stream_samples_t samples; static stream_samples_t samples;
//the integer samples for the entire frame (with enough space for 2 ch out) //the integer samples for the entire frame (with enough space for 2 ch out)
//if this size change, be sure to change the size when muting //if this size change, be sure to change the size when muting
static sint_16 s16_samples[2 * 6 * 256]; static int16_t s16_samples[2 * 6 * 256] __attribute__ ((aligned(16)));
// downmix stuff
static float cmixlev_lut[4] = { 0.707, 0.595, 0.500, 0.707 };
static float smixlev_lut[4] = { 0.707, 0.500, 0.0 , 0.500 };
static dm_par_t dm_par;
//Storage for the syncframe //Storage for the syncframe
#define SYNC_BUFFER_MAX_SIZE 4096 #define BUFFER_MAX_SIZE 4096
static uint_8 sync_buffer[SYNC_BUFFER_MAX_SIZE]; static uint8_t buffer[BUFFER_MAX_SIZE];
static uint_32 sync_buffer_size = 0;; static uint32_t buffer_size = 0;;
// for error handling
jmp_buf error_jmp_mark;
uint_32 static uint32_t decode_buffer_syncframe (syncinfo_t *syncinfo, uint8_t **start, uint8_t *end)
decode_sync_buffer_syncframe(syncinfo_t *syncinfo, uint_8 **start,uint_8 *end)
{ {
uint_8 *cur = *start; uint8_t *cur = *start;
uint_16 syncword = syncinfo->syncword; uint16_t syncword = syncinfo->syncword;
uint_32 ret = 0; uint32_t ret = 0;
//
// Find an ac3 sync frame. // Find an ac3 sync frame.
// while (syncword != 0x0b77) {
resync: if (cur >= end)
while(syncword != 0x0b77)
{
if(cur >= end)
goto done; goto done;
syncword = (syncword << 8) + *cur++; syncword = (syncword << 8) + *cur++;
} }
//need the next 3 bytes to decide how big the frame is //need the next 3 bytes to decide how big the frame is
while(sync_buffer_size < 3) while (buffer_size < 3) {
{
if(cur >= end) if(cur >= end)
goto done; goto done;
sync_buffer[sync_buffer_size++] = *cur++; buffer[buffer_size++] = *cur++;
} }
parse_syncinfo(syncinfo,sync_buffer); parse_syncinfo (syncinfo,buffer);
stats_print_syncinfo(syncinfo); stats_print_syncinfo (syncinfo);
while(sync_buffer_size < syncinfo->frame_size * 2 - 2) while (buffer_size < syncinfo->frame_size * 2 - 2) {
{
if(cur >= end) if(cur >= end)
goto done; goto done;
sync_buffer[sync_buffer_size++] = *cur++; buffer[buffer_size++] = *cur++;
} }
#if 0
// Check the crc over the entire frame // Check the crc over the entire frame
crc_init(); crc_init();
crc_process_frame(sync_buffer,syncinfo->frame_size * 2 - 2); crc_process_frame (buffer, syncinfo->frame_size * 2 - 2);
if(!crc_validate()) if (!crc_validate()) {
{
fprintf(stderr,"** CRC failed - skipping frame **\n"); fprintf(stderr,"** CRC failed - skipping frame **\n");
syncword = 0xffff; goto done;
sync_buffer_size = 0;
goto resync;
} }
#endif
//
//if we got to this point, we found a valid ac3 frame to decode //if we got to this point, we found a valid ac3 frame to decode
//
bitstream_init(sync_buffer); bitstream_init (buffer);
//get rid of the syncinfo struct as we already parsed it //get rid of the syncinfo struct as we already parsed it
bitstream_get(24); bitstream_get (24);
//reset the syncword for next time //reset the syncword for next time
syncword = 0xffff; syncword = 0xffff;
sync_buffer_size = 0; buffer_size = 0;
ret = 1; ret = 1;
done: done:
@ -149,100 +155,131 @@ done:
return ret; return ret;
} }
void
decode_mute(void) void inline decode_mute (void)
{ {
fprintf(stderr,"muting frame\n");
//mute the frame //mute the frame
memset(s16_samples,0,sizeof(sint_16) * 256 * 2 * 6); memset (s16_samples, 0, sizeof(int16_t) * 256 * 2 * 6);
error_flag = 0;
} }
void void ac3dec_init (void)
ac3_init(ac3_config_t *config)
{ {
memcpy(&ac3_config,config,sizeof(ac3_config_t)); // FIXME - don't do that statically here
ac3_config.num_output_ch = 2;
ac3_config.flags = 0;
imdct_init(); imdct_init ();
sanity_check_init(&syncinfo,&bsi,&audblk); downmix_init ();
memset (&syncinfo, 0, sizeof (syncinfo));
// ac3_output = *foo; memset (&bsi, 0, sizeof (bsi));
memset (&audblk, 0, sizeof (audblk));
sanity_check_init (&syncinfo,&bsi,&audblk);
} }
uint_32 ac3_decode_data(uint_8 *data_start,uint_8 *data_end, int ac3reset, int *input_pointer, int *output_pointer, char *ac3_data) #ifdef __OMS__
size_t ac3dec_decode_data (plugin_output_audio_t *output, uint8_t *data_start, uint8_t *data_end)
#else
size_t ac3dec_decode_data (uint8_t *data_start ,uint8_t *data_end, int ac3reset, int *input_pointer, int *output_pointer, char *ac3_data)
#endif
{ {
uint_32 i; uint32_t i;
int datasize; int datasize;
char *data; char *data;
if(ac3reset != 0){ if(ac3reset != 0){
syncinfo.syncword = 0xffff; syncinfo.syncword = 0xffff;
sync_buffer_size = 0; buffer_size = 0;
} }
while(decode_sync_buffer_syncframe(&syncinfo,&data_start,data_end)) if (setjmp (error_jmp_mark) < 0) {
{ ac3dec_init ();
dprintf("(decode) begin frame %d\n",frame_count++); return 0;
}
if(error_flag) while (decode_buffer_syncframe (&syncinfo, &data_start, data_end)) {
{ parse_bsi (&bsi);
decode_mute();
continue; #ifndef __OMS__
if(!done_banner) {
// stats_print_banner(&syncinfo,&bsi);
done_banner = 1;
} }
#endif
parse_bsi(&bsi); // compute downmix parameters
// downmix to tow channels for now
dm_par.clev = 0.0; dm_par.slev = 0.0; dm_par.unit = 1.0;
if (bsi.acmod & 0x1) // have center
dm_par.clev = cmixlev_lut[bsi.cmixlev];
for(i=0; i < 6; i++) if (bsi.acmod & 0x4) // have surround channels
{ dm_par.slev = smixlev_lut[bsi.surmixlev];
dm_par.unit /= 1.0 + dm_par.clev + dm_par.slev;
dm_par.clev *= dm_par.unit;
dm_par.slev *= dm_par.unit;
for(i=0; i < 6; i++) {
//Initialize freq/time sample storage //Initialize freq/time sample storage
memset(samples,0,sizeof(float) * 256 * (bsi.nfchans + bsi.lfeon)); memset (samples, 0, sizeof(float) * 256 * (bsi.nfchans + bsi.lfeon));
// Extract most of the audblk info from the bitstream // Extract most of the audblk info from the bitstream
// (minus the mantissas // (minus the mantissas
parse_audblk(&bsi,&audblk); parse_audblk (&bsi,&audblk);
// Take the differential exponent data and turn it into // Take the differential exponent data and turn it into
// absolute exponents // absolute exponents
exponent_unpack(&bsi,&audblk); exponent_unpack (&bsi,&audblk);
if(error_flag)
goto error;
// Figure out how many bits per mantissa // Figure out how many bits per mantissa
bit_allocate(syncinfo.fscod,&bsi,&audblk); bit_allocate (syncinfo.fscod,&bsi,&audblk);
// Extract the mantissas from the stream and // Extract the mantissas from the stream and
// generate floating point frequency coefficients // generate floating point frequency coefficients
coeff_unpack(&bsi,&audblk,samples); coeff_unpack (&bsi,&audblk,samples);
if(error_flag)
goto error;
if(bsi.acmod == 0x2) if (bsi.acmod == 0x2)
rematrix(&audblk,samples); rematrix (&audblk,samples);
// Convert the frequency samples into time samples // Convert the frequency samples into time samples
imdct(&bsi,&audblk,samples); imdct (&bsi,&audblk,samples, &s16_samples[i * 2 * 256], &dm_par);
// Downmix into the requested number of channels // Downmix into the requested number of channels
// and convert floating point to sint_16 // and convert floating point to int16_t
downmix(&bsi,samples,&s16_samples[i * 2 * 256]); // downmix(&bsi,samples,&s16_samples[i * 2 * 256]);
sanity_check(&syncinfo,&bsi,&audblk); if (sanity_check(&syncinfo,&bsi,&audblk) < 0) {
if(error_flag) HANDLE_ERROR ();
goto error; return 0;
}
continue; continue;
} }
parse_auxdata(&syncinfo); if (!is_output_initialized) {
#ifdef __OMS__
plugin_output_audio_attr_t attr;
#ifdef __sun__
attr.format = 16;
#else
attr.format = AFMT_S16_NE;
#endif
attr.speed = syncinfo.sampling_rate;
attr.channels = 2;
/* // output->setup (&attr);
if(!is_output_initialized) #else
{ // ao_functions->open (16, syncinfo.sampling_rate, 2);
ac3_output.open(16,syncinfo.sampling_rate,2); #endif
is_output_initialized = 1; is_output_initialized = 1;
} }
*/
#ifdef __OMS__
output->write (s16_samples, 256 * 6 * 2 * 2);
#else
// ao_functions->play(s16_samples, 256 * 6 * 2);
data = (char *)s16_samples; data = (char *)s16_samples;
datasize = 0; datasize = 0;
while(datasize < 6144){ while(datasize < 6144){
@ -257,13 +294,11 @@ uint_32 ac3_decode_data(uint_8 *data_start,uint_8 *data_end, int ac3reset, int *
} }
} }
#endif
//write(1, s16_samples, 256 * 6 * 2* 2);
//ac3_output.play(s16_samples, 256 * 6 * 2);
error:
;
//find a new frame
} }
decode_mute ();
return 0; return 0;
} }

View File

@ -31,7 +31,7 @@
#include "dither.h" #include "dither.h"
const uint_16 dither_lut[256] = const uint16_t dither_lut[256] =
{ {
0x0000, 0xa011, 0xe033, 0x4022, 0x6077, 0xc066, 0x8044, 0x2055, 0x0000, 0xa011, 0xe033, 0x4022, 0x6077, 0xc066, 0x8044, 0x2055,
0xc0ee, 0x60ff, 0x20dd, 0x80cc, 0xa099, 0x0088, 0x40aa, 0xe0bb, 0xc0ee, 0x60ff, 0x20dd, 0x80cc, 0xa099, 0x0088, 0x40aa, 0xe0bb,
@ -67,7 +67,7 @@ const uint_16 dither_lut[256] =
0x8bf4, 0x2be5, 0x6bc7, 0xcbd6, 0xeb83, 0x4b92, 0x0bb0, 0xaba1 0x8bf4, 0x2be5, 0x6bc7, 0xcbd6, 0xeb83, 0x4b92, 0x0bb0, 0xaba1
}; };
uint_16 lfsr_state = 1; uint16_t lfsr_state = 1;
// //
// see dither_gen (inline-able) in dither.h // see dither_gen (inline-able) in dither.h
@ -89,18 +89,17 @@ uint_16 lfsr_state = 1;
* *
*/ */
uint_16 dither_gen(void) uint16_t dither_gen(void)
{ {
int i; int i;
uint_32 state; uint32_t state;
//explicitly bring the state into a local var as gcc > 3.0? //explicitly bring the state into a local var as gcc > 3.0?
//doesn't know how to optimize out the stores //doesn't know how to optimize out the stores
state = lfsr_state; state = lfsr_state;
//Generate eight pseudo random bits //Generate eight pseudo random bits
for(i=0;i<8;i++) for(i=0;i<8;i++) {
{
state <<= 1; state <<= 1;
if(state & 0x10000) if(state & 0x10000)
@ -109,7 +108,7 @@ uint_16 dither_gen(void)
lfsr_state = state; lfsr_state = state;
return (((((sint_32)state<<8)>>8) * (sint_32) (0.707106 * 256.0))>>16); return (((((int32_t)state<<8)>>8) * (int32_t) (0.707106 * 256.0))>>16);
} }
#endif #endif

View File

@ -22,16 +22,16 @@
*/ */
extern uint_16 lfsr_state; extern uint16_t lfsr_state;
extern const uint_16 dither_lut[256]; extern const uint16_t dither_lut[256];
static inline uint_16 dither_gen(void) static inline uint16_t dither_gen(void)
{ {
sint_16 state; int16_t state;
state = dither_lut[lfsr_state >> 8] ^ (lfsr_state << 8); state = dither_lut[lfsr_state >> 8] ^ (lfsr_state << 8);
lfsr_state = (uint_16) state; lfsr_state = (uint16_t) state;
return ((state * (sint_32) (0.707106 * 256.0))>>8); return ((state * (int32_t) (0.707106 * 256.0))>>8);
} }

View File

@ -1,10 +1,7 @@
/* /*
* imdct.c
* *
* downmix.c * Copyright (C) Aaron Holtzman - May 1999
*
* Copyright (C) Aaron Holtzman - Sept 1999
*
* Originally based on code by Yuqing Deng.
* *
* This file is part of ac3dec, a free Dolby AC-3 stream decoder. * This file is part of ac3dec, a free Dolby AC-3 stream decoder.
* *
@ -28,401 +25,65 @@
#include <stdlib.h> #include <stdlib.h>
#include <stdio.h> #include <stdio.h>
#include <math.h> #include <math.h>
#include <mm_accel.h>
#include "ac3.h" #include "ac3.h"
#include "ac3_internal.h" #include "ac3_internal.h"
#include "decode.h"
#include "downmix.h"
#include "debug.h" #include "debug.h"
#include "downmix.h"
#include "downmix_c.h"
#include "downmix_i386.h"
#ifdef HAVE_KNI
#include "downmix_kni.h"
#endif
void (*downmix_3f_2r_to_2ch)(float *samples, dm_par_t * dm_par);
void (*downmix_3f_1r_to_2ch)(float *samples, dm_par_t * dm_par);
void (*downmix_2f_2r_to_2ch)(float *samples, dm_par_t * dm_par);
void (*downmix_2f_1r_to_2ch)(float *samples, dm_par_t * dm_par);
void (*downmix_3f_0r_to_2ch)(float *samples, dm_par_t * dm_par);
void (*stream_sample_2ch_to_s16)(int16_t *s16_samples, float *left, float *right);
void (*stream_sample_1ch_to_s16)(int16_t *s16_samples, float *center);
//Pre-scaled downmix coefficients void downmix_init()
static float cmixlev_lut[4] = { 0.2928, 0.2468, 0.2071, 0.2468 };
static float smixlev_lut[4] = { 0.2928, 0.2071, 0.0 , 0.2071 };
static void
downmix_3f_2r_to_2ch(bsi_t* bsi, stream_samples_t samples,sint_16 *s16_samples)
{ {
uint_32 j; #ifdef __i386__
float right_tmp; #ifdef HAVE_KNI
float left_tmp; uint32_t accel = mm_accel ();
float clev,slev;
float *centre = 0, *left = 0, *right = 0, *left_sur = 0, *right_sur = 0;
left = samples[0]; // other dowmixing should go here too
centre = samples[1]; if (accel & MM_ACCEL_X86_MMXEXT) {
right = samples[2]; dprintf("Using SSE for downmix\n");
left_sur = samples[3]; downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_kni;
right_sur = samples[4]; downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_kni;
downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_kni;
clev = cmixlev_lut[bsi->cmixlev]; downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_kni;
slev = smixlev_lut[bsi->surmixlev]; downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_kni;
stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_kni;
#if defined DOLBY_SURROUND stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_kni;
for (j = 0; j < 256; j++) } else if (accel & MM_ACCEL_X86_3DNOW) {
} else
#endif
#endif
{ {
right_tmp = *left_sur++ + *right_sur++; downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_c;
left_tmp = 1.4142f * *left++ + *centre - right_tmp; downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_c;
right_tmp += 1.4142f * *right++ + *centre++; downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_c;
downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_c;
s16_samples[j * 2 ] = (sint_16) (left_tmp * 16000.0f); downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_c;
s16_samples[j * 2 + 1] = (sint_16) (right_tmp * 16000.0f); #ifdef __i386__
} #if 1
stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_c;
stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_c;
#else #else
for (j = 0; j < 256; j++) stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_i386;
{ stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_i386;
left_tmp = 0.4142f * *left++ + clev * *centre + slev * *left_sur++;
right_tmp= 0.4142f * *right++ + clev * *centre++ + slev * *right_sur++;
s16_samples[j * 2 ] = (sint_16) (left_tmp * 32767.0f);
s16_samples[j * 2 + 1] = (sint_16) (right_tmp * 32767.0f);
}
#endif #endif
} #else
stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_c;
static void stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_c;
downmix_2f_2r_to_2ch(bsi_t* bsi, stream_samples_t samples,sint_16 *s16_samples)
{
uint_32 j;
float right_tmp;
float left_tmp;
float slev;
float *left = 0, *right = 0, *left_sur = 0, *right_sur = 0;
left = samples[0];
right = samples[1];
left_sur = samples[2];
right_sur = samples[3];
slev = smixlev_lut[bsi->surmixlev];
for (j = 0; j < 256; j++)
{
left_tmp = 0.4142f * *left++ + slev * *left_sur++;
right_tmp= 0.4142f * *right++ + slev * *right_sur++;
s16_samples[j * 2 ] = (sint_16) (left_tmp * 32767.0f);
s16_samples[j * 2 + 1] = (sint_16) (right_tmp * 32767.0f);
}
}
static void
downmix_3f_1r_to_2ch(bsi_t* bsi, stream_samples_t samples,sint_16 *s16_samples)
{
uint_32 j;
float right_tmp;
float left_tmp;
float clev,slev;
float *centre = 0, *left = 0, *right = 0, *sur = 0;
left = samples[0];
centre = samples[1];
right = samples[2];
//Mono surround
sur = samples[3];
clev = cmixlev_lut[bsi->cmixlev];
slev = smixlev_lut[bsi->surmixlev];
for (j = 0; j < 256; j++)
{
left_tmp = 0.4142f * *left++ + clev * *centre++ + slev * *sur;
right_tmp= 0.4142f * *right++ + clev * *centre + slev * *sur++;
s16_samples[j * 2 ] = (sint_16) (left_tmp * 32767.0f);
s16_samples[j * 2 + 1] = (sint_16) (right_tmp * 32767.0f);
}
}
static void
downmix_2f_1r_to_2ch(bsi_t* bsi, stream_samples_t samples,sint_16 *s16_samples)
{
uint_32 j;
float right_tmp;
float left_tmp;
float slev;
float *left = 0, *right = 0, *sur = 0;
left = samples[0];
right = samples[1];
//Mono surround
sur = samples[2];
slev = smixlev_lut[bsi->surmixlev];
for (j = 0; j < 256; j++)
{
left_tmp = 0.4142f * *left++ + slev * *sur;
right_tmp= 0.4142f * *right++ + slev * *sur++;
s16_samples[j * 2 ] = (sint_16) (left_tmp * 32767.0f);
s16_samples[j * 2 + 1] = (sint_16) (right_tmp * 32767.0f);
}
}
static void
downmix_3f_0r_to_2ch(bsi_t* bsi, stream_samples_t samples,sint_16 *s16_samples)
{
uint_32 j;
float right_tmp;
float left_tmp;
float clev;
float *centre = 0, *left = 0, *right = 0;
left = samples[0];
centre = samples[1];
right = samples[2];
clev = cmixlev_lut[bsi->cmixlev];
for (j = 0; j < 256; j++)
{
left_tmp = 0.4142f * *left++ + clev * *centre;
right_tmp= 0.4142f * *right++ + clev * *centre++;
s16_samples[j * 2 ] = (sint_16) (left_tmp * 32767.0f);
s16_samples[j * 2 + 1] = (sint_16) (right_tmp * 32767.0f);
}
}
static void
downmix_2f_0r_to_2ch(bsi_t* bsi, stream_samples_t samples,sint_16 *s16_samples)
{
uint_32 j;
float *left = 0, *right = 0;
left = samples[0];
right = samples[1];
for (j = 0; j < 256; j++)
{
s16_samples[j * 2 ] = (sint_16) (*left++ * 32767.0f);
s16_samples[j * 2 + 1] = (sint_16) (*right++ * 32767.0f);
}
}
static void
downmix_1f_0r_to_2ch(float *centre,sint_16 *s16_samples)
{
uint_32 j;
float tmp;
//Mono program!
for (j = 0; j < 256; j++)
{
tmp = 32767.0f * 0.7071f * *centre++;
s16_samples[j * 2 ] = s16_samples[j * 2 + 1] = (sint_16) tmp;
}
}
//
// Downmix into 2 or 4 channels (4 ch isn't in quite yet)
//
// The downmix function names have the following format
//
// downmix_Xf_Yr_to_[2|4]ch[_dolby]
//
// where X = number of front channels
// Y = number of rear channels
// [2|4] = number of output channels
// [_dolby] = with or without dolby surround mix
//
void downmix(bsi_t* bsi, stream_samples_t samples,sint_16 *s16_samples)
{
if(bsi->acmod > 7)
dprintf("(downmix) invalid acmod number\n");
//
//There are two main cases, with or without Dolby Surround
//
if(ac3_config.flags & AC3_DOLBY_SURR_ENABLE)
{
fprintf(stderr,"Dolby Surround Mixes not currently enabled\n");
exit(1);
}
//Non-Dolby surround downmixes
switch(bsi->acmod)
{
// 3/2
case 7:
downmix_3f_2r_to_2ch(bsi,samples,s16_samples);
break;
// 2/2
case 6:
downmix_2f_2r_to_2ch(bsi,samples,s16_samples);
break;
// 3/1
case 5:
downmix_3f_1r_to_2ch(bsi,samples,s16_samples);
break;
// 2/1
case 4:
downmix_2f_1r_to_2ch(bsi,samples,s16_samples);
break;
// 3/0
case 3:
downmix_3f_0r_to_2ch(bsi,samples,s16_samples);
break;
case 2:
downmix_2f_0r_to_2ch(bsi,samples,s16_samples);
break;
// 1/0
case 1:
downmix_1f_0r_to_2ch(samples[0],s16_samples);
break;
// 1+1
case 0:
downmix_1f_0r_to_2ch(samples[ac3_config.dual_mono_ch_sel],s16_samples);
break;
}
}
#if 0
//the dolby mixes lay here for the time being
switch(bsi->acmod)
{
// 3/2
case 7:
left = samples[0];
centre = samples[1];
right = samples[2];
left_sur = samples[3];
right_sur = samples[4];
for (j = 0; j < 256; j++)
{
right_tmp = 0.2265f * *left_sur++ + 0.2265f * *right_sur++;
left_tmp = -1 * right_tmp;
right_tmp += 0.3204f * *right++ + 0.2265f * *centre;
left_tmp += 0.3204f * *left++ + 0.2265f * *centre++;
samples[1][j] = right_tmp;
samples[0][j] = left_tmp;
}
break;
// 2/2
case 6:
left = samples[0];
right = samples[1];
left_sur = samples[2];
right_sur = samples[3];
for (j = 0; j < 256; j++)
{
right_tmp = 0.2265f * *left_sur++ + 0.2265f * *right_sur++;
left_tmp = -1 * right_tmp;
right_tmp += 0.3204f * *right++;
left_tmp += 0.3204f * *left++ ;
samples[1][j] = right_tmp;
samples[0][j] = left_tmp;
}
break;
// 3/1
case 5:
left = samples[0];
centre = samples[1];
right = samples[2];
//Mono surround
right_sur = samples[3];
for (j = 0; j < 256; j++)
{
right_tmp = 0.2265f * *right_sur++;
left_tmp = -1 * right_tmp;
right_tmp += 0.3204f * *right++ + 0.2265f * *centre;
left_tmp += 0.3204f * *left++ + 0.2265f * *centre++;
samples[1][j] = right_tmp;
samples[0][j] = left_tmp;
}
break;
// 2/1
case 4:
left = samples[0];
right = samples[1];
//Mono surround
right_sur = samples[2];
for (j = 0; j < 256; j++)
{
right_tmp = 0.2265f * *right_sur++;
left_tmp = -1 * right_tmp;
right_tmp += 0.3204f * *right++;
left_tmp += 0.3204f * *left++;
samples[1][j] = right_tmp;
samples[0][j] = left_tmp;
}
break;
// 3/0
case 3:
left = samples[0];
centre = samples[1];
right = samples[2];
for (j = 0; j < 256; j++)
{
right_tmp = 0.3204f * *right++ + 0.2265f * *centre;
left_tmp = 0.3204f * *left++ + 0.2265f * *centre++;
samples[1][j] = right_tmp;
samples[0][j] = left_tmp;
}
break;
// 2/0
case 2:
//Do nothing!
break;
// 1/0
case 1:
//Mono program!
right = samples[0];
for (j = 0; j < 256; j++)
{
right_tmp = 0.7071f * *right++;
samples[1][j] = right_tmp;
samples[0][j] = right_tmp;
}
break;
// 1+1
case 0:
//Dual mono, output selected by user
right = samples[ac3_config.dual_mono_ch_sel];
for (j = 0; j < 256; j++)
{
right_tmp = 0.7071f * *right++;
samples[1][j] = right_tmp;
samples[0][j] = right_tmp;
}
break;
#endif #endif
}
}

View File

@ -25,4 +25,10 @@
* *
*/ */
void downmix(bsi_t* bsi, stream_samples_t stream_samples,sint_16 *s16_samples); typedef struct dm_par_s {
float unit;
float clev;
float slev;
} dm_par_t;
void downmix_init();

161
ac3dec/downmix_c.c Normal file
View File

@ -0,0 +1,161 @@
/*
* downmix_c.c
*
* Copyright (C) Aaron Holtzman - May 1999
*
* This file is part of ac3dec, a free Dolby AC-3 stream decoder.
*
* ac3dec is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* ac3dec is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "ac3.h"
#include "ac3_internal.h"
#include "debug.h"
#include "downmix.h"
#include "downmix_c.h"
void downmix_3f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
{
int i;
float *left, *right, *center, *left_sur, *right_sur;
float left_tmp, right_tmp;
left = samples;
right = samples + 256 * 2;
center = samples + 256;
left_sur = samples + 256 * 3;
right_sur = samples + 256 * 4;
for (i=0; i < 256; i++) {
#if defined DOLBY_SURROUND
left_tmp = dm_par->unit * *left + dm_par->clev * *center - dm_par->slev * (*left_sur + *right_sur);
right_tmp= dm_par->unit * *right++ + dm_par->clev * *center + dm_par->slev * (*left_sur++ + *right_sur++);
#else
left_tmp = dm_par->unit * *left + dm_par->clev * *center + dm_par->slev * *left_sur++;
right_tmp= dm_par->unit * *right++ + dm_par->clev * *center + dm_par->slev * *right_sur++;
#endif
*left++ = left_tmp;
*center++ = right_tmp;
}
}
void downmix_2f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
{
int i;
float *left, *right, *left_sur, *right_sur;
float left_tmp, right_tmp;
left = &samples[0];
right = &samples[256];
left_sur = &samples[512];
right_sur = &samples[768];
for (i = 0; i < 256; i++) {
left_tmp = dm_par->unit * *left + dm_par->slev * *left_sur++;
right_tmp= dm_par->unit * *right + dm_par->slev * *right_sur++;
*left++ = left_tmp;
*right++ = right_tmp;
}
}
void downmix_3f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
{
int i;
float *left, *right, *center, *right_sur;
float left_tmp, right_tmp;
left = &samples[0];
right = &samples[512];
center = &samples[256];
right_sur = &samples[768];
for (i = 0; i < 256; i++) {
left_tmp = dm_par->unit * *left + dm_par->clev * *center - dm_par->slev * *right_sur;
right_tmp= dm_par->unit * *right++ + dm_par->clev * *center + dm_par->slev * *right_sur++;
*left++ = left_tmp;
*center++ = right_tmp;
}
}
void downmix_2f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
{
int i;
float *left, *right, *right_sur;
float left_tmp, right_tmp;
left = &samples[0];
right = &samples[256];
right_sur = &samples[512];
for (i = 0; i < 256; i++) {
left_tmp = dm_par->unit * *left - dm_par->slev * *right_sur;
right_tmp= dm_par->unit * *right + dm_par->slev * *right_sur++;
*left++ = left_tmp;
*right++ = right_tmp;
}
}
void downmix_3f_0r_to_2ch_c (float *samples, dm_par_t *dm_par)
{
int i;
float *left, *right, *center;
float left_tmp, right_tmp;
left = &samples[0];
center = &samples[256];
right = &samples[512];
for (i = 0; i < 256; i++) {
left_tmp = dm_par->unit * *left + dm_par->clev * *center;
right_tmp= dm_par->unit * *right++ + dm_par->clev * *center;
*left++ = left_tmp;
*center++ = right_tmp;
}
}
void stream_sample_2ch_to_s16_c (int16_t *s16_samples, float *left, float *right)
{
int i;
for (i=0; i < 256; i++) {
*s16_samples++ = (int16_t) *left++;
*s16_samples++ = (int16_t) *right++;
}
}
void stream_sample_1ch_to_s16_c (int16_t *s16_samples, float *center)
{
int i;
float tmp;
for (i=0; i < 256; i++) {
*s16_samples++ = tmp = (int16_t) (0.7071f * *center++);
*s16_samples++ = tmp;
}
}

32
ac3dec/downmix_c.h Normal file
View File

@ -0,0 +1,32 @@
/*****
*
* This file is part of the OMS program.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*****/
#ifndef __DOWNMIX_C_H__
#define __DOWNMIX_C_H__
void downmix_3f_2r_to_2ch_c(float *samples, dm_par_t * dm_par);
void downmix_3f_1r_to_2ch_c(float *samples, dm_par_t * dm_par);
void downmix_2f_2r_to_2ch_c(float *samples, dm_par_t * dm_par);
void downmix_2f_1r_to_2ch_c(float *samples, dm_par_t * dm_par);
void downmix_3f_0r_to_2ch_c(float *samples, dm_par_t * dm_par);
void stream_sample_2ch_to_s16_c(int16_t *s16_samples, float *left, float *right);
void stream_sample_1ch_to_s16_c(int16_t *s16_samples, float *center);
#endif

92
ac3dec/downmix_i386.S Normal file
View File

@ -0,0 +1,92 @@
/* This is basicly gcc generated.
* Only the floating point rounding mode loads and saves
* are removed in the stream_sample_to_s16 functions.
*/
#ifdef __i386__
.file "downmix.c"
.version "01.01"
gcc2_compiled.:
.text
.align 4
.globl stream_sample_2ch_to_s16_i386
.type stream_sample_2ch_to_s16_i386,@function
stream_sample_2ch_to_s16_i386:
pushl %ebp
movl %esp,%ebp
subl $28,%esp
pushl %edi
pushl %esi
pushl %ebx
movl 8(%ebp),%edx
movl 12(%ebp),%ebx
movl 16(%ebp),%ecx
movl $255,%esi
.p2align 4,,7
.L373:
flds (%ebx)
fistpl -8(%ebp)
movl -8(%ebp),%eax
movw %ax,(%edx)
addl $2,%edx
addl $4,%ebx
flds (%ecx)
fistpl -8(%ebp)
movl -8(%ebp),%eax
movw %ax,(%edx)
addl $4,%ecx
addl $2,%edx
decl %esi
jns .L373
popl %ebx
popl %esi
popl %edi
leave
ret
.Lfe6:
.size stream_sample_2ch_to_s16_i386,.Lfe6-stream_sample_2ch_to_s16_i386
.section .rodata
.align 4
.LC46:
.long 0x3f350481
.text
.align 4
.globl stream_sample_1ch_to_s16_i386
.type stream_sample_1ch_to_s16_i386,@function
stream_sample_1ch_to_s16_i386:
pushl %ebp
movl %esp,%ebp
subl $16,%esp
pushl %esi
pushl %ebx
movl 8(%ebp),%edx
movl 12(%ebp),%ecx
flds .LC46
movl $255,%ebx
.p2align 4,,7
.L379:
flds (%ecx)
fmul %st(1),%st
fistpl -8(%ebp)
movl -8(%ebp),%eax
movw %ax,-2(%ebp)
addl $4,%ecx
flds -2(%ebp)
fistpl -8(%ebp)
movl -8(%ebp),%eax
movw %ax,(%edx)
addl $2,%edx
movw %ax,(%edx)
addl $2,%edx
decl %ebx
jns .L379
fstp %st(0)
popl %ebx
popl %esi
leave
ret
.Lfe7:
.size stream_sample_1ch_to_s16_i386,.Lfe7-stream_sample_1ch_to_s16_i386
.ident "GCC: (GNU) 2.95.3 19991030 (prerelease)"
#endif

27
ac3dec/downmix_i386.h Normal file
View File

@ -0,0 +1,27 @@
/*****
*
* This file is part of the OMS program.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*****/
#ifndef __DOWNMIX_I386_H__
#define __DOWNMIX_I386_H__
void stream_sample_2ch_to_s16_i386(int16_t *s16_samples, float *left, float *right);
void stream_sample_1ch_to_s16_i386(int16_t *s16_samples, float *center);
#endif

396
ac3dec/downmix_kni.S Normal file
View File

@ -0,0 +1,396 @@
/*
* downmix_kni.S
*
* Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - October 2000
*
*
* downmix_kni.S is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* downmix_kni.S is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*/
#ifdef __i386__
.section .rodata
.align 4
sqrt2: .float 0f0.7071068
.p2align 5,0,
.section .text
.align 4
.global downmix_3f_2r_to_2ch_kni
.type downmix_3f_2r_to_2ch_kni, @function
downmix_3f_2r_to_2ch_kni:
pushl %ebp
movl %esp, %ebp
pushl %eax
pushl %ebx
pushl %ecx
movl 8(%ebp), %eax /* samples[] */
movl 12(%ebp), %ebx /* &dm_par */
movl $64, %ecx /* loop counter */
movss (%ebx), %xmm5 /* unit */
shufps $0, %xmm5, %xmm5 /* unit | unit | unit | unit */
movss 4(%ebx), %xmm6 /* clev */
shufps $0, %xmm6, %xmm6 /* clev | clev | clev | clev */
movss 8(%ebx), %xmm7 /* slev */
shufps $0, %xmm7, %xmm7 /* slev | slev | slev | slev */
.loop:
movaps (%eax), %xmm0 /* left */
movaps 2048(%eax), %xmm1 /* right */
movaps 1024(%eax), %xmm2 /* center */
mulps %xmm5, %xmm0
mulps %xmm5, %xmm1
mulps %xmm6, %xmm2
movaps 3072(%eax), %xmm3 /* leftsur */
movaps 4096(%eax), %xmm4 /* rithgsur */
addps %xmm2, %xmm0
addps %xmm2, %xmm1
mulps %xmm7, %xmm3
mulps %xmm7, %xmm4
addps %xmm3, %xmm0
addps %xmm4, %xmm1
movaps %xmm0, (%eax)
movaps %xmm1, 1024(%eax)
addl $16, %eax
decl %ecx
jnz .loop
popl %ecx
popl %ebx
popl %eax
leave
ret
.p2align 4,,7
.global downmix_2f_2r_to_2ch_kni
.type downmix_2f_2r_to_2ch_kni, @function
downmix_2f_2r_to_2ch_kni:
pushl %ebp
movl %esp, %ebp
pushl %eax
pushl %ebx
pushl %ecx
movl 8(%ebp), %eax /* samples[] */
movl 12(%ebp), %ebx /* &dm_par */
movl $64, %ecx /* loop counter */
movss (%ebx), %xmm5 /* unit */
shufps $0, %xmm5, %xmm5 /* unit | unit | unit | unit */
movss 8(%ebx), %xmm7 /* slev */
shufps $0, %xmm7, %xmm7 /* slev | slev | slev | slev */
.loop3:
movaps (%eax), %xmm0 /* left */
movaps 1024(%eax), %xmm1 /* right */
movaps 2048(%eax), %xmm3 /* leftsur */
mulps %xmm5, %xmm0
mulps %xmm5, %xmm1
movaps 3072(%eax), %xmm4 /* rightsur */
mulps %xmm7, %xmm3
mulps %xmm7, %xmm4
addps %xmm3, %xmm0
addps %xmm4, %xmm1
movaps %xmm0, (%eax)
movaps %xmm1, 1024(%eax)
addl $16, %eax
decl %ecx
jnz .loop3
popl %ecx
popl %ebx
popl %eax
leave
ret
.p2align 4,,7
.global downmix_3f_1r_to_2ch_kni
.type downmix_3f_1r_to_2ch_kni, @function
downmix_3f_1r_to_2ch_kni:
pushl %ebp
movl %esp, %ebp
pushl %eax
pushl %ebx
pushl %ecx
movl 8(%ebp), %eax /* samples[] */
movl 12(%ebp), %ebx /* &dm_par */
movl $64, %ecx /* loop counter */
movss (%ebx), %xmm5 /* unit */
shufps $0, %xmm5, %xmm5 /* unit | unit | unit | unit */
movss 4(%ebx), %xmm6 /* clev */
shufps $0, %xmm6, %xmm6 /* clev | clev | clev | clev */
movss 8(%ebx), %xmm7 /* slev */
shufps $0, %xmm7, %xmm7 /* slev | slev | slev | slev */
.loop4:
movaps (%eax), %xmm0 /* left */
movaps 2048(%eax), %xmm1 /* right */
movaps 1024(%eax), %xmm2 /* center */
mulps %xmm5, %xmm0
mulps %xmm5, %xmm1
mulps %xmm6, %xmm2
movaps 3072(%eax), %xmm3 /* sur */
addps %xmm2, %xmm0
mulps %xmm7, %xmm3
addps %xmm2, %xmm1
subps %xmm3, %xmm0
addps %xmm3, %xmm1
movaps %xmm0, (%eax)
movaps %xmm1, 1024(%eax)
addl $16, %eax
decl %ecx
jnz .loop4
popl %ecx
popl %ebx
popl %eax
leave
ret
.p2align 4,,7
.global downmix_2f_1r_to_2ch_kni
.type downmix_2f_1r_to_2ch_kni, @function
downmix_2f_1r_to_2ch_kni:
pushl %ebp
movl %esp, %ebp
pushl %eax
pushl %ebx
pushl %ecx
movl 8(%ebp), %eax /* samples[] */
movl 12(%ebp), %ebx /* &dm_par */
movl $64, %ecx /* loop counter */
movss (%ebx), %xmm5 /* unit */
shufps $0, %xmm5, %xmm5 /* unit | unit | unit | unit */
movss 8(%ebx), %xmm7 /* slev */
shufps $0, %xmm7, %xmm7 /* slev | slev | slev | slev */
.loop5:
movaps (%eax), %xmm0 /* left */
movaps 1024(%eax), %xmm1 /* right */
mulps %xmm5, %xmm0
mulps %xmm5, %xmm1
movaps 2048(%eax), %xmm3 /* sur */
mulps %xmm7, %xmm3
subps %xmm3, %xmm0
addps %xmm3, %xmm1
movaps %xmm0, (%eax)
movaps %xmm1, 1024(%eax)
addl $16, %eax
decl %ecx
jnz .loop5
popl %ecx
popl %ebx
popl %eax
leave
ret
.p2align 4,,7
.global downmix_3f_0r_to_2ch_kni
.type downmix_3f_0r_to_2ch_kni, @function
downmix_3f_0r_to_2ch_kni:
pushl %ebp
movl %esp, %ebp
pushl %eax
pushl %ebx
pushl %ecx
movl 8(%ebp), %eax /* samples[] */
movl 12(%ebp), %ebx /* &dm_par */
movl $64, %ecx /* loop counter */
movss (%ebx), %xmm5 /* unit */
shufps $0, %xmm5, %xmm5 /* unit | unit | unit | unit */
movss 4(%ebx), %xmm6 /* clev */
shufps $0, %xmm6, %xmm6 /* clev | clev | clev | clev */
.loop6:
movaps (%eax), %xmm0 /* left */
movaps 2048(%eax), %xmm1 /* right */
movaps 1024(%eax), %xmm2 /* center */
mulps %xmm5, %xmm0
mulps %xmm5, %xmm1
mulps %xmm6, %xmm2
addps %xmm2, %xmm0
addps %xmm2, %xmm1
movaps %xmm0, (%eax)
movaps %xmm1, 1024(%eax)
addl $16, %eax
decl %ecx
jnz .loop6
popl %ecx
popl %ebx
popl %eax
leave
ret
.p2align 4,,7
.global stream_sample_2ch_to_s16_kni
.type stream_sample_2ch_to_s16_kni, @function
stream_sample_2ch_to_s16_kni:
pushl %ebp
movl %esp, %ebp
pushl %eax
pushl %ebx
pushl %edx
pushl %ecx
movl 8(%ebp), %eax /* s16_samples */
movl 12(%ebp), %ebx /* left */
movl 16(%ebp), %edx /* right */
movl $64, %ecx
.loop1:
movaps (%ebx), %xmm0 /* l3 | l2 | l1 | l0 */
movaps (%edx), %xmm1 /* r3 | r2 | r1 | r0 */
movhlps %xmm0, %xmm2 /* l3 | l2 */
movhlps %xmm1, %xmm3 /* r3 | r2 */
unpcklps %xmm1, %xmm0 /* r1 | l1 | r0 | l0 */
unpcklps %xmm3, %xmm2 /* r3 | l3 | r2 | l2 */
cvtps2pi %xmm0, %mm0 /* r0 l0 --> mm0, int_32 */
movhlps %xmm0, %xmm0
cvtps2pi %xmm0, %mm1 /* r1 l1 --> mm1, int_32 */
cvtps2pi %xmm2, %mm2 /* r2 l2 --> mm2, int_32 */
movhlps %xmm2, %xmm2
cvtps2pi %xmm2, %mm3 /* r3 l3 --> mm3, int_32 */
packssdw %mm1, %mm0 /* r1 l1 r0 l0 --> mm0, int_16 */
packssdw %mm3, %mm2 /* r3 l3 r2 l2 --> mm2, int_16 */
movq %mm0, (%eax)
movq %mm2, 8(%eax)
addl $16, %eax
addl $16, %ebx
addl $16, %edx
decl %ecx
jnz .loop1
popl %ecx
popl %edx
popl %ebx
popl %eax
emms
leave
ret
.p2align 4,,7
.global stream_sample_1ch_to_s16_kni
.type stream_sample_1ch_to_s16_kni, @function
stream_sample_1ch_to_s16_kni:
pushl %ebp
movl %esp, %ebp
pushl %eax
pushl %ebx
pushl %ecx
movl $sqrt2, %eax
movss (%eax), %xmm7
movl 8(%ebp), %eax /* s16_samples */
movl 12(%ebp), %ebx /* left */
shufps $0, %xmm7, %xmm7
movl $64, %ecx
.loop2:
movaps (%ebx), %xmm0 /* c3 | c2 | c1 | c0 */
mulps %xmm7, %xmm0
movhlps %xmm0, %xmm2 /* c3 | c2 */
cvtps2pi %xmm0, %mm0 /* c1 c0 --> mm0, int_32 */
cvtps2pi %xmm2, %mm1 /* c3 c2 --> mm1, int_32 */
packssdw %mm0, %mm0 /* c1 c1 c0 c0 --> mm0, int_16 */
packssdw %mm1, %mm1 /* c3 c3 c2 c2 --> mm1, int_16 */
movq %mm0, (%eax)
movq %mm1, 8(%eax)
addl $16, %eax
addl $16, %ebx
decl %ecx
jnz .loop2
popl %ecx
popl %ebx
popl %eax
emms
leave
ret
#endif

32
ac3dec/downmix_kni.h Normal file
View File

@ -0,0 +1,32 @@
/*****
*
* This file is part of the OMS program.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*****/
#ifndef __DOWNMIX_KNI_H__
#define __DOWNMIX_KNI_H__
void downmix_3f_2r_to_2ch_kni(float *samples, dm_par_t * dm_par);
void downmix_3f_1r_to_2ch_kni(float *samples, dm_par_t * dm_par);
void downmix_2f_2r_to_2ch_kni(float *samples, dm_par_t * dm_par);
void downmix_2f_1r_to_2ch_kni(float *samples, dm_par_t * dm_par);
void downmix_3f_0r_to_2ch_kni(float *samples, dm_par_t * dm_par);
void stream_sample_2ch_to_s16_kni(int16_t *s16_samples, float *left, float *right);
void stream_sample_1ch_to_s16_kni(int16_t *s16_samples, float *center);
#endif

View File

@ -28,41 +28,43 @@
#include "ac3_internal.h" #include "ac3_internal.h"
#include "decode.h"
#include "exponent.h" #include "exponent.h"
static void exp_unpack_ch(uint_16 type,uint_16 expstr,uint_16 ngrps,uint_16 initial_exp, static inline void exp_unpack_ch(uint16_t type,uint16_t expstr,uint16_t ngrps,uint16_t initial_exp, uint16_t exps[], uint16_t *dest);
uint_16 exps[], uint_16 *dest);
void
exponent_unpack( bsi_t *bsi, audblk_t *audblk) /**
*
**/
void exponent_unpack( bsi_t *bsi, audblk_t *audblk)
{ {
uint_16 i; uint16_t i;
for(i=0; i< bsi->nfchans; i++) for(i=0; i< bsi->nfchans; i++)
exp_unpack_ch(UNPACK_FBW, audblk->chexpstr[i], audblk->nchgrps[i], audblk->exps[i][0], exp_unpack_ch(UNPACK_FBW, audblk->chexpstr[i], audblk->nchgrps[i], audblk->exps[i][0], &audblk->exps[i][1], audblk->fbw_exp[i]);
&audblk->exps[i][1], audblk->fbw_exp[i]);
if(audblk->cplinu) if(audblk->cplinu)
exp_unpack_ch(UNPACK_CPL, audblk->cplexpstr, audblk->ncplgrps, audblk->cplabsexp << 1, exp_unpack_ch(UNPACK_CPL, audblk->cplexpstr, audblk->ncplgrps, audblk->cplabsexp << 1, audblk->cplexps, &audblk->cpl_exp[audblk->cplstrtmant]);
audblk->cplexps, &audblk->cpl_exp[audblk->cplstrtmant]);
if(bsi->lfeon) if(bsi->lfeon)
exp_unpack_ch(UNPACK_LFE, audblk->lfeexpstr, 2, audblk->lfeexps[0], exp_unpack_ch(UNPACK_LFE, audblk->lfeexpstr, 2, audblk->lfeexps[0], &audblk->lfeexps[1], audblk->lfe_exp);
&audblk->lfeexps[1], audblk->lfe_exp);
} }
static void /**
exp_unpack_ch(uint_16 type,uint_16 expstr,uint_16 ngrps,uint_16 initial_exp, *
uint_16 exps[], uint_16 *dest) **/
{
uint_16 i,j;
sint_16 exp_acc;
sint_16 exp_1,exp_2,exp_3;
if(expstr == EXP_REUSE) static inline void exp_unpack_ch(uint16_t type,uint16_t expstr,uint16_t ngrps,uint16_t initial_exp,
uint16_t exps[], uint16_t *dest)
{
uint16_t i,j;
int16_t exp_acc;
int16_t exp_1,exp_2,exp_3;
if (expstr == EXP_REUSE)
return; return;
/* Handle the initial absolute exponent */ /* Handle the initial absolute exponent */
@ -75,8 +77,7 @@ exp_unpack_ch(uint_16 type,uint_16 expstr,uint_16 ngrps,uint_16 initial_exp,
dest[j++] = exp_acc; dest[j++] = exp_acc;
/* Loop through the groups and fill the dest array appropriately */ /* Loop through the groups and fill the dest array appropriately */
for(i=0; i< ngrps; i++) for(i=0; i< ngrps; i++) {
{
if(exps[i] > 124) if(exps[i] > 124)
goto error; goto error;
@ -86,8 +87,7 @@ exp_unpack_ch(uint_16 type,uint_16 expstr,uint_16 ngrps,uint_16 initial_exp,
exp_acc += (exp_1 - 2); exp_acc += (exp_1 - 2);
switch(expstr) switch(expstr) {
{
case EXP_D45: case EXP_D45:
dest[j++] = exp_acc; dest[j++] = exp_acc;
dest[j++] = exp_acc; dest[j++] = exp_acc;
@ -99,8 +99,7 @@ exp_unpack_ch(uint_16 type,uint_16 expstr,uint_16 ngrps,uint_16 initial_exp,
exp_acc += (exp_2 - 2); exp_acc += (exp_2 - 2);
switch(expstr) switch(expstr) {
{
case EXP_D45: case EXP_D45:
dest[j++] = exp_acc; dest[j++] = exp_acc;
dest[j++] = exp_acc; dest[j++] = exp_acc;
@ -112,8 +111,7 @@ exp_unpack_ch(uint_16 type,uint_16 expstr,uint_16 ngrps,uint_16 initial_exp,
exp_acc += (exp_3 - 2); exp_acc += (exp_3 - 2);
switch(expstr) switch(expstr) {
{
case EXP_D45: case EXP_D45:
dest[j++] = exp_acc; dest[j++] = exp_acc;
dest[j++] = exp_acc; dest[j++] = exp_acc;
@ -125,11 +123,10 @@ exp_unpack_ch(uint_16 type,uint_16 expstr,uint_16 ngrps,uint_16 initial_exp,
} }
return; return;
goto error;
error: error:
if(!error_flag) #ifdef DEBUG
fprintf(stderr,"** Invalid exponent - skipping frame **\n"); fprintf (stderr,"** Invalid exponent - skipping frame **\n");
error_flag = 1; #endif
HANDLE_ERROR ();
} }

View File

@ -28,75 +28,48 @@
#include "ac3.h" #include "ac3.h"
#include "ac3_internal.h" #include "ac3_internal.h"
#include "downmix.h"
#include "decode.h"
#include "imdct.h" #include "imdct.h"
#include "imdct_c.h"
#ifdef HAVE_KNI
#include "imdct_kni.h"
#endif
#include "srfft.h"
void imdct_do_256(float data[],float delay[]);
void imdct_do_512(float data[],float delay[]);
typedef struct complex_s extern void (*downmix_3f_2r_to_2ch)(float *samples, dm_par_t * dm_par);
{ extern void (*downmix_3f_1r_to_2ch)(float *samples, dm_par_t * dm_par);
float real; extern void (*downmix_2f_2r_to_2ch)(float *samples, dm_par_t * dm_par);
float imag; extern void (*downmix_2f_1r_to_2ch)(float *samples, dm_par_t * dm_par);
} complex_t; extern void (*downmix_3f_0r_to_2ch)(float *samples, dm_par_t * dm_par);
extern void (*stream_sample_2ch_to_s16)(int16_t *s16_samples, float *left, float *right);
extern void (*stream_sample_1ch_to_s16)(int16_t *s16_samples, float *center);
void (*fft_64p) (complex_t *);
void (*imdct_do_512) (float data[],float delay[]);
void (*imdct_do_512_nol) (float data[], float delay[]);
void imdct_do_256 (float data[],float delay[]);
#define N 512 #define N 512
/* static complex_t buf[128]; */
/* 128 point bit-reverse LUT */ //static complex_t buf[128] __attribute__((aligned(16)));
static uint_8 bit_reverse_512[128] = { complex_t buf[128] __attribute__((aligned(16)));
0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70,
0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78,
0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74,
0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c,
0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72,
0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a,
0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76,
0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e,
0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71,
0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79,
0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75,
0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d,
0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73,
0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b,
0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77,
0x0f, 0x4f, 0x2f, 0x6f, 0x1f, 0x5f, 0x3f, 0x7f};
static uint_8 bit_reverse_256[64] = {
0x00, 0x20, 0x10, 0x30, 0x08, 0x28, 0x18, 0x38,
0x04, 0x24, 0x14, 0x34, 0x0c, 0x2c, 0x1c, 0x3c,
0x02, 0x22, 0x12, 0x32, 0x0a, 0x2a, 0x1a, 0x3a,
0x06, 0x26, 0x16, 0x36, 0x0e, 0x2e, 0x1e, 0x3e,
0x01, 0x21, 0x11, 0x31, 0x09, 0x29, 0x19, 0x39,
0x05, 0x25, 0x15, 0x35, 0x0d, 0x2d, 0x1d, 0x3d,
0x03, 0x23, 0x13, 0x33, 0x0b, 0x2b, 0x1b, 0x3b,
0x07, 0x27, 0x17, 0x37, 0x0f, 0x2f, 0x1f, 0x3f};
static complex_t buf[128];
/* Twiddle factor LUT */
static complex_t *w[7];
static complex_t w_1[1];
static complex_t w_2[2];
static complex_t w_4[4];
static complex_t w_8[8];
static complex_t w_16[16];
static complex_t w_32[32];
static complex_t w_64[64];
/* Twiddle factors for IMDCT */
static float xcos1[128];
static float xsin1[128];
static float xcos2[64];
static float xsin2[64];
/* Delay buffer for time domain interleaving */ /* Delay buffer for time domain interleaving */
static float delay[6][256]; static float delay[6][256];
static float delay1[6][256];
/* Twiddle factors for IMDCT */
static float xcos2[64];
static float xsin2[64];
/* Windowing function for Modified DCT - Thank you acroread */ /* Windowing function for Modified DCT - Thank you acroread */
static float window[] = { //static float window[] = {
float window[] = {
0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130, 0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443, 0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443,
0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061, 0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061,
@ -128,280 +101,116 @@ static float window[] = {
0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993, 0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993,
0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999, 0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999,
0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000,
1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000 }; 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000
};
//static const int pm128[128] =
static inline void swap_cmplx(complex_t *a, complex_t *b) const int pm128[128] =
{ {
complex_t tmp; 0, 16, 32, 48, 64, 80, 96, 112, 8, 40, 72, 104, 24, 56, 88, 120,
4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44, 60, 76, 92, 108, 124,
2, 18, 34, 50, 66, 82, 98, 114, 10, 42, 74, 106, 26, 58, 90, 122,
6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62, 94, 126,
1, 17, 33, 49, 65, 81, 97, 113, 9, 41, 73, 105, 25, 57, 89, 121,
5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125,
3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123,
7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127
};
tmp = *a; static const int pm64[64] =
*a = *b;
*b = tmp;
}
static inline complex_t cmplx_mult(complex_t a, complex_t b)
{ {
complex_t ret; 0, 8, 16, 24, 32, 40, 48, 56,
4, 20, 36, 52, 12, 28, 44, 60,
2, 10, 18, 26, 34, 42, 50, 58,
6, 14, 22, 30, 38, 46, 54, 62,
1, 9, 17, 25, 33, 41, 49, 57,
5, 21, 37, 53, 13, 29, 45, 61,
3, 11, 19, 27, 35, 43, 51, 59,
7, 23, 39, 55, 15, 31, 47, 63
};
ret.real = a.real * b.real - a.imag * b.imag;
ret.imag = a.real * b.imag + a.imag * b.real;
return ret; void imdct_init (void)
} {
int i;
float scale = 255.99609372;
void imdct_init(void) #ifdef __i386__
{ #ifdef HAVE_KNI
int i,k; if (!imdct_init_kni ());
complex_t angle_step; else
complex_t current_angle; #endif
#endif
if (!imdct_init_c ());
/* Twiddle factors to turn IFFT into IMDCT */ // More twiddle factors to turn IFFT into IMDCT */
for( i=0; i < 128; i++) for (i=0; i < 64; i++) {
{ xcos2[i] = cos(2.0f * M_PI * (8*i+1)/(4*N)) * scale;
xcos1[i] = -cos(2.0f * M_PI * (8*i+1)/(8*N)) ; xsin2[i] = sin(2.0f * M_PI * (8*i+1)/(4*N)) * scale;
xsin1[i] = -sin(2.0f * M_PI * (8*i+1)/(8*N)) ;
}
/* More twiddle factors to turn IFFT into IMDCT */
for( i=0; i < 64; i++)
{
xcos2[i] = -cos(2.0f * M_PI * (8*i+1)/(4*N)) ;
xsin2[i] = -sin(2.0f * M_PI * (8*i+1)/(4*N)) ;
}
/* Canonical twiddle factors for FFT */
w[0] = w_1;
w[1] = w_2;
w[2] = w_4;
w[3] = w_8;
w[4] = w_16;
w[5] = w_32;
w[6] = w_64;
for( i = 0; i < 7; i++)
{
angle_step.real = cos(-2.0 * M_PI / (1 << (i+1)));
angle_step.imag = sin(-2.0 * M_PI / (1 << (i+1)));
current_angle.real = 1.0;
current_angle.imag = 0.0;
for (k = 0; k < 1 << i; k++)
{
w[i][k] = current_angle;
current_angle = cmplx_mult(current_angle,angle_step);
}
} }
} }
void
imdct_do_512(float data[],float delay[]) void imdct_do_256 (float data[],float delay[])
{ {
int i,k; int i, j, k;
int p,q; int p, q;
int m;
int two_m;
int two_m_plus_one;
float tmp_a_i; float tmp_a_i;
float tmp_a_r; float tmp_a_r;
float tmp_b_i;
float tmp_b_r;
float *data_ptr; float *data_ptr;
float *delay_ptr; float *delay_ptr;
float *window_ptr; float *window_ptr;
// complex_t *buf1, *buf2;
// 512 IMDCT with source and dest data in 'data'
//
// Pre IFFT complex multiply plus IFFT cmplx conjugate and bit reverse buf1 = &buf[0];
// permutation buf2 = &buf[64];
for( i=0; i < 128; i++)
{
k = bit_reverse_512[i];
/* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ // Pre IFFT complex multiply plus IFFT complex conjugate
buf[k].real = (data[256-2*i-1] * xcos1[i]) - (data[2*i] * xsin1[i]); for (k=0; k<64; k++) {
buf[k].imag = -1.0 * ((data[2*i] * xcos1[i]) + (data[256-2*i-1] * xsin1[i])); /* X1[k] = X[2*k] */
/* X2[k] = X[2*k+1] */
j = pm64[k];
p = 2 * (128-2*j-1);
q = 2 * (2 * j);
/* Z1[k] = (X1[128-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */
buf1[k].re = data[p] * xcos2[j] - data[q] * xsin2[j];
buf1[k].im = -1.0f * (data[q] * xcos2[j] + data[p] * xsin2[j]);
/* Z2[k] = (X2[128-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */
buf2[k].re = data[p + 1] * xcos2[j] - data[q + 1] * xsin2[j];
buf2[k].im = -1.0f * ( data[q + 1] * xcos2[j] + data[p + 1] * xsin2[j]);
} }
// FFT Merge fft_64p(&buf1[0]);
for (m=0; m < 7; m++) fft_64p(&buf2[0]);
{
if(m)
two_m = (1 << m);
else
two_m = 1;
two_m_plus_one = (1 << (m+1)); #ifdef DEBUG
//DEBUG FFT
#if 0
printf ("Post FFT, buf1\n");
for (i=0; i < 64; i++)
printf("%d %f %f\n", i, buf_1[i].re, buf_1[i].im);
printf ("Post FFT, buf2\n");
for (i=0; i < 64; i++)
printf("%d %f %f\n", i, buf_2[i].re, buf_2[i].im);
#endif
#endif
for(k = 0; k < two_m; k++)
{
for(i = 0; i < 128; i += two_m_plus_one)
{
p = k + i;
q = p + two_m;
tmp_a_r = buf[p].real;
tmp_a_i = buf[p].imag;
tmp_b_r = buf[q].real * w[m][k].real - buf[q].imag * w[m][k].imag;
tmp_b_i = buf[q].imag * w[m][k].real + buf[q].real * w[m][k].imag;
buf[p].real = tmp_a_r + tmp_b_r;
buf[p].imag = tmp_a_i + tmp_b_i;
buf[q].real = tmp_a_r - tmp_b_r;
buf[q].imag = tmp_a_i - tmp_b_i;
}
}
}
/* Post IFFT complex multiply plus IFFT complex conjugate*/
for( i=0; i < 128; i++)
{
/* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
tmp_a_r = buf[i].real;
tmp_a_i = buf[i].imag;
//Note that I flipped the signs on the imaginary ops to do the complex conj
buf[i].real =(tmp_a_r * xcos1[i]) + (tmp_a_i * xsin1[i]);
buf[i].imag =(tmp_a_r * xsin1[i]) - (tmp_a_i * xcos1[i]);
}
data_ptr = data;
delay_ptr = delay;
window_ptr = window;
/* Window and convert to real valued signal */
for(i=0; i< 64; i++)
{
*data_ptr++ = 2.0f * (-buf[64+i].imag * *window_ptr++ + *delay_ptr++);
*data_ptr++ = 2.0f * ( buf[64-i-1].real * *window_ptr++ + *delay_ptr++);
}
for(i=0; i< 64; i++)
{
*data_ptr++ = 2.0f * (-buf[i].real * *window_ptr++ + *delay_ptr++);
*data_ptr++ = 2.0f * ( buf[128-i-1].imag * *window_ptr++ + *delay_ptr++);
}
/* The trailing edge of the window goes into the delay line */
delay_ptr = delay;
for(i=0; i< 64; i++)
{
*delay_ptr++ = -buf[64+i].real * *--window_ptr;
*delay_ptr++ = buf[64-i-1].imag * *--window_ptr;
}
for(i=0; i<64; i++)
{
*delay_ptr++ = buf[i].imag * *--window_ptr;
*delay_ptr++ = -buf[128-i-1].real * *--window_ptr;
}
}
void
imdct_do_256(float data[],float delay[])
{
int i,k;
int p,q;
int m;
int two_m;
int two_m_plus_one;
float tmp_a_i;
float tmp_a_r;
float tmp_b_i;
float tmp_b_r;
float *data_ptr;
float *delay_ptr;
float *window_ptr;
complex_t *buf_1, *buf_2;
buf_1 = &buf[0];
buf_2 = &buf[64];
// Pre IFFT complex multiply plus IFFT cmplx conjugate and bit reverse
// permutation
for(i=0; i<64; i++)
{
/* X1[i] = X[2*i] */
/* X2[i] = X[2*i+1] */
k = bit_reverse_256[i];
p = 2 * (128-2*i-1);
q = 2 * (2 * i);
/* Z1[i] = (X1[128-2*i-1] + j * X1[2*i]) * (xcos2[i] + j * xsin2[i]); */
buf_1[k].real = data[p] * xcos2[i] - data[q] * xsin2[i];
buf_1[k].imag = -1.0f * (data[q] * xcos2[i] + data[p] * xsin2[i]);
/* Z2[i] = (X2[128-2*i-1] + j * X2[2*i]) * (xcos2[i] + j * xsin2[i]); */
buf_2[k].real = data[p + 1] * xcos2[i] - data[q + 1] * xsin2[i];
buf_2[k].imag = -1.0f * ( data[q + 1] * xcos2[i] + data[p + 1] * xsin2[i]);
}
// FFT Merge
for (m=0; m < 6; m++)
{
two_m = (1 << m);
two_m_plus_one = (1 << (m+1));
if(m)
two_m = (1 << m);
else
two_m = 1;
for(k = 0; k < two_m; k++)
{
for(i = 0; i < 64; i += two_m_plus_one)
{
p = k + i;
q = p + two_m;
//Do block 1
tmp_a_r = buf_1[p].real;
tmp_a_i = buf_1[p].imag;
tmp_b_r = buf_1[q].real * w[m][k].real - buf_1[q].imag * w[m][k].imag;
tmp_b_i = buf_1[q].imag * w[m][k].real + buf_1[q].real * w[m][k].imag;
buf_1[p].real = tmp_a_r + tmp_b_r;
buf_1[p].imag = tmp_a_i + tmp_b_i;
buf_1[q].real = tmp_a_r - tmp_b_r;
buf_1[q].imag = tmp_a_i - tmp_b_i;
//Do block 2
tmp_a_r = buf_2[p].real;
tmp_a_i = buf_2[p].imag;
tmp_b_r = buf_2[q].real * w[m][k].real - buf_2[q].imag * w[m][k].imag;
tmp_b_i = buf_2[q].imag * w[m][k].real + buf_2[q].real * w[m][k].imag;
buf_2[p].real = tmp_a_r + tmp_b_r;
buf_2[p].imag = tmp_a_i + tmp_b_i;
buf_2[q].real = tmp_a_r - tmp_b_r;
buf_2[q].imag = tmp_a_i - tmp_b_i;
}
}
}
// Post IFFT complex multiply // Post IFFT complex multiply
for( i=0; i < 64; i++) for( i=0; i < 64; i++) {
{ tmp_a_r = buf1[i].re;
//Note that I flipped the signs on the imaginary ops to do the complex conj tmp_a_i = -buf1[i].im;
buf1[i].re =(tmp_a_r * xcos2[i]) - (tmp_a_i * xsin2[i]);
/* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */ buf1[i].im =(tmp_a_r * xsin2[i]) + (tmp_a_i * xcos2[i]);
tmp_a_r = buf_1[i].real; tmp_a_r = buf2[i].re;
tmp_a_i = buf_1[i].imag; tmp_a_i = -buf2[i].im;
buf_1[i].real =(tmp_a_r * xcos2[i]) + (tmp_a_i * xsin2[i]); buf2[i].re =(tmp_a_r * xcos2[i]) - (tmp_a_i * xsin2[i]);
buf_1[i].imag =(tmp_a_r * xsin2[i]) - (tmp_a_i * xcos2[i]); buf2[i].im =(tmp_a_r * xsin2[i]) + (tmp_a_i * xcos2[i]);
/* y2[n] = z2[n] * (xcos2[n] + j * xsin2[n]) ; */
tmp_a_r = buf_2[i].real;
tmp_a_i = buf_2[i].imag;
buf_2[i].real =(tmp_a_r * xcos2[i]) + (tmp_a_i * xsin2[i]);
buf_2[i].imag =(tmp_a_r * xsin2[i]) - (tmp_a_i * xcos2[i]);
} }
data_ptr = data; data_ptr = data;
@ -409,30 +218,122 @@ imdct_do_256(float data[],float delay[])
window_ptr = window; window_ptr = window;
/* Window and convert to real valued signal */ /* Window and convert to real valued signal */
for(i=0; i< 64; i++) for(i=0; i< 64; i++) {
{ *data_ptr++ = -buf1[i].im * *window_ptr++ + *delay_ptr++;
*data_ptr++ = 2.0f * (-buf_1[i].imag * *window_ptr++ + *delay_ptr++); *data_ptr++ = buf1[64-i-1].re * *window_ptr++ + *delay_ptr++;
*data_ptr++ = 2.0f * ( buf_1[64-i-1].real * *window_ptr++ + *delay_ptr++);
} }
for(i=0; i< 64; i++) for(i=0; i< 64; i++) {
{ *data_ptr++ = -buf1[i].re * *window_ptr++ + *delay_ptr++;
*data_ptr++ = 2.0f * (-buf_1[i].real * *window_ptr++ + *delay_ptr++); *data_ptr++ = buf1[64-i-1].im * *window_ptr++ + *delay_ptr++;
*data_ptr++ = 2.0f * ( buf_1[64-i-1].imag * *window_ptr++ + *delay_ptr++);
} }
delay_ptr = delay; delay_ptr = delay;
for(i=0; i< 64; i++) for(i=0; i< 64; i++) {
{ *delay_ptr++ = -buf2[i].re * *--window_ptr;
*delay_ptr++ = -buf_2[i].real * *--window_ptr; *delay_ptr++ = buf2[64-i-1].im * *--window_ptr;
*delay_ptr++ = buf_2[64-i-1].imag * *--window_ptr;
} }
for(i=0; i< 64; i++) for(i=0; i< 64; i++) {
{ *delay_ptr++ = buf2[i].im * *--window_ptr;
*delay_ptr++ = buf_2[i].imag * *--window_ptr; *delay_ptr++ = -buf2[64-i-1].re * *--window_ptr;
*delay_ptr++ = -buf_2[64-i-1].real * *--window_ptr; }
}
/**
*
**/
void imdct_do_256_nol (float data[], float delay[])
{
int i, j, k;
int p, q;
float tmp_a_i;
float tmp_a_r;
float *data_ptr;
float *delay_ptr;
float *window_ptr;
complex_t *buf1, *buf2;
buf1 = &buf[0];
buf2 = &buf[64];
/* Pre IFFT complex multiply plus IFFT cmplx conjugate */
for(k=0; k<64; k++) {
/* X1[k] = X[2*k] */
/* X2[k] = X[2*k+1] */
j = pm64[k];
p = 2 * (128-2*j-1);
q = 2 * (2 * j);
/* Z1[k] = (X1[128-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */
buf1[k].re = data[p] * xcos2[j] - data[q] * xsin2[j];
buf1[k].im = -1.0f * (data[q] * xcos2[j] + data[p] * xsin2[j]);
/* Z2[k] = (X2[128-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */
buf2[k].re = data[p + 1] * xcos2[j] - data[q + 1] * xsin2[j];
buf2[k].im = -1.0f * ( data[q + 1] * xcos2[j] + data[p + 1] * xsin2[j]);
}
fft_64p(&buf1[0]);
fft_64p(&buf2[0]);
#ifdef DEBUG
//DEBUG FFT
#if 0
printf("Post FFT, buf1\n");
for (i=0; i < 64; i++)
printf("%d %f %f\n", i, buf_1[i].re, buf_1[i].im);
printf("Post FFT, buf2\n");
for (i=0; i < 64; i++)
printf("%d %f %f\n", i, buf_2[i].re, buf_2[i].im);
#endif
#endif
/* Post IFFT complex multiply */
for( i=0; i < 64; i++) {
/* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */
tmp_a_r = buf1[i].re;
tmp_a_i = -buf1[i].im;
buf1[i].re =(tmp_a_r * xcos2[i]) - (tmp_a_i * xsin2[i]);
buf1[i].im =(tmp_a_r * xsin2[i]) + (tmp_a_i * xcos2[i]);
/* y2[n] = z2[n] * (xcos2[n] + j * xsin2[n]) ; */
tmp_a_r = buf2[i].re;
tmp_a_i = -buf2[i].im;
buf2[i].re =(tmp_a_r * xcos2[i]) - (tmp_a_i * xsin2[i]);
buf2[i].im =(tmp_a_r * xsin2[i]) + (tmp_a_i * xcos2[i]);
}
data_ptr = data;
delay_ptr = delay;
window_ptr = window;
/* Window and convert to real valued signal, no overlap */
for(i=0; i< 64; i++) {
*data_ptr++ = -buf1[i].im * *window_ptr++;
*data_ptr++ = buf1[64-i-1].re * *window_ptr++;
}
for(i=0; i< 64; i++) {
*data_ptr++ = -buf1[i].re * *window_ptr++ + *delay_ptr++;
*data_ptr++ = buf1[64-i-1].im * *window_ptr++ + *delay_ptr++;
}
delay_ptr = delay;
for(i=0; i< 64; i++) {
*delay_ptr++ = -buf2[i].re * *--window_ptr;
*delay_ptr++ = buf2[64-i-1].im * *--window_ptr;
}
for(i=0; i< 64; i++) {
*delay_ptr++ = buf2[i].im * *--window_ptr;
*delay_ptr++ = -buf2[64-i-1].re * *--window_ptr;
} }
} }
@ -440,29 +341,190 @@ imdct_do_256(float data[],float delay[])
///#include <sys/time.h> ///#include <sys/time.h>
//FIXME remove //FIXME remove
void void imdct (bsi_t *bsi,audblk_t *audblk, stream_samples_t samples, int16_t *s16_samples, dm_par_t* dm_par)
imdct(bsi_t *bsi,audblk_t *audblk, stream_samples_t samples) { {
int i; int i;
int doable = 0;
float *center=NULL, *left, *right, *left_sur, *right_sur;
float *delay_left, *delay_right;
float *delay1_left, *delay1_right, *delay1_center, *delay1_sr, *delay1_sl;
float right_tmp, left_tmp;
void (*do_imdct)(float data[], float deley[]);
//handy timing code // test if dm in frequency is doable
//struct timeval start,end; if (!(doable = audblk->blksw[0]))
do_imdct = imdct_do_512;
else
do_imdct = imdct_do_256;
//gettimeofday(&start,0); // downmix in the frequency domain if all the channels
// use the same imdct
for(i=0; i<bsi->nfchans;i++) for (i=0; i < bsi->nfchans; i++) {
{ if (doable != audblk->blksw[i]) {
if(audblk->blksw[i]) do_imdct = NULL;
imdct_do_256(samples[i],delay[i]); break;
else }
imdct_do_512(samples[i],delay[i]);
} }
//gettimeofday(&end,0);
//printf("imdct %ld us\n",(end.tv_sec - start.tv_sec) * 1000000 +
//end.tv_usec - start.tv_usec);
//XXX We don't bother with the IMDCT for the LFE as it's currently if (do_imdct) {
//unused. //dowmix first and imdct
//if (bsi->lfeon) switch(bsi->acmod) {
// imdct_do_512(coeffs->lfe,samples->channel[5],delay[5]); case 7: // 3/2
// downmix_3f_2r_to_2ch (samples[0], dm_par);
break;
case 6: // 2/2
downmix_2f_2r_to_2ch (samples[0], dm_par);
break;
case 5: // 3/1
downmix_3f_1r_to_2ch (samples[0], dm_par);
break;
case 4: // 2/1
downmix_2f_1r_to_2ch (samples[0], dm_par);
break;
case 3: // 3/0
downmix_3f_0r_to_2ch (samples[0], dm_par);
break;
case 2:
break;
default: // 1/0
if (bsi->acmod == 1)
center = samples[0];
else if (bsi->acmod == 0)
center = samples[ac3_config.dual_mono_ch_sel];
do_imdct(center, delay[0]); // no downmix
stream_sample_1ch_to_s16 (s16_samples, center);
return;
//goto done;
break;
}
do_imdct (samples[0], delay[0]);
do_imdct (samples[1], delay[1]);
stream_sample_2ch_to_s16(s16_samples, samples[0], samples[1]);
} else { //imdct and then dowmix
// delay and samples should be saved and mixed
//fprintf(stderr, "time domain downmix\n");
for (i=0; i<bsi->nfchans; i++) {
if (audblk->blksw[i])
imdct_do_256_nol (samples[i],delay1[i]);
else
imdct_do_512_nol (samples[i],delay1[i]);
}
// mix the sample, overlap
switch(bsi->acmod) {
case 7: // 3/2
left = samples[0];
center = samples[1];
right = samples[2];
left_sur = samples[3];
right_sur = samples[4];
delay_left = delay[0];
delay_right = delay[1];
delay1_left = delay1[0];
delay1_center = delay1[1];
delay1_right = delay1[2];
delay1_sl = delay1[3];
delay1_sr = delay1[4];
for (i = 0; i < 256; i++) {
left_tmp = dm_par->unit * *left++ + dm_par->clev * *center + dm_par->slev * *left_sur++;
right_tmp= dm_par->unit * *right++ + dm_par->clev * *center++ + dm_par->slev * *right_sur++;
*s16_samples++ = (int16_t)(left_tmp + *delay_left);
*s16_samples++ = (int16_t)(right_tmp + *delay_right);
*delay_left++ = dm_par->unit * *delay1_left++ + dm_par->clev * *delay1_center + dm_par->slev * *delay1_sl++;
*delay_right++ = dm_par->unit * *delay1_right++ + dm_par->clev * *center++ + dm_par->slev * *delay1_sr++;
}
break;
case 6: // 2/2
left = samples[0];
right = samples[1];
left_sur = samples[2];
right_sur = samples[3];
delay_left = delay[0];
delay_right = delay[1];
delay1_left = delay1[0];
delay1_right = delay1[1];
delay1_sl = delay1[2];
delay1_sr = delay1[3];
for (i = 0; i < 256; i++) {
left_tmp = dm_par->unit * *left++ + dm_par->slev * *left_sur++;
right_tmp= dm_par->unit * *right++ + dm_par->slev * *right_sur++;
*s16_samples++ = (int16_t)(left_tmp + *delay_left);
*s16_samples++ = (int16_t)(right_tmp + *delay_right);
*delay_left++ = dm_par->unit * *delay1_left++ + dm_par->slev * *delay1_sl++;
*delay_right++ = dm_par->unit * *delay1_right++ + dm_par->slev * *delay1_sr++;
}
break;
case 5: // 3/1
left = samples[0];
center = samples[1];
right = samples[2];
right_sur = samples[3];
delay_left = delay[0];
delay_right = delay[1];
delay1_left = delay1[0];
delay1_center = delay1[1];
delay1_right = delay1[2];
delay1_sl = delay1[3];
for (i = 0; i < 256; i++) {
left_tmp = dm_par->unit * *left++ + dm_par->clev * *center - dm_par->slev * *right_sur;
right_tmp= dm_par->unit * *right++ + dm_par->clev * *center++ + dm_par->slev * *right_sur++;
*s16_samples++ = (int16_t)(left_tmp + *delay_left);
*s16_samples++ = (int16_t)(right_tmp + *delay_right);
*delay_left++ = dm_par->unit * *delay1_left++ + dm_par->clev * *delay1_center + dm_par->slev * *delay1_sl;
*delay_right++ = dm_par->unit * *delay1_right++ + dm_par->clev * *center++ + dm_par->slev * *delay1_sl++;
}
break;
case 4: // 2/1
left = samples[0];
right = samples[1];
right_sur = samples[2];
delay_left = delay[0];
delay_right = delay[1];
delay1_left = delay1[0];
delay1_right = delay1[1];
delay1_sl = delay1[2];
for (i = 0; i < 256; i++) {
left_tmp = dm_par->unit * *left++ - dm_par->slev * *right_sur;
right_tmp= dm_par->unit * *right++ + dm_par->slev * *right_sur++;
*s16_samples++ = (int16_t)(left_tmp + *delay_left);
*s16_samples++ = (int16_t)(right_tmp + *delay_right);
*delay_left++ = dm_par->unit * *delay1_left++ + dm_par->slev * *delay1_sl;
*delay_right++ = dm_par->unit * *delay1_right++ + dm_par->slev * *delay1_sl++;
}
break;
case 3: // 3/0
left = samples[0];
center = samples[1];
right = samples[2];
delay_left = delay[0];
delay_right = delay[1];
delay1_left = delay1[0];
delay1_center = delay1[1];
delay1_right = delay1[2];
for (i = 0; i < 256; i++) {
left_tmp = dm_par->unit * *left++ + dm_par->clev * *center;
right_tmp= dm_par->unit * *right++ + dm_par->clev * *center++;
*s16_samples++ = (int16_t)(left_tmp + *delay_left);
*s16_samples++ = (int16_t)(right_tmp + *delay_right);
*delay_left++ = dm_par->unit * *delay1_left++ + dm_par->clev * *delay1_center;
*delay_right++ = dm_par->unit * *delay1_right++ + dm_par->clev * *center++;
}
break;
case 2: // copy to output
for (i = 0; i < 256; i++) {
*s16_samples++ = (int16_t)samples[0][i];
*s16_samples++ = (int16_t)samples[1][i];
}
break;
}
}
} }

View File

@ -22,5 +22,5 @@
* *
*/ */
void imdct(bsi_t *bsi,audblk_t *audblk, stream_samples_t samples); void imdct (bsi_t *bsi,audblk_t *audblk, stream_samples_t samples, int16_t *s16_samples, dm_par_t *dm_par);
void imdct_init(void); void imdct_init(void);

548
ac3dec/imdct512_kni.S Normal file
View File

@ -0,0 +1,548 @@
/*
* imdct512_kni.S
*
* Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - October 2000
*
*
* imdct512_kni.S is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* imdct512_kni.S is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*/
#ifdef __i386__
.text
.align 4
.global imdct512_pre_ifft_twiddle_kni
.type imdct512_pre_ifft_twiddle_kni, @function
imdct512_pre_ifft_twiddle_kni:
pushl %ebp
movl %esp, %ebp
addl $-4, %esp /* local variable, loop counter */
pushl %eax
pushl %ebx
pushl %ecx
pushl %edx
pushl %edi
pushl %esi
movl 8(%ebp), %eax /* pmt */
movl 12(%ebp), %ebx /* buf */
movl 16(%ebp), %ecx /* data */
movl 20(%ebp), %edx /* xcos_sin_sse */
movl $64, -4(%ebp)
.loop:
movl (%eax), %esi
movl 4(%eax), %edi
movss (%ecx, %esi, 8), %xmm1 /* 2j */
movss (%ecx, %edi, 8), %xmm3 /* 2(j+1) */
shll $1, %esi
shll $1, %edi
movaps (%edx, %esi, 8), %xmm0; /* -c_j | -s_j | -s_j | c_j */
movaps (%edx, %edi, 8), %xmm2; /* -c_j+1 | -s_j+1 | -s_j+1 | c_j+1 */
negl %esi
negl %edi
movss 1020(%ecx, %esi, 4), %xmm4 /* 255-2j */
addl $8, %eax
movss 1020(%ecx, %edi, 4), %xmm5 /* 255-2(j+1) */
shufps $0, %xmm1, %xmm4 /* 2j | 2j | 255-2j | 255-2j */
shufps $0, %xmm3, %xmm5 /* 2(j+1) | 2(j+1) | 255-2(j+1) | 255-2(j+1) */
mulps %xmm4, %xmm0
mulps %xmm5, %xmm2
movhlps %xmm0, %xmm1
movhlps %xmm2, %xmm3
addl $16, %ebx
addps %xmm1, %xmm0
addps %xmm3, %xmm2
movlhps %xmm2, %xmm0
movaps %xmm0, -16(%ebx)
decl -4(%ebp)
jnz .loop
popl %esi
popl %edi
popl %edx
popl %ecx
popl %ebx
popl %eax
addl $4, %esp
popl %ebp
ret
.p2align 4,0
.global imdct512_post_ifft_twiddle_kni
.type imdct512_post_ifft_twiddle_kni, @function
imdct512_post_ifft_twiddle_kni:
pushl %ebp
movl %esp, %ebp
pushl %eax
pushl %ebx
pushl %ecx
movl 8(%ebp), %eax /* buf[] */
movl 12(%ebp), %ebx /* xcos_sin_sse[] */
movl $32, %ecx /* loop counter */
.loop1:
movaps (%eax), %xmm0 /* im1 | re1 | im0 | re0 */
movaps (%ebx), %xmm2 /* -c | -s | -s | c */
movhlps %xmm0, %xmm1 /* im1 | re1 */
movaps 16(%ebx), %xmm3 /* -c1 | -s1 | -s1 | c1 */
shufps $0x50, %xmm0, %xmm0 /* im0 | im0 | re0 | re0 */
shufps $0x50, %xmm1, %xmm1 /* im1 | im1 | re1 | re1 */
movaps 16(%eax), %xmm4 /* im3 | re3 | im2 | re2 */
shufps $0x27, %xmm2, %xmm2 /* c | -s | -s | -c */
movhlps %xmm4, %xmm5 /* im3 | re3 */
shufps $0x27, %xmm3, %xmm3 /* c1 | -s1 | -s1 | -c1 */
movaps 32(%ebx), %xmm6 /* -c2 | -s2 | -s2 | c2 */
movaps 48(%ebx), %xmm7 /* -c3 | -s3 | -s3 | c3 */
shufps $0x50, %xmm4, %xmm4 /* im2 | im2 | re2 | re2 */
shufps $0x50, %xmm5, %xmm5 /* im3 | im3 | re3 | re3 */
mulps %xmm2, %xmm0
mulps %xmm3, %xmm1
shufps $0x27, %xmm6, %xmm6 /* c2 | -s2 | -s2 | -c2 */
shufps $0x27, %xmm7, %xmm7 /* c3 | -s3 | -s3 | -c3 */
movhlps %xmm0, %xmm2
movhlps %xmm1, %xmm3
mulps %xmm6, %xmm4
mulps %xmm7, %xmm5
addps %xmm2, %xmm0
addps %xmm3, %xmm1
movhlps %xmm4, %xmm6
movhlps %xmm5, %xmm7
addps %xmm6, %xmm4
addps %xmm7, %xmm5
movlhps %xmm1, %xmm0
movlhps %xmm5, %xmm4
movaps %xmm0, (%eax)
movaps %xmm4, 16(%eax)
addl $64, %ebx
addl $32, %eax
decl %ecx
jnz .loop1
popl %ecx
popl %ebx
popl %eax
leave
ret
.p2align 4,0
.global imdct512_window_delay_kni
.type imdct512_window_delay_kni, @function
imdct512_window_delay_kni:
pushl %ebp
movl %esp, %ebp
pushl %eax
pushl %ebx
pushl %ecx
pushl %edx
pushl %esi
pushl %edi
movl 20(%ebp), %ebx /* delay */
movl 16(%ebp), %edx /* window */
movl 8(%ebp), %eax /* buf */
movl $16, %ecx /* loop count */
leal 516(%eax), %esi /* buf[64].im */
leal 504(%eax), %edi /* buf[63].re */
movl 12(%ebp), %eax /* data */
.first_128_samples:
movss (%esi), %xmm0
movss 8(%esi), %xmm2
movss (%edi), %xmm1
movss -8(%edi), %xmm3
movlhps %xmm2, %xmm0 /* 0.0 | im1 | 0.0 | im0 */
movlhps %xmm3, %xmm1 /* 0.0 | re1 | 0.0 | re0 */
movaps (%edx), %xmm4 /* w3 | w2 | w1 | w0 */
movaps (%ebx), %xmm5 /* d3 | d2 | d1 | d0 */
shufps $0xb1, %xmm1, %xmm1 /* re1 | 0.0 | re0 | 0.0 */
movss 16(%esi), %xmm6 /* im2 */
movss 24(%esi), %xmm7 /* im3 */
subps %xmm1, %xmm0 /* -re1 | im1 | -re0 | im0 */
movss -16(%edi), %xmm2 /* re2 */
movss -24(%edi), %xmm3 /* re3 */
mulps %xmm4, %xmm0
movlhps %xmm7, %xmm6 /* 0.0 | im3 | 0.0 | im2 */
movlhps %xmm3, %xmm2 /* 0.0 | re3 | 0.0 | re2 */
addps %xmm5, %xmm0
shufps $0xb1, %xmm2, %xmm2 /* re3 | 0.0 | re2 | 0.0 */
movaps 16(%edx), %xmm4 /* w7 | w6 | w5 | w4 */
movaps 16(%ebx), %xmm5 /* d7 | d6 | d5 | d4 */
subps %xmm2, %xmm6 /* -re3 | im3 | -re2 | im2 */
addl $32, %edx
movaps %xmm0, (%eax)
addl $32, %ebx
mulps %xmm4, %xmm6
addl $32, %esi
addl $32, %eax
addps %xmm5, %xmm6
addl $-32, %edi
movaps %xmm6, -16(%eax)
decl %ecx
jnz .first_128_samples
movl 8(%ebp), %esi /* buf[0].re */
leal 1020(%esi), %edi /* buf[127].im */
movl $16, %ecx /* loop count */
.second_128_samples:
movss (%esi), %xmm0 /* buf[i].re */
movss 8(%esi), %xmm2 /* re1 */
movss (%edi), %xmm1 /* buf[127-i].im */
movss -8(%edi), %xmm3 /* im1 */
movlhps %xmm2, %xmm0 /* 0.0 | re1 | 0.0 | re0 */
movlhps %xmm3, %xmm1 /* 0.0 | im1 | 0.0 | im1 */
movaps (%edx), %xmm4 /* w3 | w2 | w1 | w0 */
movaps (%ebx), %xmm5 /* d3 | d2 | d1 | d0 */
shufps $0xb1, %xmm1, %xmm1 /* im1 | 0.0 | im0 | 0.0 */
movss 16(%esi), %xmm6 /* re2 */
movss 24(%esi), %xmm7 /* re3 */
movss -16(%edi), %xmm2 /* im2 */
movss -24(%edi), %xmm3 /* im3 */
subps %xmm1, %xmm0 /* -im1 | re1 | -im0 | re0 */
movlhps %xmm7, %xmm6 /* 0.0 | re3 | 0.0 | re2 */
movlhps %xmm3, %xmm2 /* 0.0 | im3 | 0.0 | im2 */
mulps %xmm4, %xmm0
shufps $0xb1, %xmm2, %xmm2 /* im3 | 0.0 | im2 | 0.0 */
movaps 16(%edx), %xmm4 /* w7 | w6 | w5 | w4 */
addl $32, %esi
subps %xmm2, %xmm6 /* -im3 | re3 | -im2 | re2 */
addps %xmm5, %xmm0
mulps %xmm4, %xmm6
addl $-32, %edi
movaps 16(%ebx), %xmm5 /* d7 | d6 | d5 | d4 */
movaps %xmm0, (%eax)
addps %xmm5, %xmm6
addl $32, %edx
addl $32, %eax
addl $32, %ebx
movaps %xmm6, -16(%eax)
decl %ecx
jnz .second_128_samples
movl 8(%ebp), %eax
leal 512(%eax), %esi /* buf[64].re */
leal 508(%eax), %edi /* buf[63].im */
movl $16, %ecx /* loop count */
movl 20(%ebp), %eax /* delay */
.first_128_delay:
movss (%esi), %xmm0
movss 8(%esi), %xmm2
movss (%edi), %xmm1
movss -8(%edi), %xmm3
movlhps %xmm2, %xmm0 /* 0.0 | re1 | 0.0 | re0 */
movlhps %xmm3, %xmm1 /* 0.0 | im1 | 0.0 | im0 */
movaps -16(%edx), %xmm4 /* w3 | w2 | w1 | w0 */
shufps $0xb1, %xmm1, %xmm1 /* im1 | 0.0 | im0 | 0.0 */
movss 16(%esi), %xmm6 /* re2 */
movss 24(%esi), %xmm7 /* re3 */
movss -16(%edi), %xmm2 /* im2 */
movss -24(%edi), %xmm3 /* im3 */
subps %xmm1, %xmm0 /* -im1 | re1 | -im0 | re0 */
addl $-32, %edx
movlhps %xmm7, %xmm6 /* 0.0 | re3 | 0.0 | re2 */
movlhps %xmm3, %xmm2 /* 0.0 | im3 | 0.0 | im2 */
mulps %xmm4, %xmm0
movaps (%edx), %xmm5 /* w7 | w6 | w5 | w4 */
shufps $0xb1, %xmm2, %xmm2 /* im3 | 0.0 | im2 | 0.0 */
movaps %xmm0, (%eax)
addl $32, %esi
subps %xmm2, %xmm6 /* -im3 | re3 | -im2 | re2 */
addl $-32, %edi
mulps %xmm5, %xmm6
addl $32, %eax
movaps %xmm6, -16(%eax)
decl %ecx
jnz .first_128_delay
movl 8(%ebp), %ebx
leal 4(%ebx), %esi /* buf[0].im */
leal 1016(%ebx), %edi /* buf[127].re */
movl $16, %ecx /* loop count */
.second_128_delay:
movss (%esi), %xmm0
movss 8(%esi), %xmm2
movss (%edi), %xmm1
movss -8(%edi), %xmm3
movlhps %xmm2, %xmm0 /* 0.0 | im1 | 0.0 | im0 */
movlhps %xmm3, %xmm1 /* 0.0 | re1 | 0.0 | re0 */
movaps -16(%edx), %xmm4 /* w3 | w2 | w1 | w0 */
shufps $0xb1, %xmm1, %xmm1 /* re1 | 0.0 | re0 | 0.0 */
movss 16(%esi), %xmm6 /* im2 */
movss 24(%esi), %xmm7 /* im3 */
movss -16(%edi), %xmm2 /* re2 */
movss -24(%edi), %xmm3 /* re3 */
subps %xmm0, %xmm1 /* re1 | -im1 | re0 | -im0 */
addl $-32, %edx
movlhps %xmm7, %xmm6 /* 0.0 | im3 | 0.0 | im2 */
movlhps %xmm3, %xmm2 /* 0.0 | re3 | 0.0 | re2 */
mulps %xmm4, %xmm1
movaps (%edx), %xmm5 /* w7 | w6 | w5 | w4 */
shufps $0xb1, %xmm2, %xmm2 /* re3 | 0.0 | re2 | 0.0 */
movaps %xmm1, (%eax)
addl $32, %esi
subps %xmm6, %xmm2 /* re | -im3 | re | -im2 */
addl $-32, %edi
mulps %xmm5, %xmm2
addl $32, %eax
movaps %xmm2, -16(%eax)
decl %ecx
jnz .second_128_delay
popl %edi
popl %esi
popl %edx
popl %ecx
popl %ebx
popl %eax
leave
ret
.p2align 4,0
.global imdct512_window_delay_nol_kni
.type imdct512_window_delay_nol_kni, @function
imdct512_window_delay_nol_kni:
pushl %ebp
movl %esp, %ebp
pushl %eax
pushl %ebx
pushl %ecx
pushl %edx
pushl %esi
pushl %edi
/* movl 20(%ebp), %ebx delay */
movl 16(%ebp), %edx /* window */
movl 8(%ebp), %eax /* buf */
movl $16, %ecx /* loop count */
leal 516(%eax), %esi /* buf[64].im */
leal 504(%eax), %edi /* buf[63].re */
movl 12(%ebp), %eax /* data */
.first_128_sample:
movss (%esi), %xmm0
movss 8(%esi), %xmm2
movss (%edi), %xmm1
movss -8(%edi), %xmm3
movlhps %xmm2, %xmm0 /* 0.0 | im1 | 0.0 | im0 */
movlhps %xmm3, %xmm1 /* 0.0 | re1 | 0.0 | re0 */
movaps (%edx), %xmm4 /* w3 | w2 | w1 | w0 */
/* movaps (%ebx), %xmm5 d3 | d2 | d1 | d0 */
shufps $0xb1, %xmm1, %xmm1 /* re1 | 0.0 | re0 | 0.0 */
movss 16(%esi), %xmm6 /* im2 */
movss 24(%esi), %xmm7 /* im3 */
subps %xmm1, %xmm0 /* -re1 | im1 | -re0 | im0 */
movss -16(%edi), %xmm2 /* re2 */
movss -24(%edi), %xmm3 /* re3 */
mulps %xmm4, %xmm0
movlhps %xmm7, %xmm6 /* 0.0 | im3 | 0.0 | im2 */
movlhps %xmm3, %xmm2 /* 0.0 | re3 | 0.0 | re2 */
/* addps %xmm5, %xmm0 */
shufps $0xb1, %xmm2, %xmm2 /* re3 | 0.0 | re2 | 0.0 */
movaps 16(%edx), %xmm4 /* w7 | w6 | w5 | w4 */
/* movaps 16(%ebx), %xmm5 d7 | d6 | d5 | d4 */
subps %xmm2, %xmm6 /* -re3 | im3 | -re2 | im2 */
addl $32, %edx
movaps %xmm0, (%eax)
/* addl $32, %ebx */
mulps %xmm4, %xmm6
addl $32, %esi
addl $32, %eax
/* addps %xmm5, %xmm6 */
addl $-32, %edi
movaps %xmm6, -16(%eax)
decl %ecx
jnz .first_128_sample
movl 8(%ebp), %esi /* buf[0].re */
leal 1020(%esi), %edi /* buf[127].im */
movl $16, %ecx /* loop count */
.second_128_sample:
movss (%esi), %xmm0 /* buf[i].re */
movss 8(%esi), %xmm2 /* re1 */
movss (%edi), %xmm1 /* buf[127-i].im */
movss -8(%edi), %xmm3 /* im1 */
movlhps %xmm2, %xmm0 /* 0.0 | re1 | 0.0 | re0 */
movlhps %xmm3, %xmm1 /* 0.0 | im1 | 0.0 | im1 */
movaps (%edx), %xmm4 /* w3 | w2 | w1 | w0 */
/* movaps (%ebx), %xmm5 d3 | d2 | d1 | d0 */
shufps $0xb1, %xmm1, %xmm1 /* im1 | 0.0 | im0 | 0.0 */
movss 16(%esi), %xmm6 /* re2 */
movss 24(%esi), %xmm7 /* re3 */
movss -16(%edi), %xmm2 /* im2 */
movss -24(%edi), %xmm3 /* im3 */
subps %xmm1, %xmm0 /* -im1 | re1 | -im0 | re0 */
movlhps %xmm7, %xmm6 /* 0.0 | re3 | 0.0 | re2 */
movlhps %xmm3, %xmm2 /* 0.0 | im3 | 0.0 | im2 */
mulps %xmm4, %xmm0
shufps $0xb1, %xmm2, %xmm2 /* im3 | 0.0 | im2 | 0.0 */
movaps 16(%edx), %xmm4 /* w7 | w6 | w5 | w4 */
addl $32, %esi
subps %xmm2, %xmm6 /* -im3 | re3 | -im2 | re2 */
/* addps %xmm5, %xmm0 */
mulps %xmm4, %xmm6
addl $-32, %edi
/* movaps 16(%ebx), %xmm5 d7 | d6 | d5 | d4 */
movaps %xmm0, (%eax)
/* addps %xmm5, %xmm6 */
addl $32, %edx
addl $32, %eax
/* addl $32, %ebx */
movaps %xmm6, -16(%eax)
decl %ecx
jnz .second_128_sample
movl 8(%ebp), %eax
leal 512(%eax), %esi /* buf[64].re */
leal 508(%eax), %edi /* buf[63].im */
movl $16, %ecx /* loop count */
movl 20(%ebp), %eax /* delay */
.first_128_delays:
movss (%esi), %xmm0
movss 8(%esi), %xmm2
movss (%edi), %xmm1
movss -8(%edi), %xmm3
movlhps %xmm2, %xmm0 /* 0.0 | re1 | 0.0 | re0 */
movlhps %xmm3, %xmm1 /* 0.0 | im1 | 0.0 | im0 */
movaps -16(%edx), %xmm4 /* w3 | w2 | w1 | w0 */
shufps $0xb1, %xmm1, %xmm1 /* im1 | 0.0 | im0 | 0.0 */
movss 16(%esi), %xmm6 /* re2 */
movss 24(%esi), %xmm7 /* re3 */
movss -16(%edi), %xmm2 /* im2 */
movss -24(%edi), %xmm3 /* im3 */
subps %xmm1, %xmm0 /* -im1 | re1 | -im0 | re0 */
addl $-32, %edx
movlhps %xmm7, %xmm6 /* 0.0 | re3 | 0.0 | re2 */
movlhps %xmm3, %xmm2 /* 0.0 | im3 | 0.0 | im2 */
mulps %xmm4, %xmm0
movaps (%edx), %xmm5 /* w7 | w6 | w5 | w4 */
shufps $0xb1, %xmm2, %xmm2 /* im3 | 0.0 | im2 | 0.0 */
movaps %xmm0, (%eax)
addl $32, %esi
subps %xmm2, %xmm6 /* -im3 | re3 | -im2 | re2 */
addl $-32, %edi
mulps %xmm5, %xmm6
addl $32, %eax
movaps %xmm6, -16(%eax)
decl %ecx
jnz .first_128_delays
movl 8(%ebp), %ebx
leal 4(%ebx), %esi /* buf[0].im */
leal 1016(%ebx), %edi /* buf[127].re */
movl $16, %ecx /* loop count */
.second_128_delays:
movss (%esi), %xmm0
movss 8(%esi), %xmm2
movss (%edi), %xmm1
movss -8(%edi), %xmm3
movlhps %xmm2, %xmm0 /* 0.0 | im1 | 0.0 | im0 */
movlhps %xmm3, %xmm1 /* 0.0 | re1 | 0.0 | re0 */
movaps -16(%edx), %xmm4 /* w3 | w2 | w1 | w0 */
shufps $0xb1, %xmm1, %xmm1 /* re1 | 0.0 | re0 | 0.0 */
movss 16(%esi), %xmm6 /* im2 */
movss 24(%esi), %xmm7 /* im3 */
movss -16(%edi), %xmm2 /* re2 */
movss -24(%edi), %xmm3 /* re3 */
subps %xmm0, %xmm1 /* re1 | -im1 | re0 | -im0 */
addl $-32, %edx
movlhps %xmm7, %xmm6 /* 0.0 | im3 | 0.0 | im2 */
movlhps %xmm3, %xmm2 /* 0.0 | re3 | 0.0 | re2 */
mulps %xmm4, %xmm1
movaps (%edx), %xmm5 /* w7 | w6 | w5 | w4 */
shufps $0xb1, %xmm2, %xmm2 /* re3 | 0.0 | re2 | 0.0 */
movaps %xmm1, (%eax)
addl $32, %esi
subps %xmm6, %xmm2 /* re | -im3 | re | -im2 */
addl $-32, %edi
mulps %xmm5, %xmm2
addl $32, %eax
movaps %xmm2, -16(%eax)
decl %ecx
jnz .second_128_delays
popl %edi
popl %esi
popl %edx
popl %ecx
popl %ebx
popl %eax
leave
ret
.p2align 4,0
#endif

218
ac3dec/imdct_c.c Normal file
View File

@ -0,0 +1,218 @@
/*
* imdct.c
*
* Copyright (C) Aaron Holtzman - May 1999
*
* This file is part of ac3dec, a free Dolby AC-3 stream decoder.
*
* ac3dec is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* ac3dec is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "ac3.h"
#include "ac3_internal.h"
#include "downmix.h"
#include "imdct_c.h"
#include "srfft.h"
#define N 512
extern void (*imdct_do_512) (float data[],float delay[]);
extern void (*imdct_do_512_nol) (float data[], float delay[]);
extern void (*fft_64p) (complex_t *);
extern const int pm128[];
extern float window[];
extern complex_t buf[128];
extern void fft_64p_c (complex_t *);
extern void fft_128p_c (complex_t *);
static void imdct_do_512_c (float data[],float delay[]);
static void imdct_do_512_nol_c (float data[], float delay[]);
/* Twiddle factors for IMDCT */
static float xcos1[128] __attribute__((aligned(16)));
static float xsin1[128] __attribute__((aligned(16)));
int imdct_init_c (void)
{
int i;
float scale = 255.99609372;
imdct_do_512 = imdct_do_512_c;
imdct_do_512_nol = imdct_do_512_nol_c;
fft_64p = fft_64p_c;
/* Twiddle factors to turn IFFT into IMDCT */
for (i=0; i < 128; i++) {
xcos1[i] = cos(2.0f * M_PI * (8*i+1)/(8*N)) * scale;
xsin1[i] = sin(2.0f * M_PI * (8*i+1)/(8*N)) * scale;
}
return 0;
}
static void imdct_do_512_c (float data[], float delay[])
{
int i, j;
float tmp_a_r, tmp_a_i;
float *data_ptr;
float *delay_ptr;
float *window_ptr;
// 512 IMDCT with source and dest data in 'data'
// Pre IFFT complex multiply plus IFFT complex conjugate
for( i=0; i < 128; i++) {
j = pm128[i];
//a = (data[256-2*j-1] - data[2*j]) * (xcos1[j] + xsin1[j]);
//c = data[2*j] * xcos1[j];
//b = data[256-2*j-1] * xsin1[j];
//buf1[i].re = a - b + c;
//buf1[i].im = b + c;
buf[i].re = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]);
buf[i].im = -1.0 * (data[2*j] * xcos1[j] + data[256-2*j-1] * xsin1[j]);
}
fft_128p_c (&buf[0]);
// Post IFFT complex multiply plus IFFT complex conjugate
for (i=0; i < 128; i++) {
tmp_a_r = buf[i].re;
tmp_a_i = buf[i].im;
//a = (tmp_a_r - tmp_a_i) * (xcos1[j] + xsin1[j]);
//b = tmp_a_r * xsin1[j];
//c = tmp_a_i * xcos1[j];
//buf[j].re = a - b + c;
//buf[j].im = b + c;
buf[i].re =(tmp_a_r * xcos1[i]) + (tmp_a_i * xsin1[i]);
buf[i].im =(tmp_a_r * xsin1[i]) - (tmp_a_i * xcos1[i]);
}
data_ptr = data;
delay_ptr = delay;
window_ptr = window;
// Window and convert to real valued signal
for (i=0; i< 64; i++) {
*data_ptr++ = -buf[64+i].im * *window_ptr++ + *delay_ptr++;
*data_ptr++ = buf[64-i-1].re * *window_ptr++ + *delay_ptr++;
}
for(i=0; i< 64; i++) {
*data_ptr++ = -buf[i].re * *window_ptr++ + *delay_ptr++;
*data_ptr++ = buf[128-i-1].im * *window_ptr++ + *delay_ptr++;
}
// The trailing edge of the window goes into the delay line
delay_ptr = delay;
for(i=0; i< 64; i++) {
*delay_ptr++ = -buf[64+i].re * *--window_ptr;
*delay_ptr++ = buf[64-i-1].im * *--window_ptr;
}
for(i=0; i<64; i++) {
*delay_ptr++ = buf[i].im * *--window_ptr;
*delay_ptr++ = -buf[128-i-1].re * *--window_ptr;
}
}
static void imdct_do_512_nol_c (float data[], float delay[])
{
int i, j;
float tmp_a_i;
float tmp_a_r;
float *data_ptr;
float *delay_ptr;
float *window_ptr;
//
// 512 IMDCT with source and dest data in 'data'
//
// Pre IFFT complex multiply plus IFFT cmplx conjugate
for( i=0; i < 128; i++) {
/* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) */
j = pm128[i];
//a = (data[256-2*j-1] - data[2*j]) * (xcos1[j] + xsin1[j]);
//c = data[2*j] * xcos1[j];
//b = data[256-2*j-1] * xsin1[j];
//buf1[i].re = a - b + c;
//buf1[i].im = b + c;
buf[i].re = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]);
buf[i].im = -1.0 * (data[2*j] * xcos1[j] + data[256-2*j-1] * xsin1[j]);
}
fft_128p_c (&buf[0]);
/* Post IFFT complex multiply plus IFFT complex conjugate*/
for (i=0; i < 128; i++) {
/* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
/* int j1 = i; */
tmp_a_r = buf[i].re;
tmp_a_i = buf[i].im;
//a = (tmp_a_r - tmp_a_i) * (xcos1[j] + xsin1[j]);
//b = tmp_a_r * xsin1[j];
//c = tmp_a_i * xcos1[j];
//buf[j].re = a - b + c;
//buf[j].im = b + c;
buf[i].re =(tmp_a_r * xcos1[i]) + (tmp_a_i * xsin1[i]);
buf[i].im =(tmp_a_r * xsin1[i]) - (tmp_a_i * xcos1[i]);
}
data_ptr = data;
delay_ptr = delay;
window_ptr = window;
/* Window and convert to real valued signal, no overlap here*/
for (i=0; i< 64; i++) {
*data_ptr++ = -buf[64+i].im * *window_ptr++;
*data_ptr++ = buf[64-i-1].re * *window_ptr++;
}
for(i=0; i< 64; i++) {
*data_ptr++ = -buf[i].re * *window_ptr++;
*data_ptr++ = buf[128-i-1].im * *window_ptr++;
}
/* The trailing edge of the window goes into the delay line */
delay_ptr = delay;
for(i=0; i< 64; i++) {
*delay_ptr++ = -buf[64+i].re * *--window_ptr;
*delay_ptr++ = buf[64-i-1].im * *--window_ptr;
}
for(i=0; i<64; i++) {
*delay_ptr++ = buf[i].im * *--window_ptr;
*delay_ptr++ = -buf[128-i-1].re * *--window_ptr;
}
}

36
ac3dec/imdct_c.h Normal file
View File

@ -0,0 +1,36 @@
/*****
*
* This file is part of the OMS program.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*****/
#ifndef __IMDCT_C_H__
#define __IMDCT_C_H__
#include "cmplx.h"
int imdct_init_c (void);
void fft_128p_c (complex_t *);
void fft_64p_c (complex_t *);
void imdct512_pre_ifft_twiddle_c (const int *pmt, complex_t *buf, float *data, float *xcos_sin_sse);
void imdct512_post_ifft_twiddle_c (complex_t *buf, float *xcos_sin_sse);
void imdct512_window_delay_c (complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt);
void imdct512_window_delay_nol_c (complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt);
#endif

103
ac3dec/imdct_kni.c Normal file
View File

@ -0,0 +1,103 @@
/*
* imdct_kni.c
*
* Copyright (C) Aaron Holtzman - May 1999
*
* This file is part of ac3dec, a free Dolby AC-3 stream decoder.
*
* ac3dec is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* ac3dec is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*
*/
#ifdef __i386__
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <mm_accel.h>
#include "ac3.h"
#include "ac3_internal.h"
#include "downmix.h"
#include "imdct_kni.h"
#include "srfft.h"
#define N 512
/* Delay buffer for time domain interleaving */
static float xcos_sin_sse[128 * 4] __attribute__((aligned(16)));
extern void (*imdct_do_512) (float data[],float delay[]);
extern void (*imdct_do_512_nol) (float data[], float delay[]);
extern void (*fft_64p) (complex_t *);
extern const int pm128[];
extern float window[];
extern complex_t buf[128];
extern void fft_64p_kni (complex_t *);
extern void fft_128p_kni (complex_t *);
static void imdct_do_512_kni (float data[], float delay[]);
static void imdct_do_512_nol_kni (float data[], float delay[]);
int imdct_init_kni (void)
{
uint32_t accel = mm_accel ();
if (accel & MM_ACCEL_X86_MMXEXT) {
int i;
float scale = 255.99609372;
fprintf (stderr, "Using SSE for IMDCT\n");
imdct_do_512 = imdct_do_512_kni;
imdct_do_512_nol = imdct_do_512_nol_kni;
fft_64p = fft_64p_kni;
for (i=0; i < 128; i++) {
float xcos_i = cos(2.0f * M_PI * (8*i+1)/(8*N)) * scale;
float xsin_i = sin(2.0f * M_PI * (8*i+1)/(8*N)) * scale;
xcos_sin_sse[i * 4] = xcos_i;
xcos_sin_sse[i * 4 + 1] = -xsin_i;
xcos_sin_sse[i * 4 + 2] = -xsin_i;
xcos_sin_sse[i * 4 + 3] = -xcos_i;
}
return 0;
} else
return -1;
}
static void imdct_do_512_kni (float data[], float delay[])
{
imdct512_pre_ifft_twiddle_kni (pm128, buf, data, xcos_sin_sse);
fft_128p_kni (buf);
imdct512_post_ifft_twiddle_kni (buf, xcos_sin_sse);
imdct512_window_delay_kni (buf, data, window, delay);
}
static void imdct_do_512_nol_kni (float data[], float delay[])
{
imdct512_pre_ifft_twiddle_kni (pm128, buf, data, xcos_sin_sse);
fft_128p_kni (buf);
imdct512_post_ifft_twiddle_kni (buf, xcos_sin_sse);
imdct512_window_delay_nol_kni (buf, data, window, delay);
}
#endif

36
ac3dec/imdct_kni.h Normal file
View File

@ -0,0 +1,36 @@
/*****
*
* This file is part of the OMS program.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*****/
#ifndef __IMDCT_KNI_H__
#define __IMDCT_KNI_H__
#include "cmplx.h"
int imdct_init_kni (void);
void fft_128p_kni(complex_t *);
void fft_64p_kni(complex_t *);
void imdct512_pre_ifft_twiddle_kni(const int *pmt, complex_t *buf, float *data, float *xcos_sin_sse);
void imdct512_post_ifft_twiddle_kni(complex_t *buf, float *xcos_sin_sse);
void imdct512_window_delay_kni(complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt);
void imdct512_window_delay_nol_kni(complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt);
#endif

36
ac3dec/mm_accel.h Normal file
View File

@ -0,0 +1,36 @@
/*****
*
* This file is part of the OMS program.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*****/
#ifndef __MM_ACCEL_H__
#define __MM_ACCEL_H__
#include <inttypes.h>
// generic accelerations
#define MM_ACCEL_MLIB 0x00000001
// x86 accelerations
#define MM_ACCEL_X86_MMX 0x80000000
#define MM_ACCEL_X86_3DNOW 0x40000000
#define MM_ACCEL_X86_MMXEXT 0x20000000
uint32_t mm_accel (void);
#endif

View File

@ -31,15 +31,16 @@
#include "bitstream.h" #include "bitstream.h"
#include "stats.h" #include "stats.h"
#include "debug.h" #include "debug.h"
#include "crc.h"
#include "parse.h" #include "parse.h"
/* Misc LUT */ /* Misc LUT */
static const uint_16 nfchans[8] = {2,1,2,3,3,4,4,5}; static const uint16_t nfchans[8] = {2,1,2,3,3,4,4,5};
struct frmsize_s struct frmsize_s
{ {
uint_16 bit_rate; uint16_t bit_rate;
uint_16 frm_size[3]; uint16_t frm_size[3];
}; };
static const struct frmsize_s frmsizecod_tbl[64] = static const struct frmsize_s frmsizecod_tbl[64] =
@ -85,8 +86,7 @@ static const struct frmsize_s frmsizecod_tbl[64] =
}; };
/* Parse a syncinfo structure, minus the sync word */ /* Parse a syncinfo structure, minus the sync word */
void void parse_syncinfo(syncinfo_t *syncinfo, uint8_t *data)
parse_syncinfo(syncinfo_t *syncinfo,uint_8 *data)
{ {
// //
// We need to read in the entire syncinfo struct (0x0b77 + 24 bits) // We need to read in the entire syncinfo struct (0x0b77 + 24 bits)
@ -96,10 +96,8 @@ parse_syncinfo(syncinfo_t *syncinfo,uint_8 *data)
// Get the sampling rate // Get the sampling rate
syncinfo->fscod = (data[2] >> 6) & 0x3; syncinfo->fscod = (data[2] >> 6) & 0x3;
if(syncinfo->fscod == 3) if(syncinfo->fscod == 3) {
{
//invalid sampling rate code //invalid sampling rate code
error_flag = 1;
return; return;
} }
else if(syncinfo->fscod == 2) else if(syncinfo->fscod == 2)
@ -119,15 +117,13 @@ parse_syncinfo(syncinfo_t *syncinfo,uint_8 *data)
} }
/*
/**
* This routine fills a bsi struct from the AC3 stream * This routine fills a bsi struct from the AC3 stream
*/ **/
void void parse_bsi(bsi_t *bsi)
parse_bsi(bsi_t *bsi)
{ {
uint_32 i;
/* Check the AC-3 version number */ /* Check the AC-3 version number */
bsi->bsid = bitstream_get(5); bsi->bsid = bitstream_get(5);
@ -159,25 +155,19 @@ parse_bsi(bsi_t *bsi)
bsi->dialnorm = bitstream_get(5); bsi->dialnorm = bitstream_get(5);
/* Does compression gain exist? */ /* Does compression gain exist? */
bsi->compre = bitstream_get(1); if ((bsi->compre = bitstream_get(1))) {
if (bsi->compre)
{
/* Get compression gain */ /* Get compression gain */
bsi->compr = bitstream_get(8); bsi->compr = bitstream_get(8);
} }
/* Does language code exist? */ /* Does language code exist? */
bsi->langcode = bitstream_get(1); if ((bsi->langcode = bitstream_get(1))) {
if (bsi->langcode)
{
/* Get langauge code */ /* Get langauge code */
bsi->langcod = bitstream_get(8); bsi->langcod = bitstream_get(8);
} }
/* Does audio production info exist? */ /* Does audio production info exist? */
bsi->audprodie = bitstream_get(1); if ((bsi->audprodie = bitstream_get(1))) {
if (bsi->audprodie)
{
/* Get mix level */ /* Get mix level */
bsi->mixlevel = bitstream_get(5); bsi->mixlevel = bitstream_get(5);
@ -186,31 +176,24 @@ parse_bsi(bsi_t *bsi)
} }
/* If we're in dual mono mode then get some extra info */ /* If we're in dual mono mode then get some extra info */
if (bsi->acmod ==0) if (!bsi->acmod) {
{
/* Get the dialogue normalization level two */ /* Get the dialogue normalization level two */
bsi->dialnorm2 = bitstream_get(5); bsi->dialnorm2 = bitstream_get(5);
/* Does compression gain two exist? */ /* Does compression gain two exist? */
bsi->compr2e = bitstream_get(1); if ((bsi->compr2e = bitstream_get(1))) {
if (bsi->compr2e)
{
/* Get compression gain two */ /* Get compression gain two */
bsi->compr2 = bitstream_get(8); bsi->compr2 = bitstream_get(8);
} }
/* Does language code two exist? */ /* Does language code two exist? */
bsi->langcod2e = bitstream_get(1); if ((bsi->langcod2e = bitstream_get(1))) {
if (bsi->langcod2e)
{
/* Get langauge code two */ /* Get langauge code two */
bsi->langcod2 = bitstream_get(8); bsi->langcod2 = bitstream_get(8);
} }
/* Does audio production info two exist? */ /* Does audio production info two exist? */
bsi->audprodi2e = bitstream_get(1); if ((bsi->audprodi2e = bitstream_get(1))) {
if (bsi->audprodi2e)
{
/* Get mix level two */ /* Get mix level two */
bsi->mixlevel2 = bitstream_get(5); bsi->mixlevel2 = bitstream_get(5);
@ -226,22 +209,17 @@ parse_bsi(bsi_t *bsi)
bsi->origbs = bitstream_get(1); bsi->origbs = bitstream_get(1);
/* Does timecode one exist? */ /* Does timecode one exist? */
bsi->timecod1e = bitstream_get(1); if ((bsi->timecod1e = bitstream_get(1)))
if(bsi->timecod1e)
bsi->timecod1 = bitstream_get(14); bsi->timecod1 = bitstream_get(14);
/* Does timecode two exist? */ /* Does timecode two exist? */
bsi->timecod2e = bitstream_get(1); if ((bsi->timecod2e = bitstream_get(1)))
if(bsi->timecod2e)
bsi->timecod2 = bitstream_get(14); bsi->timecod2 = bitstream_get(14);
/* Does addition info exist? */ /* Does addition info exist? */
bsi->addbsie = bitstream_get(1); if ((bsi->addbsie = bitstream_get(1))) {
uint32_t i;
if(bsi->addbsie)
{
/* Get how much info is there */ /* Get how much info is there */
bsi->addbsil = bitstream_get(6); bsi->addbsil = bitstream_get(6);
@ -253,52 +231,41 @@ parse_bsi(bsi_t *bsi)
stats_print_bsi(bsi); stats_print_bsi(bsi);
} }
/* More pain inducing parsing */ /* More pain inducing parsing */
void void parse_audblk(bsi_t *bsi,audblk_t *audblk)
parse_audblk(bsi_t *bsi,audblk_t *audblk)
{ {
int i,j; int i,j;
for (i=0;i < bsi->nfchans; i++) for (i=0; i < bsi->nfchans; i++) {
{
/* Is this channel an interleaved 256 + 256 block ? */ /* Is this channel an interleaved 256 + 256 block ? */
audblk->blksw[i] = bitstream_get(1); audblk->blksw[i] = bitstream_get(1);
} }
for (i=0;i < bsi->nfchans; i++) for (i=0;i < bsi->nfchans; i++) {
{
/* Should we dither this channel? */ /* Should we dither this channel? */
audblk->dithflag[i] = bitstream_get(1); audblk->dithflag[i] = bitstream_get(1);
} }
/* Does dynamic range control exist? */ /* Does dynamic range control exist? */
audblk->dynrnge = bitstream_get(1); if ((audblk->dynrnge = bitstream_get(1))) {
if (audblk->dynrnge)
{
/* Get dynamic range info */ /* Get dynamic range info */
audblk->dynrng = bitstream_get(8); audblk->dynrng = bitstream_get(8);
} }
/* If we're in dual mono mode then get the second channel DR info */ /* If we're in dual mono mode then get the second channel DR info */
if (bsi->acmod == 0) if (bsi->acmod == 0) {
{
/* Does dynamic range control two exist? */ /* Does dynamic range control two exist? */
audblk->dynrng2e = bitstream_get(1); if ((audblk->dynrng2e = bitstream_get(1))) {
if (audblk->dynrng2e)
{
/* Get dynamic range info */ /* Get dynamic range info */
audblk->dynrng2 = bitstream_get(8); audblk->dynrng2 = bitstream_get(8);
} }
} }
/* Does coupling strategy exist? */ /* Does coupling strategy exist? */
audblk->cplstre = bitstream_get(1); if ((audblk->cplstre = bitstream_get(1))) {
if (audblk->cplstre)
{
/* Is coupling turned on? */ /* Is coupling turned on? */
audblk->cplinu = bitstream_get(1); if ((audblk->cplinu = bitstream_get(1))) {
if(audblk->cplinu)
{
for(i=0;i < bsi->nfchans; i++) for(i=0;i < bsi->nfchans; i++)
audblk->chincpl[i] = bitstream_get(1); audblk->chincpl[i] = bitstream_get(1);
if(bsi->acmod == 0x2) if(bsi->acmod == 0x2)
@ -315,30 +282,23 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
* band */ * band */
audblk->ncplbnd = audblk->ncplsubnd; audblk->ncplbnd = audblk->ncplsubnd;
for(i=1; i< audblk->ncplsubnd; i++) for(i=1; i< audblk->ncplsubnd; i++) {
{
audblk->cplbndstrc[i] = bitstream_get(1); audblk->cplbndstrc[i] = bitstream_get(1);
audblk->ncplbnd -= audblk->cplbndstrc[i]; audblk->ncplbnd -= audblk->cplbndstrc[i];
} }
} }
} }
if(audblk->cplinu) if(audblk->cplinu) {
{
/* Loop through all the channels and get their coupling co-ords */ /* Loop through all the channels and get their coupling co-ords */
for(i=0;i < bsi->nfchans;i++) for(i=0;i < bsi->nfchans;i++) {
{
if(!audblk->chincpl[i]) if(!audblk->chincpl[i])
continue; continue;
/* Is there new coupling co-ordinate info? */ /* Is there new coupling co-ordinate info? */
audblk->cplcoe[i] = bitstream_get(1); if ((audblk->cplcoe[i] = bitstream_get(1))) {
if(audblk->cplcoe[i])
{
audblk->mstrcplco[i] = bitstream_get(2); audblk->mstrcplco[i] = bitstream_get(2);
for(j=0;j < audblk->ncplbnd; j++) for(j=0;j < audblk->ncplbnd; j++) {
{
audblk->cplcoexp[i][j] = bitstream_get(4); audblk->cplcoexp[i][j] = bitstream_get(4);
audblk->cplcomant[i][j] = bitstream_get(4); audblk->cplcomant[i][j] = bitstream_get(4);
} }
@ -347,8 +307,7 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
/* If we're in dual mono mode, there's going to be some phase info */ /* If we're in dual mono mode, there's going to be some phase info */
if( (bsi->acmod == 0x2) && audblk->phsflginu && if( (bsi->acmod == 0x2) && audblk->phsflginu &&
(audblk->cplcoe[0] || audblk->cplcoe[1])) (audblk->cplcoe[0] || audblk->cplcoe[1])) {
{
for(j=0;j < audblk->ncplbnd; j++) for(j=0;j < audblk->ncplbnd; j++)
audblk->phsflg[j] = bitstream_get(1); audblk->phsflg[j] = bitstream_get(1);
@ -356,23 +315,17 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
} }
/* If we're in dual mono mode, there may be a rematrix strategy */ /* If we're in dual mono mode, there may be a rematrix strategy */
if(bsi->acmod == 0x2) if(bsi->acmod == 0x2) {
{ if ((audblk->rematstr = bitstream_get(1))) {
audblk->rematstr = bitstream_get(1); if (!audblk->cplinu) {
if(audblk->rematstr)
{
if (audblk->cplinu == 0)
{
for(i = 0; i < 4; i++) for(i = 0; i < 4; i++)
audblk->rematflg[i] = bitstream_get(1); audblk->rematflg[i] = bitstream_get(1);
} }
if((audblk->cplbegf > 2) && audblk->cplinu) if((audblk->cplbegf > 2) && audblk->cplinu) {
{
for(i = 0; i < 4; i++) for(i = 0; i < 4; i++)
audblk->rematflg[i] = bitstream_get(1); audblk->rematflg[i] = bitstream_get(1);
} }
if((audblk->cplbegf <= 2) && audblk->cplinu) if((audblk->cplbegf <= 2) && audblk->cplinu) {
{
for(i = 0; i < 3; i++) for(i = 0; i < 3; i++)
audblk->rematflg[i] = bitstream_get(1); audblk->rematflg[i] = bitstream_get(1);
} }
@ -383,8 +336,7 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
} }
} }
if (audblk->cplinu) if (audblk->cplinu) {
{
/* Get the coupling channel exponent strategy */ /* Get the coupling channel exponent strategy */
audblk->cplexpstr = bitstream_get(2); audblk->cplexpstr = bitstream_get(2);
audblk->ncplgrps = (audblk->cplendmant - audblk->cplstrtmant) / audblk->ncplgrps = (audblk->cplendmant - audblk->cplstrtmant) /
@ -399,18 +351,13 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
audblk->lfeexpstr = bitstream_get(1); audblk->lfeexpstr = bitstream_get(1);
/* Determine the bandwidths of all the fbw channels */ /* Determine the bandwidths of all the fbw channels */
for(i = 0; i < bsi->nfchans; i++) for(i = 0; i < bsi->nfchans; i++) {
{ uint16_t grp_size;
uint_16 grp_size;
if(audblk->chexpstr[i] != EXP_REUSE) if(audblk->chexpstr[i] != EXP_REUSE) {
{ if (audblk->cplinu && audblk->chincpl[i]) {
if (audblk->cplinu && audblk->chincpl[i])
{
audblk->endmant[i] = audblk->cplstrtmant; audblk->endmant[i] = audblk->cplstrtmant;
} } else {
else
{
audblk->chbwcod[i] = bitstream_get(6); audblk->chbwcod[i] = bitstream_get(6);
audblk->endmant[i] = ((audblk->chbwcod[i] + 12) * 3) + 37; audblk->endmant[i] = ((audblk->chbwcod[i] + 12) * 3) + 37;
} }
@ -422,18 +369,15 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
} }
/* Get the coupling exponents if they exist */ /* Get the coupling exponents if they exist */
if(audblk->cplinu && (audblk->cplexpstr != EXP_REUSE)) if(audblk->cplinu && (audblk->cplexpstr != EXP_REUSE)) {
{
audblk->cplabsexp = bitstream_get(4); audblk->cplabsexp = bitstream_get(4);
for(i=0;i< audblk->ncplgrps;i++) for(i=0;i< audblk->ncplgrps;i++)
audblk->cplexps[i] = bitstream_get(7); audblk->cplexps[i] = bitstream_get(7);
} }
/* Get the fwb channel exponents */ /* Get the fwb channel exponents */
for(i=0;i < bsi->nfchans; i++) for(i=0;i < bsi->nfchans; i++) {
{ if(audblk->chexpstr[i] != EXP_REUSE) {
if(audblk->chexpstr[i] != EXP_REUSE)
{
audblk->exps[i][0] = bitstream_get(4); audblk->exps[i][0] = bitstream_get(4);
for(j=1;j<=audblk->nchgrps[i];j++) for(j=1;j<=audblk->nchgrps[i];j++)
audblk->exps[i][j] = bitstream_get(7); audblk->exps[i][j] = bitstream_get(7);
@ -442,8 +386,7 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
} }
/* Get the lfe channel exponents */ /* Get the lfe channel exponents */
if(bsi->lfeon && (audblk->lfeexpstr != EXP_REUSE)) if(bsi->lfeon && (audblk->lfeexpstr != EXP_REUSE)) {
{
audblk->lfeexps[0] = bitstream_get(4); audblk->lfeexps[0] = bitstream_get(4);
audblk->lfeexps[1] = bitstream_get(7); audblk->lfeexps[1] = bitstream_get(7);
audblk->lfeexps[2] = bitstream_get(7); audblk->lfeexps[2] = bitstream_get(7);
@ -452,8 +395,7 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
/* Get the parametric bit allocation parameters */ /* Get the parametric bit allocation parameters */
audblk->baie = bitstream_get(1); audblk->baie = bitstream_get(1);
if(audblk->baie) if(audblk->baie) {
{
audblk->sdcycod = bitstream_get(2); audblk->sdcycod = bitstream_get(2);
audblk->fdcycod = bitstream_get(2); audblk->fdcycod = bitstream_get(2);
audblk->sgaincod = bitstream_get(2); audblk->sgaincod = bitstream_get(2);
@ -464,23 +406,19 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
/* Get the SNR off set info if it exists */ /* Get the SNR off set info if it exists */
audblk->snroffste = bitstream_get(1); audblk->snroffste = bitstream_get(1);
if(audblk->snroffste) if(audblk->snroffste) {
{
audblk->csnroffst = bitstream_get(6); audblk->csnroffst = bitstream_get(6);
if(audblk->cplinu) if(audblk->cplinu) {
{
audblk->cplfsnroffst = bitstream_get(4); audblk->cplfsnroffst = bitstream_get(4);
audblk->cplfgaincod = bitstream_get(3); audblk->cplfgaincod = bitstream_get(3);
} }
for(i = 0;i < bsi->nfchans; i++) for(i = 0;i < bsi->nfchans; i++) {
{
audblk->fsnroffst[i] = bitstream_get(4); audblk->fsnroffst[i] = bitstream_get(4);
audblk->fgaincod[i] = bitstream_get(3); audblk->fgaincod[i] = bitstream_get(3);
} }
if(bsi->lfeon) if(bsi->lfeon) {
{
audblk->lfefsnroffst = bitstream_get(4); audblk->lfefsnroffst = bitstream_get(4);
audblk->lfefgaincod = bitstream_get(3); audblk->lfefgaincod = bitstream_get(3);
@ -488,12 +426,10 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
} }
/* Get coupling leakage info if it exists */ /* Get coupling leakage info if it exists */
if(audblk->cplinu) if(audblk->cplinu) {
{
audblk->cplleake = bitstream_get(1); audblk->cplleake = bitstream_get(1);
if(audblk->cplleake) if(audblk->cplleake) {
{
audblk->cplfleak = bitstream_get(3); audblk->cplfleak = bitstream_get(3);
audblk->cplsleak = bitstream_get(3); audblk->cplsleak = bitstream_get(3);
} }
@ -502,32 +438,26 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
/* Get the delta bit alloaction info */ /* Get the delta bit alloaction info */
audblk->deltbaie = bitstream_get(1); audblk->deltbaie = bitstream_get(1);
if(audblk->deltbaie) if(audblk->deltbaie) {
{
if(audblk->cplinu) if(audblk->cplinu)
audblk->cpldeltbae = bitstream_get(2); audblk->cpldeltbae = bitstream_get(2);
for(i = 0;i < bsi->nfchans; i++) for(i = 0;i < bsi->nfchans; i++)
audblk->deltbae[i] = bitstream_get(2); audblk->deltbae[i] = bitstream_get(2);
if (audblk->cplinu && (audblk->cpldeltbae == DELTA_BIT_NEW)) if (audblk->cplinu && (audblk->cpldeltbae == DELTA_BIT_NEW)) {
{
audblk->cpldeltnseg = bitstream_get(3); audblk->cpldeltnseg = bitstream_get(3);
for(i = 0;i < audblk->cpldeltnseg + 1; i++) for(i = 0;i < audblk->cpldeltnseg + 1; i++) {
{
audblk->cpldeltoffst[i] = bitstream_get(5); audblk->cpldeltoffst[i] = bitstream_get(5);
audblk->cpldeltlen[i] = bitstream_get(4); audblk->cpldeltlen[i] = bitstream_get(4);
audblk->cpldeltba[i] = bitstream_get(3); audblk->cpldeltba[i] = bitstream_get(3);
} }
} }
for(i = 0;i < bsi->nfchans; i++) for(i = 0;i < bsi->nfchans; i++) {
{ if (audblk->deltbae[i] == DELTA_BIT_NEW) {
if (audblk->deltbae[i] == DELTA_BIT_NEW)
{
audblk->deltnseg[i] = bitstream_get(3); audblk->deltnseg[i] = bitstream_get(3);
for(j = 0; j < audblk->deltnseg[i] + 1; j++) for(j = 0; j < audblk->deltnseg[i] + 1; j++) {
{
audblk->deltoffst[i][j] = bitstream_get(5); audblk->deltoffst[i][j] = bitstream_get(5);
audblk->deltlen[i][j] = bitstream_get(4); audblk->deltlen[i][j] = bitstream_get(4);
audblk->deltba[i][j] = bitstream_get(3); audblk->deltba[i][j] = bitstream_get(3);
@ -537,61 +467,15 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
} }
/* Check to see if there's any dummy info to get */ /* Check to see if there's any dummy info to get */
if((audblk->skiple = bitstream_get(1))) if((audblk->skiple = bitstream_get(1))) {
{ uint16_t skip_data;
uint_16 skip_data;
audblk->skipl = bitstream_get(9); audblk->skipl = bitstream_get(9);
//XXX remove
//fprintf(stderr,"(parse) skipping %d bytes\n",audblk->skipl);
for(i = 0; i < audblk->skipl ; i++) for (i = 0; i < audblk->skipl; i++) {
{
skip_data = bitstream_get(8); skip_data = bitstream_get(8);
//XXX remove
//fprintf(stderr,"skipped data %2x\n",skip_data);
//if(skip_data != 0)
//{
//dprintf("(parse) Invalid skipped data %2x\n",skip_data);
//exit(1);
//}
} }
} }
stats_print_audblk(bsi,audblk); stats_print_audblk(bsi,audblk);
} }
void
parse_auxdata(syncinfo_t *syncinfo)
{
//FIXME keep this now that we don't really need it?
#if 0
int i;
int skip_length;
uint_16 crc;
uint_16 auxdatae;
skip_length = (syncinfo->frame_size * 16) - bitstream_get_total_bits() - 17 - 1;
//XXX remove
//dprintf("(auxdata) skipping %d auxbits\n",skip_length);
for(i=0; i < skip_length; i++)
//printf("Skipped bit %i\n",(uint_16)bitstream_get(1));
bitstream_get(1);
//get the auxdata exists bit
auxdatae = bitstream_get(1);
//XXX remove
//dprintf("auxdatae = %i\n",auxdatae);
//Skip the CRC reserved bit
bitstream_get(1);
//Get the crc
crc = bitstream_get(16);
#endif
}

View File

@ -21,8 +21,6 @@
* *
*/ */
void parse_syncinfo(syncinfo_t *syncinfo,uint_8 *data); void parse_syncinfo(syncinfo_t *syncinfo,uint8_t *data);
void parse_audblk(bsi_t *bsi,audblk_t *audblk); void parse_audblk(bsi_t *bsi,audblk_t *audblk);
void parse_bsi(bsi_t *bsi); void parse_bsi(bsi_t *bsi);
void parse_auxdata(syncinfo_t *syncinfo);

View File

@ -28,52 +28,60 @@
#include "ac3_internal.h" #include "ac3_internal.h"
#include "decode.h"
#include "rematrix.h" #include "rematrix.h"
struct rematrix_band_s struct rematrix_band_s
{ {
uint_32 start; uint32_t start;
uint_32 end; uint32_t end;
} rematrix_band[] = {
{13, 24},
{25, 36},
{37, 60},
{61, 252}
}; };
struct rematrix_band_s rematrix_band[] = { {13,24}, {25,36}, {37 ,60}, {61,252}};
static inline uint_32 min(uint_32 a,uint_32 b); /**
*
**/
static inline uint_32 inline uint32_t min (uint32_t a, uint32_t b)
min(uint_32 a,uint_32 b)
{ {
return (a < b ? a : b); return (a < b) ? a : b;
} }
/* This routine simply does stereo rematixing for the 2 channel
* stereo mode */
void rematrix(audblk_t *audblk, stream_samples_t samples)
{
uint_32 num_bands;
uint_32 start;
uint_32 end;
uint_32 i,j;
float left,right;
if(!audblk->cplinu || audblk->cplbegf > 2) /**
* This routine simply does stereo remartixing for the 2 channel
* stereo mode
**/
void rematrix (audblk_t *audblk, stream_samples_t samples)
{
uint32_t num_bands;
uint32_t start;
uint32_t end;
int i,j;
if (!audblk->cplinu || audblk->cplbegf > 2)
num_bands = 4; num_bands = 4;
else if (audblk->cplbegf > 0) else if (audblk->cplbegf > 0)
num_bands = 3; num_bands = 3;
else else
num_bands = 2; num_bands = 2;
for(i=0;i < num_bands; i++) for (i=0; i < num_bands; i++) {
{ if (!audblk->rematflg[i])
if(!audblk->rematflg[i])
continue; continue;
start = rematrix_band[i].start; start = rematrix_band[i].start;
end = min(rematrix_band[i].end ,12 * audblk->cplbegf + 36); end = min (rematrix_band[i].end ,12 * audblk->cplbegf + 36);
for (j=start;j < end; j++) {
float left,right;
for(j=start;j < end; j++)
{
left = samples[0][j] + samples[1][j]; left = samples[0][j] + samples[1][j];
right = samples[0][j] - samples[1][j]; right = samples[0][j] - samples[1][j];
samples[0][j] = left; samples[0][j] = left;

View File

@ -28,8 +28,11 @@
#include "sanity_check.h" #include "sanity_check.h"
void /**
sanity_check_init(syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk) *
**/
void sanity_check_init(syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk)
{ {
syncinfo->magic = AC3_MAGIC_NUMBER; syncinfo->magic = AC3_MAGIC_NUMBER;
bsi->magic = AC3_MAGIC_NUMBER; bsi->magic = AC3_MAGIC_NUMBER;
@ -38,94 +41,84 @@ sanity_check_init(syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk)
audblk->magic3 = AC3_MAGIC_NUMBER; audblk->magic3 = AC3_MAGIC_NUMBER;
} }
void
sanity_check(syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk) /**
*
**/
int sanity_check(syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk)
{ {
int i; int i;
if(syncinfo->magic != AC3_MAGIC_NUMBER) if(syncinfo->magic != AC3_MAGIC_NUMBER) {
{
fprintf(stderr,"\n** Sanity check failed -- syncinfo magic number **"); fprintf(stderr,"\n** Sanity check failed -- syncinfo magic number **");
error_flag = 1; return -1;
} }
if(bsi->magic != AC3_MAGIC_NUMBER) if(bsi->magic != AC3_MAGIC_NUMBER) {
{
fprintf(stderr,"\n** Sanity check failed -- bsi magic number **"); fprintf(stderr,"\n** Sanity check failed -- bsi magic number **");
error_flag = 1; return -1;
} }
if(audblk->magic1 != AC3_MAGIC_NUMBER) if(audblk->magic1 != AC3_MAGIC_NUMBER) {
{
fprintf(stderr,"\n** Sanity check failed -- audblk magic number 1 **"); fprintf(stderr,"\n** Sanity check failed -- audblk magic number 1 **");
error_flag = 1; return -1;
} }
if(audblk->magic2 != AC3_MAGIC_NUMBER) if(audblk->magic2 != AC3_MAGIC_NUMBER) {
{
fprintf(stderr,"\n** Sanity check failed -- audblk magic number 2 **"); fprintf(stderr,"\n** Sanity check failed -- audblk magic number 2 **");
error_flag = 1; return -1;
} }
if(audblk->magic3 != AC3_MAGIC_NUMBER) if(audblk->magic3 != AC3_MAGIC_NUMBER) {
{
fprintf(stderr,"\n** Sanity check failed -- audblk magic number 3 **"); fprintf(stderr,"\n** Sanity check failed -- audblk magic number 3 **");
error_flag = 1; return -1;
} }
for(i = 0;i < 5 ; i++) for(i = 0;i < 5 ; i++) {
{
if (audblk->fbw_exp[i][255] !=0 || audblk->fbw_exp[i][254] !=0 || if (audblk->fbw_exp[i][255] !=0 || audblk->fbw_exp[i][254] !=0 ||
audblk->fbw_exp[i][253] !=0) audblk->fbw_exp[i][253] !=0) {
{
fprintf(stderr,"\n** Sanity check failed -- fbw_exp out of bounds **"); fprintf(stderr,"\n** Sanity check failed -- fbw_exp out of bounds **");
error_flag = 1; return -1;
} }
if (audblk->fbw_bap[i][255] !=0 || audblk->fbw_bap[i][254] !=0 || if (audblk->fbw_bap[i][255] !=0 || audblk->fbw_bap[i][254] !=0 ||
audblk->fbw_bap[i][253] !=0) audblk->fbw_bap[i][253] !=0) {
{
fprintf(stderr,"\n** Sanity check failed -- fbw_bap out of bounds **"); fprintf(stderr,"\n** Sanity check failed -- fbw_bap out of bounds **");
error_flag = 1; return -1;
} }
} }
if (audblk->cpl_exp[255] !=0 || audblk->cpl_exp[254] !=0 || if (audblk->cpl_exp[255] !=0 || audblk->cpl_exp[254] !=0 ||
audblk->cpl_exp[253] !=0) audblk->cpl_exp[253] !=0) {
{
fprintf(stderr,"\n** Sanity check failed -- cpl_exp out of bounds **"); fprintf(stderr,"\n** Sanity check failed -- cpl_exp out of bounds **");
error_flag = 1; return -1;
} }
if (audblk->cpl_bap[255] !=0 || audblk->cpl_bap[254] !=0 || if (audblk->cpl_bap[255] !=0 || audblk->cpl_bap[254] !=0 ||
audblk->cpl_bap[253] !=0) audblk->cpl_bap[253] !=0) {
{
fprintf(stderr,"\n** Sanity check failed -- cpl_bap out of bounds **"); fprintf(stderr,"\n** Sanity check failed -- cpl_bap out of bounds **");
error_flag = 1; return -1;
} }
if (audblk->cplmant[255] !=0 || audblk->cplmant[254] !=0 || if (audblk->cpl_flt[255] !=0 || audblk->cpl_flt[254] !=0 ||
audblk->cplmant[253] !=0) audblk->cpl_flt[253] !=0) {
{
fprintf(stderr,"\n** Sanity check failed -- cpl_mant out of bounds **"); fprintf(stderr,"\n** Sanity check failed -- cpl_mant out of bounds **");
error_flag = 1; return -1;
} }
if ((audblk->cplinu == 1) && (audblk->cplbegf > (audblk->cplendf+2))) if ((audblk->cplinu == 1) && (audblk->cplbegf > (audblk->cplendf+2))) {
{
fprintf(stderr,"\n** Sanity check failed -- cpl params inconsistent **"); fprintf(stderr,"\n** Sanity check failed -- cpl params inconsistent **");
error_flag = 1; return -1;
} }
for(i=0; i < bsi->nfchans; i++) for(i=0; i < bsi->nfchans; i++) {
{ if((audblk->chincpl[i] == 0) && (audblk->chbwcod[i] > 60)) {
if((audblk->chincpl[i] == 0) && (audblk->chbwcod[i] > 60))
{
fprintf(stderr,"\n** Sanity check failed -- chbwcod too big **"); fprintf(stderr,"\n** Sanity check failed -- chbwcod too big **");
error_flag = 1; return -1;
} }
} }
return; return 0;
} }

View File

@ -23,5 +23,5 @@
#define AC3_MAGIC_NUMBER 0xdeadbeef #define AC3_MAGIC_NUMBER 0xdeadbeef
void sanity_check_init(syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk); void sanity_check_init (syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk);
void sanity_check(syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk); int sanity_check (syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk);

305
ac3dec/srfft.c Normal file
View File

@ -0,0 +1,305 @@
/*
* srfft.c
*
* Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - April 2000
*
* 64 and 128 point split radix fft for ac3dec
*
* The algorithm is desribed in the book:
* "Computational Frameworks of the Fast Fourier Transform".
*
* The ideas and the the organization of code borrowed from djbfft written by
* D. J. Bernstein <djb@cr.py.to>. djbff can be found at
* http://cr.yp.to/djbfft.html.
*
* srfft.c is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* srfft.c is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*/
#include <stdio.h>
#include "srfft.h"
#include "srfftp.h"
void fft_8 (complex_t *x);
void fft_4(complex_t *x)
{
/* delta_p = 1 here */
/* x[k] = sum_{i=0..3} x[i] * w^{i*k}, w=e^{-2*pi/4}
*/
register float yt_r, yt_i, yb_r, yb_i, u_r, u_i, vi_r, vi_i;
yt_r = x[0].re;
yb_r = yt_r - x[2].re;
yt_r += x[2].re;
u_r = x[1].re;
vi_i = x[3].re - u_r;
u_r += x[3].re;
u_i = x[1].im;
vi_r = u_i - x[3].im;
u_i += x[3].im;
yt_i = yt_r;
yt_i += u_r;
x[0].re = yt_i;
yt_r -= u_r;
x[2].re = yt_r;
yt_i = yb_r;
yt_i += vi_r;
x[1].re = yt_i;
yb_r -= vi_r;
x[3].re = yb_r;
yt_i = x[0].im;
yb_i = yt_i - x[2].im;
yt_i += x[2].im;
yt_r = yt_i;
yt_r += u_i;
x[0].im = yt_r;
yt_i -= u_i;
x[2].im = yt_i;
yt_r = yb_i;
yt_r += vi_i;
x[1].im = yt_r;
yb_i -= vi_i;
x[3].im = yb_i;
}
void fft_8 (complex_t *x)
{
/* delta_p = diag{1, sqrt(i)} here */
/* x[k] = sum_{i=0..7} x[i] * w^{i*k}, w=e^{-2*pi/8}
*/
register float wT1_r, wT1_i, wB1_r, wB1_i, wT2_r, wT2_i, wB2_r, wB2_i;
wT1_r = x[1].re;
wT1_i = x[1].im;
wB1_r = x[3].re;
wB1_i = x[3].im;
x[1] = x[2];
x[2] = x[4];
x[3] = x[6];
fft_4(&x[0]);
/* x[0] x[4] */
wT2_r = x[5].re;
wT2_r += x[7].re;
wT2_r += wT1_r;
wT2_r += wB1_r;
wT2_i = wT2_r;
wT2_r += x[0].re;
wT2_i = x[0].re - wT2_i;
x[0].re = wT2_r;
x[4].re = wT2_i;
wT2_i = x[5].im;
wT2_i += x[7].im;
wT2_i += wT1_i;
wT2_i += wB1_i;
wT2_r = wT2_i;
wT2_r += x[0].im;
wT2_i = x[0].im - wT2_i;
x[0].im = wT2_r;
x[4].im = wT2_i;
/* x[2] x[6] */
wT2_r = x[5].im;
wT2_r -= x[7].im;
wT2_r += wT1_i;
wT2_r -= wB1_i;
wT2_i = wT2_r;
wT2_r += x[2].re;
wT2_i = x[2].re - wT2_i;
x[2].re = wT2_r;
x[6].re = wT2_i;
wT2_i = x[5].re;
wT2_i -= x[7].re;
wT2_i += wT1_r;
wT2_i -= wB1_r;
wT2_r = wT2_i;
wT2_r += x[2].im;
wT2_i = x[2].im - wT2_i;
x[2].im = wT2_i;
x[6].im = wT2_r;
/* x[1] x[5] */
wT2_r = wT1_r;
wT2_r += wB1_i;
wT2_r -= x[5].re;
wT2_r -= x[7].im;
wT2_i = wT1_i;
wT2_i -= wB1_r;
wT2_i -= x[5].im;
wT2_i += x[7].re;
wB2_r = wT2_r;
wB2_r += wT2_i;
wT2_i -= wT2_r;
wB2_r *= HSQRT2;
wT2_i *= HSQRT2;
wT2_r = wB2_r;
wB2_r += x[1].re;
wT2_r = x[1].re - wT2_r;
wB2_i = x[5].re;
x[1].re = wB2_r;
x[5].re = wT2_r;
wT2_r = wT2_i;
wT2_r += x[1].im;
wT2_i = x[1].im - wT2_i;
wB2_r = x[5].im;
x[1].im = wT2_r;
x[5].im = wT2_i;
/* x[3] x[7] */
wT1_r -= wB1_i;
wT1_i += wB1_r;
wB1_r = wB2_i - x[7].im;
wB1_i = wB2_r + x[7].re;
wT1_r -= wB1_r;
wT1_i -= wB1_i;
wB1_r = wT1_r + wT1_i;
wB1_r *= HSQRT2;
wT1_i -= wT1_r;
wT1_i *= HSQRT2;
wB2_r = x[3].re;
wB2_i = wB2_r + wT1_i;
wB2_r -= wT1_i;
x[3].re = wB2_i;
x[7].re = wB2_r;
wB2_i = x[3].im;
wB2_r = wB2_i + wB1_r;
wB2_i -= wB1_r;
x[3].im = wB2_i;
x[7].im = wB2_r;
}
void fft_asmb(int k, complex_t *x, complex_t *wTB,
const complex_t *d, const complex_t *d_3)
{
register complex_t *x2k, *x3k, *x4k, *wB;
register float a_r, a_i, a1_r, a1_i, u_r, u_i, v_r, v_i;
x2k = x + 2 * k;
x3k = x2k + 2 * k;
x4k = x3k + 2 * k;
wB = wTB + 2 * k;
TRANSZERO(x[0],x2k[0],x3k[0],x4k[0]);
TRANS(x[1],x2k[1],x3k[1],x4k[1],wTB[1],wB[1],d[1],d_3[1]);
--k;
for(;;) {
TRANS(x[2],x2k[2],x3k[2],x4k[2],wTB[2],wB[2],d[2],d_3[2]);
TRANS(x[3],x2k[3],x3k[3],x4k[3],wTB[3],wB[3],d[3],d_3[3]);
if (!--k) break;
x += 2;
x2k += 2;
x3k += 2;
x4k += 2;
d += 2;
d_3 += 2;
wTB += 2;
wB += 2;
}
}
void fft_asmb16(complex_t *x, complex_t *wTB)
{
register float a_r, a_i, a1_r, a1_i, u_r, u_i, v_r, v_i;
int k = 2;
/* transform x[0], x[8], x[4], x[12] */
TRANSZERO(x[0],x[4],x[8],x[12]);
/* transform x[1], x[9], x[5], x[13] */
TRANS(x[1],x[5],x[9],x[13],wTB[1],wTB[5],delta16[1],delta16_3[1]);
/* transform x[2], x[10], x[6], x[14] */
TRANSHALF_16(x[2],x[6],x[10],x[14]);
/* transform x[3], x[11], x[7], x[15] */
TRANS(x[3],x[7],x[11],x[15],wTB[3],wTB[7],delta16[3],delta16_3[3]);
}
void fft_64p_c (complex_t *a)
{
fft_8(&a[0]); fft_4(&a[8]); fft_4(&a[12]);
fft_asmb16(&a[0], &a[8]);
fft_8(&a[16]), fft_8(&a[24]);
fft_asmb(4, &a[0], &a[16],&delta32[0], &delta32_3[0]);
fft_8(&a[32]); fft_4(&a[40]); fft_4(&a[44]);
fft_asmb16(&a[32], &a[40]);
fft_8(&a[48]); fft_4(&a[56]); fft_4(&a[60]);
fft_asmb16(&a[48], &a[56]);
fft_asmb(8, &a[0], &a[32],&delta64[0], &delta64_3[0]);
}
void fft_128p_c (complex_t *a)
{
fft_8(&a[0]); fft_4(&a[8]); fft_4(&a[12]);
fft_asmb16(&a[0], &a[8]);
fft_8(&a[16]), fft_8(&a[24]);
fft_asmb(4, &a[0], &a[16],&delta32[0], &delta32_3[0]);
fft_8(&a[32]); fft_4(&a[40]); fft_4(&a[44]);
fft_asmb16(&a[32], &a[40]);
fft_8(&a[48]); fft_4(&a[56]); fft_4(&a[60]);
fft_asmb16(&a[48], &a[56]);
fft_asmb(8, &a[0], &a[32],&delta64[0], &delta64_3[0]);
fft_8(&a[64]); fft_4(&a[72]); fft_4(&a[76]);
/* fft_16(&a[64]); */
fft_asmb16(&a[64], &a[72]);
fft_8(&a[80]); fft_8(&a[88]);
/* fft_32(&a[64]); */
fft_asmb(4, &a[64], &a[80],&delta32[0], &delta32_3[0]);
fft_8(&a[96]); fft_4(&a[104]), fft_4(&a[108]);
/* fft_16(&a[96]); */
fft_asmb16(&a[96], &a[104]);
fft_8(&a[112]), fft_8(&a[120]);
/* fft_32(&a[96]); */
fft_asmb(4, &a[96], &a[112], &delta32[0], &delta32_3[0]);
/* fft_128(&a[0]); */
fft_asmb(16, &a[0], &a[64], &delta128[0], &delta128_3[0]);
}

39
ac3dec/srfft.h Normal file
View File

@ -0,0 +1,39 @@
/*
* srfft.h
*
* Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - April 2000
*
* 64 and 128 point split radix fft for ac3dec
*
* The algorithm is desribed in the book:
* "Computational Frameworks of the Fast Fourier Transform".
*
* The ideas and the the organization of code borrowed from djbfft written by
* D. J. Bernstein <djb@cr.py.to>. djbff can be found at
* http://cr.yp.to/djbfft.html.
*
* srfft.h is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* srfft.h is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*/
#ifndef SRFFT_H__
#define SRFFT_H__
#include "cmplx.h"
void fft_64p_c (complex_t *x);
void fft_128p_c (complex_t *x);
#endif /* SRFFT_H__ */

289
ac3dec/srfft_kni.S Normal file
View File

@ -0,0 +1,289 @@
/*
* srfft_kni.S
*
* Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - October 2000
*
*
* srfft_kni.S is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* srfft_kni.S is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*/
#ifdef __i386__
.section .rodata
.align 16
hsqrt2: .float 0f0.707106781188
.float 0f0.707106781188
.float 0f-0.707106781188
.float 0f-0.707106781188
C_1: .float 0f-1.0
.float 0f1.0
.float 0f-1.0
.float 0f1.0
.text
.align 4
.global fft_4_kni
.type fft_4_kni, @function
fft_4_kni:
pushl %ebp
movl %esp, %ebp
movl 8(%ebp), %eax /* complex_t * */
movaps (%eax), %xmm0 /* x[1] | x[0] */
movaps 16(%eax), %xmm2 /* x[3] | x[2] */
movaps %xmm0, %xmm1 /* x[1] | x[0] */
addps %xmm2, %xmm0 /* x[1] + x[3] | x[0] + x[2] */
subps %xmm2, %xmm1 /* x[1] - x[3] | x[0] - x[2] */
xorps %xmm6, %xmm6
movhlps %xmm1, %xmm4 /* x[1] - x[3] */
movhlps %xmm0, %xmm3 /* x[1] + x[3] */
subss %xmm4, %xmm6 /* -(x[1] - x[3]).re */
movlhps %xmm1, %xmm0 /* x[0] - x[2] | x[0] + x[2] */
movss %xmm6, %xmm4 /* (x[1] - x[3]).im | (x[3]-x[1]).re */
movaps %xmm0, %xmm2 /* x[0] - x[2] | x[0] + x[2] */
shufps $0x14, %xmm4, %xmm3 /* -i*(x[2] - x[3] | x[2] + x[3] */
addps %xmm3, %xmm0
subps %xmm3, %xmm2
movaps %xmm0, (%eax)
movaps %xmm2, 16(%eax)
leave
ret
.align 4
.global fft_8_kni
.type fft_8_kni, @function
fft_8_kni:
pushl %ebp
movl %esp, %ebp
movl 8(%ebp), %eax /* complext_t */
pushl %ebx
movlps (%eax), %xmm0 /* x[0] */
movlps 32(%eax), %xmm1 /* x[4] */
movhps 16(%eax), %xmm0 /* x[2] | x[0] */
movhps 48(%eax), %xmm1 /* x[6] | x[4] */
movaps %xmm0, %xmm2 /* x[2] | x[0] */
xorps %xmm3, %xmm3
addps %xmm1, %xmm0 /* x[2] + x[6] | x[0] + x[4] */
subps %xmm1, %xmm2 /* x[2] - x[6] | x[0] - x[4] */
movhlps %xmm0, %xmm5 /* x[2] + x[6] */
movhlps %xmm2, %xmm4
movlhps %xmm2, %xmm0 /* x[0] - x[4] | x[0] + x[4] */
subss %xmm4, %xmm3 /* -(x[2]-x[6]).re */
movaps %xmm0, %xmm7 /* x[0] - x[4] | x[0] + x[4] */
movss %xmm3, %xmm4 /* (x[2]-x[6]).im | -(x[2]-x[6]).re */
movlps 8(%eax), %xmm1 /* x[1] */
shufps $0x14, %xmm4, %xmm5 /* -i*(x[2] - x[6]) | x[2] + x[6] */
addps %xmm5, %xmm0 /* yt */
subps %xmm5, %xmm7 /* yb */
movhps 24(%eax), %xmm1 /* x[3] | x[1] */
movl $hsqrt2, %ebx
movlps 40(%eax), %xmm2 /* x[5] */
movhps 56(%eax), %xmm2 /* /x[7] | x[5] */
movaps %xmm1, %xmm3 /* x[3] | x[1] */
addps %xmm2, %xmm1 /* x[3] + x[7] | x[1] + x[5] */
subps %xmm2, %xmm3 /* x[3] - x[7] | x[1] - x[5] */
movaps (%ebx), %xmm4 /* -1/sqrt2 | -1/sqrt2 | 1/sqrt2 | 1/sqrt2 */
movaps %xmm3, %xmm6 /* x[3] - x[7] | x[1] - x[5] */
mulps %xmm4, %xmm3
shufps $0xc8, %xmm4, %xmm4 /* -1/sqrt2 | 1/sqrt2 | -1/sqrt2 | 1/sqrt2 */
shufps $0xb1, %xmm6, %xmm6
mulps %xmm4, %xmm6
addps %xmm3, %xmm6 /* (-1-i)/sqrt2 * (x[3]-x[7]) | (1-i)/sqrt2 * (x[1] - x[5] */
movhlps %xmm1, %xmm5 /* x[3] + x[7] */
movlhps %xmm6, %xmm1 /* (1+i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */
shufps $0xe4, %xmm6, %xmm5 /* (-1-i)/sqrt2 * (x[3]-x[7]) | x[3]+x[7] */
movaps %xmm1, %xmm3 /* (1-i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */
movl $C_1, %ebx
addps %xmm5, %xmm1 /* u */
subps %xmm5, %xmm3 /* v */
movaps %xmm0, %xmm2 /* yb */
movaps %xmm7, %xmm4 /* yt */
movaps (%ebx), %xmm5
mulps %xmm5, %xmm3
addps %xmm1, %xmm0 /* yt + u */
subps %xmm1, %xmm2 /* yt - u */
shufps $0xb1, %xmm3, %xmm3 /* -i * v */
movaps %xmm0, (%eax)
movaps %xmm2, 32(%eax)
addps %xmm3, %xmm4 /* yb - i*v */
subps %xmm3, %xmm7 /* yb + i*v */
movaps %xmm4, 16(%eax)
movaps %xmm7, 48(%eax)
popl %ebx
leave
ret
.align 4
.global fft_asmb_kni
.type fft_asmb, @function
fft_asmb_kni:
pushl %ebp
movl %esp, %ebp
subl $4, %esp
pushl %eax
pushl %ebx
pushl %ecx
pushl %edx
pushl %esi
pushl %edi
movl 8(%ebp), %ecx /* k */
movl 12(%ebp), %eax /* x */
movl %ecx, -4(%ebp) /* k */
movl 16(%ebp), %ebx /* wT */
movl 20(%ebp), %edx /* d */
movl 24(%ebp), %esi /* d3 */
shll $4, %ecx /* 16k */
addl $8, %edx
leal (%eax, %ecx, 2), %edi
addl $8, %esi
/* TRANSZERO and TRANS */
movaps (%eax), %xmm0 /* x[1] | x[0] */
movaps (%ebx), %xmm1 /* wT[1] | wT[0] */
movaps (%ebx, %ecx), %xmm2 /* wB[1] | wB[0] */
movlps (%edx), %xmm3 /* d */
movlps (%esi), %xmm4 /* d3 */
movhlps %xmm1, %xmm5 /* wT[1] */
movhlps %xmm2, %xmm6 /* wB[1] */
shufps $0x50, %xmm3, %xmm3 /* d[1].im | d[1].im | d[1].re | d[1].re */
shufps $0x50, %xmm4, %xmm4 /* d3[1].im | d3[1].im | d3[i].re | d3[i].re */
movlhps %xmm5, %xmm5 /* wT[1] | wT[1] */
movlhps %xmm6, %xmm6 /* wB[1] | wB[1] */
mulps %xmm3, %xmm5
mulps %xmm4, %xmm6
movhlps %xmm5, %xmm7 /* wT[1].im * d[1].im | wT[1].re * d[1].im */
movlhps %xmm6, %xmm5 /* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wT[1].im * d[1].re | wT[1].re * d[1].re */
shufps $0xb1, %xmm6, %xmm7 /* wB[1].re * d3[1].im | wB[i].im * d3[1].im | wT[1].re * d[1].im | wT[1].im * d[1].im */
movl $C_1, %edi
movaps (%edi), %xmm4
mulps %xmm4, %xmm7
addps %xmm7, %xmm5 /* wB[1] * d3[1] | wT[1] * d[1] */
movlhps %xmm5, %xmm1 /* d[1] * wT[1] | wT[0] */
shufps $0xe4, %xmm5, %xmm2 /* d3[1] * wB[1] | wB[0] */
movaps %xmm1, %xmm3 /* d[1] * wT[1] | wT[0] */
leal (%eax, %ecx, 2), %edi
addps %xmm2, %xmm1 /* u */
subps %xmm2, %xmm3 /* v */
mulps %xmm4, %xmm3
movaps (%eax, %ecx), %xmm5 /* xk[1] | xk[0] */
shufps $0xb1, %xmm3, %xmm3 /* -i * v */
movaps %xmm0, %xmm2 /* x[1] | x[0] */
movaps %xmm5, %xmm6 /* xk[1] | xk[0] */
addps %xmm1, %xmm0
subps %xmm1, %xmm2
addps %xmm3, %xmm5
subps %xmm3, %xmm6
movaps %xmm0, (%eax)
movaps %xmm2, (%edi)
movaps %xmm5, (%eax, %ecx)
movaps %xmm6, (%edi, %ecx)
addl $16, %eax
addl $16, %ebx
addl $8, %edx
addl $8, %esi
decl -4(%ebp)
.loop:
movaps (%ebx), %xmm0 /* wT[1] | wT[0] */
movaps (%edx), %xmm1 /* d[1] | d[0] */
movaps (%ebx, %ecx), %xmm4 /* wB[1] | wB[0] */
movaps (%esi), %xmm5 /* d3[1] | d3[0] */
movhlps %xmm0, %xmm2 /* wT[1] */
movhlps %xmm1, %xmm3 /* d[1] */
movhlps %xmm4, %xmm6 /* wB[1] */
movhlps %xmm5, %xmm7 /* d3[1] */
shufps $0x50, %xmm1, %xmm1 /* d[0].im | d[0].im | d[0].re | d[0].re */
shufps $0x50, %xmm3, %xmm3 /* d[1].im | d[1].im | d[1].re | d[1].re */
movlhps %xmm0, %xmm0 /* wT[0] | wT[0] */
shufps $0x50, %xmm5, %xmm5 /* d3[0].im | d3[0].im | d3[0].re | d3[0].re */
movlhps %xmm2, %xmm2 /* wT[1] | wT[1] */
shufps $0x50, %xmm7, %xmm7 /* d3[1].im | d3[1].im | d3[1].re | d3[1].re */
mulps %xmm1, %xmm0 /* d[0].im * wT[0].im | d[0].im * wT[0].re | d[0].re * wT[0].im | d[0].re * wT[0].re */
mulps %xmm3, %xmm2 /* d[1].im * wT[1].im | d[1].im * wT[1].re | d[1].re * wT[1].im | d[1].re * wT[1].re */
movlhps %xmm4, %xmm4 /* wB[0] | wB[0] */
movlhps %xmm6, %xmm6 /* wB[1] | wB[1] */
movhlps %xmm0, %xmm1 /* d[0].im * wT[0].im | d[0].im * wT[0].re */
movlhps %xmm2, %xmm0 /* d[1].re * wT[1].im | d[1].re * wT[1].re | d[0].re * wT[0].im | d[0].re * wT[0].re */
mulps %xmm5, %xmm4 /* wB[0].im * d3[0].im | wB[0].re * d3[0].im | wB[0].im * d3[0].re | wB[0].re * d3[0].re */
mulps %xmm7, %xmm6 /* wB[1].im * d3[1].im | wB[1].re * d3[1].im | wB[1].im * d3[1].re | wB[1].re * d3[1].re */
shufps $0xb1, %xmm2, %xmm1 /* d[1].im * wT[1].re | d[1].im * wT[1].im | d[0].im * wT[0].re | d[0].im * wT[0].im */
movl $C_1, %edi
movaps (%edi), %xmm3 /* 1.0 | -1.0 | 1.0 | -1.0 */
movhlps %xmm4, %xmm5 /* wB[0].im * d3[0].im | wB[0].re * d3[0].im */
mulps %xmm3, %xmm1 /* d[1].im * wT[1].re | -d[1].im * wT[1].im | d[0].im * wT[0].re | -d[0].im * wT[0].im */
movlhps %xmm6, %xmm4 /* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wB[0].im * d3[0].re | wB[0].im * d3[0].re */
addps %xmm1, %xmm0 /* wT[1] * d[1] | wT[0] * d[0] */
shufps $0xb1, %xmm6, %xmm5 /* wB[1].re * d3[1].im | wB[1].im * d3[1].im | wB[0].re * d3[0].im | wB[0].im * d3[0].im */
mulps %xmm3, %xmm5 /* wB[1].re * d3[1].im | -wB[1].im * d3[1].im | wB[0].re * d3[0].im | -wB[0].im * d3[0].im */
addps %xmm5, %xmm4 /* wB[1] * d3[1] | wB[0] * d3[0] */
movaps %xmm0, %xmm1 /* wT[1] * d[1] | wT[0] * d[0] */
addps %xmm4, %xmm0 /* u */
subps %xmm4, %xmm1 /* v */
movaps (%eax), %xmm6 /* x[1] | x[0] */
leal (%eax, %ecx, 2), %edi
mulps %xmm3, %xmm1
addl $16, %ebx
addl $16, %esi
shufps $0xb1, %xmm1, %xmm1 /* -i * v */
movaps (%eax, %ecx), %xmm7 /* xk[1] | xk[0] */
movaps %xmm6, %xmm2
movaps %xmm7, %xmm4
addps %xmm0, %xmm6
subps %xmm0, %xmm2
movaps %xmm6, (%eax)
movaps %xmm2, (%edi)
addps %xmm1, %xmm7
subps %xmm1, %xmm4
addl $16, %edx
movaps %xmm7, (%eax, %ecx)
movaps %xmm4, (%edi, %ecx)
addl $16, %eax
decl -4(%ebp)
jnz .loop
.end:
popl %edi
popl %esi
popl %edx
popl %ecx
popl %ebx
popl %eax
addl $4, %esp
leave
ret
#endif

30
ac3dec/srfft_kni.h Normal file
View File

@ -0,0 +1,30 @@
/*****
*
* This file is part of the OMS program.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*****/
#ifndef __SRFF_KNI__
#define __SRFF_KNI__
#include "cmplx.h"
void fft_4_kni (complex_t *a);
void fft_8_kni (complex_t *a);
void fft_asmb_kni (int, complex_t*, complex_t *, complex_t *, complex_t*);
#endif

93
ac3dec/srfft_kni_c.c Normal file
View File

@ -0,0 +1,93 @@
/*
* srfft_kni.c
*
* Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - April 2000
*
* 64 and 128 point split radix fft for ac3dec
*
* The algorithm is desribed in the book:
* "Computational Frameworks of the Fast Fourier Transform".
*
* The ideas and the the organization of code borrowed from djbfft written by
* D. J. Bernstein <djb@cr.py.to>. djbff can be found at
* http://cr.yp.to/djbfft.html.
*
* srfft.c is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* srfft.c is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*/
#ifdef __i386__
#include <stdio.h>
#include "srfft_kni.h"
#include "srfftp.h"
void fft_64p_kni(complex_t *a)
{
fft_8_kni(&a[0]); fft_4_kni(&a[8]); fft_4_kni(&a[12]);
fft_asmb_kni(2, &a[0], &a[8], &delta16[0], &delta16_3[0]);
fft_8_kni(&a[16]), fft_8_kni(&a[24]);
fft_asmb_kni(4, &a[0], &a[16],&delta32[0], &delta32_3[0]);
fft_8_kni(&a[32]); fft_4_kni(&a[40]); fft_4_kni(&a[44]);
fft_asmb_kni(2, &a[32], &a[40], &delta16[0], &delta16_3[0]);
fft_8_kni(&a[48]); fft_4_kni(&a[56]); fft_4_kni(&a[60]);
fft_asmb_kni(2, &a[48], &a[56], &delta16[0], &delta16_3[0]);
fft_asmb_kni(8, &a[0], &a[32],&delta64[0], &delta64_3[0]);
}
void fft_128p_kni(complex_t *a)
{
fft_8_kni(&a[0]); fft_4_kni(&a[8]); fft_4_kni(&a[12]);
fft_asmb_kni(2, &a[0], &a[8], &delta16[0], &delta16_3[0]);
fft_8_kni(&a[16]), fft_8_kni(&a[24]);
fft_asmb_kni(4, &a[0], &a[16],&delta32[0], &delta32_3[0]);
fft_8_kni(&a[32]); fft_4_kni(&a[40]); fft_4_kni(&a[44]);
fft_asmb_kni(2, &a[32], &a[40], &delta16[0], &delta16_3[0]);
fft_8_kni(&a[48]); fft_4_kni(&a[56]); fft_4_kni(&a[60]);
fft_asmb_kni(2, &a[48], &a[56], &delta16[0], &delta16_3[0]);
fft_asmb_kni(8, &a[0], &a[32],&delta64[0], &delta64_3[0]);
fft_8_kni(&a[64]); fft_4_kni(&a[72]); fft_4_kni(&a[76]);
/* fft_16(&a[64]); */
fft_asmb_kni(2, &a[64], &a[72], &delta16[0], &delta16_3[0]);
fft_8_kni(&a[80]); fft_8_kni(&a[88]);
/* fft_32(&a[64]); */
fft_asmb_kni(4, &a[64], &a[80],&delta32[0], &delta32_3[0]);
fft_8_kni(&a[96]); fft_4_kni(&a[104]), fft_4_kni(&a[108]);
/* fft_16(&a[96]); */
fft_asmb_kni(2, &a[96], &a[104], &delta16[0], &delta16_3[0]);
fft_8_kni(&a[112]), fft_8_kni(&a[120]);
/* fft_32(&a[96]); */
fft_asmb_kni(4, &a[96], &a[112], &delta32[0], &delta32_3[0]);
/* fft_128(&a[0]); */
fft_asmb_kni(16, &a[0], &a[64], &delta128[0], &delta128_3[0]);
}
#endif

305
ac3dec/srfftp.h Normal file
View File

@ -0,0 +1,305 @@
/*
* srfftp.h
*
* Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - April 2000
*
* 64 and 128 point split radix fft for ac3dec
*
* The algorithm is desribed in the book:
* "Computational Frameworks of the Fast Fourier Transform".
*
* The ideas and the the organization of code borrowed from djbfft written by
* D. J. Bernstein <djb@cr.py.to>. djbff can be found at
* http://cr.yp.to/djbfft.html.
*
* srfftp.h is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* srfftp.h is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*/
#ifndef SRFFTP_H__
#define SRFFTP_H__
#include "cmplx.h"
static complex_t delta16[4] =
{ {1.00000000000000, 0.00000000000000},
{0.92387953251129, -0.38268343236509},
{0.70710678118655, -0.70710678118655},
{0.38268343236509, -0.92387953251129}};
static complex_t delta16_3[4] =
{ {1.00000000000000, 0.00000000000000},
{0.38268343236509, -0.92387953251129},
{-0.70710678118655, -0.70710678118655},
{-0.92387953251129, 0.38268343236509}};
static complex_t delta32[8] =
{ {1.00000000000000, 0.00000000000000},
{0.98078528040323, -0.19509032201613},
{0.92387953251129, -0.38268343236509},
{0.83146961230255, -0.55557023301960},
{0.70710678118655, -0.70710678118655},
{0.55557023301960, -0.83146961230255},
{0.38268343236509, -0.92387953251129},
{0.19509032201613, -0.98078528040323}};
static complex_t delta32_3[8] =
{ {1.00000000000000, 0.00000000000000},
{0.83146961230255, -0.55557023301960},
{0.38268343236509, -0.92387953251129},
{-0.19509032201613, -0.98078528040323},
{-0.70710678118655, -0.70710678118655},
{-0.98078528040323, -0.19509032201613},
{-0.92387953251129, 0.38268343236509},
{-0.55557023301960, 0.83146961230255}};
static complex_t delta64[16] =
{ {1.00000000000000, 0.00000000000000},
{0.99518472667220, -0.09801714032956},
{0.98078528040323, -0.19509032201613},
{0.95694033573221, -0.29028467725446},
{0.92387953251129, -0.38268343236509},
{0.88192126434836, -0.47139673682600},
{0.83146961230255, -0.55557023301960},
{0.77301045336274, -0.63439328416365},
{0.70710678118655, -0.70710678118655},
{0.63439328416365, -0.77301045336274},
{0.55557023301960, -0.83146961230255},
{0.47139673682600, -0.88192126434835},
{0.38268343236509, -0.92387953251129},
{0.29028467725446, -0.95694033573221},
{0.19509032201613, -0.98078528040323},
{0.09801714032956, -0.99518472667220}};
static complex_t delta64_3[16] =
{ {1.00000000000000, 0.00000000000000},
{0.95694033573221, -0.29028467725446},
{0.83146961230255, -0.55557023301960},
{0.63439328416365, -0.77301045336274},
{0.38268343236509, -0.92387953251129},
{0.09801714032956, -0.99518472667220},
{-0.19509032201613, -0.98078528040323},
{-0.47139673682600, -0.88192126434836},
{-0.70710678118655, -0.70710678118655},
{-0.88192126434835, -0.47139673682600},
{-0.98078528040323, -0.19509032201613},
{-0.99518472667220, 0.09801714032956},
{-0.92387953251129, 0.38268343236509},
{-0.77301045336274, 0.63439328416365},
{-0.55557023301960, 0.83146961230255},
{-0.29028467725446, 0.95694033573221}};
static complex_t delta128[32] =
{ {1.00000000000000, 0.00000000000000},
{0.99879545620517, -0.04906767432742},
{0.99518472667220, -0.09801714032956},
{0.98917650996478, -0.14673047445536},
{0.98078528040323, -0.19509032201613},
{0.97003125319454, -0.24298017990326},
{0.95694033573221, -0.29028467725446},
{0.94154406518302, -0.33688985339222},
{0.92387953251129, -0.38268343236509},
{0.90398929312344, -0.42755509343028},
{0.88192126434836, -0.47139673682600},
{0.85772861000027, -0.51410274419322},
{0.83146961230255, -0.55557023301960},
{0.80320753148064, -0.59569930449243},
{0.77301045336274, -0.63439328416365},
{0.74095112535496, -0.67155895484702},
{0.70710678118655, -0.70710678118655},
{0.67155895484702, -0.74095112535496},
{0.63439328416365, -0.77301045336274},
{0.59569930449243, -0.80320753148064},
{0.55557023301960, -0.83146961230255},
{0.51410274419322, -0.85772861000027},
{0.47139673682600, -0.88192126434835},
{0.42755509343028, -0.90398929312344},
{0.38268343236509, -0.92387953251129},
{0.33688985339222, -0.94154406518302},
{0.29028467725446, -0.95694033573221},
{0.24298017990326, -0.97003125319454},
{0.19509032201613, -0.98078528040323},
{0.14673047445536, -0.98917650996478},
{0.09801714032956, -0.99518472667220},
{0.04906767432742, -0.99879545620517}};
static complex_t delta128_3[32] =
{ {1.00000000000000, 0.00000000000000},
{0.98917650996478, -0.14673047445536},
{0.95694033573221, -0.29028467725446},
{0.90398929312344, -0.42755509343028},
{0.83146961230255, -0.55557023301960},
{0.74095112535496, -0.67155895484702},
{0.63439328416365, -0.77301045336274},
{0.51410274419322, -0.85772861000027},
{0.38268343236509, -0.92387953251129},
{0.24298017990326, -0.97003125319454},
{0.09801714032956, -0.99518472667220},
{-0.04906767432742, -0.99879545620517},
{-0.19509032201613, -0.98078528040323},
{-0.33688985339222, -0.94154406518302},
{-0.47139673682600, -0.88192126434836},
{-0.59569930449243, -0.80320753148065},
{-0.70710678118655, -0.70710678118655},
{-0.80320753148065, -0.59569930449243},
{-0.88192126434835, -0.47139673682600},
{-0.94154406518302, -0.33688985339222},
{-0.98078528040323, -0.19509032201613},
{-0.99879545620517, -0.04906767432742},
{-0.99518472667220, 0.09801714032956},
{-0.97003125319454, 0.24298017990326},
{-0.92387953251129, 0.38268343236509},
{-0.85772861000027, 0.51410274419322},
{-0.77301045336274, 0.63439328416365},
{-0.67155895484702, 0.74095112535496},
{-0.55557023301960, 0.83146961230255},
{-0.42755509343028, 0.90398929312344},
{-0.29028467725446, 0.95694033573221},
{-0.14673047445536, 0.98917650996478}};
#define HSQRT2 0.707106781188;
#define TRANSZERO(A0,A4,A8,A12) { \
u_r = wTB[0].re; \
v_i = u_r - wTB[k*2].re; \
u_r += wTB[k*2].re; \
u_i = wTB[0].im; \
v_r = wTB[k*2].im - u_i; \
u_i += wTB[k*2].im; \
a_r = A0.re; \
a_i = A0.im; \
a1_r = a_r; \
a1_r += u_r; \
A0.re = a1_r; \
a_r -= u_r; \
A8.re = a_r; \
a1_i = a_i; \
a1_i += u_i; \
A0.im = a1_i; \
a_i -= u_i; \
A8.im = a_i; \
a1_r = A4.re; \
a1_i = A4.im; \
a_r = a1_r; \
a_r -= v_r; \
A4.re = a_r; \
a1_r += v_r; \
A12.re = a1_r; \
a_i = a1_i; \
a_i -= v_i; \
A4.im = a_i; \
a1_i += v_i; \
A12.im = a1_i; \
}
#define TRANSHALF_16(A2,A6,A10,A14) {\
u_r = wTB[2].re; \
a_r = u_r; \
u_i = wTB[2].im; \
u_r += u_i; \
u_i -= a_r; \
a_r = wTB[6].re; \
a1_r = a_r; \
a_i = wTB[6].im; \
a_r = a_i - a_r; \
a_i += a1_r; \
v_i = u_r - a_r; \
u_r += a_r; \
v_r = u_i + a_i; \
u_i -= a_i; \
v_i *= HSQRT2; \
v_r *= HSQRT2; \
u_r *= HSQRT2; \
u_i *= HSQRT2; \
a_r = A2.re; \
a_i = A2.im; \
a1_r = a_r; \
a1_r += u_r; \
A2.re = a1_r; \
a_r -= u_r; \
A10.re = a_r; \
a1_i = a_i; \
a1_i += u_i; \
A2.im = a1_i; \
a_i -= u_i; \
A10.im = a_i; \
a1_r = A6.re; \
a1_i = A6.im; \
a_r = a1_r; \
a1_r += v_r; \
A6.re = a1_r; \
a_r -= v_r; \
A14.re = a_r; \
a_i = a1_i; \
a1_i -= v_i; \
A6.im = a1_i; \
a_i += v_i; \
A14.im = a_i; \
}
#define TRANS(A1,A5,A9,A13,WT,WB,D,D3) { \
u_r = WT.re; \
a_r = u_r; \
a_r *= D.im; \
u_r *= D.re; \
a_i = WT.im; \
a1_i = a_i; \
a1_i *= D.re; \
a_i *= D.im; \
u_r -= a_i; \
u_i = a_r; \
u_i += a1_i; \
a_r = WB.re; \
a1_r = a_r; \
a1_r *= D3.re; \
a_r *= D3.im; \
a_i = WB.im; \
a1_i = a_i; \
a_i *= D3.re; \
a1_i *= D3.im; \
a1_r -= a1_i; \
a_r += a_i; \
v_i = u_r - a1_r; \
u_r += a1_r; \
v_r = a_r - u_i; \
u_i += a_r; \
a_r = A1.re; \
a_i = A1.im; \
a1_r = a_r; \
a1_r += u_r; \
A1.re = a1_r; \
a_r -= u_r; \
A9.re = a_r; \
a1_i = a_i; \
a1_i += u_i; \
A1.im = a1_i; \
a_i -= u_i; \
A9.im = a_i; \
a1_r = A5.re; \
a1_i = A5.im; \
a_r = a1_r; \
a1_r -= v_r; \
A5.re = a1_r; \
a_r += v_r; \
A13.re = a_r; \
a_i = a1_i; \
a1_i -= v_i; \
A5.im = a1_i; \
a_i += v_i; \
A13.im = a_i; \
}
#endif

View File

@ -28,16 +28,16 @@
#include "ac3_internal.h" #include "ac3_internal.h"
#include "decode.h"
#include "stats.h" #include "stats.h"
#include "debug.h" #include "debug.h"
#if !defined (__GNUC__) || defined (DEBUG)
static const char *service_ids[8] = static const char *service_ids[8] =
{ {
"CM","ME","VI","HI", "CM","ME","VI","HI",
"D", "C","E", "VO" "D", "C","E", "VO"
}; };
#endif
struct mixlev_s struct mixlev_s
{ {
@ -77,9 +77,10 @@ static const char *language[128] =
"Bengali", "Belorussian", "Bambora", "Azerbijani", "Assamese", "Armenian", "Arabic", "Amharic" "Bengali", "Belorussian", "Bambora", "Azerbijani", "Assamese", "Armenian", "Arabic", "Amharic"
}; };
void stats_print_banner(syncinfo_t *syncinfo,bsi_t *bsi) void stats_print_banner(syncinfo_t *syncinfo,bsi_t *bsi)
{ {
fprintf(stdout,"ac3dec-0.6.2-cvs (C) 2000 Aaron Holtzman (aholtzma@ess.engr.uvic.ca)\n"); // fprintf(stdout,PACKAGE"-"VERSION" (C) 2000 Aaron Holtzman (aholtzma@ess.engr.uvic.ca)\n");
fprintf(stdout,"%d.%d Mode ",bsi->nfchans,bsi->lfeon); fprintf(stdout,"%d.%d Mode ",bsi->nfchans,bsi->lfeon);
fprintf(stdout,"%2.1f KHz",syncinfo->sampling_rate * 1e-3); fprintf(stdout,"%2.1f KHz",syncinfo->sampling_rate * 1e-3);
@ -87,8 +88,7 @@ void stats_print_banner(syncinfo_t *syncinfo,bsi_t *bsi)
if (bsi->langcode && (bsi->langcod < 128)) if (bsi->langcode && (bsi->langcod < 128))
fprintf(stdout,"%s ", language[bsi->langcod]); fprintf(stdout,"%s ", language[bsi->langcod]);
switch(bsi->bsmod) switch(bsi->bsmod) {
{
case 0: case 0:
fprintf(stdout,"Complete Main Audio Service"); fprintf(stdout,"Complete Main Audio Service");
break; break;
@ -116,12 +116,12 @@ void stats_print_banner(syncinfo_t *syncinfo,bsi_t *bsi)
fprintf(stdout,"\n"); fprintf(stdout,"\n");
} }
void stats_print_syncinfo(syncinfo_t *syncinfo)
void stats_print_syncinfo (syncinfo_t *syncinfo)
{ {
dprintf("(syncinfo) "); dprintf("(syncinfo) ");
switch (syncinfo->fscod) switch (syncinfo->fscod) {
{
case 2: case 2:
dprintf("32 KHz "); dprintf("32 KHz ");
break; break;
@ -141,8 +141,8 @@ void stats_print_syncinfo(syncinfo_t *syncinfo)
} }
void stats_print_bsi(bsi_t *bsi)
{ void stats_print_bsi(bsi_t *bsi) {
dprintf("(bsi) "); dprintf("(bsi) ");
dprintf("%s",service_ids[bsi->bsmod]); dprintf("%s",service_ids[bsi->bsmod]);
dprintf(" %d.%d Mode ",bsi->nfchans,bsi->lfeon); dprintf(" %d.%d Mode ",bsi->nfchans,bsi->lfeon);
@ -154,11 +154,11 @@ void stats_print_bsi(bsi_t *bsi)
} }
char *exp_strat_tbl[4] = {"R ","D15 ","D25 ","D45 "}; char *exp_strat_tbl[4] = {"R ","D15 ","D25 ","D45 "};
void stats_print_audblk(bsi_t *bsi,audblk_t *audblk) void stats_print_audblk(bsi_t *bsi,audblk_t *audblk) {
{ uint32_t i;
uint_32 i;
dprintf("(audblk) "); dprintf("(audblk) ");
dprintf("%s ",audblk->cplinu ? "cpl on " : "cpl off"); dprintf("%s ",audblk->cplinu ? "cpl on " : "cpl off");