vdr/ac3dec/downmix_kni.S

397 lines
7.4 KiB
ArmAsm

/*
* downmix_kni.S
*
* Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - October 2000
*
*
* downmix_kni.S is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* downmix_kni.S is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*/
#ifdef __i386__
/* Read-only constant data. */
.section .rodata
.align 4
/*
 * Single-precision scale factor loaded by stream_sample_1ch_to_s16_kni.
 * NOTE: despite the label name, the stored value is approximately
 * 1/sqrt(2), not sqrt(2).
 */
sqrt2: .float 0f0.7071068
/* Pad the current section to a 2^5 = 32-byte boundary, filling with zero bytes. */
.p2align 5,0,
/* Everything below is code. */
.section .text
.align 4
/*
 * downmix_3f_2r_to_2ch_kni - mix L, C, R, Ls, Rs down to two channels (SSE).
 *
 * Stack arguments (read at 8(%ebp) and 12(%ebp)):
 *   samples - base of the channel data.  Each channel occupies 1024 bytes
 *             (256 floats), laid out in the order L, C, R, Ls, Rs.
 *             Accessed with movaps, so it must be 16-byte aligned.
 *   dm_par  - pointer to three consecutive floats: unit, clev, slev.
 *
 * Result, written in place:
 *   bytes    0..1023 (left out)  = unit*L + clev*C + slev*Ls
 *   bytes 1024..2047 (right out) = unit*R + clev*C + slev*Rs
 *
 * %eax, %ebx and %ecx are saved and restored, so nothing is returned in
 * %eax.  %xmm0-%xmm7 and the flags are clobbered.
 */
.global downmix_3f_2r_to_2ch_kni
.type downmix_3f_2r_to_2ch_kni, @function
downmix_3f_2r_to_2ch_kni:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%eax
	pushl	%ebx
	pushl	%ecx

	movl	8(%ebp), %eax		/* eax = samples */
	movl	12(%ebp), %ebx		/* ebx = dm_par */
	movl	$64, %ecx		/* 64 iterations x 4 floats = 256 samples */

	movss	(%ebx), %xmm5
	shufps	$0, %xmm5, %xmm5	/* xmm5 = unit | unit | unit | unit */
	movss	4(%ebx), %xmm6
	shufps	$0, %xmm6, %xmm6	/* xmm6 = clev | clev | clev | clev */
	movss	8(%ebx), %xmm7
	shufps	$0, %xmm7, %xmm7	/* xmm7 = slev | slev | slev | slev */

.Ldownmix_3f_2r_loop:
	movaps	(%eax), %xmm0		/* left */
	movaps	2048(%eax), %xmm1	/* right */
	movaps	1024(%eax), %xmm2	/* center */
	mulps	%xmm5, %xmm0		/* unit * left */
	mulps	%xmm5, %xmm1		/* unit * right */
	mulps	%xmm6, %xmm2		/* clev * center */
	movaps	3072(%eax), %xmm3	/* left surround */
	movaps	4096(%eax), %xmm4	/* right surround */
	addps	%xmm2, %xmm0
	addps	%xmm2, %xmm1
	mulps	%xmm7, %xmm3		/* slev * left surround */
	mulps	%xmm7, %xmm4		/* slev * right surround */
	addps	%xmm3, %xmm0
	addps	%xmm4, %xmm1
	movaps	%xmm0, (%eax)		/* left out */
	movaps	%xmm1, 1024(%eax)	/* right out; center slot already consumed */
	addl	$16, %eax
	decl	%ecx
	jnz	.Ldownmix_3f_2r_loop

	popl	%ecx
	popl	%ebx
	popl	%eax
	leave
	ret
.size downmix_3f_2r_to_2ch_kni, .-downmix_3f_2r_to_2ch_kni
/*
 * downmix_2f_2r_to_2ch_kni - mix L, R, Ls, Rs down to two channels (SSE).
 *
 * Stack arguments (read at 8(%ebp) and 12(%ebp)):
 *   samples - base of the channel data.  Each channel occupies 1024 bytes
 *             (256 floats), laid out in the order L, R, Ls, Rs.
 *             Accessed with movaps, so it must be 16-byte aligned.
 *   dm_par  - pointer to floats; offset 0 is unit, offset 8 is slev
 *             (offset 4 is not read here).
 *
 * Result, written in place:
 *   bytes    0..1023 (left out)  = unit*L + slev*Ls
 *   bytes 1024..2047 (right out) = unit*R + slev*Rs
 *
 * %eax, %ebx and %ecx are saved and restored, so nothing is returned in
 * %eax.  %xmm0, %xmm1, %xmm3, %xmm4, %xmm5, %xmm7 and the flags are
 * clobbered.
 */
.p2align 4,,7
.global downmix_2f_2r_to_2ch_kni
.type downmix_2f_2r_to_2ch_kni, @function
downmix_2f_2r_to_2ch_kni:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%eax
	pushl	%ebx
	pushl	%ecx

	movl	8(%ebp), %eax		/* eax = samples */
	movl	12(%ebp), %ebx		/* ebx = dm_par */
	movl	$64, %ecx		/* 64 iterations x 4 floats = 256 samples */

	movss	(%ebx), %xmm5
	shufps	$0, %xmm5, %xmm5	/* xmm5 = unit | unit | unit | unit */
	movss	8(%ebx), %xmm7
	shufps	$0, %xmm7, %xmm7	/* xmm7 = slev | slev | slev | slev */

.Ldownmix_2f_2r_loop:
	movaps	(%eax), %xmm0		/* left */
	movaps	1024(%eax), %xmm1	/* right */
	movaps	2048(%eax), %xmm3	/* left surround */
	mulps	%xmm5, %xmm0		/* unit * left */
	mulps	%xmm5, %xmm1		/* unit * right */
	movaps	3072(%eax), %xmm4	/* right surround */
	mulps	%xmm7, %xmm3		/* slev * left surround */
	mulps	%xmm7, %xmm4		/* slev * right surround */
	addps	%xmm3, %xmm0
	addps	%xmm4, %xmm1
	movaps	%xmm0, (%eax)		/* left out */
	movaps	%xmm1, 1024(%eax)	/* right out */
	addl	$16, %eax
	decl	%ecx
	jnz	.Ldownmix_2f_2r_loop

	popl	%ecx
	popl	%ebx
	popl	%eax
	leave
	ret
.size downmix_2f_2r_to_2ch_kni, .-downmix_2f_2r_to_2ch_kni
/*
 * downmix_3f_1r_to_2ch_kni - mix L, C, R, S down to two channels (SSE).
 *
 * Stack arguments (read at 8(%ebp) and 12(%ebp)):
 *   samples - base of the channel data.  Each channel occupies 1024 bytes
 *             (256 floats), laid out in the order L, C, R, S.
 *             Accessed with movaps, so it must be 16-byte aligned.
 *   dm_par  - pointer to three consecutive floats: unit, clev, slev.
 *
 * Result, written in place (the surround term is subtracted from the left
 * output and added to the right output):
 *   bytes    0..1023 (left out)  = unit*L + clev*C - slev*S
 *   bytes 1024..2047 (right out) = unit*R + clev*C + slev*S
 *
 * %eax, %ebx and %ecx are saved and restored, so nothing is returned in
 * %eax.  %xmm0-%xmm3, %xmm5-%xmm7 and the flags are clobbered.
 */
.p2align 4,,7
.global downmix_3f_1r_to_2ch_kni
.type downmix_3f_1r_to_2ch_kni, @function
downmix_3f_1r_to_2ch_kni:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%eax
	pushl	%ebx
	pushl	%ecx

	movl	8(%ebp), %eax		/* eax = samples */
	movl	12(%ebp), %ebx		/* ebx = dm_par */
	movl	$64, %ecx		/* 64 iterations x 4 floats = 256 samples */

	movss	(%ebx), %xmm5
	shufps	$0, %xmm5, %xmm5	/* xmm5 = unit | unit | unit | unit */
	movss	4(%ebx), %xmm6
	shufps	$0, %xmm6, %xmm6	/* xmm6 = clev | clev | clev | clev */
	movss	8(%ebx), %xmm7
	shufps	$0, %xmm7, %xmm7	/* xmm7 = slev | slev | slev | slev */

.Ldownmix_3f_1r_loop:
	movaps	(%eax), %xmm0		/* left */
	movaps	2048(%eax), %xmm1	/* right */
	movaps	1024(%eax), %xmm2	/* center */
	mulps	%xmm5, %xmm0		/* unit * left */
	mulps	%xmm5, %xmm1		/* unit * right */
	mulps	%xmm6, %xmm2		/* clev * center */
	movaps	3072(%eax), %xmm3	/* surround */
	addps	%xmm2, %xmm0
	mulps	%xmm7, %xmm3		/* slev * surround */
	addps	%xmm2, %xmm1
	subps	%xmm3, %xmm0		/* left  -= slev * surround */
	addps	%xmm3, %xmm1		/* right += slev * surround */
	movaps	%xmm0, (%eax)		/* left out */
	movaps	%xmm1, 1024(%eax)	/* right out; center slot already consumed */
	addl	$16, %eax
	decl	%ecx
	jnz	.Ldownmix_3f_1r_loop

	popl	%ecx
	popl	%ebx
	popl	%eax
	leave
	ret
.size downmix_3f_1r_to_2ch_kni, .-downmix_3f_1r_to_2ch_kni
/*
 * downmix_2f_1r_to_2ch_kni - mix L, R, S down to two channels (SSE).
 *
 * Stack arguments (read at 8(%ebp) and 12(%ebp)):
 *   samples - base of the channel data.  Each channel occupies 1024 bytes
 *             (256 floats), laid out in the order L, R, S.
 *             Accessed with movaps, so it must be 16-byte aligned.
 *   dm_par  - pointer to floats; offset 0 is unit, offset 8 is slev
 *             (offset 4 is not read here).
 *
 * Result, written in place (the surround term is subtracted from the left
 * output and added to the right output):
 *   bytes    0..1023 (left out)  = unit*L - slev*S
 *   bytes 1024..2047 (right out) = unit*R + slev*S
 *
 * %eax, %ebx and %ecx are saved and restored, so nothing is returned in
 * %eax.  %xmm0, %xmm1, %xmm3, %xmm5, %xmm7 and the flags are clobbered.
 */
.p2align 4,,7
.global downmix_2f_1r_to_2ch_kni
.type downmix_2f_1r_to_2ch_kni, @function
downmix_2f_1r_to_2ch_kni:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%eax
	pushl	%ebx
	pushl	%ecx

	movl	8(%ebp), %eax		/* eax = samples */
	movl	12(%ebp), %ebx		/* ebx = dm_par */
	movl	$64, %ecx		/* 64 iterations x 4 floats = 256 samples */

	movss	(%ebx), %xmm5
	shufps	$0, %xmm5, %xmm5	/* xmm5 = unit | unit | unit | unit */
	movss	8(%ebx), %xmm7
	shufps	$0, %xmm7, %xmm7	/* xmm7 = slev | slev | slev | slev */

.Ldownmix_2f_1r_loop:
	movaps	(%eax), %xmm0		/* left */
	movaps	1024(%eax), %xmm1	/* right */
	mulps	%xmm5, %xmm0		/* unit * left */
	mulps	%xmm5, %xmm1		/* unit * right */
	movaps	2048(%eax), %xmm3	/* surround */
	mulps	%xmm7, %xmm3		/* slev * surround */
	subps	%xmm3, %xmm0		/* left  -= slev * surround */
	addps	%xmm3, %xmm1		/* right += slev * surround */
	movaps	%xmm0, (%eax)		/* left out */
	movaps	%xmm1, 1024(%eax)	/* right out */
	addl	$16, %eax
	decl	%ecx
	jnz	.Ldownmix_2f_1r_loop

	popl	%ecx
	popl	%ebx
	popl	%eax
	leave
	ret
.size downmix_2f_1r_to_2ch_kni, .-downmix_2f_1r_to_2ch_kni
/*
 * downmix_3f_0r_to_2ch_kni - mix L, C, R down to two channels (SSE).
 *
 * Stack arguments (read at 8(%ebp) and 12(%ebp)):
 *   samples - base of the channel data.  Each channel occupies 1024 bytes
 *             (256 floats), laid out in the order L, C, R.
 *             Accessed with movaps, so it must be 16-byte aligned.
 *   dm_par  - pointer to floats; offset 0 is unit, offset 4 is clev.
 *
 * Result, written in place:
 *   bytes    0..1023 (left out)  = unit*L + clev*C
 *   bytes 1024..2047 (right out) = unit*R + clev*C
 *
 * %eax, %ebx and %ecx are saved and restored, so nothing is returned in
 * %eax.  %xmm0-%xmm2, %xmm5, %xmm6 and the flags are clobbered.
 */
.p2align 4,,7
.global downmix_3f_0r_to_2ch_kni
.type downmix_3f_0r_to_2ch_kni, @function
downmix_3f_0r_to_2ch_kni:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%eax
	pushl	%ebx
	pushl	%ecx

	movl	8(%ebp), %eax		/* eax = samples */
	movl	12(%ebp), %ebx		/* ebx = dm_par */
	movl	$64, %ecx		/* 64 iterations x 4 floats = 256 samples */

	movss	(%ebx), %xmm5
	shufps	$0, %xmm5, %xmm5	/* xmm5 = unit | unit | unit | unit */
	movss	4(%ebx), %xmm6
	shufps	$0, %xmm6, %xmm6	/* xmm6 = clev | clev | clev | clev */

.Ldownmix_3f_0r_loop:
	movaps	(%eax), %xmm0		/* left */
	movaps	2048(%eax), %xmm1	/* right */
	movaps	1024(%eax), %xmm2	/* center */
	mulps	%xmm5, %xmm0		/* unit * left */
	mulps	%xmm5, %xmm1		/* unit * right */
	mulps	%xmm6, %xmm2		/* clev * center */
	addps	%xmm2, %xmm0
	addps	%xmm2, %xmm1
	movaps	%xmm0, (%eax)		/* left out */
	movaps	%xmm1, 1024(%eax)	/* right out; center slot already consumed */
	addl	$16, %eax
	decl	%ecx
	jnz	.Ldownmix_3f_0r_loop

	popl	%ecx
	popl	%ebx
	popl	%eax
	leave
	ret
.size downmix_3f_0r_to_2ch_kni, .-downmix_3f_0r_to_2ch_kni
/*
 * stream_sample_2ch_to_s16_kni - convert two float channels to interleaved
 * signed 16-bit samples (SSE + MMX).
 *
 * Stack arguments (read at 8(%ebp), 12(%ebp) and 16(%ebp)):
 *   s16_samples - output; receives 512 16-bit words (1024 bytes) in the
 *                 order l0, r0, l1, r1, ...  Written with movq.
 *   left        - 256 floats, read with movaps (must be 16-byte aligned).
 *   right       - 256 floats, read with movaps (must be 16-byte aligned).
 *
 * Each float is converted to a 32-bit integer by cvtps2pi and then packed
 * to 16 bits with signed saturation by packssdw.
 *
 * %eax, %ebx, %ecx and %edx are saved and restored, so nothing is returned
 * in %eax.  %xmm0-%xmm3, %mm0-%mm3 and the flags are clobbered; emms is
 * executed before returning so the x87 state is usable again.
 */
.p2align 4,,7
.global stream_sample_2ch_to_s16_kni
.type stream_sample_2ch_to_s16_kni, @function
stream_sample_2ch_to_s16_kni:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%eax
	pushl	%ebx
	pushl	%edx
	pushl	%ecx

	movl	8(%ebp), %eax		/* eax = s16_samples */
	movl	12(%ebp), %ebx		/* ebx = left */
	movl	16(%ebp), %edx		/* edx = right */
	movl	$64, %ecx		/* 64 iterations x 4 samples per channel */

.Lstream_2ch_s16_loop:
	movaps	(%ebx), %xmm0		/* l3 | l2 | l1 | l0 */
	movaps	(%edx), %xmm1		/* r3 | r2 | r1 | r0 */
	movhlps	%xmm0, %xmm2		/* low half of xmm2 = l3 | l2 */
	movhlps	%xmm1, %xmm3		/* low half of xmm3 = r3 | r2 */
	unpcklps %xmm1, %xmm0		/* r1 | l1 | r0 | l0 */
	unpcklps %xmm3, %xmm2		/* r3 | l3 | r2 | l2 */
	cvtps2pi %xmm0, %mm0		/* r0 l0 --> mm0, int_32 */
	movhlps	%xmm0, %xmm0		/* bring r1 | l1 into the low half */
	cvtps2pi %xmm0, %mm1		/* r1 l1 --> mm1, int_32 */
	cvtps2pi %xmm2, %mm2		/* r2 l2 --> mm2, int_32 */
	movhlps	%xmm2, %xmm2		/* bring r3 | l3 into the low half */
	cvtps2pi %xmm2, %mm3		/* r3 l3 --> mm3, int_32 */
	packssdw %mm1, %mm0		/* r1 l1 r0 l0 --> mm0, int_16 */
	packssdw %mm3, %mm2		/* r3 l3 r2 l2 --> mm2, int_16 */
	movq	%mm0, (%eax)
	movq	%mm2, 8(%eax)
	addl	$16, %eax
	addl	$16, %ebx
	addl	$16, %edx
	decl	%ecx
	jnz	.Lstream_2ch_s16_loop

	popl	%ecx
	popl	%edx
	popl	%ebx
	popl	%eax
	emms
	leave
	ret
.size stream_sample_2ch_to_s16_kni, .-stream_sample_2ch_to_s16_kni
/*
 * stream_sample_1ch_to_s16_kni - convert one float channel to interleaved
 * two-channel signed 16-bit samples (SSE + MMX).
 *
 * Stack arguments (read at 8(%ebp) and 12(%ebp)):
 *   s16_samples - output; receives 512 16-bit words (1024 bytes) in the
 *                 order c0, c0, c1, c1, ... (every input sample is written
 *                 twice, once per output channel).  Written with movq.
 *   samples     - 256 floats, read with movaps (must be 16-byte aligned).
 *
 * Each float is multiplied by the constant stored at sqrt2 (defined in
 * this file's .rodata), converted to a 32-bit integer by cvtps2pi and then
 * packed to 16 bits with signed saturation by packssdw.
 *
 * %eax, %ebx and %ecx are saved and restored, so nothing is returned in
 * %eax.  %xmm0, %xmm2, %xmm7, %mm0, %mm1 and the flags are clobbered; emms
 * is executed before returning so the x87 state is usable again.
 */
.p2align 4,,7
.global stream_sample_1ch_to_s16_kni
.type stream_sample_1ch_to_s16_kni, @function
stream_sample_1ch_to_s16_kni:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%eax
	pushl	%ebx
	pushl	%ecx

	movl	$sqrt2, %eax
	movss	(%eax), %xmm7
	movl	8(%ebp), %eax		/* eax = s16_samples */
	movl	12(%ebp), %ebx		/* ebx = input samples */
	shufps	$0, %xmm7, %xmm7	/* xmm7 = scale | scale | scale | scale */
	movl	$64, %ecx		/* 64 iterations x 4 input samples */

.Lstream_1ch_s16_loop:
	movaps	(%ebx), %xmm0		/* c3 | c2 | c1 | c0 */
	mulps	%xmm7, %xmm0
	movhlps	%xmm0, %xmm2		/* low half of xmm2 = c3 | c2 */
	cvtps2pi %xmm0, %mm0		/* c1 c0 --> mm0, int_32 */
	cvtps2pi %xmm2, %mm1		/* c3 c2 --> mm1, int_32 */
	packssdw %mm1, %mm0		/* c3 c2 c1 c0 --> mm0, int_16 */
	/*
	 * Packing a register with itself would give c1 c0 c1 c0, i.e. the
	 * two output channels would carry different samples.  Duplicate
	 * each 16-bit word instead so both channels get the same sample.
	 */
	movq	%mm0, %mm1
	punpcklwd %mm0, %mm0		/* c1 c1 c0 c0 */
	punpckhwd %mm1, %mm1		/* c3 c3 c2 c2 */
	movq	%mm0, (%eax)
	movq	%mm1, 8(%eax)
	addl	$16, %eax
	addl	$16, %ebx
	decl	%ecx
	jnz	.Lstream_1ch_s16_loop

	popl	%ecx
	popl	%ebx
	popl	%eax
	emms
	leave
	ret
.size stream_sample_1ch_to_s16_kni, .-stream_sample_1ch_to_s16_kni
#endif