mirror of
https://github.com/VDR4Arch/vdr.git
synced 2023-10-10 13:36:52 +02:00
397 lines
7.4 KiB
ArmAsm
397 lines
7.4 KiB
ArmAsm
|
/*
|
||
|
* downmix_kni.S
|
||
|
*
|
||
|
* Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - October 2000
|
||
|
*
|
||
|
*
|
||
|
* downmix_kni.S is free software; you can redistribute it and/or modify
|
||
|
* it under the terms of the GNU General Public License as published by
|
||
|
* the Free Software Foundation; either version 2, or (at your option)
|
||
|
* any later version.
|
||
|
*
|
||
|
* downmix_kni.S is distributed in the hope that it will be useful,
|
||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
* GNU General Public License for more details.
|
||
|
*
|
||
|
* You should have received a copy of the GNU General Public License
|
||
|
* along with GNU Make; see the file COPYING. If not, write to
|
||
|
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
#ifdef __i386__
|
||
|
|
||
|
/*
 * Read-only constant: a single float, approximately 1/sqrt(2).
 * stream_sample_1ch_to_s16_kni multiplies every sample by it.
 */
	.section .rodata
	.align 4
sqrt2:
	.float	0f0.7071068
	.p2align 5, 0			/* zero-fill up to the next 32-byte boundary */
|
||
|
|
||
|
/*
 * downmix_3f_2r_to_2ch_kni
 *
 * Stack arguments (read from 8(%ebp) and 12(%ebp)):
 *   samples - base of the channel planes, one plane every 1024 bytes:
 *             +0 left, +1024 center, +2048 right,
 *             +3072 left surround, +4096 right surround
 *   dm_par  - three consecutive floats: unit, clev, slev
 *
 * For each of 64 groups of four floats this computes
 *   new left  = left  * unit + center * clev + left surround  * slev
 *   new right = right * unit + center * clev + right surround * slev
 * and stores new left into plane +0 and new right into plane +1024.
 *
 * All sample accesses use movaps, so samples has to be 16-byte aligned.
 * %eax, %ebx and %ecx are saved on entry and restored before returning.
 */
	.section .text
	.align 4
	.global downmix_3f_2r_to_2ch_kni
	.type downmix_3f_2r_to_2ch_kni, @function

downmix_3f_2r_to_2ch_kni:
	pushl	%ebp
	movl	%esp, %ebp

	pushl	%eax
	pushl	%ebx
	pushl	%ecx

	/* broadcast the three gains across all four SSE lanes */
	movl	12(%ebp), %ebx			/* ebx = dm_par */
	movss	(%ebx), %xmm5
	shufps	$0, %xmm5, %xmm5		/* xmm5 = unit x4 */
	movss	4(%ebx), %xmm6
	shufps	$0, %xmm6, %xmm6		/* xmm6 = clev x4 */
	movss	8(%ebx), %xmm7
	shufps	$0, %xmm7, %xmm7		/* xmm7 = slev x4 */

	movl	8(%ebp), %eax			/* eax = samples */
	movl	$64, %ecx			/* 64 groups of 4 floats */

.Ldm_3f_2r_next:
	/* read every plane before plane +1024 (center) is overwritten */
	movaps	(%eax), %xmm0			/* left */
	movaps	1024(%eax), %xmm2		/* center */
	movaps	2048(%eax), %xmm1		/* right */
	movaps	3072(%eax), %xmm3		/* left surround */
	movaps	4096(%eax), %xmm4		/* right surround */

	/* apply the gains */
	mulps	%xmm5, %xmm0
	mulps	%xmm5, %xmm1
	mulps	%xmm6, %xmm2
	mulps	%xmm7, %xmm3
	mulps	%xmm7, %xmm4

	/* new left = left + center + left surround */
	addps	%xmm2, %xmm0
	addps	%xmm3, %xmm0

	/* new right = right + center + right surround */
	addps	%xmm2, %xmm1
	addps	%xmm4, %xmm1

	movaps	%xmm0, (%eax)
	movaps	%xmm1, 1024(%eax)

	addl	$16, %eax			/* advance by four floats */
	decl	%ecx
	jnz	.Ldm_3f_2r_next

	popl	%ecx
	popl	%ebx
	popl	%eax

	leave
	ret
|
||
|
/*
 * downmix_2f_2r_to_2ch_kni
 *
 * Stack arguments (read from 8(%ebp) and 12(%ebp)):
 *   samples - base of the channel planes, one plane every 1024 bytes:
 *             +0 left, +1024 right, +2048 left surround,
 *             +3072 right surround
 *   dm_par  - floats at +0 (unit) and +8 (slev) are used
 *
 * For each of 64 groups of four floats this computes
 *   new left  = left  * unit + left surround  * slev
 *   new right = right * unit + right surround * slev
 * and stores the results back into planes +0 and +1024.
 *
 * All sample accesses use movaps, so samples has to be 16-byte aligned.
 * %eax, %ebx and %ecx are saved on entry and restored before returning.
 */
	.p2align 4,,7
	.global downmix_2f_2r_to_2ch_kni
	.type downmix_2f_2r_to_2ch_kni, @function

downmix_2f_2r_to_2ch_kni:
	pushl	%ebp
	movl	%esp, %ebp

	pushl	%eax
	pushl	%ebx
	pushl	%ecx

	/* broadcast the two gains across all four SSE lanes */
	movl	12(%ebp), %ebx			/* ebx = dm_par */
	movss	(%ebx), %xmm5
	shufps	$0, %xmm5, %xmm5		/* xmm5 = unit x4 */
	movss	8(%ebx), %xmm7
	shufps	$0, %xmm7, %xmm7		/* xmm7 = slev x4 */

	movl	8(%ebp), %eax			/* eax = samples */
	movl	$64, %ecx			/* 64 groups of 4 floats */

.Ldm_2f_2r_next:
	movaps	(%eax), %xmm0			/* left */
	movaps	1024(%eax), %xmm1		/* right */
	movaps	2048(%eax), %xmm3		/* left surround */
	movaps	3072(%eax), %xmm4		/* right surround */

	mulps	%xmm5, %xmm0
	mulps	%xmm5, %xmm1
	mulps	%xmm7, %xmm3
	mulps	%xmm7, %xmm4

	addps	%xmm3, %xmm0			/* new left */
	addps	%xmm4, %xmm1			/* new right */

	movaps	%xmm0, (%eax)
	movaps	%xmm1, 1024(%eax)

	addl	$16, %eax			/* advance by four floats */
	decl	%ecx
	jnz	.Ldm_2f_2r_next

	popl	%ecx
	popl	%ebx
	popl	%eax

	leave
	ret
|
||
|
/*
 * downmix_3f_1r_to_2ch_kni
 *
 * Stack arguments (read from 8(%ebp) and 12(%ebp)):
 *   samples - base of the channel planes, one plane every 1024 bytes:
 *             +0 left, +1024 center, +2048 right, +3072 surround
 *   dm_par  - three consecutive floats: unit, clev, slev
 *
 * For each of 64 groups of four floats this computes
 *   new left  = left  * unit + center * clev - surround * slev
 *   new right = right * unit + center * clev + surround * slev
 * (the surround term is subtracted on the left and added on the right)
 * and stores new left into plane +0 and new right into plane +1024.
 *
 * All sample accesses use movaps, so samples has to be 16-byte aligned.
 * %eax, %ebx and %ecx are saved on entry and restored before returning.
 */
	.p2align 4,,7
	.global downmix_3f_1r_to_2ch_kni
	.type downmix_3f_1r_to_2ch_kni, @function

downmix_3f_1r_to_2ch_kni:
	pushl	%ebp
	movl	%esp, %ebp

	pushl	%eax
	pushl	%ebx
	pushl	%ecx

	/* broadcast the three gains across all four SSE lanes */
	movl	12(%ebp), %ebx			/* ebx = dm_par */
	movss	(%ebx), %xmm5
	shufps	$0, %xmm5, %xmm5		/* xmm5 = unit x4 */
	movss	4(%ebx), %xmm6
	shufps	$0, %xmm6, %xmm6		/* xmm6 = clev x4 */
	movss	8(%ebx), %xmm7
	shufps	$0, %xmm7, %xmm7		/* xmm7 = slev x4 */

	movl	8(%ebp), %eax			/* eax = samples */
	movl	$64, %ecx			/* 64 groups of 4 floats */

.Ldm_3f_1r_next:
	/* read every plane before plane +1024 (center) is overwritten */
	movaps	(%eax), %xmm0			/* left */
	movaps	1024(%eax), %xmm2		/* center */
	movaps	2048(%eax), %xmm1		/* right */
	movaps	3072(%eax), %xmm3		/* surround */

	mulps	%xmm5, %xmm0
	mulps	%xmm5, %xmm1
	mulps	%xmm6, %xmm2
	mulps	%xmm7, %xmm3

	/* new left = left + center - surround */
	addps	%xmm2, %xmm0
	subps	%xmm3, %xmm0

	/* new right = right + center + surround */
	addps	%xmm2, %xmm1
	addps	%xmm3, %xmm1

	movaps	%xmm0, (%eax)
	movaps	%xmm1, 1024(%eax)

	addl	$16, %eax			/* advance by four floats */
	decl	%ecx
	jnz	.Ldm_3f_1r_next

	popl	%ecx
	popl	%ebx
	popl	%eax

	leave
	ret
|
||
|
/*
 * downmix_2f_1r_to_2ch_kni
 *
 * Stack arguments (read from 8(%ebp) and 12(%ebp)):
 *   samples - base of the channel planes, one plane every 1024 bytes:
 *             +0 left, +1024 right, +2048 surround
 *   dm_par  - floats at +0 (unit) and +8 (slev) are used
 *
 * For each of 64 groups of four floats this computes
 *   new left  = left  * unit - surround * slev
 *   new right = right * unit + surround * slev
 * and stores the results back into planes +0 and +1024.
 *
 * All sample accesses use movaps, so samples has to be 16-byte aligned.
 * %eax, %ebx and %ecx are saved on entry and restored before returning.
 */
	.p2align 4,,7
	.global downmix_2f_1r_to_2ch_kni
	.type downmix_2f_1r_to_2ch_kni, @function

downmix_2f_1r_to_2ch_kni:
	pushl	%ebp
	movl	%esp, %ebp

	pushl	%eax
	pushl	%ebx
	pushl	%ecx

	/* broadcast the two gains across all four SSE lanes */
	movl	12(%ebp), %ebx			/* ebx = dm_par */
	movss	(%ebx), %xmm5
	shufps	$0, %xmm5, %xmm5		/* xmm5 = unit x4 */
	movss	8(%ebx), %xmm7
	shufps	$0, %xmm7, %xmm7		/* xmm7 = slev x4 */

	movl	8(%ebp), %eax			/* eax = samples */
	movl	$64, %ecx			/* 64 groups of 4 floats */

.Ldm_2f_1r_next:
	movaps	(%eax), %xmm0			/* left */
	movaps	1024(%eax), %xmm1		/* right */
	movaps	2048(%eax), %xmm3		/* surround */

	mulps	%xmm5, %xmm0
	mulps	%xmm5, %xmm1
	mulps	%xmm7, %xmm3

	subps	%xmm3, %xmm0			/* new left  = left  - surround */
	addps	%xmm3, %xmm1			/* new right = right + surround */

	movaps	%xmm0, (%eax)
	movaps	%xmm1, 1024(%eax)

	addl	$16, %eax			/* advance by four floats */
	decl	%ecx
	jnz	.Ldm_2f_1r_next

	popl	%ecx
	popl	%ebx
	popl	%eax

	leave
	ret
|
||
|
/*
 * downmix_3f_0r_to_2ch_kni
 *
 * Stack arguments (read from 8(%ebp) and 12(%ebp)):
 *   samples - base of the channel planes, one plane every 1024 bytes:
 *             +0 left, +1024 center, +2048 right
 *   dm_par  - floats at +0 (unit) and +4 (clev) are used
 *
 * For each of 64 groups of four floats this computes
 *   new left  = left  * unit + center * clev
 *   new right = right * unit + center * clev
 * and stores new left into plane +0 and new right into plane +1024.
 *
 * All sample accesses use movaps, so samples has to be 16-byte aligned.
 * %eax, %ebx and %ecx are saved on entry and restored before returning.
 */
	.p2align 4,,7
	.global downmix_3f_0r_to_2ch_kni
	.type downmix_3f_0r_to_2ch_kni, @function

downmix_3f_0r_to_2ch_kni:
	pushl	%ebp
	movl	%esp, %ebp

	pushl	%eax
	pushl	%ebx
	pushl	%ecx

	/* broadcast the two gains across all four SSE lanes */
	movl	12(%ebp), %ebx			/* ebx = dm_par */
	movss	(%ebx), %xmm5
	shufps	$0, %xmm5, %xmm5		/* xmm5 = unit x4 */
	movss	4(%ebx), %xmm6
	shufps	$0, %xmm6, %xmm6		/* xmm6 = clev x4 */

	movl	8(%ebp), %eax			/* eax = samples */
	movl	$64, %ecx			/* 64 groups of 4 floats */

.Ldm_3f_0r_next:
	/* read every plane before plane +1024 (center) is overwritten */
	movaps	(%eax), %xmm0			/* left */
	movaps	1024(%eax), %xmm2		/* center */
	movaps	2048(%eax), %xmm1		/* right */

	mulps	%xmm5, %xmm0
	mulps	%xmm5, %xmm1
	mulps	%xmm6, %xmm2

	addps	%xmm2, %xmm0			/* new left  = left  + center */
	addps	%xmm2, %xmm1			/* new right = right + center */

	movaps	%xmm0, (%eax)
	movaps	%xmm1, 1024(%eax)

	addl	$16, %eax			/* advance by four floats */
	decl	%ecx
	jnz	.Ldm_3f_0r_next

	popl	%ecx
	popl	%ebx
	popl	%eax

	leave
	ret
|
||
|
/*
 * stream_sample_2ch_to_s16_kni
 *
 * Stack arguments (read from 8(%ebp), 12(%ebp) and 16(%ebp)):
 *   s16_samples - destination for interleaved signed 16-bit samples
 *   left        - source floats for the left channel
 *   right       - source floats for the right channel
 *
 * Runs 64 iterations.  Each one reads four floats from left and four
 * from right (movaps, so both must be 16-byte aligned), converts them
 * to 32-bit integers with cvtps2pi, narrows them to 16 bits with signed
 * saturation (packssdw) and writes 16 bytes in the order
 *   l0 r0 l1 r1 l2 r2 l3 r3
 *
 * %eax, %ebx, %ecx and %edx are saved and restored; emms is issued
 * before returning because the MMX registers are used.
 */
	.p2align 4,,7
	.global stream_sample_2ch_to_s16_kni
	.type stream_sample_2ch_to_s16_kni, @function

stream_sample_2ch_to_s16_kni:
	pushl	%ebp
	movl	%esp, %ebp

	pushl	%eax
	pushl	%ebx
	pushl	%edx
	pushl	%ecx

	movl	8(%ebp), %eax			/* eax = s16_samples */
	movl	12(%ebp), %ebx			/* ebx = left */
	movl	16(%ebp), %edx			/* edx = right */
	movl	$64, %ecx			/* 64 groups of 4 samples */

.Lstereo_to_s16_next:
	movaps	(%ebx), %xmm0			/* l3 | l2 | l1 | l0 */
	movaps	(%edx), %xmm1			/* r3 | r2 | r1 | r0 */

	/* copy the upper halves down before xmm0 is overwritten */
	movhlps	%xmm0, %xmm2			/* low half: l3 | l2 */
	movhlps	%xmm1, %xmm3			/* low half: r3 | r2 */

	/* samples 0 and 1 */
	unpcklps %xmm1, %xmm0			/* r1 | l1 | r0 | l0 */
	cvtps2pi %xmm0, %mm0			/* mm0 = r0 l0 as int32 */
	movhlps	%xmm0, %xmm0
	cvtps2pi %xmm0, %mm1			/* mm1 = r1 l1 as int32 */

	/* samples 2 and 3 */
	unpcklps %xmm3, %xmm2			/* r3 | l3 | r2 | l2 */
	cvtps2pi %xmm2, %mm2			/* mm2 = r2 l2 as int32 */
	movhlps	%xmm2, %xmm2
	cvtps2pi %xmm2, %mm3			/* mm3 = r3 l3 as int32 */

	/* narrow to int16 with saturation */
	packssdw %mm1, %mm0			/* r1 l1 r0 l0 */
	packssdw %mm3, %mm2			/* r3 l3 r2 l2 */

	movq	%mm0, (%eax)
	movq	%mm2, 8(%eax)

	addl	$16, %eax			/* 8 output samples */
	addl	$16, %ebx			/* 4 left floats */
	addl	$16, %edx			/* 4 right floats */

	decl	%ecx
	jnz	.Lstereo_to_s16_next

	popl	%ecx
	popl	%edx
	popl	%ebx
	popl	%eax

	emms

	leave
	ret
|
||
|
/*
 * stream_sample_1ch_to_s16_kni
 *
 * Stack arguments (read from 8(%ebp) and 12(%ebp)):
 *   s16_samples - destination for interleaved signed 16-bit samples
 *   center      - source floats of the single input channel
 *
 * Runs 64 iterations.  Each one reads four floats (movaps, so the
 * source must be 16-byte aligned), scales them by the sqrt2 constant
 * (about 0.7071), converts them to 32-bit integers with cvtps2pi,
 * narrows them to 16 bits with signed saturation and writes every
 * sample twice, i.e. 16 bytes in the order
 *   c0 c0 c1 c1 c2 c2 c3 c3
 *
 * %eax, %ebx and %ecx are saved and restored; emms is issued before
 * returning because the MMX registers are used.
 */
	.p2align 4,,7
	.global stream_sample_1ch_to_s16_kni
	.type stream_sample_1ch_to_s16_kni, @function

stream_sample_1ch_to_s16_kni:
	pushl	%ebp
	movl	%esp, %ebp

	pushl	%eax
	pushl	%ebx
	pushl	%ecx

	movss	sqrt2, %xmm7
	shufps	$0, %xmm7, %xmm7		/* xmm7 = gain x4 */

	movl	8(%ebp), %eax			/* eax = s16_samples */
	movl	12(%ebp), %ebx			/* ebx = center */
	movl	$64, %ecx			/* 64 groups of 4 samples */

.Lmono_to_s16_next:
	movaps	(%ebx), %xmm0			/* c3 | c2 | c1 | c0 */
	mulps	%xmm7, %xmm0
	movhlps	%xmm0, %xmm2			/* low half: c3 | c2 */

	cvtps2pi %xmm0, %mm0			/* mm0 = c1 c0 as int32 */
	cvtps2pi %xmm2, %mm1			/* mm1 = c3 c2 as int32 */

	/*
	 * packssdw with identical operands yields c1 c0 c1 c0, which is
	 * not sample-duplicated.  Interleaving the low words with
	 * themselves afterwards gives the required c1 c1 c0 c0 layout.
	 */
	packssdw %mm0, %mm0			/* c1 c0 c1 c0 */
	punpcklwd %mm0, %mm0			/* c1 c1 c0 c0 */
	packssdw %mm1, %mm1			/* c3 c2 c3 c2 */
	punpcklwd %mm1, %mm1			/* c3 c3 c2 c2 */

	movq	%mm0, (%eax)
	movq	%mm1, 8(%eax)

	addl	$16, %eax			/* 8 output samples */
	addl	$16, %ebx			/* 4 input floats */

	decl	%ecx
	jnz	.Lmono_to_s16_next

	popl	%ecx
	popl	%ebx
	popl	%eax

	emms
	leave
	ret
|
||
|
#endif
|