x264 source for verification 2026-05-22
This commit is contained in:
2016
common/loongarch/dct-a.S
Normal file
2016
common/loongarch/dct-a.S
Normal file
File diff suppressed because it is too large
Load Diff
95
common/loongarch/dct.h
Normal file
95
common/loongarch/dct.h
Normal file
@@ -0,0 +1,95 @@
|
||||
/*****************************************************************************
|
||||
* dct.h: loongarch transform and zigzag
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2023-2025 x264 project
|
||||
*
|
||||
* Authors: Peng Zhou <zhoupeng@loongson.cn>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at licensing@x264.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X264_LOONGARCH_DCT_H
|
||||
#define X264_LOONGARCH_DCT_H
|
||||
|
||||
#define x264_sub8x8_dct_lasx x264_template(sub8x8_dct_lasx)
|
||||
void x264_sub8x8_dct_lasx( int16_t p_dst[4][16], uint8_t *p_src, uint8_t *p_ref );
|
||||
#define x264_sub16x16_dct_lasx x264_template(sub16x16_dct_lasx)
|
||||
void x264_sub16x16_dct_lasx( int16_t p_dst[16][16], uint8_t *p_src, uint8_t *p_ref );
|
||||
|
||||
#define x264_sub8x8_dct8_lsx x264_template(sub8x8_dct8_lsx)
|
||||
void x264_sub8x8_dct8_lsx( int16_t pi_dct[64], uint8_t *p_pix1, uint8_t *p_pix2 );
|
||||
#define x264_sub16x16_dct8_lasx x264_template(sub16x16_dct8_lasx)
|
||||
void x264_sub16x16_dct8_lasx( int16_t pi_dct[4][64], uint8_t *p_pix1,
|
||||
uint8_t *p_pix2 );
|
||||
|
||||
#define x264_add4x4_idct_lsx x264_template(add4x4_idct_lsx)
|
||||
void x264_add4x4_idct_lsx( uint8_t *p_dst, int16_t pi_dct[16] );
|
||||
#define x264_add8x8_idct_lasx x264_template(add8x8_idct_lasx)
|
||||
void x264_add8x8_idct_lasx( uint8_t *p_dst, int16_t pi_dct[4][16] );
|
||||
#define x264_add16x16_idct_lasx x264_template(add16x16_idct_lasx)
|
||||
void x264_add16x16_idct_lasx( uint8_t *p_dst, int16_t pi_dct[16][16] );
|
||||
#define x264_add8x8_idct8_lasx x264_template(add8x8_idct8_lasx)
|
||||
void x264_add8x8_idct8_lasx( uint8_t *p_dst, int16_t pi_dct[64] );
|
||||
#define x264_add8x8_idct_dc_lasx x264_template(add8x8_idct_dc_lasx)
|
||||
void x264_add8x8_idct_dc_lasx( uint8_t *p_dst, int16_t dct[4] );
|
||||
#define x264_add16x16_idct_dc_lasx x264_template(add16x16_idct_dc_lasx)
|
||||
void x264_add16x16_idct_dc_lasx( uint8_t *p_dst, int16_t dct[16] );
|
||||
|
||||
#define x264_idct4x4dc_lasx x264_template(idct4x4dc_lasx)
|
||||
void x264_idct4x4dc_lasx( int16_t d[16] );
|
||||
#define x264_dct4x4dc_lasx x264_template(dct4x4dc_lasx)
|
||||
void x264_dct4x4dc_lasx( int16_t d[16] );
|
||||
|
||||
#define x264_zigzag_scan_4x4_frame_lasx x264_template(zigzag_scan_4x4_frame_lasx)
|
||||
void x264_zigzag_scan_4x4_frame_lasx( int16_t level[16], int16_t dct[16] );
|
||||
|
||||
#define x264_sub4x4_dct_lsx x264_template(sub4x4_dct_lsx)
|
||||
void x264_sub4x4_dct_lsx( int16_t p_dst[16], uint8_t *p_src, uint8_t *p_ref );
|
||||
#define x264_sub8x8_dct_lsx x264_template(sub8x8_dct_lsx)
|
||||
void x264_sub8x8_dct_lsx( int16_t p_dst[4][16], uint8_t *p_src, uint8_t *p_ref );
|
||||
#define x264_sub16x16_dct_lsx x264_template(sub16x16_dct_lsx)
|
||||
void x264_sub16x16_dct_lsx( int16_t p_dst[16][16], uint8_t *p_src, uint8_t *p_ref );
|
||||
|
||||
#define x264_sub8x8_dct8_lsx x264_template(sub8x8_dct8_lsx)
|
||||
void x264_sub8x8_dct8_lsx( int16_t pi_dct[64], uint8_t *p_pix1, uint8_t *p_pix2 );
|
||||
#define x264_sub16x16_dct8_lsx x264_template(sub16x16_dct8_lsx)
|
||||
void x264_sub16x16_dct8_lsx( int16_t pi_dct[4][64], uint8_t *p_pix1,
|
||||
uint8_t *p_pix2 );
|
||||
|
||||
#define x264_add4x4_idct_lsx x264_template(add4x4_idct_lsx)
|
||||
void x264_add4x4_idct_lsx( uint8_t *p_dst, int16_t pi_dct[16] );
|
||||
#define x264_add8x8_idct_lsx x264_template(add8x8_idct_lsx)
|
||||
void x264_add8x8_idct_lsx( uint8_t *p_dst, int16_t pi_dct[4][16] );
|
||||
#define x264_add16x16_idct_lsx x264_template(add16x16_idct_lsx)
|
||||
void x264_add16x16_idct_lsx( uint8_t *p_dst, int16_t pi_dct[16][16] );
|
||||
#define x264_add8x8_idct8_lsx x264_template(add8x8_idct8_lsx)
|
||||
void x264_add8x8_idct8_lsx( uint8_t *p_dst, int16_t pi_dct[64] );
|
||||
#define x264_add8x8_idct_dc_lsx x264_template(add8x8_idct_dc_lsx)
|
||||
void x264_add8x8_idct_dc_lsx( uint8_t *p_dst, int16_t dct[4] );
|
||||
#define x264_add16x16_idct_dc_lsx x264_template(add16x16_idct_dc_lsx)
|
||||
void x264_add16x16_idct_dc_lsx( uint8_t *p_dst, int16_t dct[16] );
|
||||
|
||||
#define x264_idct4x4dc_lsx x264_template(idct4x4dc_lsx)
|
||||
void x264_idct4x4dc_lsx( int16_t d[16] );
|
||||
#define x264_dct4x4dc_lsx x264_template(dct4x4dc_lsx)
|
||||
void x264_dct4x4dc_lsx( int16_t d[16] );
|
||||
|
||||
#define x264_zigzag_scan_4x4_frame_lsx x264_template(zigzag_scan_4x4_frame_lsx)
|
||||
void x264_zigzag_scan_4x4_frame_lsx( int16_t level[16], int16_t dct[16] );
|
||||
|
||||
#endif
|
||||
1618
common/loongarch/deblock-a.S
Normal file
1618
common/loongarch/deblock-a.S
Normal file
File diff suppressed because it is too large
Load Diff
54
common/loongarch/deblock.h
Normal file
54
common/loongarch/deblock.h
Normal file
@@ -0,0 +1,54 @@
|
||||
/*****************************************************************************
|
||||
* deblock.h: loongarch deblock
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2023-2025 x264 project
|
||||
*
|
||||
* Authors: Hao Chen <chenhao@loongson.cn>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at licensing@x264.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X264_LOONGARCH_DEBLOCK_H
|
||||
#define X264_LOONGARCH_DEBLOCK_H
|
||||
|
||||
#if !HIGH_BIT_DEPTH
|
||||
#define x264_deblock_v_luma_lasx x264_template(deblock_v_luma_lasx)
|
||||
void x264_deblock_v_luma_lasx( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
|
||||
#define x264_deblock_h_luma_lasx x264_template(deblock_h_luma_lasx)
|
||||
void x264_deblock_h_luma_lasx( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
|
||||
|
||||
#define x264_deblock_v_luma_intra_lsx x264_template(deblock_v_luma_intra_lsx)
|
||||
void x264_deblock_v_luma_intra_lsx( uint8_t *pix, intptr_t stride, int alpha, int beta );
|
||||
#define x264_deblock_h_luma_intra_lsx x264_template(deblock_h_luma_intra_lsx)
|
||||
void x264_deblock_h_luma_intra_lsx( uint8_t *pix, intptr_t stride, int alpha, int beta );
|
||||
|
||||
#define x264_deblock_v_luma_intra_lasx x264_template(deblock_v_luma_intra_lasx)
|
||||
void x264_deblock_v_luma_intra_lasx( uint8_t *pix, intptr_t stride, int alpha, int beta );
|
||||
#define x264_deblock_h_luma_intra_lasx x264_template(deblock_h_luma_intra_lasx)
|
||||
void x264_deblock_h_luma_intra_lasx( uint8_t *pix, intptr_t stride, int alpha, int beta );
|
||||
#define x264_deblock_strength_lsx x264_template(deblock_strength_lsx)
|
||||
void x264_deblock_strength_lsx( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
|
||||
int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
|
||||
int mvy_limit, int bframe );
|
||||
#define x264_deblock_strength_lasx x264_template(deblock_strength_lasx)
|
||||
void x264_deblock_strength_lasx( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
|
||||
int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
|
||||
int mvy_limit, int bframe );
|
||||
#endif
|
||||
|
||||
#endif
|
||||
770
common/loongarch/loongson_asm.S
Normal file
770
common/loongarch/loongson_asm.S
Normal file
@@ -0,0 +1,770 @@
|
||||
/*********************************************************************
|
||||
* Copyright (c) 2022-2024 Loongson Technology Corporation Limited
|
||||
* Contributed by Xiwei Gu <guxiwei-hf@loongson.cn>
|
||||
* Shiyou Yin <yinshiyou-hf@loongson.cn>
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*********************************************************************/
|
||||
|
||||
/*
|
||||
* This file is a LoongArch assembly helper file and available under ISC
|
||||
* license. It provides a large number of macros and aliases to simplify
|
||||
* writing assembly code, especially for LSX and LASX optimizations.
|
||||
*
|
||||
* Anyone can modify it or add new features for their own purposes.
|
||||
* Contributing a patch will be appreciated as it might be useful for
|
||||
* others as well. Send patches to the Loongson contributors mentioned above.
|
||||
*
|
||||
* MAJOR version: Usage changes, incompatible with previous version.
|
||||
* MINOR version: Add new macros/functions, or bug fixes.
|
||||
* MICRO version: Comment changes or implementation changes.
|
||||
*/
|
||||
|
||||
/* Version of this helper file, split into MAJOR / MINOR / MICRO parts. */
#define LML_VERSION_MAJOR 0
#define LML_VERSION_MINOR 4
#define LML_VERSION_MICRO 0

/* Text placed in front of every symbol emitted by `function`; empty here. */
#define ASM_PREF
/* Default for the `align` argument of the `function` and `const` macros. */
#define DEFAULT_ALIGN    5
|
||||
|
||||
/*
|
||||
*============================================================================
|
||||
* Macros for a specific project; set them as needed.
|
||||
* Following LoongML macros for your reference.
|
||||
*============================================================================
|
||||
*/
|
||||
|
||||
/*
 * function name [, align]
 *   Opens a global function symbol ASM_PREF<name> in .text, aligned with
 *   `.align \align`, and defines a one-shot `endfunc` macro for closing it.
 *   `endfunc` emits the return jump (jirl $r0, $r1, 0), records the symbol
 *   size, and then purges itself so the next `function` can define it again.
 */
.macro function name, align=DEFAULT_ALIGN
    .macro endfunc
        jirl    $r0, $r1, 0x0
        .size ASM_PREF\name, . - ASM_PREF\name
        .purgem endfunc
    .endm

    .text
    .align \align
    .globl ASM_PREF\name
    .type  ASM_PREF\name, @function
ASM_PREF\name:
.endm
|
||||
|
||||
/*
 * const name [, align]
 *   Opens a label <name> in .rodata, aligned with `.align \align`, and
 *   defines a one-shot `endconst` macro that records the object size and
 *   then purges itself.  Unlike `function`, the label gets no ASM_PREF
 *   prefix and no .globl directive.
 */
.macro const name, align=DEFAULT_ALIGN
    .macro endconst
        .size  \name, . - \name
        .purgem endconst
    .endm

    .section .rodata
    .align   \align
\name:
.endm
|
||||
|
||||
/*
|
||||
*============================================================================
|
||||
* LoongArch register alias
|
||||
*============================================================================
|
||||
*/
|
||||
|
||||
/*
 * General-purpose register aliases: let assembly sources write the register
 * name without the leading '$'.
 */

/* a0 - a7 */
#define a0    $a0
#define a1    $a1
#define a2    $a2
#define a3    $a3
#define a4    $a4
#define a5    $a5
#define a6    $a6
#define a7    $a7

/* t0 - t8 */
#define t0    $t0
#define t1    $t1
#define t2    $t2
#define t3    $t3
#define t4    $t4
#define t5    $t5
#define t6    $t6
#define t7    $t7
#define t8    $t8

/* s0 - s8 */
#define s0    $s0
#define s1    $s1
#define s2    $s2
#define s3    $s3
#define s4    $s4
#define s5    $s5
#define s6    $s6
#define s7    $s7
#define s8    $s8

/* zero, sp, ra */
#define zero  $zero
#define sp    $sp
#define ra    $ra
|
||||
|
||||
/*
 * Floating-point register aliases (fa / ft / fs names) without the
 * leading '$'.
 */

/* fa0 - fa7 */
#define fa0   $fa0
#define fa1   $fa1
#define fa2   $fa2
#define fa3   $fa3
#define fa4   $fa4
#define fa5   $fa5
#define fa6   $fa6
#define fa7   $fa7

/* ft0 - ft15 */
#define ft0   $ft0
#define ft1   $ft1
#define ft2   $ft2
#define ft3   $ft3
#define ft4   $ft4
#define ft5   $ft5
#define ft6   $ft6
#define ft7   $ft7
#define ft8   $ft8
#define ft9   $ft9
#define ft10  $ft10
#define ft11  $ft11
#define ft12  $ft12
#define ft13  $ft13
#define ft14  $ft14
#define ft15  $ft15

/* fs0 - fs7 */
#define fs0   $fs0
#define fs1   $fs1
#define fs2   $fs2
#define fs3   $fs3
#define fs4   $fs4
#define fs5   $fs5
#define fs6   $fs6
#define fs7   $fs7
|
||||
|
||||
/* Numbered floating-point register aliases f0 - f31 without the '$'. */
#define f0    $f0
#define f1    $f1
#define f2    $f2
#define f3    $f3
#define f4    $f4
#define f5    $f5
#define f6    $f6
#define f7    $f7
#define f8    $f8
#define f9    $f9
#define f10   $f10
#define f11   $f11
#define f12   $f12
#define f13   $f13
#define f14   $f14
#define f15   $f15
#define f16   $f16
#define f17   $f17
#define f18   $f18
#define f19   $f19
#define f20   $f20
#define f21   $f21
#define f22   $f22
#define f23   $f23
#define f24   $f24
#define f25   $f25
#define f26   $f26
#define f27   $f27
#define f28   $f28
#define f29   $f29
#define f30   $f30
#define f31   $f31
|
||||
|
||||
/* LSX vector register aliases vr0 - vr31 without the '$'. */
#define vr0   $vr0
#define vr1   $vr1
#define vr2   $vr2
#define vr3   $vr3
#define vr4   $vr4
#define vr5   $vr5
#define vr6   $vr6
#define vr7   $vr7
#define vr8   $vr8
#define vr9   $vr9
#define vr10  $vr10
#define vr11  $vr11
#define vr12  $vr12
#define vr13  $vr13
#define vr14  $vr14
#define vr15  $vr15
#define vr16  $vr16
#define vr17  $vr17
#define vr18  $vr18
#define vr19  $vr19
#define vr20  $vr20
#define vr21  $vr21
#define vr22  $vr22
#define vr23  $vr23
#define vr24  $vr24
#define vr25  $vr25
#define vr26  $vr26
#define vr27  $vr27
#define vr28  $vr28
#define vr29  $vr29
#define vr30  $vr30
#define vr31  $vr31
|
||||
|
||||
/* LASX vector register aliases xr0 - xr31 without the '$'. */
#define xr0   $xr0
#define xr1   $xr1
#define xr2   $xr2
#define xr3   $xr3
#define xr4   $xr4
#define xr5   $xr5
#define xr6   $xr6
#define xr7   $xr7
#define xr8   $xr8
#define xr9   $xr9
#define xr10  $xr10
#define xr11  $xr11
#define xr12  $xr12
#define xr13  $xr13
#define xr14  $xr14
#define xr15  $xr15
#define xr16  $xr16
#define xr17  $xr17
#define xr18  $xr18
#define xr19  $xr19
#define xr20  $xr20
#define xr21  $xr21
#define xr22  $xr22
#define xr23  $xr23
#define xr24  $xr24
#define xr25  $xr25
#define xr26  $xr26
#define xr27  $xr27
#define xr28  $xr28
#define xr29  $xr29
#define xr30  $xr30
#define xr31  $xr31
|
||||
|
||||
/*
|
||||
*============================================================================
|
||||
* LSX/LASX synthesize instructions
|
||||
*============================================================================
|
||||
*/
|
||||
|
||||
/*
 * Description : Pairwise dot product with widening.
 *               The first instruction writes the widened products of the
 *               even-indexed elements of the two sources into the
 *               destination; the second multiply-accumulates the widened
 *               products of the odd-indexed elements on top of it.
 * Arguments   : Inputs  - vj, vk (xj, xk in the 256-bit macros)
 *               Outputs - vd     (xd in the 256-bit macros)
 *               Return Type - twice the width of the input elements
 * Note        : the destination is written before the sources are read the
 *               second time, so it must not be one of the sources.
 */

/* unsigned bytes -> halfwords */
.macro vdp2.h.bu vd, vj, vk
    vmulwev.h.bu      \vd,    \vj,    \vk
    vmaddwod.h.bu     \vd,    \vj,    \vk
.endm

/* unsigned bytes (vj) times signed bytes (vk) -> halfwords */
.macro vdp2.h.bu.b vd, vj, vk
    vmulwev.h.bu.b    \vd,    \vj,    \vk
    vmaddwod.h.bu.b   \vd,    \vj,    \vk
.endm

/* halfwords -> words */
.macro vdp2.w.h vd, vj, vk
    vmulwev.w.h       \vd,    \vj,    \vk
    vmaddwod.w.h      \vd,    \vj,    \vk
.endm

/* 256-bit form of vdp2.h.bu */
.macro xvdp2.h.bu xd, xj, xk
    xvmulwev.h.bu     \xd,    \xj,    \xk
    xvmaddwod.h.bu    \xd,    \xj,    \xk
.endm

/* 256-bit form of vdp2.h.bu.b */
.macro xvdp2.h.bu.b xd, xj, xk
    xvmulwev.h.bu.b   \xd,    \xj,    \xk
    xvmaddwod.h.bu.b  \xd,    \xj,    \xk
.endm

/* 256-bit form of vdp2.w.h */
.macro xvdp2.w.h xd, xj, xk
    xvmulwev.w.h      \xd,    \xj,    \xk
    xvmaddwod.w.h     \xd,    \xj,    \xk
.endm
|
||||
|
||||
/*
 * Description : Pairwise dot product accumulated into the destination.
 *               Both instructions are multiply-accumulates: the widened
 *               products of the even-indexed and then of the odd-indexed
 *               elements are added to the value already in vd / xd.
 * Arguments   : Inputs  - vj, vk (xj, xk), plus the incoming vd (xd)
 *               Outputs - vd (xd)
 *               Return Type - twice the width of the input elements
 * Note        : the destination is updated before the sources are read the
 *               second time, so it must not be one of the sources.
 */

/* unsigned bytes -> halfwords */
.macro vdp2add.h.bu vd, vj, vk
    vmaddwev.h.bu     \vd,    \vj,    \vk
    vmaddwod.h.bu     \vd,    \vj,    \vk
.endm

/* unsigned bytes (vj) times signed bytes (vk) -> halfwords */
.macro vdp2add.h.bu.b vd, vj, vk
    vmaddwev.h.bu.b   \vd,    \vj,    \vk
    vmaddwod.h.bu.b   \vd,    \vj,    \vk
.endm

/* halfwords -> words */
.macro vdp2add.w.h vd, vj, vk
    vmaddwev.w.h      \vd,    \vj,    \vk
    vmaddwod.w.h      \vd,    \vj,    \vk
.endm

/* 256-bit form of vdp2add.h.bu.b */
.macro xvdp2add.h.bu.b xd, xj, xk
    xvmaddwev.h.bu.b  \xd,    \xj,    \xk
    xvmaddwod.h.bu.b  \xd,    \xj,    \xk
.endm

/* 256-bit form of vdp2add.w.h */
.macro xvdp2add.w.h xd, xj, xk
    xvmaddwev.w.h     \xd,    \xj,    \xk
    xvmaddwod.w.h     \xd,    \xj,    \xk
.endm
|
||||
|
||||
/*
 * Description : Clamp every element of vj to the range vk[i] ~ va[i].
 *               vd = min( max( vj, vk ), va )
 *               vk holds the lower bounds, va the upper bounds; the
 *               comparisons are the signed vmax / vmin forms.
 */

/* halfword elements, 128-bit */
.macro vclip.h  vd, vj, vk, va
    vmax.h    \vd,    \vj,    \vk
    vmin.h    \vd,    \vd,    \va
.endm

/* word elements, 128-bit */
.macro vclip.w  vd, vj, vk, va
    vmax.w    \vd,    \vj,    \vk
    vmin.w    \vd,    \vd,    \va
.endm

/* halfword elements, 256-bit */
.macro xvclip.h  xd, xj, xk, xa
    xvmax.h   \xd,    \xj,    \xk
    xvmin.h   \xd,    \xd,    \xa
.endm

/* word elements, 256-bit */
.macro xvclip.w  xd, xj, xk, xa
    xvmax.w   \xd,    \xj,    \xk
    xvmin.w   \xd,    \xd,    \xa
.endm
|
||||
|
||||
/*
 * Description : Clamp every element of vj to the range 0 ~ 255.
 *               Step 1: signed max against the immediate 0 removes
 *                       negative values.
 *               Step 2: unsigned saturation with immediate 7 limits the
 *                       result to 8 significant bits, i.e. at most 255.
 */

/* halfword elements, 128-bit */
.macro vclip255.h  vd, vj
    vmaxi.h   \vd,    \vj,    0
    vsat.hu   \vd,    \vd,    7
.endm

/* word elements, 128-bit */
.macro vclip255.w  vd, vj
    vmaxi.w   \vd,    \vj,    0
    vsat.wu   \vd,    \vd,    7
.endm

/* halfword elements, 256-bit */
.macro xvclip255.h  xd, xj
    xvmaxi.h  \xd,    \xj,    0
    xvsat.hu  \xd,    \xd,    7
.endm

/* word elements, 256-bit */
.macro xvclip255.w  xd, xj
    xvmaxi.w  \xd,    \xj,    0
    xvsat.wu  \xd,    \xd,    7
.endm
|
||||
|
||||
/*
 * Description : Advance an address register, then store one vector element.
 *               vd : Data vector to be stored
 *               rk : Address register; it is first incremented by the
 *                    offset register and keeps the new value afterwards
 *               ra : Offset added to rk
 *               si : Index of the element in vd that is stored
 */

/* byte element */
.macro vstelmx.b vd, rk, ra, si
    add.d      \rk,   \rk,   \ra
    vstelm.b   \vd,   \rk,   0,    \si
.endm

/* halfword element */
.macro vstelmx.h vd, rk, ra, si
    add.d      \rk,   \rk,   \ra
    vstelm.h   \vd,   \rk,   0,    \si
.endm

/* word element */
.macro vstelmx.w vd, rk, ra, si
    add.d      \rk,   \rk,   \ra
    vstelm.w   \vd,   \rk,   0,    \si
.endm

/* doubleword element */
.macro vstelmx.d vd, rk, ra, si
    add.d      \rk,   \rk,   \ra
    vstelm.d   \vd,   \rk,   0,    \si
.endm
|
||||
|
||||
/* Copy a 128-bit vector register: OR the source with itself into xd. */
.macro vmov xd, xj
    vor.v    \xd,    \xj,    \xj
.endm

/* Copy a 256-bit vector register: OR the source with itself into xd. */
.macro xmov xd, xj
    xvor.v   \xd,    \xj,    \xj
.endm
|
||||
|
||||
/*
 * 256-bit counterpart of vstelmx.d: rk is first incremented by the offset
 * register (and keeps the new value), then doubleword element si of xd is
 * stored at the updated address.
 */
.macro xvstelmx.d  xd, rk, ra, si
    add.d      \rk,   \rk,   \ra
    xvstelm.d  \xd,   \rk,   0,    \si
.endm
|
||||
|
||||
/*
|
||||
*============================================================================
|
||||
* LSX/LASX custom macros
|
||||
*============================================================================
|
||||
*/
|
||||
|
||||
/*
 * Load four values from src + 0, src + stride, src + stride2 and
 * src + stride3 into out0..out3.  The first load uses an immediate offset
 * of 0; the other three use the register-indexed load form, so stride,
 * stride2 and stride3 are registers supplied by the caller.
 * One macro per element size: float, double, 128-bit and 256-bit vector.
 */

/* single-precision float */
.macro FLDS_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3
    fld.s     \out0,    \src,    0
    fldx.s    \out1,    \src,    \stride
    fldx.s    \out2,    \src,    \stride2
    fldx.s    \out3,    \src,    \stride3
.endm

/* double-precision float */
.macro FLDD_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3
    fld.d     \out0,    \src,    0
    fldx.d    \out1,    \src,    \stride
    fldx.d    \out2,    \src,    \stride2
    fldx.d    \out3,    \src,    \stride3
.endm

/* 128-bit vector */
.macro LSX_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3
    vld       \out0,    \src,    0
    vldx      \out1,    \src,    \stride
    vldx      \out2,    \src,    \stride2
    vldx      \out3,    \src,    \stride3
.endm

/* 256-bit vector */
.macro LASX_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3
    xvld      \out0,    \src,    0
    xvldx     \out1,    \src,    \stride
    xvldx     \out2,    \src,    \stride2
    xvldx     \out3,    \src,    \stride3
.endm
|
||||
|
||||
/*
 * Description : Transpose 4x4 block with half-word elements in vectors
 * Arguments   : Inputs  - in0, in1, in2, in3   (only the low four
 *                         halfwords of each input are read)
 *               Outputs - out0, out1, out2, out3 (each transposed row is
 *                         delivered in the low 64 bits of its register)
 *               Scratch - tmp0, tmp1
 */
.macro LSX_TRANSPOSE4x4_H in0, in1, in2, in3, out0, out1, out2, out3, \
                          tmp0, tmp1
    /* pair up rows 0/1 and rows 2/3 halfword by halfword */
    vilvl.h   \tmp0,    \in1,     \in0
    vilvl.h   \tmp1,    \in3,     \in2
    /* word interleave: out0 = rows 0|1, out2 = rows 2|3 of the result */
    vilvl.w   \out0,    \tmp1,    \tmp0
    vilvh.w   \out2,    \tmp1,    \tmp0
    /* move the upper halves down to form rows 1 and 3 */
    vilvh.d   \out1,    \out0,    \out0
    vilvh.d   \out3,    \out0,    \out2
.endm
|
||||
|
||||
/*
 * Description : Transpose 4x4 block with word elements in vectors
 * Arguments   : Inputs  - in0, in1, in2, in3
 *               Outputs - out0, out1, out2, out3
 *               Scratch - tmp0, tmp1
 * Example     :
 *                1, 2, 3, 4            1, 5, 9,13
 *                5, 6, 7, 8    to      2, 6,10,14
 *                9,10,11,12  =====>    3, 7,11,15
 *               13,14,15,16            4, 8,12,16
 * Note        : out1 and out3 are used as temporaries before all inputs
 *               have been consumed.
 */
.macro LSX_TRANSPOSE4x4_W in0, in1, in2, in3, out0, out1, out2, out3, \
                          tmp0, tmp1
    /* stage 1: interleave words of rows 0/1 and of rows 2/3 */
    vilvl.w   \tmp0,    \in1,     \in0
    vilvh.w   \out1,    \in1,     \in0
    vilvl.w   \tmp1,    \in3,     \in2
    vilvh.w   \out3,    \in3,     \in2

    /* stage 2: combine the doubleword halves into the final rows */
    vilvl.d   \out0,    \tmp1,    \tmp0
    vilvl.d   \out2,    \out3,    \out1
    vilvh.d   \out3,    \out3,    \out1
    vilvh.d   \out1,    \tmp1,    \tmp0
.endm
|
||||
|
||||
/*
 * Description : Transpose 8x8 block with half-word elements in vectors
 * Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7
 *               Outputs - out0, out1, out2, out3, out4, out5, out6, out7
 *               Scratch - tmp0 .. tmp7
 * Note        : out0..out3 are written while in0..in7 are still needed
 *               (their upper halves are read before, but tmp0..tmp3 are
 *               rebuilt from the inputs first), so the first four outputs
 *               may overlap the inputs only if the caller has checked the
 *               instruction order below.
 */
.macro LSX_TRANSPOSE8x8_H in0, in1, in2, in3, in4, in5, in6, in7, out0, out1,   \
                          out2, out3, out4, out5, out6, out7, tmp0, tmp1, tmp2, \
                          tmp3, tmp4, tmp5, tmp6, tmp7
    /* low halves: pair rows 4/6, 5/7, 0/2, 1/3 */
    vilvl.h      \tmp0,    \in6,     \in4
    vilvl.h      \tmp1,    \in7,     \in5
    vilvl.h      \tmp2,    \in2,     \in0
    vilvl.h      \tmp3,    \in3,     \in1

    /* second interleave gives groups of four consecutive rows per column */
    vilvl.h      \tmp4,    \tmp1,    \tmp0
    vilvh.h      \tmp5,    \tmp1,    \tmp0
    vilvl.h      \tmp6,    \tmp3,    \tmp2
    vilvh.h      \tmp7,    \tmp3,    \tmp2

    /* high halves: same pairing as above */
    vilvh.h      \tmp0,    \in6,     \in4
    vilvh.h      \tmp1,    \in7,     \in5
    vilvh.h      \tmp2,    \in2,     \in0
    vilvh.h      \tmp3,    \in3,     \in1

    /* assemble result rows 0..3 from the low-half groups */
    vpickev.d    \out0,    \tmp4,    \tmp6
    vpickod.d    \out1,    \tmp4,    \tmp6
    vpickev.d    \out2,    \tmp5,    \tmp7
    vpickod.d    \out3,    \tmp5,    \tmp7

    /* second interleave for the high halves */
    vilvl.h      \tmp4,    \tmp1,    \tmp0
    vilvh.h      \tmp5,    \tmp1,    \tmp0
    vilvl.h      \tmp6,    \tmp3,    \tmp2
    vilvh.h      \tmp7,    \tmp3,    \tmp2

    /* assemble result rows 4..7 from the high-half groups */
    vpickev.d    \out4,    \tmp4,    \tmp6
    vpickod.d    \out5,    \tmp4,    \tmp6
    vpickev.d    \out6,    \tmp5,    \tmp7
    vpickod.d    \out7,    \tmp5,    \tmp7
.endm
|
||||
|
||||
/*
 * Description : Transpose 16x8 block with byte elements in vectors
 * Arguments   : Inputs  - in0 .. in15 (sixteen rows; the first stage uses
 *                         only the low-half byte interleave, so only the
 *                         low 8 bytes of each 128-bit lane are read)
 *               Outputs - out0, out1, out2, out3, out4, out5, out6, out7
 *               Scratch - tmp0 .. tmp7
 * Note        : out0..out7 double as temporaries during stage 2.
 */
.macro LASX_TRANSPOSE16X8_B in0, in1, in2, in3, in4, in5, in6, in7,          \
                            in8, in9, in10, in11, in12, in13, in14, in15,    \
                            out0, out1, out2, out3, out4, out5, out6, out7,  \
                            tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7
    /* stage 1: byte-interleave rows two apart (0/2, 1/3, 4/6, ...) */
    xvilvl.b   \tmp0,    \in2,     \in0
    xvilvl.b   \tmp1,    \in3,     \in1
    xvilvl.b   \tmp2,    \in6,     \in4
    xvilvl.b   \tmp3,    \in7,     \in5
    xvilvl.b   \tmp4,    \in10,    \in8
    xvilvl.b   \tmp5,    \in11,    \in9
    xvilvl.b   \tmp6,    \in14,    \in12
    xvilvl.b   \tmp7,    \in15,    \in13

    /* stage 2: byte-interleave again -> groups of four rows */
    xvilvl.b   \out0,    \tmp1,    \tmp0
    xvilvh.b   \out1,    \tmp1,    \tmp0
    xvilvl.b   \out2,    \tmp3,    \tmp2
    xvilvh.b   \out3,    \tmp3,    \tmp2
    xvilvl.b   \out4,    \tmp5,    \tmp4
    xvilvh.b   \out5,    \tmp5,    \tmp4
    xvilvl.b   \out6,    \tmp7,    \tmp6
    xvilvh.b   \out7,    \tmp7,    \tmp6

    /* stage 3: word-interleave -> groups of eight rows */
    xvilvl.w   \tmp0,    \out2,    \out0
    xvilvh.w   \tmp2,    \out2,    \out0
    xvilvl.w   \tmp4,    \out3,    \out1
    xvilvh.w   \tmp6,    \out3,    \out1
    xvilvl.w   \tmp1,    \out6,    \out4
    xvilvh.w   \tmp3,    \out6,    \out4
    xvilvl.w   \tmp5,    \out7,    \out5
    xvilvh.w   \tmp7,    \out7,    \out5

    /* stage 4: doubleword-interleave -> all sixteen rows per column */
    xvilvl.d   \out0,    \tmp1,    \tmp0
    xvilvh.d   \out1,    \tmp1,    \tmp0
    xvilvl.d   \out2,    \tmp3,    \tmp2
    xvilvh.d   \out3,    \tmp3,    \tmp2
    xvilvl.d   \out4,    \tmp5,    \tmp4
    xvilvh.d   \out5,    \tmp5,    \tmp4
    xvilvl.d   \out6,    \tmp7,    \tmp6
    xvilvh.d   \out7,    \tmp7,    \tmp6
.endm
|
||||
|
||||
/*
 * Description : Transpose 4x4 block with half-word elements in vectors
 *               (256-bit registers; every instruction works on the two
 *               128-bit lanes independently)
 * Arguments   : Inputs  - in0, in1, in2, in3
 *               Outputs - out0, out1, out2, out3 (each transposed row is
 *                         delivered in the low 64 bits of a lane)
 *               Scratch - tmp0, tmp1
 */
.macro LASX_TRANSPOSE4x4_H in0, in1, in2, in3, out0, out1, out2, out3, \
                           tmp0, tmp1
    /* pair up rows 0/1 and rows 2/3 halfword by halfword */
    xvilvl.h   \tmp0,    \in1,     \in0
    xvilvl.h   \tmp1,    \in3,     \in2
    /* word interleave: out0 = rows 0|1, out2 = rows 2|3 of the result */
    xvilvl.w   \out0,    \tmp1,    \tmp0
    xvilvh.w   \out2,    \tmp1,    \tmp0
    /* move the upper halves down to form rows 1 and 3 */
    xvilvh.d   \out1,    \out0,    \out0
    xvilvh.d   \out3,    \out0,    \out2
.endm
|
||||
|
||||
/*
 * Description : Transpose 4x8 block with half-word elements in vectors
 * Arguments   : Inputs  - in0, in1, in2, in3 (only the low four halfwords
 *                         of each 128-bit lane are read)
 *               Outputs - out0, out1, out2, out3 (the final doubleword
 *                         interleaves use the same register for both
 *                         sources, so each 64-bit result is duplicated
 *                         inside its 128-bit lane)
 *               Scratch - tmp0, tmp1
 */
.macro LASX_TRANSPOSE4x8_H in0, in1, in2, in3, out0, out1, out2, out3, \
                           tmp0, tmp1
    /* two rounds of halfword interleave gather the four rows per column */
    xvilvl.h   \tmp0,    \in2,     \in0
    xvilvl.h   \tmp1,    \in3,     \in1
    xvilvl.h   \out2,    \tmp1,    \tmp0
    xvilvh.h   \out3,    \tmp1,    \tmp0

    /* split out2 / out3 into four outputs; out2 and out3 are consumed
     * before they are overwritten */
    xvilvl.d   \out0,    \out2,    \out2
    xvilvh.d   \out1,    \out2,    \out2
    xvilvl.d   \out2,    \out3,    \out3
    xvilvh.d   \out3,    \out3,    \out3
.endm
|
||||
|
||||
/*
 * Description : Transpose 8x8 block with half-word elements in vectors
 *               (256-bit registers; every instruction works on the two
 *               128-bit lanes independently)
 * Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7
 *               Outputs - out0, out1, out2, out3, out4, out5, out6, out7
 *               Scratch - tmp0 .. tmp7
 */
.macro LASX_TRANSPOSE8x8_H in0, in1, in2, in3, in4, in5, in6, in7,         \
                           out0, out1, out2, out3, out4, out5, out6, out7, \
                           tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7
    /* low halves: pair rows 4/6, 5/7, 0/2, 1/3 */
    xvilvl.h     \tmp0,   \in6,     \in4
    xvilvl.h     \tmp1,   \in7,     \in5
    xvilvl.h     \tmp2,   \in2,     \in0
    xvilvl.h     \tmp3,   \in3,     \in1

    /* second interleave gives groups of four consecutive rows per column */
    xvilvl.h     \tmp4,   \tmp1,    \tmp0
    xvilvh.h     \tmp5,   \tmp1,    \tmp0
    xvilvl.h     \tmp6,   \tmp3,    \tmp2
    xvilvh.h     \tmp7,   \tmp3,    \tmp2

    /* high halves: same pairing as above */
    xvilvh.h     \tmp0,   \in6,     \in4
    xvilvh.h     \tmp1,   \in7,     \in5
    xvilvh.h     \tmp2,   \in2,     \in0
    xvilvh.h     \tmp3,   \in3,     \in1

    /* assemble result rows 0..3 from the low-half groups */
    xvpickev.d   \out0,   \tmp4,    \tmp6
    xvpickod.d   \out1,   \tmp4,    \tmp6
    xvpickev.d   \out2,   \tmp5,    \tmp7
    xvpickod.d   \out3,   \tmp5,    \tmp7

    /* second interleave for the high halves */
    xvilvl.h     \tmp4,   \tmp1,    \tmp0
    xvilvh.h     \tmp5,   \tmp1,    \tmp0
    xvilvl.h     \tmp6,   \tmp3,    \tmp2
    xvilvh.h     \tmp7,   \tmp3,    \tmp2

    /* assemble result rows 4..7 from the high-half groups */
    xvpickev.d   \out4,   \tmp4,    \tmp6
    xvpickod.d   \out5,   \tmp4,    \tmp6
    xvpickev.d   \out6,   \tmp5,    \tmp7
    xvpickod.d   \out7,   \tmp5,    \tmp7
.endm
|
||||
|
||||
/*
 * Description : Transpose 2x4x4 block with half-word elements in vectors
 * Arguments   : Inputs  - in0, in1, in2, in3
 *               Outputs - out0, out1, out2, out3
 *               Scratch - tmp0, tmp1, tmp2
 * Note        : out1 and out3 are written in the first stage, before all
 *               inputs have been read.  The exact element order of the
 *               outputs follows from the operand order below (in0 / in2
 *               are passed as the first source of each interleave).
 */
.macro LASX_TRANSPOSE2x4x4_H in0, in1, in2, in3, out0, out1, out2, out3, \
                             tmp0, tmp1, tmp2
    /* stage 1: halfword interleave of in0/in1 and of in2/in3 */
    xvilvh.h   \tmp1,    \in0,     \in1
    xvilvl.h   \out1,    \in0,     \in1
    xvilvh.h   \tmp0,    \in2,     \in3
    xvilvl.h   \out3,    \in2,     \in3

    /* stage 2a: word interleave of the low-half results */
    xvilvh.w   \tmp2,    \out3,    \out1
    xvilvl.w   \out3,    \out3,    \out1

    /* stage 2b: word interleave of the high-half results */
    xvilvl.w   \out2,    \tmp0,    \tmp1
    xvilvh.w   \tmp1,    \tmp0,    \tmp1

    /* stage 3: doubleword interleave produces the four outputs */
    xvilvh.d   \out0,    \out2,    \out3
    xvilvl.d   \out2,    \out2,    \out3
    xvilvh.d   \out1,    \tmp1,    \tmp2
    xvilvl.d   \out3,    \tmp1,    \tmp2
.endm
|
||||
|
||||
/*
 * Description : Transpose 4x4 block with word elements in vectors
 *               (done separately in each 128-bit lane)
 * Arguments   : Inputs  - in0, in1, in2, in3
 *               Outputs - out0, out1, out2, out3
 *               Scratch - tmp0, tmp1
 * Example     :
 *                1, 2, 3, 4,  1, 2, 3, 4        1,5, 9,13, 1,5, 9,13
 *                5, 6, 7, 8,  5, 6, 7, 8   to   2,6,10,14, 2,6,10,14
 *                9,10,11,12,  9,10,11,12 =====> 3,7,11,15, 3,7,11,15
 *               13,14,15,16, 13,14,15,16        4,8,12,16, 4,8,12,16
 * Note        : out1 and out3 are used as temporaries before all inputs
 *               have been consumed.
 */
.macro LASX_TRANSPOSE4x4_W in0, in1, in2, in3, out0, out1, out2, out3, \
                           tmp0, tmp1
    /* stage 1: interleave words of rows 0/1 and of rows 2/3 */
    xvilvl.w   \tmp0,    \in1,     \in0
    xvilvh.w   \out1,    \in1,     \in0
    xvilvl.w   \tmp1,    \in3,     \in2
    xvilvh.w   \out3,    \in3,     \in2

    /* stage 2: combine the doubleword halves into the final rows */
    xvilvl.d   \out0,    \tmp1,    \tmp0
    xvilvl.d   \out2,    \out3,    \out1
    xvilvh.d   \out3,    \out3,    \out1
    xvilvh.d   \out1,    \tmp1,    \tmp0
.endm
|
||||
|
||||
/*
 * Description : Transpose 8x8 block with word elements in vectors
 * Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7
 *               Outputs - out0, out1, out2, out3, out4, out5, out6, out7
 *               Scratch - tmp0, tmp1, tmp2, tmp3
 * Example     : LASX_TRANSPOSE8x8_W
 *         in0 : 1,2,3,4,5,6,7,8
 *         in1 : 2,2,3,4,5,6,7,8
 *         in2 : 3,2,3,4,5,6,7,8
 *         in3 : 4,2,3,4,5,6,7,8
 *         in4 : 5,2,3,4,5,6,7,8
 *         in5 : 6,2,3,4,5,6,7,8
 *         in6 : 7,2,3,4,5,6,7,8
 *         in7 : 8,2,3,4,5,6,7,8
 *
 *        out0 : 1,2,3,4,5,6,7,8
 *        out1 : 2,2,2,2,2,2,2,2
 *        out2 : 3,3,3,3,3,3,3,3
 *        out3 : 4,4,4,4,4,4,4,4
 *        out4 : 5,5,5,5,5,5,5,5
 *        out5 : 6,6,6,6,6,6,6,6
 *        out6 : 7,7,7,7,7,7,7,7
 *        out7 : 8,8,8,8,8,8,8,8
 */
.macro LASX_TRANSPOSE8x8_W in0, in1, in2, in3, in4, in5, in6, in7,         \
                           out0, out1, out2, out3, out4, out5, out6, out7, \
                           tmp0, tmp1, tmp2, tmp3
    /* rows 0..3: per-lane 4x4 word transpose into out0..out3 */
    xvilvl.w    \tmp0,   \in2,     \in0
    xvilvl.w    \tmp1,   \in3,     \in1
    xvilvh.w    \tmp2,   \in2,     \in0
    xvilvh.w    \tmp3,   \in3,     \in1
    xvilvl.w    \out0,   \tmp1,    \tmp0
    xvilvh.w    \out1,   \tmp1,    \tmp0
    xvilvl.w    \out2,   \tmp3,    \tmp2
    xvilvh.w    \out3,   \tmp3,    \tmp2

    /* rows 4..7: per-lane 4x4 word transpose into out4..out7 */
    xvilvl.w    \tmp0,   \in6,     \in4
    xvilvl.w    \tmp1,   \in7,     \in5
    xvilvh.w    \tmp2,   \in6,     \in4
    xvilvh.w    \tmp3,   \in7,     \in5
    xvilvl.w    \out4,   \tmp1,    \tmp0
    xvilvh.w    \out5,   \tmp1,    \tmp0
    xvilvl.w    \out6,   \tmp3,    \tmp2
    xvilvh.w    \out7,   \tmp3,    \tmp2

    /* keep copies of out0..out3; they are overwritten next but their
     * upper lanes are still needed for out4..out7 */
    xmov        \tmp0,   \out0
    xmov        \tmp1,   \out1
    xmov        \tmp2,   \out2
    xmov        \tmp3,   \out3

    /* exchange 128-bit lanes: low lanes end up in out0..out3 ... */
    xvpermi.q   \out0,   \out4,    0x02
    xvpermi.q   \out1,   \out5,    0x02
    xvpermi.q   \out2,   \out6,    0x02
    xvpermi.q   \out3,   \out7,    0x02
    /* ... and high lanes in out4..out7 */
    xvpermi.q   \out4,   \tmp0,    0x31
    xvpermi.q   \out5,   \tmp1,    0x31
    xvpermi.q   \out6,   \tmp2,    0x31
    xvpermi.q   \out7,   \tmp3,    0x31
.endm
|
||||
|
||||
/*
 * Description : Transpose 4x4 block with double-word elements in vectors
 * Arguments   : Inputs  - in0, in1, in2, in3
 *               Outputs - out0, out1, out2, out3
 *               Scratch - tmp0, tmp1
 * Example     : LASX_TRANSPOSE4x4_D
 *         in0 : 1,2,3,4
 *         in1 : 1,2,3,4
 *         in2 : 1,2,3,4
 *         in3 : 1,2,3,4
 *
 *        out0 : 1,1,1,1
 *        out1 : 2,2,2,2
 *        out2 : 3,3,3,3
 *        out3 : 4,4,4,4
 * Note        : out1 and out2 are written in the first stage, before all
 *               inputs have been read.
 */
.macro LASX_TRANSPOSE4x4_D in0, in1, in2, in3, out0, out1, out2, out3, \
                           tmp0, tmp1
    /* stage 1: per-lane doubleword interleave of rows 0/1 and rows 2/3 */
    xvilvl.d    \tmp0,   \in1,     \in0
    xvilvh.d    \out1,   \in1,     \in0
    xvilvh.d    \tmp1,   \in3,     \in2
    xvilvl.d    \out2,   \in3,     \in2

    /* seed out0 / out3 with copies of the scratch registers */
    xvor.v      \out0,   \tmp0,    \tmp0
    xvor.v      \out3,   \tmp1,    \tmp1

    /* stage 2: exchange 128-bit lanes to finish the transpose */
    xvpermi.q   \out0,   \out2,    0x02
    xvpermi.q   \out2,   \tmp0,    0x31
    xvpermi.q   \out3,   \out1,    0x31
    xvpermi.q   \out1,   \tmp1,    0x02
.endm
|
||||
47
common/loongarch/loongson_util.S
Normal file
47
common/loongarch/loongson_util.S
Normal file
@@ -0,0 +1,47 @@
|
||||
/*****************************************************************************
|
||||
* loongson_util.S: loongson utility macros
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2023-2025 x264 project
|
||||
*
|
||||
* Authors: Shiyou Yin <yinshiyou-hf@loongson.cn>
|
||||
* Xiwei Gu <guxiwei-hf@loongson.cn>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at licensing@x264.com.
|
||||
*****************************************************************************/
|
||||
|
||||
/* GLUE pastes its two arguments into one token. JOIN adds a level of
 * indirection so that macro arguments (e.g. BIT_DEPTH) are expanded
 * before they are pasted. */
#define GLUE(a, b) a ## b
|
||||
#define JOIN(a, b) GLUE(a, b)
|
||||
|
||||
/* Set prefix as needed. */
/* ASM_REF expands to x264_<BIT_DEPTH>_ and is prepended to every symbol
 * emitted by function_x264. BIT_DEPTH is not defined in this file. */
|
||||
#define ASM_REF JOIN(JOIN(x264_, BIT_DEPTH), _)
|
||||
|
||||
/* NOTE(review): presumably the row strides of the encode (fenc) and
 * decode (fdec) macroblock buffers -- confirm against the C side. */
#define FENC_STRIDE 16
|
||||
#define FDEC_STRIDE 32
|
||||
|
||||
/*
 * function_x264 name [, align]
 * Opens a global function whose symbol is ASM_REF followed by `name`:
 * switches to .text, applies the requested alignment, marks the symbol
 * global and of type @function, and places its label.
 * It also (re)defines the one-shot macro endfunc_x264 that closes the
 * function. DEFAULT_ALIGN is not defined in this file.
 */
.macro function_x264 name, align=DEFAULT_ALIGN
|
||||
/* endfunc_x264 is defined per function so that it can refer to \name. */
.macro endfunc_x264
|
||||
/* Return: jump to the address held in $r1; the link value goes to $r0
 * (NOTE(review): $r1 = ra, $r0 = zero per the LoongArch ABI). */
jirl $r0, $r1, 0x0
|
||||
/* Record the symbol size as the distance from its label to here. */
.size ASM_REF\name, . - ASM_REF\name
|
||||
/* Remove this macro so the next function_x264 can define it again. */
.purgem endfunc_x264
|
||||
.endm
|
||||
.text ;
|
||||
.align \align ;
|
||||
.globl ASM_REF\name ;
|
||||
.type ASM_REF\name, @function ;
|
||||
ASM_REF\name: ;
|
||||
.endm
|
||||
2702
common/loongarch/mc-a.S
Normal file
2702
common/loongarch/mc-a.S
Normal file
File diff suppressed because it is too large
Load Diff
406
common/loongarch/mc-c.c
Normal file
406
common/loongarch/mc-c.c
Normal file
@@ -0,0 +1,406 @@
|
||||
/*****************************************************************************
|
||||
* mc-c.c: loongarch motion compensation
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2023-2025 x264 project
|
||||
*
|
||||
* Authors: Xiwei Gu <guxiwei-hf@loongson.cn>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at licensing@x264.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common/common.h"
|
||||
#include "mc.h"
|
||||
|
||||
#if !HIGH_BIT_DEPTH
|
||||
|
||||
/* Defines the static table mc<suffix>_wtab_lsx of LSX weighted-prediction
 * kernels (w4, w4, w8, w16, w16, w20).  Users in this file index the table
 * with (block width >> 2).  The expansion already ends in ';', so the macro
 * is invoked without a trailing semicolon. */
#define MC_WEIGHT_LSX(suffix) \
static void (* mc##suffix##_wtab_lsx[6])( uint8_t *, intptr_t, uint8_t *, intptr_t, \
                                          const x264_weight_t *, int ) = \
{ \
    [0] = x264_mc_weight_w4##suffix##_lsx, \
    [1] = x264_mc_weight_w4##suffix##_lsx, \
    [2] = x264_mc_weight_w8##suffix##_lsx, \
    [3] = x264_mc_weight_w16##suffix##_lsx, \
    [4] = x264_mc_weight_w16##suffix##_lsx, \
    [5] = x264_mc_weight_w20##suffix##_lsx, \
};
|
||||
|
||||
/* LASX counterpart of MC_WEIGHT_LSX: defines the static table
 * mc<suffix>_wtab_lasx of weighted-prediction kernels (w4, w4, w8, w16,
 * w16, w20).  The expansion already ends in ';', so the macro is invoked
 * without a trailing semicolon. */
#define MC_WEIGHT(suffix) \
static void (* mc##suffix##_wtab_lasx[6])( uint8_t *, intptr_t, uint8_t *, intptr_t, \
                                           const x264_weight_t *, int ) = \
{ \
    [0] = x264_mc_weight_w4##suffix##_lasx, \
    [1] = x264_mc_weight_w4##suffix##_lasx, \
    [2] = x264_mc_weight_w8##suffix##_lasx, \
    [3] = x264_mc_weight_w16##suffix##_lasx, \
    [4] = x264_mc_weight_w16##suffix##_lasx, \
    [5] = x264_mc_weight_w20##suffix##_lasx, \
};
|
||||
|
||||
/* Instantiate the four weight tables used by weight_cache_lsx/_lasx:
 * mc_wtab_lsx, mc_noden_wtab_lsx, mc_wtab_lasx and mc_noden_wtab_lasx.
 * No trailing ';' -- each macro expansion already ends with one. */
#if !HIGH_BIT_DEPTH
|
||||
MC_WEIGHT_LSX()
|
||||
MC_WEIGHT_LSX(_noden)
|
||||
MC_WEIGHT()
|
||||
MC_WEIGHT(_noden)
|
||||
#endif
|
||||
|
||||
/* Selects the LSX weight kernel table for *w: the regular table when
 * w->i_denom is at least 1, the "noden" table otherwise.  h is unused. */
static void weight_cache_lsx( x264_t *h, x264_weight_t *w )
{
    w->weightfn = w->i_denom >= 1 ? mc_wtab_lsx : mc_noden_wtab_lsx;
}
|
||||
|
||||
static weight_fn_t mc_weight_wtab_lsx[6] =
|
||||
{
|
||||
x264_mc_weight_w4_lsx,
|
||||
x264_mc_weight_w4_lsx,
|
||||
x264_mc_weight_w8_lsx,
|
||||
x264_mc_weight_w16_lsx,
|
||||
x264_mc_weight_w16_lsx,
|
||||
x264_mc_weight_w20_lsx,
|
||||
};
|
||||
|
||||
static void (* const pixel_avg_wtab_lsx[6])(uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int ) =
|
||||
{
|
||||
NULL,
|
||||
x264_pixel_avg2_w4_lsx,
|
||||
x264_pixel_avg2_w8_lsx,
|
||||
x264_pixel_avg2_w16_lsx,
|
||||
x264_pixel_avg2_w16_lsx,
|
||||
x264_pixel_avg2_w20_lsx,
|
||||
};
|
||||
|
||||
static void (* const mc_copy_wtab_lsx[5])( uint8_t *, intptr_t, uint8_t *, intptr_t, int ) =
|
||||
{
|
||||
NULL,
|
||||
x264_mc_copy_w4_lsx,
|
||||
x264_mc_copy_w8_lsx,
|
||||
NULL,
|
||||
x264_mc_copy_w16_lsx,
|
||||
};
|
||||
|
||||
/* Selects the LASX weight kernel table for *w: the regular table when
 * w->i_denom is at least 1, the "noden" table otherwise.  h is unused. */
static void weight_cache_lasx( x264_t *h, x264_weight_t *w )
{
    w->weightfn = w->i_denom >= 1 ? mc_wtab_lasx : mc_noden_wtab_lasx;
}
|
||||
|
||||
static weight_fn_t mc_weight_wtab_lasx[6] =
|
||||
{
|
||||
x264_mc_weight_w4_lasx,
|
||||
x264_mc_weight_w4_lasx,
|
||||
x264_mc_weight_w8_lasx,
|
||||
x264_mc_weight_w16_lasx,
|
||||
x264_mc_weight_w16_lasx,
|
||||
x264_mc_weight_w20_lasx,
|
||||
};
|
||||
|
||||
static void (* const pixel_avg_wtab_lasx[6])(uint8_t *, intptr_t, uint8_t *,
|
||||
intptr_t, uint8_t *, int ) =
|
||||
{
|
||||
NULL,
|
||||
x264_pixel_avg2_w4_lasx,
|
||||
x264_pixel_avg2_w8_lasx,
|
||||
x264_pixel_avg2_w16_lasx,
|
||||
x264_pixel_avg2_w16_lasx,
|
||||
x264_pixel_avg2_w20_lasx,
|
||||
};
|
||||
|
||||
static void (* const mc_copy_wtab_lasx[5])( uint8_t *, intptr_t, uint8_t *,
|
||||
intptr_t, int ) =
|
||||
{
|
||||
NULL,
|
||||
x264_mc_copy_w4_lasx,
|
||||
x264_mc_copy_w8_lasx,
|
||||
NULL,
|
||||
x264_mc_copy_w16_lasx,
|
||||
};
|
||||
|
||||
static uint8_t *get_ref_lsx( uint8_t *p_dst, intptr_t *p_dst_stride,
|
||||
uint8_t *p_src[4], intptr_t i_src_stride,
|
||||
int32_t m_vx, int32_t m_vy,
|
||||
int32_t i_width, int32_t i_height,
|
||||
const x264_weight_t *pWeight )
|
||||
{
|
||||
int32_t i_qpel_idx;
|
||||
int32_t i_offset;
|
||||
uint8_t *p_src1;
|
||||
int32_t r_vy = m_vy & 3;
|
||||
int32_t r_vx = m_vx & 3;
|
||||
int32_t width = i_width >> 2;
|
||||
|
||||
i_qpel_idx = ( r_vy << 2 ) + r_vx;
|
||||
i_offset = ( m_vy >> 2 ) * i_src_stride + ( m_vx >> 2 );
|
||||
p_src1 = p_src[x264_hpel_ref0[i_qpel_idx]] + i_offset +
|
||||
( 3 == r_vy ) * i_src_stride;
|
||||
|
||||
if( i_qpel_idx & 5 )
|
||||
{
|
||||
uint8_t *p_src2 = p_src[x264_hpel_ref1[i_qpel_idx]] +
|
||||
i_offset + ( 3 == r_vx );
|
||||
pixel_avg_wtab_lsx[width](
|
||||
p_dst, *p_dst_stride, p_src1, i_src_stride,
|
||||
p_src2, i_height );
|
||||
|
||||
if( pWeight->weightfn )
|
||||
{
|
||||
pWeight->weightfn[width](p_dst, *p_dst_stride, p_dst, *p_dst_stride, pWeight, i_height);
|
||||
}
|
||||
return p_dst;
|
||||
}
|
||||
else if ( pWeight->weightfn )
|
||||
{
|
||||
pWeight->weightfn[width]( p_dst, *p_dst_stride, p_src1, i_src_stride, pWeight, i_height );
|
||||
return p_dst;
|
||||
}
|
||||
else
|
||||
{
|
||||
*p_dst_stride = i_src_stride;
|
||||
return p_src1;
|
||||
}
|
||||
}
|
||||
|
||||
static void mc_luma_lsx( uint8_t *p_dst, intptr_t i_dst_stride,
|
||||
uint8_t *p_src[4], intptr_t i_src_stride,
|
||||
int32_t m_vx, int32_t m_vy,
|
||||
int32_t i_width, int32_t i_height,
|
||||
const x264_weight_t *pWeight )
|
||||
{
|
||||
int32_t i_qpel_idx;
|
||||
int32_t i_offset;
|
||||
uint8_t *p_src1;
|
||||
|
||||
i_qpel_idx = ( ( m_vy & 3 ) << 2 ) + ( m_vx & 3 );
|
||||
i_offset = ( m_vy >> 2 ) * i_src_stride + ( m_vx >> 2 );
|
||||
p_src1 = p_src[x264_hpel_ref0[i_qpel_idx]] + i_offset +
|
||||
( 3 == ( m_vy & 3 ) ) * i_src_stride;
|
||||
|
||||
if( i_qpel_idx & 5 )
|
||||
{
|
||||
uint8_t *p_src2 = p_src[x264_hpel_ref1[i_qpel_idx]] +
|
||||
i_offset + ( 3 == ( m_vx & 3 ) );
|
||||
|
||||
pixel_avg_wtab_lsx[i_width >> 2](
|
||||
p_dst, i_dst_stride, p_src1, i_src_stride,
|
||||
p_src2, i_height );
|
||||
|
||||
if( pWeight->weightfn )
|
||||
{
|
||||
pWeight->weightfn[i_width>>2]( p_dst, i_dst_stride, p_dst, i_dst_stride, pWeight, i_height );
|
||||
}
|
||||
}
|
||||
else if( pWeight->weightfn )
|
||||
{
|
||||
pWeight->weightfn[i_width>>2]( p_dst, i_dst_stride, p_src1, i_src_stride, pWeight, i_height );
|
||||
}
|
||||
else
|
||||
{
|
||||
mc_copy_wtab_lsx[i_width>>2]( p_dst, i_dst_stride, p_src1, i_src_stride, i_height );
|
||||
}
|
||||
}
|
||||
|
||||
/* NOTE(review): PLANE_INTERLEAVE and PLANE_COPY_YUYV are defined outside
 * this file; presumably they generate plane_copy_interleave_lsx and
 * plane_copy_deinterleave_yuyv_lsx, which x264_mc_init_loongarch installs
 * below -- confirm against the macro definitions. */
PLANE_INTERLEAVE(lsx)
|
||||
PLANE_COPY_YUYV(32, lsx)
|
||||
|
||||
/* Prototype of the LSX chroma motion-compensation routine (defined
 * outside this file); it is installed as pf->mc_chroma. */
#define x264_mc_chroma_lsx x264_template(mc_chroma_lsx)
|
||||
void x264_mc_chroma_lsx( uint8_t *p_dst_u, uint8_t *p_dst_v,
|
||||
intptr_t i_dst_stride,
|
||||
uint8_t *p_src, intptr_t i_src_stride,
|
||||
int32_t m_vx, int32_t m_vy,
|
||||
int32_t i_width, int32_t i_height );
|
||||
|
||||
static uint8_t *get_ref_lasx( uint8_t *p_dst, intptr_t *p_dst_stride,
|
||||
uint8_t *p_src[4], intptr_t i_src_stride,
|
||||
int32_t m_vx, int32_t m_vy,
|
||||
int32_t i_width, int32_t i_height,
|
||||
const x264_weight_t *pWeight )
|
||||
{
|
||||
int32_t i_qpel_idx;
|
||||
int32_t i_offset;
|
||||
uint8_t *p_src1;
|
||||
int32_t r_vy = m_vy & 3;
|
||||
int32_t r_vx = m_vx & 3;
|
||||
int32_t width = i_width >> 2;
|
||||
|
||||
i_qpel_idx = ( r_vy << 2 ) + r_vx;
|
||||
i_offset = ( m_vy >> 2 ) * i_src_stride + ( m_vx >> 2 );
|
||||
p_src1 = p_src[x264_hpel_ref0[i_qpel_idx]] + i_offset +
|
||||
( 3 == r_vy ) * i_src_stride;
|
||||
|
||||
if( i_qpel_idx & 5 )
|
||||
{
|
||||
uint8_t *p_src2 = p_src[x264_hpel_ref1[i_qpel_idx]] +
|
||||
i_offset + ( 3 == r_vx );
|
||||
pixel_avg_wtab_lasx[width](
|
||||
p_dst, *p_dst_stride, p_src1, i_src_stride,
|
||||
p_src2, i_height );
|
||||
|
||||
if( pWeight->weightfn )
|
||||
{
|
||||
pWeight->weightfn[width](p_dst, *p_dst_stride, p_dst, *p_dst_stride, pWeight, i_height);
|
||||
}
|
||||
return p_dst;
|
||||
}
|
||||
else if ( pWeight->weightfn )
|
||||
{
|
||||
pWeight->weightfn[width]( p_dst, *p_dst_stride, p_src1, i_src_stride, pWeight, i_height );
|
||||
return p_dst;
|
||||
}
|
||||
else
|
||||
{
|
||||
*p_dst_stride = i_src_stride;
|
||||
return p_src1;
|
||||
}
|
||||
}
|
||||
|
||||
static void mc_luma_lasx( uint8_t *p_dst, intptr_t i_dst_stride,
|
||||
uint8_t *p_src[4], intptr_t i_src_stride,
|
||||
int32_t m_vx, int32_t m_vy,
|
||||
int32_t i_width, int32_t i_height,
|
||||
const x264_weight_t *pWeight )
|
||||
{
|
||||
int32_t i_qpel_idx;
|
||||
int32_t i_offset;
|
||||
uint8_t *p_src1;
|
||||
|
||||
i_qpel_idx = ( ( m_vy & 3 ) << 2 ) + ( m_vx & 3 );
|
||||
i_offset = ( m_vy >> 2 ) * i_src_stride + ( m_vx >> 2 );
|
||||
p_src1 = p_src[x264_hpel_ref0[i_qpel_idx]] + i_offset +
|
||||
( 3 == ( m_vy & 3 ) ) * i_src_stride;
|
||||
|
||||
if( i_qpel_idx & 5 )
|
||||
{
|
||||
uint8_t *p_src2 = p_src[x264_hpel_ref1[i_qpel_idx]] +
|
||||
i_offset + ( 3 == ( m_vx & 3 ) );
|
||||
|
||||
pixel_avg_wtab_lasx[i_width >> 2](
|
||||
p_dst, i_dst_stride, p_src1, i_src_stride,
|
||||
p_src2, i_height );
|
||||
|
||||
if( pWeight->weightfn )
|
||||
{
|
||||
pWeight->weightfn[i_width>>2]( p_dst, i_dst_stride, p_dst, i_dst_stride, pWeight, i_height );
|
||||
}
|
||||
}
|
||||
else if( pWeight->weightfn )
|
||||
{
|
||||
pWeight->weightfn[i_width>>2]( p_dst, i_dst_stride, p_src1, i_src_stride, pWeight, i_height );
|
||||
}
|
||||
else
|
||||
{
|
||||
mc_copy_wtab_lasx[i_width>>2]( p_dst, i_dst_stride, p_src1, i_src_stride, i_height );
|
||||
}
|
||||
}
|
||||
|
||||
/* NOTE(review): PLANE_COPY_YUYV is defined outside this file; presumably
 * it generates plane_copy_deinterleave_yuyv_lasx, which
 * x264_mc_init_loongarch installs below -- confirm. */
PLANE_COPY_YUYV(64, lasx)
|
||||
|
||||
/* Prototype of the LASX chroma motion-compensation routine (defined
 * outside this file); it is installed as pf->mc_chroma. */
#define x264_mc_chroma_lasx x264_template(mc_chroma_lasx)
|
||||
void x264_mc_chroma_lasx( uint8_t *p_dst_u, uint8_t *p_dst_v,
|
||||
intptr_t i_dst_stride,
|
||||
uint8_t *p_src, intptr_t i_src_stride,
|
||||
int32_t m_vx, int32_t m_vy,
|
||||
int32_t i_width, int32_t i_height );
|
||||
#endif // !HIGH_BIT_DEPTH
|
||||
|
||||
/* Installs the LoongArch motion-compensation kernels into *pf according
 * to the capability bits in cpu.  Nothing is installed in high-bit-depth
 * builds.  The LASX section runs after the LSX one and only replaces the
 * entries it assigns; every other entry keeps its previous value. */
void x264_mc_init_loongarch( int32_t cpu, x264_mc_functions_t *pf )
{
#if !HIGH_BIT_DEPTH
    if( cpu & X264_CPU_LSX )
    {
        /* Motion compensation entry points */
        pf->get_ref   = get_ref_lsx;
        pf->mc_luma   = mc_luma_lsx;
        pf->mc_chroma = x264_mc_chroma_lsx;

        /* Weighted prediction */
        pf->weight_cache = weight_cache_lsx;
        pf->weight       = mc_weight_wtab_lsx;
        pf->offsetadd    = mc_weight_wtab_lsx;
        pf->offsetsub    = mc_weight_wtab_lsx;

        /* Block averaging */
        pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_lsx;
        pf->avg[PIXEL_16x8]  = x264_pixel_avg_16x8_lsx;
        pf->avg[PIXEL_8x16]  = x264_pixel_avg_8x16_lsx;
        pf->avg[PIXEL_8x8]   = x264_pixel_avg_8x8_lsx;
        pf->avg[PIXEL_8x4]   = x264_pixel_avg_8x4_lsx;
        pf->avg[PIXEL_4x16]  = x264_pixel_avg_4x16_lsx;
        pf->avg[PIXEL_4x8]   = x264_pixel_avg_4x8_lsx;
        pf->avg[PIXEL_4x4]   = x264_pixel_avg_4x4_lsx;
        pf->avg[PIXEL_4x2]   = x264_pixel_avg_4x2_lsx;

        /* Block copies */
        pf->copy[PIXEL_16x16]    = x264_mc_copy_w16_lsx;
        pf->copy[PIXEL_8x8]      = x264_mc_copy_w8_lsx;
        pf->copy[PIXEL_4x4]      = x264_mc_copy_w4_lsx;
        pf->copy_16x16_unaligned = x264_mc_copy_w16_lsx;

        /* Chroma (de)interleaving and plane copies */
        pf->store_interleave_chroma       = x264_store_interleave_chroma_lsx;
        pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_lsx;
        pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_lsx;
        pf->plane_copy_interleave         = plane_copy_interleave_lsx;
        pf->plane_copy_deinterleave       = x264_plane_copy_deinterleave_lsx;
        pf->plane_copy_deinterleave_yuyv  = plane_copy_deinterleave_yuyv_lsx;

        /* Filters and memory helpers */
        pf->hpel_filter            = x264_hpel_filter_lsx;
        pf->frame_init_lowres_core = x264_frame_init_lowres_core_lsx;
        pf->memcpy_aligned         = x264_memcpy_aligned_lsx;
        pf->memzero_aligned        = x264_memzero_aligned_lsx;

        /* Prefetch */
        pf->prefetch_fenc_420 = x264_prefetch_fenc_420_lsx;
        pf->prefetch_fenc_422 = x264_prefetch_fenc_422_lsx;
        pf->prefetch_ref      = x264_prefetch_ref_lsx;
    }

    if( cpu & X264_CPU_LASX )
    {
        /* Motion compensation entry points */
        pf->get_ref   = get_ref_lasx;
        pf->mc_luma   = mc_luma_lasx;
        pf->mc_chroma = x264_mc_chroma_lasx;

        /* Weighted prediction */
        pf->weight_cache = weight_cache_lasx;
        pf->weight       = mc_weight_wtab_lasx;
        pf->offsetadd    = mc_weight_wtab_lasx;
        pf->offsetsub    = mc_weight_wtab_lasx;

        /* Block averaging (avg[PIXEL_16x16] is not reassigned here) */
        pf->avg[PIXEL_16x8] = x264_pixel_avg_16x8_lasx;
        pf->avg[PIXEL_8x16] = x264_pixel_avg_8x16_lasx;
        pf->avg[PIXEL_8x8]  = x264_pixel_avg_8x8_lasx;
        pf->avg[PIXEL_8x4]  = x264_pixel_avg_8x4_lasx;
        pf->avg[PIXEL_4x16] = x264_pixel_avg_4x16_lasx;
        pf->avg[PIXEL_4x8]  = x264_pixel_avg_4x8_lasx;
        pf->avg[PIXEL_4x4]  = x264_pixel_avg_4x4_lasx;
        pf->avg[PIXEL_4x2]  = x264_pixel_avg_4x2_lasx;

        /* Block copies */
        pf->copy[PIXEL_16x16]    = x264_mc_copy_w16_lasx;
        pf->copy[PIXEL_8x8]      = x264_mc_copy_w8_lasx;
        pf->copy[PIXEL_4x4]      = x264_mc_copy_w4_lasx;
        pf->copy_16x16_unaligned = x264_mc_copy_w16_lasx;

        /* Plane copies */
        pf->plane_copy_deinterleave      = x264_plane_copy_deinterleave_lasx;
        pf->plane_copy_deinterleave_yuyv = plane_copy_deinterleave_yuyv_lasx;

        /* Filters and memory helpers */
        pf->hpel_filter            = x264_hpel_filter_lasx;
        pf->frame_init_lowres_core = x264_frame_init_lowres_core_lasx;
        pf->memzero_aligned        = x264_memzero_aligned_lasx;
    }
#endif // !HIGH_BIT_DEPTH
}
|
||||
196
common/loongarch/mc.h
Normal file
196
common/loongarch/mc.h
Normal file
@@ -0,0 +1,196 @@
|
||||
/*****************************************************************************
|
||||
* mc.h: loongarch motion compensation
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2023-2025 x264 project
|
||||
*
|
||||
* Authors: Xiwei Gu <guxiwei-hf@loongson.cn>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at licensing@x264.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X264_LOONGARCH_MC_H
|
||||
#define X264_LOONGARCH_MC_H
|
||||
|
||||
#define x264_mc_init_loongarch x264_template(mc_init_loongarch)
|
||||
void x264_mc_init_loongarch( int cpu, x264_mc_functions_t *pf );
|
||||
|
||||
#define x264_pixel_avg_16x16_lsx x264_template(pixel_avg_16x16_lsx)
|
||||
void x264_pixel_avg_16x16_lsx( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int );
|
||||
#define x264_pixel_avg_16x8_lsx x264_template(pixel_avg_16x8_lsx)
|
||||
void x264_pixel_avg_16x8_lsx( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int );
|
||||
#define x264_pixel_avg_8x16_lsx x264_template(pixel_avg_8x16_lsx)
|
||||
void x264_pixel_avg_8x16_lsx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
|
||||
#define x264_pixel_avg_8x8_lsx x264_template(pixel_avg_8x8_lsx)
|
||||
void x264_pixel_avg_8x8_lsx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
|
||||
#define x264_pixel_avg_8x4_lsx x264_template(pixel_avg_8x4_lsx)
|
||||
void x264_pixel_avg_8x4_lsx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
|
||||
#define x264_pixel_avg_4x16_lsx x264_template(pixel_avg_4x16_lsx)
|
||||
void x264_pixel_avg_4x16_lsx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
|
||||
#define x264_pixel_avg_4x8_lsx x264_template(pixel_avg_4x8_lsx)
|
||||
void x264_pixel_avg_4x8_lsx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
|
||||
#define x264_pixel_avg_4x4_lsx x264_template(pixel_avg_4x4_lsx)
|
||||
void x264_pixel_avg_4x4_lsx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
|
||||
#define x264_pixel_avg_4x2_lsx x264_template(pixel_avg_4x2_lsx)
|
||||
void x264_pixel_avg_4x2_lsx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
|
||||
|
||||
#define x264_pixel_avg2_w4_lsx x264_template(pixel_avg2_w4_lsx)
|
||||
void x264_pixel_avg2_w4_lsx ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
|
||||
#define x264_pixel_avg2_w8_lsx x264_template(pixel_avg2_w8_lsx)
|
||||
void x264_pixel_avg2_w8_lsx ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
|
||||
#define x264_pixel_avg2_w16_lsx x264_template(pixel_avg2_w16_lsx)
|
||||
void x264_pixel_avg2_w16_lsx ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
|
||||
#define x264_pixel_avg2_w20_lsx x264_template(pixel_avg2_w20_lsx)
|
||||
void x264_pixel_avg2_w20_lsx ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
|
||||
|
||||
#define x264_mc_weight_w20_lsx x264_template(mc_weight_w20_lsx)
|
||||
void x264_mc_weight_w20_lsx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
|
||||
#define x264_mc_weight_w20_noden_lsx x264_template(mc_weight_w20_noden_lsx)
|
||||
void x264_mc_weight_w20_noden_lsx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
|
||||
#define x264_mc_weight_w16_lsx x264_template(mc_weight_w16_lsx)
|
||||
void x264_mc_weight_w16_lsx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
|
||||
#define x264_mc_weight_w16_noden_lsx x264_template(mc_weight_w16_noden_lsx)
|
||||
void x264_mc_weight_w16_noden_lsx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
|
||||
#define x264_mc_weight_w8_lsx x264_template(mc_weight_w8_lsx)
|
||||
void x264_mc_weight_w8_lsx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
|
||||
#define x264_mc_weight_w8_noden_lsx x264_template(mc_weight_w8_noden_lsx)
|
||||
void x264_mc_weight_w8_noden_lsx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
|
||||
#define x264_mc_weight_w4_lsx x264_template(mc_weight_w4_lsx)
|
||||
void x264_mc_weight_w4_lsx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
|
||||
#define x264_mc_weight_w4_noden_lsx x264_template(mc_weight_w4_noden_lsx)
|
||||
void x264_mc_weight_w4_noden_lsx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
|
||||
|
||||
#define x264_mc_copy_w16_lsx x264_template(mc_copy_w16_lsx)
|
||||
void x264_mc_copy_w16_lsx( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
|
||||
#define x264_mc_copy_w8_lsx x264_template(mc_copy_w8_lsx)
|
||||
void x264_mc_copy_w8_lsx( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
|
||||
#define x264_mc_copy_w4_lsx x264_template(mc_copy_w4_lsx)
|
||||
void x264_mc_copy_w4_lsx( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
|
||||
|
||||
#define x264_store_interleave_chroma_lsx x264_template(store_interleave_chroma_lsx)
|
||||
void x264_store_interleave_chroma_lsx( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
|
||||
#define x264_load_deinterleave_chroma_fenc_lsx x264_template(load_deinterleave_chroma_fenc_lsx)
|
||||
void x264_load_deinterleave_chroma_fenc_lsx( pixel *dst, pixel *src, intptr_t i_src, int height );
|
||||
#define x264_load_deinterleave_chroma_fdec_lsx x264_template(load_deinterleave_chroma_fdec_lsx)
|
||||
void x264_load_deinterleave_chroma_fdec_lsx( pixel *dst, pixel *src, intptr_t i_src, int height );
|
||||
|
||||
#define x264_plane_copy_interleave_core_lsx x264_template(plane_copy_interleave_core_lsx)
|
||||
void x264_plane_copy_interleave_core_lsx( pixel *dst, intptr_t i_dst,
|
||||
pixel *srcu, intptr_t i_srcu,
|
||||
pixel *srcv, intptr_t i_srcv, int w, int h );
|
||||
#define x264_plane_copy_deinterleave_lsx x264_template(plane_copy_deinterleave_lsx)
|
||||
void x264_plane_copy_deinterleave_lsx( pixel *dstu, intptr_t i_dstu,
|
||||
pixel *dstv, intptr_t i_dstv,
|
||||
pixel *src, intptr_t i_src, int w, int h );
|
||||
|
||||
#define x264_plane_copy_deinterleave_lasx x264_template(plane_copy_deinterleave_lasx)
|
||||
void x264_plane_copy_deinterleave_lasx( pixel *dstu, intptr_t i_dstu,
|
||||
pixel *dstv, intptr_t i_dstv,
|
||||
pixel *src, intptr_t i_src, int w, int h );
|
||||
|
||||
#define x264_prefetch_fenc_420_lsx x264_template(prefetch_fenc_420_lsx)
|
||||
void x264_prefetch_fenc_420_lsx( uint8_t *pix_y, intptr_t stride_y,
|
||||
uint8_t *pix_uv, intptr_t stride_uv,
|
||||
int32_t mb_x );
|
||||
#define x264_prefetch_fenc_422_lsx x264_template(prefetch_fenc_422_lsx)
|
||||
void x264_prefetch_fenc_422_lsx( uint8_t *pix_y, intptr_t stride_y,
|
||||
uint8_t *pix_uv, intptr_t stride_uv,
|
||||
int32_t mb_x );
|
||||
#define x264_prefetch_ref_lsx x264_template(prefetch_ref_lsx)
|
||||
void x264_prefetch_ref_lsx( uint8_t *pix, intptr_t stride, int32_t parity );
|
||||
|
||||
#define x264_memcpy_aligned_lsx x264_template(memcpy_aligned_lsx)
|
||||
void *x264_memcpy_aligned_lsx( void *dst, const void *src, size_t n );
|
||||
#define x264_memzero_aligned_lsx x264_template(memzero_aligned_lsx)
|
||||
void x264_memzero_aligned_lsx( void *p_dst, size_t n );
|
||||
|
||||
#define x264_hpel_filter_lsx x264_template(hpel_filter_lsx)
|
||||
void x264_hpel_filter_lsx( pixel *, pixel *, pixel *, pixel *, intptr_t, int, int, int16_t * );
|
||||
#define x264_frame_init_lowres_core_lsx x264_template(frame_init_lowres_core_lsx)
|
||||
void x264_frame_init_lowres_core_lsx( uint8_t *, uint8_t *, uint8_t *, uint8_t *,
|
||||
uint8_t *, intptr_t, intptr_t, int, int );
|
||||
|
||||
#define x264_pixel_avg_16x8_lasx x264_template(pixel_avg_16x8_lasx)
|
||||
void x264_pixel_avg_16x8_lasx( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int );
|
||||
#define x264_pixel_avg_8x16_lasx x264_template(pixel_avg_8x16_lasx)
|
||||
void x264_pixel_avg_8x16_lasx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
|
||||
#define x264_pixel_avg_8x8_lasx x264_template(pixel_avg_8x8_lasx)
|
||||
void x264_pixel_avg_8x8_lasx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
|
||||
#define x264_pixel_avg_8x4_lasx x264_template(pixel_avg_8x4_lasx)
|
||||
void x264_pixel_avg_8x4_lasx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
|
||||
#define x264_pixel_avg_4x16_lasx x264_template(pixel_avg_4x16_lasx)
|
||||
void x264_pixel_avg_4x16_lasx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
|
||||
#define x264_pixel_avg_4x8_lasx x264_template(pixel_avg_4x8_lasx)
|
||||
void x264_pixel_avg_4x8_lasx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
|
||||
#define x264_pixel_avg_4x4_lasx x264_template(pixel_avg_4x4_lasx)
|
||||
void x264_pixel_avg_4x4_lasx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
|
||||
#define x264_pixel_avg_4x2_lasx x264_template(pixel_avg_4x2_lasx)
|
||||
void x264_pixel_avg_4x2_lasx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
|
||||
|
||||
#define x264_pixel_avg2_w4_lasx x264_template(pixel_avg2_w4_lasx)
|
||||
void x264_pixel_avg2_w4_lasx ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
|
||||
#define x264_pixel_avg2_w8_lasx x264_template(pixel_avg2_w8_lasx)
|
||||
void x264_pixel_avg2_w8_lasx ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
|
||||
#define x264_pixel_avg2_w16_lasx x264_template(pixel_avg2_w16_lasx)
|
||||
void x264_pixel_avg2_w16_lasx ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
|
||||
#define x264_pixel_avg2_w20_lasx x264_template(pixel_avg2_w20_lasx)
|
||||
void x264_pixel_avg2_w20_lasx ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
|
||||
|
||||
#define x264_mc_weight_w20_lasx x264_template(mc_weight_w20_lasx)
|
||||
void x264_mc_weight_w20_lasx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
|
||||
#define x264_mc_weight_w20_noden_lasx x264_template(mc_weight_w20_noden_lasx)
|
||||
void x264_mc_weight_w20_noden_lasx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
|
||||
#define x264_mc_weight_w16_lasx x264_template(mc_weight_w16_lasx)
|
||||
void x264_mc_weight_w16_lasx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
|
||||
#define x264_mc_weight_w16_noden_lasx x264_template(mc_weight_w16_noden_lasx)
|
||||
void x264_mc_weight_w16_noden_lasx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
|
||||
#define x264_mc_weight_w8_lasx x264_template(mc_weight_w8_lasx)
|
||||
void x264_mc_weight_w8_lasx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
|
||||
#define x264_mc_weight_w8_noden_lasx x264_template(mc_weight_w8_noden_lasx)
|
||||
void x264_mc_weight_w8_noden_lasx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
|
||||
#define x264_mc_weight_w4_lasx x264_template(mc_weight_w4_lasx)
|
||||
void x264_mc_weight_w4_lasx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
|
||||
#define x264_mc_weight_w4_noden_lasx x264_template(mc_weight_w4_noden_lasx)
|
||||
void x264_mc_weight_w4_noden_lasx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
|
||||
|
||||
#define x264_mc_copy_w16_lasx x264_template(mc_copy_w16_lasx)
|
||||
void x264_mc_copy_w16_lasx( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
|
||||
#define x264_mc_copy_w8_lasx x264_template(mc_copy_w8_lasx)
|
||||
void x264_mc_copy_w8_lasx( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
|
||||
#define x264_mc_copy_w4_lasx x264_template(mc_copy_w4_lasx)
|
||||
void x264_mc_copy_w4_lasx( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
|
||||
|
||||
#define x264_plane_copy_interleave_core_lasx x264_template(plane_copy_interleave_core_lasx)
|
||||
void x264_plane_copy_interleave_core_lasx( pixel *dst, intptr_t i_dst,
|
||||
pixel *srcu, intptr_t i_srcu,
|
||||
pixel *srcv, intptr_t i_srcv, int w, int h );
|
||||
|
||||
#define x264_plane_copy_deinterleave_lasx x264_template(plane_copy_deinterleave_lasx)
|
||||
void x264_plane_copy_deinterleave_lasx( pixel *dstu, intptr_t i_dstu,
|
||||
pixel *dstv, intptr_t i_dstv,
|
||||
pixel *src, intptr_t i_src, int w, int h );
|
||||
|
||||
#define x264_memzero_aligned_lasx x264_template(memzero_aligned_lasx)
|
||||
void x264_memzero_aligned_lasx( void *p_dst, size_t n );
|
||||
|
||||
#define x264_hpel_filter_lasx x264_template(hpel_filter_lasx)
|
||||
void x264_hpel_filter_lasx( pixel *, pixel *, pixel *, pixel *, intptr_t, int, int, int16_t * );
|
||||
#define x264_frame_init_lowres_core_lasx x264_template(frame_init_lowres_core_lasx)
|
||||
void x264_frame_init_lowres_core_lasx( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *,
|
||||
intptr_t, intptr_t, int, int );
|
||||
|
||||
#endif
|
||||
3548
common/loongarch/pixel-a.S
Normal file
3548
common/loongarch/pixel-a.S
Normal file
File diff suppressed because it is too large
Load Diff
259
common/loongarch/pixel-c.c
Normal file
259
common/loongarch/pixel-c.c
Normal file
@@ -0,0 +1,259 @@
|
||||
/*****************************************************************************
|
||||
* pixel-c.c: loongarch pixel metrics
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2023-2025 x264 project
|
||||
*
|
||||
* Authors: Hecai Yuan <yuanhecai@loongson.cn>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at licensing@x264.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common/common.h"
|
||||
#include "pixel.h"
|
||||
#include "predict.h"
|
||||
|
||||
#if !HIGH_BIT_DEPTH
|
||||
|
||||
/* Hadamard AC metric for one 8x8 block (LSX).
 * Calls x264_hadamard_ac_8x8_lsx() and repacks its 64-bit result: the upper
 * 32-bit half is shifted right by 2 and the lower 32-bit half by 1, each
 * staying in its own half of the return value. */
uint64_t x264_pixel_hadamard_ac_8x8_lsx( uint8_t *p_pix, intptr_t i_stride )
{
    const uint64_t raw = x264_hadamard_ac_8x8_lsx( p_pix, i_stride );
    const uint64_t hi  = ( raw >> 34 ) << 32;
    const uint32_t lo  = (uint32_t)raw >> 1;

    return hi + lo;
}
|
||||
|
||||
/* Hadamard AC metric for an 8x16 block (LSX).
 * Accumulates the raw 8x8 results of the top block and of the block eight
 * rows below it, then repacks the sum: upper 32-bit half >> 2, lower 32-bit
 * half >> 1. */
uint64_t x264_pixel_hadamard_ac_8x16_lsx( uint8_t *p_pix, intptr_t i_stride )
{
    uint8_t *p_bottom = p_pix + 8 * i_stride;
    uint64_t total = x264_hadamard_ac_8x8_lsx( p_pix, i_stride );

    total += x264_hadamard_ac_8x8_lsx( p_bottom, i_stride );

    return ( ( total >> 34 ) << 32 ) + ( (uint32_t)total >> 1 );
}
|
||||
|
||||
/* Hadamard AC metric for a 16x8 block (LSX).
 * Sums the raw 8x8 results of the left block and of the block eight pixels
 * to its right, then repacks the sum: upper 32-bit half >> 2, lower 32-bit
 * half >> 1. */
uint64_t x264_pixel_hadamard_ac_16x8_lsx( uint8_t *p_pix, intptr_t i_stride )
{
    uint64_t total = 0;

    /* Left block first, then the right one. */
    for( int x = 0; x < 16; x += 8 )
        total += x264_hadamard_ac_8x8_lsx( p_pix + x, i_stride );

    return ( ( total >> 34 ) << 32 ) + ( (uint32_t)total >> 1 );
}
|
||||
|
||||
/* Hadamard AC metric for a 16x16 block (LSX).
 * Sums the raw results of the four 8x8 quadrants (top-left, top-right,
 * bottom-left, bottom-right, in that order) and repacks the sum: upper
 * 32-bit half >> 2, lower 32-bit half >> 1. */
uint64_t x264_pixel_hadamard_ac_16x16_lsx( uint8_t *p_pix, intptr_t i_stride )
{
    uint64_t total = 0;

    for( int y = 0; y < 16; y += 8 )
        for( int x = 0; x < 16; x += 8 )
            total += x264_hadamard_ac_8x8_lsx( p_pix + y * i_stride + x, i_stride );

    return ( ( total >> 34 ) << 32 ) + ( (uint32_t)total >> 1 );
}
|
||||
|
||||
/* Hadamard AC metric for one 8x8 block (LASX).
 * Calls x264_hadamard_ac_8x8_lasx() and repacks its 64-bit result: the upper
 * 32-bit half is shifted right by 2 and the lower 32-bit half by 1. */
uint64_t x264_pixel_hadamard_ac_8x8_lasx( uint8_t *p_pix, intptr_t i_stride )
{
    uint64_t packed = x264_hadamard_ac_8x8_lasx( p_pix, i_stride );
    const uint32_t low_half = (uint32_t)packed;

    /* Drop the low 34 bits, then put what remains back into the upper half. */
    packed = ( packed >> 34 ) << 32;

    return packed + ( low_half >> 1 );
}
|
||||
|
||||
/* Hadamard AC metric for an 8x16 block (LASX).
 * Accumulates the raw 8x8 results of the top block and of the block eight
 * rows below it, then repacks the sum: upper 32-bit half >> 2, lower 32-bit
 * half >> 1.
 *
 * The row offset is computed as 8 * i_stride, matching the LSX variant,
 * rather than i_stride << 3: left-shifting a negative signed value is
 * undefined behaviour in C, while the multiplication is well defined for
 * either sign and gives the same result for non-negative strides. */
uint64_t x264_pixel_hadamard_ac_8x16_lasx( uint8_t *p_pix, intptr_t i_stride )
{
    uint64_t u_sum;

    u_sum  = x264_hadamard_ac_8x8_lasx( p_pix, i_stride );
    u_sum += x264_hadamard_ac_8x8_lasx( p_pix + 8 * i_stride, i_stride );

    return ( ( u_sum >> 34 ) << 32 ) + ( ( uint32_t ) u_sum >> 1 );
}
|
||||
|
||||
void x264_intra_sa8d_x3_8x8_lsx( uint8_t *p_enc, uint8_t p_edge[36],
|
||||
int32_t p_sad_array[3] )
|
||||
{
|
||||
ALIGNED_ARRAY_16( uint8_t, pix, [8 * FDEC_STRIDE] );
|
||||
|
||||
x264_predict_8x8_v_lsx( pix, p_edge );
|
||||
p_sad_array[0] = x264_pixel_sa8d_8x8_lsx( pix, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
|
||||
x264_predict_8x8_h_lsx( pix, p_edge );
|
||||
p_sad_array[1] = x264_pixel_sa8d_8x8_lsx( pix, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
|
||||
x264_predict_8x8_dc_lsx( pix, p_edge );
|
||||
p_sad_array[2] = x264_pixel_sa8d_8x8_lsx( pix, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
}
|
||||
|
||||
void x264_intra_sa8d_x3_8x8_lasx( uint8_t *p_enc, uint8_t p_edge[36],
|
||||
int32_t p_sad_array[3] )
|
||||
{
|
||||
ALIGNED_ARRAY_16( uint8_t, pix, [8 * FDEC_STRIDE] );
|
||||
|
||||
x264_predict_8x8_v_lsx( pix, p_edge );
|
||||
p_sad_array[0] = x264_pixel_sa8d_8x8_lasx( pix, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
|
||||
x264_predict_8x8_h_lasx( pix, p_edge );
|
||||
p_sad_array[1] = x264_pixel_sa8d_8x8_lasx( pix, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
|
||||
x264_predict_8x8_dc_lsx( pix, p_edge );
|
||||
p_sad_array[2] = x264_pixel_sa8d_8x8_lasx( pix, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
}
|
||||
|
||||
void x264_intra_satd_x3_4x4_lsx( uint8_t *p_enc, uint8_t *p_dec,
|
||||
int32_t p_sad_array[3] )
|
||||
{
|
||||
x264_predict_4x4_v_lsx( p_dec );
|
||||
p_sad_array[0] = x264_pixel_satd_4x4_lsx( p_dec, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
|
||||
x264_predict_4x4_h_lsx( p_dec );
|
||||
p_sad_array[1] = x264_pixel_satd_4x4_lsx( p_dec, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
|
||||
x264_predict_4x4_dc_lsx( p_dec );
|
||||
p_sad_array[2] = x264_pixel_satd_4x4_lsx( p_dec, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
}
|
||||
|
||||
void x264_intra_satd_x3_16x16_lsx( uint8_t *p_enc, uint8_t *p_dec,
|
||||
int32_t p_sad_array[3] )
|
||||
{
|
||||
x264_predict_16x16_v_lsx( p_dec );
|
||||
p_sad_array[0] = x264_pixel_satd_16x16_lsx( p_dec, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
|
||||
x264_predict_16x16_h_lsx( p_dec );
|
||||
p_sad_array[1] = x264_pixel_satd_16x16_lsx( p_dec, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
|
||||
x264_predict_16x16_dc_lsx( p_dec );
|
||||
p_sad_array[2] = x264_pixel_satd_16x16_lsx( p_dec, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
}
|
||||
|
||||
void x264_intra_satd_x3_16x16_lasx( uint8_t *p_enc, uint8_t *p_dec,
|
||||
int32_t p_sad_array[3] )
|
||||
{
|
||||
x264_predict_16x16_v_lsx( p_dec );
|
||||
p_sad_array[0] = x264_pixel_satd_16x16_lasx( p_dec, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
|
||||
x264_predict_16x16_h_lsx( p_dec );
|
||||
p_sad_array[1] = x264_pixel_satd_16x16_lasx( p_dec, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
|
||||
x264_predict_16x16_dc_lsx( p_dec );
|
||||
p_sad_array[2] = x264_pixel_satd_16x16_lasx( p_dec, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
}
|
||||
|
||||
void x264_intra_satd_x3_8x8c_lsx( uint8_t *p_enc, uint8_t *p_dec,
|
||||
int32_t p_sad_array[3] )
|
||||
{
|
||||
x264_predict_8x8c_dc_lsx( p_dec );
|
||||
p_sad_array[0] = x264_pixel_satd_8x8_lsx( p_dec, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
|
||||
x264_predict_8x8c_h_lsx( p_dec );
|
||||
p_sad_array[1] = x264_pixel_satd_8x8_lsx( p_dec, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
|
||||
x264_predict_8x8c_v_lsx( p_dec );
|
||||
p_sad_array[2] = x264_pixel_satd_8x8_lsx( p_dec, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
}
|
||||
|
||||
void x264_intra_sad_x3_4x4_lsx( uint8_t *p_enc, uint8_t *p_dec,
|
||||
int32_t p_sad_array[3] )
|
||||
{
|
||||
x264_predict_4x4_v_lsx( p_dec );
|
||||
p_sad_array[0] = x264_pixel_sad_4x4_lsx( p_dec, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
|
||||
x264_predict_4x4_h_lsx( p_dec );
|
||||
p_sad_array[1] = x264_pixel_sad_4x4_lsx( p_dec, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
|
||||
x264_predict_4x4_dc_lsx( p_dec );
|
||||
p_sad_array[2] = x264_pixel_sad_4x4_lsx( p_dec, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
}
|
||||
|
||||
void x264_intra_sad_x3_16x16_lsx( uint8_t *p_enc, uint8_t *p_dec,
|
||||
int32_t p_sad_array[3] )
|
||||
{
|
||||
x264_predict_16x16_v_lsx( p_dec );
|
||||
p_sad_array[0] = x264_pixel_sad_16x16_lsx( p_dec, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
|
||||
x264_predict_16x16_h_lsx( p_dec );
|
||||
p_sad_array[1] = x264_pixel_sad_16x16_lsx( p_dec, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
|
||||
x264_predict_16x16_dc_lsx( p_dec );
|
||||
p_sad_array[2] = x264_pixel_sad_16x16_lsx( p_dec, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
}
|
||||
|
||||
void x264_intra_sad_x3_8x8_lsx( uint8_t *p_enc, uint8_t p_edge[36],
|
||||
int32_t p_sad_array[3] )
|
||||
{
|
||||
ALIGNED_ARRAY_16( uint8_t, pix, [8 * FDEC_STRIDE] );
|
||||
|
||||
x264_predict_8x8_v_lsx( pix, p_edge );
|
||||
p_sad_array[0] = x264_pixel_sad_8x8_lsx( pix, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
|
||||
x264_predict_8x8_h_lsx( pix, p_edge );
|
||||
p_sad_array[1] = x264_pixel_sad_8x8_lsx( pix, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
|
||||
x264_predict_8x8_dc_lsx( pix, p_edge );
|
||||
p_sad_array[2] = x264_pixel_sad_8x8_lsx( pix, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
}
|
||||
|
||||
void x264_intra_sad_x3_8x8c_lsx( uint8_t *p_enc, uint8_t *p_dec,
|
||||
int32_t p_sad_array[3] )
|
||||
{
|
||||
x264_predict_8x8c_dc_lsx( p_dec );
|
||||
p_sad_array[0] = x264_pixel_sad_8x8_lsx( p_dec, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
|
||||
x264_predict_8x8c_h_lsx( p_dec );
|
||||
p_sad_array[1] = x264_pixel_sad_8x8_lsx( p_dec, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
|
||||
x264_predict_8x8c_v_lsx( p_dec );
|
||||
p_sad_array[2] = x264_pixel_sad_8x8_lsx( p_dec, FDEC_STRIDE,
|
||||
p_enc, FENC_STRIDE );
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
335
common/loongarch/pixel.h
Normal file
335
common/loongarch/pixel.h
Normal file
@@ -0,0 +1,335 @@
|
||||
/*****************************************************************************
|
||||
* pixel.h: loongarch pixel metrics
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2023-2025 x264 project
|
||||
*
|
||||
* Authors: Lu Wang <wanglu@loongson.cn>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at licensing@x264.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X264_LOONGARCH_PIXEL_H
|
||||
#define X264_LOONGARCH_PIXEL_H
|
||||
|
||||
#define x264_pixel_satd_4x4_lsx x264_template(pixel_satd_4x4_lsx)
|
||||
int32_t x264_pixel_satd_4x4_lsx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_4x8_lsx x264_template(pixel_satd_4x8_lsx)
|
||||
int32_t x264_pixel_satd_4x8_lsx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_4x16_lsx x264_template(pixel_satd_4x16_lsx)
|
||||
int32_t x264_pixel_satd_4x16_lsx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_8x4_lsx x264_template(pixel_satd_8x4_lsx)
|
||||
int32_t x264_pixel_satd_8x4_lsx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_8x8_lsx x264_template(pixel_satd_8x8_lsx)
|
||||
int32_t x264_pixel_satd_8x8_lsx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_8x16_lsx x264_template(pixel_satd_8x16_lsx)
|
||||
int32_t x264_pixel_satd_8x16_lsx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_16x8_lsx x264_template(pixel_satd_16x8_lsx)
|
||||
int32_t x264_pixel_satd_16x8_lsx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_16x16_lsx x264_template(pixel_satd_16x16_lsx)
|
||||
int32_t x264_pixel_satd_16x16_lsx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
|
||||
#define x264_pixel_satd_4x8_lasx x264_template(pixel_satd_4x8_lasx)
|
||||
int32_t x264_pixel_satd_4x8_lasx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_4x16_lasx x264_template(pixel_satd_4x16_lasx)
|
||||
int32_t x264_pixel_satd_4x16_lasx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_8x4_lasx x264_template(pixel_satd_8x4_lasx)
|
||||
int32_t x264_pixel_satd_8x4_lasx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_8x8_lasx x264_template(pixel_satd_8x8_lasx)
|
||||
int32_t x264_pixel_satd_8x8_lasx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_8x16_lasx x264_template(pixel_satd_8x16_lasx)
|
||||
int32_t x264_pixel_satd_8x16_lasx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_16x8_lasx x264_template(pixel_satd_16x8_lasx)
|
||||
int32_t x264_pixel_satd_16x8_lasx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_satd_16x16_lasx x264_template(pixel_satd_16x16_lasx)
|
||||
int32_t x264_pixel_satd_16x16_lasx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
|
||||
#define x264_pixel_sad_x4_16x16_lsx x264_template(pixel_sad_x4_16x16_lsx)
|
||||
void x264_pixel_sad_x4_16x16_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
uint8_t *p_ref3, intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[4] );
|
||||
#define x264_pixel_sad_x4_16x8_lsx x264_template(pixel_sad_x4_16x8_lsx)
|
||||
void x264_pixel_sad_x4_16x8_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
uint8_t *p_ref3, intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[4] );
|
||||
#define x264_pixel_sad_x4_8x16_lsx x264_template(pixel_sad_x4_8x16_lsx)
|
||||
void x264_pixel_sad_x4_8x16_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
uint8_t *p_ref3, intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[4] );
|
||||
#define x264_pixel_sad_x4_8x8_lsx x264_template(pixel_sad_x4_8x8_lsx)
|
||||
void x264_pixel_sad_x4_8x8_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
uint8_t *p_ref3, intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[4] );
|
||||
#define x264_pixel_sad_x4_8x4_lsx x264_template(pixel_sad_x4_8x4_lsx)
|
||||
void x264_pixel_sad_x4_8x4_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
uint8_t *p_ref3, intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[4] );
|
||||
#define x264_pixel_sad_x4_4x8_lsx x264_template(pixel_sad_x4_4x8_lsx)
|
||||
void x264_pixel_sad_x4_4x8_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
uint8_t *p_ref3, intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[4] );
|
||||
|
||||
#define x264_pixel_sad_x4_16x16_lasx x264_template(pixel_sad_x4_16x16_lasx)
|
||||
void x264_pixel_sad_x4_16x16_lasx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
uint8_t *p_ref3, intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[4] );
|
||||
#define x264_pixel_sad_x4_16x8_lasx x264_template(pixel_sad_x4_16x8_lasx)
|
||||
void x264_pixel_sad_x4_16x8_lasx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
uint8_t *p_ref3, intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[4] );
|
||||
#define x264_pixel_sad_x4_8x8_lasx x264_template(pixel_sad_x4_8x8_lasx)
|
||||
void x264_pixel_sad_x4_8x8_lasx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
uint8_t *p_ref3, intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[4] );
|
||||
#define x264_pixel_sad_x4_8x4_lasx x264_template(pixel_sad_x4_8x4_lasx)
|
||||
void x264_pixel_sad_x4_8x4_lasx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
uint8_t *p_ref3, intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[4] );
|
||||
#define x264_pixel_sad_x4_4x4_lsx x264_template(pixel_sad_x4_4x4_lsx)
|
||||
void x264_pixel_sad_x4_4x4_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
uint8_t *p_ref3, intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[4] );
|
||||
|
||||
#define x264_pixel_sad_x3_16x16_lsx x264_template(pixel_sad_x3_16x16_lsx)
|
||||
void x264_pixel_sad_x3_16x16_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_pixel_sad_x3_16x8_lsx x264_template(pixel_sad_x3_16x8_lsx)
|
||||
void x264_pixel_sad_x3_16x8_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_pixel_sad_x3_8x16_lsx x264_template(pixel_sad_x3_8x16_lsx)
|
||||
void x264_pixel_sad_x3_8x16_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_pixel_sad_x3_8x8_lsx x264_template(pixel_sad_x3_8x8_lsx)
|
||||
void x264_pixel_sad_x3_8x8_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_pixel_sad_x3_8x4_lsx x264_template(pixel_sad_x3_8x4_lsx)
|
||||
void x264_pixel_sad_x3_8x4_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_pixel_sad_x3_4x4_lsx x264_template(pixel_sad_x3_4x4_lsx)
|
||||
void x264_pixel_sad_x3_4x4_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_pixel_sad_x3_4x8_lsx x264_template(pixel_sad_x3_4x8_lsx)
|
||||
void x264_pixel_sad_x3_4x8_lsx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[3] );
|
||||
|
||||
#define x264_pixel_sad_x3_16x16_lasx x264_template(pixel_sad_x3_16x16_lasx)
|
||||
void x264_pixel_sad_x3_16x16_lasx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_pixel_sad_x3_16x8_lasx x264_template(pixel_sad_x3_16x8_lasx)
|
||||
void x264_pixel_sad_x3_16x8_lasx( uint8_t *p_src, uint8_t *p_ref0,
|
||||
uint8_t *p_ref1, uint8_t *p_ref2,
|
||||
intptr_t i_ref_stride,
|
||||
int32_t p_sad_array[3] );
|
||||
|
||||
#define x264_pixel_sad_16x16_lsx x264_template(pixel_sad_16x16_lsx)
|
||||
int32_t x264_pixel_sad_16x16_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_sad_16x8_lsx x264_template(pixel_sad_16x8_lsx)
|
||||
int32_t x264_pixel_sad_16x8_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_sad_8x16_lsx x264_template(pixel_sad_8x16_lsx)
|
||||
int32_t x264_pixel_sad_8x16_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_sad_8x8_lsx x264_template(pixel_sad_8x8_lsx)
|
||||
int32_t x264_pixel_sad_8x8_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_sad_8x4_lsx x264_template(pixel_sad_8x4_lsx)
|
||||
int32_t x264_pixel_sad_8x4_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_sad_4x16_lsx x264_template(pixel_sad_4x16_lsx)
|
||||
int32_t x264_pixel_sad_4x16_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_sad_4x8_lsx x264_template(pixel_sad_4x8_lsx)
|
||||
int32_t x264_pixel_sad_4x8_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_sad_4x4_lsx x264_template(pixel_sad_4x4_lsx)
|
||||
int32_t x264_pixel_sad_4x4_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
|
||||
#define x264_pixel_sad_8x4_lasx x264_template(pixel_sad_8x4_lasx)
|
||||
int32_t x264_pixel_sad_8x4_lasx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
|
||||
#define x264_hadamard_ac_8x8_lsx x264_template(hadamard_ac_8x8_lsx)
|
||||
uint64_t x264_hadamard_ac_8x8_lsx( uint8_t *p_pix, intptr_t i_stride );
|
||||
#define x264_pixel_hadamard_ac_8x8_lsx x264_template(pixel_hadamard_ac_8x8_lsx)
|
||||
uint64_t x264_pixel_hadamard_ac_8x8_lsx( uint8_t *p_pix, intptr_t i_stride );
|
||||
#define x264_pixel_hadamard_ac_8x16_lsx x264_template(pixel_hadamard_ac_8x16_lsx)
|
||||
uint64_t x264_pixel_hadamard_ac_8x16_lsx( uint8_t *p_pix, intptr_t i_stride );
|
||||
#define x264_pixel_hadamard_ac_16x8_lsx x264_template(pixel_hadamard_ac_16x8_lsx)
|
||||
uint64_t x264_pixel_hadamard_ac_16x8_lsx( uint8_t *p_pix, intptr_t i_stride );
|
||||
#define x264_pixel_hadamard_ac_16x16_lsx x264_template(pixel_hadamard_ac_16x16_lsx)
|
||||
uint64_t x264_pixel_hadamard_ac_16x16_lsx( uint8_t *p_pix, intptr_t i_stride );
|
||||
|
||||
#define x264_hadamard_ac_8x8_lasx x264_template(hadamard_ac_8x8_lasx)
|
||||
uint64_t x264_hadamard_ac_8x8_lasx( uint8_t *p_pix, intptr_t i_stride );
|
||||
#define x264_pixel_hadamard_ac_8x8_lasx x264_template(pixel_hadamard_ac_8x8_lasx)
|
||||
uint64_t x264_pixel_hadamard_ac_8x8_lasx( uint8_t *p_pix, intptr_t i_stride );
|
||||
#define x264_pixel_hadamard_ac_8x16_lasx x264_template(pixel_hadamard_ac_8x16_lasx)
|
||||
uint64_t x264_pixel_hadamard_ac_8x16_lasx( uint8_t *p_pix, intptr_t i_stride );
|
||||
#define x264_pixel_hadamard_ac_16x8_lasx x264_template(pixel_hadamard_ac_16x8_lasx)
|
||||
uint64_t x264_pixel_hadamard_ac_16x8_lasx( uint8_t *p_pix, intptr_t i_stride );
|
||||
#define x264_pixel_hadamard_ac_16x16_lasx x264_template(pixel_hadamard_ac_16x16_lasx)
|
||||
uint64_t x264_pixel_hadamard_ac_16x16_lasx( uint8_t *p_pix, intptr_t i_stride );
|
||||
|
||||
#define x264_intra_satd_x3_16x16_lsx x264_template(intra_satd_x3_16x16_lsx)
|
||||
void x264_intra_satd_x3_16x16_lsx( uint8_t *p_enc, uint8_t *p_dec,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_intra_satd_x3_8x8c_lsx x264_template(intra_satd_x3_8x8c_lsx)
|
||||
void x264_intra_satd_x3_8x8c_lsx( uint8_t *p_enc, uint8_t *p_dec,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_intra_satd_x3_4x4_lsx x264_template(intra_satd_x3_4x4_lsx)
|
||||
void x264_intra_satd_x3_4x4_lsx( uint8_t *p_enc, uint8_t *p_dec,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_intra_satd_x3_16x16_lasx x264_template(intra_satd_x3_16x16_lasx)
|
||||
void x264_intra_satd_x3_16x16_lasx( uint8_t *p_enc, uint8_t *p_dec,
|
||||
int32_t p_sad_array[3] );
|
||||
|
||||
#define x264_pixel_ssd_16x16_lsx x264_template(pixel_ssd_16x16_lsx)
|
||||
int32_t x264_pixel_ssd_16x16_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_ssd_16x8_lsx x264_template(pixel_ssd_16x8_lsx)
|
||||
int32_t x264_pixel_ssd_16x8_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_ssd_8x16_lsx x264_template(pixel_ssd_8x16_lsx)
|
||||
int32_t x264_pixel_ssd_8x16_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_ssd_8x8_lsx x264_template(pixel_ssd_8x8_lsx)
|
||||
int32_t x264_pixel_ssd_8x8_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_ssd_8x4_lsx x264_template(pixel_ssd_8x4_lsx)
|
||||
int32_t x264_pixel_ssd_8x4_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_ssd_4x16_lsx x264_template(pixel_ssd_4x16_lsx)
|
||||
int32_t x264_pixel_ssd_4x16_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_ssd_4x8_lsx x264_template(pixel_ssd_4x8_lsx)
|
||||
int32_t x264_pixel_ssd_4x8_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_ssd_4x4_lsx x264_template(pixel_ssd_4x4_lsx)
|
||||
int32_t x264_pixel_ssd_4x4_lsx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
|
||||
#define x264_pixel_ssd_16x16_lasx x264_template(pixel_ssd_16x16_lasx)
|
||||
int32_t x264_pixel_ssd_16x16_lasx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_ssd_16x8_lasx x264_template(pixel_ssd_16x8_lasx)
|
||||
int32_t x264_pixel_ssd_16x8_lasx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_ssd_8x16_lasx x264_template(pixel_ssd_8x16_lasx)
|
||||
int32_t x264_pixel_ssd_8x16_lasx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
#define x264_pixel_ssd_8x8_lasx x264_template(pixel_ssd_8x8_lasx)
|
||||
int32_t x264_pixel_ssd_8x8_lasx( uint8_t *p_src, intptr_t i_src_stride,
|
||||
uint8_t *p_ref, intptr_t i_ref_stride );
|
||||
|
||||
#define x264_pixel_var2_8x16_lsx x264_template(pixel_var2_8x16_lsx)
|
||||
int32_t x264_pixel_var2_8x16_lsx( uint8_t *p_pix1, uint8_t *p_pix2,
|
||||
int32_t ssd[2] );
|
||||
#define x264_pixel_var2_8x8_lsx x264_template(pixel_var2_8x8_lsx)
|
||||
int32_t x264_pixel_var2_8x8_lsx( uint8_t *p_pix1, uint8_t *p_pix2,
|
||||
int32_t ssd[2] );
|
||||
#define x264_pixel_var_16x16_lsx x264_template(pixel_var_16x16_lsx)
|
||||
uint64_t x264_pixel_var_16x16_lsx( uint8_t *p_pix, intptr_t i_stride );
|
||||
#define x264_pixel_var_8x16_lsx x264_template(pixel_var_8x16_lsx)
|
||||
uint64_t x264_pixel_var_8x16_lsx( uint8_t *p_pix, intptr_t i_stride );
|
||||
#define x264_pixel_var_8x8_lsx x264_template(pixel_var_8x8_lsx)
|
||||
uint64_t x264_pixel_var_8x8_lsx( uint8_t *p_pix, intptr_t i_stride );
|
||||
|
||||
#define x264_pixel_var2_8x16_lasx x264_template(pixel_var2_8x16_lasx)
|
||||
int32_t x264_pixel_var2_8x16_lasx( uint8_t *p_pix1, uint8_t *p_pix2,
|
||||
int32_t ssd[2] );
|
||||
#define x264_pixel_var2_8x8_lasx x264_template(pixel_var2_8x8_lasx)
|
||||
int32_t x264_pixel_var2_8x8_lasx( uint8_t *p_pix1, uint8_t *p_pix2,
|
||||
int32_t ssd[2] );
|
||||
|
||||
#define x264_pixel_sa8d_8x8_lsx x264_template(pixel_sa8d_8x8_lsx)
|
||||
int32_t x264_pixel_sa8d_8x8_lsx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_sa8d_16x16_lsx x264_template(pixel_sa8d_16x16_lsx)
|
||||
int32_t x264_pixel_sa8d_16x16_lsx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
|
||||
#define x264_intra_sa8d_x3_8x8_lsx x264_template(intra_sa8d_x3_8x8_lsx)
|
||||
void x264_intra_sa8d_x3_8x8_lsx( uint8_t *p_enc, uint8_t p_edge[36],
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_intra_sa8d_x3_8x8_lasx x264_template(intra_sa8d_x3_8x8_lasx)
|
||||
void x264_intra_sa8d_x3_8x8_lasx( uint8_t *p_enc, uint8_t p_edge[36],
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_pixel_sa8d_8x8_lasx x264_template(pixel_sa8d_8x8_lasx)
|
||||
int32_t x264_pixel_sa8d_8x8_lasx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
#define x264_pixel_sa8d_16x16_lasx x264_template(pixel_sa8d_16x16_lasx)
|
||||
int32_t x264_pixel_sa8d_16x16_lasx( uint8_t *p_pix1, intptr_t i_stride,
|
||||
uint8_t *p_pix2, intptr_t i_stride2 );
|
||||
|
||||
#define x264_intra_sad_x3_16x16_lsx x264_template(intra_sad_x3_16x16_lsx)
|
||||
void x264_intra_sad_x3_16x16_lsx( uint8_t *p_enc, uint8_t *p_dec,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_intra_sad_x3_8x8_lsx x264_template(intra_sad_x3_8x8_lsx)
|
||||
void x264_intra_sad_x3_8x8_lsx( uint8_t *p_enc, uint8_t p_edge[36],
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_intra_sad_x3_8x8c_lsx x264_template(intra_sad_x3_8x8c_lsx)
|
||||
void x264_intra_sad_x3_8x8c_lsx( uint8_t *p_enc, uint8_t *p_dec,
|
||||
int32_t p_sad_array[3] );
|
||||
#define x264_intra_sad_x3_4x4_lsx x264_template(intra_sad_x3_4x4_lsx)
|
||||
void x264_intra_sad_x3_4x4_lsx( uint8_t *p_enc, uint8_t *p_dec,
|
||||
int32_t p_sad_array[3] );
|
||||
|
||||
#endif
|
||||
1383
common/loongarch/predict-a.S
Normal file
1383
common/loongarch/predict-a.S
Normal file
File diff suppressed because it is too large
Load Diff
106
common/loongarch/predict-c.c
Normal file
106
common/loongarch/predict-c.c
Normal file
@@ -0,0 +1,106 @@
|
||||
/*****************************************************************************
|
||||
* predict-c.c: loongarch intra prediction
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2023-2025 x264 project
|
||||
*
|
||||
* Authors: Xiwei Gu <guxiwei-hf@loongson.cn>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at licensing@x264.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common/common.h"
|
||||
#include "predict.h"
|
||||
|
||||
/* Installs LoongArch 16x16 intra predictors into pf according to the cpu flags.
 * Does nothing when built with HIGH_BIT_DEPTH.
 *   cpu: bitmask tested against X264_CPU_LSX and X264_CPU_LASX
 *   pf:  predictor table indexed by the I_PRED_16x16_* constants */
void x264_predict_16x16_init_loongarch( int cpu, x264_predict_t pf[7] )
{
#if !HIGH_BIT_DEPTH
    const int have_lsx  = !!( cpu & X264_CPU_LSX );
    const int have_lasx = !!( cpu & X264_CPU_LASX );

    if( have_lsx )
    {
        pf[I_PRED_16x16_V]       = x264_predict_16x16_v_lsx;
        pf[I_PRED_16x16_H]       = x264_predict_16x16_h_lsx;
        pf[I_PRED_16x16_DC]      = x264_predict_16x16_dc_lsx;
        pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_lsx;
        pf[I_PRED_16x16_DC_TOP]  = x264_predict_16x16_dc_top_lsx;
        pf[I_PRED_16x16_DC_128]  = x264_predict_16x16_dc_128_lsx;
        pf[I_PRED_16x16_P]       = x264_predict_16x16_p_lsx;
    }

    /* Checked after LSX so the LASX plane predictor takes precedence. */
    if( have_lasx )
        pf[I_PRED_16x16_P] = x264_predict_16x16_p_lasx;
#endif
}
|
||||
|
||||
/* Installs the LSX 8x8 chroma intra predictors into pf when cpu has
 * X264_CPU_LSX set.  Does nothing when built with HIGH_BIT_DEPTH.
 *   cpu: bitmask tested against X264_CPU_LSX
 *   pf:  predictor table indexed by the I_PRED_CHROMA_* constants */
void x264_predict_8x8c_init_loongarch( int cpu, x264_predict_t pf[7] )
{
#if !HIGH_BIT_DEPTH
    if( !( cpu & X264_CPU_LSX ) )
        return;

    pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_lsx;
    pf[I_PRED_CHROMA_V]       = x264_predict_8x8c_v_lsx;
    pf[I_PRED_CHROMA_H]       = x264_predict_8x8c_h_lsx;
    pf[I_PRED_CHROMA_DC]      = x264_predict_8x8c_dc_lsx;
    pf[I_PRED_CHROMA_DC_128]  = x264_predict_8x8c_dc_128_lsx;
    pf[I_PRED_CHROMA_DC_TOP]  = x264_predict_8x8c_dc_top_lsx;
    pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left_lsx;
#endif
}
|
||||
|
||||
/* Installs LoongArch 8x8 intra predictors into pf according to the cpu flags.
 * Does nothing when built with HIGH_BIT_DEPTH.
 *   cpu:            bitmask tested against X264_CPU_LSX and X264_CPU_LASX
 *   pf:             predictor table indexed by the I_PRED_8x8_* constants
 *   predict_filter: not read or written by this function */
void x264_predict_8x8_init_loongarch( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter )
{
#if !HIGH_BIT_DEPTH
    const int have_lsx  = !!( cpu & X264_CPU_LSX );
    const int have_lasx = !!( cpu & X264_CPU_LASX );

    if( have_lsx )
    {
        pf[I_PRED_8x8_V]       = x264_predict_8x8_v_lsx;
        pf[I_PRED_8x8_DC]      = x264_predict_8x8_dc_lsx;
        pf[I_PRED_8x8_DC_LEFT] = x264_predict_8x8_dc_left_lsx;
        pf[I_PRED_8x8_DC_TOP]  = x264_predict_8x8_dc_top_lsx;
        pf[I_PRED_8x8_DC_128]  = x264_predict_8x8_dc_128_lsx;
        pf[I_PRED_8x8_H]       = x264_predict_8x8_h_lsx;
        pf[I_PRED_8x8_DDL]     = x264_predict_8x8_ddl_lsx;
        pf[I_PRED_8x8_DDR]     = x264_predict_8x8_ddr_lsx;
        pf[I_PRED_8x8_VR]      = x264_predict_8x8_vr_lsx;
        pf[I_PRED_8x8_VL]      = x264_predict_8x8_vl_lsx;
    }

    /* Checked after LSX so these five LASX entries take precedence. */
    if( have_lasx )
    {
        pf[I_PRED_8x8_H]   = x264_predict_8x8_h_lasx;
        pf[I_PRED_8x8_DDL] = x264_predict_8x8_ddl_lasx;
        pf[I_PRED_8x8_DDR] = x264_predict_8x8_ddr_lasx;
        pf[I_PRED_8x8_VR]  = x264_predict_8x8_vr_lasx;
        pf[I_PRED_8x8_VL]  = x264_predict_8x8_vl_lasx;
    }
#endif
}
|
||||
|
||||
/* Installs the LSX 4x4 intra predictors into pf when cpu has X264_CPU_LSX
 * set.  Does nothing when built with HIGH_BIT_DEPTH.
 *   cpu: bitmask tested against X264_CPU_LSX
 *   pf:  predictor table indexed by the I_PRED_4x4_* constants */
void x264_predict_4x4_init_loongarch( int cpu, x264_predict_t pf[12] )
{
#if !HIGH_BIT_DEPTH
    if( !( cpu & X264_CPU_LSX ) )
        return;

    pf[I_PRED_4x4_V]       = x264_predict_4x4_v_lsx;
    pf[I_PRED_4x4_H]       = x264_predict_4x4_h_lsx;
    pf[I_PRED_4x4_DC]      = x264_predict_4x4_dc_lsx;
    pf[I_PRED_4x4_DDL]     = x264_predict_4x4_ddl_lsx;
    pf[I_PRED_4x4_DC_LEFT] = x264_predict_4x4_dc_left_lsx;
    pf[I_PRED_4x4_DC_TOP]  = x264_predict_4x4_dc_top_lsx;
    pf[I_PRED_4x4_DC_128]  = x264_predict_4x4_dc_128_lsx;
#endif
}
|
||||
150
common/loongarch/predict.h
Normal file
150
common/loongarch/predict.h
Normal file
@@ -0,0 +1,150 @@
|
||||
/*****************************************************************************
|
||||
* predict.h: loongarch intra prediction
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2023-2025 x264 project
|
||||
*
|
||||
* Authors: Xiwei Gu <guxiwei-hf@loongson.cn>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at licensing@x264.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X264_LOONGARCH_PREDICT_H
|
||||
#define X264_LOONGARCH_PREDICT_H
|
||||
|
||||
#define x264_predict_8x8c_p_lsx x264_template(predict_8x8c_p_lsx)
|
||||
void x264_predict_8x8c_p_lsx(uint8_t *p_src);
|
||||
|
||||
#define x264_predict_8x8c_v_lsx x264_template(predict_8x8c_v_lsx)
|
||||
void x264_predict_8x8c_v_lsx(uint8_t *p_src);
|
||||
|
||||
#define x264_predict_8x8c_h_lsx x264_template(predict_8x8c_h_lsx)
|
||||
void x264_predict_8x8c_h_lsx(uint8_t *p_src);
|
||||
|
||||
#define x264_predict_8x8c_dc_lsx x264_template(predict_8x8c_dc_lsx)
|
||||
void x264_predict_8x8c_dc_lsx(pixel *src);
|
||||
|
||||
#define x264_predict_8x8c_dc_128_lsx x264_template(predict_8x8c_dc_128_lsx)
|
||||
void x264_predict_8x8c_dc_128_lsx(pixel *src);
|
||||
|
||||
#define x264_predict_8x8c_dc_top_lsx x264_template(predict_8x8c_dc_top_lsx)
|
||||
void x264_predict_8x8c_dc_top_lsx(pixel *src);
|
||||
|
||||
#define x264_predict_8x8c_dc_left_lsx x264_template(predict_8x8c_dc_left_lsx)
|
||||
void x264_predict_8x8c_dc_left_lsx(pixel *src);
|
||||
|
||||
#define x264_predict_16x16_dc_lsx x264_template(predict_16x16_dc_lsx)
|
||||
void x264_predict_16x16_dc_lsx( pixel *src );
|
||||
|
||||
#define x264_predict_16x16_dc_left_lsx x264_template(predict_16x16_dc_left_lsx)
|
||||
void x264_predict_16x16_dc_left_lsx( pixel *src );
|
||||
|
||||
#define x264_predict_16x16_dc_top_lsx x264_template(predict_16x16_dc_top_lsx)
|
||||
void x264_predict_16x16_dc_top_lsx( pixel *src );
|
||||
|
||||
#define x264_predict_16x16_dc_128_lsx x264_template(predict_16x16_dc_128_lsx)
|
||||
void x264_predict_16x16_dc_128_lsx( pixel *src );
|
||||
|
||||
#define x264_predict_16x16_h_lsx x264_template(predict_16x16_h_lsx)
|
||||
void x264_predict_16x16_h_lsx( pixel *src );
|
||||
|
||||
#define x264_predict_16x16_v_lsx x264_template(predict_16x16_v_lsx)
|
||||
void x264_predict_16x16_v_lsx( pixel *src );
|
||||
|
||||
#define x264_predict_16x16_p_lasx x264_template(predict_16x16_p_lasx)
|
||||
void x264_predict_16x16_p_lasx( pixel *src );
|
||||
|
||||
#define x264_predict_16x16_p_lsx x264_template(predict_16x16_p_lsx)
|
||||
void x264_predict_16x16_p_lsx( pixel *src );
|
||||
|
||||
#define x264_predict_8x8_v_lsx x264_template(predict_8x8_v_lsx)
|
||||
void x264_predict_8x8_v_lsx( pixel *src, pixel edge[36] );
|
||||
|
||||
#define x264_predict_8x8_h_lasx x264_template(predict_8x8_h_lasx)
|
||||
void x264_predict_8x8_h_lasx( pixel *src, pixel edge[36] );
|
||||
|
||||
#define x264_predict_8x8_h_lsx x264_template(predict_8x8_h_lsx)
|
||||
void x264_predict_8x8_h_lsx( pixel *src, pixel edge[36] );
|
||||
|
||||
#define x264_predict_8x8_dc_lsx x264_template(predict_8x8_dc_lsx)
|
||||
void x264_predict_8x8_dc_lsx( pixel *src, pixel edge[36] );
|
||||
|
||||
#define x264_predict_8x8_dc_left_lsx x264_template(predict_8x8_dc_left_lsx)
|
||||
void x264_predict_8x8_dc_left_lsx( pixel *src, pixel edge[36] );
|
||||
|
||||
#define x264_predict_8x8_dc_top_lsx x264_template(predict_8x8_dc_top_lsx)
|
||||
void x264_predict_8x8_dc_top_lsx( pixel *src, pixel edge[36] );
|
||||
|
||||
#define x264_predict_8x8_dc_128_lsx x264_template(predict_8x8_dc_128_lsx)
|
||||
void x264_predict_8x8_dc_128_lsx( pixel *src, pixel edge[36] );
|
||||
|
||||
#define x264_predict_8x8_ddl_lasx x264_template(predict_8x8_ddl_lasx)
|
||||
void x264_predict_8x8_ddl_lasx( pixel *src, pixel edge[36] );
|
||||
|
||||
#define x264_predict_8x8_ddl_lsx x264_template(predict_8x8_ddl_lsx)
|
||||
void x264_predict_8x8_ddl_lsx( pixel *src, pixel edge[36] );
|
||||
|
||||
#define x264_predict_8x8_ddr_lasx x264_template(predict_8x8_ddr_lasx)
|
||||
void x264_predict_8x8_ddr_lasx( pixel *src, pixel edge[36] );
|
||||
|
||||
#define x264_predict_8x8_ddr_lsx x264_template(predict_8x8_ddr_lsx)
|
||||
void x264_predict_8x8_ddr_lsx( pixel *src, pixel edge[36] );
|
||||
|
||||
#define x264_predict_8x8_vr_lasx x264_template(predict_8x8_vr_lasx)
|
||||
void x264_predict_8x8_vr_lasx( pixel *src, pixel edge[36] );
|
||||
|
||||
#define x264_predict_8x8_vr_lsx x264_template(predict_8x8_vr_lsx)
|
||||
void x264_predict_8x8_vr_lsx( pixel *src, pixel edge[36] );
|
||||
|
||||
#define x264_predict_8x8_vl_lasx x264_template(predict_8x8_vl_lasx)
|
||||
void x264_predict_8x8_vl_lasx( pixel *src, pixel edge[36] );
|
||||
|
||||
#define x264_predict_8x8_vl_lsx x264_template(predict_8x8_vl_lsx)
|
||||
void x264_predict_8x8_vl_lsx( pixel *src, pixel edge[36] );
|
||||
|
||||
#define x264_predict_4x4_v_lsx x264_template(predict_4x4_v_lsx)
|
||||
void x264_predict_4x4_v_lsx( pixel *p_src );
|
||||
|
||||
#define x264_predict_4x4_h_lsx x264_template(predict_4x4_h_lsx)
|
||||
void x264_predict_4x4_h_lsx( pixel *p_src );
|
||||
|
||||
#define x264_predict_4x4_dc_lsx x264_template(predict_4x4_dc_lsx)
|
||||
void x264_predict_4x4_dc_lsx( pixel *p_src );
|
||||
|
||||
#define x264_predict_4x4_ddl_lsx x264_template(predict_4x4_ddl_lsx)
|
||||
void x264_predict_4x4_ddl_lsx( pixel *p_src );
|
||||
|
||||
#define x264_predict_4x4_dc_top_lsx x264_template(predict_4x4_dc_top_lsx)
|
||||
void x264_predict_4x4_dc_top_lsx( pixel *p_src );
|
||||
|
||||
#define x264_predict_4x4_dc_left_lsx x264_template(predict_4x4_dc_left_lsx)
|
||||
void x264_predict_4x4_dc_left_lsx( pixel *p_src );
|
||||
|
||||
#define x264_predict_4x4_dc_128_lsx x264_template(predict_4x4_dc_128_lsx)
|
||||
void x264_predict_4x4_dc_128_lsx( pixel *p_src );
|
||||
|
||||
#define x264_predict_4x4_init_loongarch x264_template(predict_4x4_init_loongarch)
|
||||
void x264_predict_4x4_init_loongarch( int cpu, x264_predict_t pf[12] );
|
||||
#define x264_predict_8x8_init_loongarch x264_template(predict_8x8_init_loongarch)
|
||||
void x264_predict_8x8_init_loongarch( int cpu, x264_predict8x8_t pf[12],
|
||||
x264_predict_8x8_filter_t *predict_filter );
|
||||
#define x264_predict_8x8c_init_loongarch x264_template(predict_8x8c_init_loongarch)
|
||||
void x264_predict_8x8c_init_loongarch( int cpu, x264_predict_t pf[7] );
|
||||
#define x264_predict_16x16_init_loongarch x264_template(predict_16x16_init_loongarch)
|
||||
void x264_predict_16x16_init_loongarch( int cpu, x264_predict_t pf[7] );
|
||||
|
||||
#endif
|
||||
1231
common/loongarch/quant-a.S
Normal file
1231
common/loongarch/quant-a.S
Normal file
File diff suppressed because it is too large
Load Diff
96
common/loongarch/quant.h
Normal file
96
common/loongarch/quant.h
Normal file
@@ -0,0 +1,96 @@
|
||||
/*****************************************************************************
|
||||
* quant.h: loongarch quantization and level-run
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2023-2025 x264 project
|
||||
*
|
||||
* Authors: Shiyou Yin <yinshiyou-hf@loongson.cn>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at licensing@x264.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X264_LOONGARCH_QUANT_H
|
||||
#define X264_LOONGARCH_QUANT_H
|
||||
|
||||
#define x264_coeff_last64_lsx x264_template(coeff_last64_lsx)
|
||||
int32_t x264_coeff_last64_lsx( int16_t *p_src );
|
||||
#define x264_coeff_last16_lsx x264_template(coeff_last16_lsx)
|
||||
int32_t x264_coeff_last16_lsx( int16_t *p_src );
|
||||
#define x264_coeff_last15_lsx x264_template(coeff_last15_lsx)
|
||||
int32_t x264_coeff_last15_lsx( int16_t *p_src );
|
||||
#define x264_coeff_last8_lsx x264_template(coeff_last8_lsx)
|
||||
int32_t x264_coeff_last8_lsx( int16_t *p_src );
|
||||
#define x264_coeff_last4_lsx x264_template(coeff_last4_lsx)
|
||||
int32_t x264_coeff_last4_lsx( int16_t *p_src );
|
||||
|
||||
#define x264_quant_4x4_lsx x264_template(quant_4x4_lsx)
|
||||
int32_t x264_quant_4x4_lsx( int16_t *p_dct, uint16_t *p_mf, uint16_t *p_bias );
|
||||
#define x264_quant_4x4x4_lsx x264_template(quant_4x4x4_lsx)
|
||||
int32_t x264_quant_4x4x4_lsx( int16_t p_dct[4][16],
|
||||
uint16_t pu_mf[16], uint16_t pu_bias[16] );
|
||||
#define x264_quant_8x8_lsx x264_template(quant_8x8_lsx)
|
||||
int32_t x264_quant_8x8_lsx( int16_t *p_dct, uint16_t *p_mf, uint16_t *p_bias );
|
||||
#define x264_quant_4x4_dc_lsx x264_template(quant_4x4_dc_lsx)
|
||||
int32_t x264_quant_4x4_dc_lsx( dctcoef dct[16], int32_t mf, int32_t bias );
|
||||
#define x264_quant_2x2_dc_lsx x264_template(quant_2x2_dc_lsx)
|
||||
int32_t x264_quant_2x2_dc_lsx( dctcoef dct[4], int32_t mf, int32_t bias );
|
||||
|
||||
#define x264_dequant_4x4_lsx x264_template(dequant_4x4_lsx)
|
||||
void x264_dequant_4x4_lsx( dctcoef dct[16], int dequant_mf[6][16], int i_qp );
|
||||
#define x264_dequant_8x8_lsx x264_template(dequant_8x8_lsx)
|
||||
void x264_dequant_8x8_lsx( dctcoef dct[64], int dequant_mf[6][64], int i_qp );
|
||||
#define x264_dequant_4x4_dc_lsx x264_template(dequant_4x4_dc_lsx)
|
||||
void x264_dequant_4x4_dc_lsx( dctcoef dct[16], int dequant_mf[6][16], int i_qp );
|
||||
|
||||
#define x264_decimate_score15_lsx x264_template(decimate_score15_lsx)
|
||||
int x264_decimate_score15_lsx( dctcoef *dct );
|
||||
#define x264_decimate_score16_lsx x264_template(decimate_score16_lsx)
|
||||
int x264_decimate_score16_lsx( dctcoef *dct );
|
||||
#define x264_decimate_score64_lsx x264_template(decimate_score64_lsx)
|
||||
int x264_decimate_score64_lsx( dctcoef *dct );
|
||||
|
||||
#define x264_coeff_last64_lasx x264_template(coeff_last64_lasx)
|
||||
int32_t x264_coeff_last64_lasx( int16_t *p_src );
|
||||
#define x264_coeff_last16_lasx x264_template(coeff_last16_lasx)
|
||||
int32_t x264_coeff_last16_lasx( int16_t *p_src );
|
||||
#define x264_coeff_last15_lasx x264_template(coeff_last15_lasx)
|
||||
int32_t x264_coeff_last15_lasx( int16_t *p_src );
|
||||
|
||||
#define x264_quant_4x4x4_lasx x264_template(quant_4x4x4_lasx)
|
||||
int32_t x264_quant_4x4x4_lasx( int16_t p_dct[4][16],
|
||||
uint16_t pu_mf[16], uint16_t pu_bias[16] );
|
||||
|
||||
#define x264_dequant_4x4_lasx x264_template(dequant_4x4_lasx)
|
||||
void x264_dequant_4x4_lasx( dctcoef dct[16], int dequant_mf[6][16], int i_qp );
|
||||
#define x264_dequant_8x8_lasx x264_template(dequant_8x8_lasx)
|
||||
void x264_dequant_8x8_lasx( dctcoef dct[64], int dequant_mf[6][64], int i_qp );
|
||||
#define x264_dequant_4x4_dc_lasx x264_template(dequant_4x4_dc_lasx)
|
||||
void x264_dequant_4x4_dc_lasx( dctcoef dct[16], int dequant_mf[6][16], int i_qp );
|
||||
|
||||
#define x264_coeff_level_run16_lasx x264_template(coeff_level_run16_lasx)
|
||||
int x264_coeff_level_run16_lasx( dctcoef *, x264_run_level_t * );
|
||||
#define x264_coeff_level_run15_lasx x264_template(coeff_level_run15_lasx)
|
||||
int x264_coeff_level_run15_lasx( dctcoef *, x264_run_level_t * );
|
||||
|
||||
#define x264_coeff_level_run16_lsx x264_template(coeff_level_run16_lsx)
|
||||
int x264_coeff_level_run16_lsx( dctcoef *, x264_run_level_t * );
|
||||
#define x264_coeff_level_run15_lsx x264_template(coeff_level_run15_lsx)
|
||||
int x264_coeff_level_run15_lsx( dctcoef *, x264_run_level_t * );
|
||||
#define x264_coeff_level_run8_lsx x264_template(coeff_level_run8_lsx)
|
||||
int x264_coeff_level_run8_lsx( dctcoef *, x264_run_level_t * );
|
||||
|
||||
#endif/* X264_LOONGARCH_QUANT_H */
|
||||
2585
common/loongarch/sad-a.S
Normal file
2585
common/loongarch/sad-a.S
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user