x264 source for verification 2026-05-22

2026-05-22 16:45:04 +08:00
commit 4647f166e5
270 changed files with 166522 additions and 0 deletions
--- a/common/loongarch/dct-a.S
+++ b/common/loongarch/dct-a.S
--- a/common/loongarch/dct.h
+++ b/common/loongarch/dct.h
@@ -0,0 +1,95 @@
+/*****************************************************************************
+ * dct.h: loongarch transform and zigzag
+ *****************************************************************************
+ * Copyright (C) 2023-2025 x264 project
+ *
+ * Authors: Peng Zhou <zhoupeng@loongson.cn>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#ifndef X264_LOONGARCH_DCT_H
+#define X264_LOONGARCH_DCT_H
+
+#define x264_sub8x8_dct_lasx x264_template(sub8x8_dct_lasx)
+void x264_sub8x8_dct_lasx( int16_t p_dst[4][16], uint8_t *p_src, uint8_t *p_ref );
+#define x264_sub16x16_dct_lasx x264_template(sub16x16_dct_lasx)
+void x264_sub16x16_dct_lasx( int16_t p_dst[16][16], uint8_t *p_src, uint8_t *p_ref );
+
+/* sub8x8_dct8 has no _lasx prototype in this header; x264_sub8x8_dct8_lsx is
+ * declared exactly once, together with the other _lsx prototypes further down. */
+#define x264_sub16x16_dct8_lasx x264_template(sub16x16_dct8_lasx)
+void x264_sub16x16_dct8_lasx( int16_t pi_dct[4][64], uint8_t *p_pix1,
+                              uint8_t *p_pix2 );
+
+/* add4x4_idct has no _lasx prototype in this header; x264_add4x4_idct_lsx is
+ * declared exactly once, together with the other _lsx prototypes further down. */
+#define x264_add8x8_idct_lasx x264_template(add8x8_idct_lasx)
+void x264_add8x8_idct_lasx( uint8_t *p_dst, int16_t pi_dct[4][16] );
+#define x264_add16x16_idct_lasx x264_template(add16x16_idct_lasx)
+void x264_add16x16_idct_lasx( uint8_t *p_dst, int16_t pi_dct[16][16] );
+#define x264_add8x8_idct8_lasx x264_template(add8x8_idct8_lasx)
+void x264_add8x8_idct8_lasx( uint8_t *p_dst, int16_t pi_dct[64] );
+#define x264_add8x8_idct_dc_lasx x264_template(add8x8_idct_dc_lasx)
+void x264_add8x8_idct_dc_lasx( uint8_t *p_dst, int16_t dct[4] );
+#define x264_add16x16_idct_dc_lasx x264_template(add16x16_idct_dc_lasx)
+void x264_add16x16_idct_dc_lasx( uint8_t *p_dst, int16_t dct[16] );
+
+#define x264_idct4x4dc_lasx x264_template(idct4x4dc_lasx)
+void x264_idct4x4dc_lasx( int16_t d[16] );
+#define x264_dct4x4dc_lasx x264_template(dct4x4dc_lasx)
+void x264_dct4x4dc_lasx( int16_t d[16] );
+
+#define x264_zigzag_scan_4x4_frame_lasx x264_template(zigzag_scan_4x4_frame_lasx)
+void x264_zigzag_scan_4x4_frame_lasx( int16_t level[16], int16_t dct[16] );
+
+#define x264_sub4x4_dct_lsx x264_template(sub4x4_dct_lsx)
+void x264_sub4x4_dct_lsx( int16_t p_dst[16], uint8_t *p_src, uint8_t *p_ref );
+#define x264_sub8x8_dct_lsx x264_template(sub8x8_dct_lsx)
+void x264_sub8x8_dct_lsx( int16_t p_dst[4][16], uint8_t *p_src, uint8_t *p_ref );
+#define x264_sub16x16_dct_lsx x264_template(sub16x16_dct_lsx)
+void x264_sub16x16_dct_lsx( int16_t p_dst[16][16], uint8_t *p_src, uint8_t *p_ref );
+
+#define x264_sub8x8_dct8_lsx x264_template(sub8x8_dct8_lsx)
+void x264_sub8x8_dct8_lsx( int16_t pi_dct[64], uint8_t *p_pix1, uint8_t *p_pix2 );
+#define x264_sub16x16_dct8_lsx x264_template(sub16x16_dct8_lsx)
+void x264_sub16x16_dct8_lsx( int16_t pi_dct[4][64], uint8_t *p_pix1,
+                              uint8_t *p_pix2 );
+
+#define x264_add4x4_idct_lsx x264_template(add4x4_idct_lsx)
+void x264_add4x4_idct_lsx( uint8_t *p_dst, int16_t pi_dct[16] );
+#define x264_add8x8_idct_lsx x264_template(add8x8_idct_lsx)
+void x264_add8x8_idct_lsx( uint8_t *p_dst, int16_t pi_dct[4][16] );
+#define x264_add16x16_idct_lsx x264_template(add16x16_idct_lsx)
+void x264_add16x16_idct_lsx( uint8_t *p_dst, int16_t pi_dct[16][16] );
+#define x264_add8x8_idct8_lsx x264_template(add8x8_idct8_lsx)
+void x264_add8x8_idct8_lsx( uint8_t *p_dst, int16_t pi_dct[64] );
+#define x264_add8x8_idct_dc_lsx x264_template(add8x8_idct_dc_lsx)
+void x264_add8x8_idct_dc_lsx( uint8_t *p_dst, int16_t dct[4] );
+#define x264_add16x16_idct_dc_lsx x264_template(add16x16_idct_dc_lsx)
+void x264_add16x16_idct_dc_lsx( uint8_t *p_dst, int16_t dct[16] );
+
+#define x264_idct4x4dc_lsx x264_template(idct4x4dc_lsx)
+void x264_idct4x4dc_lsx( int16_t d[16] );
+#define x264_dct4x4dc_lsx x264_template(dct4x4dc_lsx)
+void x264_dct4x4dc_lsx( int16_t d[16] );
+
+#define x264_zigzag_scan_4x4_frame_lsx x264_template(zigzag_scan_4x4_frame_lsx)
+void x264_zigzag_scan_4x4_frame_lsx( int16_t level[16], int16_t dct[16] );
+
+#endif /* X264_LOONGARCH_DCT_H */
--- a/common/loongarch/deblock-a.S
+++ b/common/loongarch/deblock-a.S
--- a/common/loongarch/deblock.h
+++ b/common/loongarch/deblock.h
@@ -0,0 +1,54 @@
+/*****************************************************************************
+ * deblock.h: loongarch deblock
+ *****************************************************************************
+ * Copyright (C) 2023-2025 x264 project
+ *
+ * Authors: Hao Chen <chenhao@loongson.cn>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#ifndef X264_LOONGARCH_DEBLOCK_H
+#define X264_LOONGARCH_DEBLOCK_H
+
+#if !HIGH_BIT_DEPTH
+/* Prototypes carrying the _lsx suffix: luma intra filters and strength. */
+#define x264_deblock_v_luma_intra_lsx x264_template(deblock_v_luma_intra_lsx)
+void x264_deblock_v_luma_intra_lsx( uint8_t *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_h_luma_intra_lsx x264_template(deblock_h_luma_intra_lsx)
+void x264_deblock_h_luma_intra_lsx( uint8_t *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_strength_lsx x264_template(deblock_strength_lsx)
+void x264_deblock_strength_lsx( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
+                                int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
+                                int mvy_limit, int bframe );
+/* Prototypes carrying the _lasx suffix: luma, luma intra and strength. */
+#define x264_deblock_v_luma_lasx x264_template(deblock_v_luma_lasx)
+void x264_deblock_v_luma_lasx( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_luma_lasx x264_template(deblock_h_luma_lasx)
+void x264_deblock_h_luma_lasx( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_v_luma_intra_lasx x264_template(deblock_v_luma_intra_lasx)
+void x264_deblock_v_luma_intra_lasx( uint8_t *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_h_luma_intra_lasx x264_template(deblock_h_luma_intra_lasx)
+void x264_deblock_h_luma_intra_lasx( uint8_t *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_strength_lasx x264_template(deblock_strength_lasx)
+void x264_deblock_strength_lasx( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
+                                 int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
+                                 int mvy_limit, int bframe );
+#endif /* !HIGH_BIT_DEPTH */
+
+#endif /* X264_LOONGARCH_DEBLOCK_H */
--- a/common/loongarch/loongson_asm.S
+++ b/common/loongarch/loongson_asm.S
@@ -0,0 +1,770 @@
+/*********************************************************************
+ * Copyright (c) 2022-2024 Loongson Technology Corporation Limited
+ * Contributed by Xiwei Gu <guxiwei-hf@loongson.cn>
+ *                Shiyou Yin <yinshiyou-hf@loongson.cn>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *********************************************************************/
+
+/*
+ * This file is a LoongArch assembly helper file and available under ISC
+ * license. It provides a large number of macros and aliases to simplify
+ * writing assembly code, especially for LSX and LASX optimizations.
+ *
+ * Anyone can modify it or add new features for his/her own purposes.
+ * Contributing a patch will be appreciated as it might be useful for
+ * others as well. Send patches to loongson contributor mentioned above.
+ *
+ * MAJOR version: Usage changes, incompatible with previous version.
+ * MINOR version: Add new macros/functions, or bug fixes.
+ * MICRO version: Comment changes or implementation changes.
+ */
+
+#define LML_VERSION_MAJOR 0  /* bumped on usage changes incompatible with the previous version */
+#define LML_VERSION_MINOR 4  /* bumped when macros/functions are added or bugs are fixed */
+#define LML_VERSION_MICRO 0  /* bumped on comment or implementation changes */
+
+#define ASM_PREF             /* empty here: `function` prepends it to every symbol name */
+#define DEFAULT_ALIGN    5   /* default `align` argument of the `function` and `const` macros */
+
+/*
+ *============================================================================
+ * Macros for a specific project; set them as needed.
+ * The following LoongML macros are provided for your reference.
+ *============================================================================
+ */
+/* function: start global function symbol ASM_PREF<name> in .text; close it with endfunc. */
+.macro function name, align=DEFAULT_ALIGN
+.macro endfunc                      /* defined anew by every use of `function` */
+    jirl    $r0, $r1, 0x0           /* endfunc itself emits the jump to the address held in $r1 */
+    .size ASM_PREF\name, . - ASM_PREF\name
+    .purgem endfunc                 /* remove endfunc so the next `function` can define it again */
+.endm
+.text ;
+.align \align ;
+.globl ASM_PREF\name ;
+.type  ASM_PREF\name, @function ;
+ASM_PREF\name: ;
+.endm
+/* const: start label <name> in .rodata (no .globl, no ASM_PREF here); close it with endconst. */
+.macro  const name, align=DEFAULT_ALIGN
+    .macro endconst
+    .size  \name, . - \name
+    .purgem endconst
+    .endm
+.section .rodata
+.align   \align
+\name:
+.endm
+
+/*
+ *============================================================================
+ * LoongArch register aliases: lets code write a0, f0, vr0, xr0, ... for $a0, $f0, $vr0, $xr0, ...
+ *============================================================================
+ */
+
+#define a0 $a0
+#define a1 $a1
+#define a2 $a2
+#define a3 $a3
+#define a4 $a4
+#define a5 $a5
+#define a6 $a6
+#define a7 $a7
+
+#define t0 $t0
+#define t1 $t1
+#define t2 $t2
+#define t3 $t3
+#define t4 $t4
+#define t5 $t5
+#define t6 $t6
+#define t7 $t7
+#define t8 $t8
+
+#define s0 $s0
+#define s1 $s1
+#define s2 $s2
+#define s3 $s3
+#define s4 $s4
+#define s5 $s5
+#define s6 $s6
+#define s7 $s7
+#define s8 $s8
+
+#define zero $zero
+#define sp   $sp
+#define ra   $ra
+
+#define fa0  $fa0
+#define fa1  $fa1
+#define fa2  $fa2
+#define fa3  $fa3
+#define fa4  $fa4
+#define fa5  $fa5
+#define fa6  $fa6
+#define fa7  $fa7
+#define ft0  $ft0
+#define ft1  $ft1
+#define ft2  $ft2
+#define ft3  $ft3
+#define ft4  $ft4
+#define ft5  $ft5
+#define ft6  $ft6
+#define ft7  $ft7
+#define ft8  $ft8
+#define ft9  $ft9
+#define ft10 $ft10
+#define ft11 $ft11
+#define ft12 $ft12
+#define ft13 $ft13
+#define ft14 $ft14
+#define ft15 $ft15
+#define fs0  $fs0
+#define fs1  $fs1
+#define fs2  $fs2
+#define fs3  $fs3
+#define fs4  $fs4
+#define fs5  $fs5
+#define fs6  $fs6
+#define fs7  $fs7
+
+#define f0  $f0
+#define f1  $f1
+#define f2  $f2
+#define f3  $f3
+#define f4  $f4
+#define f5  $f5
+#define f6  $f6
+#define f7  $f7
+#define f8  $f8
+#define f9  $f9
+#define f10 $f10
+#define f11 $f11
+#define f12 $f12
+#define f13 $f13
+#define f14 $f14
+#define f15 $f15
+#define f16 $f16
+#define f17 $f17
+#define f18 $f18
+#define f19 $f19
+#define f20 $f20
+#define f21 $f21
+#define f22 $f22
+#define f23 $f23
+#define f24 $f24
+#define f25 $f25
+#define f26 $f26
+#define f27 $f27
+#define f28 $f28
+#define f29 $f29
+#define f30 $f30
+#define f31 $f31
+
+#define vr0 $vr0
+#define vr1 $vr1
+#define vr2 $vr2
+#define vr3 $vr3
+#define vr4 $vr4
+#define vr5 $vr5
+#define vr6 $vr6
+#define vr7 $vr7
+#define vr8 $vr8
+#define vr9 $vr9
+#define vr10 $vr10
+#define vr11 $vr11
+#define vr12 $vr12
+#define vr13 $vr13
+#define vr14 $vr14
+#define vr15 $vr15
+#define vr16 $vr16
+#define vr17 $vr17
+#define vr18 $vr18
+#define vr19 $vr19
+#define vr20 $vr20
+#define vr21 $vr21
+#define vr22 $vr22
+#define vr23 $vr23
+#define vr24 $vr24
+#define vr25 $vr25
+#define vr26 $vr26
+#define vr27 $vr27
+#define vr28 $vr28
+#define vr29 $vr29
+#define vr30 $vr30
+#define vr31 $vr31
+
+#define xr0 $xr0
+#define xr1 $xr1
+#define xr2 $xr2
+#define xr3 $xr3
+#define xr4 $xr4
+#define xr5 $xr5
+#define xr6 $xr6
+#define xr7 $xr7
+#define xr8 $xr8
+#define xr9 $xr9
+#define xr10 $xr10
+#define xr11 $xr11
+#define xr12 $xr12
+#define xr13 $xr13
+#define xr14 $xr14
+#define xr15 $xr15
+#define xr16 $xr16
+#define xr17 $xr17
+#define xr18 $xr18
+#define xr19 $xr19
+#define xr20 $xr20
+#define xr21 $xr21
+#define xr22 $xr22
+#define xr23 $xr23
+#define xr24 $xr24
+#define xr25 $xr25
+#define xr26 $xr26
+#define xr27 $xr27
+#define xr28 $xr28
+#define xr29 $xr29
+#define xr30 $xr30
+#define xr31 $xr31
+
+/*
+ *============================================================================
+ * LSX/LASX synthesized instructions
+ *============================================================================
+ */
+
+/*
+ * Description : Dot product of adjacent (even, odd) element pairs of vj and vk
+ * Arguments   : Inputs  - vj, vk (bytes for .h.bu / .h.bu.b, halfwords for .w.h)
+ *               Outputs - vd (written by the 1st insn before the 2nd reads vj/vk: do not alias)
+ *               Return Type - twice the input width (halfword or word); xv* are the LASX forms
+ */
+.macro vdp2.h.bu vd, vj, vk
+    vmulwev.h.bu      \vd,    \vj,    \vk
+    vmaddwod.h.bu     \vd,    \vj,    \vk
+.endm
+
+.macro vdp2.h.bu.b vd, vj, vk
+    vmulwev.h.bu.b    \vd,    \vj,    \vk
+    vmaddwod.h.bu.b   \vd,    \vj,    \vk
+.endm
+
+.macro vdp2.w.h vd, vj, vk
+    vmulwev.w.h       \vd,    \vj,    \vk
+    vmaddwod.w.h      \vd,    \vj,    \vk
+.endm
+
+.macro xvdp2.h.bu xd, xj, xk
+    xvmulwev.h.bu    \xd,    \xj,    \xk
+    xvmaddwod.h.bu   \xd,    \xj,    \xk
+.endm
+
+.macro xvdp2.h.bu.b xd, xj, xk
+    xvmulwev.h.bu.b    \xd,  \xj,    \xk
+    xvmaddwod.h.bu.b   \xd,  \xj,    \xk
+.endm
+
+.macro xvdp2.w.h xd, xj, xk
+    xvmulwev.w.h       \xd,  \xj,    \xk
+    xvmaddwod.w.h      \xd,  \xj,    \xk
+.endm
+
+/*
+ * Description : Dot product of adjacent element pairs of vj and vk, added onto vd
+ * Arguments   : Inputs  - vj, vk (bytes for .h.bu / .h.bu.b, halfwords for .w.h)
+ *               In/Out  - vd (accumulator: both instructions are multiply-add forms)
+ *               Return Type - twice size of input
+ */
+.macro vdp2add.h.bu vd, vj, vk
+    vmaddwev.h.bu     \vd,    \vj,    \vk
+    vmaddwod.h.bu     \vd,    \vj,    \vk
+.endm
+
+.macro vdp2add.h.bu.b vd, vj, vk
+    vmaddwev.h.bu.b   \vd,    \vj,    \vk
+    vmaddwod.h.bu.b   \vd,    \vj,    \vk
+.endm
+
+.macro vdp2add.w.h vd, vj, vk
+    vmaddwev.w.h      \vd,    \vj,    \vk
+    vmaddwod.w.h      \vd,    \vj,    \vk
+.endm
+
+.macro xvdp2add.h.bu.b xd, xj, xk
+    xvmaddwev.h.bu.b   \xd,  \xj,    \xk
+    xvmaddwod.h.bu.b   \xd,  \xj,    \xk
+.endm
+
+.macro xvdp2add.w.h xd, xj, xk
+    xvmaddwev.w.h      \xd,  \xj,    \xk
+    xvmaddwod.w.h      \xd,  \xj,    \xk
+.endm
+
+/*
+ * Description : Clamp element vj[i] into the range vk[i] ~ va[i] (vk is the low bound, va the high)
+ * clip: vd = min(max(vj, vk), va), computed per element with a max followed by a min
+ */
+.macro vclip.h  vd,  vj, vk, va
+    vmax.h    \vd,  \vj,   \vk
+    vmin.h    \vd,  \vd,   \va
+.endm
+
+.macro vclip.w  vd,  vj, vk, va
+    vmax.w    \vd,  \vj,   \vk
+    vmin.w    \vd,  \vd,   \va
+.endm
+
+.macro xvclip.h  xd,  xj, xk, xa
+    xvmax.h    \xd,  \xj,   \xk
+    xvmin.h    \xd,  \xd,   \xa
+.endm
+
+.macro xvclip.w  xd,  xj, xk, xa
+    xvmax.w    \xd,  \xj,   \xk
+    xvmin.w    \xd,  \xd,   \xa
+.endm
+
+/*
+ * Description : Clamp element vj[i] into the range 0 ~ 255
+ * clip255: max against immediate 0 first, then an unsigned saturate (immediate 7) caps at 255
+ */
+.macro vclip255.h  vd, vj
+    vmaxi.h   \vd,   \vj,  0
+    vsat.hu   \vd,   \vd,  7
+.endm
+
+.macro vclip255.w  vd, vj
+    vmaxi.w   \vd,   \vj,  0
+    vsat.wu   \vd,   \vd,  7
+.endm
+
+.macro xvclip255.h  xd, xj
+    xvmaxi.h   \xd,   \xj,  0
+    xvsat.hu   \xd,   \xd,  7
+.endm
+
+.macro xvclip255.w  xd, xj
+    xvmaxi.w   \xd,   \xj,  0
+    xvsat.wu   \xd,   \xd,  7
+.endm
+
+/*
+ * Description : Store one element of a vector at address rk + ra
+ * vd : Data vector to be stored
+ * rk : Address of data storage; it is advanced in place (rk += ra) before the store
+ * ra : Offset register. NOTE(review): `ra` is also #defined to $ra in this file - confirm no clash
+ * si : Index of data in vd
+ */
+.macro vstelmx.b vd, rk, ra, si
+    add.d      \rk,  \rk,  \ra
+    vstelm.b   \vd,  \rk,  0, \si
+.endm
+
+.macro vstelmx.h vd, rk, ra, si
+    add.d      \rk,  \rk,  \ra
+    vstelm.h   \vd,  \rk,  0, \si
+.endm
+
+.macro vstelmx.w vd, rk, ra, si
+    add.d      \rk,  \rk,  \ra
+    vstelm.w   \vd,  \rk,  0, \si
+.endm
+
+.macro vstelmx.d  vd, rk, ra, si
+    add.d      \rk,  \rk,  \ra
+    vstelm.d   \vd,  \rk,  0, \si
+.endm
+
+.macro vmov xd, xj
+    vor.v  \xd,  \xj,  \xj       /* OR of xj with itself: plain register copy xd = xj */
+.endm
+
+.macro xmov xd, xj
+    xvor.v  \xd,  \xj,  \xj      /* same copy idiom using the xv (LASX) instruction */
+.endm
+
+.macro xvstelmx.d  xd, rk, ra, si
+    add.d      \rk, \rk,  \ra       /* rk is advanced in place by ra before the store */
+    xvstelm.d  \xd, \rk,  0, \si    /* store element si of xd at the updated rk, offset 0 */
+.endm
+
+/*
+ *============================================================================
+ * LSX/LASX custom macros
+ *============================================================================
+ */
+
+/*
+ * Load 4 float, double, V128, V256 elements into out0..out3 from src + {0, stride, stride2, stride3}.
+ */
+.macro FLDS_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3
+    fld.s     \out0,    \src,    0
+    fldx.s    \out1,    \src,    \stride
+    fldx.s    \out2,    \src,    \stride2
+    fldx.s    \out3,    \src,    \stride3
+.endm
+
+.macro FLDD_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3
+    fld.d     \out0,    \src,    0
+    fldx.d    \out1,    \src,    \stride
+    fldx.d    \out2,    \src,    \stride2
+    fldx.d    \out3,    \src,    \stride3
+.endm
+
+.macro LSX_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3
+    vld     \out0,    \src,    0
+    vldx    \out1,    \src,    \stride
+    vldx    \out2,    \src,    \stride2
+    vldx    \out3,    \src,    \stride3
+.endm
+
+.macro LASX_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3
+    xvld    \out0,    \src,    0
+    xvldx   \out1,    \src,    \stride
+    xvldx   \out2,    \src,    \stride2
+    xvldx   \out3,    \src,    \stride3
+.endm
+
+/*
+ * Description : Transpose 4x4 block with half-word elements in vectors
+ * Arguments   : Inputs  - in0, in1, in2, in3 (combined only through vilvl.h)
+ *               Outputs - out0, out1, out2, out3 (tmp0 and tmp1 are clobbered)
+ */
+.macro LSX_TRANSPOSE4x4_H in0, in1, in2, in3, out0, out1, out2, out3, \
+                          tmp0, tmp1
+    vilvl.h   \tmp0,  \in1,   \in0
+    vilvl.h   \tmp1,  \in3,   \in2
+    vilvl.w   \out0,  \tmp1,  \tmp0    /* out0 must not be tmp0/tmp1: both are read again next */
+    vilvh.w   \out2,  \tmp1,  \tmp0
+    vilvh.d   \out1,  \out0,  \out0    /* out1 must not be out0/out2: both are read again next */
+    vilvh.d   \out3,  \out0,  \out2
+.endm
+
+/*
+ * Description : Transpose 4x4 block with word elements in vectors
+ * Arguments   : Inputs  - in0, in1, in2, in3
+ *               Outputs - out0, out1, out2, out3 (tmp0 and tmp1 are clobbered)
+ * Details     : out1 and out3 also hold intermediates until the final vilvh.d pair
+ * Example     :
+ *               1, 2, 3, 4            1, 5, 9,13
+ *               5, 6, 7, 8    to      2, 6,10,14
+ *               9,10,11,12  =====>    3, 7,11,15
+ *              13,14,15,16            4, 8,12,16
+ */
+.macro LSX_TRANSPOSE4x4_W in0, in1, in2, in3, out0, out1, out2, out3, \
+                          tmp0, tmp1
+
+    vilvl.w    \tmp0,   \in1,    \in0
+    vilvh.w    \out1,   \in1,    \in0
+    vilvl.w    \tmp1,   \in3,    \in2
+    vilvh.w    \out3,   \in3,    \in2
+
+    vilvl.d    \out0,   \tmp1,   \tmp0
+    vilvl.d    \out2,   \out3,   \out1
+    vilvh.d    \out3,   \out3,   \out1
+    vilvh.d    \out1,   \tmp1,   \tmp0
+.endm
+
+/*
+ * Description : Transpose 8x8 block with half-word elements in vectors
+ * Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7
+ *               Outputs - out0, out1, out2, out3, out4, out5, out6, out7 (tmp0..tmp7 are clobbered)
+ */
+.macro LSX_TRANSPOSE8x8_H in0, in1, in2, in3, in4, in5, in6, in7, out0, out1,   \
+                          out2, out3, out4, out5, out6, out7, tmp0, tmp1, tmp2, \
+                          tmp3, tmp4, tmp5, tmp6, tmp7
+    vilvl.h      \tmp0,    \in6,   \in4
+    vilvl.h      \tmp1,    \in7,   \in5
+    vilvl.h      \tmp2,    \in2,   \in0
+    vilvl.h      \tmp3,    \in3,   \in1
+
+    vilvl.h      \tmp4,    \tmp1,  \tmp0
+    vilvh.h      \tmp5,    \tmp1,  \tmp0
+    vilvl.h      \tmp6,    \tmp3,  \tmp2
+    vilvh.h      \tmp7,    \tmp3,  \tmp2
+
+    vilvh.h      \tmp0,    \in6,   \in4    /* inputs are read again here, after tmp4..tmp7 were built */
+    vilvh.h      \tmp1,    \in7,   \in5
+    vilvh.h      \tmp2,    \in2,   \in0
+    vilvh.h      \tmp3,    \in3,   \in1
+
+    vpickev.d    \out0,    \tmp4,  \tmp6
+    vpickod.d    \out1,    \tmp4,  \tmp6
+    vpickev.d    \out2,    \tmp5,  \tmp7
+    vpickod.d    \out3,    \tmp5,  \tmp7
+
+    vilvl.h      \tmp4,    \tmp1,  \tmp0
+    vilvh.h      \tmp5,    \tmp1,  \tmp0
+    vilvl.h      \tmp6,    \tmp3,  \tmp2
+    vilvh.h      \tmp7,    \tmp3,  \tmp2
+
+    vpickev.d    \out4,    \tmp4,  \tmp6
+    vpickod.d    \out5,    \tmp4,  \tmp6
+    vpickev.d    \out6,    \tmp5,  \tmp7
+    vpickod.d    \out7,    \tmp5,  \tmp7
+.endm
+
+/*
+ * Description : Transpose 16x8 block with byte elements in vectors
+ * Arguments   : Inputs  - in0 ... in15 (16 registers; only xvilvl.b is applied to them)
+ *               Outputs - out0, out1, out2, out3, out4, out5, out6, out7 (tmp0..tmp7 are clobbered)
+ */
+.macro LASX_TRANSPOSE16X8_B in0, in1, in2, in3, in4, in5, in6, in7,        \
+                            in8, in9, in10, in11, in12, in13, in14, in15,  \
+                            out0, out1, out2, out3, out4, out5, out6, out7,\
+                            tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7
+    xvilvl.b   \tmp0,    \in2,     \in0
+    xvilvl.b   \tmp1,    \in3,     \in1
+    xvilvl.b   \tmp2,    \in6,     \in4
+    xvilvl.b   \tmp3,    \in7,     \in5
+    xvilvl.b   \tmp4,    \in10,    \in8
+    xvilvl.b   \tmp5,    \in11,    \in9
+    xvilvl.b   \tmp6,    \in14,    \in12
+    xvilvl.b   \tmp7,    \in15,    \in13
+    xvilvl.b   \out0,    \tmp1,    \tmp0
+    xvilvh.b   \out1,    \tmp1,    \tmp0
+    xvilvl.b   \out2,    \tmp3,    \tmp2
+    xvilvh.b   \out3,    \tmp3,    \tmp2
+    xvilvl.b   \out4,    \tmp5,    \tmp4
+    xvilvh.b   \out5,    \tmp5,    \tmp4
+    xvilvl.b   \out6,    \tmp7,    \tmp6
+    xvilvh.b   \out7,    \tmp7,    \tmp6
+    xvilvl.w   \tmp0,    \out2,    \out0
+    xvilvh.w   \tmp2,    \out2,    \out0
+    xvilvl.w   \tmp4,    \out3,    \out1
+    xvilvh.w   \tmp6,    \out3,    \out1
+    xvilvl.w   \tmp1,    \out6,    \out4
+    xvilvh.w   \tmp3,    \out6,    \out4
+    xvilvl.w   \tmp5,    \out7,    \out5
+    xvilvh.w   \tmp7,    \out7,    \out5
+    xvilvl.d   \out0,    \tmp1,    \tmp0
+    xvilvh.d   \out1,    \tmp1,    \tmp0
+    xvilvl.d   \out2,    \tmp3,    \tmp2
+    xvilvh.d   \out3,    \tmp3,    \tmp2
+    xvilvl.d   \out4,    \tmp5,    \tmp4
+    xvilvh.d   \out5,    \tmp5,    \tmp4
+    xvilvl.d   \out6,    \tmp7,    \tmp6
+    xvilvh.d   \out7,    \tmp7,    \tmp6
+.endm
+
+/*
+ * Description : Transpose 4x4 block with half-word elements in vectors
+ * Arguments   : Inputs  - in0, in1, in2, in3 (combined only through xvilvl.h)
+ *               Outputs - out0, out1, out2, out3 (tmp0 and tmp1 are clobbered)
+ */
+.macro LASX_TRANSPOSE4x4_H in0, in1, in2, in3, out0, out1, out2, out3, \
+                           tmp0, tmp1
+    xvilvl.h   \tmp0,  \in1,   \in0
+    xvilvl.h   \tmp1,  \in3,   \in2
+    xvilvl.w   \out0,  \tmp1,  \tmp0    /* out0 must not be tmp0/tmp1: both are read again next */
+    xvilvh.w   \out2,  \tmp1,  \tmp0
+    xvilvh.d   \out1,  \out0,  \out0    /* out1 must not be out0/out2: both are read again next */
+    xvilvh.d   \out3,  \out0,  \out2
+.endm
+
+/*
+ * Description : Transpose 4x8 block with half-word elements in vectors
+ * Arguments   : Inputs  - in0, in1, in2, in3 (interleaved as the pairs in0/in2 and in1/in3)
+ *               Outputs - out0, out1, out2, out3 (tmp0 and tmp1 are clobbered)
+ */
+.macro LASX_TRANSPOSE4x8_H in0, in1, in2, in3, out0, out1, out2, out3, \
+                           tmp0, tmp1
+    xvilvl.h      \tmp0,    \in2,   \in0
+    xvilvl.h      \tmp1,    \in3,   \in1
+    xvilvl.h      \out2,    \tmp1,  \tmp0
+    xvilvh.h      \out3,    \tmp1,  \tmp0
+
+    xvilvl.d      \out0,    \out2,  \out2    /* out2/out3 hold intermediates until the lines below */
+    xvilvh.d      \out1,    \out2,  \out2
+    xvilvl.d      \out2,    \out3,  \out3
+    xvilvh.d      \out3,    \out3,  \out3
+.endm
+
+/*
+ * Description : Transpose 8x8 block with half-word elements in vectors
+ * Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7
+ *               Outputs - out0, out1, out2, out3, out4, out5, out6, out7 (tmp0..tmp7 are clobbered)
+ */
+.macro LASX_TRANSPOSE8x8_H in0, in1, in2, in3, in4, in5, in6, in7,         \
+                           out0, out1, out2, out3, out4, out5, out6, out7, \
+                           tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7
+    xvilvl.h     \tmp0,   \in6,     \in4
+    xvilvl.h     \tmp1,   \in7,     \in5
+    xvilvl.h     \tmp2,   \in2,     \in0
+    xvilvl.h     \tmp3,   \in3,     \in1
+
+    xvilvl.h     \tmp4,   \tmp1,    \tmp0
+    xvilvh.h     \tmp5,   \tmp1,    \tmp0
+    xvilvl.h     \tmp6,   \tmp3,    \tmp2
+    xvilvh.h     \tmp7,   \tmp3,    \tmp2
+
+    xvilvh.h     \tmp0,   \in6,     \in4    /* inputs are read again here, after tmp4..tmp7 were built */
+    xvilvh.h     \tmp1,   \in7,     \in5
+    xvilvh.h     \tmp2,   \in2,     \in0
+    xvilvh.h     \tmp3,   \in3,     \in1
+
+    xvpickev.d   \out0,   \tmp4,    \tmp6
+    xvpickod.d   \out1,   \tmp4,    \tmp6
+    xvpickev.d   \out2,   \tmp5,    \tmp7
+    xvpickod.d   \out3,   \tmp5,    \tmp7
+
+    xvilvl.h     \tmp4,   \tmp1,    \tmp0
+    xvilvh.h     \tmp5,   \tmp1,    \tmp0
+    xvilvl.h     \tmp6,   \tmp3,    \tmp2
+    xvilvh.h     \tmp7,   \tmp3,    \tmp2
+
+    xvpickev.d   \out4,   \tmp4,    \tmp6
+    xvpickod.d   \out5,   \tmp4,    \tmp6
+    xvpickev.d   \out6,   \tmp5,    \tmp7
+    xvpickod.d   \out7,   \tmp5,    \tmp7
+.endm
+
+/*
+ * Description : Transpose 2x4x4 block with half-word elements in vectors
+ * Arguments   : Inputs  - in0, in1, in2, in3
+ *               Outputs - out0, out1, out2, out3 (tmp0, tmp1 and tmp2 are clobbered)
+ */
+.macro LASX_TRANSPOSE2x4x4_H in0, in1, in2, in3, out0, out1, out2, out3, \
+                             tmp0, tmp1, tmp2
+    xvilvh.h   \tmp1,    \in0,     \in1
+    xvilvl.h   \out1,    \in0,     \in1    /* out1 and out3 are used as scratch until the final xvilv*.d */
+    xvilvh.h   \tmp0,    \in2,     \in3
+    xvilvl.h   \out3,    \in2,     \in3
+
+    xvilvh.w   \tmp2,    \out3,    \out1
+    xvilvl.w   \out3,    \out3,    \out1
+
+    xvilvl.w   \out2,    \tmp0,    \tmp1
+    xvilvh.w   \tmp1,    \tmp0,    \tmp1
+
+    xvilvh.d   \out0,    \out2,    \out3
+    xvilvl.d   \out2,    \out2,    \out3
+    xvilvh.d   \out1,    \tmp1,    \tmp2
+    xvilvl.d   \out3,    \tmp1,    \tmp2
+.endm
+
+/*
+ * Description : Transpose 4x4 block with word elements in vectors
+ * Arguments   : Inputs  - in0, in1, in2, in3
+ *               Outputs - out0, out1, out2, out3 (tmp0 and tmp1 are clobbered)
+ * Details     : out1 and out3 also hold intermediates until the final xvilvh.d pair
+ * Example     : (each register holds two 4-word groups, transposed independently)
+ *               1, 2, 3, 4,  1, 2, 3, 4        1,5, 9,13, 1,5, 9,13
+ *               5, 6, 7, 8,  5, 6, 7, 8   to   2,6,10,14, 2,6,10,14
+ *               9,10,11,12,  9,10,11,12 =====> 3,7,11,15, 3,7,11,15
+ *              13,14,15,16, 13,14,15,16        4,8,12,16, 4,8,12,16
+ */
+.macro LASX_TRANSPOSE4x4_W in0, in1, in2, in3, out0, out1, out2, out3, \
+                           tmp0, tmp1
+
+    xvilvl.w    \tmp0,   \in1,    \in0
+    xvilvh.w    \out1,   \in1,    \in0
+    xvilvl.w    \tmp1,   \in3,    \in2
+    xvilvh.w    \out3,   \in3,    \in2
+
+    xvilvl.d    \out0,   \tmp1,   \tmp0
+    xvilvl.d    \out2,   \out3,   \out1
+    xvilvh.d    \out3,   \out3,   \out1
+    xvilvh.d    \out1,   \tmp1,   \tmp0
+.endm
+
+/*
+ * Description : Transpose 8x8 block with word elements in vectors
+ * Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7
+ *               Outputs - out0, out1, out2, out3, out4, out5, out6, out7
+ *               Scratch - tmp0, tmp1, tmp2, tmp3 (clobbered; relies on the xmov macro)
+ * Example     : LASX_TRANSPOSE8x8_W
+ *         in0 : 1,2,3,4,5,6,7,8
+ *         in1 : 2,2,3,4,5,6,7,8
+ *         in2 : 3,2,3,4,5,6,7,8
+ *         in3 : 4,2,3,4,5,6,7,8
+ *         in4 : 5,2,3,4,5,6,7,8
+ *         in5 : 6,2,3,4,5,6,7,8
+ *         in6 : 7,2,3,4,5,6,7,8
+ *         in7 : 8,2,3,4,5,6,7,8
+ *
+ *        out0 : 1,2,3,4,5,6,7,8
+ *        out1 : 2,2,2,2,2,2,2,2
+ *        out2 : 3,3,3,3,3,3,3,3
+ *        out3 : 4,4,4,4,4,4,4,4
+ *        out4 : 5,5,5,5,5,5,5,5
+ *        out5 : 6,6,6,6,6,6,6,6
+ *        out6 : 7,7,7,7,7,7,7,7
+ *        out7 : 8,8,8,8,8,8,8,8
+ */
+.macro LASX_TRANSPOSE8x8_W in0, in1, in2, in3, in4, in5, in6, in7,\
+                           out0, out1, out2, out3, out4, out5, out6, out7,\
+                           tmp0, tmp1, tmp2, tmp3
+    xvilvl.w    \tmp0,   \in2,    \in0
+    xvilvl.w    \tmp1,   \in3,    \in1
+    xvilvh.w    \tmp2,   \in2,    \in0
+    xvilvh.w    \tmp3,   \in3,    \in1
+    xvilvl.w    \out0,   \tmp1,   \tmp0
+    xvilvh.w    \out1,   \tmp1,   \tmp0
+    xvilvl.w    \out2,   \tmp3,   \tmp2
+    xvilvh.w    \out3,   \tmp3,   \tmp2
+
+    xvilvl.w    \tmp0,   \in6,    \in4
+    xvilvl.w    \tmp1,   \in7,    \in5
+    xvilvh.w    \tmp2,   \in6,    \in4
+    xvilvh.w    \tmp3,   \in7,    \in5
+    xvilvl.w    \out4,   \tmp1,   \tmp0
+    xvilvh.w    \out5,   \tmp1,   \tmp0
+    xvilvl.w    \out6,   \tmp3,   \tmp2
+    xvilvh.w    \out7,   \tmp3,   \tmp2
+
+    xmov        \tmp0,   \out0    /* keep copies of out0..out3: they are overwritten just below */
+    xmov        \tmp1,   \out1
+    xmov        \tmp2,   \out2
+    xmov        \tmp3,   \out3
+    xvpermi.q   \out0,   \out4,   0x02
+    xvpermi.q   \out1,   \out5,   0x02
+    xvpermi.q   \out2,   \out6,   0x02
+    xvpermi.q   \out3,   \out7,   0x02
+    xvpermi.q   \out4,   \tmp0,   0x31
+    xvpermi.q   \out5,   \tmp1,   0x31
+    xvpermi.q   \out6,   \tmp2,   0x31
+    xvpermi.q   \out7,   \tmp3,   0x31
+.endm
+
+/*
+ * Description : Transpose 4x4 block with double-word elements in vectors
+ * Arguments   : Inputs  - in0, in1, in2, in3
+ *               Outputs - out0, out1, out2, out3 (tmp0 and tmp1 are clobbered)
+ * Example     : LASX_TRANSPOSE4x4_D
+ *         in0 : 1,2,3,4
+ *         in1 : 1,2,3,4
+ *         in2 : 1,2,3,4
+ *         in3 : 1,2,3,4
+ *
+ *        out0 : 1,1,1,1
+ *        out1 : 2,2,2,2
+ *        out2 : 3,3,3,3
+ *        out3 : 4,4,4,4
+ */
+.macro LASX_TRANSPOSE4x4_D in0, in1, in2, in3, out0, out1, out2, out3, \
+                           tmp0, tmp1
+    xvilvl.d    \tmp0,   \in1,    \in0
+    xvilvh.d    \out1,   \in1,    \in0
+    xvilvh.d    \tmp1,   \in3,    \in2
+    xvilvl.d    \out2,   \in3,    \in2
+
+    xvor.v      \out0,   \tmp0,   \tmp0    /* register copy (the same operation the xmov macro emits) */
+    xvor.v      \out3,   \tmp1,   \tmp1
+
+    xvpermi.q   \out0,   \out2,   0x02
+    xvpermi.q   \out2,   \tmp0,   0x31
+    xvpermi.q   \out3,   \out1,   0x31
+    xvpermi.q   \out1,   \tmp1,   0x02
+.endm
--- a/common/loongarch/loongson_util.S
+++ b/common/loongarch/loongson_util.S
@@ -0,0 +1,47 @@
+/*****************************************************************************
+ * loongson_util.S: loongson utility macros
+ *****************************************************************************
+ * Copyright (C) 2023-2025 x264 project
+ *
+ * Authors: Shiyou Yin <yinshiyou-hf@loongson.cn>
+ *          Xiwei Gu <guxiwei-hf@loongson.cn>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#define GLUE(a, b) a ## b
+#define JOIN(a, b) GLUE(a, b)
+/* JOIN pastes through GLUE so that macro arguments such as BIT_DEPTH are expanded first. */
+/* Set prefix as needed: ASM_REF becomes x264_<BIT_DEPTH>_ (BIT_DEPTH is not defined in this file). */
+#define  ASM_REF  JOIN(JOIN(x264_, BIT_DEPTH), _)
+/* NOTE(review): presumably mirrors the C-side FENC_STRIDE/FDEC_STRIDE values - confirm they stay in sync. */
+#define FENC_STRIDE      16
+#define FDEC_STRIDE      32
+/* function_x264 name[, align]: begin global function ASM_REF<name> in .text; endfunc_x264 ends it. */
+.macro function_x264 name, align=DEFAULT_ALIGN
+.macro endfunc_x264
+    jirl    $r0, $r1, 0x0                   /* return: jump to the address in $r1, link into $r0 */
+    .size ASM_REF\name, . - ASM_REF\name    /* size = distance from the entry label to here */
+    .purgem endfunc_x264                    /* drop this definition so the next function_x264 can recreate it */
+.endm
+.text ;
+.align \align ;                             /* DEFAULT_ALIGN is not defined in this file */
+.globl ASM_REF\name ;
+.type  ASM_REF\name, @function ;
+ASM_REF\name: ;
+.endm
--- a/common/loongarch/mc-a.S
+++ b/common/loongarch/mc-a.S
--- a/common/loongarch/mc-c.c
+++ b/common/loongarch/mc-c.c
@@ -0,0 +1,406 @@
+/*****************************************************************************
+ * mc-c.c: loongarch motion compensation
+ *****************************************************************************
+ * Copyright (C) 2023-2025 x264 project
+ *
+ * Authors: Xiwei Gu <guxiwei-hf@loongson.cn>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#include "common/common.h"
+#include "mc.h"
+
+#if !HIGH_BIT_DEPTH
+
+/* MC_WEIGHT_LSX(func): emit the LSX weight table mc<func>_wtab_lsx.  Code in this */
+/* file indexes it with (width >> 2); slots hold the w4,w4,w8,w16,w16,w20 kernels. */
+#define MC_WEIGHT_LSX(func)                                              \
+static weight_fn_t mc##func##_wtab_lsx[6] =                              \
+{                                                                        \
+    x264_mc_weight_w4##func##_lsx,  x264_mc_weight_w4##func##_lsx,       \
+    x264_mc_weight_w8##func##_lsx,                                       \
+    x264_mc_weight_w16##func##_lsx, x264_mc_weight_w16##func##_lsx,      \
+    x264_mc_weight_w20##func##_lsx,                                      \
+};
+
+/* MC_WEIGHT(func): emit the LASX weight table mc<func>_wtab_lasx.  Code in this   */
+/* file indexes it with (width >> 2); slots hold the w4,w4,w8,w16,w16,w20 kernels. */
+#define MC_WEIGHT(func)                                                  \
+static weight_fn_t mc##func##_wtab_lasx[6] =                             \
+{                                                                        \
+    x264_mc_weight_w4##func##_lasx,  x264_mc_weight_w4##func##_lasx,     \
+    x264_mc_weight_w8##func##_lasx,                                      \
+    x264_mc_weight_w16##func##_lasx, x264_mc_weight_w16##func##_lasx,    \
+    x264_mc_weight_w20##func##_lasx,                                     \
+};
+
+/* Instantiate the regular and "_noden" weight tables for LSX and LASX.  No extra */
+/* guard is needed: these lines already sit inside the file's !HIGH_BIT_DEPTH block. */
+MC_WEIGHT_LSX()
+MC_WEIGHT_LSX(_noden)
+MC_WEIGHT()
+MC_WEIGHT(_noden)
+
+/*
+ * weight_cache hook for LSX: picks the kernel table a weight will use.
+ * A denominator of at least 1 selects the regular table, anything else
+ * the "_noden" table.  h is not used.
+ */
+static void weight_cache_lsx( x264_t *h, x264_weight_t *w )
+{
+    w->weightfn = ( w->i_denom < 1 ) ? mc_noden_wtab_lsx : mc_wtab_lsx;
+}
+
+/* LSX weight kernels by (width >> 2); installed as pf->weight/offsetadd/offsetsub. */
+static weight_fn_t mc_weight_wtab_lsx[6] =
+{
+    [0] = x264_mc_weight_w4_lsx,
+    [1] = x264_mc_weight_w4_lsx,
+    [2] = x264_mc_weight_w8_lsx,
+    [3] = x264_mc_weight_w16_lsx, [4] = x264_mc_weight_w16_lsx,
+    [5] = x264_mc_weight_w20_lsx,
+};
+
+/* LSX avg2 kernels by (width >> 2); slot 0 has no kernel and stays NULL. */
+static void (* const pixel_avg_wtab_lsx[6])(uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int ) =
+{
+    [1] = x264_pixel_avg2_w4_lsx,
+    [2] = x264_pixel_avg2_w8_lsx,
+    [3] = x264_pixel_avg2_w16_lsx,
+    [4] = x264_pixel_avg2_w16_lsx,
+    [5] = x264_pixel_avg2_w20_lsx,
+};
+
+/* LSX copy kernels by (width >> 2).  Slots 0 and 3 stay NULL, so only */
+/* widths 4, 8 and 16 have an entry.                                   */
+static void (* const mc_copy_wtab_lsx[5])( uint8_t *, intptr_t, uint8_t *, intptr_t, int ) =
+{
+    [1] = x264_mc_copy_w4_lsx,
+    [2] = x264_mc_copy_w8_lsx,
+    [4] = x264_mc_copy_w16_lsx,
+};
+
+/*
+ * weight_cache hook for LASX: picks the kernel table a weight will use.
+ * A denominator of at least 1 selects the regular table, anything else
+ * the "_noden" table.  h is not used.
+ */
+static void weight_cache_lasx( x264_t *h, x264_weight_t *w )
+{
+    w->weightfn = ( w->i_denom < 1 ) ? mc_noden_wtab_lasx : mc_wtab_lasx;
+}
+
+/* LASX weight kernels by (width >> 2); installed as pf->weight/offsetadd/offsetsub. */
+static weight_fn_t mc_weight_wtab_lasx[6] =
+{
+    [0] = x264_mc_weight_w4_lasx,
+    [1] = x264_mc_weight_w4_lasx,
+    [2] = x264_mc_weight_w8_lasx,
+    [3] = x264_mc_weight_w16_lasx, [4] = x264_mc_weight_w16_lasx,
+    [5] = x264_mc_weight_w20_lasx,
+};
+
+/* LASX avg2 kernels by (width >> 2); slot 0 has no kernel and stays NULL. */
+static void (* const pixel_avg_wtab_lasx[6])(uint8_t *, intptr_t, uint8_t *,
+                                             intptr_t, uint8_t *, int ) =
+{
+    [1] = x264_pixel_avg2_w4_lasx,
+    [2] = x264_pixel_avg2_w8_lasx,
+    [3] = x264_pixel_avg2_w16_lasx,
+    [4] = x264_pixel_avg2_w16_lasx,
+    [5] = x264_pixel_avg2_w20_lasx,
+};
+
+/* LASX copy kernels by (width >> 2).  Slots 0 and 3 stay NULL, so only */
+/* widths 4, 8 and 16 have an entry.                                    */
+static void (* const mc_copy_wtab_lasx[5])( uint8_t *, intptr_t, uint8_t *,
+                                            intptr_t, int ) =
+{
+    [1] = x264_mc_copy_w4_lasx,
+    [2] = x264_mc_copy_w8_lasx,
+    [4] = x264_mc_copy_w16_lasx,
+};
+
+/*
+ * get_ref for LSX: returns a pointer to the block addressed by the
+ * quarter-pel vector (m_vx, m_vy).  When two planes must be averaged or a
+ * weight function is set, the block is built in p_dst and p_dst is returned.
+ * Otherwise no pixels are moved: *p_dst_stride is replaced by i_src_stride
+ * and the returned pointer aims into the source plane itself.
+ */
+static uint8_t *get_ref_lsx( uint8_t *p_dst, intptr_t *p_dst_stride,
+                             uint8_t *p_src[4], intptr_t i_src_stride,
+                             int32_t m_vx, int32_t m_vy,
+                             int32_t i_width, int32_t i_height,
+                             const x264_weight_t *pWeight )
+{
+    const int32_t frac_x = m_vx & 3;
+    const int32_t frac_y = m_vy & 3;
+    const int32_t qpel = ( frac_y << 2 ) + frac_x;
+    const int32_t wtab_idx = i_width >> 2;    /* slot in the per-width tables */
+    /* Whole-pixel part of the vector, as an offset into a plane. */
+    int32_t full_off = ( m_vy >> 2 ) * i_src_stride + ( m_vx >> 2 );
+    uint8_t *ref_a = p_src[x264_hpel_ref0[qpel]] + full_off;
+
+    if( frac_y == 3 )
+        ref_a += i_src_stride;                /* first plane is read one row lower */
+
+    if( !( qpel & 5 ) )
+    {
+        /* Even x and y fractions: a single plane is enough. */
+        if( !pWeight->weightfn )
+        {
+            *p_dst_stride = i_src_stride;
+            return ref_a;
+        }
+        pWeight->weightfn[wtab_idx]( p_dst, *p_dst_stride, ref_a, i_src_stride, pWeight, i_height );
+        return p_dst;
+    }
+
+    /* Odd x or y fraction: average two planes, then weight in place if asked. */
+    uint8_t *ref_b = p_src[x264_hpel_ref1[qpel]] + full_off + ( frac_x == 3 );
+    pixel_avg_wtab_lsx[wtab_idx]( p_dst, *p_dst_stride, ref_a, i_src_stride, ref_b, i_height );
+    if( pWeight->weightfn )
+        pWeight->weightfn[wtab_idx]( p_dst, *p_dst_stride, p_dst, *p_dst_stride, pWeight, i_height );
+    return p_dst;
+}
+
+/*
+ * mc_luma for LSX: renders the i_width x i_height block addressed by the
+ * quarter-pel vector (m_vx, m_vy) into p_dst.  The pixels always end up in
+ * p_dst: two planes are averaged when either fraction is odd, the weight
+ * function is applied when one is set, and a plain copy is made when
+ * neither applies.
+ */
+static void mc_luma_lsx( uint8_t *p_dst, intptr_t i_dst_stride,
+                         uint8_t *p_src[4], intptr_t i_src_stride,
+                         int32_t m_vx, int32_t m_vy,
+                         int32_t i_width, int32_t i_height,
+                         const x264_weight_t *pWeight )
+{
+    const int32_t frac_x = m_vx & 3;
+    const int32_t frac_y = m_vy & 3;
+    const int32_t qpel = ( frac_y << 2 ) + frac_x;
+    const int32_t wtab_idx = i_width >> 2;    /* slot in the per-width tables */
+    /* Whole-pixel part of the vector, as an offset into a plane. */
+    int32_t full_off = ( m_vy >> 2 ) * i_src_stride + ( m_vx >> 2 );
+    uint8_t *ref_a = p_src[x264_hpel_ref0[qpel]] + full_off +
+                     ( frac_y == 3 ) * i_src_stride;
+
+    if( !( qpel & 5 ) )
+    {
+        /* Even x and y fractions: weight straight from the source, or copy. */
+        if( pWeight->weightfn )
+            pWeight->weightfn[wtab_idx]( p_dst, i_dst_stride, ref_a, i_src_stride, pWeight, i_height );
+        else
+            mc_copy_wtab_lsx[wtab_idx]( p_dst, i_dst_stride, ref_a, i_src_stride, i_height );
+        return;
+    }
+
+    /* Odd x or y fraction: average two planes, then weight in place if asked. */
+    uint8_t *ref_b = p_src[x264_hpel_ref1[qpel]] + full_off + ( frac_x == 3 );
+    pixel_avg_wtab_lsx[wtab_idx]( p_dst, i_dst_stride, ref_a, i_src_stride, ref_b, i_height );
+    if( pWeight->weightfn )
+        pWeight->weightfn[wtab_idx]( p_dst, i_dst_stride, p_dst, i_dst_stride, pWeight, i_height );
+}
+
+PLANE_INTERLEAVE(lsx)       /* NOTE(review): presumably emits plane_copy_interleave_lsx (installed below); macro is defined elsewhere */
+PLANE_COPY_YUYV(32, lsx)    /* NOTE(review): presumably emits plane_copy_deinterleave_yuyv_lsx; meaning of 32 is not visible here */
+/* Chroma MC kernel: only declared here (no definition in this file); installed below as pf->mc_chroma. */
+#define x264_mc_chroma_lsx x264_template(mc_chroma_lsx)
+void x264_mc_chroma_lsx( uint8_t *p_dst_u, uint8_t *p_dst_v,
+                         intptr_t i_dst_stride,
+                         uint8_t *p_src, intptr_t i_src_stride,
+                         int32_t m_vx, int32_t m_vy,
+                         int32_t i_width, int32_t i_height );
+
+/*
+ * get_ref for LASX: returns a pointer to the block addressed by the
+ * quarter-pel vector (m_vx, m_vy).  When two planes must be averaged or a
+ * weight function is set, the block is built in p_dst and p_dst is returned.
+ * Otherwise no pixels are moved: *p_dst_stride is replaced by i_src_stride
+ * and the returned pointer aims into the source plane itself.
+ */
+static uint8_t *get_ref_lasx( uint8_t *p_dst, intptr_t *p_dst_stride,
+                              uint8_t *p_src[4], intptr_t i_src_stride,
+                              int32_t m_vx, int32_t m_vy,
+                              int32_t i_width, int32_t i_height,
+                              const x264_weight_t *pWeight )
+{
+    const int32_t frac_x = m_vx & 3;
+    const int32_t frac_y = m_vy & 3;
+    const int32_t qpel = ( frac_y << 2 ) + frac_x;
+    const int32_t wtab_idx = i_width >> 2;    /* slot in the per-width tables */
+    /* Whole-pixel part of the vector, as an offset into a plane. */
+    int32_t full_off = ( m_vy >> 2 ) * i_src_stride + ( m_vx >> 2 );
+    uint8_t *ref_a = p_src[x264_hpel_ref0[qpel]] + full_off;
+
+    if( frac_y == 3 )
+        ref_a += i_src_stride;                /* first plane is read one row lower */
+
+    if( !( qpel & 5 ) )
+    {
+        /* Even x and y fractions: a single plane is enough. */
+        if( !pWeight->weightfn )
+        {
+            *p_dst_stride = i_src_stride;
+            return ref_a;
+        }
+        pWeight->weightfn[wtab_idx]( p_dst, *p_dst_stride, ref_a, i_src_stride, pWeight, i_height );
+        return p_dst;
+    }
+
+    /* Odd x or y fraction: average two planes, then weight in place if asked. */
+    uint8_t *ref_b = p_src[x264_hpel_ref1[qpel]] + full_off + ( frac_x == 3 );
+    pixel_avg_wtab_lasx[wtab_idx]( p_dst, *p_dst_stride, ref_a, i_src_stride, ref_b, i_height );
+    if( pWeight->weightfn )
+        pWeight->weightfn[wtab_idx]( p_dst, *p_dst_stride, p_dst, *p_dst_stride, pWeight, i_height );
+    return p_dst;
+}
+
+/*
+ * mc_luma for LASX: renders the i_width x i_height block addressed by the
+ * quarter-pel vector (m_vx, m_vy) into p_dst.  The pixels always end up in
+ * p_dst: two planes are averaged when either fraction is odd, the weight
+ * function is applied when one is set, and a plain copy is made when
+ * neither applies.
+ */
+static void mc_luma_lasx( uint8_t *p_dst, intptr_t i_dst_stride,
+                          uint8_t *p_src[4], intptr_t i_src_stride,
+                          int32_t m_vx, int32_t m_vy,
+                          int32_t i_width, int32_t i_height,
+                          const x264_weight_t *pWeight )
+{
+    const int32_t frac_x = m_vx & 3;
+    const int32_t frac_y = m_vy & 3;
+    const int32_t qpel = ( frac_y << 2 ) + frac_x;
+    const int32_t wtab_idx = i_width >> 2;    /* slot in the per-width tables */
+    /* Whole-pixel part of the vector, as an offset into a plane. */
+    int32_t full_off = ( m_vy >> 2 ) * i_src_stride + ( m_vx >> 2 );
+    uint8_t *ref_a = p_src[x264_hpel_ref0[qpel]] + full_off +
+                     ( frac_y == 3 ) * i_src_stride;
+
+    if( !( qpel & 5 ) )
+    {
+        /* Even x and y fractions: weight straight from the source, or copy. */
+        if( pWeight->weightfn )
+            pWeight->weightfn[wtab_idx]( p_dst, i_dst_stride, ref_a, i_src_stride, pWeight, i_height );
+        else
+            mc_copy_wtab_lasx[wtab_idx]( p_dst, i_dst_stride, ref_a, i_src_stride, i_height );
+        return;
+    }
+
+    /* Odd x or y fraction: average two planes, then weight in place if asked. */
+    uint8_t *ref_b = p_src[x264_hpel_ref1[qpel]] + full_off + ( frac_x == 3 );
+    pixel_avg_wtab_lasx[wtab_idx]( p_dst, i_dst_stride, ref_a, i_src_stride, ref_b, i_height );
+    if( pWeight->weightfn )
+        pWeight->weightfn[wtab_idx]( p_dst, i_dst_stride, p_dst, i_dst_stride, pWeight, i_height );
+}
+
+PLANE_COPY_YUYV(64, lasx)   /* NOTE(review): presumably emits plane_copy_deinterleave_yuyv_lasx; meaning of 64 is not visible here */
+/* Chroma MC kernel: only declared here (no definition in this file); installed below as pf->mc_chroma. */
+#define x264_mc_chroma_lasx x264_template(mc_chroma_lasx)
+void x264_mc_chroma_lasx( uint8_t *p_dst_u, uint8_t *p_dst_v,
+                          intptr_t i_dst_stride,
+                          uint8_t *p_src, intptr_t i_src_stride,
+                          int32_t m_vx, int32_t m_vy,
+                          int32_t i_width, int32_t i_height );
+#endif // !HIGH_BIT_DEPTH
+/* Installs the LSX and then the LASX motion-compensation routines into pf, as enabled by the cpu flags. */
+void x264_mc_init_loongarch( int cpu, x264_mc_functions_t *pf )
+{
+#if !HIGH_BIT_DEPTH
+    if( cpu & X264_CPU_LSX )
+    {
+        pf->mc_luma = mc_luma_lsx;
+        pf->mc_chroma = x264_mc_chroma_lsx;
+        pf->get_ref = get_ref_lsx;
+        /* pixel averaging, one entry per block size */
+        pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_lsx;
+        pf->avg[PIXEL_16x8] = x264_pixel_avg_16x8_lsx;
+        pf->avg[PIXEL_8x16] = x264_pixel_avg_8x16_lsx;
+        pf->avg[PIXEL_8x8] = x264_pixel_avg_8x8_lsx;
+        pf->avg[PIXEL_8x4] = x264_pixel_avg_8x4_lsx;
+        pf->avg[PIXEL_4x16] = x264_pixel_avg_4x16_lsx;
+        pf->avg[PIXEL_4x8] = x264_pixel_avg_4x8_lsx;
+        pf->avg[PIXEL_4x4] = x264_pixel_avg_4x4_lsx;
+        pf->avg[PIXEL_4x2] = x264_pixel_avg_4x2_lsx;
+        /* weight tables: the same table serves weight, offsetadd and offsetsub */
+        pf->weight = mc_weight_wtab_lsx;
+        pf->offsetadd = mc_weight_wtab_lsx;
+        pf->offsetsub = mc_weight_wtab_lsx;
+        pf->weight_cache = weight_cache_lsx;
+        /* block copies */
+        pf->copy_16x16_unaligned = x264_mc_copy_w16_lsx;
+        pf->copy[PIXEL_16x16] = x264_mc_copy_w16_lsx;
+        pf->copy[PIXEL_8x8] = x264_mc_copy_w8_lsx;
+        pf->copy[PIXEL_4x4] = x264_mc_copy_w4_lsx;
+        /* chroma interleave / deinterleave */
+        pf->store_interleave_chroma = x264_store_interleave_chroma_lsx;
+        pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_lsx;
+        pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_lsx;
+        /* whole-plane copies */
+        pf->plane_copy_interleave = plane_copy_interleave_lsx;
+        pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_lsx;
+        pf->plane_copy_deinterleave_yuyv = plane_copy_deinterleave_yuyv_lsx;
+        /* filters and memory helpers */
+        pf->hpel_filter = x264_hpel_filter_lsx;
+        pf->memcpy_aligned = x264_memcpy_aligned_lsx;
+        pf->memzero_aligned = x264_memzero_aligned_lsx;
+        pf->frame_init_lowres_core = x264_frame_init_lowres_core_lsx;
+        /* prefetch */
+        pf->prefetch_fenc_420 = x264_prefetch_fenc_420_lsx;
+        pf->prefetch_fenc_422 = x264_prefetch_fenc_422_lsx;
+        pf->prefetch_ref  = x264_prefetch_ref_lsx;
+    }
+    /* LASX is handled second, so its routines replace earlier ones; pointers it skips keep whatever was set above. */
+    if( cpu & X264_CPU_LASX )
+    {
+        pf->mc_luma = mc_luma_lasx;
+        pf->mc_chroma = x264_mc_chroma_lasx;
+        pf->get_ref = get_ref_lasx;
+        /* pixel averaging; PIXEL_16x16 is not assigned in this branch */
+        pf->avg[PIXEL_16x8] = x264_pixel_avg_16x8_lasx;
+        pf->avg[PIXEL_8x16] = x264_pixel_avg_8x16_lasx;
+        pf->avg[PIXEL_8x8] = x264_pixel_avg_8x8_lasx;
+        pf->avg[PIXEL_8x4] = x264_pixel_avg_8x4_lasx;
+        pf->avg[PIXEL_4x16] = x264_pixel_avg_4x16_lasx;
+        pf->avg[PIXEL_4x8] = x264_pixel_avg_4x8_lasx;
+        pf->avg[PIXEL_4x4] = x264_pixel_avg_4x4_lasx;
+        pf->avg[PIXEL_4x2] = x264_pixel_avg_4x2_lasx;
+        /* weight tables: the same table serves weight, offsetadd and offsetsub */
+        pf->weight = mc_weight_wtab_lasx;
+        pf->offsetadd = mc_weight_wtab_lasx;
+        pf->offsetsub = mc_weight_wtab_lasx;
+        pf->weight_cache = weight_cache_lasx;
+        /* whole-plane copies */
+        pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_lasx;
+        pf->plane_copy_deinterleave_yuyv = plane_copy_deinterleave_yuyv_lasx;
+        /* block copies */
+        pf->copy_16x16_unaligned = x264_mc_copy_w16_lasx;
+        pf->copy[PIXEL_16x16] = x264_mc_copy_w16_lasx;
+        pf->copy[PIXEL_8x8] = x264_mc_copy_w8_lasx;
+        pf->copy[PIXEL_4x4] = x264_mc_copy_w4_lasx;
+        /* filters and memory helpers */
+        pf->hpel_filter = x264_hpel_filter_lasx;
+        pf->memzero_aligned = x264_memzero_aligned_lasx;
+        pf->frame_init_lowres_core = x264_frame_init_lowres_core_lasx;
+    }
+#endif // !HIGH_BIT_DEPTH
+}
--- a/common/loongarch/mc.h
+++ b/common/loongarch/mc.h
@@ -0,0 +1,196 @@
+/*****************************************************************************
+ * mc.h: loongarch motion compensation
+ *****************************************************************************
+ * Copyright (C) 2023-2025 x264 project
+ *
+ * Authors: Xiwei Gu <guxiwei-hf@loongson.cn>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#ifndef X264_LOONGARCH_MC_H
+#define X264_LOONGARCH_MC_H
+/* Entry point: installs the LSX/LASX routines declared below into pf according to the cpu flags. */
+#define x264_mc_init_loongarch x264_template(mc_init_loongarch)
+void x264_mc_init_loongarch( int cpu, x264_mc_functions_t *pf );
+/* LSX pixel_avg_WxH kernels; x264_mc_init_loongarch installs them into pf->avg[PIXEL_WxH]. */
+#define x264_pixel_avg_16x16_lsx x264_template(pixel_avg_16x16_lsx)
+void x264_pixel_avg_16x16_lsx( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int );
+#define x264_pixel_avg_16x8_lsx x264_template(pixel_avg_16x8_lsx)
+void x264_pixel_avg_16x8_lsx( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int );
+#define x264_pixel_avg_8x16_lsx x264_template(pixel_avg_8x16_lsx)
+void x264_pixel_avg_8x16_lsx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_8x8_lsx x264_template(pixel_avg_8x8_lsx)
+void x264_pixel_avg_8x8_lsx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_8x4_lsx x264_template(pixel_avg_8x4_lsx)
+void x264_pixel_avg_8x4_lsx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_4x16_lsx x264_template(pixel_avg_4x16_lsx)
+void x264_pixel_avg_4x16_lsx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_4x8_lsx x264_template(pixel_avg_4x8_lsx)
+void x264_pixel_avg_4x8_lsx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_4x4_lsx x264_template(pixel_avg_4x4_lsx)
+void x264_pixel_avg_4x4_lsx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_4x2_lsx x264_template(pixel_avg_4x2_lsx)
+void x264_pixel_avg_4x2_lsx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+/* LSX avg2 kernels; mc-c.c calls them as (dst, dst_stride, src1, src_stride, src2, height). */
+#define x264_pixel_avg2_w4_lsx x264_template(pixel_avg2_w4_lsx)
+void x264_pixel_avg2_w4_lsx ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
+#define x264_pixel_avg2_w8_lsx x264_template(pixel_avg2_w8_lsx)
+void x264_pixel_avg2_w8_lsx ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
+#define x264_pixel_avg2_w16_lsx x264_template(pixel_avg2_w16_lsx)
+void x264_pixel_avg2_w16_lsx ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
+#define x264_pixel_avg2_w20_lsx x264_template(pixel_avg2_w20_lsx)
+void x264_pixel_avg2_w20_lsx ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
+/* LSX weight kernels (regular and "_noden"); mc-c.c calls them as (dst, dst_stride, src, src_stride, weight, height). */
+#define x264_mc_weight_w20_lsx x264_template(mc_weight_w20_lsx)
+void x264_mc_weight_w20_lsx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
+#define x264_mc_weight_w20_noden_lsx x264_template(mc_weight_w20_noden_lsx)
+void x264_mc_weight_w20_noden_lsx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
+#define x264_mc_weight_w16_lsx x264_template(mc_weight_w16_lsx)
+void x264_mc_weight_w16_lsx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
+#define x264_mc_weight_w16_noden_lsx x264_template(mc_weight_w16_noden_lsx)
+void x264_mc_weight_w16_noden_lsx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
+#define x264_mc_weight_w8_lsx x264_template(mc_weight_w8_lsx)
+void x264_mc_weight_w8_lsx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
+#define x264_mc_weight_w8_noden_lsx x264_template(mc_weight_w8_noden_lsx)
+void x264_mc_weight_w8_noden_lsx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
+#define x264_mc_weight_w4_lsx x264_template(mc_weight_w4_lsx)
+void x264_mc_weight_w4_lsx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
+#define x264_mc_weight_w4_noden_lsx x264_template(mc_weight_w4_noden_lsx)
+void x264_mc_weight_w4_noden_lsx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
+/* LSX block copies; mc-c.c calls them as (dst, dst_stride, src, src_stride, height). */
+#define x264_mc_copy_w16_lsx x264_template(mc_copy_w16_lsx)
+void x264_mc_copy_w16_lsx( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_mc_copy_w8_lsx x264_template(mc_copy_w8_lsx)
+void x264_mc_copy_w8_lsx( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_mc_copy_w4_lsx x264_template(mc_copy_w4_lsx)
+void x264_mc_copy_w4_lsx( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+/* LSX chroma interleave / deinterleave helpers. */
+#define x264_store_interleave_chroma_lsx x264_template(store_interleave_chroma_lsx)
+void x264_store_interleave_chroma_lsx( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
+#define x264_load_deinterleave_chroma_fenc_lsx x264_template(load_deinterleave_chroma_fenc_lsx)
+void x264_load_deinterleave_chroma_fenc_lsx( pixel *dst, pixel *src, intptr_t i_src, int height );
+#define x264_load_deinterleave_chroma_fdec_lsx x264_template(load_deinterleave_chroma_fdec_lsx)
+void x264_load_deinterleave_chroma_fdec_lsx( pixel *dst, pixel *src, intptr_t i_src, int height );
+/* LSX whole-plane interleave core and deinterleave. */
+#define x264_plane_copy_interleave_core_lsx x264_template(plane_copy_interleave_core_lsx)
+void x264_plane_copy_interleave_core_lsx( pixel *dst,  intptr_t i_dst,
+                                          pixel *srcu, intptr_t i_srcu,
+                                          pixel *srcv, intptr_t i_srcv, int w, int h );
+#define x264_plane_copy_deinterleave_lsx x264_template(plane_copy_deinterleave_lsx)
+void x264_plane_copy_deinterleave_lsx( pixel *dstu, intptr_t i_dstu,
+                                       pixel *dstv, intptr_t i_dstv,
+                                       pixel *src,  intptr_t i_src, int w, int h );
+/* LASX plane deinterleave (this declaration appears a second time further down in this header). */
+#define x264_plane_copy_deinterleave_lasx x264_template(plane_copy_deinterleave_lasx)
+void x264_plane_copy_deinterleave_lasx( pixel *dstu, intptr_t i_dstu,
+                                        pixel *dstv, intptr_t i_dstv,
+                                        pixel *src,  intptr_t i_src, int w, int h );
+/* LSX prefetch helpers. */
+#define x264_prefetch_fenc_420_lsx x264_template(prefetch_fenc_420_lsx)
+void x264_prefetch_fenc_420_lsx( uint8_t *pix_y, intptr_t stride_y,
+                                 uint8_t *pix_uv, intptr_t stride_uv,
+                                 int32_t mb_x );
+#define x264_prefetch_fenc_422_lsx x264_template(prefetch_fenc_422_lsx)
+void x264_prefetch_fenc_422_lsx( uint8_t *pix_y, intptr_t stride_y,
+                                 uint8_t *pix_uv, intptr_t stride_uv,
+                                 int32_t mb_x );
+#define x264_prefetch_ref_lsx x264_template(prefetch_ref_lsx)
+void x264_prefetch_ref_lsx( uint8_t *pix, intptr_t stride, int32_t parity );
+/* LSX aligned memcpy / memzero. */
+#define x264_memcpy_aligned_lsx x264_template(memcpy_aligned_lsx)
+void *x264_memcpy_aligned_lsx( void *dst, const void *src, size_t n );
+#define x264_memzero_aligned_lsx x264_template(memzero_aligned_lsx)
+void x264_memzero_aligned_lsx( void *p_dst, size_t n );
+/* LSX half-pel filter and lowres frame init. */
+#define x264_hpel_filter_lsx x264_template(hpel_filter_lsx)
+void x264_hpel_filter_lsx( pixel *, pixel *, pixel *, pixel *, intptr_t, int, int, int16_t * );
+#define x264_frame_init_lowres_core_lsx x264_template(frame_init_lowres_core_lsx)
+void x264_frame_init_lowres_core_lsx( uint8_t *, uint8_t *, uint8_t *, uint8_t *,
+                                      uint8_t *, intptr_t, intptr_t, int, int );
+/* LASX pixel_avg_WxH kernels; x264_mc_init_loongarch installs them into pf->avg[PIXEL_WxH] (no 16x16 variant is declared). */
+#define x264_pixel_avg_16x8_lasx x264_template(pixel_avg_16x8_lasx)
+void x264_pixel_avg_16x8_lasx( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int );
+#define x264_pixel_avg_8x16_lasx x264_template(pixel_avg_8x16_lasx)
+void x264_pixel_avg_8x16_lasx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_8x8_lasx x264_template(pixel_avg_8x8_lasx)
+void x264_pixel_avg_8x8_lasx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_8x4_lasx x264_template(pixel_avg_8x4_lasx)
+void x264_pixel_avg_8x4_lasx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_4x16_lasx x264_template(pixel_avg_4x16_lasx)
+void x264_pixel_avg_4x16_lasx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_4x8_lasx x264_template(pixel_avg_4x8_lasx)
+void x264_pixel_avg_4x8_lasx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_4x4_lasx x264_template(pixel_avg_4x4_lasx)
+void x264_pixel_avg_4x4_lasx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_4x2_lasx x264_template(pixel_avg_4x2_lasx)
+void x264_pixel_avg_4x2_lasx( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+/* LASX avg2 kernels; mc-c.c calls them as (dst, dst_stride, src1, src_stride, src2, height). */
+#define x264_pixel_avg2_w4_lasx x264_template(pixel_avg2_w4_lasx)
+void x264_pixel_avg2_w4_lasx ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
+#define x264_pixel_avg2_w8_lasx x264_template(pixel_avg2_w8_lasx)
+void x264_pixel_avg2_w8_lasx ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
+#define x264_pixel_avg2_w16_lasx x264_template(pixel_avg2_w16_lasx)
+void x264_pixel_avg2_w16_lasx ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
+#define x264_pixel_avg2_w20_lasx x264_template(pixel_avg2_w20_lasx)
+void x264_pixel_avg2_w20_lasx ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
+/* LASX weight kernels (regular and "_noden"); mc-c.c calls them as (dst, dst_stride, src, src_stride, weight, height). */
+#define x264_mc_weight_w20_lasx x264_template(mc_weight_w20_lasx)
+void x264_mc_weight_w20_lasx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
+#define x264_mc_weight_w20_noden_lasx x264_template(mc_weight_w20_noden_lasx)
+void x264_mc_weight_w20_noden_lasx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
+#define x264_mc_weight_w16_lasx x264_template(mc_weight_w16_lasx)
+void x264_mc_weight_w16_lasx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
+#define x264_mc_weight_w16_noden_lasx x264_template(mc_weight_w16_noden_lasx)
+void x264_mc_weight_w16_noden_lasx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
+#define x264_mc_weight_w8_lasx x264_template(mc_weight_w8_lasx)
+void x264_mc_weight_w8_lasx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
+#define x264_mc_weight_w8_noden_lasx x264_template(mc_weight_w8_noden_lasx)
+void x264_mc_weight_w8_noden_lasx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
+#define x264_mc_weight_w4_lasx x264_template(mc_weight_w4_lasx)
+void x264_mc_weight_w4_lasx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
+#define x264_mc_weight_w4_noden_lasx x264_template(mc_weight_w4_noden_lasx)
+void x264_mc_weight_w4_noden_lasx( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
+/* LASX block copies; mc-c.c calls them as (dst, dst_stride, src, src_stride, height). */
+#define x264_mc_copy_w16_lasx x264_template(mc_copy_w16_lasx)
+void x264_mc_copy_w16_lasx( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_mc_copy_w8_lasx x264_template(mc_copy_w8_lasx)
+void x264_mc_copy_w8_lasx( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_mc_copy_w4_lasx x264_template(mc_copy_w4_lasx)
+void x264_mc_copy_w4_lasx( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+/* LASX plane interleave core.  NOTE(review): mc-c.c contains no PLANE_INTERLEAVE(lasx), so no use is visible there - confirm. */
+#define x264_plane_copy_interleave_core_lasx x264_template(plane_copy_interleave_core_lasx)
+void x264_plane_copy_interleave_core_lasx( pixel *dst,  intptr_t i_dst,
+                                           pixel *srcu, intptr_t i_srcu,
+                                           pixel *srcv, intptr_t i_srcv, int w, int h );
+/* NOTE(review): repeats the x264_plane_copy_deinterleave_lasx declaration from earlier in this header; redundant but harmless. */
+#define x264_plane_copy_deinterleave_lasx x264_template(plane_copy_deinterleave_lasx)
+void x264_plane_copy_deinterleave_lasx( pixel *dstu, intptr_t i_dstu,
+                                        pixel *dstv, intptr_t i_dstv,
+                                        pixel *src,  intptr_t i_src, int w, int h );
+/* LASX aligned memzero. */
+#define x264_memzero_aligned_lasx x264_template(memzero_aligned_lasx)
+void x264_memzero_aligned_lasx( void *p_dst, size_t n );
+/* LASX half-pel filter and lowres frame init. */
+#define x264_hpel_filter_lasx x264_template(hpel_filter_lasx)
+void x264_hpel_filter_lasx( pixel *, pixel *, pixel *, pixel *, intptr_t, int, int, int16_t * );
+#define x264_frame_init_lowres_core_lasx x264_template(frame_init_lowres_core_lasx)
+void x264_frame_init_lowres_core_lasx( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *,
+                                       intptr_t, intptr_t, int, int );
+
+#endif /* X264_LOONGARCH_MC_H */
--- a/common/loongarch/pixel-a.S
+++ b/common/loongarch/pixel-a.S
--- a/common/loongarch/pixel-c.c
+++ b/common/loongarch/pixel-c.c
@@ -0,0 +1,259 @@
+/*****************************************************************************
+ * pixel-c.c: loongarch pixel metrics
+ *****************************************************************************
+ * Copyright (C) 2023-2025 x264 project
+ *
+ * Authors: Hecai Yuan <yuanhecai@loongson.cn>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#include "common/common.h"
+#include "pixel.h"
+#include "predict.h"
+
+#if !HIGH_BIT_DEPTH
+
+/* Hadamard AC metric for one 8x8 block (LSX).
+ * The 64-bit value returned by the 8x8 kernel is repacked before being
+ * returned: the whole value shifted right by 34 is placed at bit 32, and the
+ * low 32 bits shifted right by 1 are added below it. */
+uint64_t x264_pixel_hadamard_ac_8x8_lsx( uint8_t *p_pix, intptr_t i_stride )
+{
+    const uint64_t u_acc = x264_hadamard_ac_8x8_lsx( p_pix, i_stride );
+    const uint64_t u_hi = ( u_acc >> 34 ) << 32;
+    const uint32_t u_lo = ( uint32_t ) u_acc >> 1;
+
+    return u_hi + u_lo;
+}
+
+/* Hadamard AC metric for an 8x16 block (LSX).
+ * Adds the 8x8 kernel results for the block at p_pix and the block at
+ * p_pix + 8 * i_stride, then repacks the 64-bit total as
+ * ( ( sum >> 34 ) << 32 ) + ( low 32 bits of sum >> 1 ). */
+uint64_t x264_pixel_hadamard_ac_8x16_lsx( uint8_t *p_pix, intptr_t i_stride )
+{
+    uint8_t *p_lower = p_pix + 8 * i_stride;
+    uint64_t u_acc = x264_hadamard_ac_8x8_lsx( p_pix, i_stride );
+
+    u_acc += x264_hadamard_ac_8x8_lsx( p_lower, i_stride );
+
+    const uint64_t u_hi = ( u_acc >> 34 ) << 32;
+    const uint32_t u_lo = ( uint32_t ) u_acc >> 1;
+    return u_hi + u_lo;
+}
+
+/* Hadamard AC metric for a 16x8 block (LSX).
+ * Adds the 8x8 kernel results for the block at p_pix and the block at
+ * p_pix + 8, then repacks the 64-bit total as
+ * ( ( sum >> 34 ) << 32 ) + ( low 32 bits of sum >> 1 ). */
+uint64_t x264_pixel_hadamard_ac_16x8_lsx( uint8_t *p_pix, intptr_t i_stride )
+{
+    uint8_t *p_right = p_pix + 8;
+    uint64_t u_acc = x264_hadamard_ac_8x8_lsx( p_pix, i_stride );
+
+    u_acc += x264_hadamard_ac_8x8_lsx( p_right, i_stride );
+
+    const uint64_t u_hi = ( u_acc >> 34 ) << 32;
+    const uint32_t u_lo = ( uint32_t ) u_acc >> 1;
+    return u_hi + u_lo;
+}
+
+/* Hadamard AC metric for a 16x16 block (LSX).
+ * Accumulates the 8x8 kernel over the four sub-blocks located at byte
+ * offsets 0, 8, 8 * i_stride and 8 * i_stride + 8 from p_pix (visited in
+ * that order), then repacks the 64-bit total as
+ * ( ( sum >> 34 ) << 32 ) + ( low 32 bits of sum >> 1 ). */
+uint64_t x264_pixel_hadamard_ac_16x16_lsx( uint8_t *p_pix, intptr_t i_stride )
+{
+    const intptr_t i_offset[4] = { 0, 8, 8 * i_stride, 8 * i_stride + 8 };
+    uint64_t u_acc = 0;
+
+    for( int i = 0; i < 4; i++ )
+        u_acc += x264_hadamard_ac_8x8_lsx( p_pix + i_offset[i], i_stride );
+
+    const uint64_t u_hi = ( u_acc >> 34 ) << 32;
+    const uint32_t u_lo = ( uint32_t ) u_acc >> 1;
+    return u_hi + u_lo;
+}
+
+/* Hadamard AC metric for one 8x8 block (LASX).
+ * Calls the LASX 8x8 kernel once and repacks its 64-bit result as
+ * ( ( sum >> 34 ) << 32 ) + ( low 32 bits of sum >> 1 ). */
+uint64_t x264_pixel_hadamard_ac_8x8_lasx( uint8_t *p_pix, intptr_t i_stride )
+{
+    const uint64_t u_acc = x264_hadamard_ac_8x8_lasx( p_pix, i_stride );
+    uint64_t u_packed = ( u_acc >> 34 ) << 32;
+
+    u_packed += ( uint32_t ) u_acc >> 1;
+    return u_packed;
+}
+
+/* Hadamard AC metric for an 8x16 block (LASX).
+ * Adds the LASX 8x8 kernel results for the block at p_pix and the block at
+ * p_pix + 8 * i_stride, then repacks the 64-bit total as
+ * ( ( sum >> 34 ) << 32 ) + ( low 32 bits of sum >> 1 ).
+ *
+ * The second block's offset is computed as 8 * i_stride rather than
+ * i_stride << 3: i_stride is a signed intptr_t and left-shifting a negative
+ * value is undefined behaviour in C, whereas the multiplication is well
+ * defined and matches the LSX 8x16 variant in this file. */
+uint64_t x264_pixel_hadamard_ac_8x16_lasx( uint8_t *p_pix, intptr_t i_stride )
+{
+    uint64_t u_sum;
+
+    u_sum = x264_hadamard_ac_8x8_lasx( p_pix, i_stride );
+    u_sum += x264_hadamard_ac_8x8_lasx( p_pix + 8 * i_stride, i_stride );
+
+    return ( ( u_sum >> 34 ) << 32 ) + ( ( uint32_t ) u_sum >> 1 );
+}
+
+/* Scores three 8x8 predictors against p_enc with x264_pixel_sa8d_8x8_lsx.
+ * Each predictor is called with a local scratch buffer (8 * FDEC_STRIDE
+ * bytes) and p_edge; the scratch buffer is then compared with p_enc using
+ * strides FDEC_STRIDE and FENC_STRIDE respectively.
+ *   p_sad_array[0] <- x264_predict_8x8_v_lsx
+ *   p_sad_array[1] <- x264_predict_8x8_h_lsx
+ *   p_sad_array[2] <- x264_predict_8x8_dc_lsx */
+void x264_intra_sa8d_x3_8x8_lsx( uint8_t *p_enc, uint8_t p_edge[36],
+                                 int32_t p_sad_array[3] )
+{
+    ALIGNED_ARRAY_16( uint8_t, pix, [8 * FDEC_STRIDE] );
+    const intptr_t i_dec = FDEC_STRIDE;
+    const intptr_t i_enc = FENC_STRIDE;
+
+    /* [0]: V predictor */
+    x264_predict_8x8_v_lsx( pix, p_edge );
+    p_sad_array[0] = x264_pixel_sa8d_8x8_lsx( pix, i_dec, p_enc, i_enc );
+
+    /* [1]: H predictor */
+    x264_predict_8x8_h_lsx( pix, p_edge );
+    p_sad_array[1] = x264_pixel_sa8d_8x8_lsx( pix, i_dec, p_enc, i_enc );
+
+    /* [2]: DC predictor */
+    x264_predict_8x8_dc_lsx( pix, p_edge );
+    p_sad_array[2] = x264_pixel_sa8d_8x8_lsx( pix, i_dec, p_enc, i_enc );
+}
+
+/* Scores three 8x8 predictors against p_enc with x264_pixel_sa8d_8x8_lasx.
+ * Each predictor is called with a local scratch buffer (8 * FDEC_STRIDE
+ * bytes) and p_edge; the scratch buffer is then compared with p_enc using
+ * strides FDEC_STRIDE and FENC_STRIDE respectively.
+ * Note the predictor mix: V and DC use the LSX routines, H uses the LASX
+ * routine.
+ *   p_sad_array[0] <- x264_predict_8x8_v_lsx
+ *   p_sad_array[1] <- x264_predict_8x8_h_lasx
+ *   p_sad_array[2] <- x264_predict_8x8_dc_lsx */
+void x264_intra_sa8d_x3_8x8_lasx( uint8_t *p_enc, uint8_t p_edge[36],
+                                  int32_t p_sad_array[3] )
+{
+    ALIGNED_ARRAY_16( uint8_t, pix, [8 * FDEC_STRIDE] );
+    const intptr_t i_dec = FDEC_STRIDE;
+    const intptr_t i_enc = FENC_STRIDE;
+
+    /* [0]: V predictor (LSX) */
+    x264_predict_8x8_v_lsx( pix, p_edge );
+    p_sad_array[0] = x264_pixel_sa8d_8x8_lasx( pix, i_dec, p_enc, i_enc );
+
+    /* [1]: H predictor (LASX) */
+    x264_predict_8x8_h_lasx( pix, p_edge );
+    p_sad_array[1] = x264_pixel_sa8d_8x8_lasx( pix, i_dec, p_enc, i_enc );
+
+    /* [2]: DC predictor (LSX) */
+    x264_predict_8x8_dc_lsx( pix, p_edge );
+    p_sad_array[2] = x264_pixel_sa8d_8x8_lasx( pix, i_dec, p_enc, i_enc );
+}
+
+/* Scores three 4x4 predictors against p_enc with x264_pixel_satd_4x4_lsx.
+ * Each predictor is called on p_dec, after which p_dec (stride FDEC_STRIDE)
+ * is compared with p_enc (stride FENC_STRIDE).
+ *   p_sad_array[0] <- x264_predict_4x4_v_lsx
+ *   p_sad_array[1] <- x264_predict_4x4_h_lsx
+ *   p_sad_array[2] <- x264_predict_4x4_dc_lsx */
+void x264_intra_satd_x3_4x4_lsx( uint8_t *p_enc, uint8_t *p_dec,
+                                 int32_t p_sad_array[3] )
+{
+    const intptr_t i_dec = FDEC_STRIDE;
+    const intptr_t i_enc = FENC_STRIDE;
+
+    /* [0]: V predictor */
+    x264_predict_4x4_v_lsx( p_dec );
+    p_sad_array[0] = x264_pixel_satd_4x4_lsx( p_dec, i_dec, p_enc, i_enc );
+
+    /* [1]: H predictor */
+    x264_predict_4x4_h_lsx( p_dec );
+    p_sad_array[1] = x264_pixel_satd_4x4_lsx( p_dec, i_dec, p_enc, i_enc );
+
+    /* [2]: DC predictor */
+    x264_predict_4x4_dc_lsx( p_dec );
+    p_sad_array[2] = x264_pixel_satd_4x4_lsx( p_dec, i_dec, p_enc, i_enc );
+}
+
+/* Scores three 16x16 predictors against p_enc with
+ * x264_pixel_satd_16x16_lsx.
+ * Each predictor is called on p_dec, after which p_dec (stride FDEC_STRIDE)
+ * is compared with p_enc (stride FENC_STRIDE).
+ *   p_sad_array[0] <- x264_predict_16x16_v_lsx
+ *   p_sad_array[1] <- x264_predict_16x16_h_lsx
+ *   p_sad_array[2] <- x264_predict_16x16_dc_lsx */
+void x264_intra_satd_x3_16x16_lsx( uint8_t *p_enc, uint8_t *p_dec,
+                                   int32_t p_sad_array[3] )
+{
+    const intptr_t i_dec = FDEC_STRIDE;
+    const intptr_t i_enc = FENC_STRIDE;
+
+    /* [0]: V predictor */
+    x264_predict_16x16_v_lsx( p_dec );
+    p_sad_array[0] = x264_pixel_satd_16x16_lsx( p_dec, i_dec, p_enc, i_enc );
+
+    /* [1]: H predictor */
+    x264_predict_16x16_h_lsx( p_dec );
+    p_sad_array[1] = x264_pixel_satd_16x16_lsx( p_dec, i_dec, p_enc, i_enc );
+
+    /* [2]: DC predictor */
+    x264_predict_16x16_dc_lsx( p_dec );
+    p_sad_array[2] = x264_pixel_satd_16x16_lsx( p_dec, i_dec, p_enc, i_enc );
+}
+
+/* Scores three 16x16 predictors against p_enc with
+ * x264_pixel_satd_16x16_lasx.
+ * The predictors are the LSX routines; only the SATD kernel is LASX.
+ * Each predictor is called on p_dec, after which p_dec (stride FDEC_STRIDE)
+ * is compared with p_enc (stride FENC_STRIDE).
+ *   p_sad_array[0] <- x264_predict_16x16_v_lsx
+ *   p_sad_array[1] <- x264_predict_16x16_h_lsx
+ *   p_sad_array[2] <- x264_predict_16x16_dc_lsx */
+void x264_intra_satd_x3_16x16_lasx( uint8_t *p_enc, uint8_t *p_dec,
+                                    int32_t p_sad_array[3] )
+{
+    const intptr_t i_dec = FDEC_STRIDE;
+    const intptr_t i_enc = FENC_STRIDE;
+
+    /* [0]: V predictor */
+    x264_predict_16x16_v_lsx( p_dec );
+    p_sad_array[0] = x264_pixel_satd_16x16_lasx( p_dec, i_dec, p_enc, i_enc );
+
+    /* [1]: H predictor */
+    x264_predict_16x16_h_lsx( p_dec );
+    p_sad_array[1] = x264_pixel_satd_16x16_lasx( p_dec, i_dec, p_enc, i_enc );
+
+    /* [2]: DC predictor */
+    x264_predict_16x16_dc_lsx( p_dec );
+    p_sad_array[2] = x264_pixel_satd_16x16_lasx( p_dec, i_dec, p_enc, i_enc );
+}
+
+/* Scores three 8x8c predictors against p_enc with x264_pixel_satd_8x8_lsx.
+ * Each predictor is called on p_dec, after which p_dec (stride FDEC_STRIDE)
+ * is compared with p_enc (stride FENC_STRIDE).
+ * Unlike the 4x4/16x16 variants in this file the order here is DC, H, V:
+ *   p_sad_array[0] <- x264_predict_8x8c_dc_lsx
+ *   p_sad_array[1] <- x264_predict_8x8c_h_lsx
+ *   p_sad_array[2] <- x264_predict_8x8c_v_lsx */
+void x264_intra_satd_x3_8x8c_lsx( uint8_t *p_enc, uint8_t *p_dec,
+                                  int32_t p_sad_array[3] )
+{
+    const intptr_t i_dec = FDEC_STRIDE;
+    const intptr_t i_enc = FENC_STRIDE;
+
+    /* [0]: DC predictor */
+    x264_predict_8x8c_dc_lsx( p_dec );
+    p_sad_array[0] = x264_pixel_satd_8x8_lsx( p_dec, i_dec, p_enc, i_enc );
+
+    /* [1]: H predictor */
+    x264_predict_8x8c_h_lsx( p_dec );
+    p_sad_array[1] = x264_pixel_satd_8x8_lsx( p_dec, i_dec, p_enc, i_enc );
+
+    /* [2]: V predictor */
+    x264_predict_8x8c_v_lsx( p_dec );
+    p_sad_array[2] = x264_pixel_satd_8x8_lsx( p_dec, i_dec, p_enc, i_enc );
+}
+
+/* Scores three 4x4 predictors against p_enc with x264_pixel_sad_4x4_lsx.
+ * Each predictor is called on p_dec, after which p_dec (stride FDEC_STRIDE)
+ * is compared with p_enc (stride FENC_STRIDE).
+ *   p_sad_array[0] <- x264_predict_4x4_v_lsx
+ *   p_sad_array[1] <- x264_predict_4x4_h_lsx
+ *   p_sad_array[2] <- x264_predict_4x4_dc_lsx */
+void x264_intra_sad_x3_4x4_lsx( uint8_t *p_enc, uint8_t *p_dec,
+                                int32_t p_sad_array[3] )
+{
+    const intptr_t i_dec = FDEC_STRIDE;
+    const intptr_t i_enc = FENC_STRIDE;
+
+    /* [0]: V predictor */
+    x264_predict_4x4_v_lsx( p_dec );
+    p_sad_array[0] = x264_pixel_sad_4x4_lsx( p_dec, i_dec, p_enc, i_enc );
+
+    /* [1]: H predictor */
+    x264_predict_4x4_h_lsx( p_dec );
+    p_sad_array[1] = x264_pixel_sad_4x4_lsx( p_dec, i_dec, p_enc, i_enc );
+
+    /* [2]: DC predictor */
+    x264_predict_4x4_dc_lsx( p_dec );
+    p_sad_array[2] = x264_pixel_sad_4x4_lsx( p_dec, i_dec, p_enc, i_enc );
+}
+
+/* Scores three 16x16 predictors against p_enc with x264_pixel_sad_16x16_lsx.
+ * Each predictor is called on p_dec, after which p_dec (stride FDEC_STRIDE)
+ * is compared with p_enc (stride FENC_STRIDE).
+ *   p_sad_array[0] <- x264_predict_16x16_v_lsx
+ *   p_sad_array[1] <- x264_predict_16x16_h_lsx
+ *   p_sad_array[2] <- x264_predict_16x16_dc_lsx */
+void x264_intra_sad_x3_16x16_lsx( uint8_t *p_enc, uint8_t *p_dec,
+                                  int32_t p_sad_array[3] )
+{
+    const intptr_t i_dec = FDEC_STRIDE;
+    const intptr_t i_enc = FENC_STRIDE;
+
+    /* [0]: V predictor */
+    x264_predict_16x16_v_lsx( p_dec );
+    p_sad_array[0] = x264_pixel_sad_16x16_lsx( p_dec, i_dec, p_enc, i_enc );
+
+    /* [1]: H predictor */
+    x264_predict_16x16_h_lsx( p_dec );
+    p_sad_array[1] = x264_pixel_sad_16x16_lsx( p_dec, i_dec, p_enc, i_enc );
+
+    /* [2]: DC predictor */
+    x264_predict_16x16_dc_lsx( p_dec );
+    p_sad_array[2] = x264_pixel_sad_16x16_lsx( p_dec, i_dec, p_enc, i_enc );
+}
+
+/* Scores three 8x8 predictors against p_enc with x264_pixel_sad_8x8_lsx.
+ * Each predictor is called with a local scratch buffer (8 * FDEC_STRIDE
+ * bytes) and p_edge; the scratch buffer is then compared with p_enc using
+ * strides FDEC_STRIDE and FENC_STRIDE respectively.
+ *   p_sad_array[0] <- x264_predict_8x8_v_lsx
+ *   p_sad_array[1] <- x264_predict_8x8_h_lsx
+ *   p_sad_array[2] <- x264_predict_8x8_dc_lsx */
+void x264_intra_sad_x3_8x8_lsx( uint8_t *p_enc, uint8_t p_edge[36],
+                                int32_t p_sad_array[3] )
+{
+    ALIGNED_ARRAY_16( uint8_t, pix, [8 * FDEC_STRIDE] );
+    const intptr_t i_dec = FDEC_STRIDE;
+    const intptr_t i_enc = FENC_STRIDE;
+
+    /* [0]: V predictor */
+    x264_predict_8x8_v_lsx( pix, p_edge );
+    p_sad_array[0] = x264_pixel_sad_8x8_lsx( pix, i_dec, p_enc, i_enc );
+
+    /* [1]: H predictor */
+    x264_predict_8x8_h_lsx( pix, p_edge );
+    p_sad_array[1] = x264_pixel_sad_8x8_lsx( pix, i_dec, p_enc, i_enc );
+
+    /* [2]: DC predictor */
+    x264_predict_8x8_dc_lsx( pix, p_edge );
+    p_sad_array[2] = x264_pixel_sad_8x8_lsx( pix, i_dec, p_enc, i_enc );
+}
+
+/* Scores three 8x8c predictors against p_enc with x264_pixel_sad_8x8_lsx.
+ * Each predictor is called on p_dec, after which p_dec (stride FDEC_STRIDE)
+ * is compared with p_enc (stride FENC_STRIDE).
+ * Unlike the 4x4/16x16 variants in this file the order here is DC, H, V:
+ *   p_sad_array[0] <- x264_predict_8x8c_dc_lsx
+ *   p_sad_array[1] <- x264_predict_8x8c_h_lsx
+ *   p_sad_array[2] <- x264_predict_8x8c_v_lsx */
+void x264_intra_sad_x3_8x8c_lsx( uint8_t *p_enc, uint8_t *p_dec,
+                                 int32_t p_sad_array[3] )
+{
+    const intptr_t i_dec = FDEC_STRIDE;
+    const intptr_t i_enc = FENC_STRIDE;
+
+    /* [0]: DC predictor */
+    x264_predict_8x8c_dc_lsx( p_dec );
+    p_sad_array[0] = x264_pixel_sad_8x8_lsx( p_dec, i_dec, p_enc, i_enc );
+
+    /* [1]: H predictor */
+    x264_predict_8x8c_h_lsx( p_dec );
+    p_sad_array[1] = x264_pixel_sad_8x8_lsx( p_dec, i_dec, p_enc, i_enc );
+
+    /* [2]: V predictor */
+    x264_predict_8x8c_v_lsx( p_dec );
+    p_sad_array[2] = x264_pixel_sad_8x8_lsx( p_dec, i_dec, p_enc, i_enc );
+}
+
+
+#endif
--- a/common/loongarch/pixel.h
+++ b/common/loongarch/pixel.h
@@ -0,0 +1,335 @@
+/*****************************************************************************
+ * pixel.h: loongarch pixel metrics
+ *****************************************************************************
+ * Copyright (C) 2023-2025 x264 project
+ *
+ * Authors: Lu Wang <wanglu@loongson.cn>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#ifndef X264_LOONGARCH_PIXEL_H
+#define X264_LOONGARCH_PIXEL_H
+/*
+ * LSX pixel_satd entry points for block sizes 4x4, 4x8, 4x16, 8x4, 8x8,
+ * 8x16, 16x8 and 16x16.
+ * Every prototype takes two pixel pointers, each followed by its own
+ * stride, and returns an int32_t.
+ * Each name is routed through x264_template() by the #define above it.
+ */
+#define x264_pixel_satd_4x4_lsx x264_template(pixel_satd_4x4_lsx)
+int32_t x264_pixel_satd_4x4_lsx( uint8_t *p_pix1, intptr_t i_stride,
+                                 uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_satd_4x8_lsx x264_template(pixel_satd_4x8_lsx)
+int32_t x264_pixel_satd_4x8_lsx( uint8_t *p_pix1, intptr_t i_stride,
+                                 uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_satd_4x16_lsx x264_template(pixel_satd_4x16_lsx)
+int32_t x264_pixel_satd_4x16_lsx( uint8_t *p_pix1, intptr_t i_stride,
+                                  uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_satd_8x4_lsx x264_template(pixel_satd_8x4_lsx)
+int32_t x264_pixel_satd_8x4_lsx( uint8_t *p_pix1, intptr_t i_stride,
+                                 uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_satd_8x8_lsx x264_template(pixel_satd_8x8_lsx)
+int32_t x264_pixel_satd_8x8_lsx( uint8_t *p_pix1, intptr_t i_stride,
+                                 uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_satd_8x16_lsx x264_template(pixel_satd_8x16_lsx)
+int32_t x264_pixel_satd_8x16_lsx( uint8_t *p_pix1, intptr_t i_stride,
+                                  uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_satd_16x8_lsx x264_template(pixel_satd_16x8_lsx)
+int32_t x264_pixel_satd_16x8_lsx( uint8_t *p_pix1, intptr_t i_stride,
+                                  uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_satd_16x16_lsx x264_template(pixel_satd_16x16_lsx)
+int32_t x264_pixel_satd_16x16_lsx( uint8_t *p_pix1, intptr_t i_stride,
+                                   uint8_t *p_pix2, intptr_t i_stride2 );
+/*
+ * LASX pixel_satd entry points for block sizes 4x8, 4x16, 8x4, 8x8, 8x16,
+ * 16x8 and 16x16 (no 4x4 LASX variant is declared).
+ * Same signature as the LSX versions: two pixel pointers, each followed by
+ * its own stride, returning an int32_t.
+ */
+#define x264_pixel_satd_4x8_lasx x264_template(pixel_satd_4x8_lasx)
+int32_t x264_pixel_satd_4x8_lasx( uint8_t *p_pix1, intptr_t i_stride,
+                                  uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_satd_4x16_lasx x264_template(pixel_satd_4x16_lasx)
+int32_t x264_pixel_satd_4x16_lasx( uint8_t *p_pix1, intptr_t i_stride,
+                                   uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_satd_8x4_lasx x264_template(pixel_satd_8x4_lasx)
+int32_t x264_pixel_satd_8x4_lasx( uint8_t *p_pix1, intptr_t i_stride,
+                                  uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_satd_8x8_lasx x264_template(pixel_satd_8x8_lasx)
+int32_t x264_pixel_satd_8x8_lasx( uint8_t *p_pix1, intptr_t i_stride,
+                                  uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_satd_8x16_lasx x264_template(pixel_satd_8x16_lasx)
+int32_t x264_pixel_satd_8x16_lasx( uint8_t *p_pix1, intptr_t i_stride,
+                                   uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_satd_16x8_lasx x264_template(pixel_satd_16x8_lasx)
+int32_t x264_pixel_satd_16x8_lasx( uint8_t *p_pix1, intptr_t i_stride,
+                                   uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_satd_16x16_lasx x264_template(pixel_satd_16x16_lasx)
+int32_t x264_pixel_satd_16x16_lasx( uint8_t *p_pix1, intptr_t i_stride,
+                                    uint8_t *p_pix2, intptr_t i_stride2 );
+/*
+ * LSX pixel_sad_x4 entry points for block sizes 16x16, 16x8, 8x16, 8x8,
+ * 8x4 and 4x8.
+ * Each takes one source pointer and four reference pointers that share a
+ * single i_ref_stride, plus a four-element int32_t array; the return type
+ * is void.
+ * NOTE(review): p_sad_array presumably receives one result per reference,
+ * and no source stride is passed -- confirm the assumed source layout
+ * against the implementation.
+ */
+#define x264_pixel_sad_x4_16x16_lsx x264_template(pixel_sad_x4_16x16_lsx)
+void x264_pixel_sad_x4_16x16_lsx( uint8_t *p_src, uint8_t *p_ref0,
+                                  uint8_t *p_ref1, uint8_t *p_ref2,
+                                  uint8_t *p_ref3, intptr_t i_ref_stride,
+                                  int32_t p_sad_array[4] );
+#define x264_pixel_sad_x4_16x8_lsx x264_template(pixel_sad_x4_16x8_lsx)
+void x264_pixel_sad_x4_16x8_lsx( uint8_t *p_src, uint8_t *p_ref0,
+                                 uint8_t *p_ref1, uint8_t *p_ref2,
+                                 uint8_t *p_ref3, intptr_t i_ref_stride,
+                                 int32_t p_sad_array[4] );
+#define x264_pixel_sad_x4_8x16_lsx x264_template(pixel_sad_x4_8x16_lsx)
+void x264_pixel_sad_x4_8x16_lsx( uint8_t *p_src, uint8_t *p_ref0,
+                                 uint8_t *p_ref1, uint8_t *p_ref2,
+                                 uint8_t *p_ref3, intptr_t i_ref_stride,
+                                 int32_t p_sad_array[4] );
+#define x264_pixel_sad_x4_8x8_lsx x264_template(pixel_sad_x4_8x8_lsx)
+void x264_pixel_sad_x4_8x8_lsx( uint8_t *p_src, uint8_t *p_ref0,
+                                uint8_t *p_ref1, uint8_t *p_ref2,
+                                uint8_t *p_ref3, intptr_t i_ref_stride,
+                                int32_t p_sad_array[4] );
+#define x264_pixel_sad_x4_8x4_lsx x264_template(pixel_sad_x4_8x4_lsx)
+void x264_pixel_sad_x4_8x4_lsx( uint8_t *p_src, uint8_t *p_ref0,
+                                uint8_t *p_ref1, uint8_t *p_ref2,
+                                uint8_t *p_ref3, intptr_t i_ref_stride,
+                                int32_t p_sad_array[4] );
+#define x264_pixel_sad_x4_4x8_lsx x264_template(pixel_sad_x4_4x8_lsx)
+void x264_pixel_sad_x4_4x8_lsx( uint8_t *p_src, uint8_t *p_ref0,
+                                uint8_t *p_ref1, uint8_t *p_ref2,
+                                uint8_t *p_ref3, intptr_t i_ref_stride,
+                                int32_t p_sad_array[4] );
+/*
+ * LASX pixel_sad_x4 entry points for block sizes 16x16, 16x8, 8x8 and 8x4,
+ * with the same signature as the LSX versions (one source, four references
+ * sharing i_ref_stride, four-element int32_t array, void return).
+ * The last prototype in this group, x264_pixel_sad_x4_4x4_lsx, is an LSX
+ * routine that happens to be declared here rather than with the other LSX
+ * sad_x4 prototypes.
+ */
+#define x264_pixel_sad_x4_16x16_lasx x264_template(pixel_sad_x4_16x16_lasx)
+void x264_pixel_sad_x4_16x16_lasx( uint8_t *p_src, uint8_t *p_ref0,
+                                   uint8_t *p_ref1, uint8_t *p_ref2,
+                                   uint8_t *p_ref3, intptr_t i_ref_stride,
+                                   int32_t p_sad_array[4] );
+#define x264_pixel_sad_x4_16x8_lasx x264_template(pixel_sad_x4_16x8_lasx)
+void x264_pixel_sad_x4_16x8_lasx( uint8_t *p_src, uint8_t *p_ref0,
+                                  uint8_t *p_ref1, uint8_t *p_ref2,
+                                  uint8_t *p_ref3, intptr_t i_ref_stride,
+                                  int32_t p_sad_array[4] );
+#define x264_pixel_sad_x4_8x8_lasx x264_template(pixel_sad_x4_8x8_lasx)
+void x264_pixel_sad_x4_8x8_lasx( uint8_t *p_src, uint8_t *p_ref0,
+                                 uint8_t *p_ref1, uint8_t *p_ref2,
+                                 uint8_t *p_ref3, intptr_t i_ref_stride,
+                                 int32_t p_sad_array[4] );
+#define x264_pixel_sad_x4_8x4_lasx x264_template(pixel_sad_x4_8x4_lasx)
+void x264_pixel_sad_x4_8x4_lasx( uint8_t *p_src, uint8_t *p_ref0,
+                                 uint8_t *p_ref1, uint8_t *p_ref2,
+                                 uint8_t *p_ref3, intptr_t i_ref_stride,
+                                 int32_t p_sad_array[4] );
+#define x264_pixel_sad_x4_4x4_lsx x264_template(pixel_sad_x4_4x4_lsx)
+void x264_pixel_sad_x4_4x4_lsx( uint8_t *p_src, uint8_t *p_ref0,
+                                uint8_t *p_ref1, uint8_t *p_ref2,
+                                uint8_t *p_ref3, intptr_t i_ref_stride,
+                                int32_t p_sad_array[4] );
+/*
+ * LSX pixel_sad_x3 entry points for block sizes 16x16, 16x8, 8x16, 8x8,
+ * 8x4, 4x4 and 4x8.
+ * Each takes one source pointer and three reference pointers that share a
+ * single i_ref_stride, plus a three-element int32_t array; the return type
+ * is void.
+ * NOTE(review): p_sad_array presumably receives one result per reference --
+ * confirm against the implementation.
+ */
+#define x264_pixel_sad_x3_16x16_lsx x264_template(pixel_sad_x3_16x16_lsx)
+void x264_pixel_sad_x3_16x16_lsx( uint8_t *p_src, uint8_t *p_ref0,
+                                  uint8_t *p_ref1, uint8_t *p_ref2,
+                                  intptr_t i_ref_stride,
+                                  int32_t p_sad_array[3] );
+#define x264_pixel_sad_x3_16x8_lsx x264_template(pixel_sad_x3_16x8_lsx)
+void x264_pixel_sad_x3_16x8_lsx( uint8_t *p_src, uint8_t *p_ref0,
+                                 uint8_t *p_ref1, uint8_t *p_ref2,
+                                 intptr_t i_ref_stride,
+                                 int32_t p_sad_array[3] );
+#define x264_pixel_sad_x3_8x16_lsx x264_template(pixel_sad_x3_8x16_lsx)
+void x264_pixel_sad_x3_8x16_lsx( uint8_t *p_src, uint8_t *p_ref0,
+                                 uint8_t *p_ref1, uint8_t *p_ref2,
+                                 intptr_t i_ref_stride,
+                                 int32_t p_sad_array[3] );
+#define x264_pixel_sad_x3_8x8_lsx x264_template(pixel_sad_x3_8x8_lsx)
+void x264_pixel_sad_x3_8x8_lsx( uint8_t *p_src, uint8_t *p_ref0,
+                                uint8_t *p_ref1, uint8_t *p_ref2,
+                                intptr_t i_ref_stride,
+                                int32_t p_sad_array[3] );
+#define x264_pixel_sad_x3_8x4_lsx x264_template(pixel_sad_x3_8x4_lsx)
+void x264_pixel_sad_x3_8x4_lsx( uint8_t *p_src, uint8_t *p_ref0,
+                                uint8_t *p_ref1, uint8_t *p_ref2,
+                                intptr_t i_ref_stride,
+                                int32_t p_sad_array[3] );
+#define x264_pixel_sad_x3_4x4_lsx x264_template(pixel_sad_x3_4x4_lsx)
+void x264_pixel_sad_x3_4x4_lsx( uint8_t *p_src, uint8_t *p_ref0,
+                                uint8_t *p_ref1, uint8_t *p_ref2,
+                                intptr_t i_ref_stride,
+                                int32_t p_sad_array[3] );
+#define x264_pixel_sad_x3_4x8_lsx x264_template(pixel_sad_x3_4x8_lsx)
+void x264_pixel_sad_x3_4x8_lsx( uint8_t *p_src, uint8_t *p_ref0,
+                                uint8_t *p_ref1, uint8_t *p_ref2,
+                                intptr_t i_ref_stride,
+                                int32_t p_sad_array[3] );
+/*
+ * LASX pixel_sad_x3 entry points; only the 16x16 and 16x8 sizes are
+ * declared. Same signature as the LSX versions (one source, three
+ * references sharing i_ref_stride, three-element int32_t array, void
+ * return).
+ */
+#define x264_pixel_sad_x3_16x16_lasx x264_template(pixel_sad_x3_16x16_lasx)
+void x264_pixel_sad_x3_16x16_lasx( uint8_t *p_src, uint8_t *p_ref0,
+                                  uint8_t *p_ref1, uint8_t *p_ref2,
+                                  intptr_t i_ref_stride,
+                                  int32_t p_sad_array[3] );
+#define x264_pixel_sad_x3_16x8_lasx x264_template(pixel_sad_x3_16x8_lasx)
+void x264_pixel_sad_x3_16x8_lasx( uint8_t *p_src, uint8_t *p_ref0,
+                                  uint8_t *p_ref1, uint8_t *p_ref2,
+                                  intptr_t i_ref_stride,
+                                  int32_t p_sad_array[3] );
+/*
+ * LSX pixel_sad entry points for block sizes 16x16, 16x8, 8x16, 8x8, 8x4,
+ * 4x16, 4x8 and 4x4.
+ * Each takes a source pointer with its stride and a reference pointer with
+ * its stride, and returns an int32_t.
+ */
+#define x264_pixel_sad_16x16_lsx x264_template(pixel_sad_16x16_lsx)
+int32_t x264_pixel_sad_16x16_lsx( uint8_t *p_src, intptr_t i_src_stride,
+                                  uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_sad_16x8_lsx x264_template(pixel_sad_16x8_lsx)
+int32_t x264_pixel_sad_16x8_lsx( uint8_t *p_src, intptr_t i_src_stride,
+                                 uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_sad_8x16_lsx x264_template(pixel_sad_8x16_lsx)
+int32_t x264_pixel_sad_8x16_lsx( uint8_t *p_src, intptr_t i_src_stride,
+                                 uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_sad_8x8_lsx x264_template(pixel_sad_8x8_lsx)
+int32_t x264_pixel_sad_8x8_lsx( uint8_t *p_src, intptr_t i_src_stride,
+                                uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_sad_8x4_lsx x264_template(pixel_sad_8x4_lsx)
+int32_t x264_pixel_sad_8x4_lsx( uint8_t *p_src, intptr_t i_src_stride,
+                                uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_sad_4x16_lsx x264_template(pixel_sad_4x16_lsx)
+int32_t x264_pixel_sad_4x16_lsx( uint8_t *p_src, intptr_t i_src_stride,
+                                 uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_sad_4x8_lsx x264_template(pixel_sad_4x8_lsx)
+int32_t x264_pixel_sad_4x8_lsx( uint8_t *p_src, intptr_t i_src_stride,
+                                uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_sad_4x4_lsx x264_template(pixel_sad_4x4_lsx)
+int32_t x264_pixel_sad_4x4_lsx( uint8_t *p_src, intptr_t i_src_stride,
+                                uint8_t *p_ref, intptr_t i_ref_stride );
+/*
+ * The only LASX pixel_sad size declared in this header is 8x4; it has the
+ * same signature as the LSX pixel_sad prototypes.
+ */
+#define x264_pixel_sad_8x4_lasx x264_template(pixel_sad_8x4_lasx)
+int32_t x264_pixel_sad_8x4_lasx( uint8_t *p_src, intptr_t i_src_stride,
+                                 uint8_t *p_ref, intptr_t i_ref_stride );
+/*
+ * LSX Hadamard AC prototypes.
+ * x264_hadamard_ac_8x8_lsx is the 8x8 kernel; the four
+ * x264_pixel_hadamard_ac_*_lsx functions (8x8, 8x16, 16x8, 16x16) are the
+ * wrappers defined in pixel-c.c, which sum the kernel over the 8x8
+ * sub-blocks and repack the 64-bit result.
+ * All take a pixel pointer and a stride and return a uint64_t.
+ */
+#define x264_hadamard_ac_8x8_lsx x264_template(hadamard_ac_8x8_lsx)
+uint64_t x264_hadamard_ac_8x8_lsx( uint8_t *p_pix, intptr_t i_stride );
+#define x264_pixel_hadamard_ac_8x8_lsx x264_template(pixel_hadamard_ac_8x8_lsx)
+uint64_t x264_pixel_hadamard_ac_8x8_lsx( uint8_t *p_pix, intptr_t i_stride );
+#define x264_pixel_hadamard_ac_8x16_lsx x264_template(pixel_hadamard_ac_8x16_lsx)
+uint64_t x264_pixel_hadamard_ac_8x16_lsx( uint8_t *p_pix, intptr_t i_stride );
+#define x264_pixel_hadamard_ac_16x8_lsx x264_template(pixel_hadamard_ac_16x8_lsx)
+uint64_t x264_pixel_hadamard_ac_16x8_lsx( uint8_t *p_pix, intptr_t i_stride );
+#define x264_pixel_hadamard_ac_16x16_lsx x264_template(pixel_hadamard_ac_16x16_lsx)
+uint64_t x264_pixel_hadamard_ac_16x16_lsx( uint8_t *p_pix, intptr_t i_stride );
+/*
+ * LASX Hadamard AC prototypes, mirroring the LSX set: one 8x8 kernel plus
+ * pixel_hadamard_ac wrappers for 8x8, 8x16, 16x8 and 16x16.
+ * pixel-c.c defines only the 8x8 and 8x16 LASX wrappers; the 16x8 and 16x16
+ * LASX functions are not defined there.
+ * NOTE(review): they are presumably implemented in assembly -- confirm.
+ */
+#define x264_hadamard_ac_8x8_lasx x264_template(hadamard_ac_8x8_lasx)
+uint64_t x264_hadamard_ac_8x8_lasx( uint8_t *p_pix, intptr_t i_stride );
+#define x264_pixel_hadamard_ac_8x8_lasx x264_template(pixel_hadamard_ac_8x8_lasx)
+uint64_t x264_pixel_hadamard_ac_8x8_lasx( uint8_t *p_pix, intptr_t i_stride );
+#define x264_pixel_hadamard_ac_8x16_lasx x264_template(pixel_hadamard_ac_8x16_lasx)
+uint64_t x264_pixel_hadamard_ac_8x16_lasx( uint8_t *p_pix, intptr_t i_stride );
+#define x264_pixel_hadamard_ac_16x8_lasx x264_template(pixel_hadamard_ac_16x8_lasx)
+uint64_t x264_pixel_hadamard_ac_16x8_lasx( uint8_t *p_pix, intptr_t i_stride );
+#define x264_pixel_hadamard_ac_16x16_lasx x264_template(pixel_hadamard_ac_16x16_lasx)
+uint64_t x264_pixel_hadamard_ac_16x16_lasx( uint8_t *p_pix, intptr_t i_stride );
+/*
+ * intra_satd_x3 prototypes: LSX for 16x16, 8x8c and 4x4, LASX for 16x16.
+ * These are the C wrappers defined in pixel-c.c: each runs three
+ * predictors on p_dec and stores the SATD of p_dec versus p_enc for each
+ * one in p_sad_array[0..2].
+ */
+#define x264_intra_satd_x3_16x16_lsx x264_template(intra_satd_x3_16x16_lsx)
+void x264_intra_satd_x3_16x16_lsx( uint8_t *p_enc, uint8_t *p_dec,
+                                   int32_t p_sad_array[3] );
+#define x264_intra_satd_x3_8x8c_lsx x264_template(intra_satd_x3_8x8c_lsx)
+void x264_intra_satd_x3_8x8c_lsx( uint8_t *p_enc, uint8_t *p_dec,
+                                  int32_t p_sad_array[3] );
+#define x264_intra_satd_x3_4x4_lsx x264_template(intra_satd_x3_4x4_lsx)
+void x264_intra_satd_x3_4x4_lsx( uint8_t *p_enc, uint8_t *p_dec,
+                                 int32_t p_sad_array[3] );
+#define x264_intra_satd_x3_16x16_lasx x264_template(intra_satd_x3_16x16_lasx)
+void x264_intra_satd_x3_16x16_lasx( uint8_t *p_enc, uint8_t *p_dec,
+                                    int32_t p_sad_array[3] );
+/*
+ * LSX pixel_ssd entry points for block sizes 16x16, 16x8, 8x16, 8x8, 8x4,
+ * 4x16, 4x8 and 4x4.
+ * Each takes a source pointer with its stride and a reference pointer with
+ * its stride, and returns an int32_t.
+ */
+#define x264_pixel_ssd_16x16_lsx x264_template(pixel_ssd_16x16_lsx)
+int32_t x264_pixel_ssd_16x16_lsx( uint8_t *p_src, intptr_t i_src_stride,
+                                  uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_ssd_16x8_lsx x264_template(pixel_ssd_16x8_lsx)
+int32_t x264_pixel_ssd_16x8_lsx( uint8_t *p_src, intptr_t i_src_stride,
+                                 uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_ssd_8x16_lsx x264_template(pixel_ssd_8x16_lsx)
+int32_t x264_pixel_ssd_8x16_lsx( uint8_t *p_src, intptr_t i_src_stride,
+                                 uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_ssd_8x8_lsx x264_template(pixel_ssd_8x8_lsx)
+int32_t x264_pixel_ssd_8x8_lsx( uint8_t *p_src, intptr_t i_src_stride,
+                                uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_ssd_8x4_lsx x264_template(pixel_ssd_8x4_lsx)
+int32_t x264_pixel_ssd_8x4_lsx( uint8_t *p_src, intptr_t i_src_stride,
+                                uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_ssd_4x16_lsx x264_template(pixel_ssd_4x16_lsx)
+int32_t x264_pixel_ssd_4x16_lsx( uint8_t *p_src, intptr_t i_src_stride,
+                                 uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_ssd_4x8_lsx x264_template(pixel_ssd_4x8_lsx)
+int32_t x264_pixel_ssd_4x8_lsx( uint8_t *p_src, intptr_t i_src_stride,
+                                uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_ssd_4x4_lsx x264_template(pixel_ssd_4x4_lsx)
+int32_t x264_pixel_ssd_4x4_lsx( uint8_t *p_src, intptr_t i_src_stride,
+                                uint8_t *p_ref, intptr_t i_ref_stride );
+/*
+ * LASX pixel_ssd entry points for block sizes 16x16, 16x8, 8x16 and 8x8,
+ * with the same signature as the LSX pixel_ssd prototypes.
+ */
+#define x264_pixel_ssd_16x16_lasx x264_template(pixel_ssd_16x16_lasx)
+int32_t x264_pixel_ssd_16x16_lasx( uint8_t *p_src, intptr_t i_src_stride,
+                                   uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_ssd_16x8_lasx x264_template(pixel_ssd_16x8_lasx)
+int32_t x264_pixel_ssd_16x8_lasx( uint8_t *p_src, intptr_t i_src_stride,
+                                  uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_ssd_8x16_lasx x264_template(pixel_ssd_8x16_lasx)
+int32_t x264_pixel_ssd_8x16_lasx( uint8_t *p_src, intptr_t i_src_stride,
+                                  uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_ssd_8x8_lasx x264_template(pixel_ssd_8x8_lasx)
+int32_t x264_pixel_ssd_8x8_lasx( uint8_t *p_src, intptr_t i_src_stride,
+                                 uint8_t *p_ref, intptr_t i_ref_stride );
+/*
+ * LSX variance prototypes.
+ *   pixel_var2_8x16 / pixel_var2_8x8: two pixel pointers with no stride
+ *     argument and a two-element int32_t ssd array; return an int32_t.
+ *   pixel_var_16x16 / 8x16 / 8x8: one pixel pointer with its stride;
+ *     return a uint64_t.
+ * NOTE(review): var2 takes no strides, so it presumably assumes fixed
+ * buffer pitches -- confirm against the implementation.
+ */
+#define x264_pixel_var2_8x16_lsx x264_template(pixel_var2_8x16_lsx)
+int32_t x264_pixel_var2_8x16_lsx( uint8_t *p_pix1, uint8_t *p_pix2,
+                                   int32_t ssd[2] );
+#define x264_pixel_var2_8x8_lsx x264_template(pixel_var2_8x8_lsx)
+int32_t x264_pixel_var2_8x8_lsx( uint8_t *p_pix1, uint8_t *p_pix2,
+                                 int32_t ssd[2] );
+#define x264_pixel_var_16x16_lsx x264_template(pixel_var_16x16_lsx)
+uint64_t x264_pixel_var_16x16_lsx( uint8_t *p_pix, intptr_t i_stride );
+#define x264_pixel_var_8x16_lsx x264_template(pixel_var_8x16_lsx)
+uint64_t x264_pixel_var_8x16_lsx( uint8_t *p_pix, intptr_t i_stride );
+#define x264_pixel_var_8x8_lsx x264_template(pixel_var_8x8_lsx)
+uint64_t x264_pixel_var_8x8_lsx( uint8_t *p_pix, intptr_t i_stride );
+/*
+ * LASX pixel_var2 prototypes for 8x16 and 8x8, with the same signature as
+ * the LSX var2 versions (two pixel pointers, two-element int32_t ssd array,
+ * int32_t return). No LASX pixel_var prototypes are declared here.
+ */
+#define x264_pixel_var2_8x16_lasx x264_template(pixel_var2_8x16_lasx)
+int32_t x264_pixel_var2_8x16_lasx( uint8_t *p_pix1, uint8_t *p_pix2,
+                                   int32_t ssd[2] );
+#define x264_pixel_var2_8x8_lasx x264_template(pixel_var2_8x8_lasx)
+int32_t x264_pixel_var2_8x8_lasx( uint8_t *p_pix1, uint8_t *p_pix2,
+                                  int32_t ssd[2] );
+/*
+ * LSX pixel_sa8d prototypes for 8x8 and 16x16.
+ * Each takes two pixel pointers, each followed by its own stride, and
+ * returns an int32_t. The 8x8 version is the kernel used by
+ * x264_intra_sa8d_x3_8x8_lsx in pixel-c.c.
+ */
+#define x264_pixel_sa8d_8x8_lsx x264_template(pixel_sa8d_8x8_lsx)
+int32_t x264_pixel_sa8d_8x8_lsx( uint8_t *p_pix1, intptr_t i_stride,
+                                 uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_sa8d_16x16_lsx x264_template(pixel_sa8d_16x16_lsx)
+int32_t x264_pixel_sa8d_16x16_lsx( uint8_t *p_pix1, intptr_t i_stride,
+                                   uint8_t *p_pix2, intptr_t i_stride2 );
+/*
+ * intra_sa8d_x3_8x8 wrappers (LSX and LASX, both defined in pixel-c.c)
+ * followed by the LASX pixel_sa8d kernels for 8x8 and 16x16.
+ * The intra wrappers take the encode block, a 36-byte edge array and a
+ * three-element int32_t result array; the sa8d kernels take two pixel
+ * pointers, each followed by its own stride, and return an int32_t.
+ */
+#define x264_intra_sa8d_x3_8x8_lsx x264_template(intra_sa8d_x3_8x8_lsx)
+void x264_intra_sa8d_x3_8x8_lsx( uint8_t *p_enc, uint8_t p_edge[36],
+                                 int32_t p_sad_array[3] );
+#define x264_intra_sa8d_x3_8x8_lasx x264_template(intra_sa8d_x3_8x8_lasx)
+void x264_intra_sa8d_x3_8x8_lasx( uint8_t *p_enc, uint8_t p_edge[36],
+                                  int32_t p_sad_array[3] );
+#define x264_pixel_sa8d_8x8_lasx x264_template(pixel_sa8d_8x8_lasx)
+int32_t x264_pixel_sa8d_8x8_lasx( uint8_t *p_pix1, intptr_t i_stride,
+                                  uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_sa8d_16x16_lasx x264_template(pixel_sa8d_16x16_lasx)
+int32_t x264_pixel_sa8d_16x16_lasx( uint8_t *p_pix1, intptr_t i_stride,
+                                    uint8_t *p_pix2, intptr_t i_stride2 );
+/*
+ * LSX intra_sad_x3 prototypes for 16x16, 8x8, 8x8c and 4x4.
+ * These are the C wrappers defined in pixel-c.c: each runs three
+ * predictors and stores the SAD against p_enc for each one in
+ * p_sad_array[0..2]. The 8x8 variant takes a 36-byte edge array instead of
+ * a decode-buffer pointer.
+ */
+#define x264_intra_sad_x3_16x16_lsx x264_template(intra_sad_x3_16x16_lsx)
+void x264_intra_sad_x3_16x16_lsx( uint8_t *p_enc, uint8_t *p_dec,
+                                  int32_t p_sad_array[3] );
+#define x264_intra_sad_x3_8x8_lsx x264_template(intra_sad_x3_8x8_lsx)
+void x264_intra_sad_x3_8x8_lsx( uint8_t *p_enc, uint8_t p_edge[36],
+                                int32_t p_sad_array[3] );
+#define x264_intra_sad_x3_8x8c_lsx x264_template(intra_sad_x3_8x8c_lsx)
+void x264_intra_sad_x3_8x8c_lsx( uint8_t *p_enc, uint8_t *p_dec,
+                                 int32_t p_sad_array[3] );
+#define x264_intra_sad_x3_4x4_lsx x264_template(intra_sad_x3_4x4_lsx)
+void x264_intra_sad_x3_4x4_lsx( uint8_t *p_enc, uint8_t *p_dec,
+                                int32_t p_sad_array[3] );
+
+#endif
--- a/common/loongarch/predict-a.S
+++ b/common/loongarch/predict-a.S
--- a/common/loongarch/predict-c.c
+++ b/common/loongarch/predict-c.c
@@ -0,0 +1,106 @@
+/*****************************************************************************
+ * predict-c.c: loongarch intra prediction
+ *****************************************************************************
+ * Copyright (C) 2023-2025 x264 project
+ *
+ * Authors: Xiwei Gu <guxiwei-hf@loongson.cn>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#include "common/common.h"
+#include "predict.h"
+
+void x264_predict_16x16_init_loongarch( int cpu, x264_predict_t pf[7] )
+{
+#if !HIGH_BIT_DEPTH
+    /* Compiled only when HIGH_BIT_DEPTH is 0: LSX supplies seven 16x16 predictors. */
+    if( cpu & X264_CPU_LSX )
+    {
+        pf[I_PRED_16x16_V]       = x264_predict_16x16_v_lsx;
+        pf[I_PRED_16x16_H]       = x264_predict_16x16_h_lsx;
+        pf[I_PRED_16x16_P]       = x264_predict_16x16_p_lsx;
+        pf[I_PRED_16x16_DC]      = x264_predict_16x16_dc_lsx;
+        pf[I_PRED_16x16_DC_TOP]  = x264_predict_16x16_dc_top_lsx;
+        pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_lsx;
+        pf[I_PRED_16x16_DC_128]  = x264_predict_16x16_dc_128_lsx;
+    }
+    /* Tested after LSX so the LASX plane predictor replaces the LSX one. */
+    if( cpu & X264_CPU_LASX )
+        pf[I_PRED_16x16_P]       = x264_predict_16x16_p_lasx;
+#endif
+}
+
+void x264_predict_8x8c_init_loongarch( int cpu, x264_predict_t pf[7] )
+{
+#if !HIGH_BIT_DEPTH /* pf is left untouched when HIGH_BIT_DEPTH is non-zero */
+    if( cpu & X264_CPU_LSX )
+    {   /* LSX reported: install the seven LSX chroma predictors */
+        pf[I_PRED_CHROMA_V]       = x264_predict_8x8c_v_lsx;
+        pf[I_PRED_CHROMA_H]       = x264_predict_8x8c_h_lsx;
+        pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_lsx;
+        pf[I_PRED_CHROMA_DC]      = x264_predict_8x8c_dc_lsx;
+        pf[I_PRED_CHROMA_DC_TOP]  = x264_predict_8x8c_dc_top_lsx;
+        pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left_lsx;
+        pf[I_PRED_CHROMA_DC_128]  = x264_predict_8x8c_dc_128_lsx;
+    }
+#endif
+}
+
+void x264_predict_8x8_init_loongarch( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter )
+{
+#if !HIGH_BIT_DEPTH /* predict_filter is never written here; pf only when HIGH_BIT_DEPTH is 0 */
+    if( cpu & X264_CPU_LSX )
+    {   /* baseline: ten LSX 8x8 predictors */
+        pf[I_PRED_8x8_V]       = x264_predict_8x8_v_lsx;
+        pf[I_PRED_8x8_H]       = x264_predict_8x8_h_lsx;
+        pf[I_PRED_8x8_DC]      = x264_predict_8x8_dc_lsx;
+        pf[I_PRED_8x8_DC_TOP]  = x264_predict_8x8_dc_top_lsx;
+        pf[I_PRED_8x8_DC_LEFT] = x264_predict_8x8_dc_left_lsx;
+        pf[I_PRED_8x8_DC_128]  = x264_predict_8x8_dc_128_lsx;
+        pf[I_PRED_8x8_DDL]     = x264_predict_8x8_ddl_lsx;
+        pf[I_PRED_8x8_DDR]     = x264_predict_8x8_ddr_lsx;
+        pf[I_PRED_8x8_VR]      = x264_predict_8x8_vr_lsx;
+        pf[I_PRED_8x8_VL]      = x264_predict_8x8_vl_lsx;
+    }
+    if( cpu & X264_CPU_LASX )
+    {   /* evaluated second, so these five LASX routines replace their LSX counterparts */
+        pf[I_PRED_8x8_H]       = x264_predict_8x8_h_lasx;
+        pf[I_PRED_8x8_DDL]     = x264_predict_8x8_ddl_lasx;
+        pf[I_PRED_8x8_DDR]     = x264_predict_8x8_ddr_lasx;
+        pf[I_PRED_8x8_VR]      = x264_predict_8x8_vr_lasx;
+        pf[I_PRED_8x8_VL]      = x264_predict_8x8_vl_lasx;
+    }
+#endif
+}
+
+void x264_predict_4x4_init_loongarch( int cpu, x264_predict_t pf[12] )
+{
+#if !HIGH_BIT_DEPTH /* pf is left untouched when HIGH_BIT_DEPTH is non-zero */
+    if( cpu & X264_CPU_LSX )
+    {   /* only these seven of the twelve pf entries are assigned here */
+        pf[I_PRED_4x4_V]       = x264_predict_4x4_v_lsx;
+        pf[I_PRED_4x4_H]       = x264_predict_4x4_h_lsx;
+        pf[I_PRED_4x4_DDL]     = x264_predict_4x4_ddl_lsx;
+        pf[I_PRED_4x4_DC]      = x264_predict_4x4_dc_lsx;
+        pf[I_PRED_4x4_DC_TOP]  = x264_predict_4x4_dc_top_lsx;
+        pf[I_PRED_4x4_DC_LEFT] = x264_predict_4x4_dc_left_lsx;
+        pf[I_PRED_4x4_DC_128]  = x264_predict_4x4_dc_128_lsx;
+    }
+#endif
+}
--- a/common/loongarch/predict.h
+++ b/common/loongarch/predict.h
@@ -0,0 +1,150 @@
+/*****************************************************************************
+ * predict.h: loongarch intra prediction
+ *****************************************************************************
+ * Copyright (C) 2023-2025 x264 project
+ *
+ * Authors: Xiwei Gu <guxiwei-hf@loongson.cn>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#ifndef X264_LOONGARCH_PREDICT_H
+#define X264_LOONGARCH_PREDICT_H
+
+#define x264_predict_8x8c_p_lsx x264_template(predict_8x8c_p_lsx)
+void x264_predict_8x8c_p_lsx(uint8_t *p_src); /* stored in pf[I_PRED_CHROMA_P] by x264_predict_8x8c_init_loongarch when X264_CPU_LSX is set */
+
+#define x264_predict_8x8c_v_lsx x264_template(predict_8x8c_v_lsx)
+void x264_predict_8x8c_v_lsx(uint8_t *p_src); /* pf[I_PRED_CHROMA_V] */
+
+#define x264_predict_8x8c_h_lsx x264_template(predict_8x8c_h_lsx)
+void x264_predict_8x8c_h_lsx(uint8_t *p_src); /* pf[I_PRED_CHROMA_H] */
+
+#define x264_predict_8x8c_dc_lsx x264_template(predict_8x8c_dc_lsx)
+void x264_predict_8x8c_dc_lsx(pixel *src); /* pf[I_PRED_CHROMA_DC] */
+
+#define x264_predict_8x8c_dc_128_lsx x264_template(predict_8x8c_dc_128_lsx)
+void x264_predict_8x8c_dc_128_lsx(pixel *src); /* pf[I_PRED_CHROMA_DC_128] */
+
+#define x264_predict_8x8c_dc_top_lsx x264_template(predict_8x8c_dc_top_lsx)
+void x264_predict_8x8c_dc_top_lsx(pixel *src); /* pf[I_PRED_CHROMA_DC_TOP] */
+
+#define x264_predict_8x8c_dc_left_lsx x264_template(predict_8x8c_dc_left_lsx)
+void x264_predict_8x8c_dc_left_lsx(pixel *src); /* pf[I_PRED_CHROMA_DC_LEFT] */
+
+#define x264_predict_16x16_dc_lsx x264_template(predict_16x16_dc_lsx)
+void x264_predict_16x16_dc_lsx( pixel *src ); /* stored in pf[I_PRED_16x16_DC] by x264_predict_16x16_init_loongarch when X264_CPU_LSX is set */
+
+#define x264_predict_16x16_dc_left_lsx x264_template(predict_16x16_dc_left_lsx)
+void x264_predict_16x16_dc_left_lsx( pixel *src ); /* pf[I_PRED_16x16_DC_LEFT] */
+
+#define x264_predict_16x16_dc_top_lsx x264_template(predict_16x16_dc_top_lsx)
+void x264_predict_16x16_dc_top_lsx( pixel *src ); /* pf[I_PRED_16x16_DC_TOP] */
+
+#define x264_predict_16x16_dc_128_lsx x264_template(predict_16x16_dc_128_lsx)
+void x264_predict_16x16_dc_128_lsx( pixel *src ); /* pf[I_PRED_16x16_DC_128] */
+
+#define x264_predict_16x16_h_lsx x264_template(predict_16x16_h_lsx)
+void x264_predict_16x16_h_lsx( pixel *src ); /* pf[I_PRED_16x16_H] */
+
+#define x264_predict_16x16_v_lsx x264_template(predict_16x16_v_lsx)
+void x264_predict_16x16_v_lsx( pixel *src ); /* pf[I_PRED_16x16_V] */
+
+#define x264_predict_16x16_p_lasx x264_template(predict_16x16_p_lasx)
+void x264_predict_16x16_p_lasx( pixel *src ); /* pf[I_PRED_16x16_P] when X264_CPU_LASX is set; replaces the LSX entry */
+
+#define x264_predict_16x16_p_lsx x264_template(predict_16x16_p_lsx)
+void x264_predict_16x16_p_lsx( pixel *src ); /* pf[I_PRED_16x16_P] when only X264_CPU_LSX is set */
+
+#define x264_predict_8x8_v_lsx x264_template(predict_8x8_v_lsx)
+void x264_predict_8x8_v_lsx( pixel *src, pixel edge[36] ); /* stored in pf[I_PRED_8x8_V] by x264_predict_8x8_init_loongarch when X264_CPU_LSX is set */
+
+#define x264_predict_8x8_h_lasx x264_template(predict_8x8_h_lasx)
+void x264_predict_8x8_h_lasx( pixel *src, pixel edge[36] ); /* pf[I_PRED_8x8_H] when X264_CPU_LASX is set; replaces the LSX entry */
+
+#define x264_predict_8x8_h_lsx x264_template(predict_8x8_h_lsx)
+void x264_predict_8x8_h_lsx( pixel *src, pixel edge[36] ); /* pf[I_PRED_8x8_H] when only X264_CPU_LSX is set */
+
+#define x264_predict_8x8_dc_lsx x264_template(predict_8x8_dc_lsx)
+void x264_predict_8x8_dc_lsx( pixel *src, pixel edge[36] ); /* pf[I_PRED_8x8_DC] */
+
+#define x264_predict_8x8_dc_left_lsx x264_template(predict_8x8_dc_left_lsx)
+void x264_predict_8x8_dc_left_lsx( pixel *src, pixel edge[36] ); /* pf[I_PRED_8x8_DC_LEFT] */
+
+#define x264_predict_8x8_dc_top_lsx x264_template(predict_8x8_dc_top_lsx)
+void x264_predict_8x8_dc_top_lsx( pixel *src, pixel edge[36] ); /* pf[I_PRED_8x8_DC_TOP] */
+
+#define x264_predict_8x8_dc_128_lsx x264_template(predict_8x8_dc_128_lsx)
+void x264_predict_8x8_dc_128_lsx( pixel *src, pixel edge[36] ); /* pf[I_PRED_8x8_DC_128] */
+
+#define x264_predict_8x8_ddl_lasx x264_template(predict_8x8_ddl_lasx)
+void x264_predict_8x8_ddl_lasx( pixel *src, pixel edge[36] ); /* stored in pf[I_PRED_8x8_DDL] by x264_predict_8x8_init_loongarch when X264_CPU_LASX is set; replaces the LSX entry */
+
+#define x264_predict_8x8_ddl_lsx x264_template(predict_8x8_ddl_lsx)
+void x264_predict_8x8_ddl_lsx( pixel *src, pixel edge[36] ); /* pf[I_PRED_8x8_DDL] when only X264_CPU_LSX is set */
+
+#define x264_predict_8x8_ddr_lasx x264_template(predict_8x8_ddr_lasx)
+void x264_predict_8x8_ddr_lasx( pixel *src, pixel edge[36] ); /* pf[I_PRED_8x8_DDR], LASX */
+
+#define x264_predict_8x8_ddr_lsx x264_template(predict_8x8_ddr_lsx)
+void x264_predict_8x8_ddr_lsx( pixel *src, pixel edge[36] ); /* pf[I_PRED_8x8_DDR], LSX */
+
+#define x264_predict_8x8_vr_lasx x264_template(predict_8x8_vr_lasx)
+void x264_predict_8x8_vr_lasx( pixel *src, pixel edge[36] ); /* pf[I_PRED_8x8_VR], LASX */
+
+#define x264_predict_8x8_vr_lsx x264_template(predict_8x8_vr_lsx)
+void x264_predict_8x8_vr_lsx( pixel *src, pixel edge[36] ); /* pf[I_PRED_8x8_VR], LSX */
+
+#define x264_predict_8x8_vl_lasx x264_template(predict_8x8_vl_lasx)
+void x264_predict_8x8_vl_lasx( pixel *src, pixel edge[36] ); /* pf[I_PRED_8x8_VL], LASX */
+
+#define x264_predict_8x8_vl_lsx x264_template(predict_8x8_vl_lsx)
+void x264_predict_8x8_vl_lsx( pixel *src, pixel edge[36] ); /* pf[I_PRED_8x8_VL], LSX */
+
+#define x264_predict_4x4_v_lsx x264_template(predict_4x4_v_lsx)
+void x264_predict_4x4_v_lsx( pixel *p_src ); /* stored in pf[I_PRED_4x4_V] by x264_predict_4x4_init_loongarch when X264_CPU_LSX is set */
+
+#define x264_predict_4x4_h_lsx x264_template(predict_4x4_h_lsx)
+void x264_predict_4x4_h_lsx( pixel *p_src ); /* pf[I_PRED_4x4_H] */
+
+#define x264_predict_4x4_dc_lsx x264_template(predict_4x4_dc_lsx)
+void x264_predict_4x4_dc_lsx( pixel *p_src ); /* pf[I_PRED_4x4_DC] */
+
+#define x264_predict_4x4_ddl_lsx x264_template(predict_4x4_ddl_lsx)
+void x264_predict_4x4_ddl_lsx( pixel *p_src ); /* pf[I_PRED_4x4_DDL] */
+
+#define x264_predict_4x4_dc_top_lsx x264_template(predict_4x4_dc_top_lsx)
+void x264_predict_4x4_dc_top_lsx( pixel *p_src ); /* pf[I_PRED_4x4_DC_TOP] */
+
+#define x264_predict_4x4_dc_left_lsx x264_template(predict_4x4_dc_left_lsx)
+void x264_predict_4x4_dc_left_lsx( pixel *p_src ); /* pf[I_PRED_4x4_DC_LEFT] */
+
+#define x264_predict_4x4_dc_128_lsx x264_template(predict_4x4_dc_128_lsx)
+void x264_predict_4x4_dc_128_lsx( pixel *p_src ); /* pf[I_PRED_4x4_DC_128] */
+
+#define x264_predict_4x4_init_loongarch x264_template(predict_4x4_init_loongarch)
+void x264_predict_4x4_init_loongarch( int cpu, x264_predict_t pf[12] ); /* assigns LSX 4x4 predictors into pf when cpu has X264_CPU_LSX; body is compiled out when HIGH_BIT_DEPTH is non-zero */
+#define x264_predict_8x8_init_loongarch x264_template(predict_8x8_init_loongarch)
+void x264_predict_8x8_init_loongarch( int cpu, x264_predict8x8_t pf[12],
+                                      x264_predict_8x8_filter_t *predict_filter ); /* LSX entries first, then LASX replacements; predict_filter is not written by the definition in predict-c.c */
+#define x264_predict_8x8c_init_loongarch x264_template(predict_8x8c_init_loongarch)
+void x264_predict_8x8c_init_loongarch( int cpu, x264_predict_t pf[7] ); /* assigns the seven LSX chroma predictors when cpu has X264_CPU_LSX */
+#define x264_predict_16x16_init_loongarch x264_template(predict_16x16_init_loongarch)
+void x264_predict_16x16_init_loongarch( int cpu, x264_predict_t pf[7] ); /* LSX entries first; X264_CPU_LASX then replaces pf[I_PRED_16x16_P] */
+
+#endif /* X264_LOONGARCH_PREDICT_H */
--- a/common/loongarch/quant-a.S
+++ b/common/loongarch/quant-a.S
--- a/common/loongarch/quant.h
+++ b/common/loongarch/quant.h
@@ -0,0 +1,96 @@
+/*****************************************************************************
+ * quant.h: loongarch quantization and level-run
+ *****************************************************************************
+ * Copyright (C) 2023-2025 x264 project
+ *
+ * Authors: Shiyou Yin <yinshiyou-hf@loongson.cn>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#ifndef X264_LOONGARCH_QUANT_H
+#define X264_LOONGARCH_QUANT_H
+
+#define x264_coeff_last64_lsx x264_template(coeff_last64_lsx)
+int32_t x264_coeff_last64_lsx( int16_t *p_src ); /* NOTE(review): presumably the index of the last non-zero coefficient in a 64-entry block - confirm against the LSX implementation */
+#define x264_coeff_last16_lsx x264_template(coeff_last16_lsx)
+int32_t x264_coeff_last16_lsx( int16_t *p_src ); /* 16-entry variant, going by the name */
+#define x264_coeff_last15_lsx x264_template(coeff_last15_lsx)
+int32_t x264_coeff_last15_lsx( int16_t *p_src ); /* 15-entry variant, going by the name */
+#define x264_coeff_last8_lsx x264_template(coeff_last8_lsx)
+int32_t x264_coeff_last8_lsx( int16_t *p_src ); /* 8-entry variant, going by the name */
+#define x264_coeff_last4_lsx x264_template(coeff_last4_lsx)
+int32_t x264_coeff_last4_lsx( int16_t *p_src ); /* 4-entry variant, going by the name */
+
+#define x264_quant_4x4_lsx x264_template(quant_4x4_lsx)
+int32_t x264_quant_4x4_lsx( int16_t *p_dct, uint16_t *p_mf, uint16_t *p_bias ); /* NOTE(review): presumably quantizes p_dct in place using the p_mf/p_bias tables; meaning of the int32_t result is not shown here - confirm */
+#define x264_quant_4x4x4_lsx x264_template(quant_4x4x4_lsx)
+int32_t x264_quant_4x4x4_lsx( int16_t p_dct[4][16],
+                               uint16_t pu_mf[16], uint16_t pu_bias[16] ); /* four 16-coefficient blocks declared against one 16-entry mf table and one 16-entry bias table */
+#define x264_quant_8x8_lsx x264_template(quant_8x8_lsx)
+int32_t x264_quant_8x8_lsx( int16_t *p_dct, uint16_t *p_mf, uint16_t *p_bias ); /* same parameter list as x264_quant_4x4_lsx */
+#define x264_quant_4x4_dc_lsx x264_template(quant_4x4_dc_lsx)
+int32_t x264_quant_4x4_dc_lsx( dctcoef dct[16], int32_t mf, int32_t bias ); /* DC variant: mf and bias are scalars rather than tables */
+#define x264_quant_2x2_dc_lsx x264_template(quant_2x2_dc_lsx)
+int32_t x264_quant_2x2_dc_lsx( dctcoef dct[4], int32_t mf, int32_t bias ); /* scalar mf/bias again, over a 4-entry dct array */
+
+#define x264_dequant_4x4_lsx x264_template(dequant_4x4_lsx)
+void x264_dequant_4x4_lsx( dctcoef dct[16], int dequant_mf[6][16], int i_qp ); /* dequant_mf is declared as 6 tables of 16 entries; NOTE(review): presumably the table and scale are derived from i_qp - confirm */
+#define x264_dequant_8x8_lsx x264_template(dequant_8x8_lsx)
+void x264_dequant_8x8_lsx( dctcoef dct[64], int dequant_mf[6][64], int i_qp ); /* 64-entry dct against 6 tables of 64 entries */
+#define x264_dequant_4x4_dc_lsx x264_template(dequant_4x4_dc_lsx)
+void x264_dequant_4x4_dc_lsx( dctcoef dct[16], int dequant_mf[6][16], int i_qp ); /* same parameter list as x264_dequant_4x4_lsx */
+
+#define x264_decimate_score15_lsx x264_template(decimate_score15_lsx)
+int x264_decimate_score15_lsx( dctcoef *dct ); /* NOTE(review): presumably a decimation score over 15 coefficients - confirm against the LSX implementation */
+#define x264_decimate_score16_lsx x264_template(decimate_score16_lsx)
+int x264_decimate_score16_lsx( dctcoef *dct ); /* 16-coefficient variant, going by the name */
+#define x264_decimate_score64_lsx x264_template(decimate_score64_lsx)
+int x264_decimate_score64_lsx( dctcoef *dct ); /* 64-coefficient variant, going by the name */
+
+#define x264_coeff_last64_lasx x264_template(coeff_last64_lasx)
+int32_t x264_coeff_last64_lasx( int16_t *p_src ); /* LASX counterpart: same parameter list as x264_coeff_last64_lsx */
+#define x264_coeff_last16_lasx x264_template(coeff_last16_lasx)
+int32_t x264_coeff_last16_lasx( int16_t *p_src ); /* LASX counterpart: same parameter list as x264_coeff_last16_lsx */
+#define x264_coeff_last15_lasx x264_template(coeff_last15_lasx)
+int32_t x264_coeff_last15_lasx( int16_t *p_src ); /* LASX counterpart: same parameter list as x264_coeff_last15_lsx */
+
+#define x264_quant_4x4x4_lasx x264_template(quant_4x4x4_lasx)
+int32_t x264_quant_4x4x4_lasx( int16_t p_dct[4][16],
+                               uint16_t pu_mf[16], uint16_t pu_bias[16] ); /* LASX counterpart: same parameter list as x264_quant_4x4x4_lsx */
+
+#define x264_dequant_4x4_lasx x264_template(dequant_4x4_lasx)
+void x264_dequant_4x4_lasx( dctcoef dct[16], int dequant_mf[6][16], int i_qp ); /* same parameter list as x264_dequant_4x4_lsx */
+#define x264_dequant_8x8_lasx x264_template(dequant_8x8_lasx)
+void x264_dequant_8x8_lasx( dctcoef dct[64], int dequant_mf[6][64], int i_qp ); /* same parameter list as x264_dequant_8x8_lsx */
+#define x264_dequant_4x4_dc_lasx x264_template(dequant_4x4_dc_lasx)
+void x264_dequant_4x4_dc_lasx( dctcoef dct[16], int dequant_mf[6][16], int i_qp ); /* same parameter list as x264_dequant_4x4_dc_lsx */
+
+#define x264_coeff_level_run16_lasx x264_template(coeff_level_run16_lasx)
+int x264_coeff_level_run16_lasx( dctcoef *, x264_run_level_t * ); /* parameters are unnamed; NOTE(review): presumably reads the coefficients and fills the x264_run_level_t - confirm, including the meaning of the int result */
+#define x264_coeff_level_run15_lasx x264_template(coeff_level_run15_lasx)
+int x264_coeff_level_run15_lasx( dctcoef *, x264_run_level_t * ); /* 15-coefficient variant, going by the name */
+
+#define x264_coeff_level_run16_lsx x264_template(coeff_level_run16_lsx)
+int x264_coeff_level_run16_lsx( dctcoef *, x264_run_level_t * ); /* LSX counterpart: same parameter list as x264_coeff_level_run16_lasx */
+#define x264_coeff_level_run15_lsx x264_template(coeff_level_run15_lsx)
+int x264_coeff_level_run15_lsx( dctcoef *, x264_run_level_t * ); /* LSX counterpart: same parameter list as x264_coeff_level_run15_lasx */
+#define x264_coeff_level_run8_lsx x264_template(coeff_level_run8_lsx)
+int x264_coeff_level_run8_lsx( dctcoef *, x264_run_level_t * ); /* 8-coefficient variant; no LASX counterpart is declared in this header */
+
+#endif/* X264_LOONGARCH_QUANT_H */
--- a/common/loongarch/sad-a.S
+++ b/common/loongarch/sad-a.S