;******************************************************************************
;* SIMD-optimized HuffYUV encoding functions
;* Copyright (c) 2000, 2001 Fabrice Bellard
;* Copyright (c) 2002-2004 Michael Niedermayer
;*
;* MMX optimization by Nick Kurshev
;* Conversion to NASM format by Tiancheng "Timothy" Gu
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text

%include "libavcodec/x86/huffyuvdsp_template.asm"

;------------------------------------------------------------------------------
; void ff_diff_int16(uint16_t *dst, const uint16_t *src1, const uint16_t *src2,
;                    unsigned mask, int w);
;------------------------------------------------------------------------------

%macro DIFF_INT16 0
cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
    test src1q, mmsize-1
    jnz .unaligned
    test src2q, mmsize-1
    jnz .unaligned
    test  dstq, mmsize-1
    jnz .unaligned
    INT16_LOOP a, sub
.unaligned:
    INT16_LOOP u, sub
%endmacro

INIT_XMM sse2
DIFF_INT16
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
DIFF_INT16
%endif

;------------------------------------------------------------------------------
; void ff_sub_hfyu_median_pred_int16(uint16_t *dst, const uint16_t *src1,
;                                    const uint16_t *src2, unsigned mask,
;                                    int w, int *left, int *left_top);
;------------------------------------------------------------------------------

%macro SUB_HFYU_MEDIAN_PRED_INT16 1 ; u,s for pmaxuw vs pmaxsw
cglobal sub_hfyu_median_pred_int16, 7,7,6, dst, src1, src2, mask, w, left, left_top
    movd    xm5, maskd
    lea      wd, [wd+wd-(mmsize-1)]
    movu    xm0, [src1q]
    movu    xm2, [src2q]
    SPLATW   m5, xm5
    add    dstq, wq
    movd    xm1, [left_topq]
    neg      wq
    movd    xm3, [leftq]
%if mmsize >= 32
    movu    xm4, [src1q+14]
%endif
    sub   src1q, wq
    pslldq  xm0, 2
    pslldq  xm2, 2
    por     xm0, xm1               ; splice *left_top in front of the top row
%if mmsize >= 32
    vinserti128 m0, xm4, 1
%endif
    por     xm2, xm3               ; splice *left in front of the current row
%if mmsize >= 32
    vinserti128 m2, [src2q+14], 1
%endif
    sub   src2q, wq
    jmp .init                      ; first lt/l vectors were built above
.loop:
    movu     m0, [src1q + wq - 2]  ; lt
    movu     m2, [src2q + wq - 2]  ; l
.init:
    movu     m1, [src1q + wq]      ; t
    movu     m3, [src2q + wq]      ; current sample
    psubw    m4, m2, m0            ; l - lt
    pmax%1w  m0, m1, m2            ; max(t, l)
    paddw    m4, m1                ; l - lt + t
    pmin%1w  m2, m1                ; min(t, l)
    pand     m4, m5                ; (l - lt + t) & mask
    pmin%1w  m4, m0
    pmax%1w  m4, m2                ; pred = median(l, t, l - lt + t)
    psubw    m3, m4                ; l - pred
    pand     m3, m5
    movu     [dstq + wq], m3
    add      wq, mmsize
    js .loop
    cmp      wd, mmsize-1
    jne .tail                      ; row not chunk-aligned: redo the last chunk
    movzx src1d, word [src1q + (mmsize-1) - 2]
    movzx src2d, word [src2q + (mmsize-1) - 2]
    mov [left_topq], src1d
    mov [leftq], src2d
    RET
.tail:
    mov      wq, -1                ; process the last, partially overlapping chunk
    jmp .loop
%endmacro

INIT_XMM sse2
SUB_HFYU_MEDIAN_PRED_INT16 s
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
SUB_HFYU_MEDIAN_PRED_INT16 u
%endif
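
; For reference, the SUB_HFYU_MEDIAN_PRED_INT16 loop above is meant to compute
; what a scalar routine along the following lines computes. This is a rough C
; sketch kept inside comments so the file still assembles; the names
; sub_median_pred_int16_ref and mid3 are illustrative only, not part of
; FFmpeg's API, and the sketch glosses over the signed vs. unsigned word
; compares that the sse2 (pmaxsw/pminsw) and avx2 (pmaxuw/pminuw) variants use.
;
;     #include <stdint.h>
;
;     /* median of three values */
;     static int mid3(int a, int b, int c)
;     {
;         if (a > b) { int t = a; a = b; b = t; }
;         return c < a ? a : c > b ? b : c;
;     }
;
;     static void sub_median_pred_int16_ref(uint16_t *dst, const uint16_t *src1,
;                                           const uint16_t *src2, unsigned mask,
;                                           int w, int *left, int *left_top)
;     {
;         int l  = *left;      /* sample to the left of the current one  */
;         int lt = *left_top;  /* sample above-left of the current one   */
;         for (int i = 0; i < w; i++) {
;             /* pred = median of left, top and the gradient left + top - topleft */
;             int pred = mid3(l, src1[i], (l + src1[i] - lt) & mask);
;             lt = src1[i];    /* becomes the next iteration's top-left  */
;             l  = src2[i];    /* becomes the next iteration's left      */
;             dst[i] = (l - pred) & mask;
;         }
;         *left     = l;
;         *left_top = lt;
;     }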