diff --git a/libavcodec/x86/blockdsp.asm b/libavcodec/x86/blockdsp.asm
index c793858861..af2b0decd1 100644
--- a/libavcodec/x86/blockdsp.asm
+++ b/libavcodec/x86/blockdsp.asm
@@ -64,7 +64,7 @@ cglobal clear_blocks, 1, 2, %1, blocks, len
     add blocksq, 768
     mov lenq, -768
     ZERO m0, m0
-.loop
+.loop:
     mova [blocksq+lenq+mmsize*0], m0
     mova [blocksq+lenq+mmsize*1], m0
     mova [blocksq+lenq+mmsize*2], m0
diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
index 1ac237885a..030b4d4dd8 100644
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/dcadsp.asm
@@ -333,7 +333,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
 %if ARCH_X86_32
     mov buf2, synth_buf2mp
 %endif
-.mainloop
+.mainloop:
     ; m1 = a m2 = b m3 = c m4 = d
     SETZERO m3
     SETZERO m4
diff --git a/libavcodec/x86/diracdsp_yasm.asm b/libavcodec/x86/diracdsp_yasm.asm
index d3cf9f1971..40fe2c8212 100644
--- a/libavcodec/x86/diracdsp_yasm.asm
+++ b/libavcodec/x86/diracdsp_yasm.asm
@@ -149,7 +149,7 @@ cglobal put_signed_rect_clamped_%1, 5,9,3, dst, dst_stride, src, src_stride, w,
 %define hd r5mp
 %endif
 
-.loopy
+.loopy:
     lea src2q, [srcq+src_strideq*2]
     lea dst2q, [dstq+dst_strideq]
 .loopx:
diff --git a/libavcodec/x86/h264_qpel_10bit.asm b/libavcodec/x86/h264_qpel_10bit.asm
index 757c425898..7e9be36f27 100644
--- a/libavcodec/x86/h264_qpel_10bit.asm
+++ b/libavcodec/x86/h264_qpel_10bit.asm
@@ -386,7 +386,7 @@ MC_CACHE MC10
 ; void ff_h264_qpel_mc02(uint8_t *dst, uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 %macro V_FILT 10
-v_filt%9_%10_10
+v_filt%9_%10_10:
     add r4, r2
 .no_addr4:
     FILT_V m0, m1, m2, m3, m4, m5, m6, m7
diff --git a/libavcodec/x86/h264_weight.asm b/libavcodec/x86/h264_weight.asm
index 897c616a81..6c57d57bc0 100644
--- a/libavcodec/x86/h264_weight.asm
+++ b/libavcodec/x86/h264_weight.asm
@@ -139,12 +139,12 @@ WEIGHT_FUNC_HALF_MM 8, 8
     je .nonnormal
     cmp r5, 128
     jne .normal
-.nonnormal
+.nonnormal:
     sar r5, 1
     sar r6, 1
     sar off_regd, 1
     sub r4, 1
-.normal
+.normal:
 %if cpuflag(ssse3)
     movd m4, r5d
     movd m0, r6d
diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm
index 986493f20c..98a803883e 100644
--- a/libavcodec/x86/hevc_mc.asm
+++ b/libavcodec/x86/hevc_mc.asm
@@ -683,7 +683,7 @@ HEVC_BI_PEL_PIXELS %1, %2
 %macro HEVC_PEL_PIXELS 2
 cglobal hevc_put_hevc_pel_pixels%1_%2, 4, 4, 3, dst, src, srcstride,height
     pxor m2, m2
-.loop
+.loop:
     SIMPLE_LOAD %1, %2, srcq, m0
     MC_PIXEL_COMPUTE %1, %2, 1
     PEL_10STORE%1 dstq, m0, m1
@@ -693,7 +693,7 @@ cglobal hevc_put_hevc_pel_pixels%1_%2, 4, 4, 3, dst, src, srcstride,height
 
 %macro HEVC_UNI_PEL_PIXELS 2
 cglobal hevc_put_hevc_uni_pel_pixels%1_%2, 5, 5, 2, dst, dststride, src, srcstride,height
-.loop
+.loop:
     SIMPLE_LOAD %1, %2, srcq, m0
     PEL_%2STORE%1 dstq, m0, m1
     add dstq, dststrideq ; dst += dststride
@@ -707,7 +707,7 @@ cglobal hevc_put_hevc_uni_pel_pixels%1_%2, 5, 5, 2, dst, dststride, src, srcstri
 cglobal hevc_put_hevc_bi_pel_pixels%1_%2, 6, 6, 6, dst, dststride, src, srcstride, src2, height
     pxor m2, m2
     movdqa m5, [pw_bi_%2]
-.loop
+.loop:
     SIMPLE_LOAD %1, %2, srcq, m0
     SIMPLE_BILOAD %1, src2q, m3, m4
     MC_PIXEL_COMPUTE %1, %2, 1
@@ -739,7 +739,7 @@ cglobal hevc_put_hevc_bi_pel_pixels%1_%2, 6, 6, 6, dst, dststride, src, srcstrid
 cglobal hevc_put_hevc_epel_h%1_%2, 5, 6, XMM_REGS, dst, src, srcstride, height, mx, rfilter
 %assign %%stride ((%2 + 7)/8)
     EPEL_FILTER %2, mx, m4, m5, rfilter
-.loop
+.loop:
     EPEL_LOAD %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE %2, %1, m4, m5, 1
     PEL_10STORE%1 dstq, m0, m1
@@ -750,7 +750,7 @@ cglobal hevc_put_hevc_uni_epel_h%1_%2, 6, 7, XMM_REGS, dst, dststride, src, srcs
 %assign %%stride ((%2 + 7)/8)
     movdqa m6, [pw_%2]
     EPEL_FILTER %2, mx, m4, m5, rfilter
-.loop
+.loop:
     EPEL_LOAD %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE %2, %1, m4, m5
     UNI_COMPUTE %1, %2, m0, m1, m6
@@ -764,7 +764,7 @@ cglobal hevc_put_hevc_uni_epel_h%1_%2, 6, 7, XMM_REGS, dst, dststride, src, srcs
 cglobal hevc_put_hevc_bi_epel_h%1_%2, 7, 8, XMM_REGS, dst, dststride, src, srcstride, src2, height, mx, rfilter
     movdqa m6, [pw_bi_%2]
     EPEL_FILTER %2, mx, m4, m5, rfilter
-.loop
+.loop:
     EPEL_LOAD %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE %2, %1, m4, m5, 1
     SIMPLE_BILOAD %1, src2q, m2, m3
@@ -788,7 +788,7 @@ cglobal hevc_put_hevc_epel_v%1_%2, 4, 6, XMM_REGS, dst, src, srcstride, height,
     sub srcq, srcstrideq
     EPEL_FILTER %2, my, m4, m5, r3src
     lea r3srcq, [srcstrideq*3]
-.loop
+.loop:
     EPEL_LOAD %2, srcq, srcstride, %1
     EPEL_COMPUTE %2, %1, m4, m5, 1
     PEL_10STORE%1 dstq, m0, m1
@@ -801,7 +801,7 @@ cglobal hevc_put_hevc_uni_epel_v%1_%2, 5, 7, XMM_REGS, dst, dststride, src, srcs
     sub srcq, srcstrideq
     EPEL_FILTER %2, my, m4, m5, r3src
     lea r3srcq, [srcstrideq*3]
-.loop
+.loop:
     EPEL_LOAD %2, srcq, srcstride, %1
     EPEL_COMPUTE %2, %1, m4, m5
     UNI_COMPUTE %1, %2, m0, m1, m6
@@ -819,7 +819,7 @@ cglobal hevc_put_hevc_bi_epel_v%1_%2, 6, 8, XMM_REGS, dst, dststride, src, srcst
     sub srcq, srcstrideq
     EPEL_FILTER %2, my, m4, m5, r3src
     lea r3srcq, [srcstrideq*3]
-.loop
+.loop:
     EPEL_LOAD %2, srcq, srcstride, %1
     EPEL_COMPUTE %2, %1, m4, m5, 1
     SIMPLE_BILOAD %1, src2q, m2, m3
@@ -866,7 +866,7 @@ cglobal hevc_put_hevc_epel_hv%1_%2, 6, 7, 16 , dst, src, srcstride, height, mx,
 %endif
     SWAP m6, m0
     add srcq, srcstrideq
-.loop
+.loop:
     EPEL_LOAD %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE %2, %1, m14, m15
 %if (%1 > 8 && (%2 == 8))
@@ -932,7 +932,7 @@ cglobal hevc_put_hevc_uni_epel_hv%1_%2, 7, 8, 16 , dst, dststride, src, srcstrid
 %endif
     SWAP m6, m0
     add srcq, srcstrideq
-.loop
+.loop:
     EPEL_LOAD %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE %2, %1, m14, m15
 %if (%1 > 8 && (%2 == 8))
@@ -996,7 +996,7 @@ cglobal hevc_put_hevc_bi_epel_hv%1_%2, 8, 9, 16, dst, dststride, src, srcstride,
 %endif
     SWAP m6, m0
     add srcq, srcstrideq
-.loop
+.loop:
     EPEL_LOAD %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE %2, %1, m14, m15
 %if (%1 > 8 && (%2 == 8))
@@ -1054,7 +1054,7 @@ cglobal hevc_put_hevc_bi_epel_hv%1_%2, 8, 9, 16, dst, dststride, src, srcstride,
 %macro HEVC_PUT_HEVC_QPEL 2
 cglobal hevc_put_hevc_qpel_h%1_%2, 5, 6, 16, dst, src, srcstride, height, mx, rfilter
     QPEL_FILTER %2, mx
-.loop
+.loop:
     QPEL_H_LOAD %2, srcq, %1, 10
     QPEL_COMPUTE %1, %2, 1
 %if %2 > 8
@@ -1067,7 +1067,7 @@ cglobal hevc_put_hevc_qpel_h%1_%2, 5, 6, 16, dst, src, srcstride, height, mx, rf
 cglobal hevc_put_hevc_uni_qpel_h%1_%2, 6, 7, 16 , dst, dststride, src, srcstride, height, mx, rfilter
     mova m9, [pw_%2]
     QPEL_FILTER %2, mx
-.loop
+.loop:
     QPEL_H_LOAD %2, srcq, %1, 10
     QPEL_COMPUTE %1, %2
 %if %2 > 8
@@ -1084,7 +1084,7 @@ cglobal hevc_put_hevc_uni_qpel_h%1_%2, 6, 7, 16 , dst, dststride, src, srcstride
 cglobal hevc_put_hevc_bi_qpel_h%1_%2, 7, 8, 16 , dst, dststride, src, srcstride, src2, height, mx, rfilter
     movdqa m9, [pw_bi_%2]
     QPEL_FILTER %2, mx
-.loop
+.loop:
     QPEL_H_LOAD %2, srcq, %1, 10
     QPEL_COMPUTE %1, %2, 1
 %if %2 > 8
@@ -1111,7 +1111,7 @@ cglobal hevc_put_hevc_qpel_v%1_%2, 4, 8, 16, dst, src, srcstride, height, r3src,
     movifnidn myd, mym
     lea r3srcq, [srcstrideq*3]
     QPEL_FILTER %2, my
-.loop
+.loop:
     QPEL_V_LOAD %2, srcq, srcstride, %1, r7
     QPEL_COMPUTE %1, %2, 1
 %if %2 > 8
@@ -1126,7 +1126,7 @@ cglobal hevc_put_hevc_uni_qpel_v%1_%2, 5, 9, 16, dst, dststride, src, srcstride,
     movdqa m9, [pw_%2]
     lea r3srcq, [srcstrideq*3]
     QPEL_FILTER %2, my
-.loop
+.loop:
     QPEL_V_LOAD %2, srcq, srcstride, %1, r8
     QPEL_COMPUTE %1, %2
 %if %2 > 8
@@ -1145,7 +1145,7 @@ cglobal hevc_put_hevc_bi_qpel_v%1_%2, 6, 10, 16, dst, dststride, src, srcstride,
     movdqa m9, [pw_bi_%2]
     lea r3srcq, [srcstrideq*3]
     QPEL_FILTER %2, my
-.loop
+.loop:
     QPEL_V_LOAD %2, srcq, srcstride, %1, r9
     QPEL_COMPUTE %1, %2, 1
 %if %2 > 8
@@ -1209,7 +1209,7 @@ cglobal hevc_put_hevc_qpel_hv%1_%2, 6, 8, 16, dst, src, srcstride, height, mx, m
     QPEL_HV_COMPUTE %1, %2, mx, ackssdw
     SWAP m14, m0
     add srcq, srcstrideq
-.loop
+.loop:
     QPEL_H_LOAD %2, srcq, %1, 15
     QPEL_HV_COMPUTE %1, %2, mx, ackssdw
     SWAP m15, m0
@@ -1285,7 +1285,7 @@ cglobal hevc_put_hevc_uni_qpel_hv%1_%2, 7, 9, 16 , dst, dststride, src, srcstrid
     QPEL_HV_COMPUTE %1, %2, mx, ackssdw
     SWAP m14, m0
     add srcq, srcstrideq
-.loop
+.loop:
     QPEL_H_LOAD %2, srcq, %1, 15
     QPEL_HV_COMPUTE %1, %2, mx, ackssdw
     SWAP m15, m0
@@ -1366,7 +1366,7 @@ cglobal hevc_put_hevc_bi_qpel_hv%1_%2, 8, 10, 16, dst, dststride, src, srcstride
     QPEL_HV_COMPUTE %1, %2, mx, ackssdw
     SWAP m14, m0
     add srcq, srcstrideq
-.loop
+.loop:
     QPEL_H_LOAD %2, srcq, %1, 15
     QPEL_HV_COMPUTE %1, %2, mx, ackssdw
     SWAP m15, m0
@@ -1444,7 +1444,7 @@ cglobal hevc_put_hevc_uni_w%1_%2, 6, 6, 7, dst, dststride, src, height, denom, w
 %if WIN64 || ARCH_X86_32
     mov SHIFT, heightm
 %endif
-.loop
+.loop:
     SIMPLE_LOAD %1, 10, srcq, m0
 %if %1 <= 4
     punpcklwd m0, m1
@@ -1513,7 +1513,7 @@ cglobal hevc_put_hevc_bi_w%1_%2, 4, 6, 10, dst, dststride, src, src2, height, de
 %endif
     pslld m4, m0
 
-.loop
+.loop:
     SIMPLE_LOAD %1, 10, srcq, m0
     SIMPLE_LOAD %1, 10, src2q, m8
 %if %1 <= 4
diff --git a/libavcodec/x86/hevc_sao.asm b/libavcodec/x86/hevc_sao.asm
index 86ef847ba2..2461740bf3 100644
--- a/libavcodec/x86/hevc_sao.asm
+++ b/libavcodec/x86/hevc_sao.asm
@@ -141,7 +141,7 @@ cglobal hevc_sao_band_filter_%1_8, 6, 6, 15, 7*mmsize*ARCH_X86_32, dst, src, dst
     HEVC_SAO_BAND_FILTER_INIT 8
 
 align 16
-.loop
+.loop:
 %if %1 == 8
     movq m8, [srcq]
     punpcklbw m8, m14
@@ -191,7 +191,7 @@ cglobal hevc_sao_band_filter_%2_%1, 6, 6, 15, 7*mmsize*ARCH_X86_32, dst, src, ds
     HEVC_SAO_BAND_FILTER_INIT %1
 
 align 16
-.loop
+.loop:
 %if %2 == 8
     movu m8, [srcq]
     HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8
@@ -528,7 +528,7 @@ cglobal hevc_sao_edge_filter_%2_%1, 1, 6, 8, 5*mmsize, dst, src, dststride, a_st
 %endif
 
 align 16
-.loop
+.loop:
 %if %2 == 8
     mova m1, [srcq]
 
diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm
index 85ee56dff2..54905db262 100644
--- a/libavcodec/x86/huffyuvdsp.asm
+++ b/libavcodec/x86/huffyuvdsp.asm
@@ -196,7 +196,7 @@ cglobal add_bytes, 3,4,2, dst, src, w, size
     add dstq, wq
     add srcq, wq
     neg wq
-.3
+.3:
     mov sizeb, [srcq + wq]
     add [dstq + wq], sizeb
     inc wq
diff --git a/libavcodec/x86/jpeg2000dsp.asm b/libavcodec/x86/jpeg2000dsp.asm
index 712a298610..56b5fbd606 100644
--- a/libavcodec/x86/jpeg2000dsp.asm
+++ b/libavcodec/x86/jpeg2000dsp.asm
@@ -69,7 +69,7 @@ cglobal ict_float, 4, 4, %1, src0, src1, src2, csize
 %endif ; ARCH
 
 align 16
-.loop
+.loop:
     movaps m0, [src0q+csizeq]
     movaps m1, [src1q+csizeq]
     movaps m2, [src2q+csizeq]
diff --git a/libavcodec/x86/me_cmp.asm b/libavcodec/x86/me_cmp.asm
index 0160dc348f..ad06d485ab 100644
--- a/libavcodec/x86/me_cmp.asm
+++ b/libavcodec/x86/me_cmp.asm
@@ -794,7 +794,7 @@ cglobal vsad_intra%1, 5, 5, 3, v, pix1, pix2, lsize, h
 %endif
 
     sub hd, 2
-.loop
+.loop:
     lea pix1q, [pix1q + 2*lsizeq]
 %if %1 == mmsize
     mova m1, [pix1q]
@@ -875,7 +875,7 @@ cglobal vsad%1_approx, 5, 5, 5, v, pix1, pix2, lsize, h
 %endif
 
     sub hd, 2
-.loop
+.loop:
     lea pix1q, [pix1q + 2*lsizeq]
     lea pix2q, [pix2q + 2*lsizeq]
     mova m2, [pix1q]
diff --git a/libavcodec/x86/svq1enc.asm b/libavcodec/x86/svq1enc.asm
index 24ee70f108..869db34ef1 100644
--- a/libavcodec/x86/svq1enc.asm
+++ b/libavcodec/x86/svq1enc.asm
@@ -26,7 +26,7 @@ SECTION_TEXT
 %macro SSD_INT8_VS_INT16 0
 cglobal ssd_int8_vs_int16, 3, 3, 3, pix1, pix2, size
     pxor m0, m0
-.loop
+.loop:
     sub sizeq, 8
     movq m1, [pix1q + sizeq]
     mova m2, [pix2q + sizeq*2]
diff --git a/libavcodec/x86/v210.asm b/libavcodec/x86/v210.asm
index 400a1f3f9e..c24c765e5b 100644
--- a/libavcodec/x86/v210.asm
+++ b/libavcodec/x86/v210.asm
@@ -45,7 +45,7 @@ cglobal v210_planar_unpack_%1, 5, 5, 7
     mova m4, [v210_mask]
     mova m5, [v210_luma_shuf]
    mova m6, [v210_chroma_shuf]
-.loop
+.loop:
 %ifidn %1, unaligned
     movu m0, [r0]
 %else
diff --git a/libavcodec/x86/v210enc.asm b/libavcodec/x86/v210enc.asm
index 751675fc5e..859e2d9455 100644
--- a/libavcodec/x86/v210enc.asm
+++ b/libavcodec/x86/v210enc.asm
@@ -60,7 +60,7 @@ cglobal v210_planar_pack_10, 5, 5, 4, y, u, v, dst, width
     mova m2, [v210_enc_min_10]
     mova m3, [v210_enc_max_10]
 
-.loop
+.loop:
     movu m0, [yq+2*widthq]
     CLIPW m0, m2, m3
 
@@ -102,7 +102,7 @@ cglobal v210_planar_pack_8, 5, 5, 7, y, u, v, dst, width
     mova m5, [v210_enc_max_8]
     pxor m6, m6
 
-.loop
+.loop:
     movu m1, [yq+2*widthq]
     CLIPUB m1, m4, m5
 
diff --git a/libavcodec/x86/vp3dsp.asm b/libavcodec/x86/vp3dsp.asm
index ee5a6bf67a..d457cd7de5 100644
--- a/libavcodec/x86/vp3dsp.asm
+++ b/libavcodec/x86/vp3dsp.asm
@@ -167,7 +167,7 @@ INIT_MMX mmx
 cglobal put_vp_no_rnd_pixels8_l2, 5, 6, 0, dst, src1, src2, stride, h, stride3
     mova m6, [pb_FE]
     lea stride3q,[strideq+strideq*2]
-.loop
+.loop:
     mova m0, [src1q]
     mova m1, [src2q]
     mova m2, [src1q+strideq]