celp_math: cleanup ff_dot_product()

based on code & idea by vitor

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
2025-07-11 14:30:22 +02:00 · 2011-09-29 21:21:26 +02:00
parent 84f8aef40c
commit 11512367d3
3 changed files with 16 additions and 17 deletions
--- a/libavcodec/celp_math.c
+++ b/libavcodec/celp_math.c
@@ -197,14 +197,14 @@ int ff_log2(uint32_t value)
    return (power_int << 15) + value;
 }

-int ff_dot_product(const int16_t *a, const int16_t *b, int length, int shift)
+int ff_dot_product(const int16_t *a, const int16_t *b, int length)
 {
-    int i, sum = 0;
+    int i;
+    int64_t sum = 0;
+
+    for (i = 0; i < length; i++)
+        sum += MUL16(a[i], b[i]);

-    for (i = 0; i < length; i++) {
-        int64_t prod = av_clipl_int32(MUL64(a[i], b[i]) << shift);
-        sum = av_clipl_int32(sum + prod);
-    }
    return sum;
 }

--- a/libavcodec/celp_math.h
+++ b/libavcodec/celp_math.h
@@ -68,11 +68,10 @@ static inline int bidir_sal(int value, int offset)
 * @param a input data array
 * @param b input data array
 * @param length number of elements
- * @param shift the result is scaled by 2^shift
 *
 * @return dot product = sum of elementwise products
 */
-int ff_dot_product(const int16_t *a, const int16_t *b, int length, int shift);
+int ff_dot_product(const int16_t *a, const int16_t *b, int length);

 /**
 * returns the dot product.
--- a/libavcodec/g723_1.c
+++ b/libavcodec/g723_1.c
@@ -553,7 +553,7 @@ static void gen_acb_excitation(int16_t *vector, int16_t *prev_excitation,
    /* Calculate adaptive vector */
    cb_ptr += subfrm.ad_cb_gain * 20;
    for (i = 0; i < SUBFRAME_LEN; i++) {
-        sum = ff_dot_product(residual + i, cb_ptr, PITCH_ORDER, 1);
+        sum = ff_dot_product(residual + i, cb_ptr, PITCH_ORDER)<<1;
        vector[i] = av_clipl_int32((sum << 1) + (1 << 15)) >> 16;
    }
 }
@@ -579,7 +579,7 @@ static int autocorr_max(G723_1_Context *p, int offset, int *ccr_max,
    limit     = FFMIN(FRAME_LEN + PITCH_MAX - offset - length, pitch_lag + 3);

    for (i = pitch_lag - 3; i <= limit; i++) {
-        ccr = ff_dot_product(buf, buf + dir * i, length, 1);
+        ccr = ff_dot_product(buf, buf + dir * i, length)<<1;

        if (ccr > *ccr_max) {
            *ccr_max = ccr;
@@ -678,17 +678,17 @@ static void comp_ppf_coeff(G723_1_Context *p, int offset, int pitch_lag,
        return;

    /* Compute target energy */
-    energy[0] = ff_dot_product(buf, buf, SUBFRAME_LEN, 1);
+    energy[0] = ff_dot_product(buf, buf, SUBFRAME_LEN)<<1;

    /* Compute forward residual energy */
    if (fwd_lag)
        energy[2] = ff_dot_product(buf + fwd_lag, buf + fwd_lag,
-                                   SUBFRAME_LEN, 1);
+                                   SUBFRAME_LEN)<<1;

    /* Compute backward residual energy */
    if (back_lag)
        energy[4] = ff_dot_product(buf - back_lag, buf - back_lag,
-                                   SUBFRAME_LEN, 1);
+                                   SUBFRAME_LEN)<<1;

    /* Normalize and shorten */
    temp1 = 0;
@@ -749,7 +749,7 @@ static int comp_interp_index(G723_1_Context *p, int pitch_lag,
    ccr   = av_clipl_int32((int64_t)ccr + (1 << 15)) >> 16;

    /* Compute target energy */
-    tgt_eng  = ff_dot_product(buf, buf, SUBFRAME_LEN * 2, 1);
+    tgt_eng  = ff_dot_product(buf, buf, SUBFRAME_LEN * 2)<<1;
    *exc_eng = av_clipl_int32(tgt_eng + (1 << 15)) >> 16;

    if (ccr <= 0)
@@ -757,7 +757,7 @@ static int comp_interp_index(G723_1_Context *p, int pitch_lag,

    /* Compute best energy */
    best_eng = ff_dot_product(buf - index, buf - index,
-                              SUBFRAME_LEN * 2, 1);
+                              SUBFRAME_LEN * 2)<<1;
    best_eng = av_clipl_int32((int64_t)best_eng + (1 << 15)) >> 16;

    temp = best_eng * *exc_eng >> 3;
@@ -911,9 +911,9 @@ static void formant_postfilter(G723_1_Context *p, int16_t *lpc, int16_t *buf)

        /* Compute auto correlation coefficients */
        auto_corr[0] = ff_dot_product(temp_vector, temp_vector + 1,
-                                      SUBFRAME_LEN - 1, 1);
+                                      SUBFRAME_LEN - 1)<<1;
        auto_corr[1] = ff_dot_product(temp_vector, temp_vector,
-                                      SUBFRAME_LEN, 1);
+                                      SUBFRAME_LEN)<<1;

        /* Compute reflection coefficient */
        temp = auto_corr[1] >> 16;