/*
 * Decompiled with CFR 0.152.
 */
package org.elasticsearch.simdvec.internal.vectorization;

import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.Constants;
import org.elasticsearch.simdvec.internal.vectorization.ESVectorUtilSupport;

/**
 * Scalar (non-vectorized) implementation of {@link ESVectorUtilSupport}.
 *
 * <p>Every interface method delegates to a static {@code *Impl} kernel in this class, so the
 * kernels can also be called directly without an instance.
 */
final class DefaultESVectorUtilSupport implements ESVectorUtilSupport {

    /**
     * Number of consecutive bit planes read from the query array by
     * {@link #ipByteBinByteImpl(byte[], byte[])}; plane {@code i} is weighted by {@code 2^i}.
     */
    private static final int QUERY_BIT_PLANES = 4;

    DefaultESVectorUtilSupport() {
    }

    /**
     * Computes {@code a * b + c}.
     *
     * <p>Uses {@link Math#fma(float, float, float)} (single rounding) when
     * {@code Constants.HAS_FAST_SCALAR_FMA} is set, otherwise a separate multiply and add.
     */
    private static float fma(float a, float b, float c) {
        if (Constants.HAS_FAST_SCALAR_FMA) {
            return Math.fma(a, b, c);
        }
        return a * b + c;
    }

    @Override
    public long ipByteBinByte(byte[] q, byte[] d) {
        return ipByteBinByteImpl(q, d);
    }

    @Override
    public int ipByteBit(byte[] q, byte[] d) {
        return ipByteBitImpl(q, d);
    }

    @Override
    public float ipFloatBit(float[] q, byte[] d) {
        return ipFloatBitImpl(q, d);
    }

    /**
     * Sums the entries of {@code q} whose corresponding bit in {@code d} is set.
     *
     * <p>Each byte of {@code d} covers eight consecutive entries of {@code q}; the most
     * significant bit of {@code d[i]} pairs with {@code q[i * 8]} and the least significant bit
     * with {@code q[i * 8 + 7]}.
     *
     * @param q byte values, eight per byte of {@code d} (checked by an {@code assert})
     * @param d packed bit mask
     * @return the sum of the selected entries of {@code q}
     */
    public static int ipByteBitImpl(byte[] q, byte[] d) {
        assert q.length == d.length * Byte.SIZE;
        // Four independent accumulators, each fed by two of the eight bit lanes.
        int acc0 = 0;
        int acc1 = 0;
        int acc2 = 0;
        int acc3 = 0;
        for (int i = 0; i < d.length; ++i) {
            final int base = i * Byte.SIZE;
            // Sign extension of the byte is harmless: every lane is masked down to one bit.
            final int mask = d[i];
            acc0 += q[base] * ((mask >> 7) & 1);
            acc1 += q[base + 1] * ((mask >> 6) & 1);
            acc2 += q[base + 2] * ((mask >> 5) & 1);
            acc3 += q[base + 3] * ((mask >> 4) & 1);
            acc0 += q[base + 4] * ((mask >> 3) & 1);
            acc1 += q[base + 5] * ((mask >> 2) & 1);
            acc2 += q[base + 6] * ((mask >> 1) & 1);
            acc3 += q[base + 7] * (mask & 1);
        }
        return acc0 + acc1 + acc2 + acc3;
    }

    /**
     * Sums the entries of {@code q} whose corresponding bit in {@code d} is set.
     *
     * <p>The bit layout matches {@link #ipByteBitImpl(byte[], byte[])}: the most significant bit
     * of {@code d[i]} pairs with {@code q[i * 8]}.
     *
     * @param q float values, eight per byte of {@code d} (checked by an {@code assert})
     * @param d packed bit mask
     * @return the sum of the selected entries of {@code q}
     */
    public static float ipFloatBitImpl(float[] q, byte[] d) {
        assert q.length == d.length * Byte.SIZE;
        // The lane-to-accumulator assignment and the final summation order are kept fixed:
        // float addition is not associative, so reordering could change the rounded result.
        float acc0 = 0.0f;
        float acc1 = 0.0f;
        float acc2 = 0.0f;
        float acc3 = 0.0f;
        for (int i = 0; i < d.length; ++i) {
            final int base = i * Byte.SIZE;
            final int mask = d[i];
            acc0 = fma(q[base], (mask >> 7) & 1, acc0);
            acc1 = fma(q[base + 1], (mask >> 6) & 1, acc1);
            acc2 = fma(q[base + 2], (mask >> 5) & 1, acc2);
            acc3 = fma(q[base + 3], (mask >> 4) & 1, acc3);
            acc0 = fma(q[base + 4], (mask >> 3) & 1, acc0);
            acc1 = fma(q[base + 5], (mask >> 2) & 1, acc1);
            acc2 = fma(q[base + 6], (mask >> 1) & 1, acc2);
            acc3 = fma(q[base + 7], mask & 1, acc3);
        }
        return acc0 + acc1 + acc2 + acc3;
    }

    /**
     * Computes a weighted bit-wise inner product between {@code q} and {@code d}.
     *
     * <p>{@code q} is read as {@value #QUERY_BIT_PLANES} consecutive planes of {@code d.length}
     * bytes each. For plane {@code i} the number of bits set in {@code plane AND d} is counted
     * and added to the result shifted left by {@code i}.
     *
     * @param q query planes; indices up to {@code QUERY_BIT_PLANES * d.length - 1} are read, so
     *          a shorter array fails with an index-out-of-bounds error
     * @param d packed bits
     * @return the sum over all planes of {@code bitCount(plane & d) << planeIndex}
     */
    public static long ipByteBinByteImpl(byte[] q, byte[] d) {
        final int size = d.length;
        // Largest multiple of Integer.BYTES not exceeding size: the part handled 4 bytes at a time.
        final int upperBound = size & -Integer.BYTES;
        long ret = 0L;
        for (int i = 0; i < QUERY_BIT_PLANES; ++i) {
            final int planeOffset = i * size;
            long subRet = 0L;
            int r = 0;
            for (; r < upperBound; r += Integer.BYTES) {
                // VarHandle.get is signature-polymorphic: without the (int) casts its static
                // type is Object and the bitwise AND does not compile. Both words are read
                // through the same handle, so the popcount of their AND is byte-order independent.
                final int qWord = (int) BitUtil.VH_NATIVE_INT.get(q, planeOffset + r);
                final int dWord = (int) BitUtil.VH_NATIVE_INT.get(d, r);
                subRet += Integer.bitCount(qWord & dWord);
            }
            // Tail: the remaining (at most three) bytes, masked to undo sign extension.
            for (; r < size; ++r) {
                subRet += Integer.bitCount(q[planeOffset + r] & d[r] & 0xFF);
            }
            ret += subRet << i;
        }
        return ret;
    }
}

