// GNU D Compiler SIMD support functions and intrinsics.
// Copyright (C) 2022-2023 Free Software Foundation, Inc.
// GCC is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3, or (at your option) any later
// version.
// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// for more details.
// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.
// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.
module gcc.simd;
pure:
nothrow:
@safe:
@nogc:
pragma(inline, true):
/**
* Emit prefetch instruction.
* Params:
*    address = address to be prefetched
*    writeFetch = true for write fetch, false for read fetch
*    locality = 0..3 (0 meaning least local, 3 meaning most local)
*/
void prefetch(bool writeFetch, ubyte locality)(const(void)* address)
{
    import gcc.builtins : __builtin_prefetch;

    // The locality hint is a template argument, so an out-of-range value is
    // rejected at compile time instead of reaching the builtin.
    static assert(locality <= 3, "0..3 expected for locality");

    __builtin_prefetch(address, writeFetch, locality);
}
/**
 * Load unaligned vector from address.
 * This is a compiler intrinsic.
 * Params:
 *    p = pointer to vector
 * Returns:
 *    vector
 */
// Declaration only: no body is given in this module because, as the doc
// comment states, this is a compiler intrinsic. Constrained to vector types.
V loadUnaligned(V)(const V* p) if (isVectorType!V);
/**
 * Store vector to unaligned address.
 * This is a compiler intrinsic.
 * Params:
 *    p = pointer to vector
 *    value = value to store
 * Returns:
 *    value
 */
// Declaration only: no body is given in this module because, as the doc
// comment states, this is a compiler intrinsic. Constrained to vector types.
V storeUnaligned(V)(V* p, V value) if (isVectorType!V);
/**
 * Construct a permutation of elements from one or two vectors, returning a
 * vector of the same type as the input vector(s). The `mask` is an integral
 * vector with the same width and element count as the output vector.
 * Params:
 *    op1 = input vector
 *    op2 = input vector
 *    mask = integer vector mask
 * Returns:
 *    vector with the same type as `op1` and `op2`
 * Example:
 * ---
 * int4 a = [1, 2, 3, 4];
 * int4 b = [5, 6, 7, 8];
 * int4 mask1 = [0, 1, 1, 3];
 * int4 mask2 = [0, 4, 2, 5];
 * assert(shuffle(a, mask1).array == [1, 2, 2, 4]);
 * assert(shuffle(a, b, mask2).array == [1, 5, 3, 6]);
 * ---
 */
template shuffle(V0, V1, M)
{
    // Both operands must be vectors that share one element type.
    static assert(isVectorType!V0, "first argument must be vector");
    static assert(isVectorType!V1, "second argument must be vector");
    static assert(is(BaseType!V0 == BaseType!V1),
                  "first and second argument vectors must have the same element type");
    // The mask must be a vector whose element type implicitly converts to long.
    static assert(isVectorType!M && is(BaseType!M : long),
                  "last argument must be an integer vector");
    // Operands and mask must agree on element count...
    static assert(numElements!V0 == numElements!M && numElements!V1 == numElements!M,
                  "argument vectors and mask vector should have the same number of elements");
    // ...and on the size of a single element.
    static assert(BaseType!V0.sizeof == BaseType!M.sizeof,
                  "argument vectors and mask vector should have the same element type size");
    // Bodyless declaration: no implementation is provided in this module.
    // The result has the type of the first operand.
    V0 shuffle(V0 op1, V1 op2, M mask);
}
/// Ditto
template shuffle(V, M)
{
    // Validate the operand first, then the mask, then their relationship.
    static assert(isVectorType!V, "first argument must be a vector");
    static assert(isVectorType!M && is(BaseType!M : long),
                  "last argument must be an integer vector");
    static assert(numElements!M == numElements!V,
                  "argument vector and mask vector should have the same number of elements");
    static assert(BaseType!M.sizeof == BaseType!V.sizeof,
                  "argument vector and mask vector should have the same element type size");

    V shuffle(V op1, M mask)
    {
        // Single-input form: forward to the three-argument shuffle with the
        // same vector supplied as both operands.
        V permuted = shuffle(op1, op1, mask);
        return permuted;
    }
}
/**
 * Construct a permutation of elements from two vectors, returning a vector with
 * the same element type as the input vector(s), and same length as the `mask`.
 * Params:
 *    op1 = input vector
 *    op2 = input vector
 *    index = elements indices of the vectors that should be extracted and returned
 * Returns:
 *    vector with the same element type as `op1` and `op2`, but has an element count
 *    equal to the number of indices in `index`.
 * Example:
 * ---
 * int8 a = [1, -2, 3, -4, 5, -6, 7, -8];
 * int4 b = shufflevector(a, a, 0, 2, 4, 6);
 * assert(b.array == [1, 3, 5, 7]);
 * int4 c = [-2, -4, -6, -8];
 * int8 d = shufflevector(c, b, 4, 0, 5, 1, 6, 2, 7, 3);
 * assert(d.array == a.array);
 * ---
 */
template shufflevector(V1, V2, M...)
{
    // Both operands must be vectors with an identical element type; their
    // element counts are not compared here.
    static assert(isVectorType!V1, "first argument must be vector");
    static assert(isVectorType!V2, "second argument must be vector");
    static assert(is(BaseType!V1 == BaseType!V2),
                  "first and second argument vectors must have the same element type");
    // The number of indices determines the result length (see the return
    // type below), and that length must be a power of 2.
    static assert(isPowerOf2!(M.length),
                  "number of index arguments must be a power of 2");
    // Bodyless declaration: the result holds M.length elements of the
    // operands' element type.
    __vector(BaseType!V1[M.length]) shufflevector(V1 op1, V2 op2, M index);
}
/// Ditto
template shufflevector(V, index...)
{
    // Defined for compatibility with LDC.
    static assert(isVectorType!V, "first argument must be a vector type");
    static assert(index.length == numElements!V,
                  "number of index arguments must be the same number of vector elements");

    // Folds all indices into a single enum initializer. The fold only
    // compiles when every index can be evaluated at compile time, which is
    // what the __traits(compiles) check below relies on.
    private template ctfeConstants(rest...)
    {
        static if (rest.length != 0)
            enum ctfeConstants = rest[0] | ctfeConstants!(rest[1 .. $]);
        else
            enum ctfeConstants = 1;
    }
    static assert(__traits(compiles, ctfeConstants!index),
                  "all index arguments must be compile time constants");

    // True when no index is negative; an empty list is trivially valid.
    private template validIndexes(rest...)
    {
        static if (rest.length != 0)
            enum validIndexes = (cast(long)rest[0] >= 0) && validIndexes!(rest[1 .. $]);
        else
            enum validIndexes = true;
    }
    static assert(validIndexes!index,
                  "all index arguments must be greater than or equal to 0");

    V shufflevector(V op1, V op2)
    {
        // Forward to the overload that takes the indices as trailing
        // function arguments.
        V permuted = shufflevector(op1, op2, index);
        return permuted;
    }
}
/**
 * Extracts a single scalar element from a vector at a specified index.
 * Defined for compatibility with LDC.
 * Params:
 *    val = vector to extract element from
 *    idx = index indicating the position from which to extract the element
 * Returns:
 *    scalar of the same type as the element type of val
 * Example:
 * ---
 * int4 a = [0, 10, 20, 30];
 * int k = extractelement!(int4, 2)(a);
 * assert(k == 20);
 * ---
 */
BaseType!V extractelement(V, int idx)(V val)
    if (isVectorType!V && idx < numElements!V)
{
    // idx is a template argument, checked against the element count by the
    // constraint above.
    BaseType!V element = val[idx];
    return element;
}
/**
 * Inserts a scalar element into a vector at a specified index.
 * Defined for compatibility with LDC.
 * Params:
 *    val = vector to assign element to
 *    elt = scalar whose type is the element type of val
 *    idx = index indicating the position at which to insert the element
 * Returns:
 *    vector of the same type as val
 * Example:
 * ---
 * int4 a = [0, 10, 20, 30];
 * int4 b = insertelement!(int4, 2)(a, 50);
 * assert(b.array == [0, 10, 50, 30]);
 * ---
 */
V insertelement(V, int idx)(V val, BaseType!V elt)
    if (isVectorType!V && idx < numElements!V)
{
    // val is passed by value, so the caller's vector is left untouched; the
    // updated copy is what gets returned.
    V updated = val;
    updated[idx] = elt;
    return updated;
}
/**
 * Convert a vector from one integral or floating vector type to another.
 * The result is an integral or floating vector that has had every element
 * cast to the element type of the return type.
 * Params:
 *    from = input vector
 * Returns:
 *    converted vector
 * Example:
 * ---
 * int4 a = [1, -2, 3, -4];
 * float4 b = [1.5, -2.5, 3, 7];
 * assert(convertvector!float4(a).array == [1, -2, 3, -4]);
 * assert(convertvector!double4(a).array == [1, -2, 3, -4]);
 * assert(convertvector!double4(b).array == [1.5, -2.5, 3, 7]);
 * assert(convertvector!int4(b).array == [1, -2, 3, 7]);
 * ---
 */
template convertvector(V, T)
{
    // V is the result type and T the argument type (see the declaration at
    // the end). Each must be a vector whose element type implicitly converts
    // to long or to real.
    static assert(isVectorType!V && (is(BaseType!V : long) || is(BaseType!V : real)),
                  "first argument must be an integer or floating vector type");
    static assert(isVectorType!T && (is(BaseType!T : long) || is(BaseType!T : real)),
                  "second argument must be an integer or floating vector");
    // Source and result must have the same number of elements.
    static assert(numElements!V == numElements!T,
                  "first and second argument vectors should have the same number of elements");
    // Bodyless declaration: takes a T and returns a V.
    V convertvector(T);
}
/**
 * Construct a conditional merge of elements from two vectors, returning a
 * vector of the same type as the input vector(s). The `mask` is an integral
 * vector with the same width and element count as the output vector.
 * Params:
 *    op1 = input vector
 *    op2 = input vector
 *    mask = integer vector mask
 * Returns:
 *    vector with the same type as `op1` and `op2`
 * Example:
 * ---
 * int4 a = [1, 2, 3, 4];
 * int4 b = [3, 2, 1, 4];
 * auto c = blendvector(a, b, a > b);
 * assert(c.array == [3, 2, 3, 4]);
 * auto d = blendvector(a, b, a < b);
 * assert(d.array == [1, 2, 1, 4]);
 * ---
 */
template blendvector(V0, V1, M)
{
    // Both operands must be vectors that share one element type.
    static assert(isVectorType!V0, "first argument must be vector");
    static assert(isVectorType!V1, "second argument must be vector");
    static assert(is(BaseType!V0 == BaseType!V1),
                  "first and second argument vectors must have the same element type");
    // The mask must be a vector whose element type implicitly converts to long.
    static assert(isVectorType!M && is(BaseType!M : long),
                  "last argument must be an integer vector");
    // Operands and mask must agree on element count...
    static assert(numElements!V0 == numElements!M && numElements!V1 == numElements!M,
                  "argument vectors and mask vector should have the same number of elements");
    // ...and on the size of a single element.
    static assert(BaseType!V0.sizeof == BaseType!M.sizeof,
                  "argument vectors and mask vector should have the same element type size");
    // Bodyless declaration: no implementation is provided in this module.
    // The result has the type of the first operand.
    V0 blendvector(V0 op1, V1 op2, M mask);
}
/**
 * Perform an element-wise comparison between two vectors, producing `0` when
 * the comparison is false and `-1` (all bits are set to 1) otherwise.
 * Params:
 *    op1 = input vector
 *    op2 = input vector
 * Returns:
 *    vector of the same width and number of elements as the comparison
 *    operands with a signed integral element type
 * Example:
 * ---
 * float4 a = [1, 3, 5, 7];
 * float4 b = [2, 3, 4, 5];
 * int4 c = greaterMask!float4(a, b);
 * assert(c.array == [0, 0, -1, -1]);
 * ---
 */
auto equalMask(V)(V op1, V op2) if (isVectorType!V)
{
    // The return type is inferred rather than spelled as V: the result is a
    // mask vector with a signed integral element type (0 or -1 per element),
    // which is not V itself when V has floating-point or unsigned elements.
    return op1 == op2;
}
/// Ditto
auto notEqualMask(V)(V op1, V op2) if (isVectorType!V)
{
    // The return type is inferred rather than spelled as V: the result is a
    // mask vector with a signed integral element type (0 or -1 per element),
    // which is not V itself when V has floating-point or unsigned elements.
    return op1 != op2;
}
/// Ditto
auto greaterMask(V)(V op1, V op2) if (isVectorType!V)
{
    // The return type is inferred rather than spelled as V: the result is a
    // mask vector with a signed integral element type (0 or -1 per element),
    // which is not V itself when V has floating-point or unsigned elements
    // (e.g. comparing float4 operands is documented to yield an int4).
    return op1 > op2;
}
/// Ditto
auto greaterOrEqualMask(V)(V op1, V op2) if (isVectorType!V)
{
    // The return type is inferred rather than spelled as V: the result is a
    // mask vector with a signed integral element type (0 or -1 per element),
    // which is not V itself when V has floating-point or unsigned elements.
    return op1 >= op2;
}
/**
 * Perform an element-wise logical comparison between two vectors, producing
 * `0` when the comparison is false and `-1` (all bits are set to 1) otherwise.
 * Params:
 *    op1 = input vector
 *    op2 = input vector
 * Returns:
 *    vector of the same width and number of elements as the comparison
 *    operands with a signed integral element type
 */
auto notMask(V)(V op1) if (isVectorType!V)
{
    // Logical NOT per element: -1 where the element equals zero, 0 elsewhere.
    // The return type is inferred rather than spelled as V because the mask
    // has a signed integral element type, which is not V itself when V has
    // floating-point or unsigned elements.
    return op1 == 0;
}
/// Ditto
auto andAndMask(V)(V op1, V op2) if (isVectorType!V)
{
    // Logical AND per element: each operand is first reduced to a 0 / -1
    // mask, then the two masks are combined bitwise.
    // The return type is inferred rather than spelled as V because the mask
    // has a signed integral element type, which is not V itself when V has
    // floating-point or unsigned elements.
    return (op1 != 0) & (op2 != 0);
}
/// Ditto
auto orOrMask(V)(V op1, V op2) if (isVectorType!V)
{
    // Logical OR per element: each operand is first reduced to a 0 / -1
    // mask, then the two masks are combined bitwise.
    // The return type is inferred rather than spelled as V because the mask
    // has a signed integral element type, which is not V itself when V has
    // floating-point or unsigned elements.
    return (op1 != 0) | (op2 != 0);
}
// Private helper templates.
private:
// Evaluates to true when T implicitly converts to some __vector(V[N]) type.
template isVectorType(T)
{
    enum bool isVectorType = is(T : __vector(V[N]), V, size_t N);
}
// Element type of vector type V, taken from the type of its first array slot.
alias BaseType(V) = typeof(V.array[0]);
// Element count of vector type V: total size divided by the size of one element.
enum numElements(V) = V.sizeof / BaseType!V.sizeof;
// True when Y is non-zero and has exactly one bit set: clearing the lowest
// set bit of such a value leaves zero.
template isPowerOf2(int Y)
{
    enum bool isPowerOf2 = Y != 0 && (Y & (Y - 1)) == 0;
}