Data transfer
From memory:
load values from memory |
|
load values from aligned memory |
|
load values from unaligned memory |
|
load values, forcing a type conversion |
From a scalar:
broadcasting a value to all slots |
|
broadcasting a value, forcing a type conversion |
To memory:
store values to memory |
|
store values to aligned memory |
|
store values to unaligned memory |
|
store values, forcing a type conversion |
In place:
rearrange slots within the batch |
|
bitwise shift the whole batch to the left |
|
bitwise shift the whole batch to the right |
|
bitwise rotate the whole batch to the left |
|
bitwise rotate the whole batch to the right |
|
modify a single batch slot |
|
pack elements according to a mask |
|
select contiguous elements from the batch |
Between batches:
transpose a matrix as an array of batches |
|
interleave low halves of two batches |
|
interleave high halves of two batches |
-
template<class T, class A = default_arch>
inline batch<T, A> broadcast(T v) noexcept Creates a batch from the single value
v
.- Parameters:
v – the value used to initialize the batch
- Returns:
a new batch instance
-
template<class To, class A = default_arch, class From>
inline simd_return_type<From, To, A> broadcast_as(From v) noexcept Creates a batch from the single value
v
and the specified batch value type To.
- Parameters:
v – the value used to initialize the batch
- Returns:
a new batch instance
-
template<class T, class A>
inline batch<T, A> compress(batch<T, A> const &x, batch_bool<T, A> const &mask) noexcept Pick elements from
x selected by mask, and append them to the resulting vector, zeroing the remaining slots.
-
template<class T, class A>
inline batch<T, A> expand(batch<T, A> const &x, batch_bool<T, A> const &mask) noexcept Load contiguous elements from
x and place them in slots selected by mask, zeroing the other slots.
-
template<class T, class A, size_t I>
inline batch<T, A> insert(batch<T, A> const &x, T val, index<I> pos) noexcept Create a new batch equivalent to
x but with element val set at position pos.
- Parameters:
x – batch
val – value to set
pos – index of the updated slot
- Returns:
copy of x with position pos set to val
-
template<class To, class A = default_arch, class From>
inline simd_return_type<From, To, A> load_as(From const *ptr, aligned_mode) noexcept Creates a batch from the buffer
ptr and the specified batch value type To. The memory needs to be aligned.
- Parameters:
ptr – the memory buffer to read
- Returns:
a new batch instance
-
template<class To, class A = default_arch, class From>
inline simd_return_type<From, To, A> load_as(From const *ptr, unaligned_mode) noexcept Creates a batch from the buffer
ptr and the specified batch value type To. The memory does not need to be aligned.
- Parameters:
ptr – the memory buffer to read
- Returns:
a new batch instance
-
template<class A = default_arch, class From>
inline batch<From, A> load(From const *ptr, aligned_mode = {}) noexcept Creates a batch from the buffer
ptr. The memory needs to be aligned.
- Parameters:
ptr – the memory buffer to read
- Returns:
a new batch instance
-
template<class A = default_arch, class From>
inline batch<From, A> load(From const *ptr, unaligned_mode) noexcept Creates a batch from the buffer
ptr. The memory does not need to be aligned.
- Parameters:
ptr – the memory buffer to read
- Returns:
a new batch instance
-
template<class A = default_arch, class From>
inline batch<From, A> load_aligned(From const *ptr) noexcept Creates a batch from the buffer
ptr. The memory needs to be aligned.
- Parameters:
ptr – the memory buffer to read
- Returns:
a new batch instance
-
template<class A = default_arch, class From>
inline batch<From, A> load_unaligned(From const *ptr) noexcept Creates a batch from the buffer
ptr. The memory does not need to be aligned.
- Parameters:
ptr – the memory buffer to read
- Returns:
a new batch instance
-
template<size_t N, class T, class A>
inline batch<T, A> rotate_left(batch<T, A> const &x) noexcept Slide the whole batch to the left by
N elements, and reintroduce the slid-out elements from the right. This is different from rotl, which rotates each batch element to the left.
- Template Parameters:
N – Amount of elements to rotate to the left.
- Parameters:
x – batch of integer values.
- Returns:
rotated batch.
-
template<size_t N, class T, class A>
inline batch<T, A> rotate_right(batch<T, A> const &x) noexcept Slide the whole batch to the right by
N elements, and reintroduce the slid-out elements from the left. This is different from rotr, which rotates each batch element to the right.
- Template Parameters:
N – Amount of elements to rotate to the right.
- Parameters:
x – batch of integer values.
- Returns:
rotated batch.
-
template<class T, class A, class Vt, Vt... Values>
inline std::enable_if<std::is_arithmetic<T>::value, batch<T, A>>::type shuffle(batch<T, A> const &x, batch<T, A> const &y, batch_constant<Vt, A, Values...> mask) noexcept Combine elements from
x and y according to selector mask.
- Parameters:
x – batch
y – batch
mask – constant batch mask of integer elements of the same size as element of x and y. Each element of the mask indexes the vector that would be formed by the concatenation of x and y. For instance, batch_constant<uint32_t, sse2, 0, 4, 3, 7> picks x[0], y[0], x[3], y[3].
- Returns:
combined batch
-
template<size_t N, class T, class A>
inline batch<T, A> slide_left(batch<T, A> const &x) noexcept Slide the whole batch to the left by
N bytes. This is different from bitwise_lshift, which shifts each batch element to the left.
- Template Parameters:
N – Amount of bytes to slide to the left.
- Parameters:
x – batch of integer values.
- Returns:
slid batch.
-
template<size_t N, class T, class A>
inline batch<T, A> slide_right(batch<T, A> const &x) noexcept Slide the whole batch to the right by
N bytes. This is different from bitwise_rshift, which shifts each batch element to the right.
- Template Parameters:
N – Amount of bytes to slide to the right.
- Parameters:
x – batch of integer values.
- Returns:
slid batch.
-
template<class To, class A = default_arch, class From>
inline void store_as(To *dst, batch<From, A> const &src, aligned_mode) noexcept Copy content of batch
src to the buffer dst. The memory needs to be aligned.
- Parameters:
dst – the memory buffer to write to
src – the batch to copy
-
template<class To, class A = default_arch, class From>
inline void store_as(To *dst, batch<From, A> const &src, unaligned_mode) noexcept Copy content of batch
src to the buffer dst. The memory does not need to be aligned.
- Parameters:
dst – the memory buffer to write to
src – the batch to copy
-
template<class A, class T>
inline void store(T *mem, batch<T, A> const &val, aligned_mode = {}) noexcept Copy content of batch
val to the buffer mem. The memory needs to be aligned.
- Parameters:
mem – the memory buffer to write to
val – the batch to copy from
-
template<class A, class T>
inline void store(T *mem, batch<T, A> const &val, unaligned_mode) noexcept Copy content of batch
val to the buffer mem. The memory does not need to be aligned.
- Parameters:
mem – the memory buffer to write to
val – the batch to copy from
-
template<class A, class T>
inline void store_aligned(T *mem, batch<T, A> const &val) noexcept Copy content of batch
val to the buffer mem. The memory needs to be aligned.
- Parameters:
mem – the memory buffer to write to
val – the batch to copy from
-
template<class A, class T>
inline void store_unaligned(T *mem, batch<T, A> const &val) noexcept Copy content of batch
val to the buffer mem. The memory does not need to be aligned.
- Parameters:
mem – the memory buffer to write to
val – the batch to copy
-
template<class T, class A, class Vt, Vt... Values>
inline std::enable_if<std::is_arithmetic<T>::value, batch<T, A>>::type swizzle(batch<T, A> const &x, batch_constant<Vt, A, Values...> mask) noexcept Rearrange elements from
x according to constant mask mask.
- Parameters:
x – batch
mask – constant batch mask of integer elements of the same size as element of
x
- Returns:
swizzled batch
-
template<class T, class A, class Vt>
inline std::enable_if<std::is_arithmetic<T>::value, batch<T, A>>::type swizzle(batch<T, A> const &x, batch<Vt, A> mask) noexcept Rearrange elements from
x according to mask mask.
- Parameters:
x – batch
mask – batch mask of integer elements of the same size as element of
x
- Returns:
swizzled batch
-
template<class T, class A>
inline void transpose(batch<T, A> *matrix_begin, batch<T, A> *matrix_end) noexcept Transposes in place the matrix whose lines are the batches passed as argument.
- Parameters:
matrix_begin – pointer to the first line of the matrix to transpose
matrix_end – pointer to one element after the last line of the matrix to transpose
-
template<class T, class A>
inline batch<T, A> zip_hi(batch<T, A> const &x, batch<T, A> const &y) noexcept Unpack and interleave data from the HIGH half of batches
x and y. Store the results in the return value.
- Parameters:
x – a batch of integer or floating point or double precision values.
y – a batch of integer or floating point or double precision values.
- Returns:
a batch of the high part of shuffled values.
-
template<class T, class A>
inline batch<T, A> zip_lo(batch<T, A> const &x, batch<T, A> const &y) noexcept Unpack and interleave data from the LOW half of batches
x and y. Store the results in the return value.
- Parameters:
x – a batch of integer or floating point or double precision values.
y – a batch of integer or floating point or double precision values.
- Returns:
a batch of the low part of shuffled values.
The following empty types are used for tag dispatching:
-
struct aligned_mode
tag for load and store of aligned memory.
-
struct unaligned_mode
tag for load and store of unaligned memory.