x64 (amd64) Intrinsics List

This document lists intrinsics that the Visual C++ compiler supports when x64 (also referred to as amd64) is targeted.

For information about individual intrinsics, see these resources, as appropriate for the processor you're targeting:

The following table lists the intrinsics available on x64 processors. The Technology column lists required instruction-set support. Use the __cpuid intrinsic to determine instruction-set support at run time. A [1] indicates the intrinsic is available only on AMD processors. A [2] indicates the intrinsic is available only on Intel processors. The required header for the function prototype is listed in the Header column.

Intrinsic name

Technology

Header

Function prototype

_addcarry_u16

intrin.h

unsigned char _addcarry_u16(unsigned char c_in,unsigned short src1,unsigned short src2,unsigned short *sum)

_addcarry_u32

intrin.h

unsigned char _addcarry_u32(unsigned char c_in,unsigned int src1,unsigned int src2,unsigned int *sum)

_addcarry_u64

intrin.h

unsigned char _addcarry_u64(unsigned char c_in,unsigned __int64 src1,unsigned __int64 src2,unsigned __int64 *sum)

_addcarry_u8

intrin.h

unsigned char _addcarry_u8(unsigned char c_in,unsigned char src1,unsigned char src2,unsigned char *sum)

_addcarryx_u32

ADX [2]

immintrin.h

unsigned char _addcarryx_u32(unsigned char c_in,unsigned int src1,unsigned int src2,unsigned int *sum)

_addcarryx_u64

ADX [2]

immintrin.h

unsigned char _addcarryx_u64(unsigned char c_in,unsigned __int64 src1,unsigned __int64 src2,unsigned __int64 *sum)

__addgsbyte

intrin.h

void __addgsbyte(unsigned long,unsigned char)

__addgsdword

intrin.h

void __addgsdword(unsigned long,unsigned int)

__addgsqword

intrin.h

void __addgsqword(unsigned long,unsigned __int64)

__addgsword

intrin.h

void __addgsword(unsigned long,unsigned short)

_AddressOfReturnAddress

intrin.h

void * _AddressOfReturnAddress(void)

_andn_u32

BMI [1]

ammintrin.h

unsigned int _andn_u32(unsigned int,unsigned int)

_andn_u64

BMI [1]

ammintrin.h

unsigned __int64 _andn_u64(unsigned __int64,unsigned __int64)

_bextr_u32

BMI

ammintrin.h, immintrin.h

unsigned int _bextr_u32(unsigned int,unsigned int,unsigned int)

_bextr_u64

BMI

ammintrin.h, immintrin.h

unsigned __int64 _bextr_u64(unsigned __int64,unsigned int,unsigned int)

_bextri_u32

ABM [1]

ammintrin.h

unsigned int _bextri_u32(unsigned int,unsigned int)

_bextri_u64

ABM [1]

ammintrin.h

unsigned __int64 _bextri_u64(unsigned __int64,unsigned int)

_BitScanForward

intrin.h

BOOLEAN _BitScanForward(OUT ULONG* Index,IN ULONG Mask)

_BitScanForward64

intrin.h

BOOLEAN _BitScanForward64(OUT ULONG* Index,IN ULONG64 Mask)

_BitScanReverse

intrin.h

BOOLEAN _BitScanReverse(OUT ULONG* Index,IN ULONG Mask)

_BitScanReverse64

intrin.h

BOOLEAN _BitScanReverse64(OUT ULONG* Index,IN ULONG64 Mask)

_bittest

intrin.h

unsigned char _bittest(long const *a,long b)

_bittest64

intrin.h

unsigned char _bittest64(__int64 const *a,__int64 b)

_bittestandcomplement

intrin.h

unsigned char _bittestandcomplement(long *a,long b)

_bittestandcomplement64

intrin.h

unsigned char _bittestandcomplement64(__int64 *a,__int64 b)

_bittestandreset

intrin.h

unsigned char _bittestandreset(long *a,long b)

_bittestandreset64

intrin.h

unsigned char _bittestandreset64(__int64 *a,__int64 b)

_bittestandset

intrin.h

unsigned char _bittestandset(long *a,long b)

_bittestandset64

intrin.h

unsigned char _bittestandset64(__int64 *a,__int64 b)

_blcfill_u32

ABM [1]

ammintrin.h

unsigned int _blcfill_u32(unsigned int)

_blcfill_u64

ABM [1]

ammintrin.h

unsigned __int64 _blcfill_u64(unsigned __int64)

_blci_u32

ABM [1]

ammintrin.h

unsigned int _blci_u32(unsigned int)

_blci_u64

ABM [1]

ammintrin.h

unsigned __int64 _blci_u64(unsigned __int64)

_blcic_u32

ABM [1]

ammintrin.h

unsigned int _blcic_u32(unsigned int)

_blcic_u64

ABM [1]

ammintrin.h

unsigned __int64 _blcic_u64(unsigned __int64)

_blcmsk_u32

ABM [1]

ammintrin.h

unsigned int _blcmsk_u32(unsigned int)

_blcmsk_u64

ABM [1]

ammintrin.h

unsigned __int64 _blcmsk_u64(unsigned __int64)

_blcs_u32

ABM [1]

ammintrin.h

unsigned int _blcs_u32(unsigned int)

_blcs_u64

ABM [1]

ammintrin.h

unsigned __int64 _blcs_u64(unsigned __int64)

_blsfill_u32

ABM [1]

ammintrin.h

unsigned int _blsfill_u32(unsigned int)

_blsfill_u64

ABM [1]

ammintrin.h

unsigned __int64 _blsfill_u64(unsigned __int64)

_blsi_u32

BMI

ammintrin.h, immintrin.h

unsigned int _blsi_u32(unsigned int)

_blsi_u64

BMI

ammintrin.h, immintrin.h

unsigned __int64 _blsi_u64(unsigned __int64)

_blsic_u32

ABM [1]

ammintrin.h

unsigned int _blsic_u32(unsigned int)

_blsic_u64

ABM [1]

ammintrin.h

unsigned __int64 _blsic_u64(unsigned __int64)

_blsmsk_u32

BMI

ammintrin.h, immintrin.h

unsigned int _blsmsk_u32(unsigned int)

_blsmsk_u64

BMI

ammintrin.h, immintrin.h

unsigned __int64 _blsmsk_u64(unsigned __int64)

_blsr_u32

BMI

ammintrin.h, immintrin.h

unsigned int _blsr_u32(unsigned int)

_blsr_u64

BMI

ammintrin.h, immintrin.h

unsigned __int64 _blsr_u64(unsigned __int64)

_bzhi_u32

BMI [2]

immintrin.h

unsigned int _bzhi_u32(unsigned int,unsigned int)

_bzhi_u64

BMI [2]

immintrin.h

unsigned __int64 _bzhi_u64(unsigned __int64,unsigned int)

_clac

SMAP

intrin.h

void _clac(void)

__cpuid

intrin.h

void __cpuid(int *a,int b)

__cpuidex

intrin.h

void __cpuidex(int *a,int b,int c)

__debugbreak

intrin.h

void __debugbreak(void)

_disable

intrin.h

void _disable(void)

__emul

intrin.h

__int64 [pascal/cdecl] __emul(int,int)

__emulu

intrin.h

unsigned __int64 [pascal/cdecl] __emulu(unsigned int,unsigned int)

_enable

intrin.h

void _enable(void)

__fastfail

intrin.h

void __fastfail(unsigned int)

__faststorefence

intrin.h

void __faststorefence(void)

_fxrstor

FXSR [2]

immintrin.h

void _fxrstor(void const*)

_fxrstor64

FXSR [2]

immintrin.h

void _fxrstor64(void const*)

_fxsave

FXSR [2]

immintrin.h

void _fxsave(void*)

_fxsave64

FXSR [2]

immintrin.h

void _fxsave64(void*)

__getcallerseflags

intrin.h

unsigned int __getcallerseflags(void)

__halt

intrin.h

void __halt(void)

__inbyte

intrin.h

unsigned char __inbyte(unsigned short Port)

__inbytestring

intrin.h

void __inbytestring(unsigned short Port,unsigned char *Buffer,unsigned long Count)

__incgsbyte

intrin.h

void __incgsbyte(unsigned long)

__incgsdword

intrin.h

void __incgsdword(unsigned long)

__incgsqword

intrin.h

void __incgsqword(unsigned long)

__incgsword

intrin.h

void __incgsword(unsigned long)

__indword

intrin.h

unsigned long __indword(unsigned short Port)

__indwordstring

intrin.h

void __indwordstring(unsigned short Port,unsigned long *Buffer,unsigned long Count)

__int2c

intrin.h

void __int2c(void)

_InterlockedAnd

intrin.h

long _InterlockedAnd(long volatile *,long)

_InterlockedAnd_HLEAcquire

HLE [2]

immintrin.h

long _InterlockedAnd_HLEAcquire(long volatile *,long)

_InterlockedAnd_HLERelease

HLE [2]

immintrin.h

long _InterlockedAnd_HLERelease(long volatile *,long)

_InterlockedAnd_np

intrin.h

long _InterlockedAnd_np(long *,long)

_InterlockedAnd16

intrin.h

short _InterlockedAnd16(short volatile *,short)

_InterlockedAnd16_np

intrin.h

short _InterlockedAnd16_np(short *,short)

_InterlockedAnd64

intrin.h

__int64 _InterlockedAnd64(__int64 volatile *,__int64)

_InterlockedAnd64_HLEAcquire

HLE [2]

immintrin.h

__int64 _InterlockedAnd64_HLEAcquire(__int64 volatile *,__int64)

_InterlockedAnd64_HLERelease

HLE [2]

immintrin.h

__int64 _InterlockedAnd64_HLERelease(__int64 volatile *,__int64)

_InterlockedAnd64_np

intrin.h

__int64 _InterlockedAnd64_np(__int64 *,__int64)

_InterlockedAnd8

intrin.h

char _InterlockedAnd8(char volatile *,char)

_InterlockedAnd8_np

intrin.h

char _InterlockedAnd8_np(char *,char)

_interlockedbittestandreset

intrin.h

unsigned char _interlockedbittestandreset(long *a,long b)

_interlockedbittestandreset_HLEAcquire

HLE [2]

immintrin.h

unsigned char _interlockedbittestandreset_HLEAcquire(long *a,long b)

_interlockedbittestandreset_HLERelease

HLE [2]

immintrin.h

unsigned char _interlockedbittestandreset_HLERelease(long *a,long b)

_interlockedbittestandreset64

intrin.h

unsigned char _interlockedbittestandreset64(__int64 *a,__int64 b)

_interlockedbittestandreset64_HLEAcquire

HLE [2]

immintrin.h

unsigned char _interlockedbittestandreset64_HLEAcquire(__int64 *a,__int64 b)

_interlockedbittestandreset64_HLERelease

HLE [2]

immintrin.h

unsigned char _interlockedbittestandreset64_HLERelease(__int64 *a,__int64 b)

_interlockedbittestandset

intrin.h

unsigned char _interlockedbittestandset(long *a,long b)

_interlockedbittestandset_HLEAcquire

HLE [2]

immintrin.h

unsigned char _interlockedbittestandset_HLEAcquire(long *a,long b)

_interlockedbittestandset_HLERelease

HLE [2]

immintrin.h

unsigned char _interlockedbittestandset_HLERelease(long *a,long b)

_interlockedbittestandset64

intrin.h

unsigned char _interlockedbittestandset64(__int64 *a,__int64 b)

_interlockedbittestandset64_HLEAcquire

HLE [2]

immintrin.h

unsigned char _interlockedbittestandset64_HLEAcquire(__int64 *a,__int64 b)

_interlockedbittestandset64_HLERelease

HLE [2]

immintrin.h

unsigned char _interlockedbittestandset64_HLERelease(__int64 *a,__int64 b)

_InterlockedCompareExchange

intrin.h

long _InterlockedCompareExchange (long volatile *,long,long)

_InterlockedCompareExchange_HLEAcquire

HLE [2]

immintrin.h

long _InterlockedCompareExchange_HLEAcquire(long volatile *,long,long)

_InterlockedCompareExchange_HLERelease

HLE [2]

immintrin.h

long _InterlockedCompareExchange_HLERelease(long volatile *,long,long)

_InterlockedCompareExchange_np

intrin.h

long _InterlockedCompareExchange_np (long *,long,long)

_InterlockedCompareExchange128

intrin.h

unsigned char _InterlockedCompareExchange128(__int64 volatile *,__int64,__int64,__int64*)

_InterlockedCompareExchange128_np

intrin.h

unsigned char _InterlockedCompareExchange128_np(__int64 volatile *,__int64,__int64,__int64*)

_InterlockedCompareExchange16

intrin.h

short _InterlockedCompareExchange16(short volatile *Destination,short Exchange,short Comparand)

_InterlockedCompareExchange16_np

intrin.h

short _InterlockedCompareExchange16_np(short volatile *Destination,short Exchange,short Comparand)

_InterlockedCompareExchange64

intrin.h

__int64 _InterlockedCompareExchange64(__int64 volatile *,__int64,__int64)

_InterlockedCompareExchange64_HLEAcquire

HLE [2]

immintrin.h

__int64 _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *,__int64,__int64)

_InterlockedCompareExchange64_HLERelease

HLE [2]

immintrin.h

__int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *,__int64,__int64)

_InterlockedCompareExchange64_np

intrin.h

__int64 _InterlockedCompareExchange64_np(__int64 *,__int64,__int64)

_InterlockedCompareExchange8

intrin.h

char _InterlockedCompareExchange8(char volatile *Destination,char Exchange,char Comparand)

_InterlockedCompareExchangePointer

intrin.h

void *_InterlockedCompareExchangePointer (void *volatile *,void *,void *)

_InterlockedCompareExchangePointer_HLEAcquire

HLE [2]

immintrin.h

void *_InterlockedCompareExchangePointer_HLEAcquire(void *volatile *,void *,void *)

_InterlockedCompareExchangePointer_HLERelease

HLE [2]

immintrin.h

void *_InterlockedCompareExchangePointer_HLERelease(void *volatile *,void *,void *)

_InterlockedCompareExchangePointer_np

intrin.h

void *_InterlockedCompareExchangePointer_np(void **,void *,void *)

_InterlockedDecrement

intrin.h

long _InterlockedDecrement(long volatile *)

_InterlockedDecrement16

intrin.h

short _InterlockedDecrement16(short volatile *Addend)

_InterlockedDecrement64

intrin.h

__int64 _InterlockedDecrement64(__int64 volatile *)

_InterlockedExchange

intrin.h

long _InterlockedExchange(long volatile *,long)

_InterlockedExchange_HLEAcquire

HLE [2]

immintrin.h

long _InterlockedExchange_HLEAcquire(long volatile *,long)

_InterlockedExchange_HLERelease

HLE [2]

immintrin.h

long _InterlockedExchange_HLERelease(long volatile *,long)

_InterlockedExchange16

intrin.h

short _InterlockedExchange16(short volatile *,short)

_InterlockedExchange64

intrin.h

__int64 _InterlockedExchange64(__int64 volatile *,__int64)

_InterlockedExchange64_HLEAcquire

HLE [2]

immintrin.h

__int64 _InterlockedExchange64_HLEAcquire(__int64 volatile *,__int64)

_InterlockedExchange64_HLERelease

HLE [2]

immintrin.h

__int64 _InterlockedExchange64_HLERelease(__int64 volatile *,__int64)

_InterlockedExchange8

intrin.h

char _InterlockedExchange8(char volatile *,char)

_InterlockedExchangeAdd

intrin.h

long _InterlockedExchangeAdd(long volatile *,long)

_InterlockedExchangeAdd_HLEAcquire

HLE [2]

immintrin.h

long _InterlockedExchangeAdd_HLEAcquire(long volatile *,long)

_InterlockedExchangeAdd_HLERelease

HLE [2]

immintrin.h

long _InterlockedExchangeAdd_HLERelease(long volatile *,long)

_InterlockedExchangeAdd16

intrin.h

short _InterlockedExchangeAdd16(short volatile *,short)

_InterlockedExchangeAdd64

intrin.h

__int64 _InterlockedExchangeAdd64(__int64 volatile *,__int64)

_InterlockedExchangeAdd64_HLEAcquire

HLE [2]

immintrin.h

__int64 _InterlockedExchangeAdd64_HLEAcquire(__int64 volatile *,__int64)

_InterlockedExchangeAdd64_HLERelease

HLE [2]

immintrin.h

__int64 _InterlockedExchangeAdd64_HLERelease(__int64 volatile *,__int64)

_InterlockedExchangeAdd8

intrin.h

char _InterlockedExchangeAdd8(char volatile *,char)

_InterlockedExchangePointer

intrin.h

void * _InterlockedExchangePointer(void *volatile *,void *)

_InterlockedExchangePointer_HLEAcquire

HLE [2]

immintrin.h

void * _InterlockedExchangePointer_HLEAcquire(void *volatile *,void *)

_InterlockedExchangePointer_HLERelease

HLE [2]

immintrin.h

void * _InterlockedExchangePointer_HLERelease(void *volatile *,void *)

_InterlockedIncrement

intrin.h

long _InterlockedIncrement(long volatile *)

_InterlockedIncrement16

intrin.h

short _InterlockedIncrement16(short volatile *Addend)

_InterlockedIncrement64

intrin.h

__int64 _InterlockedIncrement64(__int64 volatile *)

_InterlockedOr

intrin.h

long _InterlockedOr(long volatile *,long)

_InterlockedOr_HLEAcquire

HLE [2]

immintrin.h

long _InterlockedOr_HLEAcquire(long volatile *,long)

_InterlockedOr_HLERelease

HLE [2]

immintrin.h

long _InterlockedOr_HLERelease(long volatile *,long)

_InterlockedOr_np

intrin.h

long _InterlockedOr_np(long *,long)

_InterlockedOr16

intrin.h

short _InterlockedOr16(short volatile *,short)

_InterlockedOr16_np

intrin.h

short _InterlockedOr16_np(short *,short)

_InterlockedOr64

intrin.h

__int64 _InterlockedOr64(__int64 volatile *,__int64)

_InterlockedOr64_HLEAcquire

HLE [2]

immintrin.h

__int64 _InterlockedOr64_HLEAcquire(__int64 volatile *,__int64)

_InterlockedOr64_HLERelease

HLE [2]

immintrin.h

__int64 _InterlockedOr64_HLERelease(__int64 volatile *,__int64)

_InterlockedOr64_np

intrin.h

__int64 _InterlockedOr64_np(__int64 *,__int64)

_InterlockedOr8

intrin.h

char _InterlockedOr8(char volatile *,char)

_InterlockedOr8_np

intrin.h

char _InterlockedOr8_np(char *,char)

_InterlockedXor

intrin.h

long _InterlockedXor(long volatile *,long)

_InterlockedXor_HLEAcquire

HLE [2]

immintrin.h

long _InterlockedXor_HLEAcquire(long volatile *,long)

_InterlockedXor_HLERelease

HLE [2]

immintrin.h

long _InterlockedXor_HLERelease(long volatile *,long)

_InterlockedXor_np

intrin.h

long _InterlockedXor_np(long *,long)

_InterlockedXor16

intrin.h

short _InterlockedXor16(short volatile *,short)

_InterlockedXor16_np

intrin.h

short _InterlockedXor16_np(short *,short)

_InterlockedXor64

intrin.h

__int64 _InterlockedXor64(__int64 volatile *,__int64)

_InterlockedXor64_HLEAcquire

HLE [2]

immintrin.h

__int64 _InterlockedXor64_HLEAcquire(__int64 volatile *,__int64)

_InterlockedXor64_HLERelease

HLE [2]

immintrin.h

__int64 _InterlockedXor64_HLERelease(__int64 volatile *,__int64)

_InterlockedXor64_np

intrin.h

__int64 _InterlockedXor64_np(__int64 *,__int64)

_InterlockedXor8

intrin.h

char _InterlockedXor8(char volatile *,char)

_InterlockedXor8_np

intrin.h

char _InterlockedXor8_np(char *,char)

__invlpg

intrin.h

void __invlpg(void*)

_invpcid

INVPCID [2]

immintrin.h

void _invpcid(unsigned int,void *)

__inword

intrin.h

unsigned short __inword(unsigned short Port)

__inwordstring

intrin.h

void __inwordstring(unsigned short Port,unsigned short *Buffer,unsigned long Count)

_lgdt

intrin.h

void _lgdt(void*)

__lidt

intrin.h

void __lidt(void*)

__ll_lshift

intrin.h

unsigned __int64 [pascal/cdecl] __ll_lshift(unsigned __int64,int)

__ll_rshift

intrin.h

__int64 [pascal/cdecl] __ll_rshift(__int64,int)

__llwpcb

LWP [1]

ammintrin.h

void __llwpcb(void *)

__lwpins32

LWP [1]

ammintrin.h

unsigned char __lwpins32(unsigned int,unsigned int,unsigned int)

__lwpins64

LWP [1]

ammintrin.h

unsigned char __lwpins64(unsigned __int64,unsigned int,unsigned int)

__lwpval32

LWP [1]

ammintrin.h

void __lwpval32(unsigned int,unsigned int,unsigned int)

__lwpval64

LWP [1]

ammintrin.h

void __lwpval64(unsigned __int64,unsigned int,unsigned int)

__lzcnt

LZCNT

intrin.h

unsigned int __lzcnt(unsigned int)

_lzcnt_u32

BMI

ammintrin.h, immintrin.h

unsigned int _lzcnt_u32(unsigned int)

_lzcnt_u64

BMI

ammintrin.h, immintrin.h

unsigned __int64 _lzcnt_u64(unsigned __int64)

__lzcnt16

LZCNT

intrin.h

unsigned short __lzcnt16(unsigned short)

__lzcnt64

LZCNT

intrin.h

unsigned __int64 __lzcnt64(unsigned __int64)

_m_prefetch

3DNOW

intrin.h

void _m_prefetch(void*)

_m_prefetchw

3DNOW

intrin.h

void _m_prefetchw(void*)

_mm_abs_epi16

SSSE3

intrin.h

__m128i _mm_abs_epi16(__m128i)

_mm_abs_epi32

SSSE3

intrin.h

__m128i _mm_abs_epi32(__m128i)

_mm_abs_epi8

SSSE3

intrin.h

__m128i _mm_abs_epi8(__m128i)

_mm_add_epi16

SSE2

intrin.h

__m128i _mm_add_epi16(__m128i,__m128i)

_mm_add_epi32

SSE2

intrin.h

__m128i _mm_add_epi32(__m128i,__m128i)

_mm_add_epi64

SSE2

intrin.h

__m128i _mm_add_epi64(__m128i,__m128i)

_mm_add_epi8

SSE2

intrin.h

__m128i _mm_add_epi8(__m128i,__m128i)

_mm_add_pd

SSE2

intrin.h

__m128d _mm_add_pd(__m128d,__m128d)

_mm_add_ps

SSE

intrin.h

__m128 _mm_add_ps(__m128,__m128)

_mm_add_sd

SSE2

intrin.h

__m128d _mm_add_sd(__m128d,__m128d)

_mm_add_ss

SSE

intrin.h

__m128 _mm_add_ss(__m128,__m128)

_mm_adds_epi16

SSE2

intrin.h

__m128i _mm_adds_epi16(__m128i,__m128i)

_mm_adds_epi8

SSE2

intrin.h

__m128i _mm_adds_epi8(__m128i,__m128i)

_mm_adds_epu16

SSE2

intrin.h

__m128i _mm_adds_epu16(__m128i,__m128i)

_mm_adds_epu8

SSE2

intrin.h

__m128i _mm_adds_epu8(__m128i,__m128i)

_mm_addsub_pd

SSE3

intrin.h

__m128d _mm_addsub_pd(__m128d,__m128d)

_mm_addsub_ps

SSE3

intrin.h

__m128 _mm_addsub_ps(__m128,__m128)

_mm_aesdec_si128

AESNI [2]

immintrin.h

__m128i _mm_aesdec_si128( __m128i,__m128i )

_mm_aesdeclast_si128

AESNI [2]

immintrin.h

__m128i _mm_aesdeclast_si128( __m128i,__m128i )

_mm_aesenc_si128

AESNI [2]

immintrin.h

__m128i _mm_aesenc_si128( __m128i,__m128i )

_mm_aesenclast_si128

AESNI [2]

immintrin.h

__m128i _mm_aesenclast_si128( __m128i,__m128i )

_mm_aesimc_si128

AESNI [2]

immintrin.h

__m128i _mm_aesimc_si128 (__m128i )

_mm_aeskeygenassist_si128

AESNI [2]

immintrin.h

__m128i _mm_aeskeygenassist_si128 (__m128i,const int )

_mm_alignr_epi8

SSSE3

intrin.h

__m128i _mm_alignr_epi8(__m128i,__m128i,int)

_mm_and_pd

SSE2

intrin.h

__m128d _mm_and_pd(__m128d,__m128d)

_mm_and_ps

SSE

intrin.h

__m128 _mm_and_ps(__m128,__m128)

_mm_and_si128

SSE2

intrin.h

__m128i _mm_and_si128(__m128i,__m128i)

_mm_andnot_pd

SSE2

intrin.h

__m128d _mm_andnot_pd(__m128d,__m128d)

_mm_andnot_ps

SSE

intrin.h

__m128 _mm_andnot_ps(__m128,__m128)

_mm_andnot_si128

SSE2

intrin.h

__m128i _mm_andnot_si128(__m128i,__m128i)

_mm_avg_epu16

SSE2

intrin.h

__m128i _mm_avg_epu16(__m128i,__m128i)

_mm_avg_epu8

SSE2

intrin.h

__m128i _mm_avg_epu8(__m128i,__m128i)

_mm_blend_epi16

SSE41

intrin.h

__m128i _mm_blend_epi16 (__m128i,__m128i,const int )

_mm_blend_epi32

AVX2 [2]

immintrin.h

__m128i _mm_blend_epi32(__m128i,__m128i,const int)

_mm_blend_pd

SSE41

intrin.h

__m128d _mm_blend_pd (__m128d,__m128d,const int )

_mm_blend_ps

SSE41

intrin.h

__m128 _mm_blend_ps (__m128,__m128,const int )

_mm_blendv_epi8

SSE41

intrin.h

__m128i _mm_blendv_epi8 (__m128i,__m128i,__m128i )

_mm_blendv_pd

SSE41

intrin.h

__m128d _mm_blendv_pd(__m128d,__m128d,__m128d)

_mm_blendv_ps

SSE41

intrin.h

__m128 _mm_blendv_ps(__m128,__m128,__m128 )

_mm_broadcast_ss

AVX [2]

immintrin.h

__m128 _mm_broadcast_ss(float const *)

_mm_broadcastb_epi8

AVX2 [2]

immintrin.h

__m128i _mm_broadcastb_epi8(__m128i)

_mm_broadcastd_epi32

AVX2 [2]

immintrin.h

__m128i _mm_broadcastd_epi32(__m128i)

_mm_broadcastq_epi64

AVX2 [2]

immintrin.h

__m128i _mm_broadcastq_epi64(__m128i)

_mm_broadcastsd_pd

AVX2 [2]

immintrin.h

__m128d _mm_broadcastsd_pd(__m128d)

_mm_broadcastss_ps

AVX2 [2]

immintrin.h

__m128 _mm_broadcastss_ps(__m128)

_mm_broadcastw_epi16

AVX2 [2]

immintrin.h

__m128i _mm_broadcastw_epi16(__m128i)

_mm_castpd_ps

SSSE3

intrin.h

__m128 _mm_castpd_ps(__m128d)

_mm_castpd_si128

SSSE3

intrin.h

__m128i _mm_castpd_si128(__m128d)

_mm_castps_pd

SSSE3

intrin.h

__m128d _mm_castps_pd(__m128)

_mm_castps_si128

SSSE3

intrin.h

__m128i _mm_castps_si128(__m128)

_mm_castsi128_pd

SSSE3

intrin.h

__m128d _mm_castsi128_pd(__m128i)

_mm_castsi128_ps

SSSE3

intrin.h

__m128 _mm_castsi128_ps(__m128i)

_mm_clflush

SSE2

intrin.h

void _mm_clflush(void const *)

_mm_clmulepi64_si128

PCLMULQDQ [2]

immintrin.h

__m128i _mm_clmulepi64_si128 (__m128i,__m128i,const int )

_mm_cmov_si128

XOP [1]

ammintrin.h

__m128i _mm_cmov_si128(__m128i,__m128i,__m128i)

_mm_cmp_pd

AVX [2]

immintrin.h

__m128d _mm_cmp_pd(__m128d,__m128d,const int)

_mm_cmp_ps

AVX [2]

immintrin.h

__m128 _mm_cmp_ps(__m128,__m128,const int)

_mm_cmp_sd

AVX [2]

immintrin.h

__m128d _mm_cmp_sd(__m128d,__m128d,const int)

_mm_cmp_ss

AVX [2]

immintrin.h

__m128 _mm_cmp_ss(__m128,__m128,const int)

_mm_cmpeq_epi16

SSE2

intrin.h

__m128i _mm_cmpeq_epi16(__m128i,__m128i)

_mm_cmpeq_epi32

SSE2

intrin.h

__m128i _mm_cmpeq_epi32(__m128i,__m128i)

_mm_cmpeq_epi64

SSE41

intrin.h

__m128i _mm_cmpeq_epi64(__m128i,__m128i )

_mm_cmpeq_epi8

SSE2

intrin.h

__m128i _mm_cmpeq_epi8(__m128i,__m128i)

_mm_cmpeq_pd

SSE2

intrin.h

__m128d _mm_cmpeq_pd(__m128d,__m128d)

_mm_cmpeq_ps

SSE

intrin.h

__m128 _mm_cmpeq_ps(__m128,__m128)

_mm_cmpeq_sd

SSE2

intrin.h

__m128d _mm_cmpeq_sd(__m128d,__m128d)

_mm_cmpeq_ss

SSE

intrin.h

__m128 _mm_cmpeq_ss(__m128,__m128)

_mm_cmpestra

SSE42

intrin.h

int _mm_cmpestra(__m128i,int,__m128i,int,const int)

_mm_cmpestrc

SSE42

intrin.h

int _mm_cmpestrc(__m128i,int,__m128i,int,const int)

_mm_cmpestri

SSE42

intrin.h

int _mm_cmpestri(__m128i,int,__m128i,int,const int)

_mm_cmpestrm

SSE42

intrin.h

__m128i _mm_cmpestrm(__m128i,int,__m128i,int,const int)

_mm_cmpestro

SSE42

intrin.h

int _mm_cmpestro(__m128i,int,__m128i,int,const int)

_mm_cmpestrs

SSE42

intrin.h

int _mm_cmpestrs(__m128i,int,__m128i,int,const int)

_mm_cmpestrz

SSE42

intrin.h

int _mm_cmpestrz(__m128i,int,__m128i,int,const int)

_mm_cmpge_pd

SSE2

intrin.h

__m128d _mm_cmpge_pd(__m128d,__m128d)

_mm_cmpge_ps

SSE

intrin.h

__m128 _mm_cmpge_ps(__m128,__m128)

_mm_cmpge_sd

SSE2

intrin.h

__m128d _mm_cmpge_sd(__m128d,__m128d)

_mm_cmpge_ss

SSE

intrin.h

__m128 _mm_cmpge_ss(__m128,__m128)

_mm_cmpgt_epi16

SSE2

intrin.h

__m128i _mm_cmpgt_epi16(__m128i,__m128i)

_mm_cmpgt_epi32

SSE2

intrin.h

__m128i _mm_cmpgt_epi32(__m128i,__m128i)

_mm_cmpgt_epi64

SSE42

intrin.h

__m128i _mm_cmpgt_epi64(__m128i,__m128i )

_mm_cmpgt_epi8

SSE2

intrin.h

__m128i _mm_cmpgt_epi8(__m128i,__m128i)

_mm_cmpgt_pd

SSE2

intrin.h

__m128d _mm_cmpgt_pd(__m128d,__m128d)

_mm_cmpgt_ps

SSE

intrin.h

__m128 _mm_cmpgt_ps(__m128,__m128)

_mm_cmpgt_sd

SSE2

intrin.h

__m128d _mm_cmpgt_sd(__m128d,__m128d)

_mm_cmpgt_ss

SSE

intrin.h

__m128 _mm_cmpgt_ss(__m128,__m128)

_mm_cmpistra

SSE42

intrin.h

int _mm_cmpistra(__m128i,__m128i,const int)

_mm_cmpistrc

SSE42

intrin.h

int _mm_cmpistrc(__m128i,__m128i,const int)

_mm_cmpistri

SSE42

intrin.h

int _mm_cmpistri(__m128i,__m128i,const int)

_mm_cmpistrm

SSE42

intrin.h

__m128i _mm_cmpistrm(__m128i,__m128i,const int)

_mm_cmpistro

SSE42

intrin.h

int _mm_cmpistro(__m128i,__m128i,const int)

_mm_cmpistrs

SSE42

intrin.h

int _mm_cmpistrs(__m128i,__m128i,const int)

_mm_cmpistrz

SSE42

intrin.h

int _mm_cmpistrz(__m128i,__m128i,const int)

_mm_cmple_pd

SSE2

intrin.h

__m128d _mm_cmple_pd(__m128d,__m128d)

_mm_cmple_ps

SSE

intrin.h

__m128 _mm_cmple_ps(__m128,__m128)

_mm_cmple_sd

SSE2

intrin.h

__m128d _mm_cmple_sd(__m128d,__m128d)

_mm_cmple_ss

SSE

intrin.h

__m128 _mm_cmple_ss(__m128,__m128)

_mm_cmplt_epi16

SSE2

intrin.h

__m128i _mm_cmplt_epi16(__m128i,__m128i)

_mm_cmplt_epi32

SSE2

intrin.h

__m128i _mm_cmplt_epi32(__m128i,__m128i)

_mm_cmplt_epi8

SSE2

intrin.h

__m128i _mm_cmplt_epi8(__m128i,__m128i)

_mm_cmplt_pd

SSE2

intrin.h

__m128d _mm_cmplt_pd(__m128d,__m128d)

_mm_cmplt_ps

SSE

intrin.h

__m128 _mm_cmplt_ps(__m128,__m128)

_mm_cmplt_sd

SSE2

intrin.h

__m128d _mm_cmplt_sd(__m128d,__m128d)

_mm_cmplt_ss

SSE

intrin.h

__m128 _mm_cmplt_ss(__m128,__m128)

_mm_cmpneq_pd

SSE2

intrin.h

__m128d _mm_cmpneq_pd(__m128d,__m128d)

_mm_cmpneq_ps

SSE

intrin.h

__m128 _mm_cmpneq_ps(__m128,__m128)

_mm_cmpneq_sd

SSE2

intrin.h

__m128d _mm_cmpneq_sd(__m128d,__m128d)

_mm_cmpneq_ss

SSE

intrin.h

__m128 _mm_cmpneq_ss(__m128,__m128)

_mm_cmpnge_pd

SSE2

intrin.h

__m128d _mm_cmpnge_pd(__m128d,__m128d)

_mm_cmpnge_ps

SSE

intrin.h

__m128 _mm_cmpnge_ps(__m128,__m128)

_mm_cmpnge_sd

SSE2

intrin.h

__m128d _mm_cmpnge_sd(__m128d,__m128d)

_mm_cmpnge_ss

SSE

intrin.h

__m128 _mm_cmpnge_ss(__m128,__m128)

_mm_cmpngt_pd

SSE2

intrin.h

__m128d _mm_cmpngt_pd(__m128d,__m128d)

_mm_cmpngt_ps

SSE

intrin.h

__m128 _mm_cmpngt_ps(__m128,__m128)

_mm_cmpngt_sd

SSE2

intrin.h

__m128d _mm_cmpngt_sd(__m128d,__m128d)

_mm_cmpngt_ss

SSE

intrin.h

__m128 _mm_cmpngt_ss(__m128,__m128)

_mm_cmpnle_pd

SSE2

intrin.h

__m128d _mm_cmpnle_pd(__m128d,__m128d)

_mm_cmpnle_ps

SSE

intrin.h

__m128 _mm_cmpnle_ps(__m128,__m128)

_mm_cmpnle_sd

SSE2

intrin.h

__m128d _mm_cmpnle_sd(__m128d,__m128d)

_mm_cmpnle_ss

SSE

intrin.h

__m128 _mm_cmpnle_ss(__m128,__m128)

_mm_cmpnlt_pd

SSE2

intrin.h

__m128d _mm_cmpnlt_pd(__m128d,__m128d)

_mm_cmpnlt_ps

SSE

intrin.h

__m128 _mm_cmpnlt_ps(__m128,__m128)

_mm_cmpnlt_sd

SSE2

intrin.h

__m128d _mm_cmpnlt_sd(__m128d,__m128d)

_mm_cmpnlt_ss

SSE

intrin.h

__m128 _mm_cmpnlt_ss(__m128,__m128)

_mm_cmpord_pd

SSE2

intrin.h

__m128d _mm_cmpord_pd(__m128d,__m128d)

_mm_cmpord_ps

SSE

intrin.h

__m128 _mm_cmpord_ps(__m128,__m128)

_mm_cmpord_sd

SSE2

intrin.h

__m128d _mm_cmpord_sd(__m128d,__m128d)

_mm_cmpord_ss

SSE

intrin.h

__m128 _mm_cmpord_ss(__m128,__m128)

_mm_cmpunord_pd

SSE2

intrin.h

__m128d _mm_cmpunord_pd(__m128d,__m128d)

_mm_cmpunord_ps

SSE

intrin.h

__m128 _mm_cmpunord_ps(__m128,__m128)

_mm_cmpunord_sd

SSE2

intrin.h

__m128d _mm_cmpunord_sd(__m128d,__m128d)

_mm_cmpunord_ss

SSE

intrin.h

__m128 _mm_cmpunord_ss(__m128,__m128)

_mm_com_epi16

XOP [1]

ammintrin.h

__m128i _mm_com_epi16(__m128i,__m128i,int)

_mm_com_epi32

XOP [1]

ammintrin.h

__m128i _mm_com_epi32(__m128i,__m128i,int)

_mm_com_epi64

XOP [1]

ammintrin.h

__m128i _mm_com_epi64(__m128i,__m128i,int)

_mm_com_epi8

XOP [1]

ammintrin.h

__m128i _mm_com_epi8(__m128i,__m128i,int)

_mm_com_epu16

XOP [1]

ammintrin.h

__m128i _mm_com_epu16(__m128i,__m128i,int)

_mm_com_epu32

XOP [1]

ammintrin.h

__m128i _mm_com_epu32(__m128i,__m128i,int)

_mm_com_epu64

XOP [1]

ammintrin.h

__m128i _mm_com_epu64(__m128i,__m128i,int)

_mm_com_epu8

XOP [1]

ammintrin.h

__m128i _mm_com_epu8(__m128i,__m128i,int)

_mm_comieq_sd

SSE2

intrin.h

int _mm_comieq_sd(__m128d,__m128d)

_mm_comieq_ss

SSE

intrin.h

int _mm_comieq_ss(__m128,__m128)

_mm_comige_sd

SSE2

intrin.h

int _mm_comige_sd(__m128d,__m128d)

_mm_comige_ss

SSE

intrin.h

int _mm_comige_ss(__m128,__m128)

_mm_comigt_sd

SSE2

intrin.h

int _mm_comigt_sd(__m128d,__m128d)

_mm_comigt_ss

SSE

intrin.h

int _mm_comigt_ss(__m128,__m128)

_mm_comile_sd

SSE2

intrin.h

int _mm_comile_sd(__m128d,__m128d)

_mm_comile_ss

SSE

intrin.h

int _mm_comile_ss(__m128,__m128)

_mm_comilt_sd

SSE2

intrin.h

int _mm_comilt_sd(__m128d,__m128d)

_mm_comilt_ss

SSE

intrin.h

int _mm_comilt_ss(__m128,__m128)

_mm_comineq_sd

SSE2

intrin.h

int _mm_comineq_sd(__m128d,__m128d)

_mm_comineq_ss

SSE

intrin.h

int _mm_comineq_ss(__m128,__m128)

_mm_crc32_u16

SSE42

intrin.h

unsigned int _mm_crc32_u16(unsigned int,unsigned short)

_mm_crc32_u32

SSE42

intrin.h

unsigned int _mm_crc32_u32(unsigned int,unsigned int)

_mm_crc32_u64

SSE42

intrin.h

unsigned __int64 _mm_crc32_u64(unsigned __int64,unsigned __int64)

_mm_crc32_u8

SSE42

intrin.h

unsigned int _mm_crc32_u8(unsigned int,unsigned char)

_mm_cvt_si2ss

SSE

intrin.h

__m128 _mm_cvt_si2ss(__m128,int)

_mm_cvt_ss2si

SSE

intrin.h

int _mm_cvt_ss2si(__m128)

_mm_cvtepi16_epi32

SSE41

intrin.h

__m128i _mm_cvtepi16_epi32(__m128i )

_mm_cvtepi16_epi64

SSE41

intrin.h

__m128i _mm_cvtepi16_epi64(__m128i )

_mm_cvtepi32_epi64

SSE41

intrin.h

__m128i _mm_cvtepi32_epi64(__m128i )

_mm_cvtepi32_pd

SSE2

intrin.h

__m128d _mm_cvtepi32_pd(__m128i)

_mm_cvtepi32_ps

SSE2

intrin.h

__m128 _mm_cvtepi32_ps(__m128i)

_mm_cvtepi8_epi16

SSE41

intrin.h

__m128i _mm_cvtepi8_epi16 (__m128i )

_mm_cvtepi8_epi32

SSE41

intrin.h

__m128i _mm_cvtepi8_epi32 (__m128i )

_mm_cvtepi8_epi64

SSE41

intrin.h

__m128i _mm_cvtepi8_epi64 (__m128i )

_mm_cvtepu16_epi32

SSE41

intrin.h

__m128i _mm_cvtepu16_epi32(__m128i )

_mm_cvtepu16_epi64

SSE41

intrin.h

__m128i _mm_cvtepu16_epi64(__m128i )

_mm_cvtepu32_epi64

SSE41

intrin.h

__m128i _mm_cvtepu32_epi64(__m128i )

_mm_cvtepu8_epi16

SSE41

intrin.h

__m128i _mm_cvtepu8_epi16 (__m128i )

_mm_cvtepu8_epi32

SSE41

intrin.h

__m128i _mm_cvtepu8_epi32 (__m128i )

_mm_cvtepu8_epi64

SSE41

intrin.h

__m128i _mm_cvtepu8_epi64 (__m128i )

_mm_cvtpd_epi32

SSE2

intrin.h

__m128i _mm_cvtpd_epi32(__m128d)

_mm_cvtpd_ps

SSE2

intrin.h

__m128 _mm_cvtpd_ps(__m128d)

_mm_cvtph_ps

F16C [2]

immintrin.h

__m128 _mm_cvtph_ps(__m128i)

_mm_cvtps_epi32

SSE2

intrin.h

__m128i _mm_cvtps_epi32(__m128)

_mm_cvtps_pd

SSE2

intrin.h

__m128d _mm_cvtps_pd(__m128)

_mm_cvtps_ph

F16C [2]

immintrin.h

__m128i _mm_cvtps_ph(__m128,const int)

_mm_cvtsd_f64

SSSE3

intrin.h

double _mm_cvtsd_f64(__m128d)

_mm_cvtsd_si32

SSE2

intrin.h

int _mm_cvtsd_si32(__m128d)

_mm_cvtsd_si64

SSE2

intrin.h

__int64 _mm_cvtsd_si64(__m128d)

_mm_cvtsd_si64x

SSE2

intrin.h

__int64 _mm_cvtsd_si64x(__m128d a)

_mm_cvtsd_ss

SSE2

intrin.h

__m128 _mm_cvtsd_ss(__m128,__m128d)

_mm_cvtsi128_si32

SSE2

intrin.h

int _mm_cvtsi128_si32(__m128i)

_mm_cvtsi128_si64

SSE2

intrin.h

__int64 _mm_cvtsi128_si64(__m128i)

_mm_cvtsi128_si64x

SSE2

intrin.h

__int64 _mm_cvtsi128_si64x(__m128i a)

_mm_cvtsi32_sd

SSE2

intrin.h

__m128d _mm_cvtsi32_sd(__m128d,int)

_mm_cvtsi32_si128

SSE2

intrin.h

__m128i _mm_cvtsi32_si128(int)

_mm_cvtsi64_sd

SSE2

intrin.h

__m128d _mm_cvtsi64_sd(__m128d,__int64)

_mm_cvtsi64_si128

SSE2

intrin.h

__m128i _mm_cvtsi64_si128(__int64)

_mm_cvtsi64_ss

SSE

intrin.h

__m128 _mm_cvtsi64_ss(__m128,__int64)

_mm_cvtsi64x_sd

SSE2

intrin.h

__m128d _mm_cvtsi64x_sd(__m128d a,__int64 b)

_mm_cvtsi64x_si128

SSE2

intrin.h

__m128i _mm_cvtsi64x_si128(__int64 a)

_mm_cvtsi64x_ss

SSE2

intrin.h

__m128 _mm_cvtsi64x_ss(__m128 a,__int64 b)

_mm_cvtss_f32

SSSE3

intrin.h

float _mm_cvtss_f32(__m128)

_mm_cvtss_sd

SSE2

intrin.h

__m128d _mm_cvtss_sd(__m128d,__m128)

_mm_cvtss_si64

SSE

intrin.h

__int64 _mm_cvtss_si64(__m128)

_mm_cvtss_si64x

SSE2

intrin.h

__int64 _mm_cvtss_si64x(__m128 a)

_mm_cvtt_ss2si

SSE

intrin.h

int _mm_cvtt_ss2si(__m128)

_mm_cvttpd_epi32

SSE2

intrin.h

__m128i _mm_cvttpd_epi32(__m128d)

_mm_cvttps_epi32

SSE2

intrin.h

__m128i _mm_cvttps_epi32(__m128)

_mm_cvttsd_si32

SSE2

intrin.h

int _mm_cvttsd_si32(__m128d)

_mm_cvttsd_si64

SSE2

intrin.h

__int64 _mm_cvttsd_si64(__m128d)

_mm_cvttsd_si64x

SSE2

intrin.h

__int64 _mm_cvttsd_si64x(__m128d a)

_mm_cvttss_si64

SSE

intrin.h

__int64 _mm_cvttss_si64(__m128)

_mm_cvttss_si64x

SSE2

intrin.h

__int64 _mm_cvttss_si64x(__m128 a)

_mm_div_pd

SSE2

intrin.h

__m128d _mm_div_pd(__m128d,__m128d)

_mm_div_ps

SSE

intrin.h

__m128 _mm_div_ps(__m128,__m128)

_mm_div_sd

SSE2

intrin.h

__m128d _mm_div_sd(__m128d,__m128d)

_mm_div_ss

SSE

intrin.h

__m128 _mm_div_ss(__m128,__m128)

_mm_dp_pd

SSE41

intrin.h

__m128d _mm_dp_pd(__m128d,__m128d,const int )

_mm_dp_ps

SSE41

intrin.h

__m128 _mm_dp_ps(__m128,__m128,const int )

_mm_extract_epi16

SSE2

intrin.h

int _mm_extract_epi16(__m128i,int)

_mm_extract_epi32

SSE41

intrin.h

int _mm_extract_epi32(__m128i,const int )

_mm_extract_epi64

SSE41

intrin.h

__int64 _mm_extract_epi64(__m128i,const int )

_mm_extract_epi8

SSE41

intrin.h

int _mm_extract_epi8 (__m128i,const int )

_mm_extract_ps

SSE41

intrin.h

int _mm_extract_ps(__m128,const int )

_mm_extract_si64

SSE4a

intrin.h

__m128i _mm_extract_si64(__m128i,__m128i)

_mm_extracti_si64

SSE4a

intrin.h

__m128i _mm_extracti_si64(__m128i,int,int)

_mm_fmadd_pd

FMA [2]

immintrin.h

__m128d _mm_fmadd_pd (__m128d a,__m128d b,__m128d c)

_mm_fmadd_ps

FMA [2]

immintrin.h

__m128 _mm_fmadd_ps (__m128 a,__m128 b,__m128 c)

_mm_fmadd_sd

FMA [2]

immintrin.h

__m128d _mm_fmadd_sd (__m128d a,__m128d b,__m128d c)

_mm_fmadd_ss

FMA [2]

immintrin.h

__m128 _mm_fmadd_ss (__m128 a,__m128 b,__m128 c)

_mm_fmaddsub_pd

FMA [2]

immintrin.h

__m128d _mm_fmaddsub_pd (__m128d a,__m128d b,__m128d c)

_mm_fmaddsub_ps

FMA [2]

immintrin.h

__m128 _mm_fmaddsub_ps (__m128 a,__m128 b,__m128 c)

_mm_fmsub_pd

FMA [2]

immintrin.h

__m128d _mm_fmsub_pd (__m128d a,__m128d b,__m128d c)

_mm_fmsub_ps

FMA [2]

immintrin.h

__m128 _mm_fmsub_ps (__m128 a,__m128 b,__m128 c)

_mm_fmsub_sd

FMA [2]

immintrin.h

__m128d _mm_fmsub_sd (__m128d a,__m128d b,__m128d c)

_mm_fmsub_ss

FMA [2]

immintrin.h

__m128 _mm_fmsub_ss (__m128 a,__m128 b,__m128 c)

_mm_fmsubadd_pd

FMA [2]

immintrin.h

__m128d _mm_fmsubadd_pd (__m128d a,__m128d b,__m128d c)

_mm_fmsubadd_ps

FMA [2]

immintrin.h

__m128 _mm_fmsubadd_ps (__m128 a,__m128 b,__m128 c)

_mm_fnmadd_pd

FMA [2]

immintrin.h

__m128d _mm_fnmadd_pd (__m128d a,__m128d b,__m128d c)

_mm_fnmadd_ps

FMA [2]

immintrin.h

__m128 _mm_fnmadd_ps (__m128 a,__m128 b,__m128 c)

_mm_fnmadd_sd

FMA [2]

immintrin.h

__m128d _mm_fnmadd_sd (__m128d a,__m128d b,__m128d c)

_mm_fnmadd_ss

FMA [2]

immintrin.h

__m128 _mm_fnmadd_ss (__m128 a,__m128 b,__m128 c)

_mm_fnmsub_pd

FMA [2]

immintrin.h

__m128d _mm_fnmsub_pd (__m128d a,__m128d b,__m128d c)

_mm_fnmsub_ps

FMA [2]

immintrin.h

__m128 _mm_fnmsub_ps (__m128 a,__m128 b,__m128 c)

_mm_fnmsub_sd

FMA [2]

immintrin.h

__m128d _mm_fnmsub_sd (__m128d a,__m128d b,__m128d c)

_mm_fnmsub_ss

FMA [2]

immintrin.h

__m128 _mm_fnmsub_ss (__m128 a,__m128 b,__m128 c)

_mm_frcz_pd

XOP [1]

ammintrin.h

__m128d _mm_frcz_pd(__m128d)

_mm_frcz_ps

XOP [1]

ammintrin.h

__m128 _mm_frcz_ps(__m128)

_mm_frcz_sd

XOP [1]

ammintrin.h

__m128d _mm_frcz_sd(__m128d,__m128d)

_mm_frcz_ss

XOP [1]

ammintrin.h

__m128 _mm_frcz_ss(__m128,__m128)

_mm_getcsr

SSE

intrin.h

unsigned int _mm_getcsr(void)

_mm_hadd_epi16

SSSE3

intrin.h

__m128i _mm_hadd_epi16(__m128i,__m128i)

_mm_hadd_epi32

SSSE3

intrin.h

__m128i _mm_hadd_epi32(__m128i,__m128i)

_mm_hadd_pd

SSE3

intrin.h

__m128d _mm_hadd_pd(__m128d,__m128d)

_mm_hadd_ps

SSE3

intrin.h

__m128 _mm_hadd_ps(__m128,__m128)

_mm_haddd_epi16

XOP [1]

ammintrin.h

__m128i _mm_haddd_epi16(__m128i)

_mm_haddd_epi8

XOP [1]

ammintrin.h

__m128i _mm_haddd_epi8(__m128i)

_mm_haddd_epu16

XOP [1]

ammintrin.h

__m128i _mm_haddd_epu16(__m128i)

_mm_haddd_epu8

XOP [1]

ammintrin.h

__m128i _mm_haddd_epu8(__m128i)

_mm_haddq_epi16

XOP [1]

ammintrin.h

__m128i _mm_haddq_epi16(__m128i)

_mm_haddq_epi32

XOP [1]

ammintrin.h

__m128i _mm_haddq_epi32(__m128i)

_mm_haddq_epi8

XOP [1]

ammintrin.h

__m128i _mm_haddq_epi8(__m128i)

_mm_haddq_epu16

XOP [1]

ammintrin.h

__m128i _mm_haddq_epu16(__m128i)

_mm_haddq_epu32

XOP [1]

ammintrin.h

__m128i _mm_haddq_epu32(__m128i)

_mm_haddq_epu8

XOP [1]

ammintrin.h

__m128i _mm_haddq_epu8(__m128i)

_mm_hadds_epi16

SSSE3

intrin.h

__m128i _mm_hadds_epi16(__m128i,__m128i)

_mm_haddw_epi8

XOP [1]

ammintrin.h

__m128i _mm_haddw_epi8(__m128i)

_mm_haddw_epu8

XOP [1]

ammintrin.h

__m128i _mm_haddw_epu8(__m128i)

_mm_hsub_epi16

SSSE3

intrin.h

__m128i _mm_hsub_epi16(__m128i,__m128i)

_mm_hsub_epi32

SSSE3

intrin.h

__m128i _mm_hsub_epi32(__m128i,__m128i)

_mm_hsub_pd

SSE3

intrin.h

__m128d _mm_hsub_pd(__m128d,__m128d)

_mm_hsub_ps

SSE3

intrin.h

__m128 _mm_hsub_ps(__m128,__m128)

_mm_hsubd_epi16

XOP [1]

ammintrin.h

__m128i _mm_hsubd_epi16(__m128i)

_mm_hsubq_epi32

XOP [1]

ammintrin.h

__m128i _mm_hsubq_epi32(__m128i)

_mm_hsubs_epi16

SSSE3

intrin.h

__m128i _mm_hsubs_epi16(__m128i,__m128i)

_mm_hsubw_epi8

XOP [1]

ammintrin.h

__m128i _mm_hsubw_epi8(__m128i)

_mm_i32gather_epi32

AVX2 [2]

immintrin.h

__m128i _mm_i32gather_epi32(int const *base,__m128i index,const int scale)

_mm_i32gather_epi64

AVX2 [2]

immintrin.h

__m128i _mm_i32gather_epi64(__int64 const *base,__m128i index,const int scale)

_mm_i32gather_pd

AVX2 [2]

immintrin.h

__m128d _mm_i32gather_pd(double const *base,__m128i index,const int scale)

_mm_i32gather_ps

AVX2 [2]

immintrin.h

__m128 _mm_i32gather_ps(float const *base,__m128i index,const int scale)

_mm_i64gather_epi32

AVX2 [2]

immintrin.h

__m128i _mm_i64gather_epi32(int const *base,__m128i index,const int scale)

_mm_i64gather_epi64

AVX2 [2]

immintrin.h

__m128i _mm_i64gather_epi64(__int64 const *base,__m128i index,const int scale)

_mm_i64gather_pd

AVX2 [2]

immintrin.h

__m128d _mm_i64gather_pd(double const *base,__m128i index,const int scale)

_mm_i64gather_ps

AVX2 [2]

immintrin.h

__m128 _mm_i64gather_ps(float const *base,__m128i index,const int scale)

_mm_insert_epi16

SSE2

intrin.h

__m128i _mm_insert_epi16(__m128i,int,int)

_mm_insert_epi32

SSE41

intrin.h

__m128i _mm_insert_epi32(__m128i,int,const int )

_mm_insert_epi64

SSE41

intrin.h

__m128i _mm_insert_epi64(__m128i,__int64,const int )

_mm_insert_epi8

SSE41

intrin.h

__m128i _mm_insert_epi8 (__m128i,int,const int )

_mm_insert_ps

SSE41

intrin.h

__m128 _mm_insert_ps(__m128,__m128,const int )

_mm_insert_si64

SSE4a

intrin.h

__m128i _mm_insert_si64(__m128i,__m128i)

_mm_inserti_si64

SSE4a

intrin.h

__m128i _mm_inserti_si64(__m128i,__m128i,int,int)

_mm_lddqu_si128

SSE3

intrin.h

__m128i _mm_lddqu_si128(__m128i const*)

_mm_lfence

SSE2

intrin.h

void _mm_lfence(void)

_mm_load_pd

SSE2

intrin.h

__m128d _mm_load_pd(double*)

_mm_load_ps

SSE

intrin.h

__m128 _mm_load_ps(float*)

_mm_load_ps1

SSE

intrin.h

__m128 _mm_load_ps1(float*)

_mm_load_sd

SSE2

intrin.h

__m128d _mm_load_sd(double*)

_mm_load_si128

SSE2

intrin.h

__m128i _mm_load_si128(__m128i*)

_mm_load_ss

SSE

intrin.h

__m128 _mm_load_ss(float*)

_mm_load1_pd

SSE2

intrin.h

__m128d _mm_load1_pd(double*)

_mm_loaddup_pd

SSE3

intrin.h

__m128d _mm_loaddup_pd(double const*)

_mm_loadh_pd

SSE2

intrin.h

__m128d _mm_loadh_pd(__m128d,double*)

_mm_loadh_pi

SSE

intrin.h

__m128 _mm_loadh_pi(__m128,__m64*)

_mm_loadl_epi64

SSE2

intrin.h

__m128i _mm_loadl_epi64(__m128i*)

_mm_loadl_pd

SSE2

intrin.h

__m128d _mm_loadl_pd(__m128d,double*)

_mm_loadl_pi

SSE

intrin.h

__m128 _mm_loadl_pi(__m128,__m64*)

_mm_loadr_pd

SSE2

intrin.h

__m128d _mm_loadr_pd(double*)

_mm_loadr_ps

SSE

intrin.h

__m128 _mm_loadr_ps(float*)

_mm_loadu_pd

SSE2

intrin.h

__m128d _mm_loadu_pd(double*)

_mm_loadu_ps

SSE

intrin.h

__m128 _mm_loadu_ps(float*)

_mm_loadu_si128

SSE2

intrin.h

__m128i _mm_loadu_si128(__m128i*)

_mm_macc_epi16

XOP [1]

ammintrin.h

__m128i _mm_macc_epi16(__m128i,__m128i,__m128i)

_mm_macc_epi32

XOP [1]

ammintrin.h

__m128i _mm_macc_epi32(__m128i,__m128i,__m128i)

_mm_macc_pd

FMA4 [1]

ammintrin.h

__m128d _mm_macc_pd(__m128d,__m128d,__m128d)

_mm_macc_ps

FMA4 [1]

ammintrin.h

__m128 _mm_macc_ps(__m128,__m128,__m128)

_mm_macc_sd

FMA4 [1]

ammintrin.h

__m128d _mm_macc_sd(__m128d,__m128d,__m128d)

_mm_macc_ss

FMA4 [1]

ammintrin.h

__m128 _mm_macc_ss(__m128,__m128,__m128)

_mm_maccd_epi16

XOP [1]

ammintrin.h

__m128i _mm_maccd_epi16(__m128i,__m128i,__m128i)

_mm_macchi_epi32

XOP [1]

ammintrin.h

__m128i _mm_macchi_epi32(__m128i,__m128i,__m128i)

_mm_macclo_epi32

XOP [1]

ammintrin.h

__m128i _mm_macclo_epi32(__m128i,__m128i,__m128i)

_mm_maccs_epi16

XOP [1]

ammintrin.h

__m128i _mm_maccs_epi16(__m128i,__m128i,__m128i)

_mm_maccs_epi32

XOP [1]

ammintrin.h

__m128i _mm_maccs_epi32(__m128i,__m128i,__m128i)

_mm_maccsd_epi16

XOP [1]

ammintrin.h

__m128i _mm_maccsd_epi16(__m128i,__m128i,__m128i)

_mm_maccshi_epi32

XOP [1]

ammintrin.h

__m128i _mm_maccshi_epi32(__m128i,__m128i,__m128i)

_mm_maccslo_epi32

XOP [1]

ammintrin.h

__m128i _mm_maccslo_epi32(__m128i,__m128i,__m128i)

_mm_madd_epi16

SSE2

intrin.h

__m128i _mm_madd_epi16(__m128i,__m128i)

_mm_maddd_epi16

XOP [1]

ammintrin.h

__m128i _mm_maddd_epi16(__m128i,__m128i,__m128i)

_mm_maddsd_epi16

XOP [1]

ammintrin.h

__m128i _mm_maddsd_epi16(__m128i,__m128i,__m128i)

_mm_maddsub_pd

FMA4 [1]

ammintrin.h

__m128d _mm_maddsub_pd(__m128d,__m128d,__m128d)

_mm_maddsub_ps

FMA4 [1]

ammintrin.h

__m128 _mm_maddsub_ps(__m128,__m128,__m128)

_mm_maddubs_epi16

SSSE3

intrin.h

__m128i _mm_maddubs_epi16(__m128i,__m128i)

_mm_mask_i32gather_epi32

AVX2 [2]

immintrin.h

__m128i _mm_mask_i32gather_epi32(__m128i src,int const *base,__m128i index,__m128i mask,const int scale)

_mm_mask_i32gather_epi64

AVX2 [2]

immintrin.h

__m128i _mm_mask_i32gather_epi64(__m128i src,__int64 const *base,__m128i index,__m128i mask,const int scale)

_mm_mask_i32gather_pd

AVX2 [2]

immintrin.h

__m128d _mm_mask_i32gather_pd(__m128d src,double const *base,__m128i index,__m128d mask,const int scale)

_mm_mask_i32gather_ps

AVX2 [2]

immintrin.h

__m128 _mm_mask_i32gather_ps(__m128 src,float const *base,__m128i index,__m128 mask,const int scale)

_mm_mask_i64gather_epi32

AVX2 [2]

immintrin.h

__m128i _mm_mask_i64gather_epi32(__m128i src,int const *base,__m128i index,__m128i mask,const int scale)

_mm_mask_i64gather_epi64

AVX2 [2]

immintrin.h

__m128i _mm_mask_i64gather_epi64(__m128i src,__int64 const *base,__m128i index,__m128i mask,const int scale)

_mm_mask_i64gather_pd

AVX2 [2]

immintrin.h

__m128d _mm_mask_i64gather_pd(__m128d src,double const *base,__m128i index,__m128d mask,const int scale)

_mm_mask_i64gather_ps

AVX2 [2]

immintrin.h

__m128 _mm_mask_i64gather_ps(__m128 src,float const *base,__m128i index,__m128 mask,const int scale)

_mm_maskload_epi32

AVX2 [2]

immintrin.h

__m128i _mm_maskload_epi32(int const *,__m128i)

_mm_maskload_epi64

AVX2 [2]

immintrin.h

__m128i _mm_maskload_epi64( __int64 const *,__m128i)

_mm_maskload_pd

AVX [2]

immintrin.h

__m128d _mm_maskload_pd(double const *,__m128i)

_mm_maskload_ps

AVX [2]

immintrin.h

__m128 _mm_maskload_ps(float const *,__m128i)

_mm_maskmoveu_si128

SSE2

intrin.h

void _mm_maskmoveu_si128(__m128i,__m128i,char*)

_mm_maskstore_epi32

AVX2 [2]

immintrin.h

void _mm_maskstore_epi32(int *,__m128i,__m128i)

_mm_maskstore_epi64

AVX2 [2]

immintrin.h

void _mm_maskstore_epi64(__int64 *,__m128i,__m128i)

_mm_maskstore_pd

AVX [2]

immintrin.h

void _mm_maskstore_pd(double *,__m128i,__m128d)

_mm_maskstore_ps

AVX [2]

immintrin.h

void _mm_maskstore_ps(float *,__m128i,__m128)

_mm_max_epi16

SSE2

intrin.h

__m128i _mm_max_epi16(__m128i,__m128i)

_mm_max_epi32

SSE41

intrin.h

__m128i _mm_max_epi32(__m128i,__m128i )

_mm_max_epi8

SSE41

intrin.h

__m128i _mm_max_epi8 (__m128i,__m128i )

_mm_max_epu16

SSE41

intrin.h

__m128i _mm_max_epu16(__m128i,__m128i )

_mm_max_epu32

SSE41

intrin.h

__m128i _mm_max_epu32(__m128i,__m128i )

_mm_max_epu8

SSE2

intrin.h

__m128i _mm_max_epu8(__m128i,__m128i)

_mm_max_pd

SSE2

intrin.h

__m128d _mm_max_pd(__m128d,__m128d)

_mm_max_ps

SSE

intrin.h

__m128 _mm_max_ps(__m128,__m128)

_mm_max_sd

SSE2

intrin.h

__m128d _mm_max_sd(__m128d,__m128d)

_mm_max_ss

SSE

intrin.h

__m128 _mm_max_ss(__m128,__m128)

_mm_mfence

SSE2

intrin.h

void _mm_mfence(void)

_mm_min_epi16

SSE2

intrin.h

__m128i _mm_min_epi16(__m128i,__m128i)

_mm_min_epi32

SSE41

intrin.h

__m128i _mm_min_epi32(__m128i,__m128i )

_mm_min_epi8

SSE41

intrin.h

__m128i _mm_min_epi8 (__m128i,__m128i )

_mm_min_epu16

SSE41

intrin.h

__m128i _mm_min_epu16(__m128i,__m128i )

_mm_min_epu32

SSE41

intrin.h

__m128i _mm_min_epu32(__m128i,__m128i )

_mm_min_epu8

SSE2

intrin.h

__m128i _mm_min_epu8(__m128i,__m128i)

_mm_min_pd

SSE2

intrin.h

__m128d _mm_min_pd(__m128d,__m128d)

_mm_min_ps

SSE

intrin.h

__m128 _mm_min_ps(__m128,__m128)

_mm_min_sd

SSE2

intrin.h

__m128d _mm_min_sd(__m128d,__m128d)

_mm_min_ss

SSE

intrin.h

__m128 _mm_min_ss(__m128,__m128)

_mm_minpos_epu16

SSE41

intrin.h

__m128i _mm_minpos_epu16(__m128i )

_mm_monitor

SSE3

intrin.h

void _mm_monitor(void const*,unsigned int,unsigned int)

_mm_move_epi64

SSE2

intrin.h

__m128i _mm_move_epi64(__m128i)

_mm_move_sd

SSE2

intrin.h

__m128d _mm_move_sd(__m128d,__m128d)

_mm_move_ss

SSE

intrin.h

__m128 _mm_move_ss(__m128,__m128)

_mm_movedup_pd

SSE3

intrin.h

__m128d _mm_movedup_pd(__m128d)

_mm_movehdup_ps

SSE3

intrin.h

__m128 _mm_movehdup_ps(__m128)

_mm_movehl_ps

SSE

intrin.h

__m128 _mm_movehl_ps(__m128,__m128)

_mm_moveldup_ps

SSE3

intrin.h

__m128 _mm_moveldup_ps(__m128)

_mm_movelh_ps

SSE

intrin.h

__m128 _mm_movelh_ps(__m128,__m128)

_mm_movemask_epi8

SSE2

intrin.h

int _mm_movemask_epi8(__m128i)

_mm_movemask_pd

SSE2

intrin.h

int _mm_movemask_pd(__m128d)

_mm_movemask_ps

SSE

intrin.h

int _mm_movemask_ps(__m128)

_mm_mpsadbw_epu8

SSE41

intrin.h

__m128i _mm_mpsadbw_epu8(__m128i s1,__m128i,const int)

_mm_msub_pd

FMA4 [1]

ammintrin.h

__m128d _mm_msub_pd(__m128d,__m128d,__m128d)

_mm_msub_ps

FMA4 [1]

ammintrin.h

__m128 _mm_msub_ps(__m128,__m128,__m128)

_mm_msub_sd

FMA4 [1]

ammintrin.h

__m128d _mm_msub_sd(__m128d,__m128d,__m128d)

_mm_msub_ss

FMA4 [1]

ammintrin.h

__m128 _mm_msub_ss(__m128,__m128,__m128)

_mm_msubadd_pd

FMA4 [1]

ammintrin.h

__m128d _mm_msubadd_pd(__m128d,__m128d,__m128d)

_mm_msubadd_ps

FMA4 [1]

ammintrin.h

__m128 _mm_msubadd_ps(__m128,__m128,__m128)

_mm_mul_epi32

SSE41

intrin.h

__m128i _mm_mul_epi32(__m128i,__m128i )

_mm_mul_epu32

SSE2

intrin.h

__m128i _mm_mul_epu32(__m128i,__m128i)

_mm_mul_pd

SSE2

intrin.h

__m128d _mm_mul_pd(__m128d,__m128d)

_mm_mul_ps

SSE

intrin.h

__m128 _mm_mul_ps(__m128,__m128)

_mm_mul_sd

SSE2

intrin.h

__m128d _mm_mul_sd(__m128d,__m128d)

_mm_mul_ss

SSE

intrin.h

__m128 _mm_mul_ss(__m128,__m128)

_mm_mulhi_epi16

SSE2

intrin.h

__m128i _mm_mulhi_epi16(__m128i,__m128i)

_mm_mulhi_epu16

SSE2

intrin.h

__m128i _mm_mulhi_epu16(__m128i,__m128i)

_mm_mulhrs_epi16

SSSE3

intrin.h

__m128i _mm_mulhrs_epi16(__m128i,__m128i)

_mm_mullo_epi16

SSE2

intrin.h

__m128i _mm_mullo_epi16(__m128i,__m128i)

_mm_mullo_epi32

SSE41

intrin.h

__m128i _mm_mullo_epi32(__m128i,__m128i )

_mm_mwait

SSE3

intrin.h

void _mm_mwait(unsigned int,unsigned int)

_mm_nmacc_pd

FMA4 [1]

ammintrin.h

__m128d _mm_nmacc_pd(__m128d,__m128d,__m128d)

_mm_nmacc_ps

FMA4 [1]

ammintrin.h

__m128 _mm_nmacc_ps(__m128,__m128,__m128)

_mm_nmacc_sd

FMA4 [1]

ammintrin.h

__m128d _mm_nmacc_sd(__m128d,__m128d,__m128d)

_mm_nmacc_ss

FMA4 [1]

ammintrin.h

__m128 _mm_nmacc_ss(__m128,__m128,__m128)

_mm_nmsub_pd

FMA4 [1]

ammintrin.h

__m128d _mm_nmsub_pd(__m128d,__m128d,__m128d)

_mm_nmsub_ps

FMA4 [1]

ammintrin.h

__m128 _mm_nmsub_ps(__m128,__m128,__m128)

_mm_nmsub_sd

FMA4 [1]

ammintrin.h

__m128d _mm_nmsub_sd(__m128d,__m128d,__m128d)

_mm_nmsub_ss

FMA4 [1]

ammintrin.h

__m128 _mm_nmsub_ss(__m128,__m128,__m128)

_mm_or_pd

SSE2

intrin.h

__m128d _mm_or_pd(__m128d,__m128d)

_mm_or_ps

SSE

intrin.h

__m128 _mm_or_ps(__m128,__m128)

_mm_or_si128

SSE2

intrin.h

__m128i _mm_or_si128(__m128i,__m128i)

_mm_packs_epi16

SSE2

intrin.h

__m128i _mm_packs_epi16(__m128i,__m128i)

_mm_packs_epi32

SSE2

intrin.h

__m128i _mm_packs_epi32(__m128i,__m128i)

_mm_packus_epi16

SSE2

intrin.h

__m128i _mm_packus_epi16(__m128i,__m128i)

_mm_packus_epi32

SSE41

intrin.h

__m128i _mm_packus_epi32(__m128i,__m128i )

_mm_pause

SSE2

intrin.h

void _mm_pause(void)

_mm_perm_epi8

XOP [1]

ammintrin.h

__m128i _mm_perm_epi8(__m128i,__m128i,__m128i)

_mm_permute_pd

AVX [2]

immintrin.h

__m128d _mm_permute_pd(__m128d,int)

_mm_permute_ps

AVX [2]

immintrin.h

__m128 _mm_permute_ps(__m128,int)

_mm_permute2_pd

XOP [1]

ammintrin.h

__m128d _mm_permute2_pd(__m128d,__m128d,__m128i,int)

_mm_permute2_ps

XOP [1]

ammintrin.h

__m128 _mm_permute2_ps(__m128,__m128,__m128i,int)

_mm_permutevar_pd

AVX [2]

immintrin.h

__m128d _mm_permutevar_pd(__m128d,__m128i)

_mm_permutevar_ps

AVX [2]

immintrin.h

__m128 _mm_permutevar_ps(__m128,__m128i)

_mm_popcnt_u32

POPCNT

intrin.h

int _mm_popcnt_u32(unsigned int)

_mm_popcnt_u64

POPCNT

intrin.h

__int64 _mm_popcnt_u64(unsigned __int64)

_mm_prefetch

SSE

intrin.h

void _mm_prefetch(char*,int)

_mm_rcp_ps

SSE

intrin.h

__m128 _mm_rcp_ps(__m128)

_mm_rcp_ss

SSE

intrin.h

__m128 _mm_rcp_ss(__m128)

_mm_rot_epi16

XOP [1]

ammintrin.h

__m128i _mm_rot_epi16(__m128i,__m128i)

_mm_rot_epi32

XOP [1]

ammintrin.h

__m128i _mm_rot_epi32(__m128i,__m128i)

_mm_rot_epi64

XOP [1]

ammintrin.h

__m128i _mm_rot_epi64(__m128i,__m128i)

_mm_rot_epi8

XOP [1]

ammintrin.h

__m128i _mm_rot_epi8(__m128i,__m128i)

_mm_roti_epi16

XOP [1]

ammintrin.h

__m128i _mm_roti_epi16(__m128i,int)

_mm_roti_epi32

XOP [1]

ammintrin.h

__m128i _mm_roti_epi32(__m128i,int)

_mm_roti_epi64

XOP [1]

ammintrin.h

__m128i _mm_roti_epi64(__m128i,int)

_mm_roti_epi8

XOP [1]

ammintrin.h

__m128i _mm_roti_epi8(__m128i,int)

_mm_round_pd

SSE41

intrin.h

__m128d _mm_round_pd(__m128d,const int )

_mm_round_ps

SSE41

intrin.h

__m128 _mm_round_ps(__m128,const int )

_mm_round_sd

SSE41

intrin.h

__m128d _mm_round_sd(__m128d,__m128d,const int )

_mm_round_ss

SSE41

intrin.h

__m128 _mm_round_ss(__m128,__m128,const int )

_mm_rsqrt_ps

SSE

intrin.h

__m128 _mm_rsqrt_ps(__m128)

_mm_rsqrt_ss

SSE

intrin.h

__m128 _mm_rsqrt_ss(__m128)

_mm_sad_epu8

SSE2

intrin.h

__m128i _mm_sad_epu8(__m128i,__m128i)

_mm_set_epi16

SSE2

intrin.h

__m128i _mm_set_epi16(short,short,short,short,short,short,short,short)

_mm_set_epi32

SSE2

intrin.h

__m128i _mm_set_epi32(int,int,int,int)

_mm_set_epi64x

SSE2

intrin.h

__m128i _mm_set_epi64x(__int64 i1,__int64 i0)

_mm_set_epi8

SSE2

intrin.h

__m128i _mm_set_epi8(char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char)

_mm_set_pd

SSE2

intrin.h

__m128d _mm_set_pd(double,double)

_mm_set_ps

SSE

intrin.h

__m128 _mm_set_ps(float,float,float,float)

_mm_set_ps1

SSE

intrin.h

__m128 _mm_set_ps1(float)

_mm_set_sd

SSE2

intrin.h

__m128d _mm_set_sd(double)

_mm_set_ss

SSE

intrin.h

__m128 _mm_set_ss(float)

_mm_set1_epi16

SSE2

intrin.h

__m128i _mm_set1_epi16(short)

_mm_set1_epi32

SSE2

intrin.h

__m128i _mm_set1_epi32(int)

_mm_set1_epi64x

SSE2

intrin.h

__m128i _mm_set1_epi64x(__int64 i)

_mm_set1_epi8

SSE2

intrin.h

__m128i _mm_set1_epi8(char)

_mm_set1_pd

SSE2

intrin.h

__m128d _mm_set1_pd(double)

_mm_setcsr

SSE

intrin.h

void _mm_setcsr(unsigned int)

_mm_setl_epi64

SSE2

intrin.h

__m128i _mm_setl_epi64(__m128i)

_mm_setr_epi16

SSE2

intrin.h

__m128i _mm_setr_epi16(short,short,short,short,short,short,short,short)

_mm_setr_epi32

SSE2

intrin.h

__m128i _mm_setr_epi32(int,int,int,int)

_mm_setr_epi8

SSE2

intrin.h

__m128i _mm_setr_epi8(char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char)

_mm_setr_pd

SSE2

intrin.h

__m128d _mm_setr_pd(double,double)

_mm_setr_ps

SSE

intrin.h

__m128 _mm_setr_ps(float,float,float,float)

_mm_setzero_pd

SSE2

intrin.h

__m128d _mm_setzero_pd(void)

_mm_setzero_ps

SSE

intrin.h

__m128 _mm_setzero_ps(void)

_mm_setzero_si128

SSE2

intrin.h

__m128i _mm_setzero_si128(void)

_mm_sfence

SSE

intrin.h

void _mm_sfence(void)

_mm_sha_epi16

XOP [1]

ammintrin.h

__m128i _mm_sha_epi16(__m128i,__m128i)

_mm_sha_epi32

XOP [1]

ammintrin.h

__m128i _mm_sha_epi32(__m128i,__m128i)

_mm_sha_epi64

XOP [1]

ammintrin.h

__m128i _mm_sha_epi64(__m128i,__m128i)

_mm_sha_epi8

XOP [1]

ammintrin.h

__m128i _mm_sha_epi8(__m128i,__m128i)

_mm_shl_epi16

XOP [1]

ammintrin.h

__m128i _mm_shl_epi16(__m128i,__m128i)

_mm_shl_epi32

XOP [1]

ammintrin.h

__m128i _mm_shl_epi32(__m128i,__m128i)

_mm_shl_epi64

XOP [1]

ammintrin.h

__m128i _mm_shl_epi64(__m128i,__m128i)

_mm_shl_epi8

XOP [1]

ammintrin.h

__m128i _mm_shl_epi8(__m128i,__m128i)

_mm_shuffle_epi32

SSE2

intrin.h

__m128i _mm_shuffle_epi32(__m128i,int)

_mm_shuffle_epi8

SSSE3

intrin.h

__m128i _mm_shuffle_epi8(__m128i,__m128i)

_mm_shuffle_pd

SSE2

intrin.h

__m128d _mm_shuffle_pd(__m128d,__m128d,int)

_mm_shuffle_ps

SSE

intrin.h

__m128 _mm_shuffle_ps(__m128,__m128,unsigned int)

_mm_shufflehi_epi16

SSE2

intrin.h

__m128i _mm_shufflehi_epi16(__m128i,int)

_mm_shufflelo_epi16

SSE2

intrin.h

__m128i _mm_shufflelo_epi16(__m128i,int)

_mm_sign_epi16

SSSE3

intrin.h

__m128i _mm_sign_epi16(__m128i,__m128i)

_mm_sign_epi32

SSSE3

intrin.h

__m128i _mm_sign_epi32(__m128i,__m128i)

_mm_sign_epi8

SSSE3

intrin.h

__m128i _mm_sign_epi8(__m128i,__m128i)

_mm_sll_epi16

SSE2

intrin.h

__m128i _mm_sll_epi16(__m128i,__m128i)

_mm_sll_epi32

SSE2

intrin.h

__m128i _mm_sll_epi32(__m128i,__m128i)

_mm_sll_epi64

SSE2

intrin.h

__m128i _mm_sll_epi64(__m128i,__m128i)

_mm_slli_epi16

SSE2

intrin.h

__m128i _mm_slli_epi16(__m128i,int)

_mm_slli_epi32

SSE2

intrin.h

__m128i _mm_slli_epi32(__m128i,int)

_mm_slli_epi64

SSE2

intrin.h

__m128i _mm_slli_epi64(__m128i,int)

_mm_slli_si128

SSE2

intrin.h

__m128i _mm_slli_si128(__m128i,int)

_mm_sllv_epi32

AVX2 [2]

immintrin.h

__m128i _mm_sllv_epi32(__m128i,__m128i)

_mm_sllv_epi64

AVX2 [2]

immintrin.h

__m128i _mm_sllv_epi64(__m128i,__m128i)

_mm_sqrt_pd

SSE2

intrin.h

__m128d _mm_sqrt_pd(__m128d)

_mm_sqrt_ps

SSE

intrin.h

__m128 _mm_sqrt_ps(__m128)

_mm_sqrt_sd

SSE2

intrin.h

__m128d _mm_sqrt_sd(__m128d,__m128d)

_mm_sqrt_ss

SSE

intrin.h

__m128 _mm_sqrt_ss(__m128)

_mm_sra_epi16

SSE2

intrin.h

__m128i _mm_sra_epi16(__m128i,__m128i)

_mm_sra_epi32

SSE2

intrin.h

__m128i _mm_sra_epi32(__m128i,__m128i)

_mm_srai_epi16

SSE2

intrin.h

__m128i _mm_srai_epi16(__m128i,int)

_mm_srai_epi32

SSE2

intrin.h

__m128i _mm_srai_epi32(__m128i,int)

_mm_srav_epi32

AVX2 [2]

immintrin.h

__m128i _mm_srav_epi32(__m128i,__m128i)

_mm_srl_epi16

SSE2

intrin.h

__m128i _mm_srl_epi16(__m128i,__m128i)

_mm_srl_epi32

SSE2

intrin.h

__m128i _mm_srl_epi32(__m128i,__m128i)

_mm_srl_epi64

SSE2

intrin.h

__m128i _mm_srl_epi64(__m128i,__m128i)

_mm_srli_epi16

SSE2

intrin.h

__m128i _mm_srli_epi16(__m128i,int)

_mm_srli_epi32

SSE2

intrin.h

__m128i _mm_srli_epi32(__m128i,int)

_mm_srli_epi64

SSE2

intrin.h

__m128i _mm_srli_epi64(__m128i,int)

_mm_srli_si128

SSE2

intrin.h

__m128i _mm_srli_si128(__m128i,int)

_mm_srlv_epi32

AVX2 [2]

immintrin.h

__m128i _mm_srlv_epi32(__m128i,__m128i)

_mm_srlv_epi64

AVX2 [2]

immintrin.h

__m128i _mm_srlv_epi64(__m128i,__m128i)

_mm_store_pd

SSE2

intrin.h

void _mm_store_pd(double*,__m128d)

_mm_store_ps

SSE

intrin.h

void _mm_store_ps(float*,__m128)

_mm_store_ps1

SSE

intrin.h

void _mm_store_ps1(float*,__m128)

_mm_store_sd

SSE2

intrin.h

void _mm_store_sd(double*,__m128d)

_mm_store_si128

SSE2

intrin.h

void _mm_store_si128(__m128i*,__m128i)

_mm_store_ss

SSE

intrin.h

void _mm_store_ss(float*,__m128)

_mm_store1_pd

SSE2

intrin.h

void _mm_store1_pd(double*,__m128d)

_mm_storeh_pd

SSE2

intrin.h

void _mm_storeh_pd(double*,__m128d)

_mm_storeh_pi

SSE

intrin.h

void _mm_storeh_pi(__m64*,__m128)

_mm_storel_epi64

SSE2

intrin.h

void _mm_storel_epi64(__m128i*,__m128i)

_mm_storel_pd

SSE2

intrin.h

void _mm_storel_pd(double*,__m128d)

_mm_storel_pi

SSE

intrin.h

void _mm_storel_pi(__m64*,__m128)

_mm_storer_pd

SSE2

intrin.h

void _mm_storer_pd(double*,__m128d)

_mm_storer_ps

SSE

intrin.h

void _mm_storer_ps(float*,__m128)

_mm_storeu_pd

SSE2

intrin.h

void _mm_storeu_pd(double*,__m128d)

_mm_storeu_ps

SSE

intrin.h

void _mm_storeu_ps(float*,__m128)

_mm_storeu_si128

SSE2

intrin.h

void _mm_storeu_si128(__m128i*,__m128i)

_mm_stream_load_si128

SSE41

intrin.h

__m128i _mm_stream_load_si128(__m128i* )

_mm_stream_pd

SSE2

intrin.h

void _mm_stream_pd(double*,__m128d)

_mm_stream_ps

SSE

intrin.h

void _mm_stream_ps(float*,__m128)

_mm_stream_sd

SSE4a

intrin.h

void _mm_stream_sd(double*,__m128d)

_mm_stream_si128

SSE2

intrin.h

void _mm_stream_si128(__m128i*,__m128i)

_mm_stream_si32

SSE2

intrin.h

void _mm_stream_si32(int*,int)

_mm_stream_si64x

SSE2

intrin.h

void _mm_stream_si64x(__int64 *,__int64)

_mm_stream_ss

SSE4a

intrin.h

void _mm_stream_ss(float*,__m128)

_mm_sub_epi16

SSE2

intrin.h

__m128i _mm_sub_epi16(__m128i,__m128i)

_mm_sub_epi32

SSE2

intrin.h

__m128i _mm_sub_epi32(__m128i,__m128i)

_mm_sub_epi64

SSE2

intrin.h

__m128i _mm_sub_epi64(__m128i,__m128i)

_mm_sub_epi8

SSE2

intrin.h

__m128i _mm_sub_epi8(__m128i,__m128i)

_mm_sub_pd

SSE2

intrin.h

__m128d _mm_sub_pd(__m128d,__m128d)

_mm_sub_ps

SSE

intrin.h

__m128 _mm_sub_ps(__m128,__m128)

_mm_sub_sd

SSE2

intrin.h

__m128d _mm_sub_sd(__m128d,__m128d)

_mm_sub_ss

SSE

intrin.h

__m128 _mm_sub_ss(__m128,__m128)

_mm_subs_epi16

SSE2

intrin.h

__m128i _mm_subs_epi16(__m128i,__m128i)

_mm_subs_epi8

SSE2

intrin.h

__m128i _mm_subs_epi8(__m128i,__m128i)

_mm_subs_epu16

SSE2

intrin.h

__m128i _mm_subs_epu16(__m128i,__m128i)

_mm_subs_epu8

SSE2

intrin.h

__m128i _mm_subs_epu8(__m128i,__m128i)

_mm_testc_pd

AVX [2]

immintrin.h

int _mm_testc_pd(__m128d,__m128d)

_mm_testc_ps

AVX [2]

immintrin.h

int _mm_testc_ps(__m128,__m128)

_mm_testc_si128

SSE41

intrin.h

int _mm_testc_si128(__m128i,__m128i )

_mm_testnzc_pd

AVX [2]

immintrin.h

int _mm_testnzc_pd(__m128d,__m128d)

_mm_testnzc_ps

AVX [2]

immintrin.h

int _mm_testnzc_ps(__m128,__m128)

_mm_testnzc_si128

SSE41

intrin.h

int _mm_testnzc_si128(__m128i,__m128i )

_mm_testz_pd

AVX [2]

immintrin.h

int _mm_testz_pd(__m128d,__m128d)

_mm_testz_ps

AVX [2]

immintrin.h

int _mm_testz_ps(__m128,__m128)

_mm_testz_si128

SSE41

intrin.h

int _mm_testz_si128(__m128i,__m128i )

_mm_ucomieq_sd

SSE2

intrin.h

int _mm_ucomieq_sd(__m128d,__m128d)

_mm_ucomieq_ss

SSE

intrin.h

int _mm_ucomieq_ss(__m128,__m128)

_mm_ucomige_sd

SSE2

intrin.h

int _mm_ucomige_sd(__m128d,__m128d)

_mm_ucomige_ss

SSE

intrin.h

int _mm_ucomige_ss(__m128,__m128)

_mm_ucomigt_sd

SSE2

intrin.h

int _mm_ucomigt_sd(__m128d,__m128d)

_mm_ucomigt_ss

SSE

intrin.h

int _mm_ucomigt_ss(__m128,__m128)

_mm_ucomile_sd

SSE2

intrin.h

int _mm_ucomile_sd(__m128d,__m128d)

_mm_ucomile_ss

SSE

intrin.h

int _mm_ucomile_ss(__m128,__m128)

_mm_ucomilt_sd

SSE2

intrin.h

int _mm_ucomilt_sd(__m128d,__m128d)

_mm_ucomilt_ss

SSE

intrin.h

int _mm_ucomilt_ss(__m128,__m128)

_mm_ucomineq_sd

SSE2

intrin.h

int _mm_ucomineq_sd(__m128d,__m128d)

_mm_ucomineq_ss

SSE

intrin.h

int _mm_ucomineq_ss(__m128,__m128)

_mm_unpackhi_epi16

SSE2

intrin.h

__m128i _mm_unpackhi_epi16(__m128i,__m128i)

_mm_unpackhi_epi32

SSE2

intrin.h

__m128i _mm_unpackhi_epi32(__m128i,__m128i)

_mm_unpackhi_epi64

SSE2

intrin.h

__m128i _mm_unpackhi_epi64(__m128i,__m128i)

_mm_unpackhi_epi8

SSE2

intrin.h

__m128i _mm_unpackhi_epi8(__m128i,__m128i)

_mm_unpackhi_pd

SSE2

intrin.h

__m128d _mm_unpackhi_pd(__m128d,__m128d)

_mm_unpackhi_ps

SSE

intrin.h

__m128 _mm_unpackhi_ps(__m128,__m128)

_mm_unpacklo_epi16

SSE2

intrin.h

__m128i _mm_unpacklo_epi16(__m128i,__m128i)

_mm_unpacklo_epi32

SSE2

intrin.h

__m128i _mm_unpacklo_epi32(__m128i,__m128i)

_mm_unpacklo_epi64

SSE2

intrin.h

__m128i _mm_unpacklo_epi64(__m128i,__m128i)

_mm_unpacklo_epi8

SSE2

intrin.h

__m128i _mm_unpacklo_epi8(__m128i,__m128i)

_mm_unpacklo_pd

SSE2

intrin.h

__m128d _mm_unpacklo_pd(__m128d,__m128d)

_mm_unpacklo_ps

SSE

intrin.h

__m128 _mm_unpacklo_ps(__m128,__m128)

_mm_xor_pd

SSE2

intrin.h

__m128d _mm_xor_pd(__m128d,__m128d)

_mm_xor_ps

SSE

intrin.h

__m128 _mm_xor_ps(__m128,__m128)

_mm_xor_si128

SSE2

intrin.h

__m128i _mm_xor_si128(__m128i,__m128i)

_mm256_abs_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_abs_epi16(__m256i)

_mm256_abs_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_abs_epi32(__m256i)

_mm256_abs_epi8

AVX2 [2]

immintrin.h

__m256i _mm256_abs_epi8(__m256i)

_mm256_add_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_add_epi16(__m256i,__m256i)

_mm256_add_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_add_epi32(__m256i,__m256i)

_mm256_add_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_add_epi64(__m256i,__m256i)

_mm256_add_epi8

AVX2 [2]

immintrin.h

__m256i _mm256_add_epi8(__m256i,__m256i)

_mm256_add_pd

AVX [2]

immintrin.h

__m256d _mm256_add_pd(__m256d,__m256d)

_mm256_add_ps

AVX [2]

immintrin.h

__m256 _mm256_add_ps(__m256,__m256)

_mm256_adds_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_adds_epi16(__m256i,__m256i)

_mm256_adds_epi8

AVX2 [2]

immintrin.h

__m256i _mm256_adds_epi8(__m256i,__m256i)

_mm256_adds_epu16

AVX2 [2]

immintrin.h

__m256i _mm256_adds_epu16(__m256i,__m256i)

_mm256_adds_epu8

AVX2 [2]

immintrin.h

__m256i _mm256_adds_epu8(__m256i,__m256i)

_mm256_addsub_pd

AVX [2]

immintrin.h

__m256d _mm256_addsub_pd(__m256d,__m256d)

_mm256_addsub_ps

AVX [2]

immintrin.h

__m256 _mm256_addsub_ps(__m256,__m256)

_mm256_alignr_epi8

AVX2 [2]

immintrin.h

__m256i _mm256_alignr_epi8(__m256i,__m256i,const int)

_mm256_and_pd

AVX [2]

immintrin.h

__m256d _mm256_and_pd(__m256d,__m256d)

_mm256_and_ps

AVX [2]

immintrin.h

__m256 _mm256_and_ps(__m256,__m256)

_mm256_and_si256

AVX2 [2]

immintrin.h

__m256i _mm256_and_si256(__m256i,__m256i)

_mm256_andnot_pd

AVX [2]

immintrin.h

__m256d _mm256_andnot_pd(__m256d,__m256d)

_mm256_andnot_ps

AVX [2]

immintrin.h

__m256 _mm256_andnot_ps(__m256,__m256)

_mm256_andnot_si256

AVX2 [2]

immintrin.h

__m256i _mm256_andnot_si256(__m256i,__m256i)

_mm256_avg_epu16

AVX2 [2]

immintrin.h

__m256i _mm256_avg_epu16(__m256i,__m256i)

_mm256_avg_epu8

AVX2 [2]

immintrin.h

__m256i _mm256_avg_epu8(__m256i,__m256i)

_mm256_blend_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_blend_epi16(__m256i,__m256i,const int)

_mm256_blend_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_blend_epi32(__m256i,__m256i,const int)

_mm256_blend_pd

AVX [2]

immintrin.h

__m256d _mm256_blend_pd(__m256d,__m256d,const int)

_mm256_blend_ps

AVX [2]

immintrin.h

__m256 _mm256_blend_ps(__m256,__m256,const int)

_mm256_blendv_epi8

AVX2 [2]

immintrin.h

__m256i _mm256_blendv_epi8(__m256i,__m256i,__m256i)

_mm256_blendv_pd

AVX [2]

immintrin.h

__m256d _mm256_blendv_pd(__m256d,__m256d,__m256d)

_mm256_blendv_ps

AVX [2]

immintrin.h

__m256 _mm256_blendv_ps(__m256,__m256,__m256)

_mm256_broadcast_pd

AVX [2]

immintrin.h

__m256d _mm256_broadcast_pd(__m128d const *)

_mm256_broadcast_ps

AVX [2]

immintrin.h

__m256 _mm256_broadcast_ps(__m128 const *)

_mm256_broadcast_sd

AVX [2]

immintrin.h

__m256d _mm256_broadcast_sd(double const *)

_mm256_broadcast_ss

AVX [2]

immintrin.h

__m256 _mm256_broadcast_ss(float const *)

_mm256_broadcastb_epi8

AVX2 [2]

immintrin.h

__m256i _mm256_broadcastb_epi8 (__m128i)

_mm256_broadcastd_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_broadcastd_epi32(__m128i)

_mm256_broadcastq_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_broadcastq_epi64(__m128i)

_mm256_broadcastsd_pd

AVX2 [2]

immintrin.h

__m256d _mm256_broadcastsd_pd(__m128d)

_mm256_broadcastsi128_si256

AVX2 [2]

immintrin.h

__m256i _mm256_broadcastsi128_si256(__m128i)

_mm256_broadcastss_ps

AVX2 [2]

immintrin.h

__m256 _mm256_broadcastss_ps(__m128)

_mm256_broadcastw_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_broadcastw_epi16(__m128i)

_mm256_castpd_ps

AVX [2]

immintrin.h

__m256 _mm256_castpd_ps(__m256d)

_mm256_castpd_si256

AVX [2]

immintrin.h

__m256i _mm256_castpd_si256(__m256d)

_mm256_castpd128_pd256

AVX [2]

immintrin.h

__m256d _mm256_castpd128_pd256(__m128d)

_mm256_castpd256_pd128

AVX [2]

immintrin.h

__m128d _mm256_castpd256_pd128(__m256d)

_mm256_castps_pd

AVX [2]

immintrin.h

__m256d _mm256_castps_pd(__m256)

_mm256_castps_si256

AVX [2]

immintrin.h

__m256i _mm256_castps_si256(__m256)

_mm256_castps128_ps256

AVX [2]

immintrin.h

__m256 _mm256_castps128_ps256(__m128)

_mm256_castps256_ps128

AVX [2]

immintrin.h

__m128 _mm256_castps256_ps128(__m256)

_mm256_castsi128_si256

AVX [2]

immintrin.h

__m256i _mm256_castsi128_si256(__m128i)

_mm256_castsi256_pd

AVX [2]

immintrin.h

__m256d _mm256_castsi256_pd(__m256i)

_mm256_castsi256_ps

AVX [2]

immintrin.h

__m256 _mm256_castsi256_ps(__m256i)

_mm256_castsi256_si128

AVX [2]

immintrin.h

__m128i _mm256_castsi256_si128(__m256i)

_mm256_cmov_si256

XOP [1]

ammintrin.h

__m256i _mm256_cmov_si256(__m256i,__m256i,__m256i)

_mm256_cmp_pd

AVX [2]

immintrin.h

__m256d _mm256_cmp_pd(__m256d,__m256d,const int)

_mm256_cmp_ps

AVX [2]

immintrin.h

__m256 _mm256_cmp_ps(__m256,__m256,const int)

_mm256_cmpeq_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_cmpeq_epi16(__m256i,__m256i)

_mm256_cmpeq_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_cmpeq_epi32(__m256i,__m256i)

_mm256_cmpeq_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_cmpeq_epi64(__m256i,__m256i)

_mm256_cmpeq_epi8

AVX2 [2]

immintrin.h

__m256i _mm256_cmpeq_epi8(__m256i,__m256i)

_mm256_cmpgt_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_cmpgt_epi16(__m256i,__m256i)

_mm256_cmpgt_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_cmpgt_epi32(__m256i,__m256i)

_mm256_cmpgt_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_cmpgt_epi64(__m256i,__m256i)

_mm256_cmpgt_epi8

AVX2 [2]

immintrin.h

__m256i _mm256_cmpgt_epi8(__m256i,__m256i)

_mm256_cvtepi16_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_cvtepi16_epi32(__m128i)

_mm256_cvtepi16_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_cvtepi16_epi64(__m128i)

_mm256_cvtepi32_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_cvtepi32_epi64(__m128i)

_mm256_cvtepi32_pd

AVX [2]

immintrin.h

__m256d _mm256_cvtepi32_pd(__m128i)

_mm256_cvtepi32_ps

AVX [2]

immintrin.h

__m256 _mm256_cvtepi32_ps(__m256i)

_mm256_cvtepi8_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_cvtepi8_epi16(__m128i)

_mm256_cvtepi8_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_cvtepi8_epi32(__m128i)

_mm256_cvtepi8_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_cvtepi8_epi64(__m128i)

_mm256_cvtepu16_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_cvtepu16_epi32(__m128i)

_mm256_cvtepu16_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_cvtepu16_epi64(__m128i)

_mm256_cvtepu32_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_cvtepu32_epi64(__m128i)

_mm256_cvtepu8_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_cvtepu8_epi16(__m128i)

_mm256_cvtepu8_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_cvtepu8_epi32(__m128i)

_mm256_cvtepu8_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_cvtepu8_epi64(__m128i)

_mm256_cvtpd_epi32

AVX [2]

immintrin.h

__m128i _mm256_cvtpd_epi32(__m256d)

_mm256_cvtpd_ps

AVX [2]

immintrin.h

__m128 _mm256_cvtpd_ps(__m256d)

_mm256_cvtph_ps

F16C [2]

immintrin.h

__m256 _mm256_cvtph_ps(__m128i)

_mm256_cvtps_epi32

AVX [2]

immintrin.h

__m256i _mm256_cvtps_epi32(__m256)

_mm256_cvtps_pd

AVX [2]

immintrin.h

__m256d _mm256_cvtps_pd(__m128)

_mm256_cvtps_ph

F16C [2]

immintrin.h

__m128i _mm256_cvtps_ph(__m256,const int)

_mm256_cvttpd_epi32

AVX [2]

immintrin.h

__m128i _mm256_cvttpd_epi32(__m256d)

_mm256_cvttps_epi32

AVX [2]

immintrin.h

__m256i _mm256_cvttps_epi32(__m256)

_mm256_div_pd

AVX [2]

immintrin.h

__m256d _mm256_div_pd(__m256d,__m256d)

_mm256_div_ps

AVX [2]

immintrin.h

__m256 _mm256_div_ps(__m256,__m256)

_mm256_dp_ps

AVX [2]

immintrin.h

__m256 _mm256_dp_ps(__m256,__m256,const int)

_mm256_extractf128_pd

AVX [2]

immintrin.h

__m128d _mm256_extractf128_pd(__m256d,const int)

_mm256_extractf128_ps

AVX [2]

immintrin.h

__m128 _mm256_extractf128_ps(__m256,const int)

_mm256_extractf128_si256

AVX [2]

immintrin.h

__m128i _mm256_extractf128_si256(__m256i,const int)

_mm256_extracti128_si256

AVX2 [2]

immintrin.h

__m128i _mm256_extracti128_si256(__m256i a,int offset)

_mm256_fmadd_pd

FMA [2]

immintrin.h

__m256d _mm256_fmadd_pd (__m256d a,__m256d b,__m256d c)

_mm256_fmadd_ps

FMA [2]

immintrin.h

__m256 _mm256_fmadd_ps (__m256 a,__m256 b,__m256 c)

_mm256_fmaddsub_pd

FMA [2]

immintrin.h

__m256d _mm256_fmaddsub_pd (__m256d a,__m256d b,__m256d c)

_mm256_fmaddsub_ps

FMA [2]

immintrin.h

__m256 _mm256_fmaddsub_ps (__m256 a,__m256 b,__m256 c)

_mm256_fmsub_pd

FMA [2]

immintrin.h

__m256d _mm256_fmsub_pd (__m256d a,__m256d b,__m256d c)

_mm256_fmsub_ps

FMA [2]

immintrin.h

__m256 _mm256_fmsub_ps (__m256 a,__m256 b,__m256 c)

_mm256_fmsubadd_pd

FMA [2]

immintrin.h

__m256d _mm256_fmsubadd_pd (__m256d a,__m256d b,__m256d c)

_mm256_fmsubadd_ps

FMA [2]

immintrin.h

__m256 _mm256_fmsubadd_ps (__m256 a,__m256 b,__m256 c)

_mm256_fnmadd_pd

FMA [2]

immintrin.h

__m256d _mm256_fnmadd_pd (__m256d a,__m256d b,__m256d c)

_mm256_fnmadd_ps

FMA [2]

immintrin.h

__m256 _mm256_fnmadd_ps (__m256 a,__m256 b,__m256 c)

_mm256_fnmsub_pd

FMA [2]

immintrin.h

__m256d _mm256_fnmsub_pd (__m256d a,__m256d b,__m256d c)

_mm256_fnmsub_ps

FMA [2]

immintrin.h

__m256 _mm256_fnmsub_ps (__m256 a,__m256 b,__m256 c)

_mm256_frcz_pd

XOP [1]

ammintrin.h

__m256d _mm256_frcz_pd(__m256d)

_mm256_frcz_ps

XOP [1]

ammintrin.h

__m256 _mm256_frcz_ps(__m256)

_mm256_hadd_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_hadd_epi16(__m256i,__m256i)

_mm256_hadd_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_hadd_epi32(__m256i,__m256i)

_mm256_hadd_pd

AVX [2]

immintrin.h

__m256d _mm256_hadd_pd(__m256d,__m256d)

_mm256_hadd_ps

AVX [2]

immintrin.h

__m256 _mm256_hadd_ps(__m256,__m256)

_mm256_hadds_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_hadds_epi16(__m256i,__m256i)

_mm256_hsub_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_hsub_epi16(__m256i,__m256i)

_mm256_hsub_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_hsub_epi32(__m256i,__m256i)

_mm256_hsub_pd

AVX [2]

immintrin.h

__m256d _mm256_hsub_pd(__m256d,__m256d)

_mm256_hsub_ps

AVX [2]

immintrin.h

__m256 _mm256_hsub_ps(__m256,__m256)

_mm256_hsubs_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_hsubs_epi16(__m256i,__m256i)

_mm256_i32gather_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_i32gather_epi32(int const *base,__m256i index,const int scale)

_mm256_i32gather_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_i32gather_epi64(__int64 const *base,__m128i index,const int scale)

_mm256_i32gather_pd

AVX2 [2]

immintrin.h

__m256d _mm256_i32gather_pd(double const *base,__m128i index,const int scale)

_mm256_i32gather_ps

AVX2 [2]

immintrin.h

__m256 _mm256_i32gather_ps(float const *base,__m256i index,const int scale)

_mm256_i64gather_epi32

AVX2 [2]

immintrin.h

__m128i _mm256_i64gather_epi32(int const *base,__m256i index,const int scale)

_mm256_i64gather_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_i64gather_epi64(__int64 const *base,__m256i index,const int scale)

_mm256_i64gather_pd

AVX2 [2]

immintrin.h

__m256d _mm256_i64gather_pd(double const *base,__m256i index,const int scale)

_mm256_i64gather_ps

AVX2 [2]

immintrin.h

__m128 _mm256_i64gather_ps(float const *base,__m256i index,const int scale)

_mm256_insertf128_pd

AVX [2]

immintrin.h

__m256d _mm256_insertf128_pd(__m256d,__m128d,int)

_mm256_insertf128_ps

AVX [2]

immintrin.h

__m256 _mm256_insertf128_ps(__m256,__m128,int)

_mm256_insertf128_si256

AVX [2]

immintrin.h

__m256i _mm256_insertf128_si256(__m256i,__m128i,int)

_mm256_inserti128_si256

AVX2 [2]

immintrin.h

__m256i _mm256_inserti128_si256(__m256i,__m128i,int)

_mm256_lddqu_si256

AVX [2]

immintrin.h

__m256i _mm256_lddqu_si256(__m256i *)

_mm256_load_pd

AVX [2]

immintrin.h

__m256d _mm256_load_pd(double const *)

_mm256_load_ps

AVX [2]

immintrin.h

__m256 _mm256_load_ps(float const *)

_mm256_load_si256

AVX [2]

immintrin.h

__m256i _mm256_load_si256(__m256i *)

_mm256_loadu_pd

AVX [2]

immintrin.h

__m256d _mm256_loadu_pd(double const *)

_mm256_loadu_ps

AVX [2]

immintrin.h

__m256 _mm256_loadu_ps(float const *)

_mm256_loadu_si256

AVX [2]

immintrin.h

__m256i _mm256_loadu_si256(__m256i *)

_mm256_macc_pd

FMA4 [1]

ammintrin.h

__m256d _mm256_macc_pd(__m256d,__m256d,__m256d)

_mm256_macc_ps

FMA4 [1]

ammintrin.h

__m256 _mm256_macc_ps(__m256,__m256,__m256)

_mm256_madd_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_madd_epi16(__m256i,__m256i)

_mm256_maddsub_pd

FMA4 [1]

ammintrin.h

__m256d _mm256_maddsub_pd(__m256d,__m256d,__m256d)

_mm256_maddsub_ps

FMA4 [1]

ammintrin.h

__m256 _mm256_maddsub_ps(__m256,__m256,__m256)

_mm256_maddubs_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_maddubs_epi16(__m256i,__m256i)

_mm256_mask_i32gather_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_mask_i32gather_epi32(__m256i src,int const *base,__m256i index,__m256i mask,const int scale)

_mm256_mask_i32gather_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_mask_i32gather_epi64(__m256i src,__int64 const *base,__m128i index,__m256i mask,const int scale)

_mm256_mask_i32gather_pd

AVX2 [2]

immintrin.h

__m256d _mm256_mask_i32gather_pd(__m256d src,double const *base,__m128i index,__m256d mask,const int scale)

_mm256_mask_i32gather_ps

AVX2 [2]

immintrin.h

__m256 _mm256_mask_i32gather_ps(__m256 src,float const *base,__m256i index,__m256 mask,const int scale)

_mm256_mask_i64gather_epi32

AVX2 [2]

immintrin.h

__m128i _mm256_mask_i64gather_epi32(__m128i src,int const *base,__m256i index,__m128i mask,const int scale)

_mm256_mask_i64gather_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_mask_i64gather_epi64(__m256i src,__int64 const *base,__m256i index,__m256i mask,const int scale)

_mm256_mask_i64gather_pd

AVX2 [2]

immintrin.h

__m256d _mm256_mask_i64gather_pd(__m256d src,double const *base,__m256i index,__m256d mask,const int scale)

_mm256_mask_i64gather_ps

AVX2 [2]

immintrin.h

__m128 _mm256_mask_i64gather_ps(__m128 src,float const *base,__m256i index,__m128 mask,const int scale)

_mm256_maskload_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_maskload_epi32(int const *,__m256i)

_mm256_maskload_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_maskload_epi64( __int64 const *,__m256i)

_mm256_maskload_pd

AVX [2]

immintrin.h

__m256d _mm256_maskload_pd(double const *,__m256i)

_mm256_maskload_ps

AVX [2]

immintrin.h

__m256 _mm256_maskload_ps(float const *,__m256i)

_mm256_maskstore_epi32

AVX2 [2]

immintrin.h

void _mm256_maskstore_epi32(int *,__m256i,__m256i)

_mm256_maskstore_epi64

AVX2 [2]

immintrin.h

void _mm256_maskstore_epi64(__int64 *,__m256i,__m256i)

_mm256_maskstore_pd

AVX [2]

immintrin.h

void _mm256_maskstore_pd(double *,__m256i,__m256d)

_mm256_maskstore_ps

AVX [2]

immintrin.h

void _mm256_maskstore_ps(float *,__m256i,__m256)

_mm256_max_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_max_epi16(__m256i,__m256i)

_mm256_max_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_max_epi32(__m256i,__m256i)

_mm256_max_epi8

AVX2 [2]

immintrin.h

__m256i _mm256_max_epi8(__m256i,__m256i)

_mm256_max_epu16

AVX2 [2]

immintrin.h

__m256i _mm256_max_epu16(__m256i,__m256i)

_mm256_max_epu32

AVX2 [2]

immintrin.h

__m256i _mm256_max_epu32(__m256i,__m256i)

_mm256_max_epu8

AVX2 [2]

immintrin.h

__m256i _mm256_max_epu8(__m256i,__m256i)

_mm256_max_pd

AVX [2]

immintrin.h

__m256d _mm256_max_pd(__m256d,__m256d)

_mm256_max_ps

AVX [2]

immintrin.h

__m256 _mm256_max_ps(__m256,__m256)

_mm256_min_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_min_epi16(__m256i,__m256i)

_mm256_min_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_min_epi32(__m256i,__m256i)

_mm256_min_epi8

AVX2 [2]

immintrin.h

__m256i _mm256_min_epi8(__m256i,__m256i)

_mm256_min_epu16

AVX2 [2]

immintrin.h

__m256i _mm256_min_epu16(__m256i,__m256i)

_mm256_min_epu32

AVX2 [2]

immintrin.h

__m256i _mm256_min_epu32(__m256i,__m256i)

_mm256_min_epu8

AVX2 [2]

immintrin.h

__m256i _mm256_min_epu8(__m256i,__m256i)

_mm256_min_pd

AVX [2]

immintrin.h

__m256d _mm256_min_pd(__m256d,__m256d)

_mm256_min_ps

AVX [2]

immintrin.h

__m256 _mm256_min_ps(__m256,__m256)

_mm256_movedup_pd

AVX [2]

immintrin.h

__m256d _mm256_movedup_pd(__m256d)

_mm256_movehdup_ps

AVX [2]

immintrin.h

__m256 _mm256_movehdup_ps(__m256)

_mm256_moveldup_ps

AVX [2]

immintrin.h

__m256 _mm256_moveldup_ps(__m256)

_mm256_movemask_epi8

AVX2 [2]

immintrin.h

int _mm256_movemask_epi8(__m256i)

_mm256_movemask_pd

AVX [2]

immintrin.h

int _mm256_movemask_pd(__m256d)

_mm256_movemask_ps

AVX [2]

immintrin.h

int _mm256_movemask_ps(__m256)

_mm256_mpsadbw_epu8

AVX2 [2]

immintrin.h

__m256i _mm256_mpsadbw_epu8(__m256i,__m256i,const int)

_mm256_msub_pd

FMA4 [1]

ammintrin.h

__m256d _mm256_msub_pd(__m256d,__m256d,__m256d)

_mm256_msub_ps

FMA4 [1]

ammintrin.h

__m256 _mm256_msub_ps(__m256,__m256,__m256)

_mm256_msubadd_pd

FMA4 [1]

ammintrin.h

__m256d _mm256_msubadd_pd(__m256d,__m256d,__m256d)

_mm256_msubadd_ps

FMA4 [1]

ammintrin.h

__m256 _mm256_msubadd_ps(__m256,__m256,__m256)

_mm256_mul_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_mul_epi32(__m256i,__m256i)

_mm256_mul_epu32

AVX2 [2]

immintrin.h

__m256i _mm256_mul_epu32(__m256i,__m256i)

_mm256_mul_pd

AVX [2]

immintrin.h

__m256d _mm256_mul_pd(__m256d,__m256d)

_mm256_mul_ps

AVX [2]

immintrin.h

__m256 _mm256_mul_ps(__m256,__m256)

_mm256_mulhi_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_mulhi_epi16(__m256i,__m256i)

_mm256_mulhi_epu16

AVX2 [2]

immintrin.h

__m256i _mm256_mulhi_epu16(__m256i,__m256i)

_mm256_mulhrs_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_mulhrs_epi16(__m256i,__m256i)

_mm256_mullo_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_mullo_epi16(__m256i,__m256i)

_mm256_mullo_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_mullo_epi32(__m256i,__m256i)

_mm256_nmacc_pd

FMA4 [1]

ammintrin.h

__m256d _mm256_nmacc_pd(__m256d,__m256d,__m256d)

_mm256_nmacc_ps

FMA4 [1]

ammintrin.h

__m256 _mm256_nmacc_ps(__m256,__m256,__m256)

_mm256_nmsub_pd

FMA4 [1]

ammintrin.h

__m256d _mm256_nmsub_pd(__m256d,__m256d,__m256d)

_mm256_nmsub_ps

FMA4 [1]

ammintrin.h

__m256 _mm256_nmsub_ps(__m256,__m256,__m256)

_mm256_or_pd

AVX [2]

immintrin.h

__m256d _mm256_or_pd(__m256d,__m256d)

_mm256_or_ps

AVX [2]

immintrin.h

__m256 _mm256_or_ps(__m256,__m256)

_mm256_or_si256

AVX2 [2]

immintrin.h

__m256i _mm256_or_si256(__m256i,__m256i)

_mm256_packs_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_packs_epi16(__m256i,__m256i)

_mm256_packs_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_packs_epi32(__m256i,__m256i)

_mm256_packus_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_packus_epi16(__m256i,__m256i)

_mm256_packus_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_packus_epi32(__m256i,__m256i)

_mm256_permute_pd

AVX [2]

immintrin.h

__m256d _mm256_permute_pd(__m256d,int)

_mm256_permute_ps

AVX [2]

immintrin.h

__m256 _mm256_permute_ps(__m256,int)

_mm256_permute2_pd

XOP [1]

ammintrin.h

__m256d _mm256_permute2_pd(__m256d,__m256d,__m256i,int)

_mm256_permute2_ps

XOP [1]

ammintrin.h

__m256 _mm256_permute2_ps(__m256,__m256,__m256i,int)

_mm256_permute2f128_pd

AVX [2]

immintrin.h

__m256d _mm256_permute2f128_pd(__m256d,__m256d,int)

_mm256_permute2f128_ps

AVX [2]

immintrin.h

__m256 _mm256_permute2f128_ps(__m256,__m256,int)

_mm256_permute2f128_si256

AVX [2]

immintrin.h

__m256i _mm256_permute2f128_si256(__m256i,__m256i,int)

_mm256_permute2x128_si256

AVX2 [2]

immintrin.h

__m256i _mm256_permute2x128_si256(__m256i,__m256i,const int)

_mm256_permute4x64_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_permute4x64_epi64 (__m256i,const int)

_mm256_permute4x64_pd

AVX2 [2]

immintrin.h

__m256d _mm256_permute4x64_pd(__m256d,const int)

_mm256_permutevar_pd

AVX [2]

immintrin.h

__m256d _mm256_permutevar_pd(__m256d,__m256i)

_mm256_permutevar_ps

AVX [2]

immintrin.h

__m256 _mm256_permutevar_ps(__m256,__m256i)

_mm256_permutevar8x32_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_permutevar8x32_epi32(__m256i,__m256i)

_mm256_permutevar8x32_ps

AVX2 [2]

immintrin.h

__m256 _mm256_permutevar8x32_ps (__m256,__m256i)

_mm256_rcp_ps

AVX [2]

immintrin.h

__m256 _mm256_rcp_ps(__m256)

_mm256_round_pd

AVX [2]

immintrin.h

__m256d _mm256_round_pd(__m256d,int)

_mm256_round_ps

AVX [2]

immintrin.h

__m256 _mm256_round_ps(__m256,int)

_mm256_rsqrt_ps

AVX [2]

immintrin.h

__m256 _mm256_rsqrt_ps(__m256)

_mm256_sad_epu8

AVX2 [2]

immintrin.h

__m256i _mm256_sad_epu8(__m256i,__m256i)

_mm256_set_epi16

AVX [2]

immintrin.h

__m256i _mm256_set_epi16(short,short,short,short,short,short,short,short,short,short,short,short,short,short,short,short)

_mm256_set_epi32

AVX [2]

immintrin.h

__m256i _mm256_set_epi32(int,int,int,int,int,int,int,int)

_mm256_set_epi64x

AVX [2]

immintrin.h

__m256i _mm256_set_epi64x(long long,long long,long long,long long)

_mm256_set_epi8

AVX [2]

immintrin.h

__m256i _mm256_set_epi8(char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char)

_mm256_set_pd

AVX [2]

immintrin.h

__m256d _mm256_set_pd(double,double,double,double)

_mm256_set_ps

AVX [2]

immintrin.h

__m256 _mm256_set_ps(float,float,float,float,float,float,float,float)

_mm256_set1_epi16

AVX [2]

immintrin.h

__m256i _mm256_set1_epi16(short)

_mm256_set1_epi32

AVX [2]

immintrin.h

__m256i _mm256_set1_epi32(int)

_mm256_set1_epi64x

AVX [2]

immintrin.h

__m256i _mm256_set1_epi64x(long long)

_mm256_set1_epi8

AVX [2]

immintrin.h

__m256i _mm256_set1_epi8(char)

_mm256_set1_pd

AVX [2]

immintrin.h

__m256d _mm256_set1_pd(double)

_mm256_set1_ps

AVX [2]

immintrin.h

__m256 _mm256_set1_ps(float)

_mm256_setr_epi16

AVX [2]

immintrin.h

__m256i _mm256_setr_epi16(short,short,short,short,short,short,short,short,short,short,short,short,short,short,short,short)

_mm256_setr_epi32

AVX [2]

immintrin.h

__m256i _mm256_setr_epi32(int,int,int,int,int,int,int,int)

_mm256_setr_epi64x

AVX [2]

immintrin.h

__m256i _mm256_setr_epi64x(long long,long long,long long,long long)

_mm256_setr_epi8

AVX [2]

immintrin.h

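__m256i _mm256_setr_epi8(char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char,char)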

_mm256_setr_pd

AVX [2]

immintrin.h

__m256d _mm256_setr_pd(double,double,double,double)

_mm256_setr_ps

AVX [2]

immintrin.h

__m256 _mm256_setr_ps(float,float,float,float,float,float,float,float)

_mm256_setzero_pd

AVX [2]

immintrin.h

__m256d _mm256_setzero_pd(void)

_mm256_setzero_ps

AVX [2]

immintrin.h

__m256 _mm256_setzero_ps(void)

_mm256_setzero_si256

AVX [2]

immintrin.h

__m256i _mm256_setzero_si256(void)

_mm256_shuffle_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_shuffle_epi32(__m256i,const int)

_mm256_shuffle_epi8

AVX2 [2]

immintrin.h

__m256i _mm256_shuffle_epi8(__m256i,__m256i)

_mm256_shuffle_pd

AVX [2]

immintrin.h

__m256d _mm256_shuffle_pd(__m256d,__m256d,const int)

_mm256_shuffle_ps

AVX [2]

immintrin.h

__m256 _mm256_shuffle_ps(__m256,__m256,const int)

_mm256_shufflehi_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_shufflehi_epi16(__m256i,const int)

_mm256_shufflelo_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_shufflelo_epi16(__m256i,const int)

_mm256_sign_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_sign_epi16(__m256i,__m256i)

_mm256_sign_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_sign_epi32(__m256i,__m256i)

_mm256_sign_epi8

AVX2 [2]

immintrin.h

__m256i _mm256_sign_epi8(__m256i,__m256i)

_mm256_sll_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_sll_epi16(__m256i,__m128i)

_mm256_sll_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_sll_epi32(__m256i,__m128i)

_mm256_sll_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_sll_epi64(__m256i,__m128i)

_mm256_slli_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_slli_epi16(__m256i,int)

_mm256_slli_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_slli_epi32(__m256i,int)

_mm256_slli_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_slli_epi64(__m256i,int)

_mm256_slli_si256

AVX2 [2]

immintrin.h

__m256i _mm256_slli_si256(__m256i,int)

_mm256_sllv_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_sllv_epi32(__m256i,__m256i)

_mm256_sllv_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_sllv_epi64(__m256i,__m256i)

_mm256_sqrt_pd

AVX [2]

immintrin.h

__m256d _mm256_sqrt_pd(__m256d)

_mm256_sqrt_ps

AVX [2]

immintrin.h

__m256 _mm256_sqrt_ps(__m256)

_mm256_sra_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_sra_epi16(__m256i,__m128i)

_mm256_sra_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_sra_epi32(__m256i,__m128i)

_mm256_srai_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_srai_epi16(__m256i,int)

_mm256_srai_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_srai_epi32(__m256i,int)

_mm256_srav_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_srav_epi32(__m256i,__m256i)

_mm256_srl_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_srl_epi16(__m256i,__m128i)

_mm256_srl_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_srl_epi32(__m256i,__m128i)

_mm256_srl_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_srl_epi64(__m256i,__m128i)

_mm256_srli_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_srli_epi16(__m256i,int)

_mm256_srli_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_srli_epi32(__m256i,int)

_mm256_srli_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_srli_epi64(__m256i,int)

_mm256_srli_si256

AVX2 [2]

immintrin.h

__m256i _mm256_srli_si256(__m256i,int)

_mm256_srlv_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_srlv_epi32(__m256i,__m256i)

_mm256_srlv_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_srlv_epi64(__m256i,__m256i)

_mm256_store_pd

AVX [2]

immintrin.h

void _mm256_store_pd(double *,__m256d)

_mm256_store_ps

AVX [2]

immintrin.h

void _mm256_store_ps(float *,__m256)

_mm256_store_si256

AVX [2]

immintrin.h

void _mm256_store_si256(__m256i *,__m256i)

_mm256_storeu_pd

AVX [2]

immintrin.h

void _mm256_storeu_pd(double *,__m256d)

_mm256_storeu_ps

AVX [2]

immintrin.h

void _mm256_storeu_ps(float *,__m256)

_mm256_storeu_si256

AVX [2]

immintrin.h

void _mm256_storeu_si256(__m256i *,__m256i)

_mm256_stream_load_si256

AVX2 [2]

immintrin.h

__m256i _mm256_stream_load_si256(__m256i const *)

_mm256_stream_pd

AVX [2]

immintrin.h

void _mm256_stream_pd(double *,__m256d)

_mm256_stream_ps

AVX [2]

immintrin.h

void _mm256_stream_ps(float *p,__m256 a)

_mm256_stream_si256

AVX [2]

immintrin.h

void _mm256_stream_si256(__m256i *,__m256i)

_mm256_sub_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_sub_epi16(__m256i,__m256i)

_mm256_sub_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_sub_epi32(__m256i,__m256i)

_mm256_sub_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_sub_epi64(__m256i,__m256i)

_mm256_sub_epi8

AVX2 [2]

immintrin.h

__m256i _mm256_sub_epi8(__m256i,__m256i)

_mm256_sub_pd

AVX [2]

immintrin.h

__m256d _mm256_sub_pd(__m256d,__m256d)

_mm256_sub_ps

AVX [2]

immintrin.h

__m256 _mm256_sub_ps(__m256,__m256)

_mm256_subs_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_subs_epi16(__m256i,__m256i)

_mm256_subs_epi8

AVX2 [2]

immintrin.h

__m256i _mm256_subs_epi8(__m256i,__m256i)

_mm256_subs_epu16

AVX2 [2]

immintrin.h

__m256i _mm256_subs_epu16(__m256i,__m256i)

_mm256_subs_epu8

AVX2 [2]

immintrin.h

__m256i _mm256_subs_epu8(__m256i,__m256i)

_mm256_testc_pd

AVX [2]

immintrin.h

int _mm256_testc_pd(__m256d,__m256d)

_mm256_testc_ps

AVX [2]

immintrin.h

int _mm256_testc_ps(__m256,__m256)

_mm256_testc_si256

AVX [2]

immintrin.h

int _mm256_testc_si256(__m256i,__m256i)

_mm256_testnzc_pd

AVX [2]

immintrin.h

int _mm256_testnzc_pd(__m256d,__m256d)

_mm256_testnzc_ps

AVX [2]

immintrin.h

int _mm256_testnzc_ps(__m256,__m256)

_mm256_testnzc_si256

AVX [2]

immintrin.h

int _mm256_testnzc_si256(__m256i,__m256i)

_mm256_testz_pd

AVX [2]

immintrin.h

int _mm256_testz_pd(__m256d,__m256d)

_mm256_testz_ps

AVX [2]

immintrin.h

int _mm256_testz_ps(__m256,__m256)

_mm256_testz_si256

AVX [2]

immintrin.h

int _mm256_testz_si256(__m256i,__m256i)

_mm256_unpackhi_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_unpackhi_epi16(__m256i,__m256i)

_mm256_unpackhi_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_unpackhi_epi32(__m256i,__m256i)

_mm256_unpackhi_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_unpackhi_epi64(__m256i,__m256i)

_mm256_unpackhi_epi8

AVX2 [2]

immintrin.h

__m256i _mm256_unpackhi_epi8(__m256i,__m256i)

_mm256_unpackhi_pd

AVX [2]

immintrin.h

__m256d _mm256_unpackhi_pd(__m256d,__m256d)

_mm256_unpackhi_ps

AVX [2]

immintrin.h

__m256 _mm256_unpackhi_ps(__m256,__m256)

_mm256_unpacklo_epi16

AVX2 [2]

immintrin.h

__m256i _mm256_unpacklo_epi16(__m256i,__m256i)

_mm256_unpacklo_epi32

AVX2 [2]

immintrin.h

__m256i _mm256_unpacklo_epi32(__m256i,__m256i)

_mm256_unpacklo_epi64

AVX2 [2]

immintrin.h

__m256i _mm256_unpacklo_epi64(__m256i,__m256i)

_mm256_unpacklo_epi8

AVX2 [2]

immintrin.h

__m256i _mm256_unpacklo_epi8(__m256i,__m256i)

_mm256_unpacklo_pd

AVX [2]

immintrin.h

__m256d _mm256_unpacklo_pd(__m256d,__m256d)

_mm256_unpacklo_ps

AVX [2]

immintrin.h

__m256 _mm256_unpacklo_ps(__m256,__m256)

_mm256_xor_pd

AVX [2]

immintrin.h

__m256d _mm256_xor_pd(__m256d,__m256d)

_mm256_xor_ps

AVX [2]

immintrin.h

__m256 _mm256_xor_ps(__m256,__m256)

_mm256_xor_si256

AVX2 [2]

immintrin.h

__m256i _mm256_xor_si256(__m256i,__m256i)

_mm256_zeroall

AVX [2]

immintrin.h

void _mm256_zeroall(void)

_mm256_zeroupper

AVX [2]

immintrin.h

void _mm256_zeroupper(void)

__movsb

intrin.h

VOID __movsb(IN PBYTE,IN BYTE const *,IN SIZE_T)

__movsd

intrin.h

VOID __movsd(IN PDWORD,IN DWORD const *,IN SIZE_T)

__movsq

intrin.h

VOID __movsq(IN PDWORD64,IN DWORD64 const *,IN SIZE_T)

__movsw

intrin.h

VOID __movsw(IN PWORD,IN WORD const *,IN SIZE_T)

_mul128

intrin.h

__int64 _mul128(__int64 multiplier,__int64 multiplicand,__int64 *highproduct)

__mulh

intrin.h

__int64 __mulh(__int64,__int64)

_mulx_u32

BMI [2]

immintrin.h

unsigned int _mulx_u32(unsigned int,unsigned int,unsigned int*)

_mulx_u64

BMI [2]

immintrin.h

unsigned __int64 _mulx_u64(unsigned __int64,unsigned __int64,unsigned __int64*)

__nop

intrin.h

void __nop(void)

__nvreg_restore_fence

intrin.h

void __nvreg_restore_fence(void)

__nvreg_save_fence

intrin.h

void __nvreg_save_fence(void)

__outbyte

intrin.h

void __outbyte(unsigned short Port,unsigned char Data)

__outbytestring

intrin.h

void __outbytestring(unsigned short Port,unsigned char *Buffer,unsigned long Count)

__outdword

intrin.h

void __outdword(unsigned short Port,unsigned long Data)

__outdwordstring

intrin.h

void __outdwordstring(unsigned short Port,unsigned long *Buffer,unsigned long Count)

__outword

intrin.h

void __outword(unsigned short Port,unsigned short Data)

__outwordstring

intrin.h

void __outwordstring(unsigned short Port,unsigned short *Buffer,unsigned long Count)

_pdep_u32

BMI [2]

immintrin.h

unsigned int _pdep_u32(unsigned int,unsigned int)

_pdep_u64

BMI [2]

immintrin.h

unsigned __int64 _pdep_u64(unsigned __int64,unsigned __int64)

_pext_u32

BMI [2]

immintrin.h

unsigned int _pext_u32(unsigned int,unsigned int)

_pext_u64

BMI [2]

immintrin.h

unsigned __int64 _pext_u64(unsigned __int64,unsigned __int64)

__popcnt

POPCNT

intrin.h

unsigned int __popcnt(unsigned int)

__popcnt16

POPCNT

intrin.h

unsigned short __popcnt16(unsigned short)

__popcnt64

POPCNT

intrin.h

unsigned __int64 __popcnt64(unsigned __int64)

_rdrand16_step

RDRAND [2]

immintrin.h

int _rdrand16_step(unsigned short *)

_rdrand32_step

RDRAND [2]

immintrin.h

int _rdrand32_step(unsigned int *)

_rdrand64_step

RDRAND [2]

immintrin.h

int _rdrand64_step(unsigned __int64 *)

_rdseed16_step

RDSEED [2]

immintrin.h

int _rdseed16_step(unsigned short *)

_rdseed32_step

RDSEED [2]

immintrin.h

int _rdseed32_step(unsigned int *)

_rdseed64_step

RDSEED [2]

immintrin.h

int _rdseed64_step(unsigned __int64 *)

__rdtsc

intrin.h

unsigned __int64 __rdtsc(void)

__rdtscp

RDTSCP

intrin.h

unsigned __int64 __rdtscp(unsigned int*)

_ReadBarrier

intrin.h

void _ReadBarrier(void)

__readcr0

intrin.h

unsigned __int64 __readcr0(void)

__readcr2

intrin.h

unsigned __int64 __readcr2(void)

__readcr3

intrin.h

unsigned __int64 __readcr3(void)

__readcr4

intrin.h

unsigned __int64 __readcr4(void)

__readcr8

intrin.h

unsigned __int64 __readcr8(void)

__readdr

intrin.h

unsigned __int64 __readdr(unsigned)

__readeflags

intrin.h

unsigned __int64 __readeflags(void)

_readfsbase_u32

FSGSBASE [2]

immintrin.h

unsigned int _readfsbase_u32(void)

_readfsbase_u64

FSGSBASE [2]

immintrin.h

unsigned __int64 _readfsbase_u64(void)

_readgsbase_u32

FSGSBASE [2]

immintrin.h

unsigned int _readgsbase_u32(void)

_readgsbase_u64

FSGSBASE [2]

immintrin.h

unsigned __int64 _readgsbase_u64(void)

__readgsbyte

intrin.h

unsigned char __readgsbyte(unsigned long Offset)

__readgsdword

intrin.h

unsigned long __readgsdword(unsigned long Offset)

__readgsqword

intrin.h

unsigned __int64 __readgsqword(unsigned long Offset)

__readgsword

intrin.h

unsigned short __readgsword(unsigned long Offset)

__readmsr

intrin.h

unsigned __int64 __readmsr(unsigned long)

__readpmc

intrin.h

unsigned __int64 __readpmc(unsigned long a)

_ReadWriteBarrier

intrin.h

void _ReadWriteBarrier(void)

_ReturnAddress

intrin.h

void * _ReturnAddress(void)

_rorx_u32

BMI2 [2]

immintrin.h

unsigned int _rorx_u32(unsigned int,const unsigned int)

_rorx_u64

BMI2 [2]

immintrin.h

unsigned __int64 _rorx_u64(unsigned __int64,const unsigned int)

_rotl16

intrin.h

unsigned short _rotl16(unsigned short value,unsigned char shift)

_rotl8

intrin.h

unsigned char _rotl8(unsigned char value,unsigned char shift)

_rotr16

intrin.h

unsigned short _rotr16(unsigned short value,unsigned char shift)

_rotr8

intrin.h

unsigned char _rotr8(unsigned char value,unsigned char shift)

_rsm

intrin.h

void _rsm(void)

_sarx_i32

BMI2 [2]

immintrin.h

int _sarx_i32(int,unsigned int)

_sarx_i64

BMI2 [2]

immintrin.h

__int64 _sarx_i64(__int64,unsigned int)

__segmentlimit

intrin.h

unsigned long __segmentlimit(unsigned long a)

_sgdt

intrin.h

void _sgdt(void*)

__shiftleft128

intrin.h

unsigned __int64 __shiftleft128(unsigned __int64 LowPart,unsigned __int64 HighPart,unsigned char Shift)

__shiftright128

intrin.h

unsigned __int64 __shiftright128(unsigned __int64 LowPart,unsigned __int64 HighPart,unsigned char Shift)

_shlx_u32

BMI2 [2]

immintrin.h

unsigned int _shlx_u32(unsigned int,unsigned int)

_shlx_u64

BMI2 [2]

immintrin.h

unsigned __int64 _shlx_u64(unsigned __int64,unsigned int)

_shrx_u32

BMI2 [2]

immintrin.h

unsigned int _shrx_u32(unsigned int,unsigned int)

_shrx_u64

BMI2 [2]

immintrin.h

unsigned __int64 _shrx_u64(unsigned __int64,unsigned int)

__sidt

intrin.h

void __sidt(void*)

__slwpcb

LWP [1]

ammintrin.h

void *__slwpcb(void)

_stac

SMAP

intrin.h

void _stac(void)

_Store_HLERelease

HLE [2]

immintrin.h

void _Store_HLERelease(long volatile *,long)

_Store64_HLERelease

HLE [2]

immintrin.h

void _Store64_HLERelease(__int64 volatile *,__int64)

_StorePointer_HLERelease

HLE [2]

immintrin.h

void _StorePointer_HLERelease(void * volatile *,void *)

__stosb

intrin.h

void __stosb(IN PBYTE,IN BYTE,IN SIZE_T)

__stosd

intrin.h

void __stosd(IN PDWORD,IN DWORD,IN SIZE_T)

__stosq

intrin.h

void __stosq(IN PDWORD64,IN DWORD64,IN SIZE_T)

__stosw

intrin.h

void __stosw(IN PWORD,IN WORD,IN SIZE_T)

_subborrow_u16

intrin.h

unsigned char _subborrow_u16(unsigned char b_in,unsigned short src1,unsigned short src2,unsigned short *diff)

_subborrow_u32

intrin.h

unsigned char _subborrow_u32(unsigned char b_in,unsigned int src1,unsigned int src2,unsigned int *diff)

_subborrow_u64

intrin.h

unsigned char _subborrow_u64(unsigned char b_in,unsigned __int64 src1,unsigned __int64 src2,unsigned __int64 *diff)

_subborrow_u8

intrin.h

unsigned char _subborrow_u8(unsigned char b_in,unsigned char src1,unsigned char src2,unsigned char *diff)

__svm_clgi

intrin.h

void __svm_clgi(void)

__svm_invlpga

intrin.h

void __svm_invlpga(void*,int)

__svm_skinit

intrin.h

void __svm_skinit(int)

__svm_stgi

intrin.h

void __svm_stgi(void)

__svm_vmload

intrin.h

void __svm_vmload(size_t)

__svm_vmrun

intrin.h

void __svm_vmrun(size_t)

__svm_vmsave

intrin.h

void __svm_vmsave(size_t)

_t1mskc_u32

TBM [1]

ammintrin.h

unsigned int _t1mskc_u32(unsigned int)

_t1mskc_u64

TBM [1]

ammintrin.h

unsigned __int64 _t1mskc_u64(unsigned __int64)

_tzcnt_u32

BMI

ammintrin.h, immintrin.h

unsigned int _tzcnt_u32(unsigned int)

_tzcnt_u64

BMI

ammintrin.h, immintrin.h

unsigned __int64 _tzcnt_u64(unsigned __int64)

_tzmsk_u32

TBM [1]

ammintrin.h

unsigned int _tzmsk_u32(unsigned int)

_tzmsk_u64

TBM [1]

ammintrin.h

unsigned __int64 _tzmsk_u64(unsigned __int64)

__ud2

intrin.h

void __ud2(void)

__ull_rshift

intrin.h

unsigned __int64 __ull_rshift(unsigned __int64,int)

_umul128

intrin.h

unsigned __int64 _umul128(unsigned __int64 multiplier,unsigned __int64 multiplicand,unsigned __int64 *highproduct)

__umulh

intrin.h

unsigned __int64 __umulh(unsigned __int64,unsigned __int64)

__vmx_off

intrin.h

void __vmx_off(void)

__vmx_on

intrin.h

unsigned char __vmx_on(unsigned __int64*)

__vmx_vmclear

intrin.h

unsigned char __vmx_vmclear(unsigned __int64*)

__vmx_vmlaunch

intrin.h

unsigned char __vmx_vmlaunch(void)

__vmx_vmptrld

intrin.h

unsigned char __vmx_vmptrld(unsigned __int64*)

__vmx_vmptrst

intrin.h

void __vmx_vmptrst(unsigned __int64 *)

__vmx_vmread

intrin.h

unsigned char __vmx_vmread(size_t,size_t*)

__vmx_vmresume

intrin.h

unsigned char __vmx_vmresume(void)

__vmx_vmwrite

intrin.h

unsigned char __vmx_vmwrite(size_t,size_t)

__wbinvd

intrin.h

void __wbinvd(void)

_WriteBarrier

intrin.h

void _WriteBarrier(void)

__writecr0

intrin.h

void __writecr0(unsigned __int64)

__writecr3

intrin.h

void __writecr3(unsigned __int64)

__writecr4

intrin.h

void __writecr4(unsigned __int64)

__writecr8

intrin.h

void __writecr8(unsigned __int64)

__writedr

intrin.h

void __writedr(unsigned,unsigned __int64)

__writeeflags

intrin.h

void __writeeflags(unsigned __int64)

_writefsbase_u32

FSGSBASE [2]

immintrin.h

void _writefsbase_u32(unsigned int)

_writefsbase_u64

FSGSBASE [2]

immintrin.h

void _writefsbase_u64(unsigned __int64)

_writegsbase_u32

FSGSBASE [2]

immintrin.h

void _writegsbase_u32(unsigned int)

_writegsbase_u64

FSGSBASE [2]

immintrin.h

void _writegsbase_u64(unsigned __int64)

__writegsbyte

intrin.h

void __writegsbyte(unsigned long Offset,unsigned char Data)

__writegsdword

intrin.h

void __writegsdword(unsigned long Offset,unsigned long Data)

__writegsqword

intrin.h

void __writegsqword(unsigned long Offset,unsigned __int64 Data)

__writegsword

intrin.h

void __writegsword(unsigned long Offset,unsigned short Data)

__writemsr

intrin.h

void __writemsr(unsigned long,unsigned __int64)

_xabort

RTM [2]

immintrin.h

void _xabort(unsigned int)

_xbegin

RTM [2]

immintrin.h

unsigned _xbegin(void)

_xend

RTM [2]

immintrin.h

void _xend(void)

_xgetbv

XSAVE [2]

immintrin.h

unsigned __int64 _xgetbv(unsigned int)

_xrstor

XSAVE [2]

immintrin.h

void _xrstor(void const*,unsigned __int64)

_xrstor64

XSAVE [2]

immintrin.h

void _xrstor64(void const*,unsigned __int64)

_xsave

XSAVE [2]

immintrin.h

void _xsave(void*,unsigned __int64)

_xsave64

XSAVE [2]

immintrin.h

void _xsave64(void*,unsigned __int64)

_xsaveopt

XSAVEOPT [2]

immintrin.h

void _xsaveopt(void*,unsigned __int64)

_xsaveopt64

XSAVEOPT [2]

immintrin.h

void _xsaveopt64(void*,unsigned __int64)

_xsetbv

XSAVE [2]

immintrin.h

void _xsetbv(unsigned int,unsigned __int64)

_xtest

XTEST [2]

immintrin.h

unsigned char _xtest(void)

See Also

Reference

Compiler Intrinsics

ARM Intrinsics

x86 Intrinsics