Skip to content
4 changes: 4 additions & 0 deletions fearless_simd/src/traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,10 @@ impl SimdElement for i64 {
type Mask = i64;
}

// The `Mask` associated type for `u64` elements is `i64`, the same type the
// `i64` impl uses.
impl SimdElement for u64 {
type Mask = i64;
}

/// Construction of integer vectors from floats by truncation
pub trait SimdCvtTruncate<T> {
fn truncate_from(x: T) -> Self;
Expand Down
3 changes: 3 additions & 0 deletions fearless_simd_gen/src/mk_avx2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,12 @@ fn mk_simd_impl() -> TokenStream {
type i16s = i16x8<Self>;
type u32s = u32x4<Self>;
type i32s = i32x4<Self>;
type u64s = u64x2<Self>;
type i64s = i64x2<Self>;
type mask8s = mask8x16<Self>;
type mask16s = mask16x8<Self>;
type mask32s = mask32x4<Self>;
type mask64s = mask64x2<Self>;
#[inline(always)]
fn level(self) -> Level {
Level::#level_tok(self)
Expand Down
3 changes: 3 additions & 0 deletions fearless_simd_gen/src/mk_fallback.rs
Original file line number Diff line number Diff line change
Expand Up @@ -402,9 +402,12 @@ fn mk_simd_impl() -> TokenStream {
type i16s = i16x8<Self>;
type u32s = u32x4<Self>;
type i32s = i32x4<Self>;
type u64s = u64x2<Self>;
type i64s = i64x2<Self>;
type mask8s = mask8x16<Self>;
type mask16s = mask16x8<Self>;
type mask32s = mask32x4<Self>;
type mask64s = mask64x2<Self>;
#[inline(always)]
fn level(self) -> Level {
Level::#level_tok(self)
Expand Down
3 changes: 3 additions & 0 deletions fearless_simd_gen/src/mk_neon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -409,9 +409,12 @@ fn mk_simd_impl(level: Level) -> TokenStream {
type i16s = i16x8<Self>;
type u32s = u32x4<Self>;
type i32s = i32x4<Self>;
type u64s = u64x2<Self>;
type i64s = i64x2<Self>;
type mask8s = mask8x16<Self>;
type mask16s = mask16x8<Self>;
type mask32s = mask32x4<Self>;
type mask64s = mask64x2<Self>;
#[inline(always)]
fn level(self) -> Level {
Level::#level_tok(self)
Expand Down
6 changes: 4 additions & 2 deletions fearless_simd_gen/src/mk_simd_trait.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,12 @@ pub fn mk_simd_trait() -> TokenStream {
type u32s: SimdInt<u32, Self, Block = u32x4<Self>, Mask = Self::mask32s> + SimdCvtTruncate<Self::f32s>;
type i32s: SimdInt<i32, Self, Block = i32x4<Self>, Mask = Self::mask32s, Bytes = <Self::u32s as Bytes>::Bytes> + SimdCvtTruncate<Self::f32s>
+ core::ops::Neg<Output = Self::i32s>;
type u64s: SimdInt<u64, Self, Block = u64x2<Self>, Mask = Self::mask64s>; // + SimdCvtTruncate<Self::f64s>;
type i64s: SimdInt<i64, Self, Block = i64x2<Self>, Mask = Self::mask64s, Bytes = <Self::u64s as Bytes>::Bytes> + core::ops::Neg<Output = Self::i64s>; // + SimdCvtTruncate<Self::f64s>;
type mask8s: SimdMask<i8, Self, Block = mask8x16<Self>, Bytes = <Self::u8s as Bytes>::Bytes> + Select<Self::u8s> + Select<Self::i8s> + Select<Self::mask8s>;
type mask16s: SimdMask<i16, Self, Block = mask16x8<Self>, Bytes = <Self::u16s as Bytes>::Bytes> + Select<Self::u16s> + Select<Self::i16s> + Select<Self::mask16s>;
type mask32s: SimdMask<i32, Self, Block = mask32x4<Self>, Bytes = <Self::u32s as Bytes>::Bytes>
+ Select<Self::f32s> + Select<Self::u32s> + Select<Self::i32s> + Select<Self::mask32s>;
type mask32s: SimdMask<i32, Self, Block = mask32x4<Self>, Bytes = <Self::u32s as Bytes>::Bytes> + Select<Self::f32s> + Select<Self::u32s> + Select<Self::i32s> + Select<Self::mask32s>;
type mask64s: SimdMask<i64, Self, Block = mask64x2<Self>, Bytes = <Self::u64s as Bytes>::Bytes> + Select<Self::u64s> + Select<Self::i64s> + Select<Self::mask64s>; // + Select<Self::f64s>
fn level(self) -> Level;

/// Call function with CPU features enabled.
Expand Down
37 changes: 37 additions & 0 deletions fearless_simd_gen/src/mk_sse4_2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,12 @@ fn mk_simd_impl() -> TokenStream {
type i16s = i16x8<Self>;
type u32s = u32x4<Self>;
type i32s = i32x4<Self>;
type u64s = u64x2<Self>;
type i64s = i64x2<Self>;
type mask8s = mask8x16<Self>;
type mask16s = mask16x8<Self>;
type mask32s = mask32x4<Self>;
type mask64s = mask64x2<Self>;
#[inline(always)]
fn level(self) -> Level {
Level::#level_tok(self)
Expand Down Expand Up @@ -258,13 +261,18 @@ pub(crate) fn handle_compare(

let max_min_expr = arch.expr(max_min, vec_ty, &args);
quote! { #eq_intrinsic(#max_min_expr, a.into()) }
} else if matches!(method, "simd_eq") && vec_ty.scalar_bits == 64 {
let eq =
simple_sign_unaware_intrinsic("cmpeq", vec_ty.scalar, vec_ty.scalar_bits, ty_bits);
quote! { #eq(a.into(), b.into()) }
} else if vec_ty.scalar == ScalarType::Unsigned {
// SSE4.2 only has signed GT/LT, but not unsigned.
let set = set1_intrinsic(vec_ty.scalar, vec_ty.scalar_bits, ty_bits);
let sign = match vec_ty.scalar_bits {
8 => quote! { 0x80u8 },
16 => quote! { 0x8000u16 },
32 => quote! { 0x80000000u32 },
64 => quote! { 0x8000000000000000u64 },
_ => unimplemented!(),
};
let gt =
Expand All @@ -282,10 +290,29 @@ pub(crate) fn handle_compare(

#gt(#args)
}
} else if vec_ty.scalar_bits == 64 {
let intrinsic_name = if matches!(method, "simd_eq") {
"cmpeq"
} else {
"cmpgt"
};

let cmp = simple_intrinsic(intrinsic_name, vec_ty.scalar, vec_ty.scalar_bits, ty_bits);
// SSE4.2 only has signed GT for i64
let args = if method == "simd_lt" {
quote! { b.into(), a.into() }
} else {
quote! { a.into(), b.into() }
};

quote! {
#cmp(#args)
}
} else {
arch.expr(method, vec_ty, &args)
}
} else {
// Floating point comparison
arch.expr(method, vec_ty, &args)
};

Expand Down Expand Up @@ -596,6 +623,16 @@ pub(crate) fn handle_unzip(
quote! { unsafe { #intrinsic::<#mask>(a.into(), b.into()).simd_into(self) } }
} else {
match vec_ty.scalar_bits {
64 => {
let op = if select_even { "lo" } else { "hi" };
let intrinsic = format_ident!("_mm_unpack{op}_epi64");

quote! {
unsafe {
#intrinsic(a.into(), b.into()).simd_into(self)
}
}
}
32 => {
let op = if select_even { "lo" } else { "hi" };

Expand Down
79 changes: 76 additions & 3 deletions fearless_simd_gen/src/mk_wasm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ fn mk_simd_impl(level: Level) -> TokenStream {
#[inline(always)]
fn #method_ident(#args) -> #ret_ty
};

let m = match sig {
OpSig::Splat => {
let expr = Wasm.expr(method, vec_ty, &[quote! { val }]);
Expand Down Expand Up @@ -118,6 +119,45 @@ fn mk_simd_impl(level: Level) -> TokenStream {
OpSig::Binary => {
let args = [quote! { a.into() }, quote! { b.into() }];
match method {
"max" | "min" if vec_ty.scalar_bits == 64 && vec_ty.len == 2 => {
    // This arm emits 64x2 `max`/`min` as a signed greater-than compare
    // (`i64x2_gt`) followed by a bitwise select between the two inputs.

    // `i64x2_gt` compares lanes as signed integers. For unsigned lanes, flip
    // the sign bit of both operands first: that maps unsigned ordering onto
    // signed ordering without changing which operand is larger.
    let compare_operands = if vec_ty.scalar == ScalarType::Unsigned {
        quote! {
            let sign_bit = i64x2_splat(0x8000_0000_0000_0000u64 as i64);
            let a_signed = v128_xor(a.into(), sign_bit);
            let b_signed = v128_xor(b.into(), sign_bit);
        }
    } else {
        quote! {
            let a_signed = a.into();
            let b_signed = b.into();
        }
    };

    // `mask` is all-ones in lanes where `a > b`. `max` keeps `a` in those
    // lanes and `b` elsewhere; `min` is the mirror image.
    let (when_gt, otherwise) = if method == "max" {
        (quote! { a }, quote! { b })
    } else {
        (quote! { b }, quote! { a })
    };

    quote! {
        #method_sig {
            #compare_operands
            let mask = i64x2_gt(a_signed, b_signed);
            let kept_when_gt = v128_and(mask, #when_gt.into());
            // `v128_andnot(x, y)` computes `x & !y` (unlike x86's
            // `_mm_andnot_si128`), so the mask must be the second operand
            // to obtain `value & !mask`.
            let kept_otherwise = v128_andnot(#otherwise.into(), mask);
            v128_or(kept_when_gt, kept_otherwise).simd_into(self)
        }
    }
}
"mul" if vec_ty.scalar_bits == 8 && vec_ty.len == 16 => {
let (extmul_low, extmul_high) = match vec_ty.scalar {
ScalarType::Unsigned => (
Expand Down Expand Up @@ -183,9 +223,31 @@ fn mk_simd_impl(level: Level) -> TokenStream {
OpSig::Compare => {
let args = [quote! { a.into() }, quote! { b.into() }];
let expr = Wasm.expr(method, vec_ty, &args);
quote! {
#method_sig {
#expr.simd_into(self)

let missing_op = ["lt", "gt", "le", "ge"]
.iter()
.find(|&op| method.ends_with(op));

if vec_ty.scalar_bits == 64
&& vec_ty.scalar == ScalarType::Unsigned
&& missing_op.is_some()
{
let op = missing_op.unwrap();
let wasm_ident = format_ident!("i64x2_{}", op);
quote! {
#method_sig {
let sign_bit = i64x2_splat(0x8000_0000_0000_0000u64 as i64);
let a_signed = v128_xor(a.into(), sign_bit);
let b_signed = v128_xor(b.into(), sign_bit);

#wasm_ident(a_signed, b_signed).simd_into(self)
}
}
} else {
quote! {
#method_sig {
#expr.simd_into(self)
}
}
}
}
Expand Down Expand Up @@ -386,6 +448,13 @@ fn mk_simd_impl(level: Level) -> TokenStream {
quote! { 2, 3, 6, 7 },
quote! { u32x4_shuffle },
),
64 => (
quote! { 0, 2 },
quote! { 1, 3 },
quote! { 0, 1 },
quote! { 2, 3 },
quote! { u64x2_shuffle },
),
_ => panic!("unsupported scalar_bits"),
};

Expand Down Expand Up @@ -455,6 +524,7 @@ fn mk_simd_impl(level: Level) -> TokenStream {
quote! { 2, 6, 3, 7 },
quote! { u32x4_shuffle },
),
64 => (quote! { 0, 2 }, quote! { 1, 3 }, quote! { u64x2_shuffle }),
_ => panic!("unsupported scalar_bits"),
};

Expand Down Expand Up @@ -526,9 +596,12 @@ fn mk_simd_impl(level: Level) -> TokenStream {
type i16s = i16x8<Self>;
type u32s = u32x4<Self>;
type i32s = i32x4<Self>;
type u64s = u64x2<Self>;
type i64s = i64x2<Self>;
type mask8s = mask8x16<Self>;
type mask16s = mask16x8<Self>;
type mask32s = mask32x4<Self>;
type mask64s = mask64x2<Self>;

#[inline(always)]
fn level(self) -> Level {
Expand Down
6 changes: 6 additions & 0 deletions fearless_simd_gen/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ pub const SIMD_TYPES: &[VecType] = &[
VecType::new(ScalarType::Mask, 32, 4),
VecType::new(ScalarType::Float, 64, 2),
VecType::new(ScalarType::Mask, 64, 2),
VecType::new(ScalarType::Int, 64, 2),
VecType::new(ScalarType::Unsigned, 64, 2),
// 256 bit types
VecType::new(ScalarType::Float, 32, 8),
VecType::new(ScalarType::Int, 8, 32),
Expand All @@ -129,6 +131,8 @@ pub const SIMD_TYPES: &[VecType] = &[
VecType::new(ScalarType::Mask, 32, 8),
VecType::new(ScalarType::Float, 64, 4),
VecType::new(ScalarType::Mask, 64, 4),
VecType::new(ScalarType::Int, 64, 4),
VecType::new(ScalarType::Unsigned, 64, 4),
// 512 bit types
VecType::new(ScalarType::Float, 32, 16),
VecType::new(ScalarType::Int, 8, 64),
Expand All @@ -142,6 +146,8 @@ pub const SIMD_TYPES: &[VecType] = &[
VecType::new(ScalarType::Mask, 32, 16),
VecType::new(ScalarType::Float, 64, 8),
VecType::new(ScalarType::Mask, 64, 8),
VecType::new(ScalarType::Int, 64, 8),
VecType::new(ScalarType::Unsigned, 64, 8),
];

pub fn type_imports() -> TokenStream {
Expand Down
Loading
Loading