Rust provides experimental support for SIMD vectors. These SIMD vectors are
exposed as structs (`f32x4`, `u8x16`, etc.) that implement basic operations
(`+`, `-`, `*`, etc.) using SIMD instructions under the hood.
// simd.rs
use std::simd::f32x4;
fn main() {
    // build two four-lane vectors of single-precision floats
    let lhs = f32x4(1.0, 2.0, 3.0, 4.0);
    let rhs = f32x4(4.0, 3.0, 2.0, 1.0);

    // simd product; since the simd vector is a plain struct, the result can
    // be unpacked into its four components directly with a `let` pattern
    let f32x4(a, b, c, d) = lhs * rhs;

    println!("{}", (a, b, c, d));
}
{simd.out}
Here's a more complex example that sums two `Vec<f32>`s, using the `f32x4`
type to operate on 4-element chunks at a time.
// simd_add.rs
#![feature(macro_rules)]
use std::simd::f32x4;
// Asserts that the two named collections report the same `len()`.
//
// Both arguments must be identifiers of values that have a `len()` method.
// On mismatch the assertion panics with a message showing both lengths as
// one-element tuples, e.g. `add_assign: dimension mismatch: (3,) += (1,)`.
macro_rules! assert_equal_len {
    ($a:ident, $b: ident) => {
        // The lengths are wrapped in one-element tuples to read like
        // dimensions. Tuples only implement `Debug`, so they must be
        // formatted with `{:?}` rather than `{}`.
        assert!($a.len() == $b.len(),
                "add_assign: dimension mismatch: {:?} += {:?}",
                ($a.len(),),
                ($b.len(),));
    }
}
// element-wise addition, in place: every element of `xs` is replaced by the
// sum of itself and the element of `ys` at the same position.
// Panics (through `assert_equal_len!`) when the vectors differ in length.
fn add_assign(xs: &mut Vec<f32>, ys: &Vec<f32>) {
    assert_equal_len!(xs, ys);

    // walk both vectors in lockstep, writing through the mutable side
    for (acc, addend) in xs.iter_mut().zip(ys.iter()) {
        *acc = *acc + *addend;
    }
}
// simd accelerated addition
//
// Adds `ys` into `xs` in place, like `add_assign`, but handles the leading
// `4 * chunks` elements four at a time through `f32x4` pointers and only the
// remaining tail one `f32` at a time.
// Panics (through `assert_equal_len!`) when the vectors differ in length.
fn simd_add_assign(xs: &mut Vec<f32>, ys: &Vec<f32>) {
assert_equal_len!(xs, ys);
// length converted to `int`; the loop indices derived from it are the values
// handed to `offset` below
let size = xs.len() as int;
// number of complete 4-element groups (integer division drops the remainder)
let chunks = size / 4;
// pointer to the start of the vector data
let p_x: *mut f32 = xs.as_mut_ptr();
let p_y: *const f32 = ys.as_ptr();
// sum excess elements that don't fit in the simd vector
// (indices `4 * chunks .. size`, i.e. the `size % 4` trailing elements)
for i in range(4 * chunks, size) {
// dereferencing a raw pointer requires an unsafe block
// SAFETY: `i < size`, and both vectors hold `size` elements (asserted above),
// so both offsets stay inside their buffers.
unsafe {
// offset by i elements
*p_x.offset(i) += *p_y.offset(i);
}
}
// treat f32 vector as an simd f32x4 vector
// NOTE(review): this reinterpretation presumably relies on the `Vec<f32>`
// buffers satisfying `f32x4`'s alignment requirement; nothing in this
// function checks that -- confirm.
let simd_p_x = p_x as *mut f32x4;
let simd_p_y = p_y as *const f32x4;
// sum "simd vector"
for i in range(0, chunks) {
// SAFETY (bounds only): assuming `f32x4` is exactly four `f32`s wide, chunk
// `i` spans elements `4 * i .. 4 * i + 4`, and `i < chunks` keeps that within
// the first `4 * chunks <= size` elements of both buffers.
unsafe {
*simd_p_x.offset(i) += *simd_p_y.offset(i);
}
}
}
// Benchmarks comparing the scalar and the simd accelerated addition.
mod bench {
    extern crate test;
    use self::test::Bencher;

    // length given to both vectors built for each benchmark
    static BENCH_SIZE: uint = 10_000;

    // Generates a `#[bench]` function named `$label` that repeatedly calls the
    // parent-module function `$target` on the same pair of vectors.
    macro_rules! bench {
        ($label:ident, $target:ident) => {
            #[bench]
            fn $label(bencher: &mut Bencher) {
                let mut acc = Vec::from_elem(BENCH_SIZE, 1.0f32);
                let addend = Vec::from_elem(BENCH_SIZE, 0.1f32);

                // `acc` is created once and mutated by every iteration
                bencher.iter(|| {
                    super::$target(&mut acc, &addend);
                })
            }
        }
    }

    bench!(vanilla, add_assign);
    bench!(simd, simd_add_assign);
}
And here's the result of the benchmark:
$ rustc -O --test simd_add.rs && ./simd_add --bench
running 4 tests
test test::simd ... ignored
test test::vanilla ... ignored
test bench::simd ... bench: 1852 ns/iter (+/- 17)
test bench::vanilla ... bench: 8346 ns/iter (+/- 103)
test result: ok. 0 passed; 0 failed; 2 ignored; 2 measured