S
Silvers11
Guest
Hello!
I have a special question about Vector128. I am using Net.Core 3.1. In this example I am counting how many of the numbers:
0,1,2,3 that exist in the: inputArray
The correct answer to this is: 3,9,1,3 which is shown in the MessageBox when running this code. This code works!
Now is my problem this:
If we look at the inputArray we can see that the number 0 exists on index 0,9,10
If we look at the indexArray we can see that on index 0,9,10 we have the numbers: 9,43,5
Those numbers are the ones that I want to store in the 2D array: indexes0123 in the most efficient way possible.
The indexes0123 should have 4 dimensions somehow where each dimension stores the numbers from the indexArray. I am looking for the fastest way to add the numbers: 9,43,5 so I am open to exactly any idéa here that is more efficient/fast.
I beleive this is where this code happens that perheps it is possible to do something. Looking at this code where it on the first iteration for val = 0 finds matches for the number 0 for the 16 indexes at the very same time:
We can see here that where 1 exists in the below string are exactly on the same indexes in the inputArray where: 0 is found which are on indexes: 0,9,10. Now I want to also add the numbers 9,43,5 from the indexArray here as fast as possible to indexes0123 (or any other method that is faster)
[1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0]
var v = Avx.LoadVector128(&fixedInput[0]);
for (byte val = 0; val < 4; val++)
{
//[1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0] -CompareEqual to: [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
var match = Avx.CompareEqual(v, Vector128.Create(val));
counts[val] = Avx.Subtract(counts[val], match);
}
The complete code that works is the below: (Copy/Paste and run the code)
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
private void button3_Click(object sender, EventArgs e)
{
//Create 16 indexes with numbers between: 0-3. The goal is to count how many of those occurences we have for the numbers: 0-3
byte[] v1 = new byte[16]; int[] v2 = new int[16];
v1[0] = 0; v2[0] = 9;
v1[1] = 1; v2[1] = 12;
v1[2] = 2; v2[2] = 55;
v1[3] = 3; v2[3] = 23;
v1[4] = 1; v2[4] = 568;
v1[5] = 1; v2[5] = 4;
v1[6] = 1; v2[6] = 6;
v1[7] = 1; v2[7] = 1008;
v1[8] = 1; v2[8] = 188800;
v1[9] = 0; v2[9] = 43;
v1[10] = 0; v2[10] = 5;
v1[11] = 3; v2[11] = 2;
v1[12] = 1; v2[12] = 1;
v1[13] = 1; v2[13] = 58;
v1[14] = 1; v2[14] = 8;
v1[15] = 3; v2[15] = 15;
/*---------------*/
ReadOnlySpan<byte> inputArray = v1;
ReadOnlySpan<int> indexArray = v2;
//Call function
countElements(inputArray, indexArray);
}
private unsafe static void countElements(ReadOnlySpan<byte> inputArray, ReadOnlySpan<int> indexArray)
{
int[,] indexes0123 = new int[,] { }; //Store arrays for indexes found for the numbers: 0,1,2,3 in the "indexArray"
//Below starts the SIMD calculations!
double[] countArray = new double[4];
Span<Vector128<byte>> counts = stackalloc Vector128<byte>[4];
Span<Vector128<UInt64>> sum64 = stackalloc Vector128<UInt64>[4];
unsafe
{
fixed (byte* fixedInput = inputArray)
{
var v = Avx.LoadVector128(&fixedInput[0]);
for (byte val = 0; val < 4; val++)
{
//[1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0] -CompareEqual to: [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
var match = Avx.CompareEqual(v, Vector128.Create(val));
counts[val] = Avx.Subtract(counts[val], match);
}
//Here SumAbsoluteDifferences
for (int i3 = 0; i3 < 4; i3++)
{
sum64[i3] = Sse2.Add(sum64[i3], Sse2.SumAbsoluteDifferences(counts[i3], Vector128<byte>.Zero).AsUInt64()); //sum64: <2,0,0,0,3,0,0,0>
}
//UnpackHigh and get the lower element from the Vector128<UInt64>
for (int i3 = 0; i3 < 4; i3++)
{
Vector128<UInt64> upper = Sse2.UnpackHigh(sum64[i3], sum64[i3]).AsUInt64(); //3
countArray[i3] += Sse2.Add(sum64[i3], upper).ToScalar();
}
//We now know how many 0,1,2,3 we have of each: (But we also need to collect which indexes from "indexlist" each of these 4 buckets has)
//3,9,1,3
MessageBox.Show(countArray[0] + "," + countArray[1] + "," + countArray[2] + "," + countArray[3]);
}
}
}
Continue reading...
I have a special question about Vector128. I am using Net.Core 3.1. In this example I am counting how many of the numbers:
0,1,2,3 that exist in the: inputArray
The correct answer to this is: 3,9,1,3 which is shown in the MessageBox when running this code. This code works!
Now is my problem this:
If we look at the inputArray we can see that the number 0 exists on index 0,9,10
If we look at the indexArray we can see that on index 0,9,10 we have the numbers: 9,43,5
Those numbers are the ones that I want to store in the 2D array: indexes0123 in the most efficient way possible.
The indexes0123 should have 4 dimensions somehow where each dimension stores the numbers from the indexArray. I am looking for the fastest way to add the numbers: 9,43,5 so I am open to exactly any idéa here that is more efficient/fast.
I beleive this is where this code happens that perheps it is possible to do something. Looking at this code where it on the first iteration for val = 0 finds matches for the number 0 for the 16 indexes at the very same time:
We can see here that where 1 exists in the below string are exactly on the same indexes in the inputArray where: 0 is found which are on indexes: 0,9,10. Now I want to also add the numbers 9,43,5 from the indexArray here as fast as possible to indexes0123 (or any other method that is faster)
[1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0]
var v = Avx.LoadVector128(&fixedInput[0]);
for (byte val = 0; val < 4; val++)
{
//[1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0] -CompareEqual to: [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
var match = Avx.CompareEqual(v, Vector128.Create(val));
counts[val] = Avx.Subtract(counts[val], match);
}
The complete code that works is the below: (Copy/Paste and run the code)
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
private void button3_Click(object sender, EventArgs e)
{
//Create 16 indexes with numbers between: 0-3. The goal is to count how many of those occurences we have for the numbers: 0-3
byte[] v1 = new byte[16]; int[] v2 = new int[16];
v1[0] = 0; v2[0] = 9;
v1[1] = 1; v2[1] = 12;
v1[2] = 2; v2[2] = 55;
v1[3] = 3; v2[3] = 23;
v1[4] = 1; v2[4] = 568;
v1[5] = 1; v2[5] = 4;
v1[6] = 1; v2[6] = 6;
v1[7] = 1; v2[7] = 1008;
v1[8] = 1; v2[8] = 188800;
v1[9] = 0; v2[9] = 43;
v1[10] = 0; v2[10] = 5;
v1[11] = 3; v2[11] = 2;
v1[12] = 1; v2[12] = 1;
v1[13] = 1; v2[13] = 58;
v1[14] = 1; v2[14] = 8;
v1[15] = 3; v2[15] = 15;
/*---------------*/
ReadOnlySpan<byte> inputArray = v1;
ReadOnlySpan<int> indexArray = v2;
//Call function
countElements(inputArray, indexArray);
}
private unsafe static void countElements(ReadOnlySpan<byte> inputArray, ReadOnlySpan<int> indexArray)
{
int[,] indexes0123 = new int[,] { }; //Store arrays for indexes found for the numbers: 0,1,2,3 in the "indexArray"
//Below starts the SIMD calculations!
double[] countArray = new double[4];
Span<Vector128<byte>> counts = stackalloc Vector128<byte>[4];
Span<Vector128<UInt64>> sum64 = stackalloc Vector128<UInt64>[4];
unsafe
{
fixed (byte* fixedInput = inputArray)
{
var v = Avx.LoadVector128(&fixedInput[0]);
for (byte val = 0; val < 4; val++)
{
//[1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0] -CompareEqual to: [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
var match = Avx.CompareEqual(v, Vector128.Create(val));
counts[val] = Avx.Subtract(counts[val], match);
}
//Here SumAbsoluteDifferences
for (int i3 = 0; i3 < 4; i3++)
{
sum64[i3] = Sse2.Add(sum64[i3], Sse2.SumAbsoluteDifferences(counts[i3], Vector128<byte>.Zero).AsUInt64()); //sum64: <2,0,0,0,3,0,0,0>
}
//UnpackHigh and get the lower element from the Vector128<UInt64>
for (int i3 = 0; i3 < 4; i3++)
{
Vector128<UInt64> upper = Sse2.UnpackHigh(sum64[i3], sum64[i3]).AsUInt64(); //3
countArray[i3] += Sse2.Add(sum64[i3], upper).ToScalar();
}
//We now know how many 0,1,2,3 we have of each: (But we also need to collect which indexes from "indexlist" each of these 4 buckets has)
//3,9,1,3
MessageBox.Show(countArray[0] + "," + countArray[1] + "," + countArray[2] + "," + countArray[3]);
}
}
}
Continue reading...