M
mnemonix84
Guest
Hi folks,
yesterday I came across the blog post "Restrict this" (Coder Corner » Archivio Blog), which is about the __restrict keyword and aliasing in general. My question is: how do I get the same optimization with the __restrict keyword alone as in the hand-optimized method below? In Visual C++, at least, using the __restrict keyword in the restrict_fun method has no effect, whereas in gcc 9.2 it does. With the code below I wanted to express that m_store cannot be an alias of &m_value.
// Minimal test case for the aliasing question: two output cursors (one
// __restrict-qualified, one plain) and the counter whose values are written
// through them by the two member functions.
struct Test
{
// Loop that works directly on the members. The trailing __restrict is a
// compiler extension; GCC documents it as marking `this` as unaliased.
// NOTE(review): MSVC accepts the syntax, but its exact meaning there should be confirmed.
int restrict_fun(int n) __restrict;
// Same loop, written with local copies of the members.
int handoptimized_fun(int n);
// Output cursor advanced by restrict_fun; the pointer itself is __restrict-qualified.
int * __restrict m_restrict_store = nullptr;
// Output cursor advanced by handoptimized_fun; no restrict qualifier.
int * m_store = nullptr;
// Counter: its current value is written out and then incremented on each iteration.
int m_value = 0;
};
// Writes the current m_value through m_restrict_store n times, advancing the
// pointer by one int and incrementing m_value after each write.
// Operates on the members directly; the only aliasing hints are the
// __restrict qualifiers on the function and on m_restrict_store.
// The loop ends only when n reaches zero, so n is expected to be non-negative.
// Returns m_value after the loop.
int Test::restrict_fun(int n) __restrict
{
while (n--)
{
// Store the counter, then step the member pointer to the next slot.
*m_restrict_store++ = m_value;
m_value++;
}
return m_value;
}
// Same operation as restrict_fun, but on m_store and with the members copied
// into locals first, so the loop body touches no member of the object.
// The loop ends only when n reaches zero, so n is expected to be non-negative.
// Returns m_value after the locals have been written back.
int Test::handoptimized_fun(int n)
{
// Local copies: their addresses are never taken, so the stores through
// `store` cannot modify them.
int value = m_value;
int *store = m_store;
while (n--)
{
*store++ = value;
value++;
}
// Publish the final counter and cursor back to the object once.
m_value = value;
m_store = store;
return m_value;
}
Here is the godbolt.org assembly output (x64 msvc v19.22, -O2):
; Key, read off from the C++ source above: rcx = this, edx = n,
; [rcx] = m_restrict_store, [rcx+16] = m_value.
this$ = 8
n$ = 16
int Test::restrict_fun(int) __restrict PROC ; Test::restrict_fun, COMDAT
; m_value is loaded once and stays in eax for the whole loop.
mov eax, DWORD PTR [rcx+16]
; n is copied to r8d because rdx is reused for the pointer inside the loop.
mov r8d, edx
; n == 0: jump past both the loop and the write-back of m_value.
test edx, edx
je SHORT $LN9@restrict_f
npad 6
$LL2@restrict_f:
; m_restrict_store is reloaded from the object on every iteration ...
mov rdx, QWORD PTR [rcx]
; *m_restrict_store = m_value
mov DWORD PTR [rdx], eax
lea rdx, QWORD PTR [rdx+4]
; ... and the advanced pointer is stored back on every iteration.
mov QWORD PTR [rcx], rdx
; m_value++ (in the register only)
lea eax, DWORD PTR [rax+1]
sub r8d, 1
jne SHORT $LL2@restrict_f
; m_value is written back once, after the loop.
mov DWORD PTR [rcx+16], eax
$LN9@restrict_f:
ret 0
int Test::restrict_fun(int) __restrict ENDP ; Test::restrict_fun
; Key, read off from the C++ source above: rcx = this, edx = n,
; [rcx+8] = m_store, [rcx+16] = m_value.
this$ = 8
n$ = 16
int Test::handoptimized_fun(int) PROC ; Test::handoptimized_fun, COMDAT
; value = m_value
mov eax, DWORD PTR [rcx+16]
; store = m_store
mov r8, QWORD PTR [rcx+8]
test edx, edx
je SHORT $LN6@handoptimi
npad 5
$LL2@handoptimi:
; *store = value; the loop body does not access the object at all.
mov DWORD PTR [r8], eax
add r8, 4
inc eax
sub edx, 1
jne SHORT $LL2@handoptimi
$LN6@handoptimi:
; Both members are written back once; this path is also taken when n == 0.
mov DWORD PTR [rcx+16], eax
mov QWORD PTR [rcx+8], r8
ret 0
int Test::handoptimized_fun(int) ENDP ; Test::handoptimized_fun
As you can see, within the loop $LL2@restrict_f the pointer member is reloaded from memory and stored back on every iteration, whereas in the hand-optimized code (loop $LL2@handoptimi, which uses helper locals) registers hold the working values (the store pointer as well as m_value) for the whole loop.
So what can I do to get the same effect only with the __restrict keyword?
Thanks in advance.
Continue reading...
yesterday I came across the blog post "Restrict this" (Coder Corner » Archivio Blog), which is about the __restrict keyword and aliasing in general. My question is: how do I get the same optimization with the __restrict keyword alone as in the hand-optimized method below? In Visual C++, at least, using the __restrict keyword in the restrict_fun method has no effect, whereas in gcc 9.2 it does. With the code below I wanted to express that m_store cannot be an alias of &m_value.
// Minimal test case for the aliasing question: two output cursors (one
// __restrict-qualified, one plain) and the counter whose values are written
// through them by the two member functions.
struct Test
{
// Loop that works directly on the members. The trailing __restrict is a
// compiler extension; GCC documents it as marking `this` as unaliased.
// NOTE(review): MSVC accepts the syntax, but its exact meaning there should be confirmed.
int restrict_fun(int n) __restrict;
// Same loop, written with local copies of the members.
int handoptimized_fun(int n);
// Output cursor advanced by restrict_fun; the pointer itself is __restrict-qualified.
int * __restrict m_restrict_store = nullptr;
// Output cursor advanced by handoptimized_fun; no restrict qualifier.
int * m_store = nullptr;
// Counter: its current value is written out and then incremented on each iteration.
int m_value = 0;
};
// Writes the current m_value through m_restrict_store n times, advancing the
// pointer by one int and incrementing m_value after each write.
// Operates on the members directly; the only aliasing hints are the
// __restrict qualifiers on the function and on m_restrict_store.
// The loop ends only when n reaches zero, so n is expected to be non-negative.
// Returns m_value after the loop.
int Test::restrict_fun(int n) __restrict
{
while (n--)
{
// Store the counter, then step the member pointer to the next slot.
*m_restrict_store++ = m_value;
m_value++;
}
return m_value;
}
// Same operation as restrict_fun, but on m_store and with the members copied
// into locals first, so the loop body touches no member of the object.
// The loop ends only when n reaches zero, so n is expected to be non-negative.
// Returns m_value after the locals have been written back.
int Test::handoptimized_fun(int n)
{
// Local copies: their addresses are never taken, so the stores through
// `store` cannot modify them.
int value = m_value;
int *store = m_store;
while (n--)
{
*store++ = value;
value++;
}
// Publish the final counter and cursor back to the object once.
m_value = value;
m_store = store;
return m_value;
}
Here is the godbolt.org assembly output (x64 msvc v19.22, -O2):
; Key, read off from the C++ source above: rcx = this, edx = n,
; [rcx] = m_restrict_store, [rcx+16] = m_value.
this$ = 8
n$ = 16
int Test::restrict_fun(int) __restrict PROC ; Test::restrict_fun, COMDAT
; m_value is loaded once and stays in eax for the whole loop.
mov eax, DWORD PTR [rcx+16]
; n is copied to r8d because rdx is reused for the pointer inside the loop.
mov r8d, edx
; n == 0: jump past both the loop and the write-back of m_value.
test edx, edx
je SHORT $LN9@restrict_f
npad 6
$LL2@restrict_f:
; m_restrict_store is reloaded from the object on every iteration ...
mov rdx, QWORD PTR [rcx]
; *m_restrict_store = m_value
mov DWORD PTR [rdx], eax
lea rdx, QWORD PTR [rdx+4]
; ... and the advanced pointer is stored back on every iteration.
mov QWORD PTR [rcx], rdx
; m_value++ (in the register only)
lea eax, DWORD PTR [rax+1]
sub r8d, 1
jne SHORT $LL2@restrict_f
; m_value is written back once, after the loop.
mov DWORD PTR [rcx+16], eax
$LN9@restrict_f:
ret 0
int Test::restrict_fun(int) __restrict ENDP ; Test::restrict_fun
; Key, read off from the C++ source above: rcx = this, edx = n,
; [rcx+8] = m_store, [rcx+16] = m_value.
this$ = 8
n$ = 16
int Test::handoptimized_fun(int) PROC ; Test::handoptimized_fun, COMDAT
; value = m_value
mov eax, DWORD PTR [rcx+16]
; store = m_store
mov r8, QWORD PTR [rcx+8]
test edx, edx
je SHORT $LN6@handoptimi
npad 5
$LL2@handoptimi:
; *store = value; the loop body does not access the object at all.
mov DWORD PTR [r8], eax
add r8, 4
inc eax
sub edx, 1
jne SHORT $LL2@handoptimi
$LN6@handoptimi:
; Both members are written back once; this path is also taken when n == 0.
mov DWORD PTR [rcx+16], eax
mov QWORD PTR [rcx+8], r8
ret 0
int Test::handoptimized_fun(int) ENDP ; Test::handoptimized_fun
As you can see, within the loop $LL2@restrict_f the pointer member is reloaded from memory and stored back on every iteration, whereas in the hand-optimized code (loop $LL2@handoptimi, which uses helper locals) registers hold the working values (the store pointer as well as m_value) for the whole loop.
So what can I do to get the same effect only with the __restrict keyword?
Thanks in advance.
Continue reading...