Here is the OpenCL code (I've marked the statements that seem to cause the issue - lines 8 and 21):
(If I change tempint on those lines to any uint literal, the kernel compiles fine - madness)
/*
 * Adds two 128-bit unsigned integers, each stored as four 32-bit limbs
 * with limb 0 the least significant.
 *
 *   res - receives the four limbs of the low 128 bits of a + b
 *   a   - first operand (4 limbs)
 *   b   - second operand (4 limbs)
 *
 * Returns the carry out of the top limb (0 or 1).
 *
 * Limb i of both inputs is read before limb i of res is written, so res
 * may be the same array as a or b.
 */
uint wide_add_vector(uint* res, const uint* a, const uint* b)
{
    ulong carry = 0;
    #pragma unroll
    for (uint i = 0; i < 4; i++) {
        /* Widen to 64 bits so the sum of two limbs plus carry cannot wrap. */
        ulong tmp = (ulong)(a[i]) + b[i] + carry;
        uint tempint = (uint)(tmp & 0xFFFFFFFF);
        res[i] = tempint; // <---- Problem statement
        carry = tmp >> 32; /* bit 32 of the sum is the carry into the next limb */
    }
    return (uint)carry;
}
/*
 * Adds a 32-bit scalar to a 128-bit unsigned integer stored as four
 * 32-bit limbs with limb 0 the least significant.
 *
 *   res - receives the four limbs of the low 128 bits of a + b
 *   a   - 128-bit operand (4 limbs)
 *   b   - 32-bit value added at limb 0
 *
 * Returns the carry out of the top limb (0 or 1).
 *
 * Limb i of a is read before limb i of res is written, so res may be the
 * same array as a.
 */
uint wide_add_scalar(uint* res, const uint* a, uint b)
{
    /* The scalar is fed in as the initial carry into limb 0. */
    ulong carry = b;
    #pragma unroll
    for (uint i = 0; i < 4; i++) {
        /* carry is 64-bit, so a[i] is widened and the sum cannot wrap. */
        ulong tmp = a[i] + carry;
        uint tempint = (uint)(tmp & 0xFFFFFFFF);
        res[i] = tempint; // <---- Problem statement
        carry = tmp >> 32;
    }
    return (uint)carry;
}
/*
 * Multiplies two 128-bit unsigned integers (four 32-bit limbs each, limb 0
 * least significant) into a 256-bit product.
 *
 *   res_hi - receives limbs 4..7 of the product (4 limbs)
 *   res_lo - receives limbs 0..3 of the product (4 limbs)
 *   a, b   - the operands (4 limbs each)
 *
 * The product is built one output limb ("column") at a time: for column k
 * every partial product a[j]*b[k-j] is summed into a 64-bit accumulator,
 * and each wrap of that accumulator is counted in carry.
 *
 * Output limbs are stored while a and b are still being read, so res_hi
 * and res_lo must not overlap a or b.
 */
void wide_mul(uint* res_hi, uint* res_lo, const uint* a, const uint* b)
{
    ulong carry = 0, acc = 0;

    /* Columns 0..3: the low half of the product. */
    #pragma unroll
    for (uint i = 0; i < 4; i++) {
        #pragma unroll
        for (uint j = 0; j <= i; j++) {
            ulong tmp = (ulong)(a[j]) * b[i - j];
            acc += tmp;
            carry += (acc < tmp); /* unsigned wrap happened iff the sum is below the addend */
        }
        res_lo[i] = (uint)(acc & 0xFFFFFFFF);
        /* Move to the next column: drop the stored limb and fold the wrap count in above it. */
        acc = (carry << 32) | (acc >> 32);
        carry = carry >> 32;
    }

    /* Columns 4..6: partial products a[j]*b[3+i-j] for j = i..3. */
    #pragma unroll
    for (uint i = 1; i < 4; i++) {
        #pragma unroll
        for (uint j = i; j < 4; j++) {
            ulong tmp = (ulong)(a[j]) * b[4 - j + i - 1];
            acc += tmp;
            carry += (acc < tmp);
        }
        res_hi[i - 1] = (uint)(acc & 0xFFFFFFFF);
        acc = (carry << 32) | (acc >> 32);
        carry = carry >> 32;
    }

    /* Whatever is left in the accumulator is the top limb (column 7). */
    res_hi[3] = (uint)acc;
}
/*
 * Copies eight 32-bit words from a into the __global buffer res.
 *
 *   res - destination; at least 8 words must be writable
 *   a   - source; 8 words are read
 */
void wide_copy_global(__global uint *res, const uint *a)
{
    #pragma unroll
    for (uint i = 0; i < 8; i++) {
        res[i] = a[i];
    }
}
/*
 * Each work-item builds a 256-bit value x (eight 32-bit limbs, limb 0 least
 * significant) from its global id and the round parameters, applies
 * hashSteps rounds of a multiply-and-fold step to it, and writes the
 * resulting eight limbs to proofBuffer at offset 8 * global id.
 *
 *   roundId, roundSalt, chainHash - 64-bit inputs packed into x
 *   c           - 128-bit multiplier, c.x being the least significant limb
 *   hashSteps   - number of rounds to apply
 *   proofBuffer - output; must hold 8 words per work-item
 */
__kernel void bitecoin_miner(ulong roundId, ulong roundSalt, ulong chainHash,
                             uint4 c, uint hashSteps,
                             __global uint* proofBuffer)
{
    uint workerID = get_global_id(0);
    uint cArray[4] = {c.x, c.y, c.z, c.w};

    /*
     * Each 64-bit argument occupies two limbs: low half first, then high
     * half. The previous code stored the low half in both limbs, which
     * discarded the upper 32 bits of every argument.
     */
    uint x[8] = {
        workerID,        0,
        (uint)roundId,   (uint)(roundId >> 32),
        (uint)roundSalt, (uint)(roundSalt >> 32),
        (uint)chainHash, (uint)(chainHash >> 32)
    };

    for (uint j = 0; j < hashSteps; j++) {
        uint tmp[8];
        /* tmp = lo(x) * c, a full 256-bit product */
        wide_mul(tmp + 4, tmp, x, cArray); // cArray; not to be confused with carry.
        /* lo(x) = lo(tmp) + hi(x), keeping the carry out */
        uint carry = wide_add_vector(x, tmp, x + 4);
        /* hi(x) = hi(tmp) + carry */
        wide_add_scalar(x + 4, tmp + 4, carry);
    }

    wide_copy_global(proofBuffer + 8 * workerID, x);
}
When I run it, I get:
LogLevel = 2 -> 2
[MyClient], 1395075385.62, 2, Created log.
Will try to connect to address Minty at port 4000
Found 1 platforms Platform 0 : Advanced Micro Devices, Inc.
Choosing platform 0
Found 2 devices Device 0 : Tahiti Device 1 : Intel(R) Core(TM) i7-4770K CPU @ 3.50GHz
Choosing device 0
LLVM ERROR: Cannot select: 0x855acbc3a0: i32 = setcc 0x855acbcca0, 0x855ac3a080, 0x855ac3a480 [ORD=52] [ID=30] 0x855acbcca0: i64 = add 0x855ac3a080, 0x855ac3aa80 [ORD=49] [ID=28] 0x855ac3a080: i64,ch = CopyFromReg 0x855ac2b1d0, 0x855ac3a680 [ORD=49] [ID=19] 0x855ac3a680: i64 = Register %vreg33 [ORD=49] [ID=7] 0x855ac3aa80: i64 = mul 0x855acbcda0, 0x855ac37450 [ORD=48] [ID=27] 0x855acbcda0: i64,ch = load 0x855ac2b1d0, 0x855ac37250, 0x855ac3a380<LD4[%scevgep106], zext from i32> [ORD=47] [ID=26] 0x855ac37250: i32 = add 0x855ac36640, 0x855ac38960 [ORD=45] [ID=25] 0x855ac36640: i32 = sub 0x855ac37850, 0x855ac37050 [ORD=44] [ID=24] 0x855ac37850: i32 = FrameIndex<0> [ORD=41] [ID=1] 0x855ac37050: i32 = shl 0x855acbbc90, 0x855ac3a980 [ORD=44] [ID=23] 0x855acbbc90: i32,ch = CopyFromReg 0x855ac2b1d0, 0x855ac36940 [ORD=43] [ID=18] 0x855ac36940: i32 = Register %vreg30 [ORD=43] [ID=3] 0x855ac3a980: i32 = Constant<2> [ORD=44] [ID=4] 0x855ac38960: i32 = Constant<8> [ORD=45] [ID=5] 0x855ac3a380: i32 = undef [ORD=46] [ID=6] 0x855ac37450: i64 = zero_extend 0x855acbbd90 [ORD=42] [ID=21] 0x855acbbd90: i32,ch = CopyFromReg 0x855ac2b1d0, 0x855acbba90 [ORD=42] [ID=17] 0x855acbba90: i32 = Register %vreg31 [ORD=42] [ID=2] 0x855ac3a080: i64,ch = CopyFromReg 0x855ac2b1d0, 0x855ac3a680 [ORD=49] [ID=19] 0x855ac3a680: i64 = Register %vreg33 [ORD=49] [ID=7]
In function: __OpenCL_bitecoin_miner_kernel
Press any key to continue . . .
If I put it into Kernel Analyzer it just freezes.
Any ideas?
The system is:
Windows 8.1 64-bit, Visual Studio 2013
HD7970 Driver Version 13.350.1005.0
Catalyst 14.2
AMD APP SDK 2.9
Many Thanks
Henry