aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authors_nakamoto <s_nakamoto@1a98c847-1fd6-4fd8-948a-caf3550aa51b>2010-08-15 21:05:16 +0000
committers_nakamoto <s_nakamoto@1a98c847-1fd6-4fd8-948a-caf3550aa51b>2010-08-15 21:05:16 +0000
commit4bd188c4383d6e614e18f79dc337fbabe8464c82 (patch)
tree4a557aef24b72923d52721c93d7a25b6f64f0d80
parent01cd2fdaf3ac6071304ceb80fb7436ac02b1059e (diff)
downloadbitcoin-4bd188c4383d6e614e18f79dc337fbabe8464c82.tar.xz
misc changes
git-svn-id: https://bitcoin.svn.sourceforge.net/svnroot/bitcoin/trunk@131 1a98c847-1fd6-4fd8-948a-caf3550aa51b
-rw-r--r--init.cpp2
-rw-r--r--main.cpp72
-rw-r--r--main.h3
-rw-r--r--script.cpp76
-rw-r--r--serialize.h4
-rw-r--r--setup.nsi6
-rw-r--r--sha256.cpp419
-rw-r--r--util.cpp8
8 files changed, 538 insertions, 52 deletions
diff --git a/init.cpp b/init.cpp
index b99237ec66..a2df01be84 100644
--- a/init.cpp
+++ b/init.cpp
@@ -373,6 +373,8 @@ bool AppInit2(int argc, char* argv[])
wxMessageBox(_("Invalid amount for -paytxfee=<amount>"), "Bitcoin");
return false;
}
+ if (nTransactionFee > 1 * COIN)
+ wxMessageBox(_("Warning: -paytxfee is set very high. This is the transaction fee you will pay if you send a transaction."), "Bitcoin");
}
//
diff --git a/main.cpp b/main.cpp
index 4b7146577c..3e7d2b9ecc 100644
--- a/main.cpp
+++ b/main.cpp
@@ -538,7 +538,7 @@ bool CTransaction::AcceptTransaction(CTxDB& txdb, bool fCheckInputs, bool* pfMis
// Check against previous transactions
map<uint256, CTxIndex> mapUnused;
int64 nFees = 0;
- if (fCheckInputs && !ConnectInputs(txdb, mapUnused, CDiskTxPos(1,1,1), 0, nFees, false, false))
+ if (fCheckInputs && !ConnectInputs(txdb, mapUnused, CDiskTxPos(1,1,1), pindexBest, nFees, false, false))
{
if (pfMissingInputs)
*pfMissingInputs = true;
@@ -744,7 +744,7 @@ void ResendWalletTransactions()
if (GetTime() < nNextTime)
return;
bool fFirst = (nNextTime == 0);
- nNextTime = GetTime() + GetRand(120 * 60);
+ nNextTime = GetTime() + GetRand(30 * 60);
if (fFirst)
return;
@@ -760,7 +760,7 @@ void ResendWalletTransactions()
CWalletTx& wtx = item.second;
// Don't rebroadcast until it's had plenty of time that
// it should have gotten in already by now.
- if (nTimeBestReceived - wtx.nTimeReceived > 60 * 60)
+ if (nTimeBestReceived - (int64)wtx.nTimeReceived > 5 * 60)
mapSorted.insert(make_pair(wtx.nTimeReceived, &wtx));
}
foreach(PAIRTYPE(const unsigned int, CWalletTx*)& item, mapSorted)
@@ -931,7 +931,8 @@ bool CTransaction::DisconnectInputs(CTxDB& txdb)
}
-bool CTransaction::ConnectInputs(CTxDB& txdb, map<uint256, CTxIndex>& mapTestPool, CDiskTxPos posThisTx, int nHeight, int64& nFees, bool fBlock, bool fMiner, int64 nMinFee)
+bool CTransaction::ConnectInputs(CTxDB& txdb, map<uint256, CTxIndex>& mapTestPool, CDiskTxPos posThisTx,
+ CBlockIndex* pindexBlock, int64& nFees, bool fBlock, bool fMiner, int64 nMinFee)
{
// Take over previous transactions' spent pointers
if (!IsCoinBase())
@@ -983,9 +984,9 @@ bool CTransaction::ConnectInputs(CTxDB& txdb, map<uint256, CTxIndex>& mapTestPoo
// If prev is coinbase, check that it's matured
if (txPrev.IsCoinBase())
- for (CBlockIndex* pindex = pindexBest; pindex && nBestHeight - pindex->nHeight < COINBASE_MATURITY-1; pindex = pindex->pprev)
+ for (CBlockIndex* pindex = pindexBlock; pindex && pindexBlock->nHeight - pindex->nHeight < COINBASE_MATURITY; pindex = pindex->pprev)
if (pindex->nBlockPos == txindex.pos.nBlockPos && pindex->nFile == txindex.pos.nFile)
- return error("ConnectInputs() : tried to spend coinbase at depth %d", nBestHeight - pindex->nHeight);
+ return error("ConnectInputs() : tried to spend coinbase at depth %d", pindexBlock->nHeight - pindex->nHeight);
// Verify signature
if (!VerifySignature(txPrev, *this, i))
@@ -1019,7 +1020,7 @@ bool CTransaction::ConnectInputs(CTxDB& txdb, map<uint256, CTxIndex>& mapTestPoo
if (fBlock)
{
// Add transaction to disk index
- if (!txdb.AddTxIndex(*this, posThisTx, nHeight))
+ if (!txdb.AddTxIndex(*this, posThisTx, pindexBlock->nHeight))
return error("ConnectInputs() : AddTxPos failed");
}
else if (fMiner)
@@ -1108,7 +1109,7 @@ bool CBlock::ConnectBlock(CTxDB& txdb, CBlockIndex* pindex)
CDiskTxPos posThisTx(pindex->nFile, pindex->nBlockPos, nTxPos);
nTxPos += ::GetSerializeSize(tx, SER_DISK);
- if (!tx.ConnectInputs(txdb, mapUnused, posThisTx, pindex->nHeight, nFees, true, false))
+ if (!tx.ConnectInputs(txdb, mapUnused, posThisTx, pindex, nFees, true, false))
return false;
}
@@ -1379,14 +1380,12 @@ bool CBlock::AcceptBlock()
return error("AcceptBlock() : incorrect proof of work");
// Check that the block chain matches the known block chain up to a checkpoint
- if (pindexPrev->nHeight+1 == 11111 && hash != uint256("0x0000000069e244f73d78e8fd29ba2fd2ed618bd6fa2ee92559f542fdb26e7c1d"))
- return error("AcceptBlock() : rejected by checkpoint lockin at 11111");
- if (pindexPrev->nHeight+1 == 33333 && hash != uint256("0x000000002dd5588a74784eaa7ab0507a18ad16a236e7b1ce69f00d7ddfb5d0a6"))
- return error("AcceptBlock() : rejected by checkpoint lockin at 33333");
- if (pindexPrev->nHeight+1 == 68555 && hash != uint256("0x00000000001e1b4903550a0b96e9a9405c8a95f387162e4944e8d9fbe501cd6a"))
- return error("AcceptBlock() : rejected by checkpoint lockin at 68555");
- if (pindexPrev->nHeight+1 == 70567 && hash != uint256("0x00000000006a49b14bcf27462068f1264c961f11fa2e0eddd2be0791e1d4124a"))
- return error("AcceptBlock() : rejected by checkpoint lockin at 70567");
+ if ((pindexPrev->nHeight+1 == 11111 && hash != uint256("0x0000000069e244f73d78e8fd29ba2fd2ed618bd6fa2ee92559f542fdb26e7c1d")) ||
+ (pindexPrev->nHeight+1 == 33333 && hash != uint256("0x000000002dd5588a74784eaa7ab0507a18ad16a236e7b1ce69f00d7ddfb5d0a6")) ||
+ (pindexPrev->nHeight+1 == 68555 && hash != uint256("0x00000000001e1b4903550a0b96e9a9405c8a95f387162e4944e8d9fbe501cd6a")) ||
+ (pindexPrev->nHeight+1 == 70567 && hash != uint256("0x00000000006a49b14bcf27462068f1264c961f11fa2e0eddd2be0791e1d4124a")) ||
+ (pindexPrev->nHeight+1 == 74000 && hash != uint256("0x0000000000573993a3c9e41ce34471c079dcf5f52a0e824a81e7f953b8661a20")))
+ return error("AcceptBlock() : rejected by checkpoint lockin");
// Write block to history file
if (!CheckDiskSpace(::GetSerializeSize(*this, SER_DISK)))
@@ -2577,6 +2576,8 @@ inline void SHA256Transform(void* pstate, void* pinput, const void* pinit)
CryptoPP::SHA256::Transform((CryptoPP::word32*)pstate, (CryptoPP::word32*)pinput);
}
+static const int NPAR = 32;
+extern void Double_BlockSHA256(const void* pin, void* pout, const void* pinit, unsigned int hash[8][NPAR], const void* init2);
@@ -2658,7 +2659,7 @@ void BitcoinMiner()
int64 nMinFee = tx.GetMinFee(nBlockSize);
map<uint256, CTxIndex> mapTestPoolTmp(mapTestPool);
- if (!tx.ConnectInputs(txdb, mapTestPoolTmp, CDiskTxPos(1,1,1), 0, nFees, false, true, nMinFee))
+ if (!tx.ConnectInputs(txdb, mapTestPoolTmp, CDiskTxPos(1,1,1), pindexPrev, nFees, false, true, nMinFee))
continue;
swap(mapTestPool, mapTestPoolTmp);
@@ -2719,14 +2720,40 @@ void BitcoinMiner()
//
// Search
//
+ bool f4WaySSE2 = mapArgs.count("-4way");
int64 nStart = GetTime();
uint256 hashTarget = CBigNum().SetCompact(pblock->nBits).getuint256();
uint256 hashbuf[2];
uint256& hash = *alignup<16>(hashbuf);
loop
{
- SHA256Transform(&tmp.hash1, (char*)&tmp.block + 64, &midstate);
- SHA256Transform(&hash, &tmp.hash1, pSHA256InitState);
+#ifdef FOURWAYSSE2
+ if (f4WaySSE2)
+ {
+ // tcatm's 4-way SSE2 SHA-256
+ tmp.block.nNonce += NPAR;
+ unsigned int thashbuf[9][NPAR];
+ unsigned int (&thash)[9][NPAR] = *alignup<16>(&thashbuf);
+ Double_BlockSHA256((char*)&tmp.block + 64, &tmp.hash1, &midstate, thash, pSHA256InitState);
+ ((unsigned short*)&hash)[14] = 0xffff;
+ for (int j = 0; j < NPAR; j++)
+ {
+ if (thash[7][j] == 0)
+ {
+ for (int i = 0; i < sizeof(hash)/4; i++)
+ ((unsigned int*)&hash)[i] = thash[i][j];
+ pblock->nNonce = ByteReverse(tmp.block.nNonce + j);
+ }
+ }
+ }
+ else
+#endif
+ {
+ // Crypto++ SHA-256
+ tmp.block.nNonce++;
+ SHA256Transform(&tmp.hash1, (char*)&tmp.block + 64, &midstate);
+ SHA256Transform(&hash, &tmp.hash1, pSHA256InitState);
+ }
if (((unsigned short*)&hash)[14] == 0)
{
@@ -2736,7 +2763,10 @@ void BitcoinMiner()
if (hash <= hashTarget)
{
- pblock->nNonce = ByteReverse(tmp.block.nNonce);
+#ifdef FOURWAYSSE2
+ if (!f4WaySSE2)
+#endif
+ pblock->nNonce = ByteReverse(tmp.block.nNonce);
assert(hash == pblock->GetHash());
//// debug print
@@ -2775,7 +2805,7 @@ void BitcoinMiner()
// Update nTime every few seconds
const unsigned int nMask = 0xffff;
const int nHashesPerCycle = (nMask+1);
- if ((++tmp.block.nNonce & nMask) == 0)
+ if ((tmp.block.nNonce & nMask) == 0)
{
// Meter hashes/sec
static int nCycleCounter;
diff --git a/main.h b/main.h
index 2ddac59d4f..8ed3ef228b 100644
--- a/main.h
+++ b/main.h
@@ -613,7 +613,8 @@ public:
bool DisconnectInputs(CTxDB& txdb);
- bool ConnectInputs(CTxDB& txdb, map<uint256, CTxIndex>& mapTestPool, CDiskTxPos posThisTx, int nHeight, int64& nFees, bool fBlock, bool fMiner, int64 nMinFee=0);
+ bool ConnectInputs(CTxDB& txdb, map<uint256, CTxIndex>& mapTestPool, CDiskTxPos posThisTx,
+ CBlockIndex* pindexBlock, int64& nFees, bool fBlock, bool fMiner, int64 nMinFee=0);
bool ClientConnectInputs();
bool AcceptTransaction(CTxDB& txdb, bool fCheckInputs=true, bool* pfMissingInputs=NULL);
diff --git a/script.cpp b/script.cpp
index a88f915900..b33becc113 100644
--- a/script.cpp
+++ b/script.cpp
@@ -16,12 +16,30 @@ static const CBigNum bnZero(0);
static const CBigNum bnOne(1);
static const CBigNum bnFalse(0);
static const CBigNum bnTrue(1);
-static const size_t nMaxNumSize = 258;
+static const size_t nMaxNumSize = 4;
+CBigNum CastToBigNum(const valtype& vch)
+{
+ if (vch.size() > nMaxNumSize)
+ throw runtime_error("CastToBigNum() : overflow");
+ // Get rid of extra leading zeros
+ return CBigNum(CBigNum(vch).getvch());
+}
+
bool CastToBool(const valtype& vch)
{
- return (CBigNum(vch) != bnZero);
+ for (int i = 0; i < vch.size(); i++)
+ {
+ if (vch[i] != 0)
+ {
+ // Can be negative zero
+ if (i == vch.size()-1 && vch[i] == 0x80)
+ return false;
+ return true;
+ }
+ }
+ return false;
}
void MakeSameSize(valtype& vch1, valtype& vch2)
@@ -68,11 +86,28 @@ bool EvalScript(vector<vector<unsigned char> >& stack, const CScript& script, co
valtype vchPushValue;
if (!script.GetOp(pc, opcode, vchPushValue))
return false;
- if (vchPushValue.size() > 5000)
+ if (vchPushValue.size() > 520)
return false;
if (opcode > OP_16 && nOpCount++ > 200)
return false;
+ if (opcode == OP_CAT ||
+ opcode == OP_SUBSTR ||
+ opcode == OP_LEFT ||
+ opcode == OP_RIGHT ||
+ opcode == OP_INVERT ||
+ opcode == OP_AND ||
+ opcode == OP_OR ||
+ opcode == OP_XOR ||
+ opcode == OP_2MUL ||
+ opcode == OP_2DIV ||
+ opcode == OP_MUL ||
+ opcode == OP_DIV ||
+ opcode == OP_MOD ||
+ opcode == OP_LSHIFT ||
+ opcode == OP_RSHIFT)
+ return false;
+
if (fExec && opcode <= OP_PUSHDATA4)
stack.push_back(vchPushValue);
else if (fExec || (OP_IF <= opcode && opcode <= OP_ENDIF))
@@ -332,7 +367,7 @@ bool EvalScript(vector<vector<unsigned char> >& stack, const CScript& script, co
// (xn ... x2 x1 x0 n - ... x2 x1 x0 xn)
if (stack.size() < 2)
return false;
- int n = CBigNum(stacktop(-1)).getint();
+ int n = CastToBigNum(stacktop(-1)).getint();
stack.pop_back();
if (n < 0 || n >= stack.size())
return false;
@@ -387,7 +422,7 @@ bool EvalScript(vector<vector<unsigned char> >& stack, const CScript& script, co
valtype& vch2 = stacktop(-1);
vch1.insert(vch1.end(), vch2.begin(), vch2.end());
stack.pop_back();
- if (stacktop(-1).size() > 5000)
+ if (stacktop(-1).size() > 520)
return false;
}
break;
@@ -398,8 +433,8 @@ bool EvalScript(vector<vector<unsigned char> >& stack, const CScript& script, co
if (stack.size() < 3)
return false;
valtype& vch = stacktop(-3);
- int nBegin = CBigNum(stacktop(-2)).getint();
- int nEnd = nBegin + CBigNum(stacktop(-1)).getint();
+ int nBegin = CastToBigNum(stacktop(-2)).getint();
+ int nEnd = nBegin + CastToBigNum(stacktop(-1)).getint();
if (nBegin < 0 || nEnd < nBegin)
return false;
if (nBegin > vch.size())
@@ -420,7 +455,7 @@ bool EvalScript(vector<vector<unsigned char> >& stack, const CScript& script, co
if (stack.size() < 2)
return false;
valtype& vch = stacktop(-2);
- int nSize = CBigNum(stacktop(-1)).getint();
+ int nSize = CastToBigNum(stacktop(-1)).getint();
if (nSize < 0)
return false;
if (nSize > vch.size())
@@ -531,9 +566,7 @@ bool EvalScript(vector<vector<unsigned char> >& stack, const CScript& script, co
// (in -- out)
if (stack.size() < 1)
return false;
- if (stacktop(-1).size() > nMaxNumSize)
- return false;
- CBigNum bn(stacktop(-1));
+ CBigNum bn = CastToBigNum(stacktop(-1));
switch (opcode)
{
case OP_1ADD: bn += bnOne; break;
@@ -572,11 +605,8 @@ bool EvalScript(vector<vector<unsigned char> >& stack, const CScript& script, co
// (x1 x2 -- out)
if (stack.size() < 2)
return false;
- if (stacktop(-2).size() > nMaxNumSize ||
- stacktop(-1).size() > nMaxNumSize)
- return false;
- CBigNum bn1(stacktop(-2));
- CBigNum bn2(stacktop(-1));
+ CBigNum bn1 = CastToBigNum(stacktop(-2));
+ CBigNum bn2 = CastToBigNum(stacktop(-1));
CBigNum bn;
switch (opcode)
{
@@ -646,13 +676,9 @@ bool EvalScript(vector<vector<unsigned char> >& stack, const CScript& script, co
// (x min max -- out)
if (stack.size() < 3)
return false;
- if (stacktop(-3).size() > nMaxNumSize ||
- stacktop(-2).size() > nMaxNumSize ||
- stacktop(-1).size() > nMaxNumSize)
- return false;
- CBigNum bn1(stacktop(-3));
- CBigNum bn2(stacktop(-2));
- CBigNum bn3(stacktop(-1));
+ CBigNum bn1 = CastToBigNum(stacktop(-3));
+ CBigNum bn2 = CastToBigNum(stacktop(-2));
+ CBigNum bn3 = CastToBigNum(stacktop(-1));
bool fValue = (bn2 <= bn1 && bn1 < bn3);
stack.pop_back();
stack.pop_back();
@@ -748,7 +774,7 @@ bool EvalScript(vector<vector<unsigned char> >& stack, const CScript& script, co
if (stack.size() < i)
return false;
- int nKeysCount = CBigNum(stacktop(-i)).getint();
+ int nKeysCount = CastToBigNum(stacktop(-i)).getint();
if (nKeysCount < 0)
return false;
int ikey = ++i;
@@ -756,7 +782,7 @@ bool EvalScript(vector<vector<unsigned char> >& stack, const CScript& script, co
if (stack.size() < i)
return false;
- int nSigsCount = CBigNum(stacktop(-i)).getint();
+ int nSigsCount = CastToBigNum(stacktop(-i)).getint();
if (nSigsCount < 0 || nSigsCount > nKeysCount)
return false;
int isig = ++i;
diff --git a/serialize.h b/serialize.h
index 0c60bb9e03..591aaa49e3 100644
--- a/serialize.h
+++ b/serialize.h
@@ -19,8 +19,8 @@ class CScript;
class CDataStream;
class CAutoFile;
-static const int VERSION = 308;
-static const char* pszSubVer = ".4";
+static const int VERSION = 309;
+static const char* pszSubVer = ".0";
diff --git a/setup.nsi b/setup.nsi
index ae871fddb0..da03820bba 100644
--- a/setup.nsi
+++ b/setup.nsi
@@ -7,7 +7,7 @@ RequestExecutionLevel highest
# General Symbol Definitions
!define REGKEY "SOFTWARE\$(^Name)"
-!define VERSION 0.3.8
+!define VERSION 0.3.9
!define COMPANY "Bitcoin project"
!define URL http://www.bitcoin.org/
@@ -42,12 +42,12 @@ Var StartMenuGroup
!insertmacro MUI_LANGUAGE English
# Installer attributes
-OutFile bitcoin-0.3.8-win32-setup.exe
+OutFile bitcoin-0.3.9-win32-setup.exe
InstallDir $PROGRAMFILES\Bitcoin
CRCCheck on
XPStyle on
ShowInstDetails show
-VIProductVersion 0.3.8.0
+VIProductVersion 0.3.9.0
VIAddVersionKey ProductName Bitcoin
VIAddVersionKey ProductVersion "${VERSION}"
VIAddVersionKey CompanyName "${COMPANY}"
diff --git a/sha256.cpp b/sha256.cpp
new file mode 100644
index 0000000000..bcab2b9639
--- /dev/null
+++ b/sha256.cpp
@@ -0,0 +1,419 @@
+// Copyright (c) 2010 Satoshi Nakamoto
+// Distributed under the MIT/X11 software license, see the accompanying
+// file license.txt or http://www.opensource.org/licenses/mit-license.php.
+
+#include <string.h>
+#include <assert.h>
+
+#include <xmmintrin.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define NPAR 32
+
+static const unsigned int sha256_consts[] = {
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, /* 0 */
+ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, /* 8 */
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, /* 16 */
+ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, /* 24 */
+ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, /* 32 */
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, /* 40 */
+ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, /* 48 */
+ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, /* 56 */
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+
+static inline __m128i Ch(const __m128i b, const __m128i c, const __m128i d) {
+ return (b & c) ^ (~b & d);
+}
+
+static inline __m128i Maj(const __m128i b, const __m128i c, const __m128i d) {
+ return (b & c) ^ (b & d) ^ (c & d);
+}
+
+static inline __m128i ROTR(__m128i x, const int n) {
+ return _mm_srli_epi32(x, n) | _mm_slli_epi32(x, 32 - n);
+}
+
+static inline __m128i SHR(__m128i x, const int n) {
+ return _mm_srli_epi32(x, n);
+}
+
+/* SHA256 Functions */
+#define BIGSIGMA0_256(x) (ROTR((x), 2) ^ ROTR((x), 13) ^ ROTR((x), 22))
+#define BIGSIGMA1_256(x) (ROTR((x), 6) ^ ROTR((x), 11) ^ ROTR((x), 25))
+#define SIGMA0_256(x) (ROTR((x), 7) ^ ROTR((x), 18) ^ SHR((x), 3))
+#define SIGMA1_256(x) (ROTR((x), 17) ^ ROTR((x), 19) ^ SHR((x), 10))
+
+static inline unsigned int store32(const __m128i x, int i) {
+ union { unsigned int ret[4]; __m128i x; } box;
+ box.x = x;
+ return box.ret[i];
+}
+
+static inline void store_epi32(const __m128i x, unsigned int *x0, unsigned int *x1, unsigned int *x2, unsigned int *x3) {
+ union { unsigned int ret[4]; __m128i x; } box;
+ box.x = x;
+ *x0 = box.ret[3]; *x1 = box.ret[2]; *x2 = box.ret[1]; *x3 = box.ret[0];
+}
+
+#define add4(x0, x1, x2, x3) _mm_add_epi32(_mm_add_epi32(_mm_add_epi32(x0, x1), x2), x3)
+#define add5(x0, x1, x2, x3, x4) _mm_add_epi32(add4(x0, x1, x2, x3), x4)
+
+#define SHA256ROUND(a, b, c, d, e, f, g, h, i, w) \
+ T1 = add5(h, BIGSIGMA1_256(e), Ch(e, f, g), _mm_set1_epi32(sha256_consts[i]), w); \
+d = _mm_add_epi32(d, T1); \
+h = _mm_add_epi32(T1, _mm_add_epi32(BIGSIGMA0_256(a), Maj(a, b, c)));
+
+static inline void dumpreg(__m128i x, char *msg) {
+ union { unsigned int ret[4]; __m128i x; } box;
+ box.x = x ;
+ printf("%s %08x %08x %08x %08x\n", msg, box.ret[0], box.ret[1], box.ret[2], box.ret[3]);
+}
+
+#if 1
+#define dumpstate(i) printf("%s: %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", \
+ __func__, store32(w0, i), store32(a, i), store32(b, i), store32(c, i), store32(d, i), store32(e, i), store32(f, i), store32(g, i), store32(h, i));
+#else
+#define dumpstate()
+#endif
+void Double_BlockSHA256(const void* pin, void* pad, const void *pre, unsigned int thash[9][NPAR], const void *init)
+{
+ unsigned int* In = (unsigned int*)pin;
+ unsigned int* Pad = (unsigned int*)pad;
+ unsigned int* hPre = (unsigned int*)pre;
+ unsigned int* hInit = (unsigned int*)init;
+ unsigned int i, j, k;
+
+ /* vectors used in calculation */
+ __m128i w0, w1, w2, w3, w4, w5, w6, w7;
+ __m128i w8, w9, w10, w11, w12, w13, w14, w15;
+ __m128i T1;
+ __m128i a, b, c, d, e, f, g, h;
+ __m128i nonce;
+
+ /* nonce offset for vector */
+ __m128i offset = _mm_set_epi32(0x00000003, 0x00000002, 0x00000001, 0x00000000);
+
+
+ for(k = 0; k<NPAR; k+=4) {
+ w0 = _mm_set1_epi32(In[0]);
+ w1 = _mm_set1_epi32(In[1]);
+ w2 = _mm_set1_epi32(In[2]);
+ //w3 = _mm_set1_epi32(In[3]); nonce will be later hacked into the hash
+ w4 = _mm_set1_epi32(In[4]);
+ w5 = _mm_set1_epi32(In[5]);
+ w6 = _mm_set1_epi32(In[6]);
+ w7 = _mm_set1_epi32(In[7]);
+ w8 = _mm_set1_epi32(In[8]);
+ w9 = _mm_set1_epi32(In[9]);
+ w10 = _mm_set1_epi32(In[10]);
+ w11 = _mm_set1_epi32(In[11]);
+ w12 = _mm_set1_epi32(In[12]);
+ w13 = _mm_set1_epi32(In[13]);
+ w14 = _mm_set1_epi32(In[14]);
+ w15 = _mm_set1_epi32(In[15]);
+
+ /* hack nonce into lowest byte of w3 */
+ nonce = _mm_set1_epi32(In[3]);
+ nonce = _mm_add_epi32(nonce, offset);
+ nonce = _mm_add_epi32(nonce, _mm_set1_epi32(k));
+ w3 = nonce;
+
+ a = _mm_set1_epi32(hPre[0]);
+ b = _mm_set1_epi32(hPre[1]);
+ c = _mm_set1_epi32(hPre[2]);
+ d = _mm_set1_epi32(hPre[3]);
+ e = _mm_set1_epi32(hPre[4]);
+ f = _mm_set1_epi32(hPre[5]);
+ g = _mm_set1_epi32(hPre[6]);
+ h = _mm_set1_epi32(hPre[7]);
+
+ SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
+ SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
+ SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
+ SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
+ SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
+ SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
+ SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
+ SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
+ SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
+ SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
+ SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
+ SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
+ SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
+ SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
+ SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
+ SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);
+
+ w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
+ SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
+ w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
+ SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
+ w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
+ SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
+ w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
+ SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
+ w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
+ SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
+ w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
+ SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
+ w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
+ SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
+ w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
+ SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
+ w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
+ SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
+ w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
+ SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
+ w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
+ SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
+ w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
+ SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
+ w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
+ SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
+ w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
+ SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
+ w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
+ SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
+ w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
+ SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);
+
+ w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
+ SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
+ w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
+ SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
+ w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
+ SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
+ w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
+ SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
+ w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
+ SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
+ w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
+ SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
+ w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
+ SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
+ w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
+ SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
+ w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
+ SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
+ w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
+ SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
+ w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
+ SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
+ w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
+ SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
+ w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
+ SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
+ w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
+ SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
+ w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
+ SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
+ w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
+ SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);
+
+ w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
+ SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
+ w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
+ SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
+ w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
+ SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
+ w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
+ SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
+ w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
+ SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
+ w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
+ SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
+ w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
+ SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
+ w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
+ SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
+ w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
+ SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
+ w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
+ SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
+ w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
+ SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
+ w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
+ SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
+ w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
+ SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
+ w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
+ SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
+ w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
+ SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
+ w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
+ SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);
+
+#define store_load(x, i, dest) \
+ T1 = _mm_set1_epi32((hPre)[i]); \
+ dest = _mm_add_epi32(T1, x);
+
+ store_load(a, 0, w0);
+ store_load(b, 1, w1);
+ store_load(c, 2, w2);
+ store_load(d, 3, w3);
+ store_load(e, 4, w4);
+ store_load(f, 5, w5);
+ store_load(g, 6, w6);
+ store_load(h, 7, w7);
+
+ w8 = _mm_set1_epi32(Pad[8]);
+ w9 = _mm_set1_epi32(Pad[9]);
+ w10 = _mm_set1_epi32(Pad[10]);
+ w11 = _mm_set1_epi32(Pad[11]);
+ w12 = _mm_set1_epi32(Pad[12]);
+ w13 = _mm_set1_epi32(Pad[13]);
+ w14 = _mm_set1_epi32(Pad[14]);
+ w15 = _mm_set1_epi32(Pad[15]);
+
+ a = _mm_set1_epi32(hInit[0]);
+ b = _mm_set1_epi32(hInit[1]);
+ c = _mm_set1_epi32(hInit[2]);
+ d = _mm_set1_epi32(hInit[3]);
+ e = _mm_set1_epi32(hInit[4]);
+ f = _mm_set1_epi32(hInit[5]);
+ g = _mm_set1_epi32(hInit[6]);
+ h = _mm_set1_epi32(hInit[7]);
+
+ SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
+ SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
+ SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
+ SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
+ SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
+ SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
+ SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
+ SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
+ SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
+ SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
+ SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
+ SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
+ SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
+ SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
+ SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
+ SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);
+
+ w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
+ SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
+ w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
+ SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
+ w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
+ SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
+ w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
+ SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
+ w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
+ SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
+ w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
+ SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
+ w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
+ SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
+ w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
+ SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
+ w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
+ SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
+ w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
+ SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
+ w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
+ SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
+ w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
+ SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
+ w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
+ SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
+ w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
+ SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
+ w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
+ SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
+ w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
+ SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);
+
+ w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
+ SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
+ w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
+ SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
+ w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
+ SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
+ w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
+ SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
+ w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
+ SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
+ w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
+ SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
+ w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
+ SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
+ w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
+ SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
+ w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
+ SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
+ w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
+ SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
+ w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
+ SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
+ w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
+ SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
+ w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
+ SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
+ w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
+ SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
+ w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
+ SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
+ w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
+ SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);
+
+ w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
+ SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
+ w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
+ SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
+ w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
+ SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
+ w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
+ SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
+ w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
+ SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
+ w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
+ SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
+ w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
+ SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
+ w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
+ SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
+ w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
+ SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
+ w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
+ SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
+ w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
+ SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
+ w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
+ SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
+ w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
+ SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
+ w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
+ SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
+ w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
+ SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
+ w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
+ SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);
+
+ /* store resulsts directly in thash */
+#define store_2(x,i) \
+ w0 = _mm_set1_epi32(hInit[i]); \
+ *(__m128i *)&(thash)[i][0+k] = _mm_add_epi32(w0, x);
+
+ store_2(a, 0);
+ store_2(b, 1);
+ store_2(c, 2);
+ store_2(d, 3);
+ store_2(e, 4);
+ store_2(f, 5);
+ store_2(g, 6);
+ store_2(h, 7);
+ *(__m128i *)&(thash)[8][0+k] = nonce;
+ }
+
+}
diff --git a/util.cpp b/util.cpp
index 7cb6600fd1..8f06e30870 100644
--- a/util.cpp
+++ b/util.cpp
@@ -19,6 +19,14 @@ bool fCommandLine = false;
+// Workaround for "multiple definition of `_tls_used'"
+// http://svn.boost.org/trac/boost/ticket/4258
+extern "C" void tss_cleanup_implemented() { }
+
+
+
+
+
// Init openssl library multithreading support
static boost::interprocess::interprocess_mutex** ppmutexOpenSSL;
void locking_callback(int mode, int i, const char* file, int line)