bmw_large.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626
  1. /* bmw_large.c */
  2. /*
  3. This file is part of the ARM-Crypto-Lib.
  4. Copyright (C) 2006-2010 Daniel Otte (daniel.otte@rub.de)
  5. This program is free software: you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation, either version 3 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program. If not, see <http://www.gnu.org/licenses/>.
  15. */
  16. /*
  17. * \file bmw_large.c
  18. * \author Daniel Otte
  19. * \email daniel.otte@rub.de
  20. * \date 2009-04-27
  21. * \license GPLv3 or later
  22. *
  23. */
  24. #include <stdint.h>
  25. #include <string.h>
  26. #include <crypto/bmw_large.h>
/*
 * 64-bit shift and rotate primitives. Each macro forwards to the static
 * helper function of the matching name defined in this file.
 */
#define SHL64(a,n) shiftl64(a,n)
#define SHR64(a,n) shiftr64(a,n)
#define ROTL64(a,n) rotl64(a,n)
#define ROTR64(a,n) rotr64(a,n)
/*
 * Compile-time switches:
 *  TWEAK   - non-zero selects the "#if TWEAK" code paths in the expand
 *            functions, in bmw_large_f0 and in bmw_large_lastBlock.
 *  BUG24   - non-zero makes bmw384_init overwrite h[6] with a fixed value.
 *  F0_HACK - selects which of the three bmw_large_f0 implementations
 *            (0, 1 or 2) is compiled.
 *  DEBUG   - non-zero compiles the cli based dump helpers; otherwise
 *            ctx_dump() and dump_x() expand to nothing.
 */
#define TWEAK 1
#define BUG24 0
#define F0_HACK 2
#define DEBUG 0
  35. #if DEBUG
  36. #include <crypto/cli.h>
  37. void ctx_dump(const bmw_large_ctx_t* ctx){
  38. uint8_t i;
  39. cli_putstr("\r\n==== ctx dump ====");
  40. for(i=0; i<16;++i){
  41. cli_putstr("\r\n h[");
  42. cli_hexdump(&i, 1);
  43. cli_putstr("] = ");
  44. cli_hexdump_rev(&(ctx->h[i]), 8);
  45. }
  46. cli_putstr("\r\n counter = ");
  47. cli_hexdump(&(ctx->counter), 4);
  48. }
  49. void dump_x(const uint64_t* q, uint8_t elements, char x){
  50. uint8_t i;
  51. cli_putstr("\r\n==== ");
  52. cli_putc(x);
  53. cli_putstr(" dump ====");
  54. for(i=0; i<elements;++i){
  55. cli_putstr("\r\n ");
  56. cli_putc(x);
  57. cli_putstr("[");
  58. cli_hexdump(&i, 1);
  59. cli_putstr("] = ");
  60. cli_hexdump_rev(&(q[i]), 8);
  61. }
  62. }
  63. #else
  64. #define ctx_dump(x)
  65. #define dump_x(a,b,c)
  66. #endif
  67. static
  68. uint64_t rotl64(uint64_t a, uint8_t r){
  69. return (a<<r)|(a>>(64-r));
  70. }
  71. static
  72. uint64_t rotr64(uint64_t a, uint8_t r){
  73. return (a>>r)|(a<<(64-r));
  74. }
  75. static
  76. uint64_t shiftl64(uint64_t a, uint8_t r){
  77. return (a<<r);
  78. }
  79. static
  80. uint64_t shiftr64(uint64_t a, uint8_t r){
  81. return (a>>r);
  82. }
  83. static
  84. uint64_t bmw_large_s0(uint64_t x){
  85. uint64_t r;
  86. r = SHR64(x, 1)
  87. ^ SHL64(x, 3)
  88. ^ ROTL64(x, 4)
  89. ^ ROTR64(x, 64-37);
  90. return r;
  91. }
  92. static
  93. uint64_t bmw_large_s1(uint64_t x){
  94. uint64_t r;
  95. r = SHR64(x, 1)
  96. ^ SHL64(x, 2)
  97. ^ ROTL64(x,13)
  98. ^ ROTR64(x,64-43);
  99. return r;
  100. }
  101. static
  102. uint64_t bmw_large_s2(uint64_t x){
  103. uint64_t r;
  104. r = SHR64(x, 2)
  105. ^ SHL64(x, 1)
  106. ^ ROTL64(x, 19)
  107. ^ ROTR64(x, 64-53);
  108. return r;
  109. }
  110. static
  111. uint64_t bmw_large_s3(uint64_t x){
  112. uint64_t r;
  113. r = SHR64(x, 2)
  114. ^ SHL64(x, 2)
  115. ^ ROTL64(x, 28)
  116. ^ ROTR64(x, 64-59);
  117. return r;
  118. }
  119. static
  120. uint64_t bmw_large_s4(uint64_t x){
  121. uint64_t r;
  122. r = SHR64(x, 1)
  123. ^ x;
  124. return r;
  125. }
  126. static
  127. uint64_t bmw_large_s5(uint64_t x){
  128. uint64_t r;
  129. r = SHR64(x, 2)
  130. ^ x;
  131. return r;
  132. }
  133. static
  134. uint64_t bmw_large_r1(uint64_t x){
  135. uint64_t r;
  136. r = ROTL64(x, 5);
  137. return r;
  138. }
  139. static
  140. uint64_t bmw_large_r2(uint64_t x){
  141. uint64_t r;
  142. r = ROTL64(x, 11);
  143. return r;
  144. }
  145. static
  146. uint64_t bmw_large_r3(uint64_t x){
  147. uint64_t r;
  148. r = ROTL64(x, 27);
  149. return r;
  150. }
  151. static
  152. uint64_t bmw_large_r4(uint64_t x){
  153. uint64_t r;
  154. r = ROTL64(x, 32);
  155. return r;
  156. }
  157. static
  158. uint64_t bmw_large_r5(uint64_t x){
  159. uint64_t r;
  160. r = ROTR64(x, 64-37);
  161. return r;
  162. }
  163. static
  164. uint64_t bmw_large_r6(uint64_t x){
  165. uint64_t r;
  166. r = ROTR64(x, 64-43);
  167. return r;
  168. }
  169. static
  170. uint64_t bmw_large_r7(uint64_t x){
  171. uint64_t r;
  172. r = ROTR64(x, 64-53);
  173. return r;
  174. }
  175. /*
  176. #define K 0x0555555555555555LL
  177. #define MASK 0xFFFFFFFFFFFFFFFFLL
  178. static
  179. uint64_t k_lut[] PROGMEM = {
  180. 16LL*K, 17LL*K, 18LL*K, 19LL*K,
  181. 20LL*K, 21LL*K, 22LL*K, 23LL*K,
  182. 24LL*K, 25LL*K, 26LL*K, 27LL*K,
  183. 28LL*K, 29LL*K, 30LL*K, 31LL*K };
  184. */
  185. /* the same as above but precomputed to avoid compiler warnings */
  186. static const
  187. uint64_t k_lut[] = {
  188. 0x5555555555555550LL, 0x5aaaaaaaaaaaaaa5LL, 0x5ffffffffffffffaLL,
  189. 0x655555555555554fLL, 0x6aaaaaaaaaaaaaa4LL, 0x6ffffffffffffff9LL,
  190. 0x755555555555554eLL, 0x7aaaaaaaaaaaaaa3LL, 0x7ffffffffffffff8LL,
  191. 0x855555555555554dLL, 0x8aaaaaaaaaaaaaa2LL, 0x8ffffffffffffff7LL,
  192. 0x955555555555554cLL, 0x9aaaaaaaaaaaaaa1LL, 0x9ffffffffffffff6LL,
  193. 0xa55555555555554bLL };
  194. static
  195. uint64_t bmw_large_expand1(uint8_t j, const uint64_t* q, const void* m, const void* h){
  196. uint64_t(*s[])(uint64_t) = {bmw_large_s1, bmw_large_s2, bmw_large_s3, bmw_large_s0};
  197. uint64_t a = 0;
  198. union{
  199. uint64_t v64;
  200. uint32_t v32[2];
  201. } r;
  202. uint8_t i;
  203. /* r = 0x0555555555555555LL*(j+16); */
  204. r.v64 = k_lut[j];
  205. for(i=0; i<16; ++i){
  206. a += s[i%4](q[j+i]);
  207. }
  208. #if TWEAK
  209. a += ( ROTL64(((uint64_t*)m)[(j)&0xf], ((j+ 0)&0xf)+1)
  210. + ROTL64(((uint64_t*)m)[(j+3)&0xf], ((j+ 3)&0xf)+1)
  211. + r.v64
  212. - ROTL64(((uint64_t*)m)[(j+10)&0xf],((j+10)&0xf)+1)
  213. ) ^ ((uint64_t*)h)[(j+7)&0xf];
  214. #else
  215. a += ((uint64_t*)m)[j&0xf];
  216. a += ((uint64_t*)m)[(j+3)&0xf];
  217. a -= ((uint64_t*)m)[(j+10)&0xf];
  218. a += r.v64;
  219. #endif
  220. return a;
  221. }
  222. static
  223. uint64_t bmw_large_expand2(uint8_t j, const uint64_t* q, const void* m, const void* h){
  224. uint64_t(*rf[])(uint64_t) = {bmw_large_r1, bmw_large_r2, bmw_large_r3,
  225. bmw_large_r4, bmw_large_r5, bmw_large_r6,
  226. bmw_large_r7};
  227. uint64_t a=0;
  228. union{
  229. uint64_t v64;
  230. uint32_t v32[2];
  231. } r;
  232. uint8_t i;
  233. /* r = 0x0555555555555555LL*(j+16); */
  234. r.v64 = k_lut[j];
  235. for(i=0; i<14; i+=2){
  236. a += q[j+i];
  237. }
  238. for(i=0; i<14; i+=2){
  239. a += rf[i/2](q[j+i+1]);
  240. }
  241. #if TWEAK
  242. a += bmw_large_s4(q[j+14]);
  243. a += bmw_large_s5(q[j+15]);
  244. #else
  245. a += bmw_large_s5(q[j+14]);
  246. a += bmw_large_s4(q[j+15]);
  247. #endif
  248. #if TWEAK
  249. /*
  250. if(j==(22-16)){
  251. uint64_t t;
  252. cli_putstr("\n+++++++++ expand_2 ++++++++++++");
  253. dump_x(&a, 1, 'a');
  254. dump_x(&r, 1, 'r');
  255. t=ROTL64(((uint64_t*)m)[j], ((j+ 0)&0xf)+1);
  256. dump_x(&t, 1, '0');
  257. t=ROTL64(((uint64_t*)m)[j], ((j+ 3)&0xf)+1);
  258. dump_x(&t, 1, '0');
  259. t=ROTL64(((uint64_t*)m)[j], ((j+ 0)&0xf)+1);
  260. dump_x(&t, 1, '0');
  261. }
  262. */
  263. a += ( ROTL64(((uint64_t*)m)[(j)&0xf], ((j+ 0)&0xf)+1)
  264. + ROTL64(((uint64_t*)m)[(j+3)&0xf], ((j+ 3)&0xf)+1)
  265. + r.v64
  266. - ROTL64(((uint64_t*)m)[(j+10)&0xf],((j+10)&0xf)+1)
  267. ) ^ ((uint64_t*)h)[(j+7)&0xf];
  268. #else
  269. a += ((uint64_t*)m)[j&0xf];
  270. a += ((uint64_t*)m)[(j+3)&0xf];
  271. a -= ((uint64_t*)m)[(j+10)&0xf];
  272. a += r.v64;
  273. #endif
  274. return a;
  275. }
  276. #if F0_HACK==2
  277. /* to understand this implementation take a look at f0-opt-table.txt */
  278. static uint16_t hack_table[5] = { 0x0311, 0xDDB3, 0x2A79, 0x07AA, 0x51C2 };
  279. static uint8_t offset_table[5] = { 4+16, 6+16, 9+16, 12+16, 13+16 };
  280. static
  281. void bmw_large_f0(uint64_t* q, const uint64_t* h, const void* m){
  282. uint16_t hack_reg;
  283. uint8_t i,j,c;
  284. uint64_t(*s[])(uint64_t)={ bmw_large_s0, bmw_large_s1, bmw_large_s2,
  285. bmw_large_s3, bmw_large_s4 };
  286. for(i=0; i<16; ++i){
  287. ((uint64_t*)h)[i] ^= ((uint64_t*)m)[i];
  288. }
  289. dump_x(h, 16, 'T');
  290. memset(q, 0, 8*16);
  291. c=4;
  292. do{
  293. i=15;
  294. j = offset_table[c];
  295. hack_reg = hack_table[c];
  296. do{
  297. if(hack_reg&1){
  298. q[i]-= h[j&15];
  299. }else{
  300. q[i]+= h[j&15];
  301. }
  302. --j;
  303. hack_reg>>= 1;
  304. }while(i--!=0);
  305. }while(c--!=0);
  306. dump_x(q, 16, 'W');
  307. for(i=0; i<16; ++i){
  308. q[i] = s[i%5](q[i]);
  309. }
  310. #if TWEAK
  311. for(i=0; i<16; ++i){
  312. ((uint64_t*)h)[i] ^= ((uint64_t*)m)[i];
  313. }
  314. for(i=0; i<16; ++i){
  315. q[i] += h[(i+1)&0xf];
  316. }
  317. #endif /* TWEAK */
  318. }
  319. #endif /* F0_HACK==2 */
  320. #if F0_HACK==1
  321. static
  322. uint8_t f0_lut[] PROGMEM ={
  323. 5<<1, ( 7<<1)+1, (10<<1)+0, (13<<1)+0, (14<<1)+0,
  324. 6<<1, ( 8<<1)+1, (11<<1)+0, (14<<1)+0, (15<<1)+1,
  325. 0<<1, ( 7<<1)+0, ( 9<<1)+0, (12<<1)+1, (15<<1)+0,
  326. 0<<1, ( 1<<1)+1, ( 8<<1)+0, (10<<1)+1, (13<<1)+0,
  327. 1<<1, ( 2<<1)+0, ( 9<<1)+0, (11<<1)+1, (14<<1)+1,
  328. 3<<1, ( 2<<1)+1, (10<<1)+0, (12<<1)+1, (15<<1)+0,
  329. 4<<1, ( 0<<1)+1, ( 3<<1)+1, (11<<1)+1, (13<<1)+0,
  330. 1<<1, ( 4<<1)+1, ( 5<<1)+1, (12<<1)+1, (14<<1)+1,
  331. 2<<1, ( 5<<1)+1, ( 6<<1)+1, (13<<1)+0, (15<<1)+1,
  332. 0<<1, ( 3<<1)+1, ( 6<<1)+0, ( 7<<1)+1, (14<<1)+0,
  333. 8<<1, ( 1<<1)+1, ( 4<<1)+1, ( 7<<1)+1, (15<<1)+0,
  334. 8<<1, ( 0<<1)+1, ( 2<<1)+1, ( 5<<1)+1, ( 9<<1)+0,
  335. 1<<1, ( 3<<1)+0, ( 6<<1)+1, ( 9<<1)+1, (10<<1)+0,
  336. 2<<1, ( 4<<1)+0, ( 7<<1)+0, (10<<1)+0, (11<<1)+0,
  337. 3<<1, ( 5<<1)+1, ( 8<<1)+0, (11<<1)+1, (12<<1)+1,
  338. 12<<1, ( 4<<1)+1, ( 6<<1)+1, ( 9<<1)+1, (13<<1)+0
  339. };
  340. static
  341. void bmw_large_f0(uint64_t* q, const uint64_t* h, const void* m){
  342. uint8_t i,j=-1,v,sign,l=0;
  343. uint64_t(*s[])(uint64_t)={ bmw_large_s0, bmw_large_s1, bmw_large_s2,
  344. bmw_large_s3, bmw_large_s4 };
  345. for(i=0; i<16; ++i){
  346. ((uint64_t*)h)[i] ^= ((uint64_t*)m)[i];
  347. }
  348. dump_x(h, 16, 'T');
  349. // memset(q, 0, 4*16);
  350. for(i=0; i<5*16; ++i){
  351. v = pgm_read_byte(f0_lut+i);
  352. sign = v&1;
  353. v >>=1;
  354. if(i==l){
  355. j++;
  356. l+=5;
  357. q[j] = h[v];
  358. continue;
  359. }
  360. if(sign){
  361. q[j] -= h[v];
  362. }else{
  363. q[j] += h[v];
  364. }
  365. }
  366. dump_x(q, 16, 'W');
  367. for(i=0; i<16; ++i){
  368. q[i] = s[i%5](q[i]);
  369. }
  370. #if TWEAK
  371. for(i=0; i<16; ++i){
  372. ((uint64_t*)h)[i] ^= ((uint64_t*)m)[i];
  373. }
  374. for(i=0; i<16; ++i){
  375. q[i] += h[(i+1)&0xf];
  376. }
  377. #endif /* TWEAK */
  378. }
  379. #endif /* F0_HACK==1 */
  380. #if F0_HACK==0
  381. static
  382. void bmw_large_f0(uint64_t* q, const uint64_t* h, const void* m){
  383. uint8_t i;
  384. uint64_t(*s[])(uint64_t)={ bmw_large_s0, bmw_large_s1, bmw_large_s2,
  385. bmw_large_s3, bmw_large_s4 };
  386. for(i=0; i<16; ++i){
  387. ((uint64_t*)h)[i] ^= ((uint64_t*)m)[i];
  388. }
  389. // dump_x(t, 16, 'T');
  390. q[ 0] = (h[ 5] - h[ 7] + h[10] + h[13] + h[14]);
  391. q[ 1] = (h[ 6] - h[ 8] + h[11] + h[14] - h[15]);
  392. q[ 2] = (h[ 0] + h[ 7] + h[ 9] - h[12] + h[15]);
  393. q[ 3] = (h[ 0] - h[ 1] + h[ 8] - h[10] + h[13]);
  394. q[ 4] = (h[ 1] + h[ 2] + h[ 9] - h[11] - h[14]);
  395. q[ 5] = (h[ 3] - h[ 2] + h[10] - h[12] + h[15]);
  396. q[ 6] = (h[ 4] - h[ 0] - h[ 3] - h[11] + h[13]);
  397. q[ 7] = (h[ 1] - h[ 4] - h[ 5] - h[12] - h[14]);
  398. q[ 8] = (h[ 2] - h[ 5] - h[ 6] + h[13] - h[15]);
  399. q[ 9] = (h[ 0] - h[ 3] + h[ 6] - h[ 7] + h[14]);
  400. q[10] = (h[ 8] - h[ 1] - h[ 4] - h[ 7] + h[15]);
  401. q[11] = (h[ 8] - h[ 0] - h[ 2] - h[ 5] + h[ 9]);
  402. q[12] = (h[ 1] + h[ 3] - h[ 6] - h[ 9] + h[10]);
  403. q[13] = (h[ 2] + h[ 4] + h[ 7] + h[10] + h[11]);
  404. q[14] = (h[ 3] - h[ 5] + h[ 8] - h[11] - h[12]);
  405. q[15] = (h[12] - h[ 4] - h[ 6] - h[ 9] + h[13]);
  406. dump_x(q, 16, 'W');
  407. for(i=0; i<16; ++i){
  408. q[i] = s[i%5](q[i]);
  409. }
  410. #if TWEAK
  411. for(i=0; i<16; ++i){
  412. ((uint64_t*)h)[i] ^= ((uint64_t*)m)[i];
  413. }
  414. for(i=0; i<16; ++i){
  415. q[i] += h[(i+1)&0xf];
  416. }
  417. #endif /* TWEAK */
  418. }
  419. #endif /* F0_HACK==0 */
  420. static
  421. void bmw_large_f1(uint64_t* q, const void* m, const uint64_t* h){
  422. uint8_t i;
  423. q[16] = bmw_large_expand1(0, q, m, h);
  424. q[17] = bmw_large_expand1(1, q, m, h);
  425. for(i=2; i<16; ++i){
  426. q[16+i] = bmw_large_expand2(i, q, m, h);
  427. }
  428. }
  429. static
  430. void bmw_large_f2(uint64_t* h, const uint64_t* q, const void* m){
  431. uint64_t xl=0, xh;
  432. uint8_t i;
  433. for(i=16;i<24;++i){
  434. xl ^= q[i];
  435. }
  436. xh = xl;
  437. for(i=24;i<32;++i){
  438. xh ^= q[i];
  439. }
  440. #if DEBUG
  441. cli_putstr("\r\n XL = ");
  442. cli_hexdump_rev(&xl, 4);
  443. cli_putstr("\r\n XH = ");
  444. cli_hexdump_rev(&xh, 4);
  445. #endif
  446. memcpy(h, m, 16*8);
  447. h[0] ^= SHL64(xh, 5) ^ SHR64(q[16], 5);
  448. h[1] ^= SHR64(xh, 7) ^ SHL64(q[17], 8);
  449. h[2] ^= SHR64(xh, 5) ^ SHL64(q[18], 5);
  450. h[3] ^= SHR64(xh, 1) ^ SHL64(q[19], 5);
  451. h[4] ^= SHR64(xh, 3) ^ q[20];
  452. h[5] ^= SHL64(xh, 6) ^ SHR64(q[21], 6);
  453. h[6] ^= SHR64(xh, 4) ^ SHL64(q[22], 6);
  454. h[7] ^= SHR64(xh,11) ^ SHL64(q[23], 2);
  455. for(i=0; i<8; ++i){
  456. h[i] += xl ^ q[24+i] ^ q[i];
  457. }
  458. for(i=0; i<8; ++i){
  459. h[8+i] ^= xh ^ q[24+i];
  460. h[8+i] += ROTL64(h[(4+i)%8],i+9);
  461. }
  462. h[ 8] += SHL64(xl, 8) ^ q[23] ^ q[ 8];
  463. h[ 9] += SHR64(xl, 6) ^ q[16] ^ q[ 9];
  464. h[10] += SHL64(xl, 6) ^ q[17] ^ q[10];
  465. h[11] += SHL64(xl, 4) ^ q[18] ^ q[11];
  466. h[12] += SHR64(xl, 3) ^ q[19] ^ q[12];
  467. h[13] += SHR64(xl, 4) ^ q[20] ^ q[13];
  468. h[14] += SHR64(xl, 7) ^ q[21] ^ q[14];
  469. h[15] += SHR64(xl, 2) ^ q[22] ^ q[15];
  470. }
  471. void bmw_large_nextBlock(bmw_large_ctx_t* ctx, const void* block){
  472. uint64_t q[32];
  473. dump_x(block, 16, 'M');
  474. bmw_large_f0(q, ctx->h, block);
  475. dump_x(q, 16, 'Q');
  476. bmw_large_f1(q, block, ctx->h);
  477. dump_x(q, 32, 'Q');
  478. bmw_large_f2(ctx->h, q, block);
  479. ctx->counter += 1;
  480. ctx_dump(ctx);
  481. }
  482. void bmw_large_lastBlock(bmw_large_ctx_t* ctx, const void* block, uint16_t length_b){
  483. union {
  484. uint8_t v8[128];
  485. uint64_t v64[ 16];
  486. } buffer;
  487. while(length_b >= BMW_LARGE_BLOCKSIZE){
  488. bmw_large_nextBlock(ctx, block);
  489. length_b -= BMW_LARGE_BLOCKSIZE;
  490. block = (uint8_t*)block + BMW_LARGE_BLOCKSIZE_B;
  491. }
  492. memset(buffer.v8, 0, 128);
  493. memcpy(buffer.v8, block, (length_b+7)/8);
  494. buffer.v8[length_b>>3] |= 0x80 >> (length_b&0x07);
  495. if(length_b+1>128*8-64){
  496. bmw_large_nextBlock(ctx, buffer.v8);
  497. memset(buffer.v8, 0, 128-8);
  498. ctx->counter -= 1;
  499. }
  500. buffer.v64[15] = (uint64_t)(ctx->counter*1024LL)+(uint64_t)length_b;
  501. bmw_large_nextBlock(ctx, buffer.v8);
  502. #if TWEAK
  503. uint8_t i;
  504. uint64_t q[32];
  505. memset(buffer.v8, 0xaa, 128);
  506. for(i=0; i<16; ++i){
  507. buffer.v8[8*i] = i + 0xa0;
  508. }
  509. bmw_large_f0(q, buffer.v64, ctx->h);
  510. bmw_large_f1(q, ctx->h, buffer.v64);
  511. bmw_large_f2(buffer.v64, q, ctx->h);
  512. memcpy(ctx->h, buffer.v8, 128);
  513. #endif
  514. }
  515. void bmw384_init(bmw384_ctx_t* ctx){
  516. uint8_t i;
  517. ctx->h[0] = 0x0001020304050607LL;
  518. for(i=1; i<16; ++i){
  519. ctx->h[i] = ctx->h[i-1]+ 0x0808080808080808LL;
  520. }
  521. #if BUG24
  522. ctx->h[6] = 0x3031323324353637LL;
  523. #endif
  524. ctx->counter=0;
  525. ctx_dump(ctx);
  526. }
  527. void bmw512_init(bmw512_ctx_t* ctx){
  528. uint8_t i;
  529. ctx->h[0] = 0x8081828384858687LL;
  530. for(i=1; i<16; ++i){
  531. ctx->h[i] = ctx->h[i-1]+ 0x0808080808080808LL;
  532. }
  533. ctx->counter=0;
  534. ctx_dump(ctx);
  535. }
  536. void bmw384_nextBlock(bmw384_ctx_t* ctx, const void* block){
  537. bmw_large_nextBlock(ctx, block);
  538. }
  539. void bmw512_nextBlock(bmw512_ctx_t* ctx, const void* block){
  540. bmw_large_nextBlock(ctx, block);
  541. }
  542. void bmw384_lastBlock(bmw384_ctx_t* ctx, const void* block, uint16_t length_b){
  543. bmw_large_lastBlock(ctx, block, length_b);
  544. }
  545. void bmw512_lastBlock(bmw512_ctx_t* ctx, const void* block, uint16_t length_b){
  546. bmw_large_lastBlock(ctx, block, length_b);
  547. }
  548. void bmw384_ctx2hash(void* dest, const bmw384_ctx_t* ctx){
  549. memcpy(dest, &(ctx->h[10]), 384/8);
  550. }
  551. void bmw512_ctx2hash(void* dest, const bmw512_ctx_t* ctx){
  552. memcpy(dest, &(ctx->h[8]), 512/8);
  553. }
  554. void bmw384(void* dest, const void* msg, uint32_t length_b){
  555. bmw_large_ctx_t ctx;
  556. bmw384_init(&ctx);
  557. while(length_b>=BMW_LARGE_BLOCKSIZE){
  558. bmw_large_nextBlock(&ctx, msg);
  559. length_b -= BMW_LARGE_BLOCKSIZE;
  560. msg = (uint8_t*)msg + BMW_LARGE_BLOCKSIZE_B;
  561. }
  562. bmw_large_lastBlock(&ctx, msg, length_b);
  563. bmw384_ctx2hash(dest, &ctx);
  564. }
  565. void bmw512(void* dest, const void* msg, uint32_t length_b){
  566. bmw_large_ctx_t ctx;
  567. bmw512_init(&ctx);
  568. while(length_b>=BMW_LARGE_BLOCKSIZE){
  569. bmw_large_nextBlock(&ctx, msg);
  570. length_b -= BMW_LARGE_BLOCKSIZE;
  571. msg = (uint8_t*)msg + BMW_LARGE_BLOCKSIZE_B;
  572. }
  573. bmw_large_lastBlock(&ctx, msg, length_b);
  574. bmw512_ctx2hash(dest, &ctx);
  575. }