[A51] Query on Kraken

Jan Hrach jenda at yakumo.hrach.eu
Thu Apr 7 15:39:48 CEST 2016


> and also why are you looping 25 + 8 + 8 times in Process() function

It looks like he is computing 4 iterations of A5/1 in each loop cycle. The tables were generated with 100 dummy clockings (25*4 = 100), and after that you need 64 bits of keystream ((8+8)*4 = 64).

> Can you take some time out and let me know how you are using these masks for keysearching

This is an optimization for computing 4 iterations in one cycle.
It would be interesting to test whether it makes sense on current SIMD hardware, or if bitslicing is faster.


On 7.4.2016 14:05, Ajay Fuloria wrote:
> Hi!!
> 
> I went through the kraken code and I must commend you for your work. I was trying to understand the code so that I can make use of it.
> 
> I am facing a problem in understanding the CalcTables() function in A5cpu.cpp ... You create a mask lookup in this function. Can you take some time out and let me know how you are using these masks for keysearching and also why are you looping 25 + 8 + 8 times in Process() function?
> 
> Thanks and regards,
> 
> Ajay
> 
> 
> For your quick reference the code of these two functions is as below :
> 
> 
> void A5Cpu::CalcTables(void)
> {
>    /* Calculate clocking table */
>     for(int i=0; i< 16 ; i++) {
>         for(int j=0; j< 16 ; j++) {
>             for(int k=0; k< 16 ; k++) {
>                 /* Copy input */
>                 int m1 = i;
>                 int m2 = j;
>                 int m3 = k;
>                 /* Generate masks */
>                 int cm1 = 0;
>                 int cm2 = 0;
>                 int cm3 = 0;
> /* Counter R2 */
> int r2count = 0;
>                 for (int l = 0; l < 4 ; l++ ) {
>                     cm1 = cm1 << 1;
>                     cm2 = cm2 << 1;
>                     cm3 = cm3 << 1;
>                     int maj = ((m1>>3)+(m2>>3)+(m3>>3))>>1;
>                     if ((m1>>3)==maj) {
>                         m1 = (m1<<1)&0x0f;
>                         cm1 |= 0x01;
>                     }
>                     if ((m2>>3)==maj) {
>                         m2 = (m2<<1)&0x0f;
>                         cm2 |= 0x01;
> r2count++;
>                     }
>                     if ((m3>>3)==maj) {
>                         m3 = (m3<<1)&0x0f;
>                         cm3 |= 0x01;
>                     }
>                 }
>                 // printf( "%x %x %x -> %x:%x:%x\n", i,j,k, cm1, cm2, cm3);
>                 int index = i*16*16+j*16+k;
>                 mClockMask[index] = (r2count<<12) | (cm1<<8) | (cm2<<4) | cm3;
>             }
>         }
>     }
> 
>     /* Calculate 111000 + clock mask table */
>     for (int i=0; i < 64 ; i++ ) {
>         for(int j=0; j<16; j++) {
>             int count = PopcountNibble(j);
>             int feedback = 0;
>             int data = i;
>             for (int k=0; k<count; k++) {
>                 feedback = feedback << 1;
>                 int v = (data>>5) ^ (data>>4) ^ (data>>3);
>                 data = data << 1;
>                 feedback ^= (v&0x01);
>             }
>             data = i;
>             int mask = j;
>             int output = 0;
>             for (int k=0; k<4; k++) {
>                 output = (output<<1) ^ ((data>>5)&0x01);
>                 if (mask&0x08) {
>                     data = data << 1;
>                 }
>                 mask = mask << 1;
>             }
>             int index = i * 16 + j; 
>             mTable6bit[index] = (feedback<<4) | output;
>             // printf("%02x:%x -> %x %x\n", i,j,feedback, output);
>         }
>     }
> 
>     /* Calculate 11000 + clock mask table */
>     for (int i=0; i < 32 ; i++ ) {
>         for(int j=0; j<16; j++) {
>             int count = PopcountNibble(j);
>             int feedback = 0;
>             int data = i;
>             for (int k=0; k<count; k++) {
>                 feedback = feedback << 1;
>                 int v = (data>>4) ^ (data>>3);
>                 data = data << 1;
>                 feedback ^= (v&0x01);
>             }
>             data = i;
>             int mask = j;
>             int output = 0;
>             for (int k=0; k<4; k++) {
>                 output = (output<<1) ^ ((data>>4)&0x01);
>                 if (mask&0x08) {
>                     data = data << 1;
>                 }
>                 mask = mask << 1;
>             }
>             int index = i * 16 + j; 
>             mTable5bit[index] = (feedback<<4) | output;
>             // printf("%02x:%x -> %x %x\n", i,j,feedback, output);
>         }
>     }
> 
>     /* Calculate 1000 + clock mask table */
>     for (int i=0; i < 16 ; i++ ) {
>         for(int j=0; j<16; j++) {
>             int count = PopcountNibble(j);
>             int feedback = 0;
>             int data = i;
>             for (int k=0; k<count; k++) {
>                 feedback = feedback << 1;
>                 int v = (data>>3);
>                 data = data << 1;
>                 feedback ^= (v&0x01);
>             }
>             int index = i * 16 + j;
>             mTable4bit[index] = (count<<4)|feedback;
>             // printf("%02x:%x -> %x\n", i,j,feedback );
>         }
>     }
> }
> 
> 
> 
> ---------------------------------------------------------
> 
> 
> void A5Cpu::Process(void)
> {
>   bool active = false;
>   struct timeval tStart;
>   struct timeval tEnd;
> 
>   uint64_t start_point;
>   uint64_t target;
>   uint64_t start_point_r;
>   int32_t  start_round;
>   int32_t  stop_round;
>   uint32_t advance;
>   const uint32_t* RFtable;
>   void* context;
> 
>   for(;;) {
>     if (!mRunning) break;
> 
>     /* Get input */
>     sem_wait(&mMutex);
>     if (mInputStart.size()) {
>       start_point = mInputStart.front();
>       mInputStart.pop_front();
>       target = mInputTarget.front();
>       mInputTarget.pop_front();
>       start_point_r = ReverseBits(start_point);
>       start_round = mInputRound.front();
>       mInputRound.pop_front();
>       stop_round = mInputRoundStop.front();
>       mInputRoundStop.pop_front();
>       advance = mInputAdvance.front();
>       mInputAdvance.pop_front();
>       context =  mInputContext.front();
>       mInputContext.pop_front();
>       map< uint32_t, class Advance* >::iterator it = mAdvances.find(advance);
>       if (it==mAdvances.end()) {
>           class Advance* adv = new Advance(advance, mMaxRound);
>           mAdvances[advance] = adv;
>           RFtable = adv->getRFtable();
>       } else {
>           RFtable = (*it).second->getRFtable();
>       }
>       active = true;
>       // printf("Insert\n");
>     }
>     sem_post(&mMutex);
> 
>     if (!active) {
>       /* Don't use CPU while idle */
>       usleep(250);
>       continue;
>     }
> 
>     gettimeofday( &tStart, NULL );
>     /* Do something */
>     unsigned int out_hi = start_point_r>>32;
>     unsigned int out_lo = start_point_r;
> 
>     unsigned int target_lo = target;
>     unsigned int target_hi = target >> 32;
> 
>     unsigned int last_key_lo;
>     unsigned int last_key_hi;
> 
>     bool keysearch = (target != 0ULL);
> 
>     for (int round=start_round; round < stop_round; ) {
>         out_lo = out_lo ^ RFtable[2*round];////not convibced
>         out_hi = out_hi ^ RFtable[2*round+1];
> 
>         if ((out_hi>>mCondition)==0) {// check for distinguished points else new round
>             // uint64_t res = (((uint64_t)out_hi)<<32)|out_lo;
>             // res = ReverseBits(res);
>             // printf("New round %i %016llx %08x:%08x\n", round, res, out_hi, out_lo);
>             round++;
>             if (round>=stop_round) break;
>         }
> 
>         unsigned int lfsr1 = out_lo;
>         unsigned int lfsr2 = (out_hi << 13) | (out_lo >> 19);
>         unsigned int lfsr3 = out_hi >> 9;
> 
>         last_key_hi = out_hi;
>         last_key_lo =out_lo;
> 
>         for (int i=0; i<25 ; i++) {
>             int clocks = ((lfsr1<<3)&0xf00) | ((lfsr2>>3)&0xf0) | ((lfsr3>>7)&0xf);
>             int masks = mClockMask[clocks];
> 
>             /* lfsr1 */
>             unsigned int tmask = (masks>>8)&0x0f;
>             unsigned int tval = mTable6bit[((lfsr1>>9)&0x3f0)|tmask];
>             unsigned int tval2 = mTable4bit[((lfsr1>>6)&0xf0)|tmask];
>             lfsr1 = (lfsr1<<(tval2>>4))^(tval>>4)^(tval2&0x0f);
> 
>             /* lfsr2 */
>             tmask = (masks>>4)&0x0f;
>             tval = mTable5bit[((lfsr2>>13)&0x1f0)|tmask];
>             out_hi = out_hi ^ (tval&0x0f);
>             lfsr2 = (lfsr2<<(masks>>12))^(tval>>4);
> 
>             /* lfsr3 */
>             tmask = masks & 0x0f;
>             tval = mTable6bit[((lfsr3>>13)&0x3f0)|tmask];
>             tval2 = mTable4bit[(lfsr3&0xf0)|tmask];
>             lfsr3 = (lfsr3<<(tval2>>4))^(tval>>4)^(tval2&0x0f);
>         }
>         for (int i=0; i<8 ; i++) {
>             int clocks = ((lfsr1<<3)&0xf00) | ((lfsr2>>3)&0xf0) | ((lfsr3>>7)&0xf);
>             int masks = mClockMask[clocks];
>             
>             /* lfsr1 */
>             unsigned int tmask = (masks>>8)&0x0f;
>             unsigned int tval = mTable6bit[((lfsr1>>9)&0x3f0)|tmask];
>             out_hi = (out_hi << 4) | (tval&0x0f);
>             unsigned int tval2 = mTable4bit[((lfsr1>>6)&0xf0)|tmask];
>             lfsr1 = (lfsr1<<(tval2>>4))^(tval>>4)^(tval2&0x0f);
> 
>             /* lfsr2 */
>             tmask = (masks>>4)&0x0f;
>             tval = mTable5bit[((lfsr2>>13)&0x1f0)|tmask];
>             out_hi = out_hi ^ (tval&0x0f);
>             lfsr2 = (lfsr2<<(masks>>12))^(tval>>4);        
> 
>             /* lfsr3 */
>             tmask = masks & 0x0f;
>             tval = mTable6bit[((lfsr3>>13)&0x3f0)|tmask];
>             out_hi =  out_hi ^ (tval&0x0f);
>             tval2 = mTable4bit[(lfsr3&0xf0)|tmask];
>             lfsr3 = (lfsr3<<(tval2>>4))^(tval>>4)^(tval2&0x0f);
>         }
>         for (int i=0; i<8 ; i++) {
>             int clocks = ((lfsr1<<3)&0xf00) | ((lfsr2>>3)&0xf0) | ((lfsr3>>7)&0xf);
>             int masks = mClockMask[clocks];
> 
>             /* lfsr1 */
>             unsigned int tmask = (masks>>8)&0x0f;
>             unsigned int tval = mTable6bit[((lfsr1>>9)&0x3f0)|tmask];
>             out_lo = (out_lo << 4) | (tval&0x0f);
>             unsigned int tval2 = mTable4bit[((lfsr1>>6)&0xf0)|tmask];
>             lfsr1 = (lfsr1<<(tval2>>4))^(tval>>4)^(tval2&0x0f);
> 
>             /* lfsr2 */
>             tmask = (masks>>4)&0x0f;
>             tval = mTable5bit[((lfsr2>>13)&0x1f0)|tmask];
>             out_lo = out_lo ^ (tval&0x0f);
>             lfsr2 = (lfsr2<<(masks>>12))^(tval>>4);        
> 
>             /* lfsr3 */
>             tmask = masks & 0x0f;
>             tval = mTable6bit[((lfsr3>>13)&0x3f0)|tmask];
>             out_lo =  out_lo ^ (tval&0x0f);
>             tval2 = mTable4bit[(lfsr3&0xf0)|tmask];
>             lfsr3 = (lfsr3<<(tval2>>4))^(tval>>4)^(tval2&0x0f);
>         }
>         if (keysearch&&(target_hi==out_hi)&&(target_lo==out_lo)) {
>             /* report key as finishing state */
>             out_hi = last_key_hi;
>             out_lo = last_key_lo;
>             start_round = -1;
>             break;
>         }
>     }
> 
>     
> /////////////////////////////
> gettimeofday( &tEnd, NULL );
>     unsigned int uSecs = 1000000 * (tEnd.tv_sec - tStart.tv_sec);
>     uSecs += (tEnd.tv_usec - tStart.tv_usec);
> 
>     // printf("Completed in %i ms\n", uSecs/1000);
> 
>     /* Report completed chains */
>     sem_wait(&mMutex);
> 
>     uint64_t res = (((uint64_t)out_hi)<<32)|out_lo;
>     res = ReverseBits(res);
>     mOutput.push( pair<uint64_t,uint64_t>(start_point,res) );
>     mOutputStartRound.push( start_round );
>     mOutputContext.push( context );
>     active = false;
> 
>     sem_post(&mMutex);
> 
>   }
> }
> 
> 
> 
> -- 
> Ajay Fuloria
> merlinsignals.blogspot.in <http://merlinsignals.blogspot.in>
>> 
> 
> _______________________________________________
> A51 mailing list
> A51 at lists.srlabs.de
> https://lists.srlabs.de/cgi-bin/mailman/listinfo/a51
> 


-- 
Jan Hrach | http://jenda.hrach.eu/
GPG CD98 5440 4372 0C6D 164D A24D F019 2F8E 6527 282E



More information about the A51 mailing list