How are the four 8-bit permutation tables of Present cipher calculated?

Question

The fast implementation of the PRESENT cipher for 8-bit registers uses four 8-bit lookup tables for the permutation. Tables 3 and 2 are:

/*
 * sbox_pmt_3: 256-entry byte-indexed lookup table, one of the four
 * combined S-box + bit-permutation tables discussed in the text.
 *
 * NOTE(review): spot checks (indices 0x00, 0x01, 0x10) against the
 * published PRESENT S-box suggest the following layout, where SS[b] is
 * the S-box applied to both nibbles of b -- confirm against the generator:
 *   bits 7..6 of the entry = bits 7,3 of SS[b]
 *   bits 5..4 of the entry = bits 6,2 of SS[b]
 *   bits 3..2 of the entry = bits 5,1 of SS[b]
 *   bits 1..0 of the entry = bits 4,0 of SS[b]
 * Each row below holds the 16 entries sharing one high nibble of the index.
 */
static const uint8_t sbox_pmt_3[256] = {
    0xF0, 0xB1, 0xB4, 0xE5, 0xE1, 0xA0, 0xE4, 0xF1, 0xA5, 0xF4, 0xF5, 0xE0, 0xB0, 0xB5, 0xA1, 0xA4, 
    0x72, 0x33, 0x36, 0x67, 0x63, 0x22, 0x66, 0x73, 0x27, 0x76, 0x77, 0x62, 0x32, 0x37, 0x23, 0x26, 
    0x78, 0x39, 0x3C, 0x6D, 0x69, 0x28, 0x6C, 0x79, 0x2D, 0x7C, 0x7D, 0x68, 0x38, 0x3D, 0x29, 0x2C, 
    0xDA, 0x9B, 0x9E, 0xCF, 0xCB, 0x8A, 0xCE, 0xDB, 0x8F, 0xDE, 0xDF, 0xCA, 0x9A, 0x9F, 0x8B, 0x8E, 
    0xD2, 0x93, 0x96, 0xC7, 0xC3, 0x82, 0xC6, 0xD3, 0x87, 0xD6, 0xD7, 0xC2, 0x92, 0x97, 0x83, 0x86, 
    0x50, 0x11, 0x14, 0x45, 0x41, 0x00, 0x44, 0x51, 0x05, 0x54, 0x55, 0x40, 0x10, 0x15, 0x01, 0x04, 
    0xD8, 0x99, 0x9C, 0xCD, 0xC9, 0x88, 0xCC, 0xD9, 0x8D, 0xDC, 0xDD, 0xC8, 0x98, 0x9D, 0x89, 0x8C, 
    0xF2, 0xB3, 0xB6, 0xE7, 0xE3, 0xA2, 0xE6, 0xF3, 0xA7, 0xF6, 0xF7, 0xE2, 0xB2, 0xB7, 0xA3, 0xA6, 
    0x5A, 0x1B, 0x1E, 0x4F, 0x4B, 0x0A, 0x4E, 0x5B, 0x0F, 0x5E, 0x5F, 0x4A, 0x1A, 0x1F, 0x0B, 0x0E, 
    0xF8, 0xB9, 0xBC, 0xED, 0xE9, 0xA8, 0xEC, 0xF9, 0xAD, 0xFC, 0xFD, 0xE8, 0xB8, 0xBD, 0xA9, 0xAC, 
    0xFA, 0xBB, 0xBE, 0xEF, 0xEB, 0xAA, 0xEE, 0xFB, 0xAF, 0xFE, 0xFF, 0xEA, 0xBA, 0xBF, 0xAB, 0xAE, 
    0xD0, 0x91, 0x94, 0xC5, 0xC1, 0x80, 0xC4, 0xD1, 0x85, 0xD4, 0xD5, 0xC0, 0x90, 0x95, 0x81, 0x84, 
    0x70, 0x31, 0x34, 0x65, 0x61, 0x20, 0x64, 0x71, 0x25, 0x74, 0x75, 0x60, 0x30, 0x35, 0x21, 0x24, 
    0x7A, 0x3B, 0x3E, 0x6F, 0x6B, 0x2A, 0x6E, 0x7B, 0x2F, 0x7E, 0x7F, 0x6A, 0x3A, 0x3F, 0x2B, 0x2E, 
    0x52, 0x13, 0x16, 0x47, 0x43, 0x02, 0x46, 0x53, 0x07, 0x56, 0x57, 0x42, 0x12, 0x17, 0x03, 0x06, 
    0x58, 0x19, 0x1C, 0x4D, 0x49, 0x08, 0x4C, 0x59, 0x0D, 0x5C, 0x5D, 0x48, 0x18, 0x1D, 0x09, 0x0C,
};

/*
 * sbox_pmt_2: 256-entry byte-indexed lookup table, one of the four
 * combined S-box + bit-permutation tables discussed in the text.
 *
 * NOTE(review): in the entries spot-checked (all of row 0, the first entry
 * of row 1, and the last entry) each value equals the sbox_pmt_3 entry at
 * the same index rotated right by 2 bits (e.g. 0xF0 -> 0x3C, 0xB1 -> 0x6C).
 * Presumably this holds for the whole table -- confirm against the generator.
 * Each row below holds the 16 entries sharing one high nibble of the index.
 */
static const uint8_t sbox_pmt_2[256] = {
    0x3C, 0x6C, 0x2D, 0x79, 0x78, 0x28, 0x39, 0x7C, 0x69, 0x3D, 0x7D, 0x38, 0x2C, 0x6D, 0x68, 0x29, 
    0x9C, 0xCC, 0x8D, 0xD9, 0xD8, 0x88, 0x99, 0xDC, 0xC9, 0x9D, 0xDD, 0x98, 0x8C, 0xCD, 0xC8, 0x89, 
    0x1E, 0x4E, 0x0F, 0x5B, 0x5A, 0x0A, 0x1B, 0x5E, 0x4B, 0x1F, 0x5F, 0x1A, 0x0E, 0x4F, 0x4A, 0x0B, 
    0xB6, 0xE6, 0xA7, 0xF3, 0xF2, 0xA2, 0xB3, 0xF6, 0xE3, 0xB7, 0xF7, 0xB2, 0xA6, 0xE7, 0xE2, 0xA3, 
    0xB4, 0xE4, 0xA5, 0xF1, 0xF0, 0xA0, 0xB1, 0xF4, 0xE1, 0xB5, 0xF5, 0xB0, 0xA4, 0xE5, 0xE0, 0xA1, 
    0x14, 0x44, 0x05, 0x51, 0x50, 0x00, 0x11, 0x54, 0x41, 0x15, 0x55, 0x10, 0x04, 0x45, 0x40, 0x01, 
    0x36, 0x66, 0x27, 0x73, 0x72, 0x22, 0x33, 0x76, 0x63, 0x37, 0x77, 0x32, 0x26, 0x67, 0x62, 0x23, 
    0xBC, 0xEC, 0xAD, 0xF9, 0xF8, 0xA8, 0xB9, 0xFC, 0xE9, 0xBD, 0xFD, 0xB8, 0xAC, 0xED, 0xE8, 0xA9, 
    0x96, 0xC6, 0x87, 0xD3, 0xD2, 0x82, 0x93, 0xD6, 0xC3, 0x97, 0xD7, 0x92, 0x86, 0xC7, 0xC2, 0x83, 
    0x3E, 0x6E, 0x2F, 0x7B, 0x7A, 0x2A, 0x3B, 0x7E, 0x6B, 0x3F, 0x7F, 0x3A, 0x2E, 0x6F, 0x6A, 0x2B, 
    0xBE, 0xEE, 0xAF, 0xFB, 0xFA, 0xAA, 0xBB, 0xFE, 0xEB, 0xBF, 0xFF, 0xBA, 0xAE, 0xEF, 0xEA, 0xAB, 
    0x34, 0x64, 0x25, 0x71, 0x70, 0x20, 0x31, 0x74, 0x61, 0x35, 0x75, 0x30, 0x24, 0x65, 0x60, 0x21, 
    0x1C, 0x4C, 0x0D, 0x59, 0x58, 0x08, 0x19, 0x5C, 0x49, 0x1D, 0x5D, 0x18, 0x0C, 0x4D, 0x48, 0x09, 
    0x9E, 0xCE, 0x8F, 0xDB, 0xDA, 0x8A, 0x9B, 0xDE, 0xCB, 0x9F, 0xDF, 0x9A, 0x8E, 0xCF, 0xCA, 0x8B, 
    0x94, 0xC4, 0x85, 0xD1, 0xD0, 0x80, 0x91, 0xD4, 0xC1, 0x95, 0xD5, 0x90, 0x84, 0xC5, 0xC0, 0x81, 
    0x16, 0x46, 0x07, 0x53, 0x52, 0x02, 0x13, 0x56, 0x43, 0x17, 0x57, 0x12, 0x06, 0x47, 0x42, 0x03,
};

The values in sbox_pmt_1 are the reverse of sbox_pmt_3 (similarly for sbox_pmt_0 and sbox_pmt_2).

Q. How are sbox_pmt_3 and sbox_pmt_2 calculated?

score 2 · Accepted Answer · answered Sep 01 '19 at 19:30

In short: Trace back where bits come from in every byte in the output state (8 bytes) of the SP_network.

In one round, after we have XORed in the round key bits, the 8 state bytes (a 64-bit word, with rightmost byte 0, leftmost byte 7 etc., which in the linked, byte-oriented software is represented as state[0] to state[7]) are put nibblewise through the S-box; this is just as easy to do as a byte substitution. Then we trace where the bits in the post-permutation state come from (the listing is generated by a simple program; here is the part for output byte 0, i.e. state[7]):

bit 0 = byte 0, subbit 0 from byte 0, subbit 0
bit 1 = byte 0, subbit 1 from byte 0, subbit 4
bit 2 = byte 0, subbit 2 from byte 1, subbit 0
bit 3 = byte 0, subbit 3 from byte 1, subbit 4
bit 4 = byte 0, subbit 4 from byte 2, subbit 0
bit 5 = byte 0, subbit 5 from byte 2, subbit 4
bit 6 = byte 0, subbit 6 from byte 3, subbit 0
bit 7 = byte 0, subbit 7 from byte 3, subbit 4

We see that we use the bits 0 of the two S-box nibbles (which are bits 0 and 4 in the byte) to build a half-nibble (bits 0,1 or bits 2,3 etc.) of an output byte. So we can start building four byte-indexed tables, say T0,T1,T2,T3 with the constraints that

T0[b] & 0x03 equals the bits 4,0 from SS[b] (i.e. the double nibble byte S-box.)
T1[b] & 0x0c equals the bits 4,0 from SS[b]
T2[b] & 0x30 equals the bits 4,0 from SS[b]
T3[b] & 0xc0 equals the bits 4,0 from SS[b]

That way we can build up post-permutation byte 0 or state[7] as a bitwise-or of masked outputs from these 4 tables (which we want to construct).

To build up the next to last byte 1 (state[6]) we have the following similar pattern:

bit 8 = byte 1, subbit 0 from byte 4, subbit 0
bit 9 = byte 1, subbit 1 from byte 4, subbit 4
bit 10 = byte 1, subbit 2 from byte 5, subbit 0
bit 11 = byte 1, subbit 3 from byte 5, subbit 4
bit 12 = byte 1, subbit 4 from byte 6, subbit 0
bit 13 = byte 1, subbit 5 from byte 6, subbit 4
bit 14 = byte 1, subbit 6 from byte 7, subbit 0
bit 15 = byte 1, subbit 7 from byte 7, subbit 4

So the same tables can be re-used but on different S-box output bytes.

Next we consider byte 2, state[5], again from the same output:

bit 16 = byte 2, subbit 0 from byte 0, subbit 1
bit 17 = byte 2, subbit 1 from byte 0, subbit 5
bit 18 = byte 2, subbit 2 from byte 1, subbit 1
bit 19 = byte 2, subbit 3 from byte 1, subbit 5
bit 20 = byte 2, subbit 4 from byte 2, subbit 1
bit 21 = byte 2, subbit 5 from byte 2, subbit 5
bit 22 = byte 2, subbit 6 from byte 3, subbit 1
bit 23 = byte 2, subbit 7 from byte 3, subbit 5

So to build up that byte we need the half-nibble built from the bits 1 of the S-box output (bits 1,5 of the SS-box byte), so the constraints change slightly: the order of the tables has been changed to get non-contradicting constraints (the lowest bits of T0[b] have already been fixed, etc.), so I use them cyclically:

T1[b] & 0x03 equals the bits 5,1 from SS[b]
T2[b] & 0x0c equals the bits 5,1 from SS[b]
T3[b] & 0x30 equals the bits 5,1 from SS[b]
T0[b] & 0xc0 equals the bits 5,1 from SS[b]

The same tables and masks can be re-used for byte 3 (state[4]) as witnessed by

bit 24 = byte 3, subbit 0 from byte 4, subbit 1
bit 25 = byte 3, subbit 1 from byte 4, subbit 5
bit 26 = byte 3, subbit 2 from byte 5, subbit 1
bit 27 = byte 3, subbit 3 from byte 5, subbit 5
bit 28 = byte 3, subbit 4 from byte 6, subbit 1
bit 29 = byte 3, subbit 5 from byte 6, subbit 5
bit 30 = byte 3, subbit 6 from byte 7, subbit 1
bit 31 = byte 3, subbit 7 from byte 7, subbit 5

Thirdly, for byte 4 (and again, similarly 5) we see

bit 32 = byte 4, subbit 0 from byte 0, subbit 2
bit 33 = byte 4, subbit 1 from byte 0, subbit 6
bit 34 = byte 4, subbit 2 from byte 1, subbit 2
bit 35 = byte 4, subbit 3 from byte 1, subbit 6
bit 36 = byte 4, subbit 4 from byte 2, subbit 2
bit 37 = byte 4, subbit 5 from byte 2, subbit 6
bit 38 = byte 4, subbit 6 from byte 3, subbit 2
bit 39 = byte 4, subbit 7 from byte 3, subbit 6

where we build successive half-nibbles all from the different masks of the combination of bits 6,2 (bit 2 from a single S-box) of the SS-box output. So again we can build new constraints, going on cyclically from before:

T2[b] & 0x03 equals the bits 6,2 from SS[b]
T3[b] & 0x0c equals the bits 6,2 from SS[b]
T0[b] & 0x30 equals the bits 6,2 from SS[b]
T1[b] & 0xc0 equals the bits 6,2 from SS[b]

where we can re-use the exact same bitwise-or recipe for byte 5.

Finally, for bytes 6 (and 7) we get as the origins:

bit 48 = byte 6, subbit 0 from byte 0, subbit 3
bit 49 = byte 6, subbit 1 from byte 0, subbit 7
bit 50 = byte 6, subbit 2 from byte 1, subbit 3
bit 51 = byte 6, subbit 3 from byte 1, subbit 7
bit 52 = byte 6, subbit 4 from byte 2, subbit 3
bit 53 = byte 6, subbit 5 from byte 2, subbit 7
bit 54 = byte 6, subbit 6 from byte 3, subbit 3
bit 55 = byte 6, subbit 7 from byte 3, subbit 7

So we use the combination of bits 7,3 (or bit 3 from a single S-box) from different bytes to form this byte. So the next set of new constraints becomes:

T3[b] & 0x03 equals the bits 7,3 from SS[b]
T0[b] & 0x0c equals the bits 7,3 from SS[b]
T1[b] & 0x30 equals the bits 7,3 from SS[b]
T2[b] & 0xc0 equals the bits 7,3 from SS[b]

Now we can gather the results: to actually find T0[b] we consider all we know about it from before:

T0[b] & 0x0c equals the bits 7,3 from SS[b]
T0[b] & 0x30 equals the bits 6,2 from SS[b]
T0[b] & 0xc0 equals the bits 5,1 from SS[b]
T0[b] & 0x03 equals the bits 4,0 from SS[b]

and compute the table directly. It will be the table pmt_3 from your linked software. This code (gist on GitHub) computes these combined tables (in a slightly different order, I believe, than the one used in the linked software); read it to see how to do this in more detail.

How are the four 8-bit permutation tables of Present cipher calculated?

1 Answer