Quantcast
Channel: CodeSection,代码区,网络安全 - CodeSec
Viewing all articles
Browse latest Browse all 12749

Symbolic execution and (amateur) cryptography

$
0
0
28-Feb-2017: Symbolic execution and (amateur) cryptography

Let's back to the method we previously used to construct expressions using running python function.

We can try to build expression for the output of XXTEA encryption algorithm:

class Expr: def __init__(self,s): self.s=s def __str__(self): return self.s def convert_to_Expr_if_int(self, n): if isinstance(n, int): return Expr(str(n)) if isinstance(n, Expr): return n raise AssertionError # unsupported type def __xor__(self, other): return Expr("(" + self.s + "^" + self.convert_to_Expr_if_int(other).s + ")") def __mul__(self, other): return Expr("(" + self.s + "*" + self.convert_to_Expr_if_int(other).s + ")") def __add__(self, other): return Expr("(" + self.s + "+" + self.convert_to_Expr_if_int(other).s + ")") def __and__(self, other): return Expr("(" + self.s + "&" + self.convert_to_Expr_if_int(other).s + ")") def __lshift__(self, other): return Expr("(" + self.s + "<<" + self.convert_to_Expr_if_int(other).s + ")") def __rshift__(self, other): return Expr("(" + self.s + ">>" + self.convert_to_Expr_if_int(other).s + ")") def __getitem__(self, d): return Expr("(" + self.s + "[" + d.s + "])") # reworked from: # Pure Python (2.x) implementation of the XXTEA cipher # (c) 2009. Ivan Voras # Released under the BSD License. def raw_xxtea(v, n, k): def MX(): return ((z>>5)^(y<<2)) + ((y>>3)^(z<<4))^(sum^y) + (k[(Expr(str(p)) & 3)^e]^z) y = v[0] sum = Expr("0") DELTA = 0x9e3779b9 # Encoding only z = v[n-1] # number of rounds: #q = 6 + 52 / n q=1 while q > 0: q -= 1 sum = sum + DELTA e = (sum >> 2) & 3 p = 0 while p < n - 1: y = v[p+1] z = v = v + MX() p += 1 y = v[0] z = v[n-1] = v[n-1] + MX() return 0 v=[Expr("input1"), Expr("input2"), Expr("input3"), Expr("input4")] k=Expr("key") raw_xxtea(v, 4, k) for i in range(4): print i, ":", v[i] #print len(str(v[0]))+len(str(v[1]))+len(str(v[2]))+len(str(v[3])) A key is choosen according to input data, and, obviously, we can't know it during symbolic execution, so we leave expression like k[...].

Now results for just one round, for each of 4 outputs:

0 : (input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+ ((key[((0&3)^(((0+2654435769)>>2)&3))])^input4)))) 1 : (input2+(((((input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^ input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^input4))))>>5)^(input3<<2))+((input3>>3)^((input1+ ((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+((key[((0&3)^ (((0+2654435769)>>2)&3))])^input4))))<<4)))^(((0+2654435769)^input3)+((key[((1&3)^(((0+2654435769)>>2)& 3))])^(input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+ ((key[((0&3)^(((0+2654435769)>>2)&3))])^input4)))))))) 2 : (input3+(((((input2+(((((input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^ (((0+2654435769)^input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^input4))))>>5)^(input3<<2))+ ((input3>>3)^((input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^ input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^input4))))<<4)))^(((0+2654435769)^input3)+ ((key[((1&3)^(((0+2654435769)>>2)&3))])^(input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4))) ^(((0+2654435769)^input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^input4))))))))>>5)^(input4<<2))+ ((input4>>3)^((input2+(((((input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^ input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^input4))))>>5)^(input3<<2))+((input3>>3)^((input1+ ((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+((key[((0&3)^ (((0+2654435769)>>2)&3))])^input4))))<<4)))^(((0+2654435769)^input3)+((key[((1&3)^(((0+2654435769)>>2)&3))])^ (input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+((key[((0&3)^ (((0+2654435769)>>2)&3))])^input4))))))))<<4)))^(((0+2654435769)^input4)+((key[((2&3)^(((0+2654435769)>>2)& 3))])^(input2+(((((input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^ input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^input4))))>>5)^(input3<<2))+((input3>>3)^((input1+ ((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+((key[((0&3)^ (((0+2654435769)>>2)&3))])^input4))))<<4)))^(((0+2654435769)^input3)+((key[((1&3)^(((0+2654435769)>>2)& 3))])^(input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+((key[((0&3)^ (((0+2654435769)>>2)&3))])^input4)))))))))))) 3 : (input4+(((((input3+(((((input2+(((((input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^ (((0+2654435769)^input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^input4))))>>5)^(input3<<2))+((input3>>3)^ ((input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+((key[((0&3)^ (((0+2654435769)>>2)&3))])^input4))))<<4)))^(((0+2654435769)^input3)+((key[((1&3)^(((0+2654435769)>>2)&3))])^ (input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+((key[((0&3)^ (((0+2654435769)>>2)&3))])^input4))))))))>>5)^(input4<<2))+((input4>>3)^((input2+(((((input1+((((input4>>5)^ (input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^ input4))))>>5)^(input3<<2))+((input3>>3)^((input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^ (((0+2654435769)^input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^input4))))<<4)))^(((0+2654435769)^input3)+ ((key[((1&3)^(((0+2654435769)>>2)&3))])^(input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^ input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^input4))))))))<<4)))^(((0+2654435769)^input4)+((key[((2&3)^(((0+ 2654435769)>>2)&3))])^(input2+(((((input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^ input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^input4))))>>5)^(input3<<2))+((input3>>3)^((input1+((((input4>>5)^ (input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^input4))))<< 4)))^(((0+2654435769)^input3)+((key[((1&3)^(((0+2654435769)>>2)&3))])^(input1+((((input4>>5)^(input2<<2))+((input2>>3)^ (input4<<4)))^(((0+2654435769)^input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^input4))))))))))))>>5)^((input1+ ((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+((key[((0&3)^(((0+2654435769)>> 2)&3))])^input4))))<<2))+(((input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^ input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^input4))))>>3)^((input3+(((((input2+(((((input1+((((input4>> 5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^ input4))))>>5)^(input3<<2))+((input3>>3)^((input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+ 2654435769)^input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^input4))))<<4)))^(((0+2654435769)^input3)+((key[((1&3)^ (((0+2654435769)>>2)&3))])^(input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+ ((key[((0&3)^(((0+2654435769)>>2)&3))])^input4))))))))>>5)^(input4<<2))+((input4>>3)^((input2+(((((input1+((((input4>> 5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^input4))))>> 5)^(input3<<2))+((input3>>3)^((input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+ ((key[((0&3)^(((0+2654435769)>>2)&3))])^input4))))<<4)))^(((0+2654435769)^input3)+((key[((1&3)^(((0+2654435769)>>2)&3))])^ (input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+((key[((0&3)^(((0+2654435769)>> 2)&3))])^input4))))))))<<4)))^(((0+2654435769)^input4)+((key[((2&3)^(((0+2654435769)>>2)&3))])^(input2+(((((input1+ ((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^ input4))))>>5)^(input3<<2))+((input3>>3)^((input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+ 2654435769)^input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^input4))))<<4)))^(((0+2654435769)^input3)+((key[((1&3)^ (((0+2654435769)>>2)&3))])^(input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+ ((key[((0&3)^(((0+2654435769)>>2)&3))])^input4))))))))))))<<4)))^(((0+2654435769)^(input1+((((input4>>5)^(input2<< 2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^input4)))))+((key[((3& 3)^(((0+2654435769)>>2)&3))])^(input3+(((((input2+(((((input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^ (((0+2654435769)^input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^input4))))>>5)^(input3<<2))+((input3>>3)^((input1+ ((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+((key[((0&3)^(((0+2654435769)>>2)& 3))])^input4))))<<4)))^(((0+2654435769)^input3)+((key[((1&3)^(((0+2654435769)>>2)&3))])^(input1+((((input4>>5)^ (input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^ input4))))))))>>5)^(input4<<2))+((input4>>3)^((input2+(((((input1+((((input4>>5)^(input2<<2))+((input2>>3)^ (input4<<4)))^(((0+2654435769)^input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^input4))))>>5)^(input3<<2))+((input3>> 3)^((input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+((key[((0&3)^(((0+ 2654435769)>>2)&3))])^input4))))<<4)))^(((0+2654435769)^input3)+((key[((1&3)^(((0+2654435769)>>2)&3))])^(input1+ ((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^ input4))))))))<<4)))^(((0+2654435769)^input4)+((key[((2&3)^(((0+2654435769)>>2)&3))])^(input2+(((((input1+((((input4>> 5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+((key[((0&3)^(((0+2654435769)>>2)&3))])^input4))))>> 5)^(input3<<2))+((input3>>3)^((input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+ ((key[((0&3)^(((0+2654435769)>>2)&3))])^input4))))<<4)))^(((0+2654435769)^input3)+((key[((1&3)^(((0+2654435769)>>2)& 3))])^(input1+((((input4>>5)^(input2<<2))+((input2>>3)^(input4<<4)))^(((0+2654435769)^input2)+((key[((0&3)^(((0+ 2654435769)>>2)&3))])^input4))))))))))))))))

Somehow, size of expression for each subsequent output is bigger. I hope I haven't been mistaken? And this is just for 1 round. For 2 rounds, size of all 4 expression is ~970KB. For 3 rounds, this is ~115MB. For 4 rounds, I have not enough RAM on my computer. Expressions exploding exponentially. And there are 19 rounds. You can weigh it.

Perhaps, you can simplify these expressions: there are a lot of excessive parenthesis, but I'm highly pessimistic, cryptoalgorithms constructed in such a way to not have any spare operations.

In order to crack it, you can use these expressions as system of equation and try to solve it using SMT-solver. This is called algebraic attack.

In other words, theoretically, you can build system of equation like this: $MD5(x)=12341234...$, but expressions are so huge so it's impossible to solve them. Yes, cryptographers are fully aware of this and one of the goals of the successful cipher is to make expressions as big as possible, using resonable time and size of code.

Nevertheless, you can find numerous papers about breaking these cryptosystems with reduced number of rounds: when expression isn't exploded yet, sometimes it's possible. It's not practical, but such experience has some interesting and theoretical interest.

Attempts to break "serious" crypto

CryptoMiniSat itself exist to support XOR operation, which is ubiquitous in cryptography.

Bitcoin mining with SAT solver: http://jheusser.github.io/2013/02/03/satcoin.html , https://github.com/msoos/sha256-sat-bitcoin . Alexander Semenov, attempts to break A5/1, etc. (Russian presentation) Vegard Nossum - SAT-based preimage attacks on SHA-1 Algebraic Attacks on the Crypto-1 Stream Cipher in MiFare Classic and Oyster Cards Attacking Bivium Using SAT Solvers Extending SAT Solvers to Cryptographic Problems Applications of SAT Solvers to Cryptanalysis of Hash Functions Algebraic-Differential Cryptanalysis of DES

This is what you can find in serial numbers, license keys, executable file packers, CTF, malware, etc. Sometimes even ransomware (but rarely nowadays).

Amateur cryptography is often can be broken using SMT solver, or even KLEE.

Amateur cryptography is usually based not on theory, but on visual complexity: if one getting results which are seems chaotic enough, one stops to improve it further. This is security not even on obscurity, but on chaotic mess.

Devising your own cryptoalgorithm is very tricky thing to do. This can be compared to devising your own pseudorandom number generator. Even famous Donald Knuth in 1959 constructed one, and it was visually very complex, but, as it turns out in practice, it has very short cycle of length 3178. See also: TAOCP volume II page 4, (1997).

The very first problem is that making an algorithm which can generate very long expressions is tricky thing itself. Common error is to use operations like XOR and rotations/permutations, which can't help much. Even worse: some people think that XORing a value several times can be better, like: (x^1234)^5678. Obviously, these two XOR operations (or many more) can be reduced to single one. Same story about applied operations like addition and subtraction - they all also can be reduced to single one.

Real cryptoalgorithms, like IDEA, can use several operations from different groups, like XOR, addition and multiplication. Applying them all in specific order will make your expression irreducible.

When I prepared this blog post, I tried to make an example of such amateur hash function:

// copypasted from http://blog.regehr.org/archives/1063 uint32_t rotl32b (uint32_t x, uint32_t n) { assert (n<32); if (!n) return x; return (x< >(32-n)); } uint32_t rotr32b (uint32_t x, uint32_t n) { assert (n<32); if (!n) return x; return (x>>n) | (x<<(32-n)); } void megahash (uint32_t buf[4]) { for (int i=0; i<4; i++) { uint32_t t0=buf[0]^0x12345678^buf[1]; uint32_t t1=buf[1]^0xabcdef01^buf[2]; uint32_t t2=buf[2]^0x23456789^buf[3]; uint32_t t3=buf[3]^0x0abcdef0^buf[0]; buf[0]=rotl32b(t0, 1); buf[1]=rotr32b(t1, 2); buf[2]=rotl32b(t2, 3); buf[3]=rotr32b(t3, 4); }; }; int main() { uint32_t buf[4]; klee_make_symbolic(buf, sizeof buf); megahash (buf); if (buf[0]==0x18f71ce6 // or whatever && buf[1]==0xf37c2fc9 && buf[2]==0x1cfe96fe && buf[3]==0x8c02c75e) klee_assert(0); };

KLEE can break it with little effort. Functions of such complexity is common in shareware, which checks license keys, etc.

Here is how we can make its work harder by making rotations dependent of inputs, and this makes number of possible inputs much, much bigger:

void megahash (uint32_t buf[4]) { for (int i=0; i<16; i++) { uint32_t t0=buf[0]^0x12345678^buf[1]; uint32_t t1=buf[1]^0xabcdef01^buf[2]; uint32_t t2=buf[2]^0x23456789^buf[3]; uint32_t t3=buf[3]^0x0abcdef0^buf[0]; buf[0]=rotl32b(t0, t1&0x1F); buf[1]=rotr32b(t1, t2&0x1F); buf[2]=rotl32b(t2, t3&0x1F); buf[3]=rotr32b(t3, t0&0x1F); }; };

Addition (ormodular addition, as cryptographers say) can make thing even harder:

void megahash (uint32_t buf[4]) { for (int i=0; i<4; i++) { uint32_t t0=buf[0]^0x12345678^buf[1]; uint32_t t1=buf[1]^0xabcdef01^buf[2]; uint32_t t2=buf[2]^0x23456789^buf[3]; uint32_t t3=buf[3]^0x0abcdef0^buf[0]; buf[0]=rotl32b(t0, t2&0x1F)+t1; buf[1]=rotr32b(t1, t3&0x1F)+t2; buf[2]=rotl32b(t2, t1&0x1F)+t3; buf[3]=rotr32b(t3, t2&0x1F)+t4; }; };

As en exercise, you can try to make a block cipher which KLEE wouldn't break. This is quite sobering experience. But even if you can, this is not a panacea, there is a huge array of other cryptoanalytical methods to break it.

Summary: if you deal with amateur cryptography, you can always give KLEE and SMT solver a try. Even more: sometimes you have only decryption function, and if algorithm is simple enough, KLEE or SMT solver can reverse things back.

One fun thing to mention: if you try to implement amateur cryptoalgorithm in Verilog/VHDL language to run it on FPGA, maybe in brute-force way, you can find that EDA tools can optimize many things during synthesis (this is the word they use for "compilation") and can leave amateur cryptoalgorithm much smaller/simpler/faster than it was. Even if you try to define DES algorithm with a fixed key, Altera Quartus can optimize first round of it and make it smaller than others.

You can find a lot of blog posts about breaking CTF-level crypto using Z3, etc. Here is one of them: http://doar-e.github.io/blog/2015/08/18/keygenning-with-klee/ .

→ [list of blog posts] The page last updated on 28-February-2017

Viewing all articles
Browse latest Browse all 12749

Trending Articles