OLLVM学习笔记

启明星辰金睛安全研究团队 2020-08-31

694

LLVM Obfuscator是一个基于LLVM框架实现的一个开源代码混淆器，整个项目包含了三个相对独立的LLVM pass, 每个pass实现了一种混淆方式，通过这些混淆手段，可以模糊原程序的流程或者某一部分的算法，给逆向分析带来一些困难。

指令变换：https://github.com/obfuscator-llvm/obfuscator/wiki/Instructions-Substitution

流程伪造：https://github.com/obfuscator-llvm/obfuscator/wiki/Bogus-Control-Flow

流程平坦化：https://github.com/obfuscator-llvm/obfuscator/wiki/Control-Flow-Flattening

指令变换

Github文档中提到，指令变换这种混淆技术，是用功能等效的指令序列去替换标准的二元运算符；当存在多个等效的指令序列时，会随机选取其中一个。这种混淆比较简单，也可以比较轻松地被去除。

- `-mllvm -sub`: activate instructions substitution

- `-mllvm -sub_loop=3`: if the pass is activated, applies it 3 times on a function. Default: 1

目前，可以进行以下替换：

前置的一点点知识：其中全局变量用@，局部变量用%，%1指的是第一个寄存器（抽象意义上的寄存器，不同于CPU的寄存器）， alloca 是声明， align是字节对齐位数，store是存储，load是加载

Addition

a = b - (-c)

%0 = load i32* %a, align 4

%1 = load i32* %b, align 4

%2 = sub i32 0, %1

%3 = sub nsw i32 %0, %2

%a、%b 均按4字节对齐（i32 表示32位整数，即4字节）；%3 = %a - (0 - %b) = %a + %b

a = -(-b + (-c))

%0 = load i32* %a, align 4

%1 = load i32* %b, align 4

%2 = sub i32 0, %0

%3 = sub i32 0, %1

%4 = add i32 %2, %3

%5 = sub nsw i32 0, %4

%5 = 0- (0 - %a + 0 - %b)

r = rand (); a = b + r; a = a + c; a = a - r

%0 = load i32* %a, align 4

%1 = load i32* %b, align 4

%2 = add i32 %0, 1107414009

%3 = add i32 %2, %1

%4 = sub nsw i32 %3, 1107414009

rand() 随机数定义 %4 = %a + rand() + %b - rand()

r = rand (); a = b - r; a = a + c; a = a + r

%0 = load i32* %a, align 4

%1 = load i32* %b, align 4

%2 = sub i32 %0, 1108523271

%3 = add i32 %2, %1

%4 = add nsw i32 %3, 1108523271

%4 = %a - rand() + %b + rand()

Subtraction

a = b + (-c)

%0 = load i32* %a, align 4

%1 = load i32* %b, align 4

%2 = sub i32 0, %1

%3 = add nsw i32 %0, %2

%3 = %a + 0 - %b

r = rand (); a = b + r; a = a - c; a = a - r

%0 = load i32* %a, align 4

%1 = load i32* %b, align 4

%2 = add i32 %0, 1571022666

%3 = sub i32 %2, %1

%4 = sub nsw i32 %3, 1571022666

%4 = %a + rand() - %b - rand()

r = rand (); a = b - r; a = a - c; a = a + r

%0 = load i32* %a, align 4

%1 = load i32* %b, align 4

%2 = sub i32 %0, 1057193181

%3 = sub i32 %2, %1

%4 = add nsw i32 %3, 1057193181

%4 = %a - rand() - %b + rand()

AND

a = b & c -> a = (b ^ ~c) & b

%0 = load i32* %a, align 4

%1 = load i32* %b, align 4

%2 = xor i32 %1, -1

%3 = xor i32 %0, %2

%4 = and i32 %3, %0

%b xor -1 = not (%b)

%4 = (%a xor not(%b)) and %a

a = b | c -> a = (b & c) | (b ^ c)

%0 = load i32* %a, align 4

%1 = load i32* %b, align 4

%2 = and i32 %0, %1

%3 = xor i32 %0, %1

%4 = or i32 %2, %3

%4 = (%a xor %b) or (%a and %b)

XOR

a = a ^ b -> a = (~a & b) | (a & ~b)

%0 = load i32* %a, align 4

%1 = load i32* %b, align 4

%2 = xor i32 %0, -1

%3 = and i32 %1, %2

%4 = xor i32 %1, -1

%5 = and i32 %0, %4

%6 = or i32 %3, %5

%6 = (not %a and %b) or (not %b and %a)

这里看完以后ollvm的指令变换特别像数电模电中的一些东西，如果慢点分析还是可以分析出来的，可以这里全部改成与非门，之前在看雪也有看过这样的题，所以感觉与非门来说相对复杂度大一些，而且在ida中会有一个big function的限定，可以稍微卡一下不懂这方面修改的人，而且看起来会比较复杂一些。

流程伪造

该方法通过在当前基本块之前添加一个基本块来修改函数调用图。这个新的基本块包含一个不透明的谓词，然后有条件地跳转到原始基本块。原始的基本块也将被克隆并填充以随机选择的垃圾指令，这里和我在CTF中的一些看到的很多if...else...语句相对应了，中间会有很多的分支，有一个是正确的。

- `-mllvm -bcf`: activates the bogus control flow pass

- `-mllvm -bcf_loop=3`: if the pass is activated, applies it 3 times on a function. Default: 1

- `-mllvm -bcf_prob=40`: if the pass is activated, a basic block will be obfuscated with a probability of 40%. Default: 30

这里原作者给了很详细的一个说明来说明这个流程伪造：

#include <stdlib.h>

/* Reference program used to illustrate bogus control flow: returns 1 when the
   first command-line argument parses to 0, and 10 otherwise.
   argv[1] is read without checking argc, exactly as in the original example. */
int main(int argc, char** argv) {
  int a = atoi(argv[1]);
  if (a == 0)
    return 1;
  else
    return 10;
  return 0; /* unreachable: both branches above already return */
}

那么我们正常的一个流程是：

我们进行了流程伪造会变成：

就是创建很多的分支来去混淆我们原先的代码，从而达到一个安全的角度，避免被分析出来，不过一般的话这里也是可以慢慢分析出来的。

流程平坦化

《OBFUSCATING C++ PROGRAMS VIA CONTROL FLOW FLATTENING》这篇论文中详细地介绍了流程平坦化。

给了几个例子来理解：

- `-mllvm -fla`: activates control flow flattening

- `-mllvm -split`: activates basic block splitting. Improve the flattening when applied together.

- `-mllvm -split_num=3`: if the pass is activated, applies it 3 times on each basic block. Default: 1

大概这里也给了我们源代码的问题的对应，一个是平坦化之前，一个是平坦化之后的：

#include <stdlib.h>

/* Program before flattening: a single if/else on the parsed first argument.
   Returns 1 when the argument parses to 0, otherwise 10. */
int main(int argc, char** argv) {
  int a = atoi(argv[1]);

  if (a == 0) {
    return 1;
  } else {
    return 10;
  }

  /* Never reached; kept because the original example has it. */
  return 0;
}

我们平坦化后：

#include <stdlib.h>

/* The same program after control-flow flattening, written by hand.
   b is the dispatcher state: every original basic block becomes a case of the
   switch, and a block "jumps" by storing the next state into b and letting the
   enclosing while(1) loop dispatch again. */
int main(int argc, char** argv) {
  int a = atoi(argv[1]);
  int b = 0; /* state 0 = the original entry block */
  while(1) {
    switch(b) {
      case 0:
        /* the original if/else now only selects the next state */
        if(a == 0)
          b = 1;
        else
          b = 2;
        break;
      case 1:
        return 1;
      case 2:
        return 10;
      default:
        break;
    }
  }
  return 0; /* unreachable: the loop is only left through the returns above */
}

总体来说，控制流程平坦化这个特性，抽象下来，主要是通过这几个步骤来实现的：

1. 在整个代码流程中，分析搜集出所有的基本代码块(Basic Block)（译者注：遇到条件分支就算是一个新的代码块了）

2. 把基本代码块放到控制流图的最底部，然后删除掉原来的基本块之间的跳转关系

3. 添加混淆器的流程控制分发逻辑，通过新的复杂分发逻辑还原原来程序块之间的逻辑关系

就跟我们上面所举的例子一样，我们用更复杂的逻辑关系去等价地表达原来的逻辑关系。接下来我们搭建一下ollvm的环境，然后用ida去测试一下。

clang -c -emit-llvm *.c -o *.bc编译产生字节码

clang -S -emit-llvm *.c -o *.ll编译产生可视化字节码

llvm-dis *.bc -o *.ll 将字节码转换为可视化字节码

llvm-as *.ll -o *.bc 将可视化字节码转换为字节码

搭建ollvm环境的命令：

1 cd ..

2 pwd

3 sudo apt-get install g++ cmake git

4 cd ollvm && git clone -b llvm-4.0 https://github.com/obfuscator-llvm/obfuscator.git

5 cd obfuscator/

6 ls

7 mkdir build

8 cd build

9 cmake -DCMAKE_BUILD_TYPE=Release ../obfuscator/

10 cmake -DCMAKE_BUILD_TYPE=Release ../../obfuscator/

11 cmake --version

12 g++ --version

13 cmake -DCMAKE_BUILD_TYPE=Release ../../obfuscator/

14 cmake -DCMAKE_BUILD_TYPE=Release -DLLVM_INCLUDE_TESTS=OFF ../../obfuscator/

15 time make -j8

我们平坦化后的代码：

// Decompiler-style pseudo-code of a flattened main().
// v4 is the dispatcher state: each constant it is compared against selects one
// block, and each block stores the constant of the block to run next.
// The loop is left once v4 reaches 1256810293.
int __cdecl main(int argc, const char **argv, const char **envp)
{
  signed int v4; // [rsp+20h] [rbp-10h]
  signed int v5; // [rsp+24h] [rbp-Ch]

  v5 = 1;
  v4 = 602246107;                 // initial state
  while ( 1 )
  {
    while ( v4 == 602246107 )
    {
      envp = (const char **)1;
      v4 = 1441673877;
    }
    if ( v4 == 1256810293 )       // exit state
      break;
    if ( v4 == 1441673877 )
    {
      ++v5;
      v4 = 1256810293;
      printf("hello ollvm", argv, envp);
    }
    else if ( v4 == 1923071755 )
    {
      v4 = 1256810293;
      printf("hello word!", argv, envp);
    }
  }
  return 0;
}

流程变得更复杂了一点：

可执行流程块：

源码分析

这里还是比较推荐看一下相关文档的源码，可以看看是怎么实现的，为什么这么实现的，直接去解读一下：https://github.com/obfuscator-llvm/obfuscator/tree/llvm-4.0/lib/Transforms/Obfuscation

对于我们的OLLVM的每个pass，主要是继承对应的pass类，对应的方法进行重写。

IR基本结构：

IR代码是由一个个Module(模块)组成的，每个Module之间互相联系，而Module又是由一个个Function(函数)组成，Function又是由一个个BasicBlock(基本块)组成，在BasicBlock中又包含了一条条Instruction(指令)

指令分割

如果我们把A、B、C三个代码块每个都再进行细粒度的切割，变成A1,A2...Ax, B1,B2...Bx, C1,C2...Cx这样，再按照流程平坦化的逻辑去进行一次代码重组，重组之后的代码会充斥着switch case，而且case的顺序还是随机的，结果是大大降低了整个代码逻辑的可读性。

指令分割实现于SplitBasicBlock.cpp中：

namespace {

/// FunctionPass that splits the basic blocks of a function into smaller ones.
struct SplitBasicBlock : public FunctionPass {
  static char ID; // Pass identification, replacement for typeid

  // Stored by the one-argument constructor and later passed to toObfuscate().
  // Note: the default constructor leaves it uninitialized.
  bool flag;

  SplitBasicBlock() : FunctionPass(ID) {}
  SplitBasicBlock(bool flag) : FunctionPass(ID) { this->flag = flag; }

  /// Pass entry point, called with each function to process.
  bool runOnFunction(Function &F);

  /// Performs the actual splitting of f's basic blocks.
  void split(Function *f);

  // Helpers used by split(); their definitions are not part of this excerpt.
  bool containsPHI(BasicBlock *b);
  void shuffle(std::vector<int> &vec);
};

}

这里面我们继承了FunctionPass,去实现runOnFunction函数

// Statistic counter named Split; the pass bumps it with ++Split.
STATISTIC(Split,"Basicblock splitted");

// Integer command-line option "split_num" with an initial value of 2.
static cl::opt<int>SplitNum("split_num", cl::init(2),

cl::desc("Split <split_num> time each BB"));

namespace {

/// FunctionPass that splits the basic blocks of a function into smaller ones.
struct SplitBasicBlock : public FunctionPass {
  static char ID; // Pass identification, replacement for typeid

  // Stored by the one-argument constructor and later passed to toObfuscate().
  // Note: the default constructor leaves it uninitialized.
  bool flag;

  SplitBasicBlock() : FunctionPass(ID) {}
  SplitBasicBlock(bool flag) : FunctionPass(ID) { this->flag = flag; }

  /// Pass entry point, called with each function to process.
  bool runOnFunction(Function &F);

  /// Performs the actual splitting of f's basic blocks.
  void split(Function *f);

  // Helpers used by split(); their definitions are not part of this excerpt.
  bool containsPHI(BasicBlock *b);
  void shuffle(std::vector<int> &vec);
};

}

char SplitBasicBlock::ID = 0;

staticRegisterPass<SplitBasicBlock> X("splitbbl", "BasicBlocksplitting");

Pass*llvm::createSplitBasicBlock(bool flag) {

return new SplitBasicBlock(flag);

}

//继承FunctionPass，重新写一下runOnFunction的功能

boolSplitBasicBlock::runOnFunction(Function &F) {

// Check if the number of applications iscorrect

if (!((SplitNum > 1) && (SplitNum<= 10))) {

errs()<<"Split application basicblock percentage\

-split_num=x must be 1 < x <= 10";

return false;

}

//切割次数必须要在1到10次之间，如果再这个范围之外就会flase，这里的代码用来判断切割次数

Function *tmp = &F;

// Do we obfuscate

if (toObfuscate(flag, tmp,"split")) {

split(tmp);

++Split;

}

//如果我们的SplitNum符合，我们会进行toObfuscate进行处理，在我们的Utils.h的头文件里面，之后执行我们的split函数

return false;

}

首先会对我们的基本块进行分割以及判断

clang -mllvm -split test.c

clang -mllvm -split_num=3 test.c

第一条命令使用默认的分割次数（wiki文档中写的默认值是1，而上面源码中cl::init给出的初始值是2）；第二条命令指定分割次数为3。源码中要求该值必须满足 1 < x <= 10。

/// Decides whether an obfuscation pass should process a function.
///
/// @param flag      pass-level switch handed in by the calling pass.
/// @param f         function under consideration.
/// @param attribute annotation name that enables the pass (e.g. "fla",
///                  "split"); the same name prefixed with "no" disables it.
/// @return false for declarations, functions with available_externally
///         linkage and functions whose annotations contain "no<attribute>";
///         true when the annotations contain <attribute> or when flag is set;
///         false otherwise.
bool toObfuscate(bool flag, Function *f, std::string attribute) {
  std::string attr = attribute;
  std::string attrNo = "no" + attr;

  // Check if declaration
  if (f->isDeclaration()) {
    return false;
  }

  // Check external linkage
  if (f->hasAvailableExternallyLinkage() != 0) {
    return false;
  }

  // We have to check the nofla flag first
  // Because .find("fla") is true for a string like "fla" or
  // "nofla"
  if (readAnnotate(f).find(attrNo) != std::string::npos) {
    return false;
  }

  // If fla annotations
  if (readAnnotate(f).find(attr) != std::string::npos) {
    return true;
  }

  // If fla flag is set
  if (flag == true) {
    /* Disabled percentage / name based selection, kept for reference:

    // Check if the number of applications is correct
    if (!((Percentage > 0) && (Percentage <= 100))) {
      LLVMContext &ctx = llvm::getGlobalContext();
      ctx.emitError(Twine("Flattening application function\
              percentage -perFLA=x must be 0 < x <= 100"));
    }
    // Check name
    else if (func.size() != 0 && func.find(f->getName()) != std::string::npos) {
      return true;
    }

    if ((((int)llvm::cryptoutils->get_range(100))) < Percentage) {
      return true;
    }
    */
    return true;
  }

  return false;
}

这是判断是否启用了split功能，bool flag, Function *f, std::string attribute 是我们的依据

voidSplitBasicBlock::split(Function *f) {

std::vector<BasicBlock *> origBB;

int splitN = SplitNum;

// Save all basic blocks

for (Function::iterator I = f->begin(), IE= f->end(); I != IE; ++I) {

origBB.push_back(&*I);

}

for (std::vector<BasicBlock*>::iterator I = origBB.begin(),

IE =origBB.end();

I != IE; ++I) {

BasicBlock *curr = *I;

// No need to split a 1 inst bb

// Or ones containing a PHI node

if (curr->size() < 2 ||containsPHI(curr)) {

continue;

}

// Check splitN and current BB size

if ((size_t)splitN > curr->size()) {

splitN = curr->size() - 1;

}

// Generate splits point

std::vector<int> test;

for (unsigned i = 1; i <curr->size(); ++i) {

test.push_back(i);

}

// Shuffle

if (test.size() != 1) {

shuffle(test);

std::sort(test.begin(), test.begin() +splitN);

}

// Split

BasicBlock::iterator it = curr->begin();

BasicBlock *toSplit = curr;

int last = 0;

for (int i = 0; i < splitN; ++i) {

for (int j = 0; j < test[i] - last;++j) {

++it;

}

last = test[i];

if(toSplit->size() < 2)

continue;

toSplit = toSplit->splitBasicBlock(it,toSplit->getName() + ".split");

}

++Split;

}

vector数组origBB用于保存所有的block块。遍历origBB中的每一个curr，如果它包含的指令数只有1个或者包含PHI节点，则不分割该block；对于待分割的block，首先生成分割点，用test数组存放候选分割点，用shuffle打乱候选分割点的顺序，再用sort对前splitN个分割点排序，这样选出的分割点就是随机的，最后调用splitBasicBlock函数分割基本块

总结起来就是：按照 -mllvm -split_num=x 指定的参数值，把原BasicBlock切割成x+1块，新老块之间用无条件跳转指令连起来

这边有一个很直观的图：

指令替换功能

指令替换功能实现在Substitution.cpp，可以看到这里函数中的switch case只对加、减、与、或、异或进行了处理

/// Runs the instruction-substitution rewrite over every binary operator of f,
/// ObfTimes times (at least once).
///
/// Add, Sub, And, Or and Xor are replaced through a randomly selected entry of
/// the matching member-function table; every other opcode is left untouched.
///
/// @param f function to rewrite in place.
/// @return always false.
bool Substitution::substitute(Function *f) {
  Function *func = f;

  // Loop for the number of time we run the pass on the function
  int remaining = ObfTimes;
  do {
    for (Function::iterator block = func->begin(); block != func->end();
         ++block) {
      for (BasicBlock::iterator cur = block->begin(); cur != block->end();
           ++cur) {
        if (!cur->isBinaryOp()) {
          continue;
        }

        switch (cur->getOpcode()) {
        case BinaryOperator::Add:
          // Substitute with random add operation
          (this->*funcAdd[llvm::cryptoutils->get_range(NUMBER_ADD_SUBST)])(
              cast<BinaryOperator>(cur));
          ++Add;
          break;

        case BinaryOperator::Sub:
          // Substitute with random sub operation
          (this->*funcSub[llvm::cryptoutils->get_range(NUMBER_SUB_SUBST)])(
              cast<BinaryOperator>(cur));
          ++Sub;
          break;

        case Instruction::And:
          (this->*funcAnd[llvm::cryptoutils->get_range(2)])(
              cast<BinaryOperator>(cur));
          ++And;
          break;

        case Instruction::Or:
          (this->*funcOr[llvm::cryptoutils->get_range(2)])(
              cast<BinaryOperator>(cur));
          ++Or;
          break;

        case Instruction::Xor:
          (this->*funcXor[llvm::cryptoutils->get_range(2)])(
              cast<BinaryOperator>(cur));
          ++Xor;
          break;

        default:
          // Mul/FMul, UDiv/SDiv/FDiv, URem/SRem/FRem, Shl, LShr, AShr and any
          // remaining opcode: no substitution is performed.
          break;
        } // End switch
      }   // End for basic block
    }     // End for function
  } while (--remaining > 0); // for times

  return false;
}

这里面操作的方法会有对应的函数的跳转操作，一般我们的case的位置跳转会在我们上面进行了定义

// Implementation of a = b -(-c)

voidSubstitution::addNeg(BinaryOperator *bo) {

BinaryOperator *op = NULL;

// Create sub

if (bo->getOpcode() == Instruction::Add) {

op =BinaryOperator::CreateNeg(bo->getOperand(1), "", bo);

op =

BinaryOperator::Create(Instruction::Sub, bo->getOperand(0), op,"", bo);

// Check signed wrap

//op->setHasNoSignedWrap(bo->hasNoSignedWrap());

//op->setHasNoUnsignedWrap(bo->hasNoUnsignedWrap());

bo->replaceAllUsesWith(op);

}/* else {

op =BinaryOperator::CreateFNeg(bo->getOperand(1), "", bo);

op =BinaryOperator::Create(Instruction::FSub, bo->getOperand(0), op,"",

bo);

}*/

}

// Implementation of a = -(-b +(-c))

voidSubstitution::addDoubleNeg(BinaryOperator *bo) {

BinaryOperator *op, *op2 = NULL;

if (bo->getOpcode() == Instruction::Add) {

op = BinaryOperator::CreateNeg(bo->getOperand(0),"", bo);

op2 =BinaryOperator::CreateNeg(bo->getOperand(1), "", bo);

op =BinaryOperator::Create(Instruction::Add, op, op2, "", bo);

op = BinaryOperator::CreateNeg(op,"", bo);

// Check signed wrap

//op->setHasNoSignedWrap(bo->hasNoSignedWrap());

//op->setHasNoUnsignedWrap(bo->hasNoUnsignedWrap());

} else {

op =BinaryOperator::CreateFNeg(bo->getOperand(0), "", bo);

op2 =BinaryOperator::CreateFNeg(bo->getOperand(1), "", bo);

op =BinaryOperator::Create(Instruction::FAdd, op, op2, "", bo);

op = BinaryOperator::CreateFNeg(op,"", bo);

}

bo->replaceAllUsesWith(op);

}

// Implementation of r = rand (); a = b + r; a = a + c; a = a - r

voidSubstitution::addRand(BinaryOperator *bo) {

BinaryOperator *op = NULL;

if (bo->getOpcode() == Instruction::Add) {

Type *ty = bo->getType();

ConstantInt *co =

(ConstantInt *)ConstantInt::get(ty,llvm::cryptoutils->get_uint64_t());

op =

BinaryOperator::Create(Instruction::Add,bo->getOperand(0), co, "", bo);

op =

BinaryOperator::Create(Instruction::Add, op, bo->getOperand(1),"", bo);

op =BinaryOperator::Create(Instruction::Sub, op, co, "", bo);

// Check signed wrap

//op->setHasNoSignedWrap(bo->hasNoSignedWrap());

//op->setHasNoUnsignedWrap(bo->hasNoUnsignedWrap());

bo->replaceAllUsesWith(op);

}

/* else {

Type *ty = bo->getType();

ConstantFP *co =

(ConstantFP*)ConstantFP::get(ty,(float)llvm::cryptoutils->get_uint64_t());

op =BinaryOperator::Create(Instruction::FAdd,bo->getOperand(0),co,"",bo);

op =BinaryOperator::Create(Instruction::FAdd,op,bo->getOperand(1),"",bo);

op = BinaryOperator::Create(Instruction::FSub,op,co,"",bo);

} */

}

// Implementation of r = rand(); a = b - r; a = a + b; a = a + r

voidSubstitution::addRand2(BinaryOperator *bo) {

BinaryOperator *op = NULL;

if (bo->getOpcode() == Instruction::Add) {

Type *ty = bo->getType();

ConstantInt *co =

(ConstantInt *)ConstantInt::get(ty,llvm::cryptoutils->get_uint64_t());

op =

BinaryOperator::Create(Instruction::Sub, bo->getOperand(0), co,"", bo);

op =

BinaryOperator::Create(Instruction::Add,op, bo->getOperand(1), "", bo);

op =BinaryOperator::Create(Instruction::Add, op, co, "", bo);

// Check signed wrap

//op->setHasNoSignedWrap(bo->hasNoSignedWrap());

//op->setHasNoUnsignedWrap(bo->hasNoUnsignedWrap());

bo->replaceAllUsesWith(op);

}

/* else {

Type *ty = bo->getType();

ConstantFP *co =

(ConstantFP*)ConstantFP::get(ty,(float)llvm::cryptoutils->get_uint64_t());

op =BinaryOperator::Create(Instruction::FAdd,bo->getOperand(0),co,"",bo);

op =BinaryOperator::Create(Instruction::FAdd,op,bo->getOperand(1),"",bo);

op =BinaryOperator::Create(Instruction::FSub,op,co,"",bo);

} */

}

// Implementation of a = b +(-c)

voidSubstitution::subNeg(BinaryOperator *bo) {

BinaryOperator *op = NULL;

if (bo->getOpcode() == Instruction::Sub) {

op =BinaryOperator::CreateNeg(bo->getOperand(1), "", bo);

op =

BinaryOperator::Create(Instruction::Add, bo->getOperand(0), op,"", bo);

// Check signed wrap

//op->setHasNoSignedWrap(bo->hasNoSignedWrap());

//op->setHasNoUnsignedWrap(bo->hasNoUnsignedWrap());

} else {

op =BinaryOperator::CreateFNeg(bo->getOperand(1), "", bo);

op =BinaryOperator::Create(Instruction::FAdd, bo->getOperand(0), op,"",

bo);

}

bo->replaceAllUsesWith(op);

}

// Implementation of r = rand (); a = b + r; a = a - c; a = a - r

voidSubstitution::subRand(BinaryOperator *bo) {

BinaryOperator *op = NULL;

if (bo->getOpcode() == Instruction::Sub) {

Type *ty = bo->getType();

ConstantInt *co =

(ConstantInt *)ConstantInt::get(ty,llvm::cryptoutils->get_uint64_t());

op =

BinaryOperator::Create(Instruction::Add, bo->getOperand(0), co,"", bo);

op =

BinaryOperator::Create(Instruction::Sub,op, bo->getOperand(1), "", bo);

op =BinaryOperator::Create(Instruction::Sub, op, co, "", bo);

// Check signed wrap

//op->setHasNoSignedWrap(bo->hasNoSignedWrap());

//op->setHasNoUnsignedWrap(bo->hasNoUnsignedWrap());

bo->replaceAllUsesWith(op);

}

/* else {

Type *ty = bo->getType();

ConstantFP *co =

(ConstantFP*)ConstantFP::get(ty,(float)llvm::cryptoutils->get_uint64_t());

op =BinaryOperator::Create(Instruction::FAdd,bo->getOperand(0),co,"",bo);

op =BinaryOperator::Create(Instruction::FSub,op,bo->getOperand(1),"",bo);

op =BinaryOperator::Create(Instruction::FSub,op,co,"",bo);

} */

}

// Implementation of r = rand (); a = b - r; a = a - c; a = a + r

voidSubstitution::subRand2(BinaryOperator *bo) {

BinaryOperator *op = NULL;

if (bo->getOpcode() == Instruction::Sub) {

Type *ty = bo->getType();

ConstantInt *co =

(ConstantInt *)ConstantInt::get(ty,llvm::cryptoutils->get_uint64_t());

op =

BinaryOperator::Create(Instruction::Sub, bo->getOperand(0), co,"", bo);

op =

BinaryOperator::Create(Instruction::Sub, op, bo->getOperand(1),"", bo);

op = BinaryOperator::Create(Instruction::Add,op, co, "", bo);

// Check signed wrap

//op->setHasNoSignedWrap(bo->hasNoSignedWrap());

//op->setHasNoUnsignedWrap(bo->hasNoUnsignedWrap());

bo->replaceAllUsesWith(op);

}

/* else {

Type *ty = bo->getType();

ConstantFP *co =

(ConstantFP*)ConstantFP::get(ty,(float)llvm::cryptoutils->get_uint64_t());

op =BinaryOperator::Create(Instruction::FSub,bo->getOperand(0),co,"",bo);

op =BinaryOperator::Create(Instruction::FSub,op,bo->getOperand(1),"",bo);

op =BinaryOperator::Create(Instruction::FAdd,op,co,"",bo);

} */

}

// Implementation of a = b& c => a = (b^~c)& b

voidSubstitution::andSubstitution(BinaryOperator *bo) {

BinaryOperator *op = NULL;

// Create NOT on second operand => ~c

op =BinaryOperator::CreateNot(bo->getOperand(1), "", bo);

// Create XOR => (b^~c)

BinaryOperator *op1 =

BinaryOperator::Create(Instruction::Xor,bo->getOperand(0), op, "", bo);

// Create AND => (b^~c) & b

op = BinaryOperator::Create(Instruction::And,op1, bo->getOperand(0), "", bo);

bo->replaceAllUsesWith(op);

}

// Implementation of a = a&& b <=> !(!a | !b) && (r | !r)

voidSubstitution::andSubstitutionRand(BinaryOperator *bo) {

// Copy of the BinaryOperator type to createthe random number with the

// same type of the operands

Type *ty = bo->getType();

// r (Random number)

ConstantInt *co =

(ConstantInt *)ConstantInt::get(ty,llvm::cryptoutils->get_uint64_t());

// !a

BinaryOperator *op =BinaryOperator::CreateNot(bo->getOperand(0), "", bo);

// !b

BinaryOperator *op1 =BinaryOperator::CreateNot(bo->getOperand(1), "", bo);

// !r

BinaryOperator *opr =BinaryOperator::CreateNot(co, "", bo);

// (!a | !b)

BinaryOperator *opa =

BinaryOperator::Create(Instruction::Or,op, op1, "", bo);

// (r | !r)

opr = BinaryOperator::Create(Instruction::Or,co, opr, "", bo);

// !(!a | !b)

op = BinaryOperator::CreateNot(opa,"", bo);

// !(!a | !b) && (r | !r)

op = BinaryOperator::Create(Instruction::And,op, opr, "", bo);

// We replace all the old AND operators withthe new one transformed

bo->replaceAllUsesWith(op);

}

// Implementation of a = b | c=> a = (b & c) | (b ^ c)

voidSubstitution::orSubstitutionRand(BinaryOperator *bo) {

Type *ty = bo->getType();

ConstantInt *co =

(ConstantInt *)ConstantInt::get(ty,llvm::cryptoutils->get_uint64_t());

// !a

BinaryOperator *op =BinaryOperator::CreateNot(bo->getOperand(0), "", bo);

// !b

BinaryOperator *op1 =BinaryOperator::CreateNot(bo->getOperand(1), "", bo);

// !r

BinaryOperator *op2 =BinaryOperator::CreateNot(co, "", bo);

// !a && r

BinaryOperator *op3 =

BinaryOperator::Create(Instruction::And,op, co, "", bo);

// a && !r

BinaryOperator *op4 =

BinaryOperator::Create(Instruction::And,bo->getOperand(0), op2, "", bo);

// !b && r

BinaryOperator *op5 =

BinaryOperator::Create(Instruction::And,op1, co, "", bo);

// b && !r

BinaryOperator *op6 =

BinaryOperator::Create(Instruction::And,bo->getOperand(1), op2, "", bo);

// (!a && r) || (a && !r)

op3 = BinaryOperator::Create(Instruction::Or,op3, op4, "", bo);

// (!b && r) ||(b && !r)

op4 = BinaryOperator::Create(Instruction::Or,op5, op6, "", bo);

// (!a && r) || (a && !r) ^(!b && r) ||(b && !r)

op5 = BinaryOperator::Create(Instruction::Xor,op3, op4, "", bo);

// !a || !b

op3 = BinaryOperator::Create(Instruction::Or,op, op1, "", bo);

// !(!a || !b)

op3 = BinaryOperator::CreateNot(op3,"", bo);

// r || !r

op4 = BinaryOperator::Create(Instruction::Or,co, op2, "", bo);

// !(!a|| !b) && (r || !r)

op4 =BinaryOperator::Create(Instruction::And, op3, op4, "", bo);

// [(!a && r) || (a && !r) ^(!b && r) ||(b && !r) ] || [!(!a || !b) && (r ||

// !r)]

op = BinaryOperator::Create(Instruction::Or,op5, op4, "", bo);

bo->replaceAllUsesWith(op);

}

voidSubstitution::orSubstitution(BinaryOperator *bo) {

BinaryOperator *op = NULL;

// Creating first operand (b & c)

op = BinaryOperator::Create(Instruction::And,bo->getOperand(0),

bo->getOperand(1),"", bo);

// Creating second operand (b ^ c)

BinaryOperator *op1 = BinaryOperator::Create(

Instruction::Xor, bo->getOperand(0),bo->getOperand(1), "", bo);

// final op

op = BinaryOperator::Create(Instruction::Or,op, op1, "", bo);

bo->replaceAllUsesWith(op);

}

// Implementation of a = a ~ b=> a = (!a && b) || (a && !b)

voidSubstitution::xorSubstitution(BinaryOperator *bo) {

BinaryOperator *op = NULL;

// Create NOT on first operand

op = BinaryOperator::CreateNot(bo->getOperand(0),"", bo); // !a

// Create AND

op = BinaryOperator::Create(Instruction::And,bo->getOperand(1), op, "",

bo); // !a&& b

// Create NOT on second operand

BinaryOperator *op1 =

BinaryOperator::CreateNot(bo->getOperand(1), "", bo); // !b

// Create AND

op1 =BinaryOperator::Create(Instruction::And, bo->getOperand(0), op1,"",

bo); // a&& !b

// Create OR

op = BinaryOperator::Create(Instruction::Or,op, op1, "",

bo); // (!a&& b) || (a && !b)

bo->replaceAllUsesWith(op);

}

// implementation of a = a ^ b<=> (a ^ r) ^ (b ^ r) <=> (!a && r || a && !r) ^

// (!b && r || b&& !r)

// note : r is a random number

voidSubstitution::xorSubstitutionRand(BinaryOperator *bo) {

BinaryOperator *op = NULL;

Type *ty = bo->getType();

ConstantInt *co =

(ConstantInt *)ConstantInt::get(ty,llvm::cryptoutils->get_uint64_t());

// !a

op = BinaryOperator::CreateNot(bo->getOperand(0),"", bo);

// !a && r

op = BinaryOperator::Create(Instruction::And,co, op, "", bo);

// !r

BinaryOperator *opr =BinaryOperator::CreateNot(co, "", bo);

// a && !r

BinaryOperator *op1 =

BinaryOperator::Create(Instruction::And,bo->getOperand(0), opr, "", bo);

// !b

BinaryOperator *op2 =BinaryOperator::CreateNot(bo->getOperand(1), "", bo);

// !b && r

op2 =BinaryOperator::Create(Instruction::And, op2, co, "", bo);

// b && !r

BinaryOperator *op3 =

BinaryOperator::Create(Instruction::And,bo->getOperand(1), opr, "", bo);

// (!a && r) || (a && !r)

op = BinaryOperator::Create(Instruction::Or,op, op1, "", bo);

// (!b && r) || (b && !r)

op1 = BinaryOperator::Create(Instruction::Or,op2, op3, "", bo);

// (!a && r) || (a && !r) ^(!b && r) || (b && !r)

op = BinaryOperator::Create(Instruction::Xor,op, op1, "", bo);

bo->replaceAllUsesWith(op);

这个就是我们的指令替换的一个转换

控制流平坦功能

控制流平坦功能实现在Flattening.cpp中

可以看到还是继承了FunctionPass，可以直接去看一下runOnFunction

namespace {

/// FunctionPass that applies control-flow flattening to a function.
struct Flattening : public FunctionPass {
  static char ID; // Pass identification, replacement for typeid

  // Stored by the one-argument constructor and later passed to toObfuscate().
  // Note: the default constructor leaves it uninitialized.
  bool flag;

  Flattening() : FunctionPass(ID) {}
  Flattening(bool flag) : FunctionPass(ID) { this->flag = flag; }

  /// Pass entry point, called with each function to process.
  bool runOnFunction(Function &F);

  /// Performs the flattening of f; the result is counted by runOnFunction().
  bool flatten(Function *f);
};

}

我们和原来的toObfuscate的函数调用一样，还是去看是否传入了 -mllvm -fla参数

boolFlattening::runOnFunction(Function &F) {

Function *tmp = &F;

// Do we obfuscate

if (toObfuscate(flag, tmp, "fla")){

if (flatten(tmp)) {

++Flattened;

}

return false;

功能实现在flatten函数

boolFlattening::flatten(Function *f) {

vector<BasicBlock *> origBB;

BasicBlock *loopEntry;

BasicBlock *loopEnd;

LoadInst *load;

SwitchInst *switchI;

AllocaInst *switchVar;

// SCRAMBLER

//llvm::cryptoutils->get_bytes产生一个随机种子

char scrambling_key[16];

llvm::cryptoutils->get_bytes(scrambling_key, 16);

// END OF SCRAMBLER

// Lower switch

//调用了一个外部Pass,LowerSwitch这个Pass,内部逻辑是消除了当前函数中的switch方式组织的代码，抓换成if else这种分支调用，方便后面进行代码块分割，从而进行平坦化操作

FunctionPass *lower =createLowerSwitchPass();

lower->runOnFunction(*f);

// Save all original BB

//遍历当前函数中的所有BasicBlock，并保存到origBB vector数组

for (Function::iterator i = f->begin(); i!= f->end(); ++i) {

BasicBlock *tmp = &*i;

origBB.push_back(tmp);

BasicBlock *bb = &*i;

if (isa<InvokeInst>(bb->getTerminator())){

return false;

}

// Nothing to flatten

if (origBB.size() <= 1) {

return false;

}

// Remove first BB

//把第一块BasicBlock从origBB这个vector中移除了，因为按照流程平坦化的设计，第一块是单独处理的，作为整个混淆流程的开始逻辑

origBB.erase(origBB.begin());

// Get a pointer on the first BB

Function::iterator tmp = f->begin(); //++tmp;

BasicBlock *insert = &*tmp;

// If main begin with an if

//查第一块中是否包含条件跳转分支，如果包含条件跳转分支，则按照条件分支的位置进行代码块分割，分割逻辑跟SplitBasicBlock的逻辑一致

BranchInst *br = NULL;

if(isa<BranchInst>(insert->getTerminator())) {

br =cast<BranchInst>(insert->getTerminator());

}

if ((br != NULL &&br->isConditional()) ||

insert->getTerminator()->getNumSuccessors() > 1) {

BasicBlock::iterator i = insert->end();

--i;

if (insert->size() > 1) {

--i;

}

BasicBlock *tmpBB =insert->splitBasicBlock(i, "first");

origBB.insert(origBB.begin(), tmpBB);

}

// Remove jump

insert->getTerminator()->eraseFromParent();

// Create switch variable and set as it

//创建了一个switch用的变量，相当于switch(caseVar) 这个语句中的caseVar, 并通过StoreInst赋予初始值，初始值通过llvm::cryptoutils->scramble32(0,scrambling_key))生成，其中scrambling_key就是函数开头生成的随机种子

switchVar =

newAllocaInst(Type::getInt32Ty(f->getContext()), 0, "switchVar",insert);

new StoreInst(

ConstantInt::get(Type::getInt32Ty(f->getContext()),

llvm::cryptoutils->scramble32(0, scrambling_key)),

switchVar, insert);

// Create main loop

//创建了一个代码块loopEntry, 空代码块

loopEntry =BasicBlock::Create(f->getContext(), "loopEntry", f, insert);

//创建了一个代码块loopEnd, 空代码块

loopEnd =BasicBlock::Create(f->getContext(), "loopEnd", f, insert);

//创建了一条load指令，load是上面创建的switchVar, 并把这句代码放入loopEntry

load = new LoadInst(switchVar,"switchVar", loopEntry);

// Move first BB on top

//把函数第一个BasicBlock和loopEntry用一个无条件跳转指令连起来

insert->moveBefore(loopEntry);

BranchInst::Create(loopEntry, insert);

// loopEnd jump to loopEntry

// loopEntry, loopEnd用无条件跳转连接起来

BranchInst::Create(loopEntry, loopEnd);

//创建了一个BasicBlock, 并用无条件跳转指令连接起来

BasicBlock *swDefault =

BasicBlock::Create(f->getContext(),"switchDefault", f, loopEnd);

BranchInst::Create(loopEnd, swDefault);

//Create switch instruction itself and set condition

//创建了一个switch结构，并放入到loopEntry代码块中

switchI =SwitchInst::Create(&*f->begin(), swDefault, 0, loopEntry);

switchI->setCondition(load);

// Remove branch jump from 1st BB and make ajump to the while

f->begin()->getTerminator()->eraseFromParent();

BranchInst::Create(loopEntry,&*f->begin());

// Put all BB in the switch

for (vector<BasicBlock *>::iterator b =origBB.begin(); b != origBB.end();

++b) {

BasicBlock *i = *b;

ConstantInt *numCase = NULL;

// Move the BB inside the switch (onlyvisual, no code logic)

i->moveBefore(loopEnd);

// Add case to switch

numCase =cast<ConstantInt>(ConstantInt::get(

switchI->getCondition()->getType(),

llvm::cryptoutils->scramble32(switchI->getNumCases(),scrambling_key)));

switchI->addCase(numCase, i);

}

// Recalculate switchVar

for (vector<BasicBlock *>::iterator b =origBB.begin(); b != origBB.end();

++b) {

BasicBlock *i = *b;

ConstantInt *numCase = NULL;

// Ret BB

if(i->getTerminator()->getNumSuccessors() == 0) {

continue;

}

// If it's a non-conditional jump

if(i->getTerminator()->getNumSuccessors() == 1) {

// Get successor and delete terminator

BasicBlock *succ =i->getTerminator()->getSuccessor(0);

i->getTerminator()->eraseFromParent();

// Get next case

numCase = switchI->findCaseDest(succ);

// If next case == default case(switchDefault)

if (numCase == NULL) {

numCase = cast<ConstantInt>(

ConstantInt::get(switchI->getCondition()->getType(),

llvm::cryptoutils->scramble32(

switchI->getNumCases() - 1, scrambling_key)));

}

// Update switchVar and jump to the endof loop

new StoreInst(numCase,load->getPointerOperand(), i);

BranchInst::Create(loopEnd, i);

continue;

}

// If it's a conditional jump

if(i->getTerminator()->getNumSuccessors() == 2) {

// Get next cases

ConstantInt *numCaseTrue =

switchI->findCaseDest(i->getTerminator()->getSuccessor(0));

ConstantInt *numCaseFalse =

switchI->findCaseDest(i->getTerminator()->getSuccessor(1));

// Check if next case == default case(switchDefault)

if (numCaseTrue == NULL) {

numCaseTrue = cast<ConstantInt>(

ConstantInt::get(switchI->getCondition()->getType(),

llvm::cryptoutils->scramble32(

switchI->getNumCases() - 1, scrambling_key)));

}

if (numCaseFalse == NULL) {

numCaseFalse = cast<ConstantInt>(

ConstantInt::get(switchI->getCondition()->getType(),

llvm::cryptoutils->scramble32(

switchI->getNumCases() - 1, scrambling_key)));

}

// Create a SelectInst

BranchInst *br =cast<BranchInst>(i->getTerminator());

SelectInst *sel =

SelectInst::Create(br->getCondition(), numCaseTrue, numCaseFalse,"",

i->getTerminator());

// Erase terminator

i->getTerminator()->eraseFromParent();

// Update switchVar and jump to the endof loop

new StoreInst(sel,load->getPointerOperand(), i);

BranchInst::Create(loopEnd, i);

continue;

}

最后的形成就是上面的平坦化的功能

数据库

文章转载自启明星辰金睛安全研究团队，如果涉嫌侵权，请发送邮件至：contact@modb.pro进行举报，并提供相关证据，一经查实，墨天轮将立刻删除相关内容。

OLLVM学习笔记

评论