LLVM(Low Level Virtual Machine)的设计理念

  • 统一(LLVM IR)

编译器通常分成三部分:

  1. 前端:对源码进行词法、语法和语义分析,生成中间表示(LLVM IR)
  2. 中端:对前端生成的 IR 进行优化
  3. 后端:将优化后的 IR 翻译成目标平台的机器码

llvm pass
LLVM Pass 是对 LLVM IR 进行处理(分析或转换)的模块

  • pass的基本类型:
  1. 分析型pass
  2. 转换型pass
  3. 实用型pass *
  • pass的处理单位:
  1. 处理函数:FunctionPass
  2. 处理模块:ModulePass
  3. 处理单个基本块:BasicBlockPass
  4. 处理循环:LoopPass

clang和llvm

```bash
# Ubuntu-18.04
sudo apt install clang-8
sudo apt install llvm-8

sudo apt install clang-10
sudo apt install llvm-10
```

编译

```bash
# pass
# 编译
clang-10 -g -c -fPIC -fno-rtti ./Hello.cpp -o Hello.o $(llvm-config-10 --cxxflags)
# 链接
clang-10 -shared -Wl,-znodelete Hello.o $(llvm-config-10 --ldflags --libs --system-libs) -o LLVMHello.so
# 编译链接
clang-10 -g -fPIC -fno-rtti -shared -Wl,-znodelete ./Hello.cpp -o LLVMHello.so $(llvm-config-10 --cxxflags --ldflags --libs --system-libs)
# 不同文件之间的转换
# .c -> .ll:
clang-8 -emit-llvm -S pwwn.c -o pwwn.ll
# .c -> .bc:
clang-8 -emit-llvm -c pwwn.c -o pwwn.bc
# .ll -> .bc:
llvm-as a.ll -o a.bc
# .bc -> .ll:
llvm-dis a.bc -o a.ll
# .bc -> .s:
llc a.bc -o a.s
```
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
// Hello.cpp  
#include "llvm/Pass.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
using namespace llvm;
 
namespace {
  struct Hello : public FunctionPass {
    static char ID;
    Hello() : FunctionPass(ID) {}
    bool runOnFunction(Function &F) override 
    {
      errs() << "Hello: ";
      errs().write_escaped(F.getName()) << '\n';
      SymbolTableList<BasicBlock>::const_iterator bbEnd = F.end();
      for(SymbolTableList<BasicBlock>::const_iterator bbIter = F.begin(); 
bbIter != bbEnd; ++bbIter)
{
         SymbolTableList<Instruction>::const_iterator instIter = bbIter->begin();
         SymbolTableList<Instruction>::const_iterator instEnd  = bbIter->end();
         for(; instIter != instEnd; ++instIter)
         {
            errs() << "OpcodeName = " << instIter->getOpcodeName() 
            << " NumOperands = " << instIter->getNumOperands() << "\n";
            if (instIter->getOpcode() == 56)
            {
                if(const CallInst* call_inst = dyn_cast<CallInst>(instIter)) 
                {
                    errs() << call_inst->getCalledFunction()->getName() << "\n";
                    for (int i = 0; i < instIter->getNumOperands()-1; i++)
                    {
                        if (isa<ConstantInt>(call_inst->getOperand(i)))
                        {
                            errs() << "Operand " << i << " = " 
                            << dyn_cast<ConstantInt>
                            (call_inst->getArgOperand(i))->getZExtValue() << "\n";
                        }
                    }
                }
            }
         }
      }
      return false;
    }
  };
}
 
char Hello::ID = 0;
 
// Register for opt
static RegisterPass<Hello> X("Hello""Hello World Pass");
 
// Register for clang
static RegisterStandardPasses Y(PassManagerBuilder::EP_EarlyAsPossible,
  [](const PassManagerBuilder &Builder, legacy::PassManagerBase &PM) {
    PM.add(new Hello());
  });
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
// LLVMHello.cpp
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {
struct LLVMHello : public FunctionPass {
static char ID;
LLVMHello() : FunctionPass(ID) {}

bool runOnFunction(Function &F) override {
errs() << "Hello: " << F.getName() << '\n';
return false; // 如果没有修改函数,返回false
}
};
}

// Out-of-class definition of the pass identifier declared in LLVMHello.
char LLVMHello::ID = 0;

// Registers the pass with the argument "llvm-hello"; the two trailing flags
// spell out the constructor's default values explicitly.
static RegisterPass<LLVMHello> X("llvm-hello", "LLVM Hello World Pass",
                                 /*CFGOnly=*/false, /*is_analysis=*/false);

优化

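```bash
# 传给 opt 的选项必须与 RegisterPass 注册的名字一致:
# Hello.cpp 注册的是 "Hello"(-Hello),LLVMHello.cpp 注册的是 "llvm-hello"(-llvm-hello)
opt-10 -load ./LLVMHello.so -Hello ./test.ll -S -o test_new.ll
```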

gdb调试

```bash
gdb ./opt-8
set args -load ./VMPass.so -"VMPass" ./pwwn.ll
# 0x4b8d60
# 0x7ffff2396000
```

流程

  • 源码 → Clang 前端 → LLVM IR → opt 优化 → llc 后端 → 汇编代码 → 汇编器 → 目标文件 → 链接器 → 可执行文件

逆向

逆向时,在 .data.rel.ro 段中找到该 pass 的虚表(vtable),其最后一个函数指针就是重写的 runOnFunction
1754196824976
![[Pasted image 20250803125108.png]]

RedHat2021-simpleVM

```bash
find /usr/include -name "Instruction.def"
cat ./llvm-8/llvm/IR/Instruction.def | grep 55
```

HANDLE_OTHER_INST(55, Call , CallInst ) // Call a function
….