[root@memzero]# ls

2022/07/07 - Jit C in memory using LLVM ORC api

EDIT:

Building on the in-memory compiler shared in the last post (C to LLVM IR in memory using libclang), this post demonstrates a small just-in-time (JIT) compiler that compiles C code to native host code entirely in memory.

The JIT compiler is based on the LLVM ORCv2 API (the newest LLVM JIT API at the time of writing) and the crucial parts are taken from the JIT tutorial.

The sources are available under llvm-orc-jit.

main.cc

#include "ccompiler.h"
#include "jit.h"

// Demo driver: compiles a small C snippet to LLVM IR in memory, hands the
// module to the JIT, calls the jitted `init` function and finally removes the
// jitted code again.
//
// Returns 0 on success and 1 if adding the module to the JIT or looking up
// `init` fails. A failing C compilation aborts the process (see cantFail).
int main() {
  const char code[] =
      "extern void libc_puts(const char*);"
      "struct S { int a; int b; };"
      "static void init_a(struct S* s) { s->a = 1111; }"
      "static void init_b(struct S* s) { s->b = 2222; }"
      "void init(struct S* s) {"
      "init_a(s); init_b(s);"
      "libc_puts(\"libc_puts()\"); }";

  auto R = cc::CCompiler().compile(code);
  // Abort if compilation failed.
  auto [C, M] = cantFail(std::move(R));
  // M->print(llvm::errs(), nullptr);

  // -- JIT compile the IR module.

  llvm::InitializeNativeTarget();
  llvm::InitializeNativeTargetAsmPrinter();

  auto JIT = jit::Jit::Create();
  auto TSM = llvm::orc::ThreadSafeModule(std::move(M), std::move(C));

  auto RT = JIT->addModule(std::move(TSM));
  if (auto E = RT.takeError()) {
    llvm::errs() << llvm::toString(std::move(E)) << '\n';
    return 1;
  }

  if (auto ADDR = JIT->lookup("init")) {
    // Cast explicitly so that the argument type matches the conversion
    // specifier independent of how the platform defines the address type.
    std::printf("JIT ADDR 0x%llx\n",
                static_cast<unsigned long long>(
                    ADDR->getAddress().getValue()));

    // Host side mirror of the `struct S` declared in the jitted C code.
    struct S {
      int a, b;
    } state = {0, 0};
    auto JIT_FN = ADDR->getAddress().toPtr<void(struct S*)>();

    std::printf("S { a=%d b=%d }\n", state.a, state.b);
    JIT_FN(&state);
    std::printf("S { a=%d b=%d }\n", state.a, state.b);
  } else {
    // A failed lookup must not be dropped silently: an llvm::Expected in
    // error state has to be handled before it is destroyed.
    llvm::errs() << llvm::toString(ADDR.takeError()) << '\n';
    return 1;
  }

  // Remove jitted code tracked by this RT.
  cantFail((*RT)->remove());

  if (auto E = JIT->lookup("init").takeError()) {
    // In ERROR state, as expected, consume the error.
    llvm::consumeError(std::move(E));
  } else {
    // In SUCCESS state, not expected as code was dropped.
    llvm::errs() << "Expected error, we removed code tracked by RT and "
                    "hence 'init' should be "
                    "removed from the JIT!\n";
  }

  return 0;
}

jit.h

#ifndef JIT_H
#define JIT_H

#include <llvm/ExecutionEngine/JITSymbol.h>
#include <llvm/ExecutionEngine/Orc/CompileUtils.h>
#include <llvm/ExecutionEngine/Orc/Core.h>
#include <llvm/ExecutionEngine/Orc/ExecutionUtils.h>
#include <llvm/ExecutionEngine/Orc/ExecutorProcessControl.h>
#include <llvm/ExecutionEngine/Orc/IRCompileLayer.h>
#include <llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h>
#include <llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h>
#include <llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h>
#include <llvm/ExecutionEngine/SectionMemoryManager.h>
#include <llvm/IR/DataLayout.h>
#include <llvm/IR/LLVMContext.h>

namespace jit {
using llvm::cantFail;
using llvm::DataLayout;
using llvm::Expected;
using llvm::JITEvaluatedSymbol;
using llvm::JITSymbolFlags;
using llvm::SectionMemoryManager;
using llvm::StringRef;

using llvm::orc::ConcurrentIRCompiler;
// using llvm::orc::DynamicLibrarySearchGenerator;
using llvm::orc::ExecutionSession;
using llvm::orc::ExecutorAddr;
using llvm::orc::ExecutorSymbolDef;
using llvm::orc::IRCompileLayer;
using llvm::orc::JITDylib;
using llvm::orc::JITTargetMachineBuilder;
using llvm::orc::MangleAndInterner;
using llvm::orc::ResourceTrackerSP;
using llvm::orc::RTDyldObjectLinkingLayer;
using llvm::orc::SelfExecutorProcessControl;
using llvm::orc::ThreadSafeModule;

// Minimal ORCv2 JIT engine, modelled after the KaleidoscopeJIT.
// https://www.llvm.org/docs/tutorial/BuildingAJIT1.html
//
// All modules are added to a single JITDylib named "main". Every added module
// gets its own resource tracker, which the caller can use to remove it again.
class Jit {
private:
  // NOTE: the declaration order matters, the constructor initializes later
  // members from earlier ones (e.g. the layers reference the session).
  std::unique_ptr<ExecutionSession> ES;

  DataLayout DL;
  MangleAndInterner Mangle;

  RTDyldObjectLinkingLayer ObjectLayer;
  IRCompileLayer CompileLayer;

  JITDylib& JD;

public:
  // Takes ownership of the session, stacks the IR compile layer on top of the
  // object linking layer and creates the "main" JITDylib.
  // The host `puts` function is exposed to jitted code as `libc_puts`.
  Jit(std::unique_ptr<ExecutionSession> ES,
      JITTargetMachineBuilder JTMB,
      DataLayout DL)
      : ES(std::move(ES)),
        DL(std::move(DL)),
        Mangle(*this->ES, this->DL),
        ObjectLayer(*this->ES,
                    [] { return std::make_unique<SectionMemoryManager>(); }),
        CompileLayer(*this->ES,
                     ObjectLayer,
                     std::make_unique<ConcurrentIRCompiler>(std::move(JTMB))),
        JD(this->ES->createBareJITDylib("main")) {
    // Alternative: make all symbols of the current process visible, see
    // https://www.llvm.org/docs/ORCv2.html#how-to-add-process-and-library-symbols-to-jitdylibs
    // JD.addGenerator(
    //     cantFail(DynamicLibrarySearchGenerator::GetForCurrentProcess(
    //         DL.getGlobalPrefix())));
    cantFail(JD.define(llvm::orc::absoluteSymbols(
        {{Mangle("libc_puts"),
          {ExecutorAddr::fromPtr(&puts), JITSymbolFlags::Exported}}})));
  }

  // Ends the execution session; a failure is reported through the session.
  ~Jit() {
    auto Err = ES->endSession();
    if (Err) {
      ES->reportError(std::move(Err));
    }
  }

  // Creates a JIT instance for the process this code runs in.
  // Aborts (cantFail) if the process control or the data layout can not be
  // obtained.
  static std::unique_ptr<Jit> Create() {
    auto Session = std::make_unique<ExecutionSession>(
        cantFail(SelfExecutorProcessControl::Create()));

    JITTargetMachineBuilder Builder(
        Session->getExecutorProcessControl().getTargetTriple());
    auto Layout = cantFail(Builder.getDefaultDataLayoutForTarget());

    return std::make_unique<Jit>(std::move(Session), std::move(Builder),
                                 std::move(Layout));
  }

  // Adds the module to the "main" JITDylib.
  // Returns the resource tracker the module is tracked by, or the error
  // raised by the compile layer.
  Expected<ResourceTrackerSP> addModule(ThreadSafeModule TSM) {
    auto Tracker = JD.createResourceTracker();
    if (auto Err = CompileLayer.add(Tracker, std::move(TSM))) {
      return Err;
    }
    return Tracker;
  }

  // Looks up the (unmangled) symbol name in the "main" JITDylib.
  Expected<ExecutorSymbolDef> lookup(StringRef Name) {
    auto Symbol = Mangle(Name);
    return ES->lookup({&JD}, std::move(Symbol));
  }
};

}  // namespace jit

#endif

ccompiler.h

#ifndef CCOMPILER_H
#define CCOMPILER_H

#include <clang/Basic/DiagnosticOptions.h>
#include <clang/CodeGen/CodeGenAction.h>
#include <clang/Frontend/CompilerInstance.h>
#include <clang/Frontend/TextDiagnosticPrinter.h>
#include <clang/Lex/PreprocessorOptions.h>

#include <llvm/IR/Module.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/TargetParser/Host.h>

namespace cc {

using clang::CompilerInstance;
using clang::CompilerInvocation;
using clang::DiagnosticConsumer;
using clang::DiagnosticOptions;
using clang::DiagnosticsEngine;
using clang::EmitLLVMOnlyAction;
using clang::TextDiagnosticPrinter;

using llvm::Expected;
using llvm::IntrusiveRefCntPtr;
using llvm::LLVMContext;
using llvm::MemoryBuffer;
using llvm::Module;
using llvm::StringError;

// In-memory C compiler front end: turns a C source string into an LLVM IR
// module using the clang libraries.
//
// Diagnostics are printed to llvm::errs() with colors enabled.
class CCompiler {
public:
  CCompiler() {
    // Setup custom diagnostic options.
    auto DO = IntrusiveRefCntPtr<DiagnosticOptions>(new DiagnosticOptions());
    DO->ShowColors = 1;

    // Setup stderr custom diagnostic consumer.
    DC = std::make_unique<TextDiagnosticPrinter>(llvm::errs(), DO.get());

    // Create custom diagnostics engine.
    // The engine will NOT take ownership of the DiagnosticConsumer object.
    // NOTE(review): no DiagnosticIDs object is passed; presumably this is only
    // safe as long as nothing is reported through this engine -- confirm.
    DE = std::make_unique<DiagnosticsEngine>(
        nullptr /* DiagnosticIDs */, std::move(DO), DC.get(),
        false /* own DiagnosticConsumer */);
  }

  // Result of a successful compilation.
  // C is declared before M, hence M is destroyed before C.
  struct CompileResult {
    std::unique_ptr<LLVMContext> C;
    std::unique_ptr<Module> M;
  };

  // Compiles the null-terminated C source `code` to an LLVM IR module.
  //
  // Returns the module together with its LLVMContext, or a StringError
  // (errc::invalid_argument) if `code` is null, the compiler invocation can
  // not be set up, the code fails to compile or no module was produced.
  Expected<CompileResult> compile(const char* code) const {
    using std::errc;
    const auto fail = [](const char* msg) {
      return llvm::make_error<StringError>(
          msg, std::make_error_code(errc::invalid_argument));
    };

    if (code == nullptr) {
      return fail("No C code given!");
    }

    const char code_fname[] = "jit.c";

    // Create compiler instance.
    CompilerInstance CC;

    // Setup compiler invocation.
    const bool ok = CompilerInvocation::CreateFromArgs(CC.getInvocation(),
                                                       {code_fname}, *DE);
    // We control the arguments, so this is not expected to fail. Still report
    // it as error rather than asserting, such that builds with NDEBUG neither
    // ignore the failure nor trip over an unused variable.
    if (!ok) {
      return fail("Failed to setup the compiler invocation!");
    }

    // Setup custom diagnostic printer.
    CC.createDiagnostics(DC.get(), false /* own DiagnosticConsumer */);

    // Configure remapping from pseudo file name to in-memory code buffer
    // code_fname -> code_buffer.
    //
    // PreprocessorOptions take ownership of MemoryBuffer.
    CC.getPreprocessorOpts().addRemappedFile(
        code_fname, MemoryBuffer::getMemBuffer(code).release());

    // Configure codegen options.
    auto& CG = CC.getCodeGenOpts();
    CG.OptimizationLevel = 3;
    CG.setInlining(clang::CodeGenOptions::NormalInlining);

    // Generate LLVM IR.
    EmitLLVMOnlyAction A;
    if (!CC.ExecuteAction(A)) {
      return fail("Failed to generate LLVM IR from C code!");
    }

    // Take generated LLVM IR module and the LLVMContext.
    auto M = A.takeModule();
    auto C = std::unique_ptr<LLVMContext>(A.takeLLVMContext());

    // Never hand out a null module as success.
    if (!M) {
      return fail("Compiler action succeeded but produced no LLVM IR module!");
    }

    return CompileResult{std::move(C), std::move(M)};
  }

private:
  // DC is declared before DE, hence the engine is destroyed before the
  // consumer it points to.
  std::unique_ptr<DiagnosticConsumer> DC;
  std::unique_ptr<DiagnosticsEngine> DE;
};

}  // namespace cc

#endif

CMakeLists.txt

cmake_minimum_required(VERSION 3.15)
project(llvm-orc-jit)

add_executable(main main.cc)

# The sources use C++17 features (e.g. structured bindings), request the
# standard explicitly instead of relying on the compiler default.
target_compile_features(main PRIVATE cxx_std_17)

# Enable warnings / warnings as errors.
target_compile_options(main PRIVATE -Wall -Wextra -Werror)

# -- LLVM/CLANG ----------------------------------------------------------------

# CLANG_INSTALL_PREFIX can be passed on the command line to pick a custom
# LLVM/CLANG installation.
find_package(Clang REQUIRED CONFIG HINTS "${CLANG_INSTALL_PREFIX}/lib/cmake/clang")

# Quote the variable, an empty value would otherwise make the condition
# malformed.
if (NOT "${CLANG_INSTALL_PREFIX}" STREQUAL "/")
    # Treat custom LLVM/CLANG include path as system include path, such that
    # warnings are suppressed for those header files.
    target_include_directories(main SYSTEM PRIVATE ${CLANG_INCLUDE_DIRS})
endif()

target_link_libraries(main PRIVATE clang-cpp)

message(STATUS "Using LLVMConfig.cmake: ${LLVM_CONFIG}")
message(STATUS "LLVM version: ${LLVM_VERSION}")
message(STATUS "Using ClangConfig.cmake: ${Clang_CONFIG}")
message(STATUS "Clang version: ${Clang_VERSION}")

# -- SANITIZER -----------------------------------------------------------------

option(SANITIZER "Enable ASAN/LSAN/UBSAN" ON)

if (SANITIZER)
    target_compile_options(main PRIVATE -fsanitize=address -fsanitize=leak
                                        -fsanitize=undefined -fno-rtti)
    target_link_options(main PRIVATE -fsanitize=address -fsanitize=leak
                                     -fsanitize=undefined)
endif()

The following Makefile provides a convenience wrapper to configure, build, and run the example with a single make invocation.

Additionally, the build-llvm.sh script is provided to build specific LLVM versions.