Files
linux_media/tools/bpf/bpftool/jit_disasm.c
Quentin Monnet eb9d1acf63 bpftool: Add LLVM as default library for disassembling JIT-ed programs
To disassemble instructions for JIT-ed programs, bpftool has relied on
the libbfd library. This has been problematic in the past: libbfd's
interface is not meant to be stable and has changed several times. For
building bpftool, we have to detect how the libbfd version on the system
behaves, which is why we have to handle features disassembler-four-args
and disassembler-init-styled in the Makefile. When it comes to shipping
bpftool, this has also caused issues with several distribution
maintainers unwilling to support the feature (see for example Debian's
page for binutils-dev, which ships libbfd: "Note that building Debian
packages which depend on the shared libbfd is Not Allowed." [0]).

For these reasons, we add support for LLVM as an alternative to libbfd
for disassembling instructions of JIT-ed programs. Thanks to the
preparation work in the previous commits, it's easy to add the library
by passing the relevant compilation options in the Makefile, and by
adding the functions for setting up the LLVM disassembler in file
jit_disasm.c.

The LLVM disassembler requires the LLVM development package (usually
llvm-dev or llvm-devel).

The expectation is that the interface for this disassembler will be more
stable. There is a note in LLVM's Developer Policy [1] stating that the
stability for the C API is "best effort" and not guaranteed, but at
least there is some effort to keep compatibility when possible (which
hasn't really been the case for libbfd so far). Furthermore, the Debian
page for the related LLVM package does not caution against linking to
the lib, as the binutils-dev page does.

Naturally, the display of disassembled instructions comes with a few
minor differences. Here is a sample output with libbfd (already
supported before this patch):

    # bpftool prog dump jited id 56
    bpf_prog_6deef7357e7b4530:
       0:   nopl   0x0(%rax,%rax,1)
       5:   xchg   %ax,%ax
       7:   push   %rbp
       8:   mov    %rsp,%rbp
       b:   push   %rbx
       c:   push   %r13
       e:   push   %r14
      10:   mov    %rdi,%rbx
      13:   movzwq 0xb4(%rbx),%r13
      1b:   xor    %r14d,%r14d
      1e:   or     $0x2,%r14d
      22:   mov    $0x1,%eax
      27:   cmp    $0x2,%r14
      2b:   jne    0x000000000000002f
      2d:   xor    %eax,%eax
      2f:   pop    %r14
      31:   pop    %r13
      33:   pop    %rbx
      34:   leave
      35:   ret

LLVM supports several variants that we could set when initialising the
disassembler, for example with:

    LLVMSetDisasmOptions(*ctx,
                         LLVMDisassembler_Option_AsmPrinterVariant);

but the default printer is used for now. Here is the output with LLVM:

    # bpftool prog dump jited id 56
    bpf_prog_6deef7357e7b4530:
       0:   nopl    (%rax,%rax)
       5:   nop
       7:   pushq   %rbp
       8:   movq    %rsp, %rbp
       b:   pushq   %rbx
       c:   pushq   %r13
       e:   pushq   %r14
      10:   movq    %rdi, %rbx
      13:   movzwq  180(%rbx), %r13
      1b:   xorl    %r14d, %r14d
      1e:   orl     $2, %r14d
      22:   movl    $1, %eax
      27:   cmpq    $2, %r14
      2b:   jne     0x2f
      2d:   xorl    %eax, %eax
      2f:   popq    %r14
      31:   popq    %r13
      33:   popq    %rbx
      34:   leave
      35:   retq

The LLVM disassembler comes as the default choice, with libbfd as a
fall-back.

Of course, we could replace libbfd entirely and avoid supporting two
different libraries. One reason for keeping libbfd is that, right now,
it works well, we have all we need in terms of feature detection in the
Makefile, so it provides a fallback for disassembling JIT-ed programs if
libbfd is installed but LLVM is not. The other motivation is that libbfd
supports nfp instructions for Netronome's SmartNICs and can be used to
disassemble offloaded programs, something that LLVM cannot do. If
libbfd's interface breaks again in the future, we might reconsider
keeping support for it.

[0] https://packages.debian.org/buster/binutils-dev
[1] https://llvm.org/docs/DeveloperPolicy.html#c-api-changes

Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Tested-by: Niklas Söderlund <niklas.soderlund@corigine.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20221025150329.97371-7-quentin@isovalent.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-10-25 10:11:56 -07:00

405 lines
8.2 KiB
C

// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/*
* Based on:
*
* Minimal BPF JIT image disassembler
*
* Disassembles BPF JIT compiler emitted opcodes back to asm insn's for
* debugging or verification purposes.
*
* Copyright 2013 Daniel Borkmann <daniel@iogearbox.net>
* Licensed under the GNU General Public License, version 2.0 (GPLv2)
*/
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdio.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/stat.h>
#include <limits.h>
#include <bpf/libbpf.h>
#ifdef HAVE_LLVM_SUPPORT
#include <llvm-c/Core.h>
#include <llvm-c/Disassembler.h>
#include <llvm-c/Target.h>
#include <llvm-c/TargetMachine.h>
#endif
#ifdef HAVE_LIBBFD_SUPPORT
#include <bfd.h>
#include <dis-asm.h>
#include <tools/dis-asm-compat.h>
#endif
#include "json_writer.h"
#include "main.h"
/* Number of tokens (operation plus operands) emitted so far for the
 * instruction currently being printed in JSON mode. disasm_print_insn()
 * resets it to 0 before each instruction and, once the instruction has been
 * printed, emits a null operand if the count is exactly 1 (operation only).
 */
static int oper_count;
#ifdef HAVE_LLVM_SUPPORT
/* Expands to nothing: with the LLVM backend, disasm_print_insn() appends no
 * separator after the "pc:" prefix it prints.
 * NOTE(review): presumably LLVM's instruction text already starts with its
 * own whitespace - confirm.
 */
#define DISASM_SPACER
/* Disassembler context for the LLVM backend: the handle returned by
 * LLVMCreateDisasm() in init_context().
 */
typedef LLVMDisasmContextRef disasm_ctx_t;
/*
 * Emit one disassembled instruction as JSON fields.
 *
 * @s: writable, NUL-terminated instruction text; it is split in place by
 *     strtok().
 *
 * The first token (delimited by spaces or tabs) becomes the "operation"
 * string field. The remaining tokens, split on spaces, tabs, commas and
 * parentheses, are appended to an "operands" array which is started here and
 * not closed by this function. oper_count is set to 1 for the operation and
 * incremented once per operand.
 *
 * Return: always 0.
 */
static int printf_json(char *s)
{
	const char *token;

	/* strtok() yields NULL when the text holds no token at all (empty or
	 * blank-only string); fall back to an empty operation name rather than
	 * handing a NULL string to the JSON writer.
	 */
	token = strtok(s, " \t");
	jsonw_string_field(json_wtr, "operation", token ? token : "");

	jsonw_name(json_wtr, "operands");
	jsonw_start_array(json_wtr);
	oper_count = 1;

	while ((token = strtok(NULL, " \t,()")) != NULL) {
		jsonw_string(json_wtr, token);
		oper_count++;
	}

	return 0;
}
/* Symbol lookup hook handed to LLVMCreateDisasm(). It resolves nothing: its
 * only job is to set the reference type, which is necessary to have the LLVM
 * disassembler print PC-relative addresses instead of byte offsets for branch
 * instruction targets.
 *
 * Every argument except @ref_type is ignored. Always returns NULL.
 */
static const char *
symbol_lookup_callback(__maybe_unused void *disasm_info,
		       __maybe_unused uint64_t ref_value,
		       uint64_t *ref_type, __maybe_unused uint64_t ref_PC,
		       __maybe_unused const char **ref_name)
{
	const char *no_symbol = NULL;

	*ref_type = LLVMDisassembler_ReferenceType_InOut_None;

	return no_symbol;
}
/*
 * Create the LLVM disassembler for the default target triple.
 *
 * @ctx:  receives the handle returned by LLVMCreateDisasm().
 * @arch: must be NULL; any explicit architecture is reported as unsupported.
 *
 * @disassembler_options, @image and @len are unused by this backend.
 *
 * Return: 0 on success, -1 (after reporting through p_err()) on failure.
 */
static int
init_context(disasm_ctx_t *ctx, const char *arch,
	     __maybe_unused const char *disassembler_options,
	     __maybe_unused unsigned char *image, __maybe_unused ssize_t len)
{
	disasm_ctx_t handle;
	char *host_triple;

	if (arch != NULL) {
		p_err("Architecture %s not supported", arch);
		return -1;
	}

	host_triple = LLVMGetDefaultTargetTriple();
	if (host_triple == NULL) {
		p_err("Failed to retrieve triple");
		return -1;
	}

	handle = LLVMCreateDisasm(host_triple, NULL, 0, NULL,
				  symbol_lookup_callback);
	/* The triple string is released whether or not creation succeeded. */
	LLVMDisposeMessage(host_triple);

	*ctx = handle;
	if (handle == NULL) {
		p_err("Failed to create disassembler");
		return -1;
	}

	return 0;
}
/*
 * Release the LLVM disassembler created by init_context().
 *
 * The handle comes from LLVMCreateDisasm(), so it must be released with the
 * matching LLVMDisasmDispose(). LLVMDisposeMessage() is meant for strings
 * returned by the LLVM C API (such as the target triple) and is not the
 * right deallocator for a disassembler context.
 */
static void destroy_context(disasm_ctx_t *ctx)
{
	LLVMDisasmDispose(*ctx);
}
/*
 * Disassemble and print the instruction located at offset @pc in @image.
 *
 * @ctx:   LLVM disassembler handle set up by init_context().
 * @image: start of the JIT-ed image.
 * @len:   size of the image, in bytes.
 * @pc:    offset of the instruction inside the image; also passed to LLVM as
 *         the address of the instruction.
 *
 * The text goes through printf_json() in JSON mode and is printed verbatim to
 * stdout otherwise.
 *
 * Return: the value returned by LLVMDisasmInstruction(), which the caller
 * uses as the instruction size in bytes.
 */
static int
disassemble_insn(disasm_ctx_t *ctx, unsigned char *image, ssize_t len, int pc)
{
	char buf[256];
	int count;

	/* Start from an empty string: LLVMDisasmInstruction() returns 0 when
	 * no valid instruction is found and is not documented to fill the
	 * output buffer in that case, so the buffer must not be read
	 * uninitialized below.
	 */
	buf[0] = '\0';

	count = LLVMDisasmInstruction(*ctx, image + pc, len - pc, pc,
				      buf, sizeof(buf));

	if (json_output)
		printf_json(buf);
	else
		printf("%s", buf);

	return count;
}
/*
 * One-time setup for the LLVM backend: register the native target and its
 * disassembler. The results of both calls are deliberately ignored.
 *
 * Return: always 0.
 */
int disasm_init(void)
{
	(void)LLVMInitializeNativeTarget();
	(void)LLVMInitializeNativeDisassembler();

	return 0;
}
#endif /* HAVE_LLVM_SUPPORT */
#ifdef HAVE_LIBBFD_SUPPORT
/* Separator that disasm_print_insn() appends after the "pc:" prefix in plain
 * output when the libbfd backend is used.
 */
#define DISASM_SPACER "\t"
/* Disassembler context for the libbfd backend, filled in by init_context()
 * and released by destroy_context().
 */
typedef struct {
/* Heap-allocated disassembly state, passed to the disassemble callback. */
struct disassemble_info *info;
/* Function decoding one instruction, obtained from disassembler(). */
disassembler_ftype disassemble;
/* BFD handle opened with bfd_openr() and closed with bfd_close(). */
bfd *bfdf;
} disasm_ctx_t;
/*
 * Resolve the /proc/self/exe symbolic link into @tpath.
 *
 * @tpath: destination buffer.
 * @size:  size of @tpath in bytes; one byte is kept for the terminating NUL.
 *
 * Return: 0 on success with @tpath NUL-terminated, -1 if readlink() failed or
 * returned an empty target.
 */
static int get_exec_path(char *tpath, size_t size)
{
	ssize_t nbytes = readlink("/proc/self/exe", tpath, size - 1);

	if (nbytes > 0) {
		/* readlink() does not terminate the string itself. */
		tpath[nbytes] = '\0';
		return 0;
	}

	return -1;
}
/*
 * Format one piece of disassembler output and emit it as JSON.
 *
 * @out: stream argument of the fprintf-like callbacks; unused here.
 * @fmt: printf-style format string.
 * @ap:  arguments for @fmt.
 *
 * The first piece printed for an instruction (oper_count == 0) is emitted as
 * the "operation" field, with trailing spaces removed, and the "operands"
 * array is started. Later pieces are appended to that array, except when the
 * format string is exactly ",", which is skipped.
 *
 * Return: 0 on success, -1 if the text could not be formatted.
 */
static int printf_json(void *out, const char *fmt, va_list ap)
{
	char *s;
	int err;

	err = vasprintf(&s, fmt, ap);
	if (err < 0)
		return -1;

	if (!oper_count) {
		size_t n = strlen(s);

		/* Strip trailing spaces, never stepping before the start of
		 * the buffer when the text is empty or made only of spaces.
		 */
		while (n > 0 && s[n - 1] == ' ')
			s[--n] = '\0';

		jsonw_string_field(json_wtr, "operation", s);
		jsonw_name(json_wtr, "operands");
		jsonw_start_array(json_wtr);
		oper_count++;
	} else if (!strcmp(fmt, ",")) {
		/* Skip */
	} else {
		jsonw_string(json_wtr, s);
		oper_count++;
	}

	free(s);
	return 0;
}
/*
 * fprintf-like entry point installed for JSON output: collects the variadic
 * arguments and forwards them to printf_json().
 *
 * Return: the value returned by printf_json().
 */
static int fprintf_json(void *out, const char *fmt, ...)
{
	va_list args;
	int ret;

	va_start(args, fmt);
	ret = printf_json(out, fmt, args);
	va_end(args);

	return ret;
}
/*
 * Styled variant of fprintf_json(): takes an additional @style argument,
 * which is ignored, and forwards everything else to printf_json().
 *
 * Return: the value returned by printf_json().
 */
static int fprintf_json_styled(void *out,
			       enum disassembler_style style __maybe_unused,
			       const char *fmt, ...)
{
	va_list args;
	int ret;

	va_start(args, fmt);
	ret = printf_json(out, fmt, args);
	va_end(args);

	return ret;
}
/*
 * Set up a libbfd disassembler for the given image.
 *
 * @ctx:                  context to fill in (BFD handle, disassemble_info and
 *                        disassemble callback).
 * @arch:                 optional architecture name; when set, it is looked up
 *                        with bfd_scan_arch() and overrides the architecture
 *                        of the BFD handle (used for offloaded programs).
 * @disassembler_options: optional option string stored in the
 *                        disassemble_info.
 * @image:                buffer holding the instructions to disassemble.
 * @len:                  size of @image in bytes.
 *
 * The BFD handle is obtained by opening the running executable, whose path is
 * resolved by get_exec_path(), and checking that it is a bfd_object.
 *
 * Return: 0 on success. On failure an error is reported with p_err(),
 * whatever was acquired so far is released, and -1 is returned; the context
 * must not be passed to destroy_context() in that case.
 */
static int init_context(disasm_ctx_t *ctx, const char *arch,
			const char *disassembler_options,
			unsigned char *image, ssize_t len)
{
	struct disassemble_info *info;
	char tpath[PATH_MAX];
	bfd *bfdf;

	memset(tpath, 0, sizeof(tpath));
	if (get_exec_path(tpath, sizeof(tpath))) {
		p_err("failed to create disassembler (get_exec_path)");
		return -1;
	}

	ctx->bfdf = bfd_openr(tpath, NULL);
	if (!ctx->bfdf) {
		p_err("failed to create disassembler (bfd_openr)");
		return -1;
	}
	if (!bfd_check_format(ctx->bfdf, bfd_object)) {
		p_err("failed to create disassembler (bfd_check_format)");
		goto err_close;
	}
	bfdf = ctx->bfdf;

	ctx->info = malloc(sizeof(*ctx->info));
	if (!ctx->info) {
		p_err("mem alloc failed");
		goto err_close;
	}
	info = ctx->info;

	/* Route the disassembler output either to the JSON callbacks or
	 * straight to stdout.
	 */
	if (json_output)
		init_disassemble_info_compat(info, stdout,
					     (fprintf_ftype) fprintf_json,
					     fprintf_json_styled);
	else
		init_disassemble_info_compat(info, stdout,
					     (fprintf_ftype) fprintf,
					     fprintf_styled);

	/* Update architecture info for offload. */
	if (arch) {
		const bfd_arch_info_type *inf = bfd_scan_arch(arch);

		if (inf) {
			bfdf->arch_info = inf;
		} else {
			p_err("No libbfd support for %s", arch);
			goto err_free;
		}
	}

	info->arch = bfd_get_arch(bfdf);
	info->mach = bfd_get_mach(bfdf);
	if (disassembler_options)
		info->disassembler_options = disassembler_options;
	info->buffer = image;
	info->buffer_length = len;

	disassemble_init_for_target(info);

	/* The signature of disassembler() depends on the libbfd version. */
#ifdef DISASM_FOUR_ARGS_SIGNATURE
	ctx->disassemble = disassembler(info->arch,
					bfd_big_endian(bfdf),
					info->mach,
					bfdf);
#else
	ctx->disassemble = disassembler(bfdf);
#endif
	if (!ctx->disassemble) {
		p_err("failed to create disassembler");
		goto err_free;
	}
	return 0;

err_free:
	free(info);
err_close:
	bfd_close(ctx->bfdf);
	return -1;
}
/*
 * Release what init_context() acquired for the libbfd backend: the
 * disassemble_info allocation first, then the BFD handle.
 */
static void destroy_context(disasm_ctx_t *ctx)
{
	struct disassemble_info *state = ctx->info;
	bfd *handle = ctx->bfdf;

	free(state);
	bfd_close(handle);
}
/*
 * Disassemble and print the instruction at offset @pc by calling the libbfd
 * disassemble callback stored in @ctx.
 *
 * @image and @len are unused here: the buffer was recorded in the
 * disassemble_info by init_context().
 *
 * Return: whatever the disassemble callback returns.
 */
static int
disassemble_insn(disasm_ctx_t *ctx, __maybe_unused unsigned char *image,
		 __maybe_unused ssize_t len, int pc)
{
	disassembler_ftype decode = ctx->disassemble;

	return decode(pc, ctx->info);
}
/*
 * One-time setup for the libbfd backend: initialise the BFD library. Any
 * value returned by bfd_init() is ignored.
 *
 * Return: always 0.
 */
int disasm_init(void)
{
	(void)bfd_init();

	return 0;
}
#endif /* HAVE_LIBBFD_SUPPORT */
/*
 * Disassemble a JIT-ed image and print it one instruction at a time, either
 * as plain text on stdout or as a JSON array of objects.
 *
 * @image:                buffer holding the instructions.
 * @len:                  size of @image in bytes; must be positive.
 * @opcodes:              when non-zero, also dump the raw bytes of each
 *                        instruction.
 * @arch:                 architecture name handed to init_context(), or NULL.
 * @disassembler_options: option string handed to init_context(), or NULL.
 * @btf:                  BTF object used when printing line information.
 * @prog_linfo:           line information of the program, or NULL to print
 *                        none.
 * @func_ksym:            address of the function; @func_ksym + pc is the
 *                        address looked up in @prog_linfo.
 * @func_idx:             index of the function in @prog_linfo.
 * @linum:                forwarded to the btf_dump_linfo_*() helpers.
 *
 * Return: -1 if @len is not positive or the disassembler could not be set
 * up, 0 otherwise (including when disassembly stopped before the end of the
 * image).
 */
int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
		      const char *arch, const char *disassembler_options,
		      const struct btf *btf,
		      const struct bpf_prog_linfo *prog_linfo,
		      __u64 func_ksym, unsigned int func_idx,
		      bool linum)
{
	const struct bpf_line_info *linfo = NULL;
	unsigned int nr_skip = 0;
	int count, i, pc = 0;
	disasm_ctx_t ctx;

	/* Nothing to disassemble. A negative length is rejected as well so
	 * that it never reaches the disassembler backend.
	 */
	if (len <= 0)
		return -1;

	if (init_context(&ctx, arch, disassembler_options, image, len))
		return -1;

	if (json_output)
		jsonw_start_array(json_wtr);
	do {
		if (prog_linfo) {
			linfo = bpf_prog_linfo__lfind_addr_func(prog_linfo,
								func_ksym + pc,
								func_idx,
								nr_skip);
			if (linfo)
				nr_skip++;
		}

		if (json_output) {
			jsonw_start_object(json_wtr);
			oper_count = 0;
			if (linfo)
				btf_dump_linfo_json(btf, linfo, linum);
			jsonw_name(json_wtr, "pc");
			jsonw_printf(json_wtr, "\"0x%x\"", pc);
		} else {
			if (linfo)
				btf_dump_linfo_plain(btf, linfo, "; ",
						     linum);
			printf("%4x:" DISASM_SPACER, pc);
		}

		count = disassemble_insn(&ctx, image, len, pc);

		if (json_output) {
			/* Operand array, was started by the JSON printing
			 * callbacks (printf_json). Before closing it, make
			 * sure we have a _null_ value if no operand other
			 * than operation code was present.
			 */
			if (oper_count == 1)
				jsonw_null(json_wtr);
			jsonw_end_array(json_wtr);
		}

		if (opcodes) {
			if (json_output) {
				jsonw_name(json_wtr, "opcodes");
				jsonw_start_array(json_wtr);
				for (i = 0; i < count; ++i)
					jsonw_printf(json_wtr, "\"0x%02hhx\"",
						     (uint8_t)image[pc + i]);
				jsonw_end_array(json_wtr);
			} else {
				printf("\n\t");
				for (i = 0; i < count; ++i)
					printf("%02x ",
					       (uint8_t)image[pc + i]);
			}
		}

		if (json_output)
			jsonw_end_object(json_wtr);
		else
			printf("\n");

		pc += count;
		/* Stop at the end of the image, or as soon as the backend
		 * fails to consume any byte.
		 */
	} while (count > 0 && pc < len);
	if (json_output)
		jsonw_end_array(json_wtr);

	destroy_context(&ctx);

	return 0;
}