Skip to content

Commit

Permalink
i#2987: Add new "drdisas" CLI disassembly tool (#4020)
Browse files Browse the repository at this point in the history
Adds a new standalone executable "drdisas" which disassembles hex
strings passed as arguments or via stdin.  This is useful to evaluate
DR's decoder versus other decoders, as well as just being a nice
utility.  Example usage:

$ clients/bin64/drdisas -mode x86  62 e2 f5 47 40 41 37 90
 62 e2 f5 47 40 41 37 vpmullq {%k7} %zmm17 0x00000dc0(%ecx)[64byte] -> %zmm16
 90                   nop

Adds a test of drdisas for all our platforms.
Tested on x86, x86_64, arm (and thumb), and aarch64.

Fixes #2987
  • Loading branch information
derekbruening authored Jan 16, 2020
1 parent 40b61af commit 224dc41
Show file tree
Hide file tree
Showing 7 changed files with 292 additions and 4 deletions.
2 changes: 2 additions & 0 deletions api/docs/release.dox
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,8 @@ Further non-compatibility-affecting changes include:
- Added drmgr_register_low_on_memory_event(), drmgr_unregister_low_on_memory_event()
and their variants so that drmgr can support low-on-memory events.
- Added drmgr_is_first_nonlabel_instr() and instrlist_first_nonlabel().
- Added a new standalone tool "drdisas" which disassembles raw bytes using
DR's decoder.

**************************************************
<hr>
Expand Down
48 changes: 48 additions & 0 deletions clients/drdisas/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# **********************************************************
# Copyright (c) 2020 Google, Inc. All rights reserved.
# **********************************************************

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of Google, Inc. nor the names of its contributors may be
# used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
# DAMAGE.

# NOTE(review): CMake 4.0 no longer accepts minimum versions below 3.5; confirm
# that the top-level project establishes a newer minimum before this file is
# processed.
cmake_minimum_required(VERSION 2.6)

include(../../make/policies.cmake NO_POLICY_SCOPE)

# Standalone command-line disassembler built from drdisas.cpp.  It uses the
# decoder configuration and the droption extension for its option parsing.
add_executable(drdisas drdisas.cpp)
configure_DynamoRIO_decoder(drdisas)
use_DynamoRIO_extension(drdisas droption)

# drdisas.cpp uses the C++ standard library; on Windows, request standard C++
# exception-handling semantics from the compiler.
if (WIN32)
  append_property_string(TARGET drdisas COMPILE_FLAGS "/EHsc")
endif ()

# Install into INSTALL_BIN unless a deployment-specific binary destination has
# been defined, in which case that destination wins.
set(drdisas_bin_dest "${INSTALL_BIN}")
if (DEFINED DR_INSTALL_DEPLOY_BIN_DEST)
  set(drdisas_bin_dest ${DR_INSTALL_DEPLOY_BIN_DEST})
endif ()
DR_target_install(TARGETS drdisas DESTINATION ${drdisas_bin_dest})
188 changes: 188 additions & 0 deletions clients/drdisas/drdisas.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
/* **********************************************************
* Copyright (c) 2020 Google, Inc. All rights reserved.
* **********************************************************/

/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of Google, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/

#define DR_FAST_IR 1
#include "dr_api.h"
#include "droption.h"
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

namespace {

// XXX i#1684: We want cross-arch decoding support so a single build can decode
// AArchXX and x86. For now, a separate build is needed.
#ifdef X86_64
droption_t<std::string> op_mode(DROPTION_SCOPE_FRONTEND, "mode", "x64",
"Decodes using the specified mode: 'x64' or 'x86'.",
"Decodes using the specified mode: 'x64' or 'x86'.");
#elif defined(ARM)
droption_t<std::string> op_mode(DROPTION_SCOPE_FRONTEND, "mode", "arm",
"Decodes using the specified mode: 'arm' or 'thumb'.",
"Decodes using the specified mode: 'arm' or 'thumb'.");
#endif

// TODO i#4021: Add a -syntax option for specifying the output style, and
// add a test for each style.

droption_t<bool> op_show_bytes(DROPTION_SCOPE_FRONTEND, "show_bytes", true,
"Display the instruction encoding bytes.",
"Display the instruction encoding bytes.");

#if defined(AARCH64) || defined(ARM)
# define MAX_INSTR_LENGTH 4
#else
# define MAX_INSTR_LENGTH 17
#endif

static bool
parse_bytes(std::string token, std::vector<byte> &bytes)
{
// Assume everything is hex even if it has no leading 0x or \x.
// Assume that values larger than one byte are machine words in
// little-endian form, which we want to split into bytes in the endian order.
// (This is how aarchxx encodings are always represented; for x86, this is
// the format for raw data obtained from od or gdb or a binary file.)
uint64 entry;
std::stringstream stream;
stream << std::hex << token;
if (!(stream >> entry))
return false;
do {
bytes.push_back(entry & 0xff);
entry >>= 8;
} while (entry > 0);
return true;
}
};

int
main(int argc, const char *argv[])
{
int last_index;
std::string parse_err;
if (!droption_parser_t::parse_argv(DROPTION_SCOPE_FRONTEND, argc, argv, &parse_err,
&last_index)) {
std::cerr << "Usage error: " << parse_err << "\nUsage:\n " << argv[0]
<< " [options] <hexadecimal bytes to decode as args or stdin>\n"
<< "Bytes do not need leading 0x. Single-token multi-byte values are "
<< "assumed to be little-endian words.\n"
<< "Options:\n"
<< droption_parser_t::usage_short(DROPTION_SCOPE_ALL);
return 1;
}

void *dcontext = GLOBAL_DCONTEXT;

#if defined(X86_64) || defined(ARM)
// Set the ISA mode if supplied.
if (!op_mode.get_value().empty()) {
# ifdef X86_64
dr_isa_mode_t mode = DR_ISA_AMD64;
if (op_mode.get_value() == "x86")
mode = DR_ISA_IA32;
else if (op_mode.get_value() == "x64")
mode = DR_ISA_AMD64;
# elif defined(ARM)
dr_isa_mode_t mode = DR_ISA_ARM_A32;
if (op_mode.get_value() == "arm")
mode = DR_ISA_ARM_A32;
else if (op_mode.get_value() == "thumb")
mode = DR_ISA_ARM_THUMB;
# endif
else {
std::cerr << "Unknown mode '" << op_mode.get_value() << "'\n";
return 1;
}
if (!dr_set_isa_mode(dcontext, mode, NULL)) {
std::cerr << "Failed to set ISA mode.\n";
return 1;
}
}
#endif

// Turn the arguments into a series of hex values.
std::vector<byte> bytes;
for (int i = last_index; i < argc; ++i) {
if (!parse_bytes(argv[i], bytes)) {
std::cerr << "failed to parse '" << argv[i] << "' as a hexadecimal number\n";
return 1;
}
}

// Process stdin if there are no arguments.
if (last_index == argc) {
std::string line;
while (std::getline(std::cin, line)) {
std::stringstream tokenize(line);
std::string token;
while (tokenize >> token) {
if (!parse_bytes(token, bytes)) {
std::cerr << "failed to parse '" << token
<< "' as a hexadecimal number\n";
return 1;
}
}
}
}

if (bytes.empty()) {
std::cerr << "no bytes specified to disassemble\n";
return 1;
}

size_t data_size = bytes.size();
// Now allocate a "redzone" to avoid DR's decoder going off the end of the
// buffer.
for (int i = 0; i < MAX_INSTR_LENGTH; ++i)
bytes.push_back(0);
byte *pc = &bytes[0];
byte *stop_pc = &bytes[data_size - 1];
while (pc <= stop_pc) {
// Check ahead of time to see whether this instruction enters the redzone
// (or, we could disassemble into a buffer and check before printing it).
if (pc + decode_sizeof(dcontext, pc, NULL _IF_X86_64(NULL)) > stop_pc) {
std::cerr << "disassembly failed: invalid instruction: not enough bytes:";
for (; pc <= stop_pc; ++pc)
std::cerr << " 0x" << std::hex << static_cast<int>(*pc);
std::cerr << "\n";
break;
}
pc =
disassemble_with_info(dcontext, pc, STDOUT, false, op_show_bytes.get_value());
if (pc == NULL) {
std::cerr << "disassembly failed: invalid instruction\n";
break;
}
}

return 0;
}
22 changes: 22 additions & 0 deletions clients/drdisas/test_simple.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#if defined(AARCH64)
f94017a0 ldr +0x28(%x29)[8byte] -> %x0
a9be7bfd stp %x29 %x30 %sp $0xffffffffffffffe0 -> -0x20(%sp)[16byte] %sp
disassembly failed: invalid instruction: not enough bytes: 0x88
#elif defined(ARM) && !defined(thumb)
f2436813 vtst.8 %d3 %d3 -> %d22
b2db42c8 sbcs.lt %r11 $0x8000000c -> %r4
disassembly failed: invalid instruction: not enough bytes: 0x88
#elif defined(ARM) && defined(thumb)
6813 ldr (%r2)[4byte] -> %r3
f243 42c8 movw $0x000034c8 -> %r2
b2db uxtb %r3[1byte] -> %r3
disassembly failed: invalid instruction: not enough bytes: 0x88
#elif defined(X86)
66 90 nop
# ifdef X64
c4 e2 65 90 14 80 vpgatherdd (%rax,%ymm0,4)[4byte] %ymm3 -> %ymm2 %ymm3
# else
c4 e2 65 90 14 80 vpgatherdd (%eax,%ymm0,4)[4byte] %ymm3 -> %ymm2 %ymm3
# endif
disassembly failed: invalid instruction: not enough bytes: 0x88
#endif
4 changes: 2 additions & 2 deletions core/arch/instr_inline.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2012-2016 Google, Inc. All rights reserved.
* Copyright (c) 2012-2020 Google, Inc. All rights reserved.
* **********************************************************/

/*
Expand Down Expand Up @@ -231,7 +231,7 @@ opnd_t
opnd_create_reg_ex(reg_id_t r, opnd_size_t subsize, dr_opnd_flags_t flags)
{
opnd_t opnd = opnd_create_reg_partial(r, subsize);
opnd.aux.flags = flags;
opnd.aux.flags = (ushort)flags;
return opnd;
}

Expand Down
8 changes: 7 additions & 1 deletion ext/droption/droption.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,8 @@ class droption_parser_t {
* droption_t class fields.
* On success, returns true, with the index of the start of the remaining
* unparsed options, if any, returned in \p last_index (typically this
* will be options separated by "--").
* will be options separated by "--" or when encountering a token that
* does not start with a leading "-").
* On failure, returns false, and if \p error_msg != NULL, stores a string
* describing the error there. On failure, \p last_index is set to the
* index of the problematic option or option value.
Expand All @@ -184,6 +185,11 @@ class droption_parser_t {
++i; // for last_index
break;
}
// Also stop on a non-leading-dash token to support arguments without
// a separating "--".
if (argv[i][0] != '-') {
break;
}
bool matched = false;
bool swept = false;
for (std::vector<droption_parser_t *>::iterator opi = allops().begin();
Expand Down
24 changes: 23 additions & 1 deletion suite/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# **********************************************************
# Copyright (c) 2010-2019 Google, Inc. All rights reserved.
# Copyright (c) 2010-2020 Google, Inc. All rights reserved.
# Copyright (c) 2009-2010 VMware, Inc. All rights reserved.
# Copyright (c) 2016 ARM Limited. All rights reserved.
# **********************************************************
Expand Down Expand Up @@ -3258,6 +3258,28 @@ endif ()
# XXX i#1732: add more tests. However, we don't want the suite to fail
# b/c our tests have too-new instrs for older machines.
endif (X86)

###########################################################################
# drdisas tests
# XXX: We should try to export or share the test list, filtering, matching,
# and other code here to support tests being declared locally rather than
# in one central place.
# Raw encodings handed to drdisas on its command line, one list per target
# architecture.  Every list ends with a lone "88" byte, which
# clients/drdisas/test_simple.template expects to be reported as
# "disassembly failed: invalid instruction: not enough bytes: 0x88".
if (AARCH64)
set(drdisas_args "f94017a0" "a9be7bfd" "88")
elseif (ARM)
# The same 16-bit tokens are used for both the default-mode run and the
# "-mode thumb" run registered below.
set(drdisas_args "6813" "f243" "42c8" "b2db" "88")
else ()
set(drdisas_args "66" "90" "c4" "e2" "65" "90" "14" "80" "88")
endif ()
# Default-mode run: output is compared against test_simple.template.
torunonly_api(tool.drdisas drdisas
"../../clients/drdisas/test_simple.template" "" "${drdisas_args}" ON)
if (ARM)
# Second ARM run that decodes the same bytes in Thumb mode.
torunonly_api(tool.drdisas_thumb drdisas
# Pass -thumb as a "DR op" to define it in the .template file.
"../../clients/drdisas/test_simple.template"
"-thumb" "-mode;thumb;${drdisas_args}" ON)
endif ()

endif (BUILD_CLIENTS)

endif (CLIENT_INTERFACE)
Expand Down

0 comments on commit 224dc41

Please sign in to comment.