diff --git a/api/docs/intro.dox b/api/docs/intro.dox
index 4ddf11dea3f..67710fb6a1e 100644
--- a/api/docs/intro.dox
+++ b/api/docs/intro.dox
@@ -1,5 +1,5 @@
 /* ******************************************************************************
- * Copyright (c) 2010-2019 Google, Inc.  All rights reserved.
+ * Copyright (c) 2010-2020 Google, Inc.  All rights reserved.
  * Copyright (c) 2011 Massachusetts Institute of Technology  All rights reserved.
  * Copyright (c) 2007-2010 VMware, Inc.  All rights reserved.
  * ******************************************************************************/
@@ -2078,19 +2078,6 @@ When using DynamoRIO's CMake support, use the configure_DynamoRIO_decoder()
 function to set up include directories and to link with \p drdecode.  The
 next section describes how to link with the DynamoRIO shared library.
 
-\subsection sec_relativize Re-Relativization of Jumps and Calls
-
-When encoding a relative jump or call to a different location than it was
-decoded from while in standalone mode, a re-encode must be forced in order
-to work around an issue where DynamoRIO does not re-relativize the target:
-
-\code
-instr_set_raw_bits_valid(instr, false)
-\endcode
-
-When not in standalone mode, all branches are mangled and thus this is
-never an issue.  This should be fixed in a future release.
-
 
 \section sec_standalone_shared DynamoRIO Shared Library Issues
 
diff --git a/api/docs/release.dox b/api/docs/release.dox
index 6e8a1e58507..ec5c9140d80 100644
--- a/api/docs/release.dox
+++ b/api/docs/release.dox
@@ -175,6 +175,12 @@ compatibility changes:
  - Changed the #PFX format string specifier to use %p rather than %x internally.
  - DR no longer forwards _snprintf, _snwprintf, _vsnprintf, sprintf, or sscanf to
    ntdll.  Clients should use the dr_-prefixed versions of these functions.
+ - PC-relative control transfer instructions are now auto-re-relativized by the
+   general decoder and encoder.  This affects clients and standalone tools that use
+   decode_from_copy() or instr_encode_to_copy() or instrlist_encode_to_copy().
+   Previously, re-relativization for instruction references only happened when an
+   instruction was re-encoded.  This auto-PC-relativization can be avoided by calling
+   instr_set_rip_rel_valid() and setting the validity of the PC-relative data to false.
 
 Further non-compatibility-affecting changes include:
 
@@ -223,6 +229,8 @@ Further non-compatibility-affecting changes include:
  - Added drmgr_register_low_on_memory_event(), drmgr_unregister_low_on_memory_event()
    and their variants so that drmgr can support low-on-memory events.
  - Added drmgr_is_first_nonlabel_instr() and instrlist_first_nonlabel().
+ - Added decode_sizeof_ex() and instr_get_rel_data_or_instr_target(), which handle
+   relative instruction references.
 
 **************************************************
 <hr>
diff --git a/core/arch/aarch64/codec.c b/core/arch/aarch64/codec.c
index 6abf17f4e63..4318ec55372 100644
--- a/core/arch/aarch64/codec.c
+++ b/core/arch/aarch64/codec.c
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2017 Google, Inc.  All rights reserved.
+ * Copyright (c) 2017-2020 Google, Inc.  All rights reserved.
  * Copyright (c) 2016 ARM Limited. All rights reserved.
  * **********************************************************/
 
@@ -3161,6 +3161,7 @@ decode_common(dcontext_t *dcontext, byte *pc, byte *orig_pc, instr_t *instr)
     if (orig_pc != pc) {
         /* We do not want to copy when encoding and condone an invalid
          * relative target.
+         * TODO i#4016: Add re-relativization support without having to re-encode.
          */
         instr_set_raw_bits_valid(instr, false);
         instr_set_translation(instr, orig_pc);
diff --git a/core/arch/aarch64/encode.c b/core/arch/aarch64/encode.c
index f706ba5cc54..adeed88be98 100644
--- a/core/arch/aarch64/encode.c
+++ b/core/arch/aarch64/encode.c
@@ -1,4 +1,5 @@
 /* **********************************************************
+ * Copyright (c) 2020 Google, Inc. All rights reserved.
  * Copyright (c) 2016 ARM Limited. All rights reserved.
  * **********************************************************/
 
@@ -193,7 +194,7 @@ byte *
 copy_and_re_relativize_raw_instr(dcontext_t *dcontext, instr_t *instr, byte *dst_pc,
                                  byte *final_pc)
 {
-    /* FIXME i#1569: re-relativizing is NYI */
+    /* TODO i#4016: re-relativizing is NYI */
     /* OP_ldstex is always relocatable. */
     ASSERT(instr_raw_bits_valid(instr) || instr_get_opcode(instr) == OP_ldstex);
     memcpy(dst_pc, instr->bytes, instr->length);
diff --git a/core/arch/arm/decode.c b/core/arch/arm/decode.c
index bdb0f4c566d..1dcebec88ca 100644
--- a/core/arch/arm/decode.c
+++ b/core/arch/arm/decode.c
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2014-2019 Google, Inc.  All rights reserved.
+ * Copyright (c) 2014-2020 Google, Inc.  All rights reserved.
  * **********************************************************/
 
 /*
@@ -2500,6 +2500,7 @@ decode_common(dcontext_t *dcontext, byte *pc, byte *orig_pc, instr_t *instr)
     if (orig_pc != pc) {
         /* We do not want to copy when encoding and condone an invalid
          * relative target
+         * TODO i#4016: Add re-relativization support without having to re-encode.
          */
         instr_set_raw_bits_valid(instr, false);
         instr_set_translation(instr, orig_pc);
diff --git a/core/arch/arm/encode.c b/core/arch/arm/encode.c
index 039e67e2516..a4a31610c68 100644
--- a/core/arch/arm/encode.c
+++ b/core/arch/arm/encode.c
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2014-2015 Google, Inc.  All rights reserved.
+ * Copyright (c) 2014-2020 Google, Inc.  All rights reserved.
  * **********************************************************/
 
 /*
@@ -3099,7 +3099,7 @@ byte *
 copy_and_re_relativize_raw_instr(dcontext_t *dcontext, instr_t *instr, byte *dst_pc,
                                  byte *final_pc)
 {
-    /* FIXME i#1551: re-relativizing is NYI */
+    /* TODO i#4016: re-relativizing is NYI */
     ASSERT(instr_raw_bits_valid(instr));
     memcpy(dst_pc, instr->bytes, instr->length);
     return dst_pc + instr->length;
diff --git a/core/arch/decode_fast.h b/core/arch/decode_fast.h
index de8b64eeb90..319e4f01064 100644
--- a/core/arch/decode_fast.h
+++ b/core/arch/decode_fast.h
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2015-2019 Google, Inc.  All rights reserved.
+ * Copyright (c) 2015-2020 Google, Inc.  All rights reserved.
  * Copyright (c) 2001-2009 VMware, Inc.  All rights reserved.
  * **********************************************************/
 
@@ -46,15 +46,37 @@ DR_API
  * Decodes only enough of the instruction at address \p pc to determine its size.
  * Returns that size.
  * If \p num_prefixes is non-NULL, returns the number of prefix bytes.
- * If \p rip_rel_pos is non-NULL, returns the offset into the instruction
- * of a rip-relative addressing displacement (for data only: ignores
- * control-transfer relative addressing), or 0 if none.
+ *
+ * On x86, if \p rip_rel_pos is non-NULL, returns the offset into the instruction of a
+ * rip-relative addressing displacement (for data only: ignores control-transfer
+ * relative addressing; use decode_sizeof_ex() for that), or 0 if none.
+ * The \p rip_rel_pos parameter is only implemented for x86, where the displacement
+ * is always 4 bytes in size.
+ *
  * May return 0 size for certain invalid instructions.
  */
 int
 decode_sizeof(dcontext_t *dcontext, byte *pc,
               int *num_prefixes _IF_X86_64(uint *rip_rel_pos));
 
+#ifdef X86
+DR_API
+/**
+ * Decodes only enough of the instruction at address \p pc to determine its size.
+ * Returns that size.
+ * If \p num_prefixes is non-NULL, returns the number of prefix bytes.
+ *
+ * If \p rip_rel_pos is non-NULL, returns in it the offset into the instruction of
+ * the rip-relative displacement, for either a data reference or a control-transfer
+ * target, or 0 if none.  This is only implemented for x86, where the displacement
+ * is always 4 bytes for data but can be 1 byte for control.
+ *
+ * May return 0 size for certain invalid instructions.
+ */
+int
+decode_sizeof_ex(dcontext_t *dcontext, byte *pc, int *num_prefixes, uint *rip_rel_pos);
+#endif
+
 DR_API
 /**
  * Decodes only enough of the instruction at address \p pc to determine its size.
@@ -99,7 +121,7 @@ DR_UNS_EXCEPT_TESTS_API
  * Does NOT fill in any other prefix flags unless this is a cti instr
  * and the flags affect the instr.
  *
- * For x64, calls instr_set_rip_rel_pos().  Thus, if the raw bytes are
+ * For x86, calls instr_set_rip_rel_pos().  Thus, if the raw bytes are
  * not modified prior to encode time, any rip-relative offset will be
  * automatically re-relativized (though encoding will fail if the new
  * encode location cannot reach the original target).
diff --git a/core/arch/instr.h b/core/arch/instr.h
index 38b4f30c354..1d6e9aafb9c 100644
--- a/core/arch/instr.h
+++ b/core/arch/instr.h
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2011-2019 Google, Inc.  All rights reserved.
+ * Copyright (c) 2011-2020 Google, Inc.  All rights reserved.
  * Copyright (c) 2000-2010 VMware, Inc.  All rights reserved.
  * **********************************************************/
 
@@ -460,7 +460,7 @@ struct _instr_t {
 
     uint opcode;
 
-#    ifdef X86_64
+#    ifdef X86
     /* PR 251479: offset into instr's raw bytes of rip-relative 4-byte displacement */
     byte rip_rel_pos;
 #    endif
@@ -1870,12 +1870,24 @@ bool
 instr_is_xsave(instr_t *instr);
 #endif
 
+DR_API
+/**
+ * If any of \p instr's operands is a rip-relative data or instruction
+ * memory reference, returns true and writes the address that reference
+ * targets to \p target.  Else returns false.  For instruction references,
+ * only PC operands are considered: not instr_t pointer operands.
+ *
+ * \note Currently this is only implemented for x86.
+ */
+bool
+instr_get_rel_data_or_instr_target(instr_t *instr, /*OUT*/ app_pc *target);
+
 /* DR_API EXPORT BEGIN */
 #if defined(X64) || defined(ARM)
 /* DR_API EXPORT END */
 DR_API
 /**
- * Returns true iff any of \p instr's operands is a rip-relative memory reference.
+ * Returns true iff any of \p instr's operands is a rip-relative data memory reference.
  *
  * \note For 64-bit DR builds only.
  */
@@ -1884,7 +1896,7 @@ instr_has_rel_addr_reference(instr_t *instr);
 
 DR_API
 /**
- * If any of \p instr's operands is a rip-relative memory reference, returns the
+ * If any of \p instr's operands is a rip-relative data memory reference, returns the
  * address that reference targets.  Else returns false.
  *
  * \note For 64-bit DR builds only.
@@ -1894,7 +1906,7 @@ instr_get_rel_addr_target(instr_t *instr, /*OUT*/ app_pc *target);
 
 DR_API
 /**
- * If any of \p instr's destination operands is a rip-relative memory
+ * If any of \p instr's destination operands is a rip-relative data memory
  * reference, returns the operand position.  If there is no such
  * destination operand, returns -1.
  *
@@ -1917,7 +1929,7 @@ instr_get_rel_addr_src_idx(instr_t *instr);
 #endif /* X64 || ARM */
 /* DR_API EXPORT END */
 
-#ifdef X86_64
+#ifdef X86
 /* We're not exposing the low-level rip_rel_pos routines directly to clients,
  * who should only use this level 1-3 feature via decode_cti + encode.
  */
@@ -1949,7 +1961,7 @@ instr_get_rip_rel_pos(instr_t *instr);
  */
 void
 instr_set_rip_rel_pos(instr_t *instr, uint pos);
-#endif /* X64 */
+#endif /* X86 */
 
 /* not exported: for PR 267260 */
 bool
@@ -3007,7 +3019,7 @@ instr_create_save_to_reg(dcontext_t *dcontext, reg_id_t reg1, reg_id_t reg2);
 instr_t *
 instr_create_restore_from_reg(dcontext_t *dcontext, reg_id_t reg1, reg_id_t reg2);
 
-#ifdef X64
+#ifdef X86_64
 byte *
 instr_raw_is_rip_rel_lea(byte *pc, byte *read_end);
 #endif
diff --git a/core/arch/instr_shared.c b/core/arch/instr_shared.c
index 018b153c6d2..3774b8ebd1c 100644
--- a/core/arch/instr_shared.c
+++ b/core/arch/instr_shared.c
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2011-2019 Google, Inc.  All rights reserved.
+ * Copyright (c) 2011-2020 Google, Inc.  All rights reserved.
  * Copyright (c) 2000-2010 VMware, Inc.  All rights reserved.
  * **********************************************************/
 
@@ -257,7 +257,7 @@ instr_reuse(dcontext_t *dcontext, instr_t *instr)
     bool alloc = false;
     bool mangle = instr_is_app(instr);
     dr_isa_mode_t isa_mode = instr_get_isa_mode(instr);
-#ifdef X86_64
+#ifdef X86
     uint rip_rel_pos = instr_rip_rel_valid(instr) ? instr->rip_rel_pos : 0;
 #endif
     instr_t *next = instr->next;
@@ -288,7 +288,7 @@ instr_reuse(dcontext_t *dcontext, instr_t *instr)
     }
     /* preserve across the up-decode */
     instr_set_isa_mode(instr, isa_mode);
-#ifdef X86_64
+#ifdef X86
     if (rip_rel_pos > 0)
         instr_set_rip_rel_pos(instr, rip_rel_pos);
 #endif
@@ -365,7 +365,7 @@ private_instr_encode(dcontext_t *dcontext, instr_t *instr, bool always_cache)
         ((valid_to_cache && instr_is_app(instr)) ||
          always_cache /*caller will use then invalidate*/)) {
         bool valid = instr_operands_valid(instr);
-#ifdef X86_64
+#ifdef X86
         /* we can't call instr_rip_rel_valid() b/c the raw bytes are not yet
          * set up: we rely on instr_encode() setting instr->rip_rel_pos and
          * the valid flag, even though raw bytes weren't there at the time.
@@ -384,7 +384,7 @@ private_instr_encode(dcontext_t *dcontext, instr_t *instr, bool always_cache)
          */
         tmp = instr->bytes;
         instr->bytes = buf;
-#ifdef X86_64
+#ifdef X86
         instr_set_rip_rel_valid(instr, rip_rel_valid);
 #endif
         copy_and_re_relativize_raw_instr(dcontext, instr, tmp, tmp);
@@ -1031,7 +1031,7 @@ instr_set_raw_bits(instr_t *instr, byte *addr, uint length)
     instr->flags |= INSTR_RAW_BITS_VALID;
     instr->bytes = addr;
     instr->length = length;
-#ifdef X86_64
+#ifdef X86
     instr_set_rip_rel_valid(instr, false); /* relies on original raw bits */
 #endif
 }
@@ -1045,7 +1045,7 @@ instr_shift_raw_bits(instr_t *instr, ssize_t offs)
 {
     if ((instr->flags & INSTR_RAW_BITS_VALID) != 0)
         instr->bytes += offs;
-#ifdef X86_64
+#ifdef X86
     instr_set_rip_rel_valid(instr, false); /* relies on original raw bits */
 #endif
 }
@@ -1065,7 +1065,7 @@ instr_set_raw_bits_valid(instr_t *instr, bool valid)
          * addresses for exception/signal handlers
          * Also do not de-allocate allocated bits
          */
-#ifdef X86_64
+#ifdef X86
         instr_set_rip_rel_valid(instr, false);
 #endif
     }
@@ -1113,7 +1113,7 @@ instr_allocate_raw_bits(dcontext_t *dcontext, instr_t *instr, uint num_bytes)
     instr->flags |= INSTR_RAW_BITS_ALLOCATED;
     instr->flags &= ~INSTR_OPERANDS_VALID;
     instr->flags &= ~INSTR_EFLAGS_VALID;
-#ifdef X86_64
+#ifdef X86
     instr_set_rip_rel_valid(instr, false); /* relies on original raw bits */
 #endif
 }
@@ -1208,7 +1208,7 @@ instr_set_raw_byte(instr_t *instr, uint pos, byte val)
     CLIENT_ASSERT(pos >= 0 && pos < instr->length && instr->bytes != NULL,
                   "instr_set_raw_byte: ordinal invalid, or no raw bits");
     instr->bytes[pos] = (byte)val;
-#ifdef X86_64
+#ifdef X86
     instr_set_rip_rel_valid(instr, false); /* relies on original raw bits */
 #endif
 }
@@ -1225,7 +1225,7 @@ instr_set_raw_bytes(instr_t *instr, byte *start, uint num_bytes)
     CLIENT_ASSERT(num_bytes <= instr->length && instr->bytes != NULL,
                   "instr_set_raw_bytes: ordinal invalid, or no raw bits");
     memcpy(instr->bytes, start, num_bytes);
-#ifdef X86_64
+#ifdef X86
     instr_set_rip_rel_valid(instr, false); /* relies on original raw bits */
 #endif
 }
@@ -1242,7 +1242,7 @@ instr_set_raw_word(instr_t *instr, uint pos, uint word)
     CLIENT_ASSERT(pos >= 0 && pos + 3 < instr->length && instr->bytes != NULL,
                   "instr_set_raw_word: ordinal invalid, or no raw bits");
     *((uint *)(instr->bytes + pos)) = word;
-#ifdef X86_64
+#ifdef X86
     instr_set_rip_rel_valid(instr, false); /* relies on original raw bits */
 #endif
 }
@@ -1525,7 +1525,7 @@ instr_decode_opcode(dcontext_t *dcontext, instr_t *instr)
     if (!instr_opcode_valid(instr)) {
         byte *next_pc;
         DEBUG_EXT_DECLARE(int old_len = instr->length;)
-#ifdef X86_64
+#ifdef X86
         bool rip_rel_valid = instr_rip_rel_valid(instr);
 #endif
         /* decode_opcode() will use the dcontext mode, but we want the instr mode */
@@ -1536,7 +1536,7 @@ instr_decode_opcode(dcontext_t *dcontext, instr_t *instr)
         instr_reuse(dcontext, instr);
         next_pc = decode_opcode(dcontext, instr->bytes, instr);
         dr_set_isa_mode(dcontext, old_mode, NULL);
-#ifdef X86_64
+#ifdef X86
         /* decode_opcode sets raw bits which invalidates rip_rel, but
          * it should still be valid on an up-decode of the opcode */
         if (rip_rel_valid)
@@ -1558,7 +1558,7 @@ instr_decode(dcontext_t *dcontext, instr_t *instr)
     if (!instr_operands_valid(instr)) {
         byte *next_pc;
         DEBUG_EXT_DECLARE(int old_len = instr->length;)
-#ifdef X86_64
+#ifdef X86
         bool rip_rel_valid = instr_rip_rel_valid(instr);
 #endif
         /* decode() will use the current dcontext mode, but we want the instr mode */
@@ -1572,7 +1572,7 @@ instr_decode(dcontext_t *dcontext, instr_t *instr)
             instr_set_translation(instr, instr_get_raw_bits(instr));
 #endif
         dr_set_isa_mode(dcontext, old_mode, NULL);
-#ifdef X86_64
+#ifdef X86
         /* decode sets raw bits which invalidates rip_rel, but
          * it should still be valid on an up-decode */
         if (rip_rel_valid)
@@ -2037,7 +2037,6 @@ instr_is_xsave(instr_t *instr)
 }
 #endif /* X86 */
 
-#if defined(X64) || defined(ARM)
 /* PR 251479: support general re-relativization.  If INSTR_RIP_REL_VALID is set and
  * the raw bits are valid, instr->rip_rel_pos is assumed to hold the offset into the
  * instr of a 32-bit rip-relative displacement, which is used to re-relativize during
@@ -2050,14 +2049,14 @@ instr_is_xsave(instr_t *instr)
  * raw bits: we can't rely just on the raw bits invalidation.
  * There can only be one rip-relative operand per instruction.
  */
-/* FIXME i#1551: for ARM we don't have a large displacement on every reference.
+/* TODO i#4016: for AArchXX we don't have a large displacement on every reference.
  * Some have no disp at all, others have just 12 bits or smaller.
  * We need to come up with a strategy for handling encode-time re-relativization.
  * Xref copy_and_re_relativize_raw_instr().
  * For now, we do use some of these routines, but none that use the rip_rel_pos.
  */
 
-#    ifdef X86_64
+#ifdef X86
 bool
 instr_rip_rel_valid(instr_t *instr)
 {
@@ -2087,28 +2086,54 @@ instr_set_rip_rel_pos(instr_t *instr, uint pos)
     instr->rip_rel_pos = (byte)pos;
     instr_set_rip_rel_valid(instr, true);
 }
-#    endif /* X86_64 */
+#endif /* X86 */
 
 bool
-instr_get_rel_addr_target(instr_t *instr, app_pc *target)
+instr_get_rel_target(instr_t *instr, /*OUT*/ app_pc *target, bool data_only)
 {
-    int i;
-    opnd_t curop;
     if (!instr_valid(instr))
         return false;
-#    ifdef X86_64
+
+    /* For PC operands we must check the high-level operands *before* rip_rel_pos,
+     * to support decode_from_copy().  As documented, we ignore instr_t targets.
+     */
+    if (!data_only && instr_operands_valid(instr) && instr_num_srcs(instr) > 0 &&
+        opnd_is_pc(instr_get_src(instr, 0))) {
+        *target = opnd_get_pc(instr_get_src(instr, 0));
+        return true;
+    }
+
+#ifdef X86
     /* PR 251479: we support rip-rel info in level 1 instrs */
     if (instr_rip_rel_valid(instr)) {
-        if (instr_get_rip_rel_pos(instr) > 0) {
+        int rip_rel_pos = instr_get_rip_rel_pos(instr);
+        if (rip_rel_pos > 0) {
+            if (data_only &&
+                /* Invariant: no instruction has 2 rip-rel immeds. */
+                ((instr_is_cti(instr) && !instr_is_mbr(instr)) ||
+                 instr_get_opcode(instr) == OP_xbegin))
+                return false;
             if (target != NULL) {
-                *target = instr->bytes + instr->length +
-                    *((int *)(instr->bytes + instr_get_rip_rel_pos(instr)));
+                /* The only non-4-byte rip-rel displacement we support is a 1-byte
+                 * displacement in the instruction's final byte (e.g., jcc_short).
+                 */
+                if (rip_rel_pos + 1 == instr->length) {
+                    *target = instr->bytes + instr->length +
+                        *((char *)(instr->bytes + rip_rel_pos));
+                } else {
+                    ASSERT(rip_rel_pos + 4 <= instr->length);
+                    *target = instr->bytes + instr->length +
+                        *((int *)(instr->bytes + rip_rel_pos));
+                }
             }
             return true;
         } else
             return false;
     }
-#    endif
+#endif
+#if defined(X64) || defined(ARM)
+    int i;
+    opnd_t curop;
     /* else go to level 3 operands */
     for (i = 0; i < instr_num_dsts(instr); i++) {
         curop = instr_get_dst(instr, i);
@@ -2156,9 +2181,23 @@ instr_get_rel_addr_target(instr_t *instr, app_pc *target)
                 }
             });
     }
+#endif
     return false;
 }
 
+bool
+instr_get_rel_data_or_instr_target(instr_t *instr, /*OUT*/ app_pc *target)
+{
+    return instr_get_rel_target(instr, target, false /*!data_only*/);
+}
+
+#if defined(X64) || defined(ARM)
+bool
+instr_get_rel_addr_target(instr_t *instr, /*OUT*/ app_pc *target)
+{
+    return instr_get_rel_target(instr, target, true /*data-only*/);
+}
+
 bool
 instr_has_rel_addr_reference(instr_t *instr)
 {
diff --git a/core/arch/mangle_shared.c b/core/arch/mangle_shared.c
index 5c7d3ed14c8..ba136938201 100644
--- a/core/arch/mangle_shared.c
+++ b/core/arch/mangle_shared.c
@@ -1,5 +1,5 @@
 /* ******************************************************************************
- * Copyright (c) 2010-2019 Google, Inc.  All rights reserved.
+ * Copyright (c) 2010-2020 Google, Inc.  All rights reserved.
  * Copyright (c) 2010 Massachusetts Institute of Technology  All rights reserved.
  * Copyright (c) 2000-2010 VMware, Inc.  All rights reserved.
  * ******************************************************************************/
@@ -1101,6 +1101,12 @@ mangle_rseq_insert_native_sequence(dcontext_t *dcontext, instrlist_t *ilist,
             opnd_is_pc(instr_get_target(copy))) {
             app_pc tgt = opnd_get_pc(instr_get_target(copy));
             if (tgt >= start && tgt < end) {
+                /* We do not want to use the absolute PC and re-relativize: we want
+                 * to use the same relative offset.  (An alternative would be to
+                 * convert the PC operand to an instr_t pointer but that would take
+                 * extra passes.)
+                 */
+                instr_set_rip_rel_valid(copy, false);
                 PRE(ilist, insert_at, copy);
                 continue;
             }
diff --git a/core/arch/x86/decode.c b/core/arch/x86/decode.c
index e7d5c9f2da2..f3bb565475f 100644
--- a/core/arch/x86/decode.c
+++ b/core/arch/x86/decode.c
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2011-2019 Google, Inc.  All rights reserved.
+ * Copyright (c) 2011-2020 Google, Inc.  All rights reserved.
  * Copyright (c) 2000-2010 VMware, Inc.  All rights reserved.
  * **********************************************************/
 
@@ -500,6 +500,7 @@ read_operand(byte *pc, decode_info_t *di, byte optype, opnd_size_t opsize)
     }
     case TYPE_J: {
         byte *end_pc;
+        di->disp_abs = pc; /* For re-relativization support. */
         pc = read_immed(pc, di, opsize, &val);
         if (di->orig_pc != di->start_pc) {
             CLIENT_ASSERT(di->start_pc != NULL,
@@ -608,7 +609,10 @@ read_modrm(byte *pc, decode_info_t *di)
             /* 4-byte disp */
             di->has_disp = true;
             di->disp = *((int *)pc);
-            IF_X64(di->disp_abs = pc); /* used to set instr->rip_rel_pos */
+#ifdef X64
+            if (X64_MODE(di) && di->mod == 0 && di->rm == 5)
+                di->disp_abs = pc; /* Used to set instr->rip_rel_pos. */
+#endif
             pc += 4;
         } else if (di->mod == 1) {
             /* 1-byte disp */
@@ -955,6 +959,7 @@ read_instruction(byte *pc, byte *orig_pc, const instr_info_t **ret_info,
     di->repne_prefix = false;
     di->vex_encoded = false;
     di->evex_encoded = false;
+    di->disp_abs = 0;
     /* FIXME: set data and addr sizes to current mode
      * for now I assume always 32-bit mode (or 64 for X64_MODE(di))!
      */
@@ -2444,12 +2449,10 @@ decode_opcode(dcontext_t *dcontext, byte *pc, instr_t *instr)
     const instr_info_t *info;
     decode_info_t di;
     int sz;
-#ifdef X64
     /* PR 251479: we need to know about all rip-relative addresses.
      * Since change/setting raw bits invalidates, we must set this
      * on every return. */
     uint rip_rel_pos;
-#endif
     IF_X64(di.x86_mode = instr_get_x86_mode(instr));
     /* when pass true to read_instruction it doesn't decode immeds,
      * so have to call decode_next_pc, but that ends up being faster
@@ -2458,7 +2461,7 @@ decode_opcode(dcontext_t *dcontext, byte *pc, instr_t *instr)
     read_instruction(pc, pc, &info, &di,
                      true /* just opcode */
                      _IF_DEBUG(!TEST(INSTR_IGNORE_INVALID, instr->flags)));
-    sz = decode_sizeof(dcontext, pc, NULL _IF_X64(&rip_rel_pos));
+    sz = decode_sizeof_ex(dcontext, pc, NULL, &rip_rel_pos);
     IF_X64(instr_set_x86_mode(instr, get_x86_mode(dcontext)));
     instr_set_opcode(instr, info->type);
     /* read_instruction sets opcode to OP_INVALID for illegal instr.
@@ -2477,7 +2480,7 @@ decode_opcode(dcontext_t *dcontext, byte *pc, instr_t *instr)
     /* raw bits are valid though and crucial for encoding */
     instr_set_raw_bits(instr, pc, sz);
     /* must set rip_rel_pos after setting raw bits */
-    IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
+    instr_set_rip_rel_pos(instr, rip_rel_pos);
     return pc + sz;
 }
 
@@ -2654,27 +2657,18 @@ decode_common(dcontext_t *dcontext, byte *pc, byte *orig_pc, instr_t *instr)
      * in other situations does not result in #UD so we ignore.
      */
 
-    if (orig_pc != pc) {
-        /* We do not want to copy when encoding and condone an invalid
-         * relative target
-         */
-        instr_set_raw_bits_valid(instr, false);
-        instr_set_translation(instr, orig_pc);
-    } else {
-        /* we set raw bits AFTER setting all srcs and dsts b/c setting
-         * a src or dst marks instr as having invalid raw bits
-         */
-        IF_X64(ASSERT(CHECK_TRUNCATE_TYPE_uint(next_pc - pc)));
-        instr_set_raw_bits(instr, pc, (uint)(next_pc - pc));
-#ifdef X64
-        if (X64_MODE(&di) && TEST(HAS_MODRM, info->flags) && di.mod == 0 && di.rm == 5) {
-            CLIENT_ASSERT(di.disp_abs > di.start_pc, "decode: internal rip-rel error");
-            CLIENT_ASSERT(CHECK_TRUNCATE_TYPE_int(di.disp_abs - di.start_pc),
-                          "decode: internal rip-rel error");
-            /* must do this AFTER setting raw bits to avoid being invalidated */
-            instr_set_rip_rel_pos(instr, (int)(di.disp_abs - di.start_pc));
-        }
-#endif
+    instr_set_translation(instr, orig_pc);
+    /* We set raw bits AFTER setting all srcs and dsts b/c setting
+     * a src or dst marks instr as having invalid raw bits.
+     */
+    IF_X64(ASSERT(CHECK_TRUNCATE_TYPE_uint(next_pc - pc)));
+    instr_set_raw_bits(instr, pc, (uint)(next_pc - pc));
+    if (di.disp_abs > di.start_pc) {
+        CLIENT_ASSERT(di.disp_abs > di.start_pc, "decode: internal rip-rel error");
+        CLIENT_ASSERT(CHECK_TRUNCATE_TYPE_int(di.disp_abs - di.start_pc),
+                      "decode: internal rip-rel error");
+        /* We must do this AFTER setting raw bits to avoid being invalidated. */
+        instr_set_rip_rel_pos(instr, (int)(di.disp_abs - di.start_pc));
     }
 
     return next_pc;
diff --git a/core/arch/x86/decode_fast.c b/core/arch/x86/decode_fast.c
index 488f71792c8..ad131ced2a3 100644
--- a/core/arch/x86/decode_fast.c
+++ b/core/arch/x86/decode_fast.c
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2011-2014 Google, Inc.  All rights reserved.
+ * Copyright (c) 2011-2020 Google, Inc.  All rights reserved.
  * Copyright (c) 2001-2010 VMware, Inc.  All rights reserved.
  * **********************************************************/
 
@@ -225,16 +225,15 @@ static const char x64_adjustment[256] = {
 /* Prototypes for the functions that calculate the variable
  * part of the x86 instruction length. */
 static int
-sizeof_modrm(dcontext_t *dcontext, byte *pc, bool addr16 _IF_X64(byte **rip_rel_pc));
+sizeof_modrm(dcontext_t *dcontext, byte *pc, bool addr16, byte **rip_rel_pc);
 static int
-sizeof_fp_op(dcontext_t *dcontext, byte *pc, bool addr16 _IF_X64(byte **rip_rel_pc));
+sizeof_fp_op(dcontext_t *dcontext, byte *pc, bool addr16, byte **rip_rel_pc);
 static int
-sizeof_escape(dcontext_t *dcontext, byte *pc, bool addr16 _IF_X64(byte **rip_rel_pc));
+sizeof_escape(dcontext_t *dcontext, byte *pc, bool addr16, byte **rip_rel_pc);
 static int
-sizeof_3byte_38(dcontext_t *dcontext, byte *pc, bool addr16,
-                bool vex _IF_X64(byte **rip_rel_pc));
+sizeof_3byte_38(dcontext_t *dcontext, byte *pc, bool addr16, bool vex, byte **rip_rel_pc);
 static int
-sizeof_3byte_3a(dcontext_t *dcontext, byte *pc, bool addr16 _IF_X64(byte **rip_rel_pc));
+sizeof_3byte_3a(dcontext_t *dcontext, byte *pc, bool addr16, byte **rip_rel_pc);
 
 enum {
     VARLEN_NONE,
@@ -243,36 +242,40 @@ enum {
     VARLEN_ESCAPE,          /* 2-byte opcodes */
     VARLEN_3BYTE_38_ESCAPE, /* 3-byte opcodes 0f 38 */
     VARLEN_3BYTE_3A_ESCAPE, /* 3-byte opcodes 0f 3a */
+    VARLEN_RIP_REL_1BYTE,   /* Ends in a 1-byte rip-rel immediate. */
+    VARLEN_RIP_REL_4BYTE,   /* Ends in a 4-byte rip-rel immediate. */
 };
 
 /* Some macros to make the following table look better. */
 #define m VARLEN_MODRM
 #define f VARLEN_FP_OP
 #define e VARLEN_ESCAPE
+#define r1 VARLEN_RIP_REL_1BYTE
+#define r4 VARLEN_RIP_REL_4BYTE
 
 /* Data table indicating what function to use to calculate
    the variable part of the x86 instruction.  This table
    is indexed by the primary opcode.  */
 static const byte variable_length[256] = {
-    m, m, m, m, 0, 0, 0, 0, m, m, m, m, 0, 0, 0, e, /* 0 */
-    m, m, m, m, 0, 0, 0, 0, m, m, m, m, 0, 0, 0, 0, /* 1 */
-    m, m, m, m, 0, 0, 0, 0, m, m, m, m, 0, 0, 0, 0, /* 2 */
-    m, m, m, m, 0, 0, 0, 0, m, m, m, m, 0, 0, 0, 0, /* 3 */
-
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4 */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5 */
-    0, 0, m, m, 0, 0, 0, 0, 0, m, 0, m, 0, 0, 0, 0, /* 6 */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7 */
-
-    m, m, m, m, m, m, m, m, m, m, m, m, m, m, m, m, /* 8 */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 9 */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B */
-
-    m, m, 0, 0, m, m, m, m, 0, 0, 0, 0, 0, 0, 0, 0, /* C */
-    m, m, m, m, 0, 0, 0, 0, f, f, f, f, f, f, f, f, /* D */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* E */
-    0, 0, 0, 0, 0, 0, m, m, 0, 0, 0, 0, 0, 0, m, m  /* F */
+    m,  m,  m,  m,  0,  0,  0,  0,  m,  m,  m,  m,  0,  0,  0,  e, /* 0 */
+    m,  m,  m,  m,  0,  0,  0,  0,  m,  m,  m,  m,  0,  0,  0,  0, /* 1 */
+    m,  m,  m,  m,  0,  0,  0,  0,  m,  m,  m,  m,  0,  0,  0,  0, /* 2 */
+    m,  m,  m,  m,  0,  0,  0,  0,  m,  m,  m,  m,  0,  0,  0,  0, /* 3 */
+
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 4 */
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 5 */
+    0,  0,  m,  m,  0,  0,  0,  0,  0,  m,  0,  m,  0,  0,  0,  0,  /* 6 */
+    r1, r1, r1, r1, r1, r1, r1, r1, r1, r1, r1, r1, r1, r1, r1, r1, /* 7 */
+
+    m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m, /* 8 */
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* 9 */
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* A */
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* B */
+
+    m,  m,  0,  0,  m,  m,  m,  m,  0,  0,  0,  0,  0,  0,  0,  0, /* C */
+    m,  m,  m,  m,  0,  0,  0,  0,  f,  f,  f,  f,  f,  f,  f,  f, /* D */
+    r1, r1, r1, r1, 0,  0,  0,  0,  r4, r4, 0,  r1, 0,  0,  0,  0, /* E */
+    0,  0,  0,  0,  0,  0,  m,  m,  0,  0,  0,  0,  0,  0,  m,  m  /* F */
 };
 
 /* eliminate the macros */
@@ -319,25 +322,25 @@ static const byte escape_fixed_length[256] = {
    the variable part of the escaped x86 instruction.  This table
    is indexed by the 2nd opcode byte.  */
 static const byte escape_variable_length[256] = {
-    m, m, m, m, 0, 0, 0, 0, 0,  0, 0,  0, 0, m, 0, m, /* 0 */
-    m, m, m, m, m, m, m, m, m,  m, m,  m, m, m, m, m, /* 1 */
-    m, m, m, m, 0, 0, 0, 0, m,  m, m,  m, m, m, m, m, /* 2 */
-    0, 0, 0, 0, 0, 0, 0, 0, e1, 0, e2, 0, 0, 0, 0, 0, /* 3 */
-
-    m, m, m, m, m, m, m, m, m,  m, m,  m, m, m, m, m, /* 4 */
-    m, m, m, m, m, m, m, m, m,  m, m,  m, m, m, m, m, /* 5 */
-    m, m, m, m, m, m, m, m, m,  m, m,  m, m, m, m, m, /* 6 */
-    m, m, m, m, m, m, m, 0, m,  m, m,  m, m, m, m, m, /* 7 */
-
-    0, 0, 0, 0, 0, 0, 0, 0, 0,  0, 0,  0, 0, 0, 0, 0, /* 8 */
-    m, m, m, m, m, m, m, m, m,  m, m,  m, m, m, m, m, /* 9 */
-    0, 0, 0, m, m, m, 0, 0, 0,  0, 0,  m, m, m, m, m, /* A */
-    m, m, m, m, m, m, m, m, m,  0, m,  m, m, m, m, m, /* B */
-
-    m, m, m, m, m, m, m, m, 0,  0, 0,  0, 0, 0, 0, 0, /* C */
-    m, m, m, m, m, m, m, m, m,  m, m,  m, m, m, m, m, /* D */
-    m, m, m, m, m, m, m, m, m,  m, m,  m, m, m, m, m, /* E */
-    m, m, m, m, m, m, m, m, m,  m, m,  m, m, m, m, 0  /* F */
+    m,  m,  m,  m,  0,  0,  0,  0,  0,  0,  0,  0,  0,  m,  0,  m, /* 0 */
+    m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m, /* 1 */
+    m,  m,  m,  m,  0,  0,  0,  0,  m,  m,  m,  m,  m,  m,  m,  m, /* 2 */
+    0,  0,  0,  0,  0,  0,  0,  0,  e1, 0,  e2, 0,  0,  0,  0,  0, /* 3 */
+
+    m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m, /* 4 */
+    m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m, /* 5 */
+    m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m, /* 6 */
+    m,  m,  m,  m,  m,  m,  m,  0,  m,  m,  m,  m,  m,  m,  m,  m, /* 7 */
+
+    r4, r4, r4, r4, r4, r4, r4, r4, r4, r4, r4, r4, r4, r4, r4, r4, /* 8 */
+    m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  /* 9 */
+    0,  0,  0,  m,  m,  m,  0,  0,  0,  0,  0,  m,  m,  m,  m,  m,  /* A */
+    m,  m,  m,  m,  m,  m,  m,  m,  m,  0,  m,  m,  m,  m,  m,  m,  /* B */
+
+    m,  m,  m,  m,  m,  m,  m,  m,  0,  0,  0,  0,  0,  0,  0,  0, /* C */
+    m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m, /* D */
+    m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m, /* E */
+    m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  m,  0  /* F */
 };
 
 /* eliminate the macros */
@@ -462,8 +465,8 @@ static const byte xop_a_extra[256] = {
  * May return 0 size for certain invalid instructions
  */
 int
-decode_sizeof(dcontext_t *dcontext, byte *start_pc,
-              int *num_prefixes _IF_X64(uint *rip_rel_pos))
+decode_sizeof_ex(dcontext_t *dcontext, byte *start_pc, int *num_prefixes,
+                 uint *rip_rel_pos)
 {
     byte *pc = start_pc;
     uint opc = (uint)*pc;
@@ -476,9 +479,7 @@ decode_sizeof(dcontext_t *dcontext, byte *start_pc,
     bool rep_prefix = false;
     bool evex_prefix = false;
     byte reg_opcode; /* reg_opcode field of modrm byte */
-#ifdef X64
     byte *rip_rel_pc = NULL;
-#endif
 
     /* Check for prefix byte(s) */
     while (found_prefix) {
@@ -570,15 +571,14 @@ decode_sizeof(dcontext_t *dcontext, byte *start_pc,
                     bool implied_escape = (!vex3 && !evex_prefix) ||
                         ((vex3 || evex_prefix) && (vex_mm == 1));
                     if (implied_escape) {
-                        sz += sizeof_escape(dcontext, pc, addr16 _IF_X64(&rip_rel_pc));
+                        sz += sizeof_escape(dcontext, pc, addr16, &rip_rel_pc);
                         goto decode_sizeof_done;
                     } else if (vex_mm == 2) {
-                        sz += sizeof_3byte_38(dcontext, pc - 1, addr16,
-                                              true _IF_X64(&rip_rel_pc));
+                        sz +=
+                            sizeof_3byte_38(dcontext, pc - 1, addr16, true, &rip_rel_pc);
                         goto decode_sizeof_done;
                     } else if (vex_mm == 3) {
-                        sz += sizeof_3byte_3a(dcontext, pc - 1,
-                                              addr16 _IF_X64(&rip_rel_pc));
+                        sz += sizeof_3byte_3a(dcontext, pc - 1, addr16, &rip_rel_pc);
                         goto decode_sizeof_done;
                     }
                 } else
@@ -599,7 +599,7 @@ decode_sizeof(dcontext_t *dcontext, byte *start_pc,
                     if (num_prefixes != NULL)
                         *num_prefixes = sz;
                     /* all have modrm */
-                    sz += sizeof_modrm(dcontext, pc + 1, addr16 _IF_X64(&rip_rel_pc));
+                    sz += sizeof_modrm(dcontext, pc + 1, addr16, &rip_rel_pc);
                     if (map_select == 0x8) {
                         /* these always have an immediate byte */
                         sz += 1;
@@ -660,9 +660,9 @@ decode_sizeof(dcontext_t *dcontext, byte *start_pc,
      */
 
     if (varlen == VARLEN_MODRM)
-        sz += sizeof_modrm(dcontext, pc + 1, addr16 _IF_X64(&rip_rel_pc));
+        sz += sizeof_modrm(dcontext, pc + 1, addr16, &rip_rel_pc);
     else if (varlen == VARLEN_ESCAPE) {
-        sz += sizeof_escape(dcontext, pc + 1, addr16 _IF_X64(&rip_rel_pc));
+        sz += sizeof_escape(dcontext, pc + 1, addr16, &rip_rel_pc);
         /* special case: Intel and AMD added size-differing prefix-dependent instrs! */
         if (*(pc + 1) == 0x78) {
             /* XXX: if have rex.w prefix we clear word_operands: is that legal combo? */
@@ -671,9 +671,13 @@ decode_sizeof(dcontext_t *dcontext, byte *start_pc,
                 sz += 2;
             } /* else, vmread, w/ no immeds */
         }
-    } else if (varlen == VARLEN_FP_OP)
-        sz += sizeof_fp_op(dcontext, pc + 1, addr16 _IF_X64(&rip_rel_pc));
-    else
+    } else if (varlen == VARLEN_FP_OP) {
+        sz += sizeof_fp_op(dcontext, pc + 1, addr16, &rip_rel_pc);
+    } else if (varlen == VARLEN_RIP_REL_1BYTE) {
+        rip_rel_pc = start_pc + sz - 1;
+    } else if (varlen == VARLEN_RIP_REL_4BYTE) {
+        rip_rel_pc = start_pc + sz - 4;
+    } else
         CLIENT_ASSERT(varlen == VARLEN_NONE, "internal decoding error");
 
     /* special case that doesn't fit the mold (of course one had to exist) */
@@ -686,9 +690,11 @@ decode_sizeof(dcontext_t *dcontext, byte *start_pc,
         else
             sz += 4; /* TEST El,il -- add size of immediate */
     }
+    /* Another special case: xbegin. */
+    if (opc == 0xc7 && *(pc + 1) == 0xf8)
+        rip_rel_pc = start_pc + sz - 4;
 
 decode_sizeof_done:
-#ifdef X64
     if (rip_rel_pos != NULL) {
         if (rip_rel_pc != NULL) {
             CLIENT_ASSERT(X64_MODE_DC(dcontext),
@@ -699,14 +705,23 @@ decode_sizeof(dcontext_t *dcontext, byte *start_pc,
         } else
             *rip_rel_pos = 0;
     }
-#endif
 
     return sz;
 }
 
+int
+decode_sizeof(dcontext_t *dcontext, byte *start_pc,
+              int *num_prefixes _IF_X64(uint *rip_rel_pos))
+{
+#ifdef X64
+    return decode_sizeof_ex(dcontext, start_pc, num_prefixes, rip_rel_pos);
+#else /* !X64: this signature has no rip_rel_pos parameter, so request no position. */
+    return decode_sizeof_ex(dcontext, start_pc, num_prefixes, NULL);
+#endif /* X64 */
+}
+
 static int
-sizeof_3byte_38(dcontext_t *dcontext, byte *pc, bool addr16,
-                bool vex _IF_X64(byte **rip_rel_pc))
+sizeof_3byte_38(dcontext_t *dcontext, byte *pc, bool addr16, bool vex, byte **rip_rel_pc)
 {
     int sz = 1; /* opcode past 0x0f 0x38 */
     uint opc = *(++pc);
@@ -715,18 +730,18 @@ sizeof_3byte_38(dcontext_t *dcontext, byte *pc, bool addr16,
      * use the threebyte_38_fixed_length[opc] entry and assume 1 */
     if (vex)
         sz += threebyte_38_vex_extra[opc];
-    sz += sizeof_modrm(dcontext, pc + 1, addr16 _IF_X64(rip_rel_pc));
+    sz += sizeof_modrm(dcontext, pc + 1, addr16, rip_rel_pc);
     return sz;
 }
 
 static int
-sizeof_3byte_3a(dcontext_t *dcontext, byte *pc, bool addr16 _IF_X64(byte **rip_rel_pc))
+sizeof_3byte_3a(dcontext_t *dcontext, byte *pc, bool addr16, byte **rip_rel_pc)
 {
     pc++;
     /* so far all 0f 3a 3-byte instrs have modrm bytes and 1-byte immeds */
     /* to be robust for future additions we don't actually
      * use the threebyte_3a_fixed_length[opc] entry and assume 1 */
-    return 1 + sizeof_modrm(dcontext, pc + 1, addr16 _IF_X64(rip_rel_pc)) + 1;
+    return 1 + sizeof_modrm(dcontext, pc + 1, addr16, rip_rel_pc) + 1;
 }
 
 /* Two-byte opcode map (Tables A-4 and A-5).  You use this routine
@@ -736,7 +751,7 @@ sizeof_3byte_3a(dcontext_t *dcontext, byte *pc, bool addr16 _IF_X64(byte **rip_r
  * May return 0 size for certain invalid instructions.
  */
 static int
-sizeof_escape(dcontext_t *dcontext, byte *pc, bool addr16 _IF_X64(byte **rip_rel_pc))
+sizeof_escape(dcontext_t *dcontext, byte *pc, bool addr16, byte **rip_rel_pc)
 {
     uint opc = (uint)*pc;
     int sz = escape_fixed_length[opc];
@@ -747,11 +762,15 @@ sizeof_escape(dcontext_t *dcontext, byte *pc, bool addr16 _IF_X64(byte **rip_rel
      */
 
     if (varlen == VARLEN_MODRM)
-        return sz + sizeof_modrm(dcontext, pc + 1, addr16 _IF_X64(rip_rel_pc));
+        return sz + sizeof_modrm(dcontext, pc + 1, addr16, rip_rel_pc);
     else if (varlen == VARLEN_3BYTE_38_ESCAPE) {
-        return sz + sizeof_3byte_38(dcontext, pc, addr16, false _IF_X64(rip_rel_pc));
+        return sz + sizeof_3byte_38(dcontext, pc, addr16, false, rip_rel_pc);
     } else if (varlen == VARLEN_3BYTE_3A_ESCAPE) {
-        return sz + sizeof_3byte_3a(dcontext, pc, addr16 _IF_X64(rip_rel_pc));
+        return sz + sizeof_3byte_3a(dcontext, pc, addr16, rip_rel_pc);
+    } else if (varlen == VARLEN_RIP_REL_1BYTE) {
+        *rip_rel_pc = pc + sz - 1;
+    } else if (varlen == VARLEN_RIP_REL_4BYTE) {
+        *rip_rel_pc = pc + sz - 4;
     } else
         CLIENT_ASSERT(varlen == VARLEN_NONE, "internal decoding error");
 
@@ -773,7 +792,7 @@ sizeof_escape(dcontext_t *dcontext, byte *pc, bool addr16 _IF_X64(byte **rip_rel
  *   where (*) is 6 if base==5 and 2 otherwise.
  */
 static int
-sizeof_modrm(dcontext_t *dcontext, byte *pc, bool addr16 _IF_X64(byte **rip_rel_pc))
+sizeof_modrm(dcontext_t *dcontext, byte *pc, bool addr16, byte **rip_rel_pc)
 {
     int l = 0; /* return value for sizeof(eAddr) */
 
@@ -834,13 +853,13 @@ sizeof_modrm(dcontext_t *dcontext, byte *pc, bool addr16 _IF_X64(byte **rip_rel_
  * to determine the number of extra bytes in the entire
  * instruction. */
 static int
-sizeof_fp_op(dcontext_t *dcontext, byte *pc, bool addr16 _IF_X64(byte **rip_rel_pc))
+sizeof_fp_op(dcontext_t *dcontext, byte *pc, bool addr16, byte **rip_rel_pc)
 {
     if (*pc > 0xbf)
         return 1; /* entire ModR/M byte is an opcode extension */
 
     /* fp opcode in reg/opcode field */
-    return sizeof_modrm(dcontext, pc, addr16 _IF_X64(rip_rel_pc));
+    return sizeof_modrm(dcontext, pc, addr16, rip_rel_pc);
 }
 
 /* Table indicating "interesting" instructions, i.e., ones we
@@ -1305,13 +1324,12 @@ decode_cti(dcontext_t *dcontext, byte *pc, instr_t *instr)
     int eflags;
     int i;
     byte modrm = 0; /* used only for EFLAGS_6_SPECIAL */
-#ifdef X64
     /* PR 251479: we need to know about all rip-relative addresses.
      * Since change/setting raw bits invalidates, we must set this
-     * on every return. */
+     * on every return.
+     */
     uint rip_rel_pos;
-#endif
-    int sz = decode_sizeof(dcontext, pc, &prefixes _IF_X64(&rip_rel_pos));
+    int sz = decode_sizeof_ex(dcontext, pc, &prefixes, &rip_rel_pos);
     if (sz == 0) {
         /* invalid instruction! */
         instr_set_opcode(instr, OP_INVALID);
@@ -1434,7 +1452,7 @@ decode_cti(dcontext_t *dcontext, byte *pc, instr_t *instr)
         /* assumption: opcode already OP_UNDECODED */
         /* assumption: operands are already marked invalid (instr was reset) */
         instr_set_raw_bits(instr, start_pc, sz);
-        IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
+        instr_set_rip_rel_pos(instr, rip_rel_pos);
         return (start_pc + sz);
     }
 
@@ -1461,7 +1479,7 @@ decode_cti(dcontext_t *dcontext, byte *pc, instr_t *instr)
         /* don't bother to set dsts/srcs */
         instr_set_operands_valid(instr, false);
         instr_set_raw_bits(instr, start_pc, sz);
-        IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
+        instr_set_rip_rel_pos(instr, rip_rel_pos);
         return (start_pc + sz);
     }
 #endif
@@ -1474,7 +1492,7 @@ decode_cti(dcontext_t *dcontext, byte *pc, instr_t *instr)
         instr_set_num_opnds(dcontext, instr, 0, 1);
         instr_set_target(instr, opnd_create_pc(tgt));
         instr_set_raw_bits(instr, start_pc, sz);
-        IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
+        instr_set_rip_rel_pos(instr, rip_rel_pos);
         return (pc + 2);
     }
 
@@ -1490,7 +1508,7 @@ decode_cti(dcontext_t *dcontext, byte *pc, instr_t *instr)
         instr_set_target(instr, opnd_create_pc(tgt));
 
         instr_set_raw_bits(instr, start_pc, sz);
-        IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
+        instr_set_rip_rel_pos(instr, rip_rel_pos);
         return (pc + 2);
     }
 
@@ -1507,7 +1525,7 @@ decode_cti(dcontext_t *dcontext, byte *pc, instr_t *instr)
                           REG_XSP, REG_NULL, 0, 0,
                           resolve_variable_size_dc(dcontext, 0, OPSZ_call, false)));
         instr_set_raw_bits(instr, start_pc, sz);
-        IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
+        instr_set_rip_rel_pos(instr, rip_rel_pos);
         return (pc + 5);
     }
 
@@ -1518,7 +1536,7 @@ decode_cti(dcontext_t *dcontext, byte *pc, instr_t *instr)
         instr_set_num_opnds(dcontext, instr, 0, 1);
         instr_set_target(instr, opnd_create_pc(tgt));
         instr_set_raw_bits(instr, start_pc, sz);
-        IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
+        instr_set_rip_rel_pos(instr, rip_rel_pos);
         return (pc + 5);
     }
 
@@ -1535,7 +1553,7 @@ decode_cti(dcontext_t *dcontext, byte *pc, instr_t *instr)
         instr_set_target(instr, opnd_create_pc(tgt));
 
         instr_set_raw_bits(instr, start_pc, sz);
-        IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
+        instr_set_rip_rel_pos(instr, rip_rel_pos);
         return (pc + 6);
     }
 
@@ -1574,18 +1592,18 @@ decode_cti(dcontext_t *dcontext, byte *pc, instr_t *instr)
                     resolve_variable_size_dc(
                         dcontext, 0, nibble1 == 2 ? OPSZ_ret : OPSZ_REXVARSTACK, false)));
             instr_set_raw_bits(instr, start_pc, sz);
-            IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
+            instr_set_rip_rel_pos(instr, rip_rel_pos);
             return (pc + 3);
         case 3: /* ret w/ no immed */
             instr_set_opcode(instr, OP_ret);
             instr_set_raw_bits(instr, start_pc, sz);
-            IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
+            instr_set_rip_rel_pos(instr, rip_rel_pos);
             /* we don't set any operands and leave to an up-decode for that */
             return (pc + 1);
         case 0xb: /* far ret w/ no immed */
             instr_set_opcode(instr, OP_ret_far);
             instr_set_raw_bits(instr, start_pc, sz);
-            IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
+            instr_set_rip_rel_pos(instr, rip_rel_pos);
             /* we don't set any operands and leave to an up-decode for that */
             return (pc + 1);
         }
@@ -1623,7 +1641,7 @@ decode_cti(dcontext_t *dcontext, byte *pc, instr_t *instr)
             instr_set_src(instr, 1, opnd_create_reg(REG_XCX));
             instr_set_target(instr, opnd_create_pc(tgt));
             instr_set_raw_bits(instr, start_pc, sz);
-            IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
+            instr_set_rip_rel_pos(instr, rip_rel_pos);
             return (pc + 2);
         }
         /* otherwise it wasn't a funny 8-bit cbr so continue */
@@ -1647,7 +1665,7 @@ decode_cti(dcontext_t *dcontext, byte *pc, instr_t *instr)
         instr_set_src(instr, 0, opnd_create_immed_int((char)byte1, OPSZ_1));
         instr_set_src(instr, 1, opnd_create_reg(REG_XSP));
         instr_set_raw_bits(instr, start_pc, sz);
-        IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
+        instr_set_rip_rel_pos(instr, rip_rel_pos);
         return (pc + 2);
     }
     /* sys{enter,exit,call,ret} */
@@ -1670,14 +1688,14 @@ decode_cti(dcontext_t *dcontext, byte *pc, instr_t *instr)
             instr_set_num_opnds(dcontext, instr, 0, 0);
         }
         instr_set_raw_bits(instr, start_pc, sz);
-        IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
+        instr_set_rip_rel_pos(instr, rip_rel_pos);
         return (pc + 2);
     }
     /* iret */
     if (byte0 == 0xcf) {
         instr_set_opcode(instr, OP_iret);
         instr_set_raw_bits(instr, start_pc, sz);
-        IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
+        instr_set_rip_rel_pos(instr, rip_rel_pos);
         return (pc + 1);
     }
     /* popf */
@@ -1698,7 +1716,7 @@ decode_cti(dcontext_t *dcontext, byte *pc, instr_t *instr)
                 stack_sized_reg, REG_NULL, 0, 0,
                 resolve_variable_size_dc(dcontext, prefixes, OPSZ_VARSTACK, false)));
         instr_set_dst(instr, 0, opnd_create_reg(stack_sized_reg));
-        IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
+        instr_set_rip_rel_pos(instr, rip_rel_pos);
         return (pc + 1);
     }
 
@@ -1707,7 +1725,7 @@ decode_cti(dcontext_t *dcontext, byte *pc, instr_t *instr)
     if (INTERNAL_OPTION(mangle_app_seg) && (byte0 == 0x8c || byte0 == 0x8e)) {
         instr_set_opcode(instr, OP_mov_seg);
         instr_set_raw_bits(instr, start_pc, sz);
-        IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
+        instr_set_rip_rel_pos(instr, rip_rel_pos);
         return (start_pc + sz);
     }
 #endif
@@ -1725,7 +1743,7 @@ decode_cti(dcontext_t *dcontext, byte *pc, instr_t *instr)
     /* all non-pc-relative instructions */
     /* assumption: opcode already OP_UNDECODED */
     instr_set_raw_bits(instr, start_pc, sz);
-    IF_X64(instr_set_rip_rel_pos(instr, rip_rel_pos));
+    instr_set_rip_rel_pos(instr, rip_rel_pos);
     /* assumption: operands are already marked invalid (instr was reset) */
     return (start_pc + sz);
 }
diff --git a/core/arch/x86/decode_private.h b/core/arch/x86/decode_private.h
index 9677d7b7cfa..e3cf62072db 100644
--- a/core/arch/x86/decode_private.h
+++ b/core/arch/x86/decode_private.h
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2011-2019 Google, Inc.  All rights reserved.
+ * Copyright (c) 2011-2020 Google, Inc.  All rights reserved.
  * Copyright (c) 2000-2010 VMware, Inc.  All rights reserved.
  * **********************************************************/
 
@@ -325,8 +325,9 @@ struct _decode_info_t {
     byte *start_pc;
     byte *final_pc;
     uint len;
-    /* This field is only used when encoding rip-relative data refs.
-     * To save space we could make it a union with disp.
+    /* This field is only used when encoding rip-relative data refs, and for
+     * re-relativizing level 1-3 relative jumps.  To save space we could make it a
+     * union with disp.
      */
     byte *disp_abs;
 #ifdef X64
diff --git a/core/arch/x86/encode.c b/core/arch/x86/encode.c
index 5c18c511291..b1117828b18 100644
--- a/core/arch/x86/encode.c
+++ b/core/arch/x86/encode.c
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2011-2019 Google, Inc.  All rights reserved.
+ * Copyright (c) 2011-2020 Google, Inc.  All rights reserved.
  * Copyright (c) 2001-2010 VMware, Inc.  All rights reserved.
  * **********************************************************/
 
@@ -2663,10 +2663,6 @@ copy_and_re_relativize_raw_instr(dcontext_t *dcontext, instr_t *instr, byte *dst
 {
     byte *orig_dst_pc = dst_pc;
     ASSERT(instr_raw_bits_valid(instr));
-    /* FIXME i#731: if want to support ctis as well, need
-     * instr->rip_rel_disp_sz and need to set both for non-x64 as well
-     * in decode_sizeof(): or only in decode_cti()?
-     */
     /* For PR 251646 we have special support for mangled jecxz/loop* */
     if (instr_is_cti_short_rewrite(instr, NULL)) {
         app_pc target;
@@ -2681,7 +2677,6 @@ copy_and_re_relativize_raw_instr(dcontext_t *dcontext, instr_t *instr, byte *dst
         }
         *((int *)dst_pc) = (int)(target - (final_pc + instr->length));
     }
-#ifdef X64
     /* We test the flag directly to support cases where the raw bits are
      * being set by private_instr_encode() */
     else if (instr_rip_rel_valid(instr) && instr_get_rip_rel_pos(instr) > 0) {
@@ -2690,9 +2685,9 @@ copy_and_re_relativize_raw_instr(dcontext_t *dcontext, instr_t *instr, byte *dst
         ptr_int_t new_offs;
         bool addr32 = false;
         uint rip_rel_pos = instr_get_rip_rel_pos(instr); /* disp offs within instr */
-        DEBUG_DECLARE(bool ok;)
         ASSERT(!instr_is_level_0(instr));
-        DEBUG_DECLARE(ok =) instr_get_rel_addr_target(instr, &target);
+        DEBUG_DECLARE(bool ok;)
+        DEBUG_DECLARE(ok =) instr_get_rel_data_or_instr_target(instr, &target);
         ASSERT(ok);
         new_offs = target - (final_pc + instr->length);
         /* PR 253327: we don't record whether addr32 so we have to deduce it now */
@@ -2700,7 +2695,7 @@ copy_and_re_relativize_raw_instr(dcontext_t *dcontext, instr_t *instr, byte *dst
             int num_prefixes;
             int i;
             IF_X64(bool old_mode = set_x86_mode(dcontext, instr_get_x86_mode(instr));)
-            decode_sizeof(dcontext, instr->bytes, &num_prefixes, NULL);
+            decode_sizeof(dcontext, instr->bytes, &num_prefixes _IF_X64(NULL));
             IF_X64(set_x86_mode(dcontext, old_mode));
             for (i = 0; i < num_prefixes; i++) {
                 if (*(instr->bytes + i) == ADDR_PREFIX_OPCODE) {
@@ -2722,14 +2717,20 @@ copy_and_re_relativize_raw_instr(dcontext_t *dcontext, instr_t *instr, byte *dst
         }
         memcpy(dst_pc, instr->bytes, rip_rel_pos);
         dst_pc += rip_rel_pos;
-        *((int *)dst_pc) = (int)new_offs;
-        if (rip_rel_pos + 4U < instr->length) {
-            /* suffix byte */
-            memcpy(dst_pc + 4, instr->bytes + rip_rel_pos + 4,
-                   instr->length - (rip_rel_pos + 4));
+        /* The only non-4-byte disp supported is a 1-byte instr-final one (short ctis). */
+        if (rip_rel_pos + 1 == instr->length) {
+            ASSERT(CHECK_TRUNCATE_TYPE_sbyte(new_offs));
+            *((char *)dst_pc) = (char)new_offs;
+        } else {
+            ASSERT(rip_rel_pos + 4 <= instr->length);
+            *((int *)dst_pc) = (int)new_offs;
+            if (rip_rel_pos + 4U < instr->length) {
+                /* suffix byte */
+                memcpy(dst_pc + 4, instr->bytes + rip_rel_pos + 4,
+                       instr->length - (rip_rel_pos + 4));
+            }
         }
     } else
-#endif
         memcpy(dst_pc, instr->bytes, instr->length);
     return orig_dst_pc + instr->length;
 }
@@ -3106,7 +3107,6 @@ instr_encode_arch(dcontext_t *dcontext, instr_t *instr, byte *copy_pc, byte *fin
     }
 
     if (disp_relativize_at != NULL) {
-        CLIENT_ASSERT(X64_MODE(&di), "encode error: no rip-relative in x86 mode!");
         if (check_reachable &&
             !CHECK_TRUNCATE_TYPE_int(di.disp_abs - (field_ptr - copy_pc + final_pc)) &&
             /* PR 253327: we auto-add addr prefix for out-of-reach low tgt */
@@ -3121,7 +3121,7 @@ instr_encode_arch(dcontext_t *dcontext, instr_t *instr, byte *copy_pc, byte *fin
          * private_instr_encode()), set rip_rel_pos */
         CLIENT_ASSERT(CHECK_TRUNCATE_TYPE_byte(disp_relativize_at - di.start_pc),
                       "internal encode error: rip-relative instr pos too large");
-        IF_X64(instr_set_rip_rel_pos(instr, (byte)(disp_relativize_at - di.start_pc)));
+        instr_set_rip_rel_pos(instr, (byte)(disp_relativize_at - di.start_pc));
     }
 
 #if DEBUG_DISABLE /* turn back on if want to debug */
diff --git a/suite/tests/api/ir_x86.c b/suite/tests/api/ir_x86.c
index 3bb9222c63d..7266ecf039d 100644
--- a/suite/tests/api/ir_x86.c
+++ b/suite/tests/api/ir_x86.c
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2011-2019 Google, Inc.  All rights reserved.
+ * Copyright (c) 2011-2020 Google, Inc.  All rights reserved.
  * Copyright (c) 2007-2008 VMware, Inc.  All rights reserved.
  * **********************************************************/
 
@@ -2168,6 +2168,97 @@ test_reg_exact_reads(void *dc)
     instr_destroy(dc, instr);
 }
 
+static void
+test_re_relativization_disp32_opc16(void *dcontext, byte opc1, byte opc2)
+{
+    byte buf_dec_enc[] = { opc1, opc2, 0x00, 0x00, 0x00, 0x00, /* disp32 of 0. */
+                           0x90, 0x90, 0x90, 0x90, 0x90, 0x90 };
+    byte *const encode_pc = &buf_dec_enc[6]; /* We encode here. */
+    byte *const target_pc = &buf_dec_enc[7]; /* Next PC when decoded as if at +1. */
+    instr_t instr;
+    instr_init(dcontext, &instr);
+    byte *res = decode_from_copy(dcontext, buf_dec_enc, &buf_dec_enc[1], &instr);
+    ASSERT(res != NULL);
+    ASSERT(instr_raw_bits_valid(&instr)); /* i#731. */
+    ASSERT(opnd_get_pc(instr_get_src(&instr, 0)) == target_pc);
+    res = instr_encode(dcontext, &instr, encode_pc);
+    ASSERT(res != NULL);
+    instr_reset(dcontext, &instr);
+    res = decode(dcontext, encode_pc, &instr);
+    ASSERT(res != NULL);
+    ASSERT(opnd_get_pc(instr_get_src(&instr, 0)) == target_pc);
+    instr_free(dcontext, &instr);
+}
+
+static void
+test_re_relativization_disp8_opc8(void *dcontext, byte opc)
+{
+    /* disp8 of 0 targets the next PC; the two trailing bytes are where we encode. */
+    byte buf_dec_enc[] = { opc, 0x00, 0x90, 0x90 };
+    byte *const encode_pc = &buf_dec_enc[2];
+    byte *const target_pc = &buf_dec_enc[3]; /* Next PC when decoded as if at +1. */
+    instr_t instr;
+    instr_init(dcontext, &instr);
+    byte *res = decode_from_copy(dcontext, buf_dec_enc, &buf_dec_enc[1], &instr);
+    ASSERT(res != NULL);
+    ASSERT(instr_raw_bits_valid(&instr)); /* i#731. */
+    ASSERT(opnd_get_pc(instr_get_src(&instr, 0)) == target_pc);
+    res = instr_encode(dcontext, &instr, encode_pc);
+    ASSERT(res != NULL);
+    instr_reset(dcontext, &instr);
+    res = decode(dcontext, encode_pc, &instr);
+    ASSERT(res != NULL);
+    ASSERT(opnd_get_pc(instr_get_src(&instr, 0)) == target_pc);
+    instr_free(dcontext, &instr);
+}
+
+/* XXX: Have DR export its raw opcodes, which overlap this list. */
+enum {
+    RAW_OPCODE_jmp_short = 0xeb,       /* 1-byte opcode followed by a disp8. */
+    RAW_OPCODE_jcc_short_start = 0x70, /* First 1-byte-opcode jcc (disp8). */
+    RAW_OPCODE_jcc_short_end = 0x7f,   /* Last 1-byte-opcode jcc (disp8). */
+    RAW_OPCODE_jcc_byte1 = 0x0f,       /* Escape byte of the 2-byte-opcode jcc. */
+    RAW_OPCODE_jcc_byte2_start = 0x80, /* First 2nd opcode byte of jcc (disp32). */
+    RAW_OPCODE_jcc_byte2_end = 0x8f,   /* Last 2nd opcode byte of jcc (disp32). */
+    RAW_OPCODE_loop_start = 0xe0,      /* First of the loop-family range (disp8). */
+    RAW_OPCODE_loop_end = 0xe3,        /* Last of the loop-family range (disp8). */
+    RAW_OPCODE_xbegin_byte1 = 0xc7,    /* xbegin is c7 f8 followed by a disp32. */
+    RAW_OPCODE_xbegin_byte2 = 0xf8,
+};
+
+/* Decode-from-copy then re-encode checks: i#4017 nop length and i#731 cti targets. */
+static void
+test_re_relativization(void *dcontext)
+{
+    instr_t instr;
+    instr_init(dcontext, &instr);
+    byte *pc;
+    int opc;
+
+    /* Test the i#4017 2-byte nop where re-encoding results in a 1-byte length. */
+    const byte buf_nop2[] = { 0x66, 0x90 };
+    instr_reset(dcontext, &instr);
+    pc = decode_from_copy(dcontext, (byte *)buf_nop2, (byte *)buf_nop2 + 1, &instr);
+    ASSERT(pc != NULL);
+    ASSERT(instr_length(dcontext, &instr) == sizeof(buf_nop2));
+
+    /* Test i#731 on every short-jump opcode (each has its own length-table entry). */
+    test_re_relativization_disp8_opc8(dcontext, RAW_OPCODE_jmp_short);
+    for (opc = RAW_OPCODE_loop_start; opc <= RAW_OPCODE_loop_end; opc++)
+        test_re_relativization_disp8_opc8(dcontext, (byte)opc);
+    for (opc = RAW_OPCODE_jcc_short_start; opc <= RAW_OPCODE_jcc_short_end; opc++)
+        test_re_relativization_disp8_opc8(dcontext, (byte)opc);
+
+    /* Test xbegin. */
+    test_re_relativization_disp32_opc16(dcontext, RAW_OPCODE_xbegin_byte1,
+                                        RAW_OPCODE_xbegin_byte2);
+    /* Test every 2-byte-opcode jcc, not just the first and last. */
+    for (opc = RAW_OPCODE_jcc_byte2_start; opc <= RAW_OPCODE_jcc_byte2_end; opc++)
+        test_re_relativization_disp32_opc16(dcontext, RAW_OPCODE_jcc_byte1, (byte)opc);
+
+    instr_free(dcontext, &instr);
+}
+
 int
 main(int argc, char *argv[])
 {
@@ -2237,6 +2328,8 @@ main(int argc, char *argv[])
 
     test_reg_exact_reads(dcontext);
 
+    test_re_relativization(dcontext);
+
 #ifndef STANDALONE_DECODER /* speed up compilation */
     test_all_opcodes_2_avx512_vex(dcontext);
     test_all_opcodes_3_avx512_vex(dcontext);