/*
 * Copyright (c) 2012 The Native Client Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

/*
 * This is the core of amd64-mode validator.  Please note that this file
 * combines ragel machine description and C language actions.  Please read
 * validator_internals.html first to understand how the whole thing is built:
 * it explains how the byte sequences are constructed, what constructs like
 * “@{}” or “REX_WRX?” mean, etc.
 */

#include <assert.h>
#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "native_client/src/trusted/validator_ragel/unreviewed/validator_internal.h"

%%{
  # Name of the machine defined in this file; every “include” statement below
  # pulls a named sub-machine from another file into it.
  machine x86_64_validator;
  # The machine's alphabet is raw bytes.
  alphtype unsigned char;
  # Map ragel's built-in variables onto the C locals declared in
  # ValidateChunkAMD64: p → current_position, pe and eof → end_of_bundle (the
  # end of the input and the end-of-input marker coincide), cs → current_state.
  variable p current_position;
  variable pe end_of_bundle;
  variable eof end_of_bundle;
  variable cs current_state;

  include byte_machine "byte_machines.rl";

  include prefix_actions
    "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
  include prefixes_parsing_noaction
    "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
  include rex_actions
    "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
  include rex_parsing
    "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
  include vex_actions_amd64
    "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
  include vex_parsing_amd64
    "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
  include displacement_fields_actions
    "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
  include displacement_fields_parsing
    "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
  include modrm_actions_amd64
    "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
  include modrm_parsing_amd64
    "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
  include operand_actions_amd64
    "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
  include immediate_fields_actions
    "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
  include immediate_fields_parsing_amd64
    "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
  # rel8 actions are used in relative jumps with 8-bit offset.
  #
  # Forwards the position right after the current byte, the chunk (data, size),
  # the jump_dests bitmap and the per-instruction info flags to Rel8Operand.
  # NOTE(review): Rel8Operand is defined outside this file; presumably it
  # records the jump destination in jump_dests — confirm.
  action rel8_operand {
    Rel8Operand(current_position + 1, data, jump_dests, size,
                &instruction_info_collected);
  }
  # rel16 actions are used in relative jumps with 16-bit offset.
  #
  # Such instructions should not be included in the validator's DFA, but we can
  # not just exclude them because they are referenced in the
  # relative_fields_parsing ragel machine.  Ensure a compilation error in case
  # of accidental usage: the action body is nothing but a preprocessor #error.
  action rel16_operand {
    #error rel16_operand should never be used in nacl
  }
  # rel32 actions are used in relative calls and jumps with 32-bit offset.
  #
  # Forwards the position right after the current byte, the chunk (data, size),
  # the jump_dests bitmap and the per-instruction info flags to Rel32Operand.
  # NOTE(review): Rel32Operand is defined outside this file; presumably it
  # records the jump destination in jump_dests — confirm.
  action rel32_operand {
    Rel32Operand(current_position + 1, data, jump_dests, size,
                 &instruction_info_collected);
  }
  include relative_fields_parsing
    "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
  include cpuid_actions
    "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";

  # Passes the current instruction's offset from the start of data, its base
  # and index registers, the currently restricted register, the valid_targets
  # bitmap and the per-instruction info flags to CheckAccess.
  # NOTE(review): CheckAccess is defined outside this file; presumably it
  # verifies that the memory access is properly sandboxed — confirm.
  action check_access {
    CheckAccess(instruction_start - data, base, index, restricted_register,
                valid_targets, &instruction_info_collected);
  }

  # Action which marks last byte as not immediate.  Most 3DNow! instructions,
  # some AVX and XOP instructions have this property.
  #
  # NOTE(review): the original comment said this action is referenced by the
  # decode_x86_32 machine in the [autogenerated]
  # “validator_x86_32_instruction.rl” file, but this validator includes
  # decode_x86_64 from “validator_x86_64_instruction.rl”; presumably that is
  # the machine which references it — confirm.
  action last_byte_is_not_immediate {
    instruction_info_collected |= LAST_BYTE_IS_NOT_IMMEDIATE;
  }

  # Sets the MODIFIABLE_INSTRUCTION flag in the info collected for the current
  # instruction.
  action modifiable_instruction {
    instruction_info_collected |= MODIFIABLE_INSTRUCTION;
  }

  # Operand-processing actions.  Each one forwards the address of the
  # restricted-register variable and of the per-instruction info flags to the
  # C helper of the matching name; all but process_0_operands additionally pass
  # the REX prefix and the packed operand states.
  # NOTE(review): the Process* helpers are defined outside this file;
  # presumably they update restricted_register according to the operands the
  # instruction writes — confirm.
  action process_0_operands {
    Process0Operands(&restricted_register, &instruction_info_collected);
  }
  action process_1_operand {
    Process1Operand(&restricted_register, &instruction_info_collected,
                    rex_prefix, operand_states);
  }
  action process_1_operand_zero_extends {
    Process1OperandZeroExtends(&restricted_register,
                               &instruction_info_collected, rex_prefix,
                               operand_states);
  }
  action process_2_operands {
    Process2Operands(&restricted_register, &instruction_info_collected,
                     rex_prefix, operand_states);
  }
  action process_2_operands_zero_extends {
    Process2OperandsZeroExtends(&restricted_register,
                                &instruction_info_collected, rex_prefix,
                                operand_states);
  }

  include decode_x86_64 "validator_x86_64_instruction.rl";

  # Special %rbp modifications — the ones which don't need a sandboxing.
  #
  # Note that there are two different opcodes for “mov”: in x86-64 there are two
  # fields in ModR/M byte (REG field and RM field) and “mov” may be used to move
  # from REG field to RM or in the other direction thus there are two encodings
  # for the register-to-register move: “0x89 0xe5” (REG is %rsp, RM is %rbp)
  # and “0x8b 0xec” (REG is %rbp, RM is %rsp).
  #
  # process_0_operands runs on the last byte of either encoding.
  rbp_modifications =
    (b_0100_10x0 0x89 0xe5                         | # mov %rsp,%rbp
     b_0100_10x0 0x8b 0xec)                          # mov %rsp,%rbp
    @process_0_operands;

  # Special instructions used for %rbp sandboxing.
  #
  # This is the “second half” of the %rbp sandboxing.  Any zero-extending
  # instruction which stores the data in %ebp can be first half, but unlike
  # the situation with other “normal” registers you can not just write to
  # %ebp and continue: such activity MUST restore the status quo immediately
  # via one of these instructions.
  rbp_sandboxing =
    (b_0100_11x0 0x01 0xfd                  | # add %r15,%rbp
     b_0100_10x1 0x03 0xef                  | # add %r15,%rbp
     # Note that unlike %rsp case, there is no 'lea (%rbp,%r15,1),%rbp'
     # instruction (it gets assembled as 'lea 0x00(%rbp,%r15,1),%rbp').
     0x4a 0x8d 0x6c 0x3d 0x00               | # lea 0x00(%rbp,%r15,1),%rbp
     0x4a 0x8d 0xac 0x3d 0x00 0x00 0x00 0x00) # lea 0x00000000(%rbp,%r15,1),%rbp
    # Note: restricted_register keeps the restricted register as explained in
    # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems
    #
    # “Normal” instructions can not be used in a place where %rbp is restricted.
    # But since these instructions are “second half” of the %rbp sandboxing they
    # can be used *only* when %rbp is restricted.
    #
    # That is (normal instruction):
    #   mov %eax,%ebp
    #   mov %esi,%edi   ← Error: %ebp is restricted
    # vs
    #   mov %esi,%edi
    #   add %r15,%rbp   ← Error: %ebp is *not* restricted
    # vs
    #   mov %eax,%ebp
    #   add %r15,%rbp   ← Ok: %rbp is restricted as it should be
    #
    # Check this precondition and unmark the beginning of this instruction as
    # a valid jump target (jumping between the two halves is not allowed).
    @{ if (restricted_register == REG_RBP)
         instruction_info_collected |= RESTRICTED_REGISTER_USED;
       else
         instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED;
       /* In either case no register is restricted after this instruction.  */
       restricted_register = NO_REG;
       UnmarkValidJumpTarget((instruction_start - data), valid_targets);
    };

  # Special %rsp modifications — the ones which don't need a sandboxing.
  #
  # Note that there are two different opcodes for “mov”: in x86-64 there are two
  # fields in ModR/M byte (REG field and RM field) and “mov” may be used to move
  # from REG field to RM or in the other direction thus there are two encodings
  # for the register-to-register move: “0x89 0xec” (REG is %rbp, RM is %rsp)
  # and “0x8b 0xe5” (REG is %rsp, RM is %rbp).
  #
  # The “and” form only accepts an imm8 in the range 0x80…0xff: the immediate
  # is sign-extended, so such a mask can only clear bits 0…6 of %rsp.
  #
  # process_0_operands runs on the last byte of every alternative.
  rsp_modifications =
    (b_0100_10x0 0x89 0xec                         | # mov %rbp,%rsp
     b_0100_10x0 0x8b 0xe5                         | # mov %rbp,%rsp
     # Superfluous bits are not supported:
     # http://code.google.com/p/nativeclient/issues/detail?id=3012
     b_0100_1000 0x83 0xe4 (0x80 .. 0xff))           # and $XXX,%rsp
    @process_0_operands;

  # Special instructions used for %rsp sandboxing.
  #
  # This is the “second half” of the %rsp sandboxing.  Any zero-extending
  # instruction which stores the data in %esp can be first half, but unlike
  # the situation with other “normal” registers you can not just write to
  # %esp and continue: such activity MUST restore the status quo immediately
  # via one of these instructions.
  rsp_sandboxing =
    (b_0100_11x0 0x01 0xfc                  | # add %r15,%rsp
     b_0100_10x1 0x03 0xe7                  | # add %r15,%rsp
     # OR can be used as well, see
     # http://code.google.com/p/nativeclient/issues/detail?id=3070
     b_0100_11x0 0x09 0xfc                  | # or %r15,%rsp
     b_0100_10x1 0x0b 0xe7                  | # or %r15,%rsp
     0x4a 0x8d 0x24 0x3c                    | # lea (%rsp,%r15,1),%rsp
     0x4a 0x8d 0x64 0x3c 0x00               | # lea 0x00(%rsp,%r15,1),%rsp
     0x4a 0x8d 0xa4 0x3c 0x00 0x00 0x00 0x00) # lea 0x00000000(%rsp,%r15,1),%rsp
    # Note: restricted_register keeps the restricted register as explained in
    # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems
    #
    # “Normal” instructions can not be used in a place where %rsp is restricted.
    # But since these instructions are “second half” of the %rsp sandboxing they
    # can be used *only* when %rsp is restricted.
    #
    # That is (normal instruction):
    #   mov %eax,%esp
    #   mov %esi,%edi   ← Error: %esp is restricted
    # vs
    #   mov %esi,%edi
    #   add %r15,%rsp   ← Error: %esp is *not* restricted
    # vs
    #   mov %eax,%esp
    #   add %r15,%rsp   ← Ok: %rsp is restricted as it should be
    #
    # Check this precondition and unmark the beginning of this instruction as
    # a valid jump target (jumping between the two halves is not allowed).
    @{ if (restricted_register == REG_RSP)
         instruction_info_collected |= RESTRICTED_REGISTER_USED;
       else
         instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED;
       /* In either case no register is restricted after this instruction.  */
       restricted_register = NO_REG;
       UnmarkValidJumpTarget((instruction_start - data), valid_targets);
    };

  # naclcall or nacljmp. These are three-instruction indirect-jump sequences.
  #    and $~0x1f, %eXX
  #    add RBASE, %rXX
  #    jmpq *%rXX   (or: callq *%rXX)
  # Note: first “and $~0x1f, %eXX” is a normal instruction (it can occur not
  # just as part of the naclcall/nacljmp, but also as a standalone instruction).
  #
  # This means that when naclcall_or_nacljmp ragel machine will be combined with
  # “normal_instruction*” regular action process_1_operand_zero_extends will be
  # triggered when main ragel machine will accept “and $~0x1f, %eXX” x86-64
  # instruction.  This action will check if %rbp/%rsp is legally modified thus
  # we don't need to duplicate this logic in naclcall_or_nacljmp ragel machine.
  #
  # There are a number of variants present which differ by the REX prefix usage:
  # we need to make sure “%eXX” in “and”, “%rXX” in “add”, and “%rXX” in “jmpq”
  # or “callq” is the same register and it's much simpler to do if one single
  # action handles only fixed number of bytes.
  #
  # Additional complication arises because x86-64 contains two different “add”
  # instructions: with “0x01” and “0x03” opcode.  They differ in the direction
  # used: both can encode “add %src_register, %dst_register”, but the first one
  # uses field REG of the ModR/M byte for the src and field RM of the ModR/M
  # byte for the dst while last one uses field RM of the ModR/M byte for the src
  # and field REG of the ModR/M byte for dst.  Both should be allowed.
  #
  # See AMD/Intel manual for clarification of the “add” instruction encoding.
  #
  # The byte patterns below only fix the shape of the sequence; each group ends
  # with an action calling one of the ProcessNaclCallOrJmp* helpers (defined
  # outside this file).
  # NOTE(review): presumably those helpers verify that all three instructions
  # name the same register — confirm.
  #
  # REGISTER USAGE ABBREVIATIONS:
  #     E86:   legacy ia32 registers (all eight: %eax to %edi)
  #     R86:   64-bit counterparts for legacy 386 registers (%rax to %rdi)
  #     E64:   32-bit counterparts for new amd64 registers (%r8d to %r14d)
  #     R64:   new amd64 registers (only seven: %r8 to %r14)
  #     RBASE: %r15 (used as “base of untrusted world” in NaCl for amd64)
  naclcall_or_nacljmp =
    # This block encodes call and jump “superinstruction” of the following form:
    #     0: 83 e_ e0    and    $~0x1f,E86
    #     3: 4_ 01 f_    add    RBASE,R86
    #     6: ff e_       jmpq   *R86
    #### INSTRUCTION ONE (three bytes)
    # and $~0x1f, E86
    (0x83 b_11_100_xxx 0xe0
    #### INSTRUCTION TWO (three bytes)
    # add RBASE, R86 (0x01 opcode)
     b_0100_11x0 0x01 b_11_111_xxx
    #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
    # callq *R86
     ((REX_WRX? 0xff b_11_010_xxx) |
    #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
    # jmpq *R86
      (REX_WRX? 0xff b_11_100_xxx)))
    @{
      ProcessNaclCallOrJmpAddToRMNoRex(&instruction_info_collected,
                                       &instruction_start, current_position,
                                       data, valid_targets);
    } |

    # This block encodes call and jump “superinstruction” of the following form:
    #     0: 83 e_ e0    and    $~0x1f,E86
    #     3: 4_ 03 _f    add    RBASE,R86
    #     6: ff e_       jmpq   *R86
    #### INSTRUCTION ONE (three bytes)
    # and $~0x1f, E86
    (0x83 b_11_100_xxx 0xe0
    #### INSTRUCTION TWO (three bytes)
    # add RBASE, R86 (0x03 opcode)
     b_0100_10x1 0x03 b_11_xxx_111
    #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
    # callq *R86
     ((REX_WRX? 0xff b_11_010_xxx) |
    #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
    # jmpq *R86
      (REX_WRX? 0xff b_11_100_xxx)))
    @{
      ProcessNaclCallOrJmpAddToRegNoRex(&instruction_info_collected,
                                        &instruction_start, current_position,
                                        data, valid_targets);
    } |

    # The next two blocks (REX-prefixed “and” with the 0x01 form of “add”, for
    # E86 and for E64 registers) are grouped by the outer parentheses and share
    # a single action.
    #
    # This block encodes call and jump “superinstruction” of the following form:
    #     0: 4_ 83 e_ e0 and    $~0x1f,E86
    #     4: 4_ 01 f_    add    RBASE,R86
    #     7: ff e_       jmpq   *R86
    #### INSTRUCTION ONE (four bytes)
    # and $~0x1f, E86
    ((REX_RX 0x83 b_11_100_xxx 0xe0
    #### INSTRUCTION TWO (three bytes)
    # add RBASE, R86 (0x01 opcode)
     b_0100_11x0 0x01 b_11_111_xxx
    #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
    # callq *R86
     ((REX_WRX? 0xff b_11_010_xxx) |
    #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
    # jmpq *R86
      (REX_WRX? 0xff b_11_100_xxx))) |

    # This block encodes call and jump “superinstruction” of the following form:
    #     0: 4_ 83 e_ e0 and    $~0x1f,E64
    #     4: 4_ 01 f_    add    RBASE,R64
    #     7: 4_ ff e_    jmpq   *R64
    #### INSTRUCTION ONE (four bytes)
    # and $~0x1f, E64
    (b_0100_0xx1 0x83 (b_11_100_xxx - b_11_100_111) 0xe0
    #### INSTRUCTION TWO (three bytes)
    # add RBASE, R64 (0x01 opcode)
     b_0100_11x1 0x01 (b_11_111_xxx - b_11_111_111)
    #### INSTRUCTION THREE: call (three bytes)
    # callq *R64
     ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) |
    #### INSTRUCTION THREE: jmp (three bytes)
    # jmpq *R64
      (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))
    @{
      ProcessNaclCallOrJmpAddToRMWithRex(&instruction_info_collected,
                                         &instruction_start, current_position,
                                         data, valid_targets);
    } |

    # The last two blocks (REX-prefixed “and” with the 0x03 form of “add”, for
    # E86 and for E64 registers) are likewise grouped and share one action.
    #
    # This block encodes call and jump “superinstruction” of the following form:
    #     0: 4_ 83 e_ e0 and    $~0x1f,E86
    #     4: 4_ 03 _f    add    RBASE,R86
    #     7: ff e_       jmpq   *R86
    #### INSTRUCTION ONE (four bytes)
    # and $~0x1f, E86
    ((REX_RX 0x83 b_11_100_xxx 0xe0
    #### INSTRUCTION TWO (three bytes)
    # add RBASE, R86 (0x03 opcode)
     b_0100_10x1 0x03 b_11_xxx_111
    #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
    # callq *R86
     ((REX_WRX? 0xff b_11_010_xxx) |
    #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
    # jmpq *R86
      (REX_WRX? 0xff b_11_100_xxx))) |

    # This block encodes call and jump “superinstruction” of the following form:
    #     0: 4_ 83 e_ e0 and    $~0x1f,E64
    #     4: 4_ 03 _f    add    RBASE,R64
    #     7: 4_ ff e_    jmpq   *R64
    #### INSTRUCTION ONE (four bytes)
    # and $~0x1f, E64
     (b_0100_0xx1 0x83 (b_11_100_xxx - b_11_100_111) 0xe0
    #### INSTRUCTION TWO (three bytes)
    # add RBASE, R64 (0x03 opcode)
      b_0100_11x1 0x03 (b_11_xxx_111 - b_11_111_111)
    #### INSTRUCTION THREE: call (three bytes)
    # callq *R64
      ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) |
    #### INSTRUCTION THREE: jmp (three bytes)
    # jmpq *R64
       (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))
    @{
      ProcessNaclCallOrJmpAddToRegWithRex(&instruction_info_collected,
                                          &instruction_start, current_position,
                                          data, valid_targets);
    };

  # EMMX/SSE2/AVX instructions which have implicit %ds:(%rdi) operand (the
  # masked store goes to the address in %rdi, which is why these machines are
  # only accepted as part of sandbox_instruction_rdi_no_rsi).
  # maskmovq %mmX,%mmY
  maskmovq =
      REX_WRXB? (0x0f 0xf7)
      @CPUFeature_EMMX modrm_registers;
  # maskmovdqu %xmmX, %xmmY
  maskmovdqu =
      0x66 REX_WRXB? (0x0f 0xf7) @not_data16_prefix
      @CPUFeature_SSE2 modrm_registers;
  # vmaskmovdqu %xmmX, %xmmY
  vmaskmovdqu =
      ((0xc4 (VEX_RB & VEX_map00001) 0x79 @vex_prefix3) |
      (0xc5 (0x79 | 0xf9) @vex_prefix_short)) 0xf7
      @CPUFeature_AVX modrm_registers;
  # Union of the three masked-store instructions above.
  mmx_sse_rdi_instruction = maskmovq | maskmovdqu | vmaskmovdqu;

  # Temporary fix: for string instructions the combination of data16 and
  # rep(ne) prefixes is disallowed to mimic old validator behavior.
  # See http://code.google.com/p/nativeclient/issues/detail?id=1950
  #
  # The two commented-out definitions are the forms which would also accept a
  # rep/condrep prefix before or after data16; the active definitions accept a
  # bare data16 prefix only.

  # data16rep = (data16 | rep data16 | data16 rep);
  # data16condrep = (data16 | condrep data16 | data16 condrep);
  data16rep = data16;
  data16condrep = data16;

  # String instructions which use only %ds:(%rsi): the three operand sizes of
  # “lods” (byte; word via data16rep; dword/qword via optional REXW_NONE).
  string_instruction_rsi_no_rdi =
    (rep? 0xac                 | # lods   %ds:(%rsi),%al
     data16rep 0xad            | # lods   %ds:(%rsi),%ax
     rep? REXW_NONE? 0xad);      # lods   %ds:(%rsi),%eax/%rax

  # String instructions which use only %es:(%rdi): “scas” (with an optional
  # condrep prefix) and “stos” (with an optional rep prefix), each in its byte,
  # word (data16) and dword/qword (optional REXW_NONE) form.
  string_instruction_rdi_no_rsi =
    condrep? 0xae             | # scas   %es:(%rdi),%al
    data16condrep 0xaf        | # scas   %es:(%rdi),%ax
    condrep? REXW_NONE? 0xaf  | # scas   %es:(%rdi),%eax/%rax

    rep? 0xaa                 | # stos   %al,%es:(%rdi)
    data16rep 0xab            | # stos   %ax,%es:(%rdi)
    rep? REXW_NONE? 0xab;       # stos   %eax/%rax,%es:(%rdi)

  # String instructions which use both %ds:(%rsi) and %es:(%rdi): “cmps” (with
  # an optional condrep prefix) and “movs” (with an optional rep prefix), each
  # in its byte, word (data16) and dword/qword (optional REXW_NONE) form.
  string_instruction_rsi_rdi =
    condrep? 0xa6            | # cmpsb    %es:(%rdi),%ds:(%rsi)
    data16condrep 0xa7       | # cmpsw    %es:(%rdi),%ds:(%rsi)
    condrep? REXW_NONE? 0xa7 | # cmps[lq] %es:(%rdi),%ds:(%rsi)

    rep? 0xa4                | # movsb    %ds:(%rsi),%es:(%rdi)
    data16rep 0xa5           | # movsw    %ds:(%rsi),%es:(%rdi)
    rep? REXW_NONE? 0xa5;      # movs[lq] %ds:(%rsi),%es:(%rdi)

  # “Superinstruction” which includes %rsi sandboxing.
  #
  # There are two variants which handle spurious REX prefixes: the “mov” is
  # either two bytes long or three bytes long (with a REX_X prefix).
  #
  # Note that both “0x89 0xf6” and “0x8b 0xf6” encode “mov %esi,%esi”: in x86-64
  # there are two fields in ModR/M byte (REG field and RM field) and “mov” may
  # be used to move from REG field to RM or in the other direction thus there
  # are two encodings for the register-to-register move (and since REG and RM
  # are identical here only opcode differs).
  #
  # The action attached to the string instruction passes the total length of
  # the preceding sandboxing instructions to
  # ExpandSuperinstructionBySandboxingBytes.
  # NOTE(review): that helper is defined outside this file; presumably it moves
  # instruction_start back by that many bytes and unmarks the jump targets
  # inside the superinstruction — confirm.
  sandbox_instruction_rsi_no_rdi =
    (0x89 | 0x8b) 0xf6       # mov %esi,%esi
    0x49 0x8d 0x34 0x37      # lea (%r15,%rsi,1),%rsi
    string_instruction_rsi_no_rdi
    @{
       ExpandSuperinstructionBySandboxingBytes(
         2 /* mov */ + 4 /* lea */, &instruction_start, data, valid_targets);
    } |

    REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi
    0x49 0x8d 0x34 0x37      # lea (%r15,%rsi,1),%rsi
    string_instruction_rsi_no_rdi
    @{
       ExpandSuperinstructionBySandboxingBytes(
         3 /* mov */ + 4 /* lea */, &instruction_start, data, valid_targets);
    };

  # “Superinstruction” which includes %rdi sandboxing.
  #
  # There are two variants which handle spurious REX prefixes: the “mov” is
  # either two bytes long or three bytes long (with a REX_X prefix).
  #
  # Note that both “0x89 0xff” and “0x8b 0xff” encode “mov %edi,%edi”: in x86-64
  # there are two fields in ModR/M byte (REG field and RM field) and “mov” may
  # be used to move from REG field to RM or in the other direction thus there
  # are two encodings for the register-to-register move (and since REG and RM
  # are identical here only opcode differs).
  #
  # Besides the %rdi-only string instructions the sandboxed instruction may be
  # one of the masked stores from mmx_sse_rdi_instruction.  The action passes
  # the total length of the preceding sandboxing instructions to
  # ExpandSuperinstructionBySandboxingBytes (defined outside this file).
  sandbox_instruction_rdi_no_rsi =
    (0x89 | 0x8b) 0xff       # mov %edi,%edi
    0x49 0x8d 0x3c 0x3f      # lea (%r15,%rdi,1),%rdi
    (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)
    @{
       ExpandSuperinstructionBySandboxingBytes(
         2 /* mov */ + 4 /* lea */, &instruction_start, data, valid_targets);
    } |

    REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi
    0x49 0x8d 0x3c 0x3f      . # lea (%r15,%rdi,1),%rdi
    (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)
    @{
       ExpandSuperinstructionBySandboxingBytes(
         3 /* mov */ + 4 /* lea */, &instruction_start, data, valid_targets);
    };


  # “Superinstruction” which includes both %rsi and %rdi sandboxing.
  #
  # There are four variants which handle spurious REX prefixes: each of the two
  # “mov” instructions may or may not carry a REX_X prefix.  The two variants
  # with exactly one prefixed “mov” have the same total length and therefore
  # share one action.
  #
  # Note that both “0x89 0xf6” and “0x8b 0xf6” encode “mov %esi,%esi” while both
  # “0x89 0xff” and “0x8b 0xff” encode “mov %edi,%edi”: in x86-64 there are two
  # fields in ModR/M byte (REG field and RM field) and “mov” may be used to move
  # from REG field to RM or in the other direction thus there are two encodings
  # for the register-to-register move (and since REG and RM are identical here
  # only opcode differs).
  #
  # Each action passes the total length of the four sandboxing instructions to
  # ExpandSuperinstructionBySandboxingBytes (defined outside this file).
  sandbox_instruction_rsi_rdi =
    (0x89 | 0x8b) 0xf6       # mov %esi,%esi
    0x49 0x8d 0x34 0x37      # lea (%r15,%rsi,1),%rsi
    (0x89 | 0x8b) 0xff       # mov %edi,%edi
    0x49 0x8d 0x3c 0x3f      # lea (%r15,%rdi,1),%rdi
    string_instruction_rsi_rdi
    @{
       ExpandSuperinstructionBySandboxingBytes(
         2 /* mov */ + 4 /* lea */ + 2 /* mov */ + 4 /* lea */,
         &instruction_start, data, valid_targets);
    } |

    (((0x89 | 0x8b) 0xf6       # mov %esi,%esi
      0x49 0x8d 0x34 0x37      # lea (%r15,%rsi,1),%rsi
      REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi
      0x49 0x8d 0x3c 0x3f) |   # lea (%r15,%rdi,1),%rdi

     (REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi
      0x49 0x8d 0x34 0x37      # lea (%r15,%rsi,1),%rsi
      (0x89 | 0x8b) 0xff       # mov %edi,%edi
      0x49 0x8d 0x3c 0x3f))      # lea (%r15,%rdi,1),%rdi
     string_instruction_rsi_rdi
    @{
       ExpandSuperinstructionBySandboxingBytes(
         2 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */
         /* == 3 (* mov *) + 4 (* lea *) + 2 (* mov *) + 4 (* lea *) */,
         &instruction_start, data, valid_targets);
    } |

    REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi
    0x49 0x8d 0x34 0x37      . # lea (%r15,%rsi,1),%rsi
    REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi
    0x49 0x8d 0x3c 0x3f      . # lea (%r15,%rdi,1),%rdi
    string_instruction_rsi_rdi
    @{
       ExpandSuperinstructionBySandboxingBytes(
         3 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */,
         &instruction_start, data, valid_targets);
    };

  # All the “special” instructions (== instructions which obey non-standard
  # rules).  Three groups:
  #  • %rbp/%rsp related instructions (these instructions are special because
  #    the registers must be in the range %r15…%r15+4294967295 except that
  #    momentarily they can be in the range 0…4294967295)
  #  • string instructions (which can not use %r15 as base and thus need special
  #    handling both in compiler and validator)
  #  • naclcall/nacljmp (indirect jumps need special care)
  special_instruction =
    (rbp_modifications |
     rsp_modifications |
     rbp_sandboxing |
     rsp_sandboxing |
     sandbox_instruction_rsi_no_rdi |
     sandbox_instruction_rdi_no_rsi |
     sandbox_instruction_rsi_rdi |
     naclcall_or_nacljmp)
    # Mark the instruction as special — currently this information is used only
    # in tests, but in the future we may use it for dynamic code modification
    # support.
    @{
       instruction_info_collected |= SPECIAL_INSTRUCTION;
    };

  # Remove special instructions which are only allowed in special cases: a
  # normal instruction is any byte sequence accepted by one_instruction which
  # is not also accepted by special_instruction.
  normal_instruction = one_instruction - special_instruction;

  # Check if call is properly aligned.
  #
  # For direct call we explicitly encode all variations.  For indirect call
  # we accept all the special instructions which end with an indirect call.
  #
  # Both halves are intersections (“&”): the left operand restricts the match
  # to sequences the validator accepts anyway, the right operand picks out the
  # ones which are calls.
  call_alignment =
    ((normal_instruction &
      # Direct call
      ((data16 REX_RXB? 0xe8 rel16) |
       (REX_WRXB? 0xe8 rel32) |
       (data16 REXW_RXB 0xe8 rel32))) |
     (special_instruction &
      # Indirect call
      (any* data16? REX_WRXB? 0xff ((opcode_2 | opcode_3) any* &
                                    (modrm_memory | modrm_registers)))))
    # Call instruction must be aligned to the end of bundle.  Previously this
    # was a strict requirement, today it's just a warning to aid with debugging.
    #
    # The check compares the offset of the last byte of the call, masked with
    # kBundleMask, against kBundleMask itself.
    @{
      if (((current_position - data) & kBundleMask) != kBundleMask)
        instruction_info_collected |= BAD_CALL_ALIGNMENT;
    };

  # This is the main ragel machine: it does 99% of the validation work.  There
  # is only one thing to do with a bundle if this machine accepts the bundle:
  #  • check for the state of the restricted_register at the end of the bundle.
  #     It's an error if %rbp or %rsp is restricted at the end of the bundle.
  # Additionally if all the bundles are fine you need to check that direct jumps
  # are correct.  This is done in the following way:
  #  • DFA fills two arrays: valid_targets and jump_dests.
  #  • ProcessInvalidJumpTargets checks that jump_dests ⊂ valid_targets.
  # All other checks are done here.
  main := ((call_alignment | normal_instruction | special_instruction)
     # Here we call the user callback if there are validation errors or if the
     # CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option is used.
     #
     # After that we move instruction_start and clean all the variables which
     # are only used in the processing of a single instruction (prefixes,
     # operand states and instruction_info_collected).
     @{
       if ((instruction_info_collected & VALIDATION_ERRORS_MASK) ||
           (options & CALL_USER_CALLBACK_ON_EACH_INSTRUCTION)) {
         /* The restricted register is packed into the reported info.  */
         result &= user_callback(
             instruction_start, current_position,
             instruction_info_collected |
             ((restricted_register << RESTRICTED_REGISTER_SHIFT) &
              RESTRICTED_REGISTER_MASK), callback_data);
       }
       /* On successful match the instruction start must point to the next byte
        * to be able to report the new offset as the start of instruction
        * causing error.  */
       instruction_start = current_position + 1;
       /* Mark this position as a valid target for jump.  */
       MarkValidJumpTarget(current_position + 1 - data, valid_targets);
       /* Clear variables.  */
       instruction_info_collected = 0;
       SET_REX_PREFIX(FALSE);
       /* Top three bits of VEX2 are inverted: see AMD/Intel manual.  */
       SET_VEX_PREFIX2(VEX_R | VEX_X | VEX_B);
       SET_VEX_PREFIX3(0x00);
       operand_states = 0;
     })*
    # Error action: report the bytes from instruction_start up to the current
    # position as an unrecognized instruction and give up on this bundle.
    $err{
        result &= user_callback(instruction_start, current_position,
                                UNRECOGNIZED_INSTRUCTION, callback_data);
        /*
         * Process the next bundle: “continue” here is for the “for” cycle in
         * the ValidateChunkAMD64 function.
         *
         * It does not affect the case which we really care about (when code
         * is validatable), but makes it possible to detect more errors in one
         * run in tools like ncval.
         */
        continue;
    };

}%%

/* Ragel directive: emits the static data of the x86_64_validator machine.  */
%% write data;

/*
 * Validates a chunk of amd64 code, bundle after bundle.
 *
 * data/size describe the chunk; size must be a multiple of kBundleSize (this
 * is CHECKed).  user_callback is invoked with callback_data for every problem
 * found (and, depending on options, for every instruction).  cpu_features is
 * not touched directly here; presumably the included cpuid actions read it.
 *
 * The result is the bitwise AND of TRUE, of every user_callback return value
 * and of the ProcessInvalidJumpTargets return value.  When it is zero errno is
 * set to EINVAL.  If the bitmaps can not be allocated FALSE is returned with
 * errno set to ENOMEM.
 *
 * The names of the locals are part of the contract with the ragel machine
 * above: its “variable” statements and actions refer to them by name, and its
 * error action relies on “continue” advancing the “for” loop below.
 */
Bool ValidateChunkAMD64(const uint8_t *data, size_t size,
                        uint32_t options,
                        const NaClCPUFeaturesX86 *cpu_features,
                        ValidationCallbackFunc user_callback,
                        void *callback_data) {
  /* On-stack bitmaps, used when the chunk is small enough.  */
  bitmap_word valid_targets_small[2];
  bitmap_word jump_dests_small[2];
  /* Bitmaps actually in use (either the on-stack ones or heap ones).  */
  bitmap_word *valid_targets;
  bitmap_word *jump_dests;
  const uint8_t *current_position;
  const uint8_t *end_of_bundle;
  int result = TRUE;

  CHECK(sizeof valid_targets_small == sizeof jump_dests_small);
  CHECK(size % kBundleSize == 0);

  /*
   * One bit per byte of the chunk plus one extra bit: valid jump target bits
   * are set _after_ instructions, so there will be one at the very end of the
   * chunk.  The first bit is never marked, which is fine because any aligned
   * address is a valid jump target.
   *
   * Only chunks which do not fit into the on-stack bitmaps pay for malloc.
   */
  if ((size + 1) > (sizeof valid_targets_small * 8)) {
    valid_targets = BitmapAllocate(size + 1);
    jump_dests = BitmapAllocate(size + 1);
    if (!(valid_targets && jump_dests)) {
      free(jump_dests);
      free(valid_targets);
      errno = ENOMEM;
      return FALSE;
    }
  } else {
    memset(valid_targets_small, 0, sizeof valid_targets_small);
    memset(jump_dests_small, 0, sizeof jump_dests_small);
    valid_targets = valid_targets_small;
    jump_dests = jump_dests_small;
  }

  /*
   * Normally every bundle is processed on its own, so instructions (and
   * super-instructions) can not cross bundle borders.  The contiguous-stream
   * option (usually used in tests) makes the first “bundle” span the whole
   * chunk instead.
   */
  end_of_bundle = (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM)
                      ? data + size
                      : data + kBundleSize;

  /*
   * Walk the chunk one bundle at a time.  The ragel-produced DFA performs all
   * the checks except the one for direct jumps: for that it only fills
   * valid_targets and jump_dests, which are compared after the loop.
   */
  for (current_position = data;
       current_position < data + size;
       current_position = end_of_bundle,
       end_of_bundle = current_position + kBundleSize) {
    int current_state;
    /* First byte of the instruction which is being matched.  */
    const uint8_t *instruction_start = current_position;
    uint32_t instruction_info_collected = 0;
    /* One byte of information per operand of the current instruction:
     *  2 bits for register kinds,
     *  5 bits for register numbers (16 regs plus RIZ). */
    uint32_t operand_states = 0;
    uint8_t rex_prefix = FALSE;
    /* Top three bits of VEX2 are inverted: see AMD/Intel manual.  */
    uint8_t vex_prefix2 = VEX_R | VEX_X | VEX_B;
    uint8_t vex_prefix3 = 0x00;
    enum OperandName base = NO_REG;
    enum OperandName index = NO_REG;
    enum OperandName restricted_register =
      RESTRICTED_REGISTER_INITIAL_VALUE(options);

    %% write init;
    %% write exec;

    /*
     * The DFA accepted the bundle; the only thing left to verify is that the
     * last instruction has not left %rsp or %rbp restricted.
     */
    if (restricted_register == REG_RSP) {
      result &= user_callback(end_of_bundle, end_of_bundle,
                              RESTRICTED_RSP_UNPROCESSED |
                              ((REG_RSP << RESTRICTED_REGISTER_SHIFT) &
                               RESTRICTED_REGISTER_MASK), callback_data);
    } else if (restricted_register == REG_RBP) {
      result &= user_callback(end_of_bundle, end_of_bundle,
                              RESTRICTED_RBP_UNPROCESSED |
                              ((REG_RBP << RESTRICTED_REGISTER_SHIFT) &
                               RESTRICTED_REGISTER_MASK), callback_data);
    }
  }

  /* Direct jumps: every bit set in jump_dests must be set in valid_targets. */
  result &= ProcessInvalidJumpTargets(data, size, valid_targets, jump_dests,
                                      user_callback, callback_data);

  /* Only the bitmaps of large chunks live on the heap.  */
  if (jump_dests != jump_dests_small) {
    free(jump_dests);
  }
  if (valid_targets != valid_targets_small) {
    free(valid_targets);
  }
  if (!result) {
    errno = EINVAL;
  }
  return result;
}
