/* automatically generated by m68k-insns-auto.sh, do not edit! */
_TME_RCSID("$Id: m68k-insns-auto.sh,v 1.26 2009/08/29 19:38:23 fredette Exp $");

#include "m68k-impl.h"


/* this does a 8-bit "add SRC, DST": */
TME_M68K_INSN(tme_m68k_add8)
{
  tme_uint8_t res, op0, op1;
  tme_uint8_t flags;

  /* load the operand(s): */
  op0 = *((tme_uint8_t *) _op0);
  op1 = *((tme_uint8_t *) _op1);

  /* perform the operation: */
  res = op1 + op0;

  /* store the result: */
  *((tme_uint8_t *) _op1) = res;

  /* set the flags: */
  flags = ((tme_uint8_t) (((tme_uint8_t) res) >> (8 - 1))) * TME_M68K_FLAG_N;
  if (res == 0) flags |= TME_M68K_FLAG_Z;
  flags |= ((tme_uint8_t) (((op0 ^ op1 ^ 0xff) & (op1 ^ res)) >> (8 - 1))) * TME_M68K_FLAG_V;
  if (op0 > (op1 ^ 0xff)) flags |= TME_M68K_FLAG_C | TME_M68K_FLAG_X;
  ic->tme_m68k_ireg_ccr = flags;

  TME_M68K_INSN_OK;
}

/* 8-bit "sub SRC, DST": subtracts the byte at _op0 from the byte at
   _op1 and rebuilds the CCR (X, N, Z, V, C) from the result: */
TME_M68K_INSN(tme_m68k_sub8)
{
  tme_uint8_t src_val, dst_val, diff;
  tme_uint8_t ccr_new;

  /* fetch both operands before anything is written back: */
  src_val = *((tme_uint8_t *) _op0);
  dst_val = *((tme_uint8_t *) _op1);

  /* form the 8-bit difference and write it to the destination: */
  diff = dst_val - src_val;
  *((tme_uint8_t *) _op1) = diff;

  /* N is the sign bit of the difference: */
  ccr_new = (diff & 0x80) ? TME_M68K_FLAG_N : 0;

  /* Z is set for a zero difference: */
  if (diff == 0) {
    ccr_new |= TME_M68K_FLAG_Z;
  }

  /* V is set when the operands have different signs and the sign of
     the difference differs from the destination's: */
  if ((src_val ^ dst_val) & (dst_val ^ diff) & 0x80) {
    ccr_new |= TME_M68K_FLAG_V;
  }

  /* C and X are set when the subtraction borrows: */
  if (dst_val < src_val) {
    ccr_new |= TME_M68K_FLAG_C | TME_M68K_FLAG_X;
  }
  ic->tme_m68k_ireg_ccr = ccr_new;

  TME_M68K_INSN_OK;
}

/* 8-bit "cmp SRC, DST": computes the byte at _op1 minus the byte at
   _op0 only to set N, Z, V and C; nothing is written back and X is
   carried over from the old CCR: */
TME_M68K_INSN(tme_m68k_cmp8)
{
  tme_uint8_t src_val, dst_val, diff;
  tme_uint8_t ccr_new;

  /* fetch both operands: */
  src_val = *((tme_uint8_t *) _op0);
  dst_val = *((tme_uint8_t *) _op1);

  /* form the 8-bit difference, which is only used for the flags: */
  diff = dst_val - src_val;

  /* N is the sign bit of the difference: */
  ccr_new = (diff & 0x80) ? TME_M68K_FLAG_N : 0;

  /* Z is set for a zero difference: */
  if (diff == 0) {
    ccr_new |= TME_M68K_FLAG_Z;
  }

  /* V is set when the operands have different signs and the sign of
     the difference differs from the destination's: */
  if ((src_val ^ dst_val) & (dst_val ^ diff) & 0x80) {
    ccr_new |= TME_M68K_FLAG_V;
  }

  /* C is set when the subtraction borrows: */
  if (dst_val < src_val) {
    ccr_new |= TME_M68K_FLAG_C;
  }

  /* X keeps its previous value: */
  ccr_new |= (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X);
  ic->tme_m68k_ireg_ccr = ccr_new;

  TME_M68K_INSN_OK;
}

/* 8-bit "neg DST": replaces the byte at _op1 with zero minus itself
   and rebuilds the CCR (X, N, Z, V, C) from the result: */
TME_M68K_INSN(tme_m68k_neg8)
{
  tme_uint8_t operand, negated;
  tme_uint8_t ccr_new;

  /* fetch the operand: */
  operand = *((tme_uint8_t *) _op1);

  /* negate it and write it back: */
  negated = 0 - operand;
  *((tme_uint8_t *) _op1) = negated;

  /* N is the sign bit of the result: */
  ccr_new = (negated & 0x80) ? TME_M68K_FLAG_N : 0;

  /* Z is set for a zero result: */
  if (negated == 0) {
    ccr_new |= TME_M68K_FLAG_Z;
  }

  /* V is set when both the operand and the result are negative: */
  if (operand & negated & 0x80) {
    ccr_new |= TME_M68K_FLAG_V;
  }

  /* C and X are set for any nonzero operand: */
  if (operand != 0) {
    ccr_new |= TME_M68K_FLAG_C | TME_M68K_FLAG_X;
  }
  ic->tme_m68k_ireg_ccr = ccr_new;

  TME_M68K_INSN_OK;
}

/* 8-bit "or SRC, DST": ORs the byte at _op0 into the byte at _op1.
   N and Z follow the result, X is carried over from the old CCR,
   and no other flag is set: */
TME_M68K_INSN(tme_m68k_or8)
{
  tme_uint8_t src_val, dst_val, result;
  tme_uint8_t ccr_new;

  /* fetch both operands before anything is written back: */
  src_val = *((tme_uint8_t *) _op0);
  dst_val = *((tme_uint8_t *) _op1);

  /* combine them and write the result to the destination: */
  result = dst_val | src_val;
  *((tme_uint8_t *) _op1) = result;

  /* N is the sign bit of the result, Z is set for a zero result,
     and X keeps its previous value: */
  ccr_new = (result & 0x80) ? TME_M68K_FLAG_N : 0;
  if (result == 0) {
    ccr_new |= TME_M68K_FLAG_Z;
  }
  ccr_new |= (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X);
  ic->tme_m68k_ireg_ccr = ccr_new;

  TME_M68K_INSN_OK;
}

/* 8-bit "and SRC, DST": ANDs the byte at _op0 into the byte at _op1.
   N and Z follow the result, X is carried over from the old CCR,
   and no other flag is set: */
TME_M68K_INSN(tme_m68k_and8)
{
  tme_uint8_t src_val, dst_val, result;
  tme_uint8_t ccr_new;

  /* fetch both operands before anything is written back: */
  src_val = *((tme_uint8_t *) _op0);
  dst_val = *((tme_uint8_t *) _op1);

  /* combine them and write the result to the destination: */
  result = dst_val & src_val;
  *((tme_uint8_t *) _op1) = result;

  /* N is the sign bit of the result, Z is set for a zero result,
     and X keeps its previous value: */
  ccr_new = (result & 0x80) ? TME_M68K_FLAG_N : 0;
  if (result == 0) {
    ccr_new |= TME_M68K_FLAG_Z;
  }
  ccr_new |= (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X);
  ic->tme_m68k_ireg_ccr = ccr_new;

  TME_M68K_INSN_OK;
}

/* 8-bit "eor SRC, DST": exclusive-ORs the byte at _op0 into the byte
   at _op1.  N and Z follow the result, X is carried over from the
   old CCR, and no other flag is set: */
TME_M68K_INSN(tme_m68k_eor8)
{
  tme_uint8_t src_val, dst_val, result;
  tme_uint8_t ccr_new;

  /* fetch both operands before anything is written back: */
  src_val = *((tme_uint8_t *) _op0);
  dst_val = *((tme_uint8_t *) _op1);

  /* combine them and write the result to the destination: */
  result = dst_val ^ src_val;
  *((tme_uint8_t *) _op1) = result;

  /* N is the sign bit of the result, Z is set for a zero result,
     and X keeps its previous value: */
  ccr_new = (result & 0x80) ? TME_M68K_FLAG_N : 0;
  if (result == 0) {
    ccr_new |= TME_M68K_FLAG_Z;
  }
  ccr_new |= (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X);
  ic->tme_m68k_ireg_ccr = ccr_new;

  TME_M68K_INSN_OK;
}

/* 8-bit "not DST": replaces the byte at _op1 with its bitwise
   complement.  N and Z follow the result, X is carried over from
   the old CCR, and no other flag is set: */
TME_M68K_INSN(tme_m68k_not8)
{
  tme_uint8_t operand, result;
  tme_uint8_t ccr_new;

  /* fetch the operand: */
  operand = *((tme_uint8_t *) _op1);

  /* complement it and write it back: */
  result = ~operand;
  *((tme_uint8_t *) _op1) = result;

  /* N is the sign bit of the result, Z is set for a zero result,
     and X keeps its previous value: */
  ccr_new = (result & 0x80) ? TME_M68K_FLAG_N : 0;
  if (result == 0) {
    ccr_new |= TME_M68K_FLAG_Z;
  }
  ccr_new |= (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X);
  ic->tme_m68k_ireg_ccr = ccr_new;

  TME_M68K_INSN_OK;
}

/* 8-bit "tst DST": examines the byte at _op1 without modifying it.
   N and Z follow the operand, X is carried over from the old CCR,
   and no other flag is set: */
TME_M68K_INSN(tme_m68k_tst8)
{
  tme_uint8_t operand;
  tme_uint8_t ccr_new;

  /* fetch the operand: */
  operand = *((tme_uint8_t *) _op1);

  /* N is the sign bit of the operand, Z is set for a zero operand,
     and X keeps its previous value: */
  ccr_new = (operand & 0x80) ? TME_M68K_FLAG_N : 0;
  if (operand == 0) {
    ccr_new |= TME_M68K_FLAG_Z;
  }
  ccr_new |= (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X);
  ic->tme_m68k_ireg_ccr = ccr_new;

  TME_M68K_INSN_OK;
}

/* 8-bit "move": copies the byte at _op1 to the byte at _op0 (note
   that, unlike the arithmetic functions, the value is read through
   _op1 and written through _op0).  N and Z follow the value moved,
   X is carried over from the old CCR, and no other flag is set: */
TME_M68K_INSN(tme_m68k_move8)
{
  tme_uint8_t moved;
  tme_uint8_t ccr_new;

  /* fetch the value and write it to its destination: */
  moved = *((tme_uint8_t *) _op1);
  *((tme_uint8_t *) _op0) = moved;

  /* N is the sign bit of the value, Z is set for a zero value,
     and X keeps its previous value: */
  ccr_new = (moved & 0x80) ? TME_M68K_FLAG_N : 0;
  if (moved == 0) {
    ccr_new |= TME_M68K_FLAG_Z;
  }
  ccr_new |= (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X);
  ic->tme_m68k_ireg_ccr = ccr_new;

  TME_M68K_INSN_OK;
}

/* 8-bit "clr DST": writes zero to the byte at _op1.  Since the
   result is always zero, the new CCR is just Z plus the old X: */
TME_M68K_INSN(tme_m68k_clr8)
{
  tme_uint8_t ccr_new;

  /* store the zero result: */
  *((tme_uint8_t *) _op1) = 0;

  /* a zero result never sets N and always sets Z; X keeps its
     previous value: */
  ccr_new = TME_M68K_FLAG_Z;
  ccr_new |= (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X);
  ic->tme_m68k_ireg_ccr = ccr_new;

  TME_M68K_INSN_OK;
}

/* 8-bit "negx DST": replaces the byte at _op1 with zero minus itself
   minus the incoming X flag.  Z can only stay set, never become
   set: a zero result keeps the old Z, a nonzero result clears it: */
TME_M68K_INSN(tme_m68k_negx8)
{
  tme_uint8_t operand, result;
  tme_uint8_t ccr_new;

  /* fetch the operand: */
  operand = *((tme_uint8_t *) _op1);

  /* negate it with the extend bit as an extra borrow, and write
     the result back: */
  result = 0 - operand - ((ic->tme_m68k_ireg_ccr / TME_M68K_FLAG_X) & 1);
  *((tme_uint8_t *) _op1) = result;

  /* N is the sign bit of the result: */
  ccr_new = (result & 0x80) ? TME_M68K_FLAG_N : 0;

  /* Z is inherited from the old CCR, and only for a zero result: */
  if (result == 0) {
    ccr_new |= (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_Z);
  }

  /* V is set when both the operand and the result are negative: */
  if (operand & result & 0x80) {
    ccr_new |= TME_M68K_FLAG_V;
  }

  /* C and X are set when anything at all was subtracted from zero:
     a nonzero operand, or a zero operand with X set: */
  if (operand != 0
      || (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X)) {
    ccr_new |= TME_M68K_FLAG_C | TME_M68K_FLAG_X;
  }
  ic->tme_m68k_ireg_ccr = ccr_new;

  TME_M68K_INSN_OK;
}

/* this does a 8-bit "addx SRC, DST": DST = DST + SRC + X.

   unlike the plain arithmetic functions, this one decodes its own
   operands from the opcode: bits 0-2 select the source register,
   bits 9-11 select the destination register, and bit 3 selects
   between the data-register form (clear) and the memory form (set).
   in the memory form both address registers are decremented before
   they are used as addresses.

   Z is only ever inherited from the old CCR (when the result is
   zero), never newly set: */
TME_M68K_INSN(tme_m68k_addx8)
{
  tme_uint8_t res, op0, op1;
  tme_uint8_t flags;

  /* load the operand(s): */
  unsigned int function_code = TME_M68K_FUNCTION_CODE_DATA(ic);
  int ireg_src = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 0, 3);
  int ireg_dst = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 9, 3);
  /* the decrement is the operand size, plus one more when the
     register number is 7 (the only value for which (n + 1) >> 3 is
     nonzero) - presumably this keeps a7 word-aligned: */
  tme_uint32_t ireg_src_adjust = sizeof(tme_uint8_t) + ((ireg_src + 1) >> 3);
  tme_uint32_t ireg_dst_adjust = sizeof(tme_uint8_t) + ((ireg_dst + 1) >> 3);
  tme_uint16_t memory;

  memory = (TME_M68K_INSN_OPCODE & TME_BIT(3));
  if (memory) {
    TME_M68K_INSN_CANFAULT;

    /* the register updates and EA setup are skipped while
       TME_M68K_SEQUENCE_RESTARTING is true - presumably because they
       were already done before the instruction was interrupted.

       predecrement the source register and read the source operand
       into the memy8 register: */
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_src) -= ireg_src_adjust;
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_src);
    }
    tme_m68k_read_mem8(ic, TME_M68K_IREG_MEMY8);

    /* predecrement the destination register and read the
       destination operand into the memx8 register: */
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst) -= ireg_dst_adjust;
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst);
    }
    tme_m68k_read_memx8(ic);
    op1 = ic->tme_m68k_ireg_memx8;
    op0 = ic->tme_m68k_ireg_memy8;
  }
  else {
    /* the data-register form reads both operands from the selected
       data registers: */
    op0 = ic->tme_m68k_ireg_uint8((TME_M68K_IREG_D0 + ireg_src) << 2);
    op1 = ic->tme_m68k_ireg_uint8((TME_M68K_IREG_D0 + ireg_dst) << 2);
  }

  /* perform the operation, adding in the X flag as a carry-in: */
  res = op1 + op0 + ((ic->tme_m68k_ireg_ccr / TME_M68K_FLAG_X) & 1);

  /* store the result: */
  if (memory) {
    /* the result is written back to the (already decremented)
       destination address: */
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_memx8 = res;
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst);
    }
    tme_m68k_write_memx8(ic);
  }
  else {
    ic->tme_m68k_ireg_uint8((TME_M68K_IREG_D0 + ireg_dst) << 2) = res;
  }

  /* set the flags.  N is the sign bit of the result; Z is kept from
     the old CCR only if the result is zero; V is set when the
     operands have the same sign and the result's sign differs; C and
     X are set on a carry out of bit 7, where the incoming X counts
     as part of the sum (hence the extra equality case): */
  flags = ((tme_uint8_t) (((tme_uint8_t) res) >> (8 - 1))) * TME_M68K_FLAG_N;
  if (res == 0) flags |= (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_Z);
  flags |= ((tme_uint8_t) (((op0 ^ op1 ^ 0xff) & (op1 ^ res)) >> (8 - 1))) * TME_M68K_FLAG_V;
  if (op0 > (op1 ^ 0xff) || (op0 == (op1 ^ 0xff) && (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X))) flags |= TME_M68K_FLAG_C | TME_M68K_FLAG_X;
  ic->tme_m68k_ireg_ccr = flags;

  TME_M68K_INSN_OK;
}

/* this does a 8-bit "subx SRC, DST": DST = DST - SRC - X.

   unlike the plain arithmetic functions, this one decodes its own
   operands from the opcode: bits 0-2 select the source register,
   bits 9-11 select the destination register, and bit 3 selects
   between the data-register form (clear) and the memory form (set).
   in the memory form both address registers are decremented before
   they are used as addresses.

   Z is only ever inherited from the old CCR (when the result is
   zero), never newly set: */
TME_M68K_INSN(tme_m68k_subx8)
{
  tme_uint8_t res, op0, op1;
  tme_uint8_t flags;

  /* load the operand(s): */
  unsigned int function_code = TME_M68K_FUNCTION_CODE_DATA(ic);
  int ireg_src = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 0, 3);
  int ireg_dst = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 9, 3);
  /* the decrement is the operand size, plus one more when the
     register number is 7 (the only value for which (n + 1) >> 3 is
     nonzero) - presumably this keeps a7 word-aligned: */
  tme_uint32_t ireg_src_adjust = sizeof(tme_uint8_t) + ((ireg_src + 1) >> 3);
  tme_uint32_t ireg_dst_adjust = sizeof(tme_uint8_t) + ((ireg_dst + 1) >> 3);
  tme_uint16_t memory;

  memory = (TME_M68K_INSN_OPCODE & TME_BIT(3));
  if (memory) {
    TME_M68K_INSN_CANFAULT;

    /* the register updates and EA setup are skipped while
       TME_M68K_SEQUENCE_RESTARTING is true - presumably because they
       were already done before the instruction was interrupted.

       predecrement the source register and read the source operand
       into the memy8 register: */
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_src) -= ireg_src_adjust;
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_src);
    }
    tme_m68k_read_mem8(ic, TME_M68K_IREG_MEMY8);

    /* predecrement the destination register and read the
       destination operand into the memx8 register: */
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst) -= ireg_dst_adjust;
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst);
    }
    tme_m68k_read_memx8(ic);
    op1 = ic->tme_m68k_ireg_memx8;
    op0 = ic->tme_m68k_ireg_memy8;
  }
  else {
    /* the data-register form reads both operands from the selected
       data registers: */
    op0 = ic->tme_m68k_ireg_uint8((TME_M68K_IREG_D0 + ireg_src) << 2);
    op1 = ic->tme_m68k_ireg_uint8((TME_M68K_IREG_D0 + ireg_dst) << 2);
  }

  /* perform the operation, subtracting the X flag as a borrow-in: */
  res = op1 - op0 - ((ic->tme_m68k_ireg_ccr / TME_M68K_FLAG_X) & 1);

  /* store the result: */
  if (memory) {
    /* the result is written back to the (already decremented)
       destination address: */
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_memx8 = res;
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst);
    }
    tme_m68k_write_memx8(ic);
  }
  else {
    ic->tme_m68k_ireg_uint8((TME_M68K_IREG_D0 + ireg_dst) << 2) = res;
  }

  /* set the flags.  N is the sign bit of the result; Z is kept from
     the old CCR only if the result is zero; V is set when the
     operands have different signs and the result's sign differs from
     the destination's; C and X are set on a borrow, where the
     incoming X counts as part of the subtrahend (hence the extra
     equality case): */
  flags = ((tme_uint8_t) (((tme_uint8_t) res) >> (8 - 1))) * TME_M68K_FLAG_N;
  if (res == 0) flags |= (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_Z);
  flags |= ((tme_uint8_t) (((op0 ^ op1) & (op1 ^ res)) >> (8 - 1))) * TME_M68K_FLAG_V;
  if (op0 > op1 || (op0 == op1 && (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X))) flags |= TME_M68K_FLAG_C | TME_M68K_FLAG_X;
  ic->tme_m68k_ireg_ccr = flags;

  TME_M68K_INSN_OK;
}

/* this does a 8-bit "cmpm SRC, DST": compares two memory operands.

   opcode bits 0-2 select the source address register and bits 9-11
   select the destination address register.  each register is used
   as the operand address and then incremented.  the difference
   DST - SRC is only used to set N, Z, V and C; nothing is written
   to memory and X is carried over from the old CCR: */
TME_M68K_INSN(tme_m68k_cmpm8)
{
  tme_uint8_t res, op0, op1;
  tme_uint8_t flags;

  /* load the operand(s): */
  unsigned int function_code = TME_M68K_FUNCTION_CODE_DATA(ic);
  int ireg_src = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 0, 3);
  int ireg_dst = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 9, 3);
  /* the increment is the operand size, plus one more when the
     register number is 7 (the only value for which (n + 1) >> 3 is
     nonzero) - presumably this keeps a7 word-aligned: */
  tme_uint32_t ireg_src_adjust = sizeof(tme_uint8_t) + ((ireg_src + 1) >> 3);
  tme_uint32_t ireg_dst_adjust = sizeof(tme_uint8_t) + ((ireg_dst + 1) >> 3);

  TME_M68K_INSN_CANFAULT;

  /* the EA setup and register updates are skipped while
     TME_M68K_SEQUENCE_RESTARTING is true - presumably because they
     were already done before the instruction was interrupted.

     latch the source address, postincrement the source register,
     and read the source operand into the memy8 register: */
  if (!TME_M68K_SEQUENCE_RESTARTING) {
    ic->_tme_m68k_ea_function_code = function_code;
    ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_src);
    ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_src) += ireg_src_adjust;
  }
  tme_m68k_read_mem8(ic, TME_M68K_IREG_MEMY8);

  /* latch the destination address, postincrement the destination
     register, and read the destination operand into the memx8
     register: */
  if (!TME_M68K_SEQUENCE_RESTARTING) {
    ic->_tme_m68k_ea_function_code = function_code;
    ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst);
    ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst) += ireg_dst_adjust;
  }
  tme_m68k_read_memx8(ic);
  op1 = ic->tme_m68k_ireg_memx8;
  op0 = ic->tme_m68k_ireg_memy8;

  /* perform the operation: */
  res = op1 - op0;

  /* set the flags.  N is the sign bit of the difference; Z is set
     for a zero difference; V is set when the operands have different
     signs and the difference's sign differs from the destination's;
     C is set on a borrow; X keeps its previous value: */
  flags = ((tme_uint8_t) (((tme_uint8_t) res) >> (8 - 1))) * TME_M68K_FLAG_N;
  if (res == 0) flags |= TME_M68K_FLAG_Z;
  flags |= ((tme_uint8_t) (((op0 ^ op1) & (op1 ^ res)) >> (8 - 1))) * TME_M68K_FLAG_V;
  if (op0 > op1) flags |= TME_M68K_FLAG_C;
  flags |= (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X);
  ic->tme_m68k_ireg_ccr = flags;

  TME_M68K_INSN_OK;
}

/* the btst function on an 8-bit EA: sets Z when the selected bit of
   the operand is clear and clears Z when it is set.  only the low
   three bits of the bit number are used, the operand itself is not
   modified, and no other flag is touched: */
TME_M68K_INSN(tme_m68k_btst8)
{
  tme_uint8_t mask, operand;

  /* build the mask for the selected bit and fetch the operand: */
  mask = _TME_BIT(tme_uint8_t, TME_M68K_INSN_OP0(tme_uint8_t) & (8 - 1));
  operand = TME_M68K_INSN_OP1(tme_uint8_t);

  /* Z is the inverse of the tested bit: */
  if ((operand & mask) == 0) {
    ic->tme_m68k_ireg_ccr |= TME_M68K_FLAG_Z;
  }
  else {
    ic->tme_m68k_ireg_ccr &= ~TME_M68K_FLAG_Z;
  }
  TME_M68K_INSN_OK;
}

/* the bchg function on an 8-bit EA: sets Z to the inverse of the
   selected bit's old value, then inverts that bit in the operand.
   only the low three bits of the bit number are used, and no other
   flag is touched: */
TME_M68K_INSN(tme_m68k_bchg8)
{
  tme_uint8_t mask, operand;

  /* build the mask for the selected bit and fetch the operand: */
  mask = _TME_BIT(tme_uint8_t, TME_M68K_INSN_OP0(tme_uint8_t) & (8 - 1));
  operand = TME_M68K_INSN_OP1(tme_uint8_t);

  /* Z is the inverse of the bit as it was before the change: */
  if ((operand & mask) == 0) {
    ic->tme_m68k_ireg_ccr |= TME_M68K_FLAG_Z;
  }
  else {
    ic->tme_m68k_ireg_ccr &= ~TME_M68K_FLAG_Z;
  }

  /* flip the bit and store the operand: */
  TME_M68K_INSN_OP1(tme_uint8_t) = operand ^ mask;
  TME_M68K_INSN_OK;
}

/* the bclr function on an 8-bit EA: sets Z to the inverse of the
   selected bit's old value, then clears that bit in the operand.
   only the low three bits of the bit number are used, and no other
   flag is touched: */
TME_M68K_INSN(tme_m68k_bclr8)
{
  tme_uint8_t mask, operand;

  /* build the mask for the selected bit and fetch the operand: */
  mask = _TME_BIT(tme_uint8_t, TME_M68K_INSN_OP0(tme_uint8_t) & (8 - 1));
  operand = TME_M68K_INSN_OP1(tme_uint8_t);

  /* Z is the inverse of the bit as it was before the change: */
  if ((operand & mask) == 0) {
    ic->tme_m68k_ireg_ccr |= TME_M68K_FLAG_Z;
  }
  else {
    ic->tme_m68k_ireg_ccr &= ~TME_M68K_FLAG_Z;
  }

  /* clear the bit and store the operand: */
  TME_M68K_INSN_OP1(tme_uint8_t) = operand & ~mask;
  TME_M68K_INSN_OK;
}

/* the bset function on an 8-bit EA: sets Z to the inverse of the
   selected bit's old value, then sets that bit in the operand.
   only the low three bits of the bit number are used, and no other
   flag is touched: */
TME_M68K_INSN(tme_m68k_bset8)
{
  tme_uint8_t mask, operand;

  /* build the mask for the selected bit and fetch the operand: */
  mask = _TME_BIT(tme_uint8_t, TME_M68K_INSN_OP0(tme_uint8_t) & (8 - 1));
  operand = TME_M68K_INSN_OP1(tme_uint8_t);

  /* Z is the inverse of the bit as it was before the change: */
  if ((operand & mask) == 0) {
    ic->tme_m68k_ireg_ccr |= TME_M68K_FLAG_Z;
  }
  else {
    ic->tme_m68k_ireg_ccr &= ~TME_M68K_FLAG_Z;
  }

  /* set the bit and store the operand: */
  TME_M68K_INSN_OP1(tme_uint8_t) = operand | mask;
  TME_M68K_INSN_OK;
}

/* the asl function on an 8-bit EA: shifts the operand at _op1 left
   by the count at _op0 (taken modulo 64), filling with zeros.

   with a zero count, the operand is unchanged, X is preserved, and
   V and C are cleared.  with a nonzero count, C and X get the last
   bit shifted out, and V is set if the sign bit changed at any
   point during the shift.  N and Z always follow the result: */
TME_M68K_INSN(tme_m68k_asl8)
{
  unsigned int count;
  tme_uint8_t sign_bits, sign_bits_mask;
  tme_uint8_t res;
  tme_uint8_t flags;

  /* get the count and operand: */
  count = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  res = TME_M68K_INSN_OP1(tme_uint8_t);

  /* generate the X, V, and C flags assuming the count is zero: */
  flags = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;

  /* if the count is nonzero, update the result and
     generate the X, V, and C flags: */
  if (count > 0) {

    /* we need to see how the sign of the result will change during
       shifting in order to generate V.

       in general, the idea is to get all of the bits that will ever
       appear in the sign position into sign_bits, with a mask in
       sign_bits_mask.  if (sign_bits & sign_bits_mask) is zero or
       sign_bits_mask, clear V, else set V.

       start by loading the operand into sign_bits and setting
       sign_bits_mask to all-bits-one.

       if the shift count is exactly 8 - 1, then all of the bits
       of the operand will appear in the sign position.

       if the shift count is less than 8 - 1, then some of the
       less significant bits of the operand will never appear in the
       sign position, so we can shift sign_bits_mask to ignore them.

       if the shift count is greater than 8 - 1, then all of the
       bits in the operand, plus at least one zero bit, will appear in
       the sign position.  the only way that the sign bit will never
       change during the shift is if the operand was zero to begin with.
       without any changes to sign_bits or sign_bits_mask, the final
       test will always work, except when sign_bits is all-bits-one.
       the magic below clears the least-significant bit of sign_bits
       iff sign_bits is all-bits-one: */
    sign_bits = res;

    /* when the platform cannot shift by the full range of counts,
       a count greater than 8 is handled by zeroing the operand
       first, since every bit is shifted out anyway: */
    if (63 > SHIFTMAX_INT8_T
        && count > 8) {
      res = 0;
    }

    /* shift by all but one position, so that the last bit to be
       shifted out is in the sign position, and copy it into C and
       X.  this overwrites the zero-count flags from above: */
    res <<= (count - 1);
    flags = (res >> (8 - 1));
    flags *= TME_M68K_FLAG_C;
    flags |= (flags * TME_M68K_FLAG_X);
    res <<= 1;

    sign_bits_mask = (tme_uint8_t) -1;
    if (count != 8 - 1) {
      if (count < 8) {
        sign_bits_mask <<= ((8 - 1) - count);
      }
      else {
        /* sign_bits is promoted to int before the addition, so
           0xff + 1 is 256, not zero.  the sum must be truncated
           back to 8 bits before it is tested, otherwise an
           all-bits-one operand is never detected and V is wrongly
           left clear: */
        sign_bits ^= !((tme_uint8_t) (sign_bits + 1));
      }
    }
    sign_bits &= sign_bits_mask;
    if (sign_bits != 0 && sign_bits != sign_bits_mask) {
      flags |= TME_M68K_FLAG_V;
    }
  }

  /* store the result: */
  TME_M68K_INSN_OP1(tme_uint8_t) = res;

  /* generate the N flag.  we cast to tme_uint8_t as soon as we
     know the bit we want is within the range of the type, to try
     to affect the generated assembly: */
  flags |= ((tme_uint8_t) (((tme_uint8_t) res) >> (8 - 1))) * TME_M68K_FLAG_N;

  /* generate the Z flag: */
  if (res == 0) flags |= TME_M68K_FLAG_Z;

  /* store the flags: */
  ic->tme_m68k_ireg_ccr = flags;
  TME_M68K_INSN_OK;
}

/* the asr function on an 8-bit EA: shifts the signed operand at
   _op1 right by the count at _op0 (taken modulo 64), replicating
   the sign bit.

   with a zero count, the operand is unchanged, X is preserved, and
   V and C are cleared.  with a nonzero count, C and X get the last
   bit shifted out, and V is never set.  N and Z always follow the
   result: */
TME_M68K_INSN(tme_m68k_asr8)
{
  unsigned int count;
  tme_int8_t res;
  tme_uint8_t flags;

  /* get the count and operand: */
  count = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  res = TME_M68K_INSN_OP1(tme_int8_t);

  /* generate the X, V, and C flags assuming the count is zero: */
  flags = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;

  /* if the count is nonzero, update the result and
     generate the X, V, and C flags: */
  if (count > 0) {

    /* when the platform cannot shift by the full range of counts,
       a count greater than 8 is handled by first replacing the
       operand with its sign extension (all-bits-one if negative,
       else zero), which further shifting leaves unchanged: */
    if (63 > SHIFTMAX_INT8_T
        && count > 8) {
      res = 0 - (res < 0);
    }

    /* shift by all but one position, so that the last bit to be
       shifted out ends up in bit zero.  SHIFTSIGNED_INT8_T
       presumably means that >> on this signed type is known to be
       an arithmetic shift; without it, each step is done by clearing
       bit zero and dividing by two: */
#ifdef SHIFTSIGNED_INT8_T
    res >>= (count - 1);
#else  /* !SHIFTSIGNED_INT8_T */
    for (; --count > 0; ) {
      res = (res & ~((tme_int8_t) 1)) / 2;
    }
#endif /* !SHIFTSIGNED_INT8_T */

    /* copy the last bit to be shifted out into C and X.  this
       overwrites the zero-count flags from above: */
    flags = (res & 1);
    flags *= TME_M68K_FLAG_C;
    flags |= (flags * TME_M68K_FLAG_X);

    /* do the final one-position shift: */
#ifdef SHIFTSIGNED_INT8_T
    res >>= 1;
#else  /* !SHIFTSIGNED_INT8_T */
    res = (res & ~((tme_int8_t) 1)) / 2;
#endif /* !SHIFTSIGNED_INT8_T */
  }

  /* store the result: */
  TME_M68K_INSN_OP1(tme_int8_t) = res;

  /* generate the N flag.  we cast to tme_uint8_t as soon as we
     know the bit we want is within the range of the type, to try
     to affect the generated assembly: */
  flags |= ((tme_uint8_t) (((tme_uint8_t) res) >> (8 - 1))) * TME_M68K_FLAG_N;

  /* generate the Z flag: */
  if (res == 0) flags |= TME_M68K_FLAG_Z;

  /* store the flags: */
  ic->tme_m68k_ireg_ccr = flags;
  TME_M68K_INSN_OK;
}

/* the lsl function on an 8-bit EA: shifts the operand at _op1 left
   by the count at _op0 (taken modulo 64), filling with zeros.

   with a zero count, the operand is unchanged and X is preserved.
   with a nonzero count, C and X get the last bit shifted out.
   V is never set, and N and Z always follow the result: */
TME_M68K_INSN(tme_m68k_lsl8)
{
  unsigned int shift;
  tme_uint8_t value;
  tme_uint8_t ccr_new;

  /* get the shift count and the operand: */
  shift = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  value = TME_M68K_INSN_OP1(tme_uint8_t);

  if (shift == 0) {

    /* nothing moves, so only X survives from the old CCR: */
    ccr_new = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;
  }
  else {

    /* when the platform cannot shift by the full range of counts,
       a count greater than 8 is handled by zeroing the operand
       first, since every bit is shifted out anyway: */
    if (63 > SHIFTMAX_INT8_T
        && shift > 8) {
      value = 0;
    }

    /* shift by all but one position, so that the last bit to be
       shifted out is in the top bit, and copy it into C and X: */
    value <<= (shift - 1);
    ccr_new = (value >> (8 - 1)) * TME_M68K_FLAG_C;
    ccr_new |= (ccr_new * TME_M68K_FLAG_X);

    /* do the final one-position shift: */
    value <<= 1;
  }

  /* store the result: */
  TME_M68K_INSN_OP1(tme_uint8_t) = value;

  /* N is the top bit of the result: */
  if (value & 0x80) {
    ccr_new |= TME_M68K_FLAG_N;
  }

  /* Z is set for a zero result: */
  if (value == 0) {
    ccr_new |= TME_M68K_FLAG_Z;
  }

  /* store the flags: */
  ic->tme_m68k_ireg_ccr = ccr_new;
  TME_M68K_INSN_OK;
}

/* the lsr function on an 8-bit EA: shifts the operand at _op1 right
   by the count at _op0 (taken modulo 64), filling with zeros.

   with a zero count, the operand is unchanged and X is preserved.
   with a nonzero count, C and X get the last bit shifted out.
   V is never set, and N and Z always follow the result: */
TME_M68K_INSN(tme_m68k_lsr8)
{
  unsigned int shift;
  tme_uint8_t value;
  tme_uint8_t ccr_new;

  /* get the shift count and the operand: */
  shift = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  value = TME_M68K_INSN_OP1(tme_uint8_t);

  if (shift == 0) {

    /* nothing moves, so only X survives from the old CCR: */
    ccr_new = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;
  }
  else {

    /* when the platform cannot shift by the full range of counts,
       a count greater than 8 is handled by zeroing the operand
       first, since every bit is shifted out anyway: */
    if (63 > SHIFTMAX_INT8_T
        && shift > 8) {
      value = 0;
    }

    /* shift by all but one position, so that the last bit to be
       shifted out is in bit zero, and copy it into C and X: */
    value >>= (shift - 1);
    ccr_new = (value & 1) * TME_M68K_FLAG_C;
    ccr_new |= (ccr_new * TME_M68K_FLAG_X);

    /* do the final one-position shift: */
    value >>= 1;
  }

  /* store the result: */
  TME_M68K_INSN_OP1(tme_uint8_t) = value;

  /* N is the top bit of the result: */
  if (value & 0x80) {
    ccr_new |= TME_M68K_FLAG_N;
  }

  /* Z is set for a zero result: */
  if (value == 0) {
    ccr_new |= TME_M68K_FLAG_Z;
  }

  /* store the flags: */
  ic->tme_m68k_ireg_ccr = ccr_new;
  TME_M68K_INSN_OK;
}

/* the rol function on an 8-bit EA: rotates the operand at _op1 left
   by the count at _op0 (taken modulo 64).

   X is always preserved and V is never set.  with a nonzero count,
   C gets bit zero of the result, which is the last bit rotated
   around.  N and Z always follow the result: */
TME_M68K_INSN(tme_m68k_rol8)
{
  unsigned int rotation;
  tme_uint8_t value;
  tme_uint8_t ccr_new;

  /* get the rotate count and the operand: */
  rotation = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  value = TME_M68K_INSN_OP1(tme_uint8_t);

  /* X always survives from the old CCR: */
  ccr_new = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;

  if (rotation != 0) {

    /* only the count modulo the operand width moves any bits.  a
       nonzero multiple of 8 leaves the value alone but still
       generates C: */
    rotation %= 8;
    value = (value << rotation) | (value >> (8 - rotation));
    ccr_new |= ((value & 1) * TME_M68K_FLAG_C);
  }

  /* store the result: */
  TME_M68K_INSN_OP1(tme_uint8_t) = value;

  /* N is the top bit of the result: */
  if (value & 0x80) {
    ccr_new |= TME_M68K_FLAG_N;
  }

  /* Z is set for a zero result: */
  if (value == 0) {
    ccr_new |= TME_M68K_FLAG_Z;
  }

  /* store the flags: */
  ic->tme_m68k_ireg_ccr = ccr_new;
  TME_M68K_INSN_OK;
}

/* the ror function on an 8-bit EA: rotates the operand at _op1
   right by the count at _op0 (taken modulo 64).

   X is always preserved and V is never set.  with a nonzero count,
   C gets the top bit of the result, which is the last bit rotated
   around.  N and Z always follow the result: */
TME_M68K_INSN(tme_m68k_ror8)
{
  unsigned int rotation;
  tme_uint8_t value;
  tme_uint8_t ccr_new;

  /* get the rotate count and the operand: */
  rotation = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  value = TME_M68K_INSN_OP1(tme_uint8_t);

  /* X always survives from the old CCR: */
  ccr_new = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;

  if (rotation != 0) {

    /* only the count modulo the operand width moves any bits.  a
       nonzero multiple of 8 leaves the value alone but still
       generates C: */
    rotation %= 8;
    value = (value << (8 - rotation)) | (value >> rotation);
    ccr_new |= ((value >> (8 - 1)) * TME_M68K_FLAG_C);
  }

  /* store the result: */
  TME_M68K_INSN_OP1(tme_uint8_t) = value;

  /* N is the top bit of the result: */
  if (value & 0x80) {
    ccr_new |= TME_M68K_FLAG_N;
  }

  /* Z is set for a zero result: */
  if (value == 0) {
    ccr_new |= TME_M68K_FLAG_Z;
  }

  /* store the flags: */
  ic->tme_m68k_ireg_ccr = ccr_new;
  TME_M68K_INSN_OK;
}

/* the roxl function on an 8-bit EA: rotates the operand at _op1
   left by the count at _op0 (taken modulo 64), with the X flag
   taking part in the rotation as a ninth bit.

   with a zero count, the operand is unchanged, X is preserved, and
   C is set to X.  with a nonzero count, C and X both get the last
   bit rotated out of the operand (or the old X, if the count is a
   multiple of 9).  V is never set, and N and Z always follow the
   result: */
TME_M68K_INSN(tme_m68k_roxl8)
{
  unsigned int count;
  tme_uint8_t xbit;
  tme_uint8_t res;
  tme_uint8_t flags;

  /* get the count and operand: */
  count = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  res = TME_M68K_INSN_OP1(tme_uint8_t);

  /* generate the X, V, and C flags assuming the count is zero: */
  flags = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;
  xbit = (flags / TME_M68K_FLAG_X);
  flags |= (xbit * TME_M68K_FLAG_C);

  /* if the count is nonzero, update the result and
     generate the X, V, and C flags: */
  if (count > 0) {

    /* the operand plus X form a 9-bit quantity, so rotating by a
       multiple of 9 changes nothing: */
    count %= (8 + 1);

    /* if no bits end up moving, the carry out is the old X: */
    flags = xbit;
    if (count > 0) {

      /* the carry out is the last operand bit to leave through
         the top: */
      flags = (res >> (8 - count)) & 1;

      /* the general case shifts by count, count - 1, and
         (8 + 1) - count.  the two special cases below spell out
         the counts 8 and 1 with the out-of-range shift replaced by
         a constant zero, and are only compiled in when 8 is more
         than SHIFTMAX_INT8_T - presumably the largest shift count
         the platform handles for this type: */
      if (8 > SHIFTMAX_INT8_T
          && count == 8) {
        res = 0 | (xbit << (8 - 1)) | (res >> ((8 + 1) - 8));
      }
      else if (8 > SHIFTMAX_INT8_T
               && count == 1) {
        res = (res << 1) | (xbit << (1 - 1)) | 0;
      }
      else {
        res = (res << count) | (xbit << (count - 1)) | (res >> ((8 + 1) - count));
      }
    }

    /* copy the carry out into both C and X: */
    flags *= TME_M68K_FLAG_C;
    flags |= (flags * TME_M68K_FLAG_X);
  }

  /* store the result: */
  TME_M68K_INSN_OP1(tme_uint8_t) = res;

  /* generate the N flag.  we cast to tme_uint8_t as soon as we
     know the bit we want is within the range of the type, to try
     to affect the generated assembly: */
  flags |= ((tme_uint8_t) (((tme_uint8_t) res) >> (8 - 1))) * TME_M68K_FLAG_N;

  /* generate the Z flag: */
  if (res == 0) flags |= TME_M68K_FLAG_Z;

  /* store the flags: */
  ic->tme_m68k_ireg_ccr = flags;
  TME_M68K_INSN_OK;
}

/* the roxr function on an 8-bit EA: rotates the operand at _op1
   right by the count at _op0 (taken modulo 64), with the X flag
   taking part in the rotation as a ninth bit.

   with a zero count, the operand is unchanged, X is preserved, and
   C is set to X.  with a nonzero count, C and X both get the last
   bit rotated out of the operand (or the old X, if the count is a
   multiple of 9).  V is never set, and N and Z always follow the
   result: */
TME_M68K_INSN(tme_m68k_roxr8)
{
  unsigned int count;
  tme_uint8_t xbit;
  tme_uint8_t res;
  tme_uint8_t flags;

  /* get the count and operand: */
  count = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  res = TME_M68K_INSN_OP1(tme_uint8_t);

  /* generate the X, V, and C flags assuming the count is zero: */
  flags = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;
  xbit = (flags / TME_M68K_FLAG_X);
  flags |= (xbit * TME_M68K_FLAG_C);

  /* if the count is nonzero, update the result and
     generate the X, V, and C flags: */
  if (count > 0) {

    /* the operand plus X form a 9-bit quantity, so rotating by a
       multiple of 9 changes nothing: */
    count %= (8 + 1);

    /* if no bits end up moving, the carry out is the old X: */
    flags = xbit;
    if (count > 0) {

      /* the carry out is the last operand bit to leave through
         the bottom: */
      flags = (res >> (count - 1)) & 1;

      /* the general case shifts by (8 + 1) - count, 8 - count, and
         count.  the two special cases below spell out the counts 8
         and 1 with the out-of-range shift replaced by a constant
         zero, and are only compiled in when 8 is more than
         SHIFTMAX_INT8_T - presumably the largest shift count the
         platform handles for this type: */
      if (8 > SHIFTMAX_INT8_T
          && count == 8) {
        res = (res << ((8 + 1) - 8)) | (xbit << (8 - 8)) | 0;
      }
      else if (8 > SHIFTMAX_INT8_T
               && count == 1) {
        res = 0 | (xbit << (8 - 1)) | (res >> 1);
      }
      else {
        res = (res << ((8 + 1) - count)) | (xbit << (8 - count)) | (res >> count);
      }
    }

    /* copy the carry out into both C and X: */
    flags *= TME_M68K_FLAG_C;
    flags |= (flags * TME_M68K_FLAG_X);
  }

  /* store the result: */
  TME_M68K_INSN_OP1(tme_uint8_t) = res;

  /* generate the N flag.  we cast to tme_uint8_t as soon as we
     know the bit we want is within the range of the type, to try
     to affect the generated assembly: */
  flags |= ((tme_uint8_t) (((tme_uint8_t) res) >> (8 - 1))) * TME_M68K_FLAG_N;

  /* generate the Z flag: */
  if (res == 0) flags |= TME_M68K_FLAG_Z;

  /* store the flags: */
  ic->tme_m68k_ireg_ccr = flags;
  TME_M68K_INSN_OK;
}

/* cas8: the 8-bit compare-and-swap.

   the memory operand at the current effective address is compared
   with a "compare" data register (special-operation word bits 0-2).
   if they are equal, the "update" data register (bits 6-8) is
   written to memory; otherwise the memory operand is loaded into
   the compare register.  the flags are those of a cmp8 of the
   compare register against the memory operand: */
TME_M68K_INSN(tme_m68k_cas8)
{
  struct tme_m68k_rmw rmw;
  struct tme_m68k_tlb *tlb;
  int ireg_dc, ireg_du;
  tme_uint8_t value_dc, value_du, value_mem;

  /* start the read/modify/write cycle.  a nonzero return from
     tme_m68k_rmw_start() ends the instruction right here (NOTE:
     what that return means is defined by tme_m68k_rmw_start(),
     which is not visible from this file): */
  rmw.tme_m68k_rmw_addresses[0] = ic->_tme_m68k_ea_address;
  rmw.tme_m68k_rmw_address_count = 1;
  rmw.tme_m68k_rmw_size = sizeof(tme_uint8_t);
  if (tme_m68k_rmw_start(ic,
                         &rmw)) {
    TME_M68K_INSN_OK;
  }

  /* get the compare and update registers: */
  ireg_dc = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_SPECOP, 0, 3);
  ireg_du = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_SPECOP, 6, 3);

  /* if we can do the fast compare-and-exchange: */
  if (!rmw.tme_m68k_rmw_slow_reads[0]) {

    /* get the compare and update values in big-endian byte order
       (for single bytes, they are used as-is): */
    value_dc = ic->tme_m68k_ireg_uint8(ireg_dc << 2);
    value_du = ic->tme_m68k_ireg_uint8(ireg_du << 2);

    /* get this TLB entry: */
    tlb = rmw.tme_m68k_rmw_tlbs[0];

    /* this TLB entry must allow fast reading and fast writing
       to the same memory: */
    assert (tlb->tme_m68k_tlb_emulator_off_read != TME_EMULATOR_OFF_UNDEF
            && tlb->tme_m68k_tlb_emulator_off_write == tlb->tme_m68k_tlb_emulator_off_read);

    /* do the compare-and-exchange directly on emulator memory, and
       keep the value it returns as the memory operand: */
    value_mem =
      tme_memory_atomic_cx8(((tme_shared tme_uint8_t *)
                                   (tlb->tme_m68k_tlb_emulator_off_read
                                    + ic->_tme_m68k_ea_address)),
                                  value_dc,
                                  value_du,
                                  tlb->tme_m68k_tlb_bus_rwlock,
                                  sizeof(tme_uint8_t));
    ic->tme_m68k_ireg_memx8 = (value_mem);

    /* step the transfer count once for the read, and once for the write: */
    TME_M68K_SEQUENCE_TRANSFER_STEP;
    TME_M68K_SEQUENCE_TRANSFER_STEP;
  }

  /* on the slow path, the memory operand is expected to already be
     in the memx8 register at this point - presumably put there by
     tme_m68k_rmw_start(): */

  /* compare the compare operand to the effective address operand: */
  tme_m68k_cmp8(ic, &ic->tme_m68k_ireg_uint8(ireg_dc << 2), &ic->tme_m68k_ireg_memx8);

  /* if the comparison succeeded: */
  if (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_Z) {

    /* write the update operand to the effective address operand: */
    ic->tme_m68k_ireg_memx8 = ic->tme_m68k_ireg_uint8(ireg_du << 2);
  }

  /* otherwise, the comparison failed: */
  else {

    /* write the effective address operand to the compare operand: */
    ic->tme_m68k_ireg_uint8(ireg_dc << 2) = ic->tme_m68k_ireg_memx8;
  }

  /* finish the read/modify/write cycle, passing whether or not the
     comparison succeeded: */
  tme_m68k_rmw_finish(ic,
                      &rmw,
                      (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_Z) != 0);
  TME_M68K_INSN_OK;
}

/* moves8: the 8-bit "moves", which moves a byte between a register
   and memory using the function code from the sfc or dfc register
   instead of the normal one.

   special-operation word bits 12-15 select the register (counted
   from d0, so values of 8 and up reach past the data registers),
   and bit 11 selects the direction: set for register to memory,
   clear for memory to register: */
TME_M68K_INSN(tme_m68k_moves8)
{
  int ireg;
  tme_uint8_t ireg_value;
  unsigned int ea_reg;
  unsigned int increment;
  TME_M68K_INSN_PRIV;
  TME_M68K_INSN_CANFAULT;
  ireg = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_SPECOP, 12, 4);

  /* in case we're storing the same address register used in a
     postincrement or predecrement EA, save the current value
     of the register now: */
  ireg_value = ic->tme_m68k_ireg_uint8(ireg << 2);

  /* we have to handle postincrement and predecrement ourselves.
     this is skipped while TME_M68K_SEQUENCE_RESTARTING is true -
     presumably because it was already done before the instruction
     was interrupted: */
  if (!TME_M68K_SEQUENCE_RESTARTING) {
    ea_reg = TME_M68K_IREG_A0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 0, 3);

    /* a byte access through a7 adjusts the register by a word: */
    increment = TME_M68K_SIZE_8;
    if (increment == TME_M68K_SIZE_8 && ea_reg == TME_M68K_IREG_A7) {
      increment = TME_M68K_SIZE_16;
    }

    /* opcode bits 3-5 are the EA mode.  mode 3 only bumps the
       register (the EA address is left as it was), mode 4
       decrements the register and uses the new value as the EA
       address: */
    switch (TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 3, 3)) {
    case 3: ic->tme_m68k_ireg_uint32(ea_reg) += increment; break;
    case 4: ic->_tme_m68k_ea_address = (ic->tme_m68k_ireg_uint32(ea_reg) -= increment); break;
    default: break;
    }
  }

  /* register to memory: write the saved register value using the
     function code from the dfc register: */
  if (TME_M68K_INSN_SPECOP & TME_BIT(11)) {
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_memx8 = ireg_value;
      ic->_tme_m68k_ea_function_code = ic->tme_m68k_ireg_dfc;
    }
    tme_m68k_write_memx8(ic);
  }

  /* memory to register: read using the function code from the sfc
     register.  an address register gets the sign-extended value in
     all 32 bits, a data register only has its low byte replaced: */
  else {
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->_tme_m68k_ea_function_code = ic->tme_m68k_ireg_sfc;
    }
    tme_m68k_read_memx8(ic);
    if (ireg >= TME_M68K_IREG_A0) {
      ic->tme_m68k_ireg_uint32(ireg) = 
        TME_EXT_S8_U32((tme_int8_t) ic->tme_m68k_ireg_memx8);
    }
    else
      ic->tme_m68k_ireg_uint8(ireg << 2) = ic->tme_m68k_ireg_memx8;
  }
  TME_M68K_INSN_OK;
}

/* 16-bit "add SRC, DST".  the sum replaces DST, and N, Z, V, C and X
   are all regenerated from the operation: */
TME_M68K_INSN(tme_m68k_add16)
{
  tme_uint16_t src, dst, sum;
  tme_uint8_t ccr;

  /* fetch the source and destination operands: */
  src = *((tme_uint16_t *) _op0);
  dst = *((tme_uint16_t *) _op1);

  /* form the sum, truncated to 16 bits, and write it back: */
  sum = dst + src;
  *((tme_uint16_t *) _op1) = sum;

  /* N follows the sign bit of the sum, and Z flags a zero sum: */
  ccr = 0;
  if (sum & 0x8000) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (sum == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }

  /* V is set when both operands have the same sign and the sum
     has the other sign: */
  if ((src ^ dst ^ 0xffff) & (dst ^ sum) & 0x8000) {
    ccr |= TME_M68K_FLAG_V;
  }

  /* there was a carry out of bit 15 iff SRC is greater than the
     ones-complement of DST.  C and X both get the carry: */
  if ((dst ^ 0xffff) < src) {
    ccr |= TME_M68K_FLAG_C | TME_M68K_FLAG_X;
  }

  /* commit the new condition codes: */
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* 16-bit "sub SRC, DST".  the difference DST - SRC replaces DST, and
   N, Z, V, C and X are all regenerated from the operation: */
TME_M68K_INSN(tme_m68k_sub16)
{
  tme_uint16_t src, dst, diff;
  tme_uint8_t ccr;

  /* fetch the source and destination operands: */
  src = *((tme_uint16_t *) _op0);
  dst = *((tme_uint16_t *) _op1);

  /* form the difference, truncated to 16 bits, and write it back: */
  diff = dst - src;
  *((tme_uint16_t *) _op1) = diff;

  /* N follows the sign bit of the difference, and Z flags a zero
     difference: */
  ccr = 0;
  if (diff & 0x8000) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (diff == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }

  /* V is set when the operands have different signs and the
     difference does not have the sign of DST: */
  if ((src ^ dst) & (dst ^ diff) & 0x8000) {
    ccr |= TME_M68K_FLAG_V;
  }

  /* there was a borrow iff SRC is greater than DST.  C and X both
     get the borrow: */
  if (dst < src) {
    ccr |= TME_M68K_FLAG_C | TME_M68K_FLAG_X;
  }

  /* commit the new condition codes: */
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* 16-bit "cmp SRC, DST".  this computes DST - SRC only for its
   effect on N, Z, V and C; DST is not written and X is preserved: */
TME_M68K_INSN(tme_m68k_cmp16)
{
  tme_uint16_t src, dst, diff;
  tme_uint8_t ccr;

  /* fetch the source and destination operands: */
  src = *((tme_uint16_t *) _op0);
  dst = *((tme_uint16_t *) _op1);

  /* form the difference, truncated to 16 bits: */
  diff = dst - src;

  /* N follows the sign bit of the difference, and Z flags a zero
     difference: */
  ccr = 0;
  if (diff & 0x8000) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (diff == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }

  /* V is set when the operands have different signs and the
     difference does not have the sign of DST: */
  if ((src ^ dst) & (dst ^ diff) & 0x8000) {
    ccr |= TME_M68K_FLAG_V;
  }

  /* there was a borrow iff SRC is greater than DST: */
  if (dst < src) {
    ccr |= TME_M68K_FLAG_C;
  }

  /* carry the old X flag over and commit the condition codes: */
  ccr |= (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X);
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* 16-bit "neg DST".  DST is replaced by 0 - DST, and N, Z, V, C and
   X are all regenerated from the operation: */
TME_M68K_INSN(tme_m68k_neg16)
{
  tme_uint16_t dst, negated;
  tme_uint8_t ccr;

  /* fetch the operand: */
  dst = *((tme_uint16_t *) _op1);

  /* subtract it from zero, truncating to 16 bits, and write the
     result back: */
  negated = 0 - dst;
  *((tme_uint16_t *) _op1) = negated;

  /* N follows the sign bit of the result, and Z flags a zero
     result: */
  ccr = 0;
  if (negated & 0x8000) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (negated == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }

  /* V is set when the operand and the result are both negative: */
  if (dst & negated & 0x8000) {
    ccr |= TME_M68K_FLAG_V;
  }

  /* subtracting anything nonzero from zero borrows.  C and X both
     get the borrow: */
  if (dst != 0) {
    ccr |= TME_M68K_FLAG_C | TME_M68K_FLAG_X;
  }

  /* commit the new condition codes: */
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* 16-bit "or SRC, DST".  the bitwise OR replaces DST.  N and Z
   follow the result, V and C come out clear, and X is preserved: */
TME_M68K_INSN(tme_m68k_or16)
{
  tme_uint16_t src, dst, value;
  tme_uint8_t ccr;

  /* fetch the source and destination operands: */
  src = *((tme_uint16_t *) _op0);
  dst = *((tme_uint16_t *) _op1);

  /* combine them and write the result back: */
  value = dst | src;
  *((tme_uint16_t *) _op1) = value;

  /* start from the old X flag, then add N and Z for the result: */
  ccr = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;
  if (value & 0x8000) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (value == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }

  /* commit the new condition codes: */
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* 16-bit "and SRC, DST".  the bitwise AND replaces DST.  N and Z
   follow the result, V and C come out clear, and X is preserved: */
TME_M68K_INSN(tme_m68k_and16)
{
  tme_uint16_t src, dst, value;
  tme_uint8_t ccr;

  /* fetch the source and destination operands: */
  src = *((tme_uint16_t *) _op0);
  dst = *((tme_uint16_t *) _op1);

  /* combine them and write the result back: */
  value = dst & src;
  *((tme_uint16_t *) _op1) = value;

  /* start from the old X flag, then add N and Z for the result: */
  ccr = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;
  if (value & 0x8000) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (value == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }

  /* commit the new condition codes: */
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* 16-bit "eor SRC, DST".  the bitwise exclusive-OR replaces DST.  N
   and Z follow the result, V and C come out clear, and X is
   preserved: */
TME_M68K_INSN(tme_m68k_eor16)
{
  tme_uint16_t src, dst, value;
  tme_uint8_t ccr;

  /* fetch the source and destination operands: */
  src = *((tme_uint16_t *) _op0);
  dst = *((tme_uint16_t *) _op1);

  /* combine them and write the result back: */
  value = dst ^ src;
  *((tme_uint16_t *) _op1) = value;

  /* start from the old X flag, then add N and Z for the result: */
  ccr = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;
  if (value & 0x8000) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (value == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }

  /* commit the new condition codes: */
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* 16-bit "not DST".  DST is replaced by its ones-complement.  N and
   Z follow the result, V and C come out clear, and X is preserved: */
TME_M68K_INSN(tme_m68k_not16)
{
  tme_uint16_t dst, value;
  tme_uint8_t ccr;

  /* fetch the operand: */
  dst = *((tme_uint16_t *) _op1);

  /* flip all sixteen bits and write the result back: */
  value = dst ^ 0xffff;
  *((tme_uint16_t *) _op1) = value;

  /* start from the old X flag, then add N and Z for the result: */
  ccr = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;
  if (value & 0x8000) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (value == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }

  /* commit the new condition codes: */
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* 16-bit "tst DST".  DST is only examined, never written.  N and Z
   follow its value, V and C come out clear, and X is preserved: */
TME_M68K_INSN(tme_m68k_tst16)
{
  tme_uint16_t value;
  tme_uint8_t ccr;

  /* fetch the operand under test: */
  value = *((tme_uint16_t *) _op1);

  /* start from the old X flag, then add N and Z for the operand: */
  ccr = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;
  if (value & 0x8000) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (value == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }

  /* commit the new condition codes: */
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* 16-bit "move SRC, DST".  NB that, unlike the arithmetic functions,
   the source operand arrives as _op1 and the destination as _op0.
   N and Z follow the moved value, V and C come out clear, and X is
   preserved: */
TME_M68K_INSN(tme_m68k_move16)
{
  tme_uint16_t value;
  tme_uint8_t ccr;

  /* fetch the source operand: */
  value = *((tme_uint16_t *) _op1);

  /* copy it to the destination: */
  *((tme_uint16_t *) _op0) = value;

  /* start from the old X flag, then add N and Z for the value: */
  ccr = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;
  if (value & 0x8000) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (value == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }

  /* commit the new condition codes: */
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* 16-bit "clr DST".  DST is overwritten with zero.  since the result
   is always zero, Z always comes out set and N, V and C always come
   out clear; X is preserved: */
TME_M68K_INSN(tme_m68k_clr16)
{
  tme_uint8_t ccr;

  /* zero the destination: */
  *((tme_uint16_t *) _op1) = 0;

  /* the only flags that can be set are Z and the old X: */
  ccr = TME_M68K_FLAG_Z;
  ccr |= (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X);

  /* commit the new condition codes: */
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* 16-bit "cmpa SRC, DST".  the 16-bit source is sign-extended to 32
   bits and compared against the full 32-bit destination.  only N,
   Z, V and C are affected; DST is not written and X is preserved: */
TME_M68K_INSN(tme_m68k_cmpa16)
{
  tme_uint32_t src, dst, diff;
  tme_uint8_t ccr;

  /* fetch the operands, widening the source with its sign: */
  src = (tme_uint32_t) ((tme_int32_t) *((tme_int16_t *) _op0));
  dst = *((tme_uint32_t *) _op1);

  /* form the 32-bit difference: */
  diff = dst - src;

  /* N follows the sign bit of the difference, and Z flags a zero
     difference: */
  ccr = 0;
  if (diff >> (32 - 1)) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (diff == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }

  /* V is set when the operands have different signs and the
     difference does not have the sign of DST: */
  if (((src ^ dst) & (dst ^ diff)) >> (32 - 1)) {
    ccr |= TME_M68K_FLAG_V;
  }

  /* there was a borrow iff SRC is greater than DST: */
  if (dst < src) {
    ccr |= TME_M68K_FLAG_C;
  }

  /* carry the old X flag over and commit the condition codes: */
  ccr |= (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X);
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* 16-bit "negx DST".  DST is replaced by 0 - DST - X.  N, V, C and X
   are regenerated from the operation, while Z can only be cleared:
   it stays set only if it was already set and the result is zero: */
TME_M68K_INSN(tme_m68k_negx16)
{
  tme_uint16_t dst, negated, x_in;
  tme_uint8_t ccr;

  /* fetch the operand: */
  dst = *((tme_uint16_t *) _op1);

  /* get the incoming X flag as a zero or a one: */
  x_in = (ic->tme_m68k_ireg_ccr / TME_M68K_FLAG_X) & 1;

  /* subtract the operand and the X flag from zero, truncating to 16
     bits, and write the result back: */
  negated = 0 - dst - x_in;
  *((tme_uint16_t *) _op1) = negated;

  /* N follows the sign bit of the result: */
  ccr = 0;
  if (negated & 0x8000) {
    ccr |= TME_M68K_FLAG_N;
  }

  /* a zero result keeps whatever Z was before, and a nonzero result
     leaves Z clear: */
  if (negated == 0) {
    ccr |= (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_Z);
  }

  /* V is set when the operand and the result are both negative: */
  if (dst & negated & 0x8000) {
    ccr |= TME_M68K_FLAG_V;
  }

  /* there was a borrow if the operand is nonzero, or if the operand
     is zero but X was set.  C and X both get the borrow: */
  if (dst != 0
      || (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X)) {
    ccr |= TME_M68K_FLAG_C | TME_M68K_FLAG_X;
  }

  /* commit the new condition codes: */
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* this does a 16-bit "addx SRC, DST": DST = DST + SRC + X.

   bit 3 of the opcode selects between the two forms of the
   instruction.  when it is set, both operands are in memory and are
   addressed by predecrementing the address registers numbered in
   the opcode.  when it is clear, both operands are the data
   registers numbered in the opcode.

   N, V, C and X are regenerated from the operation.  Z can only be
   cleared: it stays set only if it was already set and the result
   is zero: */
TME_M68K_INSN(tme_m68k_addx16)
{
  tme_uint16_t res, op0, op1;
  tme_uint8_t flags;

  /* load the operand(s): */
  unsigned int function_code = TME_M68K_FUNCTION_CODE_DATA(ic);
  /* the source register number is in the low three bits of the
     opcode, and the destination register number is at bit 9: */
  int ireg_src = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 0, 3);
  int ireg_dst = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 9, 3);
  /* in the memory form, each address register is predecremented by
     the operand size: */
  tme_uint32_t ireg_src_adjust = sizeof(tme_uint16_t);
  tme_uint32_t ireg_dst_adjust = sizeof(tme_uint16_t);
  /* nonzero iff this is the memory form of the instruction: */
  tme_uint16_t memory;

  memory = (TME_M68K_INSN_OPCODE & TME_BIT(3));
  if (memory) {
    TME_M68K_INSN_CANFAULT;

    /* predecrement the source address register and read the source
       operand into the memy buffer.  the register update and the EA
       setup are skipped when the instruction is being restarted: */
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_src) -= ireg_src_adjust;
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_src);
    }
    tme_m68k_read_mem16(ic, TME_M68K_IREG_MEMY16);

    /* predecrement the destination address register and read the
       destination operand into the memx buffer: */
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst) -= ireg_dst_adjust;
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst);
    }
    tme_m68k_read_memx16(ic);
    op1 = ic->tme_m68k_ireg_memx16;
    op0 = ic->tme_m68k_ireg_memy16;
  }
  else {
    /* NOTE(review): the (ireg << 1) presumably converts a 32-bit
       register number into an index in the 16-bit register view -
       confirm against the tme_m68k_ireg_uint16() definition: */
    op0 = ic->tme_m68k_ireg_uint16((TME_M68K_IREG_D0 + ireg_src) << 1);
    op1 = ic->tme_m68k_ireg_uint16((TME_M68K_IREG_D0 + ireg_dst) << 1);
  }

  /* perform the operation: */
  /* the last term is the incoming X flag, as a zero or a one: */
  res = op1 + op0 + ((ic->tme_m68k_ireg_ccr / TME_M68K_FLAG_X) & 1);

  /* store the result: */
  if (memory) {
    /* the result goes back to the address that the destination
       operand was read from: */
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_memx16 = res;
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst);
    }
    tme_m68k_write_memx16(ic);
  }
  else {
    ic->tme_m68k_ireg_uint16((TME_M68K_IREG_D0 + ireg_dst) << 1) = res;
  }

  /* set the flags: */
  flags = ((tme_uint8_t) (((tme_uint16_t) res) >> (16 - 1))) * TME_M68K_FLAG_N;
  /* a zero result keeps the old Z, a nonzero result leaves Z clear: */
  if (res == 0) flags |= (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_Z);
  /* V is set when the operands have the same sign and the result
     has the other sign: */
  flags |= ((tme_uint8_t) (((op0 ^ op1 ^ 0xffff) & (op1 ^ res)) >> (16 - 1))) * TME_M68K_FLAG_V;
  /* there was a carry if op0 is greater than the ones-complement of
     op1, or if they are equal and the incoming X flag was set: */
  if (op0 > (op1 ^ 0xffff) || (op0 == (op1 ^ 0xffff) && (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X))) flags |= TME_M68K_FLAG_C | TME_M68K_FLAG_X;
  ic->tme_m68k_ireg_ccr = flags;

  TME_M68K_INSN_OK;
}

/* this does a 16-bit "subx SRC, DST": DST = DST - SRC - X.

   bit 3 of the opcode selects between the two forms of the
   instruction.  when it is set, both operands are in memory and are
   addressed by predecrementing the address registers numbered in
   the opcode.  when it is clear, both operands are the data
   registers numbered in the opcode.

   N, V, C and X are regenerated from the operation.  Z can only be
   cleared: it stays set only if it was already set and the result
   is zero: */
TME_M68K_INSN(tme_m68k_subx16)
{
  tme_uint16_t res, op0, op1;
  tme_uint8_t flags;

  /* load the operand(s): */
  unsigned int function_code = TME_M68K_FUNCTION_CODE_DATA(ic);
  /* the source register number is in the low three bits of the
     opcode, and the destination register number is at bit 9: */
  int ireg_src = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 0, 3);
  int ireg_dst = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 9, 3);
  /* in the memory form, each address register is predecremented by
     the operand size: */
  tme_uint32_t ireg_src_adjust = sizeof(tme_uint16_t);
  tme_uint32_t ireg_dst_adjust = sizeof(tme_uint16_t);
  /* nonzero iff this is the memory form of the instruction: */
  tme_uint16_t memory;

  memory = (TME_M68K_INSN_OPCODE & TME_BIT(3));
  if (memory) {
    TME_M68K_INSN_CANFAULT;

    /* predecrement the source address register and read the source
       operand into the memy buffer.  the register update and the EA
       setup are skipped when the instruction is being restarted: */
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_src) -= ireg_src_adjust;
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_src);
    }
    tme_m68k_read_mem16(ic, TME_M68K_IREG_MEMY16);

    /* predecrement the destination address register and read the
       destination operand into the memx buffer: */
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst) -= ireg_dst_adjust;
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst);
    }
    tme_m68k_read_memx16(ic);
    op1 = ic->tme_m68k_ireg_memx16;
    op0 = ic->tme_m68k_ireg_memy16;
  }
  else {
    /* NOTE(review): the (ireg << 1) presumably converts a 32-bit
       register number into an index in the 16-bit register view -
       confirm against the tme_m68k_ireg_uint16() definition: */
    op0 = ic->tme_m68k_ireg_uint16((TME_M68K_IREG_D0 + ireg_src) << 1);
    op1 = ic->tme_m68k_ireg_uint16((TME_M68K_IREG_D0 + ireg_dst) << 1);
  }

  /* perform the operation: */
  /* the last term is the incoming X flag, as a zero or a one: */
  res = op1 - op0 - ((ic->tme_m68k_ireg_ccr / TME_M68K_FLAG_X) & 1);

  /* store the result: */
  if (memory) {
    /* the result goes back to the address that the destination
       operand was read from: */
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_memx16 = res;
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst);
    }
    tme_m68k_write_memx16(ic);
  }
  else {
    ic->tme_m68k_ireg_uint16((TME_M68K_IREG_D0 + ireg_dst) << 1) = res;
  }

  /* set the flags: */
  flags = ((tme_uint8_t) (((tme_uint16_t) res) >> (16 - 1))) * TME_M68K_FLAG_N;
  /* a zero result keeps the old Z, a nonzero result leaves Z clear: */
  if (res == 0) flags |= (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_Z);
  /* V is set when the operands have different signs and the result
     does not have the sign of op1: */
  flags |= ((tme_uint8_t) (((op0 ^ op1) & (op1 ^ res)) >> (16 - 1))) * TME_M68K_FLAG_V;
  /* there was a borrow if op0 is greater than op1, or if they are
     equal and the incoming X flag was set: */
  if (op0 > op1 || (op0 == op1 && (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X))) flags |= TME_M68K_FLAG_C | TME_M68K_FLAG_X;
  ic->tme_m68k_ireg_ccr = flags;

  TME_M68K_INSN_OK;
}

/* this does a 16-bit "cmpm SRC, DST".  both operands are read from
   memory through the address registers numbered in the opcode, and
   each address register is postincremented by the operand size.
   DST - SRC is computed only for its effect on N, Z, V and C;
   nothing is written to memory and X is preserved: */
TME_M68K_INSN(tme_m68k_cmpm16)
{
  tme_uint16_t res, op0, op1;
  tme_uint8_t flags;

  /* load the operand(s): */
  unsigned int function_code = TME_M68K_FUNCTION_CODE_DATA(ic);
  /* the source register number is in the low three bits of the
     opcode, and the destination register number is at bit 9: */
  int ireg_src = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 0, 3);
  int ireg_dst = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 9, 3);
  tme_uint32_t ireg_src_adjust = sizeof(tme_uint16_t);
  tme_uint32_t ireg_dst_adjust = sizeof(tme_uint16_t);

  TME_M68K_INSN_CANFAULT;

  /* read the source operand into the memy buffer.  the EA is taken
     from the source address register before that register is
     postincremented.  both steps are skipped when the instruction
     is being restarted: */
  if (!TME_M68K_SEQUENCE_RESTARTING) {
    ic->_tme_m68k_ea_function_code = function_code;
    ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_src);
    ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_src) += ireg_src_adjust;
  }
  tme_m68k_read_mem16(ic, TME_M68K_IREG_MEMY16);

  /* read the destination operand into the memx buffer, in the same
     way: */
  if (!TME_M68K_SEQUENCE_RESTARTING) {
    ic->_tme_m68k_ea_function_code = function_code;
    ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst);
    ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst) += ireg_dst_adjust;
  }
  tme_m68k_read_memx16(ic);
  op1 = ic->tme_m68k_ireg_memx16;
  op0 = ic->tme_m68k_ireg_memy16;

  /* perform the operation: */
  res = op1 - op0;

  /* set the flags: */
  flags = ((tme_uint8_t) (((tme_uint16_t) res) >> (16 - 1))) * TME_M68K_FLAG_N;
  if (res == 0) flags |= TME_M68K_FLAG_Z;
  /* V is set when the operands have different signs and the result
     does not have the sign of op1: */
  flags |= ((tme_uint8_t) (((op0 ^ op1) & (op1 ^ res)) >> (16 - 1))) * TME_M68K_FLAG_V;
  /* there was a borrow iff op0 is greater than op1: */
  if (op0 > op1) flags |= TME_M68K_FLAG_C;
  /* a compare never changes X: */
  flags |= (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X);
  ic->tme_m68k_ireg_ccr = flags;

  TME_M68K_INSN_OK;
}

/* a move of an address register to a predecrement or
   postincrement EA with that same address register, must
   store the original value of the address register.  since the
   predecrement and postincrement code in the executer updates
   the address register before the move has happened, we wrap
   the normal move function in this one, that gives an op1
   argument that is the original value of the address register: */
TME_M68K_INSN(tme_m68k_move_srpd16)
{
  /* NB: both this function and tme_m68k_move16()
     get the source operand as _op1, and the destination
     operand as _op0: */
  if (!TME_M68K_SEQUENCE_RESTARTING) {
    *((tme_uint16_t *) _op0)
      = (*((tme_uint16_t *) _op1)
         + sizeof(tme_uint16_t));
  }
  tme_m68k_move16(ic, _op0, _op0);
}

/* a move of an address register to a predecrement or
   postincrement EA with that same address register, must
   store the original value of the address register.  since the
   predecrement and postincrement code in the executer updates
   the address register before the move has happened, we wrap
   the normal move function in this one, that gives an op1
   argument that is the original value of the address register: */
TME_M68K_INSN(tme_m68k_move_srpi16)
{
  /* NB: both this function and tme_m68k_move16()
     get the source operand as _op1, and the destination
     operand as _op0: */
  if (!TME_M68K_SEQUENCE_RESTARTING) {
    *((tme_uint16_t *) _op0)
      = (*((tme_uint16_t *) _op1)
         - sizeof(tme_uint16_t));
  }
  tme_m68k_move16(ic, _op0, _op0);
}

/* the suba function on a 16-byte EA: */
TME_M68K_INSN(tme_m68k_suba16)
{
  *((tme_int32_t *) _op1) -= *((tme_int16_t *) _op0);
  TME_M68K_INSN_OK;
}

/* the adda function on a 16-byte EA: */
TME_M68K_INSN(tme_m68k_adda16)
{
  *((tme_int32_t *) _op1) += *((tme_int16_t *) _op0);
  TME_M68K_INSN_OK;
}

/* the movea function on a 16-byte EA: */
TME_M68K_INSN(tme_m68k_movea16)
{
  *((tme_int32_t *) _op0) = *((tme_int16_t *) _op1);
  TME_M68K_INSN_OK;
}

/* the asl function on a 16-bit EA.  the shift count comes from the
   low six bits of the first operand, and the value to shift is the
   second operand, which also receives the result.

   with a zero count, the value is unchanged, X is preserved and V
   and C come out clear.  with a nonzero count, C and X get the last
   bit shifted out, and V is set iff the sign bit changed at any
   point during the shift.  N and Z always follow the result: */
TME_M68K_INSN(tme_m68k_asl16)
{
  unsigned int count;
  tme_uint16_t sign_bits, sign_bits_mask;
  tme_uint16_t res;
  tme_uint8_t flags;

  /* get the count and operand: */
  count = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  res = TME_M68K_INSN_OP1(tme_uint16_t);

  /* generate the X, V, and C flags assuming the count is zero: */
  flags = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;

  /* if the count is nonzero, update the result and
     generate the X, V, and C flags: */
  if (count > 0) {

    /* we need to see how the sign of the result will change during
       shifting in order to generate V.

       in general, the idea is to get all of the bits that will ever
       appear in the sign position into sign_bits, with a mask in
       sign_bits_mask.  if (sign_bits & sign_bits_mask) is zero or
       sign_bits_mask, clear V, else set V.

       start by loading the operand into sign_bits and setting
       sign_bits_mask to all-bits-one.

       if the shift count is exactly 16 - 1, then all of the bits
       of the operand will appear in the sign position.

       if the shift count is less than 16 - 1, then some of the
       less significant bits of the operand will never appear in the
       sign position, so we can shift sign_bits_mask to ignore them.

       if the shift count is greater than 16 - 1, then all of the
       bits in the operand, plus at least one zero bit, will appear in
       the sign position.  the only way that the sign bit will never
       change during the shift is if the operand was zero to begin with.
       without any changes to sign_bits or sign_bits_mask, the final
       test will always work, except when sign_bits is all-bits-one.
       in that case we clear the least-significant bit of sign_bits,
       so that the final test sees a change of sign: */
    sign_bits = res;

    /* if the count may be larger than what the shift operator is
       known to handle, any count greater than 16 shifts every
       operand bit out, so the result and the carry are both zero: */
    if (63 > SHIFTMAX_INT16_T
        && count > 16) {
      res = 0;
    }

    /* shift by all but one position, so that the last bit to be
       shifted out is in the sign position, capture it as C and X,
       and then do the final single shift: */
    res <<= (count - 1);
    flags = (res >> (16 - 1));
    flags *= TME_M68K_FLAG_C;
    flags |= (flags * TME_M68K_FLAG_X);
    res <<= 1;

    sign_bits_mask = (tme_uint16_t) -1;
    if (count != 16 - 1) {
      if (count < 16) {
        sign_bits_mask <<= ((16 - 1) - count);
      }
      else {
        /* NB: we compare against the all-bits-one value explicitly.
           an expression like !(sign_bits + 1) does not work here,
           because sign_bits is promoted to int before the addition,
           so the sum of a 16-bit all-bits-one value and one does not
           wrap to zero: */
        sign_bits ^= (sign_bits == (tme_uint16_t) -1);
      }
    }
    sign_bits &= sign_bits_mask;
    if (sign_bits != 0 && sign_bits != sign_bits_mask) {
      flags |= TME_M68K_FLAG_V;
    }
  }

  /* store the result: */
  TME_M68K_INSN_OP1(tme_uint16_t) = res;

  /* generate the N flag.  we cast to tme_uint8_t as soon as we
     know the bit we want is within the range of the type, to try
     to affect the generated assembly: */
  flags |= ((tme_uint8_t) (((tme_uint16_t) res) >> (16 - 1))) * TME_M68K_FLAG_N;

  /* generate the Z flag: */
  if (res == 0) flags |= TME_M68K_FLAG_Z;

  /* store the flags: */
  ic->tme_m68k_ireg_ccr = flags;
  TME_M68K_INSN_OK;
}

/* the asr function on a 16-bit EA.  the shift count comes from the
   low six bits of the first operand, and the signed value to shift
   is the second operand, which also receives the result.

   with a zero count, the value is unchanged, X is preserved and C
   comes out clear.  with a nonzero count, C and X get the last bit
   shifted out.  V always comes out clear, and N and Z always follow
   the result: */
TME_M68K_INSN(tme_m68k_asr16)
{
  unsigned int count;
  tme_int16_t res;
  tme_uint8_t flags;

  /* get the count and operand: */
  count = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  res = TME_M68K_INSN_OP1(tme_int16_t);

  /* generate the X, V, and C flags assuming the count is zero: */
  flags = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;

  /* if the count is nonzero, update the result and
     generate the X, V, and C flags: */
  if (count > 0) {

    /* if the count may be larger than what the shift operator is
       known to handle, any count greater than 16 leaves nothing but
       copies of the sign bit, so replace the operand with zero or
       all-bits-one (minus one) up front: */
    if (63 > SHIFTMAX_INT16_T
        && count > 16) {
      res = 0 - (res < 0);
    }

    /* shift by all but one position, so that the last bit to be
       shifted out ends up in bit zero.
       NOTE(review): SHIFTSIGNED_INT16_T is presumably defined when
       >> on a negative tme_int16_t is known to be an arithmetic
       shift - confirm against the configure checks.  without it,
       each single-position shift is done as a division by two after
       clearing bit zero, which makes the division exact and so
       rounds negative values the way an arithmetic shift does: */
#ifdef SHIFTSIGNED_INT16_T
    res >>= (count - 1);
#else  /* !SHIFTSIGNED_INT16_T */
    for (; --count > 0; ) {
      res = (res & ~((tme_int16_t) 1)) / 2;
    }
#endif /* !SHIFTSIGNED_INT16_T */

    /* capture the last bit to be shifted out as C and X: */
    flags = (res & 1);
    flags *= TME_M68K_FLAG_C;
    flags |= (flags * TME_M68K_FLAG_X);

    /* do the final single-position shift: */
#ifdef SHIFTSIGNED_INT16_T
    res >>= 1;
#else  /* !SHIFTSIGNED_INT16_T */
    res = (res & ~((tme_int16_t) 1)) / 2;
#endif /* !SHIFTSIGNED_INT16_T */
  }

  /* store the result: */
  TME_M68K_INSN_OP1(tme_int16_t) = res;

  /* generate the N flag.  we cast to tme_uint8_t as soon as we
     know the bit we want is within the range of the type, to try
     to affect the generated assembly: */
  flags |= ((tme_uint8_t) (((tme_uint16_t) res) >> (16 - 1))) * TME_M68K_FLAG_N;

  /* generate the Z flag: */
  if (res == 0) flags |= TME_M68K_FLAG_Z;

  /* store the flags: */
  ic->tme_m68k_ireg_ccr = flags;
  TME_M68K_INSN_OK;
}

/* the lsl function on a 16-bit EA.  the shift count is the low six
   bits of the first operand, and the second operand is shifted in
   place.  a zero count preserves X and leaves C clear; a nonzero
   count puts the last bit shifted out into both C and X.  V always
   comes out clear, and N and Z follow the result: */
TME_M68K_INSN(tme_m68k_lsl16)
{
  unsigned int shift;
  tme_uint16_t value;
  tme_uint8_t ccr;

  /* fetch the shift count and the value to shift: */
  shift = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  value = TME_M68K_INSN_OP1(tme_uint16_t);

  /* unless we actually shift, only the old X flag survives: */
  ccr = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;

  if (shift != 0) {

    /* when the shift operator may not handle the largest counts,
       note that more than 16 shifts always leave nothing behind: */
    if (63 > SHIFTMAX_INT16_T
        && shift > 16) {
      value = 0;
    }

    /* stop one position short, so that the final bit to leave is
       sitting in the top bit: */
    value <<= (shift - 1);

    /* that top bit becomes both C and X: */
    ccr = (tme_uint8_t) (value >> (16 - 1)) * TME_M68K_FLAG_C;
    ccr |= (ccr * TME_M68K_FLAG_X);

    /* finish with the last single-position shift: */
    value <<= 1;
  }

  /* write the shifted value back: */
  TME_M68K_INSN_OP1(tme_uint16_t) = value;

  /* N is the top bit of the result: */
  if (value & 0x8000) {
    ccr |= TME_M68K_FLAG_N;
  }

  /* Z flags a zero result: */
  if (value == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }

  /* commit the new condition codes: */
  ic->tme_m68k_ireg_ccr = ccr;
  TME_M68K_INSN_OK;
}

/* the lsr function on a 16-bit EA.  the shift count is the low six
   bits of the first operand, and the second operand is shifted in
   place.  a zero count preserves X and leaves C clear; a nonzero
   count puts the last bit shifted out into both C and X.  V always
   comes out clear, and N and Z follow the result: */
TME_M68K_INSN(tme_m68k_lsr16)
{
  unsigned int shift;
  tme_uint16_t value;
  tme_uint8_t ccr;

  /* fetch the shift count and the value to shift: */
  shift = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  value = TME_M68K_INSN_OP1(tme_uint16_t);

  /* unless we actually shift, only the old X flag survives: */
  ccr = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;

  if (shift != 0) {

    /* when the shift operator may not handle the largest counts,
       note that more than 16 shifts always leave nothing behind: */
    if (63 > SHIFTMAX_INT16_T
        && shift > 16) {
      value = 0;
    }

    /* stop one position short, so that the final bit to leave is
       sitting in the bottom bit: */
    value >>= (shift - 1);

    /* that bottom bit becomes both C and X: */
    ccr = (tme_uint8_t) (value & 1) * TME_M68K_FLAG_C;
    ccr |= (ccr * TME_M68K_FLAG_X);

    /* finish with the last single-position shift: */
    value >>= 1;
  }

  /* write the shifted value back: */
  TME_M68K_INSN_OP1(tme_uint16_t) = value;

  /* N is the top bit of the result: */
  if (value & 0x8000) {
    ccr |= TME_M68K_FLAG_N;
  }

  /* Z flags a zero result: */
  if (value == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }

  /* commit the new condition codes: */
  ic->tme_m68k_ireg_ccr = ccr;
  TME_M68K_INSN_OK;
}

/* the rol function on a 16-bit EA.  the rotate count is the low six
   bits of the first operand, and the second operand is rotated in
   place.  X is always preserved.  C is clear for a zero count, and
   otherwise gets the last bit rotated around, which is bit zero of
   the result.  V always comes out clear, and N and Z follow the
   result: */
TME_M68K_INSN(tme_m68k_rol16)
{
  unsigned int rotate;
  tme_uint16_t value;
  tme_uint8_t ccr;

  /* fetch the rotate count and the value to rotate: */
  rotate = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  value = TME_M68K_INSN_OP1(tme_uint16_t);

  /* a rotate never changes X: */
  ccr = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;

  if (rotate != 0) {

    /* rotating by a multiple of 16 leaves the value as it was, so
       only the count modulo 16 matters for the result: */
    rotate %= 16;
    value = (value >> (16 - rotate)) | (value << rotate);

    /* the last bit to be rotated around landed in bit zero: */
    if (value & 1) {
      ccr |= TME_M68K_FLAG_C;
    }
  }

  /* write the rotated value back: */
  TME_M68K_INSN_OP1(tme_uint16_t) = value;

  /* N is the top bit of the result: */
  if (value & 0x8000) {
    ccr |= TME_M68K_FLAG_N;
  }

  /* Z flags a zero result: */
  if (value == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }

  /* commit the new condition codes: */
  ic->tme_m68k_ireg_ccr = ccr;
  TME_M68K_INSN_OK;
}

/* the ror function on a 16-bit EA.  the rotate count is the low six
   bits of the first operand, and the second operand is rotated in
   place.  X is always preserved.  C is clear for a zero count, and
   otherwise gets the last bit rotated around, which is the top bit
   of the result.  V always comes out clear, and N and Z follow the
   result: */
TME_M68K_INSN(tme_m68k_ror16)
{
  unsigned int rotate;
  tme_uint16_t value;
  tme_uint8_t ccr;

  /* fetch the rotate count and the value to rotate: */
  rotate = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  value = TME_M68K_INSN_OP1(tme_uint16_t);

  /* a rotate never changes X: */
  ccr = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;

  if (rotate != 0) {

    /* rotating by a multiple of 16 leaves the value as it was, so
       only the count modulo 16 matters for the result: */
    rotate %= 16;
    value = (value >> rotate) | (value << (16 - rotate));

    /* the last bit to be rotated around landed in the top bit: */
    if (value & 0x8000) {
      ccr |= TME_M68K_FLAG_C;
    }
  }

  /* write the rotated value back: */
  TME_M68K_INSN_OP1(tme_uint16_t) = value;

  /* N is the top bit of the result: */
  if (value & 0x8000) {
    ccr |= TME_M68K_FLAG_N;
  }

  /* Z flags a zero result: */
  if (value == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }

  /* commit the new condition codes: */
  ic->tme_m68k_ireg_ccr = ccr;
  TME_M68K_INSN_OK;
}

/* the roxl function on a 16-bit EA.  this rotates the second operand
   left through the X flag, i.e., it rotates the 17-bit quantity made
   of X and the 16 operand bits.  the rotate count is the low six
   bits of the first operand.

   with a zero count, the value and X are unchanged and C is set to
   X.  with a nonzero count, C and X both get the last bit rotated
   out of the operand (or the old X, if the count is a multiple of
   17).  V always comes out clear, and N and Z follow the result: */
TME_M68K_INSN(tme_m68k_roxl16)
{
  unsigned int count;
  tme_uint8_t xbit;
  tme_uint16_t res;
  tme_uint8_t flags;

  /* get the count and operand: */
  count = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  res = TME_M68K_INSN_OP1(tme_uint16_t);

  /* generate the X, V, and C flags assuming the count is zero: */
  flags = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;
  /* get the incoming X flag as a zero or a one: */
  xbit = (flags / TME_M68K_FLAG_X);
  flags |= (xbit * TME_M68K_FLAG_C);

  /* if the count is nonzero, update the result and
     generate the X, V, and C flags: */
  if (count > 0) {

    /* a full trip around the 17 bits changes nothing, so only the
       count modulo 17 matters: */
    count %= (16 + 1);

    /* if the reduced count is zero, the carry out is the old X: */
    flags = xbit;
    if (count > 0) {

      /* the last bit rotated out of the operand is the bit that is
         count positions down from the top: */
      flags = (res >> (16 - count)) & 1;

      /* the general rotate is in the last branch.  the first two
         branches are the same rotate for counts 16 and 1, with the
         subexpression that would be shifted by 16 replaced by its
         value, zero.  they are only selected when a shift by 16 may
         be larger than what the shift operator is known to handle: */
      if (16 > SHIFTMAX_INT16_T
          && count == 16) {
        res = 0 | (xbit << (16 - 1)) | (res >> ((16 + 1) - 16));
      }
      else if (16 > SHIFTMAX_INT16_T
               && count == 1) {
        res = (res << 1) | (xbit << (1 - 1)) | 0;
      }
      else {
        /* the operand bits that stay move up by count, the old X
           lands just below them, and the operand bits that went
           out through X come back in at the bottom: */
        res = (res << count) | (xbit << (count - 1)) | (res >> ((16 + 1) - count));
      }
    }

    /* turn the zero-or-one carry out into the C and X flags: */
    flags *= TME_M68K_FLAG_C;
    flags |= (flags * TME_M68K_FLAG_X);
  }

  /* store the result: */
  TME_M68K_INSN_OP1(tme_uint16_t) = res;

  /* generate the N flag.  we cast to tme_uint8_t as soon as we
     know the bit we want is within the range of the type, to try
     to affect the generated assembly: */
  flags |= ((tme_uint8_t) (((tme_uint16_t) res) >> (16 - 1))) * TME_M68K_FLAG_N;

  /* generate the Z flag: */
  if (res == 0) flags |= TME_M68K_FLAG_Z;

  /* store the flags: */
  ic->tme_m68k_ireg_ccr = flags;
  TME_M68K_INSN_OK;
}

/* the roxr function on a 16-bit EA.  this rotates the second operand
   right through the X flag, i.e., it rotates the 17-bit quantity
   made of X and the 16 operand bits.  the rotate count is the low
   six bits of the first operand.

   with a zero count, the value and X are unchanged and C is set to
   X.  with a nonzero count, C and X both get the last bit rotated
   out of the operand (or the old X, if the count is a multiple of
   17).  V always comes out clear, and N and Z follow the result: */
TME_M68K_INSN(tme_m68k_roxr16)
{
  unsigned int count;
  tme_uint8_t xbit;
  tme_uint16_t res;
  tme_uint8_t flags;

  /* get the count and operand: */
  count = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  res = TME_M68K_INSN_OP1(tme_uint16_t);

  /* generate the X, V, and C flags assuming the count is zero: */
  flags = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;
  /* get the incoming X flag as a zero or a one: */
  xbit = (flags / TME_M68K_FLAG_X);
  flags |= (xbit * TME_M68K_FLAG_C);

  /* if the count is nonzero, update the result and
     generate the X, V, and C flags: */
  if (count > 0) {

    /* a full trip around the 17 bits changes nothing, so only the
       count modulo 17 matters: */
    count %= (16 + 1);

    /* if the reduced count is zero, the carry out is the old X: */
    flags = xbit;
    if (count > 0) {

      /* the last bit rotated out of the operand is the bit that is
         count positions up from the bottom: */
      flags = (res >> (count - 1)) & 1;

      /* the general rotate is in the last branch.  the first two
         branches are the same rotate for counts 16 and 1, with the
         subexpression that would be shifted by 16 replaced by its
         value, zero.  they are only selected when a shift by 16 may
         be larger than what the shift operator is known to handle: */
      if (16 > SHIFTMAX_INT16_T
          && count == 16) {
        res = (res << ((16 + 1) - 16)) | (xbit << (16 - 16)) | 0;
      }
      else if (16 > SHIFTMAX_INT16_T
               && count == 1) {
        res = 0 | (xbit << (16 - 1)) | (res >> 1);
      }
      else {
        /* the operand bits that went out through X come back in at
           the top, the old X lands just below them, and the operand
           bits that stay move down by count: */
        res = (res << ((16 + 1) - count)) | (xbit << (16 - count)) | (res >> count);
      }
    }

    /* turn the zero-or-one carry out into the C and X flags: */
    flags *= TME_M68K_FLAG_C;
    flags |= (flags * TME_M68K_FLAG_X);
  }

  /* store the result: */
  TME_M68K_INSN_OP1(tme_uint16_t) = res;

  /* generate the N flag.  we cast to tme_uint8_t as soon as we
     know the bit we want is within the range of the type, to try
     to affect the generated assembly: */
  flags |= ((tme_uint8_t) (((tme_uint16_t) res) >> (16 - 1))) * TME_M68K_FLAG_N;

  /* generate the Z flag: */
  if (res == 0) flags |= TME_M68K_FLAG_Z;

  /* store the flags: */
  ic->tme_m68k_ireg_ccr = flags;
  TME_M68K_INSN_OK;
}

/* the movep_rm function on a 16-bit dreg.  this writes the two bytes
   of the low 16 bits of a data register to memory, most-significant
   byte first, at every other byte address.  the first address is the
   32-bit second operand plus the sign-extended 16-bit specop word: */
TME_M68K_INSN(tme_m68k_movep_rm16)
{
  unsigned int function_code;
  tme_uint32_t linear_address;
  tme_uint16_t value;
  int dreg;

  TME_M68K_INSN_CANFAULT;

  /* get the function code and form the starting address: */
  function_code = TME_M68K_FUNCTION_CODE_DATA(ic);
  linear_address = TME_M68K_INSN_OP1(tme_uint32_t);
  linear_address += (tme_int32_t) ((tme_int16_t) TME_M68K_INSN_SPECOP);

  /* the data register number is the three-bit field at bit 9 of
     the opcode: */
  dreg = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 9, 3);
  value = ic->tme_m68k_ireg_uint16(dreg << 1);

  /* write bits 8..15 of the value.  as with all of the transfers
     here, the EA and data buffer setup is skipped when the
     instruction is being restarted: */
  if (!TME_M68K_SEQUENCE_RESTARTING) {
    ic->_tme_m68k_ea_function_code = function_code;
    ic->_tme_m68k_ea_address = linear_address;
    ic->tme_m68k_ireg_memx8 = TME_FIELD_EXTRACTU(value, 8, 8);
  }
  tme_m68k_write_memx8(ic);
  linear_address += 2;

  /* write bits 0..7 of the value, two bytes further on: */
  if (!TME_M68K_SEQUENCE_RESTARTING) {
    ic->_tme_m68k_ea_function_code = function_code;
    ic->_tme_m68k_ea_address = linear_address;
    ic->tme_m68k_ireg_memx8 = TME_FIELD_EXTRACTU(value, 0, 8);
  }
  tme_m68k_write_memx8(ic);

  /* NB: this last adjustment has no effect, since linear_address
     is not used again: */
  linear_address += 2;
  TME_M68K_INSN_OK;
}

/* the movem_rm function on 16-bit registers.  this writes the low
   16 bits of each register selected in the specop mask word to
   memory, starting at the current effective address.

   normally, mask bit zero selects D0, later bits select later
   registers, and addresses ascend.  in the predecrement EA mode
   (mode 4), mask bit zero selects A7, later bits select earlier
   registers, addresses descend, and the EA address register is
   updated to the address of the last word written: */
TME_M68K_INSN(tme_m68k_movem_rm16)
{
  int ireg, direction;
  tme_uint16_t mask, bit;
  unsigned int ea_mode;
  tme_uint32_t addend;
  tme_uint32_t total_size;
  /* get the register mask, and figure out the total size
     of the transfer: */
  mask = TME_M68K_INSN_SPECOP;
  total_size = 0;
  if (mask != 0) {
    TME_M68K_INSN_CANFAULT;

    /* each trip through this loop clears the least-significant set
       bit in the mask copy, so it runs once per selected register: */
    bit = mask;
    do {
      total_size += sizeof(tme_uint16_t);
      bit &= (bit - 1);
    } while (bit != 0);
  }

  /* figure out what direction to move in, and where to start from: */
  ea_mode = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 3, 3);
  direction = 1;
  ireg = TME_M68K_IREG_D0;
  if (ea_mode == 4) {
    direction = -1;
    ireg = TME_M68K_IREG_A7;
    if (!TME_M68K_SEQUENCE_RESTARTING) {

      /* "For the MC68020, MC68030, MC68040, and CPU32, if
         the addressing register is also moved to memory, the
         value written is the initial register value decremented 
         by the size of the operation. The MC68000 and MC68010 
         write the initial register value (not decremented)." */
      /* so on those newer CPUs the address register gets its final
         value now, before any register is written to memory: */
      if (ic->tme_m68k_type >= TME_M68K_M68020) {
        ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0
                                 + TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 0, 3))
          = (ic->_tme_m68k_ea_address - total_size);
      }

      /* predecrement the effective address for the first transfer: */
      ic->_tme_m68k_ea_address -= sizeof(tme_uint16_t);
    }
  }

  /* NB: when direction is -1, this relies on unsigned wraparound to
     make an addend that steps the effective address backwards: */
  addend = (tme_uint32_t) (direction * sizeof(tme_uint16_t));

  /* do the transfer: */
  /* NB: the loop ends when the single mask bit is shifted out of
     the top of the 16-bit type.  ireg is stepped for every mask bit,
     selected or not, but the effective address only advances after
     a transfer: */
  for (bit = 1; bit != 0; bit <<= 1) {
    if (mask & bit) {
      if (!TME_M68K_SEQUENCE_RESTARTING) {
        ic->tme_m68k_ireg_memx16 = ic->tme_m68k_ireg_uint16(ireg << 1);
      }
      tme_m68k_write_memx16(ic);
      if (!TME_M68K_SEQUENCE_RESTARTING) {
        ic->_tme_m68k_ea_address += addend;
      }
    }
    ireg += direction;
  }

  /* if this is the predecrement mode, update the address register: */
  /* "For the MC68020, MC68030, MC68040, and CPU32, if
     the addressing register is also moved to memory, the
     value written is the initial register value decremented 
     by the size of the operation. The MC68000 and MC68010 
     write the initial register value (not decremented)." */
  /* so on the older CPUs the address register is only updated here,
     after all of the transfers.  the effective address has already
     been stepped past the last word written, so step it back: */
  if (ea_mode == 4
      && ic->tme_m68k_type < TME_M68K_M68020) {
    ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0
                              + TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 0, 3))
      = (ic->_tme_m68k_ea_address + sizeof(tme_uint16_t));
  }
  TME_M68K_INSN_OK;
}

/* the movep_mr function on a 16-bit dreg.  this reads two bytes from
   memory, at every other byte address, into the low 16 bits of a
   data register, most-significant byte first.  the first address is
   the 32-bit second operand plus the sign-extended 16-bit specop
   word: */
TME_M68K_INSN(tme_m68k_movep_mr16)
{
  unsigned int function_code;
  tme_uint32_t linear_address;
  int dreg;

  TME_M68K_INSN_CANFAULT;

  /* get the function code and form the starting address: */
  function_code = TME_M68K_FUNCTION_CODE_DATA(ic);
  linear_address = TME_M68K_INSN_OP1(tme_uint32_t);
  linear_address += (tme_int32_t) ((tme_int16_t) TME_M68K_INSN_SPECOP);

  /* the data register number is the three-bit field at bit 9 of
     the opcode: */
  dreg = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 9, 3);

  /* read the first byte and deposit it into bits 8..15 of the
     register.  as with all of the transfers here, the EA setup and
     the register update are skipped when the instruction is being
     restarted: */
  if (!TME_M68K_SEQUENCE_RESTARTING) {
    ic->_tme_m68k_ea_function_code = function_code;
    ic->_tme_m68k_ea_address = linear_address;
  }
  tme_m68k_read_memx8(ic);
  if (!TME_M68K_SEQUENCE_RESTARTING) {
    TME_FIELD_DEPOSIT16(ic->tme_m68k_ireg_uint16(dreg << 1), 8, 8, ic->tme_m68k_ireg_memx8);
  }
  linear_address += 2;

  /* read the second byte, two bytes further on, and deposit it
     into bits 0..7 of the register: */
  if (!TME_M68K_SEQUENCE_RESTARTING) {
    ic->_tme_m68k_ea_function_code = function_code;
    ic->_tme_m68k_ea_address = linear_address;
  }
  tme_m68k_read_memx8(ic);
  if (!TME_M68K_SEQUENCE_RESTARTING) {
    TME_FIELD_DEPOSIT16(ic->tme_m68k_ireg_uint16(dreg << 1), 0, 8, ic->tme_m68k_ireg_memx8);
  }

  /* NB: this last adjustment has no effect, since linear_address
     is not used again: */
  linear_address += 2;
  TME_M68K_INSN_OK;
}

/* the movem_mr function on 16-bit registers: */
TME_M68K_INSN(tme_m68k_movem_mr16)
{
  int ireg, direction;
  tme_uint16_t mask, bit;
  unsigned int ea_mode;
  tme_uint32_t addend;
  tme_uint32_t total_size;
  /* get the register mask, and figure out the total size
     of the transfer: */
  mask = TME_M68K_INSN_SPECOP;
  total_size = 0;
  if (mask != 0) {
    TME_M68K_INSN_CANFAULT;
    bit = mask;
    do {
      total_size += sizeof(tme_uint16_t);
      bit &= (bit - 1);
    } while (bit != 0);
  }

  /* figure out what direction to move in, and where to start from: */
  ea_mode = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 3, 3);
  direction = 1;
  ireg = TME_M68K_IREG_D0;
  addend = (tme_uint32_t) (direction * sizeof(tme_uint16_t));

  /* do the transfer: */
  for (bit = 1; bit != 0; bit <<= 1) {
    if (mask & bit) {
      tme_m68k_read_memx16(ic);
      if (!TME_M68K_SEQUENCE_RESTARTING) {
        ic->tme_m68k_ireg_uint32(ireg) = TME_EXT_S16_U32((tme_int16_t) ic->tme_m68k_ireg_memx16);
        ic->_tme_m68k_ea_address += addend;
      }
    }
    ireg += direction;
  }

  /* if this is the postincrement mode, update the address register: */
  if (ea_mode == 3) {
    ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0
                              + TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 0, 3))
      = ic->_tme_m68k_ea_address;
  }
  TME_M68K_INSN_OK;
}

/* chk16: */
TME_M68K_INSN(tme_m68k_chk16)
{
  if (*((tme_int16_t *) _op0) < 0) {
    ic->tme_m68k_ireg_ccr |= TME_M68K_FLAG_N;
    ic->tme_m68k_ireg_pc_last = ic->tme_m68k_ireg_pc;
    ic->tme_m68k_ireg_pc = ic->tme_m68k_ireg_pc_next;
    TME_M68K_INSN_EXCEPTION(TME_M68K_EXCEPTION_INST(TME_M68K_VECTOR_CHK));
  }
  if (*((tme_int16_t *) _op0) > *((tme_int16_t *) _op1)) {
    ic->tme_m68k_ireg_ccr &= ~TME_M68K_FLAG_N;
    ic->tme_m68k_ireg_pc_last = ic->tme_m68k_ireg_pc;
    ic->tme_m68k_ireg_pc = ic->tme_m68k_ireg_pc_next;
    TME_M68K_INSN_EXCEPTION(TME_M68K_EXCEPTION_INST(TME_M68K_VECTOR_CHK));
  }
  TME_M68K_INSN_OK;
}

/* cas16: compare-and-swap on the 16-bit memory operand at the
   effective address.  the compare register (Dc) is compared with
   the memory operand using tme_m68k_cmp16(), which sets the flags.
   when they are equal (Z set), the update register (Du) becomes the
   value to write to memory; otherwise the memory value is copied
   into the low 16 bits of Dc.  the whole sequence runs inside a
   read/modify/write cycle bracketed by tme_m68k_rmw_start() and
   tme_m68k_rmw_finish(): */
TME_M68K_INSN(tme_m68k_cas16)
{
  struct tme_m68k_rmw rmw;
  struct tme_m68k_tlb *tlb;
  int ireg_dc, ireg_du;
  tme_uint16_t value_dc, value_du, value_mem;

  /* start the read/modify/write cycle on the single 16-bit
     operand at the effective address.  a nonzero return ends this
     function immediately (NOTE(review): presumably the cycle must
     be restarted or was handled elsewhere - confirm against
     tme_m68k_rmw_start()): */
  rmw.tme_m68k_rmw_addresses[0] = ic->_tme_m68k_ea_address;
  rmw.tme_m68k_rmw_address_count = 1;
  rmw.tme_m68k_rmw_size = sizeof(tme_uint16_t);
  if (tme_m68k_rmw_start(ic,
                         &rmw)) {
    TME_M68K_INSN_OK;
  }

  /* get the compare and update registers, from the 3-bit specop
     fields at bit 0 (compare) and bit 6 (update): */
  ireg_dc = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_SPECOP, 0, 3);
  ireg_du = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_SPECOP, 6, 3);

  /* if we can do the fast compare-and-exchange: */
  if (!rmw.tme_m68k_rmw_slow_reads[0]) {

    /* get the compare and update values in big-endian byte order: */
    value_dc = ic->tme_m68k_ireg_uint16(ireg_dc << 1);
    value_du = ic->tme_m68k_ireg_uint16(ireg_du << 1);
    value_dc = tme_htobe_u16(value_dc);
    value_du = tme_htobe_u16(value_du);

    /* get this TLB entry: */
    tlb = rmw.tme_m68k_rmw_tlbs[0];

    /* this TLB entry must allow fast reading and fast writing
       to the same memory: */
    assert (tlb->tme_m68k_tlb_emulator_off_read != TME_EMULATOR_OFF_UNDEF
            && tlb->tme_m68k_tlb_emulator_off_write == tlb->tme_m68k_tlb_emulator_off_read);

    /* do the compare-and-exchange directly on the emulator memory
       for this address.  the value returned is kept, converted back
       to host byte order, as the memory operand for the comparison
       below: */
    value_mem =
      tme_memory_atomic_cx16(((tme_shared tme_uint16_t *)
                                   (tlb->tme_m68k_tlb_emulator_off_read
                                    + ic->_tme_m68k_ea_address)),
                                  value_dc,
                                  value_du,
                                  tlb->tme_m68k_tlb_bus_rwlock,
                                  sizeof(tme_uint8_t));
    ic->tme_m68k_ireg_memx16 = tme_betoh_u16(value_mem);

    /* step the transfer count once for the read, and once for the write: */
    TME_M68K_SEQUENCE_TRANSFER_STEP;
    TME_M68K_SEQUENCE_TRANSFER_STEP;
  }

  /* compare the compare operand to the effective address operand.
     this is what sets the condition codes for the instruction: */
  tme_m68k_cmp16(ic, &ic->tme_m68k_ireg_uint16(ireg_dc << 1), &ic->tme_m68k_ireg_memx16);

  /* if the comparison succeeded: */
  if (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_Z) {

    /* write the update operand to the effective address operand: */
    ic->tme_m68k_ireg_memx16 = ic->tme_m68k_ireg_uint16(ireg_du << 1);
  }

  /* otherwise, the comparison failed: */
  else {

    /* write the effective address operand to the compare operand: */
    ic->tme_m68k_ireg_uint16(ireg_dc << 1) = ic->tme_m68k_ireg_memx16;
  }

  /* finish the read/modify/write cycle.  the last argument is
     nonzero exactly when the comparison succeeded: */
  tme_m68k_rmw_finish(ic,
                      &rmw,
                      (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_Z) != 0);
  TME_M68K_INSN_OK;
}

/* cas2_16: double compare-and-swap on two 16-bit memory operands.
   the first extension word is the specop and the second is read
   through op0.  in each extension word, the 4-bit field at bit 12
   names the register holding the operand address, the 3-bit field
   at bit 0 names the compare register and the 3-bit field at bit 6
   names the update register.  both comparisons must succeed for the
   update values to be written; otherwise both compare registers
   are loaded with the memory values: */
TME_M68K_INSN(tme_m68k_cas2_16)
{
  struct tme_m68k_rmw rmw;
  int cmp_x, upd_x;
  int cmp_y, upd_y;
  const tme_uint16_t ext_x = TME_M68K_INSN_SPECOP;
  const tme_uint16_t ext_y = TME_M68K_INSN_OP0(tme_uint16_t);

  /* start the read/modify/write cycle on both addresses, using
     the data function code.  a nonzero return ends the
     instruction here: */
  ic->_tme_m68k_ea_function_code = TME_M68K_FUNCTION_CODE_DATA(ic);
  rmw.tme_m68k_rmw_addresses[0]
    = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(ext_x, 12, 4));
  rmw.tme_m68k_rmw_addresses[1]
    = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(ext_y, 12, 4));
  rmw.tme_m68k_rmw_address_count = 2;
  rmw.tme_m68k_rmw_size = sizeof(tme_uint16_t);
  if (tme_m68k_rmw_start(ic, &rmw)) {
    TME_M68K_INSN_OK;
  }

  /* decode the compare and update registers of both halves: */
  cmp_x = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(ext_x, 0, 3);
  cmp_y = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(ext_y, 0, 3);
  upd_x = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(ext_x, 6, 3);
  upd_y = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(ext_y, 6, 3);

  /* compare the first pair, and only when that matched,
     compare the second pair: */
  tme_m68k_cmp16(ic,
                 &ic->tme_m68k_ireg_uint16(cmp_x << 1),
                 &ic->tme_m68k_ireg_memx16);
  if (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_Z) {
    tme_m68k_cmp16(ic,
                   &ic->tme_m68k_ireg_uint16(cmp_y << 1),
                   &ic->tme_m68k_ireg_memy16);
  }

  /* if a comparison failed: */
  if ((ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_Z) == 0) {

    /* load the compare registers from the memory operands.  the
       second one is loaded first, because:
       "If Dc1 and Dc2 specify the same data register and the comparison
        fails, memory operand 1 is stored in the data register." */
    ic->tme_m68k_ireg_uint16(cmp_y << 1) = ic->tme_m68k_ireg_memy16;
    ic->tme_m68k_ireg_uint16(cmp_x << 1) = ic->tme_m68k_ireg_memx16;
  }

  /* otherwise, both comparisons succeeded: */
  else {

    /* the update registers become the new memory operands: */
    ic->tme_m68k_ireg_memx16 = ic->tme_m68k_ireg_uint16(upd_x << 1);
    ic->tme_m68k_ireg_memy16 = ic->tme_m68k_ireg_uint16(upd_y << 1);
  }

  /* finish the read/modify/write cycle, passing whether or not
     the comparisons succeeded: */
  tme_m68k_rmw_finish(ic,
                      &rmw,
                      (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_Z) != 0);
  TME_M68K_INSN_OK;
}

/* moves16: moves a 16-bit value between a general register and
   memory using an alternate function code.  the 4-bit specop field
   at bit 12 selects the register (counted from TME_M68K_IREG_D0).
   when specop bit 11 is set the register is written to memory with
   the function code taken from the DFC register; otherwise memory is
   read with the function code taken from the SFC register and the
   value is stored into the register: */
TME_M68K_INSN(tme_m68k_moves16)
{
  int ireg;
  tme_uint16_t ireg_value;
  unsigned int ea_reg;
  unsigned int increment;
  /* NOTE(review): presumably TME_M68K_INSN_PRIV enforces that the
     instruction is privileged - confirm against its definition: */
  TME_M68K_INSN_PRIV;
  TME_M68K_INSN_CANFAULT;
  ireg = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_SPECOP, 12, 4);

  /* in case we're storing the same address register used in a
     postincrement or predecrement EA, save the current value
     of the register now: */
  ireg_value = ic->tme_m68k_ireg_uint16(ireg << 1);

  /* we have to handle postincrement and predecrement ourselves: */
  if (!TME_M68K_SEQUENCE_RESTARTING) {
    ea_reg = TME_M68K_IREG_A0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 0, 3);
    increment = TME_M68K_SIZE_16;
    /* NOTE(review): increment was just set to TME_M68K_SIZE_16, so
       this byte-sized A7 adjustment can only trigger if
       TME_M68K_SIZE_8 equals TME_M68K_SIZE_16; it looks like a
       leftover shared with the 8-bit variant of this function: */
    if (increment == TME_M68K_SIZE_8 && ea_reg == TME_M68K_IREG_A7) {
      increment = TME_M68K_SIZE_16;
    }
    /* EA mode 3 only bumps the address register, while EA mode 4
       decrements the address register and uses the decremented
       value as the effective address: */
    switch (TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 3, 3)) {
    case 3: ic->tme_m68k_ireg_uint32(ea_reg) += increment; break;
    case 4: ic->_tme_m68k_ea_address = (ic->tme_m68k_ireg_uint32(ea_reg) -= increment); break;
    default: break;
    }
  }

  /* register to memory, using the value saved above: */
  if (TME_M68K_INSN_SPECOP & TME_BIT(11)) {
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_memx16 = ireg_value;
      ic->_tme_m68k_ea_function_code = ic->tme_m68k_ireg_dfc;
    }
    tme_m68k_write_memx16(ic);
  }
  /* memory to register: */
  else {
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->_tme_m68k_ea_function_code = ic->tme_m68k_ireg_sfc;
    }
    tme_m68k_read_memx16(ic);
    /* a register at or above TME_M68K_IREG_A0 gets the value
       sign-extended to 32 bits; a lower-numbered register only has
       its low 16 bits replaced: */
    if (ireg >= TME_M68K_IREG_A0) {
      ic->tme_m68k_ireg_uint32(ireg) = 
        TME_EXT_S16_U32((tme_int16_t) ic->tme_m68k_ireg_memx16);
    }
    else
      ic->tme_m68k_ireg_uint16(ireg << 1) = ic->tme_m68k_ireg_memx16;
  }
  TME_M68K_INSN_OK;
}

/* 32-bit "add SRC, DST": DST = DST + SRC, where SRC is read through
   _op0 and DST through _op1.  N, Z, V, C and X are all regenerated: */
TME_M68K_INSN(tme_m68k_add32)
{
  tme_uint32_t src, dst, sum;
  tme_uint8_t ccr;

  /* fetch both operands: */
  src = *((tme_uint32_t *) _op0);
  dst = *((tme_uint32_t *) _op1);

  /* add, and write the sum back to the destination: */
  sum = dst + src;
  *((tme_uint32_t *) _op1) = sum;

  /* N is the sign bit of the sum, and Z marks a zero sum: */
  ccr = 0;
  if (sum >> (32 - 1)) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (sum == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }

  /* V is set when both operands have the same sign, and the
     sign of the sum is different from it: */
  if (((src ^ dst ^ 0xffffffff) & (dst ^ sum)) >> (32 - 1)) {
    ccr |= TME_M68K_FLAG_V;
  }

  /* C and X are set on an unsigned carry out, which happens when
     the source is bigger than the complement of the destination: */
  if (src > (dst ^ 0xffffffff)) {
    ccr |= TME_M68K_FLAG_C | TME_M68K_FLAG_X;
  }
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* 32-bit "sub SRC, DST": DST = DST - SRC, where SRC is read through
   _op0 and DST through _op1.  N, Z, V, C and X are all regenerated: */
TME_M68K_INSN(tme_m68k_sub32)
{
  tme_uint32_t src, dst, diff;
  tme_uint8_t ccr;

  /* fetch both operands: */
  src = *((tme_uint32_t *) _op0);
  dst = *((tme_uint32_t *) _op1);

  /* subtract, and write the difference back to the destination: */
  diff = dst - src;
  *((tme_uint32_t *) _op1) = diff;

  /* N is the sign bit of the difference, and Z marks a zero
     difference: */
  ccr = 0;
  if (diff >> (32 - 1)) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (diff == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }

  /* V is set when the operands have different signs, and the
     sign of the difference is not the sign of the destination: */
  if (((src ^ dst) & (dst ^ diff)) >> (32 - 1)) {
    ccr |= TME_M68K_FLAG_V;
  }

  /* C and X are set on an unsigned borrow: */
  if (src > dst) {
    ccr |= TME_M68K_FLAG_C | TME_M68K_FLAG_X;
  }
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* 32-bit "cmp SRC, DST": sets N, Z, V and C as for DST - SRC, but
   stores no result and leaves X as it was: */
TME_M68K_INSN(tme_m68k_cmp32)
{
  tme_uint32_t src, dst, diff;
  tme_uint8_t ccr;

  /* fetch both operands: */
  src = *((tme_uint32_t *) _op0);
  dst = *((tme_uint32_t *) _op1);

  /* the difference is only used to generate the flags: */
  diff = dst - src;

  /* start from the preserved X flag: */
  ccr = (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X);

  /* N is the sign bit of the difference, and Z marks a zero
     difference: */
  if (diff >> (32 - 1)) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (diff == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }

  /* V is set when the operands have different signs, and the
     sign of the difference is not the sign of the destination: */
  if (((src ^ dst) & (dst ^ diff)) >> (32 - 1)) {
    ccr |= TME_M68K_FLAG_V;
  }

  /* C is set on an unsigned borrow: */
  if (src > dst) {
    ccr |= TME_M68K_FLAG_C;
  }
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* 32-bit "neg DST": DST = 0 - DST, where DST is accessed through
   _op1.  N, Z, V, C and X are all regenerated: */
TME_M68K_INSN(tme_m68k_neg32)
{
  tme_uint32_t dst, negated;
  tme_uint8_t ccr;

  /* fetch the operand: */
  dst = *((tme_uint32_t *) _op1);

  /* negate, and write the result back: */
  negated = 0 - dst;
  *((tme_uint32_t *) _op1) = negated;

  /* N is the sign bit of the result, and Z marks a zero result: */
  ccr = 0;
  if (negated >> (32 - 1)) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (negated == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }

  /* V is set when the operand and the result are both negative: */
  if ((dst & negated) >> (32 - 1)) {
    ccr |= TME_M68K_FLAG_V;
  }

  /* C and X are set for any nonzero operand: */
  if (dst != 0) {
    ccr |= TME_M68K_FLAG_C | TME_M68K_FLAG_X;
  }
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* 32-bit "or SRC, DST": DST = DST | SRC.  N and Z follow the
   result, V and C are cleared, and X is preserved: */
TME_M68K_INSN(tme_m68k_or32)
{
  tme_uint32_t src, dst, combined;
  tme_uint8_t ccr;

  /* fetch both operands: */
  src = *((tme_uint32_t *) _op0);
  dst = *((tme_uint32_t *) _op1);

  /* combine them, and write the result back to the destination: */
  combined = dst | src;
  *((tme_uint32_t *) _op1) = combined;

  /* keep only X from the old flags, then add N and Z: */
  ccr = (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X);
  if (combined >> (32 - 1)) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (combined == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* 32-bit "and SRC, DST": DST = DST & SRC.  N and Z follow the
   result, V and C are cleared, and X is preserved: */
TME_M68K_INSN(tme_m68k_and32)
{
  tme_uint32_t src, dst, combined;
  tme_uint8_t ccr;

  /* fetch both operands: */
  src = *((tme_uint32_t *) _op0);
  dst = *((tme_uint32_t *) _op1);

  /* combine them, and write the result back to the destination: */
  combined = dst & src;
  *((tme_uint32_t *) _op1) = combined;

  /* keep only X from the old flags, then add N and Z: */
  ccr = (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X);
  if (combined >> (32 - 1)) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (combined == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* 32-bit "eor SRC, DST": DST = DST ^ SRC.  N and Z follow the
   result, V and C are cleared, and X is preserved: */
TME_M68K_INSN(tme_m68k_eor32)
{
  tme_uint32_t src, dst, combined;
  tme_uint8_t ccr;

  /* fetch both operands: */
  src = *((tme_uint32_t *) _op0);
  dst = *((tme_uint32_t *) _op1);

  /* combine them, and write the result back to the destination: */
  combined = dst ^ src;
  *((tme_uint32_t *) _op1) = combined;

  /* keep only X from the old flags, then add N and Z: */
  ccr = (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X);
  if (combined >> (32 - 1)) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (combined == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* 32-bit "not DST": DST = ~DST.  N and Z follow the result,
   V and C are cleared, and X is preserved: */
TME_M68K_INSN(tme_m68k_not32)
{
  tme_uint32_t dst, inverted;
  tme_uint8_t ccr;

  /* fetch the operand: */
  dst = *((tme_uint32_t *) _op1);

  /* complement it, and write the result back: */
  inverted = ~ dst;
  *((tme_uint32_t *) _op1) = inverted;

  /* keep only X from the old flags, then add N and Z: */
  ccr = (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X);
  if (inverted >> (32 - 1)) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (inverted == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* 32-bit "tst DST": sets N and Z from the operand read through
   _op1, clears V and C, preserves X, and stores nothing: */
TME_M68K_INSN(tme_m68k_tst32)
{
  tme_uint32_t tested;
  tme_uint8_t ccr;

  /* fetch the operand: */
  tested = *((tme_uint32_t *) _op1);

  /* keep only X from the old flags, then add N and Z: */
  ccr = (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X);
  if (tested >> (32 - 1)) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (tested == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* 32-bit "move": copies the value read through _op1 to the location
   given by _op0 (note that the source is _op1 and the destination
   is _op0).  N and Z follow the value moved, V and C are cleared,
   and X is preserved: */
TME_M68K_INSN(tme_m68k_move32)
{
  tme_uint32_t moved;
  tme_uint8_t ccr;

  /* fetch the source, and store it to the destination: */
  moved = *((tme_uint32_t *) _op1);
  *((tme_uint32_t *) _op0) = moved;

  /* keep only X from the old flags, then add N and Z: */
  ccr = (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X);
  if (moved >> (32 - 1)) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (moved == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* 32-bit "moveq DST": the low eight bits of the opcode are
   sign-extended to 32 bits and stored through _op1.  N and Z follow
   the value, V and C are cleared, and X is preserved: */
TME_M68K_INSN(tme_m68k_moveq32)
{
  tme_uint32_t quick;
  tme_uint8_t ccr;

  /* sign-extend the immediate embedded in the opcode, and store it: */
  quick = TME_EXT_S8_U32((tme_int8_t) (TME_M68K_INSN_OPCODE & 0xff));
  *((tme_uint32_t *) _op1) = quick;

  /* keep only X from the old flags, then add N and Z: */
  ccr = (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X);
  if (quick >> (32 - 1)) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (quick == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* 32-bit "clr DST": stores zero through _op1.  since the result is
   always zero, Z is always set, N, V and C are always clear, and X
   is preserved: */
TME_M68K_INSN(tme_m68k_clr32)
{
  /* store the zero: */
  *((tme_uint32_t *) _op1) = 0;

  /* the new flags are Z plus whatever X was: */
  ic->tme_m68k_ireg_ccr
    = (tme_uint8_t) (TME_M68K_FLAG_Z
                     | (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X));

  TME_M68K_INSN_OK;
}

/* 32-bit "negx DST": DST = 0 - DST - X, where DST is accessed
   through _op1.  Z can only stay set (a nonzero result clears it,
   a zero result leaves it as it was); N, V, C and X are regenerated: */
TME_M68K_INSN(tme_m68k_negx32)
{
  tme_uint32_t dst, negated;
  tme_uint8_t ccr;

  /* fetch the operand: */
  dst = *((tme_uint32_t *) _op1);

  /* negate with the extend bit as a borrow in, and write the
     result back: */
  negated = 0 - dst - ((ic->tme_m68k_ireg_ccr / TME_M68K_FLAG_X) & 1);
  *((tme_uint32_t *) _op1) = negated;

  /* N is the sign bit of the result: */
  ccr = 0;
  if (negated >> (32 - 1)) {
    ccr |= TME_M68K_FLAG_N;
  }

  /* a zero result keeps the previous Z: */
  if (negated == 0) {
    ccr |= (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_Z);
  }

  /* V is set when the operand and the result are both negative: */
  if ((dst & negated) >> (32 - 1)) {
    ccr |= TME_M68K_FLAG_V;
  }

  /* C and X are set on a borrow, which happens for any nonzero
     operand, and for a zero operand when X was set: */
  if (dst != 0
      || (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X)) {
    ccr |= TME_M68K_FLAG_C | TME_M68K_FLAG_X;
  }
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

/* this does a 32-bit "addx SRC, DST": */
TME_M68K_INSN(tme_m68k_addx32)
{
  tme_uint32_t res, op0, op1;
  tme_uint8_t flags;

  /* load the operand(s): */
  unsigned int function_code = TME_M68K_FUNCTION_CODE_DATA(ic);
  int ireg_src = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 0, 3);
  int ireg_dst = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 9, 3);
  tme_uint32_t ireg_src_adjust = sizeof(tme_uint32_t);
  tme_uint32_t ireg_dst_adjust = sizeof(tme_uint32_t);
  tme_uint16_t memory;

  memory = (TME_M68K_INSN_OPCODE & TME_BIT(3));
  if (memory) {
    TME_M68K_INSN_CANFAULT;
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_src) -= ireg_src_adjust;
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_src);
    }
    tme_m68k_read_mem32(ic, TME_M68K_IREG_MEMY32);
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst) -= ireg_dst_adjust;
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst);
    }
    tme_m68k_read_memx32(ic);
    op1 = ic->tme_m68k_ireg_memx32;
    op0 = ic->tme_m68k_ireg_memy32;
  }
  else {
    op0 = ic->tme_m68k_ireg_uint32((TME_M68K_IREG_D0 + ireg_src));
    op1 = ic->tme_m68k_ireg_uint32((TME_M68K_IREG_D0 + ireg_dst));
  }

  /* perform the operation: */
  res = op1 + op0 + ((ic->tme_m68k_ireg_ccr / TME_M68K_FLAG_X) & 1);

  /* store the result: */
  if (memory) {
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_memx32 = res;
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst);
    }
    tme_m68k_write_memx32(ic);
  }
  else {
    ic->tme_m68k_ireg_uint32((TME_M68K_IREG_D0 + ireg_dst)) = res;
  }

  /* set the flags: */
  flags = ((tme_uint8_t) (((tme_uint32_t) res) >> (32 - 1))) * TME_M68K_FLAG_N;
  if (res == 0) flags |= (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_Z);
  flags |= ((tme_uint8_t) (((op0 ^ op1 ^ 0xffffffff) & (op1 ^ res)) >> (32 - 1))) * TME_M68K_FLAG_V;
  if (op0 > (op1 ^ 0xffffffff) || (op0 == (op1 ^ 0xffffffff) && (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X))) flags |= TME_M68K_FLAG_C | TME_M68K_FLAG_X;
  ic->tme_m68k_ireg_ccr = flags;

  TME_M68K_INSN_OK;
}

/* this does a 32-bit "subx SRC, DST": */
TME_M68K_INSN(tme_m68k_subx32)
{
  tme_uint32_t res, op0, op1;
  tme_uint8_t flags;

  /* load the operand(s): */
  unsigned int function_code = TME_M68K_FUNCTION_CODE_DATA(ic);
  int ireg_src = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 0, 3);
  int ireg_dst = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 9, 3);
  tme_uint32_t ireg_src_adjust = sizeof(tme_uint32_t);
  tme_uint32_t ireg_dst_adjust = sizeof(tme_uint32_t);
  tme_uint16_t memory;

  memory = (TME_M68K_INSN_OPCODE & TME_BIT(3));
  if (memory) {
    TME_M68K_INSN_CANFAULT;
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_src) -= ireg_src_adjust;
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_src);
    }
    tme_m68k_read_mem32(ic, TME_M68K_IREG_MEMY32);
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst) -= ireg_dst_adjust;
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst);
    }
    tme_m68k_read_memx32(ic);
    op1 = ic->tme_m68k_ireg_memx32;
    op0 = ic->tme_m68k_ireg_memy32;
  }
  else {
    op0 = ic->tme_m68k_ireg_uint32((TME_M68K_IREG_D0 + ireg_src));
    op1 = ic->tme_m68k_ireg_uint32((TME_M68K_IREG_D0 + ireg_dst));
  }

  /* perform the operation: */
  res = op1 - op0 - ((ic->tme_m68k_ireg_ccr / TME_M68K_FLAG_X) & 1);

  /* store the result: */
  if (memory) {
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_memx32 = res;
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst);
    }
    tme_m68k_write_memx32(ic);
  }
  else {
    ic->tme_m68k_ireg_uint32((TME_M68K_IREG_D0 + ireg_dst)) = res;
  }

  /* set the flags: */
  flags = ((tme_uint8_t) (((tme_uint32_t) res) >> (32 - 1))) * TME_M68K_FLAG_N;
  if (res == 0) flags |= (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_Z);
  flags |= ((tme_uint8_t) (((op0 ^ op1) & (op1 ^ res)) >> (32 - 1))) * TME_M68K_FLAG_V;
  if (op0 > op1 || (op0 == op1 && (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X))) flags |= TME_M68K_FLAG_C | TME_M68K_FLAG_X;
  ic->tme_m68k_ireg_ccr = flags;

  TME_M68K_INSN_OK;
}

/* this does a 32-bit "cmpm SRC, DST": */
TME_M68K_INSN(tme_m68k_cmpm32)
{
  tme_uint32_t res, op0, op1;
  tme_uint8_t flags;

  /* load the operand(s): */
  unsigned int function_code = TME_M68K_FUNCTION_CODE_DATA(ic);
  int ireg_src = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 0, 3);
  int ireg_dst = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 9, 3);
  tme_uint32_t ireg_src_adjust = sizeof(tme_uint32_t);
  tme_uint32_t ireg_dst_adjust = sizeof(tme_uint32_t);

  TME_M68K_INSN_CANFAULT;

  if (!TME_M68K_SEQUENCE_RESTARTING) {
    ic->_tme_m68k_ea_function_code = function_code;
    ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_src);
    ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_src) += ireg_src_adjust;
  }
  tme_m68k_read_mem32(ic, TME_M68K_IREG_MEMY32);
  if (!TME_M68K_SEQUENCE_RESTARTING) {
    ic->_tme_m68k_ea_function_code = function_code;
    ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst);
    ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ireg_dst) += ireg_dst_adjust;
  }
  tme_m68k_read_memx32(ic);
  op1 = ic->tme_m68k_ireg_memx32;
  op0 = ic->tme_m68k_ireg_memy32;

  /* perform the operation: */
  res = op1 - op0;

  /* set the flags: */
  flags = ((tme_uint8_t) (((tme_uint32_t) res) >> (32 - 1))) * TME_M68K_FLAG_N;
  if (res == 0) flags |= TME_M68K_FLAG_Z;
  flags |= ((tme_uint8_t) (((op0 ^ op1) & (op1 ^ res)) >> (32 - 1))) * TME_M68K_FLAG_V;
  if (op0 > op1) flags |= TME_M68K_FLAG_C;
  flags |= (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X);
  ic->tme_m68k_ireg_ccr = flags;

  TME_M68K_INSN_OK;
}

/* a move of an address register to a predecrement or
   postincrement EA with that same address register, must
   store the original value of the address register.  since the
   predecrement and postincrement code in the executer updates
   the address register before the move has happened, we wrap
   the normal move function in this one, that gives an op1
   argument that is the original value of the address register: */
TME_M68K_INSN(tme_m68k_move_srpd32)
{
  /* NB: both this function and tme_m68k_move32()
     get the source operand as _op1, and the destination
     operand as _op0: */
  if (!TME_M68K_SEQUENCE_RESTARTING) {
    *((tme_uint32_t *) _op0)
      = (*((tme_uint32_t *) _op1)
         + sizeof(tme_uint32_t));
  }
  tme_m68k_move32(ic, _op0, _op0);
}

/* a move of an address register to a predecrement or
   postincrement EA with that same address register, must
   store the original value of the address register.  since the
   predecrement and postincrement code in the executer updates
   the address register before the move has happened, we wrap
   the normal move function in this one, that gives an op1
   argument that is the original value of the address register: */
TME_M68K_INSN(tme_m68k_move_srpi32)
{
  /* NB: both this function and tme_m68k_move32()
     get the source operand as _op1, and the destination
     operand as _op0: */
  if (!TME_M68K_SEQUENCE_RESTARTING) {
    *((tme_uint32_t *) _op0)
      = (*((tme_uint32_t *) _op1)
         - sizeof(tme_uint32_t));
  }
  tme_m68k_move32(ic, _op0, _op0);
}

/* the suba function on a 32-byte EA: */
TME_M68K_INSN(tme_m68k_suba32)
{
  *((tme_int32_t *) _op1) -= *((tme_int32_t *) _op0);
  TME_M68K_INSN_OK;
}

/* the adda function on a 32-byte EA: */
TME_M68K_INSN(tme_m68k_adda32)
{
  *((tme_int32_t *) _op1) += *((tme_int32_t *) _op0);
  TME_M68K_INSN_OK;
}

/* the movea function on a 32-byte EA: */
TME_M68K_INSN(tme_m68k_movea32)
{
  *((tme_int32_t *) _op0) = *((tme_int32_t *) _op1);
  TME_M68K_INSN_OK;
}

/* the btst function on a 32-bit EA: tests the bit of the op1 value
   selected by the op0 bit number (taken modulo 32).  Z is set when
   that bit is clear, and cleared when that bit is set.  no other
   flag and no operand is changed: */
TME_M68K_INSN(tme_m68k_btst32)
{
  tme_uint32_t selected, operand;

  /* make the single-bit mask, and fetch the operand: */
  selected = _TME_BIT(tme_uint32_t, TME_M68K_INSN_OP0(tme_uint8_t) & (32 - 1));
  operand = TME_M68K_INSN_OP1(tme_uint32_t);

  /* Z is the complement of the tested bit: */
  if ((operand & selected) == 0) {
    ic->tme_m68k_ireg_ccr |= TME_M68K_FLAG_Z;
  }
  else {
    ic->tme_m68k_ireg_ccr &= ~TME_M68K_FLAG_Z;
  }

  TME_M68K_INSN_OK;
}

/* the bchg function on a 32-bit EA: tests the bit of the op1 value
   selected by the op0 bit number (taken modulo 32), setting Z when
   that bit was clear and clearing Z when it was set, and then
   inverts that bit in op1.  no other flag is changed: */
TME_M68K_INSN(tme_m68k_bchg32)
{
  tme_uint32_t selected, operand;

  /* make the single-bit mask, and fetch the operand: */
  selected = _TME_BIT(tme_uint32_t, TME_M68K_INSN_OP0(tme_uint8_t) & (32 - 1));
  operand = TME_M68K_INSN_OP1(tme_uint32_t);

  /* Z is the complement of the bit as it was before the change: */
  if ((operand & selected) == 0) {
    ic->tme_m68k_ireg_ccr |= TME_M68K_FLAG_Z;
  }
  else {
    ic->tme_m68k_ireg_ccr &= ~TME_M68K_FLAG_Z;
  }

  /* flip the bit: */
  TME_M68K_INSN_OP1(tme_uint32_t) = operand ^ selected;
  TME_M68K_INSN_OK;
}

/* the bclr function on a 32-bit EA: tests the bit of the op1 value
   selected by the op0 bit number (taken modulo 32), setting Z when
   that bit was clear and clearing Z when it was set, and then
   clears that bit in op1.  no other flag is changed: */
TME_M68K_INSN(tme_m68k_bclr32)
{
  tme_uint32_t selected, operand;

  /* make the single-bit mask, and fetch the operand: */
  selected = _TME_BIT(tme_uint32_t, TME_M68K_INSN_OP0(tme_uint8_t) & (32 - 1));
  operand = TME_M68K_INSN_OP1(tme_uint32_t);

  /* Z is the complement of the bit as it was before the change: */
  if ((operand & selected) == 0) {
    ic->tme_m68k_ireg_ccr |= TME_M68K_FLAG_Z;
  }
  else {
    ic->tme_m68k_ireg_ccr &= ~TME_M68K_FLAG_Z;
  }

  /* clear the bit: */
  TME_M68K_INSN_OP1(tme_uint32_t) = operand & ~selected;
  TME_M68K_INSN_OK;
}

/* the bset function on a 32-bit EA: tests the bit of the op1 value
   selected by the op0 bit number (taken modulo 32), setting Z when
   that bit was clear and clearing Z when it was set, and then
   sets that bit in op1.  no other flag is changed: */
TME_M68K_INSN(tme_m68k_bset32)
{
  tme_uint32_t selected, operand;

  /* make the single-bit mask, and fetch the operand: */
  selected = _TME_BIT(tme_uint32_t, TME_M68K_INSN_OP0(tme_uint8_t) & (32 - 1));
  operand = TME_M68K_INSN_OP1(tme_uint32_t);

  /* Z is the complement of the bit as it was before the change: */
  if ((operand & selected) == 0) {
    ic->tme_m68k_ireg_ccr |= TME_M68K_FLAG_Z;
  }
  else {
    ic->tme_m68k_ireg_ccr &= ~TME_M68K_FLAG_Z;
  }

  /* set the bit: */
  TME_M68K_INSN_OP1(tme_uint32_t) = operand | selected;
  TME_M68K_INSN_OK;
}

/* the asl function on a 32-bit EA: arithmetic shift left of the op1
   value by the op0 count, taken modulo 64.  a zero count leaves the
   operand alone, clears V and C and preserves X.  a nonzero count
   sets C and X to the last bit shifted out, and sets V if the sign
   bit changed at any point during the shift.  N and Z always follow
   the result: */
TME_M68K_INSN(tme_m68k_asl32)
{
  unsigned int count;
  tme_uint32_t sign_bits, sign_bits_mask;
  tme_uint32_t res;
  tme_uint8_t flags;

  /* get the count and operand: */
  count = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  res = TME_M68K_INSN_OP1(tme_uint32_t);

  /* generate the X, V, and C flags assuming the count is zero: */
  flags = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;

  /* if the count is nonzero, update the result and
     generate the X, V, and C flags: */
  if (count > 0) {

    /* we need to see how the sign of the result will change during
       shifting in order to generate V.

       in general, the idea is to get all of the bits that will ever
       appear in the sign position into sign_bits, with a mask in
       sign_bits_mask.  if (sign_bits & sign_bits_mask) is zero or
       sign_bits_mask, clear V, else set V.

       start by loading the operand into sign_bits and setting
       sign_bits_mask to all-bits-one.

       if the shift count is exactly 32 - 1, then all of the bits
       of the operand will appear in the sign position.

       if the shift count is less than 32 - 1, then some of the
       less significant bits of the operand will never appear in the
       sign position, so we can shift sign_bits_mask to ignore them.

       if the shift count is greater than 32 - 1, then all of the
       bits in the operand, plus at least one zero bit, will appear in
       the sign position.  the only way that the sign bit will never
       change during the shift is if the operand was zero to begin with.
       without any changes to sign_bits or sign_bits_mask, the final
       test will always work, except when sign_bits is all-bits-one.
       the magic below clears the least-significant bit of sign_bits
       iff sign_bits is all-bits-one: */
    sign_bits = res;

    /* a count above 32 shifts every operand bit out, and the last
       bit shifted out is a zero.  shifting a zero operand by exactly
       32 gives the same result and the same flags, and keeps both
       shift amounts below within 0..31.  a shift by 32 or more would
       be undefined behavior in C for this 32-bit type.  the V logic
       below treats every count of 32 or more identically, so it is
       not affected by the clamp: */
    if (count > 32) {
      res = 0;
      count = 32;
    }

    /* shift by all but one position, take C and X from the bit now
       in the sign position (the last bit to be shifted out), and
       then do the final shift: */
    res <<= (count - 1);
    flags = (res >> (32 - 1));
    flags *= TME_M68K_FLAG_C;
    flags |= (flags * TME_M68K_FLAG_X);
    res <<= 1;

    /* generate V as described above: */
    sign_bits_mask = (tme_uint32_t) -1;
    if (count != 32 - 1) {
      if (count < 32) {
        sign_bits_mask <<= ((32 - 1) - count);
      }
      else {
        sign_bits ^= !(sign_bits + 1);
      }
    }
    sign_bits &= sign_bits_mask;
    if (sign_bits != 0 && sign_bits != sign_bits_mask) {
      flags |= TME_M68K_FLAG_V;
    }
  }

  /* store the result: */
  TME_M68K_INSN_OP1(tme_uint32_t) = res;

  /* generate the N flag.  we cast to tme_uint8_t as soon as we
     know the bit we want is within the range of the type, to try
     to affect the generated assembly: */
  flags |= ((tme_uint8_t) (((tme_uint32_t) res) >> (32 - 1))) * TME_M68K_FLAG_N;

  /* generate the Z flag: */
  if (res == 0) flags |= TME_M68K_FLAG_Z;

  /* store the flags: */
  ic->tme_m68k_ireg_ccr = flags;
  TME_M68K_INSN_OK;
}

/* the asr function on a 32-bit EA: arithmetic shift right of the op1
   value by the op0 count, taken modulo 64.  a zero count leaves the
   operand alone, clears V and C and preserves X.  a nonzero count
   sets C and X to the last bit shifted out and clears V.  N and Z
   always follow the result: */
TME_M68K_INSN(tme_m68k_asr32)
{
  unsigned int count;
  tme_int32_t res;
  tme_uint8_t flags;

  /* get the count and operand: */
  count = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  res = TME_M68K_INSN_OP1(tme_int32_t);

  /* generate the X, V, and C flags assuming the count is zero: */
  flags = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;

  /* if the count is nonzero, update the result and
     generate the X, V, and C flags: */
  if (count > 0) {

    /* a count above 32 leaves only copies of the sign bit, both in
       the result and as the last bit shifted out.  replacing the
       operand with its sign fill (0 or -1) and shifting by exactly
       32 gives the same result and the same flags, and keeps both
       shift amounts below within 0..31.  a shift by 32 or more would
       be undefined behavior in C for this 32-bit type: */
    if (count > 32) {
      res = 0 - (res < 0);
      count = 32;
    }

    /* shift by all but one position.  when the host compiler is
       not known to shift signed values arithmetically, divide by
       two one position at a time, after clearing the low bit so
       that the division rounds towards negative infinity: */
#ifdef SHIFTSIGNED_INT32_T
    res >>= (count - 1);
#else  /* !SHIFTSIGNED_INT32_T */
    for (; --count > 0; ) {
      res = (res & ~((tme_int32_t) 1)) / 2;
    }
#endif /* !SHIFTSIGNED_INT32_T */

    /* C and X come from the low bit, which is the last bit to be
       shifted out: */
    flags = (res & 1);
    flags *= TME_M68K_FLAG_C;
    flags |= (flags * TME_M68K_FLAG_X);

    /* do the final shift: */
#ifdef SHIFTSIGNED_INT32_T
    res >>= 1;
#else  /* !SHIFTSIGNED_INT32_T */
    res = (res & ~((tme_int32_t) 1)) / 2;
#endif /* !SHIFTSIGNED_INT32_T */
  }

  /* store the result: */
  TME_M68K_INSN_OP1(tme_int32_t) = res;

  /* generate the N flag.  we cast to tme_uint8_t as soon as we
     know the bit we want is within the range of the type, to try
     to affect the generated assembly: */
  flags |= ((tme_uint8_t) (((tme_uint32_t) res) >> (32 - 1))) * TME_M68K_FLAG_N;

  /* generate the Z flag: */
  if (res == 0) flags |= TME_M68K_FLAG_Z;

  /* store the flags: */
  ic->tme_m68k_ireg_ccr = flags;
  TME_M68K_INSN_OK;
}

/* the lsl function on a 32-bit EA.  this logically shifts the
   32-bit destination operand left by the count held in the low
   six bits of the source operand, stores the result back into
   the destination, and regenerates the condition codes: */
TME_M68K_INSN(tme_m68k_lsl32)
{
  unsigned int count;
  tme_uint32_t res;
  tme_uint8_t flags;

  /* get the count and operand: */
  count = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  res = TME_M68K_INSN_OP1(tme_uint32_t);

  /* generate the X, V, and C flags assuming the count is zero.
     only X survives from the old CCR; V and C start out clear: */
  flags = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;

  /* if the count is nonzero, update the result and
     generate the X, V, and C flags: */
  if (count > 0) {

    /* when the count exceeds the operand width, every bit of the
       result and the last bit shifted out are zero.  zero the
       operand and clamp the count to 32, so that the shifts below
       never use a count of 32 or more, which C leaves undefined
       for a 32-bit type: */
    if (63 > SHIFTMAX_INT32_T
        && count > 32) {
      res = 0;
      count = 32;
    }

    /* shift by all but the last bit position, so that the last
       bit to be shifted out is left in the most significant bit: */
    res <<= (count - 1);

    /* the last bit shifted out becomes both C and X: */
    flags = (res >> (32 - 1));
    flags *= TME_M68K_FLAG_C;
    flags |= (flags * TME_M68K_FLAG_X);

    /* do the final single-bit shift: */
    res <<= 1;
  }

  /* store the result: */
  TME_M68K_INSN_OP1(tme_uint32_t) = res;

  /* generate the N flag.  we cast to tme_uint8_t as soon as we
     know the bit we want is within the range of the type, to try
     to affect the generated assembly: */
  flags |= ((tme_uint8_t) (((tme_uint32_t) res) >> (32 - 1))) * TME_M68K_FLAG_N;

  /* generate the Z flag: */
  if (res == 0) flags |= TME_M68K_FLAG_Z;

  /* store the flags: */
  ic->tme_m68k_ireg_ccr = flags;
  TME_M68K_INSN_OK;
}

/* the lsr function on a 32-bit EA.  this logically shifts the
   32-bit destination operand right by the count held in the low
   six bits of the source operand, stores the result back into
   the destination, and regenerates the condition codes: */
TME_M68K_INSN(tme_m68k_lsr32)
{
  unsigned int count;
  tme_uint32_t res;
  tme_uint8_t flags;

  /* get the count and operand: */
  count = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  res = TME_M68K_INSN_OP1(tme_uint32_t);

  /* generate the X, V, and C flags assuming the count is zero.
     only X survives from the old CCR; V and C start out clear: */
  flags = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;

  /* if the count is nonzero, update the result and
     generate the X, V, and C flags: */
  if (count > 0) {

    /* when the count exceeds the operand width, every bit of the
       result and the last bit shifted out are zero.  zero the
       operand and clamp the count to 32, so that the shifts below
       never use a count of 32 or more, which C leaves undefined
       for a 32-bit type: */
    if (63 > SHIFTMAX_INT32_T
        && count > 32) {
      res = 0;
      count = 32;
    }

    /* shift by all but the last bit position, so that the last
       bit to be shifted out is left in bit zero: */
    res >>= (count - 1);

    /* the last bit shifted out becomes both C and X: */
    flags = (res & 1);
    flags *= TME_M68K_FLAG_C;
    flags |= (flags * TME_M68K_FLAG_X);

    /* do the final single-bit shift: */
    res >>= 1;
  }

  /* store the result: */
  TME_M68K_INSN_OP1(tme_uint32_t) = res;

  /* generate the N flag.  we cast to tme_uint8_t as soon as we
     know the bit we want is within the range of the type, to try
     to affect the generated assembly: */
  flags |= ((tme_uint8_t) (((tme_uint32_t) res) >> (32 - 1))) * TME_M68K_FLAG_N;

  /* generate the Z flag: */
  if (res == 0) flags |= TME_M68K_FLAG_Z;

  /* store the flags: */
  ic->tme_m68k_ireg_ccr = flags;
  TME_M68K_INSN_OK;
}

/* the rol function on a 32-bit EA.  this rotates the 32-bit
   destination operand left by the count held in the low six bits
   of the source operand, stores the result back into the
   destination, and regenerates the condition codes.  X is always
   preserved: */
TME_M68K_INSN(tme_m68k_rol32)
{
  unsigned int count;
  tme_uint32_t res;
  tme_uint8_t flags;

  /* get the count and operand: */
  count = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  res = TME_M68K_INSN_OP1(tme_uint32_t);

  /* generate the X, V, and C flags assuming the count is zero.
     only X survives from the old CCR; V and C start out clear: */
  flags = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;

  /* if the count is nonzero, update the result and
     generate the X, V, and C flags: */
  if (count > 0) {

    /* a rotate by a multiple of 32 leaves the value unchanged, so
       only the count modulo 32 matters for the result: */
    count &= (32 - 1);

    /* when the reduced count is zero (the original count was 32),
       skip the rotate: the value is unchanged, and the expression
       below would shift right by the full 32-bit width, which C
       leaves undefined: */
    if (count > 0) {
      res = (res << count) | (res >> (32 - count));
    }

    /* the last bit rotated out of the top lands in bit zero, and
       is copied to C: */
    flags |= ((res & 1) * TME_M68K_FLAG_C);
  }

  /* store the result: */
  TME_M68K_INSN_OP1(tme_uint32_t) = res;

  /* generate the N flag.  we cast to tme_uint8_t as soon as we
     know the bit we want is within the range of the type, to try
     to affect the generated assembly: */
  flags |= ((tme_uint8_t) (((tme_uint32_t) res) >> (32 - 1))) * TME_M68K_FLAG_N;

  /* generate the Z flag: */
  if (res == 0) flags |= TME_M68K_FLAG_Z;

  /* store the flags: */
  ic->tme_m68k_ireg_ccr = flags;
  TME_M68K_INSN_OK;
}

/* the ror function on a 32-bit EA.  this rotates the 32-bit
   destination operand right by the count held in the low six bits
   of the source operand, stores the result back into the
   destination, and regenerates the condition codes.  X is always
   preserved: */
TME_M68K_INSN(tme_m68k_ror32)
{
  unsigned int count;
  tme_uint32_t res;
  tme_uint8_t flags;

  /* get the count and operand: */
  count = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  res = TME_M68K_INSN_OP1(tme_uint32_t);

  /* generate the X, V, and C flags assuming the count is zero.
     only X survives from the old CCR; V and C start out clear: */
  flags = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;

  /* if the count is nonzero, update the result and
     generate the X, V, and C flags: */
  if (count > 0) {

    /* a rotate by a multiple of 32 leaves the value unchanged, so
       only the count modulo 32 matters for the result: */
    count &= (32 - 1);

    /* when the reduced count is zero (the original count was 32),
       skip the rotate: the value is unchanged, and the expression
       below would shift left by the full 32-bit width, which C
       leaves undefined: */
    if (count > 0) {
      res = (res << (32 - count)) | (res >> count);
    }

    /* the last bit rotated out of the bottom lands in the most
       significant bit, and is copied to C: */
    flags |= ((res >> (32 - 1)) * TME_M68K_FLAG_C);
  }

  /* store the result: */
  TME_M68K_INSN_OP1(tme_uint32_t) = res;

  /* generate the N flag.  we cast to tme_uint8_t as soon as we
     know the bit we want is within the range of the type, to try
     to affect the generated assembly: */
  flags |= ((tme_uint8_t) (((tme_uint32_t) res) >> (32 - 1))) * TME_M68K_FLAG_N;

  /* generate the Z flag: */
  if (res == 0) flags |= TME_M68K_FLAG_Z;

  /* store the flags: */
  ic->tme_m68k_ireg_ccr = flags;
  TME_M68K_INSN_OK;
}

/* the roxl function on a 32-byte EA: */
TME_M68K_INSN(tme_m68k_roxl32)
{
  unsigned int count;
  tme_uint8_t xbit;
  tme_uint32_t res;
  tme_uint8_t flags;

  /* get the count and operand: */
  count = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  res = TME_M68K_INSN_OP1(tme_uint32_t);

  /* generate the X, V, and C flags assuming the count is zero: */
  flags = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;
  xbit = (flags / TME_M68K_FLAG_X);
  flags |= (xbit * TME_M68K_FLAG_C);

  /* if the count is nonzero, update the result and
     generate the X, V, and C flags: */
  if (count > 0) {
    count %= (32 + 1);
    flags = xbit;
    if (count > 0) {
      flags = (res >> (32 - count)) & 1;
      if (32 > SHIFTMAX_INT32_T
          && count == 32) {
        res = 0 | (xbit << (32 - 1)) | (res >> ((32 + 1) - 32));
      }
      else if (32 > SHIFTMAX_INT32_T
               && count == 1) {
        res = (res << 1) | (xbit << (1 - 1)) | 0;
      }
      else {
        res = (res << count) | (xbit << (count - 1)) | (res >> ((32 + 1) - count));
      }
    }
    flags *= TME_M68K_FLAG_C;
    flags |= (flags * TME_M68K_FLAG_X);
  }

  /* store the result: */
  TME_M68K_INSN_OP1(tme_uint32_t) = res;

  /* generate the N flag.  we cast to tme_uint8_t as soon as we
     know the bit we want is within the range of the type, to try
     to affect the generated assembly: */
  flags |= ((tme_uint8_t) (((tme_uint32_t) res) >> (32 - 1))) * TME_M68K_FLAG_N;

  /* generate the Z flag: */
  if (res == 0) flags |= TME_M68K_FLAG_Z;

  /* store the flags: */
  ic->tme_m68k_ireg_ccr = flags;
  TME_M68K_INSN_OK;
}

/* the roxr function on a 32-byte EA: */
TME_M68K_INSN(tme_m68k_roxr32)
{
  unsigned int count;
  tme_uint8_t xbit;
  tme_uint32_t res;
  tme_uint8_t flags;

  /* get the count and operand: */
  count = TME_M68K_INSN_OP0(tme_uint8_t) & 63;
  res = TME_M68K_INSN_OP1(tme_uint32_t);

  /* generate the X, V, and C flags assuming the count is zero: */
  flags = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;
  xbit = (flags / TME_M68K_FLAG_X);
  flags |= (xbit * TME_M68K_FLAG_C);

  /* if the count is nonzero, update the result and
     generate the X, V, and C flags: */
  if (count > 0) {
    count %= (32 + 1);
    flags = xbit;
    if (count > 0) {
      flags = (res >> (count - 1)) & 1;
      if (32 > SHIFTMAX_INT32_T
          && count == 32) {
        res = (res << ((32 + 1) - 32)) | (xbit << (32 - 32)) | 0;
      }
      else if (32 > SHIFTMAX_INT32_T
               && count == 1) {
        res = 0 | (xbit << (32 - 1)) | (res >> 1);
      }
      else {
        res = (res << ((32 + 1) - count)) | (xbit << (32 - count)) | (res >> count);
      }
    }
    flags *= TME_M68K_FLAG_C;
    flags |= (flags * TME_M68K_FLAG_X);
  }

  /* store the result: */
  TME_M68K_INSN_OP1(tme_uint32_t) = res;

  /* generate the N flag.  we cast to tme_uint8_t as soon as we
     know the bit we want is within the range of the type, to try
     to affect the generated assembly: */
  flags |= ((tme_uint8_t) (((tme_uint32_t) res) >> (32 - 1))) * TME_M68K_FLAG_N;

  /* generate the Z flag: */
  if (res == 0) flags |= TME_M68K_FLAG_Z;

  /* store the flags: */
  ic->tme_m68k_ireg_ccr = flags;
  TME_M68K_INSN_OK;
}

/* the movep_rm function on a 32-bit dreg.  this writes the four
   bytes of a data register, most significant byte first, to every
   other byte address starting at the destination address plus the
   sign-extended 16-bit displacement: */
TME_M68K_INSN(tme_m68k_movep_rm32)
{
  unsigned int function_code;
  tme_uint32_t linear_address;
  tme_uint32_t value;
  int dreg;
  unsigned int byte_i;
  unsigned int shift;

  TME_M68K_INSN_CANFAULT;

  /* get the function code, the first address, and the register
     value to transfer: */
  function_code = TME_M68K_FUNCTION_CODE_DATA(ic);
  linear_address = TME_M68K_INSN_OP1(tme_uint32_t);
  linear_address += (tme_int32_t) ((tme_int16_t) TME_M68K_INSN_SPECOP);
  dreg = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 9, 3);
  value = ic->tme_m68k_ireg_uint32(dreg);

  /* write bits 31..24, then 23..16, then 15..8, then 7..0: */
  for (byte_i = 0; byte_i < sizeof(tme_uint32_t); byte_i++) {
    shift = 24 - (8 * byte_i);

    /* unless we are restarting, set up this byte's transfer: */
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = linear_address;
      ic->tme_m68k_ireg_memx8 = TME_FIELD_EXTRACTU(value, shift, 8);
    }
    tme_m68k_write_memx8(ic);

    /* successive bytes are two addresses apart: */
    linear_address += 2;
  }
  TME_M68K_INSN_OK;
}

/* the movem_rm function on 32-bit registers.  this stores to memory
   each register selected by the 16-bit register mask, starting at
   the current effective address.  in the predecrement mode (EA mode
   4) the registers are walked from A7 downwards and the address
   moves downwards; otherwise they are walked from D0 upwards and
   the address moves upwards: */
TME_M68K_INSN(tme_m68k_movem_rm32)
{
  int ireg, direction;
  tme_uint16_t mask, bit;
  unsigned int ea_mode;
  tme_uint32_t addend;
  tme_uint32_t total_size;
  /* get the register mask, and figure out the total size
     of the transfer: */
  mask = TME_M68K_INSN_SPECOP;
  total_size = 0;
  if (mask != 0) {
    TME_M68K_INSN_CANFAULT;

    /* count the set bits in the mask, clearing the lowest set bit
       on each pass; each one contributes one 32-bit transfer: */
    bit = mask;
    do {
      total_size += sizeof(tme_uint32_t);
      bit &= (bit - 1);
    } while (bit != 0);
  }

  /* figure out what direction to move in, and where to start from: */
  ea_mode = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 3, 3);
  direction = 1;
  ireg = TME_M68K_IREG_D0;
  if (ea_mode == 4) {
    direction = -1;
    ireg = TME_M68K_IREG_A7;
    if (!TME_M68K_SEQUENCE_RESTARTING) {

      /* "For the MC68020, MC68030, MC68040, and CPU32, if
         the addressing register is also moved to memory, the
         value written is the initial register value decremented
         by the size of the operation. The MC68000 and MC68010
         write the initial register value (not decremented)." */
      /* so on the 68020 and later the address register is given
         its final value before the transfer begins: */
      if (ic->tme_m68k_type >= TME_M68K_M68020) {
        ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0
                                 + TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 0, 3))
          = (ic->_tme_m68k_ea_address - total_size);
      }

      /* predecrement the effective address for the first transfer: */
      ic->_tme_m68k_ea_address -= sizeof(tme_uint32_t);
    }
  }

  /* the per-transfer address step: plus or minus the size of one
     register, as an unsigned value that wraps when added: */
  addend = (tme_uint32_t) (direction * sizeof(tme_uint32_t));

  /* do the transfer.  the loop visits all sixteen mask bits from
     the least significant upwards, and ends when the 16-bit bit
     value shifts out to zero: */
  for (bit = 1; bit != 0; bit <<= 1) {
    if (mask & bit) {
      if (!TME_M68K_SEQUENCE_RESTARTING) {
        ic->tme_m68k_ireg_memx32 = ic->tme_m68k_ireg_uint32(ireg);
      }
      tme_m68k_write_memx32(ic);
      if (!TME_M68K_SEQUENCE_RESTARTING) {
        ic->_tme_m68k_ea_address += addend;
      }
    }
    ireg += direction;
  }

  /* if this is the predecrement mode, update the address register: */
  /* on the 68000 and 68010 this is deferred until after the
     transfer, so that an address register that is itself stored is
     written with its initial value (see the quotation above).  the
     effective address has been stepped one transfer past the last
     one done, so the step is undone here: */
  if (ea_mode == 4
      && ic->tme_m68k_type < TME_M68K_M68020) {
    ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0
                              + TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 0, 3))
      = (ic->_tme_m68k_ea_address + sizeof(tme_uint32_t));
  }
  TME_M68K_INSN_OK;
}

/* the movep_mr function on a 32-bit dreg.  this reads four bytes
   from every other byte address, starting at the source address
   plus the sign-extended 16-bit displacement, and deposits them
   into a data register, most significant byte first: */
TME_M68K_INSN(tme_m68k_movep_mr32)
{
  unsigned int function_code;
  tme_uint32_t linear_address;
  int dreg;
  unsigned int byte_i;
  unsigned int shift;

  TME_M68K_INSN_CANFAULT;

  /* get the function code, the first address, and the register
     to fill: */
  function_code = TME_M68K_FUNCTION_CODE_DATA(ic);
  linear_address = TME_M68K_INSN_OP1(tme_uint32_t);
  linear_address += (tme_int32_t) ((tme_int16_t) TME_M68K_INSN_SPECOP);
  dreg = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 9, 3);

  /* read bits 31..24, then 23..16, then 15..8, then 7..0: */
  for (byte_i = 0; byte_i < sizeof(tme_uint32_t); byte_i++) {
    shift = 24 - (8 * byte_i);

    /* unless we are restarting, set up this byte's transfer: */
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = linear_address;
    }
    tme_m68k_read_memx8(ic);

    /* unless we are restarting, deposit the byte just read: */
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      TME_FIELD_DEPOSIT32(ic->tme_m68k_ireg_uint32(dreg), shift, 8, ic->tme_m68k_ireg_memx8);
    }

    /* successive bytes are two addresses apart: */
    linear_address += 2;
  }
  TME_M68K_INSN_OK;
}

/* the movem_mr function on 32-bit registers.  this loads from
   memory each register selected by the 16-bit register mask,
   walking the registers from D0 upwards and the effective address
   upwards.  in the postincrement mode (EA mode 3) the address
   register is then written with the final effective address: */
TME_M68K_INSN(tme_m68k_movem_mr32)
{
  int ireg;
  tme_uint16_t mask, bit;
  unsigned int ea_mode;
  const tme_uint32_t addend = sizeof(tme_uint32_t);

  /* get the register mask.  the instruction can only fault if at
     least one register is transferred: */
  mask = TME_M68K_INSN_SPECOP;
  if (mask != 0) {
    TME_M68K_INSN_CANFAULT;
  }

  /* memory-to-register transfers always run upwards from D0: */
  ea_mode = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 3, 3);
  ireg = TME_M68K_IREG_D0;

  /* do the transfer.  the loop visits all sixteen mask bits from
     the least significant upwards, and ends when the 16-bit bit
     value shifts out to zero: */
  for (bit = 1; bit != 0; bit <<= 1) {
    if (mask & bit) {
      tme_m68k_read_memx32(ic);
      if (!TME_M68K_SEQUENCE_RESTARTING) {
        ic->tme_m68k_ireg_uint32(ireg) = ic->tme_m68k_ireg_memx32;
        ic->_tme_m68k_ea_address += addend;
      }
    }
    ireg++;
  }

  /* if this is the postincrement mode, update the address register.
     this happens after the transfer, so it overrides any value
     loaded into the same register from memory: */
  if (ea_mode == 3) {
    ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0
                              + TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 0, 3))
      = ic->_tme_m68k_ea_address;
  }
  TME_M68K_INSN_OK;
}

/* chk32: */
TME_M68K_INSN(tme_m68k_chk32)
{
  if (*((tme_int32_t *) _op0) < 0) {
    ic->tme_m68k_ireg_ccr |= TME_M68K_FLAG_N;
    ic->tme_m68k_ireg_pc_last = ic->tme_m68k_ireg_pc;
    ic->tme_m68k_ireg_pc = ic->tme_m68k_ireg_pc_next;
    TME_M68K_INSN_EXCEPTION(TME_M68K_EXCEPTION_INST(TME_M68K_VECTOR_CHK));
  }
  if (*((tme_int32_t *) _op0) > *((tme_int32_t *) _op1)) {
    ic->tme_m68k_ireg_ccr &= ~TME_M68K_FLAG_N;
    ic->tme_m68k_ireg_pc_last = ic->tme_m68k_ireg_pc;
    ic->tme_m68k_ireg_pc = ic->tme_m68k_ireg_pc_next;
    TME_M68K_INSN_EXCEPTION(TME_M68K_EXCEPTION_INST(TME_M68K_VECTOR_CHK));
  }
  TME_M68K_INSN_OK;
}

/* cas32.  this does a 32-bit compare-and-swap on the memory operand
   at the current effective address: the operand is compared with
   the compare register, and if they are equal the update register
   is written to memory, otherwise the memory value is loaded into
   the compare register.  the condition codes are those left by the
   32-bit compare: */
TME_M68K_INSN(tme_m68k_cas32)
{
  struct tme_m68k_rmw rmw;
  struct tme_m68k_tlb *tlb;
  int ireg_dc, ireg_du;
  tme_uint32_t value_dc, value_du, value_mem;

  /* start the read/modify/write cycle: */
  rmw.tme_m68k_rmw_addresses[0] = ic->_tme_m68k_ea_address;
  rmw.tme_m68k_rmw_address_count = 1;
  rmw.tme_m68k_rmw_size = sizeof(tme_uint32_t);

  /* a nonzero return from tme_m68k_rmw_start() ends the instruction
     here without doing the compare.
     NOTE(review): the exact meaning of that return value is not
     visible in this file - confirm against tme_m68k_rmw_start(): */
  if (tme_m68k_rmw_start(ic,
                         &rmw)) {
    TME_M68K_INSN_OK;
  }

  /* get the compare and update registers, from the 3-bit fields at
     bits 0 and 6 of the extension word: */
  ireg_dc = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_SPECOP, 0, 3);
  ireg_du = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_SPECOP, 6, 3);

  /* if we can do the fast compare-and-exchange: */
  if (!rmw.tme_m68k_rmw_slow_reads[0]) {

    /* get the compare and update values in big-endian byte order: */
    value_dc = ic->tme_m68k_ireg_uint32(ireg_dc);
    value_du = ic->tme_m68k_ireg_uint32(ireg_du);
    value_dc = tme_htobe_u32(value_dc);
    value_du = tme_htobe_u32(value_du);

    /* get this TLB entry: */
    tlb = rmw.tme_m68k_rmw_tlbs[0];

    /* this TLB entry must allow fast reading and fast writing
       to the same memory: */
    assert (tlb->tme_m68k_tlb_emulator_off_read != TME_EMULATOR_OFF_UNDEF
            && tlb->tme_m68k_tlb_emulator_off_write == tlb->tme_m68k_tlb_emulator_off_read);

    /* do the compare-and-exchange.  the value returned is stored,
       in host byte order, as the memory operand for the compare
       below: */
    value_mem =
      tme_memory_atomic_cx32(((tme_shared tme_uint32_t *)
                                   (tlb->tme_m68k_tlb_emulator_off_read
                                    + ic->_tme_m68k_ea_address)),
                                  value_dc,
                                  value_du,
                                  tlb->tme_m68k_tlb_bus_rwlock,
                                  sizeof(tme_uint8_t));
    ic->tme_m68k_ireg_memx32 = tme_betoh_u32(value_mem);

    /* step the transfer count once for the read, and once for the write: */
    TME_M68K_SEQUENCE_TRANSFER_STEP;
    TME_M68K_SEQUENCE_TRANSFER_STEP;
  }

  /* compare the compare operand to the effective address operand.
     this sets the condition codes, including the Z flag tested
     below: */
  tme_m68k_cmp32(ic, &ic->tme_m68k_ireg_uint32(ireg_dc), &ic->tme_m68k_ireg_memx32);

  /* if the comparison succeeded: */
  if (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_Z) {

    /* write the update operand to the effective address operand: */
    ic->tme_m68k_ireg_memx32 = ic->tme_m68k_ireg_uint32(ireg_du);
  }

  /* otherwise, the comparison failed: */
  else {

    /* write the effective address operand to the compare operand: */
    ic->tme_m68k_ireg_uint32(ireg_dc) = ic->tme_m68k_ireg_memx32;
  }

  /* finish the read/modify/write cycle, passing whether or not the
     comparison succeeded: */
  tme_m68k_rmw_finish(ic,
                      &rmw,
                      (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_Z) != 0);
  TME_M68K_INSN_OK;
}

/* cas2_32.  this does a 32-bit double compare-and-swap.  two memory
   operands, addressed through registers named in the two extension
   words, are compared with two compare registers.  if both match,
   the two update registers are written to memory; otherwise both
   memory values are loaded into the compare registers: */
TME_M68K_INSN(tme_m68k_cas2_32)
{
  struct tme_m68k_rmw rmw;
  int ireg_dcx, ireg_dux;
  int ireg_dcy, ireg_duy;

  /* the first extension word is the special operand, and the
     second is the first instruction operand: */
  const tme_uint16_t specopx = TME_M68K_INSN_SPECOP;
  const tme_uint16_t specopy = TME_M68K_INSN_OP0(tme_uint16_t);

  /* start the read/modify/write cycle.  each address comes from the
     register selected by the 4-bit field at bit 12 of its extension
     word: */
  ic->_tme_m68k_ea_function_code = TME_M68K_FUNCTION_CODE_DATA(ic);
  rmw.tme_m68k_rmw_addresses[0] = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_D0
                                                           + TME_FIELD_EXTRACTU(specopx, 12, 4));
  rmw.tme_m68k_rmw_addresses[1] = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_D0
                                                           + TME_FIELD_EXTRACTU(specopy, 12, 4));
  rmw.tme_m68k_rmw_address_count = 2;
  rmw.tme_m68k_rmw_size = sizeof(tme_uint32_t);

  /* a nonzero return from tme_m68k_rmw_start() ends the instruction
     here without doing the compares.
     NOTE(review): the exact meaning of that return value is not
     visible in this file - confirm against tme_m68k_rmw_start(): */
  if (tme_m68k_rmw_start(ic,
                         &rmw)) {
    TME_M68K_INSN_OK;
  }

  /* do the comparisons.  the second comparison is only done if the
     first one succeeded, so on a failure the condition codes are
     those of the comparison that failed: */
  ireg_dcx = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(specopx, 0, 3);
  ireg_dcy = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(specopy, 0, 3);
  tme_m68k_cmp32(ic,
                 &ic->tme_m68k_ireg_uint32(ireg_dcx),
                 &ic->tme_m68k_ireg_memx32);
  if (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_Z) {
    tme_m68k_cmp32(ic,
                   &ic->tme_m68k_ireg_uint32(ireg_dcy),
                   &ic->tme_m68k_ireg_memy32);
  }

  /* if the comparisons succeeded: */
  if (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_Z) {

    /* write the update operands to the effective address operands: */
    ireg_dux = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(specopx, 6, 3);
    ireg_duy = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(specopy, 6, 3);
    ic->tme_m68k_ireg_memx32 = ic->tme_m68k_ireg_uint32(ireg_dux);
    ic->tme_m68k_ireg_memy32 = ic->tme_m68k_ireg_uint32(ireg_duy);
  }

  /* otherwise, the comparisons failed: */
  else {

    /* write the effective address operands to the compare operands.
       "If Dc1 and Dc2 specify the same data register and the comparison
        fails, memory operand 1 is stored in the data register."
       the second compare register is written first, so that the
       first memory operand wins when both name the same register: */
    ic->tme_m68k_ireg_uint32(ireg_dcy) = ic->tme_m68k_ireg_memy32;
    ic->tme_m68k_ireg_uint32(ireg_dcx) = ic->tme_m68k_ireg_memx32;
  }

  /* finish the read/modify/write cycle, passing whether or not the
     comparisons succeeded: */
  tme_m68k_rmw_finish(ic,
                      &rmw,
                      (ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_Z) != 0);
  TME_M68K_INSN_OK;
}

/* moves32.  this privileged instruction moves a 32-bit value
   between a register and memory, using the function code in the
   DFC register for a write to memory and the one in the SFC
   register for a read from memory.  bit 11 of the extension word
   selects the direction, and the 4-bit field at bit 12 selects
   the register: */
TME_M68K_INSN(tme_m68k_moves32)
{
  int ireg;
  tme_uint32_t ireg_value;
  unsigned int ea_reg;
  unsigned int ea_mode;
  unsigned int increment;
  TME_M68K_INSN_PRIV;
  TME_M68K_INSN_CANFAULT;
  ireg = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_SPECOP, 12, 4);

  /* snapshot the register now, because the postincrement or
     predecrement handling below may modify it when it is also
     the address register of the EA: */
  ireg_value = ic->tme_m68k_ireg_uint32(ireg);

  /* postincrement and predecrement are not done for us, so unless
     we are restarting, adjust the address register here: */
  if (!TME_M68K_SEQUENCE_RESTARTING) {
    ea_reg = TME_M68K_IREG_A0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 0, 3);
    ea_mode = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 3, 3);

    /* byte-sized transfers through A7 step by a word instead: */
    increment = TME_M68K_SIZE_32;
    if (ea_reg == TME_M68K_IREG_A7 && increment == TME_M68K_SIZE_8) {
      increment = TME_M68K_SIZE_16;
    }

    if (ea_mode == 3) {

      /* postincrement: */
      ic->tme_m68k_ireg_uint32(ea_reg) += increment;
    }
    else if (ea_mode == 4) {

      /* predecrement; the decremented register is the address: */
      ic->tme_m68k_ireg_uint32(ea_reg) -= increment;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(ea_reg);
    }
  }

  if (!(TME_M68K_INSN_SPECOP & TME_BIT(11))) {

    /* memory to register, through the source function code: */
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->_tme_m68k_ea_function_code = ic->tme_m68k_ireg_sfc;
    }
    tme_m68k_read_memx32(ic);
    ic->tme_m68k_ireg_uint32(ireg) = ic->tme_m68k_ireg_memx32;
  }
  else {

    /* register to memory, through the destination function code: */
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_memx32 = ireg_value;
      ic->_tme_m68k_ea_function_code = ic->tme_m68k_ireg_dfc;
    }
    tme_m68k_write_memx32(ic);
  }
  TME_M68K_INSN_OK;
}

/* this reads an 8-bit memx value.  the byte at the current effective
   address (ic->_tme_m68k_ea_function_code and
   ic->_tme_m68k_ea_address) is read into ic->tme_m68k_ireg_memx8,
   directly from emulator memory when the data TLB entry allows it,
   and otherwise through tme_m68k_read8(): */
void
tme_m68k_read_memx8(struct tme_m68k *ic) 
{
  tme_bus_context_t bus_context = ic->_tme_m68k_bus_context;
  unsigned int function_code = ic->_tme_m68k_ea_function_code;
  tme_uint32_t linear_address = ic->_tme_m68k_ea_address;
  struct tme_m68k_tlb *tlb = TME_M68K_DTLB_ENTRY(ic, bus_context, function_code, linear_address);
  tme_uint8_t mem_value;
  const tme_shared tme_uint8_t *mem;

#ifdef _TME_M68K_STATS
  ic->tme_m68k_stats.tme_m68k_stats_memory_total++;
#endif /* _TME_M68K_STATS */

  /* busy this TLB entry: */
  tme_m68k_tlb_busy(tlb);

  /* if we aren't restarting, and this TLB entry is valid for this
     bus context and function code, covers the address, and allows
     fast reads.  (no alignment test is made for an 8-bit access): */
  if (__tme_predict_true(!TME_M68K_SEQUENCE_RESTARTING
                         && tme_m68k_tlb_is_valid(tlb)
                         && tlb->tme_m68k_tlb_bus_context == bus_context
                         && (tlb->tme_m68k_tlb_function_codes_mask
                             & TME_BIT(function_code))
                         && linear_address >= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_first
                         && linear_address <= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_last
                         && tlb->tme_m68k_tlb_emulator_off_read != TME_EMULATOR_OFF_UNDEF)) {

    /* make the emulator memory pointer: */
    mem = (const tme_shared tme_uint8_t *) (tlb->tme_m68k_tlb_emulator_off_read + linear_address);

    /* do the 8-bit bus read: */
    mem_value = tme_memory_bus_read8(mem, tlb->tme_m68k_tlb_bus_rwlock, sizeof(tme_uint8_t), sizeof(tme_uint32_t));

    /* put the value read: */
    ic->tme_m68k_ireg_memx8 = mem_value;

    /* step the transfer count: */
    TME_M68K_SEQUENCE_TRANSFER_STEP;
  }

  /* otherwise, do the bus cycles the slow way: */
  else {
    tme_m68k_read8(ic, tlb,
                    &ic->_tme_m68k_ea_function_code,
                    &ic->_tme_m68k_ea_address,
                    &ic->tme_m68k_ireg_memx8,
                    TME_M68K_BUS_CYCLE_NORMAL);
  }

  /* unbusy this TLB entry: */
  tme_m68k_tlb_unbusy(tlb);

  /* verify and log the value read: */
  tme_m68k_verify_mem8(ic, ic->_tme_m68k_ea_function_code, ic->_tme_m68k_ea_address, ic->tme_m68k_ireg_memx8, TME_BUS_CYCLE_READ);
  tme_m68k_log(ic, 1000, TME_OK,
               (TME_M68K_LOG_HANDLE(ic),
                _("read_memx8\t%d:0x%08x:\t0x%02x"),
                ic->_tme_m68k_ea_function_code,
                ic->_tme_m68k_ea_address,
                ic->tme_m68k_ireg_memx8));
}

/* this reads an 8-bit mem value.  the byte at the current effective
   address (ic->_tme_m68k_ea_function_code and
   ic->_tme_m68k_ea_address) is read into the 8-bit internal
   register selected by ireg, directly from emulator memory when
   the data TLB entry allows it, and otherwise through
   tme_m68k_read8(): */
void
tme_m68k_read_mem8(struct tme_m68k *ic, int ireg) 
{
  tme_bus_context_t bus_context = ic->_tme_m68k_bus_context;
  unsigned int function_code = ic->_tme_m68k_ea_function_code;
  tme_uint32_t linear_address = ic->_tme_m68k_ea_address;
  struct tme_m68k_tlb *tlb = TME_M68K_DTLB_ENTRY(ic, bus_context, function_code, linear_address);
  tme_uint8_t mem_value;
  const tme_shared tme_uint8_t *mem;

#ifdef _TME_M68K_STATS
  ic->tme_m68k_stats.tme_m68k_stats_memory_total++;
#endif /* _TME_M68K_STATS */

  /* busy this TLB entry: */
  tme_m68k_tlb_busy(tlb);

  /* if we aren't restarting, and this TLB entry is valid for this
     bus context and function code, covers the address, and allows
     fast reads.  (no alignment test is made for an 8-bit access): */
  if (__tme_predict_true(!TME_M68K_SEQUENCE_RESTARTING
                         && tme_m68k_tlb_is_valid(tlb)
                         && tlb->tme_m68k_tlb_bus_context == bus_context
                         && (tlb->tme_m68k_tlb_function_codes_mask
                             & TME_BIT(function_code))
                         && linear_address >= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_first
                         && linear_address <= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_last
                         && tlb->tme_m68k_tlb_emulator_off_read != TME_EMULATOR_OFF_UNDEF)) {

    /* make the emulator memory pointer: */
    mem = (const tme_shared tme_uint8_t *) (tlb->tme_m68k_tlb_emulator_off_read + linear_address);

    /* do the 8-bit bus read: */
    mem_value = tme_memory_bus_read8(mem, tlb->tme_m68k_tlb_bus_rwlock, sizeof(tme_uint8_t), sizeof(tme_uint32_t));

    /* put the value read: */
    ic->tme_m68k_ireg_uint8(ireg) = mem_value;

    /* step the transfer count: */
    TME_M68K_SEQUENCE_TRANSFER_STEP;
  }

  /* otherwise, do the bus cycles the slow way: */
  else {
    tme_m68k_read8(ic, tlb,
                    &ic->_tme_m68k_ea_function_code,
                    &ic->_tme_m68k_ea_address,
                    &ic->tme_m68k_ireg_uint8(ireg),
                    TME_M68K_BUS_CYCLE_NORMAL);
  }

  /* unbusy this TLB entry: */
  tme_m68k_tlb_unbusy(tlb);

  /* verify and log the value read: */
  tme_m68k_verify_mem8(ic, ic->_tme_m68k_ea_function_code, ic->_tme_m68k_ea_address, ic->tme_m68k_ireg_uint8(ireg), TME_BUS_CYCLE_READ);
  tme_m68k_log(ic, 1000, TME_OK,
               (TME_M68K_LOG_HANDLE(ic),
                _("read_mem8\t%d:0x%08x:\t0x%02x"),
                ic->_tme_m68k_ea_function_code,
                ic->_tme_m68k_ea_address,
                ic->tme_m68k_ireg_uint8(ireg)));
}

/* this writes an 8-bit memx value.  the byte in
   ic->tme_m68k_ireg_memx8 is written to the current effective
   address (ic->_tme_m68k_ea_function_code and
   ic->_tme_m68k_ea_address), directly into emulator memory when
   the data TLB entry allows it, and otherwise through
   tme_m68k_write8(): */
void
tme_m68k_write_memx8(struct tme_m68k *ic) 
{
  tme_bus_context_t bus_context = ic->_tme_m68k_bus_context;
  unsigned int function_code = ic->_tme_m68k_ea_function_code;
  tme_uint32_t linear_address = ic->_tme_m68k_ea_address;
  struct tme_m68k_tlb *tlb = TME_M68K_DTLB_ENTRY(ic, bus_context, function_code, linear_address);
  tme_uint8_t mem_value;
  tme_shared tme_uint8_t *mem;

#ifdef _TME_M68K_STATS
  ic->tme_m68k_stats.tme_m68k_stats_memory_total++;
#endif /* _TME_M68K_STATS */

  /* verify and log the value written.  this is done before the
     write itself is attempted: */
  tme_m68k_verify_mem8(ic, ic->_tme_m68k_ea_function_code, ic->_tme_m68k_ea_address, ic->tme_m68k_ireg_memx8, TME_BUS_CYCLE_WRITE);
  tme_m68k_log(ic, 1000, TME_OK, 
               (TME_M68K_LOG_HANDLE(ic),
                _("write_memx8\t%d:0x%08x:\t0x%02x"),
                ic->_tme_m68k_ea_function_code,
                ic->_tme_m68k_ea_address,
                ic->tme_m68k_ireg_memx8));

  /* busy this TLB entry: */
  tme_m68k_tlb_busy(tlb);

  /* if we aren't restarting, and this TLB entry is valid for this
     bus context and function code, covers the address, and allows
     fast writes.  (no alignment test is made for an 8-bit access): */
  if (__tme_predict_true(!TME_M68K_SEQUENCE_RESTARTING
                         && tme_m68k_tlb_is_valid(tlb)
                         && tlb->tme_m68k_tlb_bus_context == bus_context
                         && (tlb->tme_m68k_tlb_function_codes_mask
                             & TME_BIT(function_code))
                         && linear_address >= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_first
                         && linear_address <= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_last
                         && tlb->tme_m68k_tlb_emulator_off_write != TME_EMULATOR_OFF_UNDEF)) {

    /* make the emulator memory pointer: */
    mem = (tme_shared tme_uint8_t *) (tlb->tme_m68k_tlb_emulator_off_write + linear_address);

    /* get the value to write: */
    mem_value = ic->tme_m68k_ireg_memx8;

    /* do the 8-bit bus write: */
    tme_memory_bus_write8(mem, mem_value, tlb->tme_m68k_tlb_bus_rwlock, sizeof(tme_uint8_t), sizeof(tme_uint32_t));

    /* step the transfer count: */
    TME_M68K_SEQUENCE_TRANSFER_STEP;
  }

  /* otherwise, do the bus cycles the slow way: */
  else {
    tme_m68k_write8(ic, tlb,
                    &ic->_tme_m68k_ea_function_code,
                    &ic->_tme_m68k_ea_address,
                    &ic->tme_m68k_ireg_memx8,
                    TME_M68K_BUS_CYCLE_NORMAL);
  }

  /* unbusy this TLB entry: */
  tme_m68k_tlb_unbusy(tlb);
}

/* this writes a 8-bit mem value: */
void
tme_m68k_write_mem8(struct tme_m68k *ic, int ireg) 
{
  tme_bus_context_t bus_context = ic->_tme_m68k_bus_context;
  unsigned int function_code = ic->_tme_m68k_ea_function_code;
  tme_uint32_t linear_address = ic->_tme_m68k_ea_address;
  struct tme_m68k_tlb *tlb = TME_M68K_DTLB_ENTRY(ic, bus_context, function_code, linear_address);
  tme_uint8_t mem_value;
  tme_shared tme_uint8_t *mem;

#ifdef _TME_M68K_STATS
  ic->tme_m68k_stats.tme_m68k_stats_memory_total++;
#endif /* _TME_M68K_STATS */

  /* log the value written: */
  tme_m68k_verify_mem8(ic, ic->_tme_m68k_ea_function_code, ic->_tme_m68k_ea_address, ic->tme_m68k_ireg_uint8(ireg), TME_BUS_CYCLE_WRITE);
  tme_m68k_log(ic, 1000, TME_OK, 
               (TME_M68K_LOG_HANDLE(ic),
                _("write_mem8\t%d:0x%08x:\t0x%02x"),
                ic->_tme_m68k_ea_function_code,
                ic->_tme_m68k_ea_address,
                ic->tme_m68k_ireg_uint8(ireg)));

  /* busy this TLB entry: */
  tme_m68k_tlb_busy(tlb);

  /* if we aren't restarting, and this address is properly aligned,
     and this TLB entry covers the operand and allows fast writes: */
  if (__tme_predict_true(!TME_M68K_SEQUENCE_RESTARTING
                         && tme_m68k_tlb_is_valid(tlb)
                         && tlb->tme_m68k_tlb_bus_context == bus_context
                         && (tlb->tme_m68k_tlb_function_codes_mask
                             & TME_BIT(function_code))
                         && linear_address >= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_first
                         && linear_address <= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_last
                         && tlb->tme_m68k_tlb_emulator_off_write != TME_EMULATOR_OFF_UNDEF)) {

    /* make the emulator memory pointer: */
    mem = (tme_shared tme_uint8_t *) (tlb->tme_m68k_tlb_emulator_off_write + linear_address);

    /* get the value to write: */
    mem_value = ic->tme_m68k_ireg_uint8(ireg);

    /* do the 8-bit bus write: */
    tme_memory_bus_write8(mem, mem_value, tlb->tme_m68k_tlb_bus_rwlock, sizeof(tme_uint8_t), sizeof(tme_uint32_t));

    /* step the transfer count: */
    TME_M68K_SEQUENCE_TRANSFER_STEP;
  }

  /* otherwise, do the bus cycles the slow way: */
  else {
    tme_m68k_write8(ic, tlb,
                    &ic->_tme_m68k_ea_function_code,
                    &ic->_tme_m68k_ea_address,
                    &ic->tme_m68k_ireg_uint8(ireg),
                    TME_M68K_BUS_CYCLE_NORMAL);
  }

  /* unbusy this TLB entry: */
  tme_m68k_tlb_unbusy(tlb);
}

/* this reads a 16-bit memx value.

   the operand is read from the current effective address
   (ic->_tme_m68k_ea_function_code, ic->_tme_m68k_ea_address) into
   ic->tme_m68k_ireg_memx16.  when the TLB entry chosen by
   TME_M68K_DTLB_ENTRY() can be used, the big-endian value is read
   directly through the entry's emulator read offset and converted
   to host byte order; otherwise the read is handed to
   tme_m68k_read16().  the value read is verified and logged
   afterwards: */
void
tme_m68k_read_memx16(struct tme_m68k *ic)
{
  tme_bus_context_t bus_context = ic->_tme_m68k_bus_context;
  unsigned int function_code = ic->_tme_m68k_ea_function_code;
  tme_uint32_t linear_address_first = ic->_tme_m68k_ea_address;
  /* NB: this is 32-bit arithmetic, so the last address wraps around
     to a small value when the operand starts at the very top of the
     address space: */
  tme_uint32_t linear_address_last = linear_address_first + sizeof(tme_uint16_t) - 1;
  struct tme_m68k_tlb *tlb = TME_M68K_DTLB_ENTRY(ic, bus_context, function_code, linear_address_first);
  tme_uint16_t mem_value;
  const tme_shared tme_uint16_t *mem;

#ifdef _TME_M68K_STATS
  ic->tme_m68k_stats.tme_m68k_stats_memory_total++;
#endif /* _TME_M68K_STATS */

  /* busy this TLB entry: */
  tme_m68k_tlb_busy(tlb);

  /* if we aren't restarting, and this address passes the bus
     alignment mask, and the operand does not wrap around the end of
     the 32-bit address space, and this TLB entry covers the operand
     and allows fast reads.  without the wrap test, a wrapped last
     address would trivially pass the comparison against the TLB
     entry's last address, and the fast read would run past the
     memory that the TLB entry covers: */
  if (__tme_predict_true(!TME_M68K_SEQUENCE_RESTARTING
                         && (ic->_tme_m68k_bus_16bit & linear_address_first) == 0
                         && tme_m68k_tlb_is_valid(tlb)
                         && tlb->tme_m68k_tlb_bus_context == bus_context
                         && (tlb->tme_m68k_tlb_function_codes_mask
                             & TME_BIT(function_code))
                         && linear_address_last >= linear_address_first
                         && linear_address_first >= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_first
                         && linear_address_last <= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_last
                         && tlb->tme_m68k_tlb_emulator_off_read != TME_EMULATOR_OFF_UNDEF)) {

    /* make the emulator memory pointer: */
    mem = (const tme_shared tme_uint16_t *) (tlb->tme_m68k_tlb_emulator_off_read + linear_address_first);

    /* do the 16-bit bus read: */
    mem_value = tme_memory_bus_read16(mem, tlb->tme_m68k_tlb_bus_rwlock, sizeof(tme_uint8_t), sizeof(tme_uint32_t));

    /* put the value read, in host byte order: */
    ic->tme_m68k_ireg_memx16 = tme_betoh_u16(mem_value);

    /* step the transfer count: */
    TME_M68K_SEQUENCE_TRANSFER_STEP;
  }

  /* otherwise, do the bus cycles the slow way: */
  else {
    tme_m68k_read16(ic, tlb,
                    &ic->_tme_m68k_ea_function_code,
                    &ic->_tme_m68k_ea_address,
                    &ic->tme_m68k_ireg_memx16,
                    TME_M68K_BUS_CYCLE_NORMAL);
  }

  /* unbusy this TLB entry: */
  tme_m68k_tlb_unbusy(tlb);

  /* log the value read: */
  tme_m68k_verify_mem16(ic, ic->_tme_m68k_ea_function_code, ic->_tme_m68k_ea_address, ic->tme_m68k_ireg_memx16, TME_BUS_CYCLE_READ);
  tme_m68k_log(ic, 1000, TME_OK,
               (TME_M68K_LOG_HANDLE(ic),
                _("read_memx16\t%d:0x%08x:\t0x%04x"),
                ic->_tme_m68k_ea_function_code,
                ic->_tme_m68k_ea_address,
                ic->tme_m68k_ireg_memx16));
}

/* this reads a 16-bit mem value.

   the operand is read from the current effective address
   (ic->_tme_m68k_ea_function_code, ic->_tme_m68k_ea_address) into
   the 16-bit internal register selected by ireg.  when the TLB
   entry chosen by TME_M68K_DTLB_ENTRY() can be used, the big-endian
   value is read directly through the entry's emulator read offset
   and converted to host byte order; otherwise the read is handed to
   tme_m68k_read16().  the value read is verified and logged
   afterwards: */
void
tme_m68k_read_mem16(struct tme_m68k *ic, int ireg)
{
  tme_bus_context_t bus_context = ic->_tme_m68k_bus_context;
  unsigned int function_code = ic->_tme_m68k_ea_function_code;
  tme_uint32_t linear_address_first = ic->_tme_m68k_ea_address;
  /* NB: this is 32-bit arithmetic, so the last address wraps around
     to a small value when the operand starts at the very top of the
     address space: */
  tme_uint32_t linear_address_last = linear_address_first + sizeof(tme_uint16_t) - 1;
  struct tme_m68k_tlb *tlb = TME_M68K_DTLB_ENTRY(ic, bus_context, function_code, linear_address_first);
  tme_uint16_t mem_value;
  const tme_shared tme_uint16_t *mem;

#ifdef _TME_M68K_STATS
  ic->tme_m68k_stats.tme_m68k_stats_memory_total++;
#endif /* _TME_M68K_STATS */

  /* busy this TLB entry: */
  tme_m68k_tlb_busy(tlb);

  /* if we aren't restarting, and this address passes the bus
     alignment mask, and the operand does not wrap around the end of
     the 32-bit address space, and this TLB entry covers the operand
     and allows fast reads.  without the wrap test, a wrapped last
     address would trivially pass the comparison against the TLB
     entry's last address, and the fast read would run past the
     memory that the TLB entry covers: */
  if (__tme_predict_true(!TME_M68K_SEQUENCE_RESTARTING
                         && (ic->_tme_m68k_bus_16bit & linear_address_first) == 0
                         && tme_m68k_tlb_is_valid(tlb)
                         && tlb->tme_m68k_tlb_bus_context == bus_context
                         && (tlb->tme_m68k_tlb_function_codes_mask
                             & TME_BIT(function_code))
                         && linear_address_last >= linear_address_first
                         && linear_address_first >= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_first
                         && linear_address_last <= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_last
                         && tlb->tme_m68k_tlb_emulator_off_read != TME_EMULATOR_OFF_UNDEF)) {

    /* make the emulator memory pointer: */
    mem = (const tme_shared tme_uint16_t *) (tlb->tme_m68k_tlb_emulator_off_read + linear_address_first);

    /* do the 16-bit bus read: */
    mem_value = tme_memory_bus_read16(mem, tlb->tme_m68k_tlb_bus_rwlock, sizeof(tme_uint8_t), sizeof(tme_uint32_t));

    /* put the value read, in host byte order: */
    ic->tme_m68k_ireg_uint16(ireg) = tme_betoh_u16(mem_value);

    /* step the transfer count: */
    TME_M68K_SEQUENCE_TRANSFER_STEP;
  }

  /* otherwise, do the bus cycles the slow way: */
  else {
    tme_m68k_read16(ic, tlb,
                    &ic->_tme_m68k_ea_function_code,
                    &ic->_tme_m68k_ea_address,
                    &ic->tme_m68k_ireg_uint16(ireg),
                    TME_M68K_BUS_CYCLE_NORMAL);
  }

  /* unbusy this TLB entry: */
  tme_m68k_tlb_unbusy(tlb);

  /* log the value read: */
  tme_m68k_verify_mem16(ic, ic->_tme_m68k_ea_function_code, ic->_tme_m68k_ea_address, ic->tme_m68k_ireg_uint16(ireg), TME_BUS_CYCLE_READ);
  tme_m68k_log(ic, 1000, TME_OK,
               (TME_M68K_LOG_HANDLE(ic),
                _("read_mem16\t%d:0x%08x:\t0x%04x"),
                ic->_tme_m68k_ea_function_code,
                ic->_tme_m68k_ea_address,
                ic->tme_m68k_ireg_uint16(ireg)));
}

/* this reads a 16-bit inst value.

   pc is the address to fetch from, in the program function code
   space given by TME_M68K_FUNCTION_CODE_PROGRAM().  the fetched
   value is returned, and a copy of it lives in the instruction fetch
   buffer (ic->_tme_m68k_insn_fetch_buffer) at the byte offset
   ic->_tme_m68k_insn_fetch_slow_next, which is advanced by
   sizeof(tme_uint16_t) before returning.

   there are three ways the value is obtained:

   - if the offset is below ic->_tme_m68k_insn_fetch_slow_count_fast,
     the value is simply read back out of the instruction fetch
     buffer;

   - otherwise, if the instruction TLB entry (ic->_tme_m68k_itlb)
     can be used, the value is read directly through the entry's
     emulator read offset, converted to host byte order, and stored
     into the instruction fetch buffer;

   - otherwise, tme_m68k_read16() does the read into the instruction
     fetch buffer, and the value is read back from there: */
tme_uint16_t
tme_m68k_fetch16(struct tme_m68k *ic, tme_uint32_t pc) 
{
  tme_bus_context_t bus_context = ic->_tme_m68k_bus_context;
  unsigned int function_code = TME_M68K_FUNCTION_CODE_PROGRAM(ic);
  tme_uint32_t linear_address_first = pc;
  tme_uint32_t linear_address_last = linear_address_first + sizeof(tme_uint16_t) - 1;
  struct tme_m68k_tlb *tlb = &ic->_tme_m68k_itlb;
  tme_uint16_t mem_value;
  const tme_shared tme_uint16_t *mem;
  /* the byte offset in the instruction fetch buffer for this fetch.
     this local copy is used throughout, and is only written back,
     advanced, at the end of the function: */
  unsigned int fetch_slow_next = ic->_tme_m68k_insn_fetch_slow_next;

#ifdef _TME_M68K_STATS
  ic->tme_m68k_stats.tme_m68k_stats_memory_total++;
#endif /* _TME_M68K_STATS */

  /* busy this TLB entry: */
  tme_m68k_tlb_busy(tlb);

  /* if this fetch was done by the fast executor: */
  if (__tme_predict_true(fetch_slow_next < ic->_tme_m68k_insn_fetch_slow_count_fast)) {

    /* the entire fetch must be in the instruction buffer, and
       we must be restarting: */
    assert ((fetch_slow_next + sizeof(tme_uint16_t))
            <= ic->_tme_m68k_insn_fetch_slow_count_fast);
    assert (TME_M68K_SEQUENCE_RESTARTING);
    /* no bus access is needed; take the value from the buffer: */
    mem_value = tme_memory_read16(((tme_uint16_t *) (((tme_uint8_t *) &ic->_tme_m68k_insn_fetch_buffer[0]) + fetch_slow_next)), sizeof(tme_uint16_t));
  }

  /* otherwise, this fetch was not done by the fast executor: */
  else {

    /* if we're restarting, but the offset in the instruction buffer
       to fetch into is at the instruction buffer total, this must be
       a fake fault caused by the fast executor.  we confirm this by
       checking that this transfer "caused" the fault, and that this
       transfer will be the first slow one after any fast fetches.
       in this case, we can cancel the restart for now: */
    if (TME_M68K_SEQUENCE_RESTARTING
        && (fetch_slow_next
            == ic->_tme_m68k_insn_fetch_slow_count_total)) {
      assert ((ic->_tme_m68k_sequence._tme_m68k_sequence_transfer_next
               == ic->_tme_m68k_sequence._tme_m68k_sequence_transfer_faulted)
              && (fetch_slow_next
                  == ic->_tme_m68k_insn_fetch_slow_count_fast));
      /* NB: TME_M68K_SEQUENCE_RESTARTING is tested again below, after
         this decrement; presumably the decrement is what makes that
         later test see "not restarting" -- confirm against the macro
         definition: */
      ic->_tme_m68k_sequence._tme_m68k_sequence_transfer_faulted--;
    }

    /* if we're not restarting: */
    if (!TME_M68K_SEQUENCE_RESTARTING) {

      /* we advance the instruction buffer total *before* we do
         what may be a slow fetch, because we may transfer a few
         bytes and then fault.  without this, those few bytes
         would not get saved in the exception stack frame and
         restored later before the continuation of the fetch: */
      ic->_tme_m68k_insn_fetch_slow_count_total += sizeof(tme_uint16_t);
    }

    /* make sure that if this is a new transfer or if this
       transfer faulted, that we're fetching for the current
       last positions in the instruction buffer: */
    assert ((ic->_tme_m68k_sequence._tme_m68k_sequence_transfer_next
             < ic->_tme_m68k_sequence._tme_m68k_sequence_transfer_faulted)
            || ((fetch_slow_next + sizeof(tme_uint16_t))
                == ic->_tme_m68k_insn_fetch_slow_count_total));

    /* if we aren't restarting, and this address is properly aligned,
       and this TLB entry covers the operand and allows fast reads.
       NB: the alignment test here requires a 16-bit aligned address,
       so the last address of the operand cannot wrap around: */
    if (__tme_predict_true(!TME_M68K_SEQUENCE_RESTARTING
                           && ((sizeof(tme_uint16_t) - 1) & linear_address_first) == 0
                           && tme_m68k_tlb_is_valid(tlb)
                           && tlb->tme_m68k_tlb_bus_context == bus_context
                           && (tlb->tme_m68k_tlb_function_codes_mask
                               & TME_BIT(function_code))
                           && linear_address_first >= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_first
                           && linear_address_last <= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_last
                           && tlb->tme_m68k_tlb_emulator_off_read != TME_EMULATOR_OFF_UNDEF)) {

      /* make the emulator memory pointer: */
      mem = (const tme_shared tme_uint16_t *) (tlb->tme_m68k_tlb_emulator_off_read + linear_address_first);

      /* do the 16-bit bus read: */
      mem_value = tme_memory_bus_read16(mem, tlb->tme_m68k_tlb_bus_rwlock, sizeof(tme_uint16_t), sizeof(tme_uint32_t));

      /* put the value read, in host byte order, into the instruction
         fetch buffer.  mem_value keeps the host-order value, which is
         what gets returned: */
      mem_value = tme_betoh_u16(mem_value);
      tme_memory_write16(((tme_uint16_t *) (((tme_uint8_t *) &ic->_tme_m68k_insn_fetch_buffer[0]) + fetch_slow_next)), mem_value, sizeof(tme_uint16_t));

      /* step the transfer count: */
      TME_M68K_SEQUENCE_TRANSFER_STEP;
    }

    /* otherwise, do the bus cycles the slow way.  the read goes
       directly into the instruction fetch buffer, and the value to
       return is then read back out of the buffer: */
    else {
      tme_m68k_read16(ic, tlb,
                      &function_code,
                      &pc,
                      ((tme_uint16_t *) (((tme_uint8_t *) &ic->_tme_m68k_insn_fetch_buffer[0]) + fetch_slow_next)),
                      TME_M68K_BUS_CYCLE_FETCH);
      mem_value = tme_memory_read16(((tme_uint16_t *) (((tme_uint8_t *) &ic->_tme_m68k_insn_fetch_buffer[0]) + fetch_slow_next)), sizeof(tme_uint16_t));
    }
  }

  /* unbusy this TLB entry: */
  tme_m68k_tlb_unbusy(tlb);

  /* log the value read.  the value is taken from the instruction
     fetch buffer, at the offset used by this fetch: */
  tme_m68k_verify_mem16(ic, function_code, pc, *((tme_uint16_t *) (((tme_uint8_t *) &ic->_tme_m68k_insn_fetch_buffer[0]) + fetch_slow_next)), TME_BUS_CYCLE_READ);
  tme_m68k_log(ic, 1000, TME_OK,
               (TME_M68K_LOG_HANDLE(ic),
                _("fetch16\t%d:0x%08x:\t0x%04x"),
                function_code,
                pc,
                *((tme_uint16_t *) (((tme_uint8_t *) &ic->_tme_m68k_insn_fetch_buffer[0]) + fetch_slow_next))));

  /* advance the offset in the instruction buffer for the next slow fetch: */
  fetch_slow_next += sizeof(tme_uint16_t);
  ic->_tme_m68k_insn_fetch_slow_next = fetch_slow_next;

  /* return the fetched value: */
  return(mem_value);
}

/* this reads a 16-bit stack value.

   the operand is read from the address in ic->tme_m68k_ireg_a7, in
   the data function code space given by
   TME_M68K_FUNCTION_CODE_DATA(), and stored through _value.  when
   the TLB entry chosen by TME_M68K_DTLB_ENTRY() can be used, the
   big-endian value is read directly through the entry's emulator
   read offset and converted to host byte order; otherwise the read
   is handed to tme_m68k_read16().  after the value has been
   verified and logged, and only if we are not restarting,
   ic->tme_m68k_ireg_a7 is advanced past the operand: */
void
tme_m68k_pop16(struct tme_m68k *ic, tme_uint16_t *_value)
{
  tme_bus_context_t bus_context = ic->_tme_m68k_bus_context;
  unsigned int function_code = TME_M68K_FUNCTION_CODE_DATA(ic);
  tme_uint32_t linear_address_first = ic->tme_m68k_ireg_a7;
  /* NB: this is 32-bit arithmetic, so the last address wraps around
     to a small value when the operand starts at the very top of the
     address space: */
  tme_uint32_t linear_address_last = linear_address_first + sizeof(tme_uint16_t) - 1;
  struct tme_m68k_tlb *tlb = TME_M68K_DTLB_ENTRY(ic, bus_context, function_code, linear_address_first);
  tme_uint16_t mem_value;
  const tme_shared tme_uint16_t *mem;

#ifdef _TME_M68K_STATS
  ic->tme_m68k_stats.tme_m68k_stats_memory_total++;
#endif /* _TME_M68K_STATS */

  /* busy this TLB entry: */
  tme_m68k_tlb_busy(tlb);

  /* if we aren't restarting, and this address passes the bus
     alignment mask, and the operand does not wrap around the end of
     the 32-bit address space, and this TLB entry covers the operand
     and allows fast reads.  without the wrap test, a wrapped last
     address would trivially pass the comparison against the TLB
     entry's last address, and the fast read would run past the
     memory that the TLB entry covers: */
  if (__tme_predict_true(!TME_M68K_SEQUENCE_RESTARTING
                         && (ic->_tme_m68k_bus_16bit & linear_address_first) == 0
                         && tme_m68k_tlb_is_valid(tlb)
                         && tlb->tme_m68k_tlb_bus_context == bus_context
                         && (tlb->tme_m68k_tlb_function_codes_mask
                             & TME_BIT(function_code))
                         && linear_address_last >= linear_address_first
                         && linear_address_first >= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_first
                         && linear_address_last <= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_last
                         && tlb->tme_m68k_tlb_emulator_off_read != TME_EMULATOR_OFF_UNDEF)) {

    /* make the emulator memory pointer: */
    mem = (const tme_shared tme_uint16_t *) (tlb->tme_m68k_tlb_emulator_off_read + linear_address_first);

    /* do the 16-bit bus read: */
    mem_value = tme_memory_bus_read16(mem, tlb->tme_m68k_tlb_bus_rwlock, sizeof(tme_uint8_t), sizeof(tme_uint32_t));

    /* put the value read, in host byte order: */
    *_value = tme_betoh_u16(mem_value);

    /* step the transfer count: */
    TME_M68K_SEQUENCE_TRANSFER_STEP;
  }

  /* otherwise, do the bus cycles the slow way: */
  else {
    tme_m68k_read16(ic, tlb,
                    &function_code,
                    &ic->tme_m68k_ireg_a7,
                    _value,
                    TME_M68K_BUS_CYCLE_NORMAL);
  }

  /* unbusy this TLB entry: */
  tme_m68k_tlb_unbusy(tlb);

  /* log the value read.  the stack pointer has not been adjusted
     yet, so it still gives the address that was read: */
  tme_m68k_verify_mem16(ic, function_code, ic->tme_m68k_ireg_a7, *_value, TME_BUS_CYCLE_READ);
  tme_m68k_log(ic, 1000, TME_OK,
               (TME_M68K_LOG_HANDLE(ic),
                _("pop16\t%d:0x%08x:\t0x%04x"),
                function_code,
                ic->tme_m68k_ireg_a7,
                *_value));

  /* if we're not restarting, pop the operand off of the stack: */
  if (!TME_M68K_SEQUENCE_RESTARTING) {
    ic->tme_m68k_ireg_a7 += sizeof(tme_uint16_t);
  }
}

/* this writes a 16-bit memx value.

   the value in ic->tme_m68k_ireg_memx16 is verified and logged, and
   then written to the current effective address
   (ic->_tme_m68k_ea_function_code, ic->_tme_m68k_ea_address).  when
   the TLB entry chosen by TME_M68K_DTLB_ENTRY() can be used, the
   value is converted to big-endian byte order and written directly
   through the entry's emulator write offset; otherwise the write is
   handed to tme_m68k_write16(): */
void
tme_m68k_write_memx16(struct tme_m68k *ic)
{
  tme_bus_context_t bus_context = ic->_tme_m68k_bus_context;
  unsigned int function_code = ic->_tme_m68k_ea_function_code;
  tme_uint32_t linear_address_first = ic->_tme_m68k_ea_address;
  /* NB: this is 32-bit arithmetic, so the last address wraps around
     to a small value when the operand starts at the very top of the
     address space: */
  tme_uint32_t linear_address_last = linear_address_first + sizeof(tme_uint16_t) - 1;
  struct tme_m68k_tlb *tlb = TME_M68K_DTLB_ENTRY(ic, bus_context, function_code, linear_address_first);
  tme_uint16_t mem_value;
  tme_shared tme_uint16_t *mem;

#ifdef _TME_M68K_STATS
  ic->tme_m68k_stats.tme_m68k_stats_memory_total++;
#endif /* _TME_M68K_STATS */

  /* log the value written: */
  tme_m68k_verify_mem16(ic, ic->_tme_m68k_ea_function_code, ic->_tme_m68k_ea_address, ic->tme_m68k_ireg_memx16, TME_BUS_CYCLE_WRITE);
  tme_m68k_log(ic, 1000, TME_OK,
               (TME_M68K_LOG_HANDLE(ic),
                _("write_memx16\t%d:0x%08x:\t0x%04x"),
                ic->_tme_m68k_ea_function_code,
                ic->_tme_m68k_ea_address,
                ic->tme_m68k_ireg_memx16));

  /* busy this TLB entry: */
  tme_m68k_tlb_busy(tlb);

  /* if we aren't restarting, and this address passes the bus
     alignment mask, and the operand does not wrap around the end of
     the 32-bit address space, and this TLB entry covers the operand
     and allows fast writes.  without the wrap test, a wrapped last
     address would trivially pass the comparison against the TLB
     entry's last address, and the fast write would run past the
     memory that the TLB entry covers: */
  if (__tme_predict_true(!TME_M68K_SEQUENCE_RESTARTING
                         && (ic->_tme_m68k_bus_16bit & linear_address_first) == 0
                         && tme_m68k_tlb_is_valid(tlb)
                         && tlb->tme_m68k_tlb_bus_context == bus_context
                         && (tlb->tme_m68k_tlb_function_codes_mask
                             & TME_BIT(function_code))
                         && linear_address_last >= linear_address_first
                         && linear_address_first >= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_first
                         && linear_address_last <= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_last
                         && tlb->tme_m68k_tlb_emulator_off_write != TME_EMULATOR_OFF_UNDEF)) {

    /* make the emulator memory pointer: */
    mem = (tme_shared tme_uint16_t *) (tlb->tme_m68k_tlb_emulator_off_write + linear_address_first);

    /* get the value to write, in big-endian byte order: */
    mem_value = tme_htobe_u16(ic->tme_m68k_ireg_memx16);

    /* do the 16-bit bus write: */
    tme_memory_bus_write16(mem, mem_value, tlb->tme_m68k_tlb_bus_rwlock, sizeof(tme_uint8_t), sizeof(tme_uint32_t));

    /* step the transfer count: */
    TME_M68K_SEQUENCE_TRANSFER_STEP;
  }

  /* otherwise, do the bus cycles the slow way: */
  else {
    tme_m68k_write16(ic, tlb,
                    &ic->_tme_m68k_ea_function_code,
                    &ic->_tme_m68k_ea_address,
                    &ic->tme_m68k_ireg_memx16,
                    TME_M68K_BUS_CYCLE_NORMAL);
  }

  /* unbusy this TLB entry: */
  tme_m68k_tlb_unbusy(tlb);
}

/* this writes a 16-bit mem value.

   the value in the 16-bit internal register selected by ireg is
   verified and logged, and then written to the current effective
   address (ic->_tme_m68k_ea_function_code,
   ic->_tme_m68k_ea_address).  when the TLB entry chosen by
   TME_M68K_DTLB_ENTRY() can be used, the value is converted to
   big-endian byte order and written directly through the entry's
   emulator write offset; otherwise the write is handed to
   tme_m68k_write16(): */
void
tme_m68k_write_mem16(struct tme_m68k *ic, int ireg)
{
  tme_bus_context_t bus_context = ic->_tme_m68k_bus_context;
  unsigned int function_code = ic->_tme_m68k_ea_function_code;
  tme_uint32_t linear_address_first = ic->_tme_m68k_ea_address;
  /* NB: this is 32-bit arithmetic, so the last address wraps around
     to a small value when the operand starts at the very top of the
     address space: */
  tme_uint32_t linear_address_last = linear_address_first + sizeof(tme_uint16_t) - 1;
  struct tme_m68k_tlb *tlb = TME_M68K_DTLB_ENTRY(ic, bus_context, function_code, linear_address_first);
  tme_uint16_t mem_value;
  tme_shared tme_uint16_t *mem;

#ifdef _TME_M68K_STATS
  ic->tme_m68k_stats.tme_m68k_stats_memory_total++;
#endif /* _TME_M68K_STATS */

  /* log the value written: */
  tme_m68k_verify_mem16(ic, ic->_tme_m68k_ea_function_code, ic->_tme_m68k_ea_address, ic->tme_m68k_ireg_uint16(ireg), TME_BUS_CYCLE_WRITE);
  tme_m68k_log(ic, 1000, TME_OK,
               (TME_M68K_LOG_HANDLE(ic),
                _("write_mem16\t%d:0x%08x:\t0x%04x"),
                ic->_tme_m68k_ea_function_code,
                ic->_tme_m68k_ea_address,
                ic->tme_m68k_ireg_uint16(ireg)));

  /* busy this TLB entry: */
  tme_m68k_tlb_busy(tlb);

  /* if we aren't restarting, and this address passes the bus
     alignment mask, and the operand does not wrap around the end of
     the 32-bit address space, and this TLB entry covers the operand
     and allows fast writes.  without the wrap test, a wrapped last
     address would trivially pass the comparison against the TLB
     entry's last address, and the fast write would run past the
     memory that the TLB entry covers: */
  if (__tme_predict_true(!TME_M68K_SEQUENCE_RESTARTING
                         && (ic->_tme_m68k_bus_16bit & linear_address_first) == 0
                         && tme_m68k_tlb_is_valid(tlb)
                         && tlb->tme_m68k_tlb_bus_context == bus_context
                         && (tlb->tme_m68k_tlb_function_codes_mask
                             & TME_BIT(function_code))
                         && linear_address_last >= linear_address_first
                         && linear_address_first >= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_first
                         && linear_address_last <= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_last
                         && tlb->tme_m68k_tlb_emulator_off_write != TME_EMULATOR_OFF_UNDEF)) {

    /* make the emulator memory pointer: */
    mem = (tme_shared tme_uint16_t *) (tlb->tme_m68k_tlb_emulator_off_write + linear_address_first);

    /* get the value to write, in big-endian byte order: */
    mem_value = tme_htobe_u16(ic->tme_m68k_ireg_uint16(ireg));

    /* do the 16-bit bus write: */
    tme_memory_bus_write16(mem, mem_value, tlb->tme_m68k_tlb_bus_rwlock, sizeof(tme_uint8_t), sizeof(tme_uint32_t));

    /* step the transfer count: */
    TME_M68K_SEQUENCE_TRANSFER_STEP;
  }

  /* otherwise, do the bus cycles the slow way: */
  else {
    tme_m68k_write16(ic, tlb,
                    &ic->_tme_m68k_ea_function_code,
                    &ic->_tme_m68k_ea_address,
                    &ic->tme_m68k_ireg_uint16(ireg),
                    TME_M68K_BUS_CYCLE_NORMAL);
  }

  /* unbusy this TLB entry: */
  tme_m68k_tlb_unbusy(tlb);
}

/* this writes a 16-bit stack value.

   value is verified and logged, and then written just below the
   address in ic->tme_m68k_ireg_a7 (at a7 - sizeof(tme_uint16_t)),
   in the data function code space given by
   TME_M68K_FUNCTION_CODE_DATA().  when the TLB entry chosen by
   TME_M68K_DTLB_ENTRY() can be used, the value is converted to
   big-endian byte order and written directly through the entry's
   emulator write offset; otherwise the write is handed to
   tme_m68k_write16().  afterwards, and only if we are not
   restarting, ic->tme_m68k_ireg_a7 is moved down to the operand: */
void
tme_m68k_push16(struct tme_m68k *ic, tme_uint16_t value)
{
  tme_bus_context_t bus_context = ic->_tme_m68k_bus_context;
  unsigned int function_code = TME_M68K_FUNCTION_CODE_DATA(ic);
  /* NB: both of these are 32-bit arithmetic.  the first address
     wraps around to the top of the address space when the stack
     pointer is smaller than the operand size, and the last address
     wraps around to a small value when the operand starts at the
     very top of the address space: */
  tme_uint32_t linear_address_first = ic->tme_m68k_ireg_a7 - sizeof(tme_uint16_t);
  tme_uint32_t linear_address_last = linear_address_first + sizeof(tme_uint16_t) - 1;
  struct tme_m68k_tlb *tlb = TME_M68K_DTLB_ENTRY(ic, bus_context, function_code, linear_address_first);
  tme_uint16_t mem_value;
  tme_shared tme_uint16_t *mem;

#ifdef _TME_M68K_STATS
  ic->tme_m68k_stats.tme_m68k_stats_memory_total++;
#endif /* _TME_M68K_STATS */

  /* log the value written: */
  tme_m68k_verify_mem16(ic, function_code, linear_address_first, value, TME_BUS_CYCLE_WRITE);
  tme_m68k_log(ic, 1000, TME_OK,
               (TME_M68K_LOG_HANDLE(ic),
                _("push16\t%d:0x%08x:\t0x%04x"),
                function_code,
                linear_address_first,
                value));

  /* busy this TLB entry: */
  tme_m68k_tlb_busy(tlb);

  /* if we aren't restarting, and this address passes the bus
     alignment mask, and the operand does not wrap around the end of
     the 32-bit address space, and this TLB entry covers the operand
     and allows fast writes.  without the wrap test, a wrapped last
     address would trivially pass the comparison against the TLB
     entry's last address, and the fast write would run past the
     memory that the TLB entry covers: */
  if (__tme_predict_true(!TME_M68K_SEQUENCE_RESTARTING
                         && (ic->_tme_m68k_bus_16bit & linear_address_first) == 0
                         && tme_m68k_tlb_is_valid(tlb)
                         && tlb->tme_m68k_tlb_bus_context == bus_context
                         && (tlb->tme_m68k_tlb_function_codes_mask
                             & TME_BIT(function_code))
                         && linear_address_last >= linear_address_first
                         && linear_address_first >= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_first
                         && linear_address_last <= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_last
                         && tlb->tme_m68k_tlb_emulator_off_write != TME_EMULATOR_OFF_UNDEF)) {

    /* make the emulator memory pointer: */
    mem = (tme_shared tme_uint16_t *) (tlb->tme_m68k_tlb_emulator_off_write + linear_address_first);

    /* get the value to write, in big-endian byte order: */
    mem_value = tme_htobe_u16(value);

    /* do the 16-bit bus write: */
    tme_memory_bus_write16(mem, mem_value, tlb->tme_m68k_tlb_bus_rwlock, sizeof(tme_uint8_t), sizeof(tme_uint32_t));

    /* step the transfer count: */
    TME_M68K_SEQUENCE_TRANSFER_STEP;
  }

  /* otherwise, do the bus cycles the slow way: */
  else {
    tme_m68k_write16(ic, tlb,
                    &function_code,
                    &linear_address_first,
                    &value,
                    TME_M68K_BUS_CYCLE_NORMAL);
  }

  /* unbusy this TLB entry: */
  tme_m68k_tlb_unbusy(tlb);

  /* if we're not restarting, the operand is now on the stack, so
     move the stack pointer down to it: */
  if (!TME_M68K_SEQUENCE_RESTARTING) {
    ic->tme_m68k_ireg_a7 -= sizeof(tme_uint16_t);
  }
}

/* this reads a 32-bit memx value.

   the operand is read from the current effective address
   (ic->_tme_m68k_ea_function_code, ic->_tme_m68k_ea_address) into
   ic->tme_m68k_ireg_memx32.  when the TLB entry chosen by
   TME_M68K_DTLB_ENTRY() can be used, the big-endian value is read
   directly through the entry's emulator read offset and converted
   to host byte order; otherwise the read is handed to
   tme_m68k_read32().  the value read is verified and logged
   afterwards: */
void
tme_m68k_read_memx32(struct tme_m68k *ic)
{
  tme_bus_context_t bus_context = ic->_tme_m68k_bus_context;
  unsigned int function_code = ic->_tme_m68k_ea_function_code;
  tme_uint32_t linear_address_first = ic->_tme_m68k_ea_address;
  /* NB: this is 32-bit arithmetic, so the last address wraps around
     to a small value when the operand starts within the last three
     bytes of the address space: */
  tme_uint32_t linear_address_last = linear_address_first + sizeof(tme_uint32_t) - 1;
  struct tme_m68k_tlb *tlb = TME_M68K_DTLB_ENTRY(ic, bus_context, function_code, linear_address_first);
  tme_uint32_t mem_value;
  const tme_shared tme_uint32_t *mem;

#ifdef _TME_M68K_STATS
  ic->tme_m68k_stats.tme_m68k_stats_memory_total++;
#endif /* _TME_M68K_STATS */

  /* busy this TLB entry: */
  tme_m68k_tlb_busy(tlb);

  /* if we aren't restarting, and this address passes the bus
     alignment mask, and the operand does not wrap around the end of
     the 32-bit address space, and this TLB entry covers the operand
     and allows fast reads.  the alignment mask test alone does not
     rule out a wrapping operand, and without the wrap test a
     wrapped last address would trivially pass the comparison
     against the TLB entry's last address, and the fast read would
     run past the memory that the TLB entry covers: */
  if (__tme_predict_true(!TME_M68K_SEQUENCE_RESTARTING
                         && (ic->_tme_m68k_bus_16bit & linear_address_first) == 0
                         && tme_m68k_tlb_is_valid(tlb)
                         && tlb->tme_m68k_tlb_bus_context == bus_context
                         && (tlb->tme_m68k_tlb_function_codes_mask
                             & TME_BIT(function_code))
                         && linear_address_last >= linear_address_first
                         && linear_address_first >= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_first
                         && linear_address_last <= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_last
                         && tlb->tme_m68k_tlb_emulator_off_read != TME_EMULATOR_OFF_UNDEF)) {

    /* make the emulator memory pointer: */
    mem = (const tme_shared tme_uint32_t *) (tlb->tme_m68k_tlb_emulator_off_read + linear_address_first);

    /* do the 32-bit bus read: */
    mem_value = tme_memory_bus_read32(mem, tlb->tme_m68k_tlb_bus_rwlock, sizeof(tme_uint8_t), sizeof(tme_uint32_t));

    /* put the value read, in host byte order: */
    ic->tme_m68k_ireg_memx32 = tme_betoh_u32(mem_value);

    /* step the transfer count: */
    TME_M68K_SEQUENCE_TRANSFER_STEP;
  }

  /* otherwise, do the bus cycles the slow way: */
  else {
    tme_m68k_read32(ic, tlb,
                    &ic->_tme_m68k_ea_function_code,
                    &ic->_tme_m68k_ea_address,
                    &ic->tme_m68k_ireg_memx32,
                    TME_M68K_BUS_CYCLE_NORMAL);
  }

  /* unbusy this TLB entry: */
  tme_m68k_tlb_unbusy(tlb);

  /* log the value read: */
  tme_m68k_verify_mem32(ic, ic->_tme_m68k_ea_function_code, ic->_tme_m68k_ea_address, ic->tme_m68k_ireg_memx32, TME_BUS_CYCLE_READ);
  tme_m68k_log(ic, 1000, TME_OK,
               (TME_M68K_LOG_HANDLE(ic),
                _("read_memx32\t%d:0x%08x:\t0x%08x"),
                ic->_tme_m68k_ea_function_code,
                ic->_tme_m68k_ea_address,
                ic->tme_m68k_ireg_memx32));
}

/* this reads a 32-bit mem value.

   the operand is read from the current effective address
   (ic->_tme_m68k_ea_function_code, ic->_tme_m68k_ea_address) into
   the 32-bit internal register selected by ireg.  when the TLB
   entry chosen by TME_M68K_DTLB_ENTRY() can be used, the big-endian
   value is read directly through the entry's emulator read offset
   and converted to host byte order; otherwise the read is handed to
   tme_m68k_read32().  the value read is verified and logged
   afterwards: */
void
tme_m68k_read_mem32(struct tme_m68k *ic, int ireg)
{
  tme_bus_context_t bus_context = ic->_tme_m68k_bus_context;
  unsigned int function_code = ic->_tme_m68k_ea_function_code;
  tme_uint32_t linear_address_first = ic->_tme_m68k_ea_address;
  /* NB: this is 32-bit arithmetic, so the last address wraps around
     to a small value when the operand starts within the last three
     bytes of the address space: */
  tme_uint32_t linear_address_last = linear_address_first + sizeof(tme_uint32_t) - 1;
  struct tme_m68k_tlb *tlb = TME_M68K_DTLB_ENTRY(ic, bus_context, function_code, linear_address_first);
  tme_uint32_t mem_value;
  const tme_shared tme_uint32_t *mem;

#ifdef _TME_M68K_STATS
  ic->tme_m68k_stats.tme_m68k_stats_memory_total++;
#endif /* _TME_M68K_STATS */

  /* busy this TLB entry: */
  tme_m68k_tlb_busy(tlb);

  /* if we aren't restarting, and this address passes the bus
     alignment mask, and the operand does not wrap around the end of
     the 32-bit address space, and this TLB entry covers the operand
     and allows fast reads.  the alignment mask test alone does not
     rule out a wrapping operand, and without the wrap test a
     wrapped last address would trivially pass the comparison
     against the TLB entry's last address, and the fast read would
     run past the memory that the TLB entry covers: */
  if (__tme_predict_true(!TME_M68K_SEQUENCE_RESTARTING
                         && (ic->_tme_m68k_bus_16bit & linear_address_first) == 0
                         && tme_m68k_tlb_is_valid(tlb)
                         && tlb->tme_m68k_tlb_bus_context == bus_context
                         && (tlb->tme_m68k_tlb_function_codes_mask
                             & TME_BIT(function_code))
                         && linear_address_last >= linear_address_first
                         && linear_address_first >= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_first
                         && linear_address_last <= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_last
                         && tlb->tme_m68k_tlb_emulator_off_read != TME_EMULATOR_OFF_UNDEF)) {

    /* make the emulator memory pointer: */
    mem = (const tme_shared tme_uint32_t *) (tlb->tme_m68k_tlb_emulator_off_read + linear_address_first);

    /* do the 32-bit bus read: */
    mem_value = tme_memory_bus_read32(mem, tlb->tme_m68k_tlb_bus_rwlock, sizeof(tme_uint8_t), sizeof(tme_uint32_t));

    /* put the value read, in host byte order: */
    ic->tme_m68k_ireg_uint32(ireg) = tme_betoh_u32(mem_value);

    /* step the transfer count: */
    TME_M68K_SEQUENCE_TRANSFER_STEP;
  }

  /* otherwise, do the bus cycles the slow way: */
  else {
    tme_m68k_read32(ic, tlb,
                    &ic->_tme_m68k_ea_function_code,
                    &ic->_tme_m68k_ea_address,
                    &ic->tme_m68k_ireg_uint32(ireg),
                    TME_M68K_BUS_CYCLE_NORMAL);
  }

  /* unbusy this TLB entry: */
  tme_m68k_tlb_unbusy(tlb);

  /* log the value read: */
  tme_m68k_verify_mem32(ic, ic->_tme_m68k_ea_function_code, ic->_tme_m68k_ea_address, ic->tme_m68k_ireg_uint32(ireg), TME_BUS_CYCLE_READ);
  tme_m68k_log(ic, 1000, TME_OK,
               (TME_M68K_LOG_HANDLE(ic),
                _("read_mem32\t%d:0x%08x:\t0x%08x"),
                ic->_tme_m68k_ea_function_code,
                ic->_tme_m68k_ea_address,
                ic->tme_m68k_ireg_uint32(ireg)));
}

/* this reads a 32-bit inst value: */
/* Fetches the 32-bit instruction-stream value at pc and returns it in
   host byte order.  The value is also kept in the instruction fetch
   buffer at offset _tme_m68k_insn_fetch_slow_next, and that offset is
   advanced by sizeof(tme_uint32_t) before returning.

   The value comes from one of three places:
     - the instruction fetch buffer, when the fast executor already
       did this fetch (this only happens when restarting);
     - emulator memory, read directly through the instruction TLB
       entry when it allows fast reads;
     - tme_m68k_read32(), which does the bus cycles the slow way.

   NB: this file is generated by m68k-insns-auto.sh, so any change
   made here also belongs in the generator: */
tme_uint32_t
tme_m68k_fetch32(struct tme_m68k *ic, tme_uint32_t pc) 
{
  tme_bus_context_t bus_context = ic->_tme_m68k_bus_context;
  unsigned int function_code = TME_M68K_FUNCTION_CODE_PROGRAM(ic);
  tme_uint32_t linear_address_first = pc;
  tme_uint32_t linear_address_last = linear_address_first + sizeof(tme_uint32_t) - 1;
  struct tme_m68k_tlb *tlb = &ic->_tme_m68k_itlb;
  tme_uint32_t mem_value;
  const tme_shared tme_uint32_t *mem;
  unsigned int fetch_slow_next = ic->_tme_m68k_insn_fetch_slow_next;

#ifdef _TME_M68K_STATS
  ic->tme_m68k_stats.tme_m68k_stats_memory_total++;
#endif /* _TME_M68K_STATS */

  /* busy this TLB entry: */
  tme_m68k_tlb_busy(tlb);

  /* if this fetch was done by the fast executor: */
  if (__tme_predict_true(fetch_slow_next < ic->_tme_m68k_insn_fetch_slow_count_fast)) {

    /* the entire fetch must be in the instruction buffer, and
       we must be restarting: */
    assert ((fetch_slow_next + sizeof(tme_uint32_t))
            <= ic->_tme_m68k_insn_fetch_slow_count_fast);
    assert (TME_M68K_SEQUENCE_RESTARTING);

    /* reread the value that is already in the instruction buffer: */
    mem_value = tme_memory_read32(((tme_uint32_t *) (((tme_uint8_t *) &ic->_tme_m68k_insn_fetch_buffer[0]) + fetch_slow_next)), sizeof(tme_uint16_t));
  }

  /* otherwise, this fetch was not done by the fast executor: */
  else {

    /* if we're restarting, but the offset in the instruction buffer
       to fetch into is at the instruction buffer total, this must be
       a fake fault caused by the fast executor.  we confirm this by
       checking that this transfer "caused" the fault, and that this
       transfer will be the first slow one after any fast fetches.
       in this case, we can cancel the restart for now: */
    if (TME_M68K_SEQUENCE_RESTARTING
        && (fetch_slow_next
            == ic->_tme_m68k_insn_fetch_slow_count_total)) {
      assert ((ic->_tme_m68k_sequence._tme_m68k_sequence_transfer_next
               == ic->_tme_m68k_sequence._tme_m68k_sequence_transfer_faulted)
              && (fetch_slow_next
                  == ic->_tme_m68k_insn_fetch_slow_count_fast));
      ic->_tme_m68k_sequence._tme_m68k_sequence_transfer_faulted--;
    }

    /* if we're not restarting: */
    if (!TME_M68K_SEQUENCE_RESTARTING) {

      /* we advance the instruction buffer total *before* we do
         what may be a slow fetch, because we may transfer a few
         bytes and then fault.  without this, those few bytes
         would not get saved in the exception stack frame and
         restored later before the continuation of the fetch: */
      ic->_tme_m68k_insn_fetch_slow_count_total += sizeof(tme_uint32_t);
    }

    /* make sure that if this is a new transfer or if this
       transfer faulted, that we're fetching for the current
       last positions in the instruction buffer: */
    assert ((ic->_tme_m68k_sequence._tme_m68k_sequence_transfer_next
             < ic->_tme_m68k_sequence._tme_m68k_sequence_transfer_faulted)
            || ((fetch_slow_next + sizeof(tme_uint32_t))
                == ic->_tme_m68k_insn_fetch_slow_count_total));

    /* if we aren't restarting, and this address is properly aligned,
       and this TLB entry covers the operand and allows fast reads: */
    if (__tme_predict_true(!TME_M68K_SEQUENCE_RESTARTING
                           && ((sizeof(tme_uint16_t) - 1) & linear_address_first) == 0
                           && tme_m68k_tlb_is_valid(tlb)
                           && tlb->tme_m68k_tlb_bus_context == bus_context
                           && (tlb->tme_m68k_tlb_function_codes_mask
                               & TME_BIT(function_code))
                           && linear_address_first >= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_first
                           && linear_address_last <= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_last
                           && tlb->tme_m68k_tlb_emulator_off_read != TME_EMULATOR_OFF_UNDEF)) {

      /* make the emulator memory pointer: */
      mem = (const tme_shared tme_uint32_t *) (tlb->tme_m68k_tlb_emulator_off_read + linear_address_first);

      /* do the 32-bit bus read: */
      mem_value = tme_memory_bus_read32(mem, tlb->tme_m68k_tlb_bus_rwlock, sizeof(tme_uint16_t), sizeof(tme_uint32_t));

      /* put the value read, in host byte order, into both mem_value
         and the instruction buffer: */
      mem_value = tme_betoh_u32(mem_value);
      tme_memory_write32(((tme_uint32_t *) (((tme_uint8_t *) &ic->_tme_m68k_insn_fetch_buffer[0]) + fetch_slow_next)), mem_value, sizeof(tme_uint16_t));

      /* step the transfer count: */
      TME_M68K_SEQUENCE_TRANSFER_STEP;
    }

    /* otherwise, do the bus cycles the slow way.  the transfer goes
       into the instruction buffer, and we read the result back from
       there: */
    else {
      tme_m68k_read32(ic, tlb,
                      &function_code,
                      &pc,
                      ((tme_uint32_t *) (((tme_uint8_t *) &ic->_tme_m68k_insn_fetch_buffer[0]) + fetch_slow_next)),
                      TME_M68K_BUS_CYCLE_FETCH);
      mem_value = tme_memory_read32(((tme_uint32_t *) (((tme_uint8_t *) &ic->_tme_m68k_insn_fetch_buffer[0]) + fetch_slow_next)), sizeof(tme_uint16_t));
    }
  }

  /* unbusy this TLB entry: */
  tme_m68k_tlb_unbusy(tlb);

  /* log the value read.  every path above leaves mem_value equal to
     the 32 bits stored at this fetch's offset in the instruction
     buffer, so we log mem_value rather than dereferencing a
     tme_uint32_t pointer cast into the buffer.  the buffer accesses
     above all pass sizeof(tme_uint16_t) to tme_memory_read32() and
     tme_memory_write32(), which suggests that the offset is only
     guaranteed to be 16-bit aligned, and a plain 32-bit load from
     such an address is not portable: */
  tme_m68k_verify_mem32(ic, function_code, pc, mem_value, TME_BUS_CYCLE_READ);
  tme_m68k_log(ic, 1000, TME_OK,
               (TME_M68K_LOG_HANDLE(ic),
                _("fetch32\t%d:0x%08x:\t0x%08x"),
                function_code,
                pc,
                mem_value));

  /* advance the offset in the instruction buffer for the next slow fetch: */
  fetch_slow_next += sizeof(tme_uint32_t);
  ic->_tme_m68k_insn_fetch_slow_next = fetch_slow_next;

  /* return the fetched value: */
  return(mem_value);
}

/* this reads a 32-bit stack value: */
/* the 32-bit value at the address held in a7 is stored in *_value
   and, unless we are restarting, a7 is then advanced past it.  the
   fast path stores the value in host byte order: */
void
tme_m68k_pop32(struct tme_m68k *ic, tme_uint32_t *_value) 
{
  tme_bus_context_t context;
  unsigned int fc;
  tme_uint32_t addr_first;
  tme_uint32_t addr_last;
  struct tme_m68k_tlb *dtlb;
  const tme_shared tme_uint32_t *src;
  tme_uint32_t raw;
  int fast_ok;

  /* gather the parameters of this transfer: */
  context = ic->_tme_m68k_bus_context;
  fc = TME_M68K_FUNCTION_CODE_DATA(ic);
  addr_first = ic->tme_m68k_ireg_a7;
  addr_last = addr_first + sizeof(tme_uint32_t) - 1;
  dtlb = TME_M68K_DTLB_ENTRY(ic, context, fc, addr_first);

#ifdef _TME_M68K_STATS
  ic->tme_m68k_stats.tme_m68k_stats_memory_total++;
#endif /* _TME_M68K_STATS */

  /* the TLB entry stays busy for as long as we use it: */
  tme_m68k_tlb_busy(dtlb);

  /* the fast path is usable only when we aren't restarting, the
     address passes the bus alignment mask, and the TLB entry is
     valid, matches the bus context and function code, covers all
     four bytes, and allows fast reads: */
  fast_ok = (!TME_M68K_SEQUENCE_RESTARTING
             && (ic->_tme_m68k_bus_16bit & addr_first) == 0
             && tme_m68k_tlb_is_valid(dtlb)
             && dtlb->tme_m68k_tlb_bus_context == context
             && (dtlb->tme_m68k_tlb_function_codes_mask
                 & TME_BIT(fc))
             && addr_first >= (tme_bus_addr32_t) dtlb->tme_m68k_tlb_linear_first
             && addr_last <= (tme_bus_addr32_t) dtlb->tme_m68k_tlb_linear_last
             && dtlb->tme_m68k_tlb_emulator_off_read != TME_EMULATOR_OFF_UNDEF);

  if (__tme_predict_true(fast_ok)) {

    /* read big-endian emulator memory directly: */
    src = (const tme_shared tme_uint32_t *) (dtlb->tme_m68k_tlb_emulator_off_read + addr_first);
    raw = tme_memory_bus_read32(src, dtlb->tme_m68k_tlb_bus_rwlock, sizeof(tme_uint8_t), sizeof(tme_uint32_t));

    /* hand the value back in host byte order: */
    *_value = tme_betoh_u32(raw);

    /* this transfer is done: */
    TME_M68K_SEQUENCE_TRANSFER_STEP;
  }
  else {

    /* fall back to real bus cycles: */
    tme_m68k_read32(ic, dtlb,
                    &fc,
                    &ic->tme_m68k_ireg_a7,
                    _value,
                    TME_M68K_BUS_CYCLE_NORMAL);
  }

  /* we are finished with the TLB entry: */
  tme_m68k_tlb_unbusy(dtlb);

  /* verify and log what was read: */
  tme_m68k_verify_mem32(ic, fc, ic->tme_m68k_ireg_a7, *_value, TME_BUS_CYCLE_READ);
  tme_m68k_log(ic, 1000, TME_OK,
               (TME_M68K_LOG_HANDLE(ic),
                _("pop32\t%d:0x%08x:\t0x%08x"),
                fc,
                ic->tme_m68k_ireg_a7,
                *_value));

  /* a restart leaves the stack pointer alone: */
  if (TME_M68K_SEQUENCE_RESTARTING) {
    return;
  }
  ic->tme_m68k_ireg_a7 += sizeof(tme_uint32_t);
}

/* this writes a 32-bit memx value: */
void
tme_m68k_write_memx32(struct tme_m68k *ic) 
{
  tme_bus_context_t bus_context = ic->_tme_m68k_bus_context;
  unsigned int function_code = ic->_tme_m68k_ea_function_code;
  tme_uint32_t linear_address_first = ic->_tme_m68k_ea_address;
  tme_uint32_t linear_address_last = linear_address_first + sizeof(tme_uint32_t) - 1;
  struct tme_m68k_tlb *tlb = TME_M68K_DTLB_ENTRY(ic, bus_context, function_code, linear_address_first);
  tme_uint32_t mem_value;
  tme_shared tme_uint32_t *mem;

#ifdef _TME_M68K_STATS
  ic->tme_m68k_stats.tme_m68k_stats_memory_total++;
#endif /* _TME_M68K_STATS */

  /* log the value written: */
  tme_m68k_verify_mem32(ic, ic->_tme_m68k_ea_function_code, ic->_tme_m68k_ea_address, ic->tme_m68k_ireg_memx32, TME_BUS_CYCLE_WRITE);
  tme_m68k_log(ic, 1000, TME_OK, 
               (TME_M68K_LOG_HANDLE(ic),
                _("write_memx32\t%d:0x%08x:\t0x%08x"),
                ic->_tme_m68k_ea_function_code,
                ic->_tme_m68k_ea_address,
                ic->tme_m68k_ireg_memx32));

  /* busy this TLB entry: */
  tme_m68k_tlb_busy(tlb);

  /* if we aren't restarting, and this address is properly aligned,
     and this TLB entry covers the operand and allows fast writes: */
  if (__tme_predict_true(!TME_M68K_SEQUENCE_RESTARTING
                         && (ic->_tme_m68k_bus_16bit & linear_address_first) == 0
                         && tme_m68k_tlb_is_valid(tlb)
                         && tlb->tme_m68k_tlb_bus_context == bus_context
                         && (tlb->tme_m68k_tlb_function_codes_mask
                             & TME_BIT(function_code))
                         && linear_address_first >= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_first
                         && linear_address_last <= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_last
                         && tlb->tme_m68k_tlb_emulator_off_write != TME_EMULATOR_OFF_UNDEF)) {

    /* make the emulator memory pointer: */
    mem = (tme_shared tme_uint32_t *) (tlb->tme_m68k_tlb_emulator_off_write + linear_address_first);

    /* get the value to write, in big-endian byte order: */
    mem_value = tme_htobe_u32(ic->tme_m68k_ireg_memx32);

    /* do the 32-bit bus write: */
    tme_memory_bus_write32(mem, mem_value, tlb->tme_m68k_tlb_bus_rwlock, sizeof(tme_uint8_t), sizeof(tme_uint32_t));

    /* step the transfer count: */
    TME_M68K_SEQUENCE_TRANSFER_STEP;
  }

  /* otherwise, do the bus cycles the slow way: */
  else {
    tme_m68k_write32(ic, tlb,
                    &ic->_tme_m68k_ea_function_code,
                    &ic->_tme_m68k_ea_address,
                    &ic->tme_m68k_ireg_memx32,
                    TME_M68K_BUS_CYCLE_NORMAL);
  }

  /* unbusy this TLB entry: */
  tme_m68k_tlb_unbusy(tlb);
}

/* this writes a 32-bit mem value: */
void
tme_m68k_write_mem32(struct tme_m68k *ic, int ireg) 
{
  tme_bus_context_t bus_context = ic->_tme_m68k_bus_context;
  unsigned int function_code = ic->_tme_m68k_ea_function_code;
  tme_uint32_t linear_address_first = ic->_tme_m68k_ea_address;
  tme_uint32_t linear_address_last = linear_address_first + sizeof(tme_uint32_t) - 1;
  struct tme_m68k_tlb *tlb = TME_M68K_DTLB_ENTRY(ic, bus_context, function_code, linear_address_first);
  tme_uint32_t mem_value;
  tme_shared tme_uint32_t *mem;

#ifdef _TME_M68K_STATS
  ic->tme_m68k_stats.tme_m68k_stats_memory_total++;
#endif /* _TME_M68K_STATS */

  /* log the value written: */
  tme_m68k_verify_mem32(ic, ic->_tme_m68k_ea_function_code, ic->_tme_m68k_ea_address, ic->tme_m68k_ireg_uint32(ireg), TME_BUS_CYCLE_WRITE);
  tme_m68k_log(ic, 1000, TME_OK, 
               (TME_M68K_LOG_HANDLE(ic),
                _("write_mem32\t%d:0x%08x:\t0x%08x"),
                ic->_tme_m68k_ea_function_code,
                ic->_tme_m68k_ea_address,
                ic->tme_m68k_ireg_uint32(ireg)));

  /* busy this TLB entry: */
  tme_m68k_tlb_busy(tlb);

  /* if we aren't restarting, and this address is properly aligned,
     and this TLB entry covers the operand and allows fast writes: */
  if (__tme_predict_true(!TME_M68K_SEQUENCE_RESTARTING
                         && (ic->_tme_m68k_bus_16bit & linear_address_first) == 0
                         && tme_m68k_tlb_is_valid(tlb)
                         && tlb->tme_m68k_tlb_bus_context == bus_context
                         && (tlb->tme_m68k_tlb_function_codes_mask
                             & TME_BIT(function_code))
                         && linear_address_first >= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_first
                         && linear_address_last <= (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_last
                         && tlb->tme_m68k_tlb_emulator_off_write != TME_EMULATOR_OFF_UNDEF)) {

    /* make the emulator memory pointer: */
    mem = (tme_shared tme_uint32_t *) (tlb->tme_m68k_tlb_emulator_off_write + linear_address_first);

    /* get the value to write, in big-endian byte order: */
    mem_value = tme_htobe_u32(ic->tme_m68k_ireg_uint32(ireg));

    /* do the 32-bit bus write: */
    tme_memory_bus_write32(mem, mem_value, tlb->tme_m68k_tlb_bus_rwlock, sizeof(tme_uint8_t), sizeof(tme_uint32_t));

    /* step the transfer count: */
    TME_M68K_SEQUENCE_TRANSFER_STEP;
  }

  /* otherwise, do the bus cycles the slow way: */
  else {
    tme_m68k_write32(ic, tlb,
                    &ic->_tme_m68k_ea_function_code,
                    &ic->_tme_m68k_ea_address,
                    &ic->tme_m68k_ireg_uint32(ireg),
                    TME_M68K_BUS_CYCLE_NORMAL);
  }

  /* unbusy this TLB entry: */
  tme_m68k_tlb_unbusy(tlb);
}

/* this writes a 32-bit stack value: */
/* value is written at the address four bytes below a7 and, unless
   we are restarting, a7 is then moved down to that address.  the
   fast path stores the value in big-endian byte order: */
void
tme_m68k_push32(struct tme_m68k *ic, tme_uint32_t value) 
{
  tme_bus_context_t context;
  unsigned int fc;
  tme_uint32_t addr_first;
  tme_uint32_t addr_last;
  struct tme_m68k_tlb *dtlb;
  tme_shared tme_uint32_t *dst;
  tme_uint32_t raw;
  int fast_ok;

  /* gather the parameters of this transfer: */
  context = ic->_tme_m68k_bus_context;
  fc = TME_M68K_FUNCTION_CODE_DATA(ic);
  addr_first = ic->tme_m68k_ireg_a7 - sizeof(tme_uint32_t);
  addr_last = addr_first + sizeof(tme_uint32_t) - 1;
  dtlb = TME_M68K_DTLB_ENTRY(ic, context, fc, addr_first);

#ifdef _TME_M68K_STATS
  ic->tme_m68k_stats.tme_m68k_stats_memory_total++;
#endif /* _TME_M68K_STATS */

  /* verify and log the value before it is written: */
  tme_m68k_verify_mem32(ic, fc, addr_first, value, TME_BUS_CYCLE_WRITE);
  tme_m68k_log(ic, 1000, TME_OK, 
               (TME_M68K_LOG_HANDLE(ic),
                _("push32\t%d:0x%08x:\t0x%08x"),
                fc,
                addr_first,
                value));

  /* the TLB entry stays busy for as long as we use it: */
  tme_m68k_tlb_busy(dtlb);

  /* the fast path is usable only when we aren't restarting, the
     address passes the bus alignment mask, and the TLB entry is
     valid, matches the bus context and function code, covers all
     four bytes, and allows fast writes: */
  fast_ok = (!TME_M68K_SEQUENCE_RESTARTING
             && (ic->_tme_m68k_bus_16bit & addr_first) == 0
             && tme_m68k_tlb_is_valid(dtlb)
             && dtlb->tme_m68k_tlb_bus_context == context
             && (dtlb->tme_m68k_tlb_function_codes_mask
                 & TME_BIT(fc))
             && addr_first >= (tme_bus_addr32_t) dtlb->tme_m68k_tlb_linear_first
             && addr_last <= (tme_bus_addr32_t) dtlb->tme_m68k_tlb_linear_last
             && dtlb->tme_m68k_tlb_emulator_off_write != TME_EMULATOR_OFF_UNDEF);

  if (__tme_predict_true(fast_ok)) {

    /* write big-endian emulator memory directly: */
    dst = (tme_shared tme_uint32_t *) (dtlb->tme_m68k_tlb_emulator_off_write + addr_first);
    raw = tme_htobe_u32(value);
    tme_memory_bus_write32(dst, raw, dtlb->tme_m68k_tlb_bus_rwlock, sizeof(tme_uint8_t), sizeof(tme_uint32_t));

    /* this transfer is done: */
    TME_M68K_SEQUENCE_TRANSFER_STEP;
  }
  else {

    /* fall back to real bus cycles: */
    tme_m68k_write32(ic, dtlb,
                    &fc,
                    &addr_first,
                    &value,
                    TME_M68K_BUS_CYCLE_NORMAL);
  }

  /* we are finished with the TLB entry: */
  tme_m68k_tlb_unbusy(dtlb);

  /* a restart leaves the stack pointer alone: */
  if (TME_M68K_SEQUENCE_RESTARTING) {
    return;
  }
  ic->tme_m68k_ireg_a7 -= sizeof(tme_uint32_t);
}

/* this reads a mem value of any size: */
/* count bytes are read from the current effective address into
   buffer, using tme_m68k_read() with TME_M68K_BUS_CYCLE_RAW: */
void
tme_m68k_read_mem(struct tme_m68k *ic, tme_uint8_t *buffer, unsigned int count) 
{
  tme_bus_context_t context;
  unsigned int fc;
  tme_uint32_t addr_first;
  struct tme_m68k_tlb *dtlb;

  /* find the data TLB entry for the effective address: */
  context = ic->_tme_m68k_bus_context;
  fc = ic->_tme_m68k_ea_function_code;
  addr_first = ic->_tme_m68k_ea_address;
  dtlb = TME_M68K_DTLB_ENTRY(ic, context, fc, addr_first);

#ifdef _TME_M68K_STATS
  ic->tme_m68k_stats.tme_m68k_stats_memory_total++;
#endif /* _TME_M68K_STATS */

  /* do the whole transfer with the TLB entry busy: */
  tme_m68k_tlb_busy(dtlb);
  tme_m68k_read(ic, dtlb, &ic->_tme_m68k_ea_function_code, &ic->_tme_m68k_ea_address, buffer, count, TME_M68K_BUS_CYCLE_RAW);
  tme_m68k_tlb_unbusy(dtlb);

  /* verify the bytes read: */
  tme_m68k_verify_mem_any(ic, ic->_tme_m68k_ea_function_code, ic->_tme_m68k_ea_address, buffer, count, TME_BUS_CYCLE_READ);

  /* log the bytes read, one at a time: */
  tme_m68k_log_start(ic, 1000, TME_OK) {
    unsigned int idx;
    tme_log_part(TME_M68K_LOG_HANDLE(ic),
                 _("read_mem %d:0x%08x count %d:"),
                 ic->_tme_m68k_ea_function_code,
                 ic->_tme_m68k_ea_address,
                 count);
    idx = 0;
    while (idx < count) {
      tme_log_part(TME_M68K_LOG_HANDLE(ic), " 0x%02x", buffer[idx]);
      idx++;
    }
  } tme_m68k_log_finish(ic);
}

/* this reads a region of address space using actual bus cycles: */
/* Reads reg_size bytes, starting at *_linear_address in function code
   *_function_code, into the buffer at reg, using the TLB entry tlb.

   flags is a mask of TME_M68K_BUS_CYCLE_ values.  the ones tested
   here are:
     TME_M68K_BUS_CYCLE_RAW   - bytes are stored into reg in ascending
                                order on any host.  without it, a
                                little-endian host stores them in
                                descending order, starting from the
                                last byte of reg
     TME_M68K_BUS_CYCLE_FETCH - with a 32-bit bus interface, an odd
                                address causes an address error
     TME_M68K_BUS_CYCLE_RMW   - slow bus cycles are marked as locked,
                                and the TLB entry must not need
                                refilling

   when restarting, *_function_code and *_linear_address are rewritten
   from the saved group 0 fault state if this is the transfer that
   faulted; if a later transfer faulted, this function only steps the
   transfer count.

   the callers visible in this file busy the TLB entry before calling
   and unbusy it afterwards; this function itself unbusies it around a
   slow bus cycle callout and before starting exception processing.

   on an address error or bus error, the group 0 fault state is saved
   and tme_m68k_exception() is called: */
void
tme_m68k_read(struct tme_m68k *ic, 
              struct tme_m68k_tlb *tlb,
              unsigned int *_function_code, 
              tme_uint32_t *_linear_address, 
              tme_uint8_t *reg,
              unsigned int reg_size,
              unsigned int flags)
{
  unsigned int function_code;
  tme_uint32_t linear_address;
  tme_bus_addr_t physical_address;
  int shift;
  struct tme_bus_cycle cycle;
  unsigned int transferred, resid, cycle_size;
  int exception;
  int err;
  tme_uint8_t *reg_p;
  unsigned int buffer_i;
  /* the intermediate buffer for fast reads that need byteswapping: */
  tme_uint8_t reg_buffer[sizeof(tme_uint32_t) * 2];
  const tme_shared tme_uint8_t *mem;

  /* if we're not restarting, everything is fresh: */
  if (!TME_M68K_SEQUENCE_RESTARTING) {
    function_code = *_function_code;
    linear_address = *_linear_address;
    transferred = 0;
  }

  /* otherwise, if this is the transfer that faulted, restore
     our state to the cycle that faulted, then take into account
     any data provided by a software rerun of the faulted cycle: */
  else if (ic->_tme_m68k_sequence._tme_m68k_sequence_transfer_faulted
           == ic->_tme_m68k_sequence._tme_m68k_sequence_transfer_next) {
    function_code = *_function_code = ic->_tme_m68k_group0_function_code;
    linear_address = ic->_tme_m68k_group0_address;
    transferred = ic->_tme_m68k_sequence._tme_m68k_sequence_transfer_faulted_after;

    /* the saved fault state must be consistent with this transfer: */
    if (transferred >= reg_size) abort();

    /* give the caller back the address where this transfer started: */
    *_linear_address = linear_address - transferred;
    resid = reg_size - transferred;
    if (ic->_tme_m68k_group0_buffer_read_size > resid) abort();
    if (ic->_tme_m68k_group0_buffer_read_softrr > resid) abort();

    /* if the software rerun provided any data, copy the group 0 read
       buffer into reg.  NOTE(review): the direction of this copy
       depends only on WORDS_BIGENDIAN, while the bus cycle loop below
       also stores in ascending order when TME_M68K_BUS_CYCLE_RAW is
       set.  on a little-endian host, a restarted raw read would
       appear to place these bytes differently than bytes read fresh
       - confirm whether a raw read can be restarted with
       software-rerun data: */
    if (ic->_tme_m68k_group0_buffer_read_softrr > 0) {
#ifdef WORDS_BIGENDIAN
      memcpy(reg + transferred, 
             ic->_tme_m68k_group0_buffer_read,
             ic->_tme_m68k_group0_buffer_read_size);
#else  /* !WORDS_BIGENDIAN */
      reg_p = (reg + reg_size - 1) - transferred;
      for (buffer_i = 0;
           buffer_i < ic->_tme_m68k_group0_buffer_read_size;
           buffer_i++) {
        *(reg_p--) = ic->_tme_m68k_group0_buffer_read[buffer_i];
      }
#endif /* !WORDS_BIGENDIAN */
    }

    /* NB: the copy above moves _tme_m68k_group0_buffer_read_size
       bytes, but we only count the software-rerun bytes as
       transferred: */
    transferred += ic->_tme_m68k_group0_buffer_read_softrr;
  }

  /* otherwise, a later transfer has faulted.  just step the
     transfer number and return: */
  else {
    TME_M68K_SEQUENCE_TRANSFER_STEP;
    return;
  }

  /* do as many bus cycles as needed to complete the transfer.
     cycle_size is saved as the group 0 read size if we fault, so it
     is zeroed here in case we fault before the first cycle is
     sized: */
  exception = TME_M68K_EXCEPTION_NONE;
  cycle_size = 0;
  for(; transferred < reg_size; ) {
    resid = reg_size - transferred;

    /* start the bus cycle structure: */
    cycle.tme_bus_cycle_type = TME_BUS_CYCLE_READ;
    if (TME_ENDIAN_NATIVE == TME_ENDIAN_BIG
        || (flags & TME_M68K_BUS_CYCLE_RAW)) {
      cycle.tme_bus_cycle_buffer = reg + transferred;
      cycle.tme_bus_cycle_buffer_increment = 1;
    }
    else {
      cycle.tme_bus_cycle_buffer = reg + reg_size - (1 + transferred);
      cycle.tme_bus_cycle_buffer_increment = -1;
    }

    /* if we're emulating a CPU with a 16-bit bus interface: */
    if (ic->_tme_m68k_bus_16bit) {

      /* if we're trying to transfer a non-power-of-two
         number of bytes, either the CPU is broken (no
         instructions ever transfer a non-power-of-two
         number of bytes), or this function allowed an
         unaligned transfer: */
      assert((resid & (resid - 1)) == 0
             || (flags & TME_M68K_BUS_CYCLE_RAW));

      /* only byte transfers can be unaligned: */
      if (resid > sizeof(tme_uint8_t)
          && (linear_address & 1)) {
          exception = TME_M68K_EXCEPTION_AERR;
          break;
      }

      /* set the bus-size specific parts of the bus cycle structure.
         a cycle on this bus is at most 16 bits: */
      cycle_size = TME_MIN(resid, sizeof(tme_uint16_t));
      cycle.tme_bus_cycle_size = cycle_size;
      cycle.tme_bus_cycle_port = TME_BUS_CYCLE_PORT(0, TME_BUS16_LOG2);
      cycle.tme_bus_cycle_lane_routing = 
        &tme_m68k_router_16[TME_M68K_BUS_ROUTER_INDEX(TME_BUS16_LOG2, cycle_size, linear_address)];
    }

    /* otherwise we're emulating a CPU with a 32-bit bus interface: */
    else {

      /* an instruction fetch must be aligned: */
      if (flags & TME_M68K_BUS_CYCLE_FETCH) {
        if (linear_address & 1) {
          exception = TME_M68K_EXCEPTION_AERR;
          break;
        }
        assert(!(resid & 1));
      }

      /* set the bus-size specific parts of the bus cycle structure.
         a cycle on this bus never crosses a 32-bit boundary: */
      cycle_size = TME_MIN(resid, sizeof(tme_uint32_t) - (linear_address & (sizeof(tme_uint32_t) - 1)));
      cycle.tme_bus_cycle_size = cycle_size;
      cycle.tme_bus_cycle_port = TME_BUS_CYCLE_PORT(0, TME_BUS32_LOG2);
      cycle.tme_bus_cycle_lane_routing = 
        &tme_m68k_router_32[TME_M68K_BUS_ROUTER_INDEX(TME_BUS32_LOG2, cycle_size, linear_address)];
    }

    /* loop while this TLB entry is invalid or does not apply.  an
       entry applies when it matches the bus context and function
       code, covers the first address of this cycle, and allows
       either fast reads or read bus cycles: */
    for (; __tme_predict_false(tme_m68k_tlb_is_invalid(tlb)
                               || tlb->tme_m68k_tlb_bus_context != ic->_tme_m68k_bus_context
                               || (tlb->tme_m68k_tlb_function_codes_mask & TME_BIT(function_code)) == 0
                               || linear_address < (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_first
                               || linear_address > (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_last
                               || (tlb->tme_m68k_tlb_emulator_off_read == TME_EMULATOR_OFF_UNDEF
                                   && (tlb->tme_m68k_tlb_cycles_ok & TME_BUS_CYCLE_READ) == 0)); ) {

      /* this must not be part of a read/modify/write cycle: */
      assert(!(flags & TME_M68K_BUS_CYCLE_RMW));

      /* fill this TLB entry: */
      tme_m68k_tlb_fill(ic, tlb,
                        function_code,
                        linear_address,
                        TME_BUS_CYCLE_READ);
    }

    /* if this TLB entry allows for fast reads: */
    mem = tlb->tme_m68k_tlb_emulator_off_read;
    if (__tme_predict_true(mem != TME_EMULATOR_OFF_UNDEF)) {

      /* make the emulator memory pointer: */
      mem += linear_address;

      /* limit the cycle size to addresses covered by the TLB entry: */
      if (__tme_predict_false((cycle_size - 1)
                              > (((tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_last) - linear_address))) {
        cycle_size = (((tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_last) - linear_address) + 1;
      }

      /* if this is a little-endian host, and this isn't a raw read: */
      if (TME_ENDIAN_NATIVE == TME_ENDIAN_LITTLE
          && (flags & TME_M68K_BUS_CYCLE_RAW) == 0) {

        /* use the intermediate buffer for the read: */
        cycle.tme_bus_cycle_buffer = &reg_buffer[0];
      }

      /* do the bus read: */
      tme_memory_bus_read_buffer(mem,
                                 cycle.tme_bus_cycle_buffer,
                                 cycle_size,
                                 tlb->tme_m68k_tlb_bus_rwlock,
                                 sizeof(tme_uint8_t),
                                 sizeof(tme_uint32_t));

      /* if this is a little-endian host, and this isn't a raw read: */
      if (TME_ENDIAN_NATIVE == TME_ENDIAN_LITTLE
          && (flags & TME_M68K_BUS_CYCLE_RAW) == 0) {

        /* byteswap the read data in the intermediate buffer, storing
           it into reg in descending order: */
        reg_p = reg + reg_size - (1 + transferred);
        buffer_i = 0;
        do {
          *(reg_p--) = reg_buffer[buffer_i];
        } while (++buffer_i != cycle_size);
      }

      /* update: */
      linear_address += cycle_size;
      transferred += cycle_size;
      continue;
    }

    /* otherwise, this TLB entry does not allow for fast reads: */

    /* if this is a part of a read/modify/write cycle: */
    if (flags & TME_M68K_BUS_CYCLE_RMW) {

      /* if this is the first cycle in this read,
         we will establish the new lock, otherwise
         we will continue using the existing lock: */
      cycle.tme_bus_cycle_type
        |= (TME_BUS_CYCLE_LOCK
            | (transferred == 0 ? 0 : TME_BUS_CYCLE_UNLOCK));
    }

    /* form the physical address for the bus cycle handler.  a
       negative shift in the TLB entry is a left shift: */
    physical_address = tlb->tme_m68k_tlb_addr_offset + linear_address;
    shift = tlb->tme_m68k_tlb_addr_shift;
    if (shift < 0) {
      physical_address <<= (0 - shift);
    }
    else if (shift > 0) {
      physical_address >>= shift;
    }
    cycle.tme_bus_cycle_address = physical_address;

    /* run the bus cycle.  the TLB entry is not busy, and the callout
       unlock is in effect, for the duration of the call into the bus
       cycle handler: */
    tme_m68k_tlb_unbusy(tlb);
    tme_m68k_callout_unlock(ic);
    err = (*tlb->tme_m68k_tlb_bus_tlb.tme_bus_tlb_cycle)
         (tlb->tme_m68k_tlb_bus_tlb.tme_bus_tlb_cycle_private, &cycle);
    tme_m68k_callout_relock(ic);
    tme_m68k_tlb_busy(tlb);

    /* if the TLB entry was invalidated before the read, count
       nothing as transferred.  the next iteration will refill the
       TLB entry and retry this cycle: */
    if (err == EBADF
        && tme_m68k_tlb_is_invalid(tlb)) {
      cycle.tme_bus_cycle_size = 0;
    }

    /* otherwise, if we didn't get a bus error, but some
       synchronous event has happened: */
    else if (err == TME_BUS_CYCLE_SYNCHRONOUS_EVENT) {

      /* after the currently executing instruction finishes, check
         for external resets, halts, or interrupts: */
      ic->_tme_m68k_instruction_burst_remaining = 0;
    }

    /* otherwise, any other error might be a bus error: */
    else if (err != TME_OK) {
      err = tme_bus_tlb_fault(&tlb->tme_m68k_tlb_bus_tlb, &cycle, err);
      if (err != TME_OK) {
        exception = TME_M68K_EXCEPTION_BERR;
        break;
      }
    }

    /* update, using the size in the bus cycle structure after the
       cycle has run: */
    linear_address += cycle.tme_bus_cycle_size;
    transferred += cycle.tme_bus_cycle_size;
  }

  /* NB: there is no need to explicitly unlock
     a device.  if a locked bus cycle to a device
     faults, the lock must be automatically unlocked: */

  /* if we faulted, stash the information the fault stacker
     will need and start exception processing: */
  if (exception != TME_M68K_EXCEPTION_NONE) {
    ic->_tme_m68k_group0_flags = flags | TME_M68K_BUS_CYCLE_READ;
    ic->_tme_m68k_group0_function_code = function_code;
    ic->_tme_m68k_group0_address = linear_address;
    ic->_tme_m68k_group0_sequence = ic->_tme_m68k_sequence;
    ic->_tme_m68k_group0_sequence._tme_m68k_sequence_transfer_faulted_after = transferred;
    ic->_tme_m68k_group0_buffer_read_size = cycle_size;

    /* call any group 0 hook before the faulted transfer number is
       recorded: */
    if (ic->_tme_m68k_group0_hook != NULL) {
      (*ic->_tme_m68k_group0_hook)(ic);
    }
    ic->_tme_m68k_group0_sequence._tme_m68k_sequence_transfer_faulted = 
      ic->_tme_m68k_group0_sequence._tme_m68k_sequence_transfer_next;

    /* NOTE(review): tme_m68k_exception() presumably does not return -
       confirm.  if it did return, the transfer step below would
       still run: */
    tme_m68k_tlb_unbusy(tlb);
    tme_m68k_exception(ic, exception);
  }

  /* otherwise, this transfer has now completed: */
  TME_M68K_SEQUENCE_TRANSFER_STEP;
}

/* this writes a mem value of any size: */
/* count bytes are written from buffer to the current effective
   address, using tme_m68k_write() with TME_M68K_BUS_CYCLE_RAW: */
void
tme_m68k_write_mem(struct tme_m68k *ic, tme_uint8_t *buffer, unsigned int count) 
{
  tme_bus_context_t context;
  unsigned int fc;
  tme_uint32_t addr_first;
  struct tme_m68k_tlb *dtlb;

  /* find the data TLB entry for the effective address: */
  context = ic->_tme_m68k_bus_context;
  fc = ic->_tme_m68k_ea_function_code;
  addr_first = ic->_tme_m68k_ea_address;
  dtlb = TME_M68K_DTLB_ENTRY(ic, context, fc, addr_first);

#ifdef _TME_M68K_STATS
  ic->tme_m68k_stats.tme_m68k_stats_memory_total++;
#endif /* _TME_M68K_STATS */

  /* verify the bytes before they are written: */
  tme_m68k_verify_mem_any(ic, ic->_tme_m68k_ea_function_code, ic->_tme_m68k_ea_address, buffer, count, TME_BUS_CYCLE_WRITE);

  /* log the bytes to write, one at a time: */
  tme_m68k_log_start(ic, 1000, TME_OK) {
    unsigned int idx;
    tme_log_part(TME_M68K_LOG_HANDLE(ic),
                 _("write_mem %d:0x%08x count %d:"),
                 ic->_tme_m68k_ea_function_code,
                 ic->_tme_m68k_ea_address,
                 count);
    idx = 0;
    while (idx < count) {
      tme_log_part(TME_M68K_LOG_HANDLE(ic), " 0x%02x", buffer[idx]);
      idx++;
    }
  } tme_m68k_log_finish(ic);

  /* do the whole transfer with the TLB entry busy: */
  tme_m68k_tlb_busy(dtlb);
  tme_m68k_write(ic, dtlb, &ic->_tme_m68k_ea_function_code, &ic->_tme_m68k_ea_address, buffer, count, TME_M68K_BUS_CYCLE_RAW);
  tme_m68k_tlb_unbusy(dtlb);
}

/* this writes a region of address space using actual bus cycles: */
void
tme_m68k_write(struct tme_m68k *ic, 
              struct tme_m68k_tlb *tlb,
              unsigned int *_function_code, 
              tme_uint32_t *_linear_address, 
              tme_uint8_t *reg,
              unsigned int reg_size,
              unsigned int flags)
{
  unsigned int function_code;
  tme_uint32_t linear_address;
  tme_bus_addr_t physical_address;
  int shift;
  struct tme_bus_cycle cycle;
  unsigned int transferred, resid, cycle_size;
  int exception;
  int err;
  tme_uint8_t *reg_p;
  unsigned int buffer_i;
  tme_uint8_t reg_buffer[sizeof(tme_uint32_t) * 2];
  tme_shared tme_uint8_t *mem;

  /* if we're not restarting, everything is fresh: */
  if (!TME_M68K_SEQUENCE_RESTARTING) {
    function_code = *_function_code;
    linear_address = *_linear_address;
    transferred = 0;
  }

  /* otherwise, if this is the transfer that faulted, restore
     our state to the cycle that faulted, then take into account
     any data provided by a software rerun of the faulted cycle: */
  else if (ic->_tme_m68k_sequence._tme_m68k_sequence_transfer_faulted
           == ic->_tme_m68k_sequence._tme_m68k_sequence_transfer_next) {
    function_code = *_function_code = ic->_tme_m68k_group0_function_code;
    linear_address = ic->_tme_m68k_group0_address;
    transferred = ic->_tme_m68k_sequence._tme_m68k_sequence_transfer_faulted_after;
    if (transferred >= reg_size) abort();
    *_linear_address = linear_address - transferred;
    resid = reg_size - transferred;
    if (ic->_tme_m68k_group0_buffer_write_size > resid) abort();
    if (ic->_tme_m68k_group0_buffer_write_softrr > resid) abort();
    if (ic->_tme_m68k_group0_buffer_write_softrr == 0) {
#ifdef WORDS_BIGENDIAN
      memcpy(reg + transferred, 
             ic->_tme_m68k_group0_buffer_write,
             ic->_tme_m68k_group0_buffer_write_size);
#else  /* !WORDS_BIGENDIAN */
      reg_p = (reg + reg_size - 1) - transferred;
      for (buffer_i = 0;
           buffer_i < ic->_tme_m68k_group0_buffer_write_size;
           buffer_i++) {
        *(reg_p--) = ic->_tme_m68k_group0_buffer_write[buffer_i];
      }
#endif /* !WORDS_BIGENDIAN */
    }
    transferred += ic->_tme_m68k_group0_buffer_write_softrr;
  }

  /* otherwise, a later transfer has faulted.  just step the
     transfer number and return: */
  else {
    TME_M68K_SEQUENCE_TRANSFER_STEP;
    return;
  }

  /* do as many bus cycles as needed to complete the transfer: */
  exception = TME_M68K_EXCEPTION_NONE;
  cycle_size = 0;
  for(; transferred < reg_size; ) {
    resid = reg_size - transferred;

    /* start the bus cycle structure: */
    cycle.tme_bus_cycle_type = TME_BUS_CYCLE_WRITE;
    if (TME_ENDIAN_NATIVE == TME_ENDIAN_BIG
        || (flags & TME_M68K_BUS_CYCLE_RAW)) {
      cycle.tme_bus_cycle_buffer = reg + transferred;
      cycle.tme_bus_cycle_buffer_increment = 1;
    }
    else {
      cycle.tme_bus_cycle_buffer = reg + reg_size - (1 + transferred);
      cycle.tme_bus_cycle_buffer_increment = -1;
    }

    /* if we're emulating a CPU with a 16-bit bus interface: */
    if (ic->_tme_m68k_bus_16bit) {

      /* if we're trying to transfer a non-power-of-two
         number of bytes, either the CPU is broken (no
         instructions ever transfer a non-power-of-two
         number of bytes), or this function allowed an
         unaligned transfer: */
      assert((resid & (resid - 1)) == 0
             || (flags & TME_M68K_BUS_CYCLE_RAW));

      /* only byte transfers can be unaligned: */
      if (resid > sizeof(tme_uint8_t)
          && (linear_address & 1)) {
          exception = TME_M68K_EXCEPTION_AERR;
          break;
      }

      /* set the bus-size specific parts of the bus cycle structure: */
      cycle_size = TME_MIN(resid, sizeof(tme_uint16_t));
      cycle.tme_bus_cycle_size = cycle_size;
      cycle.tme_bus_cycle_port = TME_BUS_CYCLE_PORT(0, TME_BUS16_LOG2);
      cycle.tme_bus_cycle_lane_routing = 
        &tme_m68k_router_16[TME_M68K_BUS_ROUTER_INDEX(TME_BUS16_LOG2, cycle_size, linear_address)];
    }

    /* otherwise we're emulating a CPU with a 32-bit bus interface: */
    else {

      /* set the bus-size specific parts of the bus cycle structure: */
      cycle_size = TME_MIN(resid, sizeof(tme_uint32_t) - (linear_address & (sizeof(tme_uint32_t) - 1)));
      cycle.tme_bus_cycle_size = cycle_size;
      cycle.tme_bus_cycle_port = TME_BUS_CYCLE_PORT(0, TME_BUS32_LOG2);
      cycle.tme_bus_cycle_lane_routing = 
        &tme_m68k_router_32[TME_M68K_BUS_ROUTER_INDEX(TME_BUS32_LOG2, cycle_size, linear_address)];
    }

    /* loop while this TLB entry is invalid or does not apply: */
    for (; __tme_predict_false(tme_m68k_tlb_is_invalid(tlb)
                               || tlb->tme_m68k_tlb_bus_context != ic->_tme_m68k_bus_context
                               || (tlb->tme_m68k_tlb_function_codes_mask & TME_BIT(function_code)) == 0
                               || linear_address < (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_first
                               || linear_address > (tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_last
                               || (tlb->tme_m68k_tlb_emulator_off_write == TME_EMULATOR_OFF_UNDEF
                                   && (tlb->tme_m68k_tlb_cycles_ok & TME_BUS_CYCLE_WRITE) == 0)); ) {

      /* this must not be part of a read/modify/write cycle: */
      assert(!(flags & TME_M68K_BUS_CYCLE_RMW));

      /* fill this TLB entry: */
      tme_m68k_tlb_fill(ic, tlb,
                        function_code,
                        linear_address,
                        TME_BUS_CYCLE_WRITE);
    }

    /* if this TLB entry allows for fast writes: */
    mem = tlb->tme_m68k_tlb_emulator_off_write;
    if (__tme_predict_true(mem != TME_EMULATOR_OFF_UNDEF)) {

      /* make the emulator memory pointer: */
      mem += linear_address;

      /* limit the cycle size to addresses covered by the TLB entry: */
      if (__tme_predict_false((cycle_size - 1)
                              > (((tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_last) - linear_address))) {
        cycle_size = (((tme_bus_addr32_t) tlb->tme_m68k_tlb_linear_last) - linear_address) + 1;
      }

      /* if this is a little-endian host, and this isn't a raw write: */
      if (TME_ENDIAN_NATIVE == TME_ENDIAN_LITTLE
          && (flags & TME_M68K_BUS_CYCLE_RAW) == 0) {

        /* byteswap the data to write in the intermediate buffer: */
        reg_p = cycle.tme_bus_cycle_buffer;
        buffer_i = 0;
        do {
          reg_buffer[buffer_i] = *(reg_p--);
        } while (++buffer_i != cycle_size);

        /* use the intermediate buffer for the write: */
        cycle.tme_bus_cycle_buffer = &reg_buffer[0];
      }

      /* do the bus write: */
      tme_memory_bus_write_buffer(mem,
                                 cycle.tme_bus_cycle_buffer,
                                 cycle_size,
                                 tlb->tme_m68k_tlb_bus_rwlock,
                                 sizeof(tme_uint8_t),
                                 sizeof(tme_uint32_t));

      /* update: */
      linear_address += cycle_size;
      transferred += cycle_size;
      continue;
    }

    /* otherwise, this TLB entry does not allow for fast writes: */

    /* if this is a part of a read/modify/write cycle: */
    if (flags & TME_M68K_BUS_CYCLE_RMW) {

      /* we will continue using the existing lock.
         the device will automatically unlock after
         the last cycle of this write: */
      cycle.tme_bus_cycle_type
        |= (TME_BUS_CYCLE_LOCK
            | (TME_BUS_CYCLE_UNLOCK));
    }

    /* form the physical address for the bus cycle handler: */
    physical_address = tlb->tme_m68k_tlb_addr_offset + linear_address;
    shift = tlb->tme_m68k_tlb_addr_shift;
    if (shift < 0) {
      physical_address <<= (0 - shift);
    }
    else if (shift > 0) {
      physical_address >>= shift;
    }
    cycle.tme_bus_cycle_address = physical_address;

    /* run the bus cycle: */
    tme_m68k_tlb_unbusy(tlb);
    tme_m68k_callout_unlock(ic);
    err = (*tlb->tme_m68k_tlb_bus_tlb.tme_bus_tlb_cycle)
         (tlb->tme_m68k_tlb_bus_tlb.tme_bus_tlb_cycle_private, &cycle);
    tme_m68k_callout_relock(ic);
    tme_m68k_tlb_busy(tlb);

    /* if the TLB entry was invalidated before the write: */
    if (err == EBADF
        && tme_m68k_tlb_is_invalid(tlb)) {
      cycle.tme_bus_cycle_size = 0;
    }

    /* otherwise, if we didn't get a bus error, but some
       synchronous event has happened: */
    else if (err == TME_BUS_CYCLE_SYNCHRONOUS_EVENT) {

      /* after the currently executing instruction finishes, check
         for external resets, halts, or interrupts: */
      ic->_tme_m68k_instruction_burst_remaining = 0;
    }

    /* otherwise, any other error might be a bus error: */
    else if (err != TME_OK) {
      err = tme_bus_tlb_fault(&tlb->tme_m68k_tlb_bus_tlb, &cycle, err);
      if (err != TME_OK) {
        exception = TME_M68K_EXCEPTION_BERR;
        break;
      }
    }

    /* update: */
    linear_address += cycle.tme_bus_cycle_size;
    transferred += cycle.tme_bus_cycle_size;
  }

  /* NB: there is no need to explicitly unlock
     a device.  if a locked bus cycle to a device
     faults, the lock must be automatically unlocked: */

  /* if we faulted, stash the information the fault stacker
     will need and start exception processing: */
  if (exception != TME_M68K_EXCEPTION_NONE) {
    ic->_tme_m68k_group0_flags = flags;
    ic->_tme_m68k_group0_function_code = function_code;
    ic->_tme_m68k_group0_address = linear_address;
    ic->_tme_m68k_group0_sequence = ic->_tme_m68k_sequence;
    ic->_tme_m68k_group0_sequence._tme_m68k_sequence_transfer_faulted_after = transferred;
    ic->_tme_m68k_group0_buffer_write_size = cycle_size;
#ifdef WORDS_BIGENDIAN
    memcpy(ic->_tme_m68k_group0_buffer_write,
           reg + transferred,
           ic->_tme_m68k_group0_buffer_write_size);
#else  /* !WORDS_BIGENDIAN */
      reg_p = (reg + reg_size - 1) - transferred;
      for (buffer_i = 0;
           buffer_i < ic->_tme_m68k_group0_buffer_write_size;
           buffer_i++) {
        ic->_tme_m68k_group0_buffer_write[buffer_i] = *(reg_p--);
      }
#endif /* !WORDS_BIGENDIAN */
    if (ic->_tme_m68k_group0_hook != NULL) {
      (*ic->_tme_m68k_group0_hook)(ic);
    }
    ic->_tme_m68k_group0_sequence._tme_m68k_sequence_transfer_faulted = 
      ic->_tme_m68k_group0_sequence._tme_m68k_sequence_transfer_next;
    tme_m68k_tlb_unbusy(tlb);
    tme_m68k_exception(ic, exception);
  }

  /* otherwise, this transfer has now completed: */
  TME_M68K_SEQUENCE_TRANSFER_STEP;
}

TME_M68K_INSN(tme_m68k_abcd)
{
  tme_uint8_t dst, dst_msd, dst_lsd;
  tme_uint8_t src, src_msd, src_lsd;
  tme_uint8_t res, res_msd, res_lsd;
  tme_uint8_t flags;
  int memory;
  int rx, ry, function_code;

  /* load the operands: */
  rx = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 0, 3);
  ry = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 9, 3);
  memory = (TME_M68K_INSN_OPCODE & TME_BIT(3)) != 0;
  function_code = TME_M68K_FUNCTION_CODE_DATA(ic);
  if (memory) {
    TME_M68K_INSN_CANFAULT;
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + rx) -= sizeof(tme_uint8_t) + ((rx + 1) >> 3);
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + rx);
    }
    tme_m68k_read_memx8(ic);
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ry) -= sizeof(tme_uint8_t) + ((ry + 1) >> 3);
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ry);
    }
    tme_m68k_read_mem8(ic, TME_M68K_IREG_MEMY32);
    src = ic->tme_m68k_ireg_memx8;
    dst = ic->tme_m68k_ireg_memy8;
  }
  else {
    src = ic->tme_m68k_ireg_uint8(rx << 2);
    dst = ic->tme_m68k_ireg_uint8(ry << 2);
  }
  dst_lsd = TME_FIELD_EXTRACTU(dst, 0, 4);
  dst_msd = TME_FIELD_EXTRACTU(dst, 4, 4);
  src_lsd = TME_FIELD_EXTRACTU(src, 0, 4);
  src_msd = TME_FIELD_EXTRACTU(src, 4, 4);

  /* perform the operation: */
  res_lsd = dst_lsd + src_lsd + ((ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X) != 0);
  res_msd = dst_msd + src_msd;
  flags = 0;
  if (res_lsd > 9) {
    res_lsd -= 10;
    res_msd += 1;
  }
  if (res_msd > 9) {
    res_msd -= 10;
    flags |= TME_M68K_FLAG_C | TME_M68K_FLAG_X;
  }
  res = (res_msd << 4) + (res_lsd & 0xf);
  if (res == 0) flags |= TME_M68K_FLAG_N;

  /* store the result and set the flags: */
  if (memory) {
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_memx8 = res;
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ry);
      ic->tme_m68k_ireg_ccr = flags;
     }
     tme_m68k_write_memx8(ic);
  }
  else {
    ic->tme_m68k_ireg_uint8(ry << 2) = res;
    ic->tme_m68k_ireg_ccr = flags;
  }

  TME_M68K_INSN_OK;
}

TME_M68K_INSN(tme_m68k_sbcd)
{
  tme_uint8_t dst, dst_msd, dst_lsd;
  tme_uint8_t src, src_msd, src_lsd;
  tme_uint8_t res, res_msd, res_lsd;
  tme_uint8_t flags;
  int memory;
  int rx, ry, function_code;

  /* load the operands: */
  rx = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 0, 3);
  ry = TME_FIELD_EXTRACTU(TME_M68K_INSN_OPCODE, 9, 3);
  memory = (TME_M68K_INSN_OPCODE & TME_BIT(3)) != 0;
  function_code = TME_M68K_FUNCTION_CODE_DATA(ic);
  if (memory) {
    TME_M68K_INSN_CANFAULT;
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + rx) -= sizeof(tme_uint8_t) + ((rx + 1) >> 3);
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + rx);
    }
    tme_m68k_read_memx8(ic);
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ry) -= sizeof(tme_uint8_t) + ((ry + 1) >> 3);
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ry);
    }
    tme_m68k_read_mem8(ic, TME_M68K_IREG_MEMY32);
    src = ic->tme_m68k_ireg_memx8;
    dst = ic->tme_m68k_ireg_memy8;
  }
  else {
    src = ic->tme_m68k_ireg_uint8(rx << 2);
    dst = ic->tme_m68k_ireg_uint8(ry << 2);
  }
  dst_lsd = TME_FIELD_EXTRACTU(dst, 0, 4);
  dst_msd = TME_FIELD_EXTRACTU(dst, 4, 4);
  src_lsd = TME_FIELD_EXTRACTU(src, 0, 4);
  src_msd = TME_FIELD_EXTRACTU(src, 4, 4);

  /* perform the operation: */
  res_lsd = dst_lsd - src_lsd - ((ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X) != 0);
  res_msd = dst_msd - src_msd;
  flags = 0;
  if (res_lsd > 9) {
    res_lsd += 10;
    res_msd -= 1;
  }
  if (res_msd > 9) {
    res_msd += 10;
    flags |= TME_M68K_FLAG_C | TME_M68K_FLAG_X;
  }
  res = (res_msd << 4) + (res_lsd & 0xf);
  if (res == 0) flags |= TME_M68K_FLAG_N;

  /* store the result and set the flags: */
  if (memory) {
    if (!TME_M68K_SEQUENCE_RESTARTING) {
      ic->tme_m68k_ireg_memx8 = res;
      ic->_tme_m68k_ea_function_code = function_code;
      ic->_tme_m68k_ea_address = ic->tme_m68k_ireg_uint32(TME_M68K_IREG_A0 + ry);
      ic->tme_m68k_ireg_ccr = flags;
     }
     tme_m68k_write_memx8(ic);
  }
  else {
    ic->tme_m68k_ireg_uint8(ry << 2) = res;
    ic->tme_m68k_ireg_ccr = flags;
  }

  TME_M68K_INSN_OK;
}

TME_M68K_INSN(tme_m68k_nbcd)
{
  tme_uint8_t dst, dst_msd, dst_lsd;
  tme_uint8_t src, src_msd, src_lsd;
  tme_uint8_t res, res_msd, res_lsd;
  tme_uint8_t flags;

  dst = 0x00;
  src = TME_M68K_INSN_OP1(tme_uint8_t);
  dst_lsd = TME_FIELD_EXTRACTU(dst, 0, 4);
  dst_msd = TME_FIELD_EXTRACTU(dst, 4, 4);
  src_lsd = TME_FIELD_EXTRACTU(src, 0, 4);
  src_msd = TME_FIELD_EXTRACTU(src, 4, 4);

  /* perform the operation: */
  res_lsd = dst_lsd - src_lsd - ((ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X) != 0);
  res_msd = dst_msd - src_msd;
  flags = 0;
  if (res_lsd > 9) {
    res_lsd += 10;
    res_msd -= 1;
  }
  if (res_msd > 9) {
    res_msd += 10;
    flags |= TME_M68K_FLAG_C | TME_M68K_FLAG_X;
  }
  res = (res_msd << 4) + (res_lsd & 0xf);
  if (res == 0) flags |= TME_M68K_FLAG_N;

  /* store the result and set the flags: */
  TME_M68K_INSN_OP1(tme_uint8_t) = res;
  ic->tme_m68k_ireg_ccr = flags;

  TME_M68K_INSN_OK;
}

/* this does "ori #IMM, %ccr": every bit of the 8-bit immediate
   (operand zero) that is selected by TME_M68K_FLAG_CCR is set in
   the CCR: */
TME_M68K_INSN(tme_m68k_ori_ccr)
{
  tme_uint8_t imm_bits;
  tme_uint8_t ccr_new;

  /* drop the immediate bits outside of the CCR mask: */
  imm_bits = (TME_M68K_INSN_OP0(tme_uint8_t) & TME_M68K_FLAG_CCR);

  /* merge them into the current CCR: */
  ccr_new = (ic->tme_m68k_ireg_ccr | imm_bits);
  ic->tme_m68k_ireg_ccr = ccr_new;

  TME_M68K_INSN_OK;
}

/* this does "andi #IMM, %ccr": the CCR keeps only the bits that
   are set in both the 8-bit immediate (operand zero) and
   TME_M68K_FLAG_CCR: */
TME_M68K_INSN(tme_m68k_andi_ccr)
{
  tme_uint8_t imm_bits;
  tme_uint8_t ccr_new;

  /* drop the immediate bits outside of the CCR mask: */
  imm_bits = (TME_M68K_INSN_OP0(tme_uint8_t) & TME_M68K_FLAG_CCR);

  /* clear every CCR bit that is not set in the masked immediate: */
  ccr_new = (ic->tme_m68k_ireg_ccr & imm_bits);
  ic->tme_m68k_ireg_ccr = ccr_new;

  TME_M68K_INSN_OK;
}

/* this does "eori #IMM, %ccr": every bit of the 8-bit immediate
   (operand zero) that is selected by TME_M68K_FLAG_CCR is toggled
   in the CCR: */
TME_M68K_INSN(tme_m68k_eori_ccr)
{
  tme_uint8_t imm_bits;
  tme_uint8_t ccr_new;

  /* drop the immediate bits outside of the CCR mask: */
  imm_bits = (TME_M68K_INSN_OP0(tme_uint8_t) & TME_M68K_FLAG_CCR);

  /* flip the selected bits of the current CCR: */
  ccr_new = (ic->tme_m68k_ireg_ccr ^ imm_bits);
  ic->tme_m68k_ireg_ccr = ccr_new;

  TME_M68K_INSN_OK;
}

TME_M68K_INSN(tme_m68k_move_to_ccr)
{
  tme_uint8_t reg;
  reg = (TME_M68K_INSN_OP1(tme_uint16_t) & TME_M68K_FLAG_CCR);
  ic->tme_m68k_ireg_ccr = reg;
  TME_M68K_INSN_OK;
}

/* this does "ori #IMM, %sr": every bit of the 16-bit immediate
   (operand zero) that is selected by TME_M68K_FLAG_SR is set in
   the SR.  the new value is formed first, then the privilege check
   is made (TME_M68K_INSN_PRIV, defined elsewhere), then the SR is
   changed through TME_M68K_INSN_CHANGE_SR: */
TME_M68K_INSN(tme_m68k_ori_sr)
{
  tme_uint16_t imm_bits;
  tme_uint16_t sr_new;

  /* drop the immediate bits outside of the SR mask: */
  imm_bits = (TME_M68K_INSN_OP0(tme_uint16_t) & TME_M68K_FLAG_SR);

  /* merge them into the current SR: */
  sr_new = (ic->tme_m68k_ireg_sr | imm_bits);

  /* check for privilege and install the new SR: */
  TME_M68K_INSN_PRIV;
  TME_M68K_INSN_CHANGE_SR(sr_new);

  TME_M68K_INSN_OK;
}

/* this does "andi #IMM, %sr": the SR keeps only the bits that are
   set in both the 16-bit immediate (operand zero) and
   TME_M68K_FLAG_SR.  the new value is formed first, then the
   privilege check is made (TME_M68K_INSN_PRIV, defined elsewhere),
   then the SR is changed through TME_M68K_INSN_CHANGE_SR: */
TME_M68K_INSN(tme_m68k_andi_sr)
{
  tme_uint16_t imm_bits;
  tme_uint16_t sr_new;

  /* drop the immediate bits outside of the SR mask: */
  imm_bits = (TME_M68K_INSN_OP0(tme_uint16_t) & TME_M68K_FLAG_SR);

  /* clear every SR bit that is not set in the masked immediate: */
  sr_new = (ic->tme_m68k_ireg_sr & imm_bits);

  /* check for privilege and install the new SR: */
  TME_M68K_INSN_PRIV;
  TME_M68K_INSN_CHANGE_SR(sr_new);

  TME_M68K_INSN_OK;
}

/* this does "eori #IMM, %sr": every bit of the 16-bit immediate
   (operand zero) that is selected by TME_M68K_FLAG_SR is toggled
   in the SR.  the new value is formed first, then the privilege
   check is made (TME_M68K_INSN_PRIV, defined elsewhere), then the
   SR is changed through TME_M68K_INSN_CHANGE_SR: */
TME_M68K_INSN(tme_m68k_eori_sr)
{
  tme_uint16_t imm_bits;
  tme_uint16_t sr_new;

  /* drop the immediate bits outside of the SR mask: */
  imm_bits = (TME_M68K_INSN_OP0(tme_uint16_t) & TME_M68K_FLAG_SR);

  /* flip the selected bits of the current SR: */
  sr_new = (ic->tme_m68k_ireg_sr ^ imm_bits);

  /* check for privilege and install the new SR: */
  TME_M68K_INSN_PRIV;
  TME_M68K_INSN_CHANGE_SR(sr_new);

  TME_M68K_INSN_OK;
}

/* this does "move SRC, %sr": the SR is replaced by the 16-bit
   source (operand one) masked with TME_M68K_FLAG_SR.  the new value
   is formed first, then the privilege check is made
   (TME_M68K_INSN_PRIV, defined elsewhere), then the SR is changed
   through TME_M68K_INSN_CHANGE_SR: */
TME_M68K_INSN(tme_m68k_move_to_sr)
{
  tme_uint16_t source;
  tme_uint16_t sr_new;

  /* fetch the source and keep only the bits in the SR mask: */
  source = TME_M68K_INSN_OP1(tme_uint16_t);
  sr_new = (source & TME_M68K_FLAG_SR);

  /* check for privilege and install the new SR: */
  TME_M68K_INSN_PRIV;
  TME_M68K_INSN_CHANGE_SR(sr_new);

  TME_M68K_INSN_OK;
}

TME_M68K_INSN(tme_m68k_mulu)
{
  int ireg_dl;
  tme_uint32_t res;
  tme_uint8_t flags;

  /* get the register containing the factor: */
  ireg_dl = TME_M68K_IREG_D0 + TME_M68K_INSN_OP0(tme_uint32_t);

  /* perform the multiplication: */
  res = (((tme_uint32_t) ic->tme_m68k_ireg_uint16(ireg_dl << 1))
         * TME_M68K_INSN_OP1(tme_uint16_t));

  /* store the result: */
  ic->tme_m68k_ireg_uint32(ireg_dl) = (tme_uint32_t) res;

  /* set the flags: */
  flags = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;
  if (((tme_int32_t) res) < 0) flags |= TME_M68K_FLAG_N;
  if (res == 0) flags |= TME_M68K_FLAG_Z;
  ic->tme_m68k_ireg_ccr = flags;

  TME_M68K_INSN_OK;
}

TME_M68K_INSN(tme_m68k_divu)
{
  int ireg_dq;
  tme_uint32_t dividend, quotient;
  tme_uint16_t divisor, remainder;
  tme_uint8_t flags;

  /* get the register(s): */
  ireg_dq = TME_M68K_IREG_D0 + TME_M68K_INSN_OP0(tme_uint32_t);

  /* form the dividend and the divisor: */
  dividend = (tme_uint32_t) ic->tme_m68k_ireg_uint32(ireg_dq);
  divisor = TME_M68K_INSN_OP1(tme_uint16_t);
  if (divisor == 0) {
    ic->tme_m68k_ireg_pc_last = ic->tme_m68k_ireg_pc;
    ic->tme_m68k_ireg_pc = ic->tme_m68k_ireg_pc_next;
    TME_M68K_INSN_EXCEPTION(TME_M68K_EXCEPTION_INST(TME_M68K_VECTOR_DIV0));
  }

  /* do the division: */
  quotient = dividend / divisor;
  remainder = dividend % divisor;

  /* set the flags and return the quotient and remainder: */
  flags = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;
  if (quotient > 0xffff) {
    flags |= TME_M68K_FLAG_V;
  }
  else {
    if (((tme_int16_t) quotient) < 0) flags |= TME_M68K_FLAG_N;
    if (quotient == 0) flags |= TME_M68K_FLAG_Z;
    ic->tme_m68k_ireg_uint16(ireg_dq << 1) = (tme_uint16_t) quotient;
    ic->tme_m68k_ireg_uint16((ireg_dq << 1) + 1) = remainder;
  }
  ic->tme_m68k_ireg_ccr = flags;

  TME_M68K_INSN_OK;
}

/* this does an unsigned long "mulu.l".  bits 12..14 of the special
   operation word name the data register that holds one factor and
   receives the low 32 bits of the product; the 32-bit operand one
   is the other factor.  if bit 10 of the special operation word is
   set, the high 32 bits of the product are also stored, in the data
   register named by bits 0..2, and V is never set.  otherwise, V is
   set when the product doesn't fit in 32 bits.  X is preserved, N
   and Z reflect the 64-bit product, and C is cleared.  without a
   64-bit host type, this instruction aborts the emulator: */
TME_M68K_INSN(tme_m68k_mulul)
{
#ifndef TME_HAVE_INT64_T
  abort();
#else /* TME_HAVE_INT64_T */
  tme_uint64_t product;
  tme_uint8_t ccr;
  unsigned int overflow_flag;
  int dreg_lo;
  int dreg_hi;

  /* find the data register holding the factor: */
  dreg_lo = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_SPECOP, 12, 3);

  /* widen the register factor and multiply: */
  product = ic->tme_m68k_ireg_uint32(dreg_lo);
  product *= TME_M68K_INSN_OP1(tme_uint32_t);

  /* the low half of the product always goes back to the
     factor register: */
  ic->tme_m68k_ireg_uint32(dreg_lo) = (tme_uint32_t) product;

  /* if this is the 64-bit form, also store the high half of
     the product, and overflow is impossible: */
  if (TME_M68K_INSN_SPECOP & TME_BIT(10)) {
    dreg_hi = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_SPECOP, 0, 3);
    ic->tme_m68k_ireg_uint32(dreg_hi) = (tme_uint32_t) (product >> 32);
    overflow_flag = 0;
  }

  /* otherwise, a product wider than 32 bits will overflow: */
  else {
    overflow_flag = TME_M68K_FLAG_V;
  }

  /* start from the preserved X flag, then add N or Z, and V: */
  ccr = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;
  if (product == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }
  else if (((tme_int64_t) product) < 0) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (product > 0xffffffffUL) {
    ccr |= overflow_flag;
  }
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
#endif /* TME_HAVE_INT64_T */
}

/* this does an unsigned long "divu.l".  bits 12..14 of the special
   operation word name the quotient register Dq, and bits 0..2 name
   the remainder register Dr.  if bit 10 is set, the dividend is the
   64-bit value Dr:Dq, otherwise it is the 32-bit value in Dq.  the
   32-bit operand one is the divisor, and a zero divisor raises the
   divide-by-zero exception.  if the quotient fits in 32 bits, it is
   stored in Dq and, when Dr is a different register, the remainder
   is stored in Dr, and N and Z reflect the quotient; otherwise
   nothing is stored and V is set.  X is always preserved and C is
   always cleared.  without a 64-bit host type, this instruction
   aborts the emulator: */
TME_M68K_INSN(tme_m68k_divul)
{
#ifndef TME_HAVE_INT64_T
  abort();
#else /* TME_HAVE_INT64_T */
  tme_uint64_t numer;
  tme_uint64_t quot;
  tme_uint32_t denom;
  tme_uint32_t rem;
  tme_uint8_t ccr;
  int dreg_quot;
  int dreg_rem;

  /* find the quotient and remainder registers: */
  dreg_quot = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_SPECOP, 12, 3);
  dreg_rem = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_SPECOP, 0, 3);

  /* the dividend is either the 64-bit register pair or
     just the quotient register: */
  if (TME_M68K_INSN_SPECOP & TME_BIT(10)) {
    numer = (tme_uint64_t)
            ((((tme_uint64_t) ic->tme_m68k_ireg_uint32(dreg_rem)) << 32)
             | ic->tme_m68k_ireg_uint32(dreg_quot));
  }
  else {
    numer = (tme_uint64_t) ic->tme_m68k_ireg_uint32(dreg_quot);
  }

  /* fetch the divisor.  a zero divisor is an exception, taken with
     the PC already advanced to the next instruction.  NB: the
     division below relies on TME_M68K_INSN_EXCEPTION not returning
     here - it is defined elsewhere: */
  denom = TME_M68K_INSN_OP1(tme_uint32_t);
  if (denom == 0) {
    ic->tme_m68k_ireg_pc_last = ic->tme_m68k_ireg_pc;
    ic->tme_m68k_ireg_pc = ic->tme_m68k_ireg_pc_next;
    TME_M68K_INSN_EXCEPTION(TME_M68K_EXCEPTION_INST(TME_M68K_VECTOR_DIV0));
  }

  /* divide: */
  quot = numer / denom;
  rem = numer % denom;

  /* start from the preserved X flag: */
  ccr = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;

  /* if the quotient fits in 32 bits, return it, and return the
     remainder too unless it would land on the quotient: */
  if (quot <= 0xffffffffUL) {
    if (quot == 0) {
      ccr |= TME_M68K_FLAG_Z;
    }
    else if (((tme_int32_t) quot) < 0) {
      ccr |= TME_M68K_FLAG_N;
    }
    ic->tme_m68k_ireg_uint32(dreg_quot) = (tme_uint32_t) quot;
    if (dreg_rem != dreg_quot) {
      ic->tme_m68k_ireg_uint32(dreg_rem) = rem;
    }
  }

  /* otherwise, this is an overflow and nothing is stored: */
  else {
    ccr |= TME_M68K_FLAG_V;
  }
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
#endif /* TME_HAVE_INT64_T */
}

/* this does a signed 16x16->32 "muls SRC, Dn".  operand zero holds
   the number of the data register; its signed 16-bit view at index
   (register << 1) is one factor, and the signed 16-bit operand one
   is the other.  the full 32-bit product replaces the data
   register.  X is preserved, N and Z reflect the product, and V and
   C are cleared: */
TME_M68K_INSN(tme_m68k_muls)
{
  int dreg;
  tme_int32_t product;
  tme_uint8_t ccr;

  /* find the data register named by operand zero: */
  dreg = TME_M68K_IREG_D0 + TME_M68K_INSN_OP0(tme_uint32_t);

  /* sign-extend the register factor and multiply: */
  product = ic->tme_m68k_ireg_int16(dreg << 1);
  product *= TME_M68K_INSN_OP1(tme_int16_t);

  /* the product replaces the whole data register: */
  ic->tme_m68k_ireg_int32(dreg) = product;

  /* start from the preserved X flag, then add N or Z: */
  ccr = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;
  if (product == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }
  else if (product < 0) {
    ccr |= TME_M68K_FLAG_N;
  }
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
}

TME_M68K_INSN(tme_m68k_divs)
{
  int ireg_dq;
  tme_int32_t dividend, quotient;
  tme_int16_t divisor, remainder;
  tme_uint8_t flags;

  /* get the register(s): */
  ireg_dq = TME_M68K_IREG_D0 + TME_M68K_INSN_OP0(tme_uint32_t);

  /* form the dividend and the divisor: */
  dividend = (tme_int32_t) ic->tme_m68k_ireg_int32(ireg_dq);
  divisor = TME_M68K_INSN_OP1(tme_int16_t);
  if (divisor == 0) {
    ic->tme_m68k_ireg_pc_last = ic->tme_m68k_ireg_pc;
    ic->tme_m68k_ireg_pc = ic->tme_m68k_ireg_pc_next;
    TME_M68K_INSN_EXCEPTION(TME_M68K_EXCEPTION_INST(TME_M68K_VECTOR_DIV0));
  }

  /* do the division: */
  quotient = dividend / divisor;
  remainder = dividend % divisor;

  /* set the flags and return the quotient and remainder: */
  flags = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;
  if (quotient > 0x7fff || quotient < -32768) {
    flags |= TME_M68K_FLAG_V;
  }
  else {
    if (((tme_int16_t) quotient) < 0) flags |= TME_M68K_FLAG_N;
    if (quotient == 0) flags |= TME_M68K_FLAG_Z;
    ic->tme_m68k_ireg_int16(ireg_dq << 1) = (tme_int16_t) quotient;
    ic->tme_m68k_ireg_int16((ireg_dq << 1) + 1) = remainder;
  }
  ic->tme_m68k_ireg_ccr = flags;

  TME_M68K_INSN_OK;
}

/* this does a signed long "muls.l".  bits 12..14 of the special
   operation word name the data register that holds one factor and
   receives the low 32 bits of the product; the signed 32-bit
   operand one is the other factor.  if bit 10 of the special
   operation word is set, the high 32 bits of the product are also
   stored, in the data register named by bits 0..2, and V is never
   set.  otherwise, V is set when the product doesn't fit in a
   signed 32 bits.  X is preserved, N and Z reflect the 64-bit
   product, and C is cleared.  without a 64-bit host type, this
   instruction aborts the emulator: */
TME_M68K_INSN(tme_m68k_mulsl)
{
#ifndef TME_HAVE_INT64_T
  abort();
#else /* TME_HAVE_INT64_T */
  tme_int64_t product;
  tme_uint8_t ccr;
  unsigned int overflow_flag;
  int dreg_lo;
  int dreg_hi;

  /* find the data register holding the factor: */
  dreg_lo = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_SPECOP, 12, 3);

  /* sign-extend the register factor and multiply: */
  product = ic->tme_m68k_ireg_int32(dreg_lo);
  product *= TME_M68K_INSN_OP1(tme_int32_t);

  /* the low half of the product always goes back to the
     factor register: */
  ic->tme_m68k_ireg_int32(dreg_lo) = (tme_int32_t) product;

  /* if this is the 64-bit form, also store the high half of
     the product, and overflow is impossible: */
  if (TME_M68K_INSN_SPECOP & TME_BIT(10)) {
    dreg_hi = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_SPECOP, 0, 3);
    ic->tme_m68k_ireg_int32(dreg_hi) = (tme_int32_t) (product >> 32);
    overflow_flag = 0;
  }

  /* otherwise, a product outside of the signed 32-bit range
     will overflow: */
  else {
    overflow_flag = TME_M68K_FLAG_V;
  }

  /* start from the preserved X flag, then add N or Z, and V: */
  ccr = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;
  if (product == 0) {
    ccr |= TME_M68K_FLAG_Z;
  }
  else if (product < 0) {
    ccr |= TME_M68K_FLAG_N;
  }
  if (product > 0x7fffffffL || product < ((0L - 0x7fffffffL) - 1L)) {
    ccr |= overflow_flag;
  }
  ic->tme_m68k_ireg_ccr = ccr;

  TME_M68K_INSN_OK;
#endif /* TME_HAVE_INT64_T */
}

/* this does a signed long "divs.l".  bits 12..14 of the special
   operation word name the quotient register Dq, and bits 0..2 name
   the remainder register Dr.  if bit 10 is set, the dividend is the
   signed 64-bit value Dr:Dq, otherwise it is the signed 32-bit
   value in Dq.  the signed 32-bit operand one is the divisor, and a
   zero divisor raises the divide-by-zero exception.  if the
   quotient fits in a signed 32 bits, it is stored in Dq and, when
   Dr is a different register, the remainder is stored in Dr, and N
   and Z reflect the quotient; otherwise nothing is stored and V is
   set.  X is always preserved and C is always cleared.  without a
   64-bit host type, this instruction aborts the emulator: */
TME_M68K_INSN(tme_m68k_divsl)
{
#ifndef TME_HAVE_INT64_T
  abort();
#else /* TME_HAVE_INT64_T */
  int ireg_dr;
  int ireg_dq;
  tme_int64_t dividend, quotient;
  tme_int32_t divisor, remainder;
  tme_uint8_t flags;
  int overflow;

  /* get the register(s): */
  ireg_dq = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_SPECOP, 12, 3);
  ireg_dr = TME_M68K_IREG_D0 + TME_FIELD_EXTRACTU(TME_M68K_INSN_SPECOP, 0, 3);

  /* form the dividend and the divisor: */
  if (TME_M68K_INSN_SPECOP & TME_BIT(10)) {
    dividend = (tme_int64_t)
               ((((tme_uint64_t) ic->tme_m68k_ireg_uint32(ireg_dr)) << 32)
                | ic->tme_m68k_ireg_uint32(ireg_dq));
  }
  else {
    dividend = (tme_int64_t) ic->tme_m68k_ireg_int32(ireg_dq);
  }
  divisor = TME_M68K_INSN_OP1(tme_int32_t);

  /* a zero divisor is an exception, taken with the PC already
     advanced to the next instruction.  NB: the division below
     relies on TME_M68K_INSN_EXCEPTION not returning here - it is
     defined elsewhere: */
  if (divisor == 0) {
    ic->tme_m68k_ireg_pc_last = ic->tme_m68k_ireg_pc;
    ic->tme_m68k_ireg_pc = ic->tme_m68k_ireg_pc_next;
    TME_M68K_INSN_EXCEPTION(TME_M68K_EXCEPTION_INST(TME_M68K_VECTOR_DIV0));
  }

  /* the most negative 64-bit dividend divided by minus one has a
     quotient that cannot be represented in the host's signed 64-bit
     type.  doing that division in C is undefined, and traps on some
     hosts, so recognize it as the overflow that it is without
     dividing: */
  overflow = (divisor == -1
              && ((tme_uint64_t) dividend) == (((tme_uint64_t) 1) << 63));

  /* do the division: */
  quotient = 0;
  remainder = 0;
  if (!overflow) {
    quotient = dividend / divisor;
    remainder = dividend % divisor;
    overflow = (quotient > 0x7fffffffL || quotient < ((0L - 0x7fffffffL) - 1L));
  }

  /* set the flags and return the quotient and remainder: */
  flags = ic->tme_m68k_ireg_ccr & TME_M68K_FLAG_X;
  if (overflow) {
    flags |= TME_M68K_FLAG_V;
  }
  else {
    if (((tme_int32_t) quotient) < 0) flags |= TME_M68K_FLAG_N;
    if (quotient == 0) flags |= TME_M68K_FLAG_Z;
    ic->tme_m68k_ireg_int32(ireg_dq) = (tme_int32_t) quotient;
    if (ireg_dr != ireg_dq) {
      ic->tme_m68k_ireg_int32(ireg_dr) = remainder;
    }
  }
  ic->tme_m68k_ireg_ccr = flags;

  TME_M68K_INSN_OK;
#endif /* TME_HAVE_INT64_T */
}
/* automatically generated by m68k-misc-auto.sh, do not edit! */

/* the flags->conditions mapping.  each of the 32 entries is a mask
   with one TME_BIT(TME_M68K_C_*) bit set for every condition that
   is true for one combination of flags.  going by the pattern of
   the entries, bit zero of the index selects carry (CC vs. CS), bit
   one selects overflow (VC vs. VS), bit two selects zero (NE vs.
   EQ), bit three selects negative (PL vs. MI), and bit four has no
   effect on any condition, since the second sixteen entries repeat
   the first sixteen.  NB: this presumably means that the table is
   indexed directly by the low five bits of the CCR, with bit four
   being X - the code that indexes it is not in this part of the
   file: */
const tme_uint16_t _tme_m68k_conditions[32] = {
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_HI) | TME_BIT(TME_M68K_C_CC) | TME_BIT(TME_M68K_C_NE) | TME_BIT(TME_M68K_C_VC) | TME_BIT(TME_M68K_C_PL) | TME_BIT(TME_M68K_C_GE) | TME_BIT(TME_M68K_C_GT),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CS) | TME_BIT(TME_M68K_C_NE) | TME_BIT(TME_M68K_C_VC) | TME_BIT(TME_M68K_C_PL) | TME_BIT(TME_M68K_C_GE) | TME_BIT(TME_M68K_C_GT),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_HI) | TME_BIT(TME_M68K_C_CC) | TME_BIT(TME_M68K_C_NE) | TME_BIT(TME_M68K_C_VS) | TME_BIT(TME_M68K_C_PL) | TME_BIT(TME_M68K_C_LT) | TME_BIT(TME_M68K_C_LE),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CS) | TME_BIT(TME_M68K_C_NE) | TME_BIT(TME_M68K_C_VS) | TME_BIT(TME_M68K_C_PL) | TME_BIT(TME_M68K_C_LT) | TME_BIT(TME_M68K_C_LE),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CC) | TME_BIT(TME_M68K_C_EQ) | TME_BIT(TME_M68K_C_VC) | TME_BIT(TME_M68K_C_PL) | TME_BIT(TME_M68K_C_GE) | TME_BIT(TME_M68K_C_LE),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CS) | TME_BIT(TME_M68K_C_EQ) | TME_BIT(TME_M68K_C_VC) | TME_BIT(TME_M68K_C_PL) | TME_BIT(TME_M68K_C_GE) | TME_BIT(TME_M68K_C_LE),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CC) | TME_BIT(TME_M68K_C_EQ) | TME_BIT(TME_M68K_C_VS) | TME_BIT(TME_M68K_C_PL) | TME_BIT(TME_M68K_C_LT) | TME_BIT(TME_M68K_C_LE),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CS) | TME_BIT(TME_M68K_C_EQ) | TME_BIT(TME_M68K_C_VS) | TME_BIT(TME_M68K_C_PL) | TME_BIT(TME_M68K_C_LT) | TME_BIT(TME_M68K_C_LE),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_HI) | TME_BIT(TME_M68K_C_CC) | TME_BIT(TME_M68K_C_NE) | TME_BIT(TME_M68K_C_VC) | TME_BIT(TME_M68K_C_MI) | TME_BIT(TME_M68K_C_LT) | TME_BIT(TME_M68K_C_LE),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CS) | TME_BIT(TME_M68K_C_NE) | TME_BIT(TME_M68K_C_VC) | TME_BIT(TME_M68K_C_MI) | TME_BIT(TME_M68K_C_LT) | TME_BIT(TME_M68K_C_LE),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_HI) | TME_BIT(TME_M68K_C_CC) | TME_BIT(TME_M68K_C_NE) | TME_BIT(TME_M68K_C_VS) | TME_BIT(TME_M68K_C_MI) | TME_BIT(TME_M68K_C_GE) | TME_BIT(TME_M68K_C_GT),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CS) | TME_BIT(TME_M68K_C_NE) | TME_BIT(TME_M68K_C_VS) | TME_BIT(TME_M68K_C_MI) | TME_BIT(TME_M68K_C_GE) | TME_BIT(TME_M68K_C_GT),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CC) | TME_BIT(TME_M68K_C_EQ) | TME_BIT(TME_M68K_C_VC) | TME_BIT(TME_M68K_C_MI) | TME_BIT(TME_M68K_C_LT) | TME_BIT(TME_M68K_C_LE),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CS) | TME_BIT(TME_M68K_C_EQ) | TME_BIT(TME_M68K_C_VC) | TME_BIT(TME_M68K_C_MI) | TME_BIT(TME_M68K_C_LT) | TME_BIT(TME_M68K_C_LE),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CC) | TME_BIT(TME_M68K_C_EQ) | TME_BIT(TME_M68K_C_VS) | TME_BIT(TME_M68K_C_MI) | TME_BIT(TME_M68K_C_GE) | TME_BIT(TME_M68K_C_LE),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CS) | TME_BIT(TME_M68K_C_EQ) | TME_BIT(TME_M68K_C_VS) | TME_BIT(TME_M68K_C_MI) | TME_BIT(TME_M68K_C_GE) | TME_BIT(TME_M68K_C_LE),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_HI) | TME_BIT(TME_M68K_C_CC) | TME_BIT(TME_M68K_C_NE) | TME_BIT(TME_M68K_C_VC) | TME_BIT(TME_M68K_C_PL) | TME_BIT(TME_M68K_C_GE) | TME_BIT(TME_M68K_C_GT),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CS) | TME_BIT(TME_M68K_C_NE) | TME_BIT(TME_M68K_C_VC) | TME_BIT(TME_M68K_C_PL) | TME_BIT(TME_M68K_C_GE) | TME_BIT(TME_M68K_C_GT),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_HI) | TME_BIT(TME_M68K_C_CC) | TME_BIT(TME_M68K_C_NE) | TME_BIT(TME_M68K_C_VS) | TME_BIT(TME_M68K_C_PL) | TME_BIT(TME_M68K_C_LT) | TME_BIT(TME_M68K_C_LE),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CS) | TME_BIT(TME_M68K_C_NE) | TME_BIT(TME_M68K_C_VS) | TME_BIT(TME_M68K_C_PL) | TME_BIT(TME_M68K_C_LT) | TME_BIT(TME_M68K_C_LE),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CC) | TME_BIT(TME_M68K_C_EQ) | TME_BIT(TME_M68K_C_VC) | TME_BIT(TME_M68K_C_PL) | TME_BIT(TME_M68K_C_GE) | TME_BIT(TME_M68K_C_LE),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CS) | TME_BIT(TME_M68K_C_EQ) | TME_BIT(TME_M68K_C_VC) | TME_BIT(TME_M68K_C_PL) | TME_BIT(TME_M68K_C_GE) | TME_BIT(TME_M68K_C_LE),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CC) | TME_BIT(TME_M68K_C_EQ) | TME_BIT(TME_M68K_C_VS) | TME_BIT(TME_M68K_C_PL) | TME_BIT(TME_M68K_C_LT) | TME_BIT(TME_M68K_C_LE),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CS) | TME_BIT(TME_M68K_C_EQ) | TME_BIT(TME_M68K_C_VS) | TME_BIT(TME_M68K_C_PL) | TME_BIT(TME_M68K_C_LT) | TME_BIT(TME_M68K_C_LE),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_HI) | TME_BIT(TME_M68K_C_CC) | TME_BIT(TME_M68K_C_NE) | TME_BIT(TME_M68K_C_VC) | TME_BIT(TME_M68K_C_MI) | TME_BIT(TME_M68K_C_LT) | TME_BIT(TME_M68K_C_LE),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CS) | TME_BIT(TME_M68K_C_NE) | TME_BIT(TME_M68K_C_VC) | TME_BIT(TME_M68K_C_MI) | TME_BIT(TME_M68K_C_LT) | TME_BIT(TME_M68K_C_LE),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_HI) | TME_BIT(TME_M68K_C_CC) | TME_BIT(TME_M68K_C_NE) | TME_BIT(TME_M68K_C_VS) | TME_BIT(TME_M68K_C_MI) | TME_BIT(TME_M68K_C_GE) | TME_BIT(TME_M68K_C_GT),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CS) | TME_BIT(TME_M68K_C_NE) | TME_BIT(TME_M68K_C_VS) | TME_BIT(TME_M68K_C_MI) | TME_BIT(TME_M68K_C_GE) | TME_BIT(TME_M68K_C_GT),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CC) | TME_BIT(TME_M68K_C_EQ) | TME_BIT(TME_M68K_C_VC) | TME_BIT(TME_M68K_C_MI) | TME_BIT(TME_M68K_C_LT) | TME_BIT(TME_M68K_C_LE),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CS) | TME_BIT(TME_M68K_C_EQ) | TME_BIT(TME_M68K_C_VC) | TME_BIT(TME_M68K_C_MI) | TME_BIT(TME_M68K_C_LT) | TME_BIT(TME_M68K_C_LE),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CC) | TME_BIT(TME_M68K_C_EQ) | TME_BIT(TME_M68K_C_VS) | TME_BIT(TME_M68K_C_MI) | TME_BIT(TME_M68K_C_GE) | TME_BIT(TME_M68K_C_LE),
TME_BIT(TME_M68K_C_T) | TME_BIT(TME_M68K_C_LS) | TME_BIT(TME_M68K_C_CS) | TME_BIT(TME_M68K_C_EQ) | TME_BIT(TME_M68K_C_VS) | TME_BIT(TME_M68K_C_MI) | TME_BIT(TME_M68K_C_GE) | TME_BIT(TME_M68K_C_LE),
};
