binutils (2.41)

(root)/
include/
dis-asm.h
       1  /* Interface between the opcode library and its callers.
       2  
       3     Copyright (C) 1999-2023 Free Software Foundation, Inc.
       4  
       5     This program is free software; you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation; either version 3, or (at your option)
       8     any later version.
       9  
      10     This program is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     You should have received a copy of the GNU General Public License
      16     along with this program; if not, write to the Free Software
      17     Foundation, Inc., 51 Franklin Street - Fifth Floor,
      18     Boston, MA 02110-1301, USA.
      19  
      20     Written by Cygnus Support, 1993.
      21  
      22     The opcode library (libopcodes.a) provides instruction decoders for
      23     a large variety of instruction sets, callable with an identical
      24     interface, for making instruction-processing programs more independent
      25     of the instruction set being processed.  */
      26  
      27  #ifndef DIS_ASM_H
      28  #define DIS_ASM_H
      29  
      30  #ifdef __cplusplus
      31  extern "C" {
      32  #endif
      33  
      34  #include <stdio.h>
      35  #include <string.h>
      36  #include "bfd.h"
      37  
      38  enum dis_insn_type		/* Values for disassemble_info.insn_type.  */
      39  {
      40    dis_noninsn,			/* Not a valid instruction.  */
      41    dis_nonbranch,		/* Not a branch instruction.  */
      42    dis_branch,			/* Unconditional branch.  */
      43    dis_condbranch,		/* Conditional branch.  */
      44    dis_jsr,			/* Jump to subroutine.  */
      45    dis_condjsr,			/* Conditional jump to subroutine.  */
      46    dis_dref,			/* Instruction with one data reference.  */
      47    dis_dref2			/* Instruction with two data references.  */
      48  };
      49  
      50  /* When printing styled disassembler output, this describes what style
      51     should be used.  */
      52  
      53  enum disassembler_style
      54  {
      55    /* This is the default style, use this for any additional syntax
      56       (e.g. commas between operands, brackets, etc), or just as a default if
      57       no other style seems appropriate.  */
      58    dis_style_text,
      59  
      60    /* Use this for all instruction mnemonics, or aliases for mnemonics.
      61       These should be things that correspond to real machine
      62       instructions.  */
      63    dis_style_mnemonic,
      64  
      65    /* Some architectures include additional mnemonic-like fields within the
      66       instruction operands, e.g. on aarch64 'add w16, w7, w1, lsl #2' where
      67       the 'lsl' is an additional piece of text that describes how the
      68       instruction should behave.  This sub-mnemonic style can be used for
      69       these pieces of text.  */
      70    dis_style_sub_mnemonic,
      71  
      72    /* For things that aren't real machine instructions, but rather
      73       assembler directives, e.g. .byte, etc.  */
      74    dis_style_assembler_directive,
      75  
      76    /* Use this for any register names.  This may or may not include any
      77       register prefix, e.g. '$', '%', at the discretion of the target,
      78       though within each target the choice to include prefixes or not
      79       should be kept consistent.  If the prefix is not printed with this
      80       style, then dis_style_text should be used.  */
      81    dis_style_register,
      82  
      83    /* Use this for any constant values used within instructions or
      84       directives, unless the value is an absolute address, or an offset
      85       that will be added to an address (no matter where the address comes
      86       from) before use.  This style may or may not be used for any
      87       prefix to the immediate value, e.g. '$', at the discretion of the
      88       target, though within each target the choice to include these
      89       prefixes should be kept consistent.  */
      90    dis_style_immediate,
      91  
      92    /* The style for the numerical representation of an absolute address.
      93       Anything that is an address offset should use
      94       dis_style_address_offset.  This style may or may not be used for any
      95       prefix to the immediate value, e.g. '$', at the discretion of the
      96       target, though within each target the choice to include these
      97       prefixes should be kept consistent.  */
      98    dis_style_address,
      99  
     100    /* The style for any constant value within an instruction or directive
     101       that represents an offset that will be added to an address before
     102       use.  This style may or may not be used for any prefix to the
     103       immediate value, e.g. '$', at the discretion of the target, though
     104       within each target the choice to include these prefixes should be
     105       kept consistent.  */
     106    dis_style_address_offset,
     107  
     108    /* The style for a symbol's name.  The numerical address of a symbol
     109       should use the address style above; this style is reserved for the
     110       name.  */
     111    dis_style_symbol,
     112  
     113    /* The start of a comment that runs to the end of the line.  Anything
     114       printed after a comment start might be styled differently,
     115       e.g. everything might be styled as a comment, regardless of the
     116       actual style used.  The disassembler itself should not try to adjust
     117       the style emitted for comment content, e.g. an address emitted within
     118       a comment should still be given dis_style_address; in this way it is
     119       up to the user of the disassembler to decide how comments should be
     120       styled.  */
     121    dis_style_comment_start
     122  };
     123  /* printf-like output callbacks; the styled variant also takes a style.  */
     124  typedef int (*fprintf_ftype) (void *, const char*, ...) ATTRIBUTE_FPTR_PRINTF_2;
     125  typedef int (*fprintf_styled_ftype) (void *, enum disassembler_style, const char*, ...) ATTRIBUTE_FPTR_PRINTF_3;
     126  
     127  /* This struct is passed into the instruction decoding routine,
     128     and is passed back out into each callback.  The various fields are used
     129     for conveying information from your main routine into your callbacks,
     130     for passing information into the instruction decoders (such as the
     131     addresses of the callback functions), or for passing information
     132     back from the instruction decoders to their callers.
     133  
     134     It must be initialized before it is first passed; this can be done by
     135     hand, or with init_disassemble_info / INIT_DISASSEMBLE_INFO below.  */
     136  
     137  typedef struct disassemble_info
     138  {
     139    fprintf_ftype fprintf_func;		/* Unstyled output callback.  */
     140    fprintf_styled_ftype fprintf_styled_func; /* Styled output callback.  */
     141    void *stream;			/* Stream handed to the print functions.  */
     142    void *application_data;
     143  
     144    /* Target description.  We could replace this with a pointer to the bfd,
     145       but that would require one.  There currently isn't any such requirement
     146       so to avoid introducing one we record these explicitly.  */
     147    /* The bfd_flavour.  This can be bfd_target_unknown_flavour.  */
     148    enum bfd_flavour flavour;
     149    /* The bfd_arch value.  */
     150    enum bfd_architecture arch;
     151    /* The bfd_mach value.  */
     152    unsigned long mach;
     153    /* Endianness (for bi-endian cpus).  Mono-endian cpus can ignore this.  */
     154    enum bfd_endian endian;
     155    /* Endianness of code, for mixed-endian situations such as ARM BE8.  */
     156    enum bfd_endian endian_code;
     157  
     158    /* Some targets need information about the current section to accurately
     159       display insns.  If this is NULL, the target disassembler function
     160       will have to make its best guess.  */
     161    asection *section;
     162  
     163    /* An array of pointers to symbols either at the location being disassembled
     164       or at the start of the function being disassembled.  The array is sorted
     165       so that the first symbol is intended to be the one used.  The others are
     166       present for any misc. purposes.  This is not set reliably, but if it is
     167       not NULL, it is correct.  */
     168    asymbol **symbols;
     169    /* Number of symbols in array.  */
     170    int num_symbols;
     171  
     172    /* Symbol table provided for targets that want to look at it.  This is
     173       used on Arm to find mapping symbols and determine Arm/Thumb code.  */
     174    asymbol **symtab;
     175    int symtab_pos;
     176    int symtab_size;
     177  
     178    /* For use by the disassembler.
     179       The top 16 bits are reserved for public use (and are documented here).
     180       The bottom 16 bits are for the internal use of the disassembler.  */
     181    unsigned long flags;
     182    /* Set if the disassembler has determined that there are one or more
     183       relocations associated with the instruction being disassembled.  */
     184  #define INSN_HAS_RELOC	 (1u << 31)
     185    /* Set if the user has requested the disassembly of data as well as code.  */
     186  #define DISASSEMBLE_DATA (1u << 30)
     187    /* Set if the user has specifically set the machine type encoded in the
     188       mach field of this structure.  */
     189  #define USER_SPECIFIED_MACHINE_TYPE (1u << 29)
     190    /* Set if the user has requested wide output.  */
     191  #define WIDE_OUTPUT (1u << 28)
     192  
     193    /* Dynamic relocations, if they have been loaded.  */
     194    arelent **dynrelbuf;
     195    long dynrelcount;
     196  
     197    /* Used internally by the target specific disassembly code.  */
     198    void *private_data;
     199  
     200    /* Function used to get bytes to disassemble.  MEMADDR is the
     201       address of the stuff to be disassembled, MYADDR is the address to
     202       put the bytes in, and LENGTH is the number of bytes to read.
     203       DINFO is a pointer to this struct.
     204       Returns an errno value or 0 for success.  */
     205    int (*read_memory_func)
     206      (bfd_vma memaddr, bfd_byte *myaddr, unsigned int length,
     207       struct disassemble_info *dinfo);
     208  
     209    /* Function which should be called if we get an error that we can't
     210       recover from.  STATUS is the errno value from read_memory_func and
     211       MEMADDR is the address that we were trying to read.  DINFO is a
     212       pointer to this struct.  */
     213    void (*memory_error_func)
     214      (int status, bfd_vma memaddr, struct disassemble_info *dinfo);
     215  
     216    /* Function called to print ADDR.  */
     217    void (*print_address_func)
     218      (bfd_vma addr, struct disassemble_info *dinfo);
     219  
     220    /* Function called to determine if there is a symbol at the given ADDR.
     221       If there is, the function returns a non-NULL symbol pointer, otherwise
     222       it returns NULL.  This is used by ports which support an overlay
     223       manager where the overlay number is held in the top part of an
     224       address.  In some circumstances we want to include the overlay number
     225       in the address, (normally because there is a symbol associated with
     226       that address), but sometimes we want to mask out the overlay bits.  */
     227    asymbol * (*symbol_at_address_func)
     228      (bfd_vma addr, struct disassemble_info *dinfo);
     229  
     230    /* Function called to check if a SYMBOL can be displayed to the user.
     231       This is used by some ports that want to hide special symbols when
     232       displaying debugging output.  */
     233    bool (*symbol_is_valid)
     234      (asymbol *, struct disassemble_info *dinfo);
     235  
     236    /* These are for buffer_read_memory.  */
     237    bfd_byte *buffer;
     238    bfd_vma buffer_vma;
     239    size_t buffer_length;
     240  
     241    /* This variable may be set by the instruction decoder.  It suggests
     242       the number of bytes objdump should display on a single line.  If
     243       the instruction decoder sets this, it should always set it to
     244       the same value in order to get reasonable looking output.  */
     245    int bytes_per_line;
     246  
     247    /* The next two variables control the way objdump displays the raw data.
     248       For example, if bytes_per_line is 8 and bytes_per_chunk is 4, the
     249       output will look like this:
     250       00:   00000000 00000000
     251       with the chunks displayed according to "display_endian".  */
     252    int bytes_per_chunk;
     253    enum bfd_endian display_endian;
     254  
     255    /* Number of octets per incremented target address.
     256       Normally one, but some DSPs have byte sizes of 16 or 32 bits.  */
     257    unsigned int octets_per_byte;
     258  
     259    /* The number of zeroes we want to see at the end of a section before we
     260       start skipping them.  */
     261    unsigned int skip_zeroes;
     262  
     263    /* The number of zeroes to skip at the end of a section.  If the number
     264       of zeroes at the end is between SKIP_ZEROES_AT_END and SKIP_ZEROES,
     265       they will be disassembled.  If there are fewer than
     266       SKIP_ZEROES_AT_END, they will be skipped.  This is a heuristic
     267       attempt to avoid disassembling zeroes inserted by section
     268       alignment.  */
     269    unsigned int skip_zeroes_at_end;
     270  
     271    /* Whether the disassembler always needs the relocations.  */
     272    bool disassembler_needs_relocs;
     273  
     274    /* Results from instruction decoders.  Not all decoders yet support
     275       this information.  This info is set each time an instruction is
     276       decoded, and is only valid for the last such instruction.
     277  
     278       To determine whether this decoder supports this information, set
     279       insn_info_valid to 0, decode an instruction, then check it.  */
     280  
     281    char insn_info_valid;		/* Branch info has been set.  */
     282    char branch_delay_insns;	/* How many sequential insns will run before
     283  				   a branch takes effect.  (0 = normal)  */
     284    char data_size;		/* Size of data reference in insn, in bytes.  */
     285    enum dis_insn_type insn_type;	/* Type of instruction.  */
     286    bfd_vma target;		/* Target address of branch or dref, if known;
     287  				   zero if unknown.  */
     288    bfd_vma target2;		/* Second target address for dref2.  */
     289  
     290    /* Command line options specific to the target disassembler.  */
     291    const char *disassembler_options;
     292  
     293    /* If non-zero then try not to disassemble beyond this address, even if
     294       there are values left in the buffer.  This address is the address
     295       of the nearest symbol forwards from the start of the disassembly,
     296       and it is assumed that it lies on the boundary between instructions.
     297       If an instruction spans this address then this is an error in the
     298       file being disassembled.  */
     299    bfd_vma stop_vma;
     300  
     301    /* The end range of the current range being disassembled.  This is required
     302       in order to notify the disassembler when it's currently handling a
     303       different range than it was before.  This prevents unsafe optimizations when
     304       disassembling such as the way mapping symbols are found on AArch64.  */
     305    bfd_vma stop_offset;
     306  
     307    /* Set to true if the disassembler applied styling to the output,
     308       otherwise, set to false.  */
     309    bool created_styled_output;
     310  } disassemble_info;
     311  
     312  /* This struct is used to pass information about valid disassembler
     313     option arguments from the target to the generic GDB functions
     314     that set and display them.  */
     315  
     316  typedef struct
     317  {
     318    /* Option argument name to use in descriptions.  */
     319    const char *name;
     320  
     321    /* Vector of acceptable option argument values, NULL-terminated.
     322       The vector pointer itself is NULL if any value is accepted.  */
     323    const char **values;
     324  } disasm_option_arg_t;
     325  
     326  /* This struct is used to pass information about valid disassembler
     327     options, their descriptions and arguments from the target to the
     328     generic GDB functions that set and display them.  Each option is
     329     defined by the entries found at the same index in every vector.  */
     330  
     331  typedef struct
     332  {
     333    /* Vector of option names, NULL-terminated.  */
     334    const char **name;
     335  
     336    /* Vector of option descriptions or NULL if none to be shown.  */
     337    const char **description;
     338  
     339    /* Vector of option argument information pointers or NULL if no
     340       option accepts an argument.  NULL entries denote individual
     341       options that accept no argument.  */
     342    const disasm_option_arg_t **arg;
     343  } disasm_options_t;
     344  
     345  /* This struct bundles the valid disassembler options together with
     346     the arguments they accept, for passing from the target to the
     347     generic GDB functions that set and display them.  */
     348  
     349  typedef struct
     350  {
     351    /* Valid disassembler options.  Individual options that support
     352       an argument will refer to entries in the ARGS vector.  */
     353    disasm_options_t options;
     354  
     355    /* Vector of acceptable option arguments, NULL-terminated.  This
     356       collects all possible option argument choices, some of which
     357       may be shared by different options from the OPTIONS member.  */
     358    disasm_option_arg_t *args;
     359  } disasm_options_and_args_t;
     360  
     361  /* Standard disassemblers.  Disassemble one instruction at the given
     362     target address.  Return number of octets processed.  */
     363  typedef int (*disassembler_ftype) (bfd_vma, disassemble_info *);
     364  
     365  /* Disassemblers used outside of the opcodes library.  */
     366  extern int print_insn_m32c		(bfd_vma, disassemble_info *);
     367  extern int print_insn_mep		(bfd_vma, disassemble_info *);
     368  extern int print_insn_s12z		(bfd_vma, disassemble_info *);
     369  extern int print_insn_sh		(bfd_vma, disassemble_info *);
     370  extern int print_insn_sparc		(bfd_vma, disassemble_info *);
     371  extern int print_insn_rx		(bfd_vma, disassemble_info *);
     372  extern int print_insn_rl78		(bfd_vma, disassemble_info *);
     373  extern int print_insn_rl78_g10		(bfd_vma, disassemble_info *);
     374  extern int print_insn_rl78_g13		(bfd_vma, disassemble_info *);
     375  extern int print_insn_rl78_g14		(bfd_vma, disassemble_info *);
     376  
     377  extern disassembler_ftype arc_get_disassembler (bfd *);
     378  extern disassembler_ftype cris_get_disassembler (bfd *);
     379  
     380  extern void print_aarch64_disassembler_options (FILE *);
     381  extern void print_i386_disassembler_options (FILE *);
     382  extern void print_mips_disassembler_options (FILE *);
     383  extern void print_nfp_disassembler_options (FILE *);
     384  extern void print_ppc_disassembler_options (FILE *);
     385  extern void print_riscv_disassembler_options (FILE *);
     386  extern void print_arm_disassembler_options (FILE *);
     387  extern void print_arc_disassembler_options (FILE *);
     388  extern void print_s390_disassembler_options (FILE *);
     389  extern void print_wasm32_disassembler_options (FILE *);
     390  extern void print_loongarch_disassembler_options (FILE *);
     391  extern bool aarch64_symbol_is_valid (asymbol *, struct disassemble_info *);
     392  extern bool arm_symbol_is_valid (asymbol *, struct disassemble_info *);
     393  extern bool csky_symbol_is_valid (asymbol *, struct disassemble_info *);
     394  extern bool riscv_symbol_is_valid (asymbol *, struct disassemble_info *);
     395  extern void disassemble_init_powerpc (struct disassemble_info *);
     396  extern void disassemble_init_s390 (struct disassemble_info *);
     397  extern void disassemble_init_wasm32 (struct disassemble_info *);
     398  extern void disassemble_init_nds32 (struct disassemble_info *);
     399  extern const disasm_options_and_args_t *disassembler_options_arc (void);
     400  extern const disasm_options_and_args_t *disassembler_options_arm (void);
     401  extern const disasm_options_and_args_t *disassembler_options_mips (void);
     402  extern const disasm_options_and_args_t *disassembler_options_powerpc (void);
     403  extern const disasm_options_and_args_t *disassembler_options_riscv (void);
     404  extern const disasm_options_and_args_t *disassembler_options_s390 (void);
     405  
     406  /* Fetch the disassembler for a given architecture ARC, endianness (big
     407     endian if BIG is true), bfd_mach value MACH, and ABFD, if that support
     408     is available.  ABFD may be NULL.  */
     409  extern disassembler_ftype disassembler (enum bfd_architecture arc,
     410  					bool big, unsigned long mach,
     411  					bfd *abfd);
     412  
     413  /* Amend the disassemble_info structure as necessary for the target architecture.
     414     Should only be called after initialising the info->arch field.  */
     415  extern void disassemble_init_for_target (struct disassemble_info *);
     416  
     417  /* Tidy any memory allocated by targets, such as info->private_data.  */
     418  extern void disassemble_free_target (struct disassemble_info *);
     419  
     420  /* Set the basic disassembler print functions.  */
     421  extern void disassemble_set_printf (struct disassemble_info *, void *,
     422  				    fprintf_ftype, fprintf_styled_ftype);
     423  
     424  /* Document any target specific options available from the disassembler.  */
     425  extern void disassembler_usage (FILE *);
     426  
     427  /* Remove whitespace and consecutive commas.  */
     428  extern char *remove_whitespace_and_extra_commas (char *);
     429  
     430  /* Like STRCMP, but treat ',' the same as '\0' so that we match
     431     strings like "foobar" against "foobar,xxyyzz,...".  */
     432  extern int disassembler_options_cmp (const char *, const char *);
     433  
     434  /* A helper for FOR_EACH_DISASSEMBLER_OPTION.  Return a pointer to the text
     435     just past the first ',' in OPTIONS, or NULL if OPTIONS has no comma.  */
     436  static inline const char *
     437  next_disassembler_option (const char *options)
     438  {
     439    const char *comma = strchr (options, ',');
     440  
     441    return comma != NULL ? comma + 1 : NULL;
     442  }
     443  
     444  /* Iterate OPT over the start of each comma separated option in OPTIONS.  */
     445  #define FOR_EACH_DISASSEMBLER_OPTION(OPT, OPTIONS) \
     446    for ((OPT) = (OPTIONS); \
     447         (OPT) != NULL; \
     448         (OPT) = next_disassembler_option (OPT))
     449  
     450  
     451  /* This block of definitions is for particular callers who read instructions
     452     into a buffer before calling the instruction decoder.  */
     453  
     454  /* Here is a function which callers may wish to use for read_memory_func.
     455     It gets bytes from a buffer.  */
     456  extern int buffer_read_memory
     457    (bfd_vma, bfd_byte *, unsigned int, struct disassemble_info *);
     458  
     459  /* This function goes with buffer_read_memory.
     460     It prints a message using info->fprintf_func and info->stream.  */
     461  extern void perror_memory (int, bfd_vma, struct disassemble_info *);
     462  
     463  
     464  /* Just print the address in hex.  This is included for completeness even
     465     though both GDB and objdump provide their own (to print symbolic
     466     addresses).  */
     467  extern void generic_print_address
     468    (bfd_vma, struct disassemble_info *);
     469  
     470  /* Always returns NULL.  */
     471  extern asymbol *generic_symbol_at_address
     472    (bfd_vma, struct disassemble_info *);
     473  
     474  /* Always returns true.  */
     475  extern bool generic_symbol_is_valid
     476    (asymbol *, struct disassemble_info *);
     477  
     478  /* Function to initialize a disassemble_info struct.  This should be
     479     called by all applications creating such a struct.  */
     480  extern void init_disassemble_info (struct disassemble_info *dinfo, void *stream,
     481  				   fprintf_ftype fprintf_func,
     482  				   fprintf_styled_ftype fprintf_styled_func);
     483  
     484  /* For compatibility with existing code.  INFO is the struct, not a pointer.  */
     485  #define INIT_DISASSEMBLE_INFO(INFO, STREAM, FPRINTF_FUNC, FPRINTF_STYLED_FUNC)  \
     486    init_disassemble_info (&(INFO), (STREAM), (fprintf_ftype) (FPRINTF_FUNC), \
     487  			 (fprintf_styled_ftype) (FPRINTF_STYLED_FUNC))
     488  
     489  #ifdef __cplusplus
     490  }
     491  #endif
     492  
     493  #endif /* ! defined (DIS_ASM_H) */