Disassembler for the SPARC(TM) instruction set

Disassembler for the SPARC(TM) instruction set by Cristina Cifuentes and Norman Ramsey.

Revision history:

Date      Toolkit Version   Author
Sep 1995  0.1a (Dec 1994)   Cifuentes
Jan 1996  0.3 (Dec 1995)    Cifuentes
Mar 1996  0.4 (Mar 1996)    Ramsey
Apr 1999  0.4 (Mar 1996)    Ramsey (legal compliance)

<sparcdis.spec>=
<syntax patterns>
<fetching specs>
<sparcdis.m>=
#include <stdio.h>
#include <mclib.h>
#include "sparc-names.h"   /* generated by 'tools -fieldnames' - has
                                   arrays of names of fields */
#include "sparcdis.h"

<macros>
<definitions>

<sparcdis.h>=
<exported declarations>

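The general idea is to disassemble a single instruction in memory. I parameterize the disassembler by three functions:

- print, which emits the disassembled text;
- dis_rel, which converts the target address of a branch or call into a string;
- fetch_word, which fetches a 32-bit word from a given address.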

I don't solve the usual problem of functions returning strings, but the code below should work with statically allocated strings. The real solution is garbage collection, perhaps à la Boehm-Weiser.

print is a varargs printing procedure with the same interface as printf. pr, rel, and fet are closures.

<exported declarations>= (<-U)
/* Emits formatted text.  `pr` is the caller's closure; `fmt` and the
   arguments after it follow the printf conventions. */
typedef void (*Printer)(void *pr, char *fmt, ...);

/* Renders `address` as a string.  `rel` is the caller's closure. */
typedef char *(*RelPrinter)(void *rel, unsigned address);

/* Fetches the 32-bit word at location `lc`.  `f` is the caller's closure. */
typedef unsigned (*Fetcher)(void *f, unsigned lc);

/* Disassembles the single instruction at location `lc`.  Each callback
   is passed together with the closure pointer that is handed back to
   it as its first argument. */
extern void sparc_disassemble(
        unsigned   lc,
        Printer    print,      void *pr,
        RelPrinter dis_rel,    void *rel,
        Fetcher    fetch_word, void *fet);
Defines Fetcher, Printer, RelPrinter (links are to index).

<fetching specs>= (<-U)
# Tells the generated matcher how to handle instruction memory: addresses
# are C "unsigned" values, they convert to integers unchanged, they are
# advanced with ordinary C addition, and a 32-bit fetch goes through the
# caller's fetch_word function with its closure fet.
# (%a and %o presumably stand for the address and the offset -- confirm
# against the toolkit reference manual.)
address type is "unsigned"
address to integer using "%a"
address add using "%a+%o"
fetch 32 using "fetch_word(fet, %a)"

We find it useful to define macros for converting common operands to strings:

<macros>= (<-U)
/* Operand-to-string shorthands.  Each one expands in terms of a variable
   of the same (lower-case) name, which must be in scope at the point of
   use -- in this file, bound by the enclosing matching-statement arm. */

/* Name-table lookups for the rd, rs1 and rs2 fields. */
#define RD (rd_names[rd])
#define RS1 (rs1_names[rs1])
#define RS2 (rs2_names[rs2])

/* Name-table lookups for the fd, fs1 and fs2 fields. */
#define FD (fd_names[fd])
#define FS1 (fs1_names[fs1])
#define FS2 (fs2_names[fs2])

/* Name-table lookup for the cd field. */
#define CD (cd_names[cd])

/* Structured operands: rendered by the helper functions, which also
   need the fetch function and its closure. */
#define ROI (dis_roi(fetch_word, fet, roi))
#define ADDR (dis_addr(fetch_word, fet, addr))

Defines ADDR, CD, FD, FS1, FS2, RD, ROI, RS1, RS2 (links are to index).

I begin by showing how to disassemble addressing modes and structured operands. Because memory management in C is so grotesque, I simply disassemble them into static strings, and I insist that the code below not call more than one per instruction. Garbage collection would be a better solution. Note the ``synthetic'' special cases precede the general ones.

<definitions>= (<-U) [D->]
/*
 * Renders the addressing-mode operand found at location lc as text.
 *
 * fetch_word and fet are the fetch function and its closure; the
 * matching statement reads instruction words through them.
 *
 * Returns a register name (indirectA) or a pointer to a static buffer,
 * so the result is only valid until the next call of this function.
 *
 * The arms are in priority order: the special cases come before the
 * general ones, and only the first arm that matches is taken.
 */
char *dis_addr(Fetcher fetch_word, void *fet, unsigned lc) {
  static char buf[80];
  match lc to
  | indirectA(rs1)   => return RS1;
  | indexA(rs1, rs2) => sprintf(buf, "%s+%s", RS1, RS2);
  | absoluteA(i)     => sprintf(buf, "%d", i);
  | dispA(rs1, i)    => sprintf(buf, "%s%s%d", RS1, (int)i < 0 ? "" : "+", i); /* %d supplies the '-' itself */
  endmatch
  return buf;
}
Defines dis_addr (links are to index).

Also note the sign hacking for dispA.

A similar function is needed to disassemble register or immediate.

<definitions>+= (<-U) [<-D->]
/*
 * Renders the register-or-immediate operand found at location lc as text.
 *
 * fetch_word and fet are the fetch function and its closure; the
 * matching statement reads instruction words through them.
 *
 * Returns a register name (rmode) or a pointer to a static buffer
 * holding the immediate in decimal (imode), so the result is only valid
 * until the next call of this function.
 *
 * Every path ends in a return: the imode arm falls through to the
 * `return buf` after the matching statement, the same shape dis_addr
 * and dis_regaddr use, so control never runs off the end of the
 * function.
 */
char *dis_roi(Fetcher fetch_word, void *fet, unsigned lc) {
  static char buf[80];
  match lc to
  | imode(i)   => sprintf(buf, "%d", i);
  | rmode(rs2) => return RS2;
  endmatch
  return buf;
}
Defines dis_roi (links are to index).

Finally, register addresses for those restricted load and store modes.

<definitions>+= (<-U) [<-D->]
/*
 * Renders a register-only address (indirectR or indexR) found at
 * location lc as text.
 *
 * fetch_word and fet are the fetch function and its closure; the
 * matching statement reads instruction words through them.
 *
 * Any other addressing mode is rendered through dis_addr and wrapped in
 * "??...??" so that it stands out in the output.
 *
 * Returns a register name (indirectR) or a pointer to a static buffer,
 * so the result is only valid until the next call of this function.
 */
char *dis_regaddr(Fetcher fetch_word, void *fet, unsigned lc) {
  static char buf[80];
  match lc to
  | indirectR(rs1)   => return RS1;
  | indexR(rs1, rs2) => sprintf(buf, "%s+%s", RS1, RS2);
  else                  sprintf(buf, "??%s??", dis_addr(fetch_word, fet, lc)); 
  endmatch
  return buf;
}
Defines dis_regaddr (links are to index).

Now disassembly of full instructions, which ought to return the successor instruction (because of synthetics) but doesn't. Disassembles one instruction at location lc. The matching statement takes the synthetic instructions first and then all other instructions. Instructions need to be in priority order, as the toolkit matches the first arm that becomes true and doesn't look at the remaining arms.

And we add these patterns to group instructions of like syntax:

<syntax patterns>= (<-U)
# Groups of instructions that share one assembly syntax, so that the
# disassembler can handle each group with a single matching arm:
# loads and stores by operand kind (load_* / sto_*), and floating-point
# operations by operand count (float_2, float_3) plus comparisons
# (float_cmp).
patterns 
  load_greg is loadg | LDD 
  load_freg is LDF | LDDF
  load_creg is LDC | LDDC
  load_asi  is loada | LDDA

  sto_greg is storeg | STD 
  sto_freg is STF | STDF
  sto_creg is STC | STDC
  sto_asi  is storea | STDA

  float_2    is  float2s | FSQRTd | FSQRTq | FTOs | FTOd | FTOq | FdTO | FqTO 
                 | FqTOd | FdTOq
  float_3    is  float3s | float3d | float3q | FsMULd | FdMULq
  float_cmp  is fcompares | fcompared | fcompareq 

<definitions>+= (<-U) [<-D]
/*
 * Disassembles the single instruction at location lc and emits its text
 * through print.
 *
 *   lc              location of the instruction to disassemble
 *   print, pr       printf-style output function and its closure
 *   dis_rel, rel    renders a branch or call target as a string, plus
 *                   its closure
 *   fetch_word, fet fetches instruction words for the matching
 *                   statement, plus its closure
 *
 * The arms are in priority order: only the first arm that matches is
 * taken, so the synthetic instructions come first, then the general
 * ones, and the final arm prints any remaining word as raw ".word" data.
 *
 * Each arm uses ADDR and ROI at most once, because the helpers behind
 * them return pointers to static buffers.
 */
void sparc_disassemble(unsigned lc, 
        Printer print, void *pr, RelPrinter dis_rel, void *rel,
        Fetcher fetch_word, void *fet) {
  match lc to
  | NOP                   => print(pr, "nop");
   /*   | decode_sethi(hi, r); bset(imode(lo), r) => 
                print(pr, "set 0x%x, %s", hi+lo, rs1_names[r]); */
  | decode_sethi(val, r)  => print(pr, "sethi %%hi(0x%x), %s", val, rs1_names[r]);
  | OR(0, imode(val), rd) => print(pr, "set %d, %s", val, RD);
  | cmp (rs1, roi)        => print(pr, "cmp %s, %s", RS1, ROI);
  | ret()                 => print(pr, "ret");
  | retl()                => print(pr, "retl");
  | jmp (addr)            => print(pr, "jmp %s", ADDR);
  | calla (addr)          => print(pr, "call %s", ADDR);
  | tst (rs2)             => print(pr, "tst %s", RS2);
  | not (rd)              => print(pr, "not %s", RD);
  | not2 (rs1, rd)        => print(pr, "not %s, %s", RS1, RD);
  | neg (rd)              => print(pr, "neg %s", RD);
  | neg2 (rs2, rd)        => print(pr, "neg %s, %s", RS2, RD);
  | inc (val, rd)         => print(pr, "inc %d, %s", val, RD);
  | inccc (val, rd)       => print(pr, "inccc %d, %s", val, RD);
  | dec (val, rd)         => print(pr, "dec %d, %s", val, RD);
  | deccc (val, rd)       => print(pr, "deccc %d, %s", val, RD);
  | btst (roi, rs1)       => print(pr, "btst %s, %s", ROI, RS1);
  | bset (roi, rd)        => print(pr, "bset %s, %s", ROI, RD);
  | bclr (roi, rd)        => print(pr, "bclr %s, %s", ROI, RD);
  | btog (roi, rd)        => print(pr, "btog %s, %s", ROI, RD);
  | clr (rd)              => print(pr, "clr %s", RD);
  | clrw (addr)           => print(pr, "clr [%s]", ADDR);
  | clrb (addr)           => print(pr, "clrb [%s]", ADDR);
  | clrh (addr)           => print(pr, "clrh [%s]", ADDR);
  | mov (roi, rd)         => print(pr, "mov %s, %s", ROI, RD);
  | restore_()            => print(pr, "restore");
  | save_()               => print(pr, "save");

         /* end of synthetics */

  | load_greg(addr, rd)      [name] => print(pr, "%s [%s], %s", name, ADDR, RD);
  | load_freg(addr, fd)      [name] => print(pr, "%s [%s], %s", name, ADDR, FD);
  | load_creg(addr, cd)      [name] => print(pr, "%s [%s], %s", name, ADDR, CD);
  | load_asi (addr, asi, rd) [name] => print(pr, "%s [%s]%d, %s", name, ADDR, asi, RD);

  | sto_greg(rd, addr)       [name] => print(pr, "%s %s, [%s]", name, RD, ADDR);
  | sto_freg(fd, addr)       [name] => print(pr, "%s %s, [%s]", name, FD, ADDR);
  | sto_creg(cd, addr)       [name] => print(pr, "%s %s, [%s]", name, CD, ADDR);
  | sto_asi (rd, addr, asi)  [name] => print(pr, "%s %s, [%s]%d", name, RD, ADDR, asi);

  | LDFSR(addr) [name] => print(pr, "%s [%s], %%fsr", name, ADDR);
  | LDCSR(addr) [name] => print(pr, "%s [%s], %%csr", name, ADDR);
  | STFSR(addr) [name] => print(pr, "%s %%fsr, [%s]", name, ADDR);
  | STCSR(addr) [name] => print(pr, "%s %%csr, [%s]", name, ADDR);
  | STDFQ(addr) [name] => print(pr, "%s %%fq, [%s]", name, ADDR);
  | STDCQ(addr) [name] => print(pr, "%s %%cq, [%s]", name, ADDR);

  /* don't bother with RDY, WRY, and friends */

  | alu (rs1, roi, rd) [name] => print(pr, "%s %s, %s, %s", name, RS1, ROI, RD);
  | branch^a (tgt)     [name] => print(pr, "%s %s", name, dis_rel(rel, tgt));
  | call (tgt)                => print(pr, "call %s", dis_rel(rel, tgt));

  | float_2 (fs2, fd)      [name] => print(pr, "%s %s, %s",     name,      FS2, FD);
  | float_3 (fs1, fs2, fd) [name] => print(pr, "%s %s, %s, %s", name, FS1, FS2, FD);
  | float_cmp (fs1, fs2)   [name] => print(pr, "%s %s, %s",     name,      FS1, FS2);

  | JMPL (addr, rd)    [name] => print(pr, "jmpl %s, %s", ADDR, RD);
  | RETT (addr)        [name] => print(pr, "rett [%s]", ADDR);
  | trap (addr)        [name] => print(pr, "%s [%s]", name, ADDR);
  | UNIMP (n)          [name] => print(pr, "unimp 0x%x", n);

  | inst = n => print(pr, ".word 0x%08x", n);
  endmatch
}
Defines sparc_disassemble (links are to index).

Index

Chunk names

Identifiers

Legal Disclaimer

All SPARC trademarks are used under license and are trademarks or registered trademarks of SPARC International, Inc. in the U.S. and other countries. Products bearing SPARC trademarks are based on an architecture developed by Sun Microsystems, Inc.