Disassembler for the SPARC(TM) instruction set by Cristina Cifuentes and Norman Ramsey.
Revision history:
Date | Toolkit Version | Author
Sep 1995 | 0.1a (Dec 1994) | Cifuentes
Jan 1996 | 0.3 (Dec 1995) | Cifuentes
Mar 1996 | 0.4 (Mar 1996) | Ramsey
Apr 1999 | 0.4 (Mar 1996) | Ramsey (legal compliance)
<sparcdis.spec>= <syntax patterns> <fetching specs>
<sparcdis.m>= #include <stdio.h> #include <mclib.h> #include "sparc-names.h" /* generated by 'tools -fieldnames' - has arrays of names of fields */ #include "sparcdis.h" <macros> <definitions>
<sparcdis.h>= <exported declarations>
The general idea is to disassemble a single instruction in memory. I parameterize the disassembler by three functions:
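print is a varargs printing procedure with the same interface as printf, except that it takes the closure pr as an extra first argument.
dis_rel converts the target address of a branch or call into a string to be printed; it takes the closure rel as its first argument.
fetch_word fetches the 32-bit word at a given address; it takes the closure fet as its first argument.
pr, rel, and fet are closures: opaque pointers that the disassembler passes through unchanged to print, dis_rel, and fetch_word respectively.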
<exported declarations>= (<-U)
/* Printer: printf-like varargs output routine.  The first argument is the
 * caller-supplied closure (pr); fmt and the remaining arguments follow the
 * printf conventions. */
typedef void (*Printer)(void *pr, char *fmt, ...);

/* RelPrinter: turns an address into a string.  The disassembler calls it
 * with the closure rel and the target of a branch or call, and prints the
 * returned string with %s. */
typedef char *(*RelPrinter)(void *rel, unsigned address);

/* Fetcher: returns the word at location lc.  The disassembler calls it with
 * the closure fet whenever it needs to fetch a 32-bit token. */
typedef unsigned (*Fetcher)(void *f, unsigned lc);

/* Disassemble the single instruction at location lc, writing the text
 * through print/pr.  dis_rel/rel render branch and call targets;
 * fetch_word/fet supply the instruction words. */
extern void sparc_disassemble(unsigned lc, Printer print, void *pr,
                              RelPrinter dis_rel, void *rel,
                              Fetcher fetch_word, void *fet);
Defines Fetcher, Printer, RelPrinter (links are to index).
<fetching specs>= (<-U)
# Tell the toolkit how the generated matcher handles addresses and fetches
# instruction words.  Addresses are plain C "unsigned" values.
address type is "unsigned"
# Templates for converting an address to an integer and for adding an offset
# to an address (presumably %a stands for the address and %o for the offset;
# confirm against the toolkit reference manual).
address to integer using "%a"
address add using "%a+%o"
# 32-bit tokens are fetched by calling the caller-supplied fetch_word
# function with its closure fet, so both names must be in scope wherever a
# matching statement appears.
fetch 32 using "fetch_word(fet, %a)"
We find it useful to define macros for converting common operands to strings:
<macros>= (<-U)
/* Shorthands that turn a matched operand into its printable string.
 * Each macro refers to a variable of the corresponding lower-case name
 * (rd, rs1, rs2, fd, fs1, fs2, cd, roi, addr), so it may only be used in a
 * matcher arm that binds that name.  The *_names arrays are the field-name
 * tables from "sparc-names.h". */
#define RD  (rd_names[rd])
#define RS1 (rs1_names[rs1])
#define RS2 (rs2_names[rs2])
#define FD  (fd_names[fd])
#define FS1 (fs1_names[fs1])
#define FS2 (fs2_names[fs2])
#define CD  (cd_names[cd])
/* ROI and ADDR additionally need fetch_word and fet in scope; they call
 * dis_roi and dis_addr on the operand. */
#define ROI  (dis_roi(fetch_word, fet, roi))
#define ADDR (dis_addr(fetch_word, fet, addr))
Defines ADDR, CD, FD, FS1, FS2, RD, ROI, RS1, RS2 (links are to index).
I begin by showing how to disassemble addressing modes and structured operands. Because memory management in C is so grotesque, I simply disassemble them into static strings, and I insist that the code below not call more than one per instruction. Garbage collection would be a better solution. Note the ``synthetic'' special cases precede the general ones.
<definitions>= (<-U) [D->]
/*
 * dis_addr -- render the addressing-mode operand found at lc as text.
 *
 * fetch_word and fet are not named in the body; they must be in scope
 * because the matcher fetches through them and because the RS1/RS2 macros
 * are expanded here.
 *
 * Returns either an entry of a register-name table (register-indirect mode)
 * or a pointer to a static buffer owned by this function.  The buffer is
 * overwritten by the next call, so the result must be consumed first.
 */
char *dis_addr(Fetcher fetch_word, void *fet, unsigned lc) {
  static char out[80];

  match lc to
  | indirectA(rs1) =>
      return RS1;
  | indexA(rs1, rs2) =>
      sprintf(out, "%s+%s", RS1, RS2);
      return out;
  | absoluteA(i) =>
      sprintf(out, "%d", i);
      return out;
  | dispA(rs1, i) =>
      /* A negative displacement prints its own '-', so the '+' separator
       * is emitted only for non-negative values. */
      sprintf(out, "%s%s%d", RS1, (int)i >= 0 ? "+" : "", i);
      return out;
  endmatch
  return out;
}
Defines dis_addr (links are to index).
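Also note the sign hacking for dispA: the "+" separator is printed only when the displacement is non-negative, because a negative displacement already supplies its own minus sign.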
A similar function is needed to disassemble register or immediate.
<definitions>+= (<-U) [<-D->]
/*
 * dis_roi -- render the register-or-immediate operand found at lc as text.
 *
 * fetch_word and fet are not named in the body; they must be in scope
 * because the matcher fetches through them.
 *
 * Returns the register's name for register mode, or a pointer to a static
 * buffer holding the decimal immediate.  The buffer is overwritten by the
 * next call, so the result must be consumed first.
 *
 * The function ends in a single return after the matching statement, as
 * dis_addr and dis_regaddr do, so that control can never run off the end of
 * this non-void function.
 */
char *dis_roi(Fetcher fetch_word, void *fet, unsigned lc) {
  static char buf[80];

  match lc to
  | imode(i)   => sprintf(buf, "%d", i);
  | rmode(rs2) => return RS2;
  endmatch
  return buf;
}
Defines dis_roi (links are to index).
Finally, register addresses for those restricted load and store modes.
<definitions>+= (<-U) [<-D->]
/*
 * dis_regaddr -- render a register-only address operand found at lc.
 *
 * Only the register-indirect and register-indexed forms are accepted.
 * Anything else is rendered through dis_addr and wrapped in "??...??".
 *
 * Returns either a register-name table entry or a pointer to a static
 * buffer owned by this function; the buffer is overwritten by the next call.
 */
char *dis_regaddr(Fetcher fetch_word, void *fet, unsigned lc) {
  static char out[80];

  match lc to
  | indirectR(rs1) =>
      return RS1;
  | indexR(rs1, rs2) =>
      sprintf(out, "%s+%s", RS1, RS2);
      return out;
  else
      sprintf(out, "??%s??", dis_addr(fetch_word, fet, lc));
      return out;
  endmatch
  return out;
}
Defines dis_regaddr (links are to index).
Now disassembly of full instructions, which ought to return the
successor instruction (because of synthetics) but doesn't.
Disassembles one instruction at location lc.
The matching statement takes the synthetic instructions first and then
all other instructions. Instructions need to be in priority order, as the
toolkit matches the first arm that becomes true and doesn't look at the
remaining arms.
And we add these patterns to group instructions of like syntax:
<syntax patterns>= (<-U)
# Group instructions that share one assembly syntax, so that the disassembler
# can print each group with a single matcher arm.
patterns
  # loads, grouped by the kind of destination
  load_greg is loadg | LDD
  load_freg is LDF | LDDF
  load_creg is LDC | LDDC
  load_asi  is loada | LDDA
  # stores, grouped by the kind of source
  sto_greg  is storeg | STD
  sto_freg  is STF | STDF
  sto_creg  is STC | STDC
  sto_asi   is storea | STDA
  # floating point: two-operand, three-operand, and compare forms
  float_2   is float2s | FSQRTd | FSQRTq | FTOs | FTOd | FTOq | FdTO | FqTO | FqTOd | FdTOq
  float_3   is float3s | float3d | float3q | FsMULd | FdMULq
  float_cmp is fcompares | fcompared | fcompareq
<definitions>+= (<-U) [<-D]
/*
 * sparc_disassemble -- print the assembly text of the instruction at lc.
 *
 *   lc          location of the instruction to decode
 *   print, pr   printf-like output routine and its closure; every arm emits
 *               its text with exactly one call print(pr, fmt, ...)
 *   dis_rel,    converts the target of a branch or call into a string;
 *   rel         called as dis_rel(rel, tgt)
 *   fetch_word, word fetcher and its closure; used by the matcher and by the
 *   fet         ROI and ADDR macros
 *
 * Arms are tried in order and the first one that matches wins, so the
 * synthetic instructions must stay ahead of the general ones.  The last arm
 * matches any word and prints it as a raw ".word".
 *
 * Nothing is returned; in particular the address of the successor
 * instruction is not reported.
 */
void sparc_disassemble(unsigned lc, Printer print, void *pr,
                       RelPrinter dis_rel, void *rel,
                       Fetcher fetch_word, void *fet) {
  match lc to
  | NOP => print(pr, "nop");
  /* | decode_sethi(hi, r); bset(imode(lo), r) =>
         print(pr, "set 0x%x, %s", hi+lo, rs1_names[r]); */
  | decode_sethi(val, r)    => print(pr, "sethi %%hi(0x%x), %s", val, rs1_names[r]);
  | OR(0, imode(val), rd)   => print(pr, "set %d, %s", val, RD);
  | cmp (rs1, roi)          => print(pr, "cmp %s, %s", RS1, ROI);
  | ret()                   => print(pr, "ret");
  | retl()                  => print(pr, "retl");
  | jmp (addr)              => print(pr, "jmp %s", ADDR);
  | calla (addr)            => print(pr, "call %s", ADDR);
  | tst (rs2)               => print(pr, "tst %s", RS2);
  | not (rd)                => print(pr, "not %s", RD);
  | not2 (rs1, rd)          => print(pr, "not %s, %s", RS1, RD);
  | neg (rd)                => print(pr, "neg %s", RD);
  | neg2 (rs2, rd)          => print(pr, "neg %s, %s", RS2, RD);
  | inc (val, rd)           => print(pr, "inc %d, %s", val, RD);
  | inccc (val, rd)         => print(pr, "inccc %d, %s", val, RD);
  | dec (val, rd)           => print(pr, "dec %d, %s", val, RD);
  | deccc (val, rd)         => print(pr, "deccc %d, %s", val, RD);
  | btst (roi, rs1)         => print(pr, "btst %s, %s", ROI, RS1);
  | bset (roi, rd)          => print(pr, "bset %s, %s", ROI, RD);
  | bclr (roi, rd)          => print(pr, "bclr %s, %s", ROI, RD);
  | btog (roi, rd)          => print(pr, "btog %s, %s", ROI, RD);
  | clr (rd)                => print(pr, "clr %s", RD);
  | clrw (addr)             => print(pr, "clr [%s]", ADDR);
  | clrb (addr)             => print(pr, "clrb [%s]", ADDR);
  | clrh (addr)             => print(pr, "clrh [%s]", ADDR);
  | mov (roi, rd)           => print(pr, "mov %s, %s", ROI, RD);
  | restore_()              => print(pr, "restore");
  | save_()                 => print(pr, "save");
  /* end of synthetics */
  | load_greg(addr, rd) [name]      => print(pr, "%s [%s], %s", name, ADDR, RD);
  | load_freg(addr, fd) [name]      => print(pr, "%s [%s], %s", name, ADDR, FD);
  | load_creg(addr, cd) [name]      => print(pr, "%s [%s], %s", name, ADDR, CD);
  | load_asi (addr, asi, rd) [name] => print(pr, "%s [%s]%d, %s", name, ADDR, asi, RD);
  | sto_greg(rd, addr) [name]       => print(pr, "%s %s, [%s]", name, RD, ADDR);
  | sto_freg(fd, addr) [name]       => print(pr, "%s %s, [%s]", name, FD, ADDR);
  | sto_creg(cd, addr) [name]       => print(pr, "%s %s, [%s]", name, CD, ADDR);
  | sto_asi (rd, addr, asi) [name]  => print(pr, "%s %s, [%s]%d", name, RD, ADDR, asi);
  | LDFSR(addr) [name]              => print(pr, "%s [%s], %%fsr", name, ADDR);
  | LDCSR(addr) [name]              => print(pr, "%s [%s], %%csr", name, ADDR);
  | STFSR(addr) [name]              => print(pr, "%s %%fsr, [%s]", name, ADDR);
  | STCSR(addr) [name]              => print(pr, "%s %%csr, [%s]", name, ADDR);
  | STDFQ(addr) [name]              => print(pr, "%s %%fq, [%s]", name, ADDR);
  | STDCQ(addr) [name]              => print(pr, "%s %%cq, [%s]", name, ADDR);
  /* don't bother with RDY, WRY, and friends */
  | alu (rs1, roi, rd) [name]       => print(pr, "%s %s, %s, %s", name, RS1, ROI, RD);
  | branch^a (tgt) [name]           => print(pr, "%s %s", name, dis_rel(rel, tgt));
  | call (tgt)                      => print(pr, "call %s", dis_rel(rel, tgt));
  | float_2 (fs2, fd) [name]        => print(pr, "%s %s, %s", name, FS2, FD);
  | float_3 (fs1, fs2, fd) [name]   => print(pr, "%s %s, %s, %s", name, FS1, FS2, FD);
  | float_cmp (fs1, fs2) [name]     => print(pr, "%s %s, %s", name, FS1, FS2);
  | JMPL (addr, rd) [name]          => print(pr, "jmpl %s, %s", ADDR, RD);
  | RETT (addr) [name]              => print(pr, "rett [%s]", ADDR);
  /* The format has two %s conversions: the trap's name, then its address. */
  | trap (addr) [name]              => print(pr, "%s [%s]", name, ADDR);
  | UNIMP (n) [name]                => print(pr, "unimp 0x%x", n);
  /* Fallback: anything not recognised above is shown as a raw word. */
  | inst = n                        => print(pr, ".word 0x%08x", n);
  endmatch
}
Defines sparc_disassemble (links are to index).