Main program

<version string>= (U->)
"0.5"
<*>= [D->]
link pushtrace
<*>+= [<-D->]
procedure usage() 
    every write(&errout, ![
        "Usage: tools [options] [specfile ...]",
        "Options:",
        <usage lines>
        "`-' may be used in place of a file name to mean stdin or stdout"])
    stop()
end
Defines usage (links are to index).

<cases for arguments>= (U->) [D->]
"-alt"        : alt := &output
<refman: options>= [D->]
\tkoption{d}{alt}{Show alternates at decision-tree leaves}
Prints something to do with alternatives at the leaves of decision
trees?
I'm no longer sure\ldots
\endtkoption
<usage lines>= (<-U) [D->]
"    -ascii-dag file   write an ascii representation of matching-statement code",
<cases for arguments>+= (U->) [<-D->]
"-ascii-dag" : ascii_dag   := openfile(get(args),"w")
<refman: options>+= [<-D->]
\tkoption{d}{ascii-dag \file}
   {write an ascii representation of decision DAGs used in matching statements}
Writes on {\file} an ASCII representation of the decision trees
(actually DAGs) used
to implement matching statements.
Useful for debugging, especially performance debugging.
\warning{Probably broken.  In any case, it should be rewritten to use
the new prettyprinter.}
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -ascii-tree file  write an ascii representation of matchin-statement trees",
<cases for arguments>+= (U->) [<-D->]
"-ascii-tree" : ascii_tree := openfile(get(args),"w")
<refman: options>+= [<-D->]
\tkoption{d}{ascii-tree \file}
   {write an ascii representation of decision trees used to build matching DAGs}
Writes on {\file} an ASCII representation of the decision trees before
they are converted to DAGs.
This option is really here only in case something goes badly wrong
with the dagging process (or in case you're curious).
\warning{This, too, is probably broken.}
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -asm-encoder file generate assembly-emitting encoding procedures on file.[ch]",
<cases for arguments>+= (U->) [<-D->]
"-asm-encoder" : asmencoderfilename  := get(args)
<refman: options>+= [<-D->]
\tkoption{E}{asm-encoder {\file}}{generate assembly-emitting encoding procedures on \file}
Like {\tt -encoder}, but the generated encoding procedures use
{\tt asmprintf}, {\tt asmprintfd}, and {\tt asmprintreloc} to emit
assembly language instead of binary.
Puts the implementation into {\tt\file.c} and puts suitable
declarations into {\tt\file.h}.
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -asm-grammar file generate assembly-language grammar on file",
<cases for arguments>+= (U->) [<-D->]
"-asm-grammar" : asmgrammarfilename  := get(args)
<refman: options>+= [<-D->]
\tkoption{?}{asm-grammar {\file}}{generate assembly-language grammar on \file}
Writes an EBNF grammar to the specified \file.
Semantic actions call encoding procedures either direct or indirect
according to whether the \opt{indirect} option is used.
In the current version of the toolkit, the grammar written is not the
input of any known parser generator---it is for illustrative purposes only.
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -byteorder O      use order O (one of [blm]) to emit words",
<cases for arguments>+= (U->) [<-D->]
"-byteorder"  : emittername := "emit" || get(args)
<refman: options>+= [<-D->]
\tkoption{E}{byteorder \optarg{string}}{choose emission procedure}
This misleadingly named option actually determines the name of the
function that the encoding procedures use to emit tokens 
(\texttt{emit}\optarg{string}).
Since the toolkit library provides \texttt{emitb}, \texttt{emitl}, and
\texttt{emitm} procedures, which emit tokens using big-endian,
little-endian, or machine-dependent byte orders, the values
\texttt{b}, \texttt{l}, and \texttt{m} are good choices.
My favorite trick, however, is to use \texttt{-byteorder X}, then use
the C compiler to pick a byte order later on, for example, by
compiling with {\tt cc -DemitX=emitfast}.
This trick avoids running the generator multiple times, which is good,
because running the generator can be expensive.
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -c                generate C code [default]",
<cases for arguments>+= (U->) [<-D->]
"-c" : { Generate_C(); interface_extension := ".h"; implementation_extension := ".c" }
<refman: options>+= [<-D->]
\tkoption{DE}{c}{generate C code}%
Tells the generator to generate C~code (the only option),
or tells the translator that the code emitted for the {\tt -decoder}
file should be C code.
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -checker file    generate a checker on file",
<cases for arguments>+= (U->) [<-D->]
"-checker"    :  { checkerfilename := get(args) }
<refman: options>+= [<-D->]
\tkoption{A}{checker \file}{generate a checker on \file}%
Writes the source for a checker in {\file}.
For each instruction in a specification,
the checker program emits assembly code and binary code in ASCII
into an assembly file. The resulting file is then assembled
and disassembled. 
The disassembled output should contain pairs of matching instructions.
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -count            count invocations of encoding procedures",
<cases for arguments>+= (U->) [<-D->]
"-count"      : gen_counters := 1
<refman: options>+= [<-D->]
\tkoption{E}{count}{Count invocations of encoding procedures}
Tells the generator to include code in encoding procedures that
counts the invocations of such procedures.
This is useful for determining the static frequency of an instruction
in a program, e.g., how many loads are generated.
<refman: counter functions>
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -debug-bal file   extra debugging info for balancer",
<cases for arguments>+= (U->) [<-D->]
"-debug-bal" : baldebug := openfile(get(args),"w")
<refman: options>+= [<-D->]
\tkoption{d}{debug-bal \file}
  {write extra debugging information about the balancer}
This option causes more information about the operation of the balancer to
be written to \file, as an aid to debugging unsolvable sets of equations.
\endtkoption
<*>+= [<-D->]
global baldebug
Defines baldebug (links are to index).

<usage lines>+= (<-U) [<-D->]
"    -debug-checker file  write debugging info about checker",
<cases for arguments>+= (U->) [<-D->]
"-debug-checker" : cdebug := openfile(get(args),"w")
<refman: options>+= [<-D->]
\tkoption{d}{debug-checker \file}
  {write information about generating checker inputs}
To produce a checker program, the toolkit attempts to generate
test inputs that will satisfy every branch of every constructor. 
This option writes information about choosing test values onto \file.
\endtkoption
<usage lines>+= (<-U) [<-D->]
"    -debug-heur file  write debugging info about heuristic scores",
<cases for arguments>+= (U->) [<-D->]
"-debug-heur" : hdebug := openfile(get(args),"w")
<refman: options>+= [<-D->]
\tkoption{d}{debug-heur \file}
  {write information about heuristic scores for tree-building}
When building the decision tree, the toolkit uses about a half dozen heuristics 
to choose the field to test next.
Each heuristic assigns a score to each field.
This option writes information about heuristic scoring onto \file.
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -debug-match file write debugging info about matching statements",
<cases for arguments>+= (U->) [<-D->]
"-debug-match" : mdebug := openfile(get(args),"w")
<refman: options>+= [<-D->]
\tkoption{d}{debug-match \file}
  {write other information about building matching statements}
During tree building, writes information about building matching statements onto \file.
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -debug-solver     turn on solver debugging",
<cases for arguments>+= (U->) [<-D->]
"-debug-solver" : debug_on()
<refman: options>+= [<-D->]
\tkoption{V}{debug-solver}{turn on solver debugging (to {\tt stderr})}
Makes the equation solver (and a few other parts of the toolkit) print
lots of debugging information on standard error.
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -debug-split file write debugging info when splitting tree nodes",
<cases for arguments>+= (U->) [<-D->]
"-debug-split" : sdebug := openfile(get(args),"w")
<refman: options>+= [<-D->]
\tkoption{d}{debug-split \file}{write debugging info when splitting tree nodes}
During tree building, writes information about splitting internal nodes onto \file.
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -decoder file     from matcher, generate decoder on file",
<cases for arguments>+= (U->) [<-D->]
"-decoder" : decoderout := openfile(get(args),"w")
<refman: options>+= [<-D->]
\tkoption{D}{decoder {\file}}
    {from info in \texttt{-matcher} file, generate decoder on \file}
<refman: doc for -matcher and -decoder>
\endtkoption
<refman: doc for -matcher and -decoder>= (<-U U->)
Invoke the toolkit translator, which translates embedding matching
statements from the {\tt -matcher} file and writes the results on the
{\tt -decoder} file.
To use the translator, both {\tt -matcher} and {\tt -decoder} must be
given exactly once.

<usage lines>+= (<-U) [<-D->]
"    -dis file         generate disassembly code on file",
<cases for arguments>+= (U->) [<-D->]
"-dis"        : disassemblyfilename  := get(args)
<refman: options>+= [<-D->]
\tkoption{D}{disr {\file}}{generate disassembly code on \file}
Generate code for disassembly on \file.
In flux right now.
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -dot file         write dot(1) commands showing matching statements",
<cases for arguments>+= (U->) [<-D->]
"-dot" : dotfile   := openfile(get(args),"w")
<refman: options>+= [<-D->]
\tkoption{d}{dot {\file}}{write {\tt dot} commands showing matching statements}
Creates output for the {\tt dot} graph-drawing program, which can be
used to create a graphical representation of the decision tree
(actually a DAG) used to implement matching statements.
The output is written to {\file}.
\warning{This option probably doesn't work.
It certainly doesn't understand multiple branches with identical
patterns but different conditions.}
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -emit-bits n      unit of emission is n bits (default 8)",
<cases for arguments>+= (U->) [<-D->]
"-emit-bits"   : emit_unit_bits := integer(get(args)) | stop("-emitbits not integer")
<refman: options>+= [<-D->]
\tkoption{E}{emit-bits \optarg{n}}
  {unit of emission is \optarg{n} bits}
The toolkit's generator works with bits, but it is more convenient for
the toolkit's library to work with larger units.
In particular, the second argument to the {\tt emit} functions tells
the library how wide the emitted token is, in units of \optarg{n} bits.
The default is~8, which means that the {\tt emit} functions work with
bytes, since these are how the library distributed with the toolkit
works.
The width of every emitted token must be a multiple of~\optarg{n} or the
generator will complain.
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -encoder file     generate encoding procedures on file.[ch]",
<cases for arguments>+= (U->) [<-D->]
"-encoder" : encoderfilename  := get(args)
<refman: options>+= [<-D->]
\tkoption{E}{encoder {\file}}{generate encoding procedures on {\tt\file.[ch]}}
Invokes the generator, which writes encoding procedures into
{\tt\file.c} and puts suitable
declarations into {\tt\file.h}.
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -expand-spec file writes an expanded version of the input specification",
<cases for arguments>+= (U->) [<-D->]
"-expand-spec" : spec  := openfile(get(args),"w")
<refman: options>+= [<-D->]
\tkoption{A}{expand-spec {\file}}{write expanded specification on \file}
Rewrites the specification onto {\file}, but with all patterns in
normal form.  Can be useful for seeing the meanings of complex
opcodes.
\warning{
This option predates the introduction of constructors, which it does
not treat, and it may break horribly.  
Pester us if you want it brought up to date, so that it emits a
complete specification in some canonical form.}
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -fieldnames file  write arrays of field names on file.[ch]",
<cases for arguments>+= (U->) [<-D->]
"-fieldnames" : fieldnamesbase := get(args)
<refman: options>+= [<-D->]
\tkoption{A}{fieldnames \file}
  {write arrays of field names on \file}
For each field in the specification, write to {\tt\file.c} an array of string
literals representing the names of values of that field.
Write declarations to {\tt\file.h}.
Such arrays may be useful in writing disassemblers.
(Will use extensions {\tt.i3} and {\tt.m3} if {\tt-m3} is given.)
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -foldemit         fold constants in calls to emit functions",
<cases for arguments>+= (U->) [<-D->]
"-foldemit"   : simplify_emits := 1
<refman: options>+= [<-D->]
\tkoption{E}{foldemit}{fold constants in calls to emit procedure}
Normally, the toolkit refrains from folding constants when generating
the code that emits a token.
This behavior makes it easy to examine the generated code to
double-check that it really looks right.
The {\tt -foldemit} option tells the toolkit to fold constants aggressively.
Its only real use is to make the generated code smaller, because 
modern compilers fold constants themselves, don't they?
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -icon             generate Icon code (for wizards only)",
<cases for arguments>+= (U->) [<-D->]
"-icon"         : { Generate_Icon(); 
                  interface_extension := ".bogus.icn"; implementation_extension := ".icn" }
<refman: options>+= [<-D->]
\tkoption{D}{icon}{generate Icon code}
Tells the translator to attempt to generate encoding procedures
written in Icon.  Not really supported.
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -impossible       force errors to stop with stack trace",
<cases for arguments>+= (U->) [<-D->]
"-impossible" : error := stop := impossible
<refman: options>+= [<-D->]
\tkoption{V}{impossible}{make errors force stop with stack trace}
Arranges for all error messages to be treated as ``impossible,'' which 
causes ``this can't happen'' to be printed and a stack trace to be
generated.
It is intended primarily for debugging the toolkit.
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -indirect name[:type] generated encoding procs called indirect through name",
<cases for arguments>+= (U->) [<-D->]
"-indirect"   : get(args) ? 
                  { indirectname := tab(upto(':') | 0)
                    indirecttype := if =":" then tab(0) else indirectname
                  }
<refman: options>+= [<-D->]
\setbox0=\hbox{\optarg{name}\optional{\tt:\optarg{type}}}%
\tkoption{E}{indirect \box0}{generated encoding procs called indirect through \optarg{name}}
Normally, the toolkit generates encoding procedures that are externally visible,
that is, they occupy top-level name space.
The {\tt -indirect} option tells the toolkit to make the procedures local to a 
compilation unit and to provide indirect access to them through a structure
holding procedure pointers.
\optarg{name} gives the name of the structure, \optarg{type} its tag.
(If \optarg{type} is omitted, we reuse \optarg{name}.)
Useful for avoiding name clashes or for having multiple sets of encoding
procedures in a single application.
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -late-const       include constants in closure functions (default)",
<cases for arguments>+= (U->) [<-D->]
"-late-const" : lateconst := 1
<refman: options>+= [<-D->]
\tkoption{E}{late-const}{use constants in closure functions (default)}
A heuristic that attempts to make closures smaller at the risk of
creating more closure functions, by putting literal constants in the
function bodies instead of the closures.  
This is the default.
\endtkoption
<initialize globals>= (U->) [D->]
lateconst := 1

<usage lines>+= (<-U) [<-D->]
"    -late-none        include *no* constants in closure functions",
<cases for arguments>+= (U->) [<-D->]
"-late-none" : lateconst := latezero := &null
<refman: options>+= [<-D->]
\tkoption{E}{late-none}{include no constants in closure functions}
Do not apply either the \tkopt{late-const} or \tkopt{late-zero}
heuristics. 
 Makes closures larger but may create fewer closure functions.
You probably don't want this.
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -late-zero        include zero in closure functions",
<cases for arguments>+= (U->) [<-D->]
"-late-zero" : latezero := 1
<refman: options>+= [<-D->]
\tkoption{E}{late-zero}{use zeroes in closure functions}
A heuristic that attempts to make closures smaller at the risk of
creating more closure functions, by putting literal zeroes in the
function bodies instead of the closures.
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -lc-cons-names    use all lower case for constructor names",
<cases for arguments>+= (U->) [<-D->]
"-lc-cons-names" : lowercons := 1
<refman: options>+= [<-D->]
\tkoption{E}{lc-cons-names}{use all lower case for constructor names}
Ensures that only lower-case letters are used in the names of
generated encoding procedures.
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -lc-pat-names     map pattern names to lower case in code",
<cases for arguments>+= (U->) [<-D->]
"-lc-pat-names" : lc_pat_names := 1
<refman: options>+= [<-D->]
\tkoption{D}{lc-pat-names}{force pattern names to lower case}
Pattern names are accessible using the \lit[\term{name}\lit] syntax in matching
statements; this option forces all such names to use lower-case
letters only.
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -ledger           tell dot(1) to use ledger paper with -dot",
<cases for arguments>+= (U->) [<-D->]
"-ledger"     : {ledger := 1; pagesize := "11,17"}
<refman: options>+= [<-D->]
\tkoption{d}{ledger}{Use ledger paper with {\tt -dot}}
Insert a PostScript incantation that might make the {\tt -dot} option
use ledger paper instead of regular paper.
\warning{Almost certainly broken.}
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -matcher file     transform pattern-matching statements in file",
<cases for arguments>+= (U->) [<-D->]
"-matcher"    : matcher := openfile(get(args),"r")
<refman: options>+= [<-D->]
\tkoption{D}{matcher \file}
  {transform pattern-matching statements in \file}
<refman: doc for -matcher and -decoder>
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -max-decimal n    values larger than n will be printed in hex",
<cases for arguments>+= (U->) [<-D->]
"-max-decimal" : max_decimal := 0 <= get(args) | 
                   error("-max-decimal value must be nonnegative integer")
<refman: options>+= [<-D->]
\tkoption{DE}{max-decimal \optarg{n}}
        {values larger than \optarg{n} will be printed in hex}
When generating code, 
the toolkit will use hexadecimal notation for integer literals larger than 
\optarg{n}.
For smaller literals, it uses decimal notation.
\optarg{n} must be nonnegative, so forcibly all negative integer literals are written
in decimal notation.
Default value is $2^{31}-1$.
\endtkoption
<initialize globals>+= (U->) [<-D->]
max_decimal := 2^31 - 1
<*>+= [<-D->]
global max_decimal
Defines max_decimal (links are to index).

<usage lines>+= (<-U) [<-D->]
"    -m3               generate Modula-3 code",
<cases for arguments>+= (U->) [<-D->]
"-m3"         : { Generate_M3(); 
                  interface_extension := ".i3"; implementation_extension := ".m3" }
<refman: options>+= [<-D->]
\tkoption{D}{m3}{generate Modula-3 code}
Tells the translator that the code emitted for the {\tt -decoder}
file should be Modula-3 code.
(The toolkit generator and library do not yet support Modula-3.)
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -no-asm-ws        remove whitespace from assembly syntax",
<cases for arguments>+= (U->) [<-D->]
"-no-asm-ws" : no_asm_ws := 1
<refman: options>+= [<-D->]
\tkoption{E}{no-asm-ws}{remove whitespace from assembly syntax}
Removes all whitespace from between operands in assembly-language syntax.
This might be a cheap way to get compatibility with other tools.
\endtkoption
<*>+= [<-D->]
global no_asm_ws
Defines no_asm_ws (links are to index).

<*>+= [<-D->]
global no_asm_ws
Defines no_asm_ws (links are to index).

<usage lines>+= (<-U) [<-D->]
"    -nowarn           no warnings",
<cases for arguments>+= (U->) [<-D->]
"-nowarn" : nowarn := 1
<refman: options>+= [<-D->]
\tkoption{V}{nowarn}{suppress warnings (not recommended)}
Suppresses warnings.
It's not a good idea to use this option routinely.
If a warning annoys you, let us know---maybe we can make it appear only when 
{\tt-verbose}.
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -old-closures     use the old closure technique",
<cases for arguments>+= (U->) [<-D->]
"-old-closures" : emit_closure_functions := emit_original_closure_functions
<refman: options>+= [<-D->]
\tkoption{?}{old-closures}{use the old closure technique (not recommended)}
Uses the old, fast technique that produces way too many closure functions.
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -postfix          use postfix assembly-language syntax",
<cases for arguments>+= (U->) [<-D->]
"-postfix" : postfix := 1
<refman: options>+= [<-D->]
\tkoption{ED}{postfix}{use postfix assembly-language syntax}
For disassembly or for assembly-language encoding, put opcode
\emph{after} operands.
\endtkoption
<*>+= [<-D->]
global postfix
Defines postfix (links are to index).

<usage lines>+= (<-U) [<-D->]
"    -token-closures  use one closure per token",
<cases for arguments>+= (U->) [<-D->]
"-token-closures" : emit_closure_functions := emit_tokenized_closure_functions
<refman: options>+= [<-D->]
\tkoption{?}{token-closures}{use one closure per token (recommended)}
Emit one closure function per token instead of one per instruction,
which should further reduce counts of closure functions.
\endtkoption

<usage lines>+= (<-U) [<-D->]
"    -test    pay no attention to the man behind the curtain",
<cases for arguments>+= (U->) [<-D->]
"-test" : {
write("Uncommented ", Cuncomment("/* foo at 0 */"))
######    pp := PPnew(&output)
######    t := constype("Outer", set(), 1, 1)
######    t2 := constype("Inner", set(), 1, 1)
######    c := constructor("mkOut", "mkOut", ["in"], t, [epsilon()], [table()], 1)
######    c2 := constructor("mkIn", "mkIn", ["fish"], t2, [epsilon()], [table()], 1)
######    t.members := [c]
######    t2.members := [c2]
######    b := Ebinding_instance("xxx", t, table())
######  y2 := Einstance_tagged(Einstance_input(b, c, "in"), c2, 99)
######    PPxwrite(pp, ppexpimage(y2), " =>$t$c ", "???", "$b")
######    pushtrace("TEST")
######    x2 := eliminate_instances_f(y2)
######  #  x1 := eliminate_instances_f(y1 := Einstance_input(Einstance_input(b, c, "in"), c2, "fish"))
######    poptrace()
######  #  PPxwrite(pp, ppexpimage(y1), " =>$t$c ", ppexpimage(x1), "$b")
######    PPxwrite(pp, ppexpimage(y2), " =>$t$c ", ppexpimage(x2), "$b")
}

<usage lines>+= (<-U) [<-D->]
"    -tryall           try all heuristics on every field, node (for debugging)",
<cases for arguments>+= (U->) [<-D->]
"-tryall"     : tryall := 1
<refman: options>+= [<-D->]
\tkoption{d}{tryall}{try all heuristics on every field in tree}
Show what split would be given by each heuristic.
Useful only with the option {\tt -debug-heur}.
\endtkoption

<usage lines>+= (<-U) [<-D]
"    -verbose          extra warnings & informatory messages",
<cases for arguments>+= (U->) [<-D]
"-verbose"    : verbose := info_msg
<refman: options>+= [<-D]
\tkoption{V}{verbose}{print extra warnings and information}
Make the toolkit print extra warnings and informational messages.
We've tried to include enough stuff so that you can watch the toolkit's 
progress through a big specification, even on a slow computer.
\endtkoption
<*>+= [<-D->]
procedure info_msg(L[])
  write ! ([&errout, image(filename), ", line ", lineno, ": Info -- "] ||| L)
end
Defines info_msg (links are to index).

<*>+= [<-D->]
global cdebug, mdebug, sdebug, hdebug, tryall, indirectname, indirecttype
global lc_pat_names, nowarn, emptyset, emittername, pretty
global lowercons, pc_unit_bits, emit_unit_bits, simplify_emits, gen_counters
global fieldnamesbase, verbose, version
global interface_extension, implementation_extension
global emit_closure_functions, latezero, lateconst
global asmgrammarfilename, disassemblyfilename
Defines asmgrammarfilename, cdebug, disassemblyfilename, emit_closure_functions, emittername, emit_unit_bits, emptyset, fieldnamesbase, gen_counters, hdebug, implementation_extension, indirectname, indirecttype, interface_extension, lateconst, latezero, lc_pat_names, lowercons, mdebug, nowarn, pc_unit_bits, pretty, sdebug, simplify_emits, tryall, verbose, version (links are to index).

<initialize globals>+= (U->) [<-D->]
dotfile := ascii_dag := spec := ascii_tree := decoderout :=
  fulltree := sdebug := hdebug := encoderfilename := asmencoderfilename := 
  ledger := tryall := asmgrammarfilename := disassemblyfilename
  lc_pat_names := alt := matcher := gen_counters := lowercons := 
  &null 
<initialize globals>+= (U->) [<-D->]
emittername := "emitm"
emptyset := set()
verbose := nop
filename := "-"
pagesize := "8.5,11"
fetchtab := table()
pc_unit_bits := 8
emit_unit_bits := 8
fresh_variables := set()
pretty := prettyC
interface_extension := ".h"
implementation_extension := ".c"
debug := debugs := nop 
emit_closure_functions := emit_optimized_closure_functions

It's a hack, but I use the global variable emitterstyle to determine whether code for Semit and Stoken should use a direct call to the emitter named by emittername, or whether it should use a style appropriate to a closure.

<*>+= [<-D->]
global emitterstyle # closure or direct, controls emission
Defines emitterstyle (links are to index).

<initialize globals>+= (U->) [<-D->]
emitterstyle := "direct"

Used to make sure the program counter is unique, so that pc - pc is zero always.

<*>+= [<-D->]
global the_global_pc
Defines the_global_pc (links are to index).

<initialize globals>+= (U->) [<-D]
the_global_pc := Epc()

I have to split up main, otherwise I get too many definitions which leads to a line more than 3000 characters wide, and TeX dies.

<*>+= [<-D->]
global command_line, header_lines
record Gcomment(s)

procedure main(args)
    <initialize globals>
    version := <version string>

    command_line := "tools"
    every command_line ||:= " " || !args
    header_lines := [
      Gcomment("Generated by the New Jersey Machine-Code Toolkit, version " ||version),
      Gcomment("command line: " || command_line)]

    Generate_C()        # default
    while args[1][1:2] == "-" & *args[1] > 1 do
        case a := get(args) of {
            <cases for arguments>
            default    : usage()
       }

    verbose("NJ Machine-Code Toolkit, version ", version)

    init_parser()
    init_tests()        # see code.nw

    if /quiet then write(&errout, "Parsing...")
    if *args > 0 then
        every consume(openfile(filename := !args,"r"))
    else
        consume(&input)

    if /quiet then write(&errout, "Done parsing.")

    if \no_asm_ws then every strip_asm_whitespace(kept_constructors())
    emit_encoding(\encoderfilename)
    emit_assembly(\asmencoderfilename)
    emit_asm_grammar(\asmgrammarfilename)
    emit_disassembler(\disassemblyfilename)
    emit_fieldnames(\fieldnamesbase)
    emit_checker(\checkerfilename)
    if \ledger then { <write PostScript for ledger paper> }

    outspec(\spec)
    if \matcher then {<handle matching statements>}
end
Defines command_line, Gcomment, header_lines, main (links are to index).

<handle matching statements>= (<-U)
codelex(matcher)
P_CodeFile()
<if tokens are left over, complain>
x := super_simplify(genheader(codeheader))
PPxwrite(PPnew(\decoderout), pretty(x))
every resolve_case_patterns(!matching_stmts)
every t := tree(cs := !matching_stmts) do { <manipulate decision tree t> }
<*>+= [<-D->]
global showpnf
Defines showpnf (links are to index).

<*>+= [<-D->]
procedure consume(input)
    lex(input)
    P_Spec()
    <if tokens are left over, complain>
    return
end
Defines consume (links are to index).

<if tokens are left over, complain>= (<-U <-U)
while (token ~== EOF) do {
    error("Leftover token ", token, " = ", image(tval))
    lex()
}
<write PostScript for ledger paper>= (<-U)
write(\dotfile,"%!   PostScript")
write(\dotfile,"/setpapertraybyname {")
write(\dotfile,"  { /trayname exch def statusdict trayname known {")
write(\dotfile,"    statusdict trayname get exec } if} stopped")
write(\dotfile,"    { handleerror } if}bind def")
write(\dotfile,"/ledgertray setpapertraybyname")
<manipulate decision tree t>= (<-U)
checktree(t, cs)
outtree(\ascii_tree, t)
if \dotfile | \ascii_dag | \decoderout | \alt then {
    d := tree2dag(t)
    outtree(\ascii_dag, d)
    dotouttree(\dotfile, d)
    PPxwrite(PPnew(\decoderout), pretty(super_simplify(gencase(cs, d))))
    printalternates(\alt, d)
    write(\alt,"\n", repl("=", 50))
}
<*>+= [<-D->]
record alternator(constraints, thennode, elsenode)

procedure alternates(n)
    local alt
    if *n.children = 2 then {
        every i := 1 to 2 do
            if alt := alternates(n.children[i].node) &
               n.children[3-i].node === alt.elsenode then
                   suspend alternator(alt.constraints|||
                       [constraint(n.field, n.children[i].lo, n.children[i].hi)],
                       alt.thennode, alt.elsenode)
        every i := 1 to 2 do
            suspend alternator(
                       [constraint(n.field, n.children[i].lo, n.children[i].hi)],
                       n.children[i].node, n.children[3-i].node)
    }
end
Defines alternates, alternator (links are to index).

<*>+= [<-D->]
procedure printalternates(file, n)
    local alt
    if alt := alternates(n) then {
        write(file)
        writes(file, "if (")
        l := copy(alt.constraints)
        while genconstraint(file, get(l)) do writes(file, " && ")
        write(file, "1) {")
        writes(file, "    /*")
        writes(file, \alt.thennode.cs.arms[1].original.pattern.name | 
                        "[" || alt.thennode.cs.arms[1].original.line || "]")
        write(file, " */")
        write(file, "} else {")
        writes(file, "    /*")
        writes(file, \alt.elsenode.cs.arms[1].original.pattern.name | 
                        "[" || alt.elsenode.cs.arms[1].original.line || "]")
        write(file, " */")
        write(file, "} else assert(0);")
    } else
        every printalternates(file, (!n.children).node)
end    
Defines printalternates (links are to index).

<*>+= [<-D->]
procedure genconstraint(file, c)
    r := sort(c.lo ++ c.hi)
    writes(file, "(", get(r), "<=", c.field.name, "<", get(r))
    while writes(file, " || ", get(r), "<=", c.field.name, "<", get(r))
    writes(file, ")")
    return
end
Defines genconstraint (links are to index).

openfile makes it possible to open standard input or output using the file name ``-.''

<*>+= [<-D]
procedure openfile(name, mode) 
    /mode := "r"
    if name == "-" then
       case mode of {
          "r" : return &input
          "w" : return &output
          default : stop("bogus file mode: ", mode)
       }
    else 
      return open(name, mode) | stop("Can't open file ", name, " for ", mode)
end
Defines openfile (links are to index).