foogol: | back to bertnase | |
[ To cfoogol from Volume 42, Issue 88. ]
Subject: v08i088: A (vax) compiler for a tiny ALGOL-like language
Newsgroups: mod.sources
Approved: mirror!rs
Submitted by: seismo!enea!suadb!lindberg (Per Lindberg QZ)
Mod.sources: Volume 8, Issue 88
Archive-name: foogol
[ You'll have to write your own RTS, and link it in. See the
documentation... --r$ ]
#! /bin/sh
# This is a shell archive. Remove anything before this line,
# then unpack it by saving it in a file and typing "sh file".
# If all goes well, you will see the message "End of shell archive."
# Contents: Makefile foogol.doc foogol.c
# Wrapped by rs@mirror
PATH=/bin:/usr/bin:/usr/ucb; export PATH
echo shar: extracting "'Makefile'" '(53 characters)'
if test -f 'Makefile' ; then
echo shar: will not over-write existing file "'Makefile'"
else
sed 's/^X//' >Makefile <<'@//E*O*F Makefile//'
Xfoogol: foogol.c
X $(CC) $(CFLAGS) -o foogol foogol.c
@//E*O*F Makefile//
if test 53 -ne "`wc -c <'Makefile'`"; then
echo shar: error transmitting "'Makefile'" '(should have been 53 characters)'
fi
fi # end of overwriting check
echo shar: extracting "'foogol.doc'" '(4379 characters)'
if test -f 'foogol.doc' ; then
echo shar: will not over-write existing file "'foogol.doc'"
else
sed 's/^X//' >foogol.doc <<'@//E*O*F foogol.doc//'
Xfc.doc Last modified: 1986-12-15
X
X
X The FOOGOL-IV compiler
X relese notes and documentation
X Per Lindberg, QZ
X The mad programmer strikes again!
X
XNAME
X fc - foogol compiler
X
XSYNOPSIS
X fc [ -d ] infile [ outfile ]
X
XDESCRIPTION
X fc compiles a foogol program into VAX/UNIX assembly language.
X Default extentions are ".foo" for the source file and ".s"
X for the compiled file. In other words, the resulting outfile
X is is VAX/UNIX assembly language, and can be assebled and
X linked with the vanilla UNIX as and ld programs.
X
X Options: (There is only one switch so far...)
X
X -d Sets the debug option, which makes the compiler print
X out internal diagnostics. Useful for debugging and
X understanding the compiler.
X
X The foogol object code has to be linked with the RTS (Run-Time
X system) and the C library in order to be able to do I/O.
X Example:
X fc foo
X as foo.s -o foo.o
X ld /lib/crt0.o rts.o foo.o -o foo -lc
X Or (shorter):
X fc foo
X cc rts.o foo.s -o foo
X
X The RTS (Run-Time System) should be compiled before it is
X linked with the foogol object code. It consists of just three
X output functions written in C:
X
X PRS(s) char *s; { printf("%s",s); }
X
X PRN(i) int i; { printf("%d",i); }
X
X PR() { putchar('\n'); }
X
X The foogol language is basically a very small ALGOL. The
X current syntactic elements are:
X
X PROGRAM ::= begin
X [ DECLARATION ; ]
X STATEMENT [ ; STATEMENT ]...
X end
X
X DECLARATION ::= integer ID_SEQUENCE
X
X ID_SEQUENCE ::= IDENTIFIER [ , IDENTIFIER ]
X
X STATEMENT ::= IO_STATEMENT
X ! WHILE_STATEMENT
X ! COND_STATEMENT
X ! BLOCK
X ! ASSIGN_STATEMENT
X
X BLOCK ::= begin
X [ DECLARATION ]
X [ ; STATEMENT ]...
X end
X
X IO_STATEMENT ::= prints ( STRING )
X ! printn ( EXPRESSION )
X ! print
X
X COND_STATEMENT ::= if EXPRESSION then STATEMENT
X [ else STATEMENT ]
X
X WHILE_STATEMENT ::= while EXPRESSION do STATEMENT
X
X ASSIGN_STATEMENT::= IDENTIFIER := EXPRESSION
X
X EXPRESSION ::= EXPR1 [ RHS ]
X
X RHS ::= = EXPR1
X ! # EXPR1
X
X SIGNED_TERM ::= + TERM
X ! - TERM
X
X TERM ::= PRIMARY [ * PRIMARY ]...
X
X PRIMARY ::= IDENTIFIER
X ! NUMBER
X ! ( EXPRESSION )
X
X EXPR1 ::= TERM [ SIGNED_TERM ]...
X
X IDENTIFIER ::=
X
X NUMBER ::=
X
X STRING ::=
X
X Example program:
X
X begin
X integer n, div, sub, test, testcopy, found, max;
X test := 2; max := 10; /* number of primes wanted */
X while n # max do begin
X div:= test-1; found:= 0;
X while div-1 do begin
X testcopy:= test; sub:= 0;
X while testcopy do begin
X sub:= sub+1; if sub = div then sub:= 0;
X testcopy:= testcopy-1
X end;
X if sub = 0 then found:= 1;
X div:= div-1
X end;
X if found = 0 then begin
X n:= n+1;
X printn(test); prints(" is prime number "); printn(n); print
X end;
X test:= test+1
X end
X end
X
X The syntax is highly flexible, which means it might easily be
X changed due to some whim. The source code should be checked
X for details and changes before bugs are reported.
X
X The compiler is written by Per Lindberg, and placed in the
X public domain. The Hacker's Ethic applies. It is based on the
X VALGOL I compiler published by G.A. Edgar in Dr. Dobb's
X Journal May 1985. It was implemented for the purpouse of
X demonstrating how a simple compiler works. Therefore, there
X are no optimizations or other frills. You might want to add
X things to it; go right ahead. Happy hacking!
X
XFILES
X fc.c Source code for the foogol compiler
X fc The foogol compiler
X rts.c Source code for the Run-Time system
X rts.o The Run-Time system
X fc.doc This file
X bar.foo Your program...
X
XSEE ALSO
X as, ld, cc
X
XBUGS
X There are no scoping rules, all declared variables can be used
X throughout the entire program. And although you can have local
X declarations in blocks, these declarations are in fact global.
X So you can't redeclare a variable.
X
X Because parsing is by simple recursive-descent and backtracking,
X there is only one cheerful error message: "Syntax error". No
X hints on missing or superflous semicolons or such hand-holding.
X You're supposed to write correct programs in foogol, Buster!
X
X The output code is extremely naive, and very suitable for
X code optimization exercises.
X
X Finally, please remember that this is just a 500-line toy
X compiler, so don't expect too much of it.
@//E*O*F foogol.doc//
if test 4379 -ne "`wc -c <'foogol.doc'`"; then
echo shar: error transmitting "'foogol.doc'" '(should have been 4379 characters)'
fi
fi # end of overwriting check
echo shar: extracting "'foogol.c'" '(13095 characters)'
if test -f 'foogol.c' ; then
echo shar: will not over-write existing file "'foogol.c'"
else
sed 's/^X//' >foogol.c <<'@//E*O*F foogol.c//'
X/*---------------------------------------------------------------------*\
X! !
X! fc.c Compiler for FOOGOL IV -- version 4.2 Last change:1985-12-02 !
X! Translates FOOGOL IV into VAX/UNIX assembler !
X! !
X! Written by Per Lindberg, QZ, Box 27322, 10254 Stockholm, Sweden. !
X! !
X! This software is in the public domain. The Hacker Ethic applies. !
X! (A postcard from anyone who ports it would be appreciated.) !
X! !
X\*---------------------------------the-mad-programmer-strikes-again----*/
X
X#define UNIX
X
X#ifdef SARG10 /* Sargasso C (under TOPS10/20) peculiarities */
X #strings low
X #define _UNIXCON
X#endif
X
X#include
X
X#define isupper(c) ((c) >= 'A' && (c) <= 'Z')
X#define tolower(c) ((c) - 'A' + 'a')
X
X#define MAXTAB 25 /* Tweak these to your own liking */
X#define MAXTOKEN 80
X
X#define WHITESPACE 0 /* These could be turned into enum */
X#define NUMBER 1
X#define LETTER 2
X#define QUOTE 3
X#define SEMICOLON 4
X#define RANDOM 5
X
XFILE *inf, *outf;
X
Xint labelcount = 0,
X linecount = 0,
X debug = 0;
X
Xchar token[MAXTOKEN],
X pending[MAXTOKEN],
X keytab[MAXTAB][MAXTOKEN],
X symtab[MAXTAB][MAXTOKEN],
X *usage =
X#ifdef SARG10
X "usage: '.run fc- [-debug] infile [outfile]'";
X#endif
X#ifdef UNIX
X "usage: 'fc [-debug] infile [outfile]'";
X#endif
X
Xmain(argc,argv) int argc; char *argv[]; {
X if (argc < 2) error(usage);
X if (*argv[1] == '-') { debug = 1; --argc; ++argv; }
X if (argc < 2) error(usage);
X openinfile(argv[1]);
X openoutfile(argv[argc == 2 ? 1 : 2]);
X init();
X if (!PROGRAM()) error("Syntax error");
X fclose(inf);
X fclose(outf);
X}
X
Xchar *defaultext(fname,ext,force) char *fname, *ext; int force; {
X static char result[255];
X char c, *point, *s = result;
X strcpy(result,fname);
X while (*s) ++s;
X point = s;
X while (c = *s, s > result && c != '.') --s;
X if (c == '.') { /* some extention exists */
X point = s;
X if (!force) return result; /* don't worry about what it is */
X }
X strcpy(point,ext); /* put default extention after point */
X return result;
X}
X
Xopeninfile(fname) char *fname; {
X char *defaultext();
X d("openinfile",defaultext(fname,".foo",0),"");
X if ((inf = fopen(defaultext(fname,".foo",0),"r")) == NULL)
X error2("Can't open infile", defaultext(fname,".foo",0));
X}
X
Xopenoutfile(fname) char *fname; {
X char *defaultext();
X d("openoutfile",defaultext(fname,".s",1),"");
X if ((outf = fopen(defaultext(fname,".s",1),"w")) == NULL)
X error2("Can't open outfile", defaultext(fname,".s",1));
X}
X
Xinit() {
X int i;
X d("init","","");
X get2();
X gettoken();
X for (i = 0; i < MAXTAB; i++) keytab[i][0] = '\0';
X}
X
Xerror(msg) char *msg; {
X printf("\n\nFoo: %s", msg);
X if (linecount) printf(" at line %d",linecount + 1);
X printf("\n");
X exit(1);
X}
X
Xerror2(s1,s2) char *s1,*s2; {
X static char msg[80];
X sprintf(msg,"%s\"%s\"",s1,s2);
X error(msg);
X}
X
Xlowcase(s) char *s; {
X char c;
X for (c = *s; c = *s; ++s) if (isupper(c)) *s = tolower(c);
X}
X
X/* Basic I/O functions */
X
Xint out(line) char *line; {
X char c, symb[MAXTOKEN], *subst(), *s = symb;
X int printmode = 1, chmode = 1;
X while(c = *line++) {
X if (c == ' ') { if (chmode) putc('\t',outf);
X chmode = 0;
X } else {
X chmode = 1;
X if (c != 39) { if (printmode) putc(c,outf);
X else *s++ = c;
X } else if (!printmode) {
X *s = '\0';
X if (*symb) fprintf(outf,"%s",subst(symb));
X printmode = 1;
X } else {
X printmode = 0;
X s = symb;
X }
X }
X }
X putc('\n',outf);
X return 1;
X}
X
Xgettoken() {
X strcpy(token,pending); get2();
X if (!strcmp("/",token) && !strcmp("*",pending)) {
X d("comment:",token,pending);
X while (strcmp("*",token) || strcmp("/",pending)) {
X strcpy(token,pending); get2();
X d(" ",token,"");
X }
X strcpy(token,pending); get2();
X strcpy(token,pending); get2();
X }
Xd("gettoken returning",token,pending);
X}
X
Xget2() {
X int c0, c, typ, count = 1;
X char *p = pending;
X while((typ=type(c0=getc(inf))) == WHITESPACE) if (c0 == '\n') ++linecount;
X if (c0 != EOF) *p++ = c0;
X if (typ == QUOTE) {
X while ((c = getc(inf)) != EOF && type(c) != QUOTE) {
X if (++count == MAXTOKEN) error("String too long");
X *p++ = c;
X }
X *p++ = '"';
X }
X else {
X while ((type(c=getc(inf)) == typ
X || typ == LETTER && type(c) == NUMBER)
X && typ != RANDOM
X && c != EOF) {
X *p++ = c;
X typ = type(c);
X if (++count == MAXTOKEN) error("Too long input token");
X }
X ungetc(c,inf);
X }
X *p = '\0';
X}
X
Xint type(c) int c; {
X if (c == EOF) return -1;
X if (c >= '0' && c <= '9') return(NUMBER);
X if (c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z') return(LETTER);
X if (c == ' ' || c == '\t' || c == '\n') return(WHITESPACE); /* */
X if (c == '"') return (QUOTE);
X if (c == ';') return (SEMICOLON);
X return(RANDOM);
X}
X
X/* Basic input matching functions */
X
Xint match(s) char *s; {
Xd("match",token,s);
X lowcase(token);
X if (strcmp(s,token)) return 0;
X gettoken(); return 1;
X}
X
Xint id(name) char *name; {
X int t;
X char c, *p = token;
X d("id",token,name);
X if (type(*p++) != LETTER) return 0;
X while (c = *p++) {
X t = type(c);
X if (t != NUMBER && t != LETTER) return(0);
X }
X lowcase(token);
X enter(name,token);
X gettoken();
X return(1);
X}
X
Xint number(name) char *name; {
X char c, *p = token;
X d("number",token,name);
X while (c = *p++) if (type(c) != NUMBER) return(0);
X enter(name,token);
X gettoken();
X return(1);
X}
X
Xint string(name) char *name; {
X d("string",token,name);
X if (*token != '"') return 0;
X enter(name,token);
X gettoken();
X return 1;
X}
X
Xlabel(name) char *name; {
X char result[6];
X d("label ",name,"");
X sprintf(result,"L%d",labelcount++);
X enter(name,result);
X}
X
X/* Internal symbol table */
X
Xenter(key,val) char *key, *val; {
X int i;
X d("enter ",val,key);
X for (i = 0; i < MAXTAB; i++) {
X if (keytab[i][0] == '\0') {
X strcpy(keytab[i],key);
X strcpy(symtab[i],val);
X return;
X }
X }
X error2("INTERNAL SYMTAB ENTER ERROR, can't enter ", val);
X}
X
Xint lookup(key) char *key; {
X int i;
X for (i = MAXTAB-1; i >= 0 ; i--) {
X if (!strcmp(key,keytab[i])) {
X d("lookup ",symtab[i],key);
X return i;
X }
X }
X error2("INTERNAL SYMTAB LOOKUP ERROR, can't find ", key);
X}
X
Xchar *subst(key) char *key; {
X return symtab[lookup(key)];
X}
X
Xremove(key) char *key; {
X keytab[lookup(key)][0] = '\0';
X}
X
X/* Syntax definition. This is the neat part! */
X
Xint PROGRAM() { d("PROGRAM",token,pending);
X if (!match("begin")) return 0; out(" .text # # begin");
X out(" .align 1");
X out(" .globl _main");
X out("_main:");
X out(" .word 0");
X if (!OPT_DECLARATION()) return 0;
X if (!STATEMENT()) return 0;
X while (match(";"))
X if (!STATEMENT()) return 0;
X if (!match("end")) return 0; out(" ret # # end");
X return 1;
X}
X
Xint OPT_DECLARATION() { d("OPT_DECLARATION",token,pending);
X if (DECLARATION()
X && !match(";")) return 0;
X return 1;
X}
X
Xint DECLARATION() { d("DECLARATION",token,pending);
X if (!match("integer")) return 0; out(" .data 1 # integer");
X if (!ID_SEQUENCE()) return 0; out(" .text");
X return 1;
X}
X
Xint ID_SEQUENCE() { d("ID_SEQUENCE",token,pending);
X if (!IDENTIFIER()) return 0;
X while (match(","))
X if (!IDENTIFIER()) return 0;
X return 1;
X}
X
Xint IDENTIFIER() { d("IDENTIFIER",token,pending);
X if (!id("X")) return 0; out("'X': .long 0");
X remove("X");
X return 1;
X}
X
Xint STATEMENT() { d("STATEMENT",token,pending);
X return
X IO_STATEMENT()
X ||
X WHILE_STATEMENT()
X ||
X COND_STATEMENT()
X ||
X BLOCK()
X || /* the order is important here */
X ASSIGN_STATEMENT();
X}
X
Xint BLOCK() { d("BLOCK",token,pending);
X if (!match("begin")) return 0; out(" # # # begin");
X if (DECL_OR_ST())
X while(match(";"))
X if (!STATEMENT()) return 0;
X if (!match("end")) return 0; out(" # # # end");
X return 1;
X}
X
Xint DECL_OR_ST() { d("DECL_OR_ST",token,pending);
X return
X DECLARATION()
X ||
X STATEMENT();
X}
X
Xint IO_STATEMENT() { d("IO_STATEMENT",token,pending);
X return
X PRINTS_STATEMENT()
X ||
X PRINTN_STATEMENT()
X ||
X PRINT_STATEMENT();
X}
X
Xint PRINTS_STATEMENT() { d("PRINTS_STATEMENT",token,pending);
X if (!match("prints")) return 0;
X if (!match("(")) return 0;
X if (!string("S")) return 0; label("Ls");
X out(" .data 1 # prints");
X out("'Ls': .asciz 'S'");
X out(" .text");
X out(" pushal 'Ls'");
X out(" calls $1,_PRS");
X remove("S"); remove("Ls");
X if (!match(")")) return 0;
X return 1;
X}
X
Xint PRINTN_STATEMENT() { d("PRINTN_STATEMENT",token,pending);
X if (!match("printn")) return 0;
X if (!match("(")) return 0;
X if (!EXPRESSION()) return 0; out(" pushl r0 # printn");
X out(" calls $1,_PRN");
X if (!match(")")) return 0;
X return 1;
X}
X
Xint PRINT_STATEMENT() { d("PRINT_STATEMENT",token,pending);
X if (!match("print")) return 0; out(" calls $0,_PR # print");
X return 1;
X}
X
Xint COND_STATEMENT() { d("COND_STATEMENT",token,pending);
X if (!match("if")) return 0; label("Lt"); label("Le"); label("Lq");
X if (!EXPRESSION()) return 0; out(" tstl r0 # if");
X if (!match("then")) return 0; out(" bneq 'Lq' # then");
X out(" jmp 'Le'");
X out("'Lq':");
X if (!STATEMENT()) return 0; out(" jmp 'Lt'");
X out("'Le': # # # else");
X if (match("else"))
X if (!STATEMENT()) return 0; out("'Lt': # # # endif");
X remove("Lt");remove("Le");remove("Lq");
X return 1;
X}
X
Xint WHILE_STATEMENT() { d("WHILE_STATEMENT",token,pending);
X if (!match("while")) return 0; label("Lw"); label("Lx"); label("Lv");
X out("'Lw': # # # while");
X if (!EXPRESSION()) return 0; out(" tstl r0");
X if (!match("do")) return 0; out(" bneq 'Lv'");
X out(" jmp 'Lx'");
X out("'Lv': # # # do");
X if(!STATEMENT()) return 0; out(" jmp 'Lw'");
X out("'Lx': # # # endwhile");
X remove("Lw");remove("Lx");remove("Lv");
X return 1;
X}
X
Xint ASSIGN_STATEMENT() { d("ASSIGN_STATEMENT",token,pending);
X if (!id("Var")) return 0;
X if (!match(":")) return 0;
X if (!match("=")) return 0;
X if (!EXPRESSION()) return 0; out(" movl r0,'Var' # 'Var':=");
X remove("Var");
X return 1;
X}
X
Xint EXPRESSION() { d("EXPRESSION",token,pending);
X if (!EXPR1()) return 0;
X if (!OPT_RHS()) return 0;
X return 1;
X}
X
Xint OPT_RHS() { d("OPT_RHS",token,pending);
X return
X RHS_EQ()
X ||
X RHS_NEQ()
X ||
X 1;
X}
X
Xint RHS_EQ() { d("RHS_EQ",token,pending);
X if (!match("=")) return 0; label("L="); label("Ly");
X out(" pushl r0 # =");
X if (!EXPR1()) return 0; out(" cmpl (sp)+,r0");
X out(" beql 'L='");
X out(" movl $0,r0");
X out(" jmp 'Ly'");
X out("'L=': movl $1,r0");
X out("'Ly':");
X remove("L="); remove("Ly");
X return 1;
X}
X
Xint RHS_NEQ() { d("RHS_NEQ",token,pending);
X if (!match("#")) return 0; label("L#"); label("Lz");
X out(" pushl r0 # <>");
X if (!EXPR1()) return 0; out(" cmpl (sp)+,r0");
X out(" beql 'L#'");
X out(" movl $1,r0");
X out(" jmp 'Lz'");
X out("'L#': movl $0,r0");
X out("'Lz':");
X remove("L#"); remove("Lz");
X return 1;
X}
X
Xint SIGNED_TERM() { d("SIGNED_TERM",token,pending);
X return
X PLUS_TERM()
X ||
X MINUS_TERM();
X}
X
Xint PLUS_TERM() { d("PLUS_TERM",token,pending);
X if (!match("+")) return 0; out(" pushl r0 # +term");
X if (!TERM()) return 0; out(" addl2 (sp)+,r0");
X return 1;
X}
X
Xint MINUS_TERM() { d("MINUS_TERM",token,pending);
X if (!match("-")) return 0; out(" pushl r0 # -term");
X if (!TERM()) return 0; out(" subl3 r0,(sp)+,r0");
X return 1;
X}
X
Xint TERM() { d("TERM",token,pending);
X if (!PRIMARY()) return 0;
X while (match("*")) { out(" pushl r0 # *");
X if (!PRIMARY()) return 0; out(" mull2 (sp)+,r0");
X }
X return 1;
X}
X
Xint PRIMARY() { d("PRIMARY",token,pending);
X if (id("Z")) { out(" movl 'Z',r0");
X remove("Z");
X return 1;
X }
X if (number("Z")) { out(" movl $'Z',r0");
X remove("Z");
X return 1;
X }
X if (match("(")) {
X if (!EXPRESSION()) return 0;
X if (!match(")")) return 0;
X return 1;
X }
X return 0;
X}
X
Xint EXPR1() { d("EXPR1",token,pending);
X if (!TERM()) return 0;
X while(SIGNED_TERM());
X return 1;
X}
X
X/* And finally, the debug function... */
X
Xint d(s1,s2,s3) char *s1,*s2,*s3; {
X if (debug) {
X printf("%s",s1);
X if (*s2) printf(" \"%s\"",s2);
X if (*s3) printf(" \"%s\"",s3);
X putchar('\n');
X }
X return 1;
X}
@//E*O*F foogol.c//
if test 13095 -ne "`wc -c <'foogol.c'`"; then
echo shar: error transmitting "'foogol.c'" '(should have been 13095 characters)'
fi
fi # end of overwriting check
echo shar: "End of shell archive."
exit 0