第14章 TinyC 前端 - 14.1 第 0.1 版 - 《自己动手写编译器》

14.1 第 0.1 版

14.1 第 0.1 版

首先对上一章的雏形版本稍微升级一下，增加变量声明和 print 语句，一共有 5 个文件：

词法分析文件： scanner.l

%{
/* Semantic values (yylval) are plain C strings. YYSTYPE is defined before
 * y.tab.h is included so that the declarations in that header use this type. */
#define YYSTYPE char *
/* Header generated by bison (see the -d flag in the makefile); the rules below
 * rely on it for the T_* token codes and for yylval. */
#include "y.tab.h"
/* Current input line, starting at 1; bumped by the "\n" rule and printed by yyerror(). */
int cur_line = 1;
void yyerror(const char *msg);
void unrecognized_char(char c);
/* Store a private copy of the matched text in yylval.
 * NOTE(review): the strdup() result is neither checked for NULL nor freed
 * anywhere in the visible code.
 * NOTE(review): identifiers starting with '_' followed by an uppercase letter
 * are reserved for the C implementation. */
#define _DUPTEXT {yylval = strdup(yytext);}
%}
 
/* note \042 is '"' */
 
/* One-character operators and punctuation; '-' comes first in the class so it is taken literally. */
OPERATOR        ([-/+*()=,;])
/* Unsigned decimal literal; a leading '-' is scanned separately as an OPERATOR. */
INTEGER         ([0-9]+)
/* Double-quoted string on a single line; the class excludes '"' and newline, so there are no escape sequences. */
STRING          (\042[^\042\n]*\042)
IDENTIFIER      ([_a-zA-Z][_a-zA-Z0-9]*)
/* Spaces and tabs only; newlines have their own rule so lines can be counted.
 * NOTE(review): '*' lets this pattern match the empty string; '+' would state the intent more directly. */
WHITESPACE      ([ \t]*)
 
%%
    /* Operators and punctuation are returned as their own character code. */
{OPERATOR}      { return yytext[0]; }
    /* Keywords must stay above {IDENTIFIER}: when two rules match the same
       length of text, flex picks the one listed first. */
"int"           { return T_Int; }
"print"         { return T_Print; }
 
    /* These tokens hand a copy of their text to the parser through yylval. */
{INTEGER}       { _DUPTEXT; return T_IntConstant; }
{STRING}        { _DUPTEXT; return T_StringConstant; }
{IDENTIFIER}    { _DUPTEXT; return T_Identifier; }
 
{WHITESPACE}    { /* ignore every whitespace */ }
    /* Newlines produce no token; they only advance the line counter used in error messages. */
\n              { cur_line++; }
    /* Catch-all for any character not matched above. */
.               { unrecognized_char(yytext[0]); }
%%
 
/*
 * End-of-input hook for the scanner.
 * Always returns 1, i.e. there is no further input to switch to.
 */
int yywrap(void)
{
    const int no_more_input = 1;

    return no_more_input;
}
 
/*
 * Report a character that no scanner rule accepts.
 *
 * c: the offending character.
 *
 * Builds the message "Unrecognized character: <c>" and passes it to yyerror().
 * The text is formatted with snprintf instead of overwriting a fixed offset in
 * a string literal, so the wording can change without a matching index update.
 */
void unrecognized_char(char c) {
    char buf[32];

    snprintf(buf, sizeof buf, "Unrecognized character: %c", c);
    yyerror(buf);
}
 
void yyerror(const char *msg) {
    printf("Error at line %d:\n\t%s\n", cur_line, msg);
    exit(-1);
}

语法分析文件： parser.y

%{
#include <stdio.h>
#include <stdlib.h>
/* The generated parser calls yylex(), which is defined in the flex output
 * (lex.yy.c). Declare it here: calling an undeclared function is invalid
 * since C99 and is rejected outright by recent compilers. */
int yylex(void);
void yyerror(const char*);
/* Semantic values are C strings, matching the definition used by the scanner. */
#define YYSTYPE char *
%}
 
/* Named token kinds returned by the scanner. Single-character operators are
   returned as their character code and need no declaration. */
%token T_StringConstant T_IntConstant T_Identifier T_Int T_Print
 
/* Operator precedence, lowest first: additive, then multiplicative, then
   unary minus. U_neg is never returned by the scanner; it only serves as a
   precedence label for the %prec annotation on the unary-minus rule. */
%left '+' '-'
%left '*' '/'
%right U_neg
 
%%
 
/* Start symbol: a non-empty, left-recursive sequence of statements.
   No output is produced here; all Pcode comes from the statement rules. */
S:   
    Stmt                        { /* empty */ }
|   S Stmt                      { /* empty */ }
;
 
/* One statement. The ';' of a declaration is consumed here, and its action
   ends the "var ..." line printed by VarDecl and adds a blank line.
   Assign and Print consume their own ';' and print their own newlines. */
Stmt:
    VarDecl ';'                 { printf("\n\n"); }
|   Assign                      { /* empty */ }
|   Print                       { /* empty */ }
;
 
/* "int a, b, c": prints "var a" for the first name and ", <name>" for each
   further one, all on one line; the line break is printed by the Stmt rule. */
VarDecl:
    T_Int T_Identifier          { printf("var %s", $2); }
|   VarDecl ',' T_Identifier    { printf(", %s", $3); }
;
 
/* "<name> = <expr>;": the actions of E run while the expression is being
   reduced, so its Pcode is already printed when this action appends
   "pop <name>" and a blank line. */
Assign:
    T_Identifier '=' E ';'      { printf("pop %s\n\n", $1); }
;
 
/* print("<format>", arg, ...): the Pcode for every argument is printed while
   Actuals is reduced, i.e. before this action prints "print <format>".
   $3 is the string literal exactly as scanned, including its quotes. */
Print:
    T_Print '(' T_StringConstant Actuals ')' ';'
                                { printf("print %s\n\n", $3); }
;
 
/* Zero or more ", <expr>" items following the format string. Nothing is
   printed here directly; each E prints its own Pcode, left to right. */
Actuals:
    /* empty */                 { /* empty */ }
|   Actuals ',' E               { /* empty */ }
;
 
/* Expressions. An action runs when its production is reduced, which happens
   after both operands have been reduced, so operand code is printed before
   the operator and the output is in postfix order. The grammar is ambiguous
   on its own; the %left/%right declarations resolve the conflicts. */
E:
    E '+' E                     { printf("add\n"); }
|   E '-' E                     { printf("sub\n"); }
|   E '*' E                     { printf("mul\n"); }
|   E '/' E                     { printf("div\n"); }
    /* Unary minus takes the precedence of U_neg instead of binary '-'. */
|   '-' E %prec U_neg           { printf("neg\n"); }
    /* Constants and variables are both emitted as "push <text>". */
|   T_IntConstant               { printf("push %s\n", $1); }
|   T_Identifier                { printf("push %s\n", $1); }
    /* Parentheses only group; the inner E has already printed its code. */
|   '(' E ')'                   { /* empty */ }
;
 
%%
 
/*
 * Program entry point: run the parser once and use its result as the
 * process exit status.
 */
int main()
{
    int parse_status = yyparse();

    return parse_status;
}

makefile 文件： makefile

# Name of the compiler executable, the TinyC test program, and the flex/bison sources.
OUT      = tcc
TESTFILE = test.c
SCANNER  = scanner.l
PARSER   = parser.y
 
CC       = gcc
# Objects built from the generated scanner and parser sources.
OBJ      = lex.yy.o y.tab.o
# Pcode output for the test program: TESTFILE with its suffix replaced by .asm.
TESTOUT  = $(basename $(TESTFILE)).asm
# Generated files removed by "make clean" (object files are removed separately).
OUTFILES = lex.yy.c y.tab.c y.tab.h y.output $(OUT)
 
# NOTE: every recipe line below must start with a TAB character; GNU make
# rejects space-indented recipes with "missing separator".

.PHONY: build test simulate clean

# Default goal: build the compiler executable.
build: $(OUT)

# Compile the test program to Pcode.
test: $(TESTOUT)

# Run the generated Pcode in the simulator.
simulate: $(TESTOUT)
	python pysim.py $<

clean:
	rm -f *.o $(OUTFILES)

# The compiler reads TinyC from stdin and writes Pcode to stdout.
$(TESTOUT): $(TESTFILE) $(OUT)
	./$(OUT) < $< > $@

$(OUT): $(OBJ)
	$(CC) -o $(OUT) $(OBJ)

# The scanner includes y.tab.h, so the parser must be generated first.
lex.yy.c: $(SCANNER) y.tab.c
	flex $<

# -v: write y.output, -d: write y.tab.h, -t: enable parser tracing support,
# -y: yacc-compatible output file names (y.tab.c / y.tab.h).
y.tab.c: $(PARSER)
	bison -vdty $<

测试文件： test.c

int a, b, c, d;
a = 1 + 2 * ( 2 + 2 );
c = 5;
d = 10;
b = c + d;
 
print("a = %d, b = %d, c = %d, d = %d", a, b, c, d);

Pcode 模拟器： pysim.py ，已经在第 4 章中介绍了。

这个版本在上一章的雏形版本的基础上，进行了以下扩充：

词法分析文件中：
增加了 T_StringConstant, T_Int, T_Print 类型的 token ，以及相应的正则表达式；
增加了一个 _DUPTEXT 宏，表示 yylval = strdup(yytext) 。
语法分析文件中：
增加了 VarDecl 和 Print 两个非终结符以及相应的产生式。

本版本的语法分析文件中，同样要注意源文件的解析过程中各产生式的折叠顺序以及相应的 Pcode 生成顺序。

makefile 里面是编译和测试这个程序的命令。在终端输入 make 后，将编译生成可执行文件 tcc ；然后输入 make test （相当于 “./tcc < test.c > test.asm” ），将输出 test.asm 文件，内容如下：

var a, b, c, d
 
push 1
push 2
push 2
push 2
add
mul
add
pop a
 
push 5
pop c
 
push 10
pop d
 
push c
push d
add
pop b
 
push a
push b
push c
push d
print "a = %d, b = %d, c = %d, d = %d"

可以看出 test.c 文件里的所有语句都被转换成相应的 Pcode 了。再用 Pcode 模拟器运行一下这些 Pcode ，在终端输入 “make simulate” （相当于 “python pysim.py test.asm” ），将输出：

a = 9, b = 15, c = 5, d = 10