16.1 改进 TinyC 前端

在上一章的 TinyC 后端中,为了降低 Pcode 命令的翻译难度,我们对 arg / var / ENDFUNC 命令的格式进行了改写,因此需要相应地改进 TinyC 前端,使之能够生成 TinyC 后端所能识别的新格式 Pcode 命令。具体来说,对于下面这段源程序 test.c :

  1. int main() {
  2. int a;
  3. a = 3;
  4. print("sum = %d", sum(4, a));
  5. return 0;
  6. }
  7.  
  8. int sum(int a, int b) {
  9. int c;
  10. c = a + b;
  11. return c;
  12. }

改进后 TinyC 前端需要生成一个 Pcode 文件 test.pcode

  1. FUNC @main:
  2. main.var a
  3.  
  4. push 3
  5. pop a
  6.  
  7. push 4
  8. push a
  9. $sum
  10. print "sum = %d"
  11.  
  12. ret 0
  13. ENDFUNC@main
  14.  
  15. FUNC @sum:
  16. sum.arg a, b
  17. sum.var c
  18.  
  19. push a
  20. push b
  21. add
  22. pop c
  23.  
  24. ret c
  25. ENDFUNC@sum

以及一个宏文件 test.funcmacro

  1. ; ==== begin function `main` ====
  2. %define main.varc 1
  3.  
  4. %MACRO main.var main.varc
  5. %define a [EBP - 4*1]
  6. SUB ESP, 4*main.varc
  7. %ENDMACRO
  8.  
  9. %MACRO ENDFUNC@main 0
  10. LEAVE
  11. RET
  12. %undef a
  13. %ENDMACRO
  14. ; ==== end function `main` ====
  15.  
  16. ; ==== begin function `sum` ====
  17. %define sum.argc 2
  18. %define sum.varc 1
  19.  
  20. %MACRO $sum 0
  21. CALL @sum
  22. ADD ESP, 4*sum.argc
  23. PUSH EAX
  24. %ENDMACRO
  25.  
  26. %MACRO sum.arg sum.argc
  27. %define a [EBP + 8 + 4*sum.argc - 4*1]
  28. %define b [EBP + 8 + 4*sum.argc - 4*2]
  29. %ENDMACRO
  30.  
  31. %MACRO sum.var sum.varc
  32. %define c [EBP - 4*1]
  33. SUB ESP, 4*sum.varc
  34. %ENDMACRO
  35.  
  36. %MACRO ENDFUNC@sum 0
  37. LEAVE
  38. RET
  39. %undef a
  40. %undef b
  41. %undef c
  42. %ENDMACRO
  43. ; ==== end function `sum` ====

在第 14 章的 TinyC 前端 1.0 版的 parser.y 的基础上,针对函数定义、参数定义以及变量定义的语句进行改写,改进后的语法分析文件 parser.y 如下:

  1. %{
  2. /* Prologue: declarations shared by the grammar actions and by the C code at the end of this file. */
  3. #include <stdio.h>
  4. #include <stdarg.h>
  5. #include <string.h>
  6. #include <stdlib.h>
  7. /* Set-up and tear-down of the input/output streams; both are defined after the grammar. */
  8. void init_parser(int argc, char *argv[]);
  9. void quit_parser();
  10. /* yyin (defined elsewhere) receives the opened source file; asmfile and incfile are the two output files opened by init_parser(). */
  11. extern FILE* yyin;
  12. FILE *asmfile, *incfile;
  13. #define BUFSIZE 256 /* size of the fixed char buffers used for file names and joined identifier lists */
  14. /* out_asm(fmt, ...): printf-style write to asmfile followed by a newline. It expands to a { } block, so it must not be used as the unbraced body of an if that has an else. */
  15. #define out_asm(fmt, ...) \
  16. {fprintf(asmfile, fmt, ##__VA_ARGS__); fprintf(asmfile, "\n");}
  17. /* out_inc(fmt, ...): same as out_asm, but writes to incfile. */
  18. #define out_inc(fmt, ...) \
  19. {fprintf(incfile, fmt, ##__VA_ARGS__); fprintf(incfile, "\n");}
  20.  
  21. void file_error(char *msg);
  22. /* Label numbering for if / while: ii and ww count the statements opened so far; istack / wstack hold the numbers of the currently open (nested) statements and itop / wtop index the innermost one (-1 when none is open). Each stack has room for 100 entries and is not bounds-checked. */
  23. int ii = 0, itop = -1, istack[100];
  24. int ww = 0, wtop = -1, wstack[100];
  25. /* _BEG_IF pushes a fresh number, _END_IF pops it, _i is the number of the innermost open if. */
  26. #define _BEG_IF (istack[++itop] = ++ii)
  27. #define _END_IF (itop--)
  28. #define _i (istack[itop])
  29. /* The same three operations for while loops. */
  30. #define _BEG_WHILE (wstack[++wtop] = ++ww)
  31. #define _END_WHILE (wtop--)
  32. #define _w (wstack[wtop])
  33. /* State of the function currently being parsed: its name plus the parameter and local-variable names collected so far (argc / varc entries). Each array holds at most 128 names and is not bounds-checked. */
  34. int argc = 0, varc = 0;
  35. char *cur_func_name, *args[128], *vars[128];
  36. void write_func_head();
  37. void write_func_tail();
  38. /* Hooks used by the grammar actions: record the function name, collect names, and emit the function head / tail. */
  39. #define _BEG_FUNCDEF(name) (cur_func_name = (name))
  40. #define _APPEND_ARG(arg) (args[argc++] = (arg))
  41. #define _APPEND_VAR(var) (vars[varc++] = (var))
  42. #define _WRITE_FUNCHEAD write_func_head
  43. #define _END_FUNCDEF write_func_tail
  44. /* Every semantic value ($1, $2, ...) is a C string. NOTE(review): presumably the scanner stores the token text in yylval -- confirm in scanner.l. */
  45. #define YYSTYPE char *
  46.  
  47. %}
  48. /* Token kinds used by the grammar. NOTE(review): presumably returned by scanner.l, which is not shown here. */
  49. %token T_Void T_Int T_While T_If T_Else T_Return T_Break T_Continue
  50. %token T_Print T_ReadInt T_Le T_Ge T_Eq T_Ne T_And T_Or
  51. %token T_IntConstant T_StringConstant T_Identifier
  52. /* Operator precedence and associativity, from lowest (first line) to highest (last line). The '!' level is also used for unary minus through %prec. */
  53. %left '='
  54. %left T_Or
  55. %left T_And
  56. %left T_Eq T_Ne
  57. %left '<' '>' T_Le T_Ge
  58. %left '+' '-'
  59. %left '*' '/' '%'
  60. %left '!'
  61.  
  62. %%
  63. /* A program is a possibly empty sequence of function definitions. */
  64. Start:
  65. Program { /* empty */ }
  66. ;
  67.  
  68. Program:
  69. /* empty */ { /* empty */ }
  70. | Program FuncDef { /* empty */ }
  71. ;
  72. /* An int or void function. The return type is not recorded; Vars consumes the opening brace and EndFuncDef the closing one. */
  73. FuncDef:
  74. T_Int FuncName Args Vars Stmts EndFuncDef
  75. | T_Void FuncName Args Vars Stmts EndFuncDef
  76. ;
  77. /* Remembers the function name for the head / tail writers. */
  78. FuncName:
  79. T_Identifier { _BEG_FUNCDEF($1); }
  80. ;
  81. /* Formal parameter list; only int parameters are accepted. */
  82. Args:
  83. '(' ')' { /* empty */ }
  84. | '(' _Args ')' { /* empty */ }
  85. ;
  86. /* Appends each parameter name to args[], in source order. */
  87. _Args:
  88. T_Int T_Identifier { _APPEND_ARG($2); }
  89. | _Args ',' T_Int T_Identifier { _APPEND_ARG($4); }
  90. ;
  91. /* Once the opening brace and all leading declarations are parsed, the function head is written (before any statement code is emitted). */
  92. Vars:
  93. _Vars { _WRITE_FUNCHEAD(); }
  94. ;
  95. /* Opening brace followed by zero or more declarations; declarations therefore have to precede all statements. */
  96. _Vars:
  97. '{' { /* empty */ }
  98. | _Vars Var ';' { /* empty */ }
  99. ;
  100. /* int a, b, ... -- appends each declared name to vars[]. */
  101. Var:
  102. T_Int T_Identifier { _APPEND_VAR($2); }
  103. | Var ',' T_Identifier { _APPEND_VAR($3); }
  104. ;
  105.  
  106. Stmts:
  107. /* empty */ { /* empty */ }
  108. | Stmts Stmt { /* empty */ }
  109. ;
  110. /* The closing brace writes the function tail. */
  111. EndFuncDef:
  112. '}' { _END_FUNCDEF(); }
  113. ;
  114. /* A statement is one of the eight forms below; the alternatives carry no action of their own. */
  115. Stmt:
  116. AssignStmt { /* empty */ }
  117. | CallStmt { /* empty */ }
  118. | IfStmt { /* empty */ }
  119. | WhileStmt { /* empty */ }
  120. | BreakStmt { /* empty */ }
  121. | ContinueStmt { /* empty */ }
  122. | ReturnStmt { /* empty */ }
  123. | PrintStmt { /* empty */ }
  124. ;
  125. /* name = Expr; -- the code for Expr has already been emitted when this action runs, so only the final pop into the variable is written here. */
  126. AssignStmt:
  127. T_Identifier '=' Expr ';' { out_asm("\tpop %s", $1); }
  128. ;
  129. /* A call used as a statement: a bare pop follows the call (presumably to discard the pushed return value -- confirm against the Pcode definition). */
  130. CallStmt:
  131. CallExpr ';' { out_asm("\tpop"); }
  132. ;
  133. /* if and if-else. The helper rules If, Then, EndThen and EndIf exist only to run an action at the right point of the statement. */
  134. IfStmt:
  135. If '(' Expr ')' Then '{' Stmts '}' EndThen EndIf
  136. { /* empty */ }
  137. | If '(' Expr ')' Then '{' Stmts '}' EndThen T_Else '{' Stmts '}' EndIf
  138. { /* empty */ }
  139. ;
  140. /* Opens a new if: allocates its number N and emits the _begIf_N label. */
  141. If:
  142. T_If { _BEG_IF; out_asm("_begIf_%d:", _i); }
  143. ;
  144. /* Runs after the condition code: emits jz _elIf_N. */
  145. Then:
  146. /* empty */ { out_asm("\tjz _elIf_%d", _i); }
  147. ;
  148. /* Runs after the then-block: emits the jump to _endIf_N followed by the _elIf_N label. */
  149. EndThen:
  150. /* empty */ { out_asm("\tjmp _endIf_%d\n_elIf_%d:", _i, _i); }
  151. ;
  152. /* Closes the if: emits the _endIf_N label and pops the number. */
  153. EndIf:
  154. /* empty */ { out_asm("_endIf_%d:", _i); _END_IF; }
  155. ;
  156. /* while loop; While, Do and EndWhile play the same role as the if helpers above. */
  157. WhileStmt:
  158. While '(' Expr ')' Do '{' Stmts '}' EndWhile
  159. { /* empty */ }
  160. ;
  161. /* Opens a new loop: allocates its number N and emits the _begWhile_N label before the condition code. */
  162. While:
  163. T_While { _BEG_WHILE; out_asm("_begWhile_%d:", _w); }
  164. ;
  165. /* Runs after the condition code: emits jz _endWhile_N. */
  166. Do:
  167. /* empty */ { out_asm("\tjz _endWhile_%d", _w); }
  168. ;
  169. /* Runs after the body: emits the jump back to _begWhile_N and the _endWhile_N label, then pops the loop number. */
  170. EndWhile:
  171. /* empty */ { out_asm("\tjmp _begWhile_%d\n_endWhile_%d:",
  172. _w, _w); _END_WHILE; }
  173. ;
  174.  
  /*
   * break; -- jumps to the end label of the innermost open while loop.
   * Outside of any loop wtop is -1 and _w would read wstack[-1], so that case
   * is reported as an error instead of emitting a jump to a garbage label.
   */
  BreakStmt:
      T_Break ';'
          {
              if (wtop < 0) {
                  file_error("'break' statement not within a while loop");
              }
              out_asm("\tjmp _endWhile_%d", _w);
          }
  ;

  /*
   * continue; -- jumps back to the begin label of the innermost open while
   * loop (where the condition is re-evaluated). Rejected outside of a loop
   * for the same reason as break.
   */
  ContinueStmt:
      T_Continue ';'
          {
              if (wtop < 0) {
                  file_error("'continue' statement not within a while loop");
              }
              out_asm("\tjmp _begWhile_%d", _w);
          }
  ;
  182. /* "return;" emits a plain ret. "return Expr;" emits the code for Expr first and then "ret ~". NOTE(review): "~" presumably tells the back end to return the value on top of the stack -- confirm against the Pcode definition. */
  183. ReturnStmt:
  184. T_Return ';' { out_asm("\tret"); }
  185. | T_Return Expr ';' { out_asm("\tret ~"); }
  186. ;
  187. /* print("fmt", e1, e2, ...); -- the code for the argument expressions is emitted first, then print with the format string literal. */
  188. PrintStmt:
  189. T_Print '(' T_StringConstant PrintIntArgs ')' ';'
  190. { out_asm("\tprint %s", $3); }
  191. ;
  192. /* Zero or more ", Expr" arguments after the format string. */
  193. PrintIntArgs:
  194. /* empty */ { /* empty */ }
  195. | PrintIntArgs ',' Expr { /* empty */ }
  196. ;
  197. /* Expressions are emitted in postfix order: the operand code comes first (from the nested Expr rules), then the operator instruction. Constants and identifiers emit a push. */
  198. Expr:
  199. T_IntConstant { out_asm("\tpush %s", $1); }
  200. | T_Identifier { out_asm("\tpush %s", $1); }
  201. | Expr '+' Expr { out_asm("\tadd"); }
  202. | Expr '-' Expr { out_asm("\tsub"); }
  203. | Expr '*' Expr { out_asm("\tmul"); }
  204. | Expr '/' Expr { out_asm("\tdiv"); }
  205. | Expr '%' Expr { out_asm("\tmod"); }
  206. | Expr '>' Expr { out_asm("\tcmpgt"); }
  207. | Expr '<' Expr { out_asm("\tcmplt"); }
  208. | Expr T_Ge Expr { out_asm("\tcmpge"); }
  209. | Expr T_Le Expr { out_asm("\tcmple"); }
  210. | Expr T_Eq Expr { out_asm("\tcmpeq"); }
  211. | Expr T_Ne Expr { out_asm("\tcmpne"); }
  212. | Expr T_Or Expr { out_asm("\tor"); }
  213. | Expr T_And Expr { out_asm("\tand"); }
  214. | '-' Expr %prec '!' { out_asm("\tneg"); }
  215. | '!' Expr { out_asm("\tnot"); }
  216. | ReadInt { /* empty */ }
  217. | CallExpr { /* empty */ }
  218. | '(' Expr ')' { /* empty */ }
  219. ;
  220. /* readint("prompt") used as an expression: emits readint with the prompt string literal. */
  221. ReadInt:
  222. T_ReadInt '(' T_StringConstant ')'
  223. { out_asm("\treadint %s", $3); }
  224. ;
  225. /* Function call: the code for the actual arguments is emitted first, then $name, the call macro that write_func_head() writes to the .inc file for every function. */
  226. CallExpr:
  227. T_Identifier Actuals
  228. { out_asm("\t$%s", $1); }
  229. ;
  230. /* Parenthesised, possibly empty, comma-separated list of argument expressions. */
  231. Actuals:
  232. '(' ')'
  233. | '(' _Actuals ')'
  234. ;
  235.  
  236. _Actuals:
  237. Expr
  238. | _Actuals ',' Expr
  239. ;
  240.  
  241. %%
  242.  
  /*
   * Entry point of the TinyC front end.
   *
   * Opens the files named on the command line, runs the parser (whose actions
   * write the Pcode and macro files), closes the files, and returns the
   * parser's status so that a failed parse is visible to the caller:
   * yyparse() yields 0 on success and a non-zero value otherwise.
   */
  int main(int argc, char *argv[]) {
      init_parser(argc, argv);
      int status = yyparse();
      quit_parser();
      return status;
  }
  248.  
  249. void init_parser(int argc, char *argv[]) {
  250. if (argc < 2) {
  251. file_error("Must provide an input source file!");
  252. }
  253.  
  254. if (argc > 2) {
  255. file_error("Too much command line arguments!");
  256. }
  257.  
  258. char *in_file_name = argv[1];
  259. int len = strlen(in_file_name);
  260.  
  261. if (len <= 2 || in_file_name[len-1] != 'c' \
  262. || in_file_name[len-2] != '.') {
  263. file_error("Must provide an '.c' source file!");
  264. }
  265.  
  266. if (!(yyin = fopen(in_file_name, "r"))) {
  267. file_error("Input file open error");
  268. }
  269.  
  270. char out_file_name[BUFSIZE];
  271. strcpy(out_file_name, in_file_name);
  272.  
  273. out_file_name[len-1] = 'a';
  274. out_file_name[len] = 's';
  275. out_file_name[len+1] = 'm';
  276. out_file_name[len+2] = '\0';
  277. if (!(asmfile = fopen(out_file_name, "w"))) {
  278. file_error("Output 'asm' file open error");
  279. }
  280.  
  281. out_file_name[len-1] = 'i';
  282. out_file_name[len] = 'n';
  283. out_file_name[len+1] = 'c';
  284. if (!(incfile = fopen(out_file_name, "w"))) {
  285. file_error("Output 'inc' file open error");
  286. }
  287. }
  288.  
  /*
   * Reports a fatal error and terminates the program.
   *
   * msg is printed under an "*** Error ***" banner. The text goes to stderr so
   * that it is not mixed into redirected standard output, and the process exits
   * with EXIT_FAILURE. This function does not return.
   */
  void file_error(char *msg) {
      fprintf(stderr, "\n*** Error ***\n\t%s\n\n", msg);
      exit(EXIT_FAILURE);
  }
  294.  
  #ifndef BUFSIZE
  #define BUFSIZE 256 /* normally already defined in the prologue */
  #endif

  /*
   * Joins strc strings with ", " into buf and returns buf.
   *
   * buf   destination; must have room for at least BUFSIZE bytes.
   * strs  the strings to join.
   * strc  number of entries of strs to use; 0 (or less) yields "".
   *
   * If the joined text (including the terminating NUL) does not fit into
   * BUFSIZE bytes, an error is printed and the program exits: a silently
   * truncated identifier list would produce a wrong Pcode file.
   */
  char *cat_strs(char *buf, char *strs[], int strc) {
      static const char sep[] = ", ";
      size_t used = 0; /* characters written so far, always < BUFSIZE */

      buf[0] = '\0';
      for (int i = 0; i < strc; i++) {
          size_t sep_len = (i > 0) ? sizeof sep - 1 : 0;
          size_t item_len = strlen(strs[i]);

          /* One byte must stay free for the terminating NUL. */
          if (sep_len + item_len >= (size_t)BUFSIZE - used) {
              fprintf(stderr,
                      "\n*** Error ***\n\tIdentifier list is too long!\n\n");
              exit(EXIT_FAILURE);
          }

          memcpy(buf + used, sep, sep_len);
          used += sep_len;
          memcpy(buf + used, strs[i], item_len);
          used += item_len;
          buf[used] = '\0';
      }
      return buf;
  }
  303.  
  304. #define _fn (cur_func_name)
  305.  
  306. void write_func_head() {
  307. char buf[BUFSIZE];
  308. int i;
  309.  
  310. out_asm("FUNC @%s:", _fn);
  311. if (argc > 0) {
  312. out_asm("\t%s.arg %s", _fn, cat_strs(buf, args, argc));
  313. }
  314. if (varc > 0) {
  315. out_asm("\t%s.var %s", _fn, cat_strs(buf, vars, varc));
  316. }
  317.  
  318. out_inc("; ==== begin function `%s` ====", _fn);
  319. out_inc("%%define %s.argc %d", _fn, argc);
  320. out_inc("\n%%MACRO $%s 0\n"
  321. " CALL @%s\n"
  322. " ADD ESP, 4*%s.argc\n"
  323. " PUSH EAX\n"
  324. "%%ENDMACRO",
  325. _fn, _fn, _fn);
  326. if (argc) {
  327. out_inc("\n%%MACRO %s.arg %s.argc", _fn, _fn);
  328. for (i = 0; i < argc; i++) {
  329. out_inc("\t%%define %s [EBP + 8 + 4*%s.argc - 4*%d]",
  330. args[i], _fn, i+1);
  331. }
  332. out_inc("%%ENDMACRO");
  333. }
  334. if (varc) {
  335. out_inc("\n%%define %s.varc %d", _fn, varc);
  336. out_inc("\n%%MACRO %s.var %s.varc", _fn, _fn);
  337. for (i = 0; i < varc; i++) {
  338. out_inc("\t%%define %s [EBP - 4*%d]",
  339. vars[i], i+1);
  340. }
  341. out_inc("\tSUB ESP, 4*%s.varc", _fn);
  342. out_inc("%%ENDMACRO");
  343. }
  344. }
  345.  
  346. void write_func_tail() {
  347. int i;
  348.  
  349. out_asm("ENDFUNC@%s\n", _fn);
  350.  
  351. out_inc("\n%%MACRO ENDFUNC@%s 0\n\tLEAVE\n\tRET", _fn);
  352. for (i = 0; i < argc; i++) {
  353. out_inc("\t%%undef %s", args[i]);
  354. }
  355. for (i = 0; i < varc; i++) {
  356. out_inc("\t%%undef %s", vars[i]);
  357. }
  358. out_inc("%%ENDMACRO");
  359. out_inc("; ==== end function `%s` ====\n", _fn);
  360.  
  361. argc = 0;
  362. varc = 0;
  363. }
  364.  
  365. void quit_parser() {
  366. fclose(yyin); fclose(asmfile); fclose(incfile);
  367. }

词法分析文件 scanner.l 不变,和第 14 章的 TinyC 前端 1.0 版的相同。

将以上 scanner.l, parser.y, test.c 三个文件放在同一目录,输入以下命令生成 TinyC 前端 tcc-frontend :

  1. flex scanner.l
  2. bison -vdty parser.y
  3. gcc -o tcc-frontend lex.yy.c y.tab.c

再输入:

  1. ./tcc-frontend test.c

这将利用 tcc-frontend 编译 test.c ,生成 Pcode 文件 test.asm 以及宏文件 test.inc 。将它们分别与前面的 test.pcode 和 test.funcmacro 文件对比一下,可以看到内容几乎是一模一样的(主要的差别是:return 语句会先把返回值表达式压栈,再输出 ret ~ ;并且前端会为包括 main 在内的每个函数都生成 argc 定义和 $函数名 调用宏)。