C0 語言的編譯器 (產生 CPU0 的組合語言)

開放電腦

簡介

函式庫

處理器

主機板

輸出入

虛擬機

組譯器

連結器

編譯器

嵌入式系統

作業系統

原始碼下載

文件下載

設計想法

訊息

相關網站

參考文獻

最新修改

簡體版

English

相關程式

編譯器原始碼

#include "Parser.h"
#include "Generator.h"

void compile(char *cFile, char *asmFile) {
  printf("compile file:%s\n", cFile, asmFile);
  char *cText = newFileStr(cFile);
  Parser *parser = parse(cText);
  generate(parser->tree, asmFile);
  ParserFree(parser);
  freeMemory(cText);
}

掃描器

#ifndef SCANNER_H
#define SCANNER_H

#include "Array.h"

typedef struct {
  char *text;
  int textLen;
  int textIdx;
  char token[MAX_LEN];
} Scanner;

Scanner* ScannerNew(char *pText);
void ScannerFree(Scanner *scanner);
char* ScannerScan(Scanner *scanner);
Array* tokenize(char *text);
char *tokenToType(char *token);
void printTokens(Array *tokens);

extern char STRING[];
extern char NUMBER[];
extern char ID[];
extern char KEYWORDS[];
extern char OP1[];
extern char OP2[];
extern char COND_OP[];
extern char ITEM[];

#endif
#include <string.h>
#include "Scanner.h"

char STRING[] = "string";
char NUMBER[] = "number";
char ID[]  = "id";
char KEYWORDS[] = "|if|for|while|return|";
char OP1[] = "|++|--|";
char OP2[] = "|+|-|*|/|";
char COND_OP[] = "|==|!=|<=|>=|<|>|";
char ITEM[]= "|id|number|string|";
char OP[]  = "+-*/<=>!";

#define ch() (scanner->text[scanner->textIdx])
#define next() (scanner->textIdx++)

Scanner* ScannerNew(char *pText) {
  Scanner *scanner = ObjNew(Scanner, 1);
  scanner->text = pText;
  scanner->textLen = strlen(pText);
  scanner->textIdx = 0;
  return scanner;
}

void ScannerFree(Scanner *scanner) {
  ObjFree(scanner);
}

char *ScannerScan(Scanner *scanner) {
  while (strMember(ch(), SPACE))
    next();
  if (scanner->textIdx >= scanner->textLen) 
    return NULL;
  char c = ch();
  int begin = scanner->textIdx;
  if (c == '\"') { // string = ".."
    next(); // skip begin quote "
    while (ch() != '\"') next();
    next(); // skip end quote "
  } else if (strMember(c, OP)) { // OP , ex : ++, --, <=, >=, ...
    while (strMember(ch(), OP)) next();
  } else if (strMember(c, DIGIT)) { // number, ex : 312, 77568, ...
    while (strMember(ch(), DIGIT)) next();
  } else if (strMember(c, ALPHA)) { // name, ex : int, sum, i, for, if, ....
    while (strMember(ch(), ALPHA) || strMember(ch(), DIGIT)) next();
  } else // some other symbol, such as #
    next();
  strSubstr(scanner->token, scanner->text, begin, scanner->textIdx-begin);
  return scanner->token;
}

Array* tokenize(char *text) {
  Array *tokens = ArrayNew(10);
  Scanner *scanner = ScannerNew(text);
  char *token = NULL;
  while ((token = ScannerScan(scanner))!= NULL) {
    ArrayAdd(tokens, newStr(token));
    printf("token=%s\n", token);
  }
  ScannerFree(scanner);
  return tokens;
}

char *tokenToType(char *token) {
  if (strPartOf(token, KEYWORDS))
    return token;
  else if (token[0] == '\"')
    return STRING;
  else if (strMember(token[0], DIGIT))
    return NUMBER;
  else if (strMember(token[0], ALPHA))
    return ID;
  else
    return token;
}

void printTokens(Array *tokens) {
  printf("tokens->count = %d\n", tokens->count);
  int i;
  for (i=0; i<tokens->count; i++) {
    char *token = tokens->item[i];
    printf("token=%s , type=%s\n", token, tokenToType(token));
  }
}

剖析器

引用檔:Parser.h

#ifndef PARSER_H
#define PARSER_H

#include "Scanner.h"
#include "Tree.h"

typedef struct {
  Array *tokens;
  Tree *tree;
  Array* stack;
  int tokenIdx;
} Parser;

Parser *parse(char *text);

Parser *ParserNew();
void ParserParse(Parser *p, char *text);
void ParserFree(Parser *parser);

#endif

原始程式:Parser.c

#include "Parser.h"

Parser *parse(char *text) {
  Parser *p=ParserNew();
  ParserParse(p, text);
  return p;
}

char* nextToken(Parser *p);
char *tokenToType(char *token);
Tree* push(Parser *p, char* term);
Tree* pop(Parser *p, char* term);
Tree* parseProg(Parser *p);
void parseBlock(Parser *p);
void parseFor(Parser *p);
void parseBaseList(Parser *p);
void parseBase(Parser *p);
void parseStmt(Parser *p);
void parseArgList(Parser *p);
void parseExp(Parser *p);
void parseCond(Parser *p);
void error();
BOOL isEnd(Parser *p);
BOOL isNext(Parser *p, char *pTypes);
char *next(Parser *p, char *pTypes);

Parser *ParserNew() {
  Parser *parser = ObjNew(Parser, 1);
  parser->tokens = NULL;
  parser->tree = NULL;
  parser->stack = ArrayNew(10);
  return parser;
}

void ParserFree(Parser *parser) {
  ArrayFree(parser->tokens, strFree);
  ArrayFree(parser->stack, NULL);
  TreeFree(parser->tree);
  ObjFree(parser);
}

void ParserParse(Parser *p, char *text) {
  printf("======= tokenize =======\n");
  p->tokens = tokenize(text);
  printTokens(p->tokens);
  p->tokenIdx = 0;
  printf("======= parsing ========\n");
  p->tree = parseProg(p); 
  if (p->stack->count != 0) {
    printf("parse fail:stack.count=%d", p->stack->count);
    error();
  }
}

void error() {
  printf("error()!\n");
  exit(1);
}

// PROG = BaseList
Tree *parseProg(Parser *p) {
  push(p, "PROG");
  parseBaseList(p);
  return pop(p, "PROG");
}

// BaseList= { BASE }
void parseBaseList(Parser *p) {
  push(p, "BaseList");
  while (!isEnd(p) && !isNext(p, "}"))
      parseBase(p);
  pop(p, "BaseList");
}

// BASE = FOR | STMT;
void parseBase(Parser *p) {
  push(p, "BASE");
  if (isNext(p, "for"))
      parseFor(p);
  else {
      parseStmt(p);
      next(p, ";");
  }
  pop(p, "BASE");
}

// FOR = for (STMT; COND; STMT) BLOCK
void parseFor(Parser *p) {
  push(p, "FOR");
  next(p, "for");
  next(p, "(");
  parseStmt(p);
  next(p, ";");
  parseCond(p);
  next(p, ";");
  parseStmt(p);
  next(p, ")");
  parseBlock(p);
  pop(p, "FOR");
}

// BLOCK = '{' BaseList '}'
void parseBlock(Parser *p) {
  push(p, "BLOCK");
  next(p, "{");
  parseBaseList(p);
  next(p, "}");
  pop(p, "BLOCK");
}

// STMT = return id | id '=' EXP  | id '(' ArgList ')' | id OP1
void parseStmt(Parser *p) {
  push(p, "STMT");
  if (isNext(p, "return")) {
    next(p, "return");
    next(p, "id");
  } else {
    next(p, "id");
    if (isNext(p, "="))  { // id '=' EXP   --> ASSIGN
      next(p, "=");
      parseExp(p);
    } else if (isNext(p, "("))  {  // id '(' ArgList ')'     --> Function Call
      next(p, "(");
      if (!isNext(p, ")"))
          parseArgList(p);
      next(p, ")");
    } else              // id OP1
      next(p, OP1);
  }
//  printf("Exit parseStmt\n");
  pop(p, "STMT");
}

// ArgList = EXP { ',' EXP }
void parseArgList(Parser *p) {
  push(p, "ArgList");
  parseExp(p);
  while (isNext(p, ",")) {
      next(p, ",");
      parseExp(p);
  }
  pop(p, "ArgList");
}

// EXP = ITEM OP2 ITEM | ITEM
void parseExp(Parser *p) {
  push(p, "EXP");
  next(p, ITEM);
  if (isNext(p, OP2)) {
      next(p, OP2);
      next(p, ITEM);
  }
  pop(p, "EXP");
}

// COND = EXP COND_OP EXP
void parseCond(Parser *p) {
  push(p, "COND");
  parseExp(p);
  next(p, COND_OP);
  parseExp(p);
  pop(p, "COND");
}

char* level(Parser *p) {
  return strSpaces(p->stack->count);
}

char* nextToken(Parser *p) {
  return (char*) p->tokens->item[p->tokenIdx];
}

BOOL isEnd(Parser *p) {
  return (p->tokenIdx >= p->tokens->count);
}

BOOL isNext(Parser *p, char *pTypes) {
  char *token = nextToken(p); 
  if (token == NULL) return FALSE;
  char *type = tokenToType(token);
  char tTypes[MAX_LEN+1];
  sprintf(tTypes, "|%s|", pTypes);
  if (strPartOf(type, tTypes))
    return TRUE;
  else
    return FALSE;
}

char *next(Parser *p, char *pTypes) {
  char *token = nextToken(p);
  if (isNext(p, pTypes)) {
    char *type = tokenToType(token);
    Tree *child = TreeNew(type, token);
    Tree *parentTree = ArrayPeek(p->stack);
    TreeAddChild(parentTree, child);
    printf("%s idx=%d, token=%s, type=%s\n", 
      level(p),p->tokenIdx,token,type);
    p->tokenIdx++;
    return token;
  } else {
    printf("next():%s is not type(%s)\n", token, pTypes);
    error();
    p->tokenIdx++;
    return NULL;
  }
}

Tree* push(Parser *p, char* pType) {
  printf("%s+%s\n", level(p), pType);
  Tree* tree = TreeNew(pType, "");
  ArrayPush(p->stack, tree);
  return tree;
}

Tree* pop(Parser *p, char* pType) {
  Tree *tree = ArrayPop(p->stack);
  printf("%s-%s\n", level(p), tree->type);
  if (strcmp(tree->type, pType)!=0) {
    printf("pop(%s):should be %s\n",tree->type,pType);
    error();
  }
  if (p->stack->count > 0) {
    Tree *parentTree = ArrayPeek(p->stack);
    TreeAddChild(parentTree, tree);
  }
  return tree;
}

程式產生器

#ifndef GENERATOR_H
#define GENERATOR_H

#include "Tree.h"
#include "HashTable.h"

typedef struct {
  HashTable *symTable;
  Tree *tree;
  FILE *asmFile;
  int forCount, varCount;
} Generator;

void generate(Tree *tree, char *asmFile);

Generator *GenNew();
void GenFree(Generator *g);
Tree* GenCode(Generator *g, Tree *node, char *rzVar);
void GenData(Generator *g);
void GenPcode(Generator *g, char* label, char* op, char* p1, char* p2, char* pTo);
void GenPcodeToAsm(Generator *g, char* label, char* op, char* p1, char* p2, char* pTo);
void GenAsmCode(Generator *g, char* label, char* op, char* p1, char* p2, char* pTo);
void GenTempVar(Generator *g, char *tempVar);
void negateOp(char *condOp, char *negOp);

#endif
#include "Generator.h"

// 程式產生器的主要函數。
void generate(Tree *tree, char *asmFile) { 
  char nullVar[100]="";
  Generator *g = GenNew();
  g->asmFile = fopen(asmFile, "w");
  printf("=====PCODE=====\n");
  GenCode(g, tree, nullVar);
  GenData(g);
  fclose(g->asmFile);
  GenFree(g);
  char *asmText = newFileStr(asmFile);
  printf("=====AsmFile:%s======\n", asmFile);  
  printf("%s\n", asmText);
  freeMemory(asmText);
}

Generator *GenNew() {
  Generator *g = ObjNew(Generator, 1);
  g->symTable = HashTableNew(127);
  return g;
}

void GenFree(Generator *g) {
  HashTableFree(g->symTable);
  ObjFree(g);
}

char nullVar[100]="";

Tree* GenCode(Generator *g, Tree *node, char *rzVar) {
  strcpy(nullVar, "");
  strcpy(rzVar, "");
  if (node == NULL) return NULL; // 遞迴終止條件。

  if (strEqual(node->type, "FOR")) {
    // 處理 FOR 迴圈,<FOR> ::= "for" "(" <STMT> ";" <COND> ";" <STMT> ")" <BLOCK>
    char forBeginLabel[100], forEndLabel[100], condOp[100];
    Tree *stmt1 = node->childs->item[2], 
         *cond  = node->childs->item[4], 
         *stmt2 = node->childs->item[6], 
         *block = node->childs->item[8];
    GenCode(g, stmt1, nullVar);
    int tempForCount = g->forCount++;
    sprintf(forBeginLabel, "FOR%d", tempForCount);
    sprintf(forEndLabel, "_FOR%d", tempForCount);
    GenPcode(g, forBeginLabel, "", "", "", "");
    GenCode(g, cond, condOp);
    char negOp[100];
    negateOp(condOp, negOp);
    GenPcode(g, "", "J", negOp, "", forEndLabel);
    GenCode(g, block, nullVar);
    GenCode(g, stmt2, nullVar);
    GenPcode(g, "", "J", "", "", forBeginLabel);
    GenPcode(g, forEndLabel, "", "", "", "");
    return NULL;
  } else if (strEqual(node->type, "STMT")) {
    // 處理 STMT (Statement 陳述), <STMT>::= return id | id "=" <EXP> | id ["++"|"--"]
    Tree *c1 = node->childs->item[0];
    if (strEqual(c1->type, "return")) {
      Tree *id = node->childs->item[1];
      GenPcode(g, "", "RET", "", "", id->value);
    } else {
      Tree *id = node->childs->item[0];
      Tree *op = node->childs->item[1];
      if (strEqual(op->type, "=")) { // STMT 是一種 ASSIGN (指定) --> id '=' EXP
        Tree *exp = node->childs->item[2];
        char expVar[100];
        GenCode(g, exp, expVar);
        GenPcode(g, "", "=", expVar, "", id->value);
        HashTablePut(g->symTable, id->value, id->value);
        strcpy(rzVar, expVar);
      } else { // STMT 是 id++ 或 id--,--> id OP1
        char addsub[100];
        if (strEqual(op->value, "++"))
          strcpy(addsub, "+");
        else
          strcpy(addsub, "-");
        GenPcode(g, "", addsub, id->value, "1", id->value);
        strcpy(rzVar, id->value);
      }
    }
  } else if (strEqual(node->type, "COND")) {
    // 處理布林判斷式 <COND> ::= <EXP> ["=="|"!="|"<="|">="|"<"|">"] <EXP>
    Tree* op = node->childs->item[1];
    char expVar1[100], expVar2[100];
    GenCode(g, node->childs->item[0], expVar1);
    GenCode(g, node->childs->item[2], expVar2);
    GenPcode(g, "", "CMP", expVar1, expVar2, nullVar);
    strcpy(rzVar, op->value); // 傳回布林運算子 
  } else if (strPartOf(node->type, "|EXP|TERM|")) {
    // 處理運算式 <EXP> ::= <TERM> { ["+"|"-"] <TERM> } 
    // 與 <TERM> ::= <FACTOR> { ["*"|"/"] <FACTOR> }
    Tree *item1 = node->childs->item[0];
    char var1[100], var2[100], tempVar[100];
    GenCode(g, item1, var1);
    int ti;
    for (ti = 1; ti < node->childs->count; ti += 2) {
      Tree* op = node->childs->item[ti];
      Tree* item2 = node->childs->item[ti+1];
      GenCode(g, item2, var2);
      GenTempVar(g, tempVar);
      GenPcode(g, "", op->value, var1, var2, tempVar);
      strcpy(var1, tempVar);
    }
    strcpy(rzVar, var1);
  } else if (strEqual(node->type, "FACTOR")) {
    // 處理 <FACTOR> ::= <ITEM> | "(" <EXP> ")"         
    if (node->childs->count == 1)
      return GenCode(g, node->childs->item[0], nullVar);
    else
      return GenCode(g, node->childs->item[1], nullVar);
  } else if (strEqual(node->type, "ITEM")) {
    // 處理 <ITEM>::= id | number         
    return GenCode(g, node->childs->item[0], nullVar);
  } else if (strPartOf(node->type, "|number|id|")) {
    // 遇到變數或常數,傳回其 value 名稱。         
    strcpy(rzVar, node->value);
  } else if (node->childs != NULL) { 
    // 其他狀況,若有子代則遞回處理
    int i;
    for (i=0; i<node->childs->count; i++)
      GenCode(g, node->childs->item[i], nullVar);
  }
  return NULL;
}

void GenData(Generator *g) { // 產生組合語言的變數宣告 
  Array *symArray = HashTableToArray(g->symTable);
  int i;
  for (i=0; i<symArray->count; i++) { // 產生符號表 
    char *varName = symArray->item[i];
    char varLabel[100];
    sprintf(varLabel, "%s:", varName);
    GenAsmCode(g, varLabel, "RESW", "1", "", "");
  }
  for (i=0; i<g->varCount; i++) { // 產生臨時變數 
    char tVarLabel[100];
    sprintf(tVarLabel, "T%d:", i);
    GenAsmCode(g, tVarLabel, "RESW", "1", "", "");
  }
  ArrayFree(symArray, NULL);
}

void GenPcode(Generator *g, char* label, char* op, char* p1, char* p2, char* pTo) {
  char labelTemp[100];
  if (strlen(label)>0)
    sprintf(labelTemp, "%s:", label);
  else
    strcpy(labelTemp, "");
  printf("%-8s %-4s %-4s %-4s %-4s\n", labelTemp, op, p1, p2, pTo);
  GenPcodeToAsm(g, labelTemp, op, p1, p2, pTo);
}

void GenPcodeToAsm(Generator *g, char* label, char* op, char* p1, char* p2, char* pTo) {
  if (strlen(label)>0)
    GenAsmCode(g, label, "", "", "", "");
  if (strEqual(op, "=")) { // pTo = p1
    GenAsmCode(g, "", "LD", "R1", p1, "");
    GenAsmCode(g, "", "ST", "R1", pTo, "");
  } else if (strPartOf(op, "|+|-|*|/|")) { // pTo = p1 op p2
    char asmOp[100]; 
    if (strEqual(op, "+")) strcpy(asmOp, "ADD");
    else if (strEqual(op, "-")) strcpy(asmOp, "SUB");
    else if (strEqual(op, "*")) strcpy(asmOp, "MUL");
    else if (strEqual(op, "/")) strcpy(asmOp, "DIV");
    GenAsmCode(g, "", "LD", "R1", p1, "");
    GenAsmCode(g, "", "LD", "R2", p2, "");
    GenAsmCode(g, "", asmOp,"R3", "R2", "R1");
    GenAsmCode(g, "", "ST", "R3", pTo, "");
  } else if (strEqual(op, "CMP")) { // CMP p1, p2
    GenAsmCode(g, "", "LD", "R1", p1, "");
    GenAsmCode(g, "", "LD", "R2", p2, "");
    GenAsmCode(g, "", "CMP", "R1", "R2", "");
  } else if (strEqual(op, "J")) { // J op label
    char asmOp[100];
    if (strEqual(p1, "=")) strcpy(asmOp, "JEQ");
    else if (strEqual(p1, "!=")) strcpy(asmOp, "JNE");
    else if (strEqual(p1, "<")) strcpy(asmOp, "JLT");
    else if (strEqual(p1, ">")) strcpy(asmOp, "JGT");
    else if (strEqual(p1, "<=")) strcpy(asmOp, "JLE");
    else if (strEqual(p1, ">=")) strcpy(asmOp, "JGE");
    else strcpy(asmOp, "JMP");
    GenAsmCode(g, "", asmOp, pTo, "", "");
  } else if (strEqual(op, "RET")) {
    GenAsmCode(g, "", "LD", "R1", pTo, "");
    GenAsmCode(g, "", "RET", "", "", "");
  }
}

void GenAsmCode(Generator *g, char* label, char* op, char* p1, char* p2, char* pTo) {
  char *realOp = op;
  if (strEqual(op, "LD"))
    if (isdigit(p2[0]))
      realOp = "LDI";
  fprintf(g->asmFile, "%-8s %-4s %-4s %-4s %-4s\n", label, realOp, p1, p2, pTo);
}

void GenTempVar(Generator *g, char *tempVar) {
  sprintf(tempVar, "T%d", g->varCount++);
}

void negateOp(char *condOp, char *negOp) {
  if (strEqual(condOp, "==")) strcpy(negOp, "!=");
  if (strEqual(condOp, "!=")) strcpy(negOp, "==");
  if (strEqual(condOp, ">=")) strcpy(negOp, "<");
  if (strEqual(condOp, "<=")) strcpy(negOp, ">");
  if (strEqual(condOp, ">")) strcpy(negOp, "<=");
  if (strEqual(condOp, "<")) strcpy(negOp, ">=");
}

Facebook

Unless otherwise stated, the content of this page is licensed under Creative Commons Attribution-NonCommercial-ShareAlike 3.0 License