BNF3 編譯器

編譯器設計

編譯器簡介

高階語言

語法理論

剖析器

語意理論

符號表

直譯器

型態系統

中間碼

目標語言

最佳化

錯誤處理

進階議題

原始碼下載

程式實作

C 語言

案例研究

JavaScript

V8

Lua

Oberon

NeoPascal

pcc

tcc

gcc

C--

Lex

YACC

AntLR

LLVM

CLang

訊息

相關網站

參考文獻

最新修改

簡體版

English

輸入檔:type3.c0

int x=3, y=5, z;
if ((x+y)>6)
  z = 0;
else
  z = 1;

int i=0, s=0;
while (i<100) {
    s = s + i;
    i++;
}

def sum(n) {
    for (i=0; i<n; i++)
        s = s + i;
    return s;
}

def swap(a, b) {
    t = a;
    a = b;
    b = t;
}

int total=sum(10);

=swap(x, y);

執行結果

D:\oc\compiler>gcc bnf3.c -o bnf3

D:\oc\compiler>bnf3 type3.c0
==== compile file:type3.c0 ========
         =        t1       3
         =        x        t1
         =        t2       5
         =        y        t2
         +        t3       x        y
         =        t4       6
         >        t5       t3       t4
         ifnot    t5       _L0:
         =        t6       0
         =        z        t6
_L0:
         =        t7       1
         =        z        t7
         =        t8       0
         =        i        t8
         =        t9       0
         =        s        t9
_L1:
         =        t10      100
         <        t11      i        t10
         ifnot    t11      _L2:
         +        t12      s        i
         =        s        t12
         ++       i
         goto     _L1:
_L2:
_sum:    function
         PARAM    n
         =        t13      0
         =        i        t13
_L4:
         <        t14      i        n
         ifnot    t14      _L5:
         +        t15      s        i
         =        s        t15
         ++       i
         goto     _L4:
_L5:
         RET      s
_swap:   function
         PARAM    a
         PARAM    b
         =        t        a
         =        a        b
         =        b        t
         =        t16      10
         ARG      t16
         CALL     sum      t17
         =        total    t17
         ARG      x
         ARG      y
         CALL     swap     t18
======= symTable ========
sym[1]=x
sym[2]=3
sym[3]=y
sym[4]=5
sym[5]=z
sym[6]=6
sym[7]=_L0:
sym[8]=0
sym[9]=1
sym[10]=i
sym[11]=s
sym[12]=_L1:
sym[13]=_L2:
sym[14]=100
sym[15]=_L3:
sym[16]=_sum:
sym[17]=n
sym[18]=_L4:
sym[19]=_L5:
sym[20]=_L6:
sym[21]=_swap:
sym[22]=a
sym[23]=b
sym[24]=t
sym[25]=total
sym[26]=sum
sym[27]=10
sym[28]=swap
======= strTable ========
x.3.y.5.z.6._L0:.0.1.i.s._L1:._L2:.100._L3:._sum:.n._L4:._L5:._L6:._swap:.a.b.t.
total.sum.10.swap.

程式:bnf3.c

#include <stdio.h>
#include <assert.h>
#include <stdarg.h>

// ================== 基本常數 ===================
#define FALSE   0
#define TRUE    1
#define BOOL    unsigned char
#define NIL        -1

#define LEN 256

#define SYM_TABLE_MAX 1000
#define STR_TABLE_MAX 10000

#define SPACE " \t\n\r"         // 空白字元
#define ALPHA "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" // 英文字母
#define DIGIT "0123456789"        // 數字字元
#define OPERATOR "+-*/%<=>!&|"        // 運算符號字元集 (用來取得 +,-,*,/, ++, ...)

char *keys[] = {"if", "else", "for", "while", "def", "return", NULL};
char *types[] = { "int", "char", "float", "void", NULL };
char *fnames[] = { NULL };

typedef enum Tag { STRING, NUMBER, REAL, ID, OP, END } Tag; // 標記 tag 的選項 
char *tags[] = {"STRING", "NUMBER", "REAL", "ID", "OP", "END" };
typedef enum Kind { KEY, TYPE, FUNC, VAR, NONE } Kind; // 種類 kind 的選項 
char *kinds[] = {"KEY", "TYPE", "FUNC", "VAR", "NONE" };

#define cmember(ch, set) (strchr(set, ch) != NULL)// 測試字元 ch 是否在 set 當中
#define NEXT(var, tag)  char var[LEN];strcpy(var, token);nextTag(tag)
#define NEXT_TOKEN(var, tokens)  char var[LEN];strcpy(var, token);next(tokens)
#define SYMBOL(var, id)  char var[LEN];strcpy(var, symbol(id))
#define ASSERT(cond)     if (!(cond)) { ERROR(); }  // 要求條件要成立,否則就算錯

// 錯誤報告巨集函數,會印出哪一行呼叫了 ERROR()
#define ERROR() printf("ERROR => file:%s, line: %d\n", __FILE__, __LINE__);printStack();exit(1)

FILE *file;
char ch;
int line=1, pos=0;
char token[LEN];
int  tokenIdx = 0;
Tag tag;
Kind kind;

typedef struct Sym {
    int strIdx;
} Sym;

int symTop = 1;
Sym symTable[SYM_TABLE_MAX];

char strTable[STR_TABLE_MAX];
int  strTop = 0;

int strFind(char *strList[], char *key) {
    int i;
    for (i=0; strList[i] != NULL; i++) {
        if (strcmp(strList[i], key)==0)
            return i;
    }
    return NIL;
}

void debug(const char *fmt, ...) {
    va_list args;
    va_start(args, fmt);
    vprintf(fmt, args);
}

char symTemp[LEN];

char *symbol(int s) {
    if (s == 0)
        return "";
    else if (s > 0)
        return &strTable[symTable[s].strIdx];
    else { // s < 0; temp variable
        sprintf(symTemp, "t%d", -1*s);
        return symTemp;
    }
}

int symFind(char *sym) {
    int i;
    for (i=1; i<symTop; i++) {
        if (strcmp(sym, symbol(i))==0)
            return i;
    }
    return NIL;
}

int symPut(char *sym) {
    int s = symFind(sym);
    if (s == NIL) {
        sprintf(&strTable[strTop], "%s", sym);
        s = symTop++;
        symTable[s].strIdx = strTop;
        strTop += strlen(sym) + 1;
    }
    return s;
}

void symTableDump() {
    int i;
    printf("======= symTable ========\n");
    for (i=1; i<symTop; i++) {
        printf("sym[%d]=%s\n", i, symbol(i));
    }
}

void strTableDump() {
    int i;
    printf("======= strTable ========\n");
    for (i=0; i<strTop; i++) {
        char c = strTable[i];
        if (c == '\0') 
            printf(".");
        else
            printf("%c", c);
    }
    printf("\n");
}

int labelIdx = 0;

int nextLabel() {
    char label[LEN];
    sprintf(label, "_L%d:", labelIdx++);
    return symPut(label);
}

int tempIdx = -1;

int nextTemp() {
    return tempIdx--;
}

char *nameStack[100];
void *objStack[100];
int top = 0;

void printStack() {
    int i;
    for (i=0; i<top; i++)
        printf("/%s", nameStack[i]);
    printf("\n");
}

int push(char *name) {
    nameStack[top] = name;
    top ++;
}

char *pop(char *name) {
    top--;
    char *nameTop = nameStack[top];
    ASSERT(strcmp(nameTop, name)==0);
    return nameTop;
}

// 功能:檢查 token 是否為集合字串 set 中的元素
// 範例:strPartOf("int", "byte|int|float") 會傳回 TRUE
BOOL strPartOf(char *token, char *set) {
    ASSERT(token != NULL && set != NULL);
    ASSERT(strlen(token) < LEN);
    char ttoken[LEN+2];
    sprintf(ttoken, "|%s|", token);
    return (strstr(set, ttoken)!=NULL);
}

BOOL isNext(char *tokens) { // 檢查下一個詞彙是否為 tokens 其中之一
    char tTokens[LEN+1];
    sprintf(tTokens, "|%s|", tokens);
    if (strPartOf(token, tTokens))
        return TRUE;
    else
        return FALSE;
}

BOOL isNextTag(Tag pTag) { // 檢查下一個 tag 是否為 pTag
    return (tag == pTag);
}

BOOL isNextKind(Kind pKind) { // 檢查下一個 kind 是否為 pKind
    return (kind == pKind);
}

char cnext() {
    token[tokenIdx++]=ch;
    pos++;
    ch=fgetc(file);
    return ch;
}

void tnext() {
    while (cmember(ch, SPACE)) { // 忽略空白
        tokenIdx = 0;
        if (ch=='\n') {
            line++;
            pos = 1;
        }
        cnext();
    }
    if (feof(file)) {
        strcpy(token, "$END$");
        tag = END;
        return;
    }
    tokenIdx = 0;
    if (ch == '\"') { // 如果是 " 代表字串開頭
        // 字串常數 : string = ".."
        cnext(); // 跳過 "
        while (ch != '\"') cnext(); // 一直讀到下一個 " 符號為止。
        cnext(); // 跳過 "
        tag=STRING;
    } else if (cmember(ch, OPERATOR)) { // 如果是OP(+-*/<=>!等符號)
          // 運算符號 : OP = ++, --, <=, >=, ...
        while (cmember(ch, OPERATOR)) cnext(); // 一直讀到不是OP為止
        token[tokenIdx] = '\0';
        tag=OP;
    } else if (cmember(ch, DIGIT)) { // 如果是數字
           // 數字常數 : number = 312, 77568, ...
        while (cmember(ch, DIGIT)) cnext(); // 一直讀到不是數字為止
        tag=NUMBER;
        // 實數 : real = 3.14, ...
        if (ch == '.') {
            cnext(); // 取得小數點
            tag = REAL;
        }
        while (cmember(ch, DIGIT)) cnext(); // 取得小數部分
    } else if (cmember(ch, ALPHA)) { // 如果是英文字母
        // 基本詞彙 : token = int, sum, i, for, if, x1y2z, ....
        while (cmember(ch, ALPHA) || cmember(ch, DIGIT))
            cnext(); // 一直讀到不是英文字母 (或數字)為止
        token[tokenIdx] = '\0';
        tag=ID;
    } else { // 其他符號,都解讀為單一字元
        tag = OP;
        cnext(); // 傳回單一字元
    }
    token[tokenIdx] = '\0';
    if (strFind(keys, token)!=NIL) 
        kind = KEY;
    else if (strFind(types, token)!=NIL)
        kind = TYPE;
    else if (strFind(fnames, token)!=NIL)
        kind = FUNC;
    else if (tag == ID)
        kind = VAR;
    else
        kind = NONE;
//    printf("token=%s tag=%s kind=%s\n", token, tags[tag], kinds[kind]);
}

void next(char *tokens) { // 檢查下一個詞彙的型態
    if (isNext(tokens)) { // 如果是pTags型態之一
        tnext();
    } else { // 否則,印出錯誤訊息
        debug("next():line=%d pos=%d token=%s expect(%s)\n", 
            line, pos, token, tokens); 
        ERROR();
    }
}

void nextTag(Tag eTag) { // 檢查下一個 tag 的型態
    if (isNextTag(eTag)) {
        tnext();
    } else {
        debug("nextTag():line=%d pos=%d token=%s tag=%s expect(tag=%s)\n", 
            line, pos, token, tags[tag], tags[eTag]);
        ERROR();
    }
}

void nextKind(Kind eKind) { // 檢查下一個 tag 的型態
    if (isNextKind(eKind)) {
        tnext();
    } else {
        debug("nextKind():line=%d pos=%d token=%s kind=%s expect(kind=%s)\n", 
            line, pos, token, kinds[kind], kinds[eKind]); 
        ERROR();
    }
}

char *pcodeText = NULL;

void pcodeRedirect(char *text) {
    if (text != NULL)
        strcpy(text, "");
    pcodeText = text;
}

void pcodeDump(char *text) {
    printf("%s", text);
}

void pcode(int lab, char *op, int s, int s1, int s2) {
    char pcodeStr[LEN];
    SYMBOL(label, lab); SYMBOL(p, s); SYMBOL(p1, s1); SYMBOL(p2, s2);
    sprintf(pcodeStr, "%-8s %-8s %-8s %-8s %-8s\n", label, op, p, p1, p2);
    if (pcodeText == NULL)
        printf("%s", pcodeStr);
    else
        strcat(pcodeText, pcodeStr);
}

#define MAX_ARGS 100
int argList[MAX_ARGS];
int argTop = 0;

int argPush(int a) { argList[argTop++] = a; }
int argPop() { return argList[--argTop]; }

// EBNF : ARGS = E? {,E}*
// BNF  : ARGS = {} | E {, ARGS}?
// 用在 ID(ARGS) 中,follow 為 )
int ARGS() { // BNF
    push("ARGS");
    argTop = 0;
    if (!isNext(")")) {
        argPush(E());
        if (isNext(",")) {
            next(",");
            ARGS();
        }
    }
    pop("ARGS");
}

// F=STRING | NUMBER | '(' E ')' | ID ('('ARGS')')?
int F() {
    int f;
    push("F");
    if (isNext("(")) {
        next("(");
        f = E();
        next(")");
    } else if (isNextTag(NUMBER)) {
        NEXT(number, NUMBER);
        int n = symPut(number);
        f = nextTemp();
        pcode(0, "=", f, n, 0);
    } else if (isNextTag(STRING)) {
        NEXT(str, STRING);
        f=symPut(str);
    } else { // if (isNextTag(ID))
        NEXT(id, ID);
        f=symPut(id);
        if (isNext("(")) { // function call
            next("(");
            ARGS();
            next(")");
            int i;
            for (i=0; i<argTop; i++) {
                pcode(0, "ARG", argList[i], 0, 0);
            }
            int t = nextTemp();
            pcode(0, "CALL", f, t, 0);
            f = t;
        }
    }
    pop("F");
    return f; 
}

#define E_OP "+|-|*|/|%|&|^|||&&||||>>|<<|==|<=|>=|<|>"
// E=F (E_OP F)?
int E() {
    push("E");
    int f1 = F();
    if (isNext(E_OP)) {
        NEXT_TOKEN(op, E_OP);
        int f2 = F();
        int e = nextTemp();
        pcode(0, op, e, f1, f2);
        f1 = e;
    }
    pop("E");
    return f1;
}

// ASSIGN = (ID[++|--]?)?(=E)?
int ASSIGN() {
    push("ASSIGN");
    int var = NIL;
    if (isNextTag(ID)) {
        NEXT(id, ID);
        var = symPut(id);
        if (isNext("++|--")) {
            NEXT_TOKEN(op1, "++|--");
            ASSERT(var != NIL);
            pcode(0, op1, var, 0, 0);
        }
    }
    if (isNext("=")) {
        next("=");
        int e = E();
        if (var != NIL)
            pcode(0, "=", var, e, 0);
    }
    pop("ASSIGN");
}

// EBNF : DECLS = TYPE? ASSIGN {, ASSIGN}*
// BNF  : DECLS = TYPE? ASSIGN_LIST 

char type[LEN];
int DECLS() {
    push("DECLS");
    strcpy(type, "");
    if (isNextKind(TYPE)) {
        strcpy(type, token);
        nextKind(TYPE);
    }
    ASSIGN_LIST();
    pop("DECLS");
}

// BNF  : ASSIGN_LIST = {} | ASSIGN {, ASSIGN_LIST}?
int ASSIGN_LIST() {
    push("ASSIGN_LIST");
    if (!isNext(";")) {
        ASSIGN();
        if (isNext(",")) {
            next(",");
            ASSIGN_LIST();
        }
    }
    pop("ASSIGN_LIST");
}

// STMT=return E | DECLS
int STMT() {
    push("STMT");
    if (isNext("return")) { 
        next("return");
        int e = E();
        pcode(0, "RET", e, 0, 0);
    } else {
        DECLS();
    }
    pop("STMT");
}

// BASE=STMT;|IF|WHILE|FOR|BLOCK|DEF
int BASE() {
    push("BASE");
    if (isNext("{"))
        BLOCK();
    else if (isNext("if"))
        IF();
    else if (isNext("while"))
        WHILE();
    else if (isNext("for"))
        FOR();
    else if (isNext("def"))
        DEF();
    else {
        STMT();
        next(";");
    }
    pop("BASE");
}

// EBNF : BASE_LIST = BASE*
// BNF : BASE_LIST = {} | BASE BASE_LIST?
int BASE_LIST() {
    push("BASE_LIST");
    if (!isNext("}|$END$")) {
        BASE();
        BASE_LIST();
    }
    pop("BASE_LIST");
}

// EBNF : BLOCK={ BASE* }
// BNF : BLOCK={ BASE_LIST }
int BLOCK() {
    push("BLOCK");
    next("{");
    BASE_LIST();
    next("}");
    pop("BLOCK");
}

// IF=if (E) BASE else BASE
int IF() {
    push("IF");
    next("if");
    next("(");
    int e = E();
    next(")");
    int elseLabel = nextLabel();
    pcode(0, "ifnot", e, elseLabel, 0);
    BASE();
    if (isNext("else")) {
        next("else");
        pcode(elseLabel, "", 0, 0, 0);
        BASE();
    }
    pop("IF");
}

// WHILE=while (E) BASE
int WHILE() {
    push("WHILE");
    int startLabel = nextLabel();
    int exitLabel = nextLabel();
    pcode(startLabel, "", 0, 0, 0);
    next("while");
    next("(");
    int e = E();
    next(")");
    pcode(0, "ifnot", e, exitLabel, 0);
    BASE();
    pcode(0, "goto", startLabel, 0, 0);
    pcode(exitLabel, "", 0, 0, 0);
    pop("WHILE");
}

// FOR=for (STMT;E;STMT) BASE
int FOR() {
    push("FOR");
    int startLabel = nextLabel();
    int exitLabel = nextLabel();
    next("for");
    next("(");
    STMT();
    next(";");
    pcode(startLabel, "", 0, 0, 0);
    int e = E();
    pcode(0, "ifnot", e, exitLabel, 0);
    next(";");
    char setPcode[20*LEN];
    pcodeRedirect(setPcode);
    STMT(); // ? 這個不能邊比對邊編譯,怎麼辦?用 pipe 先轉向某 setPCode,然後在將結果補在下面。
    next(")");
    pcodeRedirect(NULL);
    BASE();
    pcodeDump(setPcode);
    pcode(0, "goto", startLabel, 0, 0);
    pcode(exitLabel, "", 0, 0, 0);
    pop("FOR");
}

// EBNF : PARAMS = ID? {, ID}*
// BNF  : PARAMS = {} | ID {, PARAMS}?
int PARAMS() {
    push("PARAMS");
    if (isNextTag(ID)) {
        NEXT(p1, ID);
        pcode(0, "PARAM", symPut(p1), 0, 0);
        if (isNext(",")) {
            next(",");
            PARAMS();
        }
    }
    pop("PARAMS");
}

// DEF=def ID(PARAMS) BLOCK
int DEF() {
    push("DEF");
    int exitLabel = nextLabel();
    next("def");
    NEXT(id, ID);
    char fname[LEN];
    sprintf(fname, "_%s:", id);
    int f = symPut(fname);
    pcode(f, "function", 0, 0, 0);
    next("(");
    PARAMS();
    next(")");    
    BLOCK();
    pop("DEF");
}

// PROG=BASE*
int PROG() {
    push("PROG");
    BASE_LIST();
    pop("PROG");
}

int init() {
    fseek(file, 0, SEEK_SET);
    tokenIdx = 0;
    tnext();
}

int scan() {
    init();
    while (TRUE) {
        tnext();
        if (tag == END)
            break;
        printf("token=%-8s tag=%-8s kind=%d,%-8s\n", token, tags[tag], kind, kinds[kind]);
    }
}

int compile(char *fname) {
    printf("==== compile file:%s ========\n", fname);
    file = fopen(fname, "r");
//  init(); scan();
    init();
    PROG();
    symTableDump();
    strTableDump();
    fclose(file);
}

int main(int argc, char * argv[]) {
    char *fname = argv[1];
    compile(fname);
}

Facebook

Unless otherwise stated, the content of this page is licensed under Creative Commons Attribution-NonCommercial-ShareAlike 3.0 License