Def 編譯器

計算語言學

簡介

詞彙

語法

語意

理解

問題領域

語言生成

語法剖析

語意分析

處理方法

正規表達式

BNF 語法

掃描

剖析器

符號表

解譯

編譯

翻譯

各種語言

組合語言

程式語言

標記語言

維基語言

自然語言

中文

英文

程式實作

JavaScript

C

Python

相關書籍

自然語言處理

編譯器設計

系統程式

訊息

相關網站

參考文獻

最新修改

簡體版

English

輸入檔案:defc.c0

str = "Hello!";
x = 3;
y=5;
if ((x+y)>6)
  z = 0;
else
  z = 1;

i=0;
while (i<100) {
    s = s + i;
    i++;
}

def sum(n) {
    for (i=0; i<n; i++) {
        s = s + i;
    }
}

def swap(a, b) {
    t = a;
    a = b;
    b = t;
}

sum(10);

swap(x, y);

編譯執行結果

D:\oc\compiler>gcc defc.c -o defc

D:\oc\compiler>defc defc.c0
==== compile file:defc.c0 ========
         =        str      "Hello!"
         =        t1       3
         =        x        t1
         =        t2       5
         =        y        t2
         +        t3       x        y
         =        t4       6
         >        t5       t3       t4
         ifnot    t5       _L0:
         =        t6       0
         =        z        t6
_L0:
         =        t7       1
         =        z        t7
         =        t8       0
         =        i        t8
_L1:
         =        t9       100
         <        t10      i        t9
         ifnot    t10      _L2:
         +        t11      s        i
         =        s        t11
         ++       i
         goto     _L1:
_L2:
_sum:    function
         PARAM    n
         =        t12      0
         =        i        t12
_L4:
         <        t13      i        n
         ifnot    t13      _L5:
         +        t14      s        i
         =        s        t14
         ++       i
         goto     _L4:
_L5:
_swap:   function
         PARAM    a
         PARAM    b
         =        t        a
         =        a        b
         =        b        t
         =        t15      10
         ARG      t15
         CALL     sum
         ARG      x
         ARG      y
         CALL     swap
======= symTable ========
sym[1]=str
sym[2]="Hello!"
sym[3]=x
sym[4]=3
sym[5]=y
sym[6]=5
sym[7]=6
sym[8]=_L0:
sym[9]=z
sym[10]=0
sym[11]=1
sym[12]=i
sym[13]=_L1:
sym[14]=_L2:
sym[15]=100
sym[16]=s
sym[17]=_L3:
sym[18]=_sum:
sym[19]=n
sym[20]=_L4:
sym[21]=_L5:
sym[22]=_L6:
sym[23]=_swap:
sym[24]=a
sym[25]=b
sym[26]=t
sym[27]=sum
sym[28]=10
sym[29]=swap
======= strTable ========
str."Hello!".x.3.y.5.6._L0:.z.0.1.i._L1:._L2:.100.s._L3:._sum:.n._L4:._L5:._L6:.
_swap:.a.b.t.sum.10.swap.

程式:defc.c

#include <stdio.h>
#include <assert.h>
#include <stdarg.h>

// ================== 基本常數 ===================
#define FALSE   0
#define TRUE    1
#define BOOL    unsigned char
#define NIL        -1

#define LEN 256

#define SYM_TABLE_MAX 1000
#define STR_TABLE_MAX 10000

#define SPACE " \t\n\r"         // 空白字元
#define ALPHA "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" // 英文字母
#define DIGIT "0123456789"        // 數字字元
#define OP "+-*/%<=>!&|"        // 運算符號字元集 (用來取得 +,-,*,/, ++, ...)

char *keywords[] = {"if", "else", "for", "while", "def", "int", "char", "float", NULL};

#define STRING "STRING"
#define NUMBER "NUMBER"
#define FLOAT  "FLOAT"
#define ID     "ID"
#define END    "$END$"

#define cmember(ch, set) (strchr(set, ch) != NULL)// 測試字元 ch 是否在 set 當中
#define NEXT(var, tags)  char var[LEN];strcpy(var, token);next(tags)
#define SYMBOL(var, id)  char var[LEN];strcpy(var, symbol(id))
#define ASSERT(cond)     if (!(cond)) { ERROR(); }  // 要求條件要成立,否則就算錯

// 錯誤報告巨集函數,會印出哪一行呼叫了 ERROR()
#define ERROR() printf("ERROR => file:%s, line: %d\n", __FILE__, __LINE__);exit(1)

FILE *file;
char ch;
int line=1, pos=0;
char token[LEN], tag[LEN];
int  tokenIdx = 0;

typedef struct Sym {
    int strIdx;
} Sym;

int symTop = 1;
Sym symTable[SYM_TABLE_MAX];

char strTable[STR_TABLE_MAX];
int  strTop = 0;

int strFind(char *strList[], char *key) {
    int i;
    for (i=0; strList[i] != NULL; i++) {
        if (strcmp(strList[i], key)==0)
            return i;
    }
    return NIL;
}

void debug(const char *fmt, ...) {
    va_list args;
    va_start(args, fmt);
    vprintf(fmt, args);
}

char symTemp[LEN];

char *symbol(int s) {
    if (s == 0)
        return "";
    else if (s > 0)
        return &strTable[symTable[s].strIdx];
    else { // s < 0; temp variable
        sprintf(symTemp, "t%d", -1*s);
        return symTemp;
    }
}

int symFind(char *sym) {
    int i;
    for (i=1; i<symTop; i++) {
        if (strcmp(sym, symbol(i))==0)
            return i;
    }
    return NIL;
}

int symPut(char *sym) {
    int s = symFind(sym);
    if (s == NIL) {
        sprintf(&strTable[strTop], "%s", sym);
        s = symTop++;
        symTable[s].strIdx = strTop;
        strTop += strlen(sym) + 1;
    }
    return s;
}

void symTableDump() {
    int i;
    printf("======= symTable ========\n");
    for (i=1; i<symTop; i++) {
        printf("sym[%d]=%s\n", i, symbol(i));
    }
}

void strTableDump() {
    int i;
    printf("======= strTable ========\n");
    for (i=0; i<strTop; i++) {
        char c = strTable[i];
        if (c == '\0') 
            printf(".");
        else
            printf("%c", c);
    }
    printf("\n");
}

int labelIdx = 0;

int nextLabel() {
    char label[LEN];
    sprintf(label, "_L%d:", labelIdx++);
    return symPut(label);
}

int tempIdx = -1;

int nextTemp() {
    return tempIdx--;
}

char stack[100];
int top = 0;

void printStack() {
    int i;
    for (i=0; i<top; i++)
        printf("%c", stack[i]);
    printf("\n");
}

int push(char c) {
    stack[top++] = c;
//    printStack();
}

char pop(char c) {
    char ctop = stack[--top];
//    printStack();
    ASSERT(ctop==c);
    return ctop;
}

// 功能:檢查 token 是否為集合字串 set 中的元素
// 範例:strPartOf("int", "byte|int|float") 會傳回 TRUE
BOOL strPartOf(char *token, char *set) {
    ASSERT(token != NULL && set != NULL);
    ASSERT(strlen(token) < LEN);
    char ttoken[LEN+2];
    sprintf(ttoken, "|%s|", token);
    return (strstr(set, ttoken)!=NULL);
}

BOOL isNext(char *tags) { // 檢查下一個詞彙的型態
    char tTags[LEN+1];
    sprintf(tTags, "|%s|", tags);
    if (strPartOf(tag, tTags))
        return TRUE;
    else
        return FALSE;
}

char cnext() {
    token[tokenIdx++]=ch;
    pos++;
    ch=fgetc(file);
    return ch;
}

void tnext() {
    while (cmember(ch, SPACE)) { // 忽略空白
        tokenIdx = 0;
        if (ch=='\n') {
            line++;
            pos = 1;
        }
        cnext();
    }
    if (feof(file)) {
        strcpy(token, END);
        strcpy(tag, END);
        return;
    }
    tokenIdx = 0;
    if (ch == '\"') { // 如果是 " 代表字串開頭
        // 字串常數 : string = ".."
        cnext(); // 跳過 "
        while (ch != '\"') cnext(); // 一直讀到下一個 " 符號為止。
        cnext(); // 跳過 "
        strcpy(tag, STRING);
    } else if (cmember(ch, OP)) { // 如果是OP(+-*/<=>!等符號)
          // 運算符號 : OP = ++, --, <=, >=, ...
        while (cmember(ch, OP)) cnext(); // 一直讀到不是OP為止
        token[tokenIdx] = '\0';
        strcpy(tag, token);
    } else if (cmember(ch, DIGIT)) { // 如果是數字
           // 數字常數 : number = 312, 77568, ...
        while (cmember(ch, DIGIT)) cnext(); // 一直讀到不是數字為止
        strcpy(tag, NUMBER);
        // 浮點常數 : float = 3.14, ...
        if (ch == '.') {
            cnext(); // 取得小數點
            strcpy(tag, "FLOAT");
        }
        while (cmember(ch, DIGIT)) cnext(); // 取得小數部分
    } else if (cmember(ch, ALPHA)) { // 如果是英文字母
        // 基本詞彙 : token = int, sum, i, for, if, x1y2z, ....
        while (cmember(ch, ALPHA) || cmember(ch, DIGIT))
            cnext(); // 一直讀到不是英文字母 (或數字)為止
        token[tokenIdx] = '\0';
        if (strFind(keywords, token)==NIL)
            strcpy(tag, ID);
        else
            strcpy(tag, token);
    } else { // 其他符號,都解讀為單一字元
        sprintf(tag, "%c", ch);
        cnext(); // 傳回單一字元
    }
    token[tokenIdx] = '\0';
//    printf("token=%s tag=%s\n", token, tag);
}

void next(char *tags) { // 檢查下一個詞彙的型態
    if (isNext(tags)) { // 如果是pTags型態之一
        tnext();
    } else { // 否則,印出錯誤訊息
        debug("next():line=%d pos=%d token=%s tag=%s, expect(%s)\n", line, pos, token, tag, tags); 
        printStack();
        ERROR();
    }
}

char *pcodeText = NULL;

void pcodeRedirect(char *text) {
    if (text != NULL)
        strcpy(text, "");
    pcodeText = text;
}

void pcodeDump(char *text) {
    printf("%s", text);
}

void pcode(int lab, char *op, int s, int s1, int s2) {
    char pcodeStr[LEN];
    SYMBOL(label, lab); SYMBOL(p, s); SYMBOL(p1, s1); SYMBOL(p2, s2);
    sprintf(pcodeStr, "%-8s %-8s %-8s %-8s %-8s\n", label, op, p, p1, p2);
    if (pcodeText == NULL)
        printf("%s", pcodeStr);
    else
        strcat(pcodeText, pcodeStr);
}

// F=STRING|NUMBER|ID [++|--]|'(' E ')'
int F() {
    int f;
    push('F');
    if (isNext("(")) {
        next("(");
        f = E();
        next(")");
    } else if (isNext(NUMBER)) {
        NEXT(number, NUMBER);
        int n = symPut(number);
        f = nextTemp();
        pcode(0, "=", f, n, 0);
    } else if (isNext(STRING)) {
        NEXT(str, STRING);
        f=symPut(str);
    } else if (isNext(ID)) {
        NEXT(id, ID);
        if (isNext("++|--"))
            next("++|--");
        f=symPut(id);
    }
    pop('F');
    return f; 
}

#define E_OP "+|-|*|/|%|&|^|||&&||||>>|<<|==|<=|>=|<|>"
// E=F (E_OP F)?
int E() {
    push('E');
    int f1 = F();
    if (isNext(E_OP)) {
        NEXT(op, E_OP);
        int f2 = F();
        int e = nextTemp();
        pcode(0, op, e, f1, f2);
        f1 = e;
    }
    pop('E');
    return f1;
}

#define MAX_ARGS 100
int argList[MAX_ARGS];
int argTop = 0;

int argPush(int a) { argList[argTop++] = a; }
int argPop() { return argList[--argTop]; }

// ARGS = E? {,E}*
// 用在 ID(ARGS) 中,follow 為 )
int ARGS() {
    push('A');
    argTop = 0;
    if (!isNext(")")) {
        argPush(E());
        while (isNext(",")) {
            next(",");
            argPush(E());
        }
    }
    pop('A');
}

// SET=ID(ARGS) | ID[++|--] | ID=E
int SET() {
    push('S');
    NEXT(id, ID);
    int var = symPut(id);
    if (isNext("=")) {
        next("=");
        int e = E();
        pcode(0, "=", var, e, 0);
    } else if (isNext("++|--")) {
        NEXT(op1, "++|--");
        pcode(0, op1, var, 0, 0);
    } else if (isNext("(")) {
        next("(");
        ARGS();
        int i;
        for (i=0; i<argTop; i++) {
            pcode(0, "ARG", argList[i], 0, 0);
        }
        pcode(0, "CALL", var, 0, 0);
        next(")");        
    }
    pop('S');
}

// BLOCK={ BASE* }
int BLOCK() {
    push('K');
    next("{");
    while (!isNext("}")) {
        BASE();
    }
    next("}");
    pop('K');
}

// BASE=SET;|IF|WHILE|FOR|BLOCK
int BASE() {
    push('B');
    if (isNext("{"))
        BLOCK();
    else if (isNext("if"))
        IF();
    else if (isNext("while"))
        WHILE();
    else if (isNext("for"))
        FOR();
    else if (isNext("def"))
        DEF();
    else {
        SET();
        next(";");
    }
    pop('B');
}

// IF=if (E) BASE else BASE
int IF() {
    push('I');
    next("if");
    next("(");
    int e = E();
    next(")");
    int elseLabel = nextLabel();
    pcode(0, "ifnot", e, elseLabel, 0);
    BASE();
    if (isNext("else")) {
        next("else");
        pcode(elseLabel, "", 0, 0, 0);
        BASE();
    }
    pop('I');
}

// WHILE=while (E) BASE
int WHILE() {
    push('W');
    int startLabel = nextLabel();
    int exitLabel = nextLabel();
    pcode(startLabel, "", 0, 0, 0);
    next("while");
    next("(");
    int e = E();
    next(")");
    pcode(0, "ifnot", e, exitLabel, 0);
    BASE();
    pcode(0, "goto", startLabel, 0, 0);
    pcode(exitLabel, "", 0, 0, 0);
    pop('W');
}

// FOR=for (SET;E;SET) BASE
int FOR() {
    push('F');
    int startLabel = nextLabel();
    int exitLabel = nextLabel();
    next("for");
    next("(");
    SET();
    next(";");
    pcode(startLabel, "", 0, 0, 0);
    int e = E();
    pcode(0, "ifnot", e, exitLabel, 0);
    next(";");
    char setPcode[LEN];
    pcodeRedirect(setPcode);
    SET(); // ? 這個不能邊比對邊編譯,怎麼辦?用 pipe 先轉向某 setPCode,然後在將結果補在下面。
    next(")");
    pcodeRedirect(NULL);
    BASE();
    pcodeDump(setPcode);
    pcode(0, "goto", startLabel, 0, 0);
    pcode(exitLabel, "", 0, 0, 0);
    pop('F');
}

// PARAMS = ID? {, ID}*
int PARAMS() {
    if (isNext(ID)) {
        NEXT(p1, ID);
        pcode(0, "PARAM", symPut(p1), 0, 0);
        while (isNext(",")) {
            next(",");
            NEXT(p2, ID);
            pcode(0, "PARAM", symPut(p2), 0, 0);
        }
    }
}

// DEF=def ID(PARAMS) BLOCK
int DEF() {
    push('D');
    int exitLabel = nextLabel();
    next("def");
    NEXT(id, ID);
    char fname[LEN];
    sprintf(fname, "_%s:", id);
    int f = symPut(fname);
    pcode(f, "function", 0, 0, 0);
    next("(");
    PARAMS();
    next(")");    
    BLOCK();
    pop('D');
}

// PROG=BASE*
int PROG() {
    push('P');
    while (!isNext(END)) {
        BASE();
    }
    pop('P');
}

int init() {
    fseek(file, 0, SEEK_SET);
    tokenIdx = 0;
    tnext();
}

int scan() {
    init();
    while (TRUE) {
        tnext();
        if (strcmp(token, END)==0)
            break;
        printf("%s\n", token);
    }
}

int compile(char *fname) {
    printf("==== compile file:%s ========\n", fname);
    file = fopen(fname, "r");
    init();
//  scan();
    PROG();
    symTableDump();
    strTableDump();
    fclose(file);
}

int main(int argc, char * argv[]) {
    char *fname = argv[1];
    compile(fname);
}

Facebook

Unless otherwise stated, the content of this page is licensed under Creative Commons Attribution-NonCommercial-ShareAlike 3.0 License