complier-lab4

This lab comes from SE3355 *Compilers* lab4, which requires me to implement a type-checking module when scanning the AST.

The AST is created by bisonc++ script implemented in lab3. To make the following statement more clear, we can take a see of the structure of the AST node. The venv contains the symbols of variables and functions and the tenv contains the symbols of all types.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
class Var {
public:
int pos_;
virtual ~Var() = default;
virtual void Print(FILE *out, int d) const = 0;
virtual type::Ty *SemAnalyze(env::VEnvPtr venv, env::TEnvPtr tenv,
int labelcount,
err::ErrorMsg *errormsg) const = 0;
protected:
explicit Var(int pos) : pos_(pos) {}
};

class Exp {
public:
int pos_;
virtual ~Exp() = default;
virtual void Print(FILE *out, int d) const = 0;
virtual type::Ty *SemAnalyze(env::VEnvPtr venv, env::TEnvPtr tenv,
int labelcount,
err::ErrorMsg *errormsg) const = 0;
protected:
explicit Exp(int pos) : pos_(pos) {}
};

class Dec {
public:
int pos_;
virtual ~Dec() = default;
virtual void Print(FILE *out, int d) const = 0;
virtual void SemAnalyze(env::VEnvPtr venv, env::TEnvPtr tenv, int labelcount,
err::ErrorMsg *errormsg) const = 0;
protected:
explicit Dec(int pos) : pos_(pos) {}
};

class Ty {
public:
int pos_;
virtual ~Ty() = default;
virtual void Print(FILE *out, int d) const = 0;
virtual type::Ty *SemAnalyze(env::TEnvPtr tenv,
err::ErrorMsg *errormsg) const = 0;
protected:
explicit Ty(int pos) : pos_(pos) {}
};

Part 1 Some Basic Type Checking

1.1 some root and leaves

We want to get the type of every variable, expression and declaration, so we can easily do it from the root and leaves.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
void AbsynTree::SemAnalyze(env::VEnvPtr venv, env::TEnvPtr tenv,
err::ErrorMsg *errormsg) const {
root_->SemAnalyze(venv, tenv, 0, errormsg);
}
type::Ty *VarExp::SemAnalyze(env::VEnvPtr venv, env::TEnvPtr tenv,
int labelcount, err::ErrorMsg *errormsg) const {
return var_->SemAnalyze(venv, tenv, labelcount, errormsg);
}
type::Ty *NilExp::SemAnalyze(env::VEnvPtr venv, env::TEnvPtr tenv,
int labelcount, err::ErrorMsg *errormsg) const {
return type::NilTy::Instance();
}
type::Ty *IntExp::SemAnalyze(env::VEnvPtr venv, env::TEnvPtr tenv,
int labelcount, err::ErrorMsg *errormsg) const {
return type::IntTy::Instance();
}
type::Ty *StringExp::SemAnalyze(env::VEnvPtr venv, env::TEnvPtr tenv, int labelcount, err::ErrorMsg *errormsg) const {
return type::StringTy::Instance();
}
type::Ty *VoidExp::SemAnalyze(env::VEnvPtr venv, env::TEnvPtr tenv,
int labelcount, err::ErrorMsg *errormsg) const {
return type::VoidTy::Instance();
}

1.2 Variables

The type-checking code of three variables is as follows.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
type::Ty *SimpleVar::SemAnalyze(env::VEnvPtr venv, env::TEnvPtr tenv,
int labelcount, err::ErrorMsg *errormsg) const {
env::EnvEntry *entry = venv->Look(sym_);
if (entry && (typeid(*entry) == typeid(env::VarEntry))) {
env::VarEntry* varEntry = static_cast<env::VarEntry *>(entry);
type::Ty *type = varEntry->ty_;
bool readonly = varEntry->readonly_;
return type;
}
else {
errormsg->Error(pos_, "undefined variable %s", sym_->Name().data());
}
return type::IntTy::Instance();
}

type::Ty *FieldVar::SemAnalyze(env::VEnvPtr venv, env::TEnvPtr tenv,
int labelcount, err::ErrorMsg *errormsg) const {
// for field var first check var need to be a RecordTy, then check the sym_ is in the recordTY type
type::Ty * variable_type = var_->SemAnalyze(venv, tenv, labelcount, errormsg);
if (variable_type == nullptr) {
errormsg->Error(pos_, "variable not defined.");
}
else if (typeid(*(variable_type->ActualTy())) != typeid(type::RecordTy)) {
errormsg->Error(pos_, "not a record type");
}
else {
type::RecordTy * real_type = ((type::RecordTy *) (variable_type));
int matched = 0;
for (type::Field* field:real_type->fields_->GetList()) {
if (field->name_->Name() == sym_->Name()) {
matched = 1;
}
}
if (matched == 1) {
return real_type;
}
else {
errormsg->Error(pos_, "field nam doesn't exist");
return real_type;
}
}
}

type::Ty *SubscriptVar::SemAnalyze(env::VEnvPtr venv, env::TEnvPtr tenv,
int labelcount, err::ErrorMsg *errormsg) const {
//check the type of var_ is an array or not
//if so, check subscript_ is an int or not, and check the range.
type::Ty *var_type = var_->SemAnalyze(venv, tenv, labelcount, errormsg);
//var_type is array of the type that we want to return
if (var_type == nullptr) {
errormsg->Error(pos_, "variable not defined.");
}
else if (typeid(*(var_type->ActualTy())) != typeid(type::ArrayTy)) {
errormsg->Error(pos_, "array type required");
}
else {
type::Ty *subscript_type = subscript_->SemAnalyze(venv, tenv, labelcount, errormsg);
if (typeid(*(subscript_type->ActualTy())) != typeid(type::IntTy)) {
errormsg->Error(pos_, "subscribe is not a int type.");
}
else {
return ((type::ArrayTy *) var_type)->ty_;
}
}
return nullptr;
}

1.3 Call expression

The type-checking code of call expression is as follows.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
type::Ty *CallExp::SemAnalyze(env::VEnvPtr venv, env::TEnvPtr tenv,
int labelcount, err::ErrorMsg *errormsg) const {
env::FunEntry *funEntry = static_cast<env::FunEntry *>(venv->Look(func_));
if (funEntry == nullptr) {
errormsg->Error(pos_, "undefined function " + func_->Name());
return type::NilTy::Instance();
}
std::list<Exp *> args_list = args_->GetList();
if (funEntry->formals_ == nullptr) {
auto args_it = args_->GetList().begin();
for (; args_it != args_->GetList().end(); args_it++) {
Exp *current_exp = *args_it;
current_exp->SemAnalyze(venv, tenv, labelcount, errormsg);
}
}
else {
auto formal_it = funEntry->formals_->GetList().begin();
auto args_it = args_->GetList().begin();
for (; formal_it != funEntry->formals_->GetList().end() && args_it != args_->GetList().end(); formal_it++, args_it++) {
Exp *current_exp = *args_it;
type::Ty *formal_type = *formal_it;
if (typeid(*(current_exp->SemAnalyze(venv, tenv, labelcount, errormsg)->ActualTy())) != typeid(*formal_type)) {
// type not match
errormsg->Error(current_exp->pos_, "para type mismatch");
}
}
if (args_it != args_->GetList().end()) {
// number does not match
auto last_it = args_->GetList().end();
last_it--;
errormsg->Error((*last_it)->pos_, "too many params in function g");
}
else if (formal_it != funEntry->formals_->GetList().end()) {

}
}
if (funEntry->result_ != nullptr) {
return funEntry->result_;
}
else {
return type::NilTy::Instance();
}
}

Part 2 Some Tricky Part

2.1 the loop variable shouldn’t be assigned

Since the loop variable in a for-loop can’t not be assigned because of the rule of the tiger language, when a for-loop declares a variable, the variable should be marked with “readonly = true”. In such way, when encountering a variable in assign exp, the module can check the variable is read-only or not.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
type::Ty *ForExp::SemAnalyze(env::VEnvPtr venv, env::TEnvPtr tenv,
int labelcount, err::ErrorMsg *errormsg) const {
venv->BeginScope();
type::Ty *lo_type = lo_->SemAnalyze(venv, tenv, labelcount, errormsg);
venv->Enter(var_, new env::VarEntry(lo_type, true));
type::Ty *hi_type = hi_->SemAnalyze(venv, tenv, labelcount, errormsg);
if (typeid(*(hi_type->ActualTy())) != typeid(type::IntTy)) {
errormsg->Error(hi_->pos_, "for exp's range type is not integer");
}
body_->SemAnalyze(venv, tenv, labelcount + 1, errormsg);
venv->EndScope();
return type::NilTy::Instance();
}

type::Ty *AssignExp::SemAnalyze(env::VEnvPtr venv, env::TEnvPtr tenv,
int labelcount, err::ErrorMsg *errormsg) const {
type::Ty *var_type = var_->SemAnalyze(venv, tenv, labelcount, errormsg);
type::Ty *exp_type = exp_->SemAnalyze(venv, tenv, labelcount, errormsg);
if (typeid(*(exp_type->ActualTy())) == typeid(type::NilTy) && typeid(*(var_type->ActualTy())) == typeid(type::RecordTy)) {
return type::NilTy::Instance();
}
else if (var_type && typeid(*(var_type)->ActualTy()) == typeid(type::IntTy)) {
absyn::SimpleVar *simpleVar = dynamic_cast<SimpleVar *>(var_);
if (simpleVar != nullptr) {
env::EnvEntry *envEntry = venv->Look(simpleVar->sym_);
if (envEntry != nullptr) {
bool readonly = envEntry->readonly_;
if (readonly) {
errormsg->Error(pos_, "loop variable can't be assigned");
}
}
}
}
else if (var_type && exp_type && typeid(*(var_type->ActualTy())) != typeid(*(exp_type->ActualTy()))) {
errormsg->Error(pos_, " unmatched assign exp");
}
return type::NilTy::Instance();
}

2.2 The in-loop checking of break expression

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
type::Ty *ForExp::SemAnalyze(env::VEnvPtr venv, env::TEnvPtr tenv,
int labelcount, err::ErrorMsg *errormsg) const {
...
body_->SemAnalyze(venv, tenv, labelcount + 1, errormsg);
...
}

type::Ty *WhileExp::SemAnalyze(env::VEnvPtr venv, env::TEnvPtr tenv,
int labelcount, err::ErrorMsg *errormsg) const {
...
type::Ty *body_type = body_->SemAnalyze(venv, tenv, labelcount + 1, errormsg);
...
}

type::Ty *BreakExp::SemAnalyze(env::VEnvPtr venv, env::TEnvPtr tenv,
int labelcount, err::ErrorMsg *errormsg) const {
if (labelcount == 0) {
errormsg->Error(pos_, "break is not inside any loop");
}
return type::NilTy::Instance();
}

2.3 handle the nested function declaration

Tiger supports the adjacent nested function, so we need to define all the function name first in order for all the function to find the reference function entry in the venv.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
void FunctionDec::SemAnalyze(env::VEnvPtr venv, env::TEnvPtr tenv,
int labelcount, err::ErrorMsg *errormsg) const {
absyn::FunDec *last_function = nullptr;

for (absyn::FunDec *function:functions_->GetList()) {
if (last_function && function->name_->Name() == last_function->name_->Name()) {
errormsg->Error(function->pos_, "two functions have the same name");
}
venv->Enter(function->name_, new env::FunEntry(nullptr, nullptr));
last_function = function;
}

for (absyn::FunDec *function:functions_->GetList()) {
type::Ty *result_ty = tenv->Look(function->result_);
type::TyList *formals = function->params_->MakeFormalTyList(tenv, errormsg);
venv->Set(function->name_, new env::FunEntry(formals, result_ty));
venv->BeginScope();
auto formal_it = formals->GetList().begin();
auto param_it = function->params_->GetList().begin();
for (; param_it != function->params_->GetList().end(); formal_it++, param_it++) {
venv->Enter((*param_it)->name_, new env::VarEntry(*formal_it));
}
type::Ty *ty = function->body_->SemAnalyze(venv, tenv, labelcount, errormsg);
errormsg->Error(pos_, "function body over");
if (function->result_ == nullptr && ty && typeid(*(ty->ActualTy())) != typeid(type::NilTy)) {
errormsg->Error(pos_, "procedure returns value");
}
venv->EndScope();
}
}

2.4 illegal cycle in nested type declaration

1
2
3
4
5
6
7
8
9
10
11
12
13
// test.16.tig
/* error: mutually recursive types thet do not pass through record or array */
let

type a=c
type b=a
type c=d
type d=a

in
""
end

The upper code is not allowed in tiger language, so we need to check if there is a cycle in type nested declaration. Each time we define a new type, we need to scan the defined type in the venv to make sure there is no cycle.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
void TypeDec::SemAnalyze(env::VEnvPtr venv, env::TEnvPtr tenv, int labelcount,
err::ErrorMsg *errormsg) const {
for (NameAndTy* current: types_->GetList()) { // put all the name into the env first
sym::Symbol *symbol = current->name_;
for (NameAndTy* current_2: types_->GetList()) {
if (current_2 == current) break;
if (current_2->name_->Name() == current->name_->Name()) {
errormsg->Error(pos_, "two types have the same name");
}
}
tenv->Enter(symbol, new type::NameTy(symbol, nullptr));
}
for (NameAndTy* current: types_->GetList()) {
sym::Symbol *symbol = current->name_;
errormsg->Error(pos_, "define symbol " + symbol->Name());
absyn::Ty* type = current->ty_;
absyn::NameTy *changed_type1 = dynamic_cast<absyn::NameTy *>(type);
absyn::ArrayTy *changed_type2 = dynamic_cast<absyn::ArrayTy *>(type);
absyn::RecordTy *changed_type3 = dynamic_cast<absyn::RecordTy *>(type);
if (changed_type1 != nullptr) {
type::Ty *type1 = changed_type1->SemAnalyze(tenv, errormsg);
tenv->Set(symbol, type1);
try {
type::Ty * test_type = tenv->Look(symbol);
type::NameTy * change_one = dynamic_cast<type::NameTy *>(test_type);
type::NameTy * origin_one = dynamic_cast<type::NameTy *>(test_type);
while (change_one != nullptr) {
errormsg->Error(pos_, "change_one= " + change_one->sym_->Name());
change_one = dynamic_cast<type::NameTy *>(change_one->ty_);
if (change_one == origin_one) {
errormsg->Error(pos_, "eeee= " + change_one->sym_->Name());
throw(0);
}
}
}
catch (...) {
errormsg->Error(pos_, "illegal type cycle");
}
}
else if (changed_type2 != nullptr) {
type::Ty *type2 = changed_type2->SemAnalyze(tenv, errormsg);
tenv->Set(symbol, type2);
}
else if (changed_type3 != nullptr) {
type::Ty *type3 = changed_type3->SemAnalyze(tenv, errormsg);
tenv->Set(symbol, type3);
}
else {
exit(0);
}
}
return;
}
Author

Kami-code

Posted on

2021-11-22

Updated on

2021-11-22

Licensed under

Comments