tiger book読書記録 chapter 1
最新コンパイラ構成技法(Modern Commpiler Implementaion in ML new edition) 通称tiger bookを読むことにした。このnoteはその記録用である。
Chapter 1
august 4, 2018:
一章本文を読む。流石に最初の章は簡単であるがMLを知らない。しょうがないので,手元の環境にstandard mlをinstallして簡単なMLのtutorialをすることにした。
August 5, 2018:
Ubuntu環境にstandard ML new jerseyを入れた。deb package化されているので簡単。以下のpackageを入れた。emacsを使っているのでsml-modeも入れておく。
sudo apt-get install sml-mode smlnj smlnj-doc smlnj-runtime elpa-sml-mode
emacsのsml-modeで思った様にindentしてくれない。面倒なので、当面このままにしておく。MLに慣れてきた頃に見直そう。
August 6, 2018:
MLの超基本を適当なsiteで学ぶ。学んだのは
- コメントの書き方 (* *)
- identifier, reserved keyword
- basic types: bool, int, word, real, char, string, unit
- complex type: tuple, list, record
- operators: - + * abs / div mod: divとmodもinfix notationだった
- not, andalso, orelse
- how to define function
- type variable: 'a ''aと equality type, non-equality type
- pattern matching and wild card matching _
- higher order function and fn, rec
- partial application
- list walking operators: null, hd, tl, length, map, rev, @, foldr, foldl, last, nth
- algebraic data type/type constructor/value constructor
- reference type, !, :=
- module system: structure, signature, functor
むぅ、whereのsyntaxがよく理解出来ていないがまぁ、良しとする。解説も簡単なものだったし。必要になったらまた何か解説を読むことにする。
処理系の実行を理解して、簡単なコードは理解出来るようになったか。明日からはtiger bookに戻って1章の演習問題に取り組むことにする。
Aug 8, 2018:
本文中の課題1 引数の最大値を求める関数maxargsを書け
type id = string;
datatype binop = Plus | Minus | Times | Div;
datatype stm = CompoundStm of stm * stm
| AssignStm of id * exp
| PrintStm of exp list
and exp = IdExp of id
| NumExp of int
| OpExp of exp * binop * exp
| EseqExp of stm * exp;
fun maxval x y = if x > y then x else y;
fun maxargs (CompoundStm (stm0, stm1)) = maxval (maxargs stm0) (maxargs stm1)
| maxargs (AssignStm (id0, exp0)) = maxargs_exp exp0
| maxargs (PrintStm xs) = maxval (length xs) (maxargs_exp_list xs)
and maxargs_exp_list [] = 0
| maxargs_exp_list (x::xs) = maxval (maxargs_exp x) (maxargs_exp_list xs)
and maxargs_exp (IdExp id0) = 0
| maxargs_exp (NumExp x) = 0
| maxargs_exp (OpExp (e0, b, e1)) = maxval (maxargs_exp e0) (maxargs_exp e1)
| maxargs_exp (EseqExp (stm0, exp0)) = maxval (maxargs stm0) (maxargs_exp exp0);
なんかmaxvalを使っているのが気に入らない。foldrを使うとうまく出来るのだろうか。まだ、MLに慣れてないのでerrorをfixのに苦労する。ml of new jerseyはcompilerと書いてあるけれど、interpreterなのか。どうやらこれをcompileして実行fileを作成するのは出来ないことは無いが面倒そうだ。moltonとか使えとかある。それならcompilerと名乗らずinterpreterと名乗って欲しい。明日は課題その2をやろう。
August 9, 2018:
課題2と演習問題1.1 a, b, cをやった。dは明日にする。
課題2
type id = string;
datatype binop = Plus | Minus | Times | Div;
datatype stm = CompoundStm of stm * stm
| AssignStm of id * exp
| PrintStm of exp list
and exp = IdExp of id
| NumExp of int
| OpExp of exp * binop * exp
| EseqExp of stm * exp;
structure Table =
struct
exception LookupFailure;
type tkey = id;
type tval = int;
type elem = (id * tval);
type table = (id * tval) list;
fun new () = nil : table;
fun update key (num, t) = (key, num)::t;
fun lookup key [] = raise LookupFailure
| lookup key ((k, v)::xs) = if key = k then v else lookup key xs;
end;
(* interpStm : stm -> table -> table *)
fun interpStm (CompoundStm (stm0, stm1)) t =
let
val t0 = interpStm stm0 t;
in
interpStm stm1 t0
end
| interpStm (AssignStm (id0, exp0)) t = Table.update id0 (interpExp exp0 t)
| interpStm (PrintStm (xs)) t = printExp xs t
(* interpExp : exp -> table -> int * table *)
and interpExp (IdExp id0) t = (Table.lookup id0 t, t)
| interpExp (NumExp num) t = (num, t)
| interpExp (OpExp (e0, op0, e1)) t =
let
val (num0, t0) = interpExp e0 t;
val (num1, t1) = interpExp e1 t0;
in
case op0 of
Plus => (num0 + num1, t1)
| Minus => (num0 - num1, t1)
| Times => (num0 * num1, t1)
| Div => (num0 div num1, t1)
end
| interpExp (EseqExp (stm0, exp0)) t0 =
let
val t1 = interpStm stm0 t0;
in
interpExp exp0 t1
end
(* printExp : exp list -> table -> table *)
and printExp nil t = (print("\n"); t)
| printExp [(IdExp id0)] t = (print (id0 ^ " = " ^ Int.toString (Table.lookup id0 t) ^ ";\n"); t)
(* | printExp [(IdExp id0)] t = (print id0; t) *)
| printExp [(NumExp num)] t = (print ((Int.toString num) ^ ";\n"); t)
| printExp [exp0] t =
let
val (num, t_new) = interpExp exp0 t;
val _ = print (Int.toString num ^ ";\n");
in
t_new
end
| printExp (x::xs) t =
let
val t0 = printExp [x] t;
in
printExp xs t0
end;
val prog =
CompoundStm(AssignStm("a", OpExp(NumExp 5, Plus, NumExp 3)),
CompoundStm(AssignStm("b",
EseqExp(PrintStm [IdExp "a", OpExp(IdExp "a", Minus, NumExp 1)],
OpExp(NumExp 10, Times, IdExp "a"))),
PrintStm [IdExp "b"]));
(*
a = 5 + 3
b = [print(a), a -1, 10 * a]
print(b)
*)
val t = Table.new ();
interpStm prog t;
let文とcase of構文を知らなくてハマる。始めのうちlocal scopeなvariableを定義出来るとは知らず時間を無駄にした。もうちょっとMLを学ぶか。結果を見ると構文定義をそのままナゾルだけで構文木評価が出来るのは楽だな。Cで書くとなるとnode定義を準備してvisitor patternで真面目にtree traverseをすることになりそうだ。PrintStmの評価のためにprintExpを導入したが無しでもPrintStmのpattern matchで頑張ればなんとかなりそう。ま、どっちても同じだから気分の問題か。
演習問題1.1a
type key = string;
datatype tree = LEAF | TREE of tree * key * tree;
val empty = LEAF;
fun insert (key, LEAF) = TREE(LEAF, key, LEAF)
| insert (key, TREE(l, k, r)) =
if key < k then
TREE(insert (key, l), key, LEAF)
else if key > k then
TREE(l, k, insert (key, r))
else
TREE(l, key, r);
fun member (key, LEAF) = false
| member (key, TREE(l, k, r)) =
if key < k then
member (key, l)
else if key > k then
member (key, r)
else
true;
val t = insert("a", empty);
val t = insert("b", t);
member ("a", t);
member ("b", t);
member ("c", t);
演習問題1.1b, c
type key = string;
datatype 'a tree = LEAF | TREE of 'a tree * key * 'a * 'a tree;
val empty = LEAF;
fun insert (key, value, LEAF) = TREE(LEAF, key, value, LEAF)
| insert (key, value, TREE(l, k, v, r)) =
if key < k then
TREE(insert (key, value, l), key, v, LEAF)
else if key > k then
TREE(l, k, v, insert (key, value, r))
else
TREE(l, key, value, r); (* overwrite the existing binding*)
fun member (LEAF, key) = false
| member (TREE(l, k, v, r), key) =
if key < k then
member (l, key)
else if key > k then
member (r, key)
else
true;
exception NOT_FOUND;
fun lookup (key, LEAF) = raise NOT_FOUND
| lookup (key, TREE(l, k, v, r)) =
if key < k then
lookup (key, l)
else if key > k then
lookup (key, r)
else
v;
val t = insert("a", 1, empty);
val t = insert("b", 2, t);
lookup ("a", t);
lookup ("b", t);
lookup ("c", t) handle NOT_FOUND => ~1;
(* 1.1 c *)
(* t s p i p f b s t *)
val t = insert("t", 1, empty);
val t = insert("s", 2, t);
val t = insert("p", 3, t);
val t = insert("i", 4, t);
val t = insert("p", 5, t);
val t = insert("f", 6, t);
val t = insert("b", 7, t);
val t = insert("s", 8, t);
val t = insert("t", 9, t);
(* a b c d e f g h i *)
val t = insert("a", 1, empty);
val t = insert("b", 2, t);
val t = insert("c", 3, t);
val t = insert("d", 4, t);
val t = insert("e", 5, t);
val t = insert("f", 6, t);
val t = insert("g", 7, t);
val t = insert("h", 8, t);
val t = insert("i", 9, t);
1.1cの評価結果は次の通り
val t = TREE (LEAF,"t",1,LEAF) : int tree
val t = TREE (TREE (LEAF,"s",2,LEAF),"s",1,LEAF) : int tree
val t = TREE (TREE (TREE #,"p",2,LEAF),"p",1,LEAF) : int tree
val t = TREE (TREE (TREE #,"i",2,LEAF),"i",1,LEAF) : int tree
val t = TREE (TREE (TREE #,"i",2,LEAF),"i",1,TREE (LEAF,"p",5,LEAF))
: int tree
val t = TREE (TREE (TREE #,"f",2,LEAF),"f",1,LEAF) : int tree
val t = TREE (TREE (TREE #,"b",2,LEAF),"b",1,LEAF) : int tree
val t = TREE (TREE (TREE #,"b",2,LEAF),"b",1,TREE (LEAF,"s",8,LEAF))
: int tree
val t = TREE (TREE (TREE #,"b",2,LEAF),"b",1,TREE (LEAF,"s",8,TREE #))
: int tree
val t = TREE (LEAF,"a",1,LEAF) : int tree
val t = TREE (LEAF,"a",1,TREE (LEAF,"b",2,LEAF)) : int tree
val t = TREE (LEAF,"a",1,TREE (LEAF,"b",2,TREE #)) : int tree
val t = TREE (LEAF,"a",1,TREE (LEAF,"b",2,TREE #)) : int tree
val t = TREE (LEAF,"a",1,TREE (LEAF,"b",2,TREE #)) : int tree
val t = TREE (LEAF,"a",1,TREE (LEAF,"b",2,TREE #)) : int tree
val t = TREE (LEAF,"a",1,TREE (LEAF,"b",2,TREE #)) : int tree
val t = TREE (LEAF,"a",1,TREE (LEAF,"b",2,TREE #)) : int tree
val t = TREE (LEAF,"a",1,TREE (LEAF,"b",2,TREE #)) : int tree
listが長くなると全部表示してくれないのね。何かoptionがあるのかな。まぁ、いいことにする。
August 10 ,2018:
演習問題1.1d 関数型記号表の為のbalanced tree data structureを提案せよ。
要件を考えると
- ヒントの通り、lookupではtreeを変更してはいけない。
- operationの頻度としてはlookup > insert > delete. 関数型であることを考えるとdelete operationは無いとしても良いか。値の上書きは必要か?insertで入れ替えるとしても良い。頻度としてはinsertと同程度。
単純に考えるとbalanced binary treeになるが、avl treeとかred-back treeだとkeyがstringでnode毎にstringのcompareを行うことになるなぁ。そうするとtrieかternary search treeか。でももうbalanced treeでは無くなってしまう。問題の意図としてはred black treeを実装しろというところか。意図から外れてしまうがtrieを実装してみよう。
そこで少しMLの調べる。String.explodeを知った。option, NONE, SOMEを知った。これでなんとか実装できそうだ。実装は明日。
August 12, 2018:
Trieを実装してみた。functional programmingに慣れてないので昨日実装仕掛けたものは動作せず諦めてdatatypeを別のものに変えてやっと動くものが出来た。出来たcodeを眺めてみるとまぁ、当たり前のことをやっているだけだな。Cかpythonならすぐ実装できる自信があったのだが、思ったよりも時間がかかった。特にstringとcharの変換が意外と面倒だ。なのでstringをexplodeでchar listに変換してから処理することにしたらうまくいった。
datatype id = string;
structure Trie =
struct
exception InvalidKey;
exception NotFound;
type tkey = id;
type nodeKey = char;
type tval = int;
datatype trie_node = NODE of nodeKey * tval option * trie_node list;
type trie = trie_node list;
fun new () = [] : trie;
(* datatype 'a trie_node = NODE of nodeKey * 'a option * 'a trie_node list; *)
(* type 'a trie = 'a trie_node list; *)
(* fun new () = [] : 'a trie; *)
fun insert key value root =
insert_node (explode key) value root
and insert_node [] value ns = raise InvalidKey
| insert_node [c] value [] = [NODE(c, SOME value, [])]
| insert_node [c] value (NODE(nkey, nval, children)::ns) =
if c = nkey then
NODE(nkey, SOME value, children)::ns
else
NODE(nkey, nval, children)::insert_node [c] value ns
| insert_node (c::cs) value [] =
[NODE(c, NONE, insert_node cs value [])]
| insert_node (c::cs) value (NODE(nkey, nval, children)::ns) =
if c = nkey then
NODE(nkey, nval, insert_node cs value children)::ns
else
NODE(nkey, nval, children)::insert_node (c::cs) value ns;
fun lookup key root =
lookup_node (explode key) root
and lookup_node [] ns = raise InvalidKey
| lookup_node cs [] = raise NotFound
| lookup_node [c] (NODE(nkey, value, children)::ns) =
if c = nkey then
valOf value handle Option => raise NotFound
else
lookup_node [c] ns
| lookup_node (c::cs) (NODE(nkey, value, children)::ns) =
if c = nkey then
lookup_node cs children
else
lookup_node (c::cs) ns;
fun delete key root =
delete_node (explode key) root
and delete_node [] ns = raise InvalidKey
| delete_node cs [] = raise NotFound
| delete_node [c] (NODE(nkey, value, children)::ns) =
if c = nkey then
if null children then
ns
else
NODE(nkey, NONE, children)::ns
else
NODE(nkey, value, children)::delete_node [c] ns
| delete_node (c::cs) (NODE(nkey, nval, children)::ns)=
if c = nkey then
let
val children_new = delete_node cs children
in
if nval = NONE andalso null children_new then
ns
else
NODE(nkey, nval, children_new)::ns
end
else
NODE(nkey, nval, children) :: delete_node (c::cs) ns
end;
val t = Trie.new ();
val t = Trie.insert "a" 1 t;
val t = Trie.insert "ab" 2 t;
val t = Trie.insert "abc" 3 t;
Trie.lookup "a" t;
Trie.lookup "ab" t;
Trie.lookup "abc" t;
val t = Trie.insert "aaa" 4 t;
Trie.lookup "aaa" t;
val t = Trie.insert "aaa" 5 t;
Trie.lookup "aaa" t;
val t = Trie.delete "ab" t;
val t = Trie.delete "aaa" t;
Trie.lookup "ab" t;
テストコードの実行結果は次の通り
val t = [] : Trie.trie
val t = [NODE (#"a",SOME 1,[])] : Trie.trie_node list
val t = [NODE (#"a",SOME 1,[NODE #])] : Trie.trie_node list
val t = [NODE (#"a",SOME 1,[NODE #])] : Trie.trie_node list
val it = 1 : Trie.tval
val it = 2 : Trie.tval
val it = 3 : Trie.tval
val t = [NODE (#"a",SOME 1,[NODE #,NODE #])] : Trie.trie_node list
val it = 4 : Trie.tval
val t = [NODE (#"a",SOME 1,[NODE #,NODE #])] : Trie.trie_node list
val it = 5 : Trie.tval
val t = [NODE (#"a",SOME 1,[NODE #,NODE #])] : Trie.trie_node list
val t = [NODE (#"a",SOME 1,[NODE #])] : Trie.trie_node list
uncaught exception NotFound
raised at: trie1.sml:44.46-44.54
/usr/lib/smlnj/bin/sml: Fatal error -- Uncaught exception NotFound with 0
raised at trie1.sml:44.46-44.54
node = []だとwarningが出るのね。型が問題か。だからnull operatorがあるのか。
data typeを一般化してfunctorにするのは気力がなかった。chapter 1で止まっていてもしょうがないのでこれで良いことにする。明日からはchapter 2にうつる。noteも新しいものに移る。