@@ -12,24 +12,34 @@ __function__ MONTE-CARLO-TREE-SEARCH(_state_) __returns__ an action
1212
1313---
1414
15- __function__ PLAYOUT(_tree_) __returns__ _updated tree_
16- &emsp;_node_ &larr; _tree_
17- &emsp;__while__ _node_ is not terminal and was already in _tree_ __do__
18- &emsp;&emsp;&emsp;_move_ &larr; SELECT(_node_)
19- &emsp;&emsp;&emsp;_node_ &larr; FOLLOW\-LINK(_node_,_move_)
20- &emsp;_outcome_ &larr; SIMULATION(_node_.STATE)
21- &emsp;UPDATE(_node_,_outcome_)
22- &emsp;__return__ _tree_
15+ __function__ PLAYOUT(_tree_) __returns__ _updated tree_
16+ &emsp;_node_ &larr; _tree_
17+ &emsp;__while__ _node_ is not terminal and was already in _tree_ __do__
18+ &emsp;&emsp;&emsp;_move_ &larr; SELECT(_node_)
19+ &emsp;&emsp;&emsp;_node_ &larr; FOLLOW\-LINK(_node_,_move_)
20+ &emsp;_outcome_ &larr; SIMULATION(_node_.STATE)
21+ &emsp;UPDATE(_node_,_outcome_)
22+ &emsp;__return__ _tree_
2323
2424---
2525
26- __function__ SELECT(_node_) __returns__ _an action_
27- &emsp;__return__ argmax<sub>m &isin; FEASIBLE\-ACTIONS(_node_)</sub> UCB(RESULT(_node_,_m_))
26+ __function__ SELECT(_node_) __returns__ _an action_
27+ &emsp;__return__ argmax<sub>m &isin; FEASIBLE\-ACTIONS(_node_)</sub> UCB(RESULT(_node_,_m_))
2828
2929---
3030
31- __function__ UCB(_child_) __returns__ _a number_
32- &emsp;__return__ _child_.VALUE + C &times; <span class="math">$\sqrt{\frac{\log{_child_.PARENT.N}}{_child_.N}}$</span>
31+ __function__ UCB(_child_) __returns__ _a number_
32+ &emsp;__return__ _child_.VALUE + C &times; <math>
33+ <mrow>
35+ <msqrt>
36+ <mfrac>
37+ <mi>log _child_.PARENT.N</mi>
38+ <mi>_child_.N</mi>
39+ </mfrac>
40+ </msqrt>
41+ </mrow>
42+ </math>
3343
3444
3545---
0 commit comments