% Conference paper from the WSCE 2019 proceedings (see booktitle and note below).
% Title braces keep the strategy name and AlphaZero from being lowercased by sentence-casing styles.
% No series field: the export had copied booktitle into it verbatim, which prints the proceedings title twice.
% The percent sign in the abstract is escaped so styles that typeset the abstract do not treat it as a comment.
@inproceedings{c5e2cf53c455481886eb31dff449622e,
  author    = {Chen, {Chih Hung} and Chen, {Yen Chi} and Lin, {Shun Shii}},
  title     = {{Two-Phase-Win} Strategy for Improving the {AlphaZero's} Strength},
  booktitle = {Proceedings of 2019 2nd World Symposium on Communication Engineering, {WSCE} 2019},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {117--121},
  year      = {2019},
  month     = dec,
  doi       = {10.1109/WSCE49000.2019.9041112},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 2019 IEEE.; 2nd World Symposium on Communication Engineering, WSCE 2019 ; Conference date: 20-12-2019 Through 23-12-2019},
  keywords  = {alpha-beta pruning, alphazero, minimax search, monte-carlo tree search, two-phase-win},
  abstract  = {AlphaZero used a combination of Monte-Carlo Tree Search as well as deep neural networks that learned without human knowledge. It demonstrated that reinforcement learning by self-play could surpass the human champions. The great success of AlphaZero seems that every AI tasks can be trained and learned without any human knowledge as well as any human heuristics. But this paper presents another viewpoint: the AlphaZero approach is good at the perspective of overall situations, and miniMax search (with alpha-beta pruning) is adept in discovering partial solutions. Therefore, we introduce the Two-Phase-Win strategy to combine AlphaZero and miniMax search with alpha-beta pruning for improving AlphaZero's strength. It has improved the strength of the AlphaZero approach applied to Connect4. The results of experiments show that the Two-Phase-Win strategy has 58\% win rate against the AlphaZero approach and doesn't lose any game in a 100-game match.},
}