@Article{Supelec907,
author = {Bruno Scherrer and Mohammad Ghavamzadeh and Victor Gabillon and Boris Lesner and Matthieu Geist},
title = {Approximate Modified Policy Iteration and its Application to the Game of Tetris},
journal = {Journal of Machine Learning Research},
year = {2015},
volume = {16},
pages = {1629-1676},
url = {http://jmlr.org/papers/volume16/scherrer15a/scherrer15a.pdf},
abstract = {Modi ed policy iteration (MPI) is a dynamic programming (DP) algorithm that contains the two celebrated policy and value iteration methods. Despite its generality, MPI has not been thoroughly studied, especially its approximation form which is used when the state and/or action spaces are large or in nite. In this paper, we propose three implementations of approximate MPI (AMPI) that are extensions of the well-known approximate DP algo- rithms: tted-value iteration, tted-Q iteration, and classi cation-based policy iteration. We provide error propagation analysis that unify those for approximate policy and value iteration. We develop the nite-sample analysis of these algorithms, which highlights the in uence of their parameters. In the classi cation-based version of the algorithm (CBMPI), the analysis shows that MPI\'s main parameter controls the balance between the estima- tion error of the classi er and the overall value function approximation. We illustrate and evaluate the behavior of these new algorithms in the Mountain Car and Tetris problems. Remarkably, in Tetris, CBMPI outperforms the existing DP approaches by a large margin, and competes with the current state-of-the-art methods while using fewer samples. }
}