author = {Bruno Scherrer and Victor Gabillon and Mohammad Ghavamzadeh and Matthieu Geist},
title = {Approximate Modified Policy Iteration},
year = {2012},
booktitle = {International Conference on Machine Learning (ICML)},
note = {(to appear)},
url = {http://arxiv.org/abs/1205.3054},
abstract = {Modified policy iteration (MPI) is a dynamic programming (DP) algorithm that contains the two celebrated policy and value iteration methods. Despite its generality, MPI has not been thoroughly studied, especially its approximation form which is used when the state and/or action spaces are large or infinite. In this paper, we propose three approximate MPI (AMPI) algorithms that are extensions of the well-known approximate DP algorithms: fitted-value iteration, fitted-Q iteration, and classification-based policy iteration. For all algorithms, we provide error propagation analyses that unify those for approximate policy and value iteration. On an implementation of the last classification-based algorithm, we develop a finite-sample analysis that shows that MPI's main parameter allows to control the balance between the estimation error of the classifier and the overall value function approximation.}