author = {Matthieu Geist and Olivier Pietquin},
title = {Statistically Linearized Least-Squares Temporal Differences},
year = {2010},
booktitle = {Journées Francophones de Planification, Décision et Apprentissage pour la conduite de systèmes (JFPDA 2010)},
month = {June},
note = {8 pages},
address = {Besançon (France)},
abstract = {A major drawback of standard reinforcement learning algorithms is their inability to scale up to real-world problems. For this reason, a current important trend of research is (state-action) value function approximation. A prominent value function approximator is the least-squares temporal differences (LSTD) algorithm. However, for technical reasons, linearity is mandatory: the parameterization of the value function must be linear (compact nonlinear representations are not allowed) and only the Bellman evaluation operator can be considered. In this paper, this restriction of LSTD is lifted thanks to a derivative-free statistical linearization approach. This way, nonlinear parameterizations and the Bellman optimality operator can be taken into account.}