author = {Geist, Matthieu and Pietquin, Olivier},
title = {A Brief Survey of Parametric Value Function Approximation},
year = {2010},
month = sep,
organization = {Sup{\'e}lec},
url = {http://www.metz.supelec.fr/metz/personnel/geist_mat/pdfs/Supelec644.pdf},
abstract = {Reinforcement learning is a machine learning answer to the optimal control problem. It consists in learning an optimal control policy through interactions with the system to be controlled, the quality of this policy being quantified by the so-called value function. An important subtopic of reinforcement learning is to compute an approximation of this value function when the system is too large for an exact representation. This survey reviews state of the art methods for (parametric) value function approximation by grouping them into three main categories: bootstrapping, residuals and projected fixed-point approaches. Related algorithms are derived by considering one of the associated cost functions and a specific way to minimize it, almost always a stochastic gradient descent or a recursive least-squares approach.}