% Technical report by Geist and Pietquin (2010): a survey of parametric
% value function approximation methods in reinforcement learning.
% Notes for maintainers:
%   - "institution" (not "organization") is the field that techreport
%     styles read for the issuing body.
%   - "month" uses the predefined macro so styles can abbreviate/localise it.
%   - The accent is written as a LaTeX escape so the entry also works with
%     classic 8-bit BibTeX.
@TechReport{Supelec644,
  author      = {Geist, Matthieu and Pietquin, Olivier},
  title       = {A Brief Survey of Parametric Value Function Approximation},
  institution = {Sup{\'e}lec},
  year        = {2010},
  month       = sep,
  url         = {http://www.metz.supelec.fr/metz/personnel/geist_mat/pdfs/Supelec644.pdf},
  abstract    = {Reinforcement learning is a machine learning answer to the
                 optimal control problem. It consists in learning an optimal
                 control policy through interactions with the system to be
                 controlled, the quality of this policy being quantified by
                 the so-called value function. An important subtopic of
                 reinforcement learning is to compute an approximation of
                 this value function when the system is too large for an
                 exact representation. This survey reviews state of the art
                 methods for (parametric) value function approximation by
                 grouping them into three main categories: bootstrapping,
                 residuals and projected fixed-point approaches. Related
                 algorithms are derived by considering one of the associated
                 cost functions and a specific way to minimize it, almost
                 always a stochastic gradient descent or a recursive
                 least-squares approach.},
}