% Geist and Pietquin, MDAI 2010 paper (LNAI volume 6408).
% The address field holds the publisher's location; the conference venue
% is recorded in the note field instead.
@InCollection{Supelec624,
  author    = {Geist, Matthieu and Pietquin, Olivier},
  title     = {Revisiting natural actor-critics with value function approximation},
  booktitle = {Modeling Decisions for Artificial Intelligence},
  editor    = {Torra, V. and Narukawa, Y. and Daumas, M.},
  series    = {Lecture Notes in Artificial Intelligence (LNAI)},
  volume    = {6408},
  pages     = {207--218},
  publisher = {Springer Verlag},
  address   = {Berlin, Heidelberg},
  month     = oct,
  year      = {2010},
  note      = {Proceedings of 7th International Conference MDAI 2010, Perpinya (France)},
  url       = {http://www.metz.supelec.fr/metz/personnel/geist_mat/pdfs/Supelec624.pdf},
  abstract  = {Actor-critics architectures have become popular during the last
    decade in the field of reinforcement learning because of the
    introduction of the policy gradient with function approximation
    theorem. It allows combining rationally actor-critic architectures
    with value function approximation and therefore addressing
    large-scale problems. Recent researches led to the replacement of
    policy gradient by a natural policy gradient, improving the
    efficiency of the corresponding algorithms. However, a common
    drawback of these approaches is that they require the
    manipulation of the so-called advantage function which does not
    satisfy any Bellman equation. Consequently, derivation of
    actor-critic algorithms is not straightforward. In this paper, we
    re-derive theorems in a way that allows reasoning directly with
    the state-action value function (or Q-function) and thus relying on
    the Bellman equation again. Consequently, new forms of critics
    can easily be integrated in the actor-critic framework.},
}