% Workshop paper by Geist, Pietquin and Fricout (NIPS 2008 workshop).
% Typed as inproceedings rather than the nonstandard "Workshop" entry type,
% which standard styles do not define (they warn and drop booktitle).
% The citation key is unchanged so existing citations keep working.
@inproceedings{Supelec459,
  author    = {Geist, Matthieu and Pietquin, Olivier and Fricout, Gabriel},
  title     = {{Kalman} Temporal Differences: Uncertainty and Value Function Approximation},
  booktitle = {{NIPS} Workshop on Model Uncertainty and Risk in Reinforcement Learning},
  year      = {2008},
  month     = dec,
  address   = {Vancouver (Canada)},
  url       = {http://www.cs.uwaterloo.ca/~ppoupart/nips08-workshop.html},
  abstract  = {This paper deals with value (and Q-) function approximation in
               deterministic Markovian decision processes (MDPs). A general
               statistical framework based on the Kalman filtering paradigm is
               introduced. Its principle is to adopt a parametric
               representation of the value function, to model the associated
               parameter vector as a random variable and to minimize the
               mean-squared error of the parameters conditioned on past observed
               transitions. From this general framework, which will be called
               Kalman Temporal Differences (KTD), and using an approximation
               scheme called the unscented transform, a family of algorithms
               is derived. Contrary to most of function approximation schemes,
               this framework inherently allows to derive uncertainty
               information over the value function, which can be notably
               useful for the exploration/exploitation dilemma.},
}