% Conference paper (ICUMT 2010) by Geist and Pietquin; `note` records the best paper award.
% NOTE(review): `address` holds the conference venue as given in the original record.
@inproceedings{Supelec637,
  author    = {Geist, Matthieu and Pietquin, Olivier},
  title     = {Eligibility Traces through Colored Noises},
  booktitle = {Proceedings of the {IEEE} International Conference on Ultra Modern Control systems ({ICUMT} 2010)},
  year      = {2010},
  month     = oct,
  pages     = {458--465},
  address   = {Moscow (Russia)},
  note      = {(best paper award)},
  isbn      = {978-1-4244-7285-7},
  doi       = {10.1109/ICUMT.2010.5676597},
  url       = {http://www.metz.supelec.fr/metz/personnel/geist_mat/pdfs/Supelec637.pdf},
  abstract  = {The Gaussian Process Temporal Differences (GPTD) framework
    initiated statistical modeling of value function approximation.
    It was followed by the close Kalman Temporal Differences (KTD)
    approach. Both methods share the same drawback: they provide
    biased estimates of the value function when transitions of the
    system to be controlled are stochastic. A colored noise model
    has been introduced to cope with this problem in the GPTD
    framework, which actually leads to a Monte-Carlo estimate of
    the value function. In this paper, we generalize this colored
    noise model using ideas close to eligibility traces and apply
    it to the KTD framework. This allows removing the bias when the
    so-called eligibility factor is set to one, and decreasing it
    when this factor is strictly between zero and one. The proposed
    algorithm is experimented on the simple Boyan chain in order to
    study the effect of the eligibility factor. As KTD generalizes
    GPTD in the sense that it allows taking into account nonlinear
    parameterizations, we also propose an experiment combining the
    new algorithm with a neural network.},
}