% Conference paper by Geist and Pietquin (ICUMT 2010); citation key Supelec637.
% Data is stored in canonical form (macro month, en-dash page range,
% "Last, First" names) so that any bibliography style can format it.
@inproceedings{Supelec637,
  author    = {Geist, Matthieu and Pietquin, Olivier},
  title     = {Eligibility Traces through Colored Noises},
  booktitle = {Proceedings of the {IEEE} International Conference on Ultra Modern Control Systems ({ICUMT} 2010)},
  pages     = {458--465},
  address   = {Moscow (Russia)},
  month     = oct,
  year      = {2010},
  note      = {(best paper award)},
  isbn      = {978-1-4244-7285-7},
  doi       = {10.1109/ICUMT.2010.5676597},
  url       = {http://www.metz.supelec.fr/metz/personnel/geist_mat/pdfs/Supelec637.pdf},
  abstract  = {The Gaussian Process Temporal Differences (GPTD) framework initiated statistical modeling of value function approximation. It was followed by the close Kalman Temporal Differences (KTD) approach. Both methods share the same drawback: they provide biased estimates of the value function when transitions of the system to be controlled are stochastic. A colored noise model has been introduced to cope with this problem in the GPTD framework, which actually leads to a Monte-Carlo estimate of the value function. In this paper, we generalize this colored noise model using ideas close to eligibility traces and apply it to the KTD framework. This allows removing the bias when the so-called eligibility factor is set to one, and decreasing it when this factor is strictly between zero and one. The proposed algorithm is experimented on the simple Boyan chain in order to study the effect of the eligibility factor. As KTD generalizes GPTD in the sense that it allows taking into account nonlinear parameterizations, we also propose an experiment combining the new algorithm with a neural network.},
}