@inproceedings{Supelec749,
  author    = {Geist, Matthieu and Scherrer, Bruno},
  title     = {{L1}-penalized Projected {Bellman} Residual},
  booktitle = {Proceedings of the European Workshop on Reinforcement Learning ({EWRL} 2011)},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer},
  address   = {Athens, Greece},
  year      = {2011},
  month     = sep,
  note      = {12 pages},
  url       = {http://www.metz.supelec.fr//metz/personnel/geist_mat/pdfs/supelec749.pdf},
  abstract  = {We consider the task of feature selection for value function approximation in reinforcement learning. A promising approach consists in combining the Least-Squares Temporal Difference (LSTD) algorithm with L1-regularization, which has proven to be effective in the supervised learning community. This has been done recently with the LARS-TD algorithm, which replaces the projection operator of LSTD with an L1-penalized projection and solves the corresponding fixed-point problem. However, this approach is not guaranteed to be correct in the general off-policy setting. We take a different route by adding an L1-penalty term to the projected Bellman residual, which requires weaker assumptions while offering a comparable performance. However, this comes at the cost of a higher computational complexity if only a part of the regularization path is computed. Nevertheless, our approach ends up to a supervised learning problem, which let envision easy extensions to other penalties.},
}