author = {Raghav Aras and Olivier Pietquin},
title = {Optimal Average Reward Controllers For POMDPs},
year = {2010},
booktitle = {Journées Francophones de Planification, Décision et Apprentissage pour la conduite de systèmes (JFPDA 2010)},
month = {June},
note = {6 pages},
address = {Besançon (France)},
abstract = {We consider the problem of finding a finite-state controller (FSC) of a given size with the largest average per period reward for controlling a POMDP over the infinite horizon. A POMDP when considered in conjunction with a fixed FSC for it forms an MDP. If this MDP is assumed to be recurrent, it can be solved as a linear program (LP) to yield the average reward of the fixed FSC. If the FSC is allowed to be variable instead of fixed, the LP transforms to a nonlinear program (NLP). An optimal solution to the NLP yields the required average reward maximizing FSC. NLPs based on the average reward equations and the steady state distribution of a recurrent MDP are presented.}