% Supelec613: workshop paper presented at JFPDA 2010.
% Typed as InProceedings because the standard BibTeX styles do not define a
% Workshop entry type and would otherwise fall back to the misc layout,
% which does not print the booktitle field.
@InProceedings{Supelec613,
  author    = {Aras, Raghav and Pietquin, Olivier},
  title     = {Optimal Average Reward Controllers For {POMDPs}},
  booktitle = {Journées Francophones de Planification, Décision et Apprentissage pour la conduite de systèmes (JFPDA 2010)},
  year      = {2010},
  month     = jun,
  address   = {Besançon (France)},
  note      = {6 pages},
  abstract  = {We consider the problem of finding a finite-state controller
               (FSC) of a given size with the largest average per period
               reward for controlling a POMDP over the infinite horizon.
               A POMDP when considered in conjunction with a fixed FSC for
               it forms an MDP. If this MDP is assumed to be recurrent, it
               can be solved as a linear program (LP) to yield the average
               reward of the fixed FSC. If the FSC is allowed to be variable
               instead of fixed, the LP transforms to a nonlinear program
               (NLP). An optimal solution to the NLP yields the required
               average reward maximizing FSC. NLPs based on the average
               reward equations and the steady state distribution of a
               recurrent MDP are presented.},
}