% Hirayama and Kawai (1995): journal article in the Reports of the Faculty of
% Engineering, Tottori University, vol. 26, no. 1, pp. 295--302. Per its
% abstract, it proposes a genetic-algorithm search for near-optimal pure
% stationary policies of a constrained Markov decision process.
%
% The citation key looks like a repository OAI identifier; keep it stable,
% since documents cite the entry by this key.
%
% Field conventions used below:
%   author           each author exactly once, in Japanese script
%   romanizedauthor  non-standard field: romanized names, same order as author
%   yomi             non-standard field: kana readings, same order as author
%   abstract         English abstract (kept out of note so styles do not
%                    print it inside the reference)
@article{oai:repository.lib.tottori-u.ac.jp:00003271,
  author          = {平山, 克己 and 河合, 一},
  romanizedauthor = {Hirayama, Katsumi and Kawai, Hajime},
  yomi            = {ヒラヤマ, カツミ and カワイ, ハジメ},
  title           = {遺伝アルゴリズムによる制約付きマルコフ決定過程の解法},
  journal         = {鳥取大学工学部研究報告, Reports of the Faculty of Engineering, Tottori University},
  volume          = {26},
  number          = {1},
  pages           = {295--302},
  year            = {1995},
  month           = nov,
  abstract        = {We consider discrete time Markov decision process (MDP)
    with finite state space, finite action space and two kinds of immediate
    reward. The problem is to maximize time average reward generated by one
    reward stream, subject to that the other reward is not smaller than a
    prescribed value. The problem is analyzed in the range of pure stationary
    policies. MDP with one optimality criterion and no constraint can be
    solved by usual policy improvement method. MDP with one reward constraint
    can be solved by linear programming, in the range of mixed policies. On
    the other hand, however, when we restrict the policies to pure policies
    the problem is some combinatorial problem, for which any solving method
    has not been discovered. In this paper, we propose an approach applying
    Genetic Algorithm in order to carry on a search process effectively and
    to obtain a near optimal pure stationary policy. A numerical example is
    given to examine the efficiency of the approach proposed here.},
}