% arXiv working paper by Meylahn and Janssen (2021).
% The eprint identifier below is the arXiv number embedded in the DOI.
% Braces in the title protect the capital Q from sentence-casing styles.
@techreport{81562b0436314914a413116ba8f87c2a,
  author        = {Meylahn, Janusz M. and Janssen, Lars},
  title         = {Limiting dynamics for {Q-learning} with memory one in symmetric two-player, two-action games},
  type          = {Working Paper},
  institution   = {ArXiv.org},
  publisher     = {ArXiv.org},
  year          = {2021},
  month         = jul,
  day           = {29},
  doi           = {10.48550/arXiv.2107.13995},
  eprint        = {2107.13995},
  archiveprefix = {arXiv},
  language      = {English},
  keywords      = {math.DS, nlin.AO},
  note          = {30 pages, 12 figures},
  abstract      = {We develop a method based on computer algebra systems to represent the mutual pure strategy best-response dynamics of symmetric two-player, two-action repeated games played by players with a one-period memory. We apply this method to the iterated prisoner's dilemma, stag hunt and hawk-dove games and identify all possible equilibrium strategy pairs and the conditions for their existence. The only equilibrium strategy pair that is possible in all three games is the win-stay, lose-shift strategy. Lastly, we show that the mutual best-response dynamics are realized by a sample batch Q-learning algorithm in the infinite batch size limit.},
}