Module pacai.student.qlearningAgents
Expand source code
import logging
import random
from pacai.agents.learning.reinforcement import ReinforcementAgent
from pacai.util import probability
from pacai.util import reflection
class QLearningAgent(ReinforcementAgent):
    """
    A tabular Q-Learning agent.

    Helpers provided by the parent class and the framework:
    `pacai.agents.learning.reinforcement.ReinforcementAgent.getAlpha`:
    Get the learning rate.
    `pacai.agents.learning.reinforcement.ReinforcementAgent.getDiscountRate`:
    Get the discount rate.
    `pacai.agents.learning.reinforcement.ReinforcementAgent.getEpsilon`:
    Get the exploration probability.
    `pacai.agents.learning.reinforcement.ReinforcementAgent.getLegalActions`:
    Get the legal actions for a reinforcement agent.
    `pacai.util.probability.flipCoin`:
    Flip a coin (get a binary value) with some probability.
    `random.choice`:
    Pick randomly from a list.

    Methods implemented on top of the Q-value accessors:
    `pacai.agents.base.BaseAgent.getAction`:
    Epsilon-greedy action selection: with probability
    `pacai.agents.learning.reinforcement.ReinforcementAgent.getEpsilon`
    a random legal action is taken, otherwise the best policy action.
    When there are no legal actions (the terminal state), None is chosen.
    `pacai.agents.learning.reinforcement.ReinforcementAgent.update`:
    Called by the parent class to observe a state transition and reward;
    performs the Q-value update. It is never called directly by this class.

    DESCRIPTION: Q-values live in a dict keyed by (state, action) and default to 0.0.
    `update` blends the old estimate with `reward + discount * max_a' Q(nextState, a')`
    using the learning rate, and `getAction` explores with probability epsilon.
    """

    def __init__(self, index, **kwargs):
        super().__init__(index, **kwargs)

        # Learned Q-values keyed by (state, action). Pairs that were never updated
        # are simply absent and read back as 0.0.
        # NOTE(review): assumes game states are hashable - confirm against the framework.
        self._qValues = {}

    def getQValue(self, state, action):
        """
        Get the Q-Value for a `pacai.core.gamestate.AbstractGameState`
        and `pacai.core.directions.Directions`.
        Returns 0.0 if the (state, action) pair has never been seen.
        """

        return self._qValues.get((state, action), 0.0)

    def getValue(self, state):
        """
        Return the value of the best action in a state.
        I.E., the value of the action that solves: `max_action Q(state, action)`,
        where the max is over legal actions.
        Returns 0.0 when there are no legal actions (the terminal state).

        This method pairs with `QLearningAgent.getPolicy`,
        which returns the best action itself.
        """

        legalActions = self.getLegalActions(state)
        if not legalActions:
            return 0.0

        return max(self.getQValue(state, action) for action in legalActions)

    def getPolicy(self, state):
        """
        Return the best action in a state.
        I.E., the action that solves: `max_action Q(state, action)`,
        where the max is over legal actions.
        Returns None when there are no legal actions (the terminal state).

        This method pairs with `QLearningAgent.getValue`,
        which returns the value of the best action.
        """

        legalActions = self.getLegalActions(state)
        if not legalActions:
            return None

        scored = [(self.getQValue(state, action), action) for action in legalActions]
        bestValue = max(value for value, _ in scored)

        # Break ties randomly so that equally valued actions (e.g. all 0.0 at the
        # start of training) do not always resolve to the first legal action.
        return random.choice([action for value, action in scored if value == bestValue])

    def getAction(self, state):
        """
        Choose the action to take in `state` using an epsilon-greedy rule.
        Returns None when there are no legal actions (the terminal state).
        """

        legalActions = self.getLegalActions(state)
        if not legalActions:
            return None

        if probability.flipCoin(self.getEpsilon()):
            return random.choice(legalActions)

        return self.getPolicy(state)

    def update(self, state, action, nextState, reward):
        """
        Fold one observed transition into the Q-table:
        `Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (reward + discount * V(nextState))`.
        """

        alpha = self.getAlpha()
        sample = reward + self.getDiscountRate() * self.getValue(nextState)
        blended = (1.0 - alpha) * self.getQValue(state, action) + alpha * sample

        self._qValues[(state, action)] = blended
class PacmanQAgent(QLearningAgent):
    """
    A `QLearningAgent` whose only difference is its default learning parameters.
    """

    def __init__(self, index, epsilon = 0.05, gamma = 0.8, alpha = 0.2, numTraining = 0, **kwargs):
        # Forward the (possibly defaulted) learning parameters to the parent as keyword args.
        kwargs.update(epsilon = epsilon, gamma = gamma, alpha = alpha, numTraining = numTraining)
        super().__init__(index, **kwargs)

    def getAction(self, state):
        """
        Delegate the choice to the parent's getAction and then report the chosen
        action through doAction before returning it.
        Do not change or remove this method.
        """

        chosen = super().getAction(state)
        self.doAction(state, chosen)

        return chosen
class ApproximateQAgent(PacmanQAgent):
    """
    An approximate Q-learning agent.

    Only `QLearningAgent.getQValue` and
    `pacai.agents.learning.reinforcement.ReinforcementAgent.update` are overridden.
    All other `QLearningAgent` functions are used as is.

    `QLearningAgent.getQValue`:
    Returns `Q(state, action) = w * featureVector`,
    where `*` is the dotProduct operator.
    `pacai.agents.learning.reinforcement.ReinforcementAgent.update`:
    Updates the weights based on the observed transition.

    DESCRIPTION: One weight is kept per feature (default 0.0). Each update moves every
    active feature's weight by `alpha * correction * featureValue`, where
    `correction = reward + discount * V(nextState) - Q(state, action)`.
    """

    def __init__(self, index,
            extractor = 'pacai.core.featureExtractors.IdentityExtractor', **kwargs):
        super().__init__(index, **kwargs)
        self.featExtractor = reflection.qualifiedImport(extractor)

        # Feature weights keyed by feature; features never seen weigh 0.0.
        self._weights = {}

        # Lazily built extractor object used for feature lookups.
        self._extractor = None

    def _features(self, state, action):
        """
        Return the feature -> value mapping for a (state, action) pair.
        """

        # NOTE(review): assumes qualifiedImport yields the extractor class (instantiated
        # here with no arguments) and that extractors expose
        # getFeatures(state, action) returning a dict - confirm against
        # pacai.core.featureExtractors.
        if self._extractor is None:
            target = self.featExtractor
            self._extractor = target() if isinstance(target, type) else target

        return self._extractor.getFeatures(state, action)

    def getQValue(self, state, action):
        """
        Return `Q(state, action)` as the dot product of the weights
        and the feature vector of the (state, action) pair.
        """

        features = self._features(state, action)
        return sum(self._weights.get(name, 0.0) * value for name, value in features.items())

    def update(self, state, action, nextState, reward):
        """
        Adjust the weights of the features active in (state, action)
        in proportion to the temporal-difference error of the transition.
        """

        features = self._features(state, action)

        # Compute the error with the current weights before any weight is modified.
        target = reward + self.getDiscountRate() * self.getValue(nextState)
        step = self.getAlpha() * (target - self.getQValue(state, action))

        for name, value in features.items():
            self._weights[name] = self._weights.get(name, 0.0) + step * value

    def final(self, state):
        """
        Called at the end of each game.
        """

        # Call the super-class final method.
        super().final(state)

        # Did we finish training?
        if self.episodesSoFar == self.numTraining:
            # Report the learned weights for debugging instead of aborting the run.
            logging.debug('Weights after %d training episodes: %s',
                    self.numTraining, self._weights)
Classes
class ApproximateQAgent (index, extractor='pacai.core.featureExtractors.IdentityExtractor', **kwargs)
-
An approximate Q-learning agent.
You should only have to overwrite QLearningAgent.getQValue() and ReinforcementAgent.update(). All other QLearningAgent functions should work as is.
Additional methods to implement:
QLearningAgent.getQValue(): Should return Q(state, action) = w * featureVector, where * is the dot-product operator.
ReinforcementAgent.update(): Should update your weights based on the observed transition.
DESCRIPTION: <Write something here so we know what you did.>
Args
actionFn
- A function which takes a state and returns the list of legal actions.
alpha
- The learning rate.
epsilon
- The exploration rate.
gamma
- The discount factor.
numTraining
- The number of training episodes.
Expand source code
class ApproximateQAgent(PacmanQAgent): """ An approximate Q-learning agent. You should only have to overwrite `QLearningAgent.getQValue` and `pacai.agents.learning.reinforcement.ReinforcementAgent.update`. All other `QLearningAgent` functions should work as is. Additional methods to implement: `QLearningAgent.getQValue`: Should return `Q(state, action) = w * featureVector`, where `*` is the dotProduct operator. `pacai.agents.learning.reinforcement.ReinforcementAgent.update`: Should update your weights based on transition. DESCRIPTION: <Write something here so we know what you did.> """ def __init__(self, index, extractor = 'pacai.core.featureExtractors.IdentityExtractor', **kwargs): super().__init__(index, **kwargs) self.featExtractor = reflection.qualifiedImport(extractor) # You might want to initialize weights here. def final(self, state): """ Called at the end of each game. """ # Call the super-class final method. super().final(state) # Did we finish training? if self.episodesSoFar == self.numTraining: # You might want to print your weights here for debugging. # *** Your Code Here *** raise NotImplementedError()
Ancestors
Static methods
def loadAgent(name, index, args={})
-
Inherited from:
PacmanQAgent
.loadAgent
Load an agent with the given class name. The name can be fully qualified or just the bare class name. If the bare name is given, the class should …
Methods
def doAction(self, state, action)
-
Inherited from:
PacmanQAgent
.doAction
Called by inherited class when an action is taken in a state.
def final(self, state)
-
Called at the end of each game.
Expand source code
def final(self, state): """ Called at the end of each game. """ # Call the super-class final method. super().final(state) # Did we finish training? if self.episodesSoFar == self.numTraining: # You might want to print your weights here for debugging. # *** Your Code Here *** raise NotImplementedError()
def getAction(self, state)
-
Inherited from:
PacmanQAgent
.getAction
Simply calls the super getAction method and then informs the parent of an action for Pacman. Do not change or remove this method.
def getLegalActions(self, state)
-
Inherited from:
PacmanQAgent
.getLegalActions
Get the actions available for a given state. This is what you should use to obtain legal actions for a state.
def getPolicy(self, state)
-
Inherited from:
PacmanQAgent
.getPolicy
Return the best action in a state. I.E., the action that solves: max_action Q(state, action), where the max is over legal actions. Note that if …
def getQValue(self, state, action)
-
Inherited from:
PacmanQAgent
.getQValue
Get the Q-Value for an AbstractGameState and Directions. Should return 0.0 if the (state, action) pair …
def getValue(self, state)
-
Inherited from:
PacmanQAgent
.getValue
Return the value of the best action in a state. I.E., the value of the action that solves: max_action Q(state, action), where the max is over legal …
def observationFunction(self, state)
-
Inherited from:
PacmanQAgent
.observationFunction
This is where we ended up after our last action.
def observeTransition(self, state, action, nextState, deltaReward)
-
Inherited from:
PacmanQAgent
.observeTransition
Called by the environment to inform the agent that a transition has been observed. This will result in a call to ReinforcementAgent.update on the same …
def registerInitialState(self, state)
-
Inherited from:
PacmanQAgent
.registerInitialState
Inspect the starting state.
def startEpisode(self)
-
Inherited from:
PacmanQAgent
.startEpisode
Called by environment when a new episode is starting.
def stopEpisode(self)
-
Inherited from:
PacmanQAgent
.stopEpisode
Called by environment when an episode is done.
def update(self, state, action, nextState, reward)
-
Inherited from:
PacmanQAgent
.update
This class will call this function after observing a transition and reward.
class PacmanQAgent (index, epsilon=0.05, gamma=0.8, alpha=0.2, numTraining=0, **kwargs)
-
Exactly the same as QLearningAgent, but with different default parameters.
Args
actionFn
- A function which takes a state and returns the list of legal actions.
alpha
- The learning rate.
epsilon
- The exploration rate.
gamma
- The discount factor.
numTraining
- The number of training episodes.
Expand source code
class PacmanQAgent(QLearningAgent): """ Exactly the same as `QLearningAgent`, but with different default parameters. """ def __init__(self, index, epsilon = 0.05, gamma = 0.8, alpha = 0.2, numTraining = 0, **kwargs): kwargs['epsilon'] = epsilon kwargs['gamma'] = gamma kwargs['alpha'] = alpha kwargs['numTraining'] = numTraining super().__init__(index, **kwargs) def getAction(self, state): """ Simply calls the super getAction method and then informs the parent of an action for Pacman. Do not change or remove this method. """ action = super().getAction(state) self.doAction(state, action) return action
Ancestors
Subclasses
Static methods
def loadAgent(name, index, args={})
-
Inherited from:
QLearningAgent
.loadAgent
Load an agent with the given class name. The name can be fully qualified or just the bare class name. If the bare name is given, the class should …
Methods
def doAction(self, state, action)
-
Inherited from:
QLearningAgent
.doAction
Called by inherited class when an action is taken in a state.
def final(self, state)
-
Inherited from:
QLearningAgent
.final
Called by Pacman game at the terminal state.
def getAction(self, state)
-
Simply calls the super getAction method and then informs the parent of an action for Pacman. Do not change or remove this method.
Expand source code
def getAction(self, state): """ Simply calls the super getAction method and then informs the parent of an action for Pacman. Do not change or remove this method. """ action = super().getAction(state) self.doAction(state, action) return action
def getLegalActions(self, state)
-
Inherited from:
QLearningAgent
.getLegalActions
Get the actions available for a given state. This is what you should use to obtain legal actions for a state.
def getPolicy(self, state)
-
Inherited from:
QLearningAgent
.getPolicy
Return the best action in a state. I.E., the action that solves: max_action Q(state, action), where the max is over legal actions. Note that if …
def getQValue(self, state, action)
-
Inherited from:
QLearningAgent
.getQValue
Get the Q-Value for an AbstractGameState and Directions. Should return 0.0 if the (state, action) pair …
def getValue(self, state)
-
Inherited from:
QLearningAgent
.getValue
Return the value of the best action in a state. I.E., the value of the action that solves: max_action Q(state, action), where the max is over legal …
def observationFunction(self, state)
-
Inherited from:
QLearningAgent
.observationFunction
This is where we ended up after our last action.
def observeTransition(self, state, action, nextState, deltaReward)
-
Inherited from:
QLearningAgent
.observeTransition
Called by the environment to inform the agent that a transition has been observed. This will result in a call to ReinforcementAgent.update on the same …
def registerInitialState(self, state)
-
Inherited from:
QLearningAgent
.registerInitialState
Inspect the starting state.
def startEpisode(self)
-
Inherited from:
QLearningAgent
.startEpisode
Called by environment when a new episode is starting.
def stopEpisode(self)
-
Inherited from:
QLearningAgent
.stopEpisode
Called by environment when an episode is done.
def update(self, state, action, nextState, reward)
-
Inherited from:
QLearningAgent
.update
This class will call this function after observing a transition and reward.
class QLearningAgent (index, **kwargs)
-
A Q-Learning agent.
Some functions that may be useful:
ReinforcementAgent.getAlpha(): Get the learning rate.
ReinforcementAgent.getDiscountRate(): Get the discount rate.
ReinforcementAgent.getEpsilon(): Get the exploration probability.
ReinforcementAgent.getLegalActions(): Get the legal actions for a reinforcement agent.
flipCoin(): Flip a coin (get a binary value) with some probability.
random.choice: Pick randomly from a list.
Additional methods to implement:
BaseAgent.getAction(): Compute the action to take in the current state. With probability ReinforcementAgent.getEpsilon(), we should take a random action, and take the best policy action otherwise. Note that if there are no legal actions, which is the case at the terminal state, you should choose None as the action.
ReinforcementAgent.update(): The parent class calls this to observe a state transition and reward. You should do your Q-Value update here. Note that you should never call this function; it will be called on your behalf.
DESCRIPTION: <Write something here so we know what you did.>
Args
actionFn
- A function which takes a state and returns the list of legal actions.
alpha
- The learning rate.
epsilon
- The exploration rate.
gamma
- The discount factor.
numTraining
- The number of training episodes.
Expand source code
class QLearningAgent(ReinforcementAgent): """ A Q-Learning agent. Some functions that may be useful: `pacai.agents.learning.reinforcement.ReinforcementAgent.getAlpha`: Get the learning rate. `pacai.agents.learning.reinforcement.ReinforcementAgent.getDiscountRate`: Get the discount rate. `pacai.agents.learning.reinforcement.ReinforcementAgent.getEpsilon`: Get the exploration probability. `pacai.agents.learning.reinforcement.ReinforcementAgent.getLegalActions`: Get the legal actions for a reinforcement agent. `pacai.util.probability.flipCoin`: Flip a coin (get a binary value) with some probability. `random.choice`: Pick randomly from a list. Additional methods to implement: `pacai.agents.base.BaseAgent.getAction`: Compute the action to take in the current state. With probability `pacai.agents.learning.reinforcement.ReinforcementAgent.getEpsilon`, we should take a random action and take the best policy action otherwise. Note that if there are no legal actions, which is the case at the terminal state, you should choose None as the action. `pacai.agents.learning.reinforcement.ReinforcementAgent.update`: The parent class calls this to observe a state transition and reward. You should do your Q-Value update here. Note that you should never call this function, it will be called on your behalf. DESCRIPTION: <Write something here so we know what you did.> """ def __init__(self, index, **kwargs): super().__init__(index, **kwargs) # You can initialize Q-values here. def getQValue(self, state, action): """ Get the Q-Value for a `pacai.core.gamestate.AbstractGameState` and `pacai.core.directions.Directions`. Should return 0.0 if the (state, action) pair has never been seen. """ return 0.0 def getValue(self, state): """ Return the value of the best action in a state. I.E., the value of the action that solves: `max_action Q(state, action)`. Where the max is over legal actions. 
Note that if there are no legal actions, which is the case at the terminal state, you should return a value of 0.0. This method pairs with `QLearningAgent.getPolicy`, which returns the actual best action. Whereas this method returns the value of the best action. """ return 0.0 def getPolicy(self, state): """ Return the best action in a state. I.E., the action that solves: `max_action Q(state, action)`. Where the max is over legal actions. Note that if there are no legal actions, which is the case at the terminal state, you should return a value of None. This method pairs with `QLearningAgent.getValue`, which returns the value of the best action. Whereas this method returns the best action itself. """ return None
Ancestors
Subclasses
Static methods
def loadAgent(name, index, args={})
-
Inherited from:
ReinforcementAgent
.loadAgent
Load an agent with the given class name. The name can be fully qualified or just the bare class name. If the bare name is given, the class should …
Methods
def doAction(self, state, action)
-
Inherited from:
ReinforcementAgent
.doAction
Called by inherited class when an action is taken in a state.
def final(self, state)
-
Inherited from:
ReinforcementAgent
.final
Called by Pacman game at the terminal state.
def getAction(self, state)
-
Inherited from:
ReinforcementAgent
.getAction
The BaseAgent will receive an AbstractGameState, and must return an action from Directions.
def getLegalActions(self, state)
-
Inherited from:
ReinforcementAgent
.getLegalActions
Get the actions available for a given state. This is what you should use to obtain legal actions for a state.
def getPolicy(self, state)
-
Return the best action in a state. I.E., the action that solves: max_action Q(state, action), where the max is over legal actions. Note that if there are no legal actions, which is the case at the terminal state, you should return a value of None.
This method pairs with QLearningAgent.getValue(), which returns the value of the best action, whereas this method returns the best action itself.
Expand source code
def getPolicy(self, state): """ Return the best action in a state. I.E., the action that solves: `max_action Q(state, action)`. Where the max is over legal actions. Note that if there are no legal actions, which is the case at the terminal state, you should return a value of None. This method pairs with `QLearningAgent.getValue`, which returns the value of the best action. Whereas this method returns the best action itself. """ return None
def getQValue(self, state, action)
-
Get the Q-Value for an AbstractGameState and Directions. Should return 0.0 if the (state, action) pair has never been seen.
Expand source code
def getQValue(self, state, action): """ Get the Q-Value for a `pacai.core.gamestate.AbstractGameState` and `pacai.core.directions.Directions`. Should return 0.0 if the (state, action) pair has never been seen. """ return 0.0
def getValue(self, state)
-
Return the value of the best action in a state. I.E., the value of the action that solves: max_action Q(state, action), where the max is over legal actions. Note that if there are no legal actions, which is the case at the terminal state, you should return a value of 0.0.
This method pairs with QLearningAgent.getPolicy(), which returns the actual best action, whereas this method returns the value of the best action.
Expand source code
def getValue(self, state): """ Return the value of the best action in a state. I.E., the value of the action that solves: `max_action Q(state, action)`. Where the max is over legal actions. Note that if there are no legal actions, which is the case at the terminal state, you should return a value of 0.0. This method pairs with `QLearningAgent.getPolicy`, which returns the actual best action. Whereas this method returns the value of the best action. """ return 0.0
def observationFunction(self, state)
-
Inherited from:
ReinforcementAgent
.observationFunction
This is where we ended up after our last action.
def observeTransition(self, state, action, nextState, deltaReward)
-
Inherited from:
ReinforcementAgent
.observeTransition
Called by the environment to inform the agent that a transition has been observed. This will result in a call to ReinforcementAgent.update on the same …
def registerInitialState(self, state)
-
Inherited from:
ReinforcementAgent
.registerInitialState
Inspect the starting state.
def startEpisode(self)
-
Inherited from:
ReinforcementAgent
.startEpisode
Called by environment when a new episode is starting.
def stopEpisode(self)
-
Inherited from:
ReinforcementAgent
.stopEpisode
Called by environment when an episode is done.
def update(self, state, action, nextState, reward)
-
Inherited from:
ReinforcementAgent
.update
This class will call this function after observing a transition and reward.