@@ -1,6 +1,4 @@
 import numpy as np
-import random
-from datetime import datetime
 from multiagent.coreRace import World, Agent, Landmark
 from multiagent.scenario import BaseScenario
 
@@ -11,15 +9,19 @@ class Scenario(BaseScenario):
     def __init__(self):
         super(Scenario, self).__init__()
         self.agentsToLandMarks = {}
-        self.numberOfAgents = 8
-        self.laststate = 0
+        self.numberOfAgents = 6
+        self.allCooperateForce = 1.0
+        self.allCheatPunishment = -1.0
+        self.cheaterGain = 2.0
+        self.robbedCooperator = 0.0
+
     def make_world(self):
         world = World()
         # add agents
 
         world.agents = [Agent() for i in range(self.numberOfAgents)]
         for i, agent in enumerate(world.agents):
-            # print(i)
+            print(i)
             agent.name = 'agent %d' % i
             agent.collide = False
             agent.silent = True
@@ -31,7 +33,7 @@ def make_world(self):
             landmark.movable = False
         # fill in the dictionary
         for i in range(self.numberOfAgents):
-            self.agentsToLandMarks.update({world.agents[i]: world.landmarks[i]})
+            self.agentsToLandMarks.update({world.agents[i]: world.landmarks[i]})
 
         # make initial conditions
         self.reset_world(world)
@@ -69,61 +71,39 @@ def reward(self, agent, world):
         cheat-cheat: all stay, all cooperate: all move as 1.0, cheat cooperate: cheater moves more.
         '''
 
-        random.seed(datetime.now())
+
         def agent_cheated(agent):
             if agent.action.u[1] > 0.0:
                 return True
             return False
-
+
         reward_n = []
         numOfCheaters = 0
         numOfAgents = 0
-        # agent part code
+        # if(agent_cheated(world.agents[1])):
+        #     print(world.agents[1].action.u)
         for i, thisAgent in enumerate(world.agents):
-            j = random.randint(1, 10)
-            if (i == 0):
-                if (j >= 8):
-                    thisAgent.action.u[1] = 1
-            if (i == 1):
-                if (j >= 6):
-                    thisAgent.action.u[1] = 1
-            if (i == 2):
-                if (j >= 4):
-                    thisAgent.action.u[1] = 1
-            if (i == 3):
-                thisAgent.action.u[1] = 1
-            if (i == 4):
-                thisAgent.action.u[1] = 1
-            if (i == 5):
-                thisAgent.action.u[1] = 1
-        #for i, thisAgent in enumerate(world.agents):
-            #print(thisAgent.action.u[1])
-        for i, thisAgent in enumerate(world.agents):
-            # if i == 0:
+            # if i == 2:
+            #     print(thisAgent.action.u)
             # print(i)
             # print(agent.action.u)
             numOfAgents += 1
             thisAgent.action.u[0] = 0.0  # invalidate horizontal action
             if agent_cheated(thisAgent):  # it's trying to move up
                 numOfCheaters += 1
-                #print(numOfCheaters)
+                # print("agent " + str(i) + "cheated")
+                # print(numOfCheaters)
 
         for i, thisAgent in enumerate(world.agents):
             if numOfCheaters > numOfAgents // 2:
-
-                if thisAgent.action.u[1] == 1:
-                    thisAgent.action.u[1] = -0.01  # all cheat: move backward
-                if thisAgent.action.u[1] == 0:
-                    thisAgent.action.u[1] = 0.01
-                #print("back")
+                thisAgent.action.u[1] = self.allCheatPunishment  # all cheat: move backward
             elif numOfCheaters == 0:
-                thisAgent.action.u[1] = 0.01  # all cooperate: move up together
+                thisAgent.action.u[1] = self.allCooperateForce  # all cooperate: move up together
             else:
                 if thisAgent.action.u[1] > 0.0:  # this agent cheated
-                    thisAgent.action.u[1] = 0.02
-                    #print("cheat")
+                    thisAgent.action.u[1] = self.cheaterGain
                 else:
-                    thisAgent.action.u[1] = 0.01
+                    thisAgent.action.u[1] = self.robbedCooperator
             reward_n.append(thisAgent.action.u[1])
         return reward_n
 
@@ -132,5 +112,4 @@ def observation(self, agent, world):
         entity_pos = []
         for entity in world.landmarks:
             entity_pos.append(entity.state.p_pos - agent.state.p_pos)
-        return np.concatenate([agent.state.p_vel] + entity_pos)
-
+        return np.concatenate([agent.state.p_vel] + entity_pos)
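
As a reading aid only, the payoff rule that reward() applies after this change can be sketched outside the environment. This is a hypothetical standalone snippet, not part of the commit: the function name payoff, the list-of-booleans input, and the module-level constants are illustrative stand-ins for the self.* values set in __init__.

# Hypothetical sketch of the reward rule introduced by this diff.
ALL_COOPERATE_FORCE = 1.0    # nobody cheats: everyone moves up together
ALL_CHEAT_PUNISHMENT = -1.0  # a majority cheats: everyone is pushed back
CHEATER_GAIN = 2.0           # minority cheater: moves more than the cooperators
ROBBED_COOPERATOR = 0.0      # cooperator exploited by a cheating minority

def payoff(cheated):
    """Map per-agent cheat flags to per-agent rewards."""
    num_cheaters = sum(cheated)
    if num_cheaters > len(cheated) // 2:
        return [ALL_CHEAT_PUNISHMENT] * len(cheated)
    if num_cheaters == 0:
        return [ALL_COOPERATE_FORCE] * len(cheated)
    return [CHEATER_GAIN if c else ROBBED_COOPERATOR for c in cheated]

# payoff([False, False, False]) -> [1.0, 1.0, 1.0]
# payoff([True, False, False])  -> [2.0, 0.0, 0.0]
# payoff([True, True, False])   -> [-1.0, -1.0, -1.0]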