Skip to content

Commit a8749ea

Browse files
jzleibo authored and copybara-github committed
Allow conversation game master to be configured with memory and additional components, also add an agreement tracker contrib component. Both of these changes were added in order to improve the state_formation environment, which is also improved here in several more ways including the priming of the main players for their negotiation.
PiperOrigin-RevId: 687677088 Change-Id: I4b16b4e2445559643c3495a0aead6714ceea24df
1 parent 9f317f5 commit a8749ea

File tree

6 files changed

+402
-119
lines changed

6 files changed

+402
-119
lines changed

concordia/components/game_master/schelling_diagram_payoffs.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,12 @@ def _set_outcome_messages(
232232

233233
def update_before_event(self, player_action_attempt: str) -> None:
234234
# `player_action_attempt` is formatted as "name: attempt".
235-
player_name, choice_str = player_action_attempt.split(': ')
235+
# we assume that the player action attempt is in the format
236+
# 'player_name: player_choice'. All other occurrences of ':' will be treated
237+
# as a part of the player choice.
238+
player_name, choice_str = player_action_attempt.split(': ', 1)
239+
if player_name not in self._acting_player_names:
240+
return
236241
self._partial_joint_action[player_name] = choice_str
237242
self._state = ''
238243

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
# Copyright 2022 DeepMind Technologies Limited.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""A component for tracking whether negotiating players reach an agreement.
16+
"""
17+
18+
from collections.abc import Callable, Mapping, Sequence
19+
import datetime
20+
21+
from concordia.agents import deprecated_agent
22+
from concordia.agents import entity_agent
23+
from concordia.associative_memory import associative_memory
24+
from concordia.components.game_master import current_scene
25+
from concordia.document import interactive_document
26+
from concordia.language_model import language_model
27+
from concordia.typing import component
28+
import numpy as np
29+
import termcolor
30+
31+
# Default preamble placed ahead of the agreement-checking questions. It frames
# the language model as a game master who must judge whether the negotiators
# agreed, and reminds it to stay in a third-person limited perspective.
_DEFAULT_CHAIN_OF_THOUGHT_PREFIX = ''.join([
    'This is a social science experiment. ',
    'It is structured as a tabletop roleplaying game. ',
    'You are the game master and storyteller. ',
    'Your job is to make sure the game runs smoothly and accurately tracks '
    'the state of the world, subject to the laws of logic and physics. ',
    'Next, you will be asked a series of questions to help you reason through '
    'whether a group of negotiating players have agreed with one another or '
    'not and what they agreed on if they did. ',
    'It is important to the experiment we never mention that it is a '
    'game and we always use third-person limited perspective, even when '
    'speaking directly to the participants.',
])
43+
44+
45+
class AgreementTracker(component.Component):
  """Track whether negotiating agents have agreed and what they agreed on.

  The most recent action attempt of each negotiating player is recorded. After
  an event, if the current scene type is one of the resolution scenes and every
  negotiator has acted, the language model is asked whether the negotiators
  agreed and on what. The resulting statement is observed by all involved
  players (negotiators and informed players) and added to memory, after which
  the tracker advances to the next stage with a blank joint action.
  """

  def __init__(
      self,
      model: language_model.LanguageModel,
      memory: associative_memory.AssociativeMemory,
      negotiating_players: Sequence[
          deprecated_agent.BasicAgent | entity_agent.EntityAgent],
      informed_players: Sequence[
          deprecated_agent.BasicAgent | entity_agent.EntityAgent],
      clock_now: Callable[[], datetime.datetime],
      resolution_scenes: Sequence[str],
      chain_of_thought_prefix: str = _DEFAULT_CHAIN_OF_THOUGHT_PREFIX,
      basic_setting: str = 'unspecified',
      name: str = 'agreement tracker',
      seed: int | None = None,
      verbose: bool = False,
  ):
    """Initialize an agreement tracker component.

    Args:
      model: a language model
      memory: an associative memory
      negotiating_players: the players who are negotiating
      informed_players: the players who are informed of the negotiation
        outcome
      clock_now: Function to call to get current time.
      resolution_scenes: Scene types in which to check for agreement.
      chain_of_thought_prefix: include this string in context before all
        reasoning steps for handling the agreements.
      basic_setting: a string to include in the context before all reasoning
      name: name of this component
      seed: random seed
      verbose: whether to print the full update chain of thought or not
    """
    self._seed = seed
    self._model = model
    self._memory = memory
    self._negotiating_players = negotiating_players
    self._informed_players = informed_players
    self._clock_now = clock_now
    self._chain_of_thought_prefix = chain_of_thought_prefix
    self._basic_setting = basic_setting
    self._name = name
    self._verbose = verbose

    self._history = []
    # Keyed by name so that a player listed both as a negotiator and as an
    # informed player observes the outcome only once. Dict insertion order
    # keeps negotiators ahead of informed players.
    self._involved_players = list({
        player.name: player
        for player in (*negotiating_players, *informed_players)
    }.values())

    self._resolution_scenes = resolution_scenes
    self._current_scene = current_scene.CurrentScene(
        name='current scene type',
        memory=self._memory,
        clock_now=self._clock_now,
        verbose=self._verbose,
    )

    self.reset()
    # Set the initial state's string representation.
    self.update()

  def _blank_joint_action(self) -> dict[str, str | None]:
    """Returns a joint action with no statement recorded for any negotiator."""
    return {player.name: None for player in self._negotiating_players}

  def reset(self) -> None:
    """Returns to stage zero and forgets all recorded statements."""
    self._stage_idx = 0
    # Map each player's name to their component of the joint action.
    self._partial_joint_action = self._blank_joint_action()

  def name(self) -> str:
    """Returns the name of this component."""
    return self._name

  def get_last_log(self):
    """Returns a copy of the latest log entry, or None if nothing was logged."""
    if not self._history:
      return None
    return self._history[-1].copy()

  def get_history(self):
    """Returns a shallow copy of the list of all log entries."""
    return self._history.copy()

  def update(self) -> None:
    """Refreshes the current scene type component."""
    self._current_scene.update()

  def _joint_action_is_complete(
      self, joint_action: Mapping[str, str | None]
  ) -> bool:
    """Returns True once every negotiating player has a recorded statement."""
    return all(
        joint_action[player.name] is not None
        for player in self._negotiating_players
    )

  def update_before_event(self, player_action_attempt: str) -> None:
    """Records the latest action attempt of a negotiating player.

    Args:
      player_action_attempt: a string in the format
        'player_name: player_choice'. Only the first ': ' separates the name
        from the choice; all other occurrences of ':' are treated as a part of
        the player choice. Attempts without a separator, and attempts by
        players who are not negotiating, are ignored.
    """
    player_name, separator, choice_str = player_action_attempt.partition(': ')
    # The joint action is keyed by exactly the negotiators' names, so anyone
    # else (including informed-only players) must not add an entry: it would
    # distort the logged progress counters and the logged joint action.
    if not separator or player_name not in self._partial_joint_action:
      return
    self._partial_joint_action[player_name] = choice_str

  def _resolve_agreement(self, joint_action: Mapping[str, str | None]) -> str:
    """Asks the model what was agreed, announces it, returns the reasoning."""
    negotiator_names = [player.name for player in self._negotiating_players]
    chain_of_thought = interactive_document.InteractiveDocument(
        self._model, rng=np.random.default_rng(self._seed))
    chain_of_thought.statement(
        f'{self._chain_of_thought_prefix}\nSetting: {self._basic_setting}')
    chain_of_thought.statement(f'List of negotiators: {negotiator_names}')
    chain_of_thought.statement('Statements of negotiators:')
    for negotiator in self._negotiating_players:
      chain_of_thought.statement(
          f'{negotiator.name}\'s statement: '
          f'{joint_action[negotiator.name]}')
    # The answer is unused directly; asking it first puts the model's
    # reasoning into the document before the summary question below.
    _ = chain_of_thought.open_question(
        question='Have the negotiators agreed? Explain your reasoning.',
        max_tokens=800,
    )
    agreement = chain_of_thought.open_question(
        question=(
            'What did they agree on? If they did not agree then respond'
            ' with "The negotiators were unable to come to an agreement."'
        ),
        max_tokens=500,
    )
    agreement = f'Agreement: {agreement}'
    for player in self._involved_players:
      player.observe(agreement)
    self._memory.add(agreement)
    return chain_of_thought.view().text()

  def update_after_event(
      self,
      event_statement: str,
  ) -> None:
    """Checks for agreement in resolution scenes and logs the current state.

    Args:
      event_statement: the event that just happened (not used by this
        component).
    """
    chain_of_thought_str = ''
    finished = False
    current_scene_type = self._current_scene.state()
    if current_scene_type in self._resolution_scenes:
      # Check if all players have acted so far in the current stage game.
      joint_action = self._partial_joint_action.copy()
      if self._joint_action_is_complete(joint_action):
        chain_of_thought_str = self._resolve_agreement(joint_action)
        finished = True

    if self._verbose:
      print(termcolor.colored(chain_of_thought_str, 'yellow'))

    num_players_already_acted = sum(
        value is not None for value in self._partial_joint_action.values())
    total_num_players_to_act = len(self._partial_joint_action)
    update_log = {
        'date': self._clock_now(),
        'Summary': self.name(),
        'Stage index': self._stage_idx,
        'How many players acted so far this stage': (
            f'{num_players_already_acted}/{total_num_players_to_act}'),
        'Joint action': str(self._partial_joint_action),
        'Chain of thought': chain_of_thought_str,
    }
    self._history.append(update_log)

    if finished:
      # Advance to the next stage.
      self._stage_idx += 1
      self._partial_joint_action = self._blank_joint_action()

concordia/contrib/components/game_master/daily_activities.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,21 +96,21 @@ def __init__(
9696
name: str = 'DailyActivities',
9797
verbose: bool = False,
9898
):
99-
"""Initialize a grounded inventory component tracking objects in python.
99+
"""Initialize a grounded activities component tracking objects in python.
100100
101101
Args:
102102
model: a language model
103103
memory: an associative memory
104104
activity_configs: sequence of activity configurations
105105
resolution_scene: on which scene type should this component be updated
106106
after the event, i.e. when to check the joint action and compute results
107-
players: sequence of players who have an inventory and will observe it.
107+
players: sequence of players who have activities and will observe them.
108108
player_initial_activity_distribution: dict mapping player name to a
109109
dictionary with activities as keys and initial daily time as values.
110110
clock_now: Function to call to get current time.
111111
num_to_retrieve: number of recent memories to retrieve for context.
112112
chain_of_thought_prefix: include this string in context before all
113-
reasoning steps for handling the inventory.
113+
reasoning steps for handling the activities.
114114
basic_setting: a string to include in the context before all reasoning
115115
name: the name of this component e.g. Possessions, Account, Property, etc
116116
verbose: whether to print the full update chain of thought or not
@@ -330,7 +330,7 @@ def __init__(
330330
Args:
331331
memory: an associative memory
332332
daily_activities: the component to use to get the activities of players.
333-
players: sequence of players who have an inventory and will observe it.
333+
players: sequence of players who have daily activities.
334334
clock_now: Function to call to get current time.
335335
player_score_fn: function to compute an individual's score
336336
the value returned by this function is added to the score of the

concordia/environment/scenes/conversation.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
import random
2424

2525
from concordia.agents import deprecated_agent
26+
from concordia.agents import entity_agent
27+
from concordia.associative_memory import associative_memory
2628
from concordia.associative_memory import blank_memories
2729
from concordia.clocks import game_clock
2830
from concordia.document import interactive_document
@@ -42,7 +44,7 @@ class ConversationTracker(component.Component):
4244
def __init__(
4345
self,
4446
model: language_model.LanguageModel,
45-
players: Sequence[deprecated_agent.BasicAgent],
47+
players: Sequence[deprecated_agent.BasicAgent | entity_agent.EntityAgent],
4648
premise: str = '',
4749
check_for_termination: bool = True,
4850
key_question: str | None = None,
@@ -147,7 +149,7 @@ def update(self):
147149

148150

149151
def make_conversation_game_master(
150-
players: Sequence[deprecated_agent.BasicAgent],
152+
players: Sequence[deprecated_agent.BasicAgent | entity_agent.EntityAgent],
151153
clock: game_clock.MultiIntervalClock,
152154
model: language_model.LanguageModel,
153155
memory_factory: blank_memories.MemoryFactory,
@@ -159,6 +161,8 @@ def make_conversation_game_master(
159161
review_participants: bool = True,
160162
key_question: str | None = None,
161163
max_steps: int | None = 3,
164+
memory: associative_memory.AssociativeMemory | None = None,
165+
additional_components: Sequence[component.Component] | None = None,
162166
verbose: bool = False,
163167
seed: int | None = None,
164168
):
@@ -182,6 +186,10 @@ def make_conversation_game_master(
182186
key_question: optionally, end the scene once the game master knows the
183187
answer to this question.
184188
max_steps: Maximum number of conversation steps. If none, no limit
189+
memory: Optionally, use this memory instead of creating a new one with the
190+
`memory_factory`. When this is not None the `memory_factory` is ignored.
191+
additional_components: optionally, add additional components to the game
192+
master.
185193
verbose: whether or not to print
186194
seed: random seed for the game master
187195
@@ -225,14 +233,19 @@ def make_conversation_game_master(
225233
for player in players:
226234
player.observe(convo)
227235

228-
memory = memory_factory.make_blank_memory()
236+
if memory is None:
237+
memory = memory_factory.make_blank_memory()
238+
239+
components = [conversation_tracker]
240+
if additional_components is not None:
241+
components.extend(additional_components)
229242
game_master = game_master_lib.GameMaster(
230243
model=model,
231244
memory=memory,
232245
clock=clock,
233246
name=name,
234247
players=players,
235-
components=[conversation_tracker],
248+
components=components,
236249
action_spec=action_spec,
237250
update_thought_chain=[thought_chains.identity],
238251
randomise_initiative=randomise_initiative,

0 commit comments

Comments
 (0)