Skip to content
Snippets Groups Projects
Commit d2a36d70 authored by tnbeats's avatar tnbeats
Browse files

22211572

parent 85194667
No related branches found
No related tags found
No related merge requests found
......@@ -52,11 +52,11 @@ def np2(x, y, trace=None):
# for fun, I timed the two to see which is faster. both almost the same
# start_time = time.time()
# np2(3, 7)
# end_time = time.time()
# print(end_time - start_time)
# start_time = time.time()
# np(3, 7)
# end_time = time.time()
# print(end_time - start_time)
start_time = time.time()
np2(3, 10)
end_time = time.time()
print(end_time - start_time)
start_time = time.time()
np(3, 10)
end_time = time.time()
print(end_time - start_time)
# represent state of the board as a string
# each string is a box
# the menace system will be a dictionary
# pick arbitrary
# store all moves in history and reward at the end of each game
\ No newline at end of file
import json
import random
NUM_GUMDROPS = 8
def choose(weights):
total = sum(weights)
roll = random.randint(1, total)
for i, weight in enumerate(weights):
if roll <= weight:
return i
roll -= weight
def check_game_end(state):
winning_combinations = [(0, 1, 2), (3, 4, 5), (6, 7, 8),
(0, 3, 6), (1, 4, 7), (2, 5, 8),
(0, 4, 8), (2, 4, 6)]
for a, b, c in winning_combinations:
if state[a] == state[b] == state[c] != '-':
return True, state[a]
if '-' not in state:
return True, None
return False, None
def make_move(state, move, player):
new_state = list(state)
new_state[move] = player
return ''.join(new_state)
def get_legal_moves(state):
return [i for i, spot in enumerate(state) if spot == '-']
def print_board(state):
print("Board positions:")
for i in range(1, 10, 3):
print(f"{i} {i+1} {i+2}")
print("\nCurrent board:")
for i in range(0, 9, 3):
print(' '.join(state[i:i+3]))
def human_move(state):
print_board(state)
while True:
try:
move = int(input("Enter your move (1-9): ")) - 1
if move in get_legal_moves(state):
return move
else:
print("Invalid move, try again.")
except ValueError:
print("Please enter a number.")
def save_menace_state(menace):
with open('MENACE/menace_state.json', 'w') as file:
json.dump(menace, file)
def load_menace_state():
try:
with open('menace_state.json', 'r') as file:
return json.load(file)
except FileNotFoundError:
return {}
def one_game(menace, human_player=False, self_play=False):
state = "---------"
history = []
game_end, winner = check_game_end(state)
while not game_end:
player = 'X' if state.count('X') == state.count('O') else 'O'
if human_player and player == 'O':
move = human_move(state)
else:
if self_play or state not in menace:
num_holes = state.count('-')
menace[state] = [NUM_GUMDROPS for _ in range(num_holes)]
move_weights = menace[state]
legal_moves = get_legal_moves(state)
chosen_index = choose(move_weights)
move = legal_moves[chosen_index]
history.append((state, move))
state = make_move(state, move, player)
game_end, winner = check_game_end(state)
if human_player:
print_board(state)
if winner:
print(f"Game ended, winner: {winner}")
else:
print("Game ended, draw")
update_menace(menace, history, winner)
return winner
def update_menace(menace, history, winner):
reward = 10
penalty = 1
for state, move in reversed(history):
if state in menace:
index = get_legal_moves(state).index(move)
if (winner == 'X' and state.count('X') == state.count('O')) or \
(winner == 'O' and state.count('X') > state.count('O')):
menace[state][index] += reward
else:
menace[state][index] = max(1, menace[state][index] - penalty)
def play_again():
choice = input("Play again? (y/n): ").lower()
return choice == 'y'
def train_menace_self_play(menace, games):
for i in range(games):
one_game(menace, human_player=False, self_play=True)
if i % 1000 == 0 or i == games - 1:
print(
f"Self-Play Training Progress: {i / games * 100:.1f}%", end="\r")
print()
menace = load_menace_state()
# Training MENACE
train_menace_self_play(menace, 50000)
save_menace_state(menace)
# Play against MENACE
while True:
one_game(menace, human_player=True)
if play_again():
continue
else:
break
save_menace_state(menace)
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment