22211572

d2a36d70 · tnbeats · 85194667 · d2a36d70 · d2a36d70 · d2a36d70
Commit d2a36d70 authored 1 year ago by tnbeats
--- a/4_9_Exercises/Question 4.py
+++ b/4_9_Exercises/Question 4.py
@@ -52,11 +52,11 @@ def np2(x, y, trace=None):


 # for fun, I timed the two to see which is faster. both almost the same
-# start_time = time.time()
-# np2(3, 7)
-# end_time = time.time()
-# print(end_time - start_time)
-# start_time = time.time()
-# np(3, 7)
-# end_time = time.time()
-# print(end_time - start_time)
+start_time = time.time()
+np2(3, 10)
+end_time = time.time()
+print(end_time - start_time)
+start_time = time.time()
+np(3, 10)
+end_time = time.time()
+print(end_time - start_time)
--- a/MENACE/menace.py
+++ b/MENACE/menace.py
-# represent state of the board as a string
-# each string is a box
-# the menace system will be a dictionary
-# pick arbitrary
-# store all moves in history and reward at the end of each game
\ No newline at end of file
+import json
+import random
+
+NUM_GUMDROPS = 8
+
+
+def choose(weights):
+    total = sum(weights)
+    roll = random.randint(1, total)
+    for i, weight in enumerate(weights):
+        if roll <= weight:
+            return i
+        roll -= weight
+
+
+def check_game_end(state):
+    winning_combinations = [(0, 1, 2), (3, 4, 5), (6, 7, 8),
+                            (0, 3, 6), (1, 4, 7), (2, 5, 8),
+                            (0, 4, 8), (2, 4, 6)]
+    for a, b, c in winning_combinations:
+        if state[a] == state[b] == state[c] != '-':
+            return True, state[a]
+    if '-' not in state:
+        return True, None
+    return False, None
+
+
+def make_move(state, move, player):
+    new_state = list(state)
+    new_state[move] = player
+    return ''.join(new_state)
+
+
+def get_legal_moves(state):
+    return [i for i, spot in enumerate(state) if spot == '-']
+
+
+def print_board(state):
+    print("Board positions:")
+    for i in range(1, 10, 3):
+        print(f"{i} {i+1} {i+2}")
+    print("\nCurrent board:")
+    for i in range(0, 9, 3):
+        print(' '.join(state[i:i+3]))
+
+
+def human_move(state):
+    print_board(state)
+    while True:
+        try:
+            move = int(input("Enter your move (1-9): ")) - 1
+            if move in get_legal_moves(state):
+                return move
+            else:
+                print("Invalid move, try again.")
+        except ValueError:
+            print("Please enter a number.")
+
+
+def save_menace_state(menace):
+    with open('MENACE/menace_state.json', 'w') as file:
+        json.dump(menace, file)
+
+
+def load_menace_state():
+    try:
+        with open('menace_state.json', 'r') as file:
+            return json.load(file)
+    except FileNotFoundError:
+        return {}
+
+
+def one_game(menace, human_player=False, self_play=False):
+    state = "---------"
+    history = []
+    game_end, winner = check_game_end(state)
+    while not game_end:
+        player = 'X' if state.count('X') == state.count('O') else 'O'
+        if human_player and player == 'O':
+            move = human_move(state)
+        else:
+            if self_play or state not in menace:
+                num_holes = state.count('-')
+                menace[state] = [NUM_GUMDROPS for _ in range(num_holes)]
+            move_weights = menace[state]
+            legal_moves = get_legal_moves(state)
+            chosen_index = choose(move_weights)
+            move = legal_moves[chosen_index]
+
+        history.append((state, move))
+        state = make_move(state, move, player)
+        game_end, winner = check_game_end(state)
+    if human_player:
+        print_board(state)
+        if winner:
+            print(f"Game ended, winner: {winner}")
+        else:
+            print("Game ended, draw")
+    update_menace(menace, history, winner)
+    return winner
+
+
+def update_menace(menace, history, winner):
+    reward = 10
+    penalty = 1
+    for state, move in reversed(history):
+        if state in menace:
+            index = get_legal_moves(state).index(move)
+            if (winner == 'X' and state.count('X') == state.count('O')) or \
+                    (winner == 'O' and state.count('X') > state.count('O')):
+                menace[state][index] += reward
+            else:
+                menace[state][index] = max(1, menace[state][index] - penalty)
+
+
+def play_again():
+    choice = input("Play again? (y/n): ").lower()
+    return choice == 'y'
+
+
+def train_menace_self_play(menace, games):
+    for i in range(games):
+        one_game(menace, human_player=False, self_play=True)
+        if i % 1000 == 0 or i == games - 1:
+            print(
+                f"Self-Play Training Progress: {i / games * 100:.1f}%", end="\r")
+    print()
+
+
+
+menace = load_menace_state()
+
+# Training MENACE
+train_menace_self_play(menace, 50000)
+
+save_menace_state(menace)
+
+# Play against MENACE
+while True:
+    one_game(menace, human_player=True)
+    if play_again():
+        continue
+    else:
+        break
+
+save_menace_state(menace)
--- a/MENACE/menace_state.json
+++ b/MENACE/menace_state.json