Apprentissage par renforcement : Recherche du trésor

Ce savoir consiste à rechercher un trésor caché sur une carte à l’aide d’un algorithme d'apprentissage par renforcement (langage Python).

Définition apprentissage par renforcement

« En intelligence artificielle, plus précisément en apprentissage automatique, l'apprentissage par renforcement consiste, pour un agent autonome (robot, etc.), à apprendre les actions à prendre, à partir d'expériences, de façon à optimiser une récompense quantitative au cours du temps.

L'agent est plongé au sein d'un environnement, et prend ses décisions en fonction de son état courant. En retour, l'environnement procure à l'agent une récompense, qui peut être positive ou négative. L'agent cherche, au travers d'expériences itérées, un comportement décisionnel (appelé stratégie ou politique, et qui est une fonction associant à l'état courant l'action à exécuter) optimal, en ce sens qu'il maximise la somme des récompenses au cours du temps. »

Description générale

L’agent (robot) devra se déplacer de façon aléatoire au début et apprendre de ses erreurs pour trouver le chemin vers le trésor. L’agent pourra se déplacer sur les cases qui lui sont adjacentes.

Il sera bloqué par les obstacles, pénalisé (réapparition et récompense négative) s’il se trouve sur la même case qu’un piège, et récompensé (réapparition et récompense positive) s’il se trouve sur la même case que le trésor.

Menu de la simulation

# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import time

from PIL import Image, ImageTk
import Tkinter as tk
import os

# Size of the main menu window and its canvas (also reused as the size of
# the parameter window).
MENU_H = 400
MENU_W = 600
# Size of the help window and its canvas.
H_HEIGHT = 429
H_WIDTH = 758
# Size of the parameter-help window and its canvas.
PH_HEIGHT = 735
PH_WIDTH = 766
# Flags set to True while the matching help window is shown; the *_window
# handlers check them before deiconifying and the *_quit handlers reset them.
HELP_OPEN = False
PHELP_OPEN = False

def geoliste(g):
    """Split a string at its first three non-digit characters into 4 ints.

    Presumably meant for Tk geometry strings: "600x400+100+200" gives
    [600, 400, 100, 200].  The separator characters themselves are dropped,
    so a "-" separator does not make the following number negative.

    Raises IndexError when fewer than three non-digit characters are found
    and ValueError when a piece is not a valid integer literal.
    """
    # Positions of every character that is not a digit.
    cuts = [pos for pos, char in enumerate(g) if not char.isdigit()]

    numbers = []
    start = 0
    # Only the first three separators delimit fields; anything after the
    # third one belongs to the last field.
    for k in range(3):
        numbers.append(int(g[start:cuts[k]]))
        start = cuts[k] + 1
    numbers.append(int(g[start:]))
    return numbers

def launch(event):
    """Hide the menu, run the simulation script, then show the menu again.

    Bound to a <Button-1> event; ``event`` is not used.  The current SIMU_*
    settings are passed to ``rdt.py`` as command-line arguments in the
    order: height, width, agent, treasure, walls, traps, speed (same order
    and number formatting as before).

    The command is handed to ``subprocess.call`` as an argument list, so no
    shell parses the user-typed wall/trap strings: spaces no longer split
    them into extra arguments and shell metacharacters cannot inject
    commands.  The call blocks until the simulation process exits.
    """
    # Local import: subprocess is not among the module-level imports.
    import subprocess

    command = [
        'python', 'rdt.py',
        '%d' % SIMU_HEIGHT,
        '%d' % SIMU_WIDTH,
        '%d' % SIMU_AGENT,
        '%d' % SIMU_TREASURE,
        '%s' % SIMU_WALLS,
        '%s' % SIMU_TRAPS,
        '%f' % SIMU_SPEED,
    ]

    window.withdraw()
    try:
        subprocess.call(command)
    finally:
        # Always bring the menu back, even if the interpreter could not be
        # started (subprocess raises OSError where os.system only returned
        # a non-zero status).
        window.deiconify()

def quit(event):
    """Handle a click on the Quit button by calling ``window.quit()``.

    ``event`` is the Tk event object supplied by the binding; it is unused.

    NOTE(review): this name shadows the ``quit`` builtin; it is kept because
    the button binding in the main script refers to it.
    """
    window.quit()

def help_window(event):
    """Show the help window unless it is already flagged as open.

    Bound to the main menu's Help button; ``event`` is unused.  Sets the
    module-level HELP_OPEN flag once the window has been deiconified.
    """
    global HELP_OPEN
    if HELP_OPEN:
        # Already shown: nothing to do.
        return
    hwindow.deiconify()
    HELP_OPEN = True

def phelp_window(event):
    """Show the parameter-help window unless it is already flagged as open.

    Bound to the parameter window's Help button; ``event`` is unused.  Sets
    the module-level PHELP_OPEN flag once the window has been deiconified.
    """
    global PHELP_OPEN
    if PHELP_OPEN:
        # Already shown: nothing to do.
        return
    phwindow.deiconify()
    PHELP_OPEN = True

def parameter_window(event):
    """Swap the main menu for the parameter window.

    Bound to the Parameters button; ``event`` is unused.  The menu window is
    withdrawn before the parameter window is deiconified.
    """
    window.withdraw()
    pwindow.deiconify()

def help_quit():
    """Hide the help window and clear the HELP_OPEN flag.

    Registered as the help window's WM_DELETE_WINDOW handler, so closing the
    window with the title-bar cross withdraws it instead of destroying it.
    """
    global HELP_OPEN
    hwindow.withdraw()
    HELP_OPEN = False

def phelp_quit():
    """Hide the parameter-help window and clear the PHELP_OPEN flag.

    Registered as that window's WM_DELETE_WINDOW handler, so closing it with
    the title-bar cross withdraws it instead of destroying it.
    """
    global PHELP_OPEN
    phwindow.withdraw()
    PHELP_OPEN = False

def parameter_quit():
    """Hide the parameter window and bring the main menu back.

    Used both as the parameter window's WM_DELETE_WINDOW handler and at the
    end of recupVariable() once the settings have been read.
    """
    pwindow.withdraw()
    window.deiconify()

def recupVariable(event):
    """Read the parameter widgets into the SIMU_* globals, then close the window.

    Bound to the Done button; ``event`` is unused.

    * Height, width and delay are taken from their scales as-is.
    * The agent case is typed as a 1-based number and stored 0-based.  An
      entry that is not an integer in 1..Height*Width falls back to the
      first case (stored as 0).
    * The treasure case is stored exactly as typed (1-based).  An entry that
      is not an integer in 1..Height*Width falls back to Height*Width.
    * Wall and trap lists are stored as the raw entry text; an empty entry
      is replaced by ',' so that a non-empty argument is always passed on.

    Fix: the agent entry used to be range-checked after the "-1" conversion,
    so typing Height*Width+1 was accepted and produced an index one past the
    last case.  The range check is now done on the typed 1-based value.
    """
    global SIMU_HEIGHT, SIMU_WIDTH, SIMU_AGENT, SIMU_TREASURE, SIMU_WALLS, SIMU_TRAPS, SIMU_SPEED
    SIMU_HEIGHT = pscale_height.get()
    SIMU_WIDTH = pscale_width.get()
    SIMU_SPEED = pscale_speed.get()
    case_count = SIMU_HEIGHT*SIMU_WIDTH

    # Agent: validate the 1-based entry first, convert to 0-based afterwards.
    try:
        agent_case = int(pentry_agent.get())
    except ValueError:
        agent_case = 1
    if agent_case < 1 or agent_case > case_count:
        agent_case = 1
    SIMU_AGENT = agent_case - 1

    # Treasure: kept 1-based, defaults to the last case.
    try:
        SIMU_TREASURE = int(pentry_treasure.get())
    except ValueError:
        SIMU_TREASURE = case_count
    if SIMU_TREASURE < 1 or SIMU_TREASURE > case_count:
        SIMU_TREASURE = case_count

    # Walls / traps: raw comma-separated text, ',' standing for "none".
    SIMU_WALLS = pentry_walls.get()
    if SIMU_WALLS == '':
        SIMU_WALLS = ','
    SIMU_TRAPS = pentry_traps.get()
    if SIMU_TRAPS == '':
        SIMU_TRAPS = ','

    parameter_quit()

if __name__ == "__main__":
    # Default simulation settings; recupVariable() overwrites them with the
    # values chosen in the parameter window and launch() passes them on.
    SIMU_HEIGHT = 6
    SIMU_WIDTH = 4
    SIMU_AGENT = 0
    SIMU_TREASURE = SIMU_HEIGHT*SIMU_WIDTH
    SIMU_WALLS = ','
    SIMU_TRAPS = ','
    SIMU_SPEED = 0.5

    # Main menu window
    window = tk.Tk()
    window.title("The Treasure Hunt")
    window.resizable(width=False, height=False)
    canvas = tk.Canvas(window, bg='white',
                           height=MENU_H,
                           width=MENU_W)
    # The image is also stored on the canvas so a reference to it stays alive.
    canvas._photobg = photobg = ImageTk.PhotoImage(file= "image/image_menu.png")
    canvas.create_image(0,0,anchor=tk.NW,image=photobg)
    canvas.pack()

    launch_button = tk.Button(canvas, text ='Launch simulation', width = 30)
    launch_button.place(relx=0.5,rely=0.7,anchor='center')

    parameter_button = tk.Button(canvas, text ='Parameters', width = 30)
    parameter_button.place(relx=0.5,rely=0.8,anchor='center')

    quit_button = tk.Button(canvas, text ='Quit', width = 30)
    quit_button.place(relx=0.5,rely=0.9,anchor='center')

    help_button = tk.Button(canvas, text ='Help', width = 4)
    help_button.place(relx=0.9,rely=0.9,anchor='center')

    # Center the window
    MENUX = (window.winfo_screenwidth()-MENU_W)//2
    MENUY = (window.winfo_screenheight()-MENU_H)//2
    window.geometry("%dx%d%+d%+d" % (MENU_W,MENU_H,MENUX,MENUY))

    # Button event
    launch_button.bind('<Button-1>', launch)
    parameter_button.bind('<Button-1>', parameter_window)
    quit_button.bind('<Button-1>', quit)
    help_button.bind('<Button-1>', help_window)

    # Help window: placed to the left of the menu, vertically centred on it,
    # and kept hidden until help_window() shows it.
    hwindow = tk.Toplevel(window)
    hwindow.title("Help")
    hwindow.geometry("%dx%d%+d%+d" % (H_WIDTH, H_HEIGHT, MENUX-H_WIDTH, MENUY+(MENU_H//2)-(H_HEIGHT//2)))
    hwindow.resizable(width=False, height=False)

    hcanvas = tk.Canvas(hwindow,height=H_HEIGHT,width=H_WIDTH)
    hcanvas._photohelp = photohelp = ImageTk.PhotoImage(file= "image/help.png")
    hcanvas.create_image(0,0,anchor=tk.NW,image=photohelp)
    hcanvas.pack()

    hwindow.withdraw()

    # Parameter window: same size and position as the menu it replaces.
    pwindow = tk.Toplevel(window)
    pwindow.title("Parameter")
    pwindow.geometry("%dx%d%+d%+d" % (MENU_W,MENU_H,MENUX,MENUY))
    pwindow.resizable(width=False, height=False)

    # The scales are created without a ``variable=`` option.  That option
    # expects a Tk variable; the plain numbers previously passed were taken
    # as the NAME of a global Tcl variable, so two scales with equal default
    # values would have been silently linked.  set()/get() work without it.
    pscale_width = tk.Scale(pwindow, orient='horizontal', from_=1, to=10, length=100, label='Width')
    pscale_width.set(SIMU_WIDTH)
    pscale_width.place(relx=0.2,rely=0.1,anchor='center')

    pscale_height = tk.Scale(pwindow, orient='vertical', from_=1, to=10, length=100, label='Height')
    pscale_height.set(SIMU_HEIGHT)
    pscale_height.place(relx=0.1,rely=0.3,anchor='center')

    pbutton_done = tk.Button(pwindow, text ='Done', width = 30)
    pbutton_done.place(relx=0.5,rely=0.8,anchor='center')
    pbutton_done.bind('<Button-1>', recupVariable)

    tk.Label(pwindow, text="Choose Agent's case between 1 & Height x Width").place(relx=0.3, rely=0.1,anchor='w')
    pentry_agent = tk.Entry(pwindow, width=2)
    pentry_agent.place(relx=0.5,rely=0.15,anchor='center')

    tk.Label(pwindow, text="Choose Treasure's case between 1 & Height x Width").place(relx=0.3, rely=0.25,anchor='w')
    pentry_treasure = tk.Entry(pwindow, width=2)
    pentry_treasure.place(relx=0.5,rely=0.3,anchor='center')

    tk.Label(pwindow, text="Enter a list for Wall's cases, \nbetween 1 & Height x Width and with ',' as separator").place(relx=0.3, rely=0.4,anchor='w')
    pentry_walls = tk.Entry(pwindow, width=30)
    pentry_walls.place(relx=0.5,rely=0.5,anchor='center')

    tk.Label(pwindow, text="Enter a list for Trap's cases, \nbetween 1 & Height x Width and with ',' as separator").place(relx=0.3, rely=0.6,anchor='w')
    pentry_traps = tk.Entry(pwindow, width=30)
    pentry_traps.place(relx=0.5,rely=0.7,anchor='center')

    pscale_speed = tk.Scale(pwindow, orient='vertical', from_=0.05, to=0.5, length=100, label='Delay time', resolution=0.05)
    pscale_speed.set(SIMU_SPEED)
    pscale_speed.place(relx=0.1,rely=0.6,anchor='center')

    pbutton_help = tk.Button(pwindow, text ='Help', width = 4)
    pbutton_help.place(relx=0.9,rely=0.9,anchor='center')
    pbutton_help.bind('<Button-1>', phelp_window)

    pwindow.withdraw()

    # Parameter Help window: placed to the right of the menu, vertically
    # centred on it, and kept hidden until phelp_window() shows it.
    phwindow = tk.Toplevel(window)
    phwindow.title("Help")
    phwindow.geometry("%dx%d%+d%+d" % (PH_WIDTH, PH_HEIGHT, MENUX+MENU_W, MENUY+(MENU_H//2)-(PH_HEIGHT//2)))
    phwindow.resizable(width=False, height=False)
    canvas_phelp = tk.Canvas(phwindow,height=PH_HEIGHT,width=PH_WIDTH)
    canvas_phelp._photophelp = photophelp = ImageTk.PhotoImage(file= "image/final_help_parameter.png")
    canvas_phelp.create_image(0,0,anchor=tk.NW,image=photophelp)
    canvas_phelp.pack()
    phwindow.withdraw()

    # Window-close protocol (title-bar cross): hide the secondary windows
    # through their *_quit handlers instead of destroying them.
    hwindow.protocol('WM_DELETE_WINDOW', help_quit)
    pwindow.protocol('WM_DELETE_WINDOW', parameter_quit)
    phwindow.protocol('WM_DELETE_WINDOW', phelp_quit)

    # Launch the menu window
    window.mainloop()

Version et bibliothèques utilisées

Version Python : 2.7.12

Bibliothèques :
- PIL pour l’implémentation des images
- Tkinter pour l’implémentation des widgets (boutons, etc.)
- numpy pour la gestion de l’aléatoire
- pandas pour la construction de la Q-table
- time pour gérer le délai entre chaque action
- sys pour récupérer les données de la simulation via les arguments de la ligne de commande
- os pour lancer la simulation comme une commande

Programme principal

Vous pouvez télécharger le code source du programme principal et les différents fichiers du savoir en cliquant sur le lien ci-dessous.

Télécharger savoir fichier

Catégories

Java 2 Langage C 1 Python 2

Savoirs les plus récents

Création de tableaux en HTML
HTML5
PHP DateTime : créez, comparez et formatez des dates
PHP
Correction algorithme : Généalogie
Algorithmes
Correction algorithme : Coupe du monde
Algorithmes
Correction algorithme : Découpage et collage
Algorithmes

Apprentissage par renforcement : Recherche du trésor

Définition apprentissage par renforcement

Description générale

Menu de la simulation

Version et bibliothèques utilisées

Programme principal

Catégories

Savoirs les plus récents

Création de tableaux en HTML

PHP DateTime : créez, comparez et formatez des dates

Correction algorithme : Généalogie

Correction algorithme : Coupe du monde

Correction algorithme : Découpage et collage

0 commentaires