diff --git a/.obsidian/workspace.json b/.obsidian/workspace.json index ffa5bdf..bcbdb22 100644 --- a/.obsidian/workspace.json +++ b/.obsidian/workspace.json @@ -7,60 +7,22 @@ "id": "ee1680277f6a0d97", "type": "tabs", "children": [ - { - "id": "8d8de4cd4c80f0f8", - "type": "leaf", - "state": { - "type": "markdown", - "state": { - "file": "Autonomous Networking/notes/3 WSN MAC.md", - "mode": "source", - "source": false - }, - "icon": "lucide-file", - "title": "3 WSN MAC" - } - }, { "id": "e86977f921664e81", "type": "leaf", "state": { "type": "markdown", "state": { - "file": "Autonomous Networking/notes/6 Internet of Things.md", + "file": "Autonomous Networking/notes/7 RL.md", "mode": "source", "source": false }, "icon": "lucide-file", - "title": "6 Internet of Things" - } - }, - { - "id": "0f6b9ae8038d6ee6", - "type": "leaf", - "state": { - "type": "image", - "state": { - "file": "Biometric Systems/images/Pasted image 20241023133231.png" - }, - "icon": "lucide-image", - "title": "Pasted image 20241023133231" - } - }, - { - "id": "46b435f098a3c1ca", - "type": "leaf", - "state": { - "type": "image", - "state": { - "file": "Biometric Systems/images/Pasted image 20241023133231.png" - }, - "icon": "lucide-image", - "title": "Pasted image 20241023133231" + "title": "7 RL" } } ], - "currentTab": 3 + "stacked": true } ], "direction": "vertical" @@ -216,10 +178,14 @@ "obsidian-git:Open Git source control": false } }, - "active": "2b2245f56092006e", + "active": "0d5325c0f9289cea", "lastOpenFiles": [ - "Biometric Systems/images/Pasted image 20241023135125.png", + "Autonomous Networking/notes/2 RFID.md", + "Autonomous Networking/notes/7 RL.md", + "Autonomous Networking/slides/7 RL1.pdf", + "Autonomous Networking/notes/3 WSN MAC.md", "Biometric Systems/images/Pasted image 20241023133231.png", + "Biometric Systems/images/Pasted image 20241023135125.png", "Biometric Systems/images/Pasted image 20241023135925.png", "Biometric Systems/images/Pasted image 20241023135127.png", 
"Biometric Systems/images/Pasted image 20241023135922.png", @@ -228,20 +194,15 @@ "Biometric Systems/images/Pasted image 20241023141948.png", "Biometric Systems/images/Pasted image 20241023144725.png", "Biometric Systems/images/Pasted image 20241024090856.png", - "Biometric Systems/images/Pasted image 20241024091235.png", - "Autonomous Networking/notes/3 WSN MAC.md", "Autonomous Networking/slides/2 RFID.pdf", "conflict-files-obsidian-git.md", "Autonomous Networking/notes/4 WSN Routing.md", "Autonomous Networking/notes/6.1 RL.md", "Biometric Systems/slides/LEZIONE5_NEW_More about face localization.pdf", - "Autonomous Networking/slides/7 RL1.pdf", "Autonomous Networking/slides/6 IoT.pdf", - "Autonomous Networking/notes/7 RL.md", "Autonomous Networking/slides/3 WSN.pdf", "Autonomous Networking/slides/4 WSN2.pdf", "Autonomous Networking/notes/6 Internet of Things.md", - "Autonomous Networking/notes/2 RFID.md", "Autonomous Networking/notes/5 Drones.md", "Biometric Systems/slides/LEZIONE4_Face introduction and localization.pdf", "Biometric Systems/notes/4. Face recognition.md", diff --git a/Autonomous Networking/notes/7 RL.md b/Autonomous Networking/notes/7 RL.md index 606f7a2..8ef493a 100644 --- a/Autonomous Networking/notes/7 RL.md +++ b/Autonomous Networking/notes/7 RL.md @@ -84,13 +84,13 @@ exploration vs exploitation dilemma: - emits observation Ot - emits scalar reward Rt - - **agent state:** the view of the agent on the environment state, is a function of history -- the function of the history is involved in taking the next decision -- the state representation defines what happens next -- ... 
- +- the history is involved in taking the next decision: + - agent selects actions + - environment selects observations/rewards +- the state information is used to determine what happens next + - state is a function of history: $S_t = f(H_t)$ + #### Inside the agent one or more of these components - **Policy:** agent's behavior function @@ -107,9 +107,10 @@ one or more of these components - is a prediction of future reward - used to evaluate the goodness/badness of states - values are prediction of rewards - - Vp(s) = Ep[yRt+1 + y^2Rt+2 ... | St = s] + - $V_\pi(s) = E_\pi[\gamma R_{t+1} + \gamma^2 R_{t+2} + \dots \mid S_t = s]$ - **Model:** - predicts what the environment will do next + - may predict the resultant next state and/or the next reward - many problems are model free back to the original problem: