vault backup: 2024-10-24 14:50:54

@@ -1,15 +1,8 @@
 {
-<<<<<<< HEAD
   "display": false,
-  "blockDisplay": false,
-  "blockMenuIcon": false,
-  "blockKeyboardIcon": false,
-=======
-  "display": true,
   "blockDisplay": true,
   "blockMenuIcon": true,
   "blockKeyboardIcon": true,
->>>>>>> origin/main
   "inlineDisplay": false,
   "inlineMenuIcon": false,
   "inlineKeyboardIcon": false

.obsidian/workspace.json (vendored)
@@ -8,35 +8,36 @@
       "type": "tabs",
       "children": [
         {
-          "id": "6253f413423c2998",
+          "id": "aca070766b645da2",
           "type": "leaf",
           "state": {
             "type": "markdown",
             "state": {
-              "file": "Foundation of data science/notes/1 CV Basics.md",
+              "file": "Biometric Systems/notes/4. Face recognition.md",
               "mode": "source",
               "source": false
             },
             "icon": "lucide-file",
-            "title": "1 CV Basics"
+            "title": "4. Face recognition"
           }
         },
         {
-          "id": "223656f305be45b9",
+          "id": "1444840efdc58de5",
           "type": "leaf",
           "state": {
-            "type": "markdown",
+            "type": "pdf",
             "state": {
-              "file": "conflict-files-obsidian-git.md",
-              "mode": "source",
-              "source": false
+              "file": "Biometric Systems/slides/LEZIONE5_NEW_More about face localization.pdf",
+              "page": 59,
+              "left": -22,
+              "top": 260,
+              "zoom": 0.675
             },
-            "icon": "lucide-file",
-            "title": "conflict-files-obsidian-git"
+            "icon": "lucide-file-text",
+            "title": "LEZIONE5_NEW_More about face localization"
           }
         }
       ],
-      "currentTab": 1,
       "stacked": true
     }
   ],
@@ -93,7 +94,8 @@
       }
     ],
     "direction": "horizontal",
-    "width": 300
+    "width": 300,
+    "collapsed": true
   },
   "right": {
     "id": "11560c155f3d8f6e",
@@ -193,44 +195,44 @@
       "obsidian-git:Open Git source control": false
     }
   },
-  "active": "223656f305be45b9",
+  "active": "aca070766b645da2",
   "lastOpenFiles": [
+    "Biometric Systems/slides/LEZIONE5_NEW_More about face localization.pdf",
+    "Biometric Systems/notes/4. Face recognition.md",
+    "Pasted image 20241024100002.png",
+    "Pasted image 20241024095704.png",
+    "Pasted image 20241024094223.png",
+    "Pasted image 20241024093000.png",
+    "Pasted image 20241024092903.png",
+    "Pasted image 20241024092146.png",
+    "Pasted image 20241024091511.png",
+    "Pasted image 20241024091446.png",
+    "Pasted image 20241024091433.png",
+    "Pasted image 20241024091235.png",
+    "Autonomous Networking/notes/7 RL.md",
+    "Autonomous Networking/slides/7 RL1.pdf",
+    "Autonomous Networking/notes/6 Internet of Things.md",
+    "Autonomous Networking/slides/6 IoT.pdf",
+    "Biometric Systems/notes/2. Performance indexes.md",
+    "Biometric Systems/slides/LEZIONE3_Affidabilita_del_riconoscimento.pdf",
+    "Biometric Systems/slides/LEZIONE2_Indici_di_prestazione.pdf",
+    "Biometric Systems/notes/3. Recognition Reliability.md",
+    "Autonomous Networking/notes/4 WSN Routing.md",
+    "Autonomous Networking/notes/3 WSN MAC.md",
+    "Autonomous Networking/notes/2 RFID.md",
     "Foundation of data science/notes/1 CV Basics.md",
     "conflict-files-obsidian-git.md",
-    "Autonomous Networking/images/Pasted image 20241017161803.png",
-    "Autonomous Networking/images/Pasted image 20241017161747.png",
-    "Autonomous Networking/images/Pasted image 20241017161744.png",
-    "Autonomous Networking/images/Pasted image 20241017161724.png",
-    "Autonomous Networking/images/Pasted image 20241017154152.png",
-    "Biometric Systems/images/Pasted image 20241017083255.png",
-    "Biometric Systems/images/Pasted image 20241017083943.png",
-    "Biometric Systems/images/Pasted image 20241017083506.png",
-    "Autonomous Networking/notes/6.1 RL.md",
-    "Autonomous Networking/notes/6 Internet of Things.md",
     "Autonomous Networking/notes/5 Drones.md",
     "Autonomous Networking/slides/5 Drones.pdf",
-    "Biometric Systems/notes/4. Face recognition.md",
     "Biometric Systems/slides/LEZIONE4_Face introduction and localization.pdf",
-    "Biometric Systems/notes/3. Recognition Reliability.md",
-    "Biometric Systems/slides/LEZIONE3_Affidabilita_del_riconoscimento.pdf",
-    "Biometric Systems/images/Pasted image 20241016174417.png",
-    "Biometric Systems/images/Pasted image 20241016174411.png",
     "Foundation of data science/slides/IP CV Basics.pdf",
     "Foundation of data science/slides/Untitled.md",
     "Autonomous Networking/slides/4 WSN2.pdf",
     "Autonomous Networking/notes/4 WSN pt. 2.md",
     "Foundation of data science/notes",
-    "Biometric Systems/notes/2. Performance indexes.md",
     "Biometric Systems/notes/1. Introduction.md",
-    "Biometric Systems/notes",
-    "Autonomous Networking/notes/4 WSN Routing.md",
-    "Autonomous Networking/notes/3 WSN MAC.md",
     "Autonomous Networking/notes/3 WSN.md",
-    "Autonomous Networking/slides/3 WSN.pdf",
-    "Autonomous Networking/notes/2 RFID.md",
     "BUCA/Queues.md",
-    "BUCA",
-    "Biometric Systems/slides/LEZIONE2_Indici_di_prestazione.pdf",
     "Biometric Systems/final notes/2. Performance indexes.md",
     "().md",
     "a.md",

@@ -1,7 +1,7 @@
 The IoT term is used to refer to:
 - the resulting global network of connecting smart objects
-- the protocols ...
-- ...
+- the technologies needed to realize such a vision
+- the applications and services leveraging such technologies
 
 required features:
 - device heterogeneity
@@ -9,6 +9,11 @@
 - ubiquitous data exchange
 - energy-optimized solutions
 
+key features:
+- localization and tracking (many applications require position and movement tracking)
+- self-organization capabilities (support for ad-hoc networks)
+- semantic interoperability (standard formats for data)
+- embedded security and privacy-preserving mechanisms
 
 #### Backscattering
 - allows devices to run without a battery
@@ -16,7 +21,7 @@
 - use radio frequency signals as a power source
 - two types
 - ambient
-- rfid
+- RFID
 
 ##### Ambient backscattering
 - devices harvest power from signals available in the environment
@@ -29,13 +34,12 @@
 - signal may not be available indoors or not powerful enough
 
 ##### RFID backscattering
-...
+- main advantage is the availability of the RFID signal: a reader is always present in an RFID deployment
 
 ##### Battery-free smart home
 - in a smart home there may be a lot of smart devices
 - if every one of them has a battery, it's not good for the environment
 - we can deploy an RFID reader with multiple antennas that covers all the different rooms
+- of course RFID sensors can have very low capabilities, but they can run sensors!
 
 ### Communication
 add scheme slide

@@ -1,122 +0,0 @@
(deleted: an earlier draft of these RL case-study notes; its content reappears, lightly revised, in Autonomous Networking/notes/7 RL.md below)

Autonomous Networking/notes/7 RL.md (new file)
@@ -0,0 +1,225 @@
Case study: battery-free smart home
- each device produces a new data sample at a rate that depends on the environment and the user (continuously, event-based / on demand...)
- a device should only transmit when it has new data
- but in backscattering-based networks devices need to be queried by the receiver

In which order should the reader query tags?
- assume prefixed time slots
- TDMA with random access performs poorly
- TDMA with fixed assignment also does (wasted queries)
- we want to query devices that have new data samples and avoid
	- data loss
	- redundant queries

Goal: design a MAC protocol that adapts to all of this.
One possibility is to use Reinforcement Learning.

#### Reinforcement learning
How can an intelligent agent learn to make a good sequence of decisions?

- an agent can figure out how the world works by trying things and seeing what happens
- this is what people and animals do
- we explore a computational approach to learning from interaction
- goal-directed learning from interaction

RL is learning what to do; it presents two main characteristics:
- trial-and-error search
- delayed reward

- sensation, action and goal are the 3 main aspects of a reinforcement learning method
- a learning agent must be able to
	- sense the state of the environment
	- take actions that affect the state

Differences from other ML:
- no supervisor
- feedback may be delayed
- time matters
- agent actions affect future decisions
- a sequence of successful decisions will result in the process being reinforced
- RL learns online

Learning online
- learning while interacting with an ever-changing world
- we expect agents to get things wrong, and to refine their understanding as they go
- the world is not static; agents continuously encounter new situations

RL applications:
- self-driving cars
- engineering
- healthcare
- news recommendation
- ...

Rewards
- a reward is a scalar feedback signal (a number)
- reward $R_t$ indicates how well the agent is doing at step $t$
- the agent should maximize cumulative reward

RL is based on the reward hypothesis:
all goals can be described by the maximization of expected cumulative reward

communication in battery-free environments:
- positive reward if the queried device has new data
- else negative

Challenge:
- tradeoff between exploration and exploitation
- to obtain a lot of reward an RL agent must prefer actions that it tried in the past
- but better actions may exist... so the agent also has to explore!

exploration vs exploitation dilemma:
- comes from incomplete information: we need to gather enough information to make the best overall decisions while keeping the risk under control
- exploitation: we take advantage of the best option we know
- exploration: test new decisions

### A general RL framework
**at each timestep the agent:**
- executes action $A_t$
- receives observation $O_t$
- receives scalar reward $R_t$

**the environment** (see the loop sketch below)**:**
- receives action $A_t$
- emits observation $O_t$
- emits scalar reward $R_t$
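
A minimal sketch of this agent-environment loop (the `Env` class, its `reset`/`step` methods and the random policy are illustrative assumptions, not part of the lecture):

```python
# Minimal agent-environment interaction loop: A_t -> (O_t, R_t).
import random

class Env:
    def reset(self):
        return 0                                   # initial observation O_0

    def step(self, action):
        obs = random.randint(0, 3)                 # next observation O_t
        reward = 1.0 if action == obs else 0.0     # scalar reward R_t
        return obs, reward

env = Env()
obs = env.reset()
for t in range(10):
    action = random.choice([0, 1, 2, 3])           # agent executes action A_t
    obs, reward = env.step(action)                 # environment emits O_t and R_t
    print(t, action, obs, reward)
```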

**agent state:** the view of the agent on the environment state; it is a function of the history
- the function of the history is involved in taking the next decision
- the state representation defines what happens next
- ...

#### Inside the agent
one or more of these components:
- **Policy:** the agent's behavior function
	- defines what to do (behavior at a given time)
	- maps state to action
	- core of the RL agent
	- the policy is altered based on the reward
	- may be
		- deterministic: a single function of the state
		- stochastic: specifying probabilities for each action
			- the reward changes the probabilities
- **Value function:**
	- specifies what's good in the long run
	- is a prediction of future reward
	- used to evaluate the goodness/badness of states
	- values are predictions of rewards
	- $v_{\pi}(s) = \mathbb{E}_{\pi}\left[\gamma R_{t+1} + \gamma^{2} R_{t+2} + \dots \mid S_t = s\right]$
- **Model:**
	- predicts what the environment will do next
	- many problems are model-free

back to the original problem:
- n devices
- each device produces new data with rate_i
- in which order should the reader query tags?
- formulate as an RL problem
	- the agent is the reader
	- one action per device (query)
	- rewards:
		- positive when querying a device with new data
		- negative if it has no data
		- what to do if the device has lost data?
	- state?

### Exploration vs exploitation trade-off
- rewards evaluate the actions taken
- evaluative feedback depends on the action taken
- no active exploration

Let's consider a simplified version of an RL problem: the K-armed bandit problem.
- K different options
- every time we need to choose one
- maximize the expected total reward over some time period
- analogy with slot machines
	- the levers are the actions
	- which lever gives the highest reward?
- Formalization
	- set of actions A (or "arms")
	- reward function R that follows an unknown probability distribution
	- only one state
	- ...

Example: doctor treatment
- the doctor has 3 treatments (actions), each of them has a reward
- for the doctor to decide which action is best, we must define the value of taking each action
- we call these values the action values (or action-value function)
- action value: ...

Each action has a reward defined by a probability distribution.
- the red treatment has a Bernoulli probability
- the yellow treatment binomial
- the blue uniform
- the agent does not know the distributions!
- the estimated value of action a is the sum of rewards observed divided by the total number of times the action has been taken (add formula ...)
- $\mathbb{1}_{predicate}$ denotes the random variable (1 if true, else 0)

- greedy action:
	- doctors assign the treatment they currently think is the best
	- ...
	- the greedy action is computed as the argmax of the Q values
	- greedy always exploits current knowledge
- epsilon-greedy (see the sketch after this list):
	- with a probability epsilon sometimes we explore
	- with probability 1-eps: we choose the best greedy action
	- with probability eps: we choose a random action
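
A minimal sketch of ε-greedy action selection over the estimated Q values (function and variable names are illustrative assumptions):

```python
# Epsilon-greedy selection: exploit the current estimates with prob. 1-eps,
# explore a random action with prob. eps.
import random

def epsilon_greedy(Q, eps, rng=random):
    if rng.random() < eps:
        return rng.randrange(len(Q))                                   # explore
    best = max(Q)
    return rng.choice([a for a, q in enumerate(Q) if q == best])       # exploit (ties broken at random)

Q = [1.0, 1.67, 0.0, 0.0]        # e.g. the estimates from the exercise below
print(epsilon_greedy(Q, eps=0.1))
```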

exercises ...

exercise 2: k-armed bandit problem.
K = 4 actions, denoted 1, 2, 3 and 4
eps-greedy selection
initial Q estimates = 0 for all a.

Initial sequence of actions and rewards is:
A1 = 1 R1 = 1
A2 = 2 R2 = 1
A3 = 2 R3 = 2
A4 = 2 R4 = 2
A5 = 3 R5 = 0

---
step A1: action 1 selected. Q(1) = 1
step A2: action 2 selected. Q(1) = 1, Q(2) = 1
step A3: action 2 selected. Q(1) = 1, Q(2) = 1.5
step A4: action 2 selected. Q(1) = 1, Q(2) ≈ 1.67
step A5: action 3 selected. Q(1) = 1, Q(2) ≈ 1.67, Q(3) = 0

For sure A2 and A5 are epsilon cases: the system didn't choose the action with the highest Q value.
A3 and A4 can each be either a greedy or an epsilon case.
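
To double-check the estimates, a short sample-average recomputation over the (action, reward) pairs listed in the exercise:

```python
# Recompute the sample-average estimates Q(a) for the exercise above.
from collections import defaultdict

steps = [(1, 1), (2, 1), (2, 2), (2, 2), (3, 0)]     # (action, reward) pairs
totals, counts = defaultdict(float), defaultdict(int)

for t, (a, r) in enumerate(steps, start=1):
    totals[a] += r
    counts[a] += 1
    q = {act: totals[act] / counts[act] for act in sorted(counts)}
    print(f"after A{t}: {q}")
# after A5: {1: 1.0, 2: 1.666..., 3: 0.0}
```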

#### Incremental formula to estimate action-value
- to simplify notation we concentrate on a single action
- $R_i$ denotes the reward received after the i-th selection of this action; $Q_n$ denotes the estimate of its action value after it has been selected $n-1$ times: $Q_n = \frac{R_1 + R_2 + \dots + R_{n-1}}{n-1}$
- given $Q_n$ and the reward $R_n$, the new average of rewards can be computed incrementally: $Q_{n+1} = Q_{n} + \frac{1}{n}\left[R_n - Q_n\right]$
- NewEstimate <- OldEstimate + StepSize (Target - OldEstimate)
	- Target - OldEstimate is the error

Pseudocode for the bandit algorithm:
```
Initialize, for a = 1 to k:
    Q(a) = 0
    N(a) = 0

Loop forever:
    with probability 1-eps:
        A = argmax_a(Q(a))
    else:
        A = random action
    R = bandit(A)                      # returns the reward of the action A
    N(A) = N(A) + 1
    Q(A) = Q(A) + 1/N(A) * (R - Q(A))
```

Nonstationary problem: reward probabilities change over time.
- in the doctor example, a treatment may not be good in all conditions
- the agent (doctor) is unaware of the changes and would like to adapt to them

An option is to use a fixed step size: we replace the 1/n factor with a constant factor $\alpha$ between 0 and 1,
and we get $Q_{n+1} = (1-\alpha)^{n}Q_1 + \sum_{i=1}^{n}{\alpha(1 - \alpha)^{n-i} R_i}$
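
A small sketch of the constant step-size update on a toy nonstationary reward (the drift at t = 100 and the noise level are illustrative assumptions):

```python
# Constant step-size update: recent rewards weigh more than old ones
# (exponential recency-weighted average), so the estimate tracks changes.
import random

alpha, q = 0.1, 0.0
for t in range(1, 201):
    true_mean = 1.0 if t <= 100 else 3.0        # reward distribution changes at t = 100
    r = random.gauss(true_mean, 0.5)
    q = q + alpha * (r - q)                     # Q_{n+1} = Q_n + alpha * (R_n - Q_n)
print(round(q, 2))   # tracks the new mean (~3) instead of the overall average (~2)
```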

... ADD MISSING PART ...

Autonomous Networking/slides/6 IoT.pdf (new binary file)
Autonomous Networking/slides/7 RL1.pdf (new binary file)

@@ -19,3 +19,130 @@ Steps:
##### Can you hide?
According to Adam Harvey, the key point that computers detect is the "nose bridge", i.e. the area between the eyes. If it is hidden, the computer can be led to believe there is no face at all.

Approaches of different nature:
- **Feature-based:** the main features of a face are located (e.g. eye position, nose position, etc.) and several properties of these can then be verified (e.g. correct skin color, or nose-eye distance within a certain threshold)
- **Image-based:** machine learning models are usually employed, which learn from exemplar images.

> [!PDF|yellow] [[LEZIONE5_NEW_More about face localization.pdf#page=4&selection=56,0,61,0&color=yellow|LEZIONE5_NEW_More about face localization, p.4]]
> > pixels with the top 5 percent
>
> white compensation using Luma

RGB is not a perceptually uniform space:
- colors close to each other in RGB space may not be perceived as similar

RGB to YCrCb
![[Pasted image 20241023133231.png]]
Darker regions may have a dominant blue component, light regions a dominant red one.
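
For reference, a small sketch of the RGB → YCbCr conversion (ITU-R BT.601 full-range constants; the function name is an assumption):

```python
# RGB -> YCbCr (ITU-R BT.601, full range): skin detectors usually threshold Cb/Cr.
def rgb_to_ycbcr(r, g, b):
    y  =  0.299    * r + 0.587    * g + 0.114    * b
    cb = -0.168736 * r - 0.331264 * g + 0.5      * b + 128
    cr =  0.5      * r - 0.418688 * g - 0.081312 * b + 128
    return y, cb, cr

print(rgb_to_ycbcr(200, 120, 100))   # a skin-like RGB triple
```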

Algorithm A:
- variance-based segmentation
	- the simplest method is thresholding (see the sketch after this list)
	- we have an image I with L gray levels
	- $n_i$ is the number of pixels with gray level $i$
	- $p_i = \frac{n_i}{M \times N}$ is the probability of gray level $i$
	- we divide the pixels into two classes C0 and C1 by a gray level t
	- for each class we can compute the mean and variance of the gray level![[Pasted image 20241023135127.png]]
- connected components
	- skin-tone pixels are segmented using local color variance![[Pasted image 20241023135925.png]]
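
A compact sketch of variance-based threshold selection (essentially Otsu's criterion: pick the t that maximizes the between-class variance; the toy histogram is an assumption):

```python
# Variance-based threshold selection over a gray-level histogram (Otsu-style).
def best_threshold(hist):
    total = sum(hist)
    p = [h / total for h in hist]                 # p_i = n_i / (M*N)
    best_t, best_var = 0, -1.0
    for t in range(1, len(p)):
        w0 = sum(p[:t]); w1 = 1.0 - w0            # class probabilities of C0, C1
        if w0 == 0 or w1 == 0:
            continue
        mu0 = sum(i * p[i] for i in range(t)) / w0
        mu1 = sum(i * p[i] for i in range(t, len(p))) / w1
        between = w0 * w1 * (mu0 - mu1) ** 2      # between-class variance
        if between > best_var:
            best_t, best_var = t, between
    return best_t

hist = [5] * 100 + [0] * 56 + [8] * 100           # toy bimodal 256-bin histogram
print(best_threshold(hist))                       # ~ the valley between the two modes
```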

Eye localization
The algorithm builds two different eye maps (chroma and luma):
- **Chrominance map:** its construction relies on the observation that the region around the eyes is characterized by high values of Cb and low values of Cr: $EyeMapC = \frac{1}{3}\left[(C_b)^{2}+(\tilde{C_r})^{2}+\frac{C_b}{C_r}\right]$, where $\tilde{C_r}$ is the complemented (negated) Cr
- **Luminance map:** eyes usually contain both light and dark zones that can be highlighted by morphological operators (dilation and erosion with hemispheric structuring elements)![[Pasted image 20241023140250.png]]

The chroma map is enhanced by histogram equalization.
The two maps are combined through an AND operator.
The resulting map undergoes dilation, masking and normalization to discard the other face regions and brighten the eyes.
Further operations allow this map to be refined.
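
A rough sketch of the chrominance eye map on 8-bit Cb/Cr arrays, following the formula above (the normalization choices are assumptions):

```python
# Chrominance eye map: bright where Cb is high and Cr is low (eye regions).
import numpy as np

def eye_map_chroma(cb, cr):
    cb = cb.astype(np.float64)
    cr = cr.astype(np.float64)
    cr_neg = 255.0 - cr                              # complemented Cr
    ratio = cb / np.maximum(cr, 1.0)                 # avoid division by zero
    emap = ((cb / 255.0) ** 2 + (cr_neg / 255.0) ** 2 + ratio / ratio.max()) / 3.0
    return (255.0 * emap / emap.max()).astype(np.uint8)   # stretch to [0, 255]

cb = np.random.randint(0, 256, (120, 160), dtype=np.uint8)
cr = np.random.randint(0, 256, (120, 160), dtype=np.uint8)
print(eye_map_chroma(cb, cr).shape)
```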

**Dilation**
The dilation operator takes two pieces of data as inputs. The first is the image which is to be dilated. The second is a (usually small) set of coordinate points known as a structuring element (also known as a kernel). It is this structuring element that determines the precise effect of the dilation on the input image.

**Erosion**
The erosion operator takes two pieces of data as inputs. The first is the image which is to be eroded. The second is a (usually small) set of coordinate points known as a structuring element (also known as a kernel). It is this structuring element that determines the precise effect of the erosion on the input image.

![[Pasted image 20241023141948.png]]

The algorithm analyzes all the triangles composed of two candidate eyes and a candidate mouth. Each triangle is verified by checking:
- luma variations and the average orientation gradient of the blobs containing the eyes and the mouth
- geometry and orientation of the triangle
- presence of a face contour around the triangle

#### Algorithm B
Viola-Jones represents a true innovation for face localization within an image. Since the algorithm is based on machine learning, it was trained on a custom dataset in which images are labeled as positive if they contain a face and as negative if they contain none.

The image-based algorithm uses a classifier initially trained with several instances of the classes to identify (positive examples) and with classes of images that contain no object of the target class (negative examples).

The goal of the training is to extract features from the examples and select the most discriminative ones. The model is built incrementally and contains the selected features.

The algorithm makes use of:
- **AdaBoost** for feature selection: several weak classifiers are created, one per feature, and through adaptive boosting we build a strong classifier composed of a subset of the weak classifiers.![[Pasted image 20241023144725.png]]

AdaBoost is a training technique whose goal is to learn the optimal sequence of weak classifiers and their corresponding weights.
It requires a set of training patterns {(x1,y1),(x2,y2),...,(xN,yN)}, where yi ∈ {-1,+1} is the class label associated with the pattern. During learning, a distribution of weights [w1,w2,...,wN] associated with the training patterns is computed and updated; wi is associated with pattern (xi, yi).
After iteration m, the patterns that are harder to classify are assigned a higher weight $w_i^{(m)}$, so that at the next iteration m+1 those patterns receive more attention.
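
A minimal sketch of one AdaBoost round with decision-stump weak classifiers (1-D features; all names and data are illustrative, not the lecture's code):

```python
# One AdaBoost round: pick the stump with the lowest weighted error,
# compute its vote alpha, then re-weight the samples (hard ones gain weight).
import math

def stump_error(xs, ys, w, thr, sign):
    # weak classifier: predict `sign` if x > thr, else -sign
    return sum(wi for x, y, wi in zip(xs, ys, w) if (sign if x > thr else -sign) != y)

def adaboost_round(xs, ys, w):
    candidates = [(thr, s) for thr in xs for s in (+1, -1)]
    thr, sign = min(candidates, key=lambda c: stump_error(xs, ys, w, *c))
    err = stump_error(xs, ys, w, thr, sign)
    alpha = 0.5 * math.log((1 - err) / max(err, 1e-12))        # classifier weight
    w = [wi * math.exp(-alpha * y * (sign if x > thr else -sign))
         for x, y, wi in zip(xs, ys, w)]
    total = sum(w)
    return (thr, sign, alpha), [wi / total for wi in w]        # renormalized weights

xs = [0.2, 0.5, 0.9, 1.5, 2.0]
ys = [-1, -1, +1, +1, -1]
w = [1 / len(xs)] * len(xs)
print(adaboost_round(xs, ys, w))
```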

A weak classifier is often a linear classifier: it can be compared to a straight line.
![[Pasted image 20241024090856.png]]
In this case it does not work well, because not all the red samples lie on the same side. In this example it is impossible to separate the two classes using straight lines.
N.B. it is not a random classifier: it is the one that, in this round, makes the fewest errors.

![[Pasted image 20241024091151.png]]
To find a classification that separates the problematic samples, we increase their weights.
![[Pasted image 20241024091235.png]]
This classifier correctly separates the problematic samples.
![[Pasted image 20241024091433.png]]![[Pasted image 20241024091446.png]]
![[Pasted image 20241024091511.png]]

##### Classifying faces with AdaBoost
We extract rectangular features from the images: the Haar features.
![[Pasted image 20241024092146.png]]

What a Haar feature computes is: (sum of the pixel intensities in the white area) - (sum of the pixel intensities in the black area). If the result of the operation is a large number, then with high probability that portion of the image contains the feature identified by the filter (the filter being one of the rectangles above); for example, in case B (in the image above) these are corners.

For a 24x24 px image, the number of possible rectangular features is about 160,000!
How do we compute the Haar features, and which ones matter? We can use AdaBoost to choose which ones to use.

![[Pasted image 20241024092903.png]]
a very silly example

![[Pasted image 20241024093000.png]]
a slightly less silly example

For each round of AdaBoost:
- try every rectangular filter on every example
- choose the best threshold for every filter
- choose the best filter/threshold combination
- recompute the weights

Computational complexity: O(MNT)
- M filters
- N examples
- T thresholds

The rectangular features can be evaluated through integral images: in computer vision this name is given to an algorithm, with its associated data structure called a Summed-Area Table, which allows us to compute the sum over a sub-matrix in constant time.
The integral image at position (x,y) is the sum of the pixel values above and to the left of (x,y):
$$II(x,y)=\sum_{x'\le x,\, y'\le y}I(x',y')$$
Using the integral image it is possible to compute the sum of the pixel values in any rectangle:
![[Pasted image 20241024094223.png]]
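
A small sketch of a summed-area table and the O(1) rectangle sum used to evaluate Haar-like features (pure Python, illustrative names):

```python
# Summed-area table: rectangle sums in 4 lookups, the building block of Haar features.
def integral_image(img):
    h, w = len(img), len(img[0])
    ii = [[0] * (w + 1) for _ in range(h + 1)]            # 1-pixel zero border
    for y in range(h):
        for x in range(w):
            ii[y + 1][x + 1] = img[y][x] + ii[y][x + 1] + ii[y + 1][x] - ii[y][x]
    return ii

def rect_sum(ii, x0, y0, x1, y1):
    """Sum of img[y0..y1][x0..x1] in constant time."""
    return ii[y1 + 1][x1 + 1] - ii[y0][x1 + 1] - ii[y1 + 1][x0] + ii[y0][x0]

img = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
ii = integral_image(img)
print(rect_sum(ii, 1, 1, 2, 2))   # 5 + 6 + 8 + 9 = 28
# a two-rectangle Haar feature = rect_sum(white region) - rect_sum(black region)
```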

The single weak classifier depends on the parameters $z_k$ (feature) and $t_k$ (threshold):
- for every feature we choose the threshold value that minimizes the classification error
- we then choose the feature with the smallest error

A single robust classifier, even though it eliminates a large portion of the sub-windows that do not contain faces, does not satisfy the requirements of real applications. A possible solution is to employ cascaded classifiers (cascade classifier) of increasing complexity:
![[Pasted image 20241024095704.png]]

A classifier with a single feature passes almost all existing faces to the second stage (about 100%), while at the same time discarding 50% of the false faces.
A classifier with 5 features reaches almost 100% detection rate and a 40% false positive rate (20% cumulative), using the data from the previous stage.
A classifier with 20 features reaches almost 100% detection rate with a 10% false positive rate (2% cumulative).
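
The cumulative rates of a cascade are just the product of the per-stage rates, which matches the numbers above:

$$D = \prod_{i=1}^{s} d_i, \qquad F = \prod_{i=1}^{s} f_i \quad\Rightarrow\quad 0.5 \times 0.4 = 0.2 \;(20\%), \qquad 0.2 \times 0.1 = 0.02 \;(2\%)$$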

Face localization is performed by analyzing consecutive (overlapping) sub-windows of the input image and evaluating, for each one, whether it belongs to the face class:
![[Pasted image 20241024100002.png]]

#### Evaluating the localization
- **False positives:** percentage of windows classified as a face that in reality do not contain one
- **Non-localized faces:** percentage of faces that were not detected
- **C-ERROR**, or localization error: Euclidean distance between the real center of the face and the one hypothesized by the system, normalized with respect to the sum of the axes of the ellipse containing the face.

New binary image files:
Pasted image 20241023133231.png (18 KiB)
Pasted image 20241023135125.png (83 KiB)
Pasted image 20241023135127.png (83 KiB)
Pasted image 20241023135922.png (204 KiB)
Pasted image 20241023135924.png (204 KiB)
Pasted image 20241023135925.png (204 KiB)
Pasted image 20241023140250.png (13 KiB)
Pasted image 20241023141948.png (110 KiB)
Pasted image 20241023144721.png (62 KiB)
Pasted image 20241023144725.png (62 KiB)
Pasted image 20241024090856.png (19 KiB)
Pasted image 20241024091151.png (26 KiB)
Pasted image 20241024091235.png (23 KiB)
Pasted image 20241024091433.png (29 KiB)
Pasted image 20241024091446.png (23 KiB)
Pasted image 20241024091511.png (32 KiB)
Pasted image 20241024092146.png (14 KiB)
Pasted image 20241024092903.png (165 KiB)
Pasted image 20241024093000.png (157 KiB)
Pasted image 20241024094223.png (22 KiB)
Pasted image 20241024095704.png (48 KiB)
Pasted image 20241024100002.png (73 KiB)