Skip to content

Commit ac28c72

Browse files
committed
Add penalties for boundaries in BasicExample behavior.
1 parent 645fdba commit ac28c72

1 file changed

Lines changed: 9 additions & 0 deletions

File tree

client/src/examples/Basic.jsx

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ export default function BasicExample() {
6161
const step = useCallback((direction) => {
6262
setPos((prev) => {
6363
const next = Math.min(MAX_POS, Math.max(MIN_POS, prev + direction));
64+
// Base per-step penalty encourages shorter paths
6465
let reward = -0.01;
6566
let done = false;
6667

@@ -73,6 +74,14 @@ export default function BasicExample() {
7374
done = true;
7475
}
7576

77+
// Treat hitting the extrema of the grid as a terminal (failure) state so
78+
// the agent cannot get stuck endlessly accumulating negative reward.
79+
// We also apply an extra penalty to make this outcome clearly undesirable.
80+
if (next === MIN_POS || next === MAX_POS) {
81+
reward -= 0.5; // extra penalty for reaching the edge of the playable area (position is clamped, so the agent never actually leaves it)
82+
done = true;
83+
}
84+
7685
setRewardAccum((r) => r + reward);
7786

7887
if (done) {

0 commit comments

Comments
 (0)