File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -61,6 +61,7 @@ export default function BasicExample() {
6161 const step = useCallback ( ( direction ) => {
6262 setPos ( ( prev ) => {
6363 const next = Math . min ( MAX_POS , Math . max ( MIN_POS , prev + direction ) ) ;
64+ // Base per-step penalty encourages shorter paths
6465 let reward = - 0.01 ;
6566 let done = false ;
6667
@@ -73,6 +74,14 @@ export default function BasicExample() {
7374 done = true ;
7475 }
7576
77+ // Treat reaching either boundary position (MIN_POS or MAX_POS) as a terminal
78+ // (failure) state so the agent cannot sit against an edge, accumulating the
79+ // per-step penalty indefinitely. An extra penalty makes this outcome clearly undesirable.
80+ if ( next === MIN_POS || next === MAX_POS ) {
81+ reward -= 0.5 ; // extra penalty for reaching a boundary position (next is clamped, so it never leaves the grid)
82+ done = true ;
83+ }
84+
7685 setRewardAccum ( ( r ) => r + reward ) ;
7786
7887 if ( done ) {
You can’t perform that action at this time.
0 commit comments