diff --git a/internal/stack/error.go b/internal/stack/error.go
new file mode 100644
index 0000000..6b9969b
--- /dev/null
+++ b/internal/stack/error.go
@@ -0,0 +1,92 @@
+// Copyright 2025 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package stack
+
+import (
+	"errors"
+	"fmt"
+)
+
+// Parse errors.
+
+var (
+	ErrNotStackComment   = errors.New("not a stack comment, missing [")
+	ErrEmptyComment      = errors.New("empty comment")
+	errIncompleteComment = errors.New("incomplete stack comment")
+	errEmptyItem         = errors.New("empty item in stack comment")
+	errDoubleQuote       = errors.New("double-quote not allowed in stack comment")
+)
+
+// nestingError is reported when a closing bracket does not match the innermost
+// open bracket of a stack comment.
+type nestingError struct {
+	opening, expected, found rune
+}
+
+// Error names the closer that was expected for the open bracket, followed by
+// the rune that was found in its place.
+func (e nestingError) Error() string {
+	return fmt.Sprintf("expected %c to close %c, found %c", e.expected, e.opening, e.found)
+}
+
+// Analysis errors.
+
+// ErrOpUnderflows is reported when an operation uses more items than are currently
+// available on the stack.
+type ErrOpUnderflows struct {
+	Want int // how many slots the op consumes
+	Have int // current stack depth
+}
+
+func (e ErrOpUnderflows) Error() string {
+	return fmt.Sprintf("stack underflow: op requires %d items, stack has %d", e.Want, e.Have)
+}
+
+// ErrCommentUnderflows is reported when a stack comment declares more items than
+// are currently available on the stack.
+type ErrCommentUnderflows struct {
+	Items []string // computed stack
+	Want  int      // how many slots the comment declares
+}
+
+func (e ErrCommentUnderflows) Error() string {
+	return fmt.Sprintf("stack has %d items, comment declares %d", len(e.Items), e.Want)
+}
+
+// ErrMismatch is reported when a stack comment declares a specific item should be
+// contained in a stack slot, but the stack is known to contain a different one at the
+// same position. Error indexes Items with Slot, so Slot must be a valid index of Items.
+type ErrMismatch struct {
+	Items []string // computed stack
+	Slot  int      // stack slot index (zero is top)
+	Want  string   // what the comment has at that index
+}
+
+func (e ErrMismatch) Error() string {
+	return fmt.Sprintf("stack item %d differs (expected %q, have %q) in %s", e.Slot, e.Want, e.Items[e.Slot], render(e.Items))
+}
+
+// ErrCommentRenamesItem is raised when the stack comment changes the name of an existing
+// item, i.e. one that wasn't produced by the current operation.
+type ErrCommentRenamesItem struct {
+	Item    string // name the existing item is currently known by
+	NewName string // name the comment uses for that item
+}
+
+func (e ErrCommentRenamesItem) Error() string {
+	return fmt.Sprintf("comment introduces new name %s for existing stack item %s", e.NewName, e.Item)
+}
diff --git a/internal/stack/stack.go b/internal/stack/stack.go
new file mode 100644
index 0000000..0ecb09c
--- /dev/null
+++ b/internal/stack/stack.go
@@ -0,0 +1,205 @@
+// Copyright 2025 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package stack
+
+import (
+	"fmt"
+	"slices"
+	"strings"
+
+	"github.com/fjl/geas/internal/set"
+)
+
+// Op is an operation that modifies the stack.
+type Op interface {
+	StackIn(imm byte) []string  // input items
+	StackOut(imm byte) []string // output items
+}
+
+// Stack is a symbolic EVM stack. It tracks the positions
+// of items and their symbolic names.
+type Stack struct {
+	counter int   // item counter; item ids start at 1
+	stack   []int // item ids, the last element is the top of the stack
+
+	// item naming
+	nameToItem map[string]int
+	itemToName map[int]string
+
+	// buffers for apply
+	opItems    map[string]int // inputs consumed by the current op, keyed by the op's input name
+	opNewItems set.Set[int]   // items created by the current op
+}
+
+func New() *Stack {
+	return &Stack{
+		nameToItem: make(map[string]int),
+		itemToName: make(map[int]string),
+		opItems:    make(map[string]int),
+		opNewItems: make(set.Set[int]),
+	}
+}
+
+// Init clears the stack and sets its contents. names[0] becomes the top of the stack.
+func (s *Stack) Init(names []string) {
+	clear(s.nameToItem)
+	clear(s.itemToName)
+	s.stack = make([]int, 0, len(names))
+	for _, name := range slices.Backward(names) {
+		if item, ok := s.nameToItem[name]; ok {
+			s.push(item) // repeated name: the same item occupies several slots
+		} else {
+			item = s.newItem()
+			s.push(item)
+			s.setName(item, name)
+		}
+	}
+}
+
+// Apply performs the stack manipulation of op: inputs are dropped, outputs are pushed.
+// The comment is checked against the resulting stack if non-nil (comment[0] is the top).
+func (s *Stack) Apply(op Op, imm byte, comment []string) error {
+	// Drop consumed items, but remember them by name.
+	clear(s.opItems)
+	inputs := op.StackIn(imm)
+	for i, name := range inputs {
+		if _, ok := s.opItems[name]; ok {
+			panic("BUG: op has duplicate input stack item " + name)
+		}
+		val, ok := s.get(i)
+		if !ok {
+			return ErrOpUnderflows{Want: len(inputs), Have: len(s.stack)}
+		}
+		s.opItems[name] = val
+	}
+	s.stack = s.stack[:len(s.stack)-len(inputs)]
+
+	// Add output items. If any names from the operation's input list are reused, their
+	// item identifiers will be restored. For all other names, new items are created.
+	outputs := op.StackOut(imm)
+	clear(s.opNewItems)
+	for i := len(outputs) - 1; i >= 0; i-- {
+		if item, ok := s.opItems[outputs[i]]; ok {
+			s.push(item)
+		} else {
+			item := s.newItem()
+			s.push(item)
+			s.opNewItems.Add(item)
+		}
+	}
+
+	// Check the comment, and apply its names to the stack.
+	if comment == nil {
+		return nil
+	}
+	for i, name := range comment {
+		stackItem, ok := s.get(i)
+		if !ok {
+			return ErrCommentUnderflows{Items: s.Items(), Want: len(comment)}
+		}
+		if item, ok := s.nameToItem[name]; ok && item != stackItem {
+			return ErrMismatch{Items: s.Items(), Slot: i, Want: name}
+		}
+		// The comment must not rename items that weren't produced by this operation.
+		// Item ids start at 1, so a zero lookup result means the name is not known yet.
+		if !s.opNewItems.Includes(stackItem) && s.nameToItem[name] == 0 {
+			return ErrCommentRenamesItem{NewName: name, Item: s.itemToName[stackItem]}
+		}
+		// Rename the item according to the comment.
+		s.setName(stackItem, name)
+	}
+	// By now the comment is known not to have more items than the stack, and all declared
+	// names match the stack. Notably, there is no expectation that comments are complete,
+	// i.e. it's OK if comments elide some items at the end.
+	// Unfortunately, this also permits a situation where items can be 'added back' if they
+	// were dropped from the comment before.
+	// Consider this example:
+	//
+	//     push 1  ; [a]
+	//     push 2  ; [b, a]
+	//     push 3  ; [c, b]    <-- a is lost here...
+	//     add     ; [sum, a]  <-- but now it's back! confusing!
+	//
+	// I'm not sure if this should be prevented somehow.
+
+	return nil
+}
+
+// Items returns the names of the current stack items, top of the stack first.
+func (s *Stack) Items() []string {
+	items := make([]string, len(s.stack))
+	for i := range items {
+		item, _ := s.get(i) // cannot fail, i is below len(s.stack)
+		items[i] = s.getName(item)
+	}
+	return items
+}
+
+// String returns a description of the current stack.
+func (s *Stack) String() string {
+	return render(s.Items())
+}
+
+func render(stk []string) string {
+	var out strings.Builder
+	out.WriteByte('[')
+	for i, name := range stk {
+		if i > 0 {
+			out.WriteString(", ")
+		}
+		out.WriteString(name)
+	}
+	out.WriteByte(']')
+	return out.String()
+}
+
+// push adds an item at the top of the stack.
+func (s *Stack) push(item int) {
+	s.stack = append(s.stack, item)
+}
+
+// get accesses item i (zero is top). ok is false if the stack has no item at offset i.
+func (s *Stack) get(i int) (val int, ok bool) {
+	if i < 0 {
+		panic("BUG: negative stack offset")
+	}
+	if i > len(s.stack)-1 {
+		return 0, false
+	}
+	return s.stack[len(s.stack)-1-i], true
+}
+
+// newItem creates a new item (but does not add it to the stack). The first item is 1.
+func (s *Stack) newItem() int {
+	s.counter++
+	return s.counter
+}
+
+// setName sets the name of a stack item. An earlier name of the item is not removed.
+func (s *Stack) setName(item int, name string) {
+	s.itemToName[item] = name
+	s.nameToItem[name] = item
+}
+
+// getName reports the known name of an item, or invents one of the form "_<id>".
+func (s *Stack) getName(item int) string {
+	name, ok := s.itemToName[item]
+	if ok {
+		return name
+	}
+	return fmt.Sprintf("_%d", item)
+}
diff --git a/internal/stack/stack_test.go b/internal/stack/stack_test.go
new file mode 100644
index 0000000..297ab85
--- /dev/null
+++ b/internal/stack/stack_test.go
@@ -0,0 +1,130 @@
+// Copyright 2025 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package stack
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/fjl/geas/internal/evm"
+)
+
+type stackTest struct {
+	t *testing.T
+	s *Stack // stack under test
+}
+
+func newTest(t *testing.T, initial string) *stackTest {
+	commentSlice, err := ParseComment(initial)
+	if err != nil {
+		panic("invalid stack comment: " + initial) // broken test fixture, not a test failure
+	}
+	s := New()
+	s.Init(commentSlice)
+	return &stackTest{t, s}
+}
+
+func (t *stackTest) applyOK(op *evm.Op, imm byte, comment string) {
+	t.t.Helper()
+	commentSlice, err := ParseComment(comment)
+	if err != nil {
+		panic("invalid stack comment: " + comment)
+	}
+	t.t.Logf("apply %5s on %s", op.Name, t.s.String())
+	if err := t.s.Apply(op, imm, commentSlice); err != nil {
+		t.t.Fatalf("error: %v", err)
+	}
+}
+
+func (t *stackTest) applyErr(op *evm.Op, imm byte, comment string, wantErr error) {
+	t.t.Helper()
+	commentSlice, err := ParseComment(comment)
+	if err != nil {
+		panic("invalid stack comment: " + comment)
+	}
+	t.t.Logf("apply %5s on %s", op.Name, t.s.String())
+	err = t.s.Apply(op, imm, commentSlice)
+	if err == nil {
+		t.t.Fatalf("expected error, got none")
+	}
+	if !reflect.DeepEqual(err, wantErr) { // compares error type and all fields
+		t.t.Fatalf("wrong error: %v\n want: %v", err, wantErr)
+	}
+}
+
+func TestStackAnalysis(t *testing.T) {
+	vm := evm.FindInstructionSet("frontier")
+	var (
+		push1 = vm.PushBySize(1)
+		add   = vm.OpByName("ADD")
+		swap2 = vm.OpByName("SWAP2")
+		dup1  = vm.OpByName("DUP1")
+		dup2  = vm.OpByName("DUP2")
+	)
+	t.Run("ok", func(t *testing.T) {
+		st := newTest(t, "[a, b, c, d]")
+		st.applyOK(dup2, 0, "[b, a, b, c]")
+		st.applyOK(add, 0, "[sum, b, c]")
+		st.applyOK(swap2, 0, "[c, b, sum]")
+		st.applyOK(dup1, 0, "[c, c, b, sum]")
+		st.applyOK(push1, 0, "[val, c, c, b, sum]")
+		st.applyOK(swap2, 0, "[c, c, val, b, sum]")
+	})
+	t.Run("initWithDuplicates", func(t *testing.T) {
+		st := newTest(t, "[a, a, a]")
+		st.applyOK(add, 0, "[sum, a]")
+	})
+	t.Run("commentMismatch", func(t *testing.T) {
+		st := newTest(t, "[a, b, c, d]")
+		st.applyErr(add, 0, "[sum, d, c]", // c and d are swapped in the comment
+			ErrMismatch{
+				Items: []string{"sum", "c", "d"},
+				Slot:  1,
+				Want:  "d",
+			},
+		)
+	})
+	t.Run("opUnderflows", func(t *testing.T) {
+		st := newTest(t, "[a]")
+		st.applyErr(add, 0, "[sum]",
+			ErrOpUnderflows{
+				Want: 2,
+				Have: 1,
+			},
+		)
+	})
+	t.Run("commentUnderflows", func(t *testing.T) {
+		st := newTest(t, "[a, b]")
+		st.applyErr(add, 0, "[sum, b]", // comment declares one item more than the stack holds
+			ErrCommentUnderflows{
+				Items: []string{"sum"},
+				Want:  2,
+			},
+		)
+	})
+	t.Run("stackItemRenamed", func(t *testing.T) {
+		st := newTest(t, "[a, b]")
+		st.applyOK(push1, 0, "[x, a, b]")
+		st.applyErr(add, 0, "[sum, c]", // slot 1 still holds b
+			ErrCommentRenamesItem{
+				Item:    "b",
+				NewName: "c",
+			},
+		)
+	})
+
+}
diff --git a/internal/stack/stackcomment.go b/internal/stack/stackcomment.go
new file mode 100644
index 0000000..21d01b7
--- /dev/null
+++ b/internal/stack/stackcomment.go
@@ -0,0 +1,166 @@
+// Copyright 2025 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package stack
+
+import (
+	"fmt"
+	"unicode/utf8"
+)
+
+// ParseComment parses a stack comment at the start of the given input string.
+// It checks for basic syntax errors, such as invalid parenthesis nesting. Items are
+// canonicalized, i.e. all whitespace is removed. The result is nil if an error occurs.
+func ParseComment(text string) ([]string, error) {
+	in := inStream(text)
+	switch in.skipSpace() {
+	case -1:
+		return nil, ErrEmptyComment
+	case '[':
+		in.next()
+		// Start of stack comment.
+	default:
+		return nil, ErrNotStackComment
+	}
+
+	// The starting square bracket has been read. items is non-nil even for "[]".
+	items := []string{}
+	nest := new(paramNesting)
+	nest.enter('[')
+	for {
+		in.skipSpace()
+		item, err := parseElem(&in, nest)
+		if err != nil {
+			return nil, err
+		}
+		if len(item) > 0 {
+			items = append(items, item)
+		}
+		switch in.skipSpace() {
+		case ',':
+			// continue parsing next item
+			in.next()
+		case ']':
+			// end of stack comment
+			return items, nil
+		}
+	}
+}
+
+func parseElem(in *inStream, nest *paramNesting) (elem string, err error) {
+	var chars []rune
+loop:
+	for {
+		c := in.peek()
+		switch c {
+		case -1:
+			return "", errIncompleteComment
+		case ' ', '\t':
+			in.next()
+			continue loop // skip all space
+		case '[', '(', '{':
+			nest.enter(c)
+		case ']', ')', '}':
+			err := nest.leave(c)
+			if err != nil {
+				return "", err
+			}
+			if len(*nest) == 0 {
+				return string(chars), nil // closed the comment itself, ']' is left unread
+			}
+		case ',':
+			if !nest.inExpr() {
+				if len(chars) == 0 {
+					return "", errEmptyItem
+				}
+				return string(chars), nil // item separator, ',' is left unread
+			}
+		case '"':
+			return "", errDoubleQuote
+		}
+		chars = append(chars, c)
+		in.next()
+	}
+}
+
+// inStream reads characters from a string. The end of input is reported as rune -1.
+type inStream string
+
+func (in *inStream) peek() rune {
+	if len(*in) == 0 {
+		return -1
+	}
+	c, _ := utf8.DecodeRuneInString(string(*in))
+	return c
+}
+
+func (in *inStream) next() {
+	if len(*in) > 0 {
+		_, size := utf8.DecodeRuneInString(string(*in))
+		*in = (*in)[size:]
+	}
+}
+
+// skipSpace forwards the input to the next non-space character and returns it unread.
+func (in *inStream) skipSpace() rune {
+	for {
+		if len(*in) == 0 {
+			return -1
+		}
+		c, size := utf8.DecodeRuneInString(string(*in))
+		switch c {
+		case ' ', '\t':
+			*in = (*in)[size:]
+		default:
+			return c
+		}
+	}
+}
+
+// paramNesting tracks nesting of brackets. The innermost open bracket is the last element.
+type paramNesting []rune
+
+// inExpr reports whether the parser is currently inside of a parenthesized expression.
+func (n *paramNesting) inExpr() bool {
+	return len(*n) > 1
+}
+
+func (n *paramNesting) enter(c rune) {
+	*n = append(*n, c)
+}
+
+func (n *paramNesting) leave(c rune) error {
+	if len(*n) == 0 {
+		panic("BUG: empty nest")
+	}
+	opening := (*n)[len(*n)-1]
+	var expected rune
+	switch opening {
+	case '(':
+		expected = ')'
+	case '{':
+		expected = '}'
+	case '[':
+		expected = ']'
+	default:
+		panic(fmt.Sprintf("BUG: invalid nest char %c", opening))
+	}
+	if c != expected {
+		return nestingError{opening, expected, c}
+	}
+	*n = (*n)[:len(*n)-1]
+	return nil
+}
diff --git a/internal/stack/stackcomment_test.go b/internal/stack/stackcomment_test.go
new file mode 100644
index 0000000..6d9dfc0
--- /dev/null
+++ b/internal/stack/stackcomment_test.go
@@ -0,0 +1,96 @@
+// Copyright 2025 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package stack
+
+import (
+	"slices"
+	"testing"
+)
+
+var parseCommentTests = []struct {
+	input   string   // text passed to ParseComment
+	output  []string // expected items, checked when wantErr is nil
+	wantErr error    // expected error, compared with !=
+}{
+	// valid cases
+	{
+		input:  "[]",
+		output: []string{},
+	},
+	{
+		input:  "[ ]",
+		output: []string{},
+	},
+	{
+		input:  "[a, b]",
+		output: []string{"a", "b"},
+	},
+	// whitespace removal, nesting
+	{
+		input:  "[a == b, d', (x*y) + 1 - 2, arr[1:2], fn(a, b)]",
+		output: []string{"a==b", "d'", "(x*y)+1-2", "arr[1:2]", "fn(a,b)"},
+	},
+
+	// some errors
+	{
+		input:   "",
+		wantErr: ErrEmptyComment,
+	},
+	{
+		input:   "text",
+		wantErr: ErrNotStackComment,
+	},
+	{
+		input:   "[a",
+		wantErr: errIncompleteComment,
+	},
+	{
+		input:   "[a,",
+		wantErr: errIncompleteComment,
+	},
+	{
+		input:   "[a,,]",
+		wantErr: errEmptyItem,
+	},
+	{
+		input:   `[a, "b"]`,
+		wantErr: errDoubleQuote,
+	},
+	{
+		input:   `[a, func(])]`,
+		wantErr: nestingError{opening: '(', expected: ')', found: ']'},
+	},
+}
+
+func TestParseComment(t *testing.T) {
+	for _, test := range parseCommentTests {
+		slice, err := ParseComment(test.input)
+		if err != nil {
+			if test.wantErr == nil {
+				t.Errorf("test(%q): unexpected error: %q", test.input, err)
+			} else if err != test.wantErr { // nestingError is a comparable struct value
+				t.Errorf("test(%q): wrong error: %q, want %q", test.input, err, test.wantErr)
+			}
+		} else {
+			if test.wantErr != nil {
+				t.Errorf("test(%q): expected error, got none (result %v)", test.input, slice)
+			} else if !slices.Equal(slice, test.output) {
+				t.Errorf("test(%q): wrong result %#v", test.input, slice)
+			}
+		}
+	}
+}