Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 27 additions & 11 deletions execution_chain/db/aristo/aristo_compute.nim
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# nimbus-eth1
# Copyright (c) 2023-2025 Status Research & Development GmbH
# Copyright (c) 2023-2026 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
Expand All @@ -18,6 +18,15 @@ import
"."/[aristo_desc, aristo_get, aristo_layers],
./aristo_desc/desc_backend

const
MAX_RLP_SIZE_ACCOUNT_LEAF = 111
MAX_RLP_SIZE_STORAGE_LEAF = 34
MAX_RLP_SIZE_ACCOUNT_LEAF_NODE = 149
MAX_RLP_SIZE_STORAGE_LEAF_NODE = 71
MAX_RLP_SIZE_BRANCH_NODE = 533
MAX_RLP_SIZE_EXTENSION_NODE = 69


type WriteBatch = tuple[writer: PutHdlRef, count: int, depth: int, prefix: uint64]

# Keep write batch size _around_ 1mb, give or take some overhead - this is a
Expand Down Expand Up @@ -99,20 +108,20 @@ template encodeLeaf(w: var RlpWriter, pfx: NibblesBuf, leafData: untyped): HashK
w.startList(2)
w.append(pfx.toHexPrefix(isLeaf = true).data())
w.append(leafData)
w.finish().digestTo(HashKey)
w.finish(asOpenArray = true).digestTo(HashKey)

template encodeBranch(w: var RlpWriter, vtx: VertexRef, subKeyForN: untyped): HashKey =
w.startList(17)
for (n {.inject.}, subvid {.inject.}) in vtx.allPairs():
w.append(subKeyForN)
w.append EmptyBlob
w.finish().digestTo(HashKey)
w.finish(asOpenArray = true).digestTo(HashKey)

template encodeExt(w: var RlpWriter, pfx: NibblesBuf, branchKey: HashKey): HashKey =
w.startList(2)
w.append(pfx.toHexPrefix(isLeaf = false).data())
w.append(branchKey)
w.finish().digestTo(HashKey)
w.finish(asOpenArray = true).digestTo(HashKey)

proc getKey(
db: AristoTxRef, rvid: RootedVertexID, skipLayers: static bool
Expand Down Expand Up @@ -153,12 +162,13 @@ proc computeKeyImpl(
var level = level

# TODO this is the same code as when serializing NodeRef, without the NodeRef
var writer = initRlpWriter()


let key =
case vtx.vType
of AccLeaf:
let vtx = AccLeafRef(vtx)
var writer = RlpArrayBufWriter[MAX_RLP_SIZE_ACCOUNT_LEAF_NODE, 1]()
writer.encodeLeaf(vtx.pfx):
let
stoID = vtx.stoID
Expand All @@ -182,17 +192,21 @@ proc computeKeyImpl(
else:
VOID_HASH_KEY

rlp.encode Account(
var w = RlpArrayBufWriter[MAX_RLP_SIZE_ACCOUNT_LEAF, 1]()
w.append(Account(
nonce: vtx.account.nonce,
balance: vtx.account.balance,
storageRoot: skey.to(Hash32),
codeHash: vtx.account.codeHash,
)
codeHash: vtx.account.codeHash
))
w.finish(asOpenArray = true)
of StoLeaf:
let vtx = StoLeafRef(vtx)
var writer = RlpArrayBufWriter[MAX_RLP_SIZE_STORAGE_LEAF_NODE, 1]()
writer.encodeLeaf(vtx.pfx):
# TODO avoid memory allocation when encoding storage data
rlp.encode(vtx.stoData)
var w = RlpArrayBufWriter[MAX_RLP_SIZE_STORAGE_LEAF, 1]()
w.append(vtx.stoData)
w.finish(asOpenArray = true)
of Branches:
# For branches, we need to load the vertices before recursing into them
# to exploit their on-disk order
Expand Down Expand Up @@ -261,10 +275,12 @@ proc computeKeyImpl(

if vtx.vType == ExtBranch:
let vtx = ExtBranchRef(vtx)
var writer = RlpArrayBufWriter[MAX_RLP_SIZE_EXTENSION_NODE, 1]()
writer.encodeExt(vtx.pfx):
var bwriter = initRlpWriter()
var bwriter = RlpArrayBufWriter[MAX_RLP_SIZE_BRANCH_NODE, 1]()
bwriter.writeBranch(vtx)
else:
var writer = RlpArrayBufWriter[MAX_RLP_SIZE_BRANCH_NODE, 1]()
writer.writeBranch(vtx)

# Cache the hash into the same storage layer as the the top-most value that it
Expand Down
122 changes: 122 additions & 0 deletions tests/test_aristo/test_compute.nim
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,125 @@ suite "Aristo compute":

let w = txFrame.computeKey((root, root)).value.to(Hash32)
check w == samples[^1][^1][2]


test "Max size RLP encoding of all MPT node types":
## This test exercises the RlpArrayBufWriter stack-allocated buffer paths in
## aristo_compute.nim by constructing a trie that produces the largest
## possible RLP encoding for each MPT node type:
##
## - Account leaf node (MAX_RLP_SIZE_ACCOUNT_LEAF_NODE = 149):
## Maximised by using max nonce (uint64.high), max balance (UInt256.high),
## a non-empty codeHash, and a valid storageRoot (from attached storage).
##
## - Storage leaf node (MAX_RLP_SIZE_STORAGE_LEAF_NODE = 71):
## Maximised by storing UInt256.high as the storage value.
##
## - Branch node (MAX_RLP_SIZE_BRANCH_NODE = 533):
## Maximised by having all 16 child slots occupied at a branch, each with
## a full 32-byte hash key (RLP encoded nodes >= 32 bytes).
##
## - Extension node (MAX_RLP_SIZE_EXTENSION_NODE = 69):
## Created when multiple paths share a common prefix before diverging,
## producing a hex-prefix-encoded shared nibble path + a branch key child.
##
## The test inserts enough accounts (with carefully chosen paths) to produce
## all four node types, then calls computeKey on the state root to force
## RLP serialization through the RlpArrayBufWriter code paths. If any buffer
## is undersized the test will fail with an overflow/assertion.

let
db = AristoDbRef.init()
txFrame = db.txRef
root = STATE_ROOT_VID

# Maximum-size account payload: max nonce, max balance, non-empty codeHash.
# The storageRoot will be filled in by attaching storage to this account.
let maxAccount = AristoAccount(
nonce: uint64.high,
balance: UInt256.high,
codeHash: hash32"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
)

# A large account (with storage) to maximise account leaf RLP.
# This account will also get a storage slot with max value to maximise
# storage leaf RLP.
let accPathWithStorage =
hash32"1000000000000000000000000000000000000000000000000000000000000001"
check:
txFrame.mergeAccount(accPathWithStorage, maxAccount) ==
Result[bool, AristoError].ok(true)

# Attach a storage slot with the largest possible UInt256 value.
# This maximises the storage leaf node RLP encoding.
let
stoPath = hash32"2000000000000000000000000000000000000000000000000000000000000001"
maxStoData = UInt256.high
check:
txFrame.mergeSlot(accPathWithStorage, stoPath, maxStoData).isOk

# Insert 16 more accounts at paths chosen so that their keccak hashes
# spread across all 16 nibble values at the root branch level. This is
# not guaranteed by arbitrary paths, but inserting enough distinct accounts
# with varied first nibbles will populate many branch children. We use 16
# accounts with different leading bytes to maximise the chance of filling
# the root branch.
#
# All use max-size payloads (large nonce, balance, codeHash) to ensure
# each child's RLP node is >= 32 bytes (so branch stores full 32-byte
# hash keys rather than inline RLP).
for i in 0'u8 .. 15'u8:
var pathBytes: array[32, byte]
pathBytes[0] = (i * 16) + i # e.g. 0x00, 0x11, 0x22 ... 0xFF
pathBytes[1] = 0xFF
pathBytes[2] = byte(i)
# Fill remaining bytes to make each path unique
for j in 3 .. 31:
pathBytes[j] = byte(i)
let accPath = Hash32(pathBytes)
let acc = AristoAccount(
nonce: uint64.high,
balance: UInt256.high,
codeHash: hash32"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
)
check:
txFrame.mergeAccount(accPath, acc) == Result[bool, AristoError].ok(true)

# Insert two more accounts that share a long common prefix to force the
# creation of an extension node. When two paths share leading nibbles but
# diverge later, the trie creates an extension node encoding the shared
# prefix followed by a branch.
let extAcc = AristoAccount(
nonce: uint64.high,
balance: UInt256.high,
codeHash: hash32"cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc",
)
# These two paths share the first 8 bytes (16 nibbles) then diverge,
# which should produce an extension node with a long shared prefix.
let extPath1 =
hash32"ABCDEF0123456789000000000000000000000000000000000000000000000001"
let extPath2 =
hash32"ABCDEF0123456789000000000000000000000000000000000000000000000002"

check txFrame.mergeAccount(extPath1, extAcc) == Result[bool, AristoError].ok(true)
check txFrame.mergeAccount(extPath2, extAcc) == Result[bool, AristoError].ok(true)

# Now compute the state root key. This forces RLP serialization of every
# node in the trie through the RlpArrayBufWriter code paths.
# If any ArrayBuf is too small, this will fail with an assertion/overflow.
let stateRoot = txFrame.computeKey((root, root))
check stateRoot.isOk

# Verify the root hash is a valid 32-byte hash
let rootHash = stateRoot.value.to(Hash32)
check rootHash != default(Hash32)

# Run structural integrity checks on the trie
let rc = txFrame.check
check rc == typeof(rc).ok()

# Verify the computation is stable (computing again gives the same result)
let stateRoot2 = txFrame.computeKey((root, root))
check stateRoot2.isOk
check stateRoot2.value == stateRoot.value
70 changes: 70 additions & 0 deletions tests/test_aristo/test_compute_benchmark.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Nimbus
# Copyright (c) 2026 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or
# distributed except according to those terms.

{.used.}

import
std/times,
unittest2,
../../execution_chain/db/aristo/[
aristo_compute,
aristo_merge,
aristo_desc,
aristo_init/memory_only,
aristo_tx_frame,
]

suite "Aristo compute benchmark":
## Benchmark fixture: builds a large trie (one persisted base frame plus
## NUM_FRAMES stacked in-memory frames) and measures the CPU time of a
## serial state-root computation over the full layer stack.
const
# NOTE(review): NUM_THREADS is not referenced anywhere in this file —
# dead constant, or reserved for a future parallel benchmark? Confirm.
NUM_THREADS = 16
# Number of in-memory tx frames layered on top of the persisted base.
NUM_FRAMES = 10
# Accounts merged per frame; total trie size is roughly
# (NUM_FRAMES + 1) * NUM_ACCOUNTS_PER_FRAME accounts.
NUM_ACCOUNTS_PER_FRAME = 400000

setup:
# Fresh in-memory Aristo DB; the base frame is filled, checkpointed and
# persisted so that later frames sit on top of on-disk (memory-backend)
# state rather than purely in-memory layers.
let db = AristoDbRef.init()
var txFrame = db.txRef

for i in 0 ..< NUM_ACCOUNTS_PER_FRAME:
check:
txFrame.mergeAccount(
# NOTE(review): cast[Hash32](i) reinterprets an int as a 32-byte
# account path — confirm this is the intended (and portable) way
# to generate distinct paths here.
cast[Hash32](i),
AristoAccount(balance: i.u256(), codeHash: EMPTY_CODE_HASH)) == Result[bool, AristoError].ok(true)
txFrame.checkpoint(1, skipSnapshot = true)

# Persist the base frame through the backend's put batch API.
let batch = db.putBegFn()[]
db.persist(batch, txFrame)
check db.putEndFn(batch).isOk()

txFrame = db.baseTxFrame()

# Stack NUM_FRAMES additional frames, each with its own account range.
for n in 1 .. NUM_FRAMES:
txFrame = db.txFrameBegin(txFrame)

let
startIdx = NUM_ACCOUNTS_PER_FRAME * n
endIdx = startIdx + NUM_ACCOUNTS_PER_FRAME

for i in startIdx ..< endIdx:
check:
txFrame.mergeAccount(
# i * i keeps these paths disjoint from the base frame's
# 0 ..< NUM_ACCOUNTS_PER_FRAME range (since i >= 400000 here).
cast[Hash32](i * i),
AristoAccount(balance: i.u256(), codeHash: EMPTY_CODE_HASH)) == Result[bool, AristoError].ok(true)

txFrame.checkpoint(1, skipSnapshot = false)


test "Serial benchmark - skipLayers = false":
debugEcho "\nSerial benchmark (skipLayers = false) running..."

# cpuTime() measures process CPU time, not wall-clock time; adequate
# for a single-threaded benchmark like this one.
let before = cpuTime()
check txFrame.computeKey((STATE_ROOT_VID, STATE_ROOT_VID)).isOk()
let elapsed = cpuTime() - before

debugEcho "Serial benchmark (skipLayers = false) cpu time: ", elapsed
2 changes: 1 addition & 1 deletion vendor/nim-eth
Loading