Skip to content

Commit a1a76ef

Browse files
committed
Add missing percentage.
Fix edge case when SQ uses single supernode to get sidecars. Adopt test.
1 parent 8810313 commit a1a76ef

3 files changed

Lines changed: 78 additions & 38 deletions

File tree

beacon_chain/sync/sync_overseer2.nim

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,7 @@ template blobsCount(blck: ForkedSignedBeaconBlock): int =
101101
func slimLog(blck: ref ForkedSignedBeaconBlock): string =
102102
"(" & $blck.kind & ",slot:" & $blck[].slot() &
103103
",root:" & shortLog(blck[].root()) &
104-
",parent_root:" & shortLog(blck[].parent_root()) &
105-
",blobs_count:" & $blck[].blobsCount() & ")"
104+
",parent_root:" & shortLog(blck[].parent_root()) & ")"
106105

107106
func slimLog(blocks: openArray[ref ForkedSignedBeaconBlock]): string =
108107
"[" & blocks.mapIt(slimLog(it)).join(",") & "]"
@@ -206,22 +205,34 @@ func getColumnsFillRate(
206205
func getMissingColumnsLog(
207206
overseer: SyncOverseerRef2,
208207
blocks: openArray[ref ForkedSignedBeaconBlock]
209-
): string =
210-
var res: seq[string]
208+
): (string, string) =
209+
var
210+
res: seq[string]
211+
missingCount = 0.0
212+
totalCount = 0.0
213+
214+
let blocksColumnsCount = float(len(overseer.columnQuarantine[].custodyMap))
215+
211216
for blck in blocks:
212217
withBlck(blck[]):
213218
when consensusFork == ConsensusFork.Fulu:
214-
res.add(
215-
if len(forkyBlck.message.body.blob_kzg_commitments) == 0:
216-
shortLog(forkyBlck.root) & ":[]"
217-
else:
218-
let map =
219-
overseer.columnQuarantine[].getMissingColumnsMap(forkyBlck.root)
220-
shortLog(forkyBlck.root) & ":" & $map
221-
)
219+
if len(forkyBlck.message.body.blob_kzg_commitments) > 0:
220+
let map =
221+
overseer.columnQuarantine[].getMissingColumnsMap(forkyBlck.root)
222+
res.add(shortLog(forkyBlck.root) & ":" & $map)
223+
missingCount += float(len(map))
224+
totalCount += blocksColumnsCount
222225
else:
223226
raiseAssert "Unsupported fork"
224-
"[" & res.join(",") & "]"
227+
228+
let missing =
229+
if totalCount > 0.0:
230+
((missingCount * 100.0) / totalCount).formatBiggestFloat(ffDecimal, 2) &
231+
"%"
232+
else:
233+
"0.00%"
234+
235+
(missing, "[" & res.join(",") & "]")
225236

226237
func getLastSeenHeadLog(
227238
overseer: SyncOverseerRef2
@@ -1986,12 +1997,14 @@ proc doRangeSidecarsStep(
19861997
# blocks.
19871998
(false, false)
19881999

1989-
let missingLog = overseer.getMissingColumnsLog(blocks)
2000+
let (missingCount, missingLog) =
2001+
overseer.getMissingColumnsLog(blocks)
19902002

19912003
debug "Peer columns compatibility",
19922004
custody_map = shortLog(custodyMap),
19932005
peer_map = shortLog(peerMap),
19942006
intersect_map = shortLog(intersectMap),
2007+
missing_count = missingCount,
19952008
missing_log = missingLog
19962009

19972010
if (len(blocks) > 0) and (columnsNeeded and not(columnsHave)):
@@ -2000,6 +2013,7 @@ proc doRangeSidecarsStep(
20002013
custody_map = shortLog(custodyMap),
20012014
peer_map = shortLog(peerMap),
20022015
intersect_map = shortLog(intersectMap),
2016+
missing_count = missingCount,
20032017
missing_log = missingLog
20042018
overseer.ssqueue(direction).push(request)
20052019
return true
@@ -2022,6 +2036,7 @@ proc doRangeSidecarsStep(
20222036
columns_map = getShortMap(request, intersectMap, data.toSeq()),
20232037
peer_map = shortLog(peerMap),
20242038
intersection_map = shortLog(intersectMap),
2039+
missing_count = missingCount,
20252040
columns = slimLog(data.asSeq()),
20262041
missing_log = missingLog
20272042

@@ -2064,6 +2079,7 @@ proc doRangeSidecarsStep(
20642079
custody_map = shortLog(custodyMap),
20652080
peer_map = shortLog(peerMap),
20662081
intersect_map = shortLog(intersectMap),
2082+
missing_count = missingCount,
20672083
missing_log = missingLog,
20682084
columns_needed = columnsNeeded, columns_have = columnsHave
20692085

beacon_chain/sync/sync_queue.nim

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77

88
{.push raises: [].}
99

10-
import std/[deques, heapqueue, tables, strutils, sequtils, math, typetraits]
10+
import std/[
11+
deques, heapqueue, sets, tables, strutils, sequtils, math, typetraits]
1112
import stew/base10, chronos, chronicles, results
1213
import
1314
../spec/[helpers, forks, column_map],
@@ -53,6 +54,7 @@ type
5354

5455
ColumnCompleteness* = object
5556
map: ColumnMap
57+
keys: HashSet[string]
5658
done: bool
5759

5860
SyncRequest*[T] = object
@@ -334,6 +336,7 @@ func isComplete[M, N](
334336
peer: M,
335337
criteria: var N
336338
): bool =
339+
mixin getKey
337340
if criteria.done:
338341
return true
339342
when N is BlockCompleteness:
@@ -343,6 +346,10 @@ func isComplete[M, N](
343346
# If criteria's map is empty, it means that we do not need columns for
344347
# the range.
345348
return true
349+
if $(peer.getKey()) in criteria.keys:
350+
# Columns was already downloaded from this peer for the range, there is no
351+
# reason to request it one more time.
352+
return true
346353
# If the peer has columns that we are still missing, we should return
347354
# `false`, so that peer will get request for that range, but if the peer
348355
# does not have the columns we need, we should return `true`.
@@ -377,13 +384,20 @@ func fillCompleteness[M](
377384
peer: M,
378385
missingMap: Opt[ColumnMap],
379386
done: bool,
387+
storePeer: bool,
380388
criteria: var ColumnCompleteness
381389
) =
390+
mixin getKey
391+
382392
if done:
383393
criteria.done = true
384394
criteria.map = ColumnMap()
385395
return
386396

397+
if storePeer:
398+
let key = peer.getKey()
399+
criteria.keys.incl($key)
400+
387401
if missingMap.isSome():
388402
criteria.map = missingMap.get()
389403

@@ -1028,7 +1042,7 @@ proc push*[M, N](sq: SyncQueue[M, N], requests: openArray[SyncRequest[M]]) =
10281042
elif N is ColumnCompleteness:
10291043
sq.fillCompleteness(
10301044
sq.requests[pos.qindex].data, request.item, Opt.none(ColumnMap),
1031-
done = false, sq.requests[pos.qindex].completeness)
1045+
done = false, storePeer = false, sq.requests[pos.qindex].completeness)
10321046
sq.del(pos)
10331047

10341048
proc push*[M, N](sq: SyncQueue[M, N], sr: SyncRequest[M]) =
@@ -1141,7 +1155,7 @@ proc push*[M, N](
11411155
return SyncPushResponse(
11421156
code: SyncProcessError.NoRelevant, count: 0'i64)
11431157

1144-
template fillCompleteness(pdone, pblck: untyped) =
1158+
template fillCompleteness(pdone, pblck, pstore: untyped) =
11451159
when N is BlockCompleteness:
11461160
sq.fillCompleteness(
11471161
sq.requests[position.qindex].data, sr.item, done = pdone,
@@ -1151,11 +1165,13 @@ proc push*[M, N](
11511165
let map = sq.getMissingMap(data, pblck.get().root)
11521166
sq.fillCompleteness(
11531167
sq.requests[position.qindex].data, sr.item, Opt.some(map),
1154-
done = pdone, sq.requests[position.qindex].completeness)
1168+
done = pdone, storePeer = pstore,
1169+
sq.requests[position.qindex].completeness)
11551170
else:
11561171
sq.fillCompleteness(
11571172
sq.requests[position.qindex].data, sr.item, Opt.none(ColumnMap),
1158-
done = pdone, sq.requests[position.qindex].completeness)
1173+
done = pdone, storePeer = pstore,
1174+
sq.requests[position.qindex].completeness)
11591175

11601176
# This is backpressure handling algorithm, this algorithm is blocking
11611177
# all pending `push` requests if `request` is not in range.
@@ -1234,18 +1250,18 @@ proc push*[M, N](
12341250
# peers returns empty response for the same range.
12351251
if sq.requests[position.qindex].voidsCount >= sq.requestsCount:
12361252
when N is BlockCompleteness:
1237-
fillCompleteness(true, Opt.none(BlockId))
1253+
fillCompleteness(true, Opt.none(BlockId), false)
12381254
sq.advanceQueue(res)
12391255
elif N is ColumnCompleteness:
12401256
let localMap = sq.cbGetLocalColumnMap()
12411257
# If completeness map was changed it proves that specific range is
12421258
# not actually empty and we should not move forward.
12431259
if sq.requests[position.qindex].completeness.map != localMap:
1244-
fillCompleteness(false, Opt.none(BlockId))
1260+
fillCompleteness(false, Opt.none(BlockId), false)
12451261
else:
1246-
fillCompleteness(true, Opt.none(BlockId))
1262+
fillCompleteness(true, Opt.none(BlockId), false)
12471263
else:
1248-
fillCompleteness(false, Opt.none(BlockId))
1264+
fillCompleteness(false, Opt.none(BlockId), false)
12491265

12501266
of SyncProcessError.Duplicate:
12511267
# Duplicate responses does not affect failures count
@@ -1260,7 +1276,7 @@ proc push*[M, N](
12601276
topics = "sync"
12611277

12621278
sq.gapList.reset()
1263-
fillCompleteness(true, Opt.none(BlockId))
1279+
fillCompleteness(true, Opt.none(BlockId), false)
12641280
sq.advanceQueue(res)
12651281

12661282
of SyncProcessError.MissingSidecars:
@@ -1275,7 +1291,7 @@ proc push*[M, N](
12751291
topics = "sync"
12761292

12771293
inc(sq.requests[position.qindex].failuresCount)
1278-
fillCompleteness(false, pres.blck)
1294+
fillCompleteness(false, pres.blck, true)
12791295
sq.del(position)
12801296
res = 0'i64
12811297

@@ -1292,7 +1308,7 @@ proc push*[M, N](
12921308
topics = "sync"
12931309

12941310
inc(sq.requests[position.qindex].failuresCount)
1295-
fillCompleteness(false, pres.blck)
1311+
fillCompleteness(false, pres.blck, false)
12961312
sq.del(position)
12971313
res = 0'i64
12981314

@@ -1310,7 +1326,7 @@ proc push*[M, N](
13101326

13111327
sr.item.updateScore(PeerScoreUnviableFork)
13121328
inc(sq.requests[position.qindex].failuresCount)
1313-
fillCompleteness(false, pres.blck)
1329+
fillCompleteness(false, pres.blck, false)
13141330
sq.del(position)
13151331
res = 0'i64
13161332

@@ -1331,7 +1347,7 @@ proc push*[M, N](
13311347
sq.rewardForGaps(PeerScoreMissingValues)
13321348
sq.gapList.reset()
13331349
inc(sq.requests[position.qindex].failuresCount)
1334-
fillCompleteness(false, pres.blck)
1350+
fillCompleteness(false, pres.blck, false)
13351351
sq.del(position)
13361352
res = 0'i64
13371353

@@ -1351,7 +1367,7 @@ proc push*[M, N](
13511367
topics = "sync"
13521368

13531369
sr.item.updateScore(PeerScoreMissingValues)
1354-
fillCompleteness(false, pres.blck)
1370+
fillCompleteness(false, pres.blck, false)
13551371
sq.del(position)
13561372
res = 0'i64
13571373

@@ -1364,7 +1380,7 @@ proc push*[M, N](
13641380
if sr.hasEndGap(data):
13651381
sq.gapList.add(GapItem.init(sr))
13661382

1367-
fillCompleteness(true, Opt.none(BlockId))
1383+
fillCompleteness(true, Opt.none(BlockId), false)
13681384
sq.advanceQueue(res)
13691385
of SyncProcessError.NoRelevant:
13701386
raiseAssert "Processor should not return this error code"
@@ -1392,7 +1408,7 @@ proc push*[M, N](
13921408
except CancelledError as exc:
13931409
let pos = sq.find(sr)
13941410
if pos.isSome():
1395-
fillCompleteness(false, Opt.none(BlockId))
1411+
fillCompleteness(false, Opt.none(BlockId), false)
13961412
sq.del(pos.get())
13971413
raise exc
13981414
finally:

tests/test_sync_manager.nim

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
import std/sequtils
1212
import unittest2
13-
import chronos, stew/base10, chronos/unittest2/asynctests
13+
import chronos, stew/base10, chronos/unittest2/asynctests, libp2p/peerid
1414
import ../beacon_chain/networking/peer_scores
1515
import ../beacon_chain/gossip_processing/block_processor,
1616
../beacon_chain/sync/[sync_queue, response_utils],
@@ -19,6 +19,7 @@ import ../beacon_chain/gossip_processing/block_processor,
1919
type
2020
SomeTPeer = ref object
2121
id: string
22+
peerId: PeerId
2223
score: int
2324
map: ColumnMap
2425

@@ -27,8 +28,8 @@ type
2728
func init(t: typedesc[SomeTPeer], id: string, score = 1000): SomeTPeer =
2829
SomeTPeer(id: id, score: score)
2930

30-
func init(t: typedesc[SomeTPeer], id: string, map: ColumnMap): SomeTPeer =
31-
SomeTPeer(id: id, map: map)
31+
proc init(t: typedesc[SomeTPeer], id: string, map: ColumnMap): SomeTPeer =
32+
SomeTPeer(id: id, map: map, peerId: PeerId.random().get())
3233

3334
func init(t: typedesc[ColumnMap], columns: openArray[int]): ColumnMap =
3435
var res = columns.mapIt(ColumnIndex(it))
@@ -37,6 +38,9 @@ func init(t: typedesc[ColumnMap], columns: openArray[int]): ColumnMap =
3738
func `$`(peer: SomeTPeer): string =
3839
"peer#" & peer.id
3940

41+
func getKey*(peer: SomeTPeer): PeerId =
42+
peer.peerId
43+
4044
template shortLog(peer: SomeTPeer): string =
4145
$peer
4246

@@ -1613,6 +1617,8 @@ suite "SyncManager test suite":
16131617
peer8 = SomeTPeer.init("8", getLocalMap())
16141618
peer9 = SomeTPeer.init("9", getLocalMap())
16151619
peer10 = SomeTPeer.init("10", getLocalMap())
1620+
peer11 = SomeTPeer.init("11", getLocalMap())
1621+
peer12 = SomeTPeer.init("12", getLocalMap())
16161622

16171623
let
16181624
r1 = sq.pop(Slot(127), peer1)
@@ -1676,7 +1682,7 @@ suite "SyncManager test suite":
16761682
check p8.code == SyncProcessError.MissingSidecars
16771683

16781684
let
1679-
r9 = sq.pop(Slot(127), peer8)
1685+
r9 = sq.pop(Slot(127), peer9)
16801686
(d9, c9) = createFuluChain(r9, r9.item.map)
16811687

16821688
var columns9 =
@@ -1690,17 +1696,18 @@ suite "SyncManager test suite":
16901696

16911697
# Finish this range with full 32 blocks and columns
16921698
let
1693-
r10 = sq.pop(Slot(127), peer8)
1699+
r10 = sq.pop(Slot(127), peer10)
16941700
(d10, c10) = createFuluChain(r10, r10.item.map)
16951701

16961702
let p10 = await sq.push(r10, d10, c10)
16971703
check p10.code == SyncProcessError.NoError
16981704

16991705
let
1700-
r11 = sq.pop(Slot(127), peer9)
1701-
r12 = sq.pop(Slot(127), peer10)
1706+
r11 = sq.pop(Slot(127), peer11)
1707+
r12 = sq.pop(Slot(127), peer12)
17021708
r13 = sq.pop(Slot(127), peer8)
17031709
r14 = sq.pop(Slot(127), peer9)
1710+
r15 = sq.pop(Slot(127), peer10)
17041711
(d11, c11) = createFuluChain(r11, r11.item.map)
17051712
(d12, c12) = createFuluChain(r12, r12.item.map)
17061713

@@ -1709,6 +1716,7 @@ suite "SyncManager test suite":
17091716
r12.isEmpty() == false
17101717
r13.isEmpty() == true
17111718
r14.isEmpty() == true
1719+
r15.isEmpty() == true
17121720

17131721
let p11 = await sq.push(r11, d11, c11)
17141722
check p11.code == SyncProcessError.NoError

0 commit comments

Comments
 (0)