comet/reader.go at main · orbiterhq/comet · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
package comet

import (
	"container/list"
	"encoding/binary"
	"fmt"
	"os"
	"strings"
	"sync"
	"sync/atomic"
	"syscall"
	"time"

	"github.com/klauspost/compress/zstd"
)

// Removed ErrFileAwaitingData - no longer needed with immutable cloned indexes

// AtomicSlice holds a byte slice that can be replaced and read atomically.
// It is a thin wrapper over atomic.Value that only ever stores []byte.
type AtomicSlice struct {
	value atomic.Value // Stores []byte
}

// Load returns the slice most recently passed to Store, or nil when nothing
// has been stored yet.
func (a *AtomicSlice) Load() []byte {
	stored, ok := a.value.Load().([]byte)
	if !ok {
		return nil
	}
	return stored
}

// Store atomically replaces the held slice with data.
func (a *AtomicSlice) Store(data []byte) {
	a.value.Store(data)
}

// ReaderConfig configures the bounded reader behavior.
// The defaults quoted on each field are the values returned by
// DefaultReaderConfig.
type ReaderConfig struct {
	MaxMappedFiles  int   // Maximum number of files to keep mapped (default: 10)
	MaxMemoryBytes  int64 // Maximum memory to use for mapping (default: 2GB)
	CleanupInterval int   // How often to run cleanup in milliseconds (default: 5000)
}

// DefaultReaderConfig returns the default configuration for Reader.
func DefaultReaderConfig() ReaderConfig {
	return ReaderConfig{
		MaxMappedFiles:  10,                     // Reasonable default
		MaxMemoryBytes:  2 * 1024 * 1024 * 1024, // 2GB
		CleanupInterval: 5000,                   // 5 seconds, in milliseconds
	}
}

// ReaderConfigForStorage returns a reader configuration tuned for shards whose
// data files grow up to maxFileSize bytes.
//
// Only MaxMappedFiles is derived from maxFileSize; the other fields keep their
// default values. A non-positive maxFileSize yields the unmodified defaults.
// The result is never below the default of 10 mapped files.
func ReaderConfigForStorage(maxFileSize int64) ReaderConfig {
	cfg := DefaultReaderConfig()

	if maxFileSize <= 0 {
		return cfg // Return defaults for invalid input
	}

	// Assume files are 80% full on average. This is floor(4*maxFileSize/5)
	// computed without ever forming 4*maxFileSize, which would wrap around
	// for very large inputs and produce a bogus small average.
	avgFileSize := maxFileSize/5*4 + (maxFileSize%5)*4/5
	if avgFileSize <= 0 {
		avgFileSize = maxFileSize // Only reachable for maxFileSize == 1; avoids dividing by zero
	}

	// How many average-sized files fit in the memory limit, doubled to allow
	// for variation in actual file sizes.
	optimalMaxFiles := (cfg.MaxMemoryBytes / avgFileSize) * 2

	const minMappedFiles = 10 // Minimum for good performance
	if optimalMaxFiles < minMappedFiles {
		cfg.MaxMappedFiles = minMappedFiles
		return cfg
	}

	files := int(optimalMaxFiles)
	if int64(files) != optimalMaxFiles {
		// int is narrower than int64 on this platform and the value did not
		// survive the conversion; saturate instead of truncating.
		files = int(^uint(0) >> 1)
	}
	cfg.MaxMappedFiles = files
	return cfg
}

// MappedFile represents a memory-mapped file with atomic data access.
//
// It embeds the FileInfo the mapping was created from. file is the open
// handle that is stat'ed to detect growth and that backs the mapping; data
// holds the mapped bytes (an empty slice when the file was empty at mapping
// time); lastSize is the file size observed when data was last (re)mapped.
type MappedFile struct {
	FileInfo
	file     *os.File
	data     atomic.Value // Stores []byte, accessed atomically
	lastSize int64        // Last known file size for remapping
}

// recentFileCache is a fixed-capacity, least-recently-used set of file
// indexes. The front of order is the most recently accessed index; items maps
// each tracked index to its list element for O(1) lookup.
type recentFileCache struct {
	mu       sync.RWMutex
	items    map[int]*list.Element
	order    *list.List
	capacity int
}

// cacheItem is the value stored in every element of recentFileCache.order.
type cacheItem struct {
	fileIndex int
}

// newRecentFileCache builds an empty cache. A non-positive capacity is
// replaced by a capacity of 5.
func newRecentFileCache(capacity int) *recentFileCache {
	const fallbackCapacity = 5 // Reasonable minimum
	if capacity < 1 {
		capacity = fallbackCapacity
	}

	cache := new(recentFileCache)
	cache.items = make(map[int]*list.Element)
	cache.order = list.New()
	cache.capacity = capacity
	return cache
}

// access records a use of fileIndex, making it the most recent entry. When
// the index is new and the cache is full, the least recently used entry is
// dropped first.
func (c *recentFileCache) access(fileIndex int) {
	c.mu.Lock()
	defer c.mu.Unlock()

	if known, ok := c.items[fileIndex]; ok {
		c.order.MoveToFront(known)
		return
	}

	if c.order.Len() >= c.capacity {
		if victim := c.order.Back(); victim != nil {
			delete(c.items, victim.Value.(*cacheItem).fileIndex)
			c.order.Remove(victim)
		}
	}

	c.items[fileIndex] = c.order.PushFront(&cacheItem{fileIndex: fileIndex})
}

// contains reports whether fileIndex is currently tracked by the cache
// (tracked files are skipped when the reader looks for an eviction victim).
func (c *recentFileCache) contains(fileIndex int) bool {
	c.mu.RLock()
	_, tracked := c.items[fileIndex]
	c.mu.RUnlock()
	return tracked
}

// Reader provides bounded memory-mapped read access to a shard with intelligent file management.
//
// shardID and index are the values handed to NewReader and config is the
// effective ReaderConfig. recentCache tracks recently read file indexes, which
// are skipped when choosing a file to evict. decompressor decodes zstd
// compressed entries and is set to nil by Close, which is how a closed reader
// is detected. bufferPool supplies scratch buffers for decompression.
type Reader struct {
	shardID              uint32
	index                *ShardIndex
	config               ReaderConfig
	fileInfos            []FileInfo          // All file metadata (not necessarily mapped)
	mappedFiles          map[int]*MappedFile // Currently mapped files
	mappingMu            sync.RWMutex        // Protects mappedFiles
	localMemory          int64               // Atomic counter for local memory usage
	recentCache          *recentFileCache
	decompressor         *zstd.Decoder
	bufferPool           *sync.Pool
	state                *CometState // Shared state for metrics (optional)
	lastKnownIndexUpdate int64       // Last known index update timestamp for cache invalidation
	client               *Client     // Reference to client for index refreshing (optional)
}

// NewReader builds a bounded reader for the shard identified by shardID.
//
// index must be non-nil; its file list is copied into reader-owned storage.
// At most one ReaderConfig is honored (the first); when none is given
// DefaultReaderConfig is used. The newest file in the index is mapped eagerly
// on a best-effort basis: a mapping failure is ignored and the file is mapped
// lazily on first read instead.
func NewReader(shardID uint32, index *ShardIndex, config ...ReaderConfig) (*Reader, error) {
	if index == nil {
		return nil, fmt.Errorf("index cannot be nil")
	}

	cfg := DefaultReaderConfig()
	if len(config) > 0 {
		cfg = config[0]
	}

	// Reader-owned copy of the file metadata, so the reader does not share
	// the index's slice.
	files := make([]FileInfo, len(index.Files))
	copy(files, index.Files)

	r := &Reader{
		shardID:     shardID,
		index:       index,
		config:      cfg,
		fileInfos:   files,
		mappedFiles: make(map[int]*MappedFile),
		// Half of the mapping budget is reserved for recently used files.
		recentCache: newRecentFileCache(cfg.MaxMappedFiles / 2),
		bufferPool: &sync.Pool{
			New: func() any { return make([]byte, 0, 64*1024) },
		},
	}

	dec, err := zstd.NewReader(nil)
	if err != nil {
		return nil, fmt.Errorf("failed to create decompressor: %w", err)
	}
	r.decompressor = dec

	// Map only the most recent file up front; everything else is mapped on
	// demand. Metrics cannot be updated here because state is not set yet.
	if count := len(r.fileInfos); count > 0 {
		newest := count - 1
		if mapped, mapErr := r.mapFile(r.fileInfos[newest]); mapErr == nil {
			r.mappedFiles[newest] = mapped
		}
	}

	return r, nil
}

// SetState attaches the shared state used for metrics tracking. When state is
// non-nil the reader immediately publishes its current mapped-file count and
// mapped byte total, and records the state's last index update timestamp as
// the baseline for staleness checks.
func (r *Reader) SetState(state *CometState) {
	r.state = state
	if state == nil {
		return
	}

	r.mappingMu.RLock()
	mappedNow := uint64(len(r.mappedFiles))
	r.mappingMu.RUnlock()

	atomic.StoreUint64(&state.ReaderMappedFiles, mappedNow)
	atomic.StoreUint64(&state.ReaderCacheBytes, uint64(atomic.LoadInt64(&r.localMemory)))

	// Files mapped before the state was attached were never counted.
	if mappedNow > 0 {
		atomic.StoreUint64(&state.ReaderFileMaps, mappedNow)
	}

	atomic.StoreInt64(&r.lastKnownIndexUpdate, state.GetLastIndexUpdate())
}

// SetClient sets the client reference for index refreshing.
// Without a client, refreshFromLiveIndex returns an error and the reader
// keeps working from the file list it already has.
func (r *Reader) SetClient(client *Client) {
	r.client = client
}

// refreshFromLiveIndex replaces the reader's file list with a snapshot of the
// live shard index obtained through the client. It fails when no client has
// been set or when the client does not know the reader's shard.
func (r *Reader) refreshFromLiveIndex() error {
	if r.client == nil {
		return fmt.Errorf("no client reference available for index refresh")
	}

	shard := r.client.getShard(r.shardID)
	if shard == nil {
		return fmt.Errorf("shard %d not found", r.shardID)
	}

	// Copy the file list while holding the shard's read lock so the snapshot
	// is taken under the same lock the shard uses for its index.
	snapshot := func() []FileInfo {
		shard.mu.RLock()
		defer shard.mu.RUnlock()

		files := make([]FileInfo, len(shard.index.Files))
		copy(files, shard.index.Files)
		return files
	}()

	return r.UpdateFiles(&snapshot)
}

// mapFile opens the file described by info and maps its current contents
// read-only into memory.
//
// An empty file is returned with an empty data slice and its handle left
// open, so it can be remapped once data arrives. A non-empty file is refused
// when mapping it would push the reader past config.MaxMemoryBytes. On success
// the reader's memory counter and, if state is set, the map metrics are
// updated; registering the result in mappedFiles is left to the caller.
func (r *Reader) mapFile(info FileInfo) (*MappedFile, error) {
	handle, err := os.Open(info.Path)
	if err != nil {
		return nil, err
	}

	// Every failure path below must release the descriptor; success paths
	// flip keepOpen because the MappedFile takes ownership of the handle.
	keepOpen := false
	defer func() {
		if !keepOpen {
			handle.Close()
		}
	}()

	st, err := handle.Stat()
	if err != nil {
		return nil, err
	}
	size := st.Size()

	mf := &MappedFile{
		FileInfo: info,
		file:     handle,
		lastSize: size,
	}

	if size == 0 {
		// Nothing to map yet; the file may simply not have been written to.
		mf.data.Store([]byte{})
		keepOpen = true
		return mf, nil
	}

	if atomic.LoadInt64(&r.localMemory)+size > r.config.MaxMemoryBytes {
		return nil, fmt.Errorf("mapping would exceed memory limit: current=%d + new=%d > limit=%d",
			atomic.LoadInt64(&r.localMemory), size, r.config.MaxMemoryBytes)
	}

	region, err := syscall.Mmap(int(handle.Fd()), 0, int(size), syscall.PROT_READ, syscall.MAP_PRIVATE)
	if err != nil {
		return nil, fmt.Errorf("mmap failed: %w", err)
	}
	keepOpen = true

	mf.data.Store(region)
	atomic.AddInt64(&r.localMemory, size)

	if r.state != nil {
		atomic.AddUint64(&r.state.ReaderFileMaps, 1)
		atomic.StoreUint64(&r.state.ReaderCacheBytes, uint64(atomic.LoadInt64(&r.localMemory)))
		// ReaderMappedFiles is updated by the caller after adding to mappedFiles.
	}

	return mf, nil
}

// ReadEntryAtPosition returns a copy of the entry stored in file
// pos.FileIndex at byte offset pos.ByteOffset.
//
// The file is mapped on demand and remapped if it has grown since it was last
// mapped. When the read fails with an "mmap coherence issue" error the file is
// forcibly remapped and the read is attempted exactly once more. An error is
// returned if the reader is closed, the index is out of range, the file cannot
// be mapped, or the file is still empty.
func (r *Reader) ReadEntryAtPosition(pos EntryPosition) ([]byte, error) {
	idx := pos.FileIndex

	// Take one consistent snapshot of everything that needs the lock.
	r.mappingMu.RLock()
	closed := r.decompressor == nil
	mapped, isMapped := r.mappedFiles[idx]
	inRange := idx >= 0 && idx < len(r.fileInfos)
	r.mappingMu.RUnlock()

	switch {
	case closed:
		return nil, fmt.Errorf("reader is closed")
	case !inRange:
		return nil, fmt.Errorf("file index %d out of range", idx)
	}

	if !isMapped {
		if err := r.ensureFileMapped(idx); err != nil {
			return nil, fmt.Errorf("failed to map file for read: %w", err)
		}

		r.mappingMu.RLock()
		mapped = r.mappedFiles[idx]
		r.mappingMu.RUnlock()
	}
	if mapped == nil {
		return nil, fmt.Errorf("file %d could not be mapped", idx)
	}

	// Pick up any data appended since the file was mapped.
	if err := r.checkAndRemapIfGrown(idx, mapped); err != nil {
		return nil, fmt.Errorf("failed to check file growth: %w", err)
	}

	r.recentCache.access(idx)

	data := mapped.data.Load().([]byte)
	if len(data) == 0 {
		// An empty mapping is a transient condition: give the file one more
		// chance if it reports data on disk by now.
		if mapped.file != nil {
			if st, statErr := mapped.file.Stat(); statErr == nil && st.Size() > 0 {
				if err := r.checkAndRemapIfGrown(idx, mapped); err != nil {
					return nil, fmt.Errorf("failed to remap grown file: %w", err)
				}
				data = mapped.data.Load().([]byte)
			}
		}

		// Still nothing: most likely a new file awaiting its first flush.
		if len(data) == 0 {
			return nil, fmt.Errorf("file %d is empty or not yet mapped", idx)
		}
	}

	entry, err := r.readEntryFromFileData(data, pos.ByteOffset)
	if err == nil || !strings.Contains(err.Error(), "mmap coherence issue") {
		return entry, err
	}

	// The mapped view is stale relative to the file on disk. Force a remap
	// (even if the size looks unchanged) and retry once.
	if remapErr := r.checkAndRemapIfGrownForce(idx, mapped, true); remapErr != nil {
		return nil, fmt.Errorf("failed to remap after mmap coherence issue: %w", remapErr)
	}
	return r.readEntryFromFileData(mapped.data.Load().([]byte), pos.ByteOffset)
}

// checkAndRemapIfGrown checks if a file has grown and remaps it if necessary.
// It is the non-forcing form of checkAndRemapIfGrownForce: the mapping is only
// replaced when the size checks performed there say it is out of date.
func (r *Reader) checkAndRemapIfGrown(fileIndex int, mapped *MappedFile) error {
	return r.checkAndRemapIfGrownForce(fileIndex, mapped, false)
}

// checkAndRemapIfGrownForce replaces mapped's memory mapping when it is out of
// date with respect to the file on disk.
//
// A remap happens when the file is larger than it was at the last mapping,
// when the current mapping is empty but the file now has data, or
// unconditionally when force is true (used after an mmap coherence error). In
// the forced case the file is also reopened to obtain a fresh handle.
//
// The function takes r.mappingMu for writing, so callers must not hold it.
// If remapping fails after the old mapping was released, mapped is left with
// an empty data slice rather than a reference to unmapped memory, so a later
// call can simply try again.
func (r *Reader) checkAndRemapIfGrownForce(fileIndex int, mapped *MappedFile, force bool) error {
	if mapped.file == nil {
		return nil // Can't check growth without file handle
	}

	stat, err := mapped.file.Stat()
	if err != nil {
		return err
	}
	currentSize := stat.Size()

	// Remap when forced, when the file has grown, or when the mapping is
	// empty although the file has data.
	needsRemap := func() bool {
		if force || currentSize > mapped.lastSize {
			return true
		}
		current, _ := mapped.data.Load().([]byte)
		return len(current) == 0 && currentSize > 0
	}
	if !needsRemap() {
		return nil
	}

	r.mappingMu.Lock()
	defer r.mappingMu.Unlock()

	// Re-evaluate under the lock: another goroutine may have remapped already.
	if !needsRemap() {
		return nil
	}

	oldData, _ := mapped.data.Load().([]byte)
	oldSize := mapped.lastSize
	oldLen := int64(len(oldData))

	// Release the old mapping and immediately forget it, so that no later
	// reader, eviction or Close touches (or unmaps again) freed memory if one
	// of the steps below fails.
	if oldLen > 0 {
		if err := syscall.Munmap(oldData); err != nil {
			return fmt.Errorf("failed to unmap old data: %w", err)
		}
		mapped.data.Store([]byte{})
		mapped.lastSize = 0
		atomic.AddInt64(&r.localMemory, -oldLen)
		if r.state != nil {
			atomic.StoreUint64(&r.state.ReaderCacheBytes, uint64(atomic.LoadInt64(&r.localMemory)))
		}
	}

	if force {
		// Open the replacement handle before giving up the old one, so a
		// failure leaves mapped.file usable.
		newFile, err := os.Open(mapped.FileInfo.Path)
		if err != nil {
			return fmt.Errorf("failed to reopen file for forced remap: %w", err)
		}
		newStat, err := newFile.Stat()
		if err != nil {
			newFile.Close()
			return fmt.Errorf("failed to stat reopened file: %w", err)
		}
		mapped.file.Close()
		mapped.file = newFile
		currentSize = newStat.Size()

		// If even a fresh handle reports the old size, wait briefly and look
		// once more before mapping.
		if currentSize == oldSize && currentSize == oldLen {
			time.Sleep(10 * time.Millisecond)
			if stat2, err := newFile.Stat(); err == nil {
				currentSize = stat2.Size()
			}
		}
	}

	if atomic.LoadInt64(&r.localMemory)+currentSize > r.config.MaxMemoryBytes {
		// Mark this file as recently used first: eviction skips recent files,
		// which keeps the file being remapped from being chosen as the victim.
		r.recentCache.access(fileIndex)
		if err := r.evictOldestFile(); err != nil {
			return fmt.Errorf("cannot remap file: would exceed memory limit and cannot evict: %w", err)
		}
	}

	if currentSize == 0 {
		// Nothing to map; keep an empty slice so readers see "no data yet".
		mapped.data.Store([]byte{})
		mapped.lastSize = 0
		return nil
	}

	data, err := syscall.Mmap(int(mapped.file.Fd()), 0, int(currentSize), syscall.PROT_READ, syscall.MAP_PRIVATE)
	if err != nil {
		return fmt.Errorf("failed to remap grown file: %w", err)
	}

	mapped.data.Store(data)
	mapped.lastSize = currentSize
	atomic.AddInt64(&r.localMemory, currentSize)

	if r.state != nil {
		atomic.AddUint64(&r.state.ReaderFileRemaps, 1)
		atomic.StoreUint64(&r.state.ReaderCacheBytes, uint64(atomic.LoadInt64(&r.localMemory)))
	}

	return nil
}

// ensureFileMapped makes sure the file at fileIndex is present in
// mappedFiles, mapping it if needed.
//
// Before mapping, one file is evicted if the mapped-file count has reached
// config.MaxMappedFiles, and one more if mapped memory has reached
// config.MaxMemoryBytes. It returns an error when fileIndex is out of range,
// when an eviction is required but impossible, when MaxMemoryBytes is not
// positive, or when mapping itself fails. The function takes r.mappingMu for
// writing, so callers must not hold it.
func (r *Reader) ensureFileMapped(fileIndex int) error {
	r.mappingMu.Lock()
	defer r.mappingMu.Unlock()

	// Validate under the lock: r.fileInfos is replaced by UpdateFiles while
	// holding this same lock, so checking it earlier could see a stale list.
	if fileIndex < 0 || fileIndex >= len(r.fileInfos) {
		return fmt.Errorf("file index %d out of range", fileIndex)
	}

	if _, exists := r.mappedFiles[fileIndex]; exists {
		return nil
	}

	// Make room in the mapped-file budget.
	if len(r.mappedFiles) >= r.config.MaxMappedFiles {
		if err := r.evictOldestFile(); err != nil {
			return fmt.Errorf("failed to evict file for mapping: %w", err)
		}
	}

	if r.config.MaxMemoryBytes <= 0 {
		return fmt.Errorf("memory limit too restrictive: %d bytes", r.config.MaxMemoryBytes)
	}

	// Make room in the memory budget.
	if atomic.LoadInt64(&r.localMemory) >= r.config.MaxMemoryBytes {
		if err := r.evictOldestFile(); err != nil {
			return fmt.Errorf("at memory limit and cannot evict: %w", err)
		}
	}

	mapped, err := r.mapFile(r.fileInfos[fileIndex])
	if err != nil {
		return err
	}
	r.mappedFiles[fileIndex] = mapped

	if r.state != nil {
		atomic.StoreUint64(&r.state.ReaderMappedFiles, uint64(len(r.mappedFiles)))
	}

	return nil
}

// evictOldestFile unmaps and closes one mapped file that is not in the recent
// cache, choosing the one with the lowest file index so the choice is
// deterministic rather than dependent on map iteration order.
//
// The function does no locking of its own; the callers in this file invoke it
// while holding r.mappingMu for writing. It returns an error when every mapped
// file is protected by the recent cache (or nothing is mapped), or when the
// victim's memory cannot be unmapped. The victim is removed from mappedFiles
// and its file handle is closed even if unmapping fails.
func (r *Reader) evictOldestFile() error {
	victim, found := 0, false
	for fileIndex := range r.mappedFiles {
		if r.recentCache.contains(fileIndex) {
			continue
		}
		if !found || fileIndex < victim {
			victim, found = fileIndex, true
		}
	}
	if !found {
		return fmt.Errorf("no files to evict")
	}

	mapped := r.mappedFiles[victim]
	delete(r.mappedFiles, victim)

	var unmapErr error
	if dataBytes, ok := mapped.data.Load().([]byte); ok && len(dataBytes) > 0 {
		if err := syscall.Munmap(dataBytes); err != nil {
			unmapErr = fmt.Errorf("failed to munmap file %d: %w", victim, err)
		} else {
			atomic.AddInt64(&r.localMemory, -int64(len(dataBytes)))
			// Drop the reference to the released memory so anyone still
			// holding this MappedFile sees an empty mapping instead.
			mapped.data.Store([]byte{})
		}
	}

	// Always release the descriptor; the entry is already gone from the map,
	// so nothing else would ever close it.
	if mapped.file != nil {
		mapped.file.Close()
	}

	if unmapErr != nil {
		return unmapErr
	}

	if r.state != nil {
		atomic.AddUint64(&r.state.ReaderFileUnmaps, 1)
		atomic.AddUint64(&r.state.ReaderCacheEvicts, 1)
		atomic.StoreUint64(&r.state.ReaderCacheBytes, uint64(atomic.LoadInt64(&r.localMemory)))
		atomic.StoreUint64(&r.state.ReaderMappedFiles, uint64(len(r.mappedFiles)))
	}

	return nil
}

// readEntryFromFileData decodes the entry that starts at byteOffset inside
// data, the mapped contents of one file.
//
// Each entry is a 12-byte header followed by its payload; the first four
// header bytes hold the payload length as a little-endian uint32. A payload
// that starts with the zstd magic number is decompressed. The returned slice
// is always a fresh copy, so it stays valid after the file is unmapped.
//
// Errors containing "mmap coherence issue" mean the header does not fit in
// data at byteOffset; callers match on that text to trigger a forced remap.
func (r *Reader) readEntryFromFileData(data []byte, byteOffset int64) ([]byte, error) {
	if len(data) == 0 {
		// This can happen when a file has been created but not yet written to.
		return nil, fmt.Errorf("file is empty (likely awaiting flush)")
	}

	if byteOffset < 0 {
		return nil, fmt.Errorf("invalid byte offset: %d", byteOffset)
	}

	const headerSize = int64(12)
	fileSize := int64(len(data))

	// The header must fit entirely inside data. This single comparison also
	// covers an offset at or past the end of the file, so no separate EOF
	// check is needed. The subtraction form cannot overflow.
	if byteOffset > fileSize-headerSize {
		// The index says an entry exists here but the mapped view is too
		// short to contain it, i.e. the view is older than the index.
		return nil, fmt.Errorf("offset %d beyond file size %d (mmap coherence issue)", byteOffset, fileSize)
	}

	header := data[byteOffset : byteOffset+headerSize]
	length := binary.LittleEndian.Uint32(header[0:4])

	dataStart := byteOffset + headerSize
	dataEnd := dataStart + int64(length)
	if dataEnd > fileSize {
		return nil, fmt.Errorf("entry data extends beyond file: dataEnd=%d, fileSize=%d", dataEnd, len(data))
	}

	entryData := data[dataStart:dataEnd]

	// zstd frames start with the magic number 0xFD2FB528, stored
	// little-endian as 28 B5 2F FD.
	compressed := len(entryData) >= 4 &&
		entryData[0] == 0x28 && entryData[1] == 0xB5 &&
		entryData[2] == 0x2F && entryData[3] == 0xFD

	if !compressed {
		// Copy out of the mapping so the result survives an unmap.
		result := make([]byte, len(entryData))
		copy(result, entryData)
		return result, nil
	}

	// Close sets the decompressor to nil; fail cleanly instead of
	// dereferencing it.
	if r.decompressor == nil {
		return nil, fmt.Errorf("reader is closed")
	}

	buf := r.bufferPool.Get().([]byte)
	//lint:ignore SA6002 Buffer pool pattern - resetting slice length is intentional
	defer r.bufferPool.Put(buf[:0])

	// The payload is the complete zstd frame (no compression flag prefix).
	decompressed, err := r.decompressor.DecodeAll(entryData, buf)
	if err != nil {
		return nil, fmt.Errorf("decompression failed: %w", err)
	}

	// Return a copy since the scratch buffer goes back to the pool.
	result := make([]byte, len(decompressed))
	copy(result, decompressed)
	return result, nil
}

// UpdateFiles replaces the reader's file list with a copy of *newFiles.
//
// Mappings are keyed by position in the file list, so any existing mapping
// whose position no longer refers to the same path (or no longer exists) is
// unmapped and closed; otherwise a read could be served from the wrong file
// after the list shifts. The most recent file is then mapped on a best-effort
// basis if it is not mapped already.
//
// It returns an error when newFiles is nil or when a stale mapping could not
// be unmapped; in the latter case the file list has still been updated.
func (r *Reader) UpdateFiles(newFiles *[]FileInfo) error {
	if newFiles == nil {
		return fmt.Errorf("new file list cannot be nil")
	}

	r.mappingMu.Lock()
	defer r.mappingMu.Unlock()

	// Install a fresh slice instead of mutating the old one in place, so a
	// previously taken snapshot of r.fileInfos stays internally consistent.
	files := make([]FileInfo, len(*newFiles))
	copy(files, *newFiles)
	r.fileInfos = files

	// Drop mappings that no longer match the file at their index.
	var firstErr error
	for idx, mapped := range r.mappedFiles {
		if idx >= 0 && idx < len(files) && files[idx].Path == mapped.Path {
			continue
		}
		delete(r.mappedFiles, idx)

		if dataBytes, ok := mapped.data.Load().([]byte); ok && len(dataBytes) > 0 {
			if err := syscall.Munmap(dataBytes); err != nil {
				if firstErr == nil {
					firstErr = fmt.Errorf("failed to munmap stale file %d: %w", idx, err)
				}
			} else {
				atomic.AddInt64(&r.localMemory, -int64(len(dataBytes)))
				mapped.data.Store([]byte{})
			}
		}
		if mapped.file != nil {
			mapped.file.Close()
		}
		if r.state != nil {
			atomic.AddUint64(&r.state.ReaderFileUnmaps, 1)
		}
	}

	// Try to have the most recent file mapped; failure is not an error here
	// because the file is mapped on demand when it is first read.
	if last := len(files) - 1; last >= 0 {
		if _, exists := r.mappedFiles[last]; !exists {
			if mapped, err := r.mapFile(files[last]); err == nil {
				r.mappedFiles[last] = mapped
			}
		}
	}

	if r.state != nil {
		atomic.StoreUint64(&r.state.ReaderCacheBytes, uint64(atomic.LoadInt64(&r.localMemory)))
		atomic.StoreUint64(&r.state.ReaderMappedFiles, uint64(len(r.mappedFiles)))
	}

	return firstErr
}

// Close releases every mapping and file handle held by the reader, resets its
// memory accounting and shuts down the decompressor. All files are processed
// even if some fail; the first error encountered is returned. After Close the
// decompressor is nil, which is what marks the reader as closed.
func (r *Reader) Close() error {
	r.mappingMu.Lock()
	defer r.mappingMu.Unlock()

	released := len(r.mappedFiles)

	var firstErr error
	remember := func(err error) {
		if err != nil && firstErr == nil {
			firstErr = err
		}
	}

	for _, mapped := range r.mappedFiles {
		// Only non-empty slices are real mappings that need unmapping.
		if region, ok := mapped.data.Load().([]byte); ok && len(region) > 0 {
			remember(syscall.Munmap(region))
		}
		if mapped.file != nil {
			remember(mapped.file.Close())
		}
	}

	r.mappedFiles = make(map[int]*MappedFile)
	atomic.StoreInt64(&r.localMemory, 0)

	// Account for the files unmapped by this call.
	if r.state != nil && released > 0 {
		atomic.AddUint64(&r.state.ReaderFileUnmaps, uint64(released))
		atomic.StoreUint64(&r.state.ReaderCacheBytes, 0)
		atomic.StoreUint64(&r.state.ReaderMappedFiles, 0)
	}

	if dec := r.decompressor; dec != nil {
		dec.Close()
		r.decompressor = nil
	}

	return firstErr
}

// GetMemoryUsage reports the number of bytes currently accounted as mapped
// and the number of files currently mapped, in that order.
func (r *Reader) GetMemoryUsage() (int64, int) {
	r.mappingMu.RLock()
	usedBytes := atomic.LoadInt64(&r.localMemory)
	mappedCount := len(r.mappedFiles)
	r.mappingMu.RUnlock()

	return usedBytes, mappedCount
}

// ReadEntryByNumber returns the entry with the given sequential entry number,
// locating it through the per-file StartEntry/Entries metadata.
//
// The first entry of a file is read directly at offset 0; any other entry is
// found by scanning the file from its beginning. If the read fails, or no file
// claims the entry, and the shared state reports an index update newer than
// the one this reader last saw, the file list is refreshed from the live index
// and the lookup is retried. Without shared state no refresh is attempted.
func (r *Reader) ReadEntryByNumber(entryNumber int64) ([]byte, error) {
	// Work on a snapshot of the file list taken under the lock; UpdateFiles
	// replaces r.fileInfos while holding the same lock.
	r.mappingMu.RLock()
	files := r.fileInfos
	r.mappingMu.RUnlock()

	// refreshIfStale reloads the file list when the shared state reports a
	// newer index than the reader has seen. attempted is false when there is
	// no state or nothing newer; err is the refresh error, if any.
	refreshIfStale := func() (attempted bool, err error) {
		if r.state == nil {
			return false, nil
		}
		currentIndexUpdate := r.state.GetLastIndexUpdate()
		if currentIndexUpdate <= atomic.LoadInt64(&r.lastKnownIndexUpdate) {
			return false, nil
		}
		if err := r.refreshFromLiveIndex(); err != nil {
			return true, err
		}
		atomic.StoreInt64(&r.lastKnownIndexUpdate, currentIndexUpdate)
		return true, nil
	}

	for fileIndex := range files {
		info := &files[fileIndex]
		if entryNumber < info.StartEntry || entryNumber >= info.StartEntry+info.Entries {
			continue
		}

		relativeEntryNum := entryNumber - info.StartEntry

		var (
			data []byte
			err  error
		)
		if relativeEntryNum == 0 {
			// First entry in file - starts at offset 0.
			data, err = r.ReadEntryAtPosition(EntryPosition{FileIndex: fileIndex, ByteOffset: 0})
		} else {
			// Later entries have no known offset here, so scan from the start.
			data, err = r.readEntryByScanning(fileIndex, relativeEntryNum)
		}

		if err != nil {
			// The failure may be due to a stale file list; retry only if a
			// refresh was warranted and succeeded, otherwise report the
			// original error.
			if attempted, refreshErr := refreshIfStale(); attempted && refreshErr == nil {
				return r.ReadEntryByNumber(entryNumber)
			}
		}
		return data, err
	}

	// No file claims the entry: the reader's view may simply be out of date.
	if attempted, refreshErr := refreshIfStale(); attempted {
		if refreshErr != nil {
			return nil, fmt.Errorf("reader index is stale and refresh failed: %w", refreshErr)
		}
		return r.ReadEntryByNumber(entryNumber)
	}

	// Entry not found in any file - this could be because:
	// 1. The entry doesn't exist yet (reading ahead of writes)
	// 2. The entry was in a file that got deleted by retention

	// Special case for entry 0: if the first file reports no entries and a
	// non-zero start, it may be a file that was just created; try reading
	// from it anyway.
	if entryNumber == 0 && len(files) > 0 {
		if first := &files[0]; first.Entries == 0 && first.StartEntry > 0 {
			data, err := r.ReadEntryAtPosition(EntryPosition{FileIndex: 0, ByteOffset: 0})
			if err == nil {
				return data, nil
			}
		}
	}

	return nil, fmt.Errorf("entry %d not found in any file", entryNumber)
}

// readEntryByScanning returns the entry at position relativeEntryNum
// (0-indexed) within file fileIndex. It walks the file from offset 0, using
// each entry's 12-byte header to hop over relativeEntryNum entries, then reads
// the entry it lands on. A header length above 10MB is treated as invalid. If
// the final read reports an "mmap coherence issue" the file is forcibly
// remapped and the read is retried once at the same offset.
func (r *Reader) readEntryByScanning(fileIndex int, relativeEntryNum int64) ([]byte, error) {
	if err := r.ensureFileMapped(fileIndex); err != nil {
		return nil, err
	}

	r.mappingMu.RLock()
	mapped, exists := r.mappedFiles[fileIndex]
	r.mappingMu.RUnlock()

	if !exists || mapped == nil {
		return nil, fmt.Errorf("file %d not mapped", fileIndex)
	}

	// Pick up any data appended since the file was mapped.
	if err := r.checkAndRemapIfGrown(fileIndex, mapped); err != nil {
		return nil, fmt.Errorf("failed to check file growth: %w", err)
	}

	data := mapped.data.Load().([]byte)
	if len(data) == 0 {
		return nil, fmt.Errorf("file %d is empty", fileIndex)
	}

	const (
		headerLen   = 12               // bytes of header in front of every payload
		maxEntryLen = 10 * 1024 * 1024 // 10MB max entry size
	)
	size := int64(len(data))

	// Hop over the entries that precede the target.
	offset := int64(0)
	for skipped := int64(0); skipped < relativeEntryNum; skipped++ {
		if offset+headerLen > size {
			// The index claims there are more entries than we have data for.
			return nil, fmt.Errorf("file %d: insufficient data for entry %d (file too short)", fileIndex, relativeEntryNum)
		}

		// Payload length lives in the first 4 header bytes.
		length := binary.LittleEndian.Uint32(data[offset : offset+4])
		if length > maxEntryLen {
			return nil, fmt.Errorf("file %d: invalid entry length %d at offset %d", fileIndex, length, offset)
		}

		next := offset + headerLen + int64(length)
		if next > size {
			return nil, fmt.Errorf("file %d: entry at offset %d extends beyond file (need %d, have %d)", fileIndex, offset, next, len(data))
		}
		offset = next
	}

	entry, err := r.readEntryFromFileData(data, offset)
	if err == nil || !strings.Contains(err.Error(), "mmap coherence issue") {
		return entry, err
	}

	// Stale view: force a remap and try once more at the same offset.
	if remapErr := r.checkAndRemapIfGrownForce(fileIndex, mapped, true); remapErr != nil {
		return nil, fmt.Errorf("failed to remap after mmap coherence issue in scanning: %w", remapErr)
	}
	return r.readEntryFromFileData(mapped.data.Load().([]byte), offset)
}