@@ -1199,3 +1199,225 @@ def test_mixed_frequency_and_position(self):
11991199 # (5,6) appears once -> highest ID
12001200 assert id_56 > id_12
12011201 assert id_56 > id_34
1202+
1203+
1204+ class TestPaletteEncoding :
1205+ """Test palette encoding optimization."""
1206+
1207+ def test_palette_generated_for_outlier (self ):
1208+ """Palette solution should be generated when there's an outlier."""
1209+ data = [1 , 2 , 3 , 2 , 3 , 2 , 1 , 0 , 2 , 1 , 2 , 2 , 3 , 3 , 1 , 11110124 ]
1210+ solutions = pack_table (data , default = 0 , compression = None )
1211+
1212+ # Should have both direct and palette solutions
1213+ assert len (solutions ) >= 2
1214+
1215+ # Check that one is a palette solution
1216+ palette_solutions = [s for s in solutions if hasattr (s , 'palette' ) and isinstance (s .palette , list )]
1217+ assert len (palette_solutions ) >= 1
1218+
1219+ # Verify palette structure
1220+ pal_sol = palette_solutions [0 ]
1221+ assert pal_sol .palette == [0 , 1 , 2 , 3 , 11110124 ]
1222+ assert pal_sol .nLookups == 2 # indices + palette
1223+
1224+ def test_palette_skipped_all_unique (self ):
1225+ """Palette should not be generated when all values are unique."""
1226+ data = list (range (100 ))
1227+ solutions = pack_table (data , default = 0 , compression = None )
1228+
1229+ # Should not have palette solutions (all values unique)
1230+ palette_solutions = [s for s in solutions if hasattr (s , 'palette' ) and isinstance (s .palette , list )]
1231+ assert len (palette_solutions ) == 0
1232+
1233+ def test_palette_skipped_no_savings (self ):
1234+ """Palette should not be generated when index_bits >= value_bits."""
1235+ # 16 unique values in range [0..15] -> 4 bits for both indices and values
1236+ data = list (range (16 ))
1237+ solutions = pack_table (data , default = 0 , compression = None )
1238+
1239+ # Should not have palette solutions (no bit savings)
1240+ palette_solutions = [s for s in solutions if hasattr (s , 'palette' ) and isinstance (s .palette , list )]
1241+ assert len (palette_solutions ) == 0
1242+
1243+ def test_palette_with_few_unique_values (self ):
1244+ """Palette should be generated when few unique values with outlier."""
1245+ # 100 values, only 5 unique small values + 1 huge outlier
1246+ import random
1247+ random .seed (42 )
1248+ data = [random .choice ([1 , 2 , 3 , 4 , 5 ]) for _ in range (100 )] + [999999 ]
1249+ solutions = pack_table (data , default = 0 , compression = None )
1250+
1251+ # Should have palette solution
1252+ palette_solutions = [s for s in solutions if hasattr (s , 'palette' ) and isinstance (s .palette , list )]
1253+ assert len (palette_solutions ) >= 1
1254+
1255+ pal_sol = palette_solutions [0 ]
1256+ assert len (pal_sol .palette ) <= 6 # 5 small values + outlier
1257+
1258+ def test_palette_cost_calculation (self ):
1259+ """Verify palette solution cost is calculated correctly."""
1260+ data = [1 , 2 , 3 , 2 , 3 , 2 , 1 , 0 , 2 , 1 , 2 , 2 , 3 , 3 , 1 , 11110124 ]
1261+ solutions = pack_table (data , default = 0 , compression = None )
1262+
1263+ palette_sol = [s for s in solutions if hasattr (s , 'palette' ) and isinstance (s .palette , list )][0 ]
1264+
1265+ # Palette: 5 values × 4 bytes = 20 bytes
1266+ # Indices: 16 values, 3 bits each, packed = ~6 bytes
1267+ # Total should be around 20-28 bytes
1268+ assert 20 <= palette_sol .cost <= 30
1269+ assert palette_sol .cost < 64 # Better than direct (64 bytes)
1270+
1271+ def test_palette_in_pareto_frontier (self ):
1272+ """Palette solution should be on Pareto frontier."""
1273+ data = [1 , 2 , 3 , 2 , 3 , 2 , 1 , 0 , 2 , 1 , 2 , 2 , 3 , 3 , 1 , 11110124 ]
1274+ solutions = pack_table (data , default = 0 , compression = None )
1275+
1276+ # All returned solutions should be non-dominated
1277+ for a in solutions :
1278+ for b in solutions :
1279+ if a is b :
1280+ continue
1281+ # a should not dominate b (otherwise b wouldn't be in frontier)
1282+ assert not (a .nLookups <= b .nLookups and a .fullCost <= b .fullCost )
1283+
1284+ def test_palette_selected_large_dataset (self ):
1285+ """Palette should be selected for large dataset with outliers."""
1286+ import random
1287+ random .seed (42 )
1288+ # 1000 values from small range, plus one huge outlier
1289+ data = [random .choice ([1 , 2 , 3 , 4 , 5 ]) for _ in range (1000 )] + [999999 ]
1290+
1291+ # With compression=1, palette should win
1292+ solution = pack_table (data , default = 0 , compression = 1 )
1293+
1294+ # Should be palette solution
1295+ assert hasattr (solution , 'palette' ) and isinstance (solution .palette , list )
1296+ assert len (solution .palette ) == 6 # 5 values + outlier
1297+
1298+ def test_palette_code_generation_c (self ):
1299+ """Test palette code generation for C."""
1300+ data = [1 , 2 , 3 , 2 , 3 , 2 , 1 , 0 , 2 , 1 , 2 , 2 , 3 , 3 , 1 , 11110124 ]
1301+ solutions = pack_table (data , default = 0 , compression = None )
1302+
1303+ palette_sol = [s for s in solutions if hasattr (s , 'palette' ) and isinstance (s .palette , list )][0 ]
1304+
1305+ code = Code ("test" )
1306+ palette_sol .genCode (code , "get" , language = "c" , private = False )
1307+
1308+ output = io .StringIO ()
1309+ code .print_code (file = output , language = "c" )
1310+ result = output .getvalue ()
1311+
1312+ # Should contain palette array
1313+ assert "palette" in result
1314+ # Should contain the outlier value
1315+ assert "11110124" in result
1316+
1317+ def test_palette_code_generation_rust (self ):
1318+ """Test palette code generation for Rust."""
1319+ data = [1 , 2 , 3 , 2 , 3 , 2 , 1 , 0 , 2 , 1 , 2 , 2 , 3 , 3 , 1 , 11110124 ]
1320+ solutions = pack_table (data , default = 0 , compression = None )
1321+
1322+ palette_sol = [s for s in solutions if hasattr (s , 'palette' ) and isinstance (s .palette , list )][0 ]
1323+
1324+ code = Code ("test" )
1325+ palette_sol .genCode (code , "get" , language = "rust" , private = False )
1326+
1327+ output = io .StringIO ()
1328+ code .print_code (file = output , language = "rust" )
1329+ result = output .getvalue ()
1330+
1331+ # Should contain palette array
1332+ assert "palette" in result
1333+ # Should be valid Rust syntax
1334+ assert "fn " in result or "#[inline]" in result
1335+
1336+ def test_palette_end_to_end_c (self ):
1337+ """Compile and run palette-encoded C code."""
1338+ import random
1339+ random .seed (42 )
1340+ data = [random .choice ([10 , 20 , 30 ]) for _ in range (50 )] + [999999 ]
1341+
1342+ # Force palette solution
1343+ solutions = pack_table (data , default = 0 , compression = None )
1344+ palette_sol = [s for s in solutions if hasattr (s , 'palette' ) and isinstance (s .palette , list )][0 ]
1345+
1346+ code = Code ("data" )
1347+ palette_sol .genCode (code , "get" , language = "c" , private = False )
1348+
1349+ output = io .StringIO ()
1350+ code .print_code (file = output , language = "c" )
1351+ c_code = output .getvalue ()
1352+
1353+ # Compile and test
1354+ _compile_and_run_c (c_code , data , 0 )
1355+
1356+ def test_palette_end_to_end_rust (self ):
1357+ """Compile and run palette-encoded Rust code."""
1358+ import random
1359+ random .seed (42 )
1360+ data = [random .choice ([10 , 20 , 30 ]) for _ in range (50 )] + [999999 ]
1361+
1362+ # Force palette solution
1363+ solutions = pack_table (data , default = 0 , compression = None )
1364+ palette_sol = [s for s in solutions if hasattr (s , 'palette' ) and isinstance (s .palette , list )][0 ]
1365+
1366+ code = Code ("data" )
1367+ palette_sol .genCode (code , "get" , language = "rust" , private = False )
1368+
1369+ output = io .StringIO ()
1370+ code .print_code (file = output , language = "rust" )
1371+ rust_code = output .getvalue ()
1372+
1373+ # Compile and test
1374+ _compile_and_run_rust (rust_code , data , 0 )
1375+
1376+ def test_palette_with_bias (self ):
1377+ """Palette should work with OuterLayer bias optimization."""
1378+ # Values with common bias
1379+ data = [100 , 101 , 102 , 101 , 102 , 101 , 100 , 99 ] + [999999 ]
1380+ solutions = pack_table (data , default = 0 , compression = None )
1381+
1382+ palette_sol = [s for s in solutions if hasattr (s , 'palette' ) and isinstance (s .palette , list )]
1383+ if palette_sol :
1384+ # If palette generated, it should handle the bias correctly
1385+ pal = palette_sol [0 ]
1386+ code = Code ("test" )
1387+ pal .genCode (code , "get" , language = "c" )
1388+ assert True # Just verify it generates without error
1389+
1390+ def test_palette_with_repeated_pattern (self ):
1391+ """Palette with repeated patterns should work correctly."""
1392+ # Repeated pattern with outlier
1393+ base_pattern = [1 , 2 , 3 , 2 , 3 , 2 , 1 , 0 ]
1394+ data = base_pattern * 32 + [999999 ]
1395+
1396+ solution = pack_table (data , default = 0 , compression = 5 )
1397+
1398+ # Should generate valid code
1399+ code = Code ("test" )
1400+ solution .genCode (code , "get" , language = "c" )
1401+ output = io .StringIO ()
1402+ code .print_code (file = output , language = "c" )
1403+ assert len (output .getvalue ()) > 0
1404+
1405+ def test_palette_separate_from_other_arrays (self ):
1406+ """Verify palette uses separate array, not offset into existing arrays."""
1407+ data = [1 , 2 , 3 , 2 , 3 , 2 , 1 , 0 , 2 , 1 , 2 , 2 , 3 , 3 , 1 , 11110124 ]
1408+ solutions = pack_table (data , default = 0 , compression = None )
1409+
1410+ palette_sol = [s for s in solutions if hasattr (s , 'palette' ) and isinstance (s .palette , list )][0 ]
1411+
1412+ code = Code ("data" )
1413+ palette_sol .genCode (code , "get" , language = "c" , private = False )
1414+
1415+ output = io .StringIO ()
1416+ code .print_code (file = output , language = "c" )
1417+ result = output .getvalue ()
1418+
1419+ # Should have separate palette array name
1420+ assert "data_palette" in result
1421+ # Should not have offset addition in palette access (e.g., not "data_u32[5 + ...]")
1422+ # The palette access should be clean: palette[index]
1423+ assert "palette[" in result or "palette" in result
0 commit comments