# Source: GENEVO-GENetic-EVolutionary-Organoid-Benchmarks/EvolutIoN_criterion.py at main · Devanik21/GENEVO-GENetic-EVolutionary-Organoid-Benchmarks · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
import streamlit as st
import json
import pandas as pd
import numpy as np
import time
import random
from dataclasses import dataclass, field
from typing import List, Dict, Tuple, Optional, Set
import plotly.graph_objects as go
import plotly.express as px
import networkx as nx
from collections import Counter


# ==================== DATA STRUCTURES & CORE FUNCTIONS (Copied from gene.py for compatibility) ====================
# These dataclasses are necessary to reconstruct the Genotype objects from the JSON file.

@dataclass
class DevelopmentalGene:
    """Encodes developmental rules for phenotype construction.

    Plain record rebuilt from the experiment JSON by ``dict_to_genotype`` and
    duplicated by ``Genotype.copy``. None of the analysis functions in this
    file read its fields; they are carried along so a loaded genotype
    round-trips intact.
    """
    rule_type: str  # Kind of rule (free-form label; valid values are defined by the producer of the JSON)
    trigger_condition: str  # Condition under which the rule applies (not interpreted in this file)
    parameters: Dict[str, float]  # Numeric rule parameters; shallow-copied by Genotype.copy

@dataclass
class ModuleGene:
    """Enhanced module gene with biological properties.

    Describes one module (graph node) of an evolved architecture. Instances are
    rebuilt from JSON via ``ModuleGene(**m)``, so the field names must match the
    keys stored in the experiment file.
    """
    id: str  # Module identifier; ConnectionGene.source/target refer to modules by this id
    module_type: str  # Matched against names such as 'mlp', 'attention', 'conv', 'recurrent', 'graph' in scoring/codegen
    size: int  # Summed as the parameter count in evaluate_fitness; used as layer width in generated PyTorch code
    activation: str  # Compared against a parasite profile's 'target_activation' in evaluate_fitness
    normalization: str  # Not read by any function in this file
    dropout_rate: float  # Not read by any function in this file
    learning_rate_mult: float  # Not read by any function in this file
    plasticity: float  # Averaged over modules and fed into several fitness components
    color: str  # Node colour in the 2D Plotly visualisation
    position: Tuple[float, float, float] = (0.0, 0.0, 0.0)  # Stored and copied only; not used for layout here

@dataclass
class ConnectionGene:
    """Enhanced connection with synaptic properties.

    Describes one directed edge between two modules. Instances are rebuilt from
    JSON via ``ConnectionGene(**c)``, so the field names must match the stored keys.
    """
    source: str  # id of the upstream ModuleGene
    target: str  # id of the downstream ModuleGene
    weight: float  # analyze_information_flow uses 1/weight as edge distance and skips weights <= 1e-6
    connection_type: str  # The number of distinct types contributes to the robustness score
    delay: float  # Not read by any function in this file
    plasticity_rule: str  # Not read by any function in this file

@dataclass
class Genotype:
    """Complete genetic encoding with developmental program.

    Holds the module/connection graph of one architecture together with its
    evolutionary bookkeeping (fitness, generation, lineage) and the component
    scores that the evaluation functions write back onto it.
    """
    # --- Structure ---
    modules: List[ModuleGene]
    connections: List[ConnectionGene]
    developmental_rules: List[DevelopmentalGene] = field(default_factory=list)
    # --- Free-form numeric annotations ---
    meta_parameters: Dict[str, float] = field(default_factory=dict)
    epigenetic_markers: Dict[str, float] = field(default_factory=dict)
    # --- Evolutionary bookkeeping ---
    fitness: float = 0.0
    age: int = 0
    generation: int = 0
    lineage_id: str = ""
    parent_ids: List[str] = field(default_factory=list)
    # --- Component scores (filled in by evaluation) ---
    accuracy: float = 0.0
    efficiency: float = 0.0
    complexity: float = 0.0
    robustness: float = 0.0
    form_id: int = 1

    def copy(self):
        """Return an offspring-style duplicate of this genotype.

        Modules, connections and developmental rules are rebuilt as new
        objects. The clone keeps ``fitness``, ``generation`` and ``form_id``,
        records this genotype's ``lineage_id`` as its only parent, restarts
        ``age`` at 0 and inherits every epigenetic marker at half strength.
        All remaining fields (``lineage_id``, ``accuracy``, ``efficiency``,
        ``complexity``, ``robustness``) fall back to their dataclass defaults.
        """
        cloned_modules = [
            ModuleGene(
                id=src.id,
                module_type=src.module_type,
                size=src.size,
                activation=src.activation,
                normalization=src.normalization,
                dropout_rate=src.dropout_rate,
                learning_rate_mult=src.learning_rate_mult,
                plasticity=src.plasticity,
                color=src.color,
                position=src.position,
            )
            for src in self.modules
        ]
        cloned_connections = [
            ConnectionGene(
                source=link.source,
                target=link.target,
                weight=link.weight,
                connection_type=link.connection_type,
                delay=link.delay,
                plasticity_rule=link.plasticity_rule,
            )
            for link in self.connections
        ]
        cloned_rules = [
            DevelopmentalGene(
                rule_type=rule.rule_type,
                trigger_condition=rule.trigger_condition,
                parameters=rule.parameters.copy(),
            )
            for rule in self.developmental_rules
        ]
        # Imperfect inheritance: each marker is passed on at half strength.
        faded_markers = {name: level * 0.5 for name, level in self.epigenetic_markers.items()}

        return Genotype(
            modules=cloned_modules,
            connections=cloned_connections,
            developmental_rules=cloned_rules,
            meta_parameters=self.meta_parameters.copy(),
            epigenetic_markers=faded_markers,
            fitness=self.fitness,
            age=0,
            generation=self.generation,
            parent_ids=[self.lineage_id],
            form_id=self.form_id,
        )

def dict_to_genotype(d: Dict) -> Genotype:
    """Deserialize a dictionary back into a Genotype object.

    The nested ``modules``, ``connections`` and ``developmental_rules`` entries
    are rebuilt into their dataclasses; every other key that names a Genotype
    field is copied onto the result. Fields missing from ``d`` (for example
    ``accuracy`` or ``efficiency``, which are not always saved) keep their
    dataclass defaults, and keys that are not Genotype fields are ignored.

    The input dictionary is left untouched, so the same dictionary can be
    deserialized more than once.

    Args:
        d: Mapping as stored in the experiment JSON for one individual.

    Returns:
        A new Genotype populated from ``d``.

    Raises:
        TypeError: If a nested entry contains keys the corresponding gene
            dataclass does not accept, or lacks a required one.
    """
    # Function-scope import: the module only imports `dataclass` and `field`.
    from dataclasses import fields

    nested_keys = ('modules', 'connections', 'developmental_rules')

    # Build the nested genes into fresh lists instead of writing them back
    # into `d`; mutating the caller's dict made a second call fail because
    # the entries were no longer plain dicts.
    genotype = Genotype(
        modules=[ModuleGene(**m) for m in d.get('modules', [])],
        connections=[ConnectionGene(**c) for c in d.get('connections', [])],
        developmental_rules=[DevelopmentalGene(**dr) for dr in d.get('developmental_rules', [])],
    )

    # Copy only declared dataclass fields. A plain hasattr() check would also
    # let a stray key such as "copy" overwrite a method on the instance.
    for spec in fields(Genotype):
        if spec.name in d and spec.name not in nested_keys:
            setattr(genotype, spec.name, d[spec.name])

    return genotype

def is_viable(genotype: Genotype) -> bool:
    """Check whether a genotype is structurally viable.

    A genotype is viable when it has at least one module and one connection,
    its connections (restricted to known module ids) span at least two nodes,
    and some input node can reach some output node along directed edges.

    Input nodes are the nodes without incoming edges; if there are none, the
    modules whose id contains 'input', 'embed' or 'V1' are used instead.
    Output nodes are the nodes without outgoing edges; if there are none, the
    modules whose id contains 'output' or 'PFC' are used instead.

    Args:
        genotype: The architecture to inspect.

    Returns:
        True if an input-to-output path exists, otherwise False.
    """
    if not genotype.modules or not genotype.connections:
        return False

    G = nx.DiGraph()
    module_ids = {m.id for m in genotype.modules}

    # Only edges whose endpoints are both real modules count towards the graph.
    for conn in genotype.connections:
        if conn.source in module_ids and conn.target in module_ids:
            G.add_edge(conn.source, conn.target)

    if G.number_of_nodes() < 2:
        return False

    input_nodes = [node for node, in_degree in G.in_degree() if in_degree == 0]
    output_nodes = [node for node, out_degree in G.out_degree() if out_degree == 0]

    # Fully cyclic graphs have no degree-0 nodes; fall back to naming conventions.
    if not input_nodes:
        potential_inputs = [m.id for m in genotype.modules if 'input' in m.id or 'embed' in m.id or 'V1' in m.id]
        input_nodes = [node for node in potential_inputs if node in G.nodes]

    if not output_nodes:
        potential_outputs = [m.id for m in genotype.modules if 'output' in m.id or 'PFC' in m.id]
        output_nodes = [node for node in potential_outputs if node in G.nodes]

    if not input_nodes or not output_nodes:
        return False

    for start_node in input_nodes:
        for end_node in output_nodes:
            if start_node in G and end_node in G and nx.has_path(G, start_node, end_node):
                return True

    # No input can reach any output. The previous `return Genotype(**d)` here
    # referenced an undefined name and raised NameError instead of answering.
    return False

# ==================== REAL-WORLD TASK SIMULATION ====================

def simulate_task_performance(architecture: Genotype, task_name: str) -> Dict:
    """Score an architecture on one named task using ``evaluate_fitness``.

    The evaluation is heuristic: it is derived from architectural properties,
    not from training a model. As a side effect the ``accuracy``,
    ``efficiency`` and ``robustness`` attributes of ``architecture`` are
    overwritten with the freshly computed component scores, so callers that
    want to keep the original values should pass a copy.

    Args:
        architecture: Genotype to evaluate (mutated, see above).
        task_name: Task label forwarded to ``evaluate_fitness``.

    Returns:
        A dict with keys ``"score"`` (overall fitness), ``"report"`` (list of
        formatted component lines) and ``"components"`` (the raw score dict).
    """
    # Fixed settings for the advanced evaluation options of evaluate_fitness.
    fitness, scores = evaluate_fitness(
        architecture,
        task_name,
        architecture.generation,
        enable_epigenetics=True,
        enable_baldwin=True,
        epistatic_linkage_k=2,
        parasite_profile=None,
    )

    # Write the component scores back so downstream consumers of the
    # architecture object see the values from this evaluation.
    for attribute, score_key in (
        ('accuracy', 'task_accuracy'),
        ('efficiency', 'efficiency'),
        ('robustness', 'robustness'),
    ):
        setattr(architecture, attribute, scores[score_key])

    # Human-readable summary, one line per component.
    report = []
    report.append(f"Task Accuracy Score: {scores['task_accuracy']:.3f}")
    report.append(f"Efficiency Score: {scores['efficiency']:.3f}")
    report.append(f"Robustness Score: {scores['robustness']:.3f}")
    report.append(f"Generalization Score: {scores['generalization']:.3f}")

    result = {"score": fitness, "report": report, "components": scores}
    return result

# ==================== CORE ANALYSIS FUNCTIONS (from gene.py) ====================
# These functions are copied directly from gene.py to provide the same deep analysis capabilities.

def evaluate_fitness(genotype: Genotype, task_type: str, generation: int, weights: Optional[Dict[str, float]] = None, enable_epigenetics: bool = False, enable_baldwin: bool = False, epistatic_linkage_k: int = 0, parasite_profile: Optional[Dict] = None, **kwargs) -> Tuple[float, Dict[str, float]]:
    """
    Multi-objective fitness evaluation with realistic task simulation.
    This is the same function as in gene.py for consistency.

    The result is heuristic and stochastic: the task-accuracy term includes
    Gaussian noise and the epistatic term draws random interaction partners,
    so two calls on the same genotype generally return different values.

    Args:
        genotype: Architecture to score. Not modified by this function.
        task_type: Task label; the accuracy heuristic is chosen by substring
            ('ARC', 'Image', 'Language', 'Robotics' or 'Sequential'). If none
            matches, the base task accuracy stays at 0.0.
        generation: Accepted for interface compatibility; not read here.
        weights: Per-component weights for the weighted sum. Defaults to
            0.6 / 0.2 / 0.1 / 0.1 for accuracy / efficiency / robustness /
            generalization. Every key must exist in the score dict.
        enable_epigenetics: Accepted for interface compatibility; not read here.
        enable_baldwin: Adds a plasticity-proportional bonus to task accuracy.
        epistatic_linkage_k: Number of other modules each module interacts
            with in the epistatic term; 0 disables the term.
        parasite_profile: Optional dict; if it has a truthy
            'target_activation', modules using that activation reduce fitness.
        **kwargs: Ignored.

    Returns:
        ``(total_fitness, scores)`` where ``total_fitness`` is floored at 1e-6
        and ``scores`` holds the four component scores.
    """
    scores = {'task_accuracy': 0.0, 'efficiency': 0.0, 'robustness': 0.0, 'generalization': 0.0}
    # Aggregate structural statistics shared by all score components.
    total_params = sum(m.size for m in genotype.modules)
    avg_plasticity = np.mean([m.plasticity for m in genotype.modules]) if genotype.modules else 0
    # Connections per (modules^2 + 1); the +1 keeps the denominator non-zero.
    connection_density = len(genotype.connections) / (len(genotype.modules) ** 2 + 1) if genotype.modules else 0

    # Task-specific accuracy simulation
    # Each branch rewards the share of "suitable" module types and adds N(0, 0.05) noise.
    if 'ARC' in task_type:
        graph_attention_count = sum(1 for m in genotype.modules if m.module_type in ['graph', 'attention'])
        compositional_score = graph_attention_count / (len(genotype.modules) + 1e-6)
        plasticity_bonus = avg_plasticity * 0.4
        # Despite the name, this term is larger for smaller models (exp decay in parameter count).
        efficiency_penalty = np.exp(-total_params / 50000)
        scores['task_accuracy'] = (compositional_score * 0.4 + plasticity_bonus * 0.3 + efficiency_penalty * 0.3 + np.random.normal(0, 0.05))
    elif 'Image' in task_type:
        conv_count = sum(1 for m in genotype.modules if m.module_type == 'conv')
        hierarchical_bonus = 0.2 if genotype.form_id in [1, 4] else 0.0
        scores['task_accuracy'] = ((conv_count / (len(genotype.modules) + 1e-6)) * 0.5 + hierarchical_bonus + connection_density * 0.2 + np.random.normal(0, 0.05))
    elif 'Language' in task_type:
        attn_count = sum(1 for m in genotype.modules if 'attention' in m.module_type or 'transformer' in m.module_type)
        depth_bonus = len(genotype.modules) / 10
        scores['task_accuracy'] = ((attn_count / (len(genotype.modules) + 1e-6)) * 0.6 + min(depth_bonus, 0.3) + np.random.normal(0, 0.05))
    elif 'Robotics' in task_type or 'Sequential' in task_type:
        rec_count = sum(1 for m in genotype.modules if 'recurrent' in m.module_type or 'liquid' in m.module_type)
        memory_bonus = 0.3 if any('memory' in m.id for m in genotype.modules) else 0.0
        scores['task_accuracy'] = ((rec_count / (len(genotype.modules) + 1e-6)) * 0.5 + memory_bonus + avg_plasticity * 0.15 + np.random.normal(0, 0.05))

    # --- The following logic is from the full gene.py implementation ---
    # It's crucial for ensuring the 'scores' dictionary is always fully populated.
    if enable_baldwin:
        # Baldwin effect: plasticity is rewarded as a proxy for lifetime learning.
        lifetime_learning_bonus = avg_plasticity * 0.2
        scores['task_accuracy'] += lifetime_learning_bonus

    # Noise and bonuses can push the value outside [0, 1]; clamp it.
    scores['task_accuracy'] = np.clip(scores['task_accuracy'], 0, 1)

    # Efficiency score
    # Mean of a log-scaled parameter term and a sparsity term (density capped at 0.8).
    param_efficiency = 1.0 / (1.0 + np.log(1 + total_params / 10000))
    connection_efficiency = 1.0 - min(connection_density, 0.8)
    scores['efficiency'] = (param_efficiency + connection_efficiency) / 2

    # Robustness score
    # Distinct connection types (divided by 3) plus closeness of mean plasticity to 0.5.
    robustness_from_diversity = len(set(c.connection_type for c in genotype.connections)) / 3 if genotype.connections else 0
    robustness_from_plasticity = 1.0 - abs(avg_plasticity - 0.5) * 2
    scores['robustness'] = (robustness_from_diversity * 0.5 + robustness_from_plasticity * 0.5)

    # Generalization potential
    # "depth" here is simply the module count; modularity peaks at a density of 0.3.
    depth = len(genotype.modules)
    modularity_score = 1.0 - abs(connection_density - 0.3) * 2
    scores['generalization'] = (min(depth / 10, 1.0) * 0.4 + modularity_score * 0.3 + avg_plasticity * 0.3)

    if weights is None:
        weights = {'task_accuracy': 0.6, 'efficiency': 0.2, 'robustness': 0.1, 'generalization': 0.1}

    total_fitness = sum(scores[k] * weights[k] for k in weights)

    # Epistasis: each module's contribution depends on k randomly chosen other modules.
    epistatic_contribution = 0.0
    if epistatic_linkage_k > 0 and len(genotype.modules) > epistatic_linkage_k:
        num_modules = len(genotype.modules)
        for i, module in enumerate(genotype.modules):
            indices = list(range(num_modules))
            indices.remove(i)
            interacting_indices = random.sample(indices, k=epistatic_linkage_k)
            context_signature = tuple([module.module_type] + [genotype.modules[j].module_type for j in interacting_indices])
            # NOTE(review): hash() of strings is salted per interpreter process by default,
            # so this term is not reproducible across runs unless PYTHONHASHSEED is fixed.
            hash_val = hash(context_signature)
            # Maps the hash into [-0.1, 0.1) per module.
            epistatic_contribution += (hash_val % 2000 - 1000) / 10000.0

    total_fitness += epistatic_contribution

    # Parasite pressure: 0.1 vulnerability per module using the targeted activation,
    # capped so that fitness is reduced by at most 50%.
    if parasite_profile:
        vulnerability_score = 0.0
        target_activation = parasite_profile.get('target_activation')
        if target_activation:
            for module in genotype.modules:
                if module.activation == target_activation:
                    vulnerability_score += 0.1
        total_fitness *= (1.0 - min(vulnerability_score, 0.5))

    # Floor keeps the returned fitness strictly positive.
    return max(total_fitness, 1e-6), scores

def analyze_lesion_sensitivity(architecture: Genotype, base_fitness: float, task_type: str, fitness_weights: Dict, eval_params: Dict) -> Dict[str, float]:
    """Estimate how much fitness is lost when each module is removed.

    For every module whose id contains neither 'input' nor 'output', a copy
    of the architecture is made without that module and without any connection
    touching it. Lesioned copies that are no longer viable are skipped; the
    rest are re-evaluated with ``evaluate_fitness``.

    Args:
        architecture: Architecture to analyse (left unmodified).
        base_fitness: Reference fitness the lesioned fitness is subtracted from.
        task_type: Task label forwarded to ``evaluate_fitness``.
        fitness_weights: Component weights forwarded to ``evaluate_fitness``.
        eval_params: Extra keyword arguments forwarded to ``evaluate_fitness``.

    Returns:
        Mapping ``"Module: <id>"`` -> ``base_fitness - lesioned_fitness``.
    """
    impact_by_component = {}

    for candidate in architecture.modules:
        removed_id = candidate.id
        is_boundary = 'input' in removed_id or 'output' in removed_id
        if is_boundary:
            continue

        # Knock the module out of a private copy, together with its wiring.
        ablated = architecture.copy()
        ablated.modules = [kept for kept in ablated.modules if kept.id != removed_id]
        ablated.connections = [
            link for link in ablated.connections
            if not (link.source == removed_id or link.target == removed_id)
        ]

        if is_viable(ablated):  # type: ignore
            ablated_fitness, _ = evaluate_fitness(
                ablated, task_type, ablated.generation, fitness_weights, **eval_params
            )
            impact_by_component[f"Module: {removed_id}"] = base_fitness - ablated_fitness

    return impact_by_component

def analyze_information_flow(architecture: Genotype) -> Dict[str, float]:
    """Compute normalized betweenness centrality for the architecture graph.

    Every module becomes a node. Connections with a weight above 1e-6 become
    directed edges whose path length is the inverse of the weight, so stronger
    connections count as shorter routes.

    Args:
        architecture: Architecture whose modules and connections are analysed.

    Returns:
        Mapping of node id to centrality, or an empty dict when the graph has
        no nodes.
    """
    flow_graph = nx.DiGraph()

    for mod in architecture.modules:
        flow_graph.add_node(mod.id)

    for link in architecture.connections:
        if link.weight > 1e-6:
            flow_graph.add_edge(link.source, link.target, weight=1.0 / link.weight)

    if len(flow_graph) == 0:
        return {}

    centrality = nx.betweenness_centrality(flow_graph, weight='weight', normalized=True)
    return centrality

def generate_pytorch_code(architecture: Genotype) -> str:
    """Render the architecture as source code for a PyTorch ``nn.Module``.

    Each module becomes an entry of an ``nn.ModuleDict`` ('mlp', 'attention',
    'conv' and 'recurrent' map to concrete layers, anything else to
    ``nn.Identity``). The forward pass visits modules in topological order of
    the connection graph (module order if the graph is cyclic), feeds ``x`` to
    modules without incoming connections and sums the outputs of a module's
    sources as its input. The first module without outgoing connections is
    returned.

    Degenerate inputs are handled without raising: with no connections at all
    every module is treated as an input, and with no modules the generated
    ``forward`` simply returns ``x``.

    Args:
        architecture: Genotype to translate.

    Returns:
        The generated Python source as a string. The code is a structural
        sketch; it is not executed or validated here.
    """
    module_defs = [f"            # Fitness: {architecture.fitness:.4f}, Accuracy: {architecture.accuracy:.4f}"]
    for m in architecture.modules:
        if m.module_type == 'mlp': module_defs.append(f"            '{m.id}': nn.Sequential(nn.Linear({m.size}, {m.size}), nn.GELU()),")
        elif m.module_type == 'attention': module_defs.append(f"            '{m.id}': nn.MultiheadAttention(embed_dim={m.size}, num_heads=8, batch_first=True),")
        elif m.module_type == 'conv': module_defs.append(f"            '{m.id}': nn.Conv2d(3, {m.size}, 3, padding=1),")
        elif m.module_type == 'recurrent': module_defs.append(f"            '{m.id}': nn.LSTM({m.size}, {m.size}, batch_first=True),")
        else: module_defs.append(f"            '{m.id}': nn.Identity(), # Placeholder for '{m.module_type}'")

    module_ids = [m.id for m in architecture.modules]

    G = nx.DiGraph([(c.source, c.target) for c in architecture.connections])
    try: exec_order = list(nx.topological_sort(G))
    except nx.NetworkXUnfeasible: exec_order = list(module_ids)

    forward_pass = ["        outputs = {} # Dict to store module outputs"]

    if G.number_of_nodes() == 0:
        # No wiring information at all: the graph is empty, so there is no
        # execution order to derive. Treat every module as an input instead
        # of indexing into an empty list (previously an IndexError).
        exec_order = list(module_ids)
        true_inputs = list(module_ids)
    else:
        # Find true inputs (no incoming connections). Modules that appear in no
        # connection are not graph nodes and are deliberately left out.
        true_inputs = [mid for mid in module_ids if mid in G and G.in_degree(mid) == 0]
        if not true_inputs and exec_order: true_inputs = [exec_order[0]] # Fallback
    for in_node in true_inputs: forward_pass.append(f"        outputs['{in_node}'] = x # Feed input to '{in_node}'")

    # Per-module lookups hoisted out of the loop; both keep first-match semantics.
    scheduled = set(exec_order)
    input_nodes = set(true_inputs)
    sources_by_target = {}
    for c in architecture.connections:
        sources_by_target.setdefault(c.target, []).append(c.source)
    type_by_id = {}
    for m in architecture.modules:
        type_by_id.setdefault(m.id, m.module_type)

    for mid in exec_order:
        if mid in input_nodes: continue
        inputs = sources_by_target.get(mid, [])
        if not inputs: continue

        input_str = " + ".join([f"outputs['{i}']" for i in inputs if i in scheduled])
        if not input_str: input_str = 'x' # Fallback

        module_type = type_by_id.get(mid, '')
        # LSTM and MultiheadAttention return tuples; keep only the primary output.
        if module_type == 'recurrent': forward_pass.append(f"        out, _ = self.evolved_modules['{mid}']({input_str}); outputs['{mid}'] = out")
        elif module_type == 'attention': forward_pass.append(f"        attn_out, _ = self.evolved_modules['{mid}']({input_str}, {input_str}, {input_str}); outputs['{mid}'] = attn_out")
        else: forward_pass.append(f"        outputs['{mid}'] = self.evolved_modules['{mid}']({input_str})")

    # Find true output (no outgoing connections)
    true_output = [mid for mid in module_ids if mid in G and G.out_degree(mid) == 0]
    if not true_output and exec_order: true_output = [exec_order[-1]] # Fallback
    if true_output: forward_pass.append(f"        return outputs['{true_output[0]}']")
    else: forward_pass.append("        return x") # Nothing to run: identity forward

    return f"""
import torch
import torch.nn as nn

class EvolvedArchitecture(nn.Module):
    def __init__(self):
        super().__init__()
        self.evolved_modules = nn.ModuleDict({{
{chr(10).join(module_defs)}
        }})

    def forward(self, x):
{chr(10).join(forward_pass)}
""".strip()

def generate_tensorflow_code(architecture: Genotype) -> str:
    """Return a placeholder for TensorFlow export.

    No model code is generated yet; the result is a two-line comment naming
    the architecture's lineage id and its module and connection counts.
    """
    # Simplified stub; a full implementation would mirror the PyTorch generator.
    lineage = architecture.lineage_id
    module_total = len(architecture.modules)
    connection_total = len(architecture.connections)

    notice = f"# TensorFlow code generation for {lineage} is a work in progress."
    summary = f"# Key properties: {module_total} modules, {connection_total} connections."
    return notice + "\n" + summary

def visualize_genotype_2d(genotype: Genotype) -> go.Figure:
    """Draw the architecture as a 2D node-link diagram.

    Modules are plotted as markers coloured by ``module.color`` and sized by
    ``15 + sqrt(module.size)``; connections between known modules are plotted
    as grey line segments. Node positions come from a Kamada-Kawai layout,
    with a seeded spring layout as the fallback if that fails.

    Args:
        genotype: Architecture to draw.

    Returns:
        A Plotly figure with one edge trace and one node trace.
    """
    graph = nx.DiGraph()
    for mod in genotype.modules:
        tooltip = f"<b>{mod.id}</b><br>Type: {mod.module_type}<br>Size: {mod.size}"
        graph.add_node(mod.id, color=mod.color, size=15 + np.sqrt(mod.size), hover_text=tooltip)

    # Only draw connections whose endpoints are both known modules.
    for link in genotype.connections:
        if link.source in graph and link.target in graph:
            graph.add_edge(link.source, link.target)

    try:
        layout = nx.kamada_kawai_layout(graph)
    except Exception:
        layout = nx.spring_layout(graph, seed=42)

    # Each edge contributes two points followed by None to break the line.
    edge_x, edge_y = [], []
    for src, dst in graph.edges():
        x0, y0 = layout[src]
        x1, y1 = layout[dst]
        edge_x += [x0, x1, None]
        edge_y += [y0, y1, None]
    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=1, color='#888'),
        hoverinfo='none', mode='lines',
    )

    names = list(graph.nodes())
    node_x = [layout[name][0] for name in names]
    node_y = [layout[name][1] for name in names]
    node_text = [graph.nodes[name]['hover_text'] for name in names]
    node_color = [graph.nodes[name]['color'] for name in names]
    node_size = [graph.nodes[name]['size'] for name in names]

    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers+text', text=names,
        textposition="top center", hoverinfo='text', hovertext=node_text,
        marker=dict(showscale=False, color=node_color, size=node_size, line=dict(width=2, color='black')),
    )

    hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False)
    figure_layout = go.Layout(
        title=f"<b>2D View: {genotype.lineage_id}</b>", title_x=0.5, showlegend=False,
        margin=dict(b=20, l=5, r=5, t=50),
        xaxis=dict(hidden_axis),
        yaxis=dict(hidden_axis),
        height=400, plot_bgcolor='white',
    )
    return go.Figure(data=[edge_trace, node_trace], layout=figure_layout)

# ==================== STREAMLIT APP ====================

def main():
    """Run the Streamlit analysis and benchmarking dashboard.

    Flow: configure the page, load a population from an uploaded experiment
    JSON (key ``final_population``), let the user pick one architecture in the
    sidebar, then render four tabs: vitals and 2D blueprint, causal analysis
    (lesion sensitivity and betweenness centrality), simulated benchmarks and
    generated code. Analysis and benchmark results are cached in
    ``st.session_state`` and discarded when a different architecture is
    selected.
    """
    st.set_page_config(
        page_title="GENEVO Real-World Tester",
        layout="wide",
        page_icon="🤖"
    )

    # --- Custom CSS ---
    st.markdown("""
    <style>
    .main-header {
        font-size: 2.5rem;
        font-weight: 700;
        color: #1e3a8a; /* Darker blue */
        text-align: center;
    }
    .sub-header {
        font-size: 1.1rem;
        color: #64748b;
        text-align: center;
        margin-bottom: 2rem;
    }
    .st-emotion-cache-1vzeuhh { /* Target expander header */
        font-size: 1.1rem;
    }
    </style>
    """, unsafe_allow_html=True)

    # --- Header ---
    st.markdown('<h1 class="main-header">GENEVO: Architecture Analysis & Benchmarking</h1>', unsafe_allow_html=True)
    st.markdown('<p class="sub-header">A rigorous diagnostic tool to perform deep analysis and simulated benchmarking on evolved architectures.</p>', unsafe_allow_html=True)

    # --- Session State Initialization ---
    if 'population' not in st.session_state:
        st.session_state.population = None

    # --- Sidebar for Upload and Selection ---
    st.sidebar.header("🔬 Experiment Setup")
    uploaded_file = st.sidebar.file_uploader(
        "Upload `genevo_experiment_data.json`",
        type="json",
        help="Upload the JSON file generated by the main GENEVO application."
    )

    if uploaded_file is not None:
        try:
            data = json.load(uploaded_file)
            pop_dicts = data.get('final_population', [])
            if pop_dicts:
                population = [dict_to_genotype(p) for p in pop_dicts]
                # Best individuals first so that "Rank 1" is the fittest.
                population.sort(key=lambda x: x.fitness, reverse=True)
                st.session_state.population = population
                st.sidebar.success(f"✅ Loaded {len(population)} architectures.")
            else:
                st.sidebar.error("JSON file does not contain a 'final_population' list.")
                st.session_state.population = None
        except Exception as e:
            # Boundary handler: any malformed upload is reported in the sidebar
            # instead of crashing the app.
            st.sidebar.error(f"Error parsing JSON: {e}")
            st.session_state.population = None

    if st.session_state.population:
        st.sidebar.markdown("---")
        st.sidebar.header("🧠 Select Architecture")

        # Map each label straight to its Genotype. Labels are unique because
        # they carry the rank; lineage ids are not guaranteed to be unique (the
        # dataclass default is ""), so looking the selection up by lineage id
        # could return a different individual than the one chosen.
        options = {
            f"Rank {i+1}: {ind.lineage_id} (Fitness: {ind.fitness:.4f})": ind
            for i, ind in enumerate(st.session_state.population)
        }

        selected_option = st.sidebar.selectbox(
            "Choose an architecture to test:",
            list(options)
        )

        selected_arch = options.get(selected_option)

    else:
        st.info("Please upload a `genevo_experiment_data.json` file to begin.")
        st.stop()

    if not selected_arch:
        st.error("Could not find the selected architecture. Please try again.")
        st.stop()

    # Cached results belong to the architecture they were computed for; drop
    # them when the selection changes so they are never shown under another
    # architecture's header.
    if st.session_state.get('results_for') != selected_option:
        st.session_state.results_for = selected_option
        st.session_state.causal_results = {}
        st.session_state.benchmark_results = {}

    # --- Main Display Area ---
    st.header(f"🔬 Analysis Dashboard: `{selected_arch.lineage_id}`")

    # Display key stats of the selected architecture
    tab_vitals, tab_analysis, tab_benchmark, tab_code = st.tabs([
        "🌐 Vitals & Architecture",
        "🔬 Causal & Structural Analysis",
        "🚀 Simulated Benchmarking",
        "💻 Code Export"
    ])

    # --- TAB 1: Vitals & Architecture ---
    with tab_vitals:
        vitals_col1, vitals_col2 = st.columns([1, 2])
        with vitals_col1:
            st.markdown("#### Quantitative Profile")
            st.metric("Evolved Fitness", f"{selected_arch.fitness:.4f}")
            st.metric("Evolved Accuracy", f"{selected_arch.accuracy:.3f}")
            st.metric("Efficiency Score", f"{selected_arch.efficiency:.3f}")
            st.metric("Robustness Score", f"{selected_arch.robustness:.3f}")
            st.metric("Total Parameters", f"{sum(m.size for m in selected_arch.modules):,}")
            st.metric("Complexity Score", f"{selected_arch.complexity:.3f}")

        with vitals_col2:
            st.markdown("#### Architectural Blueprint (2D)")
            st.plotly_chart(visualize_genotype_2d(selected_arch), use_container_width=True)

    # --- TAB 2: Causal & Structural Analysis ---
    with tab_analysis:
        st.markdown("This tab dissects the functional importance of the architecture's components using techniques from `gene.py`.")

        if st.button("Run Full Causal Analysis", key="run_causal_analysis"):
            st.session_state.causal_results = {}

            with st.spinner("Performing lesion sensitivity analysis..."):
                # Use default weights for analysis
                fitness_weights = {'task_accuracy': 0.6, 'efficiency': 0.2, 'robustness': 0.1, 'generalization': 0.1}
                # The task type here is less critical, it's for the fitness function context
                task_type_for_eval = "Abstract Reasoning (ARC-AGI-2)"

                eval_params = {
                    'enable_epigenetics': True,
                    'enable_baldwin': True,
                    'epistatic_linkage_k': 2,
                    'parasite_profile': None
                }

                # NOTE(review): the baseline is the fitness stored in the JSON, which may
                # have been computed for a different task or weighting than the lesioned
                # re-evaluation below — confirm this is the intended comparison.
                criticality_scores = analyze_lesion_sensitivity(
                    selected_arch, selected_arch.fitness, task_type_for_eval, fitness_weights, eval_params
                )
                st.session_state.causal_results['criticality'] = sorted(criticality_scores.items(), key=lambda item: item[1], reverse=True)

            with st.spinner("Analyzing information flow..."):
                centrality_scores = analyze_information_flow(selected_arch)
                st.session_state.causal_results['centrality'] = sorted(centrality_scores.items(), key=lambda item: item[1], reverse=True)

        if 'causal_results' in st.session_state and st.session_state.causal_results:
            causal_col1, causal_col2 = st.columns(2)
            with causal_col1:
                st.subheader("Lesion Sensitivity")
                st.markdown("Components whose removal causes the largest drop in fitness.")
                crit_data = st.session_state.causal_results.get('criticality', [])
                if crit_data:
                    df_crit = pd.DataFrame(crit_data, columns=['Component', 'Fitness Drop'])
                    st.dataframe(df_crit.head(10))
                else:
                    st.info("No criticality data available.")

            with causal_col2:
                st.subheader("Information Flow Backbone")
                st.markdown("Modules with the highest betweenness centrality, crucial for routing information.")
                cent_data = st.session_state.causal_results.get('centrality', [])
                if cent_data:
                    df_cent = pd.DataFrame(cent_data, columns=['Module', 'Centrality Score'])
                    st.dataframe(df_cent.head(10))
                else:
                    st.info("No centrality data available.")

    # --- TAB 3: Simulated Benchmarking ---
    with tab_benchmark:
        st.subheader("🚀 Real-World Task Simulation")
        with st.expander("🤔 How is this test performed?"):
            st.markdown("""
            This simulation uses the **exact same `evaluate_fitness` function from `gene.py`**. It is a rigorous, rule-based analysis of the architecture's properties against different task demands.

            1.  **Task-Specific Heuristics:** Each task (e.g., Vision, Language) has rules that reward specific architectural features (e.g., `conv` modules for vision, `attention` for language).
            2.  **Multi-Objective Score:** The final score is a weighted sum of four components:
                - **Task Accuracy:** The heuristic score for the specific task.
                - **Efficiency:** A penalty for high parameter counts and connection density.
                - **Robustness:** A measure of architectural stability.
                - **Generalization:** A score based on properties linked to generalization potential.

            This provides a much more nuanced and credible estimate of performance than a simple random score. The detailed reports show the breakdown of these components.
            """)

        tasks = ["Vision (ImageNet)", "Language (MMLU-Pro)", "Abstract Reasoning (ARC-AGI-2)", "Robotics Control (Continuous Action)"]

        if 'benchmark_results' not in st.session_state:
            st.session_state.benchmark_results = {}

        if st.button("Run All Benchmark Simulations", type="primary"):
            st.session_state.benchmark_results = {}
            progress_bar = st.progress(0, text="Starting benchmarks...")
            for i, task in enumerate(tasks):
                time.sleep(1.5) # Simulate work
                # IMPORTANT: We must use a copy of the selected architecture for each simulation
                # to prevent the scores from one task from bleeding into the next.
                arch_copy = selected_arch.copy()
                arch_copy.lineage_id = selected_arch.lineage_id # Preserve original ID for display
                result = simulate_task_performance(arch_copy, task)
                st.session_state.benchmark_results[task] = result
                progress_bar.progress((i + 1) / len(tasks), text=f"Simulating {task}...")
            progress_bar.empty()
            st.success("All benchmark simulations complete!")

        if st.session_state.benchmark_results:
            st.markdown("---")
            st.subheader("📊 Benchmark Results")

            for task, result in st.session_state.benchmark_results.items():
                with st.expander(f"**{task}** - Overall Score: **{result['score']:.3f}**"):
                    st.markdown("###### Component Scores:")
                    cols = st.columns(4)
                    cols[0].metric("Task Accuracy", f"{result['components']['task_accuracy']:.3f}")
                    cols[1].metric("Efficiency", f"{result['components']['efficiency']:.3f}")
                    cols[2].metric("Robustness", f"{result['components']['robustness']:.3f}")
                    cols[3].metric("Generalization", f"{result['components']['generalization']:.3f}")

    # --- TAB 4: Code Export ---
    with tab_code:
        st.markdown("The genotype can be translated into functional code for deep learning frameworks, providing a direct path from discovery to application.")
        code_col1, code_col2 = st.columns(2)
        with code_col1:
            st.subheader("PyTorch Code")
            st.code(generate_pytorch_code(selected_arch), language='python')
        with code_col2:
            st.subheader("TensorFlow / Keras Code")
            st.code(generate_tensorflow_code(selected_arch), language='python')

    st.sidebar.markdown("---")
    st.sidebar.info(
        "This application provides a **rigorous analysis and heuristic simulation** of real-world performance. "
        "Scores are based on architectural properties from `gene.py`'s evaluation logic, not on actual model training."
    )

# Entry point: build the dashboard only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()