# Source: GeneralizedNotationNotation/src/execute/pymdp/pymdp_runner.py at main · ActiveInferenceInstitute/GeneralizedNotationNotation · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
#!/usr/bin/env python3
"""
PyMDP script execution module for the GNN Processing Pipeline.

This module provides functions to execute PyMDP scripts that were
generated by the GNN rendering step (11_render.py).
"""

import json
import logging
import os
import subprocess  # nosec B404 -- subprocess calls with controlled/trusted input
import sys
import traceback
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

logger = logging.getLogger(__name__)

def validate_and_clean_pymdp_script(script_path: Path) -> bool:
    """
    Validate a PyMDP script's syntax, attempting one automatic repair if needed.

    The script is compiled (never executed). If compilation raises a
    SyntaxError, every line whose first non-blank character is '}' is dropped
    and the result is compiled again; only when that second compile succeeds
    is the cleaned text written back to ``script_path``.

    Args:
        script_path: Path to the PyMDP script to validate

    Returns:
        bool: True if script is valid or was successfully cleaned, False otherwise
        (including when the file is missing, unreadable, or not valid UTF-8)
    """
    if not script_path.exists():
        logger.error(f"Script file not found: {script_path}")
        return False

    # Read separately from compiling so that I/O and decoding problems are
    # reported as a validation failure instead of escaping to the caller.
    # Python source files are UTF-8 by default, so decode them as such rather
    # than with the platform locale.
    try:
        with open(script_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        logger.error(f"Could not read script {script_path.name}: {e}")
        return False

    try:
        # First, try to compile the script as-is
        compile(content, script_path.name, 'exec')
    except SyntaxError as e:
        logger.warning(f"Syntax error in {script_path.name}: {e}")
    except ValueError as e:
        # Some Python versions reject embedded null bytes with ValueError
        # rather than SyntaxError; stripping '}' lines cannot repair that.
        logger.error(f"Could not fix syntax errors in {script_path.name}: {e}")
        return False
    else:
        logger.debug(f"Script {script_path.name} is syntactically valid")
        return True

    # Try to fix common syntax issues
    try:
        cleaned_lines = []
        for line in content.split('\n'):
            # Remove stray } at the beginning of lines (common rendering issue)
            if line.strip().startswith('}'):
                logger.debug(f"Removing stray '}}' from line: {line.strip()}")
                continue
            cleaned_lines.append(line)

        cleaned_content = '\n'.join(cleaned_lines)

        # Only overwrite the file once the cleaned text is known to compile
        compile(cleaned_content, script_path.name, 'exec')

        with open(script_path, 'w', encoding='utf-8') as f:
            f.write(cleaned_content)

    except SyntaxError as e2:
        logger.error(f"Could not fix syntax errors in {script_path.name}: {e2}")
        return False
    except Exception as e3:
        logger.error(f"Error during script cleanup: {e3}")
        return False

    logger.info(f"Successfully cleaned syntax errors in {script_path.name}")
    return True

def execute_pymdp_script_with_outputs(
    script_path: Path,
    output_dir: Path,
    verbose: bool = False
) -> Dict[str, Any]:
    """
    Execute a single PyMDP script with comprehensive output capture and analysis.

    The script is validated, then run in a child interpreter (``sys.executable``)
    with its own directory as the working directory and a 600 second timeout.
    A JSON execution log, the captured stdout and (when non-empty) the captured
    stderr are written to ``output_dir / script_path.stem``.

    Args:
        script_path: Path to the PyMDP script
        output_dir: Directory to save execution outputs
        verbose: Whether to enable verbose output

    Returns:
        Dict containing execution results, logs, and analysis data. The
        "success" key is always present; failures also carry an "error"
        message. Exceptions raised while preparing or running the script are
        caught and reported in the returned dict (with a "traceback" entry).
    """
    if not script_path.exists():
        logger.error(f"Script file not found: {script_path}")
        return {"success": False, "error": "Script file not found"}

    logger.info(f"Executing PyMDP script with full output capture: {script_path}")

    # Create output subdirectory for this script
    script_output_dir = output_dir / script_path.stem
    script_output_dir.mkdir(parents=True, exist_ok=True)

    # First, validate and clean the script
    if not validate_and_clean_pymdp_script(script_path):
        logger.error(f"Script validation failed: {script_path}")
        return {"success": False, "error": "Script validation failed"}

    try:
        # Check if required dependencies are available
        required_deps = ["numpy", "pymdp", "matplotlib"]
        missing_deps = []

        for dep in required_deps:
            try:
                __import__(dep)
            except ImportError:
                missing_deps.append(dep)

        if missing_deps:
            error_msg = f"Missing required dependencies: {', '.join(missing_deps)}"
            logger.error(error_msg)
            return {"success": False, "error": error_msg, "missing_dependencies": missing_deps}

        # Execute the script with output capture
        abs_script_path = script_path.resolve()

        # Prepare environment for enhanced execution
        env = os.environ.copy()

        # Calculate paths relative to this runner file, not the target script
        # This runner is in src/execute/pymdp/pymdp_runner.py
        runner_path = Path(__file__).resolve()
        src_path = runner_path.parent.parent.parent
        project_root = src_path.parent

        # Add both project root (for 'src.x') and src (for 'utils.x') to PYTHONPATH.
        # Join with os.pathsep (':' on POSIX, ';' on Windows) and only append the
        # inherited value when it is non-empty, so no empty entry is appended.
        pythonpath_entries = [str(project_root), str(src_path)]
        inherited_pythonpath = env.get('PYTHONPATH', '')
        if inherited_pythonpath:
            pythonpath_entries.append(inherited_pythonpath)
        env['PYTHONPATH'] = os.pathsep.join(pythonpath_entries)
        env['PYMDP_OUTPUT_DIR'] = str(script_output_dir)  # Let script know where to save files

        logger.info(f"Running PyMDP script: {script_path.name}")
        result = subprocess.run(  # nosec B603 -- subprocess calls with controlled/trusted input
            [sys.executable, str(abs_script_path)],
            capture_output=True,
            text=True,
            check=False,
            cwd=abs_script_path.parent,
            env=env,
            timeout=600,
        )

        # Save execution logs ("execution_time" is the timestamp taken once
        # the child process has returned)
        execution_log = {
            "script_name": script_path.name,
            "execution_time": datetime.now().isoformat(),
            "return_code": result.returncode,
            "stdout": result.stdout,
            "stderr": result.stderr,
            "script_path": str(script_path),
            "output_directory": str(script_output_dir)
        }

        # Save detailed execution log
        log_file = script_output_dir / f"{script_path.stem}_execution_log.json"
        with open(log_file, 'w', encoding='utf-8') as f:
            json.dump(execution_log, f, indent=2)

        # Save stdout to text file for easy viewing. Written as UTF-8 so that
        # non-ASCII script output cannot fail on a narrower locale encoding.
        stdout_file = script_output_dir / f"{script_path.stem}_stdout.txt"
        with open(stdout_file, 'w', encoding='utf-8') as f:
            f.write(result.stdout)

        # Save stderr to text file if there are errors
        if result.stderr.strip():
            stderr_file = script_output_dir / f"{script_path.stem}_stderr.txt"
            with open(stderr_file, 'w', encoding='utf-8') as f:
                f.write(result.stderr)

        # Process the execution result
        if result.returncode == 0:
            logger.info(f"Script executed successfully: {script_path.name}")

            # Generate execution metadata (no analysis artifacts)
            metadata = extract_execution_metadata(script_path, result.stdout, script_output_dir)

            execution_summary = {
                "success": True,
                "script_name": script_path.name,
                "return_code": result.returncode,
                "output_files": [
                    str(log_file.relative_to(output_dir)),
                    str(stdout_file.relative_to(output_dir))
                ],
                "metadata": metadata,
                "execution_time": execution_log["execution_time"]
            }

            if verbose and result.stdout.strip():
                logger.debug(f"Output from {script_path.name}:\n{result.stdout}")

            return execution_summary
        else:
            logger.error(f"Script execution failed with return code {result.returncode}: {script_path.name}")
            error_summary = {
                "success": False,
                "script_name": script_path.name,
                "return_code": result.returncode,
                "error": "Script execution failed",
                "stderr": result.stderr,
                "stdout": result.stdout,
                "execution_time": execution_log["execution_time"]
            }
            return error_summary

    except Exception as e:
        # Boundary handler: covers timeouts, I/O failures and anything else so
        # that one bad script is reported instead of aborting the caller.
        logger.error(f"Error executing script {script_path.name}: {e}")
        return {
            "success": False,
            "script_name": script_path.name,
            "error": str(e),
            "traceback": traceback.format_exc()
        }

def extract_execution_metadata(script_path: Path, stdout_content: str, output_dir: Path) -> Dict[str, Any]:
    """
    Summarise what a PyMDP script run reported on stdout.

    Args:
        script_path: Path to the executed script (used for log messages and
            as the stem of the trace file name)
        stdout_content: Captured stdout from script execution
        output_dir: Directory where the simulation trace is written

    Returns:
        Dictionary with "matrices_found", "agent_instantiated" and
        "trace_file" entries, plus an "error" entry when extraction raised.
    """
    summary: Dict[str, Any] = dict(
        matrices_found=False,
        agent_instantiated=False,
        trace_file=None,
    )

    try:
        # Both the A and B matrix printouts must appear in the output
        if all(marker in stdout_content for marker in ("A = ", "B = ")):
            summary["matrices_found"] = True
            logger.info(f"Successfully extracted A and B matrices from {script_path.name}")

        # Case-insensitive search for the instantiation message
        lowered_output = stdout_content.lower()
        if "agent successfully instantiated" in lowered_output:
            summary["agent_instantiated"] = True
            logger.info(f"PyMDP Agent successfully instantiated in {script_path.name}")

        # Persist the raw trace; only its file name is recorded in the summary
        trace_path = generate_simulation_trace(stdout_content, output_dir, script_path.stem)
        if trace_path:
            summary["trace_file"] = str(trace_path.name)

    except Exception as exc:
        # Best effort: metadata problems are reported, never raised
        logger.warning(f"Error during metadata extraction for {script_path.name}: {exc}")
        summary["error"] = str(exc)

    return summary


def generate_simulation_trace(stdout_content: str, output_dir: Path, script_name: str) -> Optional[Path]:
    """
    Write a JSON trace that buckets interesting stdout lines by category.

    Each line is assigned to the first matching category, in this order:
    matrix printouts, agent status lines, debug output, errors. Lines that
    match none are ignored.

    Args:
        stdout_content: Captured stdout of the executed script
        output_dir: Directory in which the trace file is created
        script_name: Name recorded in the trace and used as file name prefix

    Returns:
        Path to generated trace file, or None if generation failed
    """
    matrix_markers = ("A = ", "B = ", "C = ", "D = ", "E = ")

    # Ordered (bucket, predicate) pairs; the first predicate that matches wins
    classifiers = (
        ("matrices_found", lambda text: any(marker in text for marker in matrix_markers)),
        ("agent_status", lambda text: "AGENT_SCRIPT:" in text),
        ("debug_output", lambda text: "Debug" in text or "debug" in text),
        ("errors", lambda text: "Error" in text or "ERROR" in text),
    )

    try:
        raw_lines = stdout_content.split('\n')

        record: Dict[str, Any] = {
            "script_name": script_name,
            "timestamp": datetime.now().isoformat(),
        }
        for bucket_name, _ in classifiers:
            record[bucket_name] = []

        for raw_line in raw_lines:
            bucket = next(
                (name for name, matches in classifiers if matches(raw_line)),
                None,
            )
            if bucket is not None:
                record[bucket].append(raw_line.strip())

        # Save trace data
        destination = output_dir / f"{script_name}_simulation_trace.json"
        with open(destination, 'w') as handle:
            json.dump(record, handle, indent=2)

    except Exception as exc:
        # Trace generation is optional; report the problem and carry on
        logger.warning(f"Error generating simulation trace: {exc}")
        return None

    logger.info(f"Generated simulation trace: {destination.name}")
    return destination

def run_pymdp_scripts(
    rendered_simulators_dir: Union[str, Path],
    execution_output_dir: Optional[Union[str, Path]] = None,
    recursive_search: bool = True,
    verbose: bool = False
) -> bool:
    """
    Find and run PyMDP scripts on rendered models with comprehensive output generation.

    Scripts are looked up under ``<rendered_simulators_dir>/pymdp``. Each one is
    executed via ``execute_pymdp_script_with_outputs`` and a JSON/Markdown
    report is written to the execution output directory.

    Args:
        rendered_simulators_dir: Directory containing rendered simulators (target_dir from executor)
        execution_output_dir: Directory for PyMDP execution outputs; defaults to
            ``pymdp_execution_outputs`` relative to the current working directory
        recursive_search: Whether to search recursively for scripts
        verbose: Whether to enable verbose output

    Returns:
        bool: True when there was nothing to run, when every script succeeded,
        or when at least one script succeeded; False only when scripts were
        found and every one of them failed.
    """
    logger.info(f"Starting PyMDP script execution with output capture from: {rendered_simulators_dir}")

    # Set up execution output directory
    if execution_output_dir:
        exec_output_dir = Path(execution_output_dir)
        exec_output_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"PyMDP execution outputs will be saved to: {exec_output_dir}")
    else:
        exec_output_dir = Path("pymdp_execution_outputs")
        exec_output_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"Using default PyMDP execution output directory: {exec_output_dir}")

    # Find all PyMDP scripts - look directly in the PyMDP subdirectory
    base_path = Path(rendered_simulators_dir)
    pymdp_dir = base_path / "pymdp"

    if not pymdp_dir.exists():
        logger.info(f"PyMDP directory not found: {pymdp_dir}")
        # Create empty execution report for consistency
        create_empty_execution_report(exec_output_dir)
        return True  # Consider this a success if no scripts to run

    # Find all Python files
    candidates = pymdp_dir.rglob("*.py") if recursive_search else pymdp_dir.glob("*.py")

    # Filter out __pycache__ and other dunder-named entries. Only the path
    # components *below* pymdp_dir are inspected, so a parent directory whose
    # name happens to start with '__' cannot hide every script. Sorting makes
    # the execution order (and therefore the report) deterministic.
    script_files = sorted(
        f for f in candidates
        if f.is_file()
        and not any(part.startswith('__') for part in f.relative_to(pymdp_dir).parts)
    )

    logger.info(f"Found {len(script_files)} PyMDP script(s) in {pymdp_dir}")
    for script in script_files:
        logger.debug(f"  - {script.name}")

    if not script_files:
        logger.info("No PyMDP scripts found to execute")
        create_empty_execution_report(exec_output_dir)
        return True  # Consider this a success if no scripts to run

    # Execute each script with comprehensive output capture
    execution_results = []
    success_count = 0
    failure_count = 0

    for script_file in script_files:
        logger.info(f"Processing PyMDP script: {script_file.name}")

        script_result = execute_pymdp_script_with_outputs(script_file, exec_output_dir, verbose)
        execution_results.append(script_result)

        if script_result.get("success", False):
            success_count += 1
            logger.info(f"✅ Successfully executed: {script_file.name}")
        else:
            failure_count += 1
            logger.error(f"❌ Failed to execute: {script_file.name}")

    # Generate comprehensive execution report
    total_count = success_count + failure_count
    logger.info(f"PyMDP script execution summary: {success_count} succeeded, {failure_count} failed, {total_count} total")

    # Create final execution report
    create_execution_report(exec_output_dir, execution_results, success_count, failure_count, total_count)

    # Consider the overall run successful if any scripts succeeded
    return failure_count == 0 or success_count > 0

def create_execution_report(output_dir: Path, execution_results: List[Dict],
                          success_count: int, failure_count: int, total_count: int) -> None:
    """
    Create a comprehensive PyMDP execution report.

    Two files are written into ``output_dir``:
    ``pymdp_execution_report.json`` (full data, including every per-script
    result dict) and ``pymdp_execution_report.md`` (human-readable summary).

    Args:
        output_dir: Existing directory that receives both report files
        execution_results: Per-script result dicts; "success", "script_name",
            "execution_time", "metadata" and "error" are read when present
        success_count: Number of scripts that succeeded
        failure_count: Number of scripts that failed
        total_count: Total number of scripts; a value of 0 yields a 0.0 success rate
    """

    report = {
        "timestamp": datetime.now().isoformat(),
        "framework": "PyMDP",
        "execution_summary": {
            "total_scripts": total_count,
            "successful_scripts": success_count,
            "failed_scripts": failure_count,
            # Guard against division by zero when nothing was executed
            "success_rate": success_count / total_count if total_count > 0 else 0.0
        },
        "script_results": execution_results,
        "output_directory": str(output_dir)
    }

    # Save JSON report
    report_file = output_dir / "pymdp_execution_report.json"
    with open(report_file, 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=2)

    # Save markdown report. The status markers below are non-ASCII, so the
    # file is written explicitly as UTF-8; relying on the locale encoding
    # raises UnicodeEncodeError on platforms whose default cannot encode them.
    md_report_file = output_dir / "pymdp_execution_report.md"
    with open(md_report_file, 'w', encoding='utf-8') as f:
        f.write("# PyMDP Execution Report\n\n")
        f.write(f"**Generated:** {report['timestamp']}\n")
        f.write("**Framework:** PyMDP\n")
        f.write(f"**Success Rate:** {report['execution_summary']['success_rate']:.1%}\n\n")

        f.write("## Execution Summary\n\n")
        f.write(f"- **Total Scripts:** {total_count}\n")
        f.write(f"- **Successful:** {success_count}\n")
        f.write(f"- **Failed:** {failure_count}\n\n")

        f.write("## Script Details\n\n")
        for result in execution_results:
            succeeded = result.get("success", False)
            status = "✅ SUCCESS" if succeeded else "❌ FAILED"
            f.write(f"### {result.get('script_name', 'Unknown')} - {status}\n")

            if succeeded:
                f.write(f"- **Execution Time:** {result.get('execution_time', 'N/A')}\n")
                if 'metadata' in result:
                    meta = result['metadata']
                    f.write(f"- **Matrices Extracted:** {'Yes' if meta.get('matrices_found', False) else 'No'}\n")
                    f.write(f"- **Agent Instantiated:** {'Yes' if meta.get('agent_instantiated', False) else 'No'}\n")
            else:
                f.write(f"- **Error:** {result.get('error', 'Unknown error')}\n")

            f.write("\n")

    logger.info(f"Created comprehensive execution report: {report_file.name}")

def create_empty_execution_report(output_dir: Path) -> None:
    """
    Write a placeholder JSON report for a run in which nothing was executed.

    Args:
        output_dir: Directory that receives ``pymdp_execution_report.json``
    """
    # All counters are zero; the rate is stored as a float
    zeroed_summary: Dict[str, Any] = dict.fromkeys(
        ("total_scripts", "successful_scripts", "failed_scripts"), 0
    )
    zeroed_summary["success_rate"] = 0.0

    payload = {
        "timestamp": datetime.now().isoformat(),
        "framework": "PyMDP",
        "execution_summary": zeroed_summary,
        "script_results": [],
        "message": "No PyMDP scripts found to execute",
        "output_directory": str(output_dir),
    }

    destination = output_dir / "pymdp_execution_report.json"
    with open(destination, 'w') as handle:
        json.dump(payload, handle, indent=2)

    logger.info(f"Created empty execution report: {destination.name}")

if __name__ == "__main__":
    # argparse is only needed when this module is run as a script
    import argparse

    # Send log records to stdout for standalone runs
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    )

    # Command-line interface for standalone execution
    cli = argparse.ArgumentParser(description="Execute PyMDP scripts generated by the GNN rendering step")
    cli.add_argument(
        "--output-dir",
        type=Path,
        default="../output",
        help="Main pipeline output directory",
    )
    # Both switches accept the paired --flag / --no-flag forms
    toggle = argparse.BooleanOptionalAction
    cli.add_argument(
        "--recursive",
        action=toggle,
        default=True,
        help="Recursively search for scripts in the output directory",
    )
    cli.add_argument(
        "--verbose",
        action=toggle,
        default=False,
        help="Enable verbose output",
    )

    options = cli.parse_args()

    # Verbose mode lowers this module's logger threshold to DEBUG
    if options.verbose:
        logger.setLevel(logging.DEBUG)

    # The output directory is searched for a "pymdp" subdirectory of scripts
    all_ok = run_pymdp_scripts(
        rendered_simulators_dir=options.output_dir,
        recursive_search=options.recursive,
        verbose=options.verbose,
    )

    # Exit status 0 on success, 1 otherwise
    exit_status = 0 if all_ok else 1
    sys.exit(exit_status)