Skip to content

Commit 234131c

Browse files
committed
[anneal] Reuse pre-built Lean artifacts in generated workspaces
Modify `src/aeneas.rs` to read `lake-manifest.json` from the Aeneas directory and generate pinned `require` statements in `lakefile.lean` pointing to the toolchain's `.lake/packages` directory. This prevents Lake from re-downloading Mathlib and rebuilding Aeneas in the generated workspace, significantly speeding up verification. If a dependency listed in the manifest is missing from the toolchain, we fail with a hard error asking the user to run setup again. gherrit-pr-id: G4yr5ojlipqf3iwpr5ddns5cvkfwpeeeb
1 parent 3b5a819 commit 234131c

2 files changed

Lines changed: 121 additions & 22 deletions

File tree

anneal/src/aeneas.rs

Lines changed: 76 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -20,11 +20,25 @@ use std::{
2020

2121
use anyhow::{Context, Result, bail};
2222
use indicatif::{ProgressBar, ProgressStyle};
23+
use serde::Deserialize;
2324

2425
use crate::{generate, resolve::LockedRoots, scanner::AnnealArtifact, setup::Tool};
2526

2627
const ANNEAL_PRELUDE: &str = include_str!("Anneal.lean");
2728

29+
/// Represents the structure of Lake's `lake-manifest.json` file.
30+
/// We use this to discover transitive dependencies that need to be pinned.
31+
#[derive(Deserialize)]
32+
struct LakeManifest {
33+
packages: Vec<LakePackage>,
34+
}
35+
36+
/// A single package dependency in `lake-manifest.json`.
37+
#[derive(Deserialize)]
38+
struct LakePackage {
39+
name: String,
40+
}
41+
2842
/// Orchestrates the Aeneas translation and Lean verification process.
2943
///
3044
/// This function is the main entry point for the "backend" phase of Anneal.
@@ -269,19 +283,71 @@ pub fn run_aeneas(
269283
//
270284
// If `ANNEAL_AENEAS_DIR` is set (e.g., in CI or local development via Docker),
271285
// we use it. Otherwise, we default to the managed toolchain directory.
272-
let aeneas_dep = if let Ok(path) = std::env::var("ANNEAL_AENEAS_DIR") {
273-
// Use a path dependency to avoid git cloning.
274-
//
275-
// Note: We append the revision as a comment (`-- <rev>`) so that we can
276-
// verify in integration tests that the binary was built with the
277-
// correct revision, even when using a local override.
278-
format!(r#"require aeneas from "{path}/backends/lean" -- {}"#, env!("ANNEAL_AENEAS_REV"))
286+
let (aeneas_root, aeneas_dep) = if let Ok(path) = std::env::var("ANNEAL_AENEAS_DIR") {
287+
let root = std::path::PathBuf::from(path);
288+
let dep = format!(
289+
r#"require aeneas from "{}/backends/lean" -- {}"#,
290+
root.display(),
291+
env!("ANNEAL_AENEAS_REV")
292+
);
293+
(root, dep)
279294
} else {
280295
let toolchain = crate::setup::Toolchain::resolve()?;
281-
let path = toolchain.root.display();
282-
format!(r#"require aeneas from "{path}/backends/lean" -- {}"#, env!("ANNEAL_AENEAS_REV"))
296+
let root = toolchain.root;
297+
let dep = format!(
298+
r#"require aeneas from "{}/backends/lean" -- {}"#,
299+
root.display(),
300+
env!("ANNEAL_AENEAS_REV")
301+
);
302+
(root, dep)
283303
};
284304

305+
// To prevent Lake from re-downloading dependencies (like Mathlib) and
306+
// rebuilding Aeneas in the generated workspace, we explicitly pin all
307+
// dependencies to the paths where they were already built in the toolchain
308+
// directory. Lake records absolute paths in its trace files, so if we
309+
// don't do this, it sees path mismatches and triggers rebuilds.
310+
let manifest_path = aeneas_root.join("backends").join("lean").join("lake-manifest.json");
311+
let mut extra_requires = String::new();
312+
313+
if manifest_path.exists() {
314+
let manifest_content = std::fs::read_to_string(&manifest_path)
315+
.context("Failed to read lake-manifest.json from Aeneas directory")?;
316+
let manifest: LakeManifest = serde_json::from_str(&manifest_content)
317+
.context("Failed to parse lake-manifest.json from Aeneas directory")?;
318+
319+
for package in manifest.packages {
320+
let pkg_dir = aeneas_root
321+
.join("backends")
322+
.join("lean")
323+
.join(".lake")
324+
.join("packages")
325+
.join(&package.name);
326+
327+
if pkg_dir.exists() {
328+
use std::fmt::Write;
329+
let _ = writeln!(
330+
extra_requires,
331+
r#"require {} from "{}" -- pinned by cargo-anneal"#,
332+
package.name,
333+
pkg_dir.display()
334+
);
335+
} else {
336+
bail!(
337+
"Package directory for '{}' is missing in '{}'. Please ensure it is built.",
338+
package.name,
339+
pkg_dir.display()
340+
);
341+
}
342+
}
343+
} else {
344+
bail!(
345+
"lake-manifest.json is missing from Aeneas directory at '{}'.\n\
346+
Please ensure it is built or run `cargo run setup` again.",
347+
manifest_path.display()
348+
);
349+
}
350+
285351
let roots_str = lake_roots.iter().map(|r| format!("`{}", r)).collect::<Vec<_>>().join(", ");
286352

287353
let lakefile = format!(
@@ -290,7 +356,7 @@ import Lake
290356
open Lake DSL
291357
292358
{aeneas_dep}
293-
359+
{extra_requires}
294360
package anneal_verification
295361
296362
@[default_target]

anneal/tests/integration.rs

Lines changed: 45 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -663,7 +663,6 @@ echo "---END-INVOCATION---" >> "{}"
663663
}
664664

665665
let toolchain_path = get_toolchain_path();
666-
let lean_backend_dir = toolchain_path.join("backends/lean");
667666

668667
// Resolve Mocks
669668

@@ -762,9 +761,18 @@ echo "---END-INVOCATION---" >> "{}"
762761
cmd.env("ANNEAL_FORCE_TTY", "1");
763762
cmd.env("FORCE_COLOR", "1");
764763
let isolated_aeneas_dir = self.sandbox_root.join("aeneas_cache");
765-
fs::create_dir_all(&isolated_aeneas_dir).unwrap();
766-
cmd.env("ANNEAL_AENEAS_DIR", isolated_aeneas_dir);
767-
cmd.env("ANNEAL_INTEGRATION_TEST_LEAN_CACHE_DIR", &lean_backend_dir);
764+
let isolated_lean_dir = isolated_aeneas_dir.join("backends/lean");
765+
766+
if !isolated_lean_dir.exists() {
767+
fs::create_dir_all(isolated_lean_dir.parent().unwrap()).unwrap();
768+
769+
// Populate the isolated Aeneas directory from the worker cache
770+
// to allow dependency pinning to work without test-specific code in src.
771+
smart_clone_cache(&self.worker_cache.aeneas.join("backends/lean"), &isolated_lean_dir)
772+
.expect("Failed to populate isolated Aeneas directory");
773+
}
774+
775+
cmd.env("ANNEAL_AENEAS_DIR", &isolated_aeneas_dir);
768776
cmd.env("ANNEAL_USE_PATH_FOR_TOOLS", "1");
769777
cmd.env("RAYON_NUM_THREADS", "1");
770778

@@ -821,7 +829,7 @@ echo "---END-INVOCATION---" >> "{}"
821829
/// - Hardlinks heavy immutable binaries (.olean, .c) to save disk space and
822830
/// time. Because these were previously marked as read-only, any attempts at
823831
/// mutation will fail loudly rather than result in race conditions.
824-
fn smart_clone_cache(source: &Path, target: &Path) -> io::Result<()> {
832+
fn smart_clone_cache(source: &Path, target: &Path) -> anyhow::Result<()> {
825833
let walker = new_sorted_walkdir(source).into_iter().filter_entry(|e| {
826834
// Instantly bypass traversing inside ANY `.git/objects` directory!
827835
// This prevents ~230,000 file copies per Mathlib clone.
@@ -835,19 +843,29 @@ fn smart_clone_cache(source: &Path, target: &Path) -> io::Result<()> {
835843
});
836844

837845
for entry in walker {
838-
let entry = entry.map_err(io::Error::other)?;
846+
let entry = entry.map_err(|e| anyhow::anyhow!("Failed to walk directory: {}", e))?;
839847
let source_path = entry.path();
840848
let relative_path = source_path.strip_prefix(source).unwrap();
841849
let target_path = target.join(relative_path);
842850

843851
if entry.file_type().is_dir() {
844-
fs::create_dir_all(&target_path)?;
852+
fs::create_dir_all(&target_path)
853+
.map_err(|e| anyhow::anyhow!("Failed to create dir {:?}: {}", target_path, e))?;
845854

846855
// If this is a .git directory, manually symlink its objects folder
847856
if source_path.file_name().and_then(|s| s.to_str()) == Some(".git") {
848857
let src_objects = source_path.join("objects");
849858
if src_objects.exists() {
850-
let _ = std::os::unix::fs::symlink(&src_objects, target_path.join("objects"));
859+
std::os::unix::fs::symlink(&src_objects, target_path.join("objects")).map_err(
860+
|e| {
861+
anyhow::anyhow!(
862+
"Failed to symlink .git/objects from {:?} to {:?}: {}",
863+
src_objects,
864+
target_path.join("objects"),
865+
e
866+
)
867+
},
868+
)?;
851869
}
852870
}
853871
} else if entry.file_type().is_file() {
@@ -863,11 +881,19 @@ fn smart_clone_cache(source: &Path, target: &Path) -> io::Result<()> {
863881
|| file_name == "lake.lock";
864882

865883
if is_mutable_metadata {
866-
fs::copy(source_path, &target_path)?;
867-
let mut perms = fs::metadata(&target_path)?.permissions();
884+
fs::copy(source_path, &target_path).map_err(|e| {
885+
anyhow::anyhow!("Failed to copy {:?} to {:?}: {}", source_path, target_path, e)
886+
})?;
887+
let mut perms = fs::metadata(&target_path)
888+
.map_err(|e| {
889+
anyhow::anyhow!("Failed to get metadata of {:?}: {}", target_path, e)
890+
})?
891+
.permissions();
868892
#[allow(clippy::permissions_set_readonly_false)]
869893
perms.set_readonly(false);
870-
fs::set_permissions(&target_path, perms)?;
894+
fs::set_permissions(&target_path, perms).map_err(|e| {
895+
anyhow::anyhow!("Failed to set permissions on {:?}: {}", target_path, e)
896+
})?;
871897
} else {
872898
// NOTE: It is crucial that we *don't* provide a deep-copy
873899
// fallback path here. The symlinked files consume a huge amount
@@ -879,7 +905,14 @@ fn smart_clone_cache(source: &Path, target: &Path) -> io::Result<()> {
879905
// that these errors are surfaced (rather than being worked
880906
// around – e.g. via deep copying) so that we know to fix the
881907
// bugs.
882-
std::os::unix::fs::symlink(source_path, &target_path)?;
908+
std::os::unix::fs::symlink(source_path, &target_path).map_err(|e| {
909+
anyhow::anyhow!(
910+
"Failed to symlink {:?} to {:?}: {}",
911+
source_path,
912+
target_path,
913+
e
914+
)
915+
})?;
883916
}
884917
}
885918
}

0 commit comments

Comments (0)