
Commit e37557a

farolone, claude, p-e-w, and gemini-code-assist[bot] authored and committed
feat: add Qwen3.5 MoE hybrid layer support (p-e-w#187)
* feat: add Qwen3.5 MoE hybrid layer support

  Qwen3.5 MoE uses GatedDeltaNet (linear attention) on some layers instead of standard self-attention, causing abliteration to fail because self_attn.o_proj doesn't exist on those layers.

  Changes:
  - Wrap self_attn.o_proj in suppress(Exception) and add linear_attn.out_proj as an alternative attention out-projection for GatedDeltaNet layers
  - Scan all layers in get_abliterable_components() instead of only layer 0, since hybrid models have different components on different layers
  - Derive LoRA target_modules from actual named_modules() instead of splitting component keys, which fails when module names differ across layers (e.g. "o_proj" vs "out_proj")

  Tested with Qwen3.5-397B-A17B (7/100 refusals, KL 0.2676).

  Relates to p-e-w#43

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Apply suggestion from @gemini-code-assist[bot]

  Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Co-authored-by: Philipp Emanuel Weidmann <pew@worldwidemann.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent 1355da6 commit e37557a

1 file changed

Lines changed: 27 additions & 12 deletions


src/heretic/model.py

@@ -161,14 +161,19 @@ def _apply_lora(self):
         assert isinstance(self.model, PreTrainedModel)
 
         # Always use LoRA adapters for abliteration (faster reload, no weight modification).
-        # We use the leaf names (e.g. "o_proj") as target modules.
-        # This may cause LoRA adapters to be attached to unrelated modules (e.g. "conv.o_proj"),
-        # but this is harmless as we only abliterate the modules we target in `abliterate()`,
-        # leaving the others at their default (identity) state.
-        # NOTE: This will need to be updated when hybrid layer support (#43) is merged.
-        target_modules = [
-            comp.split(".")[-1] for comp in self.get_abliterable_components()
-        ]
+        # Collect actual leaf module names from the model for LoRA targeting.
+        # This is more robust than splitting component keys (e.g. "attn.o_proj" -> "o_proj")
+        # because hybrid models like Qwen3.5 MoE have modules with different names
+        # across layers (e.g. "o_proj" on attention layers, "out_proj" on linear attention layers).
+        target_modules_set: set[str] = set()
+        layers = self.get_layers()
+        for layer_index, layer in enumerate(layers):
+            module_id_to_leaf_name = {id(m): name.split(".")[-1] for name, m in layer.named_modules()}
+            for modules_list in self.get_layer_modules(layer_index).values():
+                for mod in modules_list:
+                    if id(mod) in module_id_to_leaf_name:
+                        target_modules_set.add(module_id_to_leaf_name[id(mod)])
+        target_modules = list(target_modules_set)
 
         if self.settings.row_normalization != RowNormalization.FULL:
             # Rank 1 is sufficient for directional ablation without renormalization.
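An aside on the id()-based lookup in the hunk above: mapping each submodule's identity to its leaf name is what lets the code recover "out_proj" on layers where the component key is still "attn.o_proj". A minimal, self-contained sketch using a hypothetical toy layer (not heretic's real model classes):

```python
import torch.nn as nn

# Toy hybrid-style layer: the out-projection leaf is named "out_proj" here,
# even though the abliteration component key may still be "attn.o_proj".
class ToyLinearAttentionLayer(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_attn = nn.ModuleDict({"out_proj": nn.Linear(8, 8)})

layer = ToyLinearAttentionLayer()

# Map each submodule's identity to its leaf name, as in the diff above.
module_id_to_leaf_name = {id(m): name.split(".")[-1] for name, m in layer.named_modules()}

# Suppose get_layer_modules() returned this module under the key "attn.o_proj";
# looking it up by id() recovers the real leaf name needed for LoRA targeting.
target_module = layer.linear_attn["out_proj"]
print(module_id_to_leaf_name[id(target_module)])  # -> "out_proj", not "o_proj"
```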
@@ -340,9 +345,14 @@ def try_add(component: str, module: Any):
                     f"Unexpected Tensor in {component} - expected nn.Module"
                 )
 
-        # Exceptions aren't suppressed here, because there is currently
-        # no alternative location for the attention out-projection.
-        try_add("attn.o_proj", layer.self_attn.o_proj)  # ty:ignore[possibly-missing-attribute]
+        # Standard self-attention out-projection (most models).
+        with suppress(Exception):
+            try_add("attn.o_proj", layer.self_attn.o_proj)  # ty:ignore[possibly-missing-attribute]
+
+        # Qwen3.5 MoE hybrid layers use GatedDeltaNet (linear attention) instead
+        # of standard self-attention, so self_attn.o_proj doesn't exist on those layers.
+        with suppress(Exception):
+            try_add("attn.o_proj", layer.linear_attn.out_proj)  # ty:ignore[possibly-missing-attribute]
 
         # Most dense models.
         with suppress(Exception):
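The suppress(Exception) pattern above turns a failed attribute lookup into a no-op, so a layer that lacks self_attn simply falls through to the linear_attn branch. A standalone illustration with hypothetical toy layer objects (not heretic's real classes):

```python
from contextlib import suppress
from types import SimpleNamespace

# Toy stand-ins: a standard attention layer and a GatedDeltaNet-style layer.
attn_layer = SimpleNamespace(self_attn=SimpleNamespace(o_proj="o_proj module"))
linear_layer = SimpleNamespace(linear_attn=SimpleNamespace(out_proj="out_proj module"))

def collect_out_projection(layer):
    found = {}
    # Accessing layer.self_attn on the linear-attention layer raises AttributeError;
    # suppress() swallows it, so the lookup is skipped instead of crashing.
    with suppress(Exception):
        found["attn.o_proj"] = layer.self_attn.o_proj
    with suppress(Exception):
        found["attn.o_proj"] = layer.linear_attn.out_proj
    return found

print(collect_out_projection(attn_layer))    # {'attn.o_proj': 'o_proj module'}
print(collect_out_projection(linear_layer))  # {'attn.o_proj': 'out_proj module'}
```

Catching Exception rather than the narrower AttributeError is broad, but it matches the existing suppress pattern already used for the other optional components in get_layer_modules().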
@@ -374,7 +384,12 @@ def try_add(component: str, module: Any):
         return modules
 
     def get_abliterable_components(self) -> list[str]:
-        return list(self.get_layer_modules(0).keys())
+        # Scan all layers because hybrid models (e.g. Qwen3.5 MoE) have different
+        # components on different layers (some have self_attn, others linear_attn).
+        components: set[str] = set()
+        for layer_index in range(len(self.get_layers())):
+            components.update(self.get_layer_modules(layer_index).keys())
+        return sorted(components)
 
     def abliterate(
         self,
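For context on how the derived target_modules list is ultimately consumed: leaf names such as "o_proj" and "out_proj" are what peft's LoraConfig matches against when attaching adapters. A minimal, hypothetical sketch (the model name, rank, and other settings are placeholders, not heretic's actual configuration):

```python
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

# Small stand-in model for illustration; any causal LM with "o_proj" leaves works.
base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B")

config = LoraConfig(
    r=1,                        # rank 1 suffices for directional ablation (see the first hunk above)
    lora_alpha=1,
    target_modules=["o_proj"],  # union of leaf names collected in _apply_lora()
    bias="none",
)
peft_model = get_peft_model(base, config)

# Freshly attached LoRA adapters are zero-initialized, so the wrapped model
# initially behaves like the base model until the adapters are modified.
peft_model.print_trainable_parameters()
```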
