@@ -474,6 +474,10 @@ def flux_to_diffusers(mmdit_config, output_prefix=""):
474474 "ff_context.net.0.proj.bias" : "txt_mlp.0.bias" ,
475475 "ff_context.net.2.weight" : "txt_mlp.2.weight" ,
476476 "ff_context.net.2.bias" : "txt_mlp.2.bias" ,
477+ "attn.norm_q.weight" : "img_attn.norm.query_norm.scale" ,
478+ "attn.norm_k.weight" : "img_attn.norm.key_norm.scale" ,
479+ "attn.norm_added_q.weight" : "txt_attn.norm.query_norm.scale" ,
480+ "attn.norm_added_k.weight" : "txt_attn.norm.key_norm.scale" ,
477481 }
478482
479483 for k in block_map :
@@ -496,6 +500,8 @@ def flux_to_diffusers(mmdit_config, output_prefix=""):
496500 "norm.linear.bias" : "modulation.lin.bias" ,
497501 "proj_out.weight" : "linear2.weight" ,
498502 "proj_out.bias" : "linear2.bias" ,
503+ "attn.norm_q.weight" : "norm.query_norm.scale" ,
504+ "attn.norm_k.weight" : "norm.key_norm.scale" ,
499505 }
500506
501507 for k in block_map :
@@ -514,18 +520,14 @@ def flux_to_diffusers(mmdit_config, output_prefix=""):
514520 ("txt_in.weight" , "context_embedder.weight" ),
515521 ("vector_in.in_layer.bias" , "time_text_embed.text_embedder.linear_1.bias" ),
516522 ("vector_in.in_layer.weight" , "time_text_embed.text_embedder.linear_1.weight" ),
517- ("vector_in.out_layer.bias" , "time_text_embed.timestep_embedder .linear_2.bias" ),
523+ ("vector_in.out_layer.bias" , "time_text_embed.text_embedder .linear_2.bias" ),
518524 ("vector_in.out_layer.weight" , "time_text_embed.text_embedder.linear_2.weight" ),
519525 ("guidance_in.in_layer.bias" , "time_text_embed.guidance_embedder.linear_1.bias" ),
520526 ("guidance_in.in_layer.weight" , "time_text_embed.guidance_embedder.linear_1.weight" ),
521- ("guidance_in.out_layer.bias" , "time_text_embed.guidance_embedder.linear_1 .bias" ),
527+ ("guidance_in.out_layer.bias" , "time_text_embed.guidance_embedder.linear_2 .bias" ),
522528 ("guidance_in.out_layer.weight" , "time_text_embed.guidance_embedder.linear_2.weight" ),
523529 ("final_layer.adaLN_modulation.1.bias" , "norm_out.linear.bias" , swap_scale_shift ),
524530 ("final_layer.adaLN_modulation.1.weight" , "norm_out.linear.weight" , swap_scale_shift ),
525-
526- # TODO: the values of these weights are different in Diffusers
527- ("guidance_in.out_layer.bias" , "time_text_embed.guidance_embedder.linear_2.bias" ),
528- ("vector_in.out_layer.bias" , "time_text_embed.text_embedder.linear_2.bias" ),
529531 }
530532
531533 for k in MAP_BASIC :
0 commit comments