From a4c94821cb30ddfb2adea11591059ad2b511e02f Mon Sep 17 00:00:00 2001 From: jayn7 Date: Sat, 10 Jan 2026 03:20:31 +0700 Subject: [PATCH 1/5] Add Gemma3 12B Support --- loader.py | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 3 deletions(-) diff --git a/loader.py b/loader.py index 37ef133..e73b849 100644 --- a/loader.py +++ b/loader.py @@ -10,7 +10,7 @@ from .dequant import is_quantized, dequantize_tensor IMG_ARCH_LIST = {"flux", "sd1", "sdxl", "sd3", "aura", "hidream", "cosmos", "ltxv", "hyvid", "wan", "lumina2", "qwen_image"} -TXT_ARCH_LIST = {"t5", "t5encoder", "llama", "qwen2vl", "qwen3", "qwen3vl"} +TXT_ARCH_LIST = {"t5", "t5encoder", "llama", "qwen2vl", "qwen3", "qwen3vl", "gemma3"} VIS_TYPE_LIST = {"clip-vision", "mmproj"} def get_orig_shape(reader, tensor_name): @@ -199,6 +199,13 @@ def gguf_sd_loader(path, handle_prefix="model.diffusion_model.", is_text_model=F "output.weight": "lm_head.weight", } +GEMMA3_SD_MAP = LLAMA_SD_MAP.copy() +GEMMA3_SD_MAP.update({ + "ffn_pre_norm": "pre_feedforward_layernorm", + "post_ffw_norm": "post_feedforward_layernorm", + "post_attention_norm": "post_attention_layernorm", +}) + CLIP_VISION_SD_MAP = { "mm.": "visual.merger.mlp.", "v.post_ln.": "visual.merger.ln_q.", @@ -220,6 +227,61 @@ def sd_map_replace(raw_sd, key_map): sd[k] = v return sd +##GEMMA3 +def fix_gemma3_llama_cpp_keys(sd): + for k in list(sd.keys()): + if k.endswith(".ffn_norm.weight"): + new_k = k.replace(".ffn_norm.weight", ".ffn_pre_norm.weight") + sd[new_k] = sd.pop(k) + return sd + +def gemma3_norm_corrections(sd): + norm_patterns = [ + "input_layernorm.weight", + "post_attention_layernorm.weight", + "pre_feedforward_layernorm.weight", + "post_feedforward_layernorm.weight", + "self_attn.q_norm.weight", + "self_attn.k_norm.weight", + "model.norm.weight" + ] + corrected = 0 + for key in list(sd.keys()): + if any(p in key for p in norm_patterns): + if is_quantized(sd[key]): + sd[key] = dequantize_tensor(sd[key], dtype=torch.float32) - 1.0 + else: + sd[key] = sd[key].float() - 1.0 + corrected += 1 + #logging.info(f"Gemma3: Applied -1 norm correction to {corrected} tensors") + return sd + +def load_gemma3_tokenizer(path, base_dir): + # Using gemma3 tokenizer.model + #https://huggingface.co/google/gemma-3-12b-it/resolve/main/tokenizer.model + base_dir = os.path.dirname(path) + + tokenizer_search_paths = [ + os.path.join(base_dir, "tokenizer.model"), + os.path.join(base_dir, "gemma3-tokenizer.model"), + ] + for tok_path in tokenizer_search_paths: + if os.path.exists(tok_path): + try: + with open(tok_path, "rb") as f: + tokenizer_bytes = f.read() + logging.info(f"Loaded Gemma3 tokenizer from: {tok_path} ({len(tokenizer_bytes)} bytes)") + return torch.frombuffer(bytearray(tokenizer_bytes), dtype=torch.uint8) + except Exception as e: + logging.warning(f"Failed to load tokenizer from {tok_path}: {e}") + + error_msg = ( + f"Gemma3 tokenizer not found for: {os.path.basename(path)}\n" + f"Place 'tokenizer.model' in: {base_dir}" + ) + logging.error(f"{error_msg}") + raise FileNotFoundError(error_msg) + def llama_permute(raw_sd, n_head, n_head_kv): # Reverse version of LlamaModel.permute in llama.cpp convert script sd = {} @@ -408,17 +470,24 @@ def gguf_clip_loader(path): logging.warning(f"Dequantizing {temb_key} to prevent runtime OOM.") sd[temb_key] = dequantize_tensor(sd[temb_key], dtype=torch.float16) sd = sd_map_replace(sd, T5_SD_MAP) - elif arch in {"llama", "qwen2vl", "qwen3", "qwen3vl"}: + elif arch in {"llama", "qwen2vl", "qwen3", 
"qwen3vl", "gemma3"}: # TODO: pass model_options["vocab_size"] to loader somehow temb_key = "token_embd.weight" if temb_key in sd and sd[temb_key].shape[0] >= (64 * 1024): if arch == "llama" and sd[temb_key].shape == (131072, 5120): # non-standard Comfy-Org tokenizer sd["tekken_model"] = gguf_tekken_tokenizer_loader(path, sd[temb_key].shape) + elif arch == "gemma3": + sd["spiece_model"] = load_gemma3_tokenizer(path, os.path.dirname(path)) # See note above for T5. logging.warning(f"Dequantizing {temb_key} to prevent runtime OOM.") sd[temb_key] = dequantize_tensor(sd[temb_key], dtype=torch.float16) - sd = sd_map_replace(sd, LLAMA_SD_MAP) + if arch == "gemma3": + sd = fix_gemma3_llama_cpp_keys(sd) + sd = sd_map_replace(sd, GEMMA3_SD_MAP) + sd = gemma3_norm_corrections(sd) + else: + sd = sd_map_replace(sd, LLAMA_SD_MAP) if arch == "llama": sd = llama_permute(sd, 32, 8) # L3 / Mistral if arch == "qwen2vl": From 5a537ab64e69655016f43fbb29e03bf9ad3c3123 Mon Sep 17 00:00:00 2001 From: hpr <64584739+jarz76@users.noreply.github.com> Date: Sat, 10 Jan 2026 12:50:56 +0700 Subject: [PATCH 2/5] Update loader.py --- loader.py | 1 - 1 file changed, 1 deletion(-) diff --git a/loader.py b/loader.py index e73b849..4bfea43 100644 --- a/loader.py +++ b/loader.py @@ -496,4 +496,3 @@ def gguf_clip_loader(path): else: pass return sd - From 36b16deb096596aa3df53920d87d7fd98b7a01ae Mon Sep 17 00:00:00 2001 From: jayn7 Date: Mon, 12 Jan 2026 02:37:57 +0700 Subject: [PATCH 3/5] Support tokenizer recreation from metadata --- loader.py | 65 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 25 deletions(-) diff --git a/loader.py b/loader.py index 4bfea43..a833f32 100644 --- a/loader.py +++ b/loader.py @@ -256,31 +256,46 @@ def gemma3_norm_corrections(sd): #logging.info(f"Gemma3: Applied -1 norm correction to {corrected} tensors") return sd -def load_gemma3_tokenizer(path, base_dir): - # Using gemma3 tokenizer.model - #https://huggingface.co/google/gemma-3-12b-it/resolve/main/tokenizer.model - base_dir = os.path.dirname(path) +def gguf_gemma3_tokenizer_loader(path): + logging.info("Attempting to recreate sentencepiece tokenizer from GGUF file metadata...") + try: + from sentencepiece import sentencepiece_model_pb2 as model + except ImportError: + raise ImportError("Please install sentencepiece and protobuf.\npip install sentencepiece protobuf") + spm = model.ModelProto() + reader = gguf.GGUFReader(path) + + spm.normalizer_spec.name = "identity" + spm.normalizer_spec.add_dummy_prefix = False + spm.trainer_spec.model_type = 2 + spm.trainer_spec.input_format = "tsv" + spm.trainer_spec.byte_fallback = True + spm.trainer_spec.max_sentence_length = 4192 + spm.trainer_spec.bos_piece = "" + + tokens = get_list_field(reader, "tokenizer.ggml.tokens", str) + scores = get_list_field(reader, "tokenizer.ggml.scores", float) + toktype = get_list_field(reader, "tokenizer.ggml.token_type", int) - tokenizer_search_paths = [ - os.path.join(base_dir, "tokenizer.model"), - os.path.join(base_dir, "gemma3-tokenizer.model"), - ] - for tok_path in tokenizer_search_paths: - if os.path.exists(tok_path): - try: - with open(tok_path, "rb") as f: - tokenizer_bytes = f.read() - logging.info(f"Loaded Gemma3 tokenizer from: {tok_path} ({len(tokenizer_bytes)} bytes)") - return torch.frombuffer(bytearray(tokenizer_bytes), dtype=torch.uint8) - except Exception as e: - logging.warning(f"Failed to load tokenizer from {tok_path}: {e}") + if not tokens or not scores or not toktype: + raise ValueError("Missing tokenizer 
metadata") + + for idx in range(len(tokens)): + piece = spm.SentencePiece() + piece.piece = tokens[idx] + if idx == 3: # UNK position + piece.type = 2 # UNK Token + piece.score = 0.0 # UNK Score + else: + piece.type = toktype[idx] + piece.score = scores[idx] + spm.pieces.append(piece) + + spm.trainer_spec.vocab_size = len(spm.pieces) + logging.info(f"Created tokenizer with vocab size of {len(spm.pieces)}") - error_msg = ( - f"Gemma3 tokenizer not found for: {os.path.basename(path)}\n" - f"Place 'tokenizer.model' in: {base_dir}" - ) - logging.error(f"{error_msg}") - raise FileNotFoundError(error_msg) + del reader + return torch.ByteTensor(list(spm.SerializeToString())) def llama_permute(raw_sd, n_head, n_head_kv): # Reverse version of LlamaModel.permute in llama.cpp convert script @@ -477,8 +492,8 @@ def gguf_clip_loader(path): if arch == "llama" and sd[temb_key].shape == (131072, 5120): # non-standard Comfy-Org tokenizer sd["tekken_model"] = gguf_tekken_tokenizer_loader(path, sd[temb_key].shape) - elif arch == "gemma3": - sd["spiece_model"] = load_gemma3_tokenizer(path, os.path.dirname(path)) + if arch == "gemma3": + sd["spiece_model"] = gguf_gemma3_tokenizer_loader(path) # See note above for T5. logging.warning(f"Dequantizing {temb_key} to prevent runtime OOM.") sd[temb_key] = dequantize_tensor(sd[temb_key], dtype=torch.float16) From 243a525df95b47baf7848626094172d70a8af42c Mon Sep 17 00:00:00 2001 From: hpr <64584739+jarz76@users.noreply.github.com> Date: Mon, 12 Jan 2026 02:40:44 +0700 Subject: [PATCH 4/5] Update loader.py --- loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loader.py b/loader.py index a833f32..742e883 100644 --- a/loader.py +++ b/loader.py @@ -492,7 +492,7 @@ def gguf_clip_loader(path): if arch == "llama" and sd[temb_key].shape == (131072, 5120): # non-standard Comfy-Org tokenizer sd["tekken_model"] = gguf_tekken_tokenizer_loader(path, sd[temb_key].shape) - if arch == "gemma3": + elif arch == "gemma3": sd["spiece_model"] = gguf_gemma3_tokenizer_loader(path) # See note above for T5. 
logging.warning(f"Dequantizing {temb_key} to prevent runtime OOM.") From 2e7f529b29da88fab741f8b8261be010b88bcbda Mon Sep 17 00:00:00 2001 From: jayn7 Date: Mon, 12 Jan 2026 07:09:08 +0700 Subject: [PATCH 5/5] update loader.py --- loader.py | 115 +++++++++++++++++++++++++----------------------------- 1 file changed, 54 insertions(+), 61 deletions(-) diff --git a/loader.py b/loader.py index 742e883..7cefb11 100644 --- a/loader.py +++ b/loader.py @@ -201,7 +201,7 @@ def gguf_sd_loader(path, handle_prefix="model.diffusion_model.", is_text_model=F GEMMA3_SD_MAP = LLAMA_SD_MAP.copy() GEMMA3_SD_MAP.update({ - "ffn_pre_norm": "pre_feedforward_layernorm", + "ffn_norm": "pre_feedforward_layernorm", "post_ffw_norm": "post_feedforward_layernorm", "post_attention_norm": "post_attention_layernorm", }) @@ -227,15 +227,20 @@ def sd_map_replace(raw_sd, key_map): sd[k] = v return sd -##GEMMA3 -def fix_gemma3_llama_cpp_keys(sd): - for k in list(sd.keys()): - if k.endswith(".ffn_norm.weight"): - new_k = k.replace(".ffn_norm.weight", ".ffn_pre_norm.weight") - sd[new_k] = sd.pop(k) +def llama_permute(raw_sd, n_head, n_head_kv): + # Reverse version of LlamaModel.permute in llama.cpp convert script + sd = {} + permute = lambda x,h: x.reshape(h, x.shape[0] // h // 2, 2, *x.shape[1:]).swapaxes(1, 2).reshape(x.shape) + for k,v in raw_sd.items(): + if k.endswith(("q_proj.weight", "q_proj.bias")): + v.data = permute(v.data, n_head) + if k.endswith(("k_proj.weight", "k_proj.bias")): + v.data = permute(v.data, n_head_kv) + sd[k] = v return sd def gemma3_norm_corrections(sd): + # Reverse change from Gemma3Model modify_tensors in llama.cpp convert script norm_patterns = [ "input_layernorm.weight", "post_attention_layernorm.weight", @@ -256,59 +261,6 @@ def gemma3_norm_corrections(sd): #logging.info(f"Gemma3: Applied -1 norm correction to {corrected} tensors") return sd -def gguf_gemma3_tokenizer_loader(path): - logging.info("Attempting to recreate sentencepiece tokenizer from GGUF file metadata...") - try: - from sentencepiece import sentencepiece_model_pb2 as model - except ImportError: - raise ImportError("Please install sentencepiece and protobuf.\npip install sentencepiece protobuf") - spm = model.ModelProto() - reader = gguf.GGUFReader(path) - - spm.normalizer_spec.name = "identity" - spm.normalizer_spec.add_dummy_prefix = False - spm.trainer_spec.model_type = 2 - spm.trainer_spec.input_format = "tsv" - spm.trainer_spec.byte_fallback = True - spm.trainer_spec.max_sentence_length = 4192 - spm.trainer_spec.bos_piece = "" - - tokens = get_list_field(reader, "tokenizer.ggml.tokens", str) - scores = get_list_field(reader, "tokenizer.ggml.scores", float) - toktype = get_list_field(reader, "tokenizer.ggml.token_type", int) - - if not tokens or not scores or not toktype: - raise ValueError("Missing tokenizer metadata") - - for idx in range(len(tokens)): - piece = spm.SentencePiece() - piece.piece = tokens[idx] - if idx == 3: # UNK position - piece.type = 2 # UNK Token - piece.score = 0.0 # UNK Score - else: - piece.type = toktype[idx] - piece.score = scores[idx] - spm.pieces.append(piece) - - spm.trainer_spec.vocab_size = len(spm.pieces) - logging.info(f"Created tokenizer with vocab size of {len(spm.pieces)}") - - del reader - return torch.ByteTensor(list(spm.SerializeToString())) - -def llama_permute(raw_sd, n_head, n_head_kv): - # Reverse version of LlamaModel.permute in llama.cpp convert script - sd = {} - permute = lambda x,h: x.reshape(h, x.shape[0] // h // 2, 2, *x.shape[1:]).swapaxes(1, 2).reshape(x.shape) - for k,v 
in raw_sd.items(): - if k.endswith(("q_proj.weight", "q_proj.bias")): - v.data = permute(v.data, n_head) - if k.endswith(("k_proj.weight", "k_proj.bias")): - v.data = permute(v.data, n_head_kv) - sd[k] = v - return sd - def strip_quant_suffix(name): pattern = r"[-_]?(?:ud-)?i?q[0-9]_[a-z0-9_\-]{1,8}$" match = re.search(pattern, name, re.IGNORECASE) @@ -473,6 +425,48 @@ def gguf_tekken_tokenizer_loader(path, temb_shape): del reader return torch.ByteTensor(list(json.dumps(data).encode('utf-8'))) +def gguf_gemma3_tokenizer_loader(path): + #TODO: merge into gguf_tokenizer_loader + logging.info("Attempting to recreate sentencepiece tokenizer from GGUF file metadata...") + try: + from sentencepiece import sentencepiece_model_pb2 as model + except ImportError: + raise ImportError("Please install sentencepiece and protobuf.\npip install sentencepiece protobuf") + spm = model.ModelProto() + reader = gguf.GGUFReader(path) + + spm.normalizer_spec.name = "identity" + spm.normalizer_spec.add_dummy_prefix = False + spm.trainer_spec.model_type = 2 + spm.trainer_spec.input_format = "tsv" + spm.trainer_spec.byte_fallback = True + spm.trainer_spec.max_sentence_length = 4192 + spm.trainer_spec.bos_piece = "" + + tokens = get_list_field(reader, "tokenizer.ggml.tokens", str) + scores = get_list_field(reader, "tokenizer.ggml.scores", float) + toktype = get_list_field(reader, "tokenizer.ggml.token_type", int) + + if not tokens or not scores or not toktype: + raise ValueError("Missing tokenizer metadata") + + for idx in range(len(tokens)): + piece = spm.SentencePiece() + piece.piece = tokens[idx] + if idx == 3: # UNK position + piece.type = 2 # UNK Token + piece.score = 0.0 # UNK Score + else: + piece.type = toktype[idx] + piece.score = scores[idx] + spm.pieces.append(piece) + + spm.trainer_spec.vocab_size = len(spm.pieces) + logging.info(f"Created tokenizer with vocab size of {len(spm.pieces)}") + + del reader + return torch.ByteTensor(list(spm.SerializeToString())) + def gguf_clip_loader(path): sd, extra = gguf_sd_loader(path, is_text_model=True) arch = extra.get("arch_str", None) @@ -498,7 +492,6 @@ def gguf_clip_loader(path): logging.warning(f"Dequantizing {temb_key} to prevent runtime OOM.") sd[temb_key] = dequantize_tensor(sd[temb_key], dtype=torch.float16) if arch == "gemma3": - sd = fix_gemma3_llama_cpp_keys(sd) sd = sd_map_replace(sd, GEMMA3_SD_MAP) sd = gemma3_norm_corrections(sd) else:
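
Usage note (illustrative, not part of the patch series): a minimal sketch of round-tripping the serialized tokenizer that gguf_gemma3_tokenizer_loader returns, to sanity-check the vocab recreated from GGUF metadata. The GGUF filename below is a placeholder, and the bare `from loader import ...` assumes the patched module is importable on its own (inside ComfyUI-GGUF it lives in a package with relative imports, so adjust accordingly); the model_proto keyword of SentencePieceProcessor and encode(..., out_type=str) are standard sentencepiece APIs.

    import sentencepiece
    # Assumed import path; inside ComfyUI-GGUF this would be a relative import.
    from loader import gguf_gemma3_tokenizer_loader

    # Placeholder path; point this at any Gemma3 text-encoder GGUF.
    tok_tensor = gguf_gemma3_tokenizer_loader("gemma-3-12b-it-Q4_K_M.gguf")

    # The loader returns a torch.ByteTensor holding a serialized
    # sentencepiece ModelProto; recover the raw bytes first.
    model_proto = bytes(tok_tensor.tolist())

    # SentencePieceProcessor can consume a serialized ModelProto directly.
    sp = sentencepiece.SentencePieceProcessor(model_proto=model_proto)
    print(sp.vocab_size())                         # should match the vocab size logged by the loader
    print(sp.encode("Hello world", out_type=str))  # pieces from the recreated vocab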