
Fix incorrect call to layer.forward()
ZX-ModelCloud committed Jan 7, 2025
1 parent 0f0048b · commit 7ee016a
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions gptqmodel/models/base.py
@@ -625,7 +625,7 @@ def store_lm_head_input_hook(_, args, kwargs):
                     sym=sym,
                     mse=mse,
                 )
-
+                print("gptq[name]",gptq[name])
         for name in skipped_modules:
             subset.pop(name)

@@ -670,11 +670,11 @@ def tmp(_, inp: Tuple[torch.Tensor, ...], out: torch.Tensor):
                 if layer.reuse_kv:
                     additional_layer_inputs["kv_last_layer"] = shared_kv_cache_dict.get(i - 1)

-                layer_output = layer(*layer_input) if self.quantize_config.lm_head else layer(*layer_input, **additional_layer_inputs)
+                layer_output = layer(*layer_input) if is_lm_head else layer(*layer_input, **additional_layer_inputs)
                 if shared_kv_cache_dict.get(i) is None:
                     shared_kv_cache_dict[i] = layer_output[-1]
             else:
-                layer(*layer_input) if self.quantize_config.lm_head else layer(*layer_input, **additional_layer_inputs)
+                layer(*layer_input) if is_lm_head else layer(*layer_input, **additional_layer_inputs)

             del layer_input
             del additional_layer_inputs
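
The hunk above is the core of the fix the commit title names. `self.quantize_config.lm_head` is a global setting ("should the lm_head be quantized at all?"), while `is_lm_head` tracks whether the module currently being processed is the lm_head. With lm_head quantization enabled, the old predicate was true for every layer, so regular decoder layers were called without `additional_layer_inputs` (attention masks, position ids, and similar kwargs). A minimal sketch of the corrected dispatch, with simplified hypothetical signatures rather than the repository's actual code:

    from typing import Any, Dict, Tuple

    import torch
    from torch import nn

    def forward_layer(
        layer: nn.Module,
        layer_input: Tuple[torch.Tensor, ...],
        additional_layer_inputs: Dict[str, Any],
        is_lm_head: bool,
    ):
        # The lm_head is a plain projection that only takes the hidden states;
        # decoder layers also need kwargs such as attention_mask / position_ids.
        if is_lm_head:
            return layer(*layer_input)
        return layer(*layer_input, **additional_layer_inputs)
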
@@ -731,7 +731,7 @@ def tmp(_, inp: Tuple[torch.Tensor, ...], out: torch.Tensor):
                 self.quant_log.append(stat)
                 logger.info(stat)

-                quantizers[f"{self.layers_node}.{i}.{name}"] = (
+                quantizers[self.lm_head if is_lm_head else f"{self.layers_node}.{i}.{name}"] = (
                     gptq[name].quantizer.to(CPU),
                     move_to(scale, CPU),
                     move_to(zero, CPU),
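
The key change above matters because quantizers are later looked up by the module's full dotted name. The lm_head lives at the model root rather than under the layers node, so storing it under a per-layer key would never match. A small sketch with assumed values (`model.layers` and `lm_head` are typical for Llama-style models, not taken from this diff):

    layers_node = "model.layers"  # assumed value, for illustration only
    lm_head = "lm_head"           # assumed value, for illustration only

    def quantizer_key(is_lm_head: bool, i: int, name: str) -> str:
        # The lm_head sits at the model root, not under model.layers.<i>,
        # so its key must be its own name rather than a per-layer path.
        return lm_head if is_lm_head else f"{layers_node}.{i}.{name}"

    assert quantizer_key(False, 3, "self_attn.q_proj") == "model.layers.3.self_attn.q_proj"
    assert quantizer_key(True, 0, "lm_head") == "lm_head"
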
@@ -760,7 +760,7 @@ def tmp(_, inp: Tuple[torch.Tensor, ...], out: torch.Tensor):

             with torch.no_grad():
                 layer_output = move_to(
-                    layer(*layer_input)[0] if self.quantize_config.lm_head else layer(*layer_input, **additional_layer_inputs)[0],
+                    layer(*layer_input)[0] if is_lm_head else layer(*layer_input, **additional_layer_inputs)[0],
                     cur_layer_device if calibration_enable_gpu_cache else CPU,
                 )
             layer_outputs.append([layer_output])
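
This last hunk applies the same predicate fix to the pass that recomputes each layer's output after quantization. A short illustration of why the old predicate misfired (module names are hypothetical):

    quantize_config_lm_head = True  # global flag: "also quantize the lm_head"

    for module_name in ["model.layers.0", "model.layers.1", "lm_head"]:
        is_lm_head = module_name == "lm_head"
        old_predicate = quantize_config_lm_head  # True for all three modules
        new_predicate = is_lm_head               # True only for "lm_head"
        print(module_name, old_predicate, new_predicate)
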
