llama : refactor wavtokenizer tensors
ggml-ci
ggerganov committed Dec 16, 2024
1 parent b6c9e70 commit 91a3530
Showing 8 changed files with 394 additions and 509 deletions.
2 changes: 1 addition & 1 deletion common/arg.cpp
@@ -842,7 +842,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
}
).set_sparam());
add_opt(common_arg(
{"--sampling-seq"}, "SEQUENCE",
{"--sampling-seq", "--sampler-seq"}, "SEQUENCE",
string_format("simplified sequence for samplers that will be used (default: %s)", sampler_type_chars.c_str()),
[](common_params & params, const std::string & value) {
params.sampling.samplers = common_sampler_types_from_chars(value);
12 changes: 8 additions & 4 deletions convert_hf_to_gguf.py
@@ -326,8 +326,8 @@ def prepare_tensors(self):
gguf.MODEL_TENSOR.TIME_MIX_W2,
gguf.MODEL_TENSOR.TIME_MIX_DECAY_W1,
gguf.MODEL_TENSOR.TIME_MIX_DECAY_W2,
- gguf.MODEL_TENSOR.POS_NET_NORM1,
- gguf.MODEL_TENSOR.POS_NET_NORM2,
+ gguf.MODEL_TENSOR.POSNET_NORM1,
+ gguf.MODEL_TENSOR.POSNET_NORM2,
)
)
or not new_name.endswith(".weight")
@@ -2059,12 +2059,16 @@ def set_gguf_parameters(self):
super().set_gguf_parameters()
self.gguf_writer.add_vocab_size (self.hparams["vocab_size"])
self.gguf_writer.add_features_length (self.hparams["n_embd_features"])
- self.gguf_writer.add_posnet_length (self.hparams["n_embd_posnet"])
- self.gguf_writer.add_convnext_length (self.hparams["n_embd_convnext"])
self.gguf_writer.add_feed_forward_length(self.hparams["n_ff"])
self.gguf_writer.add_group_norm_eps (self.hparams["group_norm_epsilon"])
self.gguf_writer.add_group_norm_groups (self.hparams["group_norm_groups"])

+ self.gguf_writer.add_posnet_embedding_length(self.hparams["posnet"]["n_embd"])
+ self.gguf_writer.add_posnet_block_count (self.hparams["posnet"]["n_layer"])
+
+ self.gguf_writer.add_convnext_embedding_length(self.hparams["convnext"]["n_embd"])
+ self.gguf_writer.add_convnext_block_count (self.hparams["convnext"]["n_layer"])


@Model.register("Qwen2MoeForCausalLM")
class Qwen2MoeModel(Model):
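Note: with these writer calls, a converted model carries per-subnetwork metadata keys instead of the old flat `posnet_length`/`convnext_length`. A minimal sketch of reading them back with the `gguf-py` package follows; the `wavtokenizer-dec` arch prefix and the file name are assumptions, not taken from this diff:

```python
# Sketch: inspect the new nested hparam keys in a converted GGUF file.
# The arch prefix "wavtokenizer-dec" and the file name are assumed here.
from gguf import GGUFReader

reader = GGUFReader("wavtokenizer-dec.gguf")
for key in ("wavtokenizer-dec.posnet.embedding_length",
            "wavtokenizer-dec.posnet.block_count",
            "wavtokenizer-dec.convnext.embedding_length",
            "wavtokenizer-dec.convnext.block_count"):
    field = reader.get_field(key)
    if field is not None:
        # for a scalar field the value lives in the last stored part
        print(key, "=", field.parts[-1][0])
```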
27 changes: 19 additions & 8 deletions examples/tts/convert_pt_to_hf.py
@@ -74,12 +74,13 @@ def flatten_state_dict(state_dict, parent_key='', sep='.'):
new_key = key

new_key = new_key.replace('state_dict.', '')
+ new_key = new_key.replace('pos_net', 'posnet')

# check if matches "backbone.pos_net.%d.bias" or "backbone.pos_net.%d.weight"
if new_key.startswith("backbone.pos_net."):
match = re.match(r"backbone\.pos_net\.(\d+)\.(bias|weight)", new_key)
# check if matches "backbone.posnet.%d.bias" or "backbone.posnet.%d.weight"
if new_key.startswith("backbone.posnet."):
match = re.match(r"backbone\.posnet\.(\d+)\.(bias|weight)", new_key)
if match:
new_key = f"backbone.pos_net.{match.group(1)}.norm.{match.group(2)}"
new_key = f"backbone.posnet.{match.group(1)}.norm.{match.group(2)}"

# "feature_extractor.encodec.quantizer.vq.layers.0._codebook.embed" -> "backbone.embedding.weight"
if new_key == "feature_extractor.encodec.quantizer.vq.layers.0._codebook.embed":
@@ -99,7 +100,7 @@ def flatten_state_dict(state_dict, parent_key='', sep='.'):
new_key = new_key.replace("gamma", "gamma.weight")

# convert from 1D [768] to 2D [768, 1] so that ggml_add can broadcast the bias
if (new_key.endswith("norm.weight") or new_key.endswith("norm1.weight") or new_key.endswith("norm2.weight") or new_key.endswith(".bias")) and (new_key.startswith("backbone.pos_net") or new_key.startswith("backbone.embed.bias")):
if (new_key.endswith("norm.weight") or new_key.endswith("norm1.weight") or new_key.endswith("norm2.weight") or new_key.endswith(".bias")) and (new_key.startswith("backbone.posnet") or new_key.startswith("backbone.embed.bias")):
value = value.unsqueeze(1)

if new_key.endswith("dwconv.bias"):
@@ -155,16 +156,26 @@ def flatten_state_dict(state_dict, parent_key='', sep='.'):
],
"hidden_size": 1282,
"n_embd_features": 512,
"n_embd_posnet": 768,
"n_embd_convnext": 768,
"n_ff": 2304,
"vocab_size": 4096,
"n_head": 1,
"layer_norm_epsilon": 1e-6,
"group_norm_epsilon": 1e-6,
"group_norm_groups": 32,
"max_position_embeddings": 8192, # ?
"num_hidden_layers": 12
"n_layer": 12,
"posnet": {
"n_embd": 768,
"n_layer": 6
},
"convnext": {
"n_embd": 768,
"n_layer": 12
},
#"n_embd_posnet": 768,
#"n_embd_convnext": 768,
#"n_layer_posnet": 6,
#"n_layer_convnext": 12
}

with open(path_dst + '/config.json', 'w') as f:
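The key rewrite above is easy to sanity-check in isolation. A small sketch of the rename logic; the helper name is illustrative and not part of the script:

```python
import re

def rename_posnet_key(key: str) -> str:
    # mirrors the diff: 'pos_net' -> 'posnet', then bare "%d.bias"/"%d.weight"
    # entries gain a ".norm." segment so they land on the norm tensors
    key = key.replace('pos_net', 'posnet')
    m = re.match(r"backbone\.posnet\.(\d+)\.(bias|weight)", key)
    if m:
        key = f"backbone.posnet.{m.group(1)}.norm.{m.group(2)}"
    return key

assert rename_posnet_key("backbone.pos_net.1.bias") == "backbone.posnet.1.norm.bias"
# keys that already name a sub-tensor only get the pos_net -> posnet rename
assert rename_posnet_key("backbone.pos_net.3.conv1.weight") == "backbone.posnet.3.conv1.weight"
```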
4 changes: 4 additions & 0 deletions examples/tts/tts.cpp
@@ -476,6 +476,10 @@ int main(int argc, char ** argv) {
smpl[i] = common_sampler_init(model_ttc, params.sampling);
}

LOG_INF("sampler seed: %u\n", common_sampler_get_seed(smpl[0]));
LOG_INF("sampler params: \n%s\n", params.sampling.print().c_str());
LOG_INF("sampler chain: %s\n", common_sampler_print(smpl[0]).c_str());

LOG_INF("%s: loading done\n", __func__);

const auto t_main_start = ggml_time_us();
100 changes: 53 additions & 47 deletions gguf-py/gguf/constants.py
@@ -91,8 +91,6 @@ class LLM:
CONTEXT_LENGTH = "{arch}.context_length"
EMBEDDING_LENGTH = "{arch}.embedding_length"
FEATURES_LENGTH = "{arch}.features_length"
- POSNET_LENGTH = "{arch}.posnet_length"
- CONVNEXT_LENGTH = "{arch}.convnext_length"
BLOCK_COUNT = "{arch}.block_count"
LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count"
FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
@@ -160,6 +158,14 @@ class SSM:
class WKV:
HEAD_SIZE = "{arch}.wkv.head_size"

+ class PosNet:
+ EMBEDDING_LENGTH = "{arch}.posnet.embedding_length"
+ BLOCK_COUNT = "{arch}.posnet.block_count"
+
+ class ConvNext:
+ EMBEDDING_LENGTH = "{arch}.convnext.embedding_length"
+ BLOCK_COUNT = "{arch}.convnext.block_count"

class Tokenizer:
MODEL = "tokenizer.ggml.model"
PRE = "tokenizer.ggml.pre"
@@ -377,21 +383,21 @@ class MODEL_TENSOR(IntEnum):
CLS = auto() # classifier
CLS_OUT = auto() # classifier output projection
CONV1D = auto()
- CONV_NEXT_DW = auto()
- CONV_NEXT_NORM = auto()
- CONV_NEXT_PW1 = auto()
- CONV_NEXT_PW2 = auto()
- CONV_NEXT_GAMMA = auto()
- POS_NET_CONV1 = auto()
- POS_NET_CONV2 = auto()
- POS_NET_NORM = auto()
- POS_NET_NORM1 = auto()
- POS_NET_NORM2 = auto()
- POS_NET_ATTN_NORM = auto()
- POS_NET_ATTN_Q = auto()
- POS_NET_ATTN_K = auto()
- POS_NET_ATTN_V = auto()
- POS_NET_ATTN_OUT = auto()
+ CONVNEXT_DW = auto()
+ CONVNEXT_NORM = auto()
+ CONVNEXT_PW1 = auto()
+ CONVNEXT_PW2 = auto()
+ CONVNEXT_GAMMA = auto()
+ POSNET_CONV1 = auto()
+ POSNET_CONV2 = auto()
+ POSNET_NORM = auto()
+ POSNET_NORM1 = auto()
+ POSNET_NORM2 = auto()
+ POSNET_ATTN_NORM = auto()
+ POSNET_ATTN_Q = auto()
+ POSNET_ATTN_K = auto()
+ POSNET_ATTN_V = auto()
+ POSNET_ATTN_OUT = auto()


MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
@@ -558,21 +564,21 @@ class MODEL_TENSOR(IntEnum):
MODEL_TENSOR.CLS: "cls",
MODEL_TENSOR.CLS_OUT: "cls.output",
MODEL_TENSOR.CONV1D: "conv1d",
- MODEL_TENSOR.CONV_NEXT_DW: "conv_next.{bid}.dw",
- MODEL_TENSOR.CONV_NEXT_NORM: "conv_next.{bid}.norm",
- MODEL_TENSOR.CONV_NEXT_PW1: "conv_next.{bid}.pw1",
- MODEL_TENSOR.CONV_NEXT_PW2: "conv_next.{bid}.pw2",
- MODEL_TENSOR.CONV_NEXT_GAMMA: "conv_next.{bid}.gamma",
- MODEL_TENSOR.POS_NET_CONV1: "pos_net.{bid}.conv1",
- MODEL_TENSOR.POS_NET_CONV2: "pos_net.{bid}.conv2",
- MODEL_TENSOR.POS_NET_NORM: "pos_net.{bid}.norm",
- MODEL_TENSOR.POS_NET_NORM1: "pos_net.{bid}.norm1",
- MODEL_TENSOR.POS_NET_NORM2: "pos_net.{bid}.norm2",
- MODEL_TENSOR.POS_NET_ATTN_NORM: "pos_net.{bid}.attn_norm",
- MODEL_TENSOR.POS_NET_ATTN_Q: "pos_net.{bid}.attn_q",
- MODEL_TENSOR.POS_NET_ATTN_K: "pos_net.{bid}.attn_k",
- MODEL_TENSOR.POS_NET_ATTN_V: "pos_net.{bid}.attn_v",
- MODEL_TENSOR.POS_NET_ATTN_OUT: "pos_net.{bid}.attn_output",
+ MODEL_TENSOR.CONVNEXT_DW: "convnext.{bid}.dw",
+ MODEL_TENSOR.CONVNEXT_NORM: "convnext.{bid}.norm",
+ MODEL_TENSOR.CONVNEXT_PW1: "convnext.{bid}.pw1",
+ MODEL_TENSOR.CONVNEXT_PW2: "convnext.{bid}.pw2",
+ MODEL_TENSOR.CONVNEXT_GAMMA: "convnext.{bid}.gamma",
+ MODEL_TENSOR.POSNET_CONV1: "posnet.{bid}.conv1",
+ MODEL_TENSOR.POSNET_CONV2: "posnet.{bid}.conv2",
+ MODEL_TENSOR.POSNET_NORM: "posnet.{bid}.norm",
+ MODEL_TENSOR.POSNET_NORM1: "posnet.{bid}.norm1",
+ MODEL_TENSOR.POSNET_NORM2: "posnet.{bid}.norm2",
+ MODEL_TENSOR.POSNET_ATTN_NORM: "posnet.{bid}.attn_norm",
+ MODEL_TENSOR.POSNET_ATTN_Q: "posnet.{bid}.attn_q",
+ MODEL_TENSOR.POSNET_ATTN_K: "posnet.{bid}.attn_k",
+ MODEL_TENSOR.POSNET_ATTN_V: "posnet.{bid}.attn_v",
+ MODEL_TENSOR.POSNET_ATTN_OUT: "posnet.{bid}.attn_output",
}

MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
@@ -1415,23 +1421,23 @@ class MODEL_TENSOR(IntEnum):
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.TOKEN_EMBD_NORM,
MODEL_TENSOR.CONV1D,
- MODEL_TENSOR.CONV_NEXT_DW,
- MODEL_TENSOR.CONV_NEXT_NORM,
- MODEL_TENSOR.CONV_NEXT_PW1,
- MODEL_TENSOR.CONV_NEXT_PW2,
- MODEL_TENSOR.CONV_NEXT_GAMMA,
+ MODEL_TENSOR.CONVNEXT_DW,
+ MODEL_TENSOR.CONVNEXT_NORM,
+ MODEL_TENSOR.CONVNEXT_PW1,
+ MODEL_TENSOR.CONVNEXT_PW2,
+ MODEL_TENSOR.CONVNEXT_GAMMA,
MODEL_TENSOR.OUTPUT,
MODEL_TENSOR.OUTPUT_NORM,
- MODEL_TENSOR.POS_NET_CONV1,
- MODEL_TENSOR.POS_NET_CONV2,
- MODEL_TENSOR.POS_NET_NORM,
- MODEL_TENSOR.POS_NET_NORM1,
- MODEL_TENSOR.POS_NET_NORM2,
- MODEL_TENSOR.POS_NET_ATTN_NORM,
- MODEL_TENSOR.POS_NET_ATTN_Q,
- MODEL_TENSOR.POS_NET_ATTN_K,
- MODEL_TENSOR.POS_NET_ATTN_V,
- MODEL_TENSOR.POS_NET_ATTN_OUT,
+ MODEL_TENSOR.POSNET_CONV1,
+ MODEL_TENSOR.POSNET_CONV2,
+ MODEL_TENSOR.POSNET_NORM,
+ MODEL_TENSOR.POSNET_NORM1,
+ MODEL_TENSOR.POSNET_NORM2,
+ MODEL_TENSOR.POSNET_ATTN_NORM,
+ MODEL_TENSOR.POSNET_ATTN_Q,
+ MODEL_TENSOR.POSNET_ATTN_K,
+ MODEL_TENSOR.POSNET_ATTN_V,
+ MODEL_TENSOR.POSNET_ATTN_OUT,
],
# TODO
}
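For clarity, a quick sketch of how the new nested key classes expand; the `wavtokenizer-dec` arch string is an assumption here, not taken from the visible diff:

```python
from gguf.constants import Keys

arch = "wavtokenizer-dec"  # assumed arch name, not part of this diff
print(Keys.PosNet.EMBEDDING_LENGTH.format(arch=arch))  # -> wavtokenizer-dec.posnet.embedding_length
print(Keys.ConvNext.BLOCK_COUNT.format(arch=arch))     # -> wavtokenizer-dec.convnext.block_count
```

This keeps subnetwork hparams namespaced as `{arch}.posnet.*` and `{arch}.convnext.*` instead of overloading the top-level `{arch}.*` keys.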
14 changes: 10 additions & 4 deletions gguf-py/gguf/gguf_writer.py
@@ -634,11 +634,17 @@ def add_embedding_length(self, length: int) -> None:
def add_features_length(self, length: int) -> None:
self.add_uint32(Keys.LLM.FEATURES_LENGTH.format(arch=self.arch), length)

- def add_posnet_length(self, length: int) -> None:
- self.add_uint32(Keys.LLM.POSNET_LENGTH.format(arch=self.arch), length)
+ def add_posnet_embedding_length(self, length: int) -> None:
+ self.add_uint32(Keys.PosNet.EMBEDDING_LENGTH.format(arch=self.arch), length)

- def add_convnext_length(self, length: int) -> None:
- self.add_uint32(Keys.LLM.CONVNEXT_LENGTH.format(arch=self.arch), length)
+ def add_posnet_block_count(self, length: int) -> None:
+ self.add_uint32(Keys.PosNet.BLOCK_COUNT.format(arch=self.arch), length)
+
+ def add_convnext_embedding_length(self, length: int) -> None:
+ self.add_uint32(Keys.ConvNext.EMBEDDING_LENGTH.format(arch=self.arch), length)
+
+ def add_convnext_block_count(self, length: int) -> None:
+ self.add_uint32(Keys.ConvNext.BLOCK_COUNT.format(arch=self.arch), length)

def add_block_count(self, length: int) -> None:
self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
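Together with the converter changes above, the writer side now reads roughly like this minimal sketch; the output path is illustrative, and the values are the ones hardcoded in convert_pt_to_hf.py:

```python
import gguf

# Sketch only: a real conversion also adds tensors and writes the header.
w = gguf.GGUFWriter("model.gguf", "wavtokenizer-dec")  # path and arch assumed
w.add_posnet_embedding_length(768)    # posnet.n_embd
w.add_posnet_block_count(6)           # posnet.n_layer
w.add_convnext_embedding_length(768)  # convnext.n_embd
w.add_convnext_block_count(12)        # convnext.n_layer
```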
50 changes: 25 additions & 25 deletions gguf-py/gguf/tensor_mapping.py
@@ -704,64 +704,64 @@ class TensorNameMap:
),
#############################################################################

- MODEL_TENSOR.CONV_NEXT_DW: (
+ MODEL_TENSOR.CONVNEXT_DW: (
"backbone.convnext.{bid}.dwconv", # wavtokenizer
),

- MODEL_TENSOR.CONV_NEXT_NORM: (
+ MODEL_TENSOR.CONVNEXT_NORM: (
"backbone.convnext.{bid}.norm", # wavtokenizer
),

- MODEL_TENSOR.CONV_NEXT_PW1: (
+ MODEL_TENSOR.CONVNEXT_PW1: (
"backbone.convnext.{bid}.pwconv1", # wavtokenizer
),

- MODEL_TENSOR.CONV_NEXT_PW2: (
+ MODEL_TENSOR.CONVNEXT_PW2: (
"backbone.convnext.{bid}.pwconv2", # wavtokenizer
),

- MODEL_TENSOR.CONV_NEXT_GAMMA: (
+ MODEL_TENSOR.CONVNEXT_GAMMA: (
"backbone.convnext.{bid}.gamma", # wavtokenizer
),

- MODEL_TENSOR.POS_NET_CONV1: (
- "backbone.pos_net.{bid}.conv1", # wavtokenizer
+ MODEL_TENSOR.POSNET_CONV1: (
+ "backbone.posnet.{bid}.conv1", # wavtokenizer
),

- MODEL_TENSOR.POS_NET_CONV2: (
- "backbone.pos_net.{bid}.conv2", # wavtokenizer
+ MODEL_TENSOR.POSNET_CONV2: (
+ "backbone.posnet.{bid}.conv2", # wavtokenizer
),

- MODEL_TENSOR.POS_NET_NORM: (
- "backbone.pos_net.{bid}.norm", # wavtokenizer
+ MODEL_TENSOR.POSNET_NORM: (
+ "backbone.posnet.{bid}.norm", # wavtokenizer
),

- MODEL_TENSOR.POS_NET_NORM1: (
- "backbone.pos_net.{bid}.norm1", # wavtokenizer
+ MODEL_TENSOR.POSNET_NORM1: (
+ "backbone.posnet.{bid}.norm1", # wavtokenizer
),

- MODEL_TENSOR.POS_NET_NORM2: (
- "backbone.pos_net.{bid}.norm2", # wavtokenizer
+ MODEL_TENSOR.POSNET_NORM2: (
+ "backbone.posnet.{bid}.norm2", # wavtokenizer
),

- MODEL_TENSOR.POS_NET_ATTN_NORM: (
- "backbone.pos_net.{bid}.norm", # wavtokenizer
+ MODEL_TENSOR.POSNET_ATTN_NORM: (
+ "backbone.posnet.{bid}.norm", # wavtokenizer
),

- MODEL_TENSOR.POS_NET_ATTN_Q: (
- "backbone.pos_net.{bid}.q", # wavtokenizer
+ MODEL_TENSOR.POSNET_ATTN_Q: (
+ "backbone.posnet.{bid}.q", # wavtokenizer
),

- MODEL_TENSOR.POS_NET_ATTN_K: (
- "backbone.pos_net.{bid}.k", # wavtokenizer
+ MODEL_TENSOR.POSNET_ATTN_K: (
+ "backbone.posnet.{bid}.k", # wavtokenizer
),

- MODEL_TENSOR.POS_NET_ATTN_V: (
- "backbone.pos_net.{bid}.v", # wavtokenizer
+ MODEL_TENSOR.POSNET_ATTN_V: (
+ "backbone.posnet.{bid}.v", # wavtokenizer
),

- MODEL_TENSOR.POS_NET_ATTN_OUT: (
- "backbone.pos_net.{bid}.proj_out", # wavtokenizer
+ MODEL_TENSOR.POSNET_ATTN_OUT: (
+ "backbone.posnet.{bid}.proj_out", # wavtokenizer
),
}

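A sketch of the resulting name resolution; `MODEL_ARCH.WAVTOKENIZER_DEC` and the block count are assumptions here (the count only needs to cover the highest `{bid}`):

```python
import gguf

# Assumed arch enum member; 12 blocks covers both posnet and convnext layers.
tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.WAVTOKENIZER_DEC, 12)

# checkpoint name -> GGUF name, with the suffix carried over by try_suffixes
print(tmap.get_name("backbone.posnet.0.conv1.weight", try_suffixes=(".weight", ".bias")))
# expected: posnet.0.conv1.weight
print(tmap.get_name("backbone.convnext.0.dwconv.weight", try_suffixes=(".weight", ".bias")))
# expected: convnext.0.dw.weight
```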
