Skip to content

Commit

Permalink
支持bert做embedding
Browse files Browse the repository at this point in the history
  • Loading branch information
黄宇扬 committed Apr 25, 2024
1 parent a232bea commit 4736535
Show file tree
Hide file tree
Showing 10 changed files with 403 additions and 2 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
set(FASTLLM_CXX_SOURCES src/fastllm.cpp src/device.cpp src/model.cpp src/executor.cpp
src/devices/cpu/cpudevice.cpp src/devices/cpu/cpudevicebatch.cpp
src/models/chatglm.cpp src/models/moss.cpp src/models/llama.cpp src/models/qwen.cpp src/models/basellm.cpp
src/models/glm.cpp src/models/minicpm.cpp src/models/internlm2.cpp
src/models/glm.cpp src/models/minicpm.cpp src/models/internlm2.cpp src/models/bert.cpp
third_party/json11/json11.cpp)

include_directories(include)
Expand Down
12 changes: 12 additions & 0 deletions include/devices/cpu/cpudevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,14 @@ namespace fastllm {
void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams);
};

// CPU implementation of the elementwise "TanH" operator
// (output[i] = tanh(input[i]); see CpuTanHOp::Run in cpudevice.cpp).
class CpuTanHOp : BaseOperator {
void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams);
};

// CPU implementation of the exact (erf-based) "Gelu" operator:
// output[i] = x * 0.5 * (1 + erf(x / sqrt(2))) — see CpuGeluOp::Run in cpudevice.cpp.
class CpuGeluOp : BaseOperator {
void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams);
};

// CPU implementation of the "GeluNew" operator — presumably the
// tanh-approximation GELU (implementation not shown here; confirm in cpudevice.cpp).
class CpuGeluNewOp : BaseOperator {
void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams);
};
Expand All @@ -114,6 +122,10 @@ namespace fastllm {
void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams);
};

// CPU implementation of the "AttentionExtendedMask" operator: adds a
// precomputed additive mask (BERT-style extended attention mask) to the
// attention scores in place — see CpuAttentionExtendedMaskOp::Run in cpudevice.cpp.
class CpuAttentionExtendedMaskOp : BaseOperator {
void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams);
};

// CPU implementation of the "AlibiMask" operator (ALiBi positional attention
// bias; implementation in cpudevice.cpp, not shown here).
class CpuAlibiMaskOp : BaseOperator {
void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams);
};
Expand Down
9 changes: 8 additions & 1 deletion include/fastllm.h
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,8 @@ namespace fastllm {
BPE = 0,
NORMAL = 1,
QWEN = 2,
GLM = 3
GLM = 3,
BERT = 4
};

struct TrieNode {
Expand Down Expand Up @@ -505,6 +506,10 @@ namespace fastllm {

void Silu(const fastllm::Data &input, fastllm::Data &output);

void TanH(const Data &input, Data &output);

void Gelu(const Data &input, Data &output);

void GeluNew(const Data &input, Data &output);

void Swiglu(const fastllm::Data &input, fastllm::Data &output);
Expand All @@ -517,6 +522,8 @@ namespace fastllm {

void AttentionMask(Data &input, const Data &mask, float maskValue); // 把input里对应位置mask中为1的部分变成maskValue

void AttentionExtendedMask(Data &input, const Data &mask); // bert中的extended mask

void AlibiMask(Data &input, const Data &mask, float maskValue); // alibi mask

void Permute(const Data &input, const std::vector<int> &axis, Data &output); // 转置
Expand Down
3 changes: 3 additions & 0 deletions include/model.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@
#define FASTLLM_MODEL_H

#include "basellm.h"
#include "bert.h"

namespace fastllm {
std::unique_ptr<BertModel> CreateEmbeddingModelFromFile(const std::string &fileName);

std::unique_ptr<basellm> CreateLLMModelFromFile(const std::string &fileName);

std::unique_ptr<basellm> CreateEmptyLLMModel(const std::string &modelType);
Expand Down
53 changes: 53 additions & 0 deletions include/models/bert.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@

#ifndef FASTLLM_BERT_H
#define FASTLLM_BERT_H

#include "basellm.h"
#include "fastllm.h"

namespace fastllm {
// Minimal BERT encoder used to turn sentences into embedding vectors
// (created via CreateEmbeddingModelFromFile in model.cpp).
class BertModel {
public:
    BertModel() {};

    ~BertModel() {
        // Release all loaded weights on destruction.
        this->weight.ReleaseWeight();
    };

    void InitParams(); // Initialize hyper-parameters (from the loaded weight file)

    // Inference: run the encoder on an already-tokenized batch and return
    // one embedding vector per input sequence.
    std::vector <std::vector <float> > Forward(
            const Data &inputIds,
            const Data &attentionMask,
            const Data &tokenTypeIds,
            const Data &positionIds);

    // Tokenize and embed a single sentence.
    std::vector <float> EmbeddingSentence(const std::string &context);

    // Tokenize and embed a batch of sentences.
    std::vector <std::vector <float> > EmbeddingSentenceBatch(const std::vector <std::string> &contexts);

    void LoadFromFile(const std::string &fileName); // Load model weights from file

    void SaveLowBitModel(const std::string &fileName, int bit); // Save as a quantized model

    void SaveModel(const std::string &fileName); // Export the model as-is

    void WarmUp() {}; // Warm-up (no-op for this model)

    std::string model_type;

    float layer_norm_eps = 1e-12; // LayerNorm epsilon (BERT default)

    // NOTE(review): the values below look like placeholders — presumably
    // overwritten by InitParams() from the model config; TODO confirm
    // (512 / 64 would give head_dim = 8, unusual for BERT).
    int embed_dim = 512;
    int num_attention_heads = 64;
    int head_dim = embed_dim / num_attention_heads;
    int max_positions = 32768;
    int block_cnt = 12;

    WeightMap weight; // Model weights + tokenizer
    std::map <std::string, int> deviceMap;
};
}

#endif //FASTLLM_BERT_H
94 changes: 94 additions & 0 deletions src/devices/cpu/cpudevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,15 @@ namespace fastllm {
this->ops["MatMulTransB"] = (BaseOperator*)(new CpuMatMulTransBOp());
this->ops["SoftMax"] = (BaseOperator*)(new CpuSoftMaxOp());
this->ops["Silu"] = (BaseOperator*)(new CpuSiluOp());
this->ops["TanH"] = (BaseOperator*)(new CpuTanHOp());
this->ops["Gelu"] = (BaseOperator*)(new CpuGeluOp());
this->ops["GeluNew"] = (BaseOperator*)(new CpuGeluNewOp());
this->ops["Swiglu"] = (BaseOperator*)(new CpuSwigluOp());
this->ops["Mul"] = (BaseOperator*)(new CpuMulOp());
this->ops["MulTo"] = (BaseOperator*)(new CpuMulToOp());
this->ops["AddTo"] = (BaseOperator*)(new CpuAddToOp());
this->ops["AttentionMask"] = (BaseOperator*)(new CpuAttentionMaskOp());
this->ops["AttentionExtendedMask"] = (BaseOperator*)(new CpuAttentionExtendedMaskOp());
this->ops["AlibiMask"] = (BaseOperator*)(new CpuAlibiMaskOp());
this->ops["TopK"] = (BaseOperator*)(new CpuTopKOp());
this->ops["Permute"] = (BaseOperator*)(new CpuPermuteOp());
Expand Down Expand Up @@ -2505,6 +2508,74 @@ namespace fastllm {
}
}

// Elementwise hyperbolic tangent: output[i] = tanh(input[i]).
// Expects datas["input"] and datas["output"] (same shape, FLOAT32).
void CpuTanHOp::Run(const std::string &opType, const fastllm::DataDict &datas,
                    const fastllm::FloatDict &floatParams, const fastllm::IntDict &intParams) {
    Data &input = *(datas.find("input")->second);
    Data &output = *(datas.find("output")->second);
    output.Allocate();
    // Fixed: the assert message previously said "GeluNew error" (copy-paste).
    AssertInFastLLM(input.dataType == DataType::FLOAT32, "TanH error: Data's type should be float32.\n");

    float *inputData = (float*)input.cpuData;
    float *outputData = (float*)output.cpuData;
    int len = input.Count(0);
    for (int i = 0; i < len; i++) {
        outputData[i] = tanhf(inputData[i]);
    }
}

// Single-precision error function, computed with an FMA-based polynomial
// approximation (two branches: large |a| via exp, small |a| via odd polynomial).
// The per-branch "maximum error" notes below come with the original coefficients.
// NOTE(review): inside this namespace, this overload is found by unqualified
// lookup before ::erf — CpuGeluOp::Run below presumably calls this version;
// confirm if exact bit-compatibility with std::erf matters.
float erf(float a)
{
    float r, s, t, u;

    t = fabsf(a);
    s = a * a;
    if (t > 0.927734375f)
    { // 475/512
        // maximum error 0.99527 ulp
        r = fmaf(-1.72853470e-5f, t, 3.83197126e-4f); // -0x1.220000p-16,0x1.91cfb2p-12
        u = fmaf(-3.88396438e-3f, t, 2.42546219e-2f); // -0x1.fd1438p-9, 0x1.8d6342p-6
        r = fmaf(r, s, u);
        r = fmaf(r, t, -1.06777877e-1f); // -0x1.b55cb8p-4
        r = fmaf(r, t, -6.34846687e-1f); // -0x1.450aa0p-1
        r = fmaf(r, t, -1.28717512e-1f); // -0x1.079d0cp-3
        r = fmaf(r, t, -t);
        r = 1.0f - expf(r);
        r = copysignf(r, a); // restore the sign (erf is odd)
    }
    else
    {
        // maximum error 0.98929 ulp
        r = -5.96761703e-4f; // -0x1.38e000p-11
        r = fmaf(r, s, 4.99119423e-3f); // 0x1.471a58p-8
        r = fmaf(r, s, -2.67681349e-2f); // -0x1.b691b2p-6
        r = fmaf(r, s, 1.12819925e-1f); // 0x1.ce1c44p-4
        r = fmaf(r, s, -3.76125336e-1f); // -0x1.812700p-2
        r = fmaf(r, s, 1.28379166e-1f); // 0x1.06eba8p-3
        r = fmaf(r, a, a);
    }
    return r;
}

// Exact GELU activation: output[i] = x * 0.5 * (1 + erf(x / sqrt(2))).
// Expects datas["input"] and datas["output"] (same shape, FLOAT32).
void CpuGeluOp::Run(const std::string &opType, const fastllm::DataDict &datas,
                    const fastllm::FloatDict &floatParams, const fastllm::IntDict &intParams) {
    Data &input = *(datas.find("input")->second);
    Data &output = *(datas.find("output")->second);
    output.Allocate();
    // Fixed: the assert message previously said "GeluNew error" (copy-paste).
    AssertInFastLLM(input.dataType == DataType::FLOAT32, "Gelu error: Data's type should be float32.\n");

    float *inputData = (float*)input.cpuData;
    float *outputData = (float*)output.cpuData;
    int len = input.Count(0);
    for (int i = 0; i < len; i++) {
        float x = inputData[i];
        // erf resolves to the float approximation defined above in this namespace.
        outputData[i] = x * 0.5f * (1.0f + erf(x / sqrt(2.0)));
    }
}

void CpuGeluNewOp::Run(const std::string &opType, const fastllm::DataDict &datas,
const fastllm::FloatDict &floatParams, const fastllm::IntDict &intParams) {
Data &input = *(datas.find("input")->second);
Expand Down Expand Up @@ -2769,6 +2840,29 @@ namespace fastllm {
}
}

// Applies a BERT-style extended attention mask in place:
// input[b, h, q, k] += mask[b, k], where the mask values are already the
// additive penalties (e.g. large negatives for padded positions).
void CpuAttentionExtendedMaskOp::Run(const std::string &opType, const fastllm::DataDict &datas,
                                     const fastllm::FloatDict &floatParams, const fastllm::IntDict &intParams) {
    Data &input = *(datas.find("input")->second);
    Data &mask = *(datas.find("mask")->second);
    int batch = input.dims[0];
    int rowsPerBatch = input.dims[1] * input.dims[2]; // heads * query positions
    int spatial = input.dims[3];                      // key positions

    AssertInFastLLM(mask.dataType == DataType::FLOAT32, "AttentionExtendedMask: mask's datatype should be float32.");
    if (input.dataType != DataType::FLOAT32) {
        ErrorInFastLLM("AttentionExtendedMask error: unsupport input's dataType.\n");
    } else {
        float *attnData = (float *) input.cpuData;
        float *maskData = (float *) mask.cpuData;
        for (int b = 0; b < batch; b++) {
            const float *curMask = maskData + b * spatial; // one mask row per batch item
            float *row = attnData + (long long) b * rowsPerBatch * spatial;
            for (int r = 0; r < rowsPerBatch; r++, row += spatial) {
                for (int k = 0; k < spatial; k++) {
                    row[k] += curMask[k];
                }
            }
        }
    }
}

void CpuAlibiMaskOp::Run(const std::string &opType, const fastllm::DataDict &datas,
const fastllm::FloatDict &floatParams, const fastllm::IntDict &intParams) {
Data &input = *(datas.find("input")->second);
Expand Down
49 changes: 49 additions & 0 deletions src/fastllm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1012,6 +1012,10 @@ namespace fastllm {
return s;
}

// True exactly for ASCII alphanumerics: '0'-'9', 'a'-'z', 'A'-'Z'.
// (Deliberately locale-independent, unlike std::isalnum.)
bool isDigitOrChar(char c) {
    if (c >= '0' && c <= '9') {
        return true;
    }
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}

Data Tokenizer::Encode(const std::string &ori) {
if (this->type == TokenizerType::BPE) {
std::string s = Normalize(ori);
Expand Down Expand Up @@ -1329,6 +1333,33 @@ namespace fastllm {
}
}

return Data (DataType::FLOAT32, {1, (int)v.size()}, v);
} else if (this->type == TokenizerType::BERT) {
std::vector <float> v;
for (int i = 0; i < ori.size(); i++) {
int tokenId = -999999, pos = i - 1;
TrieNode *now = this->root;

if (i > 0 && isDigitOrChar(ori[i - 1]) && isDigitOrChar(ori[i])) {
now = now->next['#']->next['#'];
}
for (int j = i; j < ori.size(); j++) {
if (now->next.find(ori[j]) != now->next.end()) {
now = now->next[ori[j]];
if (now->tokenId != -999999) {
tokenId = now->tokenId;
pos = j;
}
} else {
break;
}
}
if (pos >= i) {
i = pos;
v.push_back(tokenId);
}
}

return Data (DataType::FLOAT32, {1, (int)v.size()}, v);
} else {
std::vector <float> v;
Expand Down Expand Up @@ -2177,6 +2208,18 @@ namespace fastllm {
}, {}, {});
}

void TanH(const Data &input, Data &output) {
curExecutor->Run("TanH", {
{"input", (Data*)&input}, {"output", &output}
}, {}, {});
}

// Exact (erf-based) GELU activation, dispatched to the current executor's
// "Gelu" operator.
void Gelu(const fastllm::Data &input, fastllm::Data &output) {
    Data *in = (Data *) &input;
    Data *out = &output;
    curExecutor->Run("Gelu", {{"input", in}, {"output", out}}, {}, {});
}

void GeluNew(const fastllm::Data &input, fastllm::Data &output) {
curExecutor->Run("GeluNew", {
{"input", (Data*)&input}, {"output", &output}
Expand Down Expand Up @@ -2213,6 +2256,12 @@ namespace fastllm {
}, {{"maskValue", maskValue}}, {});
}

// Applies a BERT-style extended (additive) attention mask to `input` in place,
// via the executor's "AttentionExtendedMask" operator.
void AttentionExtendedMask(Data &input, const Data &mask) {
    Data *maskPtr = (Data *) &mask;
    curExecutor->Run("AttentionExtendedMask", {{"input", &input}, {"mask", maskPtr}}, {}, {});
}

void AlibiMask(Data &input, const Data &mask, float maskValue) {
curExecutor->Run("AlibiMask", {
{"input", &input}, {"mask", (Data*)&mask}
Expand Down
11 changes: 11 additions & 0 deletions src/model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <memory>

#include "glm.h"
#include "minicpm.h"
#include "internlm2.h"
#include "bert.h"

namespace fastllm {
void basellm::LoadFromFile(const std::string &fileName) {
Expand Down Expand Up @@ -118,12 +119,22 @@ namespace fastllm {
model->weight.tokenizer.type = Tokenizer::TokenizerType::QWEN;
} else if (modelType == "glm") {
model = (basellm*)(new GLMModel());
} else if (modelType == "bert") {
model = (basellm*)(new BertModel());
} else {
ErrorInFastLLM("Unkown model type: " + modelType);
}
return model;
}

std::unique_ptr<BertModel> CreateEmbeddingModelFromFile(const std::string &fileName) {
BertModel *model = new BertModel();
model->weight.tokenizer.type = Tokenizer::BERT;
model->LoadFromFile(fileName);
model->WarmUp();
return std::unique_ptr<fastllm::BertModel> (model);
}

std::unique_ptr<fastllm::basellm> CreateLLMModelFromFile(const std::string &fileName) {
std::string modelType = GetModelTypeFromFile(fileName);
basellm *model = CreateModelWithType(modelType);
Expand Down
Loading

0 comments on commit 4736535

Please sign in to comment.