Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: aiproxy modelinfo and dashboard and model rpm limit #5291

Open
wants to merge 99 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
99 commits
Select commit Hold shift + click to select a range
1cab226
feat: model info
zijiren233 Dec 14, 2024
8f794f8
fix: model config vision
zijiren233 Dec 15, 2024
d1798e0
feat: aiproxy dashboard api
zijiren233 Dec 16, 2024
173d8d7
fix: two week and pg hour format
zijiren233 Dec 16, 2024
df79d24
fix: model tag name
zijiren233 Dec 16, 2024
945cbd8
feat: model rpm limit
zijiren233 Dec 17, 2024
fc27042
fix: ci
zijiren233 Dec 17, 2024
78bdc4b
feat: search log with code type
zijiren233 Dec 17, 2024
ba231f4
feat: resp detail buf use pool
zijiren233 Dec 18, 2024
0a8e948
feat: no need init client, use ctx
zijiren233 Dec 18, 2024
57b2dac
fix: lint
zijiren233 Dec 18, 2024
f88908a
feat: admin api log field
zijiren233 Dec 18, 2024
8e84a06
feat: log usage
zijiren233 Dec 18, 2024
2284935
feat: auto retry
zijiren233 Dec 19, 2024
dc7a4ca
fix: retry channel exhausted, use first channel
zijiren233 Dec 19, 2024
52939f0
feat: init monitor
zijiren233 Dec 19, 2024
587a369
feat: auto ban error rate and auto test unban
zijiren233 Dec 20, 2024
9d05406
fix: getChannelWithFallback
zijiren233 Dec 20, 2024
f2875bf
feat: support google thinking
zijiren233 Dec 22, 2024
6f80f1f
fix: monitor
zijiren233 Dec 22, 2024
cd8f23d
feat: get log detail
zijiren233 Dec 24, 2024
5d6df15
feat: no need channel config
zijiren233 Dec 24, 2024
ab94bb4
feat: key validate
zijiren233 Dec 24, 2024
1ee5d70
feat: add model error auto ban option
zijiren233 Dec 24, 2024
80b6924
feat: gemini tool
zijiren233 Dec 24, 2024
d549f9e
feat: gemini openai sdk
zijiren233 Dec 24, 2024
e89d300
fix: option keys
zijiren233 Dec 24, 2024
51e91df
feat: do not save access at
zijiren233 Dec 24, 2024
b7665a6
fix: del no use options
zijiren233 Dec 24, 2024
cc1cdc2
fix: del no use options
zijiren233 Dec 24, 2024
ba2a486
fix: auto test banned models need return when get from redis error ha…
zijiren233 Dec 24, 2024
64fd532
fix: remove channel db hook
zijiren233 Dec 24, 2024
f0d4475
chore: clean detail only after insert it
zijiren233 Dec 24, 2024
c0d20e7
fix: err print on debug
zijiren233 Dec 24, 2024
305174a
fix: cache update
zijiren233 Dec 24, 2024
0e3d3d3
feat: group consume level rpm ratio
zijiren233 Dec 25, 2024
f9e9150
fix: error return
zijiren233 Dec 25, 2024
c77aa38
feat: decode svg
zijiren233 Dec 25, 2024
589b10f
fix: check is image
zijiren233 Dec 25, 2024
3a84bd3
fix: reply raw 429 message
zijiren233 Dec 25, 2024
95d9f03
feat: req and resp body max size limit
zijiren233 Dec 25, 2024
6f12375
fix: _ import lint
zijiren233 Dec 25, 2024
98d83e0
fix: get token encoder log
zijiren233 Dec 25, 2024
3938847
fix: sum used amount
zijiren233 Dec 26, 2024
2131509
fix: delete no need cache
zijiren233 Dec 27, 2024
4b7d3ec
feat: dashboard rpm
zijiren233 Dec 27, 2024
bf0cb59
feat: dashboard tpm
zijiren233 Dec 27, 2024
0c7d28b
feat: step modelinfo
zijiren233 Dec 30, 2024
f60395c
feat: yi
zijiren233 Dec 30, 2024
53d59c6
fix: yi
zijiren233 Dec 30, 2024
3ce3c8a
feat: debug banned
zijiren233 Dec 31, 2024
4de28bd
chore: bump go mod
zijiren233 Dec 31, 2024
de12071
chore: bump go mod
zijiren233 Dec 31, 2024
d5dca05
fix: save model time parse
zijiren233 Dec 31, 2024
ff8a129
feat: fill dash charts gaps
zijiren233 Dec 31, 2024
a5f01bc
feat: fill dash charts gaps
zijiren233 Dec 31, 2024
9e38776
chore: go mod tidy
zijiren233 Dec 31, 2024
8bfbe9e
feat: dashboard timespan
zijiren233 Dec 31, 2024
3f13fbc
feat: dashboard timespan from query
zijiren233 Dec 31, 2024
f19d76e
feat: decouple request paths
zijiren233 Dec 31, 2024
abcb220
feat: group model tpm limit
zijiren233 Dec 31, 2024
3fe259a
feat: decoupling url paths
zijiren233 Jan 2, 2025
38f9f4f
fix: check balance
zijiren233 Jan 2, 2025
9d83c84
refactor: relay handler
zijiren233 Jan 2, 2025
915e35e
refactor: post relay
zijiren233 Jan 2, 2025
8c3012c
feat: fill gaps before and after point
zijiren233 Jan 2, 2025
e95221d
fix: qwen long tokens
zijiren233 Jan 2, 2025
0eda611
feat: get rpm from redis
zijiren233 Jan 2, 2025
5ad8364
fix: fill gaps
zijiren233 Jan 2, 2025
69352d1
fix: log error
zijiren233 Jan 2, 2025
8a087ee
fix: token not found err log
zijiren233 Jan 2, 2025
da0bcdc
fix: if err resp is not json, reply raw content
zijiren233 Jan 3, 2025
ef39d8d
fix: do not save same response body and content
zijiren233 Jan 3, 2025
a3b8640
fix: save resp json or empty
zijiren233 Jan 3, 2025
4e7c364
feat: sort distinct values
zijiren233 Jan 3, 2025
695ffeb
fix: token models
zijiren233 Jan 4, 2025
7fe03e0
feat: redis clean expired cache
zijiren233 Jan 4, 2025
40a2365
feat: atomic model cache
zijiren233 Jan 4, 2025
cc30b33
feat: consume
zijiren233 Jan 4, 2025
9855428
feat: group custom model rpm tpm
zijiren233 Jan 4, 2025
9ce8d3f
fix: models
zijiren233 Jan 5, 2025
b92a620
fix: v1 route
zijiren233 Jan 5, 2025
c6fd01a
fix: cors
zijiren233 Jan 5, 2025
c62b4ad
feat: rate limit err log record
zijiren233 Jan 5, 2025
93068f4
fix: rpush
zijiren233 Jan 5, 2025
00cf94d
fix: dashboard time span
zijiren233 Jan 6, 2025
3e7dad5
feat: group model list adjusted tpm rpm
zijiren233 Jan 6, 2025
9a7f5c1
feat: baichuan model config
zijiren233 Jan 6, 2025
2baece4
fix: rpm limit record ignore empty channel id
zijiren233 Jan 6, 2025
aa816e6
feat: disable model config
zijiren233 Jan 6, 2025
860cc1d
feat: internal token
zijiren233 Jan 6, 2025
e560a0f
fix: lint
zijiren233 Jan 6, 2025
07c4cb5
fix: record req to redis
zijiren233 Jan 6, 2025
2b0050d
feat: option from env
zijiren233 Jan 6, 2025
215daf9
fix: internal token option key
zijiren233 Jan 6, 2025
a5aa936
fix: ignore redis ping error
zijiren233 Jan 6, 2025
346f83d
fix: ignore redis ping error
zijiren233 Jan 6, 2025
8987cfb
fix: subscription
zijiren233 Jan 8, 2025
cca1597
fix: subscription
zijiren233 Jan 8, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 0 additions & 63 deletions service/aiproxy/common/client/init.go

This file was deleted.

201 changes: 92 additions & 109 deletions service/aiproxy/common/config/config.go
Original file line number Diff line number Diff line change
@@ -1,144 +1,129 @@
package config

import (
"math"
"os"
"slices"
"strconv"
"sync"
"sync/atomic"
"time"

"github.com/labring/sealos/service/aiproxy/common/env"
)

var (
OptionMap map[string]string
OptionMapRWMutex sync.RWMutex
DebugEnabled = env.Bool("DEBUG", false)
DebugSQLEnabled = env.Bool("DEBUG_SQL", false)
)

var (
DebugEnabled, _ = strconv.ParseBool(os.Getenv("DEBUG"))
DebugSQLEnabled, _ = strconv.ParseBool(os.Getenv("DEBUG_SQL"))
DisableAutoMigrateDB = env.Bool("DISABLE_AUTO_MIGRATE_DB", false)
OnlyOneLogFile = env.Bool("ONLY_ONE_LOG_FILE", false)
AdminKey = os.Getenv("ADMIN_KEY")
)

var (
// 当测试或请求的时候发生错误是否自动禁用渠道
automaticDisableChannelEnabled atomic.Bool
// 当测试成功是否自动启用渠道
automaticEnableChannelWhenTestSucceedEnabled atomic.Bool
// 是否近似计算token
approximateTokenEnabled atomic.Bool
// 重试次数
retryTimes atomic.Int64
// 暂停服务
disableServe atomic.Bool
// log detail 存储时间(小时)
disableServe atomic.Bool
logDetailStorageHours int64 = 3 * 24
internalToken atomic.Value
)

func GetLogDetailStorageHours() int64 {
return atomic.LoadInt64(&logDetailStorageHours)
}

func SetLogDetailStorageHours(hours int64) {
atomic.StoreInt64(&logDetailStorageHours, hours)
}

func GetDisableServe() bool {
return disableServe.Load()
}

func SetDisableServe(disabled bool) {
disableServe.Store(disabled)
}
var (
retryTimes atomic.Int64
enableModelErrorAutoBan atomic.Bool
modelErrorAutoBanRate = math.Float64bits(0.5)
timeoutWithModelType atomic.Value
disableModelConfig atomic.Bool
)

func GetAutomaticDisableChannelEnabled() bool {
return automaticDisableChannelEnabled.Load()
}
var (
defaultChannelModels atomic.Value
defaultChannelModelMapping atomic.Value
groupMaxTokenNum atomic.Int64
groupConsumeLevelRatio atomic.Value
)

func SetAutomaticDisableChannelEnabled(enabled bool) {
automaticDisableChannelEnabled.Store(enabled)
}
var geminiSafetySetting atomic.Value

func GetAutomaticEnableChannelWhenTestSucceedEnabled() bool {
return automaticEnableChannelWhenTestSucceedEnabled.Load()
}
var billingEnabled atomic.Bool

func SetAutomaticEnableChannelWhenTestSucceedEnabled(enabled bool) {
automaticEnableChannelWhenTestSucceedEnabled.Store(enabled)
func init() {
timeoutWithModelType.Store(make(map[int]int64))
defaultChannelModels.Store(make(map[int][]string))
defaultChannelModelMapping.Store(make(map[int]map[string]string))
groupConsumeLevelRatio.Store(make(map[float64]float64))
geminiSafetySetting.Store("BLOCK_NONE")
billingEnabled.Store(true)
internalToken.Store(os.Getenv("INTERNAL_TOKEN"))
}

func GetApproximateTokenEnabled() bool {
return approximateTokenEnabled.Load()
func GetDisableModelConfig() bool {
return disableModelConfig.Load()
}

func SetApproximateTokenEnabled(enabled bool) {
approximateTokenEnabled.Store(enabled)
func SetDisableModelConfig(disabled bool) {
disabled = env.Bool("DISABLE_MODEL_CONFIG", disabled)
disableModelConfig.Store(disabled)
}

func GetRetryTimes() int64 {
return retryTimes.Load()
}

func SetRetryTimes(times int64) {
times = env.Int64("RETRY_TIMES", times)
retryTimes.Store(times)
}

var DisableAutoMigrateDB = os.Getenv("DISABLE_AUTO_MIGRATE_DB") == "true"

var RelayTimeout = env.Int("RELAY_TIMEOUT", 0) // unit is second

var RateLimitKeyExpirationDuration = 20 * time.Minute
func GetEnableModelErrorAutoBan() bool {
return enableModelErrorAutoBan.Load()
}

var OnlyOneLogFile = env.Bool("ONLY_ONE_LOG_FILE", false)
func SetEnableModelErrorAutoBan(enabled bool) {
enabled = env.Bool("ENABLE_MODEL_ERROR_AUTO_BAN", enabled)
enableModelErrorAutoBan.Store(enabled)
}

var (
// 代理地址
RelayProxy = env.String("RELAY_PROXY", "")
// 用户内容请求代理地址
UserContentRequestProxy = env.String("USER_CONTENT_REQUEST_PROXY", "")
// 用户内容请求超时时间,单位为秒
UserContentRequestTimeout = env.Int("USER_CONTENT_REQUEST_TIMEOUT", 30)
)
func GetModelErrorAutoBanRate() float64 {
return math.Float64frombits(atomic.LoadUint64(&modelErrorAutoBanRate))
}

var AdminKey = env.String("ADMIN_KEY", "")
func SetModelErrorAutoBanRate(rate float64) {
rate = env.Float64("MODEL_ERROR_AUTO_BAN_RATE", rate)
atomic.StoreUint64(&modelErrorAutoBanRate, math.Float64bits(rate))
}

var (
globalAPIRateLimitNum atomic.Int64
defaultChannelModels atomic.Value
defaultChannelModelMapping atomic.Value
defaultGroupQPM atomic.Int64
groupMaxTokenNum atomic.Int32
)
func GetTimeoutWithModelType() map[int]int64 {
return timeoutWithModelType.Load().(map[int]int64)
}

func init() {
defaultChannelModels.Store(make(map[int][]string))
defaultChannelModelMapping.Store(make(map[int]map[string]string))
func SetTimeoutWithModelType(timeout map[int]int64) {
timeout = env.JSON("TIMEOUT_WITH_MODEL_TYPE", timeout)
timeoutWithModelType.Store(timeout)
}

// 全局qpm,不是根据ip限制,而是所有请求共享一个qpm
func GetGlobalAPIRateLimitNum() int64 {
return globalAPIRateLimitNum.Load()
func GetLogDetailStorageHours() int64 {
return atomic.LoadInt64(&logDetailStorageHours)
}

func SetGlobalAPIRateLimitNum(num int64) {
globalAPIRateLimitNum.Store(num)
func SetLogDetailStorageHours(hours int64) {
hours = env.Int64("LOG_DETAIL_STORAGE_HOURS", hours)
atomic.StoreInt64(&logDetailStorageHours, hours)
}

// group默认qpm,如果group没有设置qpm,则使用该qpm
func GetDefaultGroupQPM() int64 {
return defaultGroupQPM.Load()
func GetDisableServe() bool {
return disableServe.Load()
}

func SetDefaultGroupQPM(qpm int64) {
defaultGroupQPM.Store(qpm)
func SetDisableServe(disabled bool) {
disabled = env.Bool("DISABLE_SERVE", disabled)
disableServe.Store(disabled)
}

func GetDefaultChannelModels() map[int][]string {
return defaultChannelModels.Load().(map[int][]string)
}

func SetDefaultChannelModels(models map[int][]string) {
models = env.JSON("DEFAULT_CHANNEL_MODELS", models)
for key, ms := range models {
slices.Sort(ms)
models[key] = slices.Compact(ms)
Expand All @@ -151,54 +136,52 @@ func GetDefaultChannelModelMapping() map[int]map[string]string {
}

func SetDefaultChannelModelMapping(mapping map[int]map[string]string) {
mapping = env.JSON("DEFAULT_CHANNEL_MODEL_MAPPING", mapping)
defaultChannelModelMapping.Store(mapping)
}

// 那个group最多可创建的token数量,0表示不限制
func GetGroupMaxTokenNum() int32 {
return groupMaxTokenNum.Load()
func GetGroupConsumeLevelRatio() map[float64]float64 {
return groupConsumeLevelRatio.Load().(map[float64]float64)
}

func SetGroupMaxTokenNum(num int32) {
groupMaxTokenNum.Store(num)
func SetGroupConsumeLevelRatio(ratio map[float64]float64) {
ratio = env.JSON("GROUP_CONSUME_LEVEL_RATIO", ratio)
groupConsumeLevelRatio.Store(ratio)
}

var (
geminiSafetySetting atomic.Value
geminiVersion atomic.Value
)
// GetGroupMaxTokenNum returns max number of tokens per group, 0 means unlimited
func GetGroupMaxTokenNum() int64 {
return groupMaxTokenNum.Load()
}

func init() {
geminiSafetySetting.Store("BLOCK_NONE")
geminiVersion.Store("v1beta")
func SetGroupMaxTokenNum(num int64) {
num = env.Int64("GROUP_MAX_TOKEN_NUM", num)
groupMaxTokenNum.Store(num)
}

func GetGeminiSafetySetting() string {
return geminiSafetySetting.Load().(string)
}

func SetGeminiSafetySetting(setting string) {
setting = env.String("GEMINI_SAFETY_SETTING", setting)
geminiSafetySetting.Store(setting)
}

func GetGeminiVersion() string {
return geminiVersion.Load().(string)
}

func SetGeminiVersion(version string) {
geminiVersion.Store(version)
}

var billingEnabled atomic.Bool

func init() {
billingEnabled.Store(true)
}

func GetBillingEnabled() bool {
return billingEnabled.Load()
}

func SetBillingEnabled(enabled bool) {
enabled = env.Bool("BILLING_ENABLED", enabled)
billingEnabled.Store(enabled)
}

func GetInternalToken() string {
return internalToken.Load().(string)
}

func SetInternalToken(token string) {
token = env.String("INTERNAL_TOKEN", token)
internalToken.Store(token)
}
Loading
Loading