-
Notifications
You must be signed in to change notification settings - Fork 10.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ggml-ci
- Loading branch information
Showing
8 changed files
with
820 additions
and
702 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
#include "llama-cparams.h" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#pragma once | ||
|
||
#include "llama.h" | ||
|
||
#include <cstdint> | ||
|
||
struct llama_cparams { | ||
uint32_t n_ctx; // context size used during inference | ||
uint32_t n_batch; | ||
uint32_t n_ubatch; | ||
uint32_t n_seq_max; | ||
int n_threads; // number of threads to use for generation | ||
int n_threads_batch; // number of threads to use for batch processing | ||
|
||
float rope_freq_base; | ||
float rope_freq_scale; | ||
|
||
uint32_t n_ctx_orig_yarn; | ||
// These hyperparameters are not exposed in GGUF, because all | ||
// existing YaRN models use the same values for them. | ||
float yarn_ext_factor; | ||
float yarn_attn_factor; | ||
float yarn_beta_fast; | ||
float yarn_beta_slow; | ||
float defrag_thold; | ||
|
||
bool embeddings; | ||
bool causal_attn; | ||
bool offload_kqv; | ||
bool flash_attn; | ||
bool no_perf; | ||
|
||
enum llama_pooling_type pooling_type; | ||
|
||
ggml_backend_sched_eval_callback cb_eval; | ||
void * cb_eval_user_data; | ||
}; |
Oops, something went wrong.