
tts : add OuteTTS support #10784

Merged: 45 commits merged into master from gg/tts-add-outetts on Dec 18, 2024

Conversation

@ggerganov (Owner) commented Dec 11, 2024

close #10173

Overview

This PR adds inference support for the OuteTTS vocoder (i.e. WavTokenizer) directly into libllama. This enables full text-to-speech generation using llama.cpp.

```bash
# generate output.wav
llama-tts \
    --hf-repo OuteAI/OuteTTS-0.2-500M-GGUF \
    --hf-file OuteTTS-0.2-500M-Q8_0.gguf \
    --hf-repo-v ggml-org/WavTokenizer \
    --hf-file-v WavTokenizer-Large-75-F16.gguf \
    -p "I am sorry Dave, I'm afraid I can't do that."

# play the generated audio
ffplay output.wav
```
sorry.mp4

TTS requires 2 models to be provided: an LLM and a voice decoder. The first one generates audio codes (tokens) from the provided input text, based on some voice settings. The second one converts the audio codes into a spectrogram. The spectrogram is then converted back to audio with inverse FFT.
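In terms of data shapes, the flow looks roughly like this (a conceptual sketch only: the 75 codes/s rate and the 24 kHz output come from the model and checkpoint names, the 1282-dim frame is inferred from the constants in the embd_to_audio() reference further down, and the codebook size is an assumption):

```python
import numpy as np

# stage 1 (LLM): input text -> discrete audio codes, ~75 codes per second
n_codes = 75                                 # ~1 second of audio
codes = np.random.randint(0, 4096, n_codes)  # codebook size is an assumption

# stage 2 (voice decoder): audio codes -> spectrogram embeddings, one frame
# per code; each frame holds 641 log-magnitudes + 641 phases
# (n_fft/2 + 1 bins each, with n_fft = 1280)
embd = np.random.randn(n_codes, 1282).astype(np.float32)

# stage 3: inverse FFT + windowed overlap-add -> waveform at 24 kHz
# (see the embd_to_audio() reference implementation further down)
```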

Usage

- Convert the OuteTTS LLM to GGUF and optionally quantize it:

```bash
# this will produce F16 LLM model (~1 GB)
mkdir models/outetts-0.2-0.5B-llm
python convert_hf_to_gguf.py OuteAI/OuteTTS-0.2-500M/ --outfile models/outetts-0.2-0.5B-llm/ggml-model-f16.gguf --outtype f16

# this will produce Q8_0 LLM model (~500 MB)
llama-quantize models/outetts-0.2-0.5B-llm/ggml-model-f16.gguf models/outetts-0.2-0.5B-llm/ggml-model-q8_0.gguf q8_0
```

- Convert the WavTokenizer model to GGUF:

```bash
# convert PT -> HF
python examples/tts/convert_pt_to_hf.py ./WavTokenizer-large-speech-75token/wavtokenizer_large_speech_320_24k.ckpt

# convert HF -> GGUF (~250 MB)
mkdir models/wavtokenizer-large-75
python convert_hf_to_gguf.py WavTokenizer-large-speech-75token/ --outfile models/wavtokenizer-large-75/ggml-model-f16.gguf --outtype f16
```

- Generate speech from text using the `llama-tts` example:

```bash
llama-tts \
    -m  ./models/outetts-0.2-0.5B-llm/ggml-model-q8_0.gguf \
    -mv ./models/wavtokenizer-large-75/ggml-model-f16.gguf \
    -p "Hello world"

Note that the sampling settings of the LLM might need some adjustments.

Server usage

Initial server support is available via the examples/tts/tts-outetts.py script. It requires starting two servers: one for the LLM and one for WavTokenizer:

```bash
# llm server
./build/bin/llama-server -m ./models/outetts-0.2-0.5B-llm/ggml-model-q8_0.gguf --port 8020

# wavtokenizer server
./build/bin/llama-server -m ./models/wavtokenizer-large-75/ggml-model-f16.gguf --port 8021 --embeddings --pooling none

# generate audio
python ./examples/tts/tts-outetts.py http://localhost:8020 http://localhost:8021 "Hello world"
```
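For reference, the client side of this setup boils down to something like the sketch below; the endpoint names are llama-server's /completion and /embeddings, but the exact payload and response fields here are assumptions, so consult tts-outetts.py for the real ones:

```python
# rough client-side sketch of the two-server flow (payload fields may differ)
import requests

host_llm = "http://localhost:8020"  # OuteTTS LLM
host_dec = "http://localhost:8021"  # WavTokenizer decoder

# stage 1: the LLM turns the (preprocessed) prompt into audio-code tokens;
# the real script uses "return_tokens" to get token ids back directly
r = requests.post(f"{host_llm}/completion", json={
    "prompt": "Hello world",  # the real script builds a full OuteTTS prompt
    "n_predict": 1024,
})
codes = r.json()["content"]

# stage 2: the WavTokenizer server (--embeddings --pooling none) returns one
# spectrogram embedding per token
r = requests.post(f"{host_dec}/embeddings", json={"content": codes})
embd = r.json()  # per-token embeddings; format depends on the server version

# stage 3: spectrogram -> audio (still missing in the Python script, see below)
```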

The Python script is currently missing the spectrogram -> audio conversion. For a reference implementation of this post-processing, see:

  • The original Python code: https://github.com/edwko/OuteTTS/blob/f43afd9fcc61baf18da0664ebe0e3ac0ebbb3814/outetts/wav_tokenizer/decoder/heads.py#L24-L67
  • Or the embd_to_audio() function in tts.cpp:
```cpp
// TODO: not optimized at all
static std::vector<float> embd_to_audio(
        const float * embd,
        const int n_codes,
        const int n_embd,
        const int n_thread) {
    const int n_fft = 1280;
    const int n_hop = 320;
    const int n_win = 1280;
    const int n_pad = (n_win - n_hop)/2;
    const int n_out = (n_codes - 1)*n_hop + n_win;

    std::vector<float> hann(n_fft);

    fill_hann_window(hann.size(), true, hann.data());

    int n_spec = n_embd*n_codes;

    std::vector<float> E (n_spec);
    std::vector<float> S (n_spec);
    std::vector<float> ST(n_spec);

    for (int l = 0; l < n_codes; ++l) {
        for (int k = 0; k < n_embd; ++k) {
            E[k*n_codes + l] = embd[l*n_embd + k];
        }
    }

    for (int k = 0; k < n_embd/2; ++k) {
        for (int l = 0; l < n_codes; ++l) {
            float mag = E[(k           )*n_codes + l];
            float phi = E[(k + n_embd/2)*n_codes + l];

            mag = exp(mag);

            if (mag > 1e2) {
                mag = 1e2;
            }
            S[2*(k*n_codes + l) + 0] = mag*cosf(phi);
            S[2*(k*n_codes + l) + 1] = mag*sinf(phi);
        }
    }

    for (int l = 0; l < n_codes; ++l) {
        for (int k = 0; k < n_embd/2; ++k) {
            ST[l*n_embd + 2*k + 0] = S[2*(k*n_codes + l) + 0];
            ST[l*n_embd + 2*k + 1] = S[2*(k*n_codes + l) + 1];
        }
    }

    std::vector<float> res  (n_codes*n_fft);
    std::vector<float> hann2(n_codes*n_fft);

    std::vector<std::thread> workers(n_thread);
    for (int i = 0; i < n_thread; ++i) {
        workers[i] = std::thread([&, i]() {
            for (int l = i; l < n_codes; l += n_thread) {
                irfft(n_fft, ST.data() + l*n_embd, res.data() + l*n_fft);
                for (int j = 0; j < n_fft; ++j) {
                    res  [l*n_fft + j] *= hann[j];
                    hann2[l*n_fft + j]  = hann[j] * hann[j];
                }
            }
        });
    }
    for (int i = 0; i < n_thread; ++i) {
        workers[i].join();
    }

    std::vector<float> audio;
    std::vector<float> env;

    fold(res,   n_out, n_win, n_hop, n_pad, audio);
    fold(hann2, n_out, n_win, n_hop, n_pad, env); // TODO: can be done once

    for (size_t i = 0; i < audio.size(); ++i) {
        audio[i] /= env[i];
    }

    return audio;
}
```

I don't know what the best way to implement this in a Python script is, and importing PyTorch for it seems like overkill. So I'll leave it like this for now and hope we get some ideas later on.
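One PyTorch-free option would be plain numpy, whose np.fft.irfft covers the only non-trivial primitive. Below is an untested sketch mirroring the C++ embd_to_audio() above; it assumes n_embd == n_fft + 2 (one complex bin per rFFT output), and the window convention and edge padding would need to be verified against fill_hann_window() and fold() in tts.cpp:

```python
import numpy as np

def embd_to_audio(embd, n_fft=1280, n_hop=320):
    # embd: (n_codes, n_embd) array; first half of each row holds
    # log-magnitudes, second half phases (as in tts.cpp)
    embd = np.asarray(embd, dtype=np.float32)
    n_codes, n_embd = embd.shape
    half = n_embd // 2

    mag = np.minimum(np.exp(embd[:, :half]), 1e2)  # same 1e2 magnitude cap
    phi = embd[:, half:]
    frames = np.fft.irfft(mag * np.exp(1j * phi), n=n_fft, axis=1)

    # periodic Hann window, intended to match fill_hann_window(..., true, ...)
    hann = 0.5 * (1.0 - np.cos(2.0 * np.pi * np.arange(n_fft) / n_fft))

    # windowed overlap-add, normalized by the folded squared window
    n_out = (n_codes - 1) * n_hop + n_fft
    audio = np.zeros(n_out)
    env = np.zeros(n_out)
    for l in range(n_codes):
        audio[l*n_hop : l*n_hop + n_fft] += frames[l] * hann
        env[l*n_hop : l*n_hop + n_fft] += hann * hann
    audio /= np.maximum(env, 1e-8)

    # trim the framing padding, as fold() does with n_pad = (n_win - n_hop)/2
    n_pad = (n_fft - n_hop) // 2
    return audio[n_pad : n_out - n_pad]
```

The result could then be written with the standard-library wave module at WavTokenizer's 24 kHz sample rate, keeping the script dependency-free apart from numpy.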

TODO:

@github-actions bot added the examples, python (python script changes), and ggml (changes relating to the ggml tensor library for machine learning) labels on Dec 11, 2024
@mirek190 commented Dec 11, 2024

wow ... nice ;)

and an implementation of multimodal models like vision next, and we're done ;-D

@ggerganov (Owner, Author)

> wow ... nice ;)
>
> and an implementation of multimodal models like vision next, and we're done ;-D

and-we-are-done.mp4

@edwko commented Dec 11, 2024

Awesome! Really excited to see it running natively 😊

@ggerganov (Owner, Author)

> Awesome! Really excited to see it running natively

natively.mp4

@ggerganov (Owner, Author)

Here is a longer generation:

> TTS requires 2 models to be provided: an LLM and a Vocoder(?). The first one generates audio codes (tokens) from the provided input text, based on some voice settings. The second one converts the audio codes into a spectrogram. The spectrogram is then converted back to audio with inverse FFT.

longer.mp4

Not sure how to pass punctuation yet. Or even if this model supports it.

punctuation.mp4

@jadams777
This is great. Would love to see a video tutorial on how to set up Ollama with this.

@ggerganov (Owner, Author)

> This is great. Would love to see a video tutorial on how to set up Ollama with this.

ollama.mp4

@ngxson (Collaborator) commented Dec 11, 2024

Out of curiosity, does it make sense to combine both llm+voc into one gguf? I'm thinking about the idea of having llama-voice-to-voice -m llama-3.1.gguf -mtts oute-tts.gguf -masr whisper.gguf, but maybe it's too early to think about that?

@ggerganov (Owner, Author)

Maybe we can add support to pack multiple models in a single GGUF.

@edwko commented Dec 11, 2024

> Not sure how to pass punctuation yet. Or even if this model supports it.
>
> punctuation.mp4

The current models don't support special characters yet. I plan to add support for this in the next release. For now, the interface strips them out.

@ggerganov (Owner, Author)

Great, looking forward to this. And many thanks and admiration for this work 👍

```cpp
#include <vector>
#include <fstream>
#include <thread>
```

@edwko commented Dec 12, 2024

Here's a suggestion for the text preprocessing implementation, based on how it's currently done in the library.

```cpp
#include <string>
#include <vector>
#include <regex>
#include <stdexcept>
#include <sstream>
#include <map>
#include <iostream>
#include <algorithm> // needed for std::transform
#include <cctype>    // needed for ::tolower

const std::map<int, std::string> ones = {
    {0, "zero"}, {1, "one"}, {2, "two"}, {3, "three"}, {4, "four"},
    {5, "five"}, {6, "six"}, {7, "seven"}, {8, "eight"}, {9, "nine"},
    {10, "ten"}, {11, "eleven"}, {12, "twelve"}, {13, "thirteen"}, {14, "fourteen"},
    {15, "fifteen"}, {16, "sixteen"}, {17, "seventeen"}, {18, "eighteen"}, {19, "nineteen"}
};

const std::map<int, std::string> tens = {
    {2, "twenty"}, {3, "thirty"}, {4, "forty"}, {5, "fifty"},
    {6, "sixty"}, {7, "seventy"}, {8, "eighty"}, {9, "ninety"}
};

// Convert a number less than 1000 to words
std::string convert_less_than_thousand(int num) {
    std::string result;
    
    if (num >= 100) {
        result += ones.at(num / 100) + " hundred ";
        num %= 100;
    }
    
    if (num >= 20) {
        result += tens.at(num / 10);
        if (num % 10 > 0) {
            result += "-" + ones.at(num % 10);
        }
    } else if (num > 0) {
        result += ones.at(num);
    }
    
    return result;
}

std::string number_to_words(const std::string& number_str) {
    try {
        size_t decimal_pos = number_str.find('.');
        std::string integer_part = number_str.substr(0, decimal_pos);
        
        int int_number = std::stoi(integer_part);
        std::string result;
        
        if (int_number == 0) {
            result = "zero";
        } else {
            if (int_number >= 1000000000) {
                int billions = int_number / 1000000000;
                result += convert_less_than_thousand(billions) + " billion ";
                int_number %= 1000000000;
            }
            
            if (int_number >= 1000000) {
                int millions = int_number / 1000000;
                result += convert_less_than_thousand(millions) + " million ";
                int_number %= 1000000;
            }
            
            if (int_number >= 1000) {
                int thousands = int_number / 1000;
                result += convert_less_than_thousand(thousands) + " thousand ";
                int_number %= 1000;
            }
            
            if (int_number > 0) {
                result += convert_less_than_thousand(int_number);
            }
        }
        
        // Handle decimal part
        if (decimal_pos != std::string::npos) {
            result += " point";
            std::string decimal_part = number_str.substr(decimal_pos + 1);
            for (char digit : decimal_part) {
                result += " " + ones.at(digit - '0');
            }
        }
        
        return result;
    } catch (const std::exception& e) {
        // Skip if fails
        return " "; 
    }
}

std::string replace_numbers_with_words(const std::string& input_text) {
    std::regex number_pattern(R"(\d+(\.\d+)?)");
    std::string result;
    auto it = std::sregex_iterator(input_text.begin(), input_text.end(), number_pattern);
    auto end = std::sregex_iterator();

    size_t last_pos = 0;
    for (std::sregex_iterator i = it; i != end; ++i) {
        const std::smatch& match = *i;
        result.append(input_text, last_pos, match.position() - last_pos);
        result.append(number_to_words(match.str()));
        last_pos = match.position() + match.length();
    }
    result.append(input_text, last_pos);
    
    return result;
}

// Based on: https://github.com/edwko/OuteTTS/blob/a613e79c489d8256dd657ea9168d78de75895d82/outetts/version/v1/prompt_processor.py#L39
std::string process_text(const std::string& text) {
    
    // For now I skipped text romanization as I am unsure how to handle
    // uroman and MeCab implementations in C++
    // maybe something like https://github.com/anyascii/anyascii/ could work.
    // currently only English would be supported in this function

    std::string processed_text = replace_numbers_with_words(text);

    std::transform(processed_text.begin(), processed_text.end(), 
                  processed_text.begin(), ::tolower);

    std::regex special_chars(R"([-_/,\.\\])");
    processed_text = std::regex_replace(processed_text, special_chars, " ");
    
    std::regex non_alpha(R"([^a-z\s])");
    processed_text = std::regex_replace(processed_text, non_alpha, "");
    
    std::regex multiple_spaces(R"(\s+)");
    processed_text = std::regex_replace(processed_text, multiple_spaces, " ");
    
    processed_text = std::regex_replace(processed_text, std::regex(R"(^\s+|\s+$)"), "");

    /*
        Replace spaces with the separator token same as in line 365

        for (auto & c : prompt_user) {
        if (c == ' ') {
            prompt_clean += "<|text_sep|>";
    */
    processed_text = std::regex_replace(processed_text, std::regex(R"(\s)"), "<|text_sep|>");

    return processed_text;
}
```
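For the Python example, a rough equivalent of the preprocessing above could look like this sketch; it is English-only, mirrors the regex pipeline of the C++ version, and delegates number expansion to the third-party num2words package instead of porting the converter:

```python
import re
from num2words import num2words  # assumption: third-party number-to-words

def process_text(text: str) -> str:
    # expand numbers like "123" or "3.14" into words
    text = re.sub(r"\d+(\.\d+)?",
                  lambda m: num2words(float(m.group()) if "." in m.group()
                                      else int(m.group())),
                  text)
    text = text.lower()
    text = re.sub(r"[-_/,\.\\]", " ", text)   # listed punctuation -> spaces
    text = re.sub(r"[^a-z\s]", "", text)      # drop any other non-letters
    text = re.sub(r"\s+", " ", text).strip()  # collapse whitespace
    return text.replace(" ", "<|text_sep|>")  # word separator token

print(process_text("Hello, World! 123"))
# hello<|text_sep|>world<|text_sep|>one<|text_sep|>hundred<|text_sep|>...
```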

@ggerganov mentioned this pull request on Dec 13, 2024
@edwko commented Dec 14, 2024

I've consolidated WavTokenizer into a single model.py file and split the base model (1.75 GB) into two components:

https://huggingface.co/OuteAI/wavtokenizer-large-75token-interface/tree/main

  • encoder (82 MB)
  • decoder (248 MB)

Might help with the convert_pt_to_hf.py script.

Here's the splitting code:

```python
# model.py code...
import os
import torch

def split_wav_tokenizer(model, save_directory):
    """Split WavTokenizer model and save components"""
    encoder_dir = os.path.join(save_directory, "encoder")
    decoder_dir = os.path.join(save_directory, "decoder")
    
    encoder = WavEncoder(model.feature_extractor)
    encoder.save_pretrained(encoder_dir)
    
    codebook_weights = torch.cat(
        [vq.codebook for vq in model.feature_extractor.encodec.quantizer.vq.layers],
        dim=0
    )
    decoder = WavDecoder(model.backbone, model.head, codebook_weights)
    decoder.save_pretrained(decoder_dir)
```

@ggerganov (Owner, Author) commented Dec 17, 2024

Initial server support is now available via the examples/tts/tts-outetts.py script. It requires starting two servers: one for the LLM and one for WavTokenizer:

```bash
# llm server
./build/bin/llama-server -m ./models/outetts-0.2-0.5B-llm/ggml-model-q8_0.gguf --port 8020

# wavtokenizer server
./build/bin/llama-server -m ./models/wavtokenizer-large-75/ggml-model-f16.gguf --port 8021 --embeddings --pooling none

# generate audio
python ./examples/tts/tts-outetts.py http://localhost:8020 http://localhost:8021 "Hello world"
```

The Python script is currently missing the spectrogram -> audio conversion. I don't know what the best way to implement this is, and importing PyTorch for it seems like overkill. So I'll leave it like this for now and hope we get some ideas later on.

This is still WIP as we'll refactor the endpoints to improve support for this, before merging.

@ggerganov changed the base branch from master to gg/server-embeddings-all on December 17, 2024
@ggerganov force-pushed the gg/server-embeddings-all branch from 2230786 to 2a5510e on December 18, 2024
@ggerganov (Owner, Author)

Planning to merge this later today. There is a lot that can be improved in the following aspects:

  • Better conversion script for the WavTokenizer model
  • A more general TTS example (currently hacked just for OuteTTS)
  • Improve spectrogram post-processing implementation
  • Better server support + voice loading

The primary goal of this PR was to see how viable it is to support TTS in libllama and lay down some initial steps. The OuteTTS implementation did not require any major modifications to the API, so I think this is a good indication for integrating more TTS models in the future.

After merging this, I will focus on refactoring the src/llama.cpp to make the code more modularized and figure out how to improve the KV cache implementation.

@ggerganov merged commit 0bf2d10 into master on Dec 18, 2024 (50 checks passed)
@ggerganov deleted the gg/tts-add-outetts branch on December 18, 2024
@bachittle (Contributor)

Awesome work! Would love to see more models like these supported in the future. This one comes to mind as a potential next candidate:
https://huggingface.co/fishaudio/fish-speech-1.5
https://arxiv.org/abs/2411.01156

@mirek190

Finally llama.cpp is getting multimodal 😁

@jadams777

> Awesome work! Would love to see more models like these supported in the future. This one comes to mind as a potential next candidate: https://huggingface.co/fishaudio/fish-speech-1.5 https://arxiv.org/abs/2411.01156

+1 for Fish Speech

arthw pushed a commit to arthw/llama.cpp that referenced this pull request Dec 20, 2024
* server : add "tokens" output

ggml-ci

* server : output embeddings for all tokens when pooling = none

ggml-ci

* server : be explicit about the pooling type in the tests

ggml-ci

* server : do not normalize embeddings when there is no pooling

ggml-ci

* llama : add OuteTTS support (wip)

* wip

* extract features

* first conv

* group norm

* resnet conv

* resnet

* attn

* pos net

* layer norm

* convnext

* head

* hann window

* fix n_embd + remove llama.cpp hacks

* compute hann window

* fft

* spectrum processing

* clean-up

* tts : receive input text and generate codes

* clip : fix new conv name

* tts : minor fix

* tts : add header + minor fixes

ggml-ci

* tts : add matchematical constant

ggml-ci

* tts : fix sampling + cut initial noise

* tts : fixes

* tts : update default samplers

ggml-ci

* tts : text pre-processing

* tts : outetts-voc -> wavtokenizer-dec

* tts : remove hardcoded constants

ggml-ci

* tts : fix tensor shapes

* llama : refactor wavtokenizer tensors

ggml-ci

* cont

ggml-ci

* cont [no ci]

* llama : update WavTokenizer to non-causal attn

* llama : handle no-vocab detokenization

* tts : add Python example for OuteTTS (wip)

* tts : extend python example to generate spectrogram

ggml-ci

* server : fix rebase artifacts

* tts : enable "return_tokens" in Python example

ggml-ci

* tts : minor fixes

* common : support HF download for vocoder
@ylsdamxssjxxdd (Contributor)

Great work! Can it support other languages?

@Green-Sky (Collaborator)

@ylsdamxssjxxdd check out #10894 for a related discussion with code.

Labels: examples, ggml (changes relating to the ggml tensor library for machine learning), python (python script changes), server

Successfully merging this pull request may close these issues: tts : add basic example for text-to-speech

8 participants