# ilm-7b-q_batch.py
# Batch-captions images with the 7-billion-parameter InternLM-XComposer2-VL model,
# quantized down to 4 bits. Not only does it take less memory, it's also a lot faster.
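# Usage sketch: the loop below reads image paths from stdin, one per line, and
# writes each caption next to its image as <name>.ilm7q. For example (paths
# hypothetical):
#   find /data/photos -name '*.jpg' | python ilm-7b-q_batch.py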
import os

import auto_gptq
import torch
from auto_gptq.modeling import BaseGPTQForCausalLM
from transformers import AutoTokenizer

# auto_gptq does not recognize the InternLM architecture out of the box.
auto_gptq.modeling._base.SUPPORTED_MODELS = ["internlm"]
torch.set_grad_enabled(False)  # inference only


class InternLMXComposer2QForCausalLM(BaseGPTQForCausalLM):
    # Tell auto_gptq which modules are quantized (inside the transformer
    # layers) and which stay in full precision (ViT, projection, embeddings).
    layers_block_name = "model.layers"
    outside_layer_modules = [
        'vit', 'vision_proj', 'model.tok_embeddings', 'model.norm', 'output',
    ]
    inside_layer_modules = [
        ["attention.wqkv.linear"],
        ["attention.wo.linear"],
        ["feed_forward.w1.linear", "feed_forward.w3.linear"],
        ["feed_forward.w2.linear"],
    ]


model = InternLMXComposer2QForCausalLM.from_quantized(
    'internlm/internlm-xcomposer2-vl-7b-4bit', trust_remote_code=True,
    device="cuda:0").eval()
tokenizer = AutoTokenizer.from_pretrained(
    'internlm/internlm-xcomposer2-vl-7b-4bit', trust_remote_code=True)

# Alternative prompts, kept for reference:
# query = '<ImageHere>Please describe this image in detail.'
# query = '<ImageHere>Describe any text or writing in the image.'
# query = '<ImageHere>Describe any signature, watermark, or copyright in the image.'
query = '<ImageHere>Please objectively describe the subjects in detail, including any blurring.'

while True:
    try:
        image_path = input()
    except EOFError:
        break
    if image_path == '':
        break  # a blank line also ends the batch
    filename, _ = os.path.splitext(image_path)
    txt_filename = f"{filename}.ilm7q"
    if os.path.exists(txt_filename):
        print(txt_filename, "already exists")
        continue
    with torch.cuda.amp.autocast():
        response, _ = model.chat(tokenizer, query=query, image=image_path,
                                 history=[], do_sample=False)
    print(response)
    with open(txt_filename, "w") as f:
        f.write(response)