!pip install llama-cpp-python
from llama_cpp import Llama
# Load the quantized (q4_K_M GGUF) Karasu-7B chat model through the
# llama-cpp-python bindings. NOTE(review): path is relative — the model
# file must sit next to this script; confirm before running.
llm = Llama(model_path="./lightblue-karasu-7B-chat-plus-unleashed-q4_K_M.gguf",
chat_format="qwen",  # Karasu is Qwen-derived, so use the Qwen chat template
n_ctx=2048,  # context window size in tokens
n_threads=16,  # CPU threads for inference
n_gpu_layers=10,  # number of layers offloaded to the GPU
low_vram=True,  # trade speed for lower GPU memory use
verbose=False  # suppress llama.cpp load/inference logging
)
# Seed the conversation history: a system prompt (Japanese: "You are an
# excellent AI assistant. Your name is karasu-7b.") followed by a priming
# assistant turn ("Yes, understood.").
system_turn = {"role": "system", "content": "あなたは優秀なAIアシスタント。名前はkarasu-7bです。"}
priming_turn = {"role": "assistant", "content": "はい。わかりました。"}
messages = [system_turn, priming_turn]
# Interactive chat REPL: read a user line, generate a reply, keep the
# full turn history in `messages`. An empty input line exits the loop.
while True:
    message = input("User: ")
    if not message:
        break
    messages.append({'role': 'user', 'content': message})
    output = llm.create_chat_completion(
        messages=messages,
        temperature=0.8,
        top_k=40,
        top_p=0.95,
        repeat_penalty=1.3,  # discourage verbatim repetition
        max_tokens=200,
    )
    # BUG FIX: strip the chat-template end token BEFORE storing the reply.
    # The original stripped it only for printing, so "<|im_end|>" was
    # appended into the history and fed back into the model's context on
    # every later turn.
    res = output["choices"][0]["message"]["content"].replace("<|im_end|>", "")
    messages.append({'role': 'assistant', 'content': res})
    print(" karasu-7b: " + res)