karasu-7b-chat-plus (chat)
!pip install llama-cpp-python
from llama_cpp import Llama

# Load the GGUF model. Set chat_format according to the model you are using
# (this checkpoint was trained with the Qwen chat template).
llm = Llama(
    model_path="./lightblue-karasu-7B-chat-plus-unleashed-q4_K_M.gguf",
    chat_format="qwen",
    n_ctx=2048,       # Max sequence length; longer contexts need much more resources
    n_threads=16,     # CPU threads to use; tailor to your system
    n_gpu_layers=10,  # Layers to offload to GPU, if GPU acceleration is available
    low_vram=True,
    verbose=False,
)

# Prepare the prompt: a system persona plus one priming assistant turn.
messages = [
    {"role": "system", "content": "あなたは優秀なAIアシスタント。名前はkarasu-7bです。"},
    {"role": "assistant", "content": "はい。わかりました。"},
]

# Interactive chat loop. An empty input line exits; otherwise each user turn
# and the model's reply are appended to `messages` so the history accumulates.
while True:
    message = input("User: ")
    if not message:
        break
    messages.append({"role": "user", "content": message})
    output = llm.create_chat_completion(
        messages=messages,
        temperature=0.8,
        top_k=40,
        top_p=0.95,
        repeat_penalty=1.3,
        max_tokens=200,
    )
    res = output["choices"][0]["message"]["content"]
    messages.append({"role": "assistant", "content": res})
    # Strip the chat end-of-turn marker before printing the reply.
    print(" karasu-7b: " + res.replace("<|im_end|>", ""))
この記事が気に入ったらサポートをしてみませんか?