$ pip install llama-cpp-python
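You also need the quantized model file itself. Assuming it lives in rinna's GGUF repo on Hugging Face (rinna/nekomata-7b-instruction-gguf — check the repo for the exact filename), one way to fetch it:

$ huggingface-cli download rinna/nekomata-7b-instruction-gguf nekomata-7b-instruction.Q4_K_M.gguf --local-dir .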
from llama_cpp import Llama
import os
import json
import re

filename = "messages.json"

# Load the saved chat log (a JSON list of [role, text] pairs).
def read_log_file(filename):
    with open(filename, 'r', encoding='utf-8') as f:
        return json.load(f)
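If the log file can be missing or half-written (say, the script was killed mid-dump), a hardened variant avoids a crash; read_log_file_safe is a hypothetical helper, not part of the original script:

# Hypothetical hardened reader: fall back to an empty history when the
# log file is missing or contains invalid JSON.
def read_log_file_safe(filename):
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        return []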
# Load the quantized nekomata-7B instruction model with llama-cpp-python.
llm = Llama(
    model_path="./nekomata-7b-instruction.Q4_K_M.gguf",
    n_ctx=2048,
    n_threads=16,
    verbose=False,
)
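As an aside, create_completion can also stream the reply token by token instead of returning it in one block; a minimal sketch, reusing the llm instance above:

# Streaming variant: stream=True makes create_completion yield chunks
# as they are generated, each shaped like the non-streaming response.
for chunk in llm.create_completion("こんにちは。", max_tokens=50, stream=True):
    print(chunk["choices"][0]["text"], end="", flush=True)
print()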
# System prompt in nekomata's Alpaca-style format ("### 指示:" = instruction,
# "### 入力:" = input). The persona text says: "You are an excellent AI
# assistant named Nekomata. Behave like a young woman. Now let's have a
# fun conversation." / "Nice to meet you."
sys_messages = [
    ("### 指示:", """あなたは優秀なAIアシスタント。名前はネコマタです。
若い女性のように振舞ってください。では楽しい会話をしてください。\n\n"""),
    ("### 入力:", "よろしくお願いします。\n\n")
]
# Resume the conversation from the log file if one exists.
messages = []
if os.path.exists(filename):
    messages = read_log_file(filename)
def get_prompt(messages):
    # Quick-and-dirty serialization: stringify the (role, text) pairs and
    # strip brackets/parentheses; repr() quotes and commas stay in the prompt.
    pre_prompt = sys_messages + messages
    pattern = r'[\[\]()]'            # raw string avoids invalid-escape warnings
    prompt = re.sub(pattern, '', str(pre_prompt))
    prompt = prompt + "\n### 応答:"  # cue the model to answer ("response" header)
    return prompt
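If you would rather not rely on stripping repr() punctuation, an explicit builder produces a tidier prompt; get_prompt_joined is a hypothetical drop-in alternative, not what the original uses:

def get_prompt_joined(messages):
    # Hypothetical alternative: format each (role, text) pair explicitly
    # instead of regex-cleaning the list's repr().
    parts = [f"{role}\n{text}" for role, text in sys_messages + messages]
    return "".join(parts) + "\n### 応答:"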
print("nekomataとチャットしましょう。json log")  # "Let's chat with nekomata. (JSON log)"
while True:
    message = input("ユーザー: ")  # "User: " — an empty line ends the chat
    if not message:
        break
    messages.append(("### 入力:", message))
    prompt = get_prompt(messages)
    output = llm.create_completion(
        prompt,
        temperature=0.5,
        top_k=40,
        top_p=0.95,
        repeat_penalty=1.3,
        max_tokens=200,
    )
    res = output["choices"][0]["text"]
    print("ネコマタ:" + res)       # display the model's reply
    messages.append(("### 応答:", res))
    messages = messages[-8:]       # keep only the last 8 turns in memory
    # Persist the log: merge the in-memory turns into the saved history
    # and keep only the last 10 entries. Guard against a missing file on
    # the very first run.
    chat_log = read_log_file(filename) if os.path.exists(filename) else []
    chat_log.extend(messages)
    chat_log = chat_log[-10:]
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(chat_log, f, ensure_ascii=False, indent=4)
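One caveat with this prompt format: the model can keep going and write the next "### 入力:" turn itself. create_completion accepts a stop parameter (a documented llama-cpp-python option) that cuts generation off at a given string; a sketch of the same call with it added:

# Same sampling settings as above, plus a stop sequence so generation
# halts before the model emits another "###" header.
output = llm.create_completion(
    prompt,
    temperature=0.5,
    top_k=40,
    top_p=0.95,
    repeat_penalty=1.3,
    max_tokens=200,
    stop=["###"],
)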