【ComfyUI】 ComfyUIのAPI化【Modal】

ヴィット。

2024年9月23日 11:46

ComfyUIなどのGUIアプリをクラウド環境で動かすとなるとどうしても重くなってしまいます。
そこでComfyUIをAPI化して、快適に画像生成を行いましょう。

クラウドサービスとしてはModalを使用します。既にComfyUIでノードを組んで画像生成はできていることを前提とするため、まだの方はこちらの記事などを参考にしてみてください。

ワークフローの出力

まずはComfyUIを起動して、API化したいワークフローを作成します。

Devモードの有効化

ComfyUI右下のメニューにある歯車アイコンから設定画面を開きます。
Dev Modeの項目があるので、これを有効化します。

ワークフローを保存

Dev Mode が有効になると「Save(API Format)」というメニューが出てくるので、ワークフローをjson形式で保存することができます。

出力したワークフローファイル(workflow_api.json)は、Modalを実行するためのPythonファイルと同じ階層においてください。

コード（サーバーサイド）

以下がコード全文です。
大きく3つのブロックに分かれており、前半がイメージファイルの作成、
真ん中がWebUI用のエンドポイント、そして最後がAPIサーバとして動かすためのコードです。

import json
import subprocess
import uuid
from pathlib import Path
from typing import Dict
import modal


# Models to bake into the image (checkpoints, LoRAs, VAEs, ...).
# Each entry is (download URL, value passed to comfy-cli's --relative-path).
MODELS = [
    (
        "https://huggingface.co/Comfy-Org/flux1-schnell/resolve/main/flux1-schnell-fp8.safetensors",
        "models/checkpoints",
    ),
]

# Custom nodes to install with `comfy node install`; none by default.
NODES = []

# Container image: Debian slim with Python 3.11, git and comfy-cli 1.2.3,
# followed by the comfy-cli install step, one download command per MODELS
# entry and one install command per NODES entry.
image = (
    modal.Image.debian_slim(python_version="3.11")
    .apt_install("git")
    .pip_install("comfy-cli==1.2.3")
    .run_commands("comfy --skip-prompt install --nvidia")
    .run_commands(
        *(
            f"comfy --skip-prompt model download --url {url} --relative-path {path}"
            for url, path in MODELS
        )
    )
    .run_commands(*(f"comfy node install {node}" for node in NODES))
)

# Modal app that both the web UI function and the API class attach to.
app = modal.App(name="example-comfyui", image=image)


# """
# WEB UI
# """
@app.function(
    allow_concurrent_inputs=10,
    concurrency_limit=1,
    container_idle_timeout=30,
    timeout=1800,
    gpu="L4",
)
@modal.web_server(8000, startup_timeout=60)
def ui():
    """Start the interactive ComfyUI server listening on 0.0.0.0:8000.

    The process is spawned with ``Popen`` and not waited on, so this function
    returns immediately while the server keeps running in the container.
    """
    launch_command = "comfy launch -- --listen 0.0.0.0 --port 8000"
    subprocess.Popen(launch_command, shell=True)

# """
# API SERVER
# """
def _sniff_image_media_type(data: bytes) -> str:
    """Return the MIME type matching the magic bytes at the start of *data*."""
    if data.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    if data.startswith(b"\xff\xd8\xff"):
        return "image/jpeg"
    if data[:4] == b"RIFF" and data[8:12] == b"WEBP":
        return "image/webp"
    return "application/octet-stream"


@app.cls(
    allow_concurrent_inputs=10,
    container_idle_timeout=300,
    gpu="A10G",
    mounts=[
        # Ship the exported workflow that sits next to this file into the
        # container at /root/workflow_api.json.
        modal.Mount.from_local_file(
            Path(__file__).parent / "workflow_api.json",
            "/root/workflow_api.json",
        ),
    ],
)
class ComfyUI:
    """Headless ComfyUI worker exposing a workflow as a method and a POST endpoint."""

    @modal.enter()
    def launch_comfy_background(self):
        """Start ComfyUI in the background when the container starts."""
        cmd = "comfy launch --background"
        subprocess.run(cmd, shell=True, check=True)

    @modal.method()
    def infer(
        self,
        client_id: str,
        workflow_path: str = "/root/workflow_api.json",
        prompt: str = "A beautiful landscape painting",
        seed: int = 0,
        width: int = 1024,
        height: int = 1024
    ) -> bytes:
        """Run the workflow with the given overrides and return the image bytes.

        Args:
            client_id: Unique token used as the output filename prefix and as
                the name of the temporary workflow file.
            workflow_path: Path of the API-format workflow JSON to load.
            prompt: Text written into node "6".
            seed: Seed written into node "31".
            width: Width written into node "27".
            height: Height written into node "27".

        Returns:
            The raw bytes of the first output file whose name starts with
            ``client_id``.

        Raises:
            subprocess.CalledProcessError: If ``comfy run`` exits non-zero.
            FileNotFoundError: If the run produced no matching output file.
        """
        workflow = json.loads(Path(workflow_path).read_text())

        # Node ids are specific to the exported workflow_api.json; ids that
        # are absent from the loaded workflow are skipped.
        # NOTE(review): presumably 9=save image, 6=prompt text, 31=sampler,
        # 27=latent size - confirm against your own export.
        workflow_updates = {
            "9": {"inputs": {"filename_prefix": client_id}},
            "6": {"inputs": {"text": prompt}},
            "31": {"inputs": {"seed": seed}},
            "27": {
                "inputs": {
                    "width": width,
                    "height": height
                }
            }
        }
        for node_id, updates in workflow_updates.items():
            if node_id in workflow:
                workflow[node_id]["inputs"].update(updates["inputs"])

        # write_text closes the file, so the JSON is fully on disk before the
        # comfy subprocess reads it.
        new_workflow_file = f"{client_id}.json"
        Path(new_workflow_file).write_text(json.dumps(workflow, indent=4))

        # Argument list instead of a shell string: client_id is caller
        # supplied and must not be interpreted by a shell.
        subprocess.run(
            [
                "comfy", "run",
                "--workflow", new_workflow_file,
                "--wait",
                "--timeout", "1200",
            ],
            check=True,
        )

        output_dir = Path("/root/comfy/ComfyUI/output")

        # Sorted so the choice is deterministic if several files match.
        for f in sorted(output_dir.iterdir()):
            if f.name.startswith(client_id):
                return f.read_bytes()

        raise FileNotFoundError(
            f"no output file starting with {client_id!r} found in {output_dir}"
        )

    @modal.web_endpoint(method="POST")
    def api(self, item: Dict):
        """Handle a POST request and respond with the generated image.

        ``item`` is the request JSON. The keys "prompt", "seed", "width" and
        "height" are all optional; a missing or null key falls back to the
        corresponding default of :meth:`infer` instead of being forwarded as
        ``None`` into the workflow.
        """
        from fastapi import Response

        client_id = uuid.uuid4().hex
        workflow_path = "/root/workflow_api.json"
        overrides = {
            key: item[key]
            for key in ("prompt", "seed", "width", "height")
            if item.get(key) is not None
        }

        img_bytes = self.infer.local(client_id, workflow_path, **overrides)

        # Label the response from the actual bytes rather than assuming JPEG.
        return Response(img_bytes, media_type=_sniff_image_media_type(img_bytes))

コード（クライアントサイド）

やっていることは単純で、APIサーバに対してPOSTを送っているだけです。
Modal特有のコードもなくPythonに依存しているわけでもないので、curlで叩くこともできます。


import argparse
import pathlib
import sys
import time

import requests

# Directory that downloaded images are written to. The literal below is a
# placeholder ("where you want to save images"); replace it with a real path.
# The directory, including missing parents, is created at import time.
OUTPUT_DIR = pathlib.Path("画像を保存したい場所")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)


def main(args: argparse.Namespace):
    """POST the prompt to the deployed endpoint and save the returned image.

    Args:
        args: Parsed CLI options; reads ``modal_workspace``, ``prompt`` and
            ``dev``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If connecting takes longer than 10 seconds or the
            server stays silent for longer than 30 minutes.
    """
    url = f"https://{args.modal_workspace}--example-comfyui-comfyui-api{'-dev' if args.dev else ''}.modal.run/"
    data = {
        "prompt": args.prompt,
    }
    print(f"Sending request to {url} with prompt: {data['prompt']}")
    print("Waiting for response...")
    # Monotonic clock: the elapsed time is unaffected by wall-clock changes.
    start_time = time.monotonic()
    # (connect, read) timeout. Without one, requests waits forever when the
    # server never answers; the read limit is generous to cover cold starts
    # and long generations.
    res = requests.post(url, json=data, timeout=(10, 1800))
    if res.status_code != 200:
        if res.status_code == 404:
            print(f"Workflow API not found at {url}")
        res.raise_for_status()
        return

    end_time = time.monotonic()
    print(
        f"Image finished generating in {round(end_time - start_time, 1)} seconds!"
    )
    filename = OUTPUT_DIR / f"{slugify(args.prompt)}.png"
    filename.write_bytes(res.content)
    print(f"saved to '{filename}'")


def parse_args(arglist: list[str]) -> argparse.Namespace:
    """Parse the client's command-line options.

    Args:
        arglist: Full argument vector including the program name at index 0
            (for example ``sys.argv``); the first element is dropped.

    Returns:
        Namespace with ``modal_workspace``, ``prompt`` and ``dev``.
    """
    # (flag, add_argument keyword options), registered in this order.
    option_specs = (
        (
            "--modal-workspace",
            {
                "type": str,
                "required": True,
                "help": "Name of the Modal workspace with the deployed app. Run `modal profile current` to check.",
            },
        ),
        (
            "--prompt",
            {
                "type": str,
                "required": True,
                "help": "what to draw in the blank part of the image",
            },
        ),
        (
            "--dev",
            {
                "action": "store_true",
                "help": "use this flag when running the ComfyUI server in development mode with `modal serve`",
            },
        ),
    )

    cli = argparse.ArgumentParser()
    for flag, options in option_specs:
        cli.add_argument(flag, **options)

    argv_without_program = arglist[1:]
    return cli.parse_args(argv_without_program)


def slugify(s: str) -> str:
    """Turn *s* into a short, filesystem-safe file stem.

    The text is lower-cased, then spaces, dots, path separators, the
    characters Windows forbids in file names and ASCII control characters are
    each replaced by ``-``, and the result is cut to 32 characters. An empty
    result falls back to ``"image"`` so the caller never gets an empty stem.
    """
    replacements = {ord(ch): "-" for ch in ' ./\\:*?"<>|'}
    replacements.update({code: "-" for code in range(32)})
    slug = s.lower().translate(replacements)[:32]
    return slug or "image"


if __name__ == "__main__":
    # Script entry point: hand the full argv (program name included) to the
    # parser and run the client with the resulting options.
    main(parse_args(sys.argv))

使い方

python comfyclient.py \
--modal-workspace Modalのワークスペース名 \
--prompt  "landscape" --dev