あいいうえ

2024年6月18日 12:55

import cv2
import numpy as np
import os
import glob
import boto3
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
from io import BytesIO

# Configuration constants
S3_BUCKET_NAME = 'your-s3-bucket'
VIDEO_KEY = 'path/to/video.mp4'
TEACHER_DATA_PREFIX = 'path/to/teacher/data/'
OUTPUT_PREFIX = 'path/to/output/'

# Shared S3 client used by the download/upload helpers
s3 = boto3.client('s3')

# Local scratch directories (created at import time if missing)
local_temp_dir = '/tmp'
frame_features_dir, teacher_features_dir, match_dir = (
    os.path.join(local_temp_dir, name)
    for name in ('frame_features', 'teacher_features', 'matches')
)
for _scratch_dir in (frame_features_dir, teacher_features_dir, match_dir):
    os.makedirs(_scratch_dir, exist_ok=True)

# ORB feature detector shared by the extraction steps
orb = cv2.ORB_create()

# Download the video file from S3
def download_video_from_s3(bucket_name, key, download_path):
    """Fetch a single S3 object and write it to a local file.

    Args:
        bucket_name: Name of the S3 bucket holding the video.
        key: Object key of the video inside the bucket.
        download_path: Local file path the object is written to.
    """
    s3.download_file(Bucket=bucket_name, Key=key, Filename=download_path)

# Download the teacher data from S3
def download_teacher_data_from_s3(bucket_name, prefix, download_dir):
    """Copy every ``.jpg`` object under an S3 prefix into a local directory.

    The key's path relative to ``prefix`` is reproduced below
    ``download_dir``; intermediate directories are created as needed.
    Objects whose key does not end in ``.jpg`` are ignored.

    Args:
        bucket_name: Name of the S3 bucket to list.
        prefix: Key prefix under which the teacher images live.
        download_dir: Local directory that receives the images.
    """
    paginator = s3.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
        # 'Contents' is absent from the response when nothing matches the
        # prefix, so fall back to an empty list instead of raising KeyError.
        for obj in page.get('Contents', []):
            key = obj['Key']
            if not key.endswith('.jpg'):
                continue
            local_path = os.path.join(download_dir, os.path.relpath(key, prefix))
            os.makedirs(os.path.dirname(local_path), exist_ok=True)
            s3.download_file(bucket_name, key, local_path)

# Upload features to S3
def upload_features_to_s3(bucket_name, prefix, local_dir):
    """Upload every file below ``local_dir`` to S3 under ``prefix``.

    Each file's path relative to ``local_dir`` is appended to ``prefix`` to
    form the object key.

    Args:
        bucket_name: Destination S3 bucket.
        prefix: Key prefix prepended to each relative file path.
        local_dir: Local directory tree to upload (walked recursively).
    """
    for root, _, files in os.walk(local_dir):
        for file in files:
            local_path = os.path.join(root, file)
            relative_path = os.path.relpath(local_path, local_dir)
            # S3 keys use '/' as the delimiter; normalise the OS separator so
            # keys built on Windows do not contain backslashes (no-op on POSIX).
            s3_key = os.path.join(prefix, relative_path).replace(os.sep, '/')
            s3.upload_file(local_path, bucket_name, s3_key)

# Step 1: extract frames from the video and save their features
def extract_frame_features(video_path, output_dir):
    """Compute ORB descriptors for each video frame and save them as ``.npy``.

    Every decoded frame is converted to grayscale and passed to the
    module-level ``orb`` detector. When descriptors are found they are saved
    to ``<output_dir>/frame_<index>.npy``; frames without descriptors produce
    no file.

    Args:
        video_path: Path of the video file to read.
        output_dir: Existing directory that receives the descriptor files.

    Returns:
        The number of frames read from the video (0 if it could not be
        opened).
    """
    cap = cv2.VideoCapture(video_path)
    frame_count = 0

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            _, descriptors = orb.detectAndCompute(gray, None)

            if descriptors is not None:
                np.save(os.path.join(output_dir, f'frame_{frame_count}.npy'), descriptors)

            # Advance for every decoded frame so file names track the real
            # frame index.
            # NOTE(review): the original indentation was lost; confirm the
            # counter was not meant to advance only for saved frames.
            frame_count += 1
    finally:
        # Release the capture even if processing a frame raises.
        cap.release()

    return frame_count

# Step 2: compute and save the features of the teacher data
def extract_teacher_features(teacher_data_path, output_dir):
    """Build one stacked ORB descriptor file per teacher-data subfolder.

    For every subdirectory of ``teacher_data_path`` the ``*.jpg`` files
    directly inside it are read as grayscale, their descriptors are computed
    with the module-level ``orb`` detector, and all descriptors of that folder
    are stacked vertically and saved to
    ``<output_dir>/<folder>_features.npy``. Folders that yield no descriptors
    produce no file; non-directory entries are ignored.

    Args:
        teacher_data_path: Directory containing one subfolder per class.
        output_dir: Existing directory that receives the ``.npy`` files.
    """
    for folder in os.listdir(teacher_data_path):
        folder_path = os.path.join(teacher_data_path, folder)
        if not os.path.isdir(folder_path):
            continue

        folder_descriptors = []
        for image_path in glob.glob(os.path.join(folder_path, '*.jpg')):
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                # imread signals an unreadable/corrupt file by returning None;
                # skip it rather than letting detectAndCompute raise.
                continue

            _, descriptors = orb.detectAndCompute(image, None)
            if descriptors is not None:
                folder_descriptors.append(descriptors)

        if folder_descriptors:
            stacked = np.vstack(folder_descriptors)
            np.save(os.path.join(output_dir, f'{folder}_features.npy'), stacked)

# Step 3: compare each frame's features with every teacher feature file and
# record the best-matching one
def compare_features(frame_count, frame_features_dir, teacher_features_dir, match_dir):
    """Score every frame against each teacher feature file and keep the best.

    The similarity between a frame and a teacher file is the number of
    cross-checked brute-force Hamming matches between their descriptors. For
    each frame index the best file name and score are written to
    ``<match_dir>/frame_<i>_match.txt``; the list of best scores is also saved
    to ``<match_dir>/similarity_scores.npy``.

    Args:
        frame_count: Number of frame indices to process (``0..frame_count-1``).
        frame_features_dir: Directory holding ``frame_<i>.npy`` files.
        teacher_features_dir: Directory holding teacher ``.npy`` files; other
            entries (e.g. subdirectories) are ignored.
        match_dir: Existing directory that receives the result files.

    Returns:
        A list with one best score per frame index. A frame without a saved
        descriptor file scores 0; a frame compared against no teacher files
        scores -1.
    """
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

    # Load each teacher descriptor file once instead of once per frame, and
    # skip anything that is not a regular .npy file so np.load never sees a
    # directory.
    teacher_sets = []
    for name in sorted(os.listdir(teacher_features_dir)):
        teacher_path = os.path.join(teacher_features_dir, name)
        if name.endswith('.npy') and os.path.isfile(teacher_path):
            teacher_sets.append((name, np.load(teacher_path)))

    similarity_scores = []

    for i in range(frame_count):
        frame_path = os.path.join(frame_features_dir, f'frame_{i}.npy')
        best_similarity = -1
        best_folder = None

        if os.path.isfile(frame_path):
            frame_features = np.load(frame_path)
            for name, teacher_features in teacher_sets:
                # Only the match count is used, so no sorting is needed.
                similarity = len(bf.match(frame_features, teacher_features))
                if similarity > best_similarity:
                    best_similarity = similarity
                    best_folder = name
        else:
            # No descriptor file exists for this frame index, so there is
            # nothing to match against any teacher file.
            best_similarity = 0

        similarity_scores.append(best_similarity)
        with open(os.path.join(match_dir, f'frame_{i}_match.txt'), 'w') as f:
            f.write(f'{best_folder}: {best_similarity}\n')

    np.save(os.path.join(match_dir, 'similarity_scores.npy'), similarity_scores)
    return similarity_scores

# Step 4: plot the similarity scores over the frames, save and display the plot
def plot_similarity_scores(similarity_scores, output_path):
    """Plot the per-frame similarity scores, save the figure and display it.

    The figure is written to ``<output_path>/similarity_scores_plot.png``.

    Args:
        similarity_scores: Sequence of scores, one per frame index.
        output_path: Existing directory that receives the PNG file.
    """
    # Draw on a dedicated figure rather than pyplot's implicit current one so
    # repeated calls do not overlay curves on a figure left over from before.
    fig, ax = plt.subplots()
    try:
        ax.plot(similarity_scores)
        ax.set_xlabel('Frame Index')
        ax.set_ylabel('Similarity Score')
        ax.set_title('Similarity Scores Over Frames')
        fig.savefig(os.path.join(output_path, 'similarity_scores_plot.png'))
        plt.show()
    finally:
        # Close the figure so pyplot does not keep it alive after the call.
        plt.close(fig)

# Main routine
def main():
    """Run the full pipeline: download, extract, compare, plot and upload.

    Downloads the video and the teacher images from S3, computes ORB features
    for both, scores every frame against the teacher features, plots the
    scores, and uploads the feature files and match results under
    ``OUTPUT_PREFIX``.
    """
    local_video_path = os.path.join(local_temp_dir, 'video.mp4')

    # Keep the raw teacher images apart from the derived .npy features so the
    # features directory (listed by compare_features and uploaded below)
    # contains feature files only.
    teacher_data_dir = os.path.join(local_temp_dir, 'teacher_data')
    os.makedirs(teacher_data_dir, exist_ok=True)

    # Download the video file from S3
    download_video_from_s3(S3_BUCKET_NAME, VIDEO_KEY, local_video_path)

    # Download the teacher data from S3
    download_teacher_data_from_s3(S3_BUCKET_NAME, TEACHER_DATA_PREFIX, teacher_data_dir)

    # Extract and save the frame features
    frame_count = extract_frame_features(local_video_path, frame_features_dir)

    # Extract and save the teacher-data features
    extract_teacher_features(teacher_data_dir, teacher_features_dir)

    # Compare frame features with teacher features
    similarity_scores = compare_features(
        frame_count, frame_features_dir, teacher_features_dir, match_dir
    )

    # Plot the similarity scores over the frames
    plot_similarity_scores(similarity_scores, match_dir)

    # Upload the features and the match results to S3
    uploads = (
        ('frame_features', frame_features_dir),
        ('teacher_features', teacher_features_dir),
        ('matches', match_dir),
    )
    for name, local_dir in uploads:
        upload_features_to_s3(S3_BUCKET_NAME, os.path.join(OUTPUT_PREFIX, name), local_dir)


if __name__ == '__main__':
    main()

あいいうえ

いいなと思ったら応援しよう！