口コミ保存用スクリプト
AI画像生成用に、シティヘブンネットの口コミをローカルにテキスト保存するスクリプトを作成しました。不完全な部分や不要な部分が混ざっていますが、とりあえず動作したので、参考までに掲載します。
##############
# 機能
# heaven口コミ全文保存
#
# 2023.10.31
import ssl
# NOTE(review): this swaps Python's default HTTPS context factory for the
# unverified one, i.e. certificate verification is turned off for HTTPS
# requests that use the default context in this process. urllib is imported
# below but never called in the visible script, so this line may be
# unnecessary — confirm and remove if nothing else depends on it.
ssl._create_default_https_context = ssl._create_unverified_context
import urllib
from urllib.parse import urlparse
import os, sys
import pprint
# import config
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import datetime
from selenium.common.exceptions import NoSuchElementException
import time
# Name used for the per-person output folder created under shop_a.
girl = "jou"
# Location of the ChromeDriver executable. No live statement in the visible
# script reads this value; only a commented-out driver call mentions it.
chrome_driver_path = "C:/work/aaaaa/chromedriver.exe"
### Create URLs #######################################################################
# One listing URL per review page; the page number (1 through 7) is the path
# segment that follows "reviews/".
review_urls = [
    f"https://www.cityheaven.net/saitama/A1101/A110101/loveomiya/reviews/{page}/?girlid=nnnnnnnnn"
    for page in range(1, 8)
]
print(review_urls)
# chrome driver #####################################################
# Start a Chrome session configured through an Options object.
options = Options()
# NOTE(review): 'normal' is presumably the strategy under which driver.get()
# returns only once the page has finished loading — confirm against the
# Selenium documentation.
options.page_load_strategy = 'normal'
# Disabled experiment: switch off image loading in the browser.
# options.add_argument('--blink-settings=imagesEnabled=false')
# Disabled variant that passed an explicit driver path; the live call below
# passes only the options object.
# driver = webdriver.Chrome(executable_path = chrome_driver_path ,options=options)
driver = webdriver.Chrome(options=options)
# Make sure the output folder shop_a/<girl> exists. makedirs creates the
# shop_a parent when needed, and exist_ok=True turns the call into a no-op if
# the folder is already there, so no separate isdir() checks are required.
os.makedirs(os.path.join('shop_a', girl), exist_ok=True)
log_file_path = "./jou_log.txt"

# Labels printed on the "point" line, in the order of the <li> rating cells
# they are read from (li[1] .. li[5]).
_RATE_LABELS = ("女の子", "プレイ", "料金", "スタッフ", "写真")


def _read_review(review_element):
    """Collect the output lines for a single review entry.

    Every lookup is performed before anything is returned, so a review with
    a missing element produces no partial output.

    Args:
        review_element: The element of one "review-item" entry.

    Returns:
        A list of tuples; each tuple holds the positional arguments of one
        ``print`` call, in output order.

    Raises:
        NoSuchElementException: If any of the expected child elements is
            not present under ``review_element``.
    """
    # The author link supplies both the profile URL and the display name, so
    # it is located once and read twice.
    author_link = review_element.find_element(By.XPATH, './/div[1]/div[1]/div/a')
    usernameurl = author_link.get_attribute('href')
    usernametext = author_link.text
    visitdate = review_element.find_element(
        By.XPATH, './/div[1]/div[2]/div[1]/dl/dd[1]').text
    rates = [
        review_element.find_element(
            By.XPATH, f'.//div[2]/ul/li[{position}]/span').text
        for position in range(1, len(_RATE_LABELS) + 1)
    ]
    review_title = review_element.find_element(
        By.XPATH, './/div[2]/div[2]/span').text
    # NOTE(review): unlike the other lookups this XPath has no './/' prefix, so
    # it is resolved from the direct children of the review item. Kept exactly
    # as it was — confirm that this difference is intended.
    review_postdate = review_element.find_element(By.XPATH, 'div[2]/p[2]').text
    review_post = review_element.find_element(
        By.XPATH, './/div[2]/p[@class="review-item-post"]').text

    points = "point :" + "".join(
        f" {label} : {rate}" for label, rate in zip(_RATE_LABELS, rates))
    return [
        ('=' * 40,),
        ("usernameurl : ", usernameurl),
        ("usernametext : ", usernametext),
        ("visitdate : ", visitdate),
        (points,),
        ("title : ", review_title),
        ("date : ", review_postdate),
        ("---poststart---\n", review_post),
        ("---postend---",),
    ]


# Write straight to the log file instead of reassigning sys.stdout: the file is
# closed even when scraping fails, and stdout is never left pointing at it.
with open(log_file_path, "w", encoding='utf-8') as log_file:
    for url in review_urls:
        driver.get(url)
        for review_element in driver.find_elements(By.CLASS_NAME, "review-item"):
            try:
                output_lines = _read_review(review_element)
            except NoSuchElementException as err:
                # One malformed entry should not discard the remaining
                # reviews; report it on stderr so the log format is unchanged.
                print("skipped a review on", url, ":", err, file=sys.stderr)
                continue
            for line_args in output_lines:
                print(*line_args, file=log_file)

print("END of RESULT")
# Keep the script running until the user presses Ctrl-C (presumably so the
# browser window stays open for inspection — confirm). Sleeping between
# iterations avoids the busy loop that kept one CPU core fully occupied.
try:
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    # Ctrl-C is the expected way to end the script; exit without a traceback.
    pass
finally:
    # Shut the browser session down when the script stops.
    driver.quit()
※取得データ差し上げます。