アメグラ2号のブログ

1980年代後半の古き良きビデオゲームのほか、オッサンの個人的備忘録

python scheduleで複数タスクを定期実行させる

f:id:game-allergy:20210331143049p:plain

前回pythonのscheduleモジュールのひな型ができたので、それに実際のタスクスクリプトを乗っけてみる。

実施内容は以前作成したWEBスクレイピングに関してを自動取得をさせる。おおなかな流れは以下。

・DBを初期化

・tenki.jpへアクセスしてデータをDBへ格納

・DBへアクセスし、データをグラフにして画像保存

◆メインスクリプト

#! /usr/bin/env python3
# ***********************************
# Create table of DB
# Web scraping
# Create Chart
# ***********************************
# ===================================
# import
# ===================================
import time
import datetime
import schedule
import sqlite3
import os
from a01_table import ConnectDB,CreateWeaterTable,DisconnectDB
from a02_web import WebScraping,ConnectDB,StoreDB,DisconnectDB
from a03_chart import GetDataDB,pdDataFrame,pltMatplot,saveChart,ShowChart

# ===================================
# function
# ===================================
# Do Job for a certain period of time 
def job():
    StartMessage(timestamp())
    time.sleep(1)

    # ==============================
    # a01_table
    # ==============================
    dbname = 'TEST.db'
    cur,connection = ConnectDB(dbname)
    CreateWeaterTable(cur,connection)
    DisconnectDB(cur,connection)
    print(timestamp(),"table done",)

    # ==============================
    # a02_web
    # ==============================
    URL = 'https://tenki.jp/forecast/3/17/4610/14100/1hour.html'
    dbname = 'TEST.db'
    # ===================================
    # execution
    # ===================================
    List_all = WebScraping(URL)
    cur, connection = ConnectDB(dbname)
    StoreDB(List_all,cur,connection)
    DisconnectDB(cur,connection)
    print(timestamp(),"web done",)

    # ==============================
    # CreateWeatherChart
    # ==============================
    dbname = 'TEST.db'
    dt1 = datetime.datetime.now()
    dt2 = dt1.strftime('%Y_%m%d_%H%M_%S')
    currentdir = os.getcwd()
    # ===================================
    # execution
    # ===================================
    # connect to DB
    cur, connection = ConnectDB(dbname)

    # Access to DB and get data
    result1,result2,result3,result4,result5,result6 = GetDataDB(cur, connection)

    # data --> pandas/Dataframe
    x1,y1,x2,y2,x3,y3,x4,y4,x5,y5,x6,y6 = pdDataFrame(result1,result2,result3,result4,result5,result6)

    # Create Chart and save them
    fig1,fig2,fig3 = pltMatplot(x1,y1,x2,y2,x3,y3,x4,y4,x5,y5,x6,y6)

    # save fig data as jpg file
    saveChart(fig1,fig2,fig3,dt2,currentdir)

    # Show Charts on display
    #ShowChart()

    # Disconnect DB(End process)
    DisconnectDB(cur,connection)

    print(timestamp(),"chart done",)

    # ==============================
    # End message
    # ==============================
    EndMessage(timestamp())

# ===================================
# function(sub)
# ===================================
def timestamp():
    dt1 = datetime.datetime.now()
    dt2 = dt1.strftime('%Y_%m/%d_%H:%M_%S')
    return dt2

def ScheduleStartMessage(timestamp):
    print(timestamp,"===Schedule started===")

def StartMessage(timestamp):
    print(timestamp,"===Process started===")

def EndMessage(timestamp):
    print(timestamp,"===Process done===")

# ===================================
# ready for exe
# ===================================

# ===================================
# execution
# ===================================
def main():
    # Do Job for a certain period of time
    schedule.every(10).seconds.do(job)
    ScheduleStartMessage(timestamp())

    # Checking schedule run
    while True:
        schedule.run_pending()
        time.sleep(1)

if __name__ == "__main__":
    main()

◆サブスクリプト(DBテーブル初期化)

# ***********************************
# Create table
# a01_table 
# ***********************************
# ===================================
# import
# ===================================
import sqlite3
import datetime
# ===================================
# function
# ===================================
# Check the existence
def CheckTableExistence(cur):
    cur.execute('SELECT COUNT(*) FROM sqlite_master WHERE TYPE="table" AND NAME="WeatherTable"')

# Create table
def CreateTable(cur):
    sql = 'CREATE TABLE WeatherTable(id INTEGER PRIMARY KEY AUTOINCREMENT, hour STRING, weather STRING, temp DOUBLE, windspeed INTEGER, windblow STRING, humid INTEGER)'
    cur.execute(sql)

# Delete table
def DropTable(cur):
    sql = 'DROP TABLE WeatherTable'
    cur.execute(sql)

# connect to DB
def ConnectDB(dbname):
    connection = sqlite3.connect(dbname)
    cur = connection.cursor()
    r = (cur,connection)
    return r

# Disconnect DB(End process)
def DisconnectDB(connection):
    cur.close()
    connection.commit()
    connection.close()

# main execution
def CreateWeaterTable(cur,connection):
    #Check the Existence of table
    CheckTableExistence(cur)

    # Check the Existence of table and Create the table
    if cur.fetchone() == (0,):
        print("WeatherTable is not existed then created")
        CreateTable(cur)
    
    else:
        print('WeatherTable exists, then deleted old one and created new one')
        DropTable(cur)
        CreateTable(cur)

◆サブスクリプト(WEBスクレイピング

# ***********************************
# Web scraping
# a02_web 
# ***********************************
# ===================================
# import
# ===================================
import requests
import bs4
import sqlite3
import datetime
# ===================================
# function
# ===================================
def WebScraping(URL):
    r = requests.get(URL)
    s = bs4.BeautifulSoup(r.text, 'html.parser')

    # 気温情報の取得
    list_date= []
    for j in s.find_all('tr' , class_ ='head'):
        for i in j.find_all('p'):
            list_date.append(i.text)

    # 気温情報の取得
    list_temp= []
    for j in s.find_all('tr' , class_ ='temperature'):
        for i in j.find_all('span' ):
            list_temp.append(i.string)

    # 天気情報(晴れとか)の取得
    list_weather = []
    for j in s.find_all('tr' , class_ ='weather'):
        for i in j.find_all('p'):
            list_weather.append(i.string)

    # 時刻の取得
    list_hour = []
    for j in s.find_all('tr' , class_ ='hour'):
        for i in j.find_all('span'):
            list_hour.append(i.string)

    # 風向きの取得
    list_windblow = []
    for j in s.find_all('tr' , class_ ='wind-blow'):
        for i in j.find_all('p'):
            list_windblow.append(i.string)

    # 風速の取得
    list_windspeed = []
    for j in s.find_all('tr' , class_ ='wind-speed'):
        for i in j.find_all('span'):
            list_windspeed.append(i.string)

    # 湿度の取得
    list_humid = []
    for j in s.find_all('tr' , class_ ='humidity'):
        for i in j.find_all('td'):
            list_humid.append(i.string)

    #------------------------------
    # 各情報を2次元配列へ格納する
    #------------------------------
    List_all =[list_hour, list_weather, list_temp, list_windspeed,list_windblow, list_humid]
    return List_all

# connect to DB
def ConnectDB(dbname):
    connection = sqlite3.connect(dbname)
    cur = connection.cursor()
    r = (cur,connection)
    return r

def StoreDB(List_all,cur,connection):
    sql1 = 'INSERT INTO WeatherTable(hour, weather, temp, windspeed, windblow, humid) VALUES(?,?,?,?,?,?)'
    for i in range(0,72,1):
        cur.execute(sql1,(List_all[0][i], List_all[1][i], List_all[2][i], List_all[3][i], List_all[4][i], List_all[5][i]))
    #cur.close()

# Disconnect DB(End process)
def DisconnectDB(cur,connection):
    cur.close()
    connection.commit()
    connection.close()

◆サブスクリプト(グラフ作成・保存)

# ***********************************
# Create chart
# a03_chart 
# ***********************************
# ===================================
# import
# ===================================
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import os

# ===================================
# function
# ===================================
# connect to DB
def ConnectDB(dbname):
    connection = sqlite3.connect(dbname)
    cur = connection.cursor()
    r = (cur,connection)
    return r

# get Data from DB
def GetDataDB(cur, connection):
    #-----------------------------
    # DB接続、カーソル起動
    #-----------------------------
    dbname = 'TEST.db'
    connection = sqlite3.connect(dbname)
    cur = connection.cursor()
    #-----------------------------
    # データを2種類、DBから引き出す
    #-----------------------------
    # 0-24時までのデータ(data1)
    sql1 = 'SELECT temp, hour FROM WeatherTable LIMIT 24'
    c1 =cur.execute(sql1)
    result1 = c1.fetchall()
    # 次の日の0-24時までのデータ(data2)
    sql2 = 'SELECT temp, hour FROM WeatherTable LIMIT 24,24'
    c2 = cur.execute(sql2)
    result2 = c2.fetchall()
    #-----------------------------
    # 0-24時までのデータ(data3)
    sql3 = 'SELECT humid, hour FROM WeatherTable LIMIT 24'
    c3 =cur.execute(sql3)
    result3 = c3.fetchall()
    # 次の日の0-24時までのデータ(data4)
    sql4 = 'SELECT humid, hour FROM WeatherTable LIMIT 24,24'
    c4 = cur.execute(sql4)
    result4 = c4.fetchall()
    #-----------------------------
    # 0-24時までのデータ(data5)
    sql5 = 'SELECT windspeed, hour FROM WeatherTable LIMIT 24'
    c5 =cur.execute(sql5)
    result5 = c5.fetchall()
    # 次の日の0-24時までのデータ(data6)
    sql6 = 'SELECT windspeed, hour FROM WeatherTable LIMIT 24,24'
    c6 = cur.execute(sql6)
    result6 = c6.fetchall()
    #-----------------------------
    # 戻り値
    #-----------------------------
    result_all =(result1,result2,result3,result4,result5,result6)
    return result_all

def pdDataFrame(result1,result2,result3,result4,result5,result6):
    #データフレーム(表に書き出すイメージ)
    df1 = pd.DataFrame(result1,columns=["temp", "hour"])
    df2 = pd.DataFrame(result2,columns=["temp", "hour"])
    df3 = pd.DataFrame(result3,columns=["humid", "hour"])
    df4 = pd.DataFrame(result4,columns=["humid", "hour"])
    df5 = pd.DataFrame(result5,columns=["windspeed", "hour"])
    df6 = pd.DataFrame(result6,columns=["windspeed", "hour"])
    #-----------------------------
    # グラフの準備(2つのグラフ)
    #-----------------------------
    # グラフのX軸、Y軸の指定
    x1 = df1.hour
    y1 = df1.temp
    x2 = df2.hour
    y2 = df2.temp
    x3 = df3.hour
    y3 = df3.humid
    x4 = df4.hour
    y4 = df4.humid
    x5 = df5.hour
    y5 = df5.windspeed
    x6 = df6.hour
    y6 = df6.windspeed
    #-----------------------------
    # 戻り値
    #-----------------------------
    xyData = (x1,y1,x2,y2,x3,y3,x4,y4,x5,y5,x6,y6)
    return xyData

def pltMatplot(x1,y1,x2,y2,x3,y3,x4,y4,x5,y5,x6,y6):
    # グラフ上限値を決めておく(5℃~25℃)
    tempMin = 5
    tempMax = 25
    humidMin = 0
    humidMax = 100
    windMin = 0
    windMax = 15

    # 日付情報取得(ファイル名用)
    #td1 = datetime.date.today()
    #td2 = td1.strftime('%Y%m%d')
    td1 = datetime.datetime.now()
    td2 = td1.strftime('%Y%m%d-%H%M-%S')

    # fig1-------------------------------
    fig1 = plt.figure(td2 + "_01")
    # (data1)----------------------------
    chart1 = fig1.add_subplot(1,2,1)
    chart1.plot(x1, y1)
    plt.ylim(tempMin, tempMax)
    # グラフ内のラベル表記
    chart1.set_title(td2)
    chart1.set_xlabel("Time")
    chart1.set_ylabel("Temperature(℃)")
    # (data2)----------------------------
    chart2 = fig1.add_subplot(1,2,2)
    chart2.plot(x2, y2)
    plt.ylim(tempMin, tempMax)
    # グラフ内のラベル表記
    chart2.set_title("Tomorrow")
    chart2.set_xlabel("Time")
    chart2.set_ylabel("Temperature(℃)")

    # fig2-------------------------------
    fig2 = plt.figure(td2+"_02")
    # (data3)----------------------------
    chart3 = fig2.add_subplot(1,2,1)
    chart3.plot(x3, y3)
    plt.ylim(humidMin, humidMax)
    # グラフ内のラベル表記
    chart3.set_title(td2)
    chart3.set_xlabel("Time")
    chart3.set_ylabel("Humidity(%)")

    # (data4)----------------------------
    chart4 = fig2.add_subplot(1,2,2)
    chart4.plot(x4, y4)
    plt.ylim(humidMin, humidMax)
    # グラフ内のラベル表記
    chart4.set_title("Tomorrow")
    chart4.set_xlabel("Time")
    chart4.set_ylabel("Humidity(%)")

    # fig3-------------------------------
    fig3 = plt.figure(td2+"_03")
    # (data5)----------------------------
    chart5 = fig3.add_subplot(1,2,1)
    chart5.plot(x5, y5)
    plt.ylim(windMin, windMax)
    # グラフ内のラベル表記
    chart5.set_title(td2)
    chart5.set_xlabel("Time")
    chart5.set_ylabel("WindSpeed(m/s)")
    # (data6)----------------------------
    chart6 = fig3.add_subplot(1,2,2)
    chart6.plot(x6, y6)
    plt.ylim(windMin, windMax)
    # グラフ内のラベル表記
    chart6.set_title("Tomorrow")
    chart6.set_xlabel("Time")
    chart6.set_ylabel("WindSpeed(m/s)")
    #-----------------------------
    # グラフの設定
    #-----------------------------
    fig1.tight_layout() 
    fig2.tight_layout() 
    fig3.tight_layout()
    #-----------------------------
    # 戻り値
    #-----------------------------
    fig_all = (fig1,fig2,fig3)
    return fig_all

# Show Charts on display  
def ShowChart():
    plt.show()

# save fig data as jpg file
def saveChart(fig1,fig2,fig3,dt2,currentdir):
    fig1.savefig(currentdir  + '/fig/' + dt2 + "_fig_temp.jpg")
    fig2.savefig(currentdir  + '/fig/' + dt2 + "_fig_humid.jpg")
    fig3.savefig(currentdir  + '/fig/' + dt2 + "_fig_windspeed.jpg")   


# Disconnect DB(End process)
def DisconnectDB(cur,connection):
    cur.close()
    connection.commit()
    connection.close()


ああ~長いな、、、これでもかなり短くしたんだけど。。。

タスクは、DB初期化、Webスクレイピング、グラフ作成の3つなんだけど、これをscheduleモジュールで定期実行させようとすると意外とややこしい。最初は関数化しないでダラダラとコードを書いてしまったのでデバッグが大変だった。プログラミング書籍には「コードは短くシンプルに」という意味がようやく分かってきた。。。。だって自分で作ったスクリプトなのに修正が面倒だなんて…アホやん。
ということで極力defで関数化して、小さいブロックごとで実行をさせて結果を渡す、結果をもらって処理をしてまた渡す…の流れを作り直した。
なるほど~、細分化したほうがよっぽど合理的だね。作るときは渡す引数の型とか気にしたりする必要があってちょーっと面倒だけど、出来上がればメンテナンスはかなりやりやすい。


それにしても…このscheduleって、実行間隔を極端に短くすると場合によっては迷惑行為になるかなぁ。