前回pythonのscheduleモジュールのひな型ができたので、それに実際のタスクスクリプトを乗っけてみる。
実施内容は以前作成したWEBスクレイピングに関してを自動取得をさせる。おおなかな流れは以下。
・DBを初期化
・tenki.jpへアクセスしてデータをDBへ格納
・DBへアクセスし、データをグラフにして画像保存
◆メインスクリプト
#! /usr/bin/env python3 # *********************************** # Create table of DB # Web scraping # Create Chart # *********************************** # =================================== # import # =================================== import time import datetime import schedule import sqlite3 import os from a01_table import ConnectDB,CreateWeaterTable,DisconnectDB from a02_web import WebScraping,ConnectDB,StoreDB,DisconnectDB from a03_chart import GetDataDB,pdDataFrame,pltMatplot,saveChart,ShowChart # =================================== # function # =================================== # Do Job for a certain period of time def job(): StartMessage(timestamp()) time.sleep(1) # ============================== # a01_table # ============================== dbname = 'TEST.db' cur,connection = ConnectDB(dbname) CreateWeaterTable(cur,connection) DisconnectDB(cur,connection) print(timestamp(),"table done",) # ============================== # a02_web # ============================== URL = 'https://tenki.jp/forecast/3/17/4610/14100/1hour.html' dbname = 'TEST.db' # =================================== # execution # =================================== List_all = WebScraping(URL) cur, connection = ConnectDB(dbname) StoreDB(List_all,cur,connection) DisconnectDB(cur,connection) print(timestamp(),"web done",) # ============================== # CreateWeatherChart # ============================== dbname = 'TEST.db' dt1 = datetime.datetime.now() dt2 = dt1.strftime('%Y_%m%d_%H%M_%S') currentdir = os.getcwd() # =================================== # execution # =================================== # connect to DB cur, connection = ConnectDB(dbname) # Access to DB and get data result1,result2,result3,result4,result5,result6 = GetDataDB(cur, connection) # data --> pandas/Dataframe x1,y1,x2,y2,x3,y3,x4,y4,x5,y5,x6,y6 = pdDataFrame(result1,result2,result3,result4,result5,result6) # Create Chart and save them fig1,fig2,fig3 = pltMatplot(x1,y1,x2,y2,x3,y3,x4,y4,x5,y5,x6,y6) # save fig data as jpg file saveChart(fig1,fig2,fig3,dt2,currentdir) # Show Charts on display #ShowChart() # Disconnect DB(End process) DisconnectDB(cur,connection) print(timestamp(),"chart done",) # ============================== # End message # ============================== EndMessage(timestamp()) # =================================== # function(sub) # =================================== def timestamp(): dt1 = datetime.datetime.now() dt2 = dt1.strftime('%Y_%m/%d_%H:%M_%S') return dt2 def ScheduleStartMessage(timestamp): print(timestamp,"===Schedule started===") def StartMessage(timestamp): print(timestamp,"===Process started===") def EndMessage(timestamp): print(timestamp,"===Process done===") # =================================== # ready for exe # =================================== # =================================== # execution # =================================== def main(): # Do Job for a certain period of time schedule.every(10).seconds.do(job) ScheduleStartMessage(timestamp()) # Checking schedule run while True: schedule.run_pending() time.sleep(1) if __name__ == "__main__": main()
◆サブスクリプト(DBテーブル初期化)
# *********************************** # Create table # a01_table # *********************************** # =================================== # import # =================================== import sqlite3 import datetime # =================================== # function # =================================== # Check the existence def CheckTableExistence(cur): cur.execute('SELECT COUNT(*) FROM sqlite_master WHERE TYPE="table" AND NAME="WeatherTable"') # Create table def CreateTable(cur): sql = 'CREATE TABLE WeatherTable(id INTEGER PRIMARY KEY AUTOINCREMENT, hour STRING, weather STRING, temp DOUBLE, windspeed INTEGER, windblow STRING, humid INTEGER)' cur.execute(sql) # Delete table def DropTable(cur): sql = 'DROP TABLE WeatherTable' cur.execute(sql) # connect to DB def ConnectDB(dbname): connection = sqlite3.connect(dbname) cur = connection.cursor() r = (cur,connection) return r # Disconnect DB(End process) def DisconnectDB(connection): cur.close() connection.commit() connection.close() # main execution def CreateWeaterTable(cur,connection): #Check the Existence of table CheckTableExistence(cur) # Check the Existence of table and Create the table if cur.fetchone() == (0,): print("WeatherTable is not existed then created") CreateTable(cur) else: print('WeatherTable exists, then deleted old one and created new one') DropTable(cur) CreateTable(cur)
# *********************************** # Web scraping # a02_web # *********************************** # =================================== # import # =================================== import requests import bs4 import sqlite3 import datetime # =================================== # function # =================================== def WebScraping(URL): r = requests.get(URL) s = bs4.BeautifulSoup(r.text, 'html.parser') # 気温情報の取得 list_date= [] for j in s.find_all('tr' , class_ ='head'): for i in j.find_all('p'): list_date.append(i.text) # 気温情報の取得 list_temp= [] for j in s.find_all('tr' , class_ ='temperature'): for i in j.find_all('span' ): list_temp.append(i.string) # 天気情報(晴れとか)の取得 list_weather = [] for j in s.find_all('tr' , class_ ='weather'): for i in j.find_all('p'): list_weather.append(i.string) # 時刻の取得 list_hour = [] for j in s.find_all('tr' , class_ ='hour'): for i in j.find_all('span'): list_hour.append(i.string) # 風向きの取得 list_windblow = [] for j in s.find_all('tr' , class_ ='wind-blow'): for i in j.find_all('p'): list_windblow.append(i.string) # 風速の取得 list_windspeed = [] for j in s.find_all('tr' , class_ ='wind-speed'): for i in j.find_all('span'): list_windspeed.append(i.string) # 湿度の取得 list_humid = [] for j in s.find_all('tr' , class_ ='humidity'): for i in j.find_all('td'): list_humid.append(i.string) #------------------------------ # 各情報を2次元配列へ格納する #------------------------------ List_all =[list_hour, list_weather, list_temp, list_windspeed,list_windblow, list_humid] return List_all # connect to DB def ConnectDB(dbname): connection = sqlite3.connect(dbname) cur = connection.cursor() r = (cur,connection) return r def StoreDB(List_all,cur,connection): sql1 = 'INSERT INTO WeatherTable(hour, weather, temp, windspeed, windblow, humid) VALUES(?,?,?,?,?,?)' for i in range(0,72,1): cur.execute(sql1,(List_all[0][i], List_all[1][i], List_all[2][i], List_all[3][i], List_all[4][i], List_all[5][i])) #cur.close() # Disconnect DB(End process) def DisconnectDB(cur,connection): cur.close() connection.commit() connection.close()
◆サブスクリプト(グラフ作成・保存)
# *********************************** # Create chart # a03_chart # *********************************** # =================================== # import # =================================== import sqlite3 import pandas as pd import matplotlib.pyplot as plt import datetime import os # =================================== # function # =================================== # connect to DB def ConnectDB(dbname): connection = sqlite3.connect(dbname) cur = connection.cursor() r = (cur,connection) return r # get Data from DB def GetDataDB(cur, connection): #----------------------------- # DB接続、カーソル起動 #----------------------------- dbname = 'TEST.db' connection = sqlite3.connect(dbname) cur = connection.cursor() #----------------------------- # データを2種類、DBから引き出す #----------------------------- # 0-24時までのデータ(data1) sql1 = 'SELECT temp, hour FROM WeatherTable LIMIT 24' c1 =cur.execute(sql1) result1 = c1.fetchall() # 次の日の0-24時までのデータ(data2) sql2 = 'SELECT temp, hour FROM WeatherTable LIMIT 24,24' c2 = cur.execute(sql2) result2 = c2.fetchall() #----------------------------- # 0-24時までのデータ(data3) sql3 = 'SELECT humid, hour FROM WeatherTable LIMIT 24' c3 =cur.execute(sql3) result3 = c3.fetchall() # 次の日の0-24時までのデータ(data4) sql4 = 'SELECT humid, hour FROM WeatherTable LIMIT 24,24' c4 = cur.execute(sql4) result4 = c4.fetchall() #----------------------------- # 0-24時までのデータ(data5) sql5 = 'SELECT windspeed, hour FROM WeatherTable LIMIT 24' c5 =cur.execute(sql5) result5 = c5.fetchall() # 次の日の0-24時までのデータ(data6) sql6 = 'SELECT windspeed, hour FROM WeatherTable LIMIT 24,24' c6 = cur.execute(sql6) result6 = c6.fetchall() #----------------------------- # 戻り値 #----------------------------- result_all =(result1,result2,result3,result4,result5,result6) return result_all def pdDataFrame(result1,result2,result3,result4,result5,result6): #データフレーム(表に書き出すイメージ) df1 = pd.DataFrame(result1,columns=["temp", "hour"]) df2 = pd.DataFrame(result2,columns=["temp", "hour"]) df3 = pd.DataFrame(result3,columns=["humid", "hour"]) df4 = pd.DataFrame(result4,columns=["humid", "hour"]) df5 = pd.DataFrame(result5,columns=["windspeed", "hour"]) df6 = pd.DataFrame(result6,columns=["windspeed", "hour"]) #----------------------------- # グラフの準備(2つのグラフ) #----------------------------- # グラフのX軸、Y軸の指定 x1 = df1.hour y1 = df1.temp x2 = df2.hour y2 = df2.temp x3 = df3.hour y3 = df3.humid x4 = df4.hour y4 = df4.humid x5 = df5.hour y5 = df5.windspeed x6 = df6.hour y6 = df6.windspeed #----------------------------- # 戻り値 #----------------------------- xyData = (x1,y1,x2,y2,x3,y3,x4,y4,x5,y5,x6,y6) return xyData def pltMatplot(x1,y1,x2,y2,x3,y3,x4,y4,x5,y5,x6,y6): # グラフ上限値を決めておく(5℃~25℃) tempMin = 5 tempMax = 25 humidMin = 0 humidMax = 100 windMin = 0 windMax = 15 # 日付情報取得(ファイル名用) #td1 = datetime.date.today() #td2 = td1.strftime('%Y%m%d') td1 = datetime.datetime.now() td2 = td1.strftime('%Y%m%d-%H%M-%S') # fig1------------------------------- fig1 = plt.figure(td2 + "_01") # (data1)---------------------------- chart1 = fig1.add_subplot(1,2,1) chart1.plot(x1, y1) plt.ylim(tempMin, tempMax) # グラフ内のラベル表記 chart1.set_title(td2) chart1.set_xlabel("Time") chart1.set_ylabel("Temperature(℃)") # (data2)---------------------------- chart2 = fig1.add_subplot(1,2,2) chart2.plot(x2, y2) plt.ylim(tempMin, tempMax) # グラフ内のラベル表記 chart2.set_title("Tomorrow") chart2.set_xlabel("Time") chart2.set_ylabel("Temperature(℃)") # fig2------------------------------- fig2 = plt.figure(td2+"_02") # (data3)---------------------------- chart3 = fig2.add_subplot(1,2,1) chart3.plot(x3, y3) plt.ylim(humidMin, humidMax) # グラフ内のラベル表記 chart3.set_title(td2) chart3.set_xlabel("Time") chart3.set_ylabel("Humidity(%)") # (data4)---------------------------- chart4 = fig2.add_subplot(1,2,2) chart4.plot(x4, y4) plt.ylim(humidMin, humidMax) # グラフ内のラベル表記 chart4.set_title("Tomorrow") chart4.set_xlabel("Time") chart4.set_ylabel("Humidity(%)") # fig3------------------------------- fig3 = plt.figure(td2+"_03") # (data5)---------------------------- chart5 = fig3.add_subplot(1,2,1) chart5.plot(x5, y5) plt.ylim(windMin, windMax) # グラフ内のラベル表記 chart5.set_title(td2) chart5.set_xlabel("Time") chart5.set_ylabel("WindSpeed(m/s)") # (data6)---------------------------- chart6 = fig3.add_subplot(1,2,2) chart6.plot(x6, y6) plt.ylim(windMin, windMax) # グラフ内のラベル表記 chart6.set_title("Tomorrow") chart6.set_xlabel("Time") chart6.set_ylabel("WindSpeed(m/s)") #----------------------------- # グラフの設定 #----------------------------- fig1.tight_layout() fig2.tight_layout() fig3.tight_layout() #----------------------------- # 戻り値 #----------------------------- fig_all = (fig1,fig2,fig3) return fig_all # Show Charts on display def ShowChart(): plt.show() # save fig data as jpg file def saveChart(fig1,fig2,fig3,dt2,currentdir): fig1.savefig(currentdir + '/fig/' + dt2 + "_fig_temp.jpg") fig2.savefig(currentdir + '/fig/' + dt2 + "_fig_humid.jpg") fig3.savefig(currentdir + '/fig/' + dt2 + "_fig_windspeed.jpg") # Disconnect DB(End process) def DisconnectDB(cur,connection): cur.close() connection.commit() connection.close()
ああ~長いな、、、これでもかなり短くしたんだけど。。。
タスクは、DB初期化、Webスクレイピング、グラフ作成の3つなんだけど、これをscheduleモジュールで定期実行させようとすると意外とややこしい。最初は関数化しないでダラダラとコードを書いてしまったのでデバッグが大変だった。プログラミング書籍には「コードは短くシンプルに」という意味がようやく分かってきた。。。。だって自分で作ったスクリプトなのに修正が面倒だなんて…アホやん。ということで極力defで関数化して、小さいブロックごとで実行をさせて結果を渡す、結果をもらって処理をしてまた渡す…の流れを作り直した。なるほど~、細分化したほうがよっぽど合理的だね。作るときは渡す引数の型とか気にしたりする必要があってちょーっと面倒だけど、出来上がればメンテナンスはかなりやりやすい。
それにしても…このscheduleって、実行間隔を極端に短くすると場合によっては迷惑行為になるかなぁ。