Webスクレイピング。今回はPythonのライブラリ(requests, bs4)を使うやり方を紹介する。
#===============================
# Web scraping
#===============================
import requests
import bs4

# Forecast page on tenki.jp that the temperature cells are read from.
URL = 'https://tenki.jp/forecast/3/17/4610/14100/1hour.html'

# Fetch the page. Without a timeout requests.get() can block indefinitely,
# and without raise_for_status() an HTTP error page would be parsed as if
# it were the forecast.
r = requests.get(URL, timeout=10)
r.raise_for_status()
s = bs4.BeautifulSoup(r.text, 'html.parser')

# Build the list
list_temp = []
# Nested lookup (narrowing the result): every <span> inside every
# <tr class="temperature"> row.
for j in s.find_all('tr', class_='temperature'):
    for i in j.find_all('span'):
        text = i.string
        # .string is None when the tag has more than one child; fall back
        # to the concatenated text instead of storing None.
        if text is None:
            text = i.get_text()
        list_temp.append(text)

print("Number of elements", len(list_temp))
print('--------------------')
# Print at most the first 24 entries. Slicing (rather than indexing
# 0..23) avoids an IndexError when the page yields fewer cells.
for temp in list_temp[:24]:
    print(temp)
print('--------------------')
print('end of line')
# Block until Enter is pressed (presumably so a console window opened by
# double-clicking the script stays visible).
input()
◆気温の取得ができたので、ほかの項目(天気・時刻・風向き・風速)も取得してみた。やり方は基本的に同じで、取得するタグと class 名にだけ気を付ければ、同じ要領でほかの情報も取得できる。こりゃラクチンだ。
#===============================
# Web scraping
# Fetch weather forecast information from tenki.jp
#===============================
import requests
import bs4

# URL to fetch the information from
URL = 'https://tenki.jp/forecast/3/17/4610/14100/1hour.html'

# Number of rows printed between separator lines, and number of such groups.
ROWS_PER_GROUP = 24
GROUP_COUNT = 3


def collect_cells(soup, row_class, tag):
    """Return the text of every ``tag`` inside every ``<tr class=row_class>``.

    Args:
        soup: Parsed document (a ``bs4.BeautifulSoup`` object).
        row_class: CSS class of the ``<tr>`` rows to search.
        tag: Name of the element inside each row whose text is collected.

    Returns:
        A list of strings in document order. When an element's ``.string``
        is ``None`` (the tag has more than one child) its concatenated
        text is used instead, so the list never contains ``None``.
    """
    cells = []
    for row in soup.find_all('tr', class_=row_class):
        for cell in row.find_all(tag):
            text = cell.string
            if text is None:
                text = cell.get_text()
            cells.append(text)
    return cells


# Access the URL. Without a timeout requests.get() can block indefinitely,
# and without raise_for_status() an HTTP error page would be parsed as if
# it were the forecast.
r = requests.get(URL, timeout=10)
r.raise_for_status()

# Convert into a structure that is easy to analyse
s = bs4.BeautifulSoup(r.text, 'html.parser')

#===============================
# Temperature
list_temp = collect_cells(s, 'temperature', 'span')

# Weather (sunny etc.)
list_weather = collect_cells(s, 'weather', 'p')

# Time of day
list_hour = collect_cells(s, 'hour', 'span')

# Wind direction
list_windblow = collect_cells(s, 'wind-blow', 'p')

# Wind speed
list_windspeed = collect_cells(s, 'wind-speed', 'span')
#===============================

# Pair the columns up row by row. zip() stops at the shortest list, so a
# missing cell in one column shortens the output instead of raising
# IndexError.
rows = list(zip(list_hour, list_temp, list_weather,
                list_windblow, list_windspeed))

print("Number of elements", len(list_temp))
print('--------------------')
# Print the rows in groups of ROWS_PER_GROUP with a separator after each
# group; slicing tolerates fewer rows than GROUP_COUNT * ROWS_PER_GROUP.
for start in range(0, GROUP_COUNT * ROWS_PER_GROUP, ROWS_PER_GROUP):
    for hour, temp, weather, wind_dir, wind_speed in rows[start:start + ROWS_PER_GROUP]:
        print(hour, f'{temp}(℃)', weather, wind_dir, f'{wind_speed}(m/s)')
    print('--------------------')
print('end of line')
# Block until Enter is pressed (presumably so a console window opened by
# double-clicking the script stays visible).
input()
◆記事のほうはこちら