Очистка данных о погоде с использованием регулярных выражений для цикла for - PullRequest
0 голосов
/ 14 октября 2019

Попытка избавиться от атмосферного давления через 12 часов. Не уверен, как изменить мое регулярное выражение для захвата минуты и часа, которые находятся в двух отдельных классах в 'td'. Я попытался добавить атрибут class = "ng-star-вставка" в find_all и искать час, но не смог. Переменная V - это всего лишь фрагмент разобранного html с использованием bs4. Для каждого часа дня есть примерно две строки, похожие на V.

from urllib.request import urlopen as uReq
import numpy as np
import cv2
from bs4 import BeautifulSoup as soup
import re
import datetime
import os
import csv
import cefpython3 as cef
import sys
import selenium
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

v = (
<tr _ngcontent-app-root-c5="" class="mat-header-row ng-star-inserted" mat-header-row="" role="row">
 <!-- -->
 <th _ngcontent-app-root-c5="" class="mat-header-cell cdk-column-timeHour mat-column-timeHour ng-star-inserted" mat-header-cell="" role="columnheader">
  Time
 </th>
 <th _ngcontent-app-root-c5="" class="mat-header-cell cdk-column-conditions mat-column-conditions ng-star-inserted" mat-header-cell="" role="columnheader">
  Conditions
 </th>
 <th _ngcontent-app-root-c5="" class="mat-header-cell cdk-column-temperature mat-column-temperature ng-star-inserted" mat-header-cell="" role="columnheader">
  Temp.
 </th>
 <th _ngcontent-app-root-c5="" class="mat-header-cell cdk-column-feelsLike mat-column-feelsLike ng-star-inserted" mat-header-cell="" role="columnheader">
  Feels Like
 </th>
 <th _ngcontent-app-root-c5="" class="mat-header-cell cdk-column-precipitation mat-column-precipitation ng-star-inserted" mat-header-cell="" role="columnheader">
  Precip
 </th>
 <th _ngcontent-app-root-c5="" class="mat-header-cell cdk-column-liquidPrecipitation mat-column-liquidPrecipitation ng-star-inserted" mat-header-cell="" role="columnheader">
  Amount
 </th>
 <th _ngcontent-app-root-c5="" class="mat-header-cell cdk-column-cloudCover mat-column-cloudCover ng-star-inserted" mat-header-cell="" role="columnheader">
  Cloud Cover
 </th>
 <th _ngcontent-app-root-c5="" class="mat-header-cell cdk-column-dewPoint mat-column-dewPoint ng-star-inserted" mat-header-cell="" role="columnheader">
  Dew Point
 </th>
 <th _ngcontent-app-root-c5="" class="mat-header-cell cdk-column-humidity mat-column-humidity ng-star-inserted" mat-header-cell="" role="columnheader">
  Humidity
 </th>
 <th _ngcontent-app-root-c5="" class="mat-header-cell cdk-column-wind mat-column-wind ng-star-inserted" mat-header-cell="" role="columnheader">
  Wind
 </th>
 <th _ngcontent-app-root-c5="" class="mat-header-cell cdk-column-pressure mat-column-pressure ng-star-inserted" mat-header-cell="" role="columnheader">
  Pressure
 </th>
</tr>
<tr _ngcontent-app-root-c5="" class="mat-row ng-star-inserted" mat-row="" role="row">
 <!-- -->
 <td _ngcontent-app-root-c5="" class="mat-cell cdk-column-timeHour mat-column-timeHour ng-star-inserted" mat-cell="" role="gridcell">
  <!-- -->
  <span _ngcontent-app-root-c5="" class="ng-star-inserted">
   12
   <span _ngcontent-app-root-c5="" class="show-for-medium">
    :00
   </span>
   am
  </span>
  <!-- -->
  <!-- -->
  <!-- -->
 </td>
 <td _ngcontent-app-root-c5="" class="mat-cell cdk-column-conditions mat-column-conditions ng-star-inserted" mat-cell="" role="gridcell">
  <!-- -->
  <!-- -->
  <!-- -->
  <!-- -->
  <span _ngcontent-app-root-c5="" class="ng-star-inserted">
   <img _ngcontent-app-root-c5="" alt="Partly Cloudy" class="no-scale" src="//www.wunderground.com/static/i/c/v4/29.svg"/>
   <span _ngcontent-app-root-c5="" class="show-for-medium conditions">
    Partly Cloudy
   </span>
   <span _ngcontent-app-root-c5="" class="show-for-small-only conditions">
   </span>
  </span>
 </td>
 <td _ngcontent-app-root-c5="" class="mat-cell cdk-column-temperature mat-column-temperature ng-star-inserted" mat-cell="" role="gridcell">
  <!-- -->
  <!-- -->
  <!-- -->
  <lib-display-unit _ngcontent-app-root-c5="" _nghost-app-root-c15="" class="ng-star-inserted">
   <!-- -->
   <span _ngcontent-app-root-c15="" class="test-true wu-unit wu-unit-temperature is-degree-visible ng-star-inserted">
    <!-- -->
    <!-- -->
    <!-- -->
    <span _ngcontent-app-root-c15="" class="wu-value wu-value-to">
     49
    </span>
    <span _ngcontent-app-root-c15="" class="wu-label">
     <!-- -->
     <span _ngcontent-app-root-c15="" class="ng-star-inserted">
      F
     </span>
     <!-- -->
    </span>
    <!-- -->
   </span>
   <!-- -->
  </lib-display-unit>
  <!-- -->
 </td>
 <td _ngcontent-app-root-c5="" class="mat-cell cdk-column-feelsLike mat-column-feelsLike ng-star-inserted" mat-cell="" role="gridcell">
  <!-- -->
  <!-- -->
  <!-- -->
  <lib-display-unit _ngcontent-app-root-c5="" _nghost-app-root-c15="" class="ng-star-inserted">
   <!-- -->
   <span _ngcontent-app-root-c15="" class="test-true wu-unit wu-unit-temperature is-degree-visible ng-star-inserted">
    <!-- -->
    <!-- -->
    <!-- -->
    <span _ngcontent-app-root-c15="" class="wu-value wu-value-to">
     49
    </span>
    <span _ngcontent-app-root-c15="" class="wu-label">
     <!-- -->
     <span _ngcontent-app-root-c15="" class="ng-star-inserted">
      F
     </span>
     <!-- -->
    </span>
    <!-- -->
   </span>
   <!-- -->
  </lib-display-unit>
  <!-- -->
 </td>
 <td _ngcontent-app-root-c5="" class="mat-cell cdk-column-precipitation mat-column-precipitation ng-star-inserted" mat-cell="" role="gridcell">
  <!-- -->
  <!-- -->
  <a _ngcontent-app-root-c5="" class="ng-star-inserted" href="/precipitation/us/or/portland">
   <!-- -->
   <lib-display-unit _ngcontent-app-root-c5="" _nghost-app-root-c15="" class="ng-star-inserted">
    <!-- -->
    <span _ngcontent-app-root-c15="" class="test- wu-unit wu-unit-chance ng-star-inserted">
     <!-- -->
     <!-- -->
     <!-- -->
     <span _ngcontent-app-root-c15="" class="wu-value wu-value-to">
      0
     </span>
     <span _ngcontent-app-root-c15="" class="wu-label">
      <!-- -->
      <span _ngcontent-app-root-c15="" class="ng-star-inserted">
       %
      </span>
      <!-- -->
     </span>
     <!-- -->
    </span>
    <!-- -->
   </lib-display-unit>
  </a>
  <!-- -->
  <!-- -->
 </td>
 <td _ngcontent-app-root-c5="" class="mat-cell cdk-column-liquidPrecipitation mat-column-liquidPrecipitation ng-star-inserted" mat-cell="" role="gridcell">
  <!-- -->
  <!-- -->
  <a _ngcontent-app-root-c5="" class="ng-star-inserted" href="/precipitation/us/or/portland">
   <!-- -->
   <lib-display-unit _ngcontent-app-root-c5="" _nghost-app-root-c15="" class="ng-star-inserted">
    <!-- -->
    <span _ngcontent-app-root-c15="" class="test- wu-unit wu-unit-rain ng-star-inserted">
     <!-- -->
     <!-- -->
     <!-- -->
     <span _ngcontent-app-root-c15="" class="wu-value wu-value-to">
      0
     </span>
     <span _ngcontent-app-root-c15="" class="wu-label">
      <!-- -->
      <span _ngcontent-app-root-c15="" class="ng-star-inserted">
       in
      </span>
      <!-- -->
     </span>
     <!-- -->
    </span>
    <!-- -->
   </lib-display-unit>
  </a>
  <!-- -->
  <!-- -->
 </td>
 <td _ngcontent-app-root-c5="" class="mat-cell cdk-column-cloudCover mat-column-cloudCover ng-star-inserted" mat-cell="" role="gridcell">
  <!-- -->
  <!-- -->
  <!-- -->
  <lib-display-unit _ngcontent-app-root-c5="" _nghost-app-root-c15="" class="ng-star-inserted">
   <!-- -->
   <span _ngcontent-app-root-c15="" class="test- wu-unit wu-unit-chance ng-star-inserted">
    <!-- -->
    <!-- -->
    <!-- -->
    <span _ngcontent-app-root-c15="" class="wu-value wu-value-to">
     31
    </span>
    <span _ngcontent-app-root-c15="" class="wu-label">
     <!-- -->
     <span _ngcontent-app-root-c15="" class="ng-star-inserted">
      %
     </span>
     <!-- -->
    </span>
    <!-- -->
   </span>
   <!-- -->
  </lib-display-unit>
  <!-- -->
 </td>
 <td _ngcontent-app-root-c5="" class="mat-cell cdk-column-dewPoint mat-column-dewPoint ng-star-inserted" mat-cell="" role="gridcell">
  <!-- -->
  <!-- -->
  <!-- -->
  <lib-display-unit _ngcontent-app-root-c5="" _nghost-app-root-c15="" class="ng-star-inserted">
   <!-- -->
   <span _ngcontent-app-root-c15="" class="test-true wu-unit wu-unit-temperature is-degree-visible ng-star-inserted">
    <!-- -->
    <!-- -->
    <!-- -->
    <span _ngcontent-app-root-c15="" class="wu-value wu-value-to">
     44
    </span>
    <span _ngcontent-app-root-c15="" class="wu-label">
     <!-- -->
     <span _ngcontent-app-root-c15="" class="ng-star-inserted">
      F
     </span>
     <!-- -->
    </span>
    <!-- -->
   </span>
   <!-- -->
  </lib-display-unit>
  <!-- -->
 </td>
 <td _ngcontent-app-root-c5="" class="mat-cell cdk-column-humidity mat-column-humidity ng-star-inserted" mat-cell="" role="gridcell">
  <!-- -->
  <!-- -->
  <!-- -->
  <lib-display-unit _ngcontent-app-root-c5="" _nghost-app-root-c15="" class="ng-star-inserted">
   <!-- -->
   <span _ngcontent-app-root-c15="" class="test- wu-unit wu-unit-humidity ng-star-inserted">
    <!-- -->
    <!-- -->
    <!-- -->
    <span _ngcontent-app-root-c15="" class="wu-value wu-value-to">
     81
    </span>
    <span _ngcontent-app-root-c15="" class="wu-label">
     <!-- -->
     <span _ngcontent-app-root-c15="" class="ng-star-inserted">
      %
     </span>
     <!-- -->
    </span>
    <!-- -->
   </span>
   <!-- -->
  </lib-display-unit>
  <!-- -->
 </td>
 <td _ngcontent-app-root-c5="" class="mat-cell cdk-column-wind mat-column-wind ng-star-inserted" mat-cell="" role="gridcell">
  <!-- -->
  <!-- -->
  <!-- -->
  <lib-display-unit _ngcontent-app-root-c5="" _nghost-app-root-c15="" class="ng-star-inserted">
   <!-- -->
   <span _ngcontent-app-root-c15="" class="test- wu-unit wu-unit-speed ng-star-inserted">
    <!-- -->
    <!-- -->
    <!-- -->
    <span _ngcontent-app-root-c15="" class="wu-value wu-value-to">
     2
    </span>
    <span _ngcontent-app-root-c15="" class="wu-label">
     <!-- -->
     <span _ngcontent-app-root-c15="" class="ng-star-inserted">
      mph
     </span>
     <!-- -->
    </span>
    <!-- -->
    <span _ngcontent-app-root-c15="" class="wu-suffix ng-star-inserted">
     NW
    </span>
   </span>
   <!-- -->
  </lib-display-unit>
  <!-- -->
 </td>
 <td _ngcontent-app-root-c5="" class="mat-cell cdk-column-pressure mat-column-pressure ng-star-inserted" mat-cell="" role="gridcell">
  <!-- -->
  <!-- -->
  <!-- -->
  <lib-display-unit _ngcontent-app-root-c5="" _nghost-app-root-c15="" class="ng-star-inserted">
   <!-- -->
   <span _ngcontent-app-root-c15="" class="test- wu-unit wu-unit-pressure ng-star-inserted">
    <!-- -->
    <!-- -->
    <!-- -->
    <span _ngcontent-app-root-c15="" class="wu-value wu-value-to">
     30.05
    </span>
    <span _ngcontent-app-root-c15="" class="wu-label">
     <!-- -->
     <span _ngcontent-app-root-c15="" class="ng-star-inserted">
      in
     </span>
     <!-- -->
    </span>
    <!-- -->
   </span>
   <!-- -->
  </lib-display-unit>
  <!-- -->
 </td>
</tr>
)

time_12 = '8:27'

userInputDt = datetime.datetime.strptime(time_12, '%H:%M')

for row in v:
    m = re.findall('(\d+:\d+)\s+([AP]M)', row.get_text())
    if len(m) > 0:
        dtString = ' '.join(map(str,m[0]))
        dt = datetime.datetime.strptime(dtString, '%I:%M %p')
        timedelta = abs(dt - userInputDt)
        if time_delta == None or timedelta < time_delta:
            save_row = row
            time_delta = timedelta
z = []

for td in save_row.find_all('td'):
        z.append(td.get_text())

output

Traceback (most recent call last):
  File "C:\Python\Scripts\test2.py", line 57, in <module>
    for td in save_row.find_all('td'):
AttributeError: 'NoneType' object has no attribute 'find_all'

Сбой сценария на

m = re.findall('(\d+:\d+)\s+([AP]M)', row.get_text())

Мне кажется,объедините часы и минуты из html в одну строку, прежде чем сработает регулярное выражение, но не знаете, как поступить.

timeHour ng-star-inserted" mat-cell="" role="gridcell">
  <!-- -->
  <span _ngcontent-app-root-c5="" class="ng-star-inserted">
   12
   <span _ngcontent-app-root-c5="" class="show-for-medium">
    :00
   </span>

Фактический исходный код

from urllib.request import urlopen as uReq
import numpy as np
import cv2
from bs4 import BeautifulSoup as soup
import re
import datetime
import os
import csv
import cefpython3 as cef
import sys
import selenium
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

now = datetime.datetime.now()
now_plus_12 = now + datetime.timedelta(hours = 12)
future_12 = (now_plus_12.timetuple())
fTup12 = str(future_12[0]),str(future_12[1]),str(future_12[2]),str(future_12[3]),str(future_12[4])
fList12 = list(fTup12)
year = fList12[0]
month = fList12[1]
day = fList12[2]
timeTup = str(fList12[3]),str(fList12[4])
timeList = list(timeTup)
time_12 = ':'.join(timeList)

opts = webdriver.ChromeOptions()
opts.add_argument('headless')

driver = webdriver.Chrome(chrome_options=opts)
driver.maximize_window()

weather_url = 'https://www.wunderground.com/hourly/us/or/portland/date/' + year + '-' + month + '-' + day
driver.get(weather_url)
time.sleep(20)
weather_html = driver.page_source
weather_soup = soup(weather_html, "html.parser")
table = weather_soup.find('table', id="hourly-forecast-table")

save_row = None
time_delta = None

#userInputDt = datetime.datetime.strptime(time_12, '%H:%M')

#for row in table.find_all('tr'):
 #   m = re.findall('(\d+:\d+)\s+([AP]M)', row.get_text())
  #  if len(m) > 0:
   #     dtString = ' '.join(map(str,m[0]))
    #    dt = datetime.datetime.strptime(dtString, '%I:%M %p')
     #   timedelta = abs(dt - userInputDt)
      #  if time_delta == None or timedelta < time_delta:
       #     save_row = row
        #    time_delta = timedelta
#v = []

#for td in save_row.find_all('td'):
 #       v.append(td.get_text())

1 Ответ

0 голосов
/ 15 октября 2019

Понял, что источник HTML изменился с AM / PM верхнего регистра на AM / PM нижнего регистра

изменен

m = re.findall('(\d+:\d+)\s+([AP]M)', row.get_text())

на

m = re.findall('(\d+:\d+)\s+([ap]m)', row.get_text())
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...