Status | Scraper | Last success | Last run | Warnings / exception
C | ca | 2024-09-16 04:41:41 | 2024-09-16 04:41:42 |
C | ca_ab | 2024-09-16 04:05:33 | 2024-09-16 04:05:34 |
C | ca_ab_calgary | 2024-09-16 04:09:46 | 2024-09-16 04:09:46 |
C | ca_ab_edmonton | 2024-09-16 04:54:25 | 2024-09-16 04:54:25 |
C | ca_ab_grande_prairie | 2024-09-16 04:23:19 | 2024-09-16 04:23:19 |
C | ca_ab_grande_prairie_county_no_1 | 2024-09-16 04:02:50 | 2024-09-16 04:02:50 |
C | ca_ab_lethbridge | 2024-09-16 04:56:20 | 2024-09-16 04:56:20 |
C | ca_ab_strathcona_county | 2024-09-16 04:17:07 | 2024-09-16 04:17:07 |
C | ca_ab_wood_buffalo | 2024-09-16 04:19:30 | 2024-09-16 04:19:31 |
D> | ca_bc | 2024-06-26 04:14:44 | 2024-09-16 04:19:34 | scrapelib.HTTPError: 404 while retrieving https://www.leg.bc.ca/_api/search/query?querytext='(contentclass:sts_listitem%20OR…
    Traceback (most recent call last):
      File "/app/reports/utils.py", line 71, in scrape_people
        report.report = subcommand.handle(args, other)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 260, in handle
        return self.do_handle(args, other, juris)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 305, in do_handle
        report['scrape'] = self.do_scrape(juris, args, scrapers)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 173, in do_scrape
        report[scraper_name] = scraper.do_scrape(**scrape_args)
      File "/app/.heroku/src/pupa/pupa/scrape/base.py", line 99, in do_scrape
        for obj in self.scrape(**kwargs) or []:
      File "/app/scrapers/ca_bc/people.py", line 16, in scrape
        page = self.lxmlize(COUNCIL_PAGE, xml=True)
      File "/app/scrapers/utils.py", line 204, in lxmlize
        response = self.get(url, cookies=cookies)
      File "/app/scrapers/utils.py", line 196, in get
        return super().get(*args, verify=SSL_VERIFY, **kwargs)
      File "/app/.heroku/python/lib/python3.9/site-packages/requests/sessions.py", line 602, in get
        return self.request("GET", url, **kwargs)
      File "/app/.heroku/python/lib/python3.9/site-packages/scrapelib/__init__.py", line 602, in request
        raise HTTPError(resp)
    scrapelib.HTTPError: 404 while retrieving https://www.leg.bc.ca/_api/search/query?querytext='(contentclass:sts_listitem%20OR%20IsDocument:True)%20SPSiteUrl:/content%20ListId:8ecafcaa-2bf9-4434-a60c-3663a9afd175%20MLAActiveOWSBOOL:1%20-LastNameOWSTEXT:Vacant'&selectproperties='Picture1OWSIMGE,Title,Path'&&sortlist='LastNameSort:ascending'&rowlimit=100&QueryTemplatePropertiesUrl='spfile://webroot/queryparametertemplate.xml'
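The ca_bc failure above is a plain 404: the SharePoint search endpoint the scraper targets no longer exists at that path. A one-off probe like the hedged sketch below (not project code) helps distinguish a moved page from a blocked scraper before updating the scraper's COUNCIL_PAGE; scrapelib raises scrapelib.HTTPError for any non-2xx response, and its message embeds the status code and URL.

```python
# Hedged sketch: a manual probe, not part of the scrapers.
import scrapelib

scraper = scrapelib.Scraper(retry_attempts=0)  # fail fast for a manual check
try:
    scraper.get("https://www.leg.bc.ca/_api/search/query")  # endpoint from the row above
except scrapelib.HTTPError as exc:
    print(exc)  # e.g. "404 while retrieving https://..." -> page moved; 403 -> blocked
```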
C | ca_bc_abbotsford | 2024-09-16 04:06:04 | 2024-09-16 04:06:04 |
C | ca_bc_burnaby | 2024-09-16 04:45:54 | 2024-09-16 04:45:54 |
C | ca_bc_coquitlam | 2024-09-16 04:10:03 | 2024-09-16 04:10:04 |
C | ca_bc_kelowna | 2024-09-16 04:13:05 | 2024-09-16 04:13:05 |
C | ca_bc_langley | 2024-09-16 04:02:29 | 2024-09-16 04:02:29 |
C | ca_bc_langley_city | 2024-09-16 04:15:08 | 2024-09-16 04:15:08 |
C | ca_bc_new_westminster | 2024-09-16 04:16:27 | 2024-09-16 04:16:27 |
C | ca_bc_richmond | 2024-09-16 04:11:18 | 2024-09-16 04:11:18 |
C | ca_bc_saanich | 2024-09-16 04:08:51 | 2024-09-16 04:08:51 |
C | ca_bc_surrey | 2024-09-16 04:44:54 | 2024-09-16 04:44:54 |
C | ca_bc_vancouver | 2024-09-16 04:55:53 | 2024-09-16 04:55:53 |
C | ca_bc_victoria | 2024-09-16 04:17:40 | 2024-09-16 04:17:40 |
D> | ca_mb | 2024-09-16 04:18:45 | 2024-09-16 18:59:49 | SystemExit: 1
    Traceback (most recent call last):
      File "/app/reports/utils.py", line 71, in scrape_people
        report.report = subcommand.handle(args, other)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 260, in handle
        return self.do_handle(args, other, juris)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 305, in do_handle
        report['scrape'] = self.do_scrape(juris, args, scrapers)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 173, in do_scrape
        report[scraper_name] = scraper.do_scrape(**scrape_args)
      File "/app/.heroku/src/pupa/pupa/scrape/base.py", line 99, in do_scrape
        for obj in self.scrape(**kwargs) or []:
      File "/app/scrapers/ca_mb/people.py", line 39, in scrape
        page = self.lxmlize(url)
      File "/app/scrapers/utils.py", line 204, in lxmlize
        response = self.get(url, cookies=cookies)
      File "/app/scrapers/utils.py", line 196, in get
        return super().get(*args, verify=SSL_VERIFY, **kwargs)
      File "/app/.heroku/python/lib/python3.9/site-packages/requests/sessions.py", line 602, in get
        return self.request("GET", url, **kwargs)
      File "/app/.heroku/python/lib/python3.9/site-packages/scrapelib/__init__.py", line 579, in request
        resp = super().request(
      File "/app/.heroku/python/lib/python3.9/site-packages/scrapelib/__init__.py", line 404, in request
        resp = super().request(
      File "/app/.heroku/python/lib/python3.9/site-packages/scrapelib/__init__.py", line 231, in request
        self._throttle()
      File "/app/.heroku/python/lib/python3.9/site-packages/scrapelib/__init__.py", line 188, in _throttle
        time.sleep(diff)
      File "/app/.heroku/python/lib/python3.9/site-packages/gunicorn/workers/base.py", line 203, in handle_abort
        sys.exit(1)
    SystemExit: 1
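The ca_mb failure is not a scraping error: the traceback bottoms out in gunicorn's handle_abort, meaning the worker running the scrape was killed while scrapelib slept between throttled requests (note the run spanning 04:18 to 18:59). One hedged mitigation, assuming the scrape really does run inside a gunicorn worker as the traceback suggests, is to raise the worker timeout, or better, move long scrapes out of web workers entirely.

```python
# gunicorn.conf.py -- illustrative values only, not the project's settings.
# gunicorn's default timeout is 30 s; a scrape that throttles itself
# (scrapelib sleeps between requests) can easily outlive that.
timeout = 1800         # kill a silent worker only after 30 minutes
graceful_timeout = 60  # give aborted workers a minute to wind down
```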
C | ca_mb_winnipeg | 2024-09-16 04:56:08 | 2024-09-16 04:56:08 |
C | ca_nb | 2024-09-16 04:20:57 | 2024-09-16 04:20:57 |
C | ca_nb_fredericton | 2024-09-16 04:23:39 | 2024-09-16 04:23:39 |
C | ca_nb_moncton | 2024-09-16 04:20:04 | 2024-09-16 04:20:04 |
C | ca_nb_saint_john | 2024-09-16 04:23:52 | 2024-09-16 04:23:53 |
C | ca_nl | 2024-09-16 04:47:18 | 2024-09-16 04:47:18 |
C | ca_nl_st_john_s | 2024-09-16 04:17:35 | 2024-09-16 04:17:35 |
C | ca_ns | 2024-09-16 04:44:40 | 2024-09-16 04:44:40 |
C | ca_ns_cape_breton | 2024-09-16 04:07:40 | 2024-09-16 04:07:40 |
C | ca_ns_halifax | 2024-09-16 04:11:43 | 2024-09-16 04:11:44 |
C | ca_nt | 2024-09-16 04:03:14 | 2024-09-16 04:03:14 |
A | ca_nu | 2024-09-16 04:15:02 | 2024-09-16 04:15:02 |
C | ca_on | 2024-09-16 04:23:14 | 2024-09-16 04:23:14 |
C | ca_on_ajax | 2024-09-16 04:07:20 | 2024-09-16 04:07:20 |
C | ca_on_belleville | 2024-09-16 04:14:28 | 2024-09-16 04:14:28 |
C | ca_on_brampton | 2024-09-16 04:55:56 | 2024-09-16 04:55:57 |
C | ca_on_brantford | 2024-09-16 04:16:35 | 2024-09-16 04:16:35 |
C | ca_on_burlington | 2024-09-16 04:54:34 | 2024-09-16 04:54:35 |
C | ca_on_caledon | 2024-09-16 04:17:04 | 2024-09-16 04:17:04 |
C | ca_on_cambridge | 2024-09-16 04:09:39 | 2024-09-16 04:09:39 |
C | ca_on_chatham_kent | 2024-09-16 04:46:21 | 2024-09-16 04:46:21 |
C | ca_on_clarington | 2024-09-16 04:06:27 | 2024-09-16 04:06:27 |
C | ca_on_fort_erie | 2024-09-16 04:23:55 | 2024-09-16 04:23:55 |
C | ca_on_georgina | 2024-09-16 04:08:46 | 2024-09-16 04:08:47 |
C | ca_on_greater_sudbury | 2024-09-16 04:09:35 | 2024-09-16 04:09:35 |
C | ca_on_grimsby | 2024-09-16 04:14:34 | 2024-09-16 04:14:34 |
D> | ca_on_guelph | 2024-08-23 04:18:00 | 2024-09-16 04:56:27 | requests.exceptions.SSLError: HTTPSConnectionPool(host='data.open.guelph.ca', port=443): Max retries exceeded with url: /dat…
    Traceback (most recent call last):
      File "/app/.heroku/python/lib/python3.9/site-packages/urllib3/connectionpool.py", line 715, in urlopen
        httplib_response = self._make_request(
      File "/app/.heroku/python/lib/python3.9/site-packages/urllib3/connectionpool.py", line 404, in _make_request
        self._validate_conn(conn)
      File "/app/.heroku/python/lib/python3.9/site-packages/urllib3/connectionpool.py", line 1060, in _validate_conn
        conn.connect()
      File "/app/.heroku/python/lib/python3.9/site-packages/urllib3/connection.py", line 419, in connect
        self.sock = ssl_wrap_socket(
      File "/app/.heroku/python/lib/python3.9/site-packages/urllib3/util/ssl_.py", line 449, in ssl_wrap_socket
        ssl_sock = _ssl_wrap_socket_impl(
      File "/app/.heroku/python/lib/python3.9/site-packages/urllib3/util/ssl_.py", line 493, in _ssl_wrap_socket_impl
        return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
      File "/app/.heroku/python/lib/python3.9/ssl.py", line 501, in wrap_socket
        return self.sslsocket_class._create(
      File "/app/.heroku/python/lib/python3.9/ssl.py", line 1074, in _create
        self.do_handshake()
      File "/app/.heroku/python/lib/python3.9/ssl.py", line 1343, in do_handshake
        self._sslobj.do_handshake()
    ssl.SSLError: [SSL: SSLV3_ALERT_HANDSHAKE_FAILURE] sslv3 alert handshake failure (_ssl.c:1133)

    During handling of the above exception, another exception occurred:

    Traceback (most recent call last):
      File "/app/.heroku/python/lib/python3.9/site-packages/requests/adapters.py", line 564, in send
        resp = conn.urlopen(
      File "/app/.heroku/python/lib/python3.9/site-packages/urllib3/connectionpool.py", line 801, in urlopen
        retries = retries.increment(
      File "/app/.heroku/python/lib/python3.9/site-packages/urllib3/util/retry.py", line 594, in increment
        raise MaxRetryError(_pool, url, error or ResponseError(cause))
    urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='data.open.guelph.ca', port=443): Max retries exceeded with url: /datafiles/guelph-mayor-and-councillors-contact-information-2018-2022.csv (Caused by SSLError(SSLError(1, '[SSL: SSLV3_ALERT_HANDSHAKE_FAILURE] sslv3 alert handshake failure (_ssl.c:1133)')))

    During handling of the above exception, another exception occurred:

    Traceback (most recent call last):
      File "/app/reports/utils.py", line 71, in scrape_people
        report.report = subcommand.handle(args, other)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 260, in handle
        return self.do_handle(args, other, juris)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 305, in do_handle
        report['scrape'] = self.do_scrape(juris, args, scrapers)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 173, in do_scrape
        report[scraper_name] = scraper.do_scrape(**scrape_args)
      File "/app/.heroku/src/pupa/pupa/scrape/base.py", line 99, in do_scrape
        for obj in self.scrape(**kwargs) or []:
      File "/app/scrapers/utils.py", line 401, in scrape
        reader = self.csv_reader(
      File "/app/scrapers/utils.py", line 239, in csv_reader
        response = self.get(url, **kwargs)
      File "/app/scrapers/utils.py", line 196, in get
        return super().get(*args, verify=SSL_VERIFY, **kwargs)
      File "/app/.heroku/python/lib/python3.9/site-packages/requests/sessions.py", line 602, in get
        return self.request("GET", url, **kwargs)
      File "/app/.heroku/python/lib/python3.9/site-packages/scrapelib/__init__.py", line 579, in request
        resp = super().request(
      File "/app/.heroku/python/lib/python3.9/site-packages/scrapelib/__init__.py", line 404, in request
        resp = super().request(
      File "/app/.heroku/python/lib/python3.9/site-packages/scrapelib/__init__.py", line 232, in request
        return super().request(
      File "/app/.heroku/python/lib/python3.9/site-packages/scrapelib/__init__.py", line 122, in request
        resp = super().request(
      File "/app/.heroku/python/lib/python3.9/site-packages/requests/sessions.py", line 589, in request
        resp = self.send(prep, **send_kwargs)
      File "/app/.heroku/python/lib/python3.9/site-packages/requests/sessions.py", line 724, in send
        history = [resp for resp in gen]
      File "/app/.heroku/python/lib/python3.9/site-packages/requests/sessions.py", line 724, in <listcomp>
        history = [resp for resp in gen]
      File "/app/.heroku/python/lib/python3.9/site-packages/requests/sessions.py", line 265, in resolve_redirects
        resp = self.send(
      File "/app/.heroku/python/lib/python3.9/site-packages/requests/sessions.py", line 703, in send
        r = adapter.send(request, **kwargs)
      File "/app/.heroku/python/lib/python3.9/site-packages/requests/adapters.py", line 595, in send
        raise SSLError(e, request=request)
    requests.exceptions.SSLError: HTTPSConnectionPool(host='data.open.guelph.ca', port=443): Max retries exceeded with url: /datafiles/guelph-mayor-and-councillors-contact-information-2018-2022.csv (Caused by SSLError(SSLError(1, '[SSL: SSLV3_ALERT_HANDSHAKE_FAILURE] sslv3 alert handshake failure (_ssl.c:1133)')))
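The root cause for ca_on_guelph is the TLS handshake (SSLV3_ALERT_HANDSHAKE_FAILURE), not the CSV itself: data.open.guelph.ca evidently offers only legacy ciphers or key sizes that OpenSSL rejects at its default security level. A common workaround, sketched below as an assumption rather than the project's actual fix, is to mount a transport adapter with a relaxed SSLContext for just that host.

```python
# Hedged sketch, not the scrapers' code: allow legacy TLS for one host.
import ssl

import requests
from requests.adapters import HTTPAdapter


class LegacyTLSAdapter(HTTPAdapter):
    """Transport adapter that accepts legacy cipher suites and key sizes."""

    def init_poolmanager(self, *args, **kwargs):
        ctx = ssl.create_default_context()
        ctx.set_ciphers("DEFAULT@SECLEVEL=1")  # permit older handshakes
        kwargs["ssl_context"] = ctx
        return super().init_poolmanager(*args, **kwargs)


session = requests.Session()
session.mount("https://data.open.guelph.ca/", LegacyTLSAdapter())
```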
C | ca_on_haldimand_county | 2024-09-16 04:02:46 | 2024-09-16 04:02:46 |
C | ca_on_hamilton | 2024-09-16 04:05:56 | 2024-09-16 04:05:56 |
C | ca_on_huron | 2024-09-16 04:11:48 | 2024-09-16 04:11:48 |
C | ca_on_kawartha_lakes | 2024-09-16 04:09:50 | 2024-09-16 04:09:50 |
C | ca_on_king | 2024-09-16 04:54:02 | 2024-09-16 04:54:02 |
C | ca_on_kingston | 2024-09-16 04:17:17 | 2024-09-16 04:17:18 |
C | ca_on_kitchener | 2024-09-16 04:07:15 | 2024-09-16 04:07:15 |
C | ca_on_lambton | 2024-09-16 04:00:50 | 2024-09-16 04:00:50 |
C | ca_on_lasalle | 2024-09-16 04:23:45 | 2024-09-16 04:23:45 |
C | ca_on_lincoln | 2024-09-16 04:11:22 | 2024-09-16 04:11:22 |
C | ca_on_london | 2024-09-16 04:41:54 | 2024-09-16 04:41:54 |
C | ca_on_markham | 2024-09-16 04:00:44 | 2024-09-16 04:00:45 |
C | ca_on_milton | 2024-09-16 04:46:55 | 2024-09-16 04:46:55 |
D> | ca_on_mississauga | 2024-06-25 04:27:21 | 2024-09-16 04:19:24 | IndexError: list index out of range
    Traceback (most recent call last):
      File "/app/reports/utils.py", line 71, in scrape_people
        report.report = subcommand.handle(args, other)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 260, in handle
        return self.do_handle(args, other, juris)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 305, in do_handle
        report['scrape'] = self.do_scrape(juris, args, scrapers)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 173, in do_scrape
        report[scraper_name] = scraper.do_scrape(**scrape_args)
      File "/app/.heroku/src/pupa/pupa/scrape/base.py", line 99, in do_scrape
        for obj in self.scrape(**kwargs) or []:
      File "/app/scrapers/ca_on_mississauga/people.py", line 17, in scrape
        yield self.councillor_data(councillor_url.attrib["href"])
      File "/app/scrapers/ca_on_mississauga/people.py", line 35, in councillor_data
        photo = page.xpath(
    IndexError: list index out of range
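Here page.xpath(...) returned an empty list, so indexing [0] raised IndexError; the councillor page layout evidently changed. The same pattern sinks ca_qc_dollard_des_ormeaux and ca_qc_montreal_est below. A minimal guard, sketched with illustrative markup rather than the scraper's real selectors:

```python
# Illustrative sketch, not the scraper's code: guard xpath lookups that
# may legitimately match nothing, instead of indexing [0] unconditionally.
from lxml import etree


def first_or_none(nodes):
    """Return the first xpath match, or None when nothing matched."""
    return nodes[0] if nodes else None


page = etree.HTML("<div><p>layout changed, no portrait img</p></div>")
photo = first_or_none(page.xpath("//img/@data-src"))
print(photo)  # None -> skip the image instead of raising IndexError
```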
C | ca_on_newmarket | 2024-09-16 04:23:49 | 2024-09-16 04:23:49 |
C | ca_on_niagara | 2024-09-27 10:12:02 | 2024-09-27 10:12:02 |
C | ca_on_niagara_on_the_lake | 2024-09-16 04:01:57 | 2024-09-16 04:01:57 |
C | ca_on_north_dumfries | 2024-09-16 04:01:41 | 2024-09-16 04:01:41 |
C | ca_on_oakville | 2024-09-16 04:16:31 | 2024-09-16 04:16:31 |
C | ca_on_oshawa | 2024-09-16 04:46:47 | 2024-09-16 04:46:47 |
C | ca_on_ottawa | 2024-09-16 04:08:56 | 2024-09-16 04:08:56 |
C | ca_on_peel | 2024-09-16 04:06:11 | 2024-09-16 04:06:11 |
C | ca_on_pickering | 2024-09-16 04:06:33 | 2024-09-16 04:06:33 |
C | ca_on_richmond_hill | 2024-09-16 04:47:08 | 2024-09-16 04:47:08 |
C | ca_on_sault_ste_marie | 2024-09-16 04:03:29 | 2024-09-16 04:03:29 |
C | ca_on_st_catharines | 2024-09-16 04:23:23 | 2024-09-16 04:23:23 |
C | ca_on_thunder_bay | 2024-09-16 04:10:30 | 2024-09-16 04:10:30 |
D> | ca_on_toronto | 2024-05-15 04:05:10 | 2024-09-16 04:47:11 | Value 'None' for field '<obj>.image' does not match regular expression '\A(?:(?:ftp|https?)://|\Z)'
    04:47:11 WARNING pupa: validation of CanadianPerson bc73b330-73e6-11ef-a4df-36a9f18de763 failed: 1 validation errors:
    Value 'None' for field '<obj>.image' does not match regular expression '\A(?:(?:ftp|https?)://|\Z)'
    Traceback (most recent call last):
      File "/app/.heroku/src/pupa/pupa/scrape/base.py", line 175, in validate
        validator.validate(self.as_dict(), schema)
      File "/app/.heroku/python/lib/python3.9/site-packages/validictory/validator.py", line 616, in validate
        raise MultipleValidationError(self._errors)
    validictory.validator.MultipleValidationError: 1 validation errors:
    Value 'None' for field '<obj>.image' does not match regular expression '\A(?:(?:ftp|https?)://|\Z)'

    During handling of the above exception, another exception occurred:

    Traceback (most recent call last):
      File "/app/reports/utils.py", line 71, in scrape_people
        report.report = subcommand.handle(args, other)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 260, in handle
        return self.do_handle(args, other, juris)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 305, in do_handle
        report['scrape'] = self.do_scrape(juris, args, scrapers)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 173, in do_scrape
        report[scraper_name] = scraper.do_scrape(**scrape_args)
      File "/app/.heroku/src/pupa/pupa/scrape/base.py", line 104, in do_scrape
        self.save_object(obj)
      File "/app/.heroku/src/pupa/pupa/scrape/base.py", line 89, in save_object
        raise ve
      File "/app/.heroku/src/pupa/pupa/scrape/base.py", line 85, in save_object
        obj.validate()
      File "/app/.heroku/src/pupa/pupa/scrape/base.py", line 177, in validate
        raise ScrapeValueError('validation of {} {} failed: {}'.format(
    pupa.exceptions.ScrapeValueError: validation of CanadianPerson bc73b330-73e6-11ef-a4df-36a9f18de763 failed: 1 validation errors:
    Value 'None' for field '<obj>.image' does not match regular expression '\A(?:(?:ftp|https?)://|\Z)'
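The literal string 'None' in the image field suggests str(None), or the text "None" from the page, reached the person object; per the regex in the error, the schema accepts only an ftp/http(s) URL or an empty string. A hedged sketch of a cleaning step (hypothetical helper, not pupa API) that would let such a record validate:

```python
import re

# The schema pattern quoted in the error: an ftp/http(s) URL, or empty.
IMAGE_RE = re.compile(r"\A(?:(?:ftp|https?)://|\Z)")


def clean_image(value):
    """Hypothetical helper: drop scraped junk like the string 'None'
    instead of letting it fail pupa's validation."""
    value = (value or "").strip()
    return value if IMAGE_RE.match(value) else ""


print(clean_image("None"))                       # "" -> passes validation
print(clean_image("https://example.com/a.jpg"))  # kept as-is
```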
C | ca_on_uxbridge | 2024-09-16 04:10:08 | 2024-09-16 04:10:08 |
C | ca_on_vaughan | 2024-09-16 04:01:37 | 2024-09-16 04:01:37 |
C | ca_on_waterloo | 2024-09-16 04:17:21 | 2024-09-16 04:17:21 |
C | ca_on_waterloo_region | 2024-09-16 04:03:49 | 2024-09-16 04:03:49 |
C | ca_on_welland | 2024-09-16 04:54:05 | 2024-09-16 04:54:05 |
C | ca_on_wellesley | 2024-09-16 04:09:25 | 2024-09-16 04:09:25 |
C | ca_on_whitby | 2024-09-16 04:46:37 | 2024-09-16 04:46:37 |
C | ca_on_whitchurch_stouffville | 2024-09-16 04:09:30 | 2024-09-16 04:09:30 |
D> | ca_on_wilmot | 2024-08-14 04:50:45 | 2024-09-16 04:09:42 | AssertionError: No councillors found
    Traceback (most recent call last):
      File "/app/reports/utils.py", line 71, in scrape_people
        report.report = subcommand.handle(args, other)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 260, in handle
        return self.do_handle(args, other, juris)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 305, in do_handle
        report['scrape'] = self.do_scrape(juris, args, scrapers)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 173, in do_scrape
        report[scraper_name] = scraper.do_scrape(**scrape_args)
      File "/app/.heroku/src/pupa/pupa/scrape/base.py", line 99, in do_scrape
        for obj in self.scrape(**kwargs) or []:
      File "/app/scrapers/ca_on_wilmot/people.py", line 12, in scrape
        assert len(councillors), "No councillors found"
    AssertionError: No councillors found
C | ca_on_windsor | 2024-09-16 04:17:13 | 2024-09-16 04:17:13 |
D> | ca_on_woolwich | 2024-08-12 04:27:59 | 2024-09-16 04:10:33 | scrapelib.HTTPError: 404 while retrieving https://www.woolwich.ca/en/council/council.asp
    Traceback (most recent call last):
      File "/app/reports/utils.py", line 71, in scrape_people
        report.report = subcommand.handle(args, other)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 260, in handle
        return self.do_handle(args, other, juris)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 305, in do_handle
        report['scrape'] = self.do_scrape(juris, args, scrapers)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 173, in do_scrape
        report[scraper_name] = scraper.do_scrape(**scrape_args)
      File "/app/.heroku/src/pupa/pupa/scrape/base.py", line 99, in do_scrape
        for obj in self.scrape(**kwargs) or []:
      File "/app/scrapers/ca_on_woolwich/people.py", line 13, in scrape
        page = self.lxmlize(COUNCIL_PAGE)
      File "/app/scrapers/utils.py", line 204, in lxmlize
        response = self.get(url, cookies=cookies)
      File "/app/scrapers/utils.py", line 196, in get
        return super().get(*args, verify=SSL_VERIFY, **kwargs)
      File "/app/.heroku/python/lib/python3.9/site-packages/requests/sessions.py", line 602, in get
        return self.request("GET", url, **kwargs)
      File "/app/.heroku/python/lib/python3.9/site-packages/scrapelib/__init__.py", line 602, in request
        raise HTTPError(resp)
    scrapelib.HTTPError: 404 while retrieving https://www.woolwich.ca/en/council/council.asp
C | ca_pe | 2024-09-16 04:11:06 | 2024-09-16 04:11:06 |
C | ca_pe_charlottetown | 2024-09-16 04:01:53 | 2024-09-16 04:01:53 |
C | ca_pe_stratford | 2024-09-16 04:46:34 | 2024-09-16 04:46:34 |
C | ca_pe_summerside | 2024-09-16 04:16:50 | 2024-09-16 04:16:50 |
C | ca_qc | 2024-09-16 04:53:50 | 2024-09-16 04:53:50 |
C | ca_qc_beaconsfield | 2024-09-16 04:56:24 | 2024-09-16 04:56:24 |
C | ca_qc_brossard | 2024-09-16 04:06:45 | 2024-09-16 04:06:46 |
C | ca_qc_cote_saint_luc | 2024-09-16 04:03:54 | 2024-09-16 04:03:54 |
D> | ca_qc_dollard_des_ormeaux | 2024-07-04 04:19:40 | 2024-09-16 04:23:59 | IndexError: list index out of range
    Traceback (most recent call last):
      File "/app/reports/utils.py", line 71, in scrape_people
        report.report = subcommand.handle(args, other)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 260, in handle
        return self.do_handle(args, other, juris)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 305, in do_handle
        report['scrape'] = self.do_scrape(juris, args, scrapers)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 173, in do_scrape
        report[scraper_name] = scraper.do_scrape(**scrape_args)
      File "/app/.heroku/src/pupa/pupa/scrape/base.py", line 99, in do_scrape
        for obj in self.scrape(**kwargs) or []:
      File "/app/scrapers/ca_qc_dollard_des_ormeaux/people.py", line 32, in scrape
        p.image = councillor.xpath(".//@data-src")[0]
    IndexError: list index out of range
C | ca_qc_dorval | 2024-09-16 04:47:23 | 2024-09-16 04:47:23 |
C | ca_qc_gatineau | 2024-09-16 04:09:21 | 2024-09-16 04:09:21 |
D> | ca_qc_kirkland | | 2024-09-16 04:16:22 | scrapelib.HTTPError: 403 while retrieving https://www.ville.kirkland.qc.ca/portrait-municipal/conseil-municipal/elus-municip…
    04:15:12 WARNING scrapelib: sleeping for 10 seconds before retry
    04:15:22 WARNING scrapelib: sleeping for 20 seconds before retry
    04:15:42 WARNING scrapelib: sleeping for 40 seconds before retry
    04:16:22 WARNING pupa: could not save RunPlan, no successful runs of ocd-jurisdiction/country:ca/csd:2466102/legislature yet
    Traceback (most recent call last):
      File "/app/reports/utils.py", line 71, in scrape_people
        report.report = subcommand.handle(args, other)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 260, in handle
        return self.do_handle(args, other, juris)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 305, in do_handle
        report['scrape'] = self.do_scrape(juris, args, scrapers)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 173, in do_scrape
        report[scraper_name] = scraper.do_scrape(**scrape_args)
      File "/app/.heroku/src/pupa/pupa/scrape/base.py", line 99, in do_scrape
        for obj in self.scrape(**kwargs) or []:
      File "/app/scrapers/ca_qc_kirkland/people.py", line 11, in scrape
        page = self.lxmlize(COUNCIL_PAGE)
      File "/app/scrapers/utils.py", line 204, in lxmlize
        response = self.get(url, cookies=cookies)
      File "/app/scrapers/utils.py", line 196, in get
        return super().get(*args, verify=SSL_VERIFY, **kwargs)
      File "/app/.heroku/python/lib/python3.9/site-packages/requests/sessions.py", line 602, in get
        return self.request("GET", url, **kwargs)
      File "/app/.heroku/python/lib/python3.9/site-packages/scrapelib/__init__.py", line 602, in request
        raise HTTPError(resp)
    scrapelib.HTTPError: 403 while retrieving https://www.ville.kirkland.qc.ca/portrait-municipal/conseil-municipal/elus-municipaux
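A 403 on every retry, with no successful run ever recorded, points to a WAF or CDN rejecting the scraper outright rather than a transient failure; ca_sk_saskatoon and ca_yt below fail the same way. The usual first experiment, sketched here as a guess rather than a known fix, is to present browser-like headers:

```python
# Hedged sketch, not the project's configuration: some firewalls reject
# the default python-requests User-Agent with a blanket 403.
import requests

session = requests.Session()
session.headers.update({
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/128.0 Safari/537.36",
    "Accept-Language": "fr-CA,fr;q=0.9,en;q=0.8",
})
response = session.get(
    "https://www.ville.kirkland.qc.ca/portrait-municipal/conseil-municipal/elus-municipaux"
)
print(response.status_code)  # still 403 -> likely IP- or behaviour-based blocking
```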
C | ca_qc_laval | 2024-09-16 04:56:02 | 2024-09-16 04:56:02 |
C | ca_qc_levis | 2024-09-16 04:02:36 | 2024-09-16 04:02:36 |
C | ca_qc_longueuil | 2024-09-16 04:08:41 | 2024-09-16 04:08:41 |
C | ca_qc_mercier | 2024-09-16 04:46:43 | 2024-09-16 04:46:43 |
C | ca_qc_montreal | 2024-09-16 04:41:50 | 2024-09-16 04:41:50 |
D> | ca_qc_montreal_est | 2024-08-12 04:47:55 | 2024-09-16 04:06:39 | IndexError: list index out of range
    Traceback (most recent call last):
      File "/app/reports/utils.py", line 71, in scrape_people
        report.report = subcommand.handle(args, other)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 260, in handle
        return self.do_handle(args, other, juris)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 305, in do_handle
        report['scrape'] = self.do_scrape(juris, args, scrapers)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 173, in do_scrape
        report[scraper_name] = scraper.do_scrape(**scrape_args)
      File "/app/.heroku/src/pupa/pupa/scrape/base.py", line 99, in do_scrape
        for obj in self.scrape(**kwargs) or []:
      File "/app/scrapers/ca_qc_montreal_est/people.py", line 23, in scrape
        p.image = councillor.xpath(".//@data-lazy-src")[0]
    IndexError: list index out of range
C | ca_qc_pointe_claire | 2024-09-16 04:01:45 | 2024-09-16 04:01:45 |
C | ca_qc_quebec | 2024-09-16 04:53:56 | 2024-09-16 04:53:56 |
C | ca_qc_saguenay | 2024-09-16 04:46:30 | 2024-09-16 04:46:30 |
D> | ca_qc_sainte_anne_de_bellevue | 2023-10-24 04:04:32 | 2024-09-16 04:23:43 | AssertionError: No councillors found
    Traceback (most recent call last):
      File "/app/reports/utils.py", line 71, in scrape_people
        report.report = subcommand.handle(args, other)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 260, in handle
        return self.do_handle(args, other, juris)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 305, in do_handle
        report['scrape'] = self.do_scrape(juris, args, scrapers)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 173, in do_scrape
        report[scraper_name] = scraper.do_scrape(**scrape_args)
      File "/app/.heroku/src/pupa/pupa/scrape/base.py", line 99, in do_scrape
        for obj in self.scrape(**kwargs) or []:
      File "/app/scrapers/ca_qc_sainte_anne_de_bellevue/people.py", line 14, in scrape
        assert len(councillors), "No councillors found"
    AssertionError: No councillors found
C | ca_qc_saint_jean_sur_richelieu | 2024-09-16 04:19:57 | 2024-09-16 04:19:57 |
D> | ca_qc_saint_jerome | 2024-06-25 04:19:49 | 2024-09-16 04:45:58 | AssertionError: No councillors found
    Traceback (most recent call last):
      File "/app/reports/utils.py", line 71, in scrape_people
        report.report = subcommand.handle(args, other)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 260, in handle
        return self.do_handle(args, other, juris)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 305, in do_handle
        report['scrape'] = self.do_scrape(juris, args, scrapers)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 173, in do_scrape
        report[scraper_name] = scraper.do_scrape(**scrape_args)
      File "/app/.heroku/src/pupa/pupa/scrape/base.py", line 99, in do_scrape
        for obj in self.scrape(**kwargs) or []:
      File "/app/scrapers/ca_qc_saint_jerome/people.py", line 11, in scrape
        assert len(councillors), "No councillors found"
    AssertionError: No councillors found
C | ca_qc_senneville | 2024-09-16 04:07:46 | 2024-09-16 04:07:46 |
C | ca_qc_sherbrooke | 2024-09-16 04:07:09 | 2024-09-16 04:07:09 |
D> | ca_qc_terrebonne | 2024-08-28 04:18:16 | 2024-09-16 04:43:33 | scrapelib.HTTPError: 500 while retrieving https://terrebonne.ca/district-7/
    04:42:23 WARNING scrapelib: sleeping for 10 seconds before retry
    04:42:33 WARNING scrapelib: sleeping for 20 seconds before retry
    04:42:53 WARNING scrapelib: sleeping for 40 seconds before retry
    Traceback (most recent call last):
      File "/app/reports/utils.py", line 71, in scrape_people
        report.report = subcommand.handle(args, other)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 260, in handle
        return self.do_handle(args, other, juris)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 305, in do_handle
        report['scrape'] = self.do_scrape(juris, args, scrapers)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 173, in do_scrape
        report[scraper_name] = scraper.do_scrape(**scrape_args)
      File "/app/.heroku/src/pupa/pupa/scrape/base.py", line 99, in do_scrape
        for obj in self.scrape(**kwargs) or []:
      File "/app/scrapers/ca_qc_terrebonne/people.py", line 25, in scrape
        page = self.lxmlize(url)
      File "/app/scrapers/utils.py", line 204, in lxmlize
        response = self.get(url, cookies=cookies)
      File "/app/scrapers/utils.py", line 196, in get
        return super().get(*args, verify=SSL_VERIFY, **kwargs)
      File "/app/.heroku/python/lib/python3.9/site-packages/requests/sessions.py", line 602, in get
        return self.request("GET", url, **kwargs)
      File "/app/.heroku/python/lib/python3.9/site-packages/scrapelib/__init__.py", line 602, in request
        raise HTTPError(resp)
    scrapelib.HTTPError: 500 while retrieving https://terrebonne.ca/district-7/
C | ca_qc_trois_rivieres | 2024-09-16 04:19:15 | 2024-09-16 04:19:15 |
C | ca_qc_westmount | 2024-09-16 04:06:20 | 2024-09-16 04:06:20 |
C | ca_sk | 2024-09-16 04:12:59 | 2024-09-16 04:12:59 |
C | ca_sk_regina | 2024-09-16 04:02:16 | 2024-09-16 04:02:16 |
D> | ca_sk_saskatoon | 2024-09-12 04:05:10 | 2024-09-16 04:14:21 | scrapelib.HTTPError: 403 while retrieving https://saskatoonopendataconfig.blob.core.windows.net/converteddata/MayorAndCityCo…
    04:13:10 WARNING scrapelib: sleeping for 10 seconds before retry
    04:13:20 WARNING scrapelib: sleeping for 20 seconds before retry
    04:13:41 WARNING scrapelib: sleeping for 40 seconds before retry
    Traceback (most recent call last):
      File "/app/reports/utils.py", line 71, in scrape_people
        report.report = subcommand.handle(args, other)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 260, in handle
        return self.do_handle(args, other, juris)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 305, in do_handle
        report['scrape'] = self.do_scrape(juris, args, scrapers)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 173, in do_scrape
        report[scraper_name] = scraper.do_scrape(**scrape_args)
      File "/app/.heroku/src/pupa/pupa/scrape/base.py", line 99, in do_scrape
        for obj in self.scrape(**kwargs) or []:
      File "/app/scrapers/utils.py", line 401, in scrape
        reader = self.csv_reader(
      File "/app/scrapers/utils.py", line 239, in csv_reader
        response = self.get(url, **kwargs)
      File "/app/scrapers/utils.py", line 196, in get
        return super().get(*args, verify=SSL_VERIFY, **kwargs)
      File "/app/.heroku/python/lib/python3.9/site-packages/requests/sessions.py", line 602, in get
        return self.request("GET", url, **kwargs)
      File "/app/.heroku/python/lib/python3.9/site-packages/scrapelib/__init__.py", line 602, in request
        raise HTTPError(resp)
    scrapelib.HTTPError: 403 while retrieving https://saskatoonopendataconfig.blob.core.windows.net/converteddata/MayorAndCityCouncilContactInformation.csv
D> | ca_yt | 2024-09-11 05:27:53 | 2024-09-16 04:55:49 | scrapelib.HTTPError: 403 while retrieving https://yukonassembly.ca/mlas
    04:54:38 WARNING scrapelib: sleeping for 10 seconds before retry
    04:54:48 WARNING scrapelib: sleeping for 20 seconds before retry
    04:55:08 WARNING scrapelib: sleeping for 40 seconds before retry
    Traceback (most recent call last):
      File "/app/reports/utils.py", line 71, in scrape_people
        report.report = subcommand.handle(args, other)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 260, in handle
        return self.do_handle(args, other, juris)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 305, in do_handle
        report['scrape'] = self.do_scrape(juris, args, scrapers)
      File "/app/.heroku/src/pupa/pupa/cli/commands/update.py", line 173, in do_scrape
        report[scraper_name] = scraper.do_scrape(**scrape_args)
      File "/app/.heroku/src/pupa/pupa/scrape/base.py", line 99, in do_scrape
        for obj in self.scrape(**kwargs) or []:
      File "/app/scrapers/ca_yt/people.py", line 9, in scrape
        page = self.lxmlize(COUNCIL_PAGE)
      File "/app/scrapers/utils.py", line 204, in lxmlize
        response = self.get(url, cookies=cookies)
      File "/app/scrapers/utils.py", line 196, in get
        return super().get(*args, verify=SSL_VERIFY, **kwargs)
      File "/app/.heroku/python/lib/python3.9/site-packages/requests/sessions.py", line 602, in get
        return self.request("GET", url, **kwargs)
      File "/app/.heroku/python/lib/python3.9/site-packages/scrapelib/__init__.py", line 602, in request
        raise HTTPError(resp)
    scrapelib.HTTPError: 403 while retrieving https://yukonassembly.ca/mlas