# Source code for pywind.decc.extract
# coding=utf-8
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# For more information, please refer to <http://unlicense.org/>
"""
The DECC publish monthly extracts of planning applications for renewable projects.
This module aims to make accessing this report simpler.
https://www.gov.uk/government/publications/renewable-energy-planning-database-monthly-extract
"""
from __future__ import print_function
import logging
import sys
import csv
from datetime import datetime
from pprint import pprint
if sys.version_info >= (3, 0):
import codecs
import html5lib
from pywind.utils import get_or_post_a_url, _convert_type
from .geo import Coord
class DeccRecord(object):
    """
    Simple class to hold details of one DECC station.

    A record is built from a mapping of column name to raw string value.
    Column names are normalised (parentheses removed, ``/`` replaced by
    ``_``) and the values converted according to the field lists below.
    The converted values are stored in :attr:`attrs` and are also available
    as attributes of the record, e.g. ``record.site_name``.
    """

    # Normalised names of the columns passed to _convert_type() as 'date'.
    DATE_FIELDS = (
        'record_last_updated_dd_mm_yyyy',
        'planning_application_submitted',
        'planning_application_withdrawn',
        'planning_permission_refused',
        'appeal_lodged',
        'appeal_withdrawn',
        'appeal_refused',
        'appeal_granted',
        'planning_permission_granted',
        'secretary_of_state___intervened',
        'secretary_of_state___refusal',
        'secretary_of_state___granted',
        'planning_permission_expired',
        'under_construction',
        'operational',
    )

    # Normalised names of the columns passed to _convert_type() as 'bool'.
    BOOLEAN_FIELDS = (
        'chp_enabled',
        'green_belt',
        'national_park',
        'aonb',
        'heritage_coast',
        'special_landscape_area',
        'employment_use',
        'natural_environment',
        'other_land_use',
        'built_heritage__archaeology',
        'project_specific',
        'chp',
    )

    # Normalised names of the columns passed to _convert_type() as 'float'.
    FLOAT_FIELDS = (
        'installed_capacity_mwelec',
        'ro_banding_roc_mwh',
        'fit_tariff_p_kwh',
        'cfd_capacity_mw',
        'turbine_capacity_mw',
        'height_of_turbines_m',
    )

    # Normalised names of the columns passed to _convert_type() as 'int'.
    INT_FIELDS = (
        'ref_id',
        'no._of_turbines',
        'x-coordinate',
        'y-coordinate',
    )

    def __init__(self, app_info):
        """ Build a record from raw application data.

        :param app_info: Mapping of column name to the raw string value
        :type app_info: dict
        """
        self.logger = logging.getLogger(__name__)
        self.attrs = {}
        numeric_fields = self.INT_FIELDS + self.FLOAT_FIELDS

        for raw_key, val in app_info.items():
            key = raw_key.replace('(', '').replace(')', '').replace('/', '_')
            # Empty cells and spreadsheet reference errors carry no data.
            if val is None or val in ('', '#REF!'):
                self.attrs[key] = None
                continue

            # Numeric columns sometimes contain 'N/A'; treat that as zero.
            if key in numeric_fields and val.lower() == 'n/a':
                val = '0'

            if key in self.DATE_FIELDS:
                val = _convert_type(val, 'date')
            elif key in self.INT_FIELDS:
                val = _convert_type(val, 'int')
            elif key in self.FLOAT_FIELDS:
                val = _convert_type(val, 'float')
            elif key in self.BOOLEAN_FIELDS:
                val = _convert_type(val, 'bool')
            elif sys.version_info < (3, 0):
                # Python 2 hands us latin1 byte strings; store them as UTF-8.
                val = val.decode('latin1').encode('utf-8')
            self.attrs[key] = val

        # When both grid coordinates are present, add the 'lat' and 'lon'
        # values returned by Coord.as_wgs84().
        easting = self.attrs.get('x-coordinate')
        northing = self.attrs.get('y-coordinate')
        if easting is not None and northing is not None:
            coord = Coord(easting, northing)
            self.attrs['lat'], self.attrs['lon'] = coord.as_wgs84()

    def __getattr__(self, item):
        """ Expose the entries of :attr:`attrs` as attributes.

        :raises AttributeError: if there is no such entry
        """
        # Read the instance dict directly: going through self.attrs would
        # re-enter __getattr__ and recurse forever on an instance whose
        # __init__ has not run (e.g. one created with __new__).
        attrs = self.__dict__.get('attrs', {})
        if item in attrs:
            return attrs[item]
        raise AttributeError(item)

    def __contains__(self, item):
        """ Return True if the record has a (normalised) field named item. """
        return item in self.attrs

    def fit_rate_mwh(self):
        """ Convert the FIT Tariff rate into GBP per MWh.

        The tariff is stored in pence per kWh under the normalised field
        name ``fit_tariff_p_kwh``; multiplying by 10 gives GBP per MWh.

        :returns: The rate, or 0.0 when no tariff is recorded
        :rtype: float
        """
        fit = self.attrs.get('fit_tariff_p_kwh')
        if not fit:
            return 0.0
        return fit * 10
class MonthlyExtract(object):
    """
    The MonthlyExtract class allows the current monthly data to be easily retrieved and parsed.

    When no filename is given the publication page is queried on
    construction to locate the current download; otherwise the named local
    CSV file is parsed when :meth:`get_data` is called.

    .. note::
      The CSV data returned does not declare an encoding, so latin1 is presently assumed.
    """
    BASE_URL = "https://www.gov.uk"
    URL = "https://www.gov.uk/government/publications/renewable-energy-planning-database-monthly-extract"

    def __init__(self, filename=None):
        """
        :param filename: Optional path of a local CSV file to parse instead
                         of downloading the current extract.
        """
        self.records = []
        self.raw_data = None
        self.available = None
        # Maps lower-cased, underscore-separated column title -> column index.
        self.csv_fields = {}
        self.filename = filename
        if filename is None:
            self._find_available()

    def __len__(self):
        """
        Return the number of DECC records that have been extracted. Will be 0 until get_data() has been called.
        """
        return len(self.records)

    def __getitem__(self, item):
        """ Return the record(s) at the given index or slice. """
        return self.records[item]

    def get_data(self):
        """ Get the data from the DECC server (or the local file given at
        construction) and parse it into DECC records.

        Any records from a previous call are replaced.

        :returns: True or False
        :rtype: bool
        :raises Exception: if the download location could not be determined
        """
        if self.filename is not None:
            return self._parse_filename()

        if self.available is None:
            self._find_available()
        if self.available is None:
            raise Exception("Unable to get details of available downloads")

        response = get_or_post_a_url(self.available['url'])
        self.raw_data = response.content
        self._parse_lines(self.raw_data.splitlines())
        return True

    def rows(self):
        """ Generator that returns records

        Each key is the normalised column name (the name the value is
        stored under on the record) prefixed with ``@``.

        :returns: Dict of planning application information
        :rtype: dict
        """
        # csv_fields holds the raw column titles, whereas the records store
        # their values under normalised names, so translate before lookup.
        names = [self._attr_name(key) for key in self.csv_fields]
        for app in self.records:
            yield {'PlanningApplication': {'@{}'.format(name): getattr(app, name, None)
                                           for name in names}}

    def save_original(self, filename):
        """ Save the downloaded data, exactly as received, into the filename provided.

        :param filename: Filename to save the file to.
        :returns: True if data was written, False if nothing has been downloaded
        :rtype: bool
        """
        if self.raw_data is None:
            return False
        with open(filename, 'wb') as ofh:
            ofh.write(self.raw_data)
        return True

    # Private functions

    @staticmethod
    def _attr_name(key):
        """ Return the record attribute name for a csv_fields key (parentheses removed, '/' -> '_'). """
        return key.replace('(', '').replace(')', '').replace('/', '_')

    def _find_available(self):
        """
        Get the URL and period for the currently available download.

        Leaves :attr:`available` untouched when the page has no usable
        download link, so that get_data() can report the failure.
        """
        response = get_or_post_a_url(self.URL)
        document = html5lib.parse(response.content,
                                  treebuilder="lxml",
                                  namespaceHTMLElements=False)

        period = None
        for tit in document.xpath('.//h2[@class="title"]'):
            # Only titles without child elements are considered; the period
            # is the text following the first colon.
            if len(tit) == 0 and tit.text and ':' in tit.text:
                period = tit.text.split(':')[1].strip()

        links = document.xpath('.//span[@class="download"]/a')
        href = links[0].get('href') if links else None
        if not href:
            return
        # Site-relative links need the host prepended; absolute ones do not.
        if not href.startswith(('http://', 'https://')):
            href = self.BASE_URL + href
        self.available = {'period': period, 'url': href}

    def _parse_filename(self):
        """ Parse the local CSV file named at construction into records.

        :returns: True
        :rtype: bool
        """
        with open(self.filename, 'rb') as ofh:
            self._parse_lines(ofh)
        self.available = {'period': 'Unknown'}
        return True

    def _parse_lines(self, lines):
        """ Parse an iterable of raw CSV lines into records sorted by site name.

        :param lines: Iterable of byte-string lines
        """
        # Start afresh so a repeated parse does not duplicate records.
        self.records = []
        self.csv_fields = {}
        if sys.version_info >= (3, 0):
            # The csv module needs text on Python 3; latin1 is assumed.
            lines = codecs.iterdecode(lines, 'latin1')
        for row in csv.reader(lines):
            self._parse_row(row)
        self.records.sort(key=lambda rec: rec.site_name)

    def _parse_row(self, row):
        """ Process one CSV row: either the header row or a data row.

        The header row is the one containing a 'Ref ID' cell. Rows before
        the header, blank rows and rows without a site name are ignored.

        :param row: List of cell values
        """
        if 'Ref ID' in row:
            for colnum, title in enumerate(row):
                if title == '':
                    continue
                self.csv_fields[title.lower().replace(' ', '_')] = colnum
            return

        # Nothing can be mapped until the header row has been seen.
        if not self.csv_fields:
            return
        # There tend to be blank entries...so remove them....
        if not any(row):
            return

        # Short rows are padded with empty values rather than failing.
        width = len(row)
        app_info = {key: (row[colnum] if colnum < width else '')
                    for key, colnum in self.csv_fields.items()}

        decc = DeccRecord(app_info)
        if getattr(decc, 'site_name', None) is None:
            return
        self.records.append(decc)