Source code for pywind.decc.extract

# coding=utf-8

# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.

# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# For more information, please refer to <http://unlicense.org/>

"""
The DECC publish monthly extracts of planning applications for renewable projects.
This module aims to make accessing this report simpler.

https://www.gov.uk/government/publications/renewable-energy-planning-database-monthly-extract
"""

from __future__ import print_function

import logging
import sys
import csv
from datetime import datetime
from pprint import pprint

if sys.version_info >= (3, 0):
    import codecs
import html5lib

from pywind.utils import get_or_post_a_url, _convert_type
from .geo import Coord


class DeccRecord(object):
    """ Simple class to hold details of one DECC station.

    The record is built from a dict mapping column names to raw string
    values. Column names are normalised by removing ``(`` and ``)`` and
    replacing ``/`` with ``_``; the resulting names are the keys of
    :attr:`attrs` and can also be read as attributes of the record.
    """
    # NOTE: a couple of names are listed twice below; this is harmless as the
    # tuples are only used for membership tests.
    DATE_FIELDS = ('record_last_updated_dd_mm_yyyy',
                   'planning_application_submitted',
                   'planning_application_withdrawn',
                   'planning_permission_refused',
                   'appeal_lodged',
                   'appeal_withdrawn',
                   'appeal_refused',
                   'appeal_granted',
                   'planning_permission_granted',
                   'planning_permission_granted',
                   'record_last_updated_dd_mm_yyyy',
                   'secretary_of_state___intervened',
                   'secretary_of_state___refusal',
                   'secretary_of_state___granted',
                   'planning_permission_expired',
                   'under_construction',
                   'operational',
                   )
    BOOLEAN_FIELDS = ('chp_enabled',
                      'green_belt',
                      'national_park',
                      'aonb',
                      'heritage_coast',
                      'special_landscape_area',
                      'employment_use',
                      'natural_environment',
                      'other_land_use',
                      'built_heritage__archaeology',
                      'project_specific',
                      'chp'
                      )
    FLOAT_FIELDS = ('installed_capacity_mwelec',
                    'ro_banding_roc_mwh',
                    'fit_tariff_p_kwh',
                    'cfd_capacity_mw',
                    'turbine_capacity_mw',
                    'height_of_turbines_m'
                    )
    INT_FIELDS = ('ref_id', 'no._of_turbines', 'x-coordinate', 'y-coordinate')

    def __init__(self, app_info):
        """ Build a record from one row of the extract.

        :param app_info: Mapping of column name to raw string value.
        """
        self.logger = logging.getLogger(__name__)
        self.attrs = {}
        # Built once rather than on every loop iteration.
        numeric_fields = self.INT_FIELDS + self.FLOAT_FIELDS

        for raw_key, val in app_info.items():
            key = raw_key.replace('(', '').replace(')', '').replace('/', '_')
            if val is None or val in ['', '#REF!']:
                # Empty cells and spreadsheet reference errors carry no data.
                val = None
            else:
                if key in numeric_fields and val.lower() == 'n/a':
                    val = '0'
                # The exact conversions are done by pywind.utils._convert_type.
                if key in self.DATE_FIELDS:
                    val = _convert_type(val, 'date')
                elif key in self.INT_FIELDS:
                    val = _convert_type(val, 'int')
                elif key in self.FLOAT_FIELDS:
                    val = _convert_type(val, 'float')
                elif key in self.BOOLEAN_FIELDS:
                    val = _convert_type(val, 'bool')
                elif sys.version_info < (3, 0):
                    # Python 2 only: the raw bytes are assumed to be latin1.
                    val = val.decode('latin1').encode('utf-8')
            self.attrs[key] = val

        easting = self.attrs.get('x-coordinate')
        northing = self.attrs.get('y-coordinate')
        if easting is not None and northing is not None:
            coord = Coord(easting, northing)
            self.attrs['lat'], self.attrs['lon'] = coord.as_wgs84()

    def __getattr__(self, item):
        """ Expose the entries of :attr:`attrs` as attributes.

        :raises AttributeError: if there is no such entry.
        """
        # Go through __dict__ so that an instance which has no 'attrs' yet
        # (e.g. created by copy or pickle) raises AttributeError instead of
        # recursing back into this method forever.
        attrs = self.__dict__.get('attrs', {})
        if item in attrs:
            return attrs[item]
        raise AttributeError(item)

    def __contains__(self, item):
        """ Return True if the record has an entry called ``item``. """
        return item in self.attrs

    def fit_rate_mwh(self):
        """ Convert the FIT Tariff rate into GBP per MWh.

        The tariff is stored in pence per kWh, so it is multiplied by 10.
        A missing or zero tariff gives 0.0.

        :rtype: float
        """
        # The constructor strips the brackets from column names, so the value
        # is stored under 'fit_tariff_p_kwh' and not 'fit_tariff_(p_kwh)'.
        fit = self.attrs.get('fit_tariff_p_kwh')
        if fit in [0.0, None]:
            return 0.0
        return fit * 10
class MonthlyExtract(object):
    """ The MonthlyExtract class allows the current monthly data to be easily
    retrieved and parsed.

    If no filename is given, the publication page is fetched when the object
    is created in order to find the current download. If a filename is given,
    the data is read from that file instead.

    .. note::
      The CSV data returned does not declare an encoding, so latin1 is
      presently assumed.
    """
    BASE_URL = "https://www.gov.uk"
    URL = "https://www.gov.uk/government/publications/renewable-energy-planning-database-monthly-extract"

    def __init__(self, filename=None):
        """
        :param filename: Optional path of a previously saved extract to parse
                         instead of downloading.
        """
        self.records = []
        self.raw_data = None
        self.available = None
        # Maps the column name (lower case, spaces as underscores) to its index.
        self.csv_fields = {}
        self.filename = filename
        if filename is None:
            self._find_available()

    def __len__(self):
        """ Return the number of DECC records that have been extracted.
        Will be 0 until get_data() has been called.
        """
        return len(self.records)

    def __getitem__(self, item):
        """ Return the record(s) at the given index or slice. """
        return self.records[item]

    def get_data(self):
        """ Get the data from the DECC server and parse it into DECC records.

        If a filename was supplied, the file is parsed and nothing is
        downloaded.

        :returns: True or False
        :rtype: bool
        :raises Exception: if the download details could not be found.
        """
        if self.filename is not None:
            return self._parse_filename()

        if self.available is None:
            self._find_available()
            if self.available is None:
                raise Exception("Unable to get details of available downloads")

        response = get_or_post_a_url(self.available['url'])
        self.raw_data = response.content
        self._parse_lines(response.content.splitlines())
        return True

    def rows(self):
        """ Generator that returns records

        Each key is the record attribute name prefixed with ``@``.

        :returns: Dict of planning application information
        :rtype: dict
        """
        # Records store their values under the normalised column name, so the
        # raw CSV names must be normalised before being looked up.
        names = [self._attr_name(key) for key in self.csv_fields]
        for app in self.records:
            yield {'PlanningApplication': {'@{}'.format(name): getattr(app, name)
                                           for name in names}}

    def save_original(self, filename):
        """ Save the raw downloaded data into the filename provided.

        :param filename: Filename to save the file to.
        :returns: True if the data was written, False if nothing has been
                  downloaded yet.
        :rtype: bool
        """
        if self.raw_data is None:
            return False

        with open(filename, 'wb') as ofh:
            ofh.write(self.raw_data)
        return True

    # Private functions

    @staticmethod
    def _attr_name(key):
        """ Return the record attribute name used for a CSV column name.

        Brackets are removed and ``/`` becomes ``_``.
        """
        return key.replace('(', '').replace(')', '').replace('/', '_')

    def _find_available(self):
        """ Get the URL and period for the currently available download.

        If the page has no usable download link, ``self.available`` is left
        as None so that get_data() can report the problem.
        """
        response = get_or_post_a_url(self.URL)
        document = html5lib.parse(response.content,
                                  treebuilder="lxml",
                                  namespaceHTMLElements=False)

        period = None
        for tit in document.xpath('.//h2[@class="title"]'):
            if len(tit.getchildren()) != 0:
                continue
            parts = (tit.text or '').split(':')
            # Titles without a colon carry no period, so they are ignored.
            if len(parts) > 1:
                period = parts[1].strip()

        links = document.xpath('.//span[@class="download"]/a')
        href = links[0].get('href') if links else None
        if not href:
            self.available = None
            return
        self.available = {'period': period, 'url': self.BASE_URL + href}

    def _parse_filename(self):
        """ Parse the file named by ``self.filename`` into records.

        :returns: True
        :rtype: bool
        """
        with open(self.filename, 'rb') as ofh:
            self._parse_lines(ofh)
        self.available = {'period': 'Unknown'}
        return True

    def _parse_lines(self, byte_lines):
        """ Parse an iterable of undecoded CSV lines and sort the records by
        site name.
        """
        if sys.version_info >= (3, 0):
            csvfile = csv.reader(codecs.iterdecode(byte_lines, 'latin1'))
        else:
            csvfile = csv.reader(byte_lines)
        for row in csvfile:
            self._parse_row(row)
        self.records = sorted(self.records, key=lambda rec: rec.site_name)

    def _parse_row(self, row):
        """ Handle one CSV row.

        The row containing 'Ref ID' is the header and defines the columns.
        Rows before the header, empty rows and rows without a site name are
        ignored. Any other row is added to ``self.records``.
        """
        if 'Ref ID' in row:
            for colnum, title in enumerate(row):
                if title == '':
                    continue
                self.csv_fields[title.lower().replace(' ', '_')] = colnum
            return

        # Nothing can be parsed until the header has been seen.
        if not self.csv_fields:
            return

        # Rows may be shorter than the header (or entirely blank), so missing
        # cells are treated as empty rather than raising IndexError.
        width = len(row)
        app_info = {key: (row[colnum] if colnum < width else '')
                    for key, colnum in self.csv_fields.items()}
        # There tend to be blank entries...so remove them....
        if not any(app_info.values()):
            return

        decc = DeccRecord(app_info)
        try:
            if decc.site_name is None:
                return
        except AttributeError:
            return
        self.records.append(decc)