Source code for scripts.functions

# -*- coding: utf-8 -*-
"""
Created on Mon Jun  4 20:48:26 2018

@author: elcok
"""

import os
import sys
import numpy as np
import pandas as pd
import geopandas as gpd
import subprocess
import rasterio as rio
from rasterio.mask import mask
from shapely.geometry import mapping
from osgeo import ogr
from shapely.wkb import loads
from rasterstats import point_query
from itertools import product

sys.path.append(os.path.join('..'))
from scripts.utils import load_config, get_num, int2date

from sklearn import metrics
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
pd.set_option('mode.chained_assignment', None)

def region_exposure(region,include_storms=True,event_set=False,sens_analysis_storms=[],save=True):
    """Create a GeoDataFrame with exposure and hazard information for each building in the specified region.

    Arguments:
        *region* (string) -- NUTS3 code of region to consider.

        *include_storms* (bool) -- if set to False, it will only return a list of buildings and their characteristics (default: **True**)

        *event_set* (bool) -- if set to True, the exposure is calculated for the event set instead of the historical storms (default: **False**)

        *sens_analysis_storms* (list) -- if empty, it will be filled with the default list of storms

        *save* (bool) -- boolean to decide whether you want to save the output to a csv file (default: **True**)

    Returns:
        *GeoDataFrame* -- GeoDataFrame with all **hazard** and **exposure** values.
    """

    country = region[:2]
    data_path = load_config()['paths']['data']

    osm_path = os.path.join(data_path,'OSM','{}.osm.pbf'.format(country))
    area_poly = os.path.join(data_path,country,'NUTS3_POLY','{}.poly'.format(region))
    area_pbf = os.path.join(data_path,country,'NUTS3_OSM','{}.osm.pbf'.format(region))

    # Northern Ireland is clipped from the Ireland OSM file
    if region in ['UKN01','UKN02','UKN03','UKN04','UKN05']:
        osm_path = os.path.join(data_path,'OSM','IE.osm.pbf')

    clip_osm(data_path,osm_path,area_poly,area_pbf)

    gdf_table = fetch_buildings(data_path,country,region,regional=True)
    print('Fetched all buildings from osm data for {}'.format(region))

    # convert to European coordinate system for overlap
    gdf_table = gdf_table.to_crs(epsg=3035)
    print(len(gdf_table))

    # specify country
    gdf_table["COUNTRY"] = country

    # give unique id
    gdf_table['ID_'] = [str(x)+'_'+region for x in gdf_table.index]

    # calculate area
    gdf_table["AREA_m2"] = gdf_table.geometry.area

    # determine centroid
    gdf_table["centroid"] = gdf_table.geometry.centroid

    nuts_eu = gpd.read_file(os.path.join(data_path,'input_data','NUTS3_ETRS.shp'))
    nuts_eu.loc[nuts_eu['NUTS_ID']==region].to_file(
        os.path.join(data_path,country,'NUTS3_SHAPE','{}.shp'.format(region)))

    # create geometry envelope outline for rasterstats; use a buffer to make sure all buildings are in there
    geoms = [mapping(nuts_eu.loc[nuts_eu['NUTS_ID']==region].geometry.envelope.buffer(10000).values[0])]

    # get land-use values
    with rio.open(os.path.join(data_path,'input_data','g100_clc12_V18_5.tif')) as src:
        out_image, out_transform = mask(src, geoms, crop=True)
        out_image = out_image[0,:,:]
        tqdm.pandas(desc='CLC_2012_'+region)
        gdf_table['CLC_2012'] = gdf_table.centroid.progress_apply(
            lambda x: get_raster_value(x,out_image,out_transform))

    # obtain storm values for the sensitivity-analysis storms
    if len(sens_analysis_storms) > 0:
        storm_list = load_sens_analysis_storms(sens_analysis_storms)
        for outrast_storm in storm_list:
            storm_name = str(int2date(get_num(outrast_storm[-23:].split('_')[0][:-2])))
            tqdm.pandas(desc=storm_name+'_'+region)
            with rio.open(outrast_storm) as src:
                out_image, out_transform = mask(src, geoms, crop=True)
                out_image = out_image[0,:,:]
                gdf_table[storm_name] = gdf_table.centroid.progress_apply(
                    lambda x: get_raster_value(x,out_image,out_transform))

    # obtain storm values for the historical storms
    elif (include_storms == True) & (event_set == False):
        storm_list = get_storm_list(data_path)
        for outrast_storm in storm_list:
            storm_name = str(int2date(get_num(outrast_storm[-23:].split('_')[0][:-2])))
            tqdm.pandas(desc=storm_name+'_'+region)
            with rio.open(outrast_storm) as src:
                out_image, out_transform = mask(src, geoms, crop=True)
                out_image = out_image[0,:,:]
                gdf_table[storm_name] = gdf_table.centroid.progress_apply(
                    lambda x: get_raster_value(x,out_image,out_transform))
                # set negative and implausibly high wind speeds to zero
                gdf_table.loc[gdf_table[storm_name] < 0, storm_name] = 0
                gdf_table.loc[gdf_table[storm_name] > 500, storm_name] = 0

    # obtain storm values for the event-set storms
    elif (include_storms == True) & (event_set == True):
        storm_list = get_event_storm_list(data_path)[:10]
        for outrast_storm in tqdm(storm_list,total=len(storm_list),desc=region):
            storm_name = str(int2date(get_num(outrast_storm[-24:].split('_')[0][:-4])))
            with rio.open(outrast_storm) as src:
                out_image = src.read(1)
                out_transform = src.transform
                gdf_table[storm_name] = gdf_table.centroid.apply(
                    lambda x: get_raster_value(x,out_image,out_transform))

    if save == True:
        df_exposure = pd.DataFrame(gdf_table)
        df_exposure.to_csv(os.path.join(data_path,'output_exposure',country,
                                        '{}_exposure.csv'.format(region)))

    print('Obtained all storm information for {}'.format(region))

    return gdf_table
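# Example usage (illustrative sketch): build the exposure table for a single
# region. 'DE111' is a hypothetical NUTS3 code; the data directory layout must
# match load_config()['paths']['data'].
#
#   >>> gdf = region_exposure('DE111', include_storms=True, event_set=False, save=False)
#   >>> gdf[['ID_', 'COUNTRY', 'AREA_m2', 'CLC_2012']].head()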
def region_losses(region,storm_event_set=False,sample=(5,0,95,20,80)):
    """Estimate the losses for all buildings in a region for a pre-defined list of storms.

    Arguments:
        *region* (string) -- NUTS3 code of region to consider.

        *storm_event_set* (bool) -- if set to **True**, calculate the losses for the event set and aggregate them into a risk estimate, instead of reporting losses per historical storm (default: **False**).

        *sample* (tuple) -- tuple of parameter values. This is a dummy placeholder and should be filled with either **load_sample(country)** values or **sens_analysis_param_list**.

    Returns:
        *pandas DataFrame* -- dataframe with all buildings of the region and their **losses** for each wind storm.
    """
    data_path = load_config()['paths']['data']
    country = region[:2]

    # load storms
    if storm_event_set == False:
        storm_list = get_storm_list(data_path)
        storm_name_list = [str(int2date(get_num(x[-23:].split('_')[0][:-2]))) for x in storm_list]
    else:
        storm_list = get_event_storm_list(data_path)
        storm_name_list = [str(int2date(get_num(x[-24:].split('_')[0][:-4]))) for x in storm_list]

    # load maximum damages
    max_dam = load_max_dam(data_path)

    # load fragility curves
    curves = load_curves(data_path)

    output_table = region_exposure(region,include_storms=True,event_set=storm_event_set)

    no_storm_columns = list(set(output_table.columns).difference(list(storm_name_list)))
    write_output = pd.DataFrame(output_table[no_storm_columns])

    ## calculate losses for all buildings in this NUTS3 region
    for storm in storm_name_list:
        write_output[storm] = loss_calculation(storm,country,output_table,max_dam,curves,sample)

    df_losses = pd.DataFrame(write_output)

    if storm_event_set == False:
        ## save the regional loss file
        df_losses.to_csv(os.path.join(data_path,'output_losses',country,'{}_losses.csv'.format(region)))
        print('Finished with loss calculation for {}'.format(region))
        return gpd.GeoDataFrame(write_output)

    else:
        # numpify the loss data
        pdZ = np.array(df_losses[storm_name_list],dtype=int)
        write_output.drop(storm_name_list, axis=1, inplace=True)

        # estimate risk per building as the area under the empirical
        # cumulative distribution of the event-set losses
        output_ = []
        for row in pdZ:
            H,X1 = np.histogram(row, bins=100, density=True)
            dx = X1[1] - X1[0]
            F1 = np.cumsum(np.append(0,H))*dx
            output_.append(metrics.auc(X1, F1))

        df_losses['Risk'] = output_
        df_losses.to_csv(os.path.join(data_path,'output_risk',country,'{}_risk.csv'.format(region)))
        print('Finished with risk calculation for {}'.format(region))
        return gpd.GeoDataFrame(write_output)
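# Example usage (illustrative): estimate historical-storm losses for one
# region, using the country-specific curve/land-use ratios from load_sample().
# 'DE111' is a hypothetical NUTS3 code.
#
#   >>> losses = region_losses('DE111', storm_event_set=False, sample=load_sample('DE'))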
def region_sens_analysis(region,samples,sens_analysis_storms=[],save=True):
    """Perform a sensitivity analysis for the specified region, based on a predefined list of storms.

    Arguments:
        *region* (string) -- NUTS3 code of region to consider.

        *samples* (list) -- list of tuples, where each tuple is a **unique** set of parameter values.

        *sens_analysis_storms* (list) -- if empty, it will be filled with the default list of storms.

        *save* (bool) -- boolean to decide whether you want to save the output to a csv file (default: **True**)

    Returns:
        *list* -- list with the total losses per storm for all parameter combinations
    """
    data_path = load_config()['paths']['data']
    country = region[:2]

    # select storms to assess
    if len(sens_analysis_storms) == 0:
        sens_analysis_storms = ['19991203','19900125','20090124','20070118','19991226']

    storm_list = sens_analysis_storms
    all_combis = list(product(samples,storm_list))

    # load maximum damages
    max_dam = load_max_dam(data_path)

    # load fragility curves
    curves = load_curves(data_path)

    # get exposure table
    output_table = region_exposure(region,include_storms=True,event_set=False,
                                   sens_analysis_storms=storm_list,save=True)

    # calculate the total losses for all parameter/storm combinations; each
    # row corresponds to one parameter sample, each column to one storm
    output_file = pd.DataFrame(index=list(range(len(samples))),columns=sens_analysis_storms)
    for iter_,(sample,storm) in enumerate(all_combis):
        output_file.loc[iter_//len(storm_list),storm] = loss_calculation(
            storm,country,output_table,max_dam,curves,sample).sum()

    if save == True:
        output_file.to_csv(os.path.join(data_path,'output_sens','{}_sens_analysis.csv'.format(region)))

    return output_file
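# Example usage (illustrative): run the sensitivity analysis for one region
# with a small, hand-picked set of parameter samples. Each tuple follows the
# (c2, c3, c4, lu1, lu2) convention of load_sample(); 'DE111' is a
# hypothetical NUTS3 code.
#
#   >>> samples = [(5, 0, 95, 20, 80), (0, 45, 55, 50, 50)]
#   >>> sens = region_sens_analysis('DE111', samples, save=False)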
def loss_calculation(storm,country,output_table,max_dam,curves,sample):
    """Calculate the losses per storm.

    Arguments:
        *storm* (string) -- date of the storm.

        *country* (string) -- ISO2 code of country to consider.

        *output_table* (GeoDataFrame) -- GeoDataFrame with all buildings and the wind speed values for each storm.

        *max_dam* (numpy array) -- table with maximum damages per building type/land-use class.

        *curves* (pandas dataframe) -- fragility curves for the different building types.

        *sample* (list) -- ratios of the different curves used in this study. See the **Sensitivity analysis documentation** for an explanation.

    Returns:
        *pandas Series* -- losses to all buildings for the specified storm
    """
    max_dam_country = np.asarray(max_dam[max_dam['CODE'].str.contains(country)].iloc[:,1:],dtype='int16')

    df_C2 = pd.DataFrame(output_table[['AREA_m2','CLC_2012',storm]])
    df_C3 = pd.DataFrame(output_table[['AREA_m2','CLC_2012',storm]])
    df_C4 = pd.DataFrame(output_table[['AREA_m2','CLC_2012',storm]])

    # error processing of implausibly large integer values in the storm data
    df_C2_error = df_C2[df_C2[storm].astype(str).str.split(".").str[0].map(len) > 10]
    df_C3_error = df_C3[df_C3[storm].astype(str).str.split(".").str[0].map(len) > 10]
    df_C4_error = df_C4[df_C4[storm].astype(str).str.split(".").str[0].map(len) > 10]
    if not df_C2_error.empty:
        print(df_C2_error)
    if not df_C3_error.empty:
        print(df_C3_error)
    if not df_C4_error.empty:
        print(df_C4_error)

    # map the wind speeds to damage percentages via the fragility curves
    df_C2[str(storm)+'_curve'] = df_C2[storm].astype('int64').map(curves['C2'])
    df_C3[str(storm)+'_curve'] = df_C3[storm].astype('int64').map(curves['C3'])
    df_C4[str(storm)+'_curve'] = df_C4[storm].astype('int64').map(curves['C4'])

    # curves are in percentages, rescale to ratios
    df_C2[str(storm)+'_curve'] = df_C2[str(storm)+'_curve']/100
    df_C3[str(storm)+'_curve'] = df_C3[str(storm)+'_curve']/100
    df_C4[str(storm)+'_curve'] = df_C4[str(storm)+'_curve']/100

    # specify shares for urban and non-urban land use
    RES_URB = sample[4]/100
    IND_URB = sample[3]/100
    RES_NONURB = 0.5
    IND_NONURB = 0.5

    # use numpy.where to fill a new column with the losses per curve:
    # loss = area * damage ratio * maximum damage * residential/industrial share * curve weight
    df_C2['Loss'] = np.where(df_C2['CLC_2012'].between(0,12, inclusive=True),
        (df_C2['AREA_m2']*df_C2[str(storm)+'_curve']*max_dam_country[0,0]*RES_URB
         + df_C2['AREA_m2']*df_C2[str(storm)+'_curve']*max_dam_country[0,2]*IND_URB)*(sample[0]/100), 0)
    df_C2['Loss'] = np.where(df_C2['CLC_2012'].between(13,23, inclusive=True),
        (df_C2['AREA_m2']*df_C2[str(storm)+'_curve']*max_dam_country[0,0]*RES_NONURB
         + df_C2['AREA_m2']*df_C2[str(storm)+'_curve']*max_dam_country[0,2]*IND_NONURB)*(sample[0]/100), df_C2['Loss'])

    df_C3['Loss'] = np.where(df_C3['CLC_2012'].between(0,12, inclusive=True),
        (df_C3['AREA_m2']*df_C3[str(storm)+'_curve']*max_dam_country[0,0]*RES_URB
         + df_C3['AREA_m2']*df_C3[str(storm)+'_curve']*max_dam_country[0,2]*IND_URB)*(sample[1]/100), 0)
    df_C3['Loss'] = np.where(df_C3['CLC_2012'].between(13,23, inclusive=True),
        (df_C3['AREA_m2']*df_C3[str(storm)+'_curve']*max_dam_country[0,0]*RES_NONURB
         + df_C3['AREA_m2']*df_C3[str(storm)+'_curve']*max_dam_country[0,2]*IND_NONURB)*(sample[1]/100), df_C3['Loss'])

    df_C4['Loss'] = np.where(df_C4['CLC_2012'].between(0,12, inclusive=True),
        (df_C4['AREA_m2']*df_C4[str(storm)+'_curve']*max_dam_country[0,0]*RES_URB
         + df_C4['AREA_m2']*df_C4[str(storm)+'_curve']*max_dam_country[0,2]*IND_URB)*(sample[2]/100), 0)
    df_C4['Loss'] = np.where(df_C4['CLC_2012'].between(13,23, inclusive=True),
        (df_C4['AREA_m2']*df_C4[str(storm)+'_curve']*max_dam_country[0,0]*RES_NONURB
         + df_C4['AREA_m2']*df_C4[str(storm)+'_curve']*max_dam_country[0,2]*IND_NONURB)*(sample[2]/100), df_C4['Loss'])

    # sum the losses of the three curves and write the output
    return (df_C2['Loss'].fillna(0).astype(int) + df_C3['Loss'].fillna(0).astype(int)
            + df_C4['Loss'].fillna(0).astype(int))
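# Worked example of the loss formula above (illustrative numbers, urban case):
# a 200 m2 building in an urban CLC class (0-12) with a curve value of 0.4
# (i.e. 40% damage), a residential maximum damage of 1000 and an industrial
# maximum damage of 500 per m2, and sample = (5, 0, 95, 20, 80), so that
# RES_URB = 0.8, IND_URB = 0.2 and the C2 curve weight is 5/100:
#
#   >>> (200*0.4*1000*0.8 + 200*0.4*500*0.2) * (5/100)
#   3600.0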
def get_storm_data(storm_path):
    """Obtain the raster grid of a storm with rasterio.

    Arguments:
        *storm_path* (string) -- path to location of the storm raster

    Returns:
        *numpy array* and *Affine* -- the wind speed grid and its georeference
    """
    with rio.open(storm_path) as src:
        # read as numpy array
        array = src.read(1)
        array = np.array(array,dtype='float32')
        affine_storm = src.transform

    return array,affine_storm
def extract_buildings(area,country,NUTS3=True):
    """Extract buildings from an OpenStreetMap .osm.pbf file and save them to an ESRI shapefile.

    Arguments:
        *area* (string) -- name of area to clip

        *country* (string) -- ISO2 code of country to consider.

        *NUTS3* (bool) -- specify whether it will be a clip of a NUTS3 region or the whole country (default: **True**)
    """
    # get data path
    data_path = load_config()['paths']['data']

    wgs = os.path.join(data_path,country,'NUTS3_BUILDINGS','{}_buildings.shp'.format(area))

    if NUTS3 == True:
        pbf = os.path.join(data_path,country,'NUTS3_OSM','{}.osm.pbf'.format(area))
    else:
        pbf = os.path.join(data_path,'OSM','{}.osm.pbf'.format(area))

    os.system('ogr2ogr -progress -f "ESRI shapefile" {} {} -sql "select \
        building,amenity from multipolygons where building is not null" \
        -lco ENCODING=UTF-8 -nlt POLYGON -skipfailures'.format(wgs,pbf))
def convert_buildings(area,country):
    """Convert the coordinate system of the buildings from EPSG:4326 to EPSG:3035.

    Arguments:
        *area* (string) -- name of area (most often NUTS3) for which buildings should be converted to the European coordinate system.

        *country* (string) -- ISO2 code of country to consider.

    Returns:
        *GeoDataFrame* -- Geopandas dataframe with all buildings of the selected area
    """
    # get data path
    data_path = load_config()['paths']['data']

    # path to area with buildings
    etrs = os.path.join(data_path,country,'NUTS3_BUILDINGS','{}_buildings.shp'.format(area))

    # load data and reproject
    input_ = gpd.read_file(etrs)
    input_ = input_.to_crs(epsg=3035)

    return input_
def get_storm_list(data_path):
    """Small function to create a list with path strings to all storms.

    Arguments:
        *data_path* (string) -- string of data path where all data is located.

    Returns:
        *list* -- list with the path strings of all storms
    """
    storm_list = []
    for root, dirs, files in os.walk(os.path.join(data_path,'STORMS')):
        for file in files:
            if file.endswith('.tif'):
                fname = os.path.join(data_path,'STORMS',file)
                (filepath, filename_storm) = os.path.split(fname)
                (fileshortname_storm, extension) = os.path.splitext(filename_storm)
                resample_storm = os.path.join(data_path,'STORMS',fileshortname_storm+'.tif')
                storm_list.append(resample_storm)

    return storm_list
def get_event_storm_list(data_path):
    """Small function to create a list with path strings to all storms in the event set.

    Arguments:
        *data_path* (string) -- string of data path where all data is located.

    Returns:
        *list* -- list with the path strings of all storms
    """
    storm_list = []
    for root, dirs, files in os.walk(os.path.join(data_path,'event_set_tif')):
        for file in files:
            if file.endswith('.tif'):
                fname = os.path.join(data_path,'event_set_tif',file)
                (filepath, filename_storm) = os.path.split(fname)
                (fileshortname_storm, extension) = os.path.splitext(filename_storm)
                resample_storm = os.path.join(data_path,'event_set_tif',fileshortname_storm+'.tif')
                storm_list.append(resample_storm)

    return storm_list
def load_max_dam(data_path):
    """Small function to load the Excel file with the maximum damages.

    Arguments:
        *data_path* (string) -- string of data path where all data is located.

    Returns:
        *dataframe* -- pandas dataframe with the maximum damages per land-use class.
    """
    return pd.read_excel(os.path.join(data_path,'input_data','max_dam2.xlsx'))
def load_curves(data_path):
    """Small function to load the csv file with the different fragility curves.

    Arguments:
        *data_path* (string) -- string of data path where all data is located.

    Returns:
        *dataframe* -- pandas dataframe with the fragility curves
    """
    return pd.read_csv(os.path.join(data_path,'input_data','CURVES.csv'),
                       index_col=[0],names=['C1','C2','C3','C4','C5','C6'])
def load_sample(country):
    """Will load the ratio of each curve and land use to be used.

    Arguments:
        *country* (string) -- ISO2 code of country to consider.

    Returns:
        *tuple* -- tuple with the ratios for the selected country, ordered as **['c2', 'c3', 'c4', 'lu1', 'lu2']**. See the **documentation** for an explanation.
    """
    dict_ = dict([('AT', ( 5,  0, 95, 20, 80)),
                  ('CZ', ( 5,  0, 95, 20, 80)),
                  ('CH', ( 5,  0, 95, 20, 80)),
                  ('BE', ( 0, 45, 55, 50, 50)),
                  ('DK', ( 0, 10, 90, 20, 80)),
                  ('FR', (10, 30, 60, 20, 80)),
                  ('DE', ( 5, 75, 20, 50, 50)),
                  ('IE', (35, 65,  0, 30, 70)),
                  ('LU', (50, 50,  0, 20, 80)),
                  ('NL', ( 0, 45, 55, 20, 80)),
                  ('NO', ( 0,100,  0, 20, 80)),
                  ('FI', ( 5, 15, 80, 50, 50)),
                  ('LT', ( 5, 15, 80, 50, 50)),
                  ('LV', ( 5, 15, 80, 50, 50)),
                  ('EE', ( 5, 15, 80, 50, 50)),
                  ('PL', ( 5, 15, 80, 30, 70)),
                  ('IT', (10, 90,  0, 20, 80)),
                  ('ES', (15, 85,  0, 20, 80)),
                  ('PT', (15, 85,  0, 20, 80)),
                  ('SE', ( 0, 10, 90, 30, 70)),
                  ('UK', ( 5, 30, 65, 50, 50))])

    return dict_[country]
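# Example (values taken from the dictionary above):
#
#   >>> c2, c3, c4, lu1, lu2 = load_sample('DE')
#   >>> (c2, c3, c4, lu1, lu2)
#   (5, 75, 20, 50, 50)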
def load_osm_data(data_path,country,region='',regional=False):
    """This function loads the OSM file for the country or region.

    Arguments:
        *data_path* (string) -- string of data path where all data is located.

        *country* (string) -- ISO2 code of country to consider.

        *region* (string) -- NUTS3 code of region to consider.

        *regional* (boolean) -- boolean to decide whether this should be done on a regional level or for the entire country (default: **False**).

    Returns:
        opened OSM file to use in the fetch_buildings function.
    """
    if regional == False:
        osm_path = os.path.join(data_path,'OSM','{}.osm.pbf'.format(country))
    else:
        osm_path = os.path.join(data_path,country,'NUTS3_OSM','{}.osm.pbf'.format(region))

    driver = ogr.GetDriverByName('OSM')
    return driver.Open(osm_path)
def fetch_buildings(data_path,country,region='',regional=False):
    """This function directly reads the building data from osm, instead of first converting it to a shapefile.

    Arguments:
        *data_path* (string) -- string of data path where all data is located.

        *country* (string) -- ISO2 code of country to consider.

        *region* (string) -- NUTS3 code of region to consider.

        *regional* (boolean) -- boolean to decide whether this should be done on a regional level or for the entire country (default: **False**).

    Returns:
        *GeoDataFrame* -- Geopandas dataframe with all buildings in the specified **region** or **country**.
    """
    data = load_osm_data(data_path,country,region,regional=regional)

    buildings = []
    if data is not None:
        sql_lyr = data.ExecuteSQL("SELECT osm_id,building from multipolygons where building is not null")
        for feature in sql_lyr:
            try:
                if feature.GetField('building') is not None:
                    osm_id = feature.GetField('osm_id')
                    shapely_geo = loads(feature.geometry().ExportToWkb())
                    if shapely_geo is None:
                        continue
                    building = feature.GetField('building')
                    #amenity = feature.GetField('amenity')
                    buildings.append([osm_id,building,shapely_geo])
            except Exception:
                print("warning: skipped building")
        print(len(buildings))
    else:
        print("Nonetype error when requesting SQL for region: {}. Skipping region, check required.".format(region))

    if len(buildings) > 0:
        return gpd.GeoDataFrame(buildings,columns=['osm_id','building','geometry'],crs={'init': 'epsg:4326'})
    else:
        print("warning: no buildings found (or out of memory) for region: {}. Returning an empty GeoDataFrame.".format(region))
        return gpd.GeoDataFrame(columns=['osm_id','building','geometry'],crs={'init': 'epsg:4326'})
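# Example usage (illustrative): read the buildings of one region directly from
# its clipped .osm.pbf file. 'DE' and 'DE111' are hypothetical codes; the file
# <data>/DE/NUTS3_OSM/DE111.osm.pbf must exist (see clip_osm below).
#
#   >>> data_path = load_config()['paths']['data']
#   >>> buildings = fetch_buildings(data_path, 'DE', region='DE111', regional=True)
#   >>> list(buildings.columns)
#   ['osm_id', 'building', 'geometry']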
def poly_files(data_path,country):
    """This function will create the .poly files from the NUTS shapefile. These .poly files are used to extract data from the openstreetmap files.

    This function is adapted from the OSMPoly function in QGIS.

    Arguments:
        *data_path* (string) -- string of data path where all data is located.

        *country* (string) -- ISO2 code of country to consider.

    Returns:
        *.poly file* -- .poly file for each NUTS3 region, in a new directory in the working directory of the country.
    """
    # =============================================================================
    # Create output dir for .poly files if it doesn't exist yet
    # =============================================================================
    poly_dir = os.path.join(data_path,country,'NUTS3_POLY')
    if not os.path.exists(poly_dir):
        os.makedirs(poly_dir)

    # =============================================================================
    # Load country shapes and only keep the required NUTS3 regions
    # =============================================================================
    wb_poly = gpd.read_file(os.path.join(data_path,'input_data','NUTS3_ETRS.shp'))

    # filter the polygon file
    country_poly = wb_poly.loc[(wb_poly['NUTS_ID'].apply(lambda x: x.startswith(country)))
                               & (wb_poly['STAT_LEVL_']==3)]

    country_poly.crs = {'init': 'epsg:3035'}
    country_poly = country_poly.to_crs({'init': 'epsg:4326'})

    # =============================================================================
    # The important part of this function: create .poly files to clip the country
    # data from the openstreetmap file
    # =============================================================================
    num = 0
    # iterate over the regions (rows) in the shapefile
    for f in country_poly.iterrows():
        f = f[1]
        num = num + 1
        geom = f.geometry

        # this will create a list of the different subpolygons
        if geom.geom_type == 'MultiPolygon':
            polygons = geom
        # the list will be length 1 if it is just one polygon
        elif geom.geom_type == 'Polygon':
            polygons = [geom]

        # define the name of the output file, based on the NUTS_ID
        attr = f['NUTS_ID']

        # start writing the .poly file
        f = open(poly_dir + "/" + attr + '.poly', 'w')
        f.write(attr + "\n")

        i = 0
        # loop over the different polygons, get their exterior and write the
        # coordinates of the ring to the .poly file
        for polygon in polygons:
            polygon = np.array(polygon.exterior)
            j = 0
            f.write(str(i) + "\n")
            for ring in polygon:
                j = j + 1
                f.write(" " + str(ring[0]) + " " + str(ring[1]) + "\n")
            i = i + 1
            # close the ring of one subpolygon if done
            f.write("END" + "\n")

        # close the file when done
        f.write("END" + "\n")
        f.close()
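# The loop above produces one .poly file per NUTS3 region, in the layout that
# osmconvert expects. Sketch for a hypothetical single-polygon region 'DE111'
# (coordinates are illustrative lon/lat pairs):
#
#   DE111
#   0
#    6.8 49.2
#    7.1 49.2
#    7.1 49.5
#    6.8 49.2
#   END
#   END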
def clip_landuse(data_path,country,region,outrast_lu):
    """Clip the land use from the European Corine Land Cover (CLC) map to the considered region.

    Arguments:
        *data_path* (string) -- string of data path where all data is located.

        *country* (string) -- ISO2 code of country to consider.

        *region* (string) -- NUTS3 code of region to consider.

        *outrast_lu* (string) -- string path to the output location of the clipped Corine Land Cover raster

    Returns:
        *raster* (geotiff) -- raster file with only the land use of the clipped **region**.
    """
    inraster = os.path.join(data_path,'input_data','g100_clc12_V18_5.tif')
    inshape = os.path.join(data_path,country,'NUTS3_SHAPE','{}.shp'.format(region))

    subprocess.call(["gdalwarp","-q","-overwrite","-srcnodata","-9999","-co","compress=lzw",
                     "-tr","100","-100","-r","near",inraster,outrast_lu,
                     "-cutline",inshape,"-crop_to_cutline"])
def clip_osm(data_path,osm_path,area_poly,area_pbf):
    """Clip an area osm file from the larger continent (or planet) file and save it to a new osm.pbf file. This is much faster than clipping the osm.pbf file while extracting through ogr2ogr.

    This function uses the osmconvert tool, which can be found at http://wiki.openstreetmap.org/wiki/Osmconvert. Either add the directory where this executable is located to your environmental variables or just put it in the 'scripts' directory.

    Arguments:
        *data_path* (string) -- string of data path where all data is located.

        *osm_path* (string) -- path string to the osm.pbf file of the continent associated with the country.

        *area_poly* (string) -- path string to the .poly file, made through the 'poly_files' function.

        *area_pbf* (string) -- path string indicating the final output dir and output name of the new .osm.pbf file.

    Returns:
        a clipped .osm.pbf file.
    """
    osm_convert_path = os.path.join(data_path,'osmconvert','osmconvert64')
    try:
        if os.path.exists(area_pbf) is not True:
            os.system('{} {} -B={} --complete-ways -o={}'.format(osm_convert_path,osm_path,area_poly,area_pbf))
    except:
        print('{} did not finish!'.format(area_pbf))
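# The os.system call above expands to an osmconvert command of this form
# (illustrative paths):
#
#   osmconvert64 planet.osm.pbf -B=DE111.poly --complete-ways -o=DE111.osm.pbf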
def load_sens_analysis_storms(storm_name_list=['19991203','19900125','20090124','20070118','19991226']):
    """This function loads the storms used to perform the sensitivity analysis.

    Arguments:
        *storm_name_list* (list) -- list of storms to include in the sensitivity analysis. The default storms are **Anatol**, **Daria**, **Klaus**, **Kyrill** and **Lothar**.

    Returns:
        *storm_list* (list) -- the same list of storms, but now with the full paths to their location in the data directory.
    """
    data_path = load_config()['paths']['data']

    storm_list = []
    for root, dirs, files in os.walk(os.path.join(data_path,'STORMS')):
        for file in files:
            for storm in storm_name_list:
                if storm in file:
                    storm_list.append(os.path.join(data_path,'STORMS',file))

    return storm_list
def get_raster_value(centroid,out_image,out_transform):
    """Small function to obtain the raster value from a rastermap, using point_query.

    Arguments:
        *centroid* (shapely geometry) -- shapely geometry of the centroid of the building.

        *out_image* (numpy array) -- numpy array of the grid.

        *out_transform* (Affine) -- georeference of the numpy array.

    Returns:
        *integer* -- raster value corresponding to the location of the centroid
    """
    # point_query returns a list with one value; fall back to 255 when it
    # returns nothing (i.e. the centroid falls on nodata)
    return int(point_query(centroid,out_image,affine=out_transform,
                           nodata=-9999,interpolate='nearest')[0] or 255)
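# Example usage (illustrative): sample a clipped wind-speed raster at a single
# building centroid, mirroring how region_exposure() calls this function.
# storm_path, geoms and gdf_table are hypothetical, set up as in region_exposure().
#
#   >>> with rio.open(storm_path) as src:
#   ...     out_image, out_transform = mask(src, geoms, crop=True)
#   >>> get_raster_value(gdf_table['centroid'].iloc[0], out_image[0,:,:], out_transform)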
def summary_statistics_losses():
    """This function creates the file 'output_storms.xlsx'. This file is required to create the summary figures.

    Returns:
        *output_storms.xlsx* (excel file) -- Excel file with summary outcomes
    """
    data_path = load_config()['paths']['data']

    countries = ['AT','BE','DK','FR','DE','IE','LU','NL','NO','SE','UK','PL','IT','FI']

    # get the column layout (and the storm names) from one regional loss file
    first_line = pd.read_csv(os.path.join(data_path,'output_losses','LU','LU000_losses.csv'), nrows=1)
    extract = first_line.columns.tolist()[2:]
    storm_name_list = extract[8:]

    output_storms = pd.DataFrame(np.zeros((len(storm_name_list),len(countries))),index=storm_name_list,columns=countries)
    output_storms_res = pd.DataFrame(np.zeros((len(storm_name_list),len(countries))),index=storm_name_list,columns=countries)
    output_storms_ind_com = pd.DataFrame(np.zeros((len(storm_name_list),len(countries))),index=storm_name_list,columns=countries)
    output_storms_transport = pd.DataFrame(np.zeros((len(storm_name_list),len(countries))),index=storm_name_list,columns=countries)
    output_storms_other = pd.DataFrame(np.zeros((len(storm_name_list),len(countries))),index=storm_name_list,columns=countries)
    output_storms_agri = pd.DataFrame(np.zeros((len(storm_name_list),len(countries))),index=storm_name_list,columns=countries)

    for country in countries:
        output_table = pd.DataFrame()
        for root, dirs, files in os.walk(os.path.join(data_path,'output_losses',country)):
            for file in files:
                output_table = pd.read_csv(os.path.join(data_path,'output_losses',country,file), usecols=extract)
                output_table = output_table.replace([np.inf, -np.inf], np.nan).dropna(how='all')
                output_table = output_table.reset_index(inplace=False)

                # losses are summed per CLC class group, in millions

                # TOTAL
                output_storms[country] += (output_table[storm_name_list].sum(axis=0)/1000000)

                # RESIDENTIAL
                res = output_table[output_table.CLC_2012 < 3]
                output_storms_res[country] += (res[storm_name_list].sum(axis=0)/1000000)

                # COMMERCIAL/INDUSTRIAL
                ind_com = output_table[output_table.CLC_2012 == 3]
                output_storms_ind_com[country] += (ind_com[storm_name_list].sum(axis=0)/1000000)

                # TRANSPORT, PORTS, AIRPORTS
                transport = output_table.loc[np.where(output_table['CLC_2012'].between(4,6, inclusive=True))[0]]
                output_storms_transport[country] += (transport[storm_name_list].sum(axis=0)/1000000)

                # OTHER BUILT-UP
                other = output_table.loc[np.where(output_table['CLC_2012'].between(7,12, inclusive=True))[0]]
                output_storms_other[country] += (other[storm_name_list].sum(axis=0)/1000000)

                # AGRICULTURAL BUILDINGS
                agri = output_table[output_table.CLC_2012 > 12]
                output_storms_agri[country] += (agri[storm_name_list].sum(axis=0)/1000000)

    output_storms['Sum'] = output_storms.sum(axis=1)
    output_storms_res['Sum'] = output_storms_res.sum(axis=1)
    output_storms_ind_com['Sum'] = output_storms_ind_com.sum(axis=1)
    output_storms_transport['Sum'] = output_storms_transport.sum(axis=1)
    output_storms_other['Sum'] = output_storms_other.sum(axis=1)
    output_storms_agri['Sum'] = output_storms_agri.sum(axis=1)

    out = pd.ExcelWriter(os.path.join(data_path,'output_storms.xlsx'))
    output_storms.to_excel(out,sheet_name='total_losses',index_label='Storm')
    output_storms_res.to_excel(out,sheet_name='res_losses',index_label='Storm')
    output_storms_ind_com.to_excel(out,sheet_name='ind_com_losses',index_label='Storm')
    output_storms_transport.to_excel(out,sheet_name='transport_losses',index_label='Storm')
    output_storms_other.to_excel(out,sheet_name='other_losses',index_label='Storm')
    output_storms_agri.to_excel(out,sheet_name='agri_losses',index_label='Storm')
    out.save()
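# Example usage (illustrative): build the summary workbook and read one sheet
# back, assuming the regional *_losses.csv files have been produced first.
#
#   >>> summary_statistics_losses()
#   >>> totals = pd.read_excel(os.path.join(load_config()['paths']['data'],
#   ...                        'output_storms.xlsx'), sheet_name='total_losses')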