Source code for pyeplan.dataproc

import pandas as pd
import numpy as np
import urllib.request
import urllib.parse
import matplotlib.pyplot as plt
import math 
import os
import shutil 
from timezonefinder import TimezoneFinder
from sklearn.cluster import KMeans


[docs]class datsys:
    def __init__(self, inp_folder = '', lat = 0.251148605450955, lon = 32.404833929733,year = 2016, pvcalc = 1, pp = 50, sys_loss = 14, n_clust = 1, pf_c = 1, pf_p = 1, sbase = 1000):
        
        
        '''
        Initialise the Data Processing module. The PVGIS tool (https://ec.europa.eu/jrc/en/pvgis) has been  
        to collect renewable production data sets at different locations across the world.  
        '''
        self.loc = pd.read_excel(inp_folder + os.sep + 'mgpc_dist.xlsx', sheet_name = 'Load Point', skiprows= 0, usecols = 'A,B')
        
        self.pdem = pd.read_excel(inp_folder + os.sep + 'mgpc_dist.xlsx', sheet_name = 'Load Point', skiprows= 0, usecols = 'D:AA')
        
        self.prep = pd.read_excel(inp_folder + os.sep + 'mgpc_dist.xlsx', sheet_name = 'Load Level', skiprows= 0, skipfooter=0, usecols = 'B')
        
        #Latitude (in decimal degrees, south is negative)
        self.lat = lat 
        
        #Longitude (in decimal degrees, west is negative)
        self.lon = lon 
        
        #Raddatabase = 'PVGIS-SARAH' #Name of the radiation database (DB): "PVGIS-SARAH" for Europe, Africa and Asia  are PVGIS-SARAH, PVGIS-NSRDB and PVGIS-ERA5 based on the chosen location.
        
        #Start year of data collection 
        self.startyear = year
        
        #End year of data collection 
        self.endyear = year
        
        #Calculation method of PV output parameters: pvcalc = 0 -> solar radiation calculations, pvcalc = 1 -> solar radiation and power production calculations        
        self.pvcalculation = pvcalc 
        
        #Nominal power of the PV system [kW]
        self.peakpower = pp 
        
        #Sum of system losses [%]
        self.loss = sys_loss 
        
        #Type of sun tracking
        self.trackingtype = 2 
        '''
        0 = fixed 
        1 = single horizontal axis aligned north-south, 
        2 = two-axis tracking,
        3 = vertical axis tracking, 
        4 = single horizontal axis aligned east-west, 
        5 = single inclined axis aligned north-south
        '''
        
        #Calculate the optimum inclination angle
        self.optimalinclination =  1 
        '''
        Value of 1 for "yes". All other values (or no value) mean "no". Not relevant for 2-axis tracking.
        '''
        
        #Calculate the optimum inclination AND orientation angles#
        self.optimalangles =  1 
        '''
        Value of 1 for "yes". All other values (or no value) mean "no". Not relevant for tracking planes.
        '''
        
        #Type of output.
        self.outputformat = 'basic' 
        ''' 
        Choices: 
        "csv"  
        "basic"  
        "json" 
        '''
        
        #Format of outpout
        '''
        0 = output as stream 
        1 = output as file 
        '''
        self.browser = 1
        
        #Number of clusters 
        self.n_clust = n_clust
        
        #Power Factor at each consumption point 
        self.pf_c = pf_c
        
        #Power factor at each production point (renewable)
        self.pf_p = pf_p
        
        #Base apparent power 
        self.sbase = sbase
        
        #Data extraction from PVGIS
        self.data_link = 'https://re.jrc.ec.europa.eu/api/seriescalc'
        self.data_link = self.data_link + '?lat=' + str(self.lat) + '&lon=' + str(self.lon) 
        self.data_link = self.data_link + '&startyear=' + str(self.startyear) + '&endyear=' + str(self.endyear) 
        self.data_link = self.data_link + '&pvcalculation=' + str(self.pvcalculation) + '&peakpower=' + str(self.peakpower) 
        self.data_link = self.data_link + '&loss=' + str(self.loss) + '&trackingtype=' + str(self.trackingtype)
        self.data_link = self.data_link + '&optimalinclination=' + str(self.optimalinclination) + '&optimalangles=' + str(self.optimalangles)
        self.data_link = self.data_link + '&outputformat=' + self.outputformat + '&browser=' + str(self.browser)
        self.data = pd.read_csv(urllib.request.urlopen(self.data_link), skiprows=2, header=None) 
    
        '''
        Data columns description as described by PVGIS:
            
        Time = Date and hour
        P = PV system power (W) ** Column not included if pvcalc = 0
        G(i) = Global irradiance on the inclined plane (plane of the array) (W/m2)
        H_sun = Sun height (degree)
        T2m = 2-m air temperature (degree Celsius)
        WS10m = 10-m total wind speed (m/s)
        Int = 1 means solar radiation values are reconstructed
        
        '''
        
        #Finding timezone based on latitude and longitude 
        tf = TimezoneFinder()
        self.local_time_zone = tf.timezone_at(lng=self.lon, lat=self.lat)
    
        #Calculating active and reactive power at each load point   
        self.prep = self.prep[np.repeat(self.prep.columns.values,self.n_clust)]
        self.qrep = math.tan(math.acos(self.pf_c))*self.prep  

        self.prep.columns = list(range(self.n_clust))
        self.qrep.columns = list(range(self.n_clust))
        
        self.prep.to_csv(inp_folder + os.sep + 'prep_dist.csv', index = False)
        self.qrep.to_csv(inp_folder + os.sep + 'qrep_dist.csv', index = False)
 
        self.loc.to_csv(inp_folder + os.sep + 'geol_dist.csv')
       
        self.pdem.columns = list(range(24))
        self.qdem = self.pdem 
       
        self.pdem.T.to_csv(inp_folder + os.sep + 'pdem_dist.csv', index = False)
        self.qdem.T.to_csv(inp_folder + os.sep + 'qdem_dist.csv', index = False)
        
        self.inp_folder = inp_folder
  
    #Data pre-processing    
    def data_extract(self):
        #Convert to local time zone
        
        #Create yearly UTC timestamps using pandas
        UTC_time = pd.date_range(str(self.startyear) +'-01-01', str(self.endyear+1) +'-01-01', freq='1H', closed='left', tz='UTC')
        
        #Convert UTC to local time
        local_time = UTC_time.tz_convert(self.local_time_zone)
        
        #Convert back to naive timestamps, but in local time zone
        local_time_naive = local_time.tz_localize(None)
    
        date_local = pd.DataFrame(local_time_naive)
        date_local[1] = pd.to_datetime(date_local[0], format='%Y:%M:%D').dt.date
        date_local[2] = pd.to_datetime(date_local[0], format='%Y:%M:%D').dt.time
        
        if self.pvcalculation == 1:
            #Add to data 
            self.data[7] = date_local[0]
            self.data[8] = date_local[1]
            self.data[9] = date_local[2]
    
            #Extract 
            ext = (self.data[7] >= str(self.startyear) + '-1-2 00:00:00') & (self.data[7] <= str(self.endyear) + '-12-30 23:00:00')
            self.data_local_time = self.data.loc[ext]
            
            #Extracting PV power
            self.PV_power = pd.pivot(self.data_local_time, index=8, columns=9, values=1) 
            
            #Extracting solar irradiance data
            self.sol_irrad = pd.pivot(self.data_local_time, index=8, columns=9, values=2) 
            
            #Extracting wind speed data
            self.wind_speed = pd.pivot(self.data_local_time, index=8, columns=9, values=5) 
        
            power_chrono = pd.DataFrame(self.PV_power/self.sbase)
            power_chrono.to_csv(self.inp_folder + os.sep + 'power_chrono.csv',index = False)
            
        if self.pvcalculation == 0:
            #Add to data 
            self.data[6] = date_local[0]
            self.data[7] = date_local[1]
            self.data[8] = date_local[2]
    
            #Extract 
            ext = (self.data[6] >= str(self.startyear) + '-1-2 00:00:00') & (self.data[6] <= str(self.endyear) + '-12-30 23:00:00')
            self.data_local_time = self.data.loc[ext]
            
            #Extracting solar irradiance data 
            self.sol_irrad = pd.pivot(self.data_local_time, index=7, columns=8, values=1) 
            
            #Extracting wind speed data
            self.wind_speed = pd.pivot(self.data_local_time, index=7, columns=8, values=4) 
    
    def kmeans_clust(self):
        #Defining the kmeans function with initialization as k-means++
        kmeans = KMeans(n_clusters=self.n_clust, init='k-means++')
    
        #Fitting the k-means algorithm on data
        model_PV_power = kmeans.fit(self.PV_power)
        PV_centers = model_PV_power.cluster_centers_
        
        PV_labels = model_PV_power.labels_
        model_sol_irrad = kmeans.fit(self.sol_irrad)
        
        irrad_centers = model_sol_irrad.cluster_centers_
        model_wind_speed = kmeans.fit(self.wind_speed)
    
        wind_centers = model_wind_speed.cluster_centers_
        
        ini_dtim = [sum(PV_labels == n) for n in range(self.n_clust)]

        dtim_tot = sum(ini_dtim)

        for n in range(self.n_clust):
            ini_dtim[n] += (365 - dtim_tot)/self.n_clust
        
        dtim = pd.DataFrame(ini_dtim)
        dtim.columns = ['dt']
        
        psol = pd.DataFrame(PV_centers/self.sbase)
        psol = psol.T
        qsol = math.tan(math.acos(self.pf_p))*psol
        #Saving clustered data
        psol.to_csv(self.inp_folder + os.sep + 'psol_dist.csv', index = False) 
        qsol.to_csv(self.inp_folder + os.sep + 'qsol_dist.csv', index = False)
        
        pwin = pd.DataFrame(0*wind_centers/self.sbase)
        pwin = pwin.T
        qwin = math.tan(math.acos(self.pf_p))*pwin
        
        #Saving clustered data
        pwin.to_csv(self.inp_folder + os.sep + 'pwin_dist.csv', index = False) 
        qwin.to_csv(self.inp_folder + os.sep + 'qwin_dist.csv', index = False)
        
        dtim.to_csv(self.inp_folder + os.sep + 'dtim_dist.csv', index = False)