# Source code for niimpy.exploration.eda.lineplot

"""
Created on Wed Oct 27 09:53:46 2021

@author: arsii
"""

import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px

def timeplot(df, users, columns, title, xlabel, ylabel, resample=False,
             interpolate=False, window=False, reset_index=False, by=False):
    """Plot a time series plot.

    Plot selected users and columns, or group level averages aggregated
    by hour or weekday.

    Parameters
    ----------
    df : Pandas Dataframe
        Dataframe containing the data.
    users : list or str
        Users to plot. The special value 'Group' selects the group level
        average plot instead of per-user time series.
    columns : list or str
        Columns to plot. A single column name may be given as a plain
        string. For the 'Group' plot only the first column is used.
    title : str
        Plot title.
    xlabel : str
        Plot xlabel.
    ylabel : str
        Plot ylabel.
    resample : str, optional
        Data resampling frequency. The default is False. For details:
        https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.resample.html
    interpolate : bool, optional
        If true, time series will be interpolated using splines.
        The default is False.
    window : int, optional
        Rolling window smoothing window size. The default is False.
    reset_index : bool, optional
        If true, dataframe index will be reset. The default is False.
    by : str, optional
        Indicator for group level averaging. The default is False.
        If 'hour', hourly averages per group are presented.
        If 'weekday', daily averages per group are presented.
        Only used when users is 'Group'; plot_averages_ requires a
        string there.

    Returns
    -------
    fig
        The figure built by plot_averages_ (when users is 'Group') or
        by plot_timeseries_ (otherwise).
    """
    assert isinstance(df, pd.DataFrame), "df is not a pandas dataframe."
    assert isinstance(users, (str, list)), "users is not a string or a list"
    assert isinstance(columns, (str, list)), "column is not a string or a list"
    assert isinstance(title, str), "title is not a string"
    assert isinstance(xlabel, str), "xlabel is not a string"
    assert isinstance(ylabel, str), "ylabel is not a string"
    assert isinstance(resample, (str, bool)), "resample is not a string or a boolean"
    assert isinstance(interpolate, bool), "interpolate is not a boolean"
    assert isinstance(window, int), "window is not an int"
    assert isinstance(reset_index, bool), "reset_index is not boolean"
    assert isinstance(by, (str, bool)), "by is not a string or a boolean"

    if users == 'Group':
        # A plain string is the column name itself; indexing it would
        # pass only its first character on.
        column = columns if isinstance(columns, str) else columns[0]
        return plot_averages_(df, column, by)

    # Wrap single names so they are not iterated character by character
    # further down.
    if isinstance(users, str):
        users = [users]
    if isinstance(columns, str):
        columns = [columns]

    return plot_timeseries_(df, columns, users, title, xlabel, ylabel,
                            resample, interpolate, window, reset_index)
def calculate_averages_(df, column, by):
    """Calculate group averages of a column by hour or by weekday.

    Parameters
    ----------
    df : Pandas Dataframe
        Dataframe containing `column` and a 'group' column. Its index
        must expose `.hour` / `.weekday` (e.g. a DatetimeIndex).
    column : str
        Column to average.
    by : str
        'hour' to average per hour of day, 'weekday' to average per
        day of week.

    Returns
    -------
    averages : Pandas Dataframe
        One row per (time bin, group) pair, indexed by the time bin,
        with the columns 'group' and `column`.

    Raises
    ------
    ValueError
        If `by` is neither 'hour' nor 'weekday'.
    """
    if by == 'hour':
        time_bins = df.index.hour
    elif by == 'weekday':
        time_bins = df.index.weekday
    else:
        # Previously this path fell through to calling set_index on the
        # integer 0, which failed with an unrelated AttributeError.
        raise ValueError(
            "by must be 'hour' or 'weekday', got {!r}".format(by))

    averages = (df[[column, 'group']]
                .groupby([time_bins, 'group'])
                .mean()
                .reset_index())
    # After reset_index the first column holds the time bin.
    return averages.set_index(averages.columns[0])
def plot_averages_(df, column, by='hour'):
    """Plot user group level averages by hour or by weekday.

    Parameters
    ----------
    df : Pandas Dataframe
        Dataframe containing the data.
    column : str
        Column to plot.
    by : str, optional
        Indicator for group level averaging. The default is 'hour'.
        If 'hour', hourly averages per group are presented.
        If 'weekday', daily averages per group are presented.

    Returns
    -------
    fig
        Line figure created with plotly express, one line per group.

    Raises
    ------
    ValueError
        If `by` is neither 'hour' nor 'weekday'.
    """
    assert isinstance(df, pd.DataFrame), "df is not a pandas dataframe."
    assert isinstance(column, str), "column is not a string"
    assert isinstance(by, str), "by is not a string"

    # Only the title and the x axis differ between the two modes.
    if by == 'hour':
        title = "{} hourly averages".format(column)
        xaxis_title = "Hour"
        tickvals = [0, 3, 6, 9, 12, 15, 18, 21]
        # 12-hour clock labels; '15pm' etc. are not valid times.
        ticktext = ['12am', '3am', '6am', '9am', '12pm', '3pm', '6pm', '9pm']
    elif by == 'weekday':
        title = "{} weekday averages".format(column)
        xaxis_title = "Weekday"
        tickvals = [0, 1, 2, 3, 4, 5, 6]
        ticktext = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    else:
        # Without this the function reached `return fig` with fig
        # never assigned.
        raise ValueError(
            "by must be 'hour' or 'weekday', got {!r}".format(by))

    averages = calculate_averages_(df, column, by)
    fig = px.line(averages, x=averages.index, y=column, color="group")
    fig.update_layout(title=title,
                      xaxis_title=xaxis_title,
                      yaxis_title="Value",
                      xaxis=dict(tickmode='array',
                                 tickvals=tickvals,
                                 ticktext=ticktext))
    return fig
def resample_data_(df, resample, interpolate, window_len, reset_index):
    """Resample and smooth data for plotting.

    The steps are applied in this order, each only when its argument is
    truthy: resampling, interpolation, rolling smoothing, index reset.
    Rows with missing values are always dropped at the end.

    Parameters
    ----------
    df : Pandas Dataframe or Series
        Data to process. It is not modified.
    resample : str or bool
        Resampling frequency; the mean of each bin is taken.
    interpolate : bool
        If true, interpolate with a spline of order 2.
    window_len : int or bool
        Size of the gaussian rolling window (std=2) whose mean is taken.
    reset_index : bool
        If true, replace the index with a default one, dropping the
        old index.

    Returns
    -------
    Pandas Dataframe or Series
        The processed data with missing values removed.
    """
    if resample:
        df = df.resample(resample).mean()

    if interpolate:
        df = df.interpolate(method='spline', order=2)

    if window_len:
        df = df.rolling(window_len, win_type='gaussian').mean(std=2)

    if reset_index:
        df = df.reset_index(drop=True)

    # Return a new object: an in-place dropna would alter the caller's
    # data whenever none of the steps above rebound df.
    return df.dropna(axis=0, how='any')
def plot_timeseries_(df, columns, users, title, xlabel, ylabel, resample=False,
                     interpolate=False, window_len=False, reset_index=False):
    """Plot one time series line per selected user and column.

    Parameters
    ----------
    df : Pandas Dataframe
        Dataframe containing the data. Users are selected through its
        'user' column.
    columns : list or str
        Columns to plot. A single column name may be given as a string.
    users : list or str
        Users to plot. A single user may be given as a string.
    title : str
        Plot title.
    xlabel : str
        Plot xlabel.
    ylabel : str
        Plot ylabel.
    resample : str, optional
        Data resampling frequency. The default is False. For details:
        https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.resample.html
    interpolate : bool, optional
        If true, time series will be interpolated using splines.
        The default is False.
    window_len : int, optional
        Rolling window smoothing window size. The default is False.
    reset_index : bool, optional
        If true, dataframe index will be reset. The default is False.

    Returns
    -------
    fig
        Plotly figure holding one trace per (user, column) pair.
    """
    assert isinstance(df, pd.DataFrame), "df is not a pandas dataframe."
    assert isinstance(users, (str, list)), "users is not a string or a list"
    assert isinstance(columns, (str, list)), "column is not a string or a list"
    assert isinstance(title, str), "title is not a string"
    assert isinstance(xlabel, str), "xlabel is not a string"
    assert isinstance(ylabel, str), "ylabel is not a string"
    assert isinstance(resample, (str, bool)), "resample is not a string or a boolean"
    assert isinstance(interpolate, bool), "interpolate is not a boolean"
    assert isinstance(window_len, int), "window is not an int"
    assert isinstance(reset_index, bool), "reset_index is not boolean"

    # A bare string would otherwise be looped over one character at a time.
    if isinstance(users, str):
        users = [users]
    if isinstance(columns, str):
        columns = [columns]

    fig = go.Figure()

    for user in users:
        # The row mask depends only on the user, so build it once.
        user_rows = df['user'] == user
        for col in columns:
            series = df.loc[user_rows, col]
            series = resample_data_(series, resample, interpolate,
                                    window_len, reset_index)
            fig.add_trace(go.Scatter(x=series.index,
                                     y=series.values,
                                     name=f"{user} / {col}",
                                     showlegend=True))

    fig.update_layout(title=title,
                      xaxis_title=xlabel,
                      yaxis_title=ylabel,
                      width=1200,
                      height=600)
    return fig