Gantt Charts Version 2 - Individual Workstreams
While some users like showing Gantt charts that allow overlapping dates (see community post here), many would prefer a visual that clearly shows individual work streams, especially in organizations where members are focused on one task at a given time.
The input to the Python editor? A table with 4 columns:
- Project Name
- Person - the individual working on the project
- start_date - the day the individual begins working on the project
- end_date - the day the individual stops working on the project
Now let's say we accidentally double booked one of our team members. This chart will give us a warning so we can go back and correct the team workflows!
Here's the Python Code that generates the above chart, complete with error handling.
# SQL output is imported as a pandas dataframe variable called "df" # primary reference: https://sukhbinder.wordpress.com/2016/05/10/quick-gantt-chart-with-matplotlib/ # reference http://www.clowersresearch.com/main/gantt-charts-in-matplotlib/ ####################################################################### ### LIBRARIES ### ####################################################################### import pandas as pd import datetime as dt import matplotlib.dates as dates import matplotlib.pyplot as plt from matplotlib.dates import MONTHLY, WEEKLY, DateFormatter, rrulewrapper, RRuleLocator from collections import OrderedDict import numpy as np import datetime as dt ####################################################################### ### HELPER FUNCTIONS ### ####################################################################### # Function: Return distinct projects/phases # Input: dataframe with a column indicating project names # Output: dataframe of all distinct phases def all_phases(df): phases=df['project'].drop_duplicates().values.tolist() return phases # Function: prepare dataframe for gantt visualization # Input: dataframe with a column for project, team, start_date, and end_date # Output: dataframe with all columns required for main function def df_gantt(df): df['project']=df['project'].astype('category') df['person']=df['person'].astype('category') df['end_date2']=dates.date2num(df['end_date']) df['start_date2']=dates.date2num(df['start_date']) df['duration']=df['end_date2']-df['start_date2'] #assign colors for each series colors=['paleturquoise','lightskyblue','plum','mediumorchid','salmon','lightpink','sandybrown','gold'] allprojects=df['project'].drop_duplicates().sort_values().reset_index(drop=True) # # allgrps=df['person'].drop_duplicates().sort_values().reset_index(drop=True) se=pd.DataFrame(colors[0:len(allprojects)]) se.columns=['col'] colormatrix=pd.concat([allprojects,se],axis=1) df = df.merge(colormatrix,on='project',how='inner').sort_values('person').reset_index(drop=True) return df ########################################################################## ### MAIN PLOT ########################################################################## # Function: Create Gantt chart # Input: dataframe with a column for project, team, start_date, and end_date, optional parameter showtoday (boolean) that shows a line marking the current date # Output: matplotlib object of Gantt chart def gantt_graph(df, showtoday): df=df_gantt(df) fig, ax = plt.subplots() #initialize variables j='' change=0 ylocs=[] #plot chart for i in range(len(df['start_date2'])): if (j!=df['person'][i]): change=change+1 j=df['person'][i] ylocs.append(0.5*change+0.5) ax.barh(0.5*change+0.5, df['duration'][i], left=df['start_date2'][i],height=0.4, align='center', color=df['col'][i], alpha = 1.0, label=df['person'][i]) ax.annotate(df['project'][i],xy=(df['start_date2'][i]+0.5*df['duration'][i],0.5*change+0.5),horizontalalignment='center') #format x axis rule = rrulewrapper(WEEKLY, interval=1) ax.xaxis.set_major_locator(RRuleLocator(rule)) ax.xaxis.set_major_formatter(DateFormatter("%b %d '%y")) plt.tick_params( axis='x', # changes apply to the x-axis which='both', # both major and minor ticks are affected top='off', # ticks along the bottom edge are off bottom='on', rotation=0) # ticks along the top edge are off #format y axis ax.invert_yaxis() ax.set_yticks(ylocs) ppl=df['person'].drop_duplicates().values.tolist() ax.set_yticklabels(ppl) plt.tick_params( axis='y', # changes apply to the x-axis which='both', # both major and minor ticks are affected left='off', # ticks along the bottom edge are off right='off') # ticks along the top edge are off #format border ax.spines['bottom'].set_linewidth(0.2) ax.spines['left'].set_linewidth(0.2) ax.spines['top'].set_linewidth(0.2) ax.spines['right'].set_linewidth(0.2) #plot a line showing current date if showtoday == True: plt.axvline(x=dt.date.today(),color='gray') return fig # Function: provide error message handling for double-booked team memebers # Input: 1. dataframe with a column for project, team, start_date, and end_date; 2. optional boolean parameter to show a line for the current date # Output: matplotlib object of gantt chart (if no double booking cases) or error message (if any cases of double booking exist) def workflow_errors(df, show_today=True): df_updated=df_gantt(df) errors=0 for person in df_updated['person'].drop_duplicates().values.tolist(): df2=df_updated[df_updated['person'] == person] df3=df2 for i in range(len(df2['start_date2'])): for j in range(len(df3['start_date2'])): if (df2['end_date2'].iloc[i]>df3['start_date2'].iloc[j] and df2['end_date2'].iloc[i]<df3['end_date2'].iloc[j]): stringtime=df2['end_date'].iloc[i].strftime('%B %d, %Y') txt = person + ' has multiple workflows on/around ' + stringtime errors+=1 if (errors==0): fig = gantt_graph(df, showtoday=show_today) else: fig = plt.figure() plt.axis('off') plt.text(.5, .5, txt, fontsize=15, color='black', ha='center') return fig # # Use Sisense for Cloud Data Teams to visualize a dataframe or an image by passing data to periscope.output() periscope.image(workflow_errors(df, show_today=True))
Please sign in to leave a comment.
Comments
0 comments