Created by SmirkyGraphs. Code: GitHub. Source: BOE.
$525,881.75 was raised from 992 contributions from 29 different states
This looks specifically at donations where the person was living in Rhode Island
Comparing donations based on whether they live in RI or not
All Values Included
Extras Removed
In state 37% out of state 63%
# For data
import pandas as pd
from pandas import Series,DataFrame
import numpy as np
# For visualization
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')
%matplotlib inline
sns.set()
import datetime
my_color = sns.color_palette()
# loading the data
df = pd.read_csv("gina.csv", parse_dates=['receipt_dt'])
# removing personal address
df = df.drop(['address'], axis=1)
# Preview
df.head()
df.info()
df.shape
df.contb_amt.sum()
There are 992 Donations in Q3 making a total of $525,881.75
Those missing First/Last name are PAC's/Party donations
date_df = df.groupby(['receipt_dt'],as_index=False).sum()
date_df.plot('receipt_dt','contb_amt',figsize=(12,6),marker='', legend=False,
linestyle='-',color='purple', xlim=('2017-07-01','2017-10-01'))
# Top 5 Days
date_df.sort_values(by='contb_amt',ascending=False).head()
count_df = df.groupby(['receipt_dt'],as_index=False).count()
count_df.plot('receipt_dt','contb_amt',figsize=(12,6),marker='',linestyle='-',color='purple', xlim=('2017-07-01','2017-10-01'))
mean_df = df.groupby(['receipt_dt'],as_index=False).mean()
mean_df.plot('receipt_dt','contb_amt',figsize=(12,6),marker='',linestyle='-',color='purple', xlim=('2017-07-01','2017-10-01'))
weekday_df = df
weekday_df['weekday'] = pd.Categorical(weekday_df['weekday'],
categories=['Monday','Tuesday','Wednesday','Thursday',
'Friday','Saturday', 'Sunday'], ordered=True)
weekday_df_sum = weekday_df.pivot_table(index=weekday_df['weekday'], values='contb_amt',
aggfunc='sum').plot(kind='bar',rot=0,legend=False, title='Sum of Donations by Weekday')
weekday_df_count = weekday_df.pivot_table(index=weekday_df['weekday'], values='contb_amt',
aggfunc='count').plot(kind='bar',rot=0, legend=False, title='Count of Donations by Weekday')
weekday_df_sum = weekday_df.pivot_table(index=weekday_df['weekday'], values='contb_amt',
aggfunc='mean').plot(kind='bar',rot=0, legend=False, title='Average Donated by Weekday')
df['month'] = df['receipt_dt'].dt.month
df['receipt_dt'].dt.month.value_counts()
month_sum = df.pivot_table(index=df['month'], values='contb_amt',
aggfunc='sum').plot(kind='bar',rot=0,legend=False, title='Sum of Donations by Month')
month_sum = df.pivot_table(index=df['month'], values='contb_amt',
aggfunc='count').plot(kind='bar',rot=0,legend=False, title='Count of Donations by Month')
df.tran_type.value_counts()
df.tran_type.value_counts(normalize=True)
sns.factorplot('tran_type',data=df,kind="count")
df['contb_amt'].sum()
df['contb_amt'].mode()
df['contb_amt'].describe()
I was surprised to see that the average donation was 530 compared to the presidential race when it was only 100
Lowest donation was 1 and highest was 1174
df['contb_amt'].hist(bins=25)
df['contb_amt'].value_counts().head()
Surprisingly 1000 was the most frequent donation
The top most frequent donation values were much higher then those during the presidential race
# 1 State was labeled "Ri" So replace it with RI
df = df.replace(['Ri'],'RI')
df.state.nunique()
df.state.value_counts()
where_sum = df.pivot_table(index=df['state'], values='contb_amt', aggfunc='sum').sort_values(
by='contb_amt').plot(kind='barh', rot=0, legend=False, title='Total Donated by State')
where_count = df.pivot_table(index=df['state'], values='contb_amt', aggfunc='count').sort_values(
by='contb_amt').plot(kind='barh', rot=0, legend=False, title='Count of Donations by State')
where_avg = df.pivot_table(index=df['state'], values='contb_amt', aggfunc='mean').sort_values(
by='contb_amt').plot(kind='barh', rot=0, legend=False, title='Avg Donated by State')
df.city.nunique()
# Top 5 Cities
city_df = df.pivot_table('contb_amt',index='city',aggfunc='sum')
city_df = city_df.sort_values(by="contb_amt",ascending=False)
city_df.head()
city_sum = df.pivot_table(index=df['city'], values='contb_amt', aggfunc='sum').sort_values(
by='contb_amt').nlargest(5, 'contb_amt').plot(kind='barh', color=my_color, legend=False, title='Total Donated')
city_count = df.pivot_table(index=df['city'], values='contb_amt', aggfunc='count').sort_values(
by='contb_amt').nlargest(5, 'contb_amt').plot(kind='barh', color=my_color, legend=False, title='Num of Donations')
# Just donations from RI
RI_df = df[df.state == 'RI']
RI_df.city.unique()
# Connect small towns to the City/Town they're part of
RI_df = RI_df.replace(['Pascoag'],'Burrillville')
RI_df = RI_df.replace(['Wakefield','Kingston','Peace Dale'],'South Kingstown')
RI_df = RI_df.replace(['Saunderstown','N Kingstown','North Kingstownq'],'North Kingstown')
RI_df = RI_df.replace(['E Greenwich'],'East Greenwich')
RI_df = RI_df.replace(['PROVIDENCE'],'Providence')
RI_df = RI_df.replace(['Harmony'],'Glocester')
RI_df = RI_df.replace(['Riverside','Rumford'],'East Providence')
RI_df = RI_df.replace(['Block Island'],'New Shoreham')
RI_df = RI_df.replace(['North Scituate'],'Scituate')
RI_df = RI_df.replace(['Albion'],'Lincoln')
RI_df.city.nunique()
ri_city = df.pivot_table(index=RI_df['city'], values='contb_amt', aggfunc='count').sort_values(ascending=True,
by='contb_amt').plot(kind='barh', figsize=(12,9), legend=False, title='Num of Donations by City')
ri_city = df.pivot_table(index=RI_df['city'], values='contb_amt', aggfunc='sum').sort_values(ascending=True,
by='contb_amt').plot(kind='barh', figsize=(12,9), legend=False, title='Total Donated by City')
RI_df['city'].value_counts().sum()
RI_df['city'].value_counts().head()
RI_Sum = RI_df.pivot_table('contb_amt',index='city',aggfunc='sum')
RI_Sum = RI_Sum.sort_values(by='contb_amt', ascending=False)
RI_Sum.sum()
RI_Sum
# dictionary of RI Counties
county_map = {'Barrington': 'BRISTOL',
'Bristol': 'BRISTOL',
'Burrillville': 'PROVIDENCE',
'Central Falls': 'PROVIDENCE',
'Charlestown': 'WASHINGTON',
'Coventry': 'KENT',
'Cranston': 'PROVIDENCE',
'Cumberland': 'PROVIDENCE',
'East Greenwich': 'KENT',
'East Providence': 'PROVIDENCE',
'Exeter': 'WASHINGTON',
'Foster': 'PROVIDENCE',
'Glocester': 'PROVIDENCE',
'Hopkinton': 'WASHINGTON',
'Jamestown': 'NEWPORT',
'Johnston': 'PROVIDENCE',
'Lincoln': 'PROVIDENCE',
'Little Compton': 'NEWPORT',
'Middletown': 'NEWPORT',
'Narragansett': 'WASHINGTON',
'Newport': 'NEWPORT',
'New Shoreham': 'WASHINGTON',
'North Kingstown': 'WASHINGTON',
'North Providence': 'PROVIDENCE',
'North Smithfield': 'PROVIDENCE',
'Pawtucket': 'PROVIDENCE',
'Portsmouth': 'NEWPORT',
'Providence': 'PROVIDENCE',
'Richmond': 'WASHINGTON',
'Scituate': 'PROVIDENCE',
'Smithfield': 'PROVIDENCE',
'South Kingstown': 'WASHINGTON',
'Tiverton': 'NEWPORT',
'Warren': 'BRISTOL',
'Warwick': 'KENT',
'Westerly': 'WASHINGTON',
'West Greenwich': 'KENT',
'West Warwick': 'KENT',
'Woonsocket': 'PROVIDENCE'}
# creating a party column and mapping party to canidate
RI_df['County'] = RI_df.city.map(county_map)
RI_df['County'].value_counts()
ri_city = df.pivot_table(index=RI_df['County'], values='contb_amt', aggfunc='count').sort_values(ascending=True,
by='contb_amt').plot(kind='barh', legend=False, title='Num of Donations by County')
ri_city = df.pivot_table(index=RI_df['County'], values='contb_amt', aggfunc='sum').sort_values(ascending=True,
by='contb_amt').plot(kind='barh', legend=False, title='Total Donated by County')
def in_ri(state):
if state == 'RI':
return 'in state'
else:
return 'out of state'
df['lives'] = df['state'].apply(in_ri)
df_lives = df
df['lives'] = pd.Categorical(df['lives'], categories=['in state','out of state'], ordered=True)
df.head()
count_df = df.pivot_table(index=df['lives'], values='contb_amt', aggfunc='count').plot(kind='bar',
rot=0, color=my_color, legend=False, title='Count of Donation')
print(df['lives'].value_counts())
(df['lives'].value_counts(normalize=True))
60% (600) Were from Rhode Island
40% (392) Were from another state
mean_df = df.pivot_table(index=df['lives'], values='contb_amt', aggfunc='mean').plot(kind='bar',
rot=0, color=my_color, legend=False, title='Average Donation')
sum_df = df.pivot_table(index=df['lives'], values='contb_amt', aggfunc='sum').plot(kind='bar',
rot=0, color=my_color, legend=False, title='Total Donated')
percent_df = df.pivot_table(index=df['lives'], values='contb_amt', aggfunc='sum')
total_sum = percent_df.contb_amt.sum()
df['lives'] = df['state'].apply(in_ri)
percent_df['Percent'] = percent_df['contb_amt'] / total_sum
percent_df.head()
mean_df = df.pivot_table(index=df['lives'], values='contb_amt', aggfunc='mean')
mean_df.head()
employer_df = df.pivot_table('contb_amt',index='employer',aggfunc='sum')
# Combining Electric Boat & Genral Dynamics
employer_df.loc['General Dynamics'] = employer_df.loc['Electric Boat Corporation'] + employer_df.loc['General Dynamics']
employer_df.drop('Electric Boat Corporation',inplace=True)
employer_df = employer_df.sort_values(by = 'contb_amt',ascending=True)
employer_df.count()
Donations from people who worked at 489 different companies, lets narrow it down to companies over $1000
# Getting all employer records over $1000
employer_df = employer_df[employer_df['contb_amt'] > 1000]
employer_df.plot(kind='barh',figsize=(10,16))
# Graphing Only Companies
employer_df.drop('Homemaker',inplace=True)
employer_df.drop('Retired',inplace=True)
employer_df.drop('Self Employed',inplace=True)
employer_df.drop('Info Requested',inplace=True)
employer_df = employer_df.sort_values(by = 'contb_amt',ascending=True)
# Getting all employer records over $1000
employer_df = employer_df[employer_df['contb_amt'] > 1000]
employer_df.plot(kind='barh',figsize=(10,16))
def in_ri(employ_state):
if employ_state == 'RI':
return 'in state'
else:
return 'out of state'
df['works'] = df['employ_state'].apply(in_ri)
df.head()
# Including Extras
count_df = df.pivot_table(index=df['works'], values='contb_amt', aggfunc='count').plot(kind='bar',
rot=0, color=my_color, legend=False, title='Count of Donation')
df['works'].value_counts()
# Including Extras
count_df = df.pivot_table(index=df['works'], values='contb_amt', aggfunc='sum').plot(kind='bar',
rot=0, color=my_color, legend=False, title='Total Donated')
# Including Extras getting % of sum
percent_df = df.pivot_table(index=df['works'], values='contb_amt', aggfunc='sum')
total_sum = df.contb_amt.sum()
percent_df['Percent'] = percent_df['contb_amt'] / total_sum
percent_df.head()
# Removing Extras
emp_df = df[df.employer != 'Homemaker']
emp_df = df[df.employer != 'Retired']
emp_df = df[df.employer != 'Self Employed']
emp_df = df[df.employer != 'Info Requested']
emp_df = df[df.employer != 'Disabled']
# Extras Removed
count_df = emp_df.pivot_table(index=df['works'], values='contb_amt', aggfunc='count').plot(kind='bar',
rot=0, color=my_color, legend=False, title='Count of Donation')
emp_df['works'].value_counts()
# Extras Removed
count_df = emp_df.pivot_table(index=df['works'], values='contb_amt', aggfunc='sum').plot(kind='bar',
rot=0, color=my_color, legend=False, title='Total Donated')
df.contbr_nm.nunique()
df.first_nm.value_counts().head()
df.last_nm.value_counts().head()
df.contb_type.value_counts()
df.contb_type.value_counts(normalize=True)
don_1k = df[df['contb_amt'] >= 1000]
don_1k = df[(df['contb_amt'] >= 1000)]
don_1k.lives.value_counts(normalize=True)
sum_df = don_1k.pivot_table(index=df_lives['lives'], values='contb_amt', aggfunc='sum').plot(kind='bar',
rot=0, color=my_color, legend=False, title='Total Donated')
don_df_100 = df[df.contb_amt <= 100]
don_df_250 = df[df.contb_amt <= 250]
don_df_350 = df[df.contb_amt <= 350]
don_df_500 = df[df.contb_amt <= 500]
don_df_750 = df[df.contb_amt <= 750]
don_df_1000 = df[df.contb_amt <= 1000]
# Concatinating the datasets together
frames = [don_df_100, don_df_250, don_df_350, don_df_500, don_df_750, don_df_1000]
don_concat = pd.concat(frames, keys=['100', '250', '350', '500', '750', '1000'])
# resetting the index and dropping the columns we don't need
don_concat = don_concat.reset_index()
# Pivoting by the amt ranges
don_concat = don_concat.pivot_table('contb_amt',index='level_0',columns = 'lives',aggfunc='sum')
new_index= ['100', '250', '350', '500', '750', '1000']
don_concat = don_concat.reindex(new_index)
don_concat.head()
don_concat[['in state','out of state']].plot(kind='bar',figsize=(12,4))
plt.xlabel('Ammount')
locs, labels = plt.xticks()
plt.setp(labels, rotation=360)
plt.title('In State vs. Out of State')
# Top 4 Donated Values
don_25 = df[df.contb_amt == 25]
don_250 = df[df.contb_amt == 250]
don_500 = df[df.contb_amt == 500]
don_1000 = df[df.contb_amt == 1000]
# Concatinating the datasets together
frames = [don_25, don_250, don_500, don_1000]
don_concat = pd.concat(frames, keys=['25', '250', '500', '1000'])
#resetting the index and dropping the columns we don't need
don_concat = don_concat.reset_index()
# Pivoting by the amt ranges
don_concat = don_concat.pivot_table('contb_amt',index='level_0',columns = 'lives',aggfunc='sum')
don_concat.head()
new_index= ['25', '250', '500', '1000']
don_concat = don_concat.reindex(new_index)
don_concat.head()
don_concat[['in state','out of state']].plot(kind='bar',figsize=(12,4))
plt.xlabel('Ammount')
locs, labels = plt.xticks()
plt.setp(labels, rotation=360)
plt.title('In State vs. Out of State')