import numpy as np
import pandas as pd
from keras.utils import np_utils
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from scipy.stats import spearmanr, pearsonr
from sklearn.linear_model import LinearRegression
# Load the Facebook posts dataset. The file is semicolon-delimited; the
# separator is passed by keyword because newer pandas releases no longer
# accept it as a positional argument.
df = pd.read_csv("dataset_Facebook.csv", sep=";")
# Preview the first rows (rendered as the cell output in a notebook).
df.head()
Page total likes | Type | Category | Post Month | Post Weekday | Post Hour | Paid | Lifetime Post Total Reach | Lifetime Post Total Impressions | Lifetime Engaged Users | Lifetime Post Consumers | Lifetime Post Consumptions | Lifetime Post Impressions by people who have liked your Page | Lifetime Post reach by people who like your Page | Lifetime People who have liked your Page and engaged with your post | comment | like | share | Total Interactions | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 139441 | Photo | 2 | 12 | 4 | 3 | 0.0 | 2752 | 5091 | 178 | 109 | 159 | 3078 | 1640 | 119 | 4 | 79.0 | 17.0 | 100 |
1 | 139441 | Status | 2 | 12 | 3 | 10 | 0.0 | 10460 | 19057 | 1457 | 1361 | 1674 | 11710 | 6112 | 1108 | 5 | 130.0 | 29.0 | 164 |
2 | 139441 | Photo | 3 | 12 | 3 | 3 | 0.0 | 2413 | 4373 | 177 | 113 | 154 | 2812 | 1503 | 132 | 0 | 66.0 | 14.0 | 80 |
3 | 139441 | Photo | 2 | 12 | 2 | 10 | 1.0 | 50128 | 87991 | 2211 | 790 | 1119 | 61027 | 32048 | 1386 | 58 | 1572.0 | 147.0 | 1777 |
4 | 139441 | Photo | 2 | 12 | 2 | 3 | 0.0 | 7244 | 13594 | 671 | 410 | 580 | 6228 | 3200 | 396 | 19 | 325.0 | 49.0 | 393 |
# Annotated correlation heatmap of the numeric columns.
# "Type" is still a string column at this point, and DataFrame.corr() raises
# on non-numeric data in recent pandas, so restrict to numeric dtypes first.
plt.figure(figsize=(7, 7))
numeric_df = df.select_dtypes(include="number")
sns.heatmap(numeric_df.corr(), annot=True, cbar=False)
<matplotlib.axes._subplots.AxesSubplot at 0x2dc88131d30>
# Histogram of the first column of df ("Page total likes" in the preview).
page_likes = df.iloc[:, 0]
plt.hist(page_likes)
plt.title("Total pages likes")
Text(0.5, 1.0, 'Total pages likes')
# Bar chart of how many posts exist for each post type (column 1, "Type").
# value_counts() returns categories ordered by frequency, so the tick labels
# are taken from the same Series as the bar heights; hard-coded labels would
# silently mislabel the bars whenever the frequency order changes.
type_counts = df.iloc[:, 1].value_counts()
objects = tuple(type_counts.index)
y_pos = np.arange(len(objects))
plt.bar(y_pos, type_counts.values)
plt.xticks(y_pos, objects)
plt.ylabel('Number of posts')
plt.title('Different types of post compared')
Text(0.5, 1.0, 'Different types of post compared')
# Post counts per "Type", with one bar per value of the "Paid" column.
plt.figure(figsize=(7, 7))
sns.countplot(data=df, hue='Paid', x='Type')
plt.title("Number of posts: Paid vs Not Paid")
plt.ylabel("Number of posts")
Text(0, 0.5, 'Number of posts')
# Bar chart of the number of posts published in each calendar month
# (column 3, "Post Month").
plt.figure(figsize=(7, 7))
objects = ('January', 'February', 'March', 'April', 'May', 'June', 'July',
           'August', 'September', 'October', 'November', 'December')
y_pos = np.arange(len(objects))
# value_counts() sorts by frequency, not by month, so the counts are
# re-aligned to month numbers 1..12 to match the calendar-ordered labels.
# Months with no posts get a zero-height bar.
# NOTE(review): assumes months are encoded 1..12 - confirm against the data.
month_counts = df.iloc[:, 3].value_counts().reindex(
    np.arange(1, len(objects) + 1), fill_value=0
)
plt.bar(y_pos, month_counts.values)
plt.xticks(y_pos, objects, rotation=70)
plt.ylabel('Number of posts')
plt.title('Number of posts for each month')
Text(0.5, 1.0, 'Number of posts for each month')
# Bar chart of the number of posts published on each weekday
# (column 4, "Post Weekday").
plt.figure(figsize=(7, 7))
objects = ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday',
           'Sunday')
y_pos = np.arange(len(objects))
# value_counts() sorts by frequency, so the counts are re-aligned to weekday
# codes 1..7 to keep each bar under its own label.
# NOTE(review): assumes weekday code 1 is Monday, as the original label order
# implies - confirm against the dataset description.
weekday_counts = df.iloc[:, 4].value_counts().reindex(
    np.arange(1, len(objects) + 1), fill_value=0
)
plt.bar(y_pos, weekday_counts.values)
plt.xticks(y_pos, objects, rotation=70)
plt.ylabel('Number of posts')
plt.title('Number of posts for each week-day')
Text(0.5, 1.0, 'Number of posts for each week-day')
# Bar chart of the number of posts published in each hour of the day
# (column 5, "Post Hour").
plt.figure(figsize=(7, 7))
objects = np.arange(24)
y_pos = np.arange(len(objects))
# `a` holds the post count for every hour 0..23. It was previously used
# without being defined; it is computed here and aligned to the hour labels,
# with hours that have no posts filled in as zero.
# NOTE(review): assumes hours are encoded 0..23 - confirm against the data.
a = df.iloc[:, 5].value_counts().reindex(objects, fill_value=0).values
plt.bar(y_pos, a)
plt.xticks(y_pos, objects, rotation=70)
plt.ylabel('Number of posts')
plt.title('Number of posts for each hour of the day')
Text(0.5, 1.0, 'Number of posts for each hour of the day')
# Bar chart comparing the number of free and paid posts (column 6, "Paid").
plt.figure(figsize=(7, 7))
objects = ('Free', 'Paid')
y_pos = np.arange(len(objects))
# Pin the bar order to the flag values 0 and 1 instead of relying on
# value_counts()' frequency ordering, so the labels stay correct even if paid
# posts ever outnumber free ones. Missing values are ignored by value_counts().
# NOTE(review): assumes 0 means free and 1 means paid, as the labels imply.
paid_counts = df.iloc[:, 6].value_counts().reindex([0, 1], fill_value=0)
plt.bar(y_pos, paid_counts.values)
plt.xticks(y_pos, objects, rotation=70)
plt.ylabel('Number of posts')
plt.title('Number of posts: Free vs Paid')
Text(0.5, 1.0, 'Number of posts: Free vs Paid')
# Post counts per "Post Weekday", with one bar per value of the "Paid" column.
plt.figure(figsize=(7, 7))
sns.countplot(data=df, hue='Paid', x='Post Weekday')
plt.title("Number of posts: Free vs Paid")
Text(0.5, 1.0, 'Number of posts: Free vs Paid')
# Histogram of the fourth-from-last column ("comment" in the preview).
comments_per_post = df.iloc[:, -4]
plt.hist(comments_per_post)
plt.title("Comments per post distribution")
Text(0.5, 1.0, 'Comments per post distribution')
# Histogram of the third-from-last column ("like" in the preview).
# The column contains missing values: they triggered "invalid value"
# RuntimeWarnings in numpy's histogram code and make newer numpy versions
# raise, so they are dropped before binning.
likes_per_post = df.iloc[:, -3].dropna()
plt.hist(likes_per_post)
plt.title("Likes per post distribution")
C:\Users\hp\Anaconda3\lib\site-packages\numpy\lib\histograms.py:824: RuntimeWarning: invalid value encountered in greater_equal keep = (tmp_a >= first_edge) C:\Users\hp\Anaconda3\lib\site-packages\numpy\lib\histograms.py:825: RuntimeWarning: invalid value encountered in less_equal keep &= (tmp_a <= last_edge)
Text(0.5, 1.0, 'Likes per post distribution')
# Histogram of the second-from-last column ("share" in the preview).
# Missing values are dropped before binning because numpy's histogram cannot
# derive a finite bin range when NaN is present.
shares_per_post = df.iloc[:, -2].dropna()
plt.hist(shares_per_post)
plt.title("Share per post distribution")
Text(0.5, 1.0, 'Share per post distribution')
# Pairwise scatter/histogram grid of the dataset without columns 7..14
# (the "Lifetime ..." metrics in the preview), which keeps the grid readable.
dfplot = df.drop(df.columns[7:15], axis=1)
# pairplot builds and manages its own figure, so no plt.figure() call is
# needed; calling it afterwards only produced an empty 7x7 figure.
sns.pairplot(data=dfplot)
<Figure size 504x504 with 0 Axes>
<Figure size 504x504 with 0 Axes>
# Replace the "Type" labels with the integer codes produced by LabelEncoder.
lb_make = LabelEncoder()
encoded_type = lb_make.fit_transform(df["Type"])
df["Type"] = encoded_type
# Replace every remaining missing value with 0.
df = df.fillna(value=0)
# Preview the first five rows (rendered as the cell output in a notebook).
df.head(5)
Page total likes | Type | Category | Post Month | Post Weekday | Post Hour | Paid | Lifetime Post Total Reach | Lifetime Post Total Impressions | Lifetime Engaged Users | Lifetime Post Consumers | Lifetime Post Consumptions | Lifetime Post Impressions by people who have liked your Page | Lifetime Post reach by people who like your Page | Lifetime People who have liked your Page and engaged with your post | comment |
---|