1. Design and implement a data analysis project to calculate and predict electricity bills, using a
dataset containing monthly electricity consumption data for a group of residential customers.
Code:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
# Build a small synthetic dataset (random values, for demonstration only)
np.random.seed(42)  # fixed seed so the same consumption values are generated on every run
# One timestamp per month-end of 2023. The MonthEnd offset object is used
# instead of the 'M' string alias, which newer pandas releases deprecate
# in favour of 'ME'; the offset object behaves the same on old and new versions.
months = pd.date_range(start='2023-01-01', end='2023-12-31', freq=pd.offsets.MonthEnd())
# Random integer consumption in [100, 500), one value per month
consumption = np.random.randint(100, 500, size=len(months))
dataset = pd.DataFrame({'Month': months, 'Consumption': consumption})
# Display the first few rows of the dataset
print(dataset.head())
Output (generated data):
       Month  Consumption
0 2023-01-31          202
1 2023-02-28          448
2 2023-03-31          370
3 2023-04-30          206
4 2023-05-31          171
# Plot the consumption trend over time
# (line chart: month-end timestamps on the x-axis, consumption on the y-axis)
plt.figure(figsize=(10, 6))
plt.plot(dataset['Month'], dataset['Consumption'])
plt.title('Monthly Electricity Consumption Over Time')
plt.xlabel('Month')
plt.ylabel('Consumption (kWh)')
plt.show()
# Derive numeric calendar features from the 'Month' timestamp column
month_accessor = dataset['Month'].dt
dataset['Year'] = month_accessor.year
dataset['MonthNumber'] = month_accessor.month
# Show the first rows, now including the two new columns
print(dataset.head())
# Split the data and train the model. These steps must run before prediction:
# model, X_test and y_test are not defined anywhere else in the script.
X = dataset[['Year', 'MonthNumber']]  # calendar features derived from 'Month'
y = dataset['Consumption']            # target: monthly consumption
# Hold out 20% of the rows for testing; fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
model = LinearRegression()
model.fit(X_train, y_train)
# Make predictions on the test set
y_pred = model.predict(X_test)
# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
# Visualize the predictions
# The test rows come out of the split in shuffled order, so sort by month
# before drawing the prediction line; otherwise it zig-zags across the plot.
test_months = X_test['MonthNumber'].to_numpy()
order = np.argsort(test_months)
plt.figure(figsize=(10, 6))
plt.scatter(X_test['MonthNumber'], y_test, color='black', label='Actual Consumption')
plt.plot(test_months[order], y_pred[order], color='blue', linewidth=3, label='Predicted Consumption')
plt.title('Actual vs Predicted Electricity Consumption')
plt.xlabel('Month')
plt.ylabel('Consumption (kWh)')
plt.legend()
plt.show()