import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Sanity message: reaching this line means every import above succeeded.
print('Modules are imported.')
# Import "covid19_Confirmed_dataset.csv" from the "./Datasets" folder.
# Load the confirmed-cases table; the path is relative to the working directory.
corona_dataset_csv = pd.read_csv("Datasets/covid19_Confirmed_dataset.csv")
corona_dataset_csv.head(5)  # preview only; displayed when run interactively

# Remove the "Lat" and "Long" columns before aggregating so they are not
# summed together with the remaining columns.
corona_dataset_csv.drop(["Lat", "Long"], axis=1, inplace=True)

# Collapse to one row per "Country/Region" by summing the remaining columns.
# numeric_only=True keeps any text column (for example a province/state label,
# if the file has one) out of the sum: pandas >= 2.0 no longer drops such
# columns silently, and a non-numeric value in the result would break the
# .diff() and .plot() calls made on these rows later in the script.
corona_dataset_aggregated = corona_dataset_csv.groupby("Country/Region").sum(
    numeric_only=True
)
corona_dataset_aggregated.head()
corona_dataset_aggregated.shape
# Visualization always helps us understand our data better.
# Select the aggregated row labelled "India" (one value per remaining column).
# As a bare expression the result is only displayed in an interactive session.
corona_dataset_aggregated.loc["India"]
# We need to find a good measure, represented as a single number, that describes the spread of the virus in a country.
# Overlay the per-country rows of three countries on the current axes and
# label them via the legend.
for _country in ("China", "India", "Spain"):
    corona_dataset_aggregated.loc[_country].plot()
plt.legend()

# Closer look at India: the first three entries of its row, then the change
# between consecutive entries.
india_series = corona_dataset_aggregated.loc["India"]
india_series[:3].plot()

india_steps = india_series.diff()
india_steps.plot()
india_steps.max()  # largest single step-to-step increase for India
# Row labels of the aggregated table (one per "Country/Region" group).
countries = list(corona_dataset_aggregated.index)

# For every row, take the difference between consecutive columns and keep the
# largest one. diff(axis=1) does this for all rows at once, replacing the
# per-country Python loop; the result is in the same order as `countries`.
max_infection_rates = (
    corona_dataset_aggregated.diff(axis=1).max(axis=1).tolist()
)
corona_dataset_aggregated["max_infection rate"] = max_infection_rates
corona_dataset_aggregated.head()

# Keep only the derived measure, as a one-column frame indexed like the
# aggregated table.
corona_data = pd.DataFrame(corona_dataset_aggregated["max_infection rate"])
corona_data.head()
# Load the happiness report; the path is relative to the working directory.
happiness_report_csv = pd.read_csv("Datasets/worldwide_happiness_report.csv")
happiness_report_csv.head()

# Columns earmarked for removal. NOTE: the matching drop(useless_cols, axis=1)
# call is currently disabled, so these columns stay in the table and therefore
# also appear in the join and the correlation matrix below.
useless_cols = [
    "Overall rank",
    "Score",
    "Generosity",
    "Perceptions of corruption",
]
happiness_report_csv.head()

# Index by "Country or region" so the join below matches rows on that label.
happiness_report_csv = happiness_report_csv.set_index("Country or region")
corona_data.head()
happiness_report_csv.shape

# Inner join on the index: only labels present in both tables are kept.
data = corona_data.join(happiness_report_csv, how="inner")
data.head()

# Pairwise correlation between all columns of the joined table.
data.corr()
# Our analysis is not finished until we visualize the results in figures and graphs, so that everyone can understand what we get out of our analysis.
data.head()

x = data["GDP per capita"]
y = data["max_infection rate"]

# Plot the natural log of the infection measure against GDP per capita.
# NOTE(review): np.log maps 0 to -inf; if any row has a max rate of 0, the
# regression fit below will be affected -- confirm against the data.
log_y = np.log(y)

# seaborn >= 0.12 accepts x and y as keywords only (positional use was
# deprecated in 0.11), so the original positional calls raise TypeError there.
sns.scatterplot(x=x, y=log_y)
sns.regplot(x=x, y=log_y)