web123456

Machine Learning Crash Course, Episode 2: Regression in Supervised Learning + Data Processing (Hands-On Part)!

  • import pandas as pd
  • import matplotlib.pyplot as plt
  • import seaborn as sns
  • #Loading data
  • data_path=r'D:\Machine Learning\Dataset: Cost of Living in Country\Cost_of_Living_Index_by_Country_2024.csv'
  • df=pd.read_csv(data_path)
  • #Show the first few lines of data
  • print(df.head())
  • #Check for missing values
  • print(df.isnull().sum())
  • #Basic Statistics
  • print(df.describe())
  • #Visualization
  • #univariate analysis
  • df.hist(bins=20,figsize=(12,10),color='blue')
  • plt.tight_layout() #Adjust the sub-picture parameters to fill the entire image area
  • plt.show()
  • #Multivariate analysis
  • numeric_df = df.select_dtypes(include=['float64', 'int64'])
  • corr_matrix=numeric_df.corr()
  • sns.heatmap(corr_matrix,annot=True,cmap='coolwarm')
  • plt.title('Correlation Matrix')
  • plt.show()
  • # #Save pictures
  • # plt.savefig('8.11Cost_of_Living_Index_by_Country_2024.png')
  • #Bar Chart - Cost of Living Index for Top 10 Countries
  • top_10_countries=df.head(10)
  • plt.figure(figsize=(12,6))
  • sns.barplot(x='Country',y='Cost of Living Index',data=top_10_countries)
  • plt.xticks(rotation=90)#Rotate x-axis label
  • plt.title('Top 10 Countries by Cost of Living Index')
  • plt.show()
  • #Scatter chart-Relationship between cost of living index and rental index
  • plt.figure(figsize=(10,6))
  • sns.scatterplot(x='Cost of Living Index',y='Rent Index',data=df)#Add title and tag
  • plt.title('Cost of Living Index vs Rent Index')
  • plt.show()
  • #Heat Map - Correlation between indicators
  • plt.figure(figsize=(10,6))
  • numeric1_df = df.select_dtypes(include=['float64', 'int64'])
  • sns.heatmap(numeric1_df.corr(),annot=True,cmap='coolwarm')
  • plt.title('Correlation Heatmap')
  • plt.show()