Ćwiczenie 2 - Podstawy wizualizacji¶

In [1]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.colors as clt
/tmp/ipykernel_10858/3694572134.py:1: DeprecationWarning: 
Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd

Dane¶

In [2]:
# Load both inputs: the socio-economic table (CSV) and the county layer (GeoPackage).
socio = pd.read_csv("data/socio_economic_2016.csv")
counties = gpd.read_file("data/counties.gpkg")
In [3]:
# Preview the first five records of the county layer.
counties.head(n=5)
Out[3]:
STATEFP COUNTYFP COUNTYNS AFFGEOID GEOID NAME LSAD ALAND AWATER geometry
0 39 131 01074078 0500000US39131 39131 Pike 06 1140324458 9567612 MULTIPOLYGON (((-83.35353 39.19758, -82.78589 ...
1 46 003 01266983 0500000US46003 46003 Aurora 06 1834813753 11201379 MULTIPOLYGON (((-98.80777 43.93522, -98.33151 ...
2 55 035 01581077 0500000US55035 55035 Eau Claire 06 1652211310 18848512 MULTIPOLYGON (((-91.65045 44.85595, -90.92225 ...
3 48 259 01383915 0500000US48259 48259 Kendall 06 1715747531 1496797 MULTIPOLYGON (((-98.92015 30.13829, -98.58790 ...
4 40 015 01101795 0500000US40015 40015 Caddo 06 3310745124 30820525 MULTIPOLYGON (((-98.62315 35.55160, -98.31325 ...
In [4]:
# Preview the first five records of the socio-economic table.
socio.head(n=5)
Out[4]:
GISJOIN county_code FIPS YEAR STATE STATEA COUNTY COUNTYA NAME RUCC_2013 ... PUBLIC_TRANSPORT POVERTY_UNDER1 INSURED_UNDER65 VACANT_HOUSES MEDIAN_HOUSEVALUE POP_DENS MEDIAN_INCOME_DIF POP_DENS_DIF HIGH_EDUCATION IQR_HOUSEVALUE
0 G0100010 1001 1001 2012-2016 Alabama 1 Autauga County 1 Autauga County, Alabama 2.0 ... 0.000782 0.122660 0.896521 0.084265 141000.0 35.167013 -0.012534 0.008408 0.245928 136300.0
1 G0100030 1003 1003 2012-2016 Alabama 1 Baldwin County 3 Baldwin County, Alabama 3.0 ... 0.002143 0.129938 0.856273 0.301453 173400.0 47.226343 0.012996 0.088874 0.295471 157800.0
2 G0100050 1005 1005 2012-2016 Alabama 1 Barbour County 5 Barbour County, Alabama 6.0 ... 0.003871 0.263737 0.842386 0.227080 90300.0 11.360474 0.064819 -0.031126 0.128678 116300.0
3 G0100070 1007 1007 2012-2016 Alabama 1 Bibb County 7 Bibb County, Alabama 1.0 ... 0.004831 0.164539 0.892983 0.214445 97200.0 13.918126 0.080165 -0.008652 0.120000 112300.0
4 G0100090 1009 1009 2012-2016 Alabama 1 Blount County 9 Blount County, Alabama 1.0 ... 0.001733 0.165344 0.867668 0.135472 124200.0 34.243279 0.022570 0.004142 0.130498 119800.0

5 rows × 40 columns

In [5]:
# Uncomment the next line to list every column name of the socio-economic table.
#list(socio.columns)
In [6]:
# Derive an integer key from GEOID (presumably stored as text - verify) so that
# it can be matched against the `county_code` column of the socio table.
counties["county_code"] = counties.loc[:, "GEOID"].astype(int)
counties.head(n=5)
Out[6]:
STATEFP COUNTYFP COUNTYNS AFFGEOID GEOID NAME LSAD ALAND AWATER geometry county_code
0 39 131 01074078 0500000US39131 39131 Pike 06 1140324458 9567612 MULTIPOLYGON (((-83.35353 39.19758, -82.78589 ... 39131
1 46 003 01266983 0500000US46003 46003 Aurora 06 1834813753 11201379 MULTIPOLYGON (((-98.80777 43.93522, -98.33151 ... 46003
2 55 035 01581077 0500000US55035 55035 Eau Claire 06 1652211310 18848512 MULTIPOLYGON (((-91.65045 44.85595, -90.92225 ... 55035
3 48 259 01383915 0500000US48259 48259 Kendall 06 1715747531 1496797 MULTIPOLYGON (((-98.92015 30.13829, -98.58790 ... 48259
4 40 015 01101795 0500000US40015 40015 Caddo 06 3310745124 30820525 MULTIPOLYGON (((-98.62315 35.55160, -98.31325 ... 40015
In [7]:
# Promote the key to the index. set_index removes the column from the frame
# by default, so `county_code` is no longer a regular column afterwards.
counties = counties.set_index(keys="county_code")
counties.head(n=5)
Out[7]:
STATEFP COUNTYFP COUNTYNS AFFGEOID GEOID NAME LSAD ALAND AWATER geometry
county_code
39131 39 131 01074078 0500000US39131 39131 Pike 06 1140324458 9567612 MULTIPOLYGON (((-83.35353 39.19758, -82.78589 ...
46003 46 003 01266983 0500000US46003 46003 Aurora 06 1834813753 11201379 MULTIPOLYGON (((-98.80777 43.93522, -98.33151 ...
55035 55 035 01581077 0500000US55035 55035 Eau Claire 06 1652211310 18848512 MULTIPOLYGON (((-91.65045 44.85595, -90.92225 ...
48259 48 259 01383915 0500000US48259 48259 Kendall 06 1715747531 1496797 MULTIPOLYGON (((-98.92015 30.13829, -98.58790 ...
40015 40 015 01101795 0500000US40015 40015 Caddo 06 3310745124 30820525 MULTIPOLYGON (((-98.62315 35.55160, -98.31325 ...
In [8]:
# Index the socio table by the same key. With the default drop=True the column
# is removed from the frame; drop=False would keep it as a column as well.
socio = socio.set_index(keys="county_code", drop=True)
socio.head(n=5)
Out[8]:
GISJOIN FIPS YEAR STATE STATEA COUNTY COUNTYA NAME RUCC_2013 election_votes_dem ... PUBLIC_TRANSPORT POVERTY_UNDER1 INSURED_UNDER65 VACANT_HOUSES MEDIAN_HOUSEVALUE POP_DENS MEDIAN_INCOME_DIF POP_DENS_DIF HIGH_EDUCATION IQR_HOUSEVALUE
county_code
1001 G0100010 1001 2012-2016 Alabama 1 Autauga County 1 Autauga County, Alabama 2.0 5908.0 ... 0.000782 0.122660 0.896521 0.084265 141000.0 35.167013 -0.012534 0.008408 0.245928 136300.0
1003 G0100030 1003 2012-2016 Alabama 1 Baldwin County 3 Baldwin County, Alabama 3.0 18409.0 ... 0.002143 0.129938 0.856273 0.301453 173400.0 47.226343 0.012996 0.088874 0.295471 157800.0
1005 G0100050 1005 2012-2016 Alabama 1 Barbour County 5 Barbour County, Alabama 6.0 4848.0 ... 0.003871 0.263737 0.842386 0.227080 90300.0 11.360474 0.064819 -0.031126 0.128678 116300.0
1007 G0100070 1007 2012-2016 Alabama 1 Bibb County 7 Bibb County, Alabama 1.0 1874.0 ... 0.004831 0.164539 0.892983 0.214445 97200.0 13.918126 0.080165 -0.008652 0.120000 112300.0
1009 G0100090 1009 2012-2016 Alabama 1 Blount County 9 Blount County, Alabama 1.0 2150.0 ... 0.001733 0.165344 0.867668 0.135472 124200.0 34.243279 0.022570 0.004142 0.130498 119800.0

5 rows × 39 columns

In [9]:
# Attach the socio-economic attributes to the county geometries.
# join() matches rows on the index, so no key column has to be named.
# `lsuffix` is appended to the columns of `counties` whose names also occur in
# `socio` (NAME shows up as NAME_cnt in the result).
counties_attr = counties.join(socio, how="left", lsuffix="_cnt")
counties_attr.head(n=5)
Out[9]:
STATEFP COUNTYFP COUNTYNS AFFGEOID GEOID NAME_cnt LSAD ALAND AWATER geometry ... PUBLIC_TRANSPORT POVERTY_UNDER1 INSURED_UNDER65 VACANT_HOUSES MEDIAN_HOUSEVALUE POP_DENS MEDIAN_INCOME_DIF POP_DENS_DIF HIGH_EDUCATION IQR_HOUSEVALUE
county_code
39131 39 131 01074078 0500000US39131 39131 Pike 06 1140324458 9567612 MULTIPOLYGON (((-83.35353 39.19758, -82.78589 ... ... 0.004176 0.206823 0.863582 0.129658 97800.0 24.609268 0.035057 -0.012355 0.122750 91900.0
46003 46 003 01266983 0500000US46003 46003 Aurora 06 1834813753 11201379 MULTIPOLYGON (((-98.80777 43.93522, -98.33151 ... ... 0.000000 0.062066 0.926331 0.138659 69000.0 1.479945 0.142150 0.002201 0.206253 90400.0
55035 55 035 01581077 0500000US55035 55035 Eau Claire 06 1652211310 18848512 MULTIPOLYGON (((-91.65045 44.85595, -90.92225 ... ... 0.009981 0.145896 0.906677 0.065005 153200.0 60.983448 0.056816 0.029863 0.307505 103000.0
48259 48 259 01383915 0500000US48259 48259 Kendall 06 1715747531 1496797 MULTIPOLYGON (((-98.92015 30.13829, -98.58790 ... ... 0.001694 0.061375 0.890025 0.106320 285900.0 22.716632 0.067041 0.156023 0.393330 283900.0
40015 40 015 01101795 0500000US40015 40015 Caddo 06 3310745124 30820525 MULTIPOLYGON (((-98.62315 35.55160, -98.31325 ... ... 0.005796 0.207682 0.732646 0.215021 79400.0 8.831189 0.053571 0.003639 0.160735 89800.0

5 rows × 49 columns

In [10]:
# Persist the joined layer as a GeoPackage, in a layer named "counties".
counties_attr.to_file(filename="counties_attr.gpkg", driver="GPKG", layer="counties")

Podstawowa wizualizacja¶

In [11]:
# Draw the layer with default styling (no attribute column is selected).
counties_attr.plot()
Out[11]:
<Axes: >
No description has been provided for this image
In [12]:
# Reproject the layer to EPSG:5070 and redraw it in the new coordinates.
counties_attr = counties_attr.to_crs(crs="EPSG:5070")
counties_attr.plot()
Out[12]:
<Axes: >
No description has been provided for this image
In [13]:
# Save the reprojected layer to its own GeoPackage.
counties_attr.to_file(filename="counties_attr_5070.gpkg", driver="GPKG", layer="counties_5070")
In [14]:
# Draw only the outlines of the geometries, as thin black lines.
counties_attr.boundary.plot(linewidth=0.2, color="black")
Out[14]:
<Axes: >
No description has been provided for this image
In [15]:
# Colour the counties by the RUCC_2013 attribute (category codes) and add a legend.
counties_attr.plot(legend=True, column="RUCC_2013")
Out[15]:
<Axes: >
No description has been provided for this image
In [16]:
# Same map; legend_kwds gives the legend a label and a horizontal layout.
counties_attr.plot(
    column="RUCC_2013",
    legend=True,
    legend_kwds=dict(label="RUCC", orientation="horizontal"),
)
Out[16]:
<Axes: >
No description has been provided for this image
In [17]:
# RUCC_2013 holds category codes, so draw it as categorical data: every code
# gets its own colour from the qualitative "Set1" palette and its own legend
# entry, instead of being placed on a continuous colour scale.
# For categorical plots legend_kwds is forwarded to the Axes legend, hence the
# legend-style keywords: a title, and an anchor that puts the entries in a
# single row underneath the map.
counties_attr.plot(column="RUCC_2013",
                   categorical=True,
                   cmap="Set1",
                   legend=True,
                   legend_kwds={"title": "RUCC",
                                "loc": "upper center",
                                "bbox_to_anchor": (0.5, 0.0),
                                "ncol": 9})
Out[17]:
<Axes: >
No description has been provided for this image

Wykresy¶

In [18]:
# Overlaid histograms of the WHITE and BLACK columns. Only the two numeric
# columns are selected: a histogram has no use for the geometry column.
counties_attr[["WHITE", "BLACK"]].plot.hist(alpha=.4)
Out[18]:
<Axes: ylabel='Frequency'>
No description has been provided for this image
In [19]:
# Scatter plot of the BLACK column against the WHITE column.
counties_attr.plot.scatter(x="WHITE", y="BLACK")
Out[19]:
<Axes: xlabel='WHITE', ylabel='BLACK'>
No description has been provided for this image

Matplotlib - subplots¶

  • wykorzystanie funkcji subplots do wyświetlenia map
In [20]:
# Create an empty single-panel figure; `ax` is the Axes that plotting calls can target.
fig, ax = plt.subplots(1, 1, figsize=(6, 4), layout="constrained")
# Other Axes methods that can be used in the same way:
# ax.set_xlabel(), ax.set_ylabel(), ax.legend(), ax.set_xlim(), ax.set_ylim()
ax.set_title('Mapa')
Out[20]:
Text(0.5, 1.0, 'Mapa')
No description has been provided for this image
In [21]:
# One map of the WHITE column on an explicitly created Axes.
fig, ax = plt.subplots(1, 1, figsize=(12, 7), layout="constrained")
ax.set_title('Percent of Whites')
counties_attr.plot(
    ax=ax,
    column="WHITE",
    cmap="YlOrBr",
    legend=True,
    legend_kwds=dict(label="Percent of Whites", orientation="horizontal"),
)

# Restrict the visible extent; assigning to `_` keeps the returned limits
# out of the cell output.
_ = ax.set_xlim(left=-2_500_000, right=2_300_000)
_ = ax.set_ylim(bottom=200_000, top=3_200_000)
No description has been provided for this image

Zadanie samodzielne¶

Powiększ mapę, aby pokazywała tylko obszar Teksasu.

Dwie mapy¶

In [22]:
# Two maps side by side on a shared y axis, one per column.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 8), sharey=True)
counties_attr.plot(column="WHITE", cmap="Oranges", ax=ax[0])
ax[0].set_title('Percent of Whites')
counties_attr.plot(column="BLACK", cmap="Oranges", ax=ax[1])
ax[1].set_title('Percent of Blacks')
Out[22]:
Text(0.5, 1.0, 'Percent of Blacks')
No description has been provided for this image
In [23]:
# Two maps with horizontal legends, hidden axes and a common figure title.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 8), layout="tight")
# Both panels get the same treatment; the caption is used for the legend
# label and for the panel title.
for panel, column, caption in zip(ax,
                                  ("WHITE", "BLACK"),
                                  ("Percent of Whites", "Percent of Blacks")):
    counties_attr.plot(ax=panel, column=column, cmap="Oranges",
                       legend=True,
                       legend_kwds={"label": caption, "orientation": "horizontal"})
    panel.set_title(caption)
    panel.set_axis_off()
fig.suptitle('Spatial distribution of ethnoracial groups', y=0.8, fontsize=20)
Out[23]:
Text(0.5, 0.8, 'Spatial distribution of ethnoracial groups')
No description has been provided for this image

Mapa i wykres¶

In [24]:
# Map in the upper panel, histogram of the same column in the lower panel.
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(12, 8))
ax[0].set_title('Percent of Whites')
ax[1].set_title('Whites distribution')
counties_attr.plot(column="WHITE", cmap="Oranges", ax=ax[0])
ax[1].hist(counties_attr[["WHITE"]], alpha=0.4)
# Hide the axes of the map panel only; the histogram keeps its axes.
ax[0].set_axis_off()
No description has been provided for this image

Zadanie do wykonania¶

  • Jaki jest rozkład przestrzenny wartości wybranych zmiennych?
  • Stworzyć figurę składającą się z paneli zawierających: procent białych, czarnych oraz Latynosów.