Ćwiczenie 2 - Podstawy wizualizacji¶
In [1]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.colors as clt
/tmp/ipykernel_10858/3694572134.py:1: DeprecationWarning: Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0), (to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries) but was not found to be installed on your system. If this would cause problems for you, please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466 import pandas as pd
Dane¶
In [2]:
# Load the two inputs: county geometries (GeoPackage) and the socio-economic table (CSV)
counties_path = "data/counties.gpkg"
socio_path = "data/socio_economic_2016.csv"
counties = gpd.read_file(counties_path)
socio = pd.read_csv(socio_path)
In [3]:
# Preview the first five rows of the county layer
counties.head(n=5)
Out[3]:
STATEFP | COUNTYFP | COUNTYNS | AFFGEOID | GEOID | NAME | LSAD | ALAND | AWATER | geometry | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 39 | 131 | 01074078 | 0500000US39131 | 39131 | Pike | 06 | 1140324458 | 9567612 | MULTIPOLYGON (((-83.35353 39.19758, -82.78589 ... |
1 | 46 | 003 | 01266983 | 0500000US46003 | 46003 | Aurora | 06 | 1834813753 | 11201379 | MULTIPOLYGON (((-98.80777 43.93522, -98.33151 ... |
2 | 55 | 035 | 01581077 | 0500000US55035 | 55035 | Eau Claire | 06 | 1652211310 | 18848512 | MULTIPOLYGON (((-91.65045 44.85595, -90.92225 ... |
3 | 48 | 259 | 01383915 | 0500000US48259 | 48259 | Kendall | 06 | 1715747531 | 1496797 | MULTIPOLYGON (((-98.92015 30.13829, -98.58790 ... |
4 | 40 | 015 | 01101795 | 0500000US40015 | 40015 | Caddo | 06 | 3310745124 | 30820525 | MULTIPOLYGON (((-98.62315 35.55160, -98.31325 ... |
In [4]:
# Preview the first five rows of the socio-economic table
socio.head(n=5)
Out[4]:
GISJOIN | county_code | FIPS | YEAR | STATE | STATEA | COUNTY | COUNTYA | NAME | RUCC_2013 | ... | PUBLIC_TRANSPORT | POVERTY_UNDER1 | INSURED_UNDER65 | VACANT_HOUSES | MEDIAN_HOUSEVALUE | POP_DENS | MEDIAN_INCOME_DIF | POP_DENS_DIF | HIGH_EDUCATION | IQR_HOUSEVALUE | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | G0100010 | 1001 | 1001 | 2012-2016 | Alabama | 1 | Autauga County | 1 | Autauga County, Alabama | 2.0 | ... | 0.000782 | 0.122660 | 0.896521 | 0.084265 | 141000.0 | 35.167013 | -0.012534 | 0.008408 | 0.245928 | 136300.0 |
1 | G0100030 | 1003 | 1003 | 2012-2016 | Alabama | 1 | Baldwin County | 3 | Baldwin County, Alabama | 3.0 | ... | 0.002143 | 0.129938 | 0.856273 | 0.301453 | 173400.0 | 47.226343 | 0.012996 | 0.088874 | 0.295471 | 157800.0 |
2 | G0100050 | 1005 | 1005 | 2012-2016 | Alabama | 1 | Barbour County | 5 | Barbour County, Alabama | 6.0 | ... | 0.003871 | 0.263737 | 0.842386 | 0.227080 | 90300.0 | 11.360474 | 0.064819 | -0.031126 | 0.128678 | 116300.0 |
3 | G0100070 | 1007 | 1007 | 2012-2016 | Alabama | 1 | Bibb County | 7 | Bibb County, Alabama | 1.0 | ... | 0.004831 | 0.164539 | 0.892983 | 0.214445 | 97200.0 | 13.918126 | 0.080165 | -0.008652 | 0.120000 | 112300.0 |
4 | G0100090 | 1009 | 1009 | 2012-2016 | Alabama | 1 | Blount County | 9 | Blount County, Alabama | 1.0 | ... | 0.001733 | 0.165344 | 0.867668 | 0.135472 | 124200.0 | 34.243279 | 0.022570 | 0.004142 | 0.130498 | 119800.0 |
5 rows × 40 columns
In [5]:
# Uncomment to show the full list of column names in socio
#list(socio.columns)
In [6]:
# Build an integer county_code column from GEOID so the layer carries
# the same key column name as the socio table
geoid_as_int = counties["GEOID"].astype(int)
counties["county_code"] = geoid_as_int
counties.head()
Out[6]:
STATEFP | COUNTYFP | COUNTYNS | AFFGEOID | GEOID | NAME | LSAD | ALAND | AWATER | geometry | county_code | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 39 | 131 | 01074078 | 0500000US39131 | 39131 | Pike | 06 | 1140324458 | 9567612 | MULTIPOLYGON (((-83.35353 39.19758, -82.78589 ... | 39131 |
1 | 46 | 003 | 01266983 | 0500000US46003 | 46003 | Aurora | 06 | 1834813753 | 11201379 | MULTIPOLYGON (((-98.80777 43.93522, -98.33151 ... | 46003 |
2 | 55 | 035 | 01581077 | 0500000US55035 | 55035 | Eau Claire | 06 | 1652211310 | 18848512 | MULTIPOLYGON (((-91.65045 44.85595, -90.92225 ... | 55035 |
3 | 48 | 259 | 01383915 | 0500000US48259 | 48259 | Kendall | 06 | 1715747531 | 1496797 | MULTIPOLYGON (((-98.92015 30.13829, -98.58790 ... | 48259 |
4 | 40 | 015 | 01101795 | 0500000US40015 | 40015 | Caddo | 06 | 3310745124 | 30820525 | MULTIPOLYGON (((-98.62315 35.55160, -98.31325 ... | 40015 |
In [7]:
# Make county_code the row index. The column is dropped from the frame,
# so running this cell a second time without re-creating it raises a KeyError.
counties = counties.set_index(keys='county_code', drop=True)
counties.head()
Out[7]:
STATEFP | COUNTYFP | COUNTYNS | AFFGEOID | GEOID | NAME | LSAD | ALAND | AWATER | geometry | |
---|---|---|---|---|---|---|---|---|---|---|
county_code | ||||||||||
39131 | 39 | 131 | 01074078 | 0500000US39131 | 39131 | Pike | 06 | 1140324458 | 9567612 | MULTIPOLYGON (((-83.35353 39.19758, -82.78589 ... |
46003 | 46 | 003 | 01266983 | 0500000US46003 | 46003 | Aurora | 06 | 1834813753 | 11201379 | MULTIPOLYGON (((-98.80777 43.93522, -98.33151 ... |
55035 | 55 | 035 | 01581077 | 0500000US55035 | 55035 | Eau Claire | 06 | 1652211310 | 18848512 | MULTIPOLYGON (((-91.65045 44.85595, -90.92225 ... |
48259 | 48 | 259 | 01383915 | 0500000US48259 | 48259 | Kendall | 06 | 1715747531 | 1496797 | MULTIPOLYGON (((-98.92015 30.13829, -98.58790 ... |
40015 | 40 | 015 | 01101795 | 0500000US40015 | 40015 | Caddo | 06 | 3310745124 | 30820525 | MULTIPOLYGON (((-98.62315 35.55160, -98.31325 ... |
In [8]:
# set_index removes the key column from the frame by default (drop=True);
# pass drop=False to keep it as a regular column in addition to the index
socio = socio.set_index(keys="county_code", drop=True)
socio.head()
Out[8]:
GISJOIN | FIPS | YEAR | STATE | STATEA | COUNTY | COUNTYA | NAME | RUCC_2013 | election_votes_dem | ... | PUBLIC_TRANSPORT | POVERTY_UNDER1 | INSURED_UNDER65 | VACANT_HOUSES | MEDIAN_HOUSEVALUE | POP_DENS | MEDIAN_INCOME_DIF | POP_DENS_DIF | HIGH_EDUCATION | IQR_HOUSEVALUE | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
county_code | |||||||||||||||||||||
1001 | G0100010 | 1001 | 2012-2016 | Alabama | 1 | Autauga County | 1 | Autauga County, Alabama | 2.0 | 5908.0 | ... | 0.000782 | 0.122660 | 0.896521 | 0.084265 | 141000.0 | 35.167013 | -0.012534 | 0.008408 | 0.245928 | 136300.0 |
1003 | G0100030 | 1003 | 2012-2016 | Alabama | 1 | Baldwin County | 3 | Baldwin County, Alabama | 3.0 | 18409.0 | ... | 0.002143 | 0.129938 | 0.856273 | 0.301453 | 173400.0 | 47.226343 | 0.012996 | 0.088874 | 0.295471 | 157800.0 |
1005 | G0100050 | 1005 | 2012-2016 | Alabama | 1 | Barbour County | 5 | Barbour County, Alabama | 6.0 | 4848.0 | ... | 0.003871 | 0.263737 | 0.842386 | 0.227080 | 90300.0 | 11.360474 | 0.064819 | -0.031126 | 0.128678 | 116300.0 |
1007 | G0100070 | 1007 | 2012-2016 | Alabama | 1 | Bibb County | 7 | Bibb County, Alabama | 1.0 | 1874.0 | ... | 0.004831 | 0.164539 | 0.892983 | 0.214445 | 97200.0 | 13.918126 | 0.080165 | -0.008652 | 0.120000 | 112300.0 |
1009 | G0100090 | 1009 | 2012-2016 | Alabama | 1 | Blount County | 9 | Blount County, Alabama | 1.0 | 2150.0 | ... | 0.001733 | 0.165344 | 0.867668 | 0.135472 | 124200.0 | 34.243279 | 0.022570 | 0.004142 | 0.130498 | 119800.0 |
5 rows × 39 columns
In [9]:
# Attach the socio-economic attributes to the county geometries.
# join() matches rows on the index of both frames, so no key column is named;
# how="left" (the default) keeps every row of counties.
# lsuffix is appended to counties' columns whose names also occur in socio
# (here NAME becomes NAME_cnt).
counties_attr = counties.join(other=socio, how="left", lsuffix='_cnt')
counties_attr.head()
Out[9]:
STATEFP | COUNTYFP | COUNTYNS | AFFGEOID | GEOID | NAME_cnt | LSAD | ALAND | AWATER | geometry | ... | PUBLIC_TRANSPORT | POVERTY_UNDER1 | INSURED_UNDER65 | VACANT_HOUSES | MEDIAN_HOUSEVALUE | POP_DENS | MEDIAN_INCOME_DIF | POP_DENS_DIF | HIGH_EDUCATION | IQR_HOUSEVALUE | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
county_code | |||||||||||||||||||||
39131 | 39 | 131 | 01074078 | 0500000US39131 | 39131 | Pike | 06 | 1140324458 | 9567612 | MULTIPOLYGON (((-83.35353 39.19758, -82.78589 ... | ... | 0.004176 | 0.206823 | 0.863582 | 0.129658 | 97800.0 | 24.609268 | 0.035057 | -0.012355 | 0.122750 | 91900.0 |
46003 | 46 | 003 | 01266983 | 0500000US46003 | 46003 | Aurora | 06 | 1834813753 | 11201379 | MULTIPOLYGON (((-98.80777 43.93522, -98.33151 ... | ... | 0.000000 | 0.062066 | 0.926331 | 0.138659 | 69000.0 | 1.479945 | 0.142150 | 0.002201 | 0.206253 | 90400.0 |
55035 | 55 | 035 | 01581077 | 0500000US55035 | 55035 | Eau Claire | 06 | 1652211310 | 18848512 | MULTIPOLYGON (((-91.65045 44.85595, -90.92225 ... | ... | 0.009981 | 0.145896 | 0.906677 | 0.065005 | 153200.0 | 60.983448 | 0.056816 | 0.029863 | 0.307505 | 103000.0 |
48259 | 48 | 259 | 01383915 | 0500000US48259 | 48259 | Kendall | 06 | 1715747531 | 1496797 | MULTIPOLYGON (((-98.92015 30.13829, -98.58790 ... | ... | 0.001694 | 0.061375 | 0.890025 | 0.106320 | 285900.0 | 22.716632 | 0.067041 | 0.156023 | 0.393330 | 283900.0 |
40015 | 40 | 015 | 01101795 | 0500000US40015 | 40015 | Caddo | 06 | 3310745124 | 30820525 | MULTIPOLYGON (((-98.62315 35.55160, -98.31325 ... | ... | 0.005796 | 0.207682 | 0.732646 | 0.215021 | 79400.0 | 8.831189 | 0.053571 | 0.003639 | 0.160735 | 89800.0 |
5 rows × 49 columns
In [10]:
# Write the joined layer to a GeoPackage file, in a layer named "counties"
counties_attr.to_file(filename="counties_attr.gpkg", driver="GPKG", layer='counties')
Podstawowa wizualizacja¶
In [11]:
# Draw the joined layer with geopandas' default styling (no attribute column selected)
counties_attr.plot()
Out[11]:
<Axes: >
In [12]:
# Reproject the layer to EPSG:5070 and redraw it.
# counties_attr is rebound here, so every later cell works in EPSG:5070 coordinates.
counties_attr = counties_attr.to_crs(crs="EPSG:5070")
counties_attr.plot()
Out[12]:
<Axes: >
In [13]:
# Save the reprojected layer to its own GeoPackage file
counties_attr.to_file(filename="counties_attr_5070.gpkg", driver="GPKG", layer="counties_5070")
In [14]:
# Draw only the county outlines, as thin black lines
outlines = counties_attr.boundary
outlines.plot(linewidth=0.2, color="black")
Out[14]:
<Axes: >
In [15]:
# Colour the counties by an attribute column; RUCC_2013 holds category codes
counties_attr.plot(legend=True, column="RUCC_2013")
Out[15]:
<Axes: >
In [16]:
# Same map, with the legend captioned "RUCC" and laid out horizontally
rucc_legend = {"label": "RUCC", "orientation": "horizontal"}
counties_attr.plot(column="RUCC_2013", legend=True, legend_kwds=rucc_legend)
Out[16]:
<Axes: >
In [17]:
# Same map again, this time drawn with the "Set1" colormap
rucc_legend = {"label": "RUCC", "orientation": "horizontal"}
counties_attr.plot(column="RUCC_2013", cmap="Set1", legend=True, legend_kwds=rucc_legend)
Out[17]:
<Axes: >
Wykresy¶
In [18]:
# Overlaid histograms of the WHITE and BLACK columns; alpha makes the bars translucent.
# NOTE(review): geometry is part of the selection, presumably so the result stays a GeoDataFrame - confirm it is needed.
race_shares = counties_attr[["WHITE", "BLACK", "geometry"]]
race_shares.plot(kind="hist", alpha=.4)
Out[18]:
<Axes: ylabel='Frequency'>
In [19]:
# Scatter plot of the two attribute columns against each other (one point per row)
counties_attr.plot.scatter(x="WHITE", y="BLACK")
Out[19]:
<Axes: xlabel='WHITE', ylabel='BLACK'>
Matplotlib - subplots¶
- wykorzystanie funkcji subplots do wyświetlenia map
In [20]:
# Create an empty figure holding a single Axes (one row, one column is the default grid)
fig, ax = plt.subplots(figsize=(6, 4), layout='constrained')
# Other Axes methods that can be used to customise the plot:
#   ax.set_xlabel(), ax.set_ylabel(), ax.legend(), ax.set_xlim(), ax.set_ylim()
ax.set_title('Mapa')
Out[20]:
Text(0.5, 1.0, 'Mapa')
In [21]:
# Choropleth of the WHITE column drawn on an explicitly created Axes
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 7), layout='constrained')
counties_attr.plot(ax=ax,
                   column="WHITE",
                   cmap="YlOrBr",
                   legend=True,
                   legend_kwds={"label": "Percent of Whites", "orientation": "horizontal"})
ax.set_title('Percent of Whites')
# Crop the view; the limits are in the layer's coordinate units (EPSG:5070 after the reprojection cell).
# NOTE(review): the window presumably frames the contiguous US - confirm.
ax.set_xlim(-2500000, 2300000)
# A trailing ';' hides the cell's output without rebinding IPython's `_`
# (assigning to `_` stops IPython from updating its last-output variable).
ax.set_ylim(200000, 3200000);
Zadanie samodzielne¶
Powiększ mapę, aby pokazywała tylko obszar Teksasu
Dwie mapy¶
In [22]:
# Two maps side by side that share the y axis: WHITE on the left, BLACK on the right
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 8), sharey=True)
left_ax, right_ax = ax
counties_attr.plot(column="WHITE", cmap="Oranges", ax=left_ax)
counties_attr.plot(column="BLACK", cmap="Oranges", ax=right_ax)
left_ax.set_title('Percent of Whites')
right_ax.set_title('Percent of Blacks')
Out[22]:
Text(0.5, 1.0, 'Percent of Blacks')
In [23]:
# Two maps side by side, each with its own horizontal legend, no axis frame, and a common title
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 8), layout="tight")
# (column to map, caption used for both the legend label and the panel title)
panels = [("WHITE", "Percent of Whites"), ("BLACK", "Percent of Blacks")]
for panel_ax, (column_name, caption) in zip(ax, panels):
    counties_attr.plot(ax=panel_ax, column=column_name, cmap="Oranges",
                       legend=True,
                       legend_kwds={"label": caption, "orientation": "horizontal"})
    panel_ax.set_title(caption)
    panel_ax.set_axis_off()
# y=0.8 places the figure title at 80% of the figure height
fig.suptitle('Spatial distribution of ethnoracial groups', y=0.8, fontsize=20)
Out[23]:
Text(0.5, 0.8, 'Spatial distribution of ethnoracial groups')
Mapa i wykres¶
In [24]:
# Map of the WHITE column on top, histogram of the same column below
fig, ax = plt.subplots(2, 1, figsize=(12,8))
counties_attr.plot(ax=ax[0], column = "WHITE", cmap = "Oranges")
ax[1].hist(counties_attr[["WHITE"]], alpha=.4)
ax[0].set_title('Percent of Whites')
ax[1].set_title('Whites distribution')
# Label the histogram axes so the panel can be read on its own:
# x is the mapped variable, y counts rows (one row per county)
ax[1].set_xlabel('Percent of Whites')
ax[1].set_ylabel('Number of counties')
# Only the map panel hides its axes; the histogram keeps them because they carry the scale
ax[0].set_axis_off()
Zadanie do wykonania¶
- Jaki jest rozkład przestrzenny wartości wybranych zmiennych?
- Stworzyć figurę składającą się z paneli zawierających: procent białych, czarnych oraz Latynosów.