本问题与此帖间接相关:Customize Bokeh Unemployment Example: Replacing Percentage Value
入门代码:https://bokeh.pydata.org/en/latest/docs/gallery/texas.html
from bokeh.io import show
from bokeh.models import LogColorMapper
from bokeh.palettes import Viridis6 as palette
from bokeh.plotting import figure
from bokeh.sampledata.us_counties import data as counties
# Keep only the entries whose "state" field is "tx".
counties = {
    code: county
    for code, county in counties.items()
    if county["state"] == "tx"
}
csv数据:
I have a dictionary of county names:
{(48, 1): {'name': 'Anderson',
'detailed name': 'Anderson County, Texas',
'state': 'tx'}
{(48, 3): {'name': 'Andrews',
'detailed name': 'Andrews County, Texas',
'state': 'tx'}
and a dataframe created from a csv file of percentage values:
{'Anderson': 21.0,
'Andrews': 28.0,
'Angelina': 31.0,
'Aransas': 24.0,
'Archer': 11.0,
'Armstrong': 53.0,
'Atascosa': 27.0,
'Austin': 30.0,
'Bailey': 42.0,
'Bandera': 0.0}
我试图将数据框的百分比值合并到字典中的县名。
from bokeh.models import LogColorMapper
from bokeh.palettes import Viridis6 as palette
from bokeh.plotting import figure, show
from bokeh.sampledata.us_counties import data as counties
import csv
import pandas as pd
# Map each county name to the float value found in the CSV file.
pharmacy_concentration = {}
with open('resources/unemployment.csv', mode='r') as infile:
    next(infile)  # discard the first line (presumably the CSV header)
    reader = csv.reader(infile, delimiter=',', quotechar='"')
    for row in reader:
        # Each row is expected to hold exactly two fields: name and value.
        name, concentration = row
        pharmacy_concentration[name] = float(concentration)

# Keep only the entries whose "state" field is "tx".
counties = {
    code: county
    for code, county in counties.items()
    if county["state"] == "tx"
}

# NOTE: this call is intentionally left as asked in the question; it is the
# statement that raises the KeyError. After the loop, `concentration` still
# holds the string from the last CSV row, while `pharmacy_concentration` is
# keyed by county name, so the lookup below fails with that percentage string
# as the missing key. In addition, `pd.concat` has no `on`/`how` parameters
# (those belong to `merge`), and `counties` is a plain dict, not a DataFrame.
counties = pd.concat(pharmacy_concentration[concentration], on='name',
                     how='left', keys='concentration')
counties
我收到一个 KeyError,错误信息中显示的是某个百分比值,但我弄不清原因。
预期输出:
counties
{(48, 1): {'name': 'Anderson',
'detailed name': 'Anderson County, Texas',
'state': 'tx', 'concentration': 21}
如果我理解正确,这就是你想要做的:
首先,我们把您的两个字典分别放入两个数据框:county_names 和 csv_data。之后我将它们转换为正确的格式,但这对您来说可能不是必需的:
# Build a frame with one row per county: the outer dict keys end up in the
# County_ID column and the inner dict keys become the remaining columns.
_county_records = {
    '(48, 1)': {'name': 'Anderson',
                'detailed name': 'Anderson County, Texas',
                'state': 'tx'},
    '(48, 3)': {'name': 'Andrews',
                'detailed name': 'Andrews County, Texas',
                'state': 'tx'},
}
# Transpose so that each county id is a row rather than a column.
_by_county = pd.DataFrame(_county_records).T
# reset_index() moves the ids into a column named 'index'; give it a real name.
county_names = _by_county.reset_index().rename(columns={'index': 'County_ID'})
print(county_names)
County_ID detailed name name state
0 (48, 1) Anderson County, Texas Anderson tx
1 (48, 3) Andrews County, Texas Andrews tx
# County name -> percentage value, as it would be read from the CSV file.
d = {
    'Anderson': 21.0,
    'Andrews': 28.0,
    'Angelina': 31.0,
    'Aransas': 24.0,
    'Archer': 11.0,
    'Armstrong': 53.0,
    'Atascosa': 27.0,
    'Austin': 30.0,
    'Bailey': 42.0,
    'Bandera': 0.0,
}
# A single-row frame with one column per county ...
_wide = pd.DataFrame(d, index=[0])
# ... unpivoted into one (name, concentration) row per county.
csv_data = pd.melt(_wide, var_name='name', value_name='concentration')
print(csv_data)
name concentration
0 Anderson 21.0
1 Andrews 28.0
2 Angelina 31.0
3 Aransas 24.0
4 Archer 11.0
5 Armstrong 53.0
6 Atascosa 27.0
7 Austin 30.0
8 Bailey 42.0
9 Bandera 0.0
现在我们可以在 name 列上合并这两份数据:
# Join the two frames on their shared 'name' column; only names present in
# both frames are kept (inner join, which is the default for merge).
df_final = county_names.merge(csv_data, how='inner', on='name')
print(df_final)
County_ID detailed name name state concentration
0 (48, 1) Anderson County, Texas Anderson tx 21.0
1 (48, 3) Andrews County, Texas Andrews tx 28.0
注意
你可以用 pandas 轻松读取 CSV 文件,只需使用:
# `sep` is the primary name of the parameter that `delimiter` aliases.
pd.read_csv(infile, sep=',', quotechar='"')
感谢@Tony
from bokeh.models import LogColorMapper
from bokeh.palettes import Viridis256 as palette
from bokeh.plotting import figure, show
from bokeh.sampledata.us_counties import data as counties
import csv
# Map each county name to the float value found in the CSV file.
pharmacy_concentration = {}
with open('resources/unemployment.csv', mode='r') as infile:
    # Feed csv.reader a list of already-split lines, as the original did.
    reader = csv.reader(infile.read().splitlines())
    for row in reader:
        try:
            county_name, concentration = row
            pharmacy_concentration[county_name] = float(concentration)
        except ValueError as error:
            # Both a row without exactly two fields and a non-numeric value
            # raise ValueError (presumably the header line ends up here).
            # Report the row and carry on instead of aborting the load.
            print("{} {}".format(error, row))

# Keep only the entries whose "state" field is "tx".
counties = {
    code: county
    for code, county in counties.items()
    if county["state"] == "tx"
}

county_xs = [county["lons"] for county in counties.values()]
county_ys = [county["lats"] for county in counties.values()]
county_names = [county['name'] for county in counties.values()]

# Below is the line of code I was missing to make it work:
# look up the CSV value for every county name, in the same order as
# county_names.
# NOTE(review): counties absent from the CSV are skipped, so this list can be
# shorter than county_xs / county_ys / county_names - confirm that is intended.
county_pharmacy_concentration_rates = [
    pharmacy_concentration[name]
    for name in county_names
    if name in pharmacy_concentration
]