米国の州別とフロリダ州の郡別移民数をPlotlyでプロットする

このサイトを参考にしながら、アメリカの州ごとの移民数をplotlyを使って視覚化する。基本的に、アメリカは移民の国であり、合法・不法合わせて相当数の移民が毎年流入している。今年だけで、400万〜500万人の不法移民がアメリカに侵入することが予想されており、日本の在留外国人数(273万人)を考えると恐ろしい数字と言えよう。

スポンサーリンク

モジュールとデータの準備

先ず、必要なモジュールをインポートする。

from matplotlib.pyplot import figure
from pandas import DataFrame
from plotly.subplots import make_subplots
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.offline import plot,iplot
%matplotlib inline

以下のサイトからデータをダウンロードする。

#%download https://raw.githubusercontent.com/Ayeeta/us-international-migration/master/population_estimates.csv

データをロードする。

# Read the population-estimates CSV (expected in the working directory)
# into a DataFrame.
population_data = pd.read_csv(filepath_or_buffer='population_estimates.csv')

欠損値の数を表示する。

# Number of missing (null) entries in each column.
population_data.isnull().sum()
1                                   0
FIPS                                0
State                               0
Area_Name                           0
Rural-urban_Continuum Code_2003    58
                                   ..
R_NET_MIG_2014                     80
R_NET_MIG_2015                     80
R_NET_MIG_2016                     80
R_NET_MIG_2017                     80
R_NET_MIG_2018                     80
Length: 150, dtype: int64
# Column names available in the dataset.
list(population_data)

# Names of the 50 US states; rows whose Area_Name matches one of these are
# selected as the state-level rows.
area_name = ['Alabama','Alaska','Arizona', 'Arkansas','California','Colorado','Connecticut','Delaware',
          'Florida','Georgia','Hawaii','Idaho','Illinois','Indiana','Iowa','Kansas','Kentucky','Louisiana',
          'Maine','Maryland','Massachusetts','Michigan','Minnesota','Mississippi','Missouri','Montana','Nebraska',
          'Nevada','New Hampshire','New Jersey','New Mexico','New York','North Carolina','North Dakota','Ohio',
          'Oklahoma','Oregon','Pennsylvania','Rhode Island','South Carolina','South Dakota','Tennessee','Texas',
          'Utah','Vermont','Virginia','Washington','West Virginia','Wisconsin','Wyoming']

# Take an explicit copy: a column is added to `states` further down, and
# doing that on a bare .loc slice of `population_data` raises pandas'
# SettingWithCopyWarning.
states = population_data.loc[population_data['Area_Name'].isin(area_name)].copy()

2010年〜2018年の州別移民数

# Figure 1: international migration per state, one line per year 2010-2018.
state = states["State"]

# One Scatter trace per year, each reading its INTERNATIONAL_MIG_<year>
# column; built in a loop rather than as nine copy-pasted definitions.
data = [
    go.Scatter(
        x=state,
        y=states["INTERNATIONAL_MIG_" + str(year)],
        mode="lines",
        name=str(year),
    )
    for year in range(2010, 2019)
]

layout = go.Layout(
    title="2010 - 2018", title_font=dict(size=22),
    xaxis=dict(
        title='US States', title_font=dict(size=18),
        tickmode='linear', tickfont=dict(size=16)),
    yaxis=dict(
        # Axis label previously read "Internation Migration" (typo).
        title='International Migration', title_font=dict(size=22),
        tickfont=dict(size=20)),
    autosize=True, width=800, height=800,
    hovermode='x',
    hoverlabel=dict(font=dict(size=20)),
    # Horizontal legend placed below the plot area on a transparent background.
    legend=dict(x=0, y=-.15, font=dict(size=21, color='black'), bgcolor='rgba(0,0,0,0)'),
    legend_orientation="h"
)
fig = go.Figure(data=data, layout=layout)
# Write the figure to 1.html without bundling plotly.js into the output.
plot(fig, show_link=False, filename="1.html", include_plotlyjs=False)

フロリダ、カリフォルニア、テキサス、ニューヨークが圧倒的に多いことが見て取れる。

# Figure 2: line chart of the 2018 international-migration value per state.
mig = px.line(
    states,
    x='State',
    y="INTERNATIONAL_MIG_2018",
    title='International Migration 2018',
)
# Enlarge the hover-label font.
layout = {'hoverlabel': {'font': {'size': 24}}}
mig.update_layout(layout)
plot(mig, show_link=False, filename="2.html", include_plotlyjs=False)

2018年は、FL, CA, TX, NYに次いで、MAとNJが多い。

# Figure 3: choropleth map of 2018 international migration by state.

# Hover text for each state. The separators are plain runs of spaces; they
# were previously produced by backslash line-continuations inside the string
# literals, which evaluate to exactly these strings.
hover_text = (
    "International Migration 2018    "
    + states["INTERNATIONAL_MIG_2018"].astype(str)
    + " State:     "
    + states["Area_Name"]
)
# assign() returns a new DataFrame, so no column is written onto a slice of
# `population_data` (which would raise SettingWithCopyWarning).
states = states.assign(text=hover_text)

fig = go.Figure(data=go.Choropleth(
    locations=states['State'],  # values matched against `locationmode`
    z=states["INTERNATIONAL_MIG_2018"].astype(float),  # data to be color-coded
    locationmode='USA-states',  # set of locations match entries in `locations`
    colorscale='reds',
    text=states['text'],
    colorbar_title="International Migration",
))

fig.update_layout(
    title_text="International Migration 2018",
    geo=dict(
        scope='usa',
        projection=go.layout.geo.Projection(type='albers usa'),
        showlakes=True,  # draw lakes
        lakecolor='rgb(255,255,255)'),
    hoverlabel=dict(font=dict(size=24))
)
plot(fig, show_link=False, filename="3.html", include_plotlyjs=False)

マップで視覚化した方が州別の全体的な移民数を把握しやすい。

フロリダ州の郡別移民数

フロリダ州に焦点を絞って、2018年の郡別移民数をプロットする。

# Figure 4: 2018 international migration for each area inside Florida.

# Keep rows with State == 'FL' but drop the row named 'Florida' itself
# (presumably the state-level total -- confirm against the dataset).
# The explicit copy lets later code add columns to `florida` without
# pandas' SettingWithCopyWarning.
is_florida_area = (population_data['State'] == 'FL') & (population_data['Area_Name'] != 'Florida')
florida = population_data.loc[is_florida_area].copy()

# Title previously read "Internation Migration ..." (typo).
florida_graph_2018 = px.line(florida, x='Area_Name', y='INTERNATIONAL_MIG_2018', title='International Migration Settlement in Florida')
# Larger fonts for the hover label and both axes' tick labels.
layout = dict(hoverlabel=dict(font=dict(size=24)),
              xaxis=dict(tickfont=dict(size=20)), yaxis=dict(tickfont=dict(size=20)))
florida_graph_2018.layout.update(layout)
plot(florida_graph_2018, show_link=False, filename="4.html", include_plotlyjs=False)

マイアミ・デイド郡が圧倒的に移民数が多いことが分かる。

# Figure 5: county-level choropleth of 2018 international migration in Florida.

# Descriptive text per row. The separators are plain runs of spaces; they
# were previously produced by backslash line-continuations inside the string
# literals, which evaluate to exactly these strings. assign() returns a new
# DataFrame, avoiding a column write onto a slice of `population_data`.
florida = florida.assign(
    text=(
        "International Migration 2018    "
        + florida["INTERNATIONAL_MIG_2018"].astype(str)
        + " County:     "
        + florida["Area_Name"]
    )
)

values = florida['INTERNATIONAL_MIG_2018'].tolist()
fips = florida['FIPS'].tolist()

# Four evenly spaced bin endpoints from the smallest to the largest value
# (a complex step in np.mgrid means "number of points", endpoints included).
endpts = list(np.mgrid[min(values):max(values):4j])

# Light-to-dark red palette: four endpoints delimit five bins, one color each.
colorscale = [
    '#ffcccc',
    '#ff9999',
    '#ff6666',
    '#ff3333',
    '#ff0000'
]
fig = ff.create_choropleth(
    fips=fips, values=values, scope=['Florida'], show_state_data=True,
    colorscale=colorscale, binning_endpoints=endpts, round_legend_values=True,
    plot_bgcolor='rgb(229,229,229)',
    paper_bgcolor='rgb(229,229,229)',
    legend_title='International Population by County 2018',
    county_outline={'color': 'rgb(255,255,255)', 'width': 0.5},
    exponent_format=False
)

# First trace that carries hover text, together with its position in fig.data.
hover_ix, hover = next(
    (ix, trace) for ix, trace in enumerate(fig['data']) if trace.text
)

# A length mismatch indicates a bug: the hover trace holds more entries than
# there are rows (presumably duplicates emitted by create_choropleth --
# confirm). Keep only the first occurrence of each hover text.
if len(hover['text']) != len(florida):

    ht = pd.Series(hover['text'])
    first_seen = ht.index[~ht.duplicated()]

    def _as_scalar(coord):
        # A coordinate may be a bare number or a sequence of numbers; use the
        # first element of a sequence. np.ndim also recognises ints and NumPy
        # scalars, which an exact `type(x) == float` comparison rejects.
        return coord if np.ndim(coord) == 0 else coord[0]

    # Index the original sequences positionally instead of via np.array(...):
    # building an array from a ragged sequence fails on recent NumPy versions.
    new_hover_x = [_as_scalar(hover['x'][i]) for i in first_seen]
    new_hover_y = [_as_scalar(hover['y'][i]) for i in first_seen]

    fig['data'][hover_ix]['text'] = ht[first_seen].tolist()
    fig['data'][hover_ix]['x'] = new_hover_x
    fig['data'][hover_ix]['y'] = new_hover_y

# Drop the default template, then set sizes and fonts for the output.
fig.layout.template = None
fig.update_layout(hoverlabel=dict(font=dict(size=24)),
    autosize=False, width=800, height=500, legend=dict(font=dict(size=21)))
plot(fig, show_link=False, filename="5.html", include_plotlyjs=False)
/root/.pyenv/versions/miniconda3-latest/envs/py373/lib/python3.7/site-packages/pandas/core/frame.py:7123: FutureWarning:

Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.