Python 繪制驚艷的桑基圖

導讀:本文中,我們使用 Python 的 plotly 繪制桑基圖。


# Load the Olympic medal table and tidy its column names for plotting.
# Assumes pandas has already been imported as `pd`.
df_medals = pd.read_excel("data/Medals.xlsx")
# DataFrame.info() prints its summary itself and returns None, so this
# print() additionally emits a trailing "None".
print(df_medals.info())
df_medals.rename(
    columns={
        'Team/NOC': 'Country',
        'Total': 'Total Medals',
        'Gold': 'Gold Medals',
        'Silver': 'Silver Medals',
        'Bronze': 'Bronze Medals',
    },
    inplace=True,
)
# Remove the two "Unnamed" spreadsheet columns and the rank-by-total column,
# none of which are used below.
df_medals.drop(columns=['Unnamed: 7', 'Unnamed: 8', 'Rank by Total'], inplace=True)
df_medals
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 93 entries, 0 to 92
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype
---  ------         --------------  -----
 0   Rank           93 non-null     int64
 1   Team/NOC       93 non-null     object
 2   Gold           93 non-null     int64
 3   Silver         93 non-null     int64
 4   Bronze         93 non-null     int64
 5   Total          93 non-null     int64
 6   Rank by Total  93 non-null     int64
 7   Unnamed: 7     0 non-null      float64
 8   Unnamed: 8     1 non-null      float64
dtypes: float64(2), int64(6), object(1)
memory usage: 6.7+ KB
None

0:美國:綠色 1:中國:藍色 2:日本:橙色
3:金牌:金色 4:銀牌:銀色 5:銅牌:棕色
0 (美國) 至 3,4,5 : 39, 41, 33 1 (中國) 至 3,4,5 : 38, 32, 18 2 (日本) 至 3,4,5 : 27, 14, 17
nodes (源和目標):標簽和顏色作為單獨的列表;links:源節點、目標節點、值(寬度)和鏈接的顏色作為單獨的列表
# Node definitions for the Sankey diagram. List positions are the node
# indices referenced by LINKS: 0-2 are countries, 3-5 are medal types.
NODES = dict(
    #        0                           1                             2        3       4         5
    label=["United States of America", "People's Republic of China", "Japan", "Gold", "Silver", "Bronze"],
    color=["seagreen",                 "dodgerblue",                 "orange", "gold", "silver", "brown"],
)

# Link definitions: parallel lists, one entry per (country, medal) pair.
LINKS = dict(
    source=[0, 0, 0, 1, 1, 1, 2, 2, 2],           # index of the node each link starts from
    target=[3, 4, 5, 3, 4, 5, 3, 4, 5],           # index of the node each link ends at
    value=[39, 41, 33, 38, 32, 18, 27, 14, 17],   # link width (number of medals)
    # Link colours: a lighter shade of the source country's node colour.
    # Target node:  3-Gold          4-Silver        5-Bronze
    color=[
        "lightgreen",   "lightgreen",   "lightgreen",     # source node 0 - United States of America
        "lightskyblue", "lightskyblue", "lightskyblue",   # source node 1 - People's Republic of China
        "bisque",       "bisque",       "bisque",         # source node 2 - Japan
    ],
)
# Build the Sankey trace from the node/link dictionaries and display it.
# Assumes plotly.graph_objects has already been imported as `go`.
data = go.Sankey(node=NODES, link=LINKS)
fig = go.Figure(data)
fig.show()
# Node definitions with explicit positions. The x/y entries must live inside
# the same dict(...) call as label/color; the call is closed only after `y`.
NODES = dict(
    #        0                           1                             2        3       4         5
    label=["United States of America", "People's Republic of China", "Japan", "Gold", "Silver", "Bronze"],
    color=["seagreen",                 "dodgerblue",                 "orange", "gold", "silver", "brown"],
    # Countries sit in the left column (x=0), medal types in the middle (x=0.5).
    x=[0,                              0,                            0,        0.5,    0.5,      0.5],
    y=[0,                              0.5,                          1,        0.1,    0.5,      1],
)
# Rebuild the figure with the positioned nodes, add a title and a larger font.
# Assumes plotly.graph_objects has already been imported as `go`.
data = go.Sankey(node=NODES, link=LINKS)
fig = go.Figure(data)
fig.update_layout(title="Olympics - 2021: Country &  Medals", font_size=16)
fig.show()



節點美國共獲得113枚獎牌(=39金+41銀+33銅) 節點金牌共有104枚獎牌(=美國39枚,中國38枚,日本27枚)
對于節點,由于hoverlabels 沒有提供新信息,通過傳遞一個空hovertemplate = ""來去掉hoverlabel。對于鏈接,可以使標簽簡潔,格式為「國家-獎牌類型」(例如 USA-Gold)。對于節點和鏈接,讓我們使用后綴"Medals"顯示值,例如顯示「113 Medals」而不是「113」。這可以通過使用具有適當valueformat和valuesuffix的update_traces函數來實現。
# Node definitions with positions; the single-space hovertemplate replaces
# the default hover text for nodes.
NODES = dict(
    #        0                           1                             2        3       4         5
    label=["United States of America", "People's Republic of China", "Japan", "Gold", "Silver", "Bronze"],
    color=["seagreen",                 "dodgerblue",                 "orange", "gold", "silver", "brown"],
    x=[0,                              0,                            0,        0.5,    0.5,      0.5],
    y=[0,                              0.5,                          1,        0.1,    0.5,      1],
    hovertemplate=" ",
)

# One "<country>-<medal>" label per link, in the same order as LINKS["source"]
# / LINKS["target"] (country-major, medal-minor).
LINK_LABELS = [
    f"{country}-{medal}"
    for country in ["USA", "China", "Japan"]
    for medal in ["Gold", "Silver", "Bronze"]
]

LINKS = dict(
    source=[0, 0, 0, 1, 1, 1, 2, 2, 2],           # index of the node each link starts from
    target=[3, 4, 5, 3, 4, 5, 3, 4, 5],           # index of the node each link ends at
    value=[39, 41, 33, 38, 32, 18, 27, 14, 17],   # link width (number of medals)
    # Link colours: a lighter shade of the source country's node colour.
    # Target node:  3-Gold          4-Silver        5-Bronze
    color=[
        "lightgreen",   "lightgreen",   "lightgreen",     # source node 0 - USA
        "lightskyblue", "lightskyblue", "lightskyblue",   # source node 1 - China
        "bisque",       "bisque",       "bisque",         # source node 2 - Japan
    ],
    label=LINK_LABELS,
    hovertemplate="%{label}",   # show only the link label on hover
)
# Build the figure and tune layout, value formatting and hover-label styling.
# Assumes plotly.graph_objects has already been imported as `go`.
data = go.Sankey(node=NODES, link=LINKS)
fig = go.Figure(data)
fig.update_layout(
    title="Olympics - 2021: Country &  Medals",
    font_size=16,
    width=1200,
    height=500,
)
# Format values as integers and append a unit; the leading space in the
# suffix keeps the number and the unit apart ("113 Medals", not "113Medals").
fig.update_traces(
    valueformat='3d',
    valuesuffix=' Medals',
    selector=dict(type='sankey'),
)
fig.update_layout(
    hoverlabel=dict(
        bgcolor="lightgray",
        font_size=16,
        font_family="Rockwell",
    )
)
# "png" selects plotly's static PNG renderer; call fig.show() with no
# argument for the default interactive output instead.
fig.show("png")

該代碼可以推廣到處理數據集中的所有國家。 還可以將圖表擴展到另一個層次,以可視化各國的獎牌總數。
# Build the node and link lists for the first NUM_COUNTRIES rows of df_medals.
# Layout: country nodes -> Gold/Silver/Bronze nodes -> one "Total Medals" node.
# Assumes df_medals has 'Country' and '<medal> Medals' columns.
NUM_COUNTRIES = 5
# X_POS: x position of the medal-type column.
# Y_POS: vertical spacing between country nodes; max(..., 1) avoids a
# ZeroDivisionError when NUM_COUNTRIES == 1.
X_POS, Y_POS = 0.5, 1 / max(NUM_COUNTRIES - 1, 1)
NODE_COLORS = ["seagreen", "dodgerblue", "orange", "palevioletred", "darkcyan"]
LINK_COLORS = ["lightgreen", "lightskyblue", "bisque", "pink", "lightcyan"]

source = []
node_x_pos, node_y_pos = [], []
node_labels = []
# Cycle through the palette so NUM_COUNTRIES may exceed the number of colours.
node_colors = [NODE_COLORS[n % len(NODE_COLORS)] for n in range(NUM_COUNTRIES)]
link_labels, link_colors, link_values = [], [], []

# First set of links and nodes: one node per country, three links each.
for i in range(NUM_COUNTRIES):
    source.extend([i] * 3)
    node_x_pos.append(0.01)
    # With NUM_COUNTRIES = 5 the last value is 1.01.
    # NOTE(review): confirm plotly accepts a y position slightly above 1.
    node_y_pos.append(round(i * Y_POS + 0.01, 2))
    country = df_medals['Country'].iloc[i]
    node_labels.append(country)
    for medal in ["Gold", "Silver", "Bronze"]:
        link_labels.append(f"{country}-{medal}")
        link_values.append(df_medals[f"{medal} Medals"].iloc[i])
    link_colors.extend([LINK_COLORS[i % len(LINK_COLORS)]] * 3)

# Medal-type nodes take the three indices right after the country nodes.
source_last = max(source) + 1
target = [source_last, source_last + 1, source_last + 2] * NUM_COUNTRIES
target_last = max(target) + 1
node_labels.extend(["Gold", "Silver", "Bronze"])
node_colors.extend(["gold", "silver", "brown"])
node_x_pos.extend([X_POS, X_POS, X_POS])
node_y_pos.extend([0.01, 0.5, 1])

# Last set of links and nodes: each medal type flows into "Total Medals".
source.extend([source_last, source_last + 1, source_last + 2])
target.extend([target_last] * 3)
node_labels.extend(["Total Medals"])
node_colors.extend(["grey"])
node_x_pos.extend([X_POS + 0.25])
node_y_pos.extend([0.5])
for medal in ["Gold", "Silver", "Bronze"]:
    link_labels.append(f"{medal}")
    # Sum over exactly the countries plotted above, without relying on the
    # loop variable left over from the earlier for-loop.
    link_values.append(df_medals[f"{medal} Medals"].iloc[:NUM_COUNTRIES].sum())
link_colors.extend(["gold", "silver", "brown"])

print("node_labels", node_labels)
print("node_x_pos", node_x_pos)
print("node_y_pos", node_y_pos)
node_labels ['United States of America', "People's Republic of China",
            'Japan', 'Great Britain', 'ROC', 'Gold', 'Silver',
            'Bronze', 'Total Medals']
node_x_pos [0.01, 0.01, 0.01, 0.01, 0.01, 0.5, 0.5, 0.5, 0.75]
node_y_pos [0.01, 0.26, 0.51, 0.76, 1.01, 0.01, 0.5, 1, 0.5]
# Draw the figure from the generated node and link lists.
# Assumes plotly.graph_objects has already been imported as `go`.
NODES = dict(
    pad=20,
    thickness=20,
    line=dict(color="lightslategrey", width=0.5),
    hovertemplate=" ",   # single space replaces the default node hover text
    label=node_labels,
    color=node_colors,
    x=node_x_pos,
    y=node_y_pos,
)
LINKS = dict(
    source=source,
    target=target,
    value=link_values,
    label=link_labels,
    color=link_colors,
    hovertemplate="%{label}",   # show only the link label on hover
)
data = go.Sankey(
    arrangement='snap',
    node=NODES,
    link=LINKS,
)
fig = go.Figure(data)
# Format values as integers followed by " Medals".
fig.update_traces(
    valueformat='3d',
    valuesuffix=' Medals',
    selector=dict(type='sankey'),
)
fig.update_layout(
    title="Olympics - 2021: Country &  Medals",
    font_size=16,
    width=1200,
    height=500,
)
fig.update_layout(
    hoverlabel=dict(
        bgcolor="grey",
        font_size=14,
        font_family="Rockwell",
    )
)
# "png" selects plotly's static PNG renderer.
fig.show("png")



延伸閱讀《利用Python進行數據分析》
干貨直達??
評論
圖片
表情
