import pandas as pd
import json
# Path to the usagov/bitly click-log sample; the file is read as one JSON object per line.
경로 = "data/usagov_bitly_data2012-03-16-1331923249.txt"
with open(경로, encoding='utf-8') as f:
    # Must be indented under `with`: the file has to be open while it is iterated.
    records = [json.loads(line) for line in f]
# Load the parsed records into a DataFrame
frame = pd.DataFrame(records)
frame[:3]
_heartbeat_ | a | al | c | cy | g | gr | h | hc | hh | kw | l | ll | nk | r | t | tz | u | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | NaN | Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi... | en-US,en;q=0.8 | US | Danvers | A6qOVH | MA | wfLQtf | 1.331823e+09 | 1.usa.gov | NaN | orofrog | [42.576698, -70.954903] | 1.0 | http://www.facebook.com/l/7AQEFzjSi/1.usa.gov/... | 1.331923e+09 | America/New_York | http://www.ncbi.nlm.nih.gov/pubmed/22415991 |
1 | NaN | GoogleMaps/RochesterNY | NaN | US | Provo | mwszkS | UT | mwszkS | 1.308262e+09 | j.mp | NaN | bitly | [40.218102, -111.613297] | 0.0 | http://www.AwareMap.com/ | 1.331923e+09 | America/Denver | http://www.monroecounty.gov/etc/911/rss.php |
2 | NaN | Mozilla/4.0 (compatible; MSIE 8.0; Windows NT ... | en-US | US | Washington | xxr3Qb | DC | xxr3Qb | 1.331920e+09 | 1.usa.gov | NaN | bitly | [38.9007, -77.043098] | 1.0 | http://t.co/03elZC4Q | 1.331923e+09 | America/New_York | http://boxer.senate.gov/en/press/releases/0316... |
## Summary info (dtypes and non-null counts per column)
frame.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3560 entries, 0 to 3559 Data columns (total 18 columns): _heartbeat_ 120 non-null float64 a 3440 non-null object al 3094 non-null object c 2919 non-null object cy 2919 non-null object g 3440 non-null object gr 2919 non-null object h 3440 non-null object hc 3440 non-null float64 hh 3440 non-null object kw 93 non-null object l 3440 non-null object ll 2919 non-null object nk 3440 non-null float64 r 3440 non-null object t 3440 non-null float64 tz 3440 non-null object u 3440 non-null object dtypes: float64(4), object(14) memory usage: 500.7+ KB
## Time zone frequency count
frame["tz"][:10]
0 America/New_York 1 America/Denver 2 America/New_York 3 America/Sao_Paulo 4 America/New_York 5 America/New_York 6 Europe/Warsaw 7 8 9 Name: tz, dtype: object
# First ten entries of the time zone frequency table
frame["tz"].value_counts()[:10]
America/New_York 1251 521 America/Chicago 400 America/Los_Angeles 382 America/Denver 191 Europe/London 74 Asia/Tokyo 37 Pacific/Honolulu 36 Europe/Madrid 35 America/Sao_Paulo 33 Name: tz, dtype: int64
## Null entries are labelled "누락" (missing); empty strings are labelled "모름" (unknown).
clean_tz = frame['tz'].fillna("누락")
## Empty strings: build a boolean mask, then assign through it
공백필터 = clean_tz.eq('')
clean_tz.loc[공백필터] = "모름"
clean_tz.head(15)
0 America/New_York 1 America/Denver 2 America/New_York 3 America/Sao_Paulo 4 America/New_York 5 America/New_York 6 Europe/Warsaw 7 모름 8 모름 9 모름 10 America/Los_Angeles 11 America/New_York 12 America/New_York 13 누락 14 America/New_York Name: tz, dtype: object
# First fifteen frequencies after relabelling missing and empty values
clean_tz.value_counts()[:15]
America/New_York 1251 모름 521 America/Chicago 400 America/Los_Angeles 382 America/Denver 191 누락 120 Europe/London 74 Asia/Tokyo 37 Pacific/Honolulu 36 Europe/Madrid 35 America/Sao_Paulo 33 Europe/Berlin 28 Europe/Rome 27 America/Rainy_River 25 Europe/Amsterdam 22 Name: tz, dtype: int64
# Load matplotlib for inline plotting (IPython/Jupyter magic, not plain Python)
%matplotlib inline
## Time zone frequencies
tz_counts = clean_tz.value_counts()
# Default plot of the first ten counts
tz_counts.head(10).plot()
<matplotlib.axes._subplots.AxesSubplot at 0x24d3026a9b0>
# Same ten counts drawn as a horizontal bar chart
tz_counts[:10].plot(kind="barh")
<matplotlib.axes._subplots.AxesSubplot at 0x24d3d9754a8>
# Korean text in plots
# Bring in the packages and libraries needed for plotting.
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
# Fix the minus sign rendering as a broken glyph
# (setting a custom font triggers that problem, hence the setting below)
mpl.rcParams['axes.unicode_minus'] = False
plt.rcParams["font.family"] = 'NanumGothic'
plt.rcParams["font.size"] = 12
# Plot kind names are lower-case; "barH" is not accepted by current pandas.
tz_counts[:10].plot(kind="barh")
<matplotlib.axes._subplots.AxesSubplot at 0x24d3c26d160>
# Check the font names registered with matplotlib's font manager
flist = [(font.name, font.fname) for font in fm.fontManager.ttflist]
# Two columns: "이름" (name) and "경로" (file path)
fontFrame = pd.DataFrame(flist, columns=["이름", "경로"])
# (fontFrame[:5] previews the full table)
# Keep only the rows whose font name contains "Nanum"
fontFrame.loc[fontFrame["이름"].str.find("Nanum").ge(0)]
이름 | 경로 | |
---|---|---|
63 | NanumGothic_Coding | C:\Windows\Fonts\NanumGothic_Coding_Bold.ttf |
106 | NanumGothic | C:\Windows\Fonts\BOLD.TTF |
127 | NanumGothic_Coding | C:\Windows\Fonts\NanumGothic_Coding.ttf |
195 | NanumGothic | C:\Windows\Fonts\EXTRABOLD.TTF |
198 | NanumGothic | C:\WINDOWS\Fonts\.TTF |
# Analysis using the agent column ('a'): start with the first record's value
agent = frame['a'][0]
agent
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.78 Safari/535.11'
# Split the string: with no argument, split() separates on whitespace
agent.split()
['Mozilla/5.0', '(Windows', 'NT', '6.1;', 'WOW64)', 'AppleWebKit/535.11', '(KHTML,', 'like', 'Gecko)', 'Chrome/17.0.963.78', 'Safari/535.11']
# First whitespace-separated token of the agent string
agent.split()[0]
'Mozilla/5.0'
# Looping with a Python for statement is not a good approach here.
# Series.str means "the strings held in the Series" (plural).
# Extract the browser string
# Split every string, take the first element of each resulting list, and store the result in browsers.
browsers = frame['a'].str.split().str[0]
## Check the browsers
browsers[:5]
0 Mozilla/5.0 1 GoogleMaps/RochesterNY 2 Mozilla/4.0 3 Mozilla/5.0 4 Mozilla/5.0 Name: a, dtype: object
# Ten randomly sampled agent strings
frame['a'].sample(10)
1311 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi... 3030 Mozilla/4.0 (compatible; MSIE 8.0; Windows NT ... 1862 Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2)... 1028 TEST_INTERNET_AGENT 2440 Mozilla/4.0 (compatible; MSIE 7.0; Windows NT ... 2507 Mozilla/4.0 (compatible; MSIE 7.0; Windows NT ... 2617 GoogleMaps/RochesterNY 3122 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2)... 542 Mozilla/4.0 (compatible; MSIE 8.0; Windows NT ... 1565 Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; r... Name: a, dtype: object
## Flag whether the operating system is Windows (agent string contains "Windows")
# Rows with a missing agent yield NaN rather than False, so the result has object dtype.
윈도우 = frame["a"].str.contains("Windows")
윈도우[:5]
0 True 1 False 2 True 3 False 4 True Name: a, dtype: object
# Relabel for readability --> "Windows" / "Not Win"
윈도우 = 윈도우.replace({True:"Windows", False:"Not Win"})
# Column tz: time zone
# frame has no Windows column, so the 윈도우 Series itself is passed as the second grouping key.
시간대_운영체제별그룹 = frame.groupby(['tz', 윈도우])
시간대_운영체제별그룹
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000024D3FF1F0F0>
# size(): reports the size of each group produced by the groupby.
tz_os_counts = 시간대_운영체제별그룹.size()
# Check the type of the result that was just computed
print(type(tz_os_counts))
<class 'pandas.core.series.Series'>
# First ten (time zone, OS) group sizes
tz_os_counts[:10]
tz a Not Win 245 Windows 276 Africa/Cairo Windows 3 Africa/Casablanca Windows 1 Africa/Ceuta Windows 2 Africa/Johannesburg Windows 1 Africa/Lusaka Windows 1 America/Anchorage Not Win 4 Windows 1 America/Argentina/Buenos_Aires Not Win 1 dtype: int64
# Pivot the counts (values) indexed by time zone and OS.
# Check the help text
#tz_os_counts.unstack?
tz_os_counts.unstack()
a | Not Win | Windows |
---|---|---|
tz | ||
245.0 | 276.0 | |
Africa/Cairo | NaN | 3.0 |
Africa/Casablanca | NaN | 1.0 |
Africa/Ceuta | NaN | 2.0 |
Africa/Johannesburg | NaN | 1.0 |
Africa/Lusaka | NaN | 1.0 |
America/Anchorage | 4.0 | 1.0 |
America/Argentina/Buenos_Aires | 1.0 | NaN |
America/Argentina/Cordoba | NaN | 1.0 |
America/Argentina/Mendoza | NaN | 1.0 |
America/Bogota | 1.0 | 2.0 |
America/Caracas | NaN | 1.0 |
America/Chicago | 115.0 | 285.0 |
America/Chihuahua | 1.0 | 1.0 |
America/Costa_Rica | NaN | 1.0 |
America/Denver | 132.0 | 59.0 |
America/Edmonton | 2.0 | 4.0 |
America/Guayaquil | 2.0 | NaN |
America/Halifax | 1.0 | 3.0 |
America/Indianapolis | 8.0 | 12.0 |
America/La_Paz | NaN | 1.0 |
America/Lima | NaN | 1.0 |
America/Los_Angeles | 130.0 | 252.0 |
America/Managua | NaN | 3.0 |
America/Mazatlan | 1.0 | NaN |
America/Mexico_City | 7.0 | 8.0 |
America/Monterrey | 1.0 | NaN |
America/Montevideo | NaN | 1.0 |
America/Montreal | 3.0 | 6.0 |
America/New_York | 339.0 | 912.0 |
... | ... | ... |
Europe/Berlin | 9.0 | 19.0 |
Europe/Bratislava | 1.0 | 2.0 |
Europe/Brussels | 1.0 | 3.0 |
Europe/Bucharest | 1.0 | 3.0 |
Europe/Budapest | NaN | 5.0 |
Europe/Copenhagen | 2.0 | 3.0 |
Europe/Dublin | 1.0 | 2.0 |
Europe/Helsinki | 2.0 | 8.0 |
Europe/Lisbon | 1.0 | 7.0 |
Europe/Ljubljana | NaN | 1.0 |
Europe/London | 43.0 | 31.0 |
Europe/Madrid | 16.0 | 19.0 |
Europe/Malta | NaN | 2.0 |
Europe/Moscow | 1.0 | 9.0 |
Europe/Oslo | 2.0 | 8.0 |
Europe/Paris | 4.0 | 10.0 |
Europe/Prague | 3.0 | 7.0 |
Europe/Riga | 1.0 | 1.0 |
Europe/Rome | 8.0 | 19.0 |
Europe/Skopje | NaN | 1.0 |
Europe/Sofia | NaN | 1.0 |
Europe/Stockholm | 2.0 | 12.0 |
Europe/Uzhgorod | NaN | 1.0 |
Europe/Vienna | 3.0 | 3.0 |
Europe/Vilnius | NaN | 2.0 |
Europe/Volgograd | NaN | 1.0 |
Europe/Warsaw | 1.0 | 15.0 |
Europe/Zurich | 4.0 | NaN |
Pacific/Auckland | 3.0 | 8.0 |
Pacific/Honolulu | NaN | 36.0 |
97 rows × 2 columns
## Pivoted access counts by time zone and OS.
접속횟수 = tz_os_counts.unstack()
# Check the data.
접속횟수[:10]
# Note that NaN entries are present.
a | Not Win | Windows |
---|---|---|
tz | ||
245.0 | 276.0 | |
Africa/Cairo | NaN | 3.0 |
Africa/Casablanca | NaN | 1.0 |
Africa/Ceuta | NaN | 2.0 |
Africa/Johannesburg | NaN | 1.0 |
Africa/Lusaka | NaN | 1.0 |
America/Anchorage | 4.0 | 1.0 |
America/Argentina/Buenos_Aires | 1.0 | NaN |
America/Argentina/Cordoba | NaN | 1.0 |
America/Argentina/Mendoza | NaN | 1.0 |
## Replace null values (NaN) with 0
접속횟수 = 접속횟수.fillna(0)
접속횟수[:10]
a | Not Win | Windows |
---|---|---|
tz | ||
245.0 | 276.0 | |
Africa/Cairo | 0.0 | 3.0 |
Africa/Casablanca | 0.0 | 1.0 |
Africa/Ceuta | 0.0 | 2.0 |
Africa/Johannesburg | 0.0 | 1.0 |
Africa/Lusaka | 0.0 | 1.0 |
America/Anchorage | 4.0 | 1.0 |
America/Argentina/Buenos_Aires | 1.0 | 0.0 |
America/Argentina/Cordoba | 0.0 | 1.0 |
America/Argentina/Mendoza | 0.0 | 1.0 |
## Check the summary info
접속횟수.info()
<class 'pandas.core.frame.DataFrame'> Index: 97 entries, to Pacific/Honolulu Data columns (total 2 columns): Not Win 97 non-null float64 Windows 97 non-null float64 dtypes: float64(2) memory usage: 2.3+ KB
# Take the first 5 rows and check the Windows column
접속횟수[:5]['Windows']
tz 276.0 Africa/Cairo 3.0 Africa/Casablanca 1.0 Africa/Ceuta 2.0 Africa/Johannesburg 1.0 Name: Windows, dtype: float64
# Sort: top 10 rows by the Windows column, in descending order.
접속횟수.sort_values(by="Windows", ascending=False)[:10]
a | Not Win | Windows |
---|---|---|
tz | ||
America/New_York | 339.0 | 912.0 |
America/Chicago | 115.0 | 285.0 |
245.0 | 276.0 | |
America/Los_Angeles | 130.0 | 252.0 |
America/Denver | 132.0 | 59.0 |
Pacific/Honolulu | 0.0 | 36.0 |
Asia/Tokyo | 2.0 | 35.0 |
Europe/London | 43.0 | 31.0 |
America/Sao_Paulo | 13.0 | 20.0 |
Europe/Madrid | 16.0 | 19.0 |
# Change the order of the displayed columns
접속횟수[["Windows", "Not Win"]].sort_values(by="Windows", ascending=False)[:10]
a | Windows | Not Win |
---|---|---|
tz | ||
America/New_York | 912.0 | 339.0 |
America/Chicago | 285.0 | 115.0 |
276.0 | 245.0 | |
America/Los_Angeles | 252.0 | 130.0 |
America/Denver | 59.0 | 132.0 |
Pacific/Honolulu | 36.0 | 0.0 |
Asia/Tokyo | 35.0 | 2.0 |
Europe/London | 31.0 | 43.0 |
America/Sao_Paulo | 20.0 | 13.0 |
Europe/Madrid | 19.0 | 16.0 |
# Goal: sort by the combined total of Windows and Not Win.
# The total will not be added as a column.
## How the sum function is used.
## Column totals.
접속횟수.sum(0)
a Not Win 1194.0 Windows 2246.0 dtype: float64
# Totals per row.
접속횟수.sum(1)
tz 521.0 Africa/Cairo 3.0 Africa/Casablanca 1.0 Africa/Ceuta 2.0 Africa/Johannesburg 1.0 Africa/Lusaka 1.0 America/Anchorage 5.0 America/Argentina/Buenos_Aires 1.0 America/Argentina/Cordoba 1.0 America/Argentina/Mendoza 1.0 America/Bogota 3.0 America/Caracas 1.0 America/Chicago 400.0 America/Chihuahua 2.0 America/Costa_Rica 1.0 America/Denver 191.0 America/Edmonton 6.0 America/Guayaquil 2.0 America/Halifax 4.0 America/Indianapolis 20.0 America/La_Paz 1.0 America/Lima 1.0 America/Los_Angeles 382.0 America/Managua 3.0 America/Mazatlan 1.0 America/Mexico_City 15.0 America/Monterrey 1.0 America/Montevideo 1.0 America/Montreal 9.0 America/New_York 1251.0 ... Europe/Berlin 28.0 Europe/Bratislava 3.0 Europe/Brussels 4.0 Europe/Bucharest 4.0 Europe/Budapest 5.0 Europe/Copenhagen 5.0 Europe/Dublin 3.0 Europe/Helsinki 10.0 Europe/Lisbon 8.0 Europe/Ljubljana 1.0 Europe/London 74.0 Europe/Madrid 35.0 Europe/Malta 2.0 Europe/Moscow 10.0 Europe/Oslo 10.0 Europe/Paris 14.0 Europe/Prague 10.0 Europe/Riga 2.0 Europe/Rome 27.0 Europe/Skopje 1.0 Europe/Sofia 1.0 Europe/Stockholm 14.0 Europe/Uzhgorod 1.0 Europe/Vienna 6.0 Europe/Vilnius 2.0 Europe/Volgograd 1.0 Europe/Warsaw 16.0 Europe/Zurich 4.0 Pacific/Auckland 11.0 Pacific/Honolulu 36.0 Length: 97, dtype: float64
## Keep the per-row totals in a variable
행별합계 = 접속횟수.sum(axis=1)
## Row totals sorted in descending order.
행별합계.sort_values(ascending=False)
tz America/New_York 1251.0 521.0 America/Chicago 400.0 America/Los_Angeles 382.0 America/Denver 191.0 Europe/London 74.0 Asia/Tokyo 37.0 Pacific/Honolulu 36.0 Europe/Madrid 35.0 America/Sao_Paulo 33.0 Europe/Berlin 28.0 Europe/Rome 27.0 America/Rainy_River 25.0 Europe/Amsterdam 22.0 America/Indianapolis 20.0 America/Phoenix 20.0 Europe/Warsaw 16.0 America/Mexico_City 15.0 Europe/Paris 14.0 Europe/Stockholm 14.0 America/Vancouver 12.0 Pacific/Auckland 11.0 Europe/Moscow 10.0 Europe/Oslo 10.0 Europe/Prague 10.0 Europe/Helsinki 10.0 Asia/Hong_Kong 10.0 America/Puerto_Rico 10.0 Asia/Calcutta 9.0 America/Montreal 9.0 ... Europe/Riga 2.0 Asia/Kuching 1.0 America/Costa_Rica 1.0 America/Caracas 1.0 Europe/Skopje 1.0 America/Argentina/Mendoza 1.0 America/Argentina/Cordoba 1.0 America/Argentina/Buenos_Aires 1.0 Africa/Lusaka 1.0 Africa/Johannesburg 1.0 Africa/Casablanca 1.0 Europe/Uzhgorod 1.0 Asia/Pontianak 1.0 Europe/Ljubljana 1.0 America/Montevideo 1.0 America/Tegucigalpa 1.0 America/St_Kitts 1.0 Asia/Yekaterinburg 1.0 America/Santo_Domingo 1.0 Australia/Queensland 1.0 Asia/Novosibirsk 1.0 America/Monterrey 1.0 Asia/Riyadh 1.0 Asia/Nicosia 1.0 Asia/Manila 1.0 Europe/Sofia 1.0 Europe/Volgograd 1.0 America/Lima 1.0 America/La_Paz 1.0 America/Mazatlan 1.0 Length: 97, dtype: float64
## Extract the index of the row totals sorted in descending order.
행별합계.sort_values(ascending=False).index
Index(['America/New_York', '', 'America/Chicago', 'America/Los_Angeles', 'America/Denver', 'Europe/London', 'Asia/Tokyo', 'Pacific/Honolulu', 'Europe/Madrid', 'America/Sao_Paulo', 'Europe/Berlin', 'Europe/Rome', 'America/Rainy_River', 'Europe/Amsterdam', 'America/Indianapolis', 'America/Phoenix', 'Europe/Warsaw', 'America/Mexico_City', 'Europe/Paris', 'Europe/Stockholm', 'America/Vancouver', 'Pacific/Auckland', 'Europe/Moscow', 'Europe/Oslo', 'Europe/Prague', 'Europe/Helsinki', 'Asia/Hong_Kong', 'America/Puerto_Rico', 'Asia/Calcutta', 'America/Montreal', 'Asia/Istanbul', 'Europe/Lisbon', 'Europe/Athens', 'Chile/Continental', 'Australia/NSW', 'Europe/Vienna', 'America/Edmonton', 'Asia/Bangkok', 'Europe/Budapest', 'Europe/Copenhagen', 'Asia/Seoul', 'America/Anchorage', 'America/Halifax', 'America/Winnipeg', 'Asia/Beirut', 'Europe/Zurich', 'Asia/Dubai', 'Europe/Bucharest', 'Europe/Brussels', 'Asia/Jerusalem', 'Europe/Bratislava', 'Africa/Cairo', 'America/Bogota', 'Europe/Dublin', 'America/Managua', 'Asia/Karachi', 'Asia/Jakarta', 'Asia/Harbin', 'Asia/Kuala_Lumpur', 'Europe/Malta', 'Africa/Ceuta', 'America/Recife', 'America/Chihuahua', 'Europe/Belgrade', 'Asia/Amman', 'Europe/Vilnius', 'America/Guayaquil', 'Europe/Riga', 'Asia/Kuching', 'America/Costa_Rica', 'America/Caracas', 'Europe/Skopje', 'America/Argentina/Mendoza', 'America/Argentina/Cordoba', 'America/Argentina/Buenos_Aires', 'Africa/Lusaka', 'Africa/Johannesburg', 'Africa/Casablanca', 'Europe/Uzhgorod', 'Asia/Pontianak', 'Europe/Ljubljana', 'America/Montevideo', 'America/Tegucigalpa', 'America/St_Kitts', 'Asia/Yekaterinburg', 'America/Santo_Domingo', 'Australia/Queensland', 'Asia/Novosibirsk', 'America/Monterrey', 'Asia/Riyadh', 'Asia/Nicosia', 'Asia/Manila', 'Europe/Sofia', 'Europe/Volgograd', 'America/Lima', 'America/La_Paz', 'America/Mazatlan'], dtype='object', name='tz')
## Index of the row totals sorted in descending order -- the labels ordered by total.
합계역순색인 = 행별합계.sort_values(ascending=False).index
## Using loc
## DataFrame.loc[[list of labels]] : returns the rows in the order the labels (keys) are listed
접속횟수.loc[['Europe/Malta', 'Africa/Ceuta', 'America/Recife']]
a | Not Win | Windows |
---|---|---|
tz | ||
Europe/Malta | 0.0 | 2.0 |
Africa/Ceuta | 0.0 | 2.0 |
America/Recife | 0.0 | 2.0 |
## Use the sorted index to list the rows from the largest total downwards
접속횟수.loc[합계역순색인][:10]
a | Not Win | Windows |
---|---|---|
tz | ||
America/New_York | 339.0 | 912.0 |
245.0 | 276.0 | |
America/Chicago | 115.0 | 285.0 |
America/Los_Angeles | 130.0 | 252.0 |
America/Denver | 132.0 | 59.0 |
Europe/London | 43.0 | 31.0 |
Asia/Tokyo | 2.0 | 35.0 |
Pacific/Honolulu | 0.0 | 36.0 |
Europe/Madrid | 16.0 | 19.0 |
America/Sao_Paulo | 13.0 | 20.0 |
# Horizontal bar chart (kind='barh') of the ten rows with the largest totals
접속횟수.loc[합계역순색인][:10].plot(kind='barh')
<matplotlib.axes._subplots.AxesSubplot at 0x24d40002f28>
# Stacked bar chart
접속횟수.loc[합계역순색인][:10].plot(kind='barh', stacked=True)
<matplotlib.axes._subplots.AxesSubplot at 0x24d4014e978>
## Row totals....
행별합계 = 접속횟수.sum(axis=1)
# Ratios: divide along the index so each row is scaled by its own total,
# turning the counts into the share of each OS type within a time zone.
접속비율 = 접속횟수.div(행별합계, axis="index")
접속비율.head(10)
a | Not Win | Windows |
---|---|---|
tz | ||
0.47025 | 0.52975 | |
Africa/Cairo | 0.00000 | 1.00000 |
Africa/Casablanca | 0.00000 | 1.00000 |
Africa/Ceuta | 0.00000 | 1.00000 |
Africa/Johannesburg | 0.00000 | 1.00000 |
Africa/Lusaka | 0.00000 | 1.00000 |
America/Anchorage | 0.80000 | 0.20000 |
America/Argentina/Buenos_Aires | 1.00000 | 0.00000 |
America/Argentina/Cordoba | 0.00000 | 1.00000 |
America/Argentina/Mendoza | 0.00000 | 1.00000 |
# Stacked horizontal bars of the OS shares.
# NOTE(review): this takes the first 10 rows in index order, not the ten largest
# totals (접속비율.loc[합계역순색인][:10]) used for the count plots -- confirm intent.
접속비율[:10].plot(kind='barh', stacked=True)
<matplotlib.axes._subplots.AxesSubplot at 0x24d40163b70>
파이썬(Python) 고급문자열 처리 (0) | 2020.12.31 |
---|---|
파이썬(Python) - 분석 실습 영화평점분석 (0) | 2020.12.29 |
파이썬(Python) 분석실습 - URL을 통한 HTML 데이터 수집 및 분석 (0) | 2020.12.29 |
파이썬(Python) 분석실습 - 웹페이지 복사 로딩 후 분석 (0) | 2020.12.28 |
파이썬(Python) 고급자료구조 (0) | 2020.12.28 |