Files

121 lines
5.8 KiB
Python
Raw Permalink Normal View History

import os
import requests
import time
# Listing endpoint that the month download functions request.
default_url = 'http://db.history.go.kr/item/level.do'

# Browser-like User-Agent string sent in the request headers.
default_user_agent = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/101.0.4951.54 Safari/537.36'
)
default_headers = {'User-Agent': default_user_agent}

# Sent as the `itemId` parameter and used as the prefix of every `levelId`.
default_item_id = 'tcct'

# Pause, in seconds, passed to time.sleep() between downloads.
default_wait_seconds = 5.0
def get_default_params():
    """Return a new dict holding the query parameters common to every request.

    A fresh dict is built on each call, so the caller can add or overwrite
    keys without affecting later calls.
    """
    return dict(
        sort='levelId',
        dir='ASC',
        page=1,
        pre_page=1,
        setId=-1,
        totalCount=0,
        prevPage=1,
        synonym='off',
        chinessChar='on',
        searchKeywordType='isLeaf',
        searchKeywordMethod='EQ',
        searchKeyword='1',
        searchKeywordConjunction='AND',
        position=0,
    )
# Seconds to wait on the server (connect / between bytes) before giving up.
# Without a timeout, requests would block forever on a stalled connection.
default_timeout_seconds = 60.0


def _download_month(year, month, limit, suffix):
    """Request one month's listing and save the raw response body to disk.

    Args:
        year: Four-digit year (int) used in the ``levelId`` and file name.
        month: Month number (int, 1-12) used in the ``levelId`` and file name.
        limit: Value sent as the ``limit`` query parameter.
        suffix: Trailing part of the output file name, after ``YYYY_MM_``.

    Returns:
        The name of the file the response body was written to.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
    """
    params = get_default_params()
    params['start'] = 1
    params['limit'] = limit
    params['itemId'] = default_item_id
    params['levelId'] = '{}_{:04d}_{:02d}'.format(default_item_id, year, month)
    file_name = '{:04d}_{:02d}_{}'.format(year, month, suffix)
    res = requests.get(
        default_url,
        headers=default_headers,
        params=params,
        timeout=default_timeout_seconds,
    )
    # Stop on an HTTP error rather than saving an error page as if it were data.
    res.raise_for_status()
    return write_file(file_name, res.content)


def get_month_first(year, month):
    """Download the first page (limit 20) of a month into ``YYYY_MM_1st``.

    Returns the name of the written file.
    """
    return _download_month(year, month, 20, '1st')


def get_month_all(year, month, total_items):
    """Download a month's listing with ``limit=total_items`` into ``YYYY_MM_all``.

    Returns the name of the written file.
    """
    return _download_month(year, month, total_items, 'all')
def write_file(file_name, data):
    """Write the bytes in *data* to *file_name* and return *file_name*.

    The file is opened in binary write mode, so existing content is replaced.
    A "<file_name> (<size> bytes)" line is printed before the data is written.
    """
    with open(file_name, 'wb') as out:
        size = len(data)
        print('{} ({} bytes)'.format(file_name, size))
        out.write(data)
    return file_name
def get_inner_string(str, prefix, postfix):
    """Return the stripped text between the first *prefix* and the next *postfix*.

    The search for *postfix* starts right after the end of *prefix*.
    An empty string is returned when either marker cannot be found.

    Note: the parameter name ``str`` shadows the builtin inside this function;
    it is kept so keyword callers continue to work.
    """
    prefix_at = str.find(prefix)
    if prefix_at == -1:
        return ""

    inner_from = prefix_at + len(prefix)
    inner_to = str.find(postfix, inner_from)
    if inner_to == -1:
        return ""

    return str[inner_from:inner_to].strip()
def get_month_total_count(file_name):
    """Read a saved listing page and return the item count it contains.

    The count is taken from the text between ``<span class='pit'> `` and the
    following ``</span>`` in the file.

    Args:
        file_name: Path of a UTF-8 text file to read.

    Returns:
        The extracted count as an int.

    Raises:
        ValueError: if the marker is missing or the enclosed text is not an
            integer.
    """
    # Use a context manager so the handle is closed as soon as the read ends.
    with open(file_name, mode='r', encoding='utf8') as f:
        data = f.read()
    inner = get_inner_string(data, "<span class='pit'> ", '</span>')
    try:
        return int(inner)
    except ValueError:
        # Name the file in the message; a bare int('') error gives no clue
        # which download was unusable.
        raise ValueError(
            'no item count found in {!r} (extracted {!r})'.format(file_name, inner)
        ) from None
def down_year(year):
    """Download months 1 through 12 of *year*, pausing after each month."""
    months = range(1, 13)
    for current_month in months:
        down_month(year, current_month)
        # Wait between months before issuing the next requests.
        time.sleep(default_wait_seconds)
def down_month(year, month):
    """Download one month: the first page, then the full listing.

    The first page is fetched only to read the item count, which is then
    passed to the second request as its limit. The count is printed at the end.
    """
    first_page_file = get_month_first(year, month)
    item_count = get_month_total_count(first_page_file)
    # The returned file name is not needed here.
    get_month_all(year, month, item_count)
    print('total {}'.format(item_count))
# Script body: download every year from 1985 through 2008 (the range end is
# exclusive), pausing after each year.
for year in range(1985, 2009):
    down_year(year)
    time.sleep(default_wait_seconds)
#http://db.history.go.kr/item/level.do?sort=levelId&dir=ASC&start=1&limit=80&page=1&pre_page=0&setId=-1&totalCount=0&prevPage=1&prevLimit=20&itemId=tcct&types=&synonym=off&chinessChar=off&brokerPagingInfo=&levelId=tcct_1945_08&searchKeywordType=levelId&searchKeywordMethod=LIKE_LEFT&searchKeyword=tcct_1945_08&searchKeywordConjunction=AND&searchKeywordType=isLeaf&searchKeywordMethod=EQ&searchKeyword=1&searchKeywordConjunction=AND
#http://db.history.go.kr/item/level.do?sort=levelId&dir=ASC&start=1&limit=166&page=1&pre_page=0&setId=-1&totalCount=0&prevPage=1&prevLimit=20&itemId=tcct&types=&synonym=off&chinessChar=off&brokerPagingInfo=&levelId=tcct_1945_09&searchKeywordType=levelId&searchKeywordMethod=LIKE_LEFT&searchKeyword=tcct_1945_09&searchKeywordConjunction=AND&searchKeywordType=isLeaf&searchKeywordMethod=EQ&searchKeyword=1&searchKeywordConjunction=AND
# View all of August (whole-month listing)
# http://db.history.go.kr/item/level.do?sort=levelId&dir=ASC&start=1&limit=80&page=1&pre_page=0&setId=-1&totalCount=0&prevPage=1&prevLimit=20&itemId=tcct&types=&synonym=off&chinessChar=off&brokerPagingInfo=&levelId=tcct_1945_08
# &searchKeywordType=levelId&searchKeywordMethod=LIKE_LEFT&searchKeyword=tcct_1945_08&searchKeywordConjunction=AND
# &searchKeywordType=isLeaf&searchKeywordMethod=EQ&searchKeyword=1&searchKeywordConjunction=AND
# http://db.history.go.kr/item/level.do?sort=levelId&dir=ASC&start=4&limit=20&page=4&pre_page=1&setId=-1&totalCount=0&prevPage=1&prevLimit=&itemId=tcct&types=&synonym=off&chinessChar=off&brokerPagingInfo=&levelId=tcct_1945_08
# &searchKeywordType=levelId&searchKeywordMethod=LIKE_LEFT&searchKeyword=tcct_1945_08&searchKeywordConjunction=AND
# &searchKeywordType=isLeaf&searchKeywordMethod=EQ&searchKeyword=1&searchKeywordConjunction=AND
# View a specific date
# http://db.history.go.kr/item/level.do?sort=levelId&dir=ASC&start=1&limit=20&page=1&pre_page=1&setId=-1&totalCount=0&prevPage=1&prevLimit=&itemId=tcct&types=&synonym=off&chinessChar=on&brokerPagingInfo=&levelId=tcct_1945_08_15
# &searchKeywordType=levelId&searchKeywordMethod=LIKE&searchKeyword=tcct_1945_08_15&searchKeywordConjunction=AND
# &searchKeywordType=isLeaf&searchKeywordMethod=EQ&searchKeyword=1&searchKeywordConjunction=AND&position=0
# http://db.history.go.kr/item/level.do?sort=levelId&dir=ASC&start=1&limit=20&page=1&pre_page=1&setId=-1&totalCount=0&prevPage=1&prevLimit=&itemId=tcct&types=&synonym=off&chinessChar=on&brokerPagingInfo=&levelId=tcct_1945_08_16
# &searchKeywordType=levelId&searchKeywordMethod=LIKE&searchKeyword=tcct_1945_08_16&searchKeywordConjunction=AND
# &searchKeywordType=isLeaf&searchKeywordMethod=EQ&searchKeyword=1&searchKeywordConjunction=AND&position=0
# http://db.history.go.kr/item/level.do?sort=levelId&dir=ASC&start=1&limit=20&page=1&pre_page=1&setId=-1&totalCount=0&prevPage=1&prevLimit=&itemId=tcct&types=&synonym=off&chinessChar=on&brokerPagingInfo=&levelId=tcct_2000_12_29
# &searchKeywordType=levelId&searchKeywordMethod=LIKE&searchKeyword=tcct_2000_12_29&searchKeywordConjunction=AND
# &searchKeywordType=isLeaf&searchKeywordMethod=EQ&searchKeyword=1&searchKeywordConjunction=AND&position=0