121 lines
5.8 KiB
Python
121 lines
5.8 KiB
Python
import os
|
|
import requests
|
|
import time
|
|
|
|
# Pause, in seconds, passed to time.sleep() between consecutive downloads.
default_wait_seconds = 5.0

# Item identifier: sent as the ``itemId`` parameter and used as the prefix
# of every ``levelId`` value.
default_item_id = 'tcct'

# URL that every listing request is sent to.
default_url = 'http://db.history.go.kr/item/level.do'

# Browser-like User-Agent string sent with every request.
default_user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36'
default_headers = {'User-Agent': default_user_agent}
|
|
|
|
def get_default_params():
    """Return a fresh dict of the query parameters common to every request.

    A new dict is built on each call, so callers may add or overwrite keys
    on the result without affecting later calls.
    """
    # Kept as an ordered sequence of pairs: dict insertion order determines
    # the order in which the parameters are encoded into the query string.
    base_pairs = (
        ('sort', 'levelId'),
        ('dir', 'ASC'),
        ('page', 1),
        ('pre_page', 1),
        ('setId', -1),
        ('totalCount', 0),
        ('prevPage', 1),
        ('synonym', 'off'),
        ('chinessChar', 'on'),
        ('searchKeywordType', 'isLeaf'),
        ('searchKeywordMethod', 'EQ'),
        ('searchKeyword', '1'),
        ('searchKeywordConjunction', 'AND'),
        ('position', 0),
    )
    return dict(base_pairs)
|
|
|
|
|
|
def get_month_first(year, month, timeout=30.0):
    """Download the first listing page for one month and save it to disk.

    Sends the default parameters plus ``start=1``, ``limit=20``, the default
    ``itemId`` and ``levelId=<itemId>_<YYYY>_<MM>``, then writes the raw
    response body to a file named ``<YYYY>_<MM>_1st`` (relative to the
    current working directory).

    Args:
        year: Year as an int; formatted as four digits.
        month: Month as an int; formatted as two digits.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The name of the file that was written.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    params = get_default_params()
    params.update({
        'start': 1,
        'limit': 20,
        'itemId': default_item_id,
        'levelId': '{}_{:04d}_{:02d}'.format(default_item_id, year, month),
    })
    file_name = '{:04d}_{:02d}_1st'.format(year, month)

    # requests never times out by default; without this a stalled
    # connection would hang the whole crawl indefinitely.
    res = requests.get(
        default_url,
        headers=default_headers,
        params=params,
        timeout=timeout,
    )
    # Fail loudly on an HTTP error instead of saving an error page as data.
    res.raise_for_status()
    return write_file(file_name, res.content)
|
|
|
|
|
|
def get_month_all(year, month, total_items, timeout=30.0):
    """Download one month's listing in a single request and save it to disk.

    Sends the default parameters plus ``start=1``, ``limit=total_items``,
    the default ``itemId`` and ``levelId=<itemId>_<YYYY>_<MM>``, then writes
    the raw response body to a file named ``<YYYY>_<MM>_all`` (relative to
    the current working directory).

    Args:
        year: Year as an int; formatted as four digits.
        month: Month as an int; formatted as two digits.
        total_items: Value sent as the ``limit`` parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The name of the file that was written.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    params = get_default_params()
    params.update({
        'start': 1,
        'limit': total_items,
        'itemId': default_item_id,
        'levelId': '{}_{:04d}_{:02d}'.format(default_item_id, year, month),
    })
    file_name = '{:04d}_{:02d}_all'.format(year, month)

    # requests never times out by default; without this a stalled
    # connection would hang the whole crawl indefinitely.
    res = requests.get(
        default_url,
        headers=default_headers,
        params=params,
        timeout=timeout,
    )
    # Fail loudly on an HTTP error instead of silently saving an error page
    # as the month's data file.
    res.raise_for_status()
    return write_file(file_name, res.content)
|
|
|
|
|
|
def write_file(file_name, data):
    """Write *data* to *file_name* in binary mode and report what was written.

    Any existing file of that name is overwritten. A line of the form
    ``<file_name> (<n> bytes)`` is printed to stdout.

    Args:
        file_name: Path of the file to create or overwrite.
        data: Bytes-like payload to store.

    Returns:
        ``file_name``, unchanged.
    """
    with open(file_name, 'wb') as out:
        report = '{} ({} bytes)'.format(file_name, len(data))
        print(report)
        out.write(data)
    return file_name
|
|
|
|
def get_inner_string(str, prefix, postfix):
    """Return the stripped text between *prefix* and the next *postfix*.

    The first occurrence of *prefix* in *str* is located, then the first
    occurrence of *postfix* after it. Surrounding whitespace is removed from
    the text in between.

    Returns:
        The extracted text, or ``""`` when either marker is missing.
    """
    # NOTE: the parameter name ``str`` shadows the builtin; it is kept
    # because it is part of the function's call signature.
    prefix_at = str.find(prefix)
    if prefix_at == -1:
        return ""

    inner_from = prefix_at + len(prefix)
    inner_to = str.find(postfix, inner_from)
    if inner_to == -1:
        return ""

    return str[inner_from:inner_to].strip()
|
|
|
|
def get_month_total_count(file_name):
    """Read a saved listing page and return the integer it reports.

    The number is taken from the text between ``<span class='pit'> `` and
    the following ``</span>``. Callers in this file use it as the month's
    total item count.

    Args:
        file_name: Path of a previously saved page, read as UTF-8 text.

    Returns:
        The extracted value as an int.

    Raises:
        ValueError: If the marker is absent or its content is not an integer.
    """
    # The context manager closes the handle; the original left it open.
    with open(file_name, mode='r', encoding='utf8') as f:
        data = f.read()

    inner = get_inner_string(data, "<span class='pit'> ", '</span>')
    if not inner:
        # int('') would raise ValueError anyway; say which file is at fault.
        raise ValueError('no total count found in {}'.format(file_name))
    return int(inner)
|
|
|
|
|
|
def down_year(year):
    """Download all twelve months of *year*, pausing after each month."""
    first_month, last_month = 1, 12
    for month_number in range(first_month, last_month + 1):
        down_month(year, month_number)
        # Sleep after every month, the last one included.
        time.sleep(default_wait_seconds)
|
|
|
|
def down_month(year, month):
    """Download one month: the first page, then the complete listing.

    The first page is fetched and saved, the count it reports is read back
    from that file, and the month is requested again with that count as the
    limit. The count is printed once both downloads have finished.

    Args:
        year: Year as an int.
        month: Month as an int.
    """
    first_page_file = get_month_first(year, month)
    total_count = get_month_total_count(first_page_file)
    # The returned file name is not needed here, so it is not bound.
    get_month_all(year, month, total_count)
    print('total {}'.format(total_count))
|
|
|
|
|
|
# Script body: download every year from 1985 through 2008, pausing between
# years.
# NOTE(review): this runs whenever the module is executed or imported.
first_year = 1985
last_year = 2008
for year in range(first_year, last_year + 1):
    down_year(year)
    time.sleep(default_wait_seconds)
|
|
|
|
|
|
#http://db.history.go.kr/item/level.do?sort=levelId&dir=ASC&start=1&limit=80&page=1&pre_page=0&setId=-1&totalCount=0&prevPage=1&prevLimit=20&itemId=tcct&types=&synonym=off&chinessChar=off&brokerPagingInfo=&levelId=tcct_1945_08&searchKeywordType=levelId&searchKeywordMethod=LIKE_LEFT&searchKeyword=tcct_1945_08&searchKeywordConjunction=AND&searchKeywordType=isLeaf&searchKeywordMethod=EQ&searchKeyword=1&searchKeywordConjunction=AND
|
|
|
|
#http://db.history.go.kr/item/level.do?sort=levelId&dir=ASC&start=1&limit=166&page=1&pre_page=0&setId=-1&totalCount=0&prevPage=1&prevLimit=20&itemId=tcct&types=&synonym=off&chinessChar=off&brokerPagingInfo=&levelId=tcct_1945_09&searchKeywordType=levelId&searchKeywordMethod=LIKE_LEFT&searchKeyword=tcct_1945_09&searchKeywordConjunction=AND&searchKeywordType=isLeaf&searchKeywordMethod=EQ&searchKeyword=1&searchKeywordConjunction=AND
|
|
|
|
|
|
# View all entries for August
|
|
# http://db.history.go.kr/item/level.do?sort=levelId&dir=ASC&start=1&limit=80&page=1&pre_page=0&setId=-1&totalCount=0&prevPage=1&prevLimit=20&itemId=tcct&types=&synonym=off&chinessChar=off&brokerPagingInfo=&levelId=tcct_1945_08
|
|
# &searchKeywordType=levelId&searchKeywordMethod=LIKE_LEFT&searchKeyword=tcct_1945_08&searchKeywordConjunction=AND
|
|
# &searchKeywordType=isLeaf&searchKeywordMethod=EQ&searchKeyword=1&searchKeywordConjunction=AND
|
|
|
|
# http://db.history.go.kr/item/level.do?sort=levelId&dir=ASC&start=4&limit=20&page=4&pre_page=1&setId=-1&totalCount=0&prevPage=1&prevLimit=&itemId=tcct&types=&synonym=off&chinessChar=off&brokerPagingInfo=&levelId=tcct_1945_08
|
|
# &searchKeywordType=levelId&searchKeywordMethod=LIKE_LEFT&searchKeyword=tcct_1945_08&searchKeywordConjunction=AND
|
|
# &searchKeywordType=isLeaf&searchKeywordMethod=EQ&searchKeyword=1&searchKeywordConjunction=AND
|
|
|
|
# View a specific date
|
|
# http://db.history.go.kr/item/level.do?sort=levelId&dir=ASC&start=1&limit=20&page=1&pre_page=1&setId=-1&totalCount=0&prevPage=1&prevLimit=&itemId=tcct&types=&synonym=off&chinessChar=on&brokerPagingInfo=&levelId=tcct_1945_08_15
|
|
# &searchKeywordType=levelId&searchKeywordMethod=LIKE&searchKeyword=tcct_1945_08_15&searchKeywordConjunction=AND
|
|
# &searchKeywordType=isLeaf&searchKeywordMethod=EQ&searchKeyword=1&searchKeywordConjunction=AND&position=0
|
|
# http://db.history.go.kr/item/level.do?sort=levelId&dir=ASC&start=1&limit=20&page=1&pre_page=1&setId=-1&totalCount=0&prevPage=1&prevLimit=&itemId=tcct&types=&synonym=off&chinessChar=on&brokerPagingInfo=&levelId=tcct_1945_08_16
|
|
# &searchKeywordType=levelId&searchKeywordMethod=LIKE&searchKeyword=tcct_1945_08_16&searchKeywordConjunction=AND
|
|
# &searchKeywordType=isLeaf&searchKeywordMethod=EQ&searchKeyword=1&searchKeywordConjunction=AND&position=0
|
|
# http://db.history.go.kr/item/level.do?sort=levelId&dir=ASC&start=1&limit=20&page=1&pre_page=1&setId=-1&totalCount=0&prevPage=1&prevLimit=&itemId=tcct&types=&synonym=off&chinessChar=on&brokerPagingInfo=&levelId=tcct_2000_12_29
|
|
# &searchKeywordType=levelId&searchKeywordMethod=LIKE&searchKeyword=tcct_2000_12_29&searchKeywordConjunction=AND
|
|
# &searchKeywordType=isLeaf&searchKeywordMethod=EQ&searchKeyword=1&searchKeywordConjunction=AND&position=0
|