# 기상청 자료 다운로드
library(devtools)
install_github('qkdrk777777/kma2')
library(kma2)
library(RSelenium)
library(stringr)
pack2(c("rvest", "httr", "stringr", "RCurl", "XML", "progress"))
# Work inside the ASOS data folder and make sure it contains a 'kma' entry.
setwd('Y:/data/asos')
dir <- 'Y:/data/asos'
if (!('kma' %in% list.files())) {
  dir.create('kma')
}
# Locate a page element, act on it, and keep retrying until that succeeds.
#
# The located element is stored in the global environment under the name
# given by `var`. While no attempt has succeeded that global holds TRUE,
# which doubles as the "keep trying" flag.
#
# Args:
#   var:      name (string) of the global variable that receives the element.
#   css:      locator passed to remDr$findElement(); despite the name it is
#             interpreted according to `using` (it may be an XPath expression).
#   type:     'click' clicks the element; 'sendKeys' clears it and then sends
#             `messages`; any other value only locates the element.
#   messages: list handed to sendKeysToElement() when type is 'sendKeys'.
#   using:    locator strategy forwarded to findElement().
#   timeout:  maximum number of seconds to keep retrying. The default, Inf,
#             retries forever, which is what this function always did.
#
# Returns the located element invisibly; stops with an error once `timeout`
# is exceeded. Relies on a global `remDr` driver object.
element <- function(var, css, type = 'click', messages = NULL,
                    using = 'css selector', timeout = Inf) {
  assign(var, TRUE, envir = .GlobalEnv)
  started <- Sys.time()

  # isTRUE() makes the exit condition explicit. The earlier `== T` test on a
  # stored element was not a well-defined comparison and presumably only
  # ended the loop through an error that the surrounding try() swallowed.
  while (isTRUE(get(var, envir = .GlobalEnv))) {
    tryCatch(
      suppressMessages({
        found <- remDr$findElement(using = using, css)
        if (type == 'click') {
          found$clickElement()
        } else if (type == 'sendKeys') {
          found$clearElement()
          found$sendKeysToElement(messages)
        }
        # Publish the element only once the action has gone through.
        assign(var, found, envir = .GlobalEnv)
      }),
      # Any failure (element missing, not clickable yet, ...) means "retry".
      error = function(e) assign(var, TRUE, envir = .GlobalEnv)
    )
    Sys.sleep(.5)

    waited <- as.numeric(difftime(Sys.time(), started, units = 'secs'))
    if (isTRUE(get(var, envir = .GlobalEnv)) && waited > timeout) {
      stop('element(): gave up on "', css, '" after ', timeout, ' seconds',
           call. = FALSE)
    }
  }
  invisible(get(var, envir = .GlobalEnv))
}
# Download-location setup ----
# Start a Selenium session whose browser saves downloads into `<dir>/delete`.
#
# Args:
#   var:     name (string) of the global variable that receives the driver.
#   dir:     working folder; downloads go to its 'delete' sub-folder.
#   port:    port passed to remoteDriver().
#   browser: browser name passed to remoteDriver().
#
# Side effects: switches the R working directory to `dir`, creates
# `dir/delete` when it is missing, stores the capability list in the global
# `eCaps` and the driver in the global named by `var`.
# NOTE: this definition masks base::open() for the rest of the session.
open <- function(var = 'remDr', dir = 'C:/Users/OWNER/Desktop/kma',
                 port = 4447L, browser = 'chrome') {
  setwd(dir)
  if (!('delete' %in% list.files())) {
    dir.create('delete')
  }

  # The popups preference used to carry the port number (4447L); 0L is the
  # value shown in the RSelenium Chrome download example.
  capabilities <- list(chromeOptions = list(prefs = list(
    profile.default_content_settings.popups = 0L,
    download.prompt_for_download = FALSE,
    download.default_directory = paste0(dir, '/delete')
  )))
  assign('eCaps', capabilities, envir = .GlobalEnv)

  driver <- remoteDriver(port = port, browserName = browser,
                         extraCapabilities = capabilities)
  assign(var, driver, envir = .GlobalEnv)
  # Open the driver that was just created, not a hard-coded `remDr`, so a
  # non-default `var` works as well.
  driver$open()
}
# Credentials are read from the environment instead of being stored in the
# script: set KMA_LOGIN_ID and KMA_LOGIN_PW (e.g. in ~/.Renviron) beforehand.
kma_login_id <- Sys.getenv('KMA_LOGIN_ID')
kma_login_pw <- Sys.getenv('KMA_LOGIN_PW')
if (!nzchar(kma_login_id) || !nzchar(kma_login_pw)) {
  stop('Set the KMA_LOGIN_ID and KMA_LOGIN_PW environment variables first.',
       call. = FALSE)
}

# Start the browser, open the portal and bring up the login form.
open(port = 4445L, dir = 'Y:/data/asos')
remDr$navigate("https://data.kma.go.kr/data/rmt/rmtList.do?code=420&pgmNo=572")
element(var = 'remDr2', css = 'a#loginBtn')

# Log in ----
element(var = 'id_', css = "input#loginId.input-medium", type = 'sendKeys',
        messages = list(kma_login_id))
element(var = 'pw_', css = "input#passwordNo.input-medium", type = 'sendKeys',
        messages = list(kma_login_pw))
element(var = 'login', css = '//*[@id=\"loginbtn\"]', using = 'xpath')
# Alternative kept for reference: submit by sending the enter key.
# element(var='login',css='//*[@id=\"loginbtn\"]',using='xpath',type='sendKeys',messages = list(key='enter'))
# Data type ----
# Key of the data page to work with; consumed by the calls further down.
type <- 'forcast'
# Navigate the browser to the KMA data page for the requested data type.
#
# Args:
#   type: one of 'asos', 'aws', 'AWO' (agricultural weather observation),
#         'nkw' (North Korea weather observation) or 'forcast'.
#
# Stops with an error for any other value. Uses the global `remDr` driver.
select_type <- function(type) {
  pages <- c(
    asos    = 'https://data.kma.go.kr/data/grnd/selectAsosRltmList.do?pgmNo=36',
    aws     = 'https://data.kma.go.kr/data/grnd/selectAwsRltmList.do?pgmNo=56',
    AWO     = 'https://data.kma.go.kr/data/grnd/selectAgrRltmList.do?pgmNo=72',
    nkw     = 'https://data.kma.go.kr/data/grnd/selectNkRltmList.do?pgmNo=58',
    forcast = "https://data.kma.go.kr/data/rmt/rmtList.do?code=420&pgmNo=572"
  )

  # A fixed lookup replaces get(type), which resolved *any* variable visible
  # from here (including the parameter itself) and so could navigate to junk.
  if (!is.character(type) || length(type) != 1L || !(type %in% names(pages))) {
    stop('Unknown data type; expected one of: ',
         paste(names(pages), collapse = ', '), call. = FALSE)
  }
  remDr$navigate(pages[[type]])
}
# Visit the ASOS page first, then the page selected through `type`.
select_type('asos')
select_type(type)
# Time resolution of the data ----
timeType <- 'hour'
# Choose the time resolution in the page's drop-down.
#
# Args:
#   timeType: one of 'hour', 'day', 'mon', 'year' or 'min'.
#
# Clicks the <option> whose value is the matching code below, storing the
# element in the global `option1` through element(). Stops with an error for
# an unknown `timeType`.
select_timeType <- function(timeType) {
  codes <- c(hour = 'F00502', day = 'F00501', mon = 'F00513',
             year = 'F00512', min = 'F00503')

  # A fixed lookup replaces get(timeType), which resolved any visible
  # variable rather than only the five codes above.
  if (!is.character(timeType) || length(timeType) != 1L ||
      !(timeType %in% names(codes))) {
    stop('Unknown timeType; expected one of: ',
         paste(names(codes), collapse = ', '), call. = FALSE)
  }
  element(var = 'option1',
          css = paste0("//*/option[@value ='", codes[[timeType]], "']"),
          using = 'xpath')
}
# select_timeType(timeType='mon')
# select_timeType(timeType=timeType)
# select_timeType(timeType='hour')
####-----
# Fill in the start and end of the query period on the current KMA page.
#
# Args:
#   type:       data type; only consulted for monthly data, where 'forcast'
#               uses different drop-down positions than every other value
#               (NULL counts as "not forcast").
#   timeType:   'hour', 'day', 'mon', 'year' or 'min'; any other string does
#               nothing.
#   start, end: period bounds as POSIXct (a Date or a 'YYYY-MM-DD HH:MM'
#               string is accepted as well).
#
# The numeric positions used below (3rd year match, 5th month match, ...)
# pick one of several <option> elements sharing the same value; they are
# carried over unchanged and depend on the page layout.
# Uses the global `remDr` driver and element().
select_priod <- function(type = NULL, timeType,
                         start = as.POSIXct('2019-05-09 18:00'),
                         end = as.POSIXct('2019-05-10 18:00')) {
  option_xpath <- function(value) paste0("//*/option[@value ='", value, "']")

  # Click the `index`-th <option> on the page whose value equals `value`.
  click_option <- function(value, index) {
    hits <- remDr$findElements('xpath', value = option_xpath(value))
    hits[[index]]$clickElement()
  }

  # Split a timestamp into the text pieces the form needs. format() keeps the
  # hour of midnight timestamps and reads the date in the timestamp's own
  # time zone, so start and end are treated alike in every branch.
  split_stamp <- function(when) {
    text <- if (inherits(when, 'POSIXt')) {
      format(when, '%Y-%m-%d %H')
    } else {
      as.character(when)
    }
    list(year = substr(text, 1, 4),
         month = substr(text, 6, 7),
         day = as.numeric(substr(text, 9, 10)),
         hour = substr(text, 12, 13))
  }

  # Open a date picker and choose year, month and day in it.
  pick_date <- function(parts, var, input_css, month_index) {
    element(var = var, css = input_css)
    click_option(parts$year, 3)
    # Month <option> values are the month number minus one (January is 0).
    click_option(as.numeric(parts$month) - 1, month_index)
    day_links <- remDr$findElements('css', value = 'a.ui-state-default')
    day_links[[parts$day]]$clickElement()
  }

  start_css <- 'input#startDt.input-medium.inline.hasDatepicker'
  end_css <- 'input#endDt.input-medium.inline.hasDatepicker'
  from <- split_stamp(start)
  to <- split_stamp(end)

  switch(timeType,
    # Hourly data: date pickers plus an hour drop-down on each side.
    hour = {
      pick_date(from, 'st', start_css, 1)
      element(var = 'stTimes', using = 'xpath', css = option_xpath(from$hour))
      pick_date(to, 'ed', end_css, 5)
      Sys.sleep(1)
      click_option(to$hour, 2)
    },
    # Daily and minute data share the same two date pickers.
    day = ,
    min = {
      pick_date(from, 'st', start_css, 1)
      Sys.sleep(2)
      pick_date(to, 'ed', end_css, 5)
      Sys.sleep(2)
    },
    # Monthly data: plain year/month drop-downs, no date picker.
    mon = {
      # isTRUE() keeps the default type = NULL from breaking the test.
      is_forecast <- isTRUE(type == 'forcast')
      click_option(from$year, 1)
      click_option(from$month, if (is_forecast) 1 else 3)
      click_option(to$year, 2)
      click_option(to$month, if (is_forecast) 2 else 4)
    },
    # Yearly data: only the two year drop-downs.
    year = {
      click_option(from$year, 1)
      click_option(to$year, 2)
    }
  )
}
# Period to download, then apply it to the hourly form.
start <- as.POSIXct('2018-01-01 00:01')
end <- as.POSIXct('2018-12-31 23:00')
# select_priod(timeType='mon',start,end,type='forcast')
select_priod(type = NULL, timeType = 'hour', start = start, end = end)
# Select all variables ----
# Open the variable picker, tick the first tree checkbox and confirm.
#
# Args:
#   type: data type; 'forcast' pages use a different opener button than every
#         other value (NULL counts as "not forcast").
#
# Each element is stored in a global through element().
select_var <- function(type = NULL) {
  # isTRUE() keeps the default type = NULL from breaking the comparison.
  opener_css <- if (isTRUE(type == 'forcast')) {
    'input#btnStn.selectBtn1.btn.btn-primary.VAR1_BTN'
  } else {
    'input#gubun.selectBtn2.btn.btn-primary'
  }
  element(var = 'variable', css = opener_css)
  element(var = 'variable1',
          css = 'span#ztree_1_check.button.chk.checkbox_false_full')
  element(var = 'variable_enter', css = 'li.btn-sitetree-complete')
}
# select_var(type='forcast')
# Pick every variable using the non-forecast page layout.
select_var('asos')
# Select all stations ----
# Open the station picker, tick the first tree checkbox and confirm.
#
# Args:
#   type: data type; 'forcast' pages use a different opener button than every
#         other value (NULL counts as "not forcast").
#
# Each element is stored in a global through element().
select_area <- function(type = NULL) {
  # Per-station selection is left for later; for now everything is queried.
  ## Encoding conversion of the station names held in the kma2 data:
  # names(citydata)=iconv(names(citydata),'cp949','UTF-8')
  # names(citydata2)=iconv(names(citydata2),'cp949','UTF-8')
  # for(i in 1:length(citydata)){
  # names(citydata[[i]])=iconv(names(citydata[[i]]),'cp949','UTF-8')}
  # city_index=which(names(citydata)%in%'서울특별시')

  # isTRUE() keeps the default type = NULL from breaking the comparison.
  opener_css <- if (isTRUE(type == 'forcast')) {
    'input#btnStn.selectBtn1.btn.btn-primary.VAR3_BTN'
  } else {
    'input#btnStn1.selectBtn1.btn.btn-primary'
  }
  element(var = 'area', css = opener_css)
  element(var = 'area1',
          css = 'span#ztree_1_check.button.chk.checkbox_false_full')
  element(var = 'area_enter', css = 'li.btn-sitetree-complete')
}
# select_area(type='forcast')
# Pick every station for the currently selected data type.
select_area(type)
# Search ----
# Choose the <option> valued '100' (presumably the rows-per-page setting;
# TODO confirm) and press the search button. Both elements are stored in
# globals through element().
select_search <- function() {
  hundred_option <- "//*/option[@value ='100']"
  element(var = 'n_list', css = hundred_option, using = 'xpath')
  element(var = 'search', css = 'a.addBtn.btn-img-detail')
}
select_search()
# Number of records ----
# Read the text of 'span.float-left', drop Hangul characters and spaces, and
# keep what is left as a number.
n <- as.numeric(
  gsub(
    '[ㄱ-힣]| ', '',
    remDr$findElement('css selector', 'span.float-left')$getElementText()[[1]]
  )
)
remDr$screenshot(display = TRUE)
# type='asos'
# Download the query result for every data type except 'forcast': request the
# CSV, wait for it to appear in `<dir>/delete`, move it into `dir` and give
# it a descriptive name.
if (type != 'forcast') {
  download_dir <- paste0(dir, '/delete')

  # e.g. "2018-01-01 00:01:00" -> "20180101hour00"
  period_tag <- function(when) {
    gsub('-', '', gsub(' ', 'hour', substr(as.character(when), 1, 13)))
  }

  # NOTE(review): only page 1 is processed; the progress formula suggests the
  # range was meant to reach ceiling(n / 100) — confirm before widening it.
  for (pageNum in 1) {
    target <- paste0(type, '_', timeType, '_', period_tag(start),
                     'to', period_tag(end), '(', pageNum, ').csv')

    # NOTE(review): as before, this test only guards the progress message;
    # an already existing target file does not skip the download — confirm.
    if (!(target %in% list.files(dir))) {
      message(round(pageNum / ceiling(n / 100) * 100, 3), '%')
    }

    element(var = 'download', css = 'a.btn.btn-default')
    element(var = 'use', css = 'input#reqstPurposeCd7')
    Sys.sleep(1)
    remDr$executeScript(script = "fnRltmRequest();", args = 1:2)

    # Wait until a finished csv shows up in the download folder.
    while (length(list.files(download_dir, pattern = 'csv$')) == 0) {
      Sys.sleep(1)
    }

    # List the folder once and reuse the result, so copy, remove and rename
    # all act on the same file even if the folder changes meanwhile.
    fileName <- list.files(download_dir, pattern = 'csv$')
    copied <- file.copy(file.path(download_dir, fileName),
                        file.path(dir, fileName))
    if (!all(copied)) break
    removed <- file.remove(file.path(download_dir, fileName))
    if (!all(removed)) break
    file.rename(file.path(dir, fileName), file.path(dir, target))

    # NOTE(review): this asks for the page that was just downloaded
    # (pageNum, not pageNum + 1) — confirm once more pages are processed.
    remDr$executeScript(script = paste0('goPage(', pageNum, '); return false;'),
                        args = 1:2)
    Sys.sleep(2)
  }
}
# Download path for the 'forcast' data type: walk the result pages and click
# every download button, moving each csv out of 'delete' under a name built
# from its table row.
if(type=='forcast'){
# k counts the pages handled, kk the download buttons handled (progress only).
k=0
kk=0
while(T){
# Second HTML table of the current page, first three columns, rows with NA
# dropped. NOTE(review): `list` shadows the name of the base function.
list=data.frame(na.omit(readHTMLTable(remDr$getPageSource()[[1]])[[2]][,1:3]))
# Target file names: the three columns pasted together with spaces and the
# characters / , - > removed, plus '.csv'.
list2=paste0(gsub(' |/|,|-|>','', paste0(list[,1],list[,2],list[,3])),'.csv')
# Every download button on the current page.
down = remDr$findElements(using = "css selector",
value = "input.btn.btn-default.DATA_DOWN_BTN")
for(i in 1:length(down)){
kk=kk+1
message(round(kk/n*100,3),'%')
# Click the i-th button; a failing click is ignored here and retried below.
try(silent = T,{
down[[i]]$clickElement()
})
# For the first button of each page: wait, then click the 8th
# 'ul.check-list input' and the 4th 'input.btn.btn-primary'.
# Presumably this answers a usage-purpose dialog — TODO confirm.
if(i==1){Sys.sleep(3)
try(silent = T,{
a=remDr$findElements('css selector','ul.check-list input')
a[[8]]$clickElement()
close2 = remDr$findElements(using = "css selector",
value = "input.btn.btn-primary")
close2[[4]]$clickElement()
})
}
# As long as 'delete' is empty: press 'button.buttonOK' if it is there,
# look the download buttons up again and click the i-th one once more.
while(length(list.files('delete'))==0){
suppressMessages({
try(silent = T,{
try(silent = T,{
error=remDr$findElement('css selector','button.buttonOK')
error$clickElement()
})
down <<- remDr$findElements(using = "css selector",
value = "input.btn.btn-default.DATA_DOWN_BTN")
down[[i]]$clickElement()
# NOTE(review): when exactly one file is present right after the click, its
# csv is removed again, which keeps this loop going — intent unclear, confirm.
if(length(list.files('delete'))==1){
file.remove(paste0(dir,'/delete/',
list.files('delete',pattern='csv$')))
}
})})
}
Sys.sleep(1)
# Move the csv from 'delete' into `dir` and rename it to the name derived
# from row i of the table.
fileName=list.files('delete',pattern='csv$')
file.copy(paste0(dir,'/delete/',list.files('delete',pattern='csv$')),
paste0(dir,'/',list.files('delete',pattern='csv$')))
file.remove(paste0(dir,'/delete/',
list.files('delete',pattern='csv$')))
file.rename(fileName,list2[i])
}
# Go to the next page; stop once more than ceiling(n/100) pages were handled.
k=k+1
element('page',using='class name','next_page')
if(k>ceiling(n/100))break
}
}
# 'R > crawling' 카테고리의 다른 글
# terminal code R을 이용해 실행하기 (0) | 2019.07.31 |
# ---|---|
# 네이버 실시간 검색어 크롤링하기 (0) | 2019.07.04 |
# XML package를 활용한 정적 크롤링 (0) | 2019.04.15 |
# PlotGoogleMaps 사용해 AWS, ASOS 위치 나타내기 (0) | 2019.04.11 |
# 크롤링을 활용한 미세먼지 실시간 시각화하기 (0) | 2019.03.21 |