data analysis & visualization

GET 과 POST(rvest)

R2019. 6. 10. 12:27
RPubs - GET, POST


'R' 카테고리의 다른 글

자신에게 맞게 R 최적화를 해보자.  (0) 2019.06.21
R Cloud 사용하기  (0) 2019.06.21
텍스트마이닝(사회관계망, 워드클라우드)  (0) 2019.06.06
leaflet 지도그리기  (0) 2019.04.09
객체지향언어란  (0) 2019.04.08

RPubs - text mining(dynamic SNA,wordcloud) tm+KoNLP


'R' 카테고리의 다른 글

R Cloud 사용하기  (0) 2019.06.21
GET 과 POST(rvest)  (0) 2019.06.10
leaflet 지도그리기  (0) 2019.04.09
객체지향언어란  (0) 2019.04.08
이미지 벡터화해서 저장하기  (0) 2019.03.21


expand.grid(rep(list(x), n)) : 벡터 x를 n번 반복한 리스트에 대해 가능한 모든 조합을 생성


예제


#' Enumerate strictly decreasing tuples of length `cn` drawn from `kb_station`.
#'
#' Builds the full Cartesian product with `expand.grid()` and keeps only the
#' rows in which every column is strictly greater than the column to its right.
#'
#' @param kb_station Vector of candidate values (compared with `>`).
#' @param cn Number of columns (tuple length).
#' @return A matrix with `cn` columns, one row per surviving tuple. The result
#'   is always a matrix, including when zero or one row survives.
com <- function(kb_station, cn) {
  data <- as.matrix(expand.grid(rep(list(kb_station), cn)))

  # seq_len() gives an empty sequence when there is a single column, whereas
  # 1:(ncol - 1) would count down (1, 0) and index a non-existent column.
  for (i in seq_len(max(ncol(data) - 1, 0))) {
    # drop = FALSE keeps the matrix shape when exactly one row is left.
    data <- data[data[, i] > data[, i + 1], , drop = FALSE]
  }

  data
}


# Example: every strictly decreasing pair of stations 1..11, with one extra
# column of NA appended on the right.
kb_station <- 1:11
cn <- 2
temp <- com(kb_station, cn)
temp <- cbind(temp, NA)



#' Enumerate strictly decreasing tuples of length `cn`, with a pruned grid.
#'
#' Same filter as a plain `expand.grid()` over `cn` copies of `kb_station`
#' (each column strictly greater than the column to its right), but the
#' candidate set of column `k` is `kb_station` with its first `cn - k`
#' elements removed, which shrinks the grid before filtering.
#'
#' @param kb_station Vector of candidate values (compared with `>`).
#' @param cn Number of columns (tuple length); must be at least 1.
#' @return A matrix with `cn` columns, one row per surviving tuple. The result
#'   is always a matrix, including when zero or one row survives.
com <- function(kb_station, cn) {
  stopifnot(length(cn) == 1, cn >= 1)

  # Column k draws from kb_station minus its first (cn - k) elements, so the
  # last column sees the full vector and the first column the shortest one.
  ls <- lapply(seq_len(cn), function(k) {
    n_drop <- cn - k
    if (n_drop == 0) kb_station else kb_station[-seq_len(n_drop)]
  })

  data <- as.matrix(expand.grid(ls))

  # seq_len() is empty for a single column; 1:(ncol - 1) would yield (1, 0).
  for (i in seq_len(ncol(data) - 1)) {
    # drop = FALSE keeps the matrix shape when exactly one row is left.
    data <- data[data[, i] > data[, i + 1], , drop = FALSE]
  }

  data
}

'R > handling' 카테고리의 다른 글

scaling  (0) 2019.04.16
reshape  (0) 2019.04.14
plyr 패키지를 통한 핸들링  (0) 2019.04.12
파일 불러오기  (0) 2019.04.08

library(devtools)

install_github('qkdrk777777/kma2')

library(kma2)

library(RSelenium)

library(stringr)


pack2(c("rvest", "httr", "stringr", "RCurl", "XML", "progress"))


# Work inside the ASOS data directory and make sure a 'kma' entry exists there.
dir <- 'Y:/data/asos'
setwd(dir)

if (!any(list.files() %in% 'kma')) {
  dir.create('kma')
}


#' Locate a page element and act on it, retrying until it succeeds.
#'
#' Looks the element up through the global Selenium driver `remDr`, stores it
#' in the global variable named by `var`, and then either clicks it or clears
#' it and types `messages` into it. Any error (element not found yet, click
#' failed, ...) resets the global to `TRUE` and the attempt is repeated after
#' half a second. There is no timeout: the call blocks until it succeeds.
#'
#' @param var Name of the global variable that receives the found element.
#'   While the lookup is pending it holds `TRUE`.
#' @param css Locator string passed to `remDr$findElement()`; despite the
#'   name it is interpreted according to `using` (e.g. an XPath expression).
#' @param type `'click'` to click the element, `'sendKeys'` to clear it and
#'   send `messages`; any other value only locates the element.
#' @param messages List passed to `sendKeysToElement()` when
#'   `type == 'sendKeys'`.
#' @param using Locator strategy forwarded to `remDr$findElement()`.
#' @return `NULL`, invisibly. Called for its side effects.
element <- function(var, css, type = 'click', messages = NULL, using = 'css selector') {
  assign(var, TRUE, envir = .GlobalEnv)

  suppressMessages({
    try(silent = TRUE, {
      # The global stays TRUE until an attempt succeeds. isTRUE() makes the
      # exit explicit instead of relying on `<element> == TRUE` raising an
      # error that the surrounding try() swallows.
      while (isTRUE(get(var, envir = .GlobalEnv))) {
        tryCatch({
          assign(var, remDr$findElement(using = using, css), envir = .GlobalEnv)
          # Read back from the global environment so that a `var` value which
          # happens to match a local name cannot pick up the wrong object.
          target <- get(var, envir = .GlobalEnv)

          if (type == 'click') {
            target$clickElement()
          } else if (type == 'sendKeys') {
            target$clearElement()
            target$sendKeysToElement(messages)
          }
        }, error = function(e) assign(var, TRUE, envir = .GlobalEnv))

        Sys.sleep(.5)
      }
    })
  })

  invisible(NULL)
}



# Download directory setup -----

#' Start a Selenium browser session whose downloads go to `<dir>/delete`.
#'
#' Changes the working directory to `dir`, creates a `delete` entry there if
#' none is listed, stores the Chrome capabilities in the global `eCaps`,
#' creates the remote driver in the global variable named by `var`, and opens
#' the browser. Note that this definition masks `base::open()`.
#'
#' @param var Name of the global variable that receives the driver.
#' @param dir Working directory; downloads are directed to `<dir>/delete`.
#' @param port Port of the running Selenium server.
#' @param browser Browser name passed to `remoteDriver()`.
#' @return The result of the driver's `open()` method.
open <- function(var = 'remDr', dir = 'C:/Users/OWNER/Desktop/kma', port = 4447L, browser = 'chrome') {
  setwd(dir)

  if (!any(list.files() %in% 'delete')) {
    dir.create('delete')
  }

  # NOTE(review): the popups preference is set to 4447L, which equals the
  # default port number; this looks like an accidental value - confirm.
  eCaps <<- list(chromeOptions = list(prefs = list(
    profile.default_content_settings.popups = 4447L,
    download.prompt_for_download = FALSE,
    download.default_directory = paste0(dir, '/delete')
  )))

  driver <- remoteDriver(port = port, browserName = browser, extraCapabilities = eCaps)
  assign(var, driver, envir = .GlobalEnv)

  # Open the driver that was just created. The previous code always called
  # remDr$open(), which is the wrong object whenever var != 'remDr'.
  driver$open()
}

# Start the browser session with downloads under the ASOS data directory.
open(dir = 'Y:/data/asos', port = 4445L)

# Load the data page and click the login link.
remDr$navigate("https://data.kma.go.kr/data/rmt/rmtList.do?code=420&pgmNo=572")
element(css = 'a#loginBtn', var = 'remDr2')



# Login -----

# The account id and password are read from the environment so that they are
# not stored in the script. Set KMA_LOGIN_ID and KMA_LOGIN_PW (for example in
# ~/.Renviron) before running.
kma_login_id <- Sys.getenv('KMA_LOGIN_ID')
kma_login_pw <- Sys.getenv('KMA_LOGIN_PW')

if (!nzchar(kma_login_id) || !nzchar(kma_login_pw)) {
  stop('Set the KMA_LOGIN_ID and KMA_LOGIN_PW environment variables before running.',
       call. = FALSE)
}

element(var = 'id_', css = "input#loginId.input-medium", type = 'sendKeys',
        messages = list(kma_login_id))
element(var = 'pw_', css = "input#passwordNo.input-medium", type = 'sendKeys',
        messages = list(kma_login_pw))
element(var = 'login', css = '//*[@id=\"loginbtn\"]', using = 'xpath')

# element(var='login',css='//*[@id=\"loginbtn\"]',using='xpath',type='sendKeys',messages = list(key='enter'))


### Data type ----

# Page key handed to select_type() and the other select_* helpers below.
type <- 'forcast'

#' Navigate the global driver `remDr` to the listing page for a data type.
#'
#' @param type One of `'asos'`, `'aws'`, `'AWO'` (agricultural weather
#'   observation), `'nkw'` (North Korea weather observation) or `'forcast'`.
#' @return The result of `remDr$navigate()`.
#' @details An unknown `type` raises an error. The lookup is restricted to
#'   the table below; it no longer resolves arbitrary object names via
#'   `get()`.
select_type <- function(type) {
  urls <- c(
    asos = 'https://data.kma.go.kr/data/grnd/selectAsosRltmList.do?pgmNo=36',
    aws = 'https://data.kma.go.kr/data/grnd/selectAwsRltmList.do?pgmNo=56',
    # Agricultural weather observation
    AWO = 'https://data.kma.go.kr/data/grnd/selectAgrRltmList.do?pgmNo=72',
    # North Korea weather observation
    nkw = 'https://data.kma.go.kr/data/grnd/selectNkRltmList.do?pgmNo=58',
    forcast = "https://data.kma.go.kr/data/rmt/rmtList.do?code=420&pgmNo=572"
  )

  if (!is.character(type) || length(type) != 1 || !type %in% names(urls)) {
    stop('unknown type; expected one of: ', paste(names(urls), collapse = ', '),
         call. = FALSE)
  }

  remDr$navigate(urls[[type]])
}

# Visit the ASOS listing first, then the page for the configured `type`.
select_type('asos')
select_type(type)


### Period type -----

# Period key: select_timeType() defines hour / day / mon / year / min.
timeType <- 'hour'

#' Click the `<option>` whose value is the code for a period type.
#'
#' @param timeType One of `'hour'`, `'day'`, `'mon'`, `'year'` or `'min'`.
#' @return The result of `element()`.
#' @details An unknown `timeType` raises an error. The code is taken from the
#'   table below instead of resolving the name with `get()`, which could pick
#'   up unrelated objects such as the base function `min`.
select_timeType <- function(timeType) {
  codes <- c(hour = 'F00502', day = 'F00501', mon = 'F00513',
             year = 'F00512', min = 'F00503')

  if (!is.character(timeType) || length(timeType) != 1 ||
      !timeType %in% names(codes)) {
    stop('unknown timeType; expected one of: ',
         paste(names(codes), collapse = ', '), call. = FALSE)
  }

  element(var = 'option1',
          css = paste0("//*/option[@value ='", codes[[timeType]], "']"),
          using = 'xpath')
}

# select_timeType(timeType='mon')

# select_timeType(timeType=timeType)

# select_timeType(timeType='hour')

####-----

#' Fill in the start and end of the query period on the current page.
#'
#' Drives the global Selenium driver `remDr` (and `element()`) to set the
#' period controls. Which controls are used depends on `timeType`:
#' * `'hour'`: date picker plus hour option for both start and end.
#' * `'day'`, `'min'`: date picker for start and end, pausing 2 s after each.
#' * `'mon'`: year and month options.
#' * `'year'`: year options only.
#' Any other `timeType` does nothing.
#'
#' The positional indices used to pick one of several matching elements
#' (e.g. the 3rd year option, the 5th month option) are carried over as they
#' were. NOTE(review): they presumably depend on the page layout - confirm
#' against the live page.
#'
#' @param type Page key; only `'forcast'` changes behaviour (different month
#'   option indices in the `'mon'` branch). May be `NULL`.
#' @param timeType Period type, see above.
#' @param start,end Period boundaries (`POSIXct`, or anything `as.character()`
#'   turns into a `"YYYY-MM-DD HH:MM"` string).
#' @return `NULL`, invisibly. Called for its side effects.
select_priod <- function(type = NULL, timeType, start = as.POSIXct('2019-05-09 18:00'), end = as.POSIXct('2019-05-10 18:00')) {

  # XPath matching every <option> with the given value attribute.
  option_xpath <- function(value) paste0("//*/option[@value ='", value, "']")

  # Find all elements for a locator and click the n-th match.
  click_nth <- function(using, value, n) {
    matches <- remDr$findElements(using, value = value)
    matches[[n]]$clickElement()
  }

  # Year / month / day of the date as printed, i.e. via as.character() rather
  # than as.Date(<POSIXct>), so every call site derives the date the same way.
  date_parts <- function(when) {
    d <- as.Date(as.character(when))
    list(year = format(d, '%Y'),
         month = format(d, '%m'),
         day = as.numeric(format(d, '%d')))
  }

  # Two-digit hour. format() is used for date-times because the default
  # character form of a midnight POSIXct may omit the time part, which would
  # make substr(., 12, 13) return an empty string.
  hour_of <- function(when) {
    if (inherits(when, 'POSIXt')) {
      format(when, '%H')
    } else {
      substr(as.character(when), 12, 13)
    }
  }

  # Open a date-picker input, then pick the year, the month (the option value
  # is the month number minus one) and the day cell.
  pick_date <- function(var, css, when, month_idx) {
    parts <- date_parts(when)
    element(var = var, css = css)
    click_nth('xpath', option_xpath(parts$year), 3)
    click_nth('xpath', option_xpath(as.numeric(parts$month) - 1), month_idx)
    click_nth('css', 'a.ui-state-default', parts$day)
  }

  start_css <- 'input#startDt.input-medium.inline.hasDatepicker'
  end_css <- 'input#endDt.input-medium.inline.hasDatepicker'

  if (timeType == 'hour') {
    # Hourly data -------
    pick_date('st', start_css, start, 1)
    # The hour <select> does not need to be opened first.
    element(var = 'stTimes', using = 'xpath', css = option_xpath(hour_of(start)))

    pick_date('ed', end_css, end, 5)
    Sys.sleep(1)
    click_nth('xpath', option_xpath(hour_of(end)), 2)

  } else if (timeType == 'day' || timeType == 'min') {
    # Daily and minute data use the same date-picker sequence --------
    pick_date('st', start_css, start, 1)
    Sys.sleep(2)

    pick_date('ed', end_css, end, 5)
    Sys.sleep(2)

  } else if (timeType == 'mon') {
    # Monthly data ----------
    # identical() copes with the default type = NULL, where `type == ...`
    # would fail with "argument is of length zero".
    is_forecast <- identical(type, 'forcast')
    from <- date_parts(start)
    to <- date_parts(end)

    click_nth('xpath', option_xpath(from$year), 1)
    click_nth('xpath', option_xpath(from$month), if (is_forecast) 1 else 3)

    click_nth('xpath', option_xpath(to$year), 2)
    click_nth('xpath', option_xpath(to$month), if (is_forecast) 2 else 4)

  } else if (timeType == 'year') {
    # Yearly data -------
    click_nth('xpath', option_xpath(date_parts(start)$year), 1)
    click_nth('xpath', option_xpath(date_parts(end)$year), 2)
  }

  invisible(NULL)
}


# Period to request, then fill it into the hourly period controls.
start <- as.POSIXct('2018-01-01 00:01')
end <- as.POSIXct('2018-12-31 23:00')

# select_priod(timeType='mon',start,end,type='forcast')
select_priod(type = NULL, timeType = 'hour', start = start, end = end)

# Select all variables ------

#' Open the variable chooser, tick the first tree checkbox and confirm.
#'
#' @param type Page key. `'forcast'` uses the forecast page's chooser button;
#'   any other value, including the default `NULL`, uses the other button.
#' @return The result of the final `element()` call.
select_var <- function(type = NULL) {
  # identical() copes with type = NULL, where `type == 'forcast'` would fail
  # with "argument is of length zero".
  chooser_css <- if (identical(type, 'forcast')) {
    'input#btnStn.selectBtn1.btn.btn-primary.VAR1_BTN'
  } else {
    'input#gubun.selectBtn2.btn.btn-primary'
  }

  element(var = 'variable', css = chooser_css)
  element(var = 'variable1', css = 'span#ztree_1_check.button.chk.checkbox_false_full')
  element(var = 'variable_enter', css = 'li.btn-sitetree-complete')
}

# select_var(type='forcast')

# Select every variable using the non-forecast chooser.
select_var('asos')

#' Open the station chooser, tick the first tree checkbox and confirm.
#'
#' Per-station selection is left for later; for now the first tree checkbox
#' is ticked.
#'
#' @param type Page key. `'forcast'` uses the forecast page's chooser button;
#'   any other value, including the default `NULL`, uses the other button.
#' @return The result of the final `element()` call.
select_area <- function(type = NULL) {
  # Select all stations -----
  ## Encoding conversion of the station names shipped with kma2 (unused):
  # names(citydata)=iconv(names(citydata),'cp949','UTF-8')
  # names(citydata2)=iconv(names(citydata2),'cp949','UTF-8')
  # for(i in 1:length(citydata)){
  #   names(citydata[[i]])=iconv(names(citydata[[i]]),'cp949','UTF-8')}
  # city_index=which(names(citydata)%in%'서울특별시')

  # identical() copes with type = NULL, where `type == 'forcast'` would fail
  # with "argument is of length zero".
  chooser_css <- if (identical(type, 'forcast')) {
    'input#btnStn.selectBtn1.btn.btn-primary.VAR3_BTN'
  } else {
    'input#btnStn1.selectBtn1.btn.btn-primary'
  }

  element(var = 'area', css = chooser_css)
  element(var = 'area1', css = 'span#ztree_1_check.button.chk.checkbox_false_full')
  element(var = 'area_enter', css = 'li.btn-sitetree-complete')
}

# select_area(type='forcast')

# Select the stations for the configured `type`.
select_area(type)

# Search -----

#' Click the option with value 100, then click the search button.
#'
#' @return The result of the final `element()` call.
select_search <- function() {
  option_100_xpath <- paste0("//*/option[@value ='", '100', "']")

  element(var = 'n_list', css = option_100_xpath, using = 'xpath')
  element(var = 'search', css = 'a.addBtn.btn-img-detail')
}

select_search()

# Number of records -----
# Take the text of the 'span.float-left' element, strip Hangul characters and
# spaces from it, and convert what is left to a number.
n <- as.numeric(
  gsub(
    '[ㄱ-힣]| ',
    '',
    remDr$findElement('css selector', 'span.float-left')$getElementText()[[1]]
  )
)

remDr$screenshot(display = TRUE)

# type='asos'

# Download loop for every data type except 'forcast'.
# For each page number: request the CSV through the page, wait until a csv
# file shows up in <dir>/delete, copy it into <dir>, delete the original and
# rename the copy to
#   <type>_<timeType>_<start>to<end>(<pageNum>).csv
# where <start>/<end> are the first 13 characters of the timestamps with '-'
# removed and the space replaced by 'hour'.
# NOTE(review): relative paths ('delete', fileName) are used next to
# paste0(dir, ...) paths, so this presumably assumes the working directory
# is `dir` - confirm.

if(type!='forcast'){

  # NOTE(review): the loop runs over the single value 1, so only one page is
  # processed even though the progress message divides by ceiling(n/100).
  for(pageNum in 1){

    # Sys.sleep(.5)

    # NOTE(review): this `if` has no braces, so it guards only the message()
    # call; the download steps below run whether or not the target file is
    # already listed. Possibly meant to skip existing files - confirm.
    if(!paste0(type,'_',timeType,'_',gsub('-','',gsub(' ','hour',substr(as.character(start),1,13)))

               ,'to',gsub('-','',gsub(' ','hour',substr(as.character(end),1,13))),'(',pageNum,').csv')%in%list.files())

      message(round(pageNum/ceiling(n/100)*100,3),'%')  

    # Click the download button.
    element(var='download',css='a.btn.btn-default')

    # element(var='use',css='input#reqstPurposeCd')

    # Click the 'input#reqstPurposeCd7' input (presumably a usage-purpose
    # option in the request dialog - confirm).
    element(var='use',css='input#reqstPurposeCd7')

    

    # use<<-remDr$findElement('xpath','input#reqstPurposeCd7')

    # while(length(use)==0){

    # use<<-remDr$findElement('css selector','input#reqstPurposeCd7')

    # }

    # use$clickElement()

    Sys.sleep(1)

    # Call the page's own fnRltmRequest() JavaScript function.
    remDr$executeScript(script="fnRltmRequest();",args=1:2)

    # Poll once per second until at least one csv file exists in <dir>/delete.
    while(length(list.files(paste0(dir,'/delete'),pattern = 'csv$'))==0){

      Sys.sleep(1)}

    # Name(s) of the downloaded csv file(s), relative to 'delete'.
    fileName=list.files('delete',pattern='csv$')

    # Copy the download from <dir>/delete into <dir>.
    copy=file.copy(

      paste0(dir,'/delete/',

             list.files('delete',pattern='csv$')),

      paste0(dir,'/',

             list.files('delete',pattern='csv$')))

    # Remove the original only after a successful copy; leave the loop if
    # either the copy or the removal fails.
    # NOTE(review): `copy` and `remove` have one element per csv file, so
    # these conditions assume exactly one file is present.
    if(copy==T){

      remove=file.remove(paste0(dir,'/delete/',

                                list.files('delete',pattern='csv$')))

      if(remove!=T){break}

    }else break

    # Rename the copied file to the structured name described above.
    file.rename(fileName,

                paste0(type,'_',timeType,'_',gsub('-','',gsub(' ','hour',substr(as.character(start),1,13)))

                       ,'to',gsub('-','',gsub(' ','hour',substr(as.character(end),1,13))),'(',pageNum,').csv'))

    

    # Call the page's goPage() JavaScript function with the current page
    # number, then give the page two seconds.
    remDr$executeScript(script = paste0('goPage(',pageNum,'); return false;'),args=1:2)

    Sys.sleep(2)

  }

}


# Download loop for the 'forcast' data type.
# On each page: read the result table to build target file names, click each
# download button in turn, wait for a file to appear in 'delete', move it
# into <dir> and rename it; then go to the next page.
# NOTE(review): relative paths ('delete', fileName) are used next to
# paste0(dir, ...) paths, so this presumably assumes the working directory
# is `dir` - confirm.
if(type=='forcast'){

  # k counts processed pages; kk counts processed download buttons and is
  # used only for the progress message.
  k=0

  kk=0

  while(T){

    # First three columns of the second HTML table in the page source, with
    # rows containing NA dropped. The variable is named `list`, like the base
    # function.
    list=data.frame(na.omit(readHTMLTable(remDr$getPageSource()[[1]])[[2]][,1:3]))

    # Target file names: the three columns pasted together with spaces and
    # the characters / , - > removed, plus '.csv'.
    list2=paste0(gsub(' |/|,|-|>','', paste0(list[,1],list[,2],list[,3])),'.csv')

    # All download buttons currently on the page.
    down = remDr$findElements(using = "css selector", 

                              value = "input.btn.btn-default.DATA_DOWN_BTN")

    # NOTE(review): 1:length(down) yields c(1, 0) when no button is found.
    for(i in 1:length(down)){

      kk=kk+1

      message(round(kk/n*100,3),'%')

      # Click the i-th download button; errors are ignored.
      try(silent = T,{

        down[[i]]$clickElement()

      })    

      # Only for the first button on a page: wait 3 s, then click the 8th
      # 'ul.check-list input' and the 4th 'input.btn.btn-primary'
      # (presumably answering a dialog shown on the first download -
      # confirm). Errors are ignored.
      if(i==1){Sys.sleep(3)

        try(silent = T,{

          a=remDr$findElements('css selector','ul.check-list input')

          a[[8]]$clickElement()

          close2 = remDr$findElements(using = "css selector", 

                                      value = "input.btn.btn-primary")

          close2[[4]]$clickElement()

        })

      }

      

      # Retry for as long as 'delete' is empty: click 'button.buttonOK' if it
      # is present, look the download buttons up again and click the i-th one
      # again. All errors and messages inside are suppressed.
      # NOTE(review): if exactly one file is present right after the click,
      # the csv files in <dir>/delete are removed; the intent of this step is
      # not clear from the code - confirm.
      while(length(list.files('delete'))==0){

        suppressMessages({

          try(silent = T,{

            try(silent = T,{

              error=remDr$findElement('css selector','button.buttonOK')

              error$clickElement()

            })

            

            down <<- remDr$findElements(using = "css selector", 

                                        value = "input.btn.btn-default.DATA_DOWN_BTN")

            down[[i]]$clickElement()

            if(length(list.files('delete'))==1){

              file.remove(paste0(dir,'/delete/',

                                 list.files('delete',pattern='csv$')))        

            }

          })})

      }

      Sys.sleep(1)

      # Move the downloaded csv from <dir>/delete into <dir> and rename it to
      # the i-th target name built from the table.
      fileName=list.files('delete',pattern='csv$')

      file.copy(paste0(dir,'/delete/',list.files('delete',pattern='csv$')),

                paste0(dir,'/',list.files('delete',pattern='csv$')))

      file.remove(paste0(dir,'/delete/',

                         list.files('delete',pattern='csv$')))

      file.rename(fileName,list2[i])

      

    }

    k=k+1

    # Click the element with class 'next_page'.
    element('page',using='class name','next_page')

    # Stop once more than ceiling(n/100) pages have been processed.
    if(k>ceiling(n/100))break

  }

  

}


scaling

R/handling2019. 4. 16. 05:37
RPubs - 표준화


'R > handling' 카테고리의 다른 글

expand.grid를 복수로 생성할 때  (0) 2019.05.25
reshape  (0) 2019.04.14
plyr 패키지를 통한 핸들링  (0) 2019.04.12
파일 불러오기  (0) 2019.04.08

RPubs - url을 통한 크롤링(정적 크롤링만 가능)