Melanjutkan postingan saya sebelumnya tentang mengunduh data historis, postingan ini menunjukkan bagaimana data yang diunduh sebelumnya dapat diperbarui dan ditambahkan dengan data baru yang lebih terkini tanpa harus mengunduh ulang semua data lama dari awal lagi.
Fungsi utama untuk melakukan ini, HisPricesDates, mengunduh data antara tanggal yang diberikan sebagai input fungsi dan ditunjukkan di bawah ini.
# Download historical candles for one instrument between two dates from the
# Oanda v1 REST API and return them as a data frame.
#
# Arguments:
#   Granularity  - candle granularity code sent as "granularity=" (e.g. "D", "H1").
#   DayAlign     - value sent as "dailyAlignment=".
#   TimeAlign    - time zone name stored as the "tzone" attribute of the returned
#                  TimeStamp column (it is NOT sent to the API).
#   AccountToken - API token, sent as an "Authorization: Bearer ..." header.
#   Instrument   - instrument name sent as "instrument=" (e.g. "EUR_USD").
#   Start, End   - start and end of the requested range; anything paste() turns
#                  into the text the API expects (e.g. a Date or "2014-03-21").
#
# Returns a data frame with columns TimeStamp (POSIXct), Open, High, Low, Close,
# TickVolume and Complete. When the response holds no candles a zero-row data
# frame with the same columns is returned.
#
# Requires getURL() (RCurl) and fromJSON() (jsonlite) to be attached already.
HisPricesDates <- function(Granularity, DayAlign, TimeAlign, AccountToken, Instrument, Start, End) {
  # A typical Oanda API call might look like
  # https://api-fxtrade.oanda.com/v1/candles?instrument=EUR_USD&granularity=D&start=2014-03-21&end=2014-04-21&candleFormat=midpoint&includeFirst=false
  # The query string is built up piece by piece; the trailing comment on each
  # line shows an example of the piece it produces.
  httpaccount <- "https://api-fxtrade.oanda.com"
  auth <- c(Authorization = paste("Bearer", AccountToken, sep = " "))

  QueryHistPrec <- paste0(httpaccount, "/v1/candles?instrument=") # https://api-fxtrade.oanda.com/v1/candles?instrument=
  QueryHistPrec1 <- paste0(QueryHistPrec, Instrument)             # https://api-fxtrade.oanda.com/v1/candles?instrument=EUR_USD
  qstart <- paste0("start=", Start)                               # start=2014-03-21
  qend <- paste0("end=", End)                                     # end=2014-04-21
  qcandleFormat <- "candleFormat=midpoint"                        # candleFormat=midpoint
  qgranularity <- paste0("granularity=", Granularity)             # granularity=D
  qdailyalignment <- paste0("dailyAlignment=", DayAlign)          # dailyAlignment=0
  qincludeFirst <- "includeFirst=false"                           # includeFirst=false
  QueryHistPrec2 <- paste(
    QueryHistPrec1, qgranularity, qstart, qend,
    qcandleFormat, qincludeFirst, qdailyalignment,
    sep = "&"
  )

  InstHistP <- getURL(
    QueryHistPrec2,
    cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl"),
    httpheader = auth
  )
  InstHistPjson <- fromJSON(InstHistP, simplifyDataFrame = TRUE)

  # The candles are taken from the third element of the parsed response.
  candles <- if (length(InstHistPjson) >= 3) InstHistPjson[[3]] else NULL

  if (!is.data.frame(candles)) {
    if (length(candles) == 0 && length(InstHistPjson) >= 3) {
      # No candles in the requested range: hand back an empty, correctly
      # shaped frame instead of failing while renaming columns.
      return(data.frame(
        TimeStamp = as.POSIXct(character(0), tz = TimeAlign),
        Open = numeric(0),
        High = numeric(0),
        Low = numeric(0),
        Close = numeric(0),
        TickVolume = numeric(0),
        Complete = logical(0)
      ))
    }
    # Anything else (e.g. an error payload) is reported with the raw response
    # so the cause is visible.
    stop("Unexpected response for ", Instrument, ": ", InstHistP, call. = FALSE)
  }

  Prices <- data.frame(candles)
  # Keep "YYYY-MM-DD" (chars 1-10) and "HH:MM:SS" (chars 12-19) of the time
  # string, dropping the separator between them and anything after the seconds.
  Prices$time <- paste(substr(Prices$time, 1, 10), substr(Prices$time, 12, 19), sep = " ")
  colnames(Prices) <- c("TimeStamp", "Open", "High", "Low", "Close", "TickVolume", "Complete")
  # Parse directly as UTC so the result does not depend on the session's
  # local time zone, then relabel the display time zone as TimeAlign.
  Prices$TimeStamp <- as.POSIXct(Prices$TimeStamp, format = "%Y-%m-%d %H:%M:%OS", tz = "UTC")
  attributes(Prices$TimeStamp)$tzone <- TimeAlign
  Prices
}
Fungsi ini dipanggil oleh dua skrip R, satu untuk mengunduh data harian dan satu untuk data intraday.
Skrip pembaruan harian, yang ditunjukkan berikutnya,
# Daily update script: for every instrument listed in
# "instrument_daily_update_file", download the daily candles that are newer
# than the recorded last-update date, append the complete ones to
# "<instrument>_raw_OHLC_daily" and record the new last-update date and row count.
#
# DayAlign, TimeAlign and AccountToken are not defined in this script; they
# must already exist in the session, as must the HisPricesDates() function.

# cd to the daily data directory
setwd("~/Documents/octave/oanda_data/daily")

# columns: instrument name, date of last update, number of rows stored so far
all_current_historical_data_list <- read.table(
  "instrument_daily_update_file",
  header = FALSE, sep = "",
  colClasses = c("character", "Date", "numeric")
)

# seq_len() so that an empty update file results in zero iterations
for (ii in seq_len(nrow(all_current_historical_data_list))) {
  instrument <- all_current_historical_data_list[ii, 1]

  # read second column of dates in all_current_historical_data_list as a date index
  date_ix <- as.Date(all_current_historical_data_list[ii, 2])
  todays_date <- as.Date(Sys.time())

  # Only download when today is more than one day after the last update:
  # when today is date_ix + 1 (or earlier) there is nothing new to fetch.
  if (todays_date > date_ix + 1) {
    new_historical_data <- HisPricesDates(
      Granularity = "D", DayAlign, TimeAlign, AccountToken, instrument,
      date_ix, todays_date
    )

    # the new_historical_data might only contain incomplete OHLC data, in which
    # case we do not want to update; only update if the second row exists and
    # is flagged complete (column 7)
    if (nrow(new_historical_data) >= 2 && isTRUE(new_historical_data[2, 7])) {
      # expect date of last line in the update file == date of first line in
      # new_historical_data; if so that row is already stored, so drop it
      if (date_ix == as.Date(new_historical_data[1, 1])) {
        new_historical_data <- new_historical_data[-1, ]
      }

      # similarly, expect the last line of new_historical_data to be an
      # incomplete OHLC bar; if so, drop it
      last_row <- nrow(new_historical_data)
      if (isFALSE(new_historical_data[last_row, 7])) {
        new_historical_data <- new_historical_data[-last_row, ]
      }

      # append new_historical_data to the relevant raw data file
      write.table(
        new_historical_data,
        file = paste(instrument, "raw_OHLC_daily", sep = "_"),
        row.names = FALSE, na = "", col.names = FALSE, sep = ",", append = TRUE
      )

      added_data_length <- nrow(new_historical_data)
      new_last_date <- as.Date(new_historical_data[added_data_length, 1])

      # and amend the update table with the latest update information
      all_current_historical_data_list[ii, 2] <- new_last_date
      all_current_historical_data_list[ii, 3] <-
        all_current_historical_data_list[ii, 3] + added_data_length
    } # end of download if statement
  } # end of date check if statement
} # end of for all_current_historical_data_list loop

# Write updated update table back to file
write.table(
  all_current_historical_data_list,
  file = "instrument_daily_update_file",
  row.names = FALSE, col.names = FALSE, na = ""
)
memiliki pernyataan if sebagai struktur kontrol untuk memeriksa apakah ada kemungkinan data harian baru yang benar-benar dapat diunduh. Hal ini dilakukan dengan memeriksa tanggal last_update yang terdapat dalam “instrument_daily_update_file” dan membandingkannya dengan waktu sistem OS saat ini. Jika ada kemungkinan data baru, skrip akan berjalan dan kemudian memperbarui “instrument_daily_update_file” ini. Jika tidak, skrip akan keluar tanpa melakukan apa pun.
Skrip pembaruan intraday tidak memiliki pemeriksaan yang dimiliki skrip harian karena saya berasumsi akan selalu ada beberapa data intraday baru yang tersedia untuk diunduh. Dalam kasus ini, tanggal last_update dibaca dari “instrument_hourly_update_file” semata-mata untuk bertindak sebagai input ke fungsi HisPricesDates di atas. Akibatnya, skrip ini melibatkan beberapa manipulasi data untuk memastikan bahwa data duplikat tidak dicetak ke dalam file. Skrip ini ditampilkan berikutnya dan diberi banyak komentar untuk menjelaskan apa yang terjadi.
# Hourly update script: for every instrument listed in
# "instrument_hourly_update_file", download hourly candles from the recorded
# last-update date up to tomorrow, drop whatever is already stored in
# "<instrument>_raw_OHLC_hourly", append the rest and record the new
# last-update date and row count.
#
# DayAlign, TimeAlign and AccountToken are not defined in this script; they
# must already exist in the session, as must the HisPricesDates() function.

# cd to the hourly data directory
setwd("~/Documents/octave/oanda_data")

# columns: instrument name, date of last update, number of rows stored so far
all_current_historical_data_list <- read.table(
  "instrument_hourly_update_file",
  header = FALSE, sep = "",
  colClasses = c("character", "Date", "numeric")
)

# seq_len() so that an empty update file results in zero iterations
for (ii in seq_len(nrow(all_current_historical_data_list))) {
  instrument <- all_current_historical_data_list[ii, 1]

  # read second column of dates in all_current_historical_data_list as a date index
  date_ix <- as.Date(all_current_historical_data_list[ii, 2])
  todays_date <- as.Date(Sys.time())

  # download the missing historical data from date_ix to todays_date + 1.
  # If date_ix == todays_date, this downloads all hourly bars for today only.
  new_historical_data <- HisPricesDates(
    Granularity = "H1", DayAlign, TimeAlign, AccountToken, instrument,
    date_ix, todays_date + 1
  )

  # the new_historical_data will almost certainly have incomplete hourly OHLC
  # data in its last line (column 7 is the "complete" flag), so delete it.
  # The row-count guard avoids evaluating if() on a zero-length condition.
  n_downloaded <- nrow(new_historical_data)
  if (n_downloaded > 0 && isFALSE(new_historical_data[n_downloaded, 7])) {
    new_historical_data <- new_historical_data[-n_downloaded, ]
  }
  if (nrow(new_historical_data) == 0) {
    next # nothing complete was downloaded for this instrument
  }

  # read the last line only of the current OHLC file for this instrument
  raw_file <- paste(instrument, "raw_OHLC_hourly", sep = "_") # get the filename
  # unix command to read the last line of this file; the name is shell-quoted
  system_command <- paste("tail -1", shQuote(raw_file), sep = " ")
  last_line <- system(system_command, intern = TRUE)
  if (length(last_line) == 0) {
    warning("No existing data found in ", raw_file, "; skipping ", instrument, call. = FALSE)
    next
  }
  # text= lets read.csv manage (and close) its own connection
  old_historical_data <- read.csv(
    text = last_line, header = FALSE, sep = ",", stringsAsFactors = FALSE
  )

  old_historical_data_end_date_time <- old_historical_data[1, 1]          # date value to be matched
  new_historical_data_date_times <- as.character(new_historical_data[, 1]) # vector to search
  ix <- charmatch(old_historical_data_end_date_time, new_historical_data_date_times)

  # charmatch() gives NA for no match and 0 for an ambiguous match; in either
  # case we cannot tell which rows are already stored, so do not append.
  if (is.na(ix) || ix == 0) {
    warning(
      "Could not locate last stored bar (", old_historical_data_end_date_time,
      ") in downloaded data for ", instrument, "; skipping",
      call. = FALSE
    )
    next
  }

  # delete that part of new_historical_data which is already contained in the file
  new_historical_data <- new_historical_data[-seq_len(ix), ]
  added_data_length <- nrow(new_historical_data) # length of added new data
  if (added_data_length == 0) {
    next # file is already up to date; leave the update table untouched
  }

  # append new_historical_data to the relevant raw data file
  write.table(
    new_historical_data,
    file = raw_file,
    row.names = FALSE, na = "", col.names = FALSE, sep = ",", append = TRUE
  )

  new_last_date <- as.Date(new_historical_data[added_data_length, 1]) # date of last update

  # and amend the update table with the latest update information
  all_current_historical_data_list[ii, 2] <- new_last_date
  all_current_historical_data_list[ii, 3] <-
    all_current_historical_data_list[ii, 3] + added_data_length
} # end of for all_current_historical_data_list loop

# finally, write the updated update table back to file
write.table(
  all_current_historical_data_list,
  file = "instrument_hourly_update_file",
  row.names = FALSE, col.names = FALSE, na = ""
)
Ada satu hal penting yang perlu diperhatikan pada baris 29 hingga 33 skrip intraday di atas, yaitu bagian yang membaca baris terakhir berkas data: bagian kode ini bergantung pada perintah berbasis Unix (`tail`), yang berarti bahwa perintah ini hampir pasti tidak akan berfungsi pada OS berbasis Windows. Pengguna Windows harus mencari cara sendiri untuk memuat baris terakhir dari berkas yang relevan, atau memuat seluruh berkas data historis dan mengindeks baris terakhir saja.