quantmod::getSymbols
). Also, the
data behind Yahoo Finance’s source code also had a big
change, and many of the parsing methods I designed may not compatible
either. I decided to build a new version to fix all the problems and
added some new features to the dashboard.user_inputs
|-- symbol
| |-- default_symbol
| |-- customized_symbol
|-- name
|-- market_price
|-- market_change
|-- market_change_percent
|-- currency
|-- time_period
| |-- default_time_period
| |-- customized_time_period
|-- frequency
|-- dataframe_columns
|-- visualization_type
exchange_index
|-- default_index
|-- customized_index
default_index:
- ^DJI
- ^GSPC
- ^IXIC
- ^RUT
- ^FTSE
- ^N225
- 000001.SS
- 399106.SZ
- ^TNX
- BTC-USD
customized_index:
- C (default)
read_source_code = function(symbol) {
web_link = paste0("https://finance.yahoo.com/quote/", symbol)
source_code = read_lines(web_link)
return(source_code)
}
extract_main_info_line = function(source_code) {
h1_class_locations = which(source_code %>%
str_detect(pattern = "h1 class") %>%
equals(TRUE))
### "h1 class" not in source code caused by invalid input exchange code
if (h1_class_locations %>% length() %>% equals(0)) {
# cat("h1 class has no location, the exchange code might be invalid")
return(NA)
}
### pick the first "h1 class" line
if (h1_class_locations %>% length() %>% equals(1)) {
main_info_line_index = h1_class_locations
} else {
main_info_line_index = h1_class_locations %>% extract(1)
# cat("h1 class has multiple locations")
}
main_info_line = source_code %>% extract(main_info_line_index)
return(main_info_line)
}
h_clean_name = function(raw_name) {
if (raw_name %>% str_detect(pattern = "&")) {
cleaned_name = raw_name %>%
str_replace(pattern = "&", replacement = "&")
} else {
cleaned_name = raw_name
}
return(cleaned_name)
}
extract_name = function(main_info_line) {
### main_info_line is NA caused by invalid input exchange code
if (main_info_line %>% is.na()) {
return(NA)
}
name = main_info_line %>%
str_split(pattern = "<h1 class") %>% extract2(1) %>%
extract(2) %>%
str_split(pattern = "</h1>") %>% extract2(1) %>%
extract(1) %>%
str_split(pattern = ">") %>% extract2(1) %>%
extract(2) %>%
h_clean_name()
return(name)
}
extract_market_price = function(main_info_line) {
### main_info_line is NA caused by invalid input exchange code
if (main_info_line %>% is.na()) {
return(NA)
}
market_price = main_info_line %>%
str_split(pattern = "regularMarketPrice") %>% extract2(1) %>%
extract(2) %>%
str_split(pattern = "</fin-streamer") %>% extract2(1) %>%
extract(1) %>%
str_split(pattern = ">") %>% extract2(1) %>%
extract(2)
return(market_price)
}
extract_market_change = function(main_info_line) {
### main_info_line is NA caused by invalid input exchange code
if (main_info_line %>% is.na()) {
return(NA)
}
market_change = main_info_line %>%
str_split(pattern = "regularMarketChange") %>% extract2(1) %>%
extract(2) %>%
str_split(pattern = "</fin-streamer") %>% extract2(1) %>%
extract(1) %>%
str_split(pattern = ">") %>% extract2(1) %>%
extract(3) %>%
str_split(pattern = "<") %>% extract2(1) %>%
extract(1)
return(market_change)
}
extract_market_change_percent = function(main_info_line) {
### main_info_line is NA caused by invalid input exchange code
if (main_info_line %>% is.na()) {
return(NA)
}
market_change_percent = main_info_line %>%
str_split(pattern = "regularMarketChangePercent") %>% extract2(1) %>%
extract(2) %>%
str_split(pattern = "</fin-streamer") %>% extract2(1) %>%
extract(1) %>%
str_split(pattern = ">") %>% extract2(1) %>%
extract(3) %>%
str_split(pattern = "<") %>% extract2(1) %>%
extract(1)
# %>%
# str_split(pattern = "\\(") %>% extract2(1) %>%
# extract(2) %>%
# str_split(pattern = "\\)") %>% extract2(1) %>%
# extract(1)
return(market_change_percent)
}
extract_currency = function(main_info_line) {
### main_info_line is NA caused by invalid input exchange code
if (main_info_line %>% is.na()) {
return(NA)
}
currency = main_info_line %>%
str_split(pattern = "Currency in ") %>% extract2(1) %>%
extract(2) %>%
str_split(pattern = "<") %>% extract2(1) %>%
extract(1)
return(currency)
}
h_clean_df_colname = function(col_name) {
cleaned_col_name = col_name %>%
str_split(pattern = "\\.") %>% extract2(1)
cleaned_col_name = cleaned_col_name %>%
extract(cleaned_col_name %>% length())
return(cleaned_col_name)
}
extract_data = function(symbol, from_date = today() - years(1), to_date = today(), frequency = "daily") {
df = getSymbols(Symbols = symbol,
from = from_date,
to = to_date,
periodicity = frequency,
return.class = "xts",
auto.assign = FALSE) %>%
as.data.frame()
colnames(df) = colnames(df) %>% sapply(h_clean_df_colname)
df = df %>%
mutate(Date = df %>% rownames()) %>%
mutate(Date = Date %>% as.Date(),
Open = Open %>% round(digits = 2),
High = High %>% round(digits = 2),
Low = Low %>% round(digits = 2),
Close = Close %>% round(digits = 2),
Adjusted = Adjusted %>% round(digits = 2)) %>%
arrange(Date %>% desc())
rownames(df) = NULL
df = df %>%
select(Date, Open, High, Low, Close, Adjusted, Volume) %>%
rename(`Adjusted Close` = Adjusted)
return(df)
}
test_symbols = c("^DJI",
"^GSPC",
"^IXIC",
"^RUT",
"^FTSE",
"^N225",
"000001.SS",
"399001.SZ",
"^TNX",
"IVV")
# test_symbols = test_symbols[1:3]
test_source_codes = test_symbols %>% lapply(read_source_code)
test_main_info_lines = test_source_codes %>% lapply(extract_main_info_line)
test_names = test_main_info_lines %>% sapply(extract_name)
test_market_prices = test_main_info_lines %>% sapply(extract_market_price)
test_market_changes = test_main_info_lines %>% sapply(extract_market_change)
test_market_change_percents = test_main_info_lines %>% sapply(extract_market_change_percent)
test_currencies = test_main_info_lines %>% sapply(extract_currency)
df = tibble(`Name` = test_names,
`Market Price` = test_market_prices,
`Market Change` = test_market_changes,
`Market Change Percent` = test_market_change_percents,
`Currency` = test_currencies)
df
# test_symbols[1]
df2 = test_symbols[8] %>% extract_data()
# df2 = getSymbols(Symbols = test_symbols[7],
# from = "2022-01-01",
# to = "2022-08-01",
# periodicity = "daily",
# return.class = "xts",
# auto.assign = FALSE)
df2
p1 = ggplot(data = df2,
mapping = aes(x = Date, y = `Adjusted Close`)) +
labs(y = "Adjusted Close") +
geom_line(color = "DodgerBlue", size = 1.2)
p1
p2 = plot_ly(data = df2,
x = ~Date,
width = 800,
height = 480) %>%
add_trace(y = ~`Adjusted Close`,
type = "scatter",
mode = "lines",
name = "Adjusted Close",
line = list(color = "Orange",
width = 2)
) %>%
add_trace(y = ~Open,
type = "scatter",
mode = "lines",
name = "Open",
visible = "legendonly",
line = list(color = "LimeGreen",
width = 2,
dash = "dash")
) %>%
add_trace(y = ~High,
type = "scatter",
mode = "lines",
name = "High",
line = list(color = "DodgerBlue",
width = 1,
dash = "dot")
) %>%
add_trace(y = ~Low,
type = "scatter",
mode = "lines",
name = "Low",
line = list(color = "Crimson",
width = 1,
dash = "dot")
) %>%
layout(font = list(family = "Helvetica",
size = 12,
color = "Black"),
title = paste("From",
min(df2$Date),
"to",
max(df2$Date)),
xaxis = list(title = "Date"),
yaxis = list(title = "Value"),
legend = list(x = 1,
y = 0.9,
title = list(
text = "Value Type",
font = list(family = "Helvetica",
size = 12,
color = "Blue")),
bgcolor = "rgba(0,0,0,0)"),
plot_bgcolor = "WhiteSmoke")
p2
Copyright © 2022 Harry Lu. All rights reserved.