1 Background

I joined Prof. Hong-Kun Zhang’s research group in the field of financial mathematics when I was at UMass Amherst. At that time, the data were extracted from Yahoo Finance.
After completing my Master’s program at Cornell, I decided to improve my skills in R, including:
- Web wrangling
- Regex and string manipulation
- Function
- Tidyverse ecosystem syntax
- Shiny (dashboard)
So, in late 2019, I built a Shiny application (version 1) using yahoo finance data and deployed it on RStudio’s shinyapps.io.
In 2022, I found the app can barely run. There are warnings when opening the app, and both chart and table could not display properly (may cause by a bug from quantmod::getSymbols). Also, the data behind Yahoo Finance’s source code also had a big change, and many of the parsing methods I designed may not compatible either. I decided to build a new version to fix all the problems and added some new features to the dashboard.

2 Design

2.1 User Inputs

user_inputs
 |-- symbol
 |    |-- default_symbol
 |    |-- customized_symbol
 |-- name
 |-- market_price
 |-- market_change
 |-- market_change_percent
 |-- currency
 |-- time_period
 |    |-- default_time_period
 |    |-- customized_time_period
 |-- frequency
 |-- dataframe_columns
 |-- visualization_type

2.1.1 Exchange Index

exchange_index
 |-- default_index
 |-- customized_index

default_index:
- ^DJI
- ^GSPC
- ^IXIC
- ^RUT
- ^FTSE
- ^N225
- 000001.SS
- 399106.SZ
- ^TNX
- BTC-USD

customized_index:
- C (default)

3 Implementation

3.1 Helper Functions

3.1.1 Read Source Code

read_source_code = function(symbol) {
  web_link = paste0("https://finance.yahoo.com/quote/", symbol)
  source_code = read_lines(web_link)
  return(source_code)
}

3.1.2 Extract Info

extract_main_info_line = function(source_code) {
  h1_class_locations = which(source_code %>%
                               str_detect(pattern = "h1 class") %>%
                               equals(TRUE))
  
  ### "h1 class" not in source code caused by invalid input exchange code
  if (h1_class_locations %>% length() %>% equals(0)) {
    # cat("h1 class has no location, the exchange code might be invalid")
    return(NA)
  }
  
  ### pick the first "h1 class" line
  if (h1_class_locations %>% length() %>% equals(1)) {
    main_info_line_index = h1_class_locations
  } else {
    main_info_line_index = h1_class_locations %>% extract(1)
    # cat("h1 class has multiple locations")
  }
  
  main_info_line = source_code %>% extract(main_info_line_index)
  return(main_info_line)
}

3.1.3 Extract Name

h_clean_name = function(raw_name) {
  if (raw_name %>% str_detect(pattern = "&amp;")) {
    cleaned_name = raw_name %>%
      str_replace(pattern = "&amp;", replacement = "&")
  } else {
    cleaned_name = raw_name
  }
  return(cleaned_name)
}

extract_name = function(main_info_line) {
  ### main_info_line is NA caused by invalid input exchange code
  if (main_info_line %>% is.na()) {
    return(NA)
  }
  
  name = main_info_line %>%
    str_split(pattern = "<h1 class") %>% extract2(1) %>%
    extract(2) %>%
    str_split(pattern = "</h1>") %>% extract2(1) %>%
    extract(1) %>%
    str_split(pattern = ">") %>% extract2(1) %>%
    extract(2) %>%
    h_clean_name()
  return(name)
}

3.1.4 Extract Market Price

extract_market_price = function(main_info_line) {
  ### main_info_line is NA caused by invalid input exchange code
  if (main_info_line %>% is.na()) {
    return(NA)
  }
  
  market_price = main_info_line %>%
    str_split(pattern = "regularMarketPrice") %>% extract2(1) %>%
    extract(2) %>%
    str_split(pattern = "</fin-streamer") %>% extract2(1) %>%
    extract(1) %>%
    str_split(pattern = ">") %>% extract2(1) %>%
    extract(2)
  return(market_price)
}

3.1.5 Extract Market Change

extract_market_change = function(main_info_line) {
  ### main_info_line is NA caused by invalid input exchange code
  if (main_info_line %>% is.na()) {
    return(NA)
  }
  
  market_change = main_info_line %>%
    str_split(pattern = "regularMarketChange") %>% extract2(1) %>%
    extract(2) %>%
    str_split(pattern = "</fin-streamer") %>% extract2(1) %>%
    extract(1) %>%
    str_split(pattern = ">") %>% extract2(1) %>%
    extract(3) %>%
    str_split(pattern = "<") %>% extract2(1) %>%
    extract(1)
  return(market_change)
}

3.1.6 Extract Market Change Percent

extract_market_change_percent = function(main_info_line) {
  ### main_info_line is NA caused by invalid input exchange code
  if (main_info_line %>% is.na()) {
    return(NA)
  }
  
  market_change_percent = main_info_line %>%
    str_split(pattern = "regularMarketChangePercent") %>% extract2(1) %>%
    extract(2) %>%
    str_split(pattern = "</fin-streamer") %>% extract2(1) %>%
    extract(1) %>%
    str_split(pattern = ">") %>% extract2(1) %>%
    extract(3) %>%
    str_split(pattern = "<") %>% extract2(1) %>%
    extract(1)
  # %>%
  #   str_split(pattern = "\\(") %>% extract2(1) %>%
  #   extract(2) %>%
  #   str_split(pattern = "\\)") %>% extract2(1) %>%
  #   extract(1)
  return(market_change_percent)
}

3.1.7 Extract Currency

extract_currency = function(main_info_line) {
  ### main_info_line is NA caused by invalid input exchange code
  if (main_info_line %>% is.na()) {
    return(NA)
  }
  
  currency = main_info_line %>%
    str_split(pattern = "Currency in ") %>% extract2(1) %>%
    extract(2) %>%
    str_split(pattern = "<") %>% extract2(1) %>%
    extract(1)
  return(currency)
}

3.1.8 Extract Data

h_clean_df_colname = function(col_name) {
  cleaned_col_name = col_name %>%
    str_split(pattern = "\\.") %>% extract2(1)
  cleaned_col_name = cleaned_col_name %>%
    extract(cleaned_col_name %>% length())
  return(cleaned_col_name)
}

extract_data = function(symbol, from_date = today() - years(1), to_date = today(), frequency = "daily") {
  df = getSymbols(Symbols = symbol,
                  from = from_date,
                  to = to_date,
                  periodicity = frequency,
                  return.class = "xts",
                  auto.assign = FALSE) %>%
    as.data.frame()
  colnames(df) = colnames(df) %>% sapply(h_clean_df_colname)
  df = df %>%
    mutate(Date = df %>% rownames()) %>%
    mutate(Date = Date %>% as.Date(),
           Open = Open %>% round(digits = 2),
           High = High %>% round(digits = 2),
           Low = Low %>% round(digits = 2),
           Close = Close %>% round(digits = 2),
           Adjusted = Adjusted %>% round(digits = 2)) %>%
    arrange(Date %>% desc())
  rownames(df) = NULL
  df = df %>%
    select(Date, Open, High, Low, Close, Adjusted, Volume) %>%
    rename(`Adjusted Close` = Adjusted)
  return(df)
}

3.2 test

3.2.1 test_symbols

test_symbols = c("^DJI",
                 "^GSPC",
                 "^IXIC",
                 "^RUT",
                 "^FTSE",
                 "^N225",
                 "000001.SS",
                 "399001.SZ",
                 "^TNX",
                 "IVV")
# test_symbols = test_symbols[1:3]

3.2.2 df1: string extraction

test_source_codes = test_symbols %>% lapply(read_source_code)
test_main_info_lines = test_source_codes %>% lapply(extract_main_info_line)
test_names = test_main_info_lines %>% sapply(extract_name)
test_market_prices = test_main_info_lines %>% sapply(extract_market_price)
test_market_changes = test_main_info_lines %>% sapply(extract_market_change)
test_market_change_percents = test_main_info_lines %>% sapply(extract_market_change_percent)
test_currencies = test_main_info_lines %>% sapply(extract_currency)

df = tibble(`Name` = test_names,
            `Market Price` = test_market_prices,
            `Market Change` = test_market_changes,
            `Market Change Percent` = test_market_change_percents,
            `Currency` = test_currencies)
df

3.2.3 df2: data extraction

# test_symbols[1]
df2 = test_symbols[8] %>% extract_data()
# df2 = getSymbols(Symbols = test_symbols[7],
#                   from = "2022-01-01",
#                   to = "2022-08-01",
#                   periodicity = "daily",
#                   return.class = "xts",
#                   auto.assign = FALSE)
df2

3.2.4 p1: static plot

p1 = ggplot(data = df2,
            mapping = aes(x = Date, y = `Adjusted Close`)) +
      labs(y = "Adjusted Close") +
      geom_line(color = "DodgerBlue", size = 1.2)

p1

3.2.5 p2: interactive plot

p2 = plot_ly(data = df2,
             x = ~Date,
             width = 800,
             height = 480) %>%
  add_trace(y = ~`Adjusted Close`,
            type = "scatter",
            mode = "lines",
            name = "Adjusted Close",
            line = list(color = "Orange",
                        width = 2)
            ) %>%
  add_trace(y = ~Open,
            type = "scatter",
            mode = "lines",
            name = "Open",
            visible = "legendonly",
            line = list(color = "LimeGreen",
                        width = 2,
                        dash = "dash")
            ) %>%
  add_trace(y = ~High,
            type = "scatter",
            mode = "lines",
            name = "High",
            line = list(color = "DodgerBlue",
                        width = 1,
                        dash = "dot")
            ) %>%
  add_trace(y = ~Low,
            type = "scatter",
            mode = "lines",
            name = "Low",
            line = list(color = "Crimson",
                        width = 1,
                        dash = "dot")
            ) %>%
  layout(font = list(family = "Helvetica",
                     size = 12,
                     color = "Black"),
         title = paste("From",
                       min(df2$Date),
                       "to",
                       max(df2$Date)),
         xaxis = list(title = "Date"),
         yaxis = list(title = "Value"),
         legend = list(x = 1,
                       y = 0.9,
                       title = list(
                         text = "Value Type",
                         font = list(family = "Helvetica",
                                     size = 12,
                                     color = "Blue")),
                       bgcolor = "rgba(0,0,0,0)"),
         plot_bgcolor = "WhiteSmoke")

p2

Yahoo Finance Data

Documentation

Da (Harry) Lu

2022/07/30 - 2022/08/09

1 Background

2 Design

2.1 User Inputs

2.1.1 Exchange Index

3 Implementation

3.1 Helper Functions

3.1.1 Read Source Code

3.1.2 Extract Info

3.1.3 Extract Name

3.1.4 Extract Market Price

3.1.5 Extract Market Change

3.1.6 Extract Market Change Percent

3.1.7 Extract Currency

3.1.8 Extract Data

3.2 test

3.2.1 test_symbols

3.2.2 df1: string extraction

3.2.3 df2: data extraction

3.2.4 p1: static plot

3.2.5 p2: interactive plot

3.3 Main Functions

3.4 Shiny