1 R
1.1 Create template tables
1.1.1 Template tables
Create table with random dates between two dates. Use max date and create a table with dates until this date.
#https://www.cyclismo.org/tutorial/R/basicOps.html#basic-operations
library(dplyr)
df <- tibble(
date = sample(seq(as.Date('2020/06/01'), as.Date('2025/01/01'), by="day"), 20)
)
df2 <-tibble(
date = seq.Date(Sys.Date(), max(as.Date(df$date)), by = "day")
)
Create new column with random number
library(tidyverse)
df <- data.frame(Amount = 1:10)
df %>%
rowwise %>%
mutate(newColumn = sample(1:5, 1))
#It´s often neccessary to ungroup rowwise.
df <- as.data.frame(df)
Create table with a combination of fixed and random numbers
library(tidyverse)
df <- tibble(
value = seq(10,90,1),
rand = seq(10,90,1) +runif(81, min=-10, max=15)
)
Create df with dates by end of quarter
####Create dataframe with dates by quarter. end of quarter
dates <- data.frame(
date = seq(as.Date('2010-02-01'), Sys.Date(), by = 'quarter')-1
)
**Create a list and bind together, only 1 column*
1.1.2 Creating list-columns
library(tidyverse)
library(purrr)
library(gapminder)
#List columns
#data.frame treats a list as a list of columns
data.frame(x = list(1:3, 3:5))
#Use of I can prevent this but dosen´t look good.
data.frame(
x = I(list(1:3, 3:5)),
y = c("1,2","3,4,5")
)
#Tibble can handle it
tibble(
x = list(1:3, 3:5),
y = c("1,2", "3,4,5")
)
#Even easier with tribble
tribble(
~x, ~y,
1:3, "1,2",
3:5, "3,4,5"
)
#nest() creates a nested data frame with a list-column of data frames.
gapminder %>%
group_by(country, continent)%>%
nest()
#from vectorized functions
df <- tribble(
~x1,
"a,b,c",
"d,e,f,g"
)
df %>%
mutate(x2 = stringr::str_split(x1, ","))%>%
unnest()
#using Purrr
sim <- tribble(
~f, ~params,
"runif", list(min = -1, max=-1),
"rnorm", list(sd = 5),
"rpois", list(lambda=10)
)
sim %>%
mutate(sims = invoke_map(f, params, n = 10))
#from multivalued summaries, you need to wrap it in a list for being able to summarise.
mtcars %>%
group_by(cyl)%>%
summarize(q=list(quantile(mpg)))
probs <- c(0.01, 0.25,0.5,0.75,0.99)
mtcars %>%
group_by(cyl)%>%
summarize(p = list(probs), q=list(quantile(mpg)))
#from a named list
x <- list(
a = 1:5,
b = 3:4,
c = 5:6
)
df <- enframe(x)
df
df %>%
mutate(smry = map2_chr(
name,
value,
~ stringr::str_c(.x, ": ", .y[1])
))
1.2 Import & Export
1.2.1 Import
Import fast using httpcashe
Improving efficiency in importing
get_data <- function(url) {
httpcache::GET(url) %>%
httr::content()
}
url_jobless_claims="https://oui.doleta.gov/unemploy/csv/ar539.csv"
data_jobless_claims <- get_data(url_jobless_claims)
Import all files in a folder
Import all files in a folder. In the example below files are named “2020-05-05 Saldo”. Import and create a table where the date of the filename is used in a column. Change name for column 1 and 2.
Map has similiar functionality to lapply. When you add _dfr it will generate data.frames and that these is merged.
parse_date <- function(x) as.Date(gsub( ".*(\\d{4}-\\d{2}-\\d{2}).*", "\\1", x))
dir_loc <- '...RX-filer\\Saldo'
rix_saldo <- dir(dir_loc, full.names = T) %>%
map_dfr(~{
read.csv2(.x, skip = 1, header = F) %>%
mutate(date = as.Date(parse_date(basename(.x))))
})
colnames(rix_saldo)[colnames(rix_saldo) == 'V1'] <- 'Participant'
colnames(rix_saldo)[colnames(rix_saldo) == 'V2'] <- 'Saldo'
Import all files in a folder with conditions of name
Use pattern to set the conditions which files that should be imported. For example, those who ends with “Saldo.csv” as below. Regexp (see http://jkorpela.fi/perl/regexp.html) is used in pattern. For example, you need to use \. for the dot. Also, ^ can be used to determine the beginning of a string while $ is used to set the end. pattern = "^.Saldo\.csv$"*
parse_date <- function(x) as.Date(gsub( ".*(\\d{4}-\\d{2}-\\d{2}).*", "\\1", x))
dir_loc <- '...RX-filer\\Saldo'
rix_saldo <- dir(dir_loc, full.names = T, pattern = "^.*Saldo\\.csv$") %>%
map_dfr(~{
read.csv2(.x, skip = 1, header = F) %>%
mutate(date = as.Date(parse_date(basename(.x))))
})
colnames(rix_saldo)[colnames(rix_saldo) == 'V1'] <- 'Participant'
colnames(rix_saldo)[colnames(rix_saldo) == 'V2'] <- 'Saldo'
Import all excel files in a folder with condition of name
#Import a .xlsm files, one particular sheet.
dir_loc <- '...\\Operations\\History_bokningssnurra'
dir <-dir(dir_loc, full.names = T, pattern =".xlsm")
rawData <- map_dfr(dir, read_xlsx, sheet = "indata_AUPD", col_names = TRUE, col_types = 'text')
Import excel from web
Import excel from web by downloading it temp
library(readxl)
url_data_gdp <- ("https://www.bea.gov/system/files/2020-04/qgdpstate0420.xlsx")
download.file(url=url_data_gdp, destfile="localcopy.xlsx", mode="wb")
#Table 1: Percent Change in Real Gross Domestic Product (GDP) by State and state
table1 <- read_excel('localcopy.xlsx', sheet = 1, skip =4, col_names = FALSE)
1.2.2 Export
Export to txt file
Export to csv file
#Using both write.csv2 or write.table
library(data.table)
#Write csv2. No row.names, na = "" and quote ("") mark as false
write.csv2(total_purchases_commercial_papers, ".....R_tables\\Output_data\\webpage_purchases\\Total_purchases_commercial_papers.csv",row.names=FALSE,na = "", quote = FALSE)
#Write.table. No row.names, na = "" and quote ("") mark as false
write.table(total_purchases_commercial_papers,file="...\\Total_purchases_commercial_papers3.csv",row.names=FALSE,sep=";",dec = " ",quote = FALSE)
Get table to paste into excel
1.3 Tidy & Transform
1.3.1 Cleaning
Cleaning some data
Gather, Spread, Separate, Unite
library(tidyr)
#Create a messy dataset
messy <- data.frame(
country = c("A", "B", "C"),
q1_2017 = c(0.03, 0.05, 0.01),
q2_2017 = c(0.05, 0.07, 0.02),
q3_2017 = c(0.04, 0.05, 0.01),
q4_2017 = c(0.03, 0.02, 0.04))
messy
#Reshape the data. in this function we create two new variables instead of the one in the original dataset.
tidier <- messy%>%
gather(quarter, growth, q1_2017:q4_2017)
tidier
#Spread
#the spread function does the opposite of gather.
#Reshape the tidier dataset back to messy.
messy_1 <- tidier %>%
spread(quarter, growth)
messy_1
#Separate
#Separate splits a column into two according to a separator. This function is helpful in some situations where the variable is a date, i.e. separate year and month.
separate_tidier <- tidier %>%
separate(quarter, c("Qrt", "year"), sep ="_")
head(separate_tidier)
#Unite
#Unite concatenates two columns into one.
unit_tidier <- separate_tidier%>%
unite(Quarter, Qrt, year, sep = "_")
head(unit_tidier)
1.3.2 Expand
Expand table
One example with expanding to all alternatives. Another to fill in gaps.
library(tidyverse)
library(dplyr)
##Expand all alternatives
a <- c(1:10)
b <- c(1:10)
c <- c(1:10)
df <- tibble(a,b,c)
all_combinations <- expand(df, a,b,c)
#Expand by missing Date
df <- tibble(
year = c(2010, 2010, 2010, 2010, 2012, 2012, 2012),
qtr = c( 1, 2, 3, 4, 1, 2, 3),
return = rnorm(7)
)
df %>% expand(year, qtr)
df %>% expand(year = 2010:2012, qtr)
df %>% complete(year = full_seq(year, 1), qtr)
1.3.3 Join and Merge
Join tables
Different ways to join tables.
library(dplyr)
df_primary <- tribble(
~ID,~y,
"A", 5,
"B", 5,
"C", 8,
"D", 0,
"E", 9)
df_secondary <- tribble(
~ID,~y,
"A", 30,
"B", 21,
"C", 22,
"D", 25,
"F", 29)
#Most common way to merge two datasets is to uset the left_join() function.
left_join_ <- left_join(df_primary, df_secondary, by ='ID')
#The right_join works like the left one.
right_join_ <- right_join(df_primary, df_secondary, by = 'ID')
#When we are sure that two datasets won´t match, we can consider to return only rows existing in both datasets.
#This is legit when we need a clean dataset or when we dont want to impute missing values with the mean or median.
inner_join_ <- inner_join(df_primary, df_secondary, by ='ID')
# Full_join keeps all observations and replace missing values with NA.
full_join_ <- full_join(df_primary, df_secondary, by = 'ID')
Join tables on multiple conditions
Join Tables on multiple conditions
library(dplyr)
df_primary <- tribble(
~ID, ~year, ~items,
"A", 2015,3,
"A", 2016,7,
"A", 2017,6,
"B", 2015,4,
"B", 2016,8,
"B", 2017,7,
"C", 2015,4,
"C", 2016,6,
"C", 2017,6)
df_secondary <- tribble(
~ID, ~year, ~prices,
"A", 2015,9,
"A", 2016,8,
"A", 2017,12,
"B", 2015,13,
"B", 2016,14,
"B", 2017,6,
"C", 2015,15,
"C", 2016,15,
"C", 2017,13)
left_join(df_primary, df_secondary, by = c('ID', 'year'))
Merge Data Frames
Merge Data Frames in R: Full and partial match
producers <- data.frame(
surname = c("Spielberg","Scorsese","Hitchcock","Tarantino","Polanski"),
nationality = c("US","US","UK","US","Poland"),
stringsAsFactors=FALSE)
# Create destination dataframe
movies <- data.frame(
surname = c("Spielberg",
"Scorsese",
"Hitchcock",
"Hitchcock",
"Spielberg",
"Tarantino",
"Polanski"),
title = c("Super 8",
"Taxi Driver",
"Psycho",
"North by Northwest",
"Catch Me If You Can",
"Reservoir Dogs","Chinatown"),
stringsAsFactors=FALSE)
m1 <- merge(producers, movies, by.x = "surname")
m1
# Change name of ` movies ` dataframe
colnames(movies)[colnames(movies) == 'surname'] <- 'name'
# Merge with different key value
m2 <- merge(producers, movies, by.x = "surname", by.y = "name")
##Partial match
# Create a new producer
add_producer <- c('Lucas', 'US')
# Append it to the ` producer` dataframe
producers <- rbind(producers, add_producer)
# Use a partial merge
m3 <-merge(producers, movies, by.x = "surname", by.y = "name", all.x = TRUE)
m3
1.3.4 Transforming data with Apply etc
apply(), lapply(), sapply(), tapply()
apply()
library(dplyr)
m1 <- matrix(c<-(1:10), nrow=5,ncol=6)
m1
#Sums columns
a_m1 <- apply(m1,2,sum)
a_m1
#Sums rows
a_m1 <- apply(m1,1,sum)
a_m1
lapply()
library(dplyr)
movies <- c("spyderman", "batman", "vertigo", "chinatown")
movies_lower <- lapply(movies, tolower)
str(movies_lower)
#if we like to convert the list into a vector we can use unlist()
movies_lower <- unlist(lapply(movies, tolower))
str(movies_lower)
sapply()
#sapply() function does the same jobs as lapply() function but returns a vectorÄ
library(dplyr)
dt <- cars
lmn_cars <- lapply(dt, min)
smn_cars <- sapply(dt,min)
lmn_cars
smn_cars
lmxcars <- lapply(dt,max)
smxcars <- sapply(dt,max)
lmxcars
smxcars
#lets create a function names avg to compute the average of the minimun and maximun of the vector.
avg <- function(x){
(min(x) + max(x))/2
}
fcars <- sapply(dt, avg)
fcars
#sapply() function is more efficient than lapply() in the output returned because sapply() store values directly into a vector.
#it is possible to use lapply or sapply interchangeable to slice a data frame.
#lets compute a function that takes a vector of numerical values and returns a vector that only contains the values that are strictly above the average.
below_ave <- function(x){
ave <- mean(x)
return(x[x>ave])
}
dt_s <- sapply(dt, below_ave)
dt_l <- lapply(dt, below_ave)
identical(dt_s, dt_l)
tapply()
1.3.5 Tally-function
Tally()
Tally is a useful wrapper for summarise with grouping conditions. In the example below we have a data set with countries. For US, there are no aggregate number, so we need to summarize each state.
library(tidyr)
library(dplyr)
df <- tibble::tribble(
~country, ~state, ~t1, ~t2,
"SE", NA, 1,2,
"US", "A", 10,20,
"US", "B", 11,21,
)
df%>%
tidyr::gather(date, value, -country, -state)%>%
group_by(country, date) %>%
tally(value)
## Iteration with purrr
Purrr: map functions map() makes a list map_lgl() makes a logical vector map_int() makes an integer vector map_dbl() makes a double vector map_chr() makes a character vector
#calculate mean
library(tidyverse)
df <- tibble(
a = rnorm(10),
b = rnorm(10),
c = rnorm(10),
d = rnorm(10)
)
map_dbl(df, mean)
z <- list(x = 1:3, y = 4:5)
map_int(z, length)
#using splits. splits mtcars in three pieces and fits the same linear model to each piece.
models <- mtcars %>%
split(.$cyl) %>%
map(~lm(mpg ~ wt, data =.))
#. refers to the current list element, as i referred to the current index in the for loop.
models %>%
map(summary) %>%
map_dbl("r.squared")
#use an integer to select elements
x <- list(list(1,2,3), list(4,5,6), list(7,8,9))
x %>% map_dbl(2)
#Dealing with failure: Safely
safe_log <- safely(log)
str(safe_log(10))
x <- list(1,10,"a")
y <- x %>% map(safely(log))
str(y)
#combine with transpose to get two lists, one with failure and one that works.
y <- y %>% transpose()
str(y)
#work with the values that are ok or not ok.
is_ok <- y$error %>%map_lgl(is_null)
x[!is_ok]
y$result[is_ok] %>% flatten_dbl()
#other useful verbs with purrr: possibly() and quietly()
# possibly
x <- list(1,10,"a")
x %>% map_dbl(possibly(log, NA_real_))
# quietly()
x <- list(1, -1)
x %>% map(quietly(log)) %>%str()
1.3.6 Mapping over multiple arguments
map2
##Mapping over multiple arguments
## map2: iterates over two vectors parallel.
#arguments that vary for each call come before the function, arguments that are the same for every call come after.
mu <- list(5,10,-3)
sigma <- list(1,5,10)
map2(mu, sigma, rnorm, n=5) %>% str()
## pmap(): takes a list of arguments
n <- lsit(1,3,5)
args1 <- list(n,mu,sigma)
args1 %>%
pmap(rnorm) %>%
str()
#good practise is to name the arguments.
args2 <- list(mean = mu, sd = sigma, n = n)
args2 %>%
pmap(rnorm)%>%
str()
#since the argments are the same length, it makes sense to store them in a data frame.
params <- tribble(
~mean, ~sd, ~n,
5, 1, 1,
10, 5, 3,
-3, 10,5
)
params%>%
pmap(rnorm)
#invoking different functions
f <- c("runif", "rnorm", "rpois")
param <- list(
list(min = -1, max = 1),
list(sd =5),
list(lambda = 10)
)
invoke_map(f, param, n= 5)%>% str()
Purrr pattern finders
## Keep elements of input where the predicate is true
iris %>%
keep(is.factor) %>%
str()
#discard elements
iris %>%
discard(is.factor) %>%
str()
#some determine if the predicate is true for any
x <- list(1:5, letters, list(10))
x %>%
some(is_character)
#every determine if the predicate is true for all
x %>%
every(is_vector)
# detect() finds the first element where the predicate is true
x <- sample(10)
x
x %>%
detect(~ . > 5)
x %>%
detect_index(~ . >5)
#head_While
#take elements from the start or end of a vector while a predicate is true.
pos <- function(x) x >= 0
head_while(5:-5, pos)
big <- function(x) x > 5
head_while(0:10, big)
tail_while(0:10, big)
#reduce and accumulate
dfs <- list(
age = tibble(name = "john", age = 30),
sex = tibble(name = c("john", "mary"), sex = c("M", "F")),
trt = tibble(name = "mary", treatment = "A")
)
dfs %>% reduce(full_join)
# find the intersection
vs <- list(
c(1,3,5,6,10),
c(1,2,3,6,7,8),
c(1,2,3,4,5,6)
)
vs %>% reduce(intersect)
#accumulate
x <- sample(10)
x
x %>% accumulate(`+`)
1.4 Working with strings and characters
Remove last n characters
1.5 Visualize
1.5.1 Ggplots
geom_line with geom_ribbon
geom_line with geom_ribbon for pos / neg numbers
library(ggplot2)
df <- tibble(
value = seq(1,50,1),
rand = seq(1,50,1) +runif(50, min=-10, max=15)
)%>%
mutate(diff = rand - value)
exposure_graph <- ggplot(df, aes(x=value,y=rand)) +
geom_ribbon(aes(ymin=pmin(df$diff,0), ymax=0), fill="red", col="black", alpha=0.5) +
geom_ribbon(aes(ymin=0, ymax=pmax(df$diff,0)), fill="blue", col="black", alpha=0.5) +
geom_line(aes(y=0))
1.5.2 Different tables
Create table with kableExtra
Create table with different colors for pos / neg numbers
library(tidyverse)
library(kableExtra)
df <- tibble(
type = c("gov_bond", "ssa", "ssa", "gov_bond","ssa", "ssa", "gov_bond", "gov_bond", "gov_bond", "ssa"),
maturity_bucket = as.integer(runif(10, min =1, max=6)),
diff_bm = runif(10, min = -10, max = 10)
)
sum_type <- df %>%
group_by(type, maturity_bucket)%>%
summarise(
diff_exposure = round(sum(diff_bm),1)
)
## Create table with green for positive, red for negative
sum_table <- sum_type%>%
mutate(
diff_exposure = ifelse(diff_exposure < 0,
cell_spec(diff_exposure, "html", color = "red", bold = T),
cell_spec(diff_exposure, "html", color = "green", italic = T)))%>%
kable("html", escape = F, format.args=list(big.mark=" ", scientific=F)) %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = F, position= "right", fixed_thead = T)
sum_table
1.6 Misc
1.6.1 Create moving average function
1.6.2 Convert comma to dot
1.6.3 Reference column name using !!sym
1.6.4 Create Spread between two variables
library(tidyverse
#Create temp table
df <- tibble(
date= sample(seq(as.Date('2020/06/01'), as.Date('2025/01/01'), by="day"), 81),
value = seq(10,90,1)+runif(81, min=-1, max=5),
rand = seq(10,90,1) +runif(81, min=-10, max=15)
) %>%
arrange(date) %>%
pivot_longer(
!date,
names_to = "variables",
values_to = "values")
## Creat spread function
variable_1 = "value"
variable_2 = "rand"
create_ticker = "Spread"
create_name = "SpreadName"
create_country = "Sweden"
create_spread <- function(df, variable_1, variable_2,create_ticker,create_name, create_country) {
df <- df %>%
filter(variables %in% c(variable_1, variable_2)) %>%
mutate(var = case_when( #this is done for in the formula always using variable_1 - variable_2
variables == variable_1 ~ "var_1",
variables == variable_2 ~ "var_2",
))
#calculate spread
df <- df %>%
pivot_wider(
c("date"),
#cols = -c(date, name, country),
names_from = var,
values_from = values,
) %>%
mutate(values = var_1 - var_2,
ticker = create_ticker,
name = create_name,
country= create_country) %>%
select(-c("var_1", "var_2"))
return(df)
}
df <- create_spread(df, variable_1, variable_2,create_ticker,create_name, create_country)
1.6.5 Convert Column to List Vector
library(tidyverse)
df <- tibble(
ticker = c("USGG2YR Index",
"USGG10YR Index",
"USGG5YR Index",
"GSGB2YR Index",
"GSGB5YR Index"
))
df_vector <- df %>%
mutate(ticker = map_chr(ticker, paste, collapse = ','))
library(tidyverse)
library(lubridate)
df <- mtcars
# Contains / grepl
df %>%
mutate(group = case_when(grepl("0", vs) ~ "Group0",
grepl("1", vs, ignore.case = TRUE) ~"Group1"))
# Fill all columns
df <- tibble(
date = c("2020-01-31", "2020-02-29", "2020-03-30"),
values = c(1, 5, NA)
)
df <- df %>%
fill(names(df), .direction = "down")
##Change from na to 0
df <- tibble(
date = c("2020-01-31", "2020-02-29", "2020-03-30"),
values = c(1, 5, NA)
)
df$values[is.na(df$values)] <- 0
#Create vector
last_update = "2020-01-01"
years <- c(year(last_update):year(Sys.Date()))
years <- toString(paste0(years) ) ##to make it a vector
1.6.6 Crossing
Cross two tables for all combinations
library(tidyverse)
dates <- data.frame(
date = seq(as.Date("2020-04-01"), Sys.Date(), by = 'months')-1 #create date table,
)
df_bonds <- tribble(~value, ~bond, ~maturity,
1, "SCBC", "2022-12-31",
2, "Danske", "2020-12-31",
3, "Danske2", "2022-12-31",
4, "HB", "2021-12-31",
5, "Nordea", "2023-12-31",
6, "SCBC", "2022-12-31")
df3 <- crossing(df_bonds, dates)
1.6.7 Moving average
Create a moving average
Example of creating a moving average for dates.
library(tidyverse)
library(dplyr)
library(lubridate)
library(slider)
library(dplyr, warn.conflicts = FALSE)
library(lubridate, warn.conflicts = FALSE)
df <- tibble(
Date = seq.Date(Sys.Date()-19, Sys.Date(), by="day"),
indicator = c(rep(1,10),rep(2,10)),
value = rnorm(20)
)
df <- arrange(df, Date)
df %>%
group_by(indicator) %>%
mutate(MA_3m = slide_index_dbl(value, Date, mean, .before=lubridate::days(2), .after=0,.complete=T))
#Use before or after = Inf if you like to get the calculation based on all values before or after.
1.6.8 Number rows within group
1.6.9 Date Formating
Different ways to format dates
dmy
Dates
Formating date with use of gsub to adjust the the string.
1.6.10 Loops
1.6.10.1 For loop example
Creates a non-linear function by using the polynomial of x between 1 and 4 and we store it in a list
#
# Create an empty list
list <- c()
# Create a for statement to populate the list
for (i in seq(1, 4, by=1)) {
list[[i]] <- i*i
}
print(list)
For loop over a matrix
A matrix has 2-dimension, rows and columns. To iterate over a matrix, we have to define two for loop, namely one for the rows and another for the column.
1.6.10.2 For loop example
Creates a non-linear function by using the polynomial of x between 1 and 4 and we store it in a list
1.6.10.3 For loop and while loop
#for loops
df <- tibble(
a = rnorm(10),
b = rnorm(10),
c = rnorm(10),
d = rnorm(10)
)
#calculate median
output <- vector("double", ncol(df)) ###Allocate sufficient space, otherwise it could be slow.
for (i in seq_along(df)){ ###Seq_along is better than sing 1:length(), becasue it can handle zero-length vector.
output[[i]] <- median(df[[i]])
}
output
#for loop: modifying an existing object
df <- tibble(
a = rnorm(10),
b = rnorm(10),
c = rnorm(10),
d = rnorm(10)
)
rescale01 <- function(x){
rng <- range(x, na.rm = TRUE)
(x - rng[1]) / rng[2] - rng[1]
}
for (i in seq_along(df)){
df[[i]] <- rescale01(df[[1]])
}
## unknown output length
means <- c(0,1,2)
output <- double()
for (i in seq_along(means)) { #not very efficient
n <- sample(100,1)
output <- c(output, rnorm(n, means[[i]]))
}
str(output)
#better to save in a list and then combine
out <- vector("list", length(means))
for (i in seq_along(means)){
n <- sample(100,1)
out[[i]] <- rnorm(n, means[[i]])
}
str(out)
str(unlist(out)) ##unlist a list
#unknown sequence length. While loop. WHile loops only has two components, a condition and a body.
#how many tries it takes to get three heads in a row.
flip <- function() sample(c("T", "H"), 1)
flips <- 0
nheads <- 0
while (nheads < 3) {
if(flip() == "H"){
nheads <- nheads + 1
} else {
nheads <- 0
}
flips <- flips + 1
}
flips
Function for Right and Left
Functions for Right and Left.
library(dplyr)
right = function(text, num_char) {
substr(text, nchar(text) - (num_char-1), nchar(text))
}
left = function(text, num_char) {
substr(text, 1, num_char)
}
df <- tibble(
Date = seq.Date(Sys.Date()-19, Sys.Date(), by="day"),
indicator = c(rep(1,10),rep(2,10)),
value = rnorm(20)
)
left(df$value, 3)
right(df$Date, 3)
Bloomberg API in R
Blmrg API with package Rblpapi
library(Rblpapi)
con <- blpConnect()
#generic
us10 <- bdh(securities = "USGG10YR Index",
fields = "PX_LAST",
start.date = as.Date("2020-03-01"))
#Isin
us10 <- bdh(securities = "US912828ZQ64 Govt",
fields = "PX_LAST",
start.date = as.Date("2020-03-01"))
##Multiple fields
bonds <- c("CA135087K601 Govt","CA563469UP83 Govt")
fields <- c("PX_LAST", "YLD_YTM_MID", "PX_DIRTY_MID", "Issuer", "SHORT_NAME", "YRS_TO_MTY_ISSUE","YAS_ASW_SPREAD", "CPN", "AMT_OUTSTANDING", "%_OF_TSY_HLD_IN_THE_FED_RES_SOMA", "YLD_CHG_NET_1D", "YLD_CHG_NET_1M", "INTERVAL_Z_SCORE", "MTY_YEARS_TDY", "YLD_CHG_NET_5D")
df <- bdp(securities = bonds,
fields = fields)
df <- tibble::rownames_to_column(df, "isin_govt")
1.7 R Markdown
1.7.1 Render multiple reports
Render multiple reports in different folders.
In the example below one report is created for each stated currency. Params = list(currency) is the key.
#Write in one R Script
#Remove old
file.remove("...xxx/report/Benchmark_R/Portfolio_report_GBP.html")
file.remove("...xxx/report/Benchmark_R/Portfolio_report_AUD.html")
file.remove("...xxx/report/Benchmark_R/Portfolio_report_EUR.html")
purrr::map(
c("AUD", "EUR", "GBP"),
~ {
res <- rmarkdown::render("...xxx\\report\\Benchmark_R\\R code\\Markdown BM.Rmd", output_file = sprintf("...xxx\\report\\Benchmark_R\\Portfolio_report_%s.html", .x), params = list(currency = .x))
file.copy(res, sprintf("...xxx\\report\\Benchmark_R\\Old_reports\\Portfolio_report_%1$s_%2$s.html", .x, Sys.Date()))
file.copy(res, sprintf("...xxx/report/Benchmark_R//Portfolio_report_%s.html", .x))
}
)
#Markdown Report header
---
#title: "Portfolio and benchmark report"
output: html_document
date: "`r Sys.Date()`"
author: christoffer.nordenlow@outlook.com
params:
currency: "EUR"
title: "`r sprintf('Portfolio and benchmark report, %s', params$currency)`"
---
1.8 Web Scraping
1.8.1 Scrape all sub page
Scrape web page info and save in a table
Scrape all different sub web pages under a base page. In the below example there a number of sub pages under the base bage. R is scraping all different URL under the main page. Info in the tables under the sub pages are saved in a table. You will need to have HTTP_PROXY/HTTPS_PROXY as environment variables.
#https://cran.r-project.org/web/packages/rvest/rvest.pdf
require(rvest)
require(xml2)
require(tidyverse)
.base_url <- "https://www.riksbank.se"
doc <- read_html(file.path(.base_url, "sv/penningpolitik/penningpolitiska-instrument/kop-av-foretagscertifikat/special-terms-and-conditions/"))
urls <- doc %>%
html_nodes("a") %>%
html_attr("href")
urls <- urls[str_detect(urls, regex(".*/special-terms-and-conditions/.*bid-date.*$"))]
urls <- file.path(.base_url, urls)
names(urls) <- basename(urls)
doc_subpage <- read_html(urls[[1]])
df <- urls %>%
map_dfr(~{
doc_subpage %>%
html_node("table") %>%
html_table() %>%
rename(key=X1, value=X2) %>%
as_tibble()
}, .id = "url")
#It is possible to filter which files should be imported.
#map(...) %>% filter(lubridate::year(date) == 2019)
1.8.2 Scrape PL table
Scrape one table
1.8.3 Scrape all tables
Scrape all tables, use one
##Web scrape US Data. Payroll
#http://bradleyboehmke.github.io/2015/12/scraping-html-tables.html
library(rvest)
web_bls <- read_html("http://www.bls.gov/web/empsit/cesbmart.htm")
tbls <- html_nodes(web_bls, "table") #extract all table nodes that exist on the page.
head(tbls)
#To parse the HTML, we use html_table. In this example it creates
table_bls <- web_bls %>%
html_nodes("table") %>%
.[3:4] %>% ##determines which tables. In this case, table 3 and 4.
html_table(fill = TRUE)
str(table_bls)
#Extract table 2, non-farm
head(table_bls[[2]], 4)
# remove row 1 that includes part of the headings. Not neccessary here
#table_bls[[2]] <- table_bls[[2]][-1,]
table_bls2 <-table_bls[[2]]
1.9 Useful functions / expressions
1.9.1 Sub / Gsub
Replace the first occurence of a pattern with a sub or replace all occurrences with gsub. Gsub() replaces all matches of a string.
x <- "Old City"
gsub("Old", "New", x)
#case insensitive
gsub("old", "New", x, ignore.case=T)
#Vector replacement
y <- c("Stockholm City", "Uppsala City", "Malmö")
gsub(" City","",y)
### rnorm
Generate number from a normal distribution.
1.9.2 slice
Example: way to take out a single row.
1.9.3 unique
Example: get all unique values in a column
1.9.4 Map (purrr)
Apply a function to each element of as list or vector. https://purrr.tidyverse.org/reference/map.html
# map_dfr
# apply a function to each element
library(tidyverse)
leading_indicators <- c(
"INJCJC Index",
"INJCJC4 Index",
"INJCSP Index",
"RSTAMOM Index",
"SAARTOTL Index",
"USHBTRAF Index",
"DGNOCHNG Index",
"DGNOYOY Index",
"LEI CHNG Index"
)
get_data <- function(indicator) {
tibble(ind = indicator, data = 1)
}
leading_indicators %>%
map_dfr(get_data)