6 Python
6.1 Basic
6.1.1 Regular Expressions
Pattern | Description |
---|---|
^ | Matches the beginning of a line |
$ | Matches the end of a line |
. | Matches any character |
\s | Matches whitespace |
\S | Matches any non-whitespace character |
* | Repeats a character zero or more times |
*? | Repeats a character zero or more times (non-greedy) |
+ | Repeats a character one or more times |
+? | Repeats a character one or more times (non-greedy) |
[aeiou] | Matches a single character in the listed set |
[^YXZ] | Matches a single character not in the listed set |
[a-z0-9] | The set of characters can include a range |
( | Indicates where string extraction is to start |
) | Indicates where string extraction is to end |
\b | Matches a word boundary |
\d | Matches any digit |
\D | Matches any non-digit |
\w | Matches any alphanumeric character |
\W | Matches any non-alphanumeric character |
{n} | Exactly n repetitions |
{n,} | At least n repetitions |
{,n} | At most n repetitions |
{m,n} | At least m and at most n repetitions |
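A few of the patterns above in action; a quick, self-contained illustration using the re module (the sample line is made up):
import re

line = 'From: stephen.marquard@uct.ac.za Sat Jan  5 09:14:16 2008'
print(re.findall(r'\S+@\S+', line))            # non-whitespace runs around '@' -> the address
print(re.findall(r'\d{2}:\d{2}:\d{2}', line))  # exactly two digits between colons -> the time
print(re.findall(r'^From: (\S+)', line))       # anchor at the start, extract with ( )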
6.1.1.1 Regex working with dates
### Regular expressions for dates
import re

# date formats vary widely
dateStr = '23-10-2002\n23/10/2002\n23/10/02\n10/23/2002\nOct 23, 2002\nOctober 23, 2002\n23 October 2002'
print(re.findall(r'\d{1,2}[/-]\d{1,2}[/-]\d{4}', dateStr)) # 1-2 digits, then - or /, then 1-2 digits, then - or /, then exactly four digits
print(re.findall(r'\d{1,2}[/-]\d{1,2}[/-]\d{2,4}', dateStr)) # same, but the year may be 2 to 4 digits
print(re.findall(r'\d{1,2} (?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{2,4}', dateStr)) # (?:...) is a non-capturing group, so findall returns the whole match instead of just the group
# the month abbreviations match as prefixes; the trailing [a-z]* also covers full names like 'October'
print(re.findall(r'(?:\d{1,2} )?(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* (?:\d{1,2}, )?\d{2,4}', dateStr)) # the day is optional: it may come before the month, or after it followed by a comma
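The matched strings can then be normalised to datetime objects. A minimal sketch continuing from dateStr above, assuming day-month-year order (dateutil.parser is a common alternative that guesses the format):
import re
from datetime import datetime

for match in re.findall(r'\d{1,2}[/-]\d{1,2}[/-]\d{4}', dateStr):
    sep = '/' if '/' in match else '-'
    try:
        print(datetime.strptime(match, f'%d{sep}%m{sep}%Y'))
    except ValueError:
        # e.g. the US-style '10/23/2002' does not fit day-month-year
        print('ambiguous order:', match)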
6.1.2 Create temp table Pandas
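A minimal sketch, assuming the goal is to expose a DataFrame as a temporary SQL table through an in-memory SQLite connection (table name, column names, and data are illustrative):
import sqlite3
import pandas as pd

df = pd.DataFrame({'isin': ['SE0000108656', 'SE0000108698'],  # illustrative data
                   'coupon': [1.5, 3.5]})

con = sqlite3.connect(':memory:')         # temporary database, lives only for this session
df.to_sql('bonds_tmp', con, index=False)  # register the frame as a table
print(pd.read_sql('SELECT * FROM bonds_tmp WHERE coupon > 2', con))
con.close()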
6.1.3 Working with Text and Strings
#Working with Text
text1 = "Since February 2015, the Riksbank has purchased government bonds with the aim of making monetary policy more expansionary."
len(text1) #get number of characters
text2 = text1.split(' ') #split into words
len(text2) #get number of words
print(text2)
# finding specific words
print([w for w in text2 if len(w) > 3]) # get all words longer than 3 characters
#capitalized words
print([w for w in text2 if w.istitle()]) #use istitle to get words with capitalized first character
#words that end with s
print([w for w in text2 if w.endswith('s')]) #words that end with s
#Finding unique words
text3 = 'To be or not to be'
text4 = text3.split(' ')
print(len(text4))
print(set(text4)) ## find unique words with set() (but case-sensitive)
print(len(set([w.lower() for w in text4]))) ## lowercase first to count unique words case-insensitively
print(set([w.lower() for w in text4]))
## checking what a string is
#s.startswith(t)
#s.endswith(t)
#t in s # whether t occurs as a substring of s
#s.isupper(); s.islower(); s.istitle()
#s.isalpha(); s.isdigit(); s.isalnum() # whether the string is alphabetical, numerical, or alphanumeric
## string operations (return a changed string)
#s.lower(); s.upper(); s.title()
#s.split(t) # split on t
#s.splitlines() # split on newline / end-of-line characters
#s.join(t) # join the elements of t with s as the separator
#s.strip(); s.lstrip(); s.rstrip() ## strip whitespace: strip() from both ends, lstrip() from the front, rstrip() from the end
#s.find(t); s.rfind(t)
#s.replace(u,v)
###from words to characters
text5 = 'ouagadougou'
text6 = text5.split('ou') #split on ou
print(text6)
text6 = 'ou'.join(text6) # join reverses the split, restoring the original string
print(text6)
# get all of the characters as a list
print(list(text5))
print([c for c in text5])
###Cleaning text
text8 = ' A quick brown fox jumped over the lazy dog. '
text9 = text8.strip() # strips leading and trailing whitespace
print(text9.split(' ')) # split on space
### Changing text
# find and replace
text9 = ' A quick brown fox jumped over the lazy dog. '
print(text9.find('o')) # find() searches from the start
print(text9.rfind('o')) # rfind() searches from the end
print(text9.replace('o', 'O')) # replace lowercase 'o' with uppercase 'O'
6.1.4 Case_when equivalent in Python
# Case_when equivalent with contains-statements
import pandas as pd
import numpy as np

data = {'instrument_name': ["volvo to2", "volvo", "skandia", "skandia teckningsratt hej", "seb bta"],
        'direction': ["buy", "sell", "buy", "sell", "buy"]}
# create a dataframe from a dictionary (avoid naming it `dict`, which shadows the builtin)
df = pd.DataFrame(data, columns=['instrument_name', 'direction'])
conditions = [
    (df['instrument_name'].str.contains("to2")),
    (df['instrument_name'].str.contains("bta")),
    (df['instrument_name'].str.contains("teckningsratt")),  # also matches "teckningsratter"
    (df['instrument_name'].str.contains("teckningsoption")),
]
choices = ["subscription_option"] * len(conditions)
df["type"] = np.select(conditions, choices, default="other")  # default fills rows matching no condition
print(df)
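For a closer analogue of R's case_when with a different outcome per condition, the same np.select pattern takes distinct choices; a small sketch continuing from the dataframe above (the labels are illustrative):
conditions = [
    df['instrument_name'].str.contains("to2|teckningsratt|teckningsoption"),
    df['instrument_name'].str.contains("bta"),
]
choices = ["subscription_option", "paid_subscribed_share"]  # illustrative labels
df["type2"] = np.select(conditions, choices, default="share")  # first matching condition wins
print(df)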
6.2 Scrapy
pip install Scrapy
6.2.1 Shell commands
Run Scrapy in the shell: type 'scrapy shell' in Miniconda
Fetch a page: type 'fetch("https://www.riksbank.se/sv/penningpolitik/penningpolitiska-instrument/kop-av-foretagscertifikat/auction-results/2020/results-of-auctions-2020-09-23/")'
Open the fetched response in the browser: use 'view(response)'
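Once the page is fetched, selectors can be tested interactively in the shell; a short sketch (the selectors are illustrative, not specific to the Riksbank page):
response.css('title::text').get()           # page title
response.xpath('//table//tr').getall()      # every table row on the page
response.css('a::attr(href)').getall()[:5]  # first five link targets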
6.2.2 Scrape table
# Spider
import scrapy

class ToScrapeCSSSpider(scrapy.Spider):
    name = "rb_scrape2"
    start_urls = [
        'https://www.riksbank.se/sv/penningpolitik/penningpolitiska-instrument/kop-av-foretagscertifikat/auction-results/2020/results-of-auctions-2020-09-23/'
    ]

    def parse(self, response):
        for item in response.xpath('//*[@class="page-base__main__body"]//tr'):
            yield {
                'terms': item.xpath('td[1]//text()').extract(),
                'auction': item.xpath('td[2]//text()').extract()
            }
        next_page_url = response.css("li.next > a::attr(href)").extract_first()
        if next_page_url is not None:
            yield scrapy.Request(response.urljoin(next_page_url))
# Settings
#Export as CSV Feed
#FEED_FORMAT = "csv"
#FEED_URI = "rbbot.csv"
#Run the crawler in Miniconda, writing a CSV (if it's not set in settings): scrapy crawl rb_scrape2 -o quotes.csv
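Note that newer Scrapy releases (2.1+) deprecate FEED_FORMAT/FEED_URI in favour of the FEEDS setting; the equivalent in settings.py:
# settings.py, Scrapy >= 2.1
FEEDS = {
    'rbbot.csv': {'format': 'csv', 'encoding': 'utf8'},
}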
6.2.3 Scrape table json treasury scraper
# Spider
# -*- coding: utf-8 -*-
#from scrapy import BaseSpider
import scrapy
#from TreasuryScraper.items import TreasuryItem
import json

class TreasurySpider(scrapy.Spider):
    name = 'treasury'
    start_urls = [
        'https://www.treasurydirect.gov/TA_WS/securities/jqsearch?format=json&filterscount=0&groupscount=0&pagenum=0&pagesize=1000&recordstartindex=0&recordendindex=1000',
    ]

    def parse(self, response):
        jsonresponse = json.loads(response.text)
        for item in jsonresponse['securityList']:
            yield {
                'cusip': item['cusip'],
                'securityType': item['securityType'],
                'securityTerm': item['securityTerm'],
                'offeringAmount': item['offeringAmount'],
                'tips': item['tips'],
                'type': item['type'],
                'pricePer100': item['pricePer100'],
                'floatingRate': item['floatingRate'],
                'reopening': item['reopening'],
                'auctionDate': item['auctionDate'],
                'maturityDate': item['maturityDate'],
                'term': item['term'],
                'competitiveAccepted': item['competitiveAccepted'],
                'allocationPercentage': item['allocationPercentage'],
                'averageMedianYield': item['averageMedianYield'],
                'bidToCoverRatio': item['bidToCoverRatio'],
                'highYield': item['highYield'],
                'lowYield': item['lowYield'],
                'somaAccepted': item['somaAccepted'],
                'somaHoldings': item['somaHoldings'],
                'primaryDealerAccepted': item['primaryDealerAccepted'],
                'directBidderAccepted': item['directBidderAccepted'],
                'directBidderTendered': item['directBidderTendered'],
                'indirectBidderAccepted': item['indirectBidderAccepted'],
                'indirectBidderTendered': item['indirectBidderTendered'],
                'interestPaymentFrequency': item['interestPaymentFrequency']
            }
#settings
# -*- coding: utf-8 -*-
#BOT_NAME = 'TreasuryScraper'
#SPIDER_MODULES = ['TreasuryScraper.spiders']
#NEWSPIDER_MODULE = 'TreasuryScraper.spiders'
#ROBOTSTXT_OBEY = False
#DOWNLOAD_DELAY = 60.0
#AUTOTHROTTLE_ENABLED = True
#HTTPCACHE_ENABLED = True
#FEED_EXPORT_ENCODING = 'utf-8'
#scrapy crawl treasury -o output.csv
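The query string already carries paging parameters (pagenum, pagesize, recordstartindex, recordendindex). A hedged sketch of walking several pages by overriding start_requests; whether the endpoint honours pagenum alone, and how many pages exist, are assumptions:
    # sketch, inside TreasurySpider
    def start_requests(self):
        base = ('https://www.treasurydirect.gov/TA_WS/securities/jqsearch'
                '?format=json&pagesize=1000&pagenum={0}')
        for page in range(5):  # page count is an assumption; stop once securityList comes back empty
            yield scrapy.Request(base.format(page))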
6.2.4 Scrape table json auction results Riksbank
import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from RiksbankAuctionScraper.items import GovernmentBonds

def get_table_attr(response, x):  # to handle inconsistent table layouts
    # using normalize-space() to avoid "\n" as a matching xpath
    xpaths = [
        './/td[contains(text(),"{0}")]/following-sibling::td/text()[normalize-space()]',  # if a td contains the text, take the following sibling
        './/td[contains(text(),"{0}")]/following-sibling::td/p/text()[normalize-space()]',
        './/th/span[contains(text(),"{0}")]/../following-sibling::td/span/text()[normalize-space()]'  # if a th/span contains the text, take the following sibling
    ]
    xpath_str = '|'.join(xpaths).format(x)
    return response.xpath(xpath_str).get()

class RiksbankSpider(CrawlSpider):
    name = "RiksbankAuctionScraper_v5"
    allowed_domains = ['riksbank.se']
    start_urls = [
        'https://www.riksbank.se/sv/penningpolitik/penningpolitiska-instrument/kop-av-statsobligationer/results-of-auctions'
    ]
    rules = (
        Rule(LinkExtractor(
            allow=(r'\/kop-av-statsobligationer\/results-of-auctions\/2020\/results-of-auctions-\d{4}-\d{2}-\d{2}\/$')
        ), callback='parse_government_bond'),
    )

    def parse_government_bond(self, response):
        for selector in response.xpath("//table"):
            item = GovernmentBonds()
            item['auction_type'] = 'statsobligationer'
            item['auction_date'] = get_table_attr(selector, "Auction date")  # text to look for
            item['loan_number'] = get_table_attr(selector, "Loan")
            yield item
# Items
# -*- coding: utf-8 -*-
from scrapy import Item, Field

class GovernmentBonds(Item):
    auction_type = Field()
    auction_date = Field()
    loan_number = Field()
6.2.5 Scrape table RiksbankAuctionScraper
import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from RiksbankAuctionScraper.items import GovernmentBonds, SekLending, FundingForLending, CommercialPapers, UsLending, CoveredBonds, MunicipalBonds

def get_table_attr(response, x):
    # translate() lower-cases the cell text so the match is case-insensitive;
    # normalize-space() avoids "\n" as a matching xpath
    xpaths = [
        './/td[contains(translate(text(), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"),"{0}")]/following-sibling::td/text()[normalize-space()]',
        './/td[contains(translate(text(), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"),"{0}")]/following-sibling::td/*/text()',
        './/td/span[contains(translate(text(), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"),"{0}")]/../following-sibling::td/span/text()',
        './/th/span[contains(translate(text(), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"),"{0}")]/../following-sibling::td/span/text()'
    ]
    xpath_str = '|'.join(xpaths).format(x)
    return response.xpath(xpath_str).get()
class RiksbankSpider(CrawlSpider):
    name = "RiksbankAuctionScraper"
    allowed_domains = ['riksbank.se']
    start_urls = [
        # 'https://www.riksbank.se/sv/penningpolitik/penningpolitiska-instrument/kop-av-kommunobligationer/auction-results/',
        # 'https://www.riksbank.se/sv/penningpolitik/penningpolitiska-instrument/kop-av-statsobligationer/results-of-auctions/',
        'https://www.riksbank.se/sv/penningpolitik/penningpolitiska-instrument/kop-av-sakerstallda-obligationer/results-of-auctions/',
        #'https://www.riksbank.se/sv/penningpolitik/penningpolitiska-instrument/kop-av-foretagscertifikat/auction-results/',
        #'https://www.riksbank.se/sv/penningpolitik/penningpolitiska-instrument/lan-till-bankerna-for-vidareutlaning-till-foretag/auction-results/',
        # 'https://www.riksbank.se/sv/penningpolitik/penningpolitiska-instrument/lan-i-amerikanska-dollar/auction-results/',
        # 'https://www.riksbank.se/sv/penningpolitik/penningpolitiska-instrument/veckovisa-extraordinara-marknadsoperationer/auction-results/'
    ]
    rules = (
        Rule(LinkExtractor(
            allow=(r'\/kop-av-statsobligationer\/results-of-auctions\/')
        ), callback='parse_bonds'),
        Rule(LinkExtractor(
            allow=(r'\/kop-av-sakerstallda-obligationer\/results-of-auctions\/2020\/')
        ), callback='parse_covered_bonds'),
        Rule(LinkExtractor(
            allow=(r'\/veckovisa-extraordinara-marknadsoperationer\/auction-results\/2020\/')
        ), callback='parse_sek_lending'),
        Rule(LinkExtractor(
            allow=(r'\/kop-av-foretagscertifikat\/auction-results\/2020\/')
        ), callback='parse_ftg_cert'),
        Rule(LinkExtractor(
            allow=(r'\/lan-till-bankerna-for-vidareutlaning-till-foretag\/auction-results\/2020\/')
        ), callback='parse_ffl'),
        Rule(LinkExtractor(
            allow=(r'\/lan-i-amerikanska-dollar\/auction-results\/2020\/')
        ), callback='parse_us_lending'),
        Rule(LinkExtractor(
            allow=(r'\/kop-av-kommunobligationer\/auction-results\/2020\/')
        ), callback='parse_municipal'),
    )
    def parse_bonds(self, response):
        for selector in response.xpath("//table"):
            item = GovernmentBonds()
            item['auction_type'] = 'governmentBonds'
            item['auction_date'] = get_table_attr(selector, "auction date")
            item['loan_number'] = get_table_attr(selector, "loan")
            item['isin'] = get_table_attr(selector, "isin")
            item['coupon'] = get_table_attr(selector, "coupon")
            item['tendered_volume'] = get_table_attr(selector, "tendered")
            item['volume_offered'] = get_table_attr(selector, "offered")
            item['volume_bought'] = get_table_attr(selector, "bought")
            item['number_of_bids'] = get_table_attr(selector, "number of bids")
            item['number_of_accepted_bids'] = get_table_attr(selector, "number of accepted bids")
            item['average_yield'] = get_table_attr(selector, "average yield")
            item['lowest_accepted_yield'] = get_table_attr(selector, "lowest accepted yield")
            item['highest_accepted_yield'] = get_table_attr(selector, "highest yield")
            item['accepted_at_lowest_perc'] = get_table_attr(selector, "accepted at lowest")
            yield item
    def parse_covered_bonds(self, response):
        for selector in response.xpath("//table"):
            item = CoveredBonds()
            item['auction_type'] = 'coveredBonds'
            item['auction_date'] = get_table_attr(selector, "auction date")
            item['loan_number'] = get_table_attr(selector, "loan")
            item['isin'] = get_table_attr(selector, "isin")
            item['coupon'] = get_table_attr(selector, "coupon")
            item['tendered_volume'] = get_table_attr(selector, "tendered")
            item['volume_offered'] = get_table_attr(selector, "offered")
            item['volume_bought'] = get_table_attr(selector, "bought")
            item['number_of_bids'] = get_table_attr(selector, "number of bids")
            item['number_of_accepted_bids'] = get_table_attr(selector, "number of accepted bids")
            item['average_yield'] = get_table_attr(selector, "average yield")
            item['lowest_accepted_yield'] = get_table_attr(selector, "lowest accepted yield")
            item['highest_accepted_yield'] = get_table_attr(selector, "highest yield")
            item['accepted_at_lowest_perc'] = get_table_attr(selector, "accepted at lowest")
            yield item
    def parse_sek_lending(self, response):
        for selector in response.xpath("//table"):
            item = SekLending()
            item['auction_type'] = 'sekLending'
            item['auction_date'] = get_table_attr(selector, "auction date")
            item['payment_date'] = get_table_attr(selector, "payment date")
            item['maturity_date'] = get_table_attr(selector, "maturity date")
            item['term'] = get_table_attr(selector, "term")
            item['offered_volume'] = get_table_attr(selector, "offered volume")
            item['total_bid_amount'] = get_table_attr(selector, "total bid amount")
            item['number_of_bids'] = get_table_attr(selector, "number of bids")
            item['allotment'] = get_table_attr(selector, "allotment")
            item['interest_rate'] = get_table_attr(selector, "interest rate")
            yield item
    def parse_ftg_cert(self, response):
        for selector in response.xpath("//table"):
            item = CommercialPapers()
            item['auction_type'] = 'commercialPapers'
            item['auction_date'] = get_table_attr(selector, "auction date")
            item['credit_rating_class'] = get_table_attr(selector, "credit rating class")
            item['term'] = get_table_attr(selector, "term")
            item['fixed_purchase_rate'] = get_table_attr(selector, "fixed purchase rate")
            item['total_bid_amount'] = get_table_attr(selector, "total bid amount")
            item['volume_bought'] = get_table_attr(selector, "accepted volume")
            item['percentage_alloted'] = get_table_attr(selector, "percentage allotted")
            item['number_of_bids'] = get_table_attr(selector, "number of bids")
            yield item
    def parse_ffl(self, response):
        for selector in response.xpath("//table"):
            item = FundingForLending()
            item['auction_type'] = 'fundingForLending'
            item['auction_date'] = get_table_attr(selector, "auction date")
            item['settlement_date'] = get_table_attr(selector, "settlement date")
            item['final_repayment_date'] = get_table_attr(selector, "final repayment date")
            item['offered_volume'] = get_table_attr(selector, "offered volume")
            item['total_bid_amount'] = get_table_attr(selector, "total bid amount")
            item['number_of_bids'] = get_table_attr(selector, "number of bids")
            item['allotment'] = get_table_attr(selector, "allotment")
            item['interest_rate'] = get_table_attr(selector, "interest rate")
            item['interest_rate_supplement'] = get_table_attr(selector, "interest rate supplement")
            yield item
    def parse_us_lending(self, response):
        for selector in response.xpath("//table"):
            item = UsLending()
            item['auction_type'] = 'usLending'
            item['auction_date'] = get_table_attr(selector, "auction date")
            item['settlement_date'] = get_table_attr(selector, "settlement date")
            item['maturity_date'] = get_table_attr(selector, "maturity date")
            item['term'] = get_table_attr(selector, "term")
            item['offered_volume'] = get_table_attr(selector, "offered volume")
            item['marginal_interest_rate'] = get_table_attr(selector, "marginal interest rate")
            item['allotment_at_marginal'] = get_table_attr(selector, "allotment at marginal")
            item['total_bid_amount'] = get_table_attr(selector, "total bid amount")
            item['number_of_bids'] = get_table_attr(selector, "number of bids")
            item['allotment'] = get_table_attr(selector, "allotment")  # may be None if the table has no allotment row
            yield item
    def parse_municipal(self, response):
        for selector in response.xpath("//table"):
            item = MunicipalBonds()
            item['auction_type'] = 'municipalBonds'
            item['auction_date'] = get_table_attr(selector, "auction date")
            item['loan_number'] = get_table_attr(selector, "loan")
            item['isin'] = get_table_attr(selector, "isin")
            item['coupon'] = get_table_attr(selector, "coupon")
            item['tendered_volume'] = get_table_attr(selector, "tendered")
            item['volume_offered'] = get_table_attr(selector, "offered")
            item['volume_bought'] = get_table_attr(selector, "bought")
            item['number_of_bids'] = get_table_attr(selector, "number of bids")
            item['number_of_accepted_bids'] = get_table_attr(selector, "number of accepted bids")
            item['average_yield'] = get_table_attr(selector, "average yield")
            item['lowest_accepted_yield'] = get_table_attr(selector, "lowest accepted yield")
            item['highest_accepted_yield'] = get_table_attr(selector, "highest yield")
            item['accepted_at_lowest_perc'] = get_table_attr(selector, "accepted at lowest")
            yield item
# Items
# -*- coding: utf-8 -*-
from scrapy import Item, Field

class GovernmentBonds(Item):
    auction_type = Field()
    auction_date = Field()
    loan_number = Field()
    isin = Field()
    coupon = Field()
    tendered_volume = Field()
    volume_offered = Field()
    volume_bought = Field()
    number_of_bids = Field()
    number_of_accepted_bids = Field()
    average_yield = Field()
    lowest_accepted_yield = Field()
    highest_accepted_yield = Field()
    accepted_at_lowest_perc = Field()

class CoveredBonds(Item):
    auction_type = Field()
    auction_date = Field()
    loan_number = Field()
    isin = Field()
    coupon = Field()
    tendered_volume = Field()
    volume_offered = Field()
    volume_bought = Field()
    number_of_bids = Field()
    number_of_accepted_bids = Field()
    average_yield = Field()
    lowest_accepted_yield = Field()
    highest_accepted_yield = Field()
    accepted_at_lowest_perc = Field()

class MunicipalBonds(Item):
    auction_type = Field()
    auction_date = Field()
    loan_number = Field()
    isin = Field()
    coupon = Field()
    tendered_volume = Field()
    volume_offered = Field()
    volume_bought = Field()
    number_of_bids = Field()
    number_of_accepted_bids = Field()
    average_yield = Field()
    highest_accepted_yield = Field()
    lowest_accepted_yield = Field()
    accepted_at_lowest_perc = Field()
class SekLending(Item):
    auction_type = Field()
    auction_date = Field()
    payment_date = Field()
    maturity_date = Field()
    term = Field()
    offered_volume = Field()
    total_bid_amount = Field()
    number_of_bids = Field()
    allotment = Field()
    interest_rate = Field()

class CommercialPapers(Item):
    auction_type = Field()
    auction_date = Field()
    credit_rating_class = Field()
    term = Field()
    fixed_purchase_rate = Field()
    total_bid_amount = Field()
    volume_bought = Field()
    percentage_alloted = Field()
    number_of_bids = Field()

class FundingForLending(Item):
    auction_type = Field()
    auction_date = Field()
    settlement_date = Field()
    final_repayment_date = Field()
    offered_volume = Field()
    total_bid_amount = Field()
    number_of_bids = Field()
    allotment = Field()
    interest_rate = Field()
    interest_rate_supplement = Field()

class UsLending(Item):
    auction_type = Field()
    auction_date = Field()
    settlement_date = Field()
    maturity_date = Field()
    term = Field()
    offered_volume = Field()
    marginal_interest_rate = Field()
    allotment_at_marginal = Field()
    total_bid_amount = Field()
    number_of_bids = Field()
    allotment = Field()
# Settings
BOT_NAME = 'RiksbankAuctionScraper'
SPIDER_MODULES = ['RiksbankAuctionScraper.spiders']
NEWSPIDER_MODULE = 'RiksbankAuctionScraper.spiders'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'ReportScraper (+http://www.yourdomain.com)'
# Obey robots.txt rules
ROBOTSTXT_OBEY = True
HTTPCACHE_ENABLED = True
DOWNLOAD_DELAY = 10.0
AUTOTHROTTLE_ENABLED = True
FEED_EXPORT_ENCODING = 'utf-8'
# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
#}
# Crawl
#scrapy crawl RiksbankAuctionScraper -o output.json
### Import the output into R
library("rjson")
library(data.table)
output <- fromJSON(file = "C:\\Users\\chris\\Documents\\Python\\Projects\\RiksbankAuctionScraper\\output.json")
df <- rbindlist(output, idcol = TRUE)
6.2.6 BeautifulSoup
import urllib.request, urllib.parse, urllib.error
from bs4 import BeautifulSoup
import ssl
# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
url = 'https://www.federalreserve.gov/monetarypolicy/fomccalendars.htm'
html = urllib.request.urlopen(url, context=ctx).read()
soup = BeautifulSoup(html, 'html.parser')
#text = soup.get_text() ##get all text
#print(text) ##print all text
#tables = soup.find_all("table") #find all tables
#print(tables)
#print(tables[1]["style"]) ##get the style element
#lists = soup.find_all("li") #find all list elements
#print(len(lists)) ##print number of list
links = soup.select("a[href*=minutes]")
#print(links)
for link in links:
    print(link.get('href'))
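The commented-out find_all("table") lines above hint at table scraping; a minimal sketch of walking the rows of the first table on the page (the FOMC page layout may differ):
table = soup.find("table")  # first table on the page
if table is not None:
    for row in table.find_all("tr"):
        cells = [c.get_text(strip=True) for c in row.find_all(["th", "td"])]
        print(cells)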
6.3 Misc
6.3.1 Python in R Markdown
#```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(reticulate)
py_install("pandas") ## install Python packages from R via reticulate
#```
#https://rstudio.github.io/reticulate/articles/python_packages.html
#https://stackoverflow.com/questions/54261906/reticulate-importerror-no-module-named-pandas-in-rstudio-version-1-2
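With reticulate loaded, R and Python chunks in the same .Rmd share objects: r.<name> reads an R object from Python and py$<name> reads a Python object back in R. A minimal sketch of such a chunk pair (written with the same commented-fence convention as above):
#```{r}
#prices <- c(100.2, 99.8, 101.5)
#```
#```{python}
#import pandas as pd
#s = pd.Series(r.prices)  # r.<name> pulls the R object into Python
#print(s.mean())
#```
#```{r}
#py$s  # py$<name> pulls the Python object back into R
#```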