General fixes

Added color, config file, moved packages into ./modules
This commit is contained in:
Andrew Dinh 2019-03-18 10:26:07 -07:00
parent 6366453f63
commit 5d1f96c403
9 changed files with 1657 additions and 205 deletions

2
.gitignore vendored
View File

@ -3,4 +3,4 @@ test/
.vscode/ .vscode/
*.sqlite *.sqlite
README.html README.html
*stocks.txt *-stocks.txt

View File

@ -1,5 +1,8 @@
# Python file for general functions # Python file for general functions
import sys
sys.path.insert(0, './modules')
def getNearest(items, pivot): def getNearest(items, pivot):
return min(items, key=lambda x: abs(x - pivot)) return min(items, key=lambda x: abs(x - pivot))
@ -52,14 +55,18 @@ def strintIsFloat(s):
def fromCache(r): def fromCache(r):
import requests_cache import requests_cache
from termcolor import colored, cprint
if r.from_cache == True: if r.from_cache == True:
print('(Response taken from cache)') cprint('(Response taken from cache)', 'white', attrs=['dark'])
return return
def getJoke(): def getJoke():
import requests import requests
import sys
from termcolor import colored, cprint
import requests_cache import requests_cache
from halo import Halo
with requests_cache.disabled(): with requests_cache.disabled():
''' '''
f = requests.get('https://official-joke-api.appspot.com/jokes/random').json() f = requests.get('https://official-joke-api.appspot.com/jokes/random').json()
@ -69,9 +76,13 @@ def getJoke():
''' '''
headers = {'Accept': 'application/json', headers = {'Accept': 'application/json',
'User-Agent': 'fund-indicators (https://github.com/andrewkdinh/fund-indicators)'} 'User-Agent': 'fund-indicators (https://github.com/andrewkdinh/fund-indicators)'}
url = 'https://icanhazdadjoke.com'
cprint('Get: ' + url, 'white', attrs=['dark'])
with Halo(spinner='dots'):
f = requests.get('https://icanhazdadjoke.com/', headers=headers).json() f = requests.get('https://icanhazdadjoke.com/', headers=headers).json()
print('') print('')
print(f['joke']) print(colored(f['joke'], 'green'))
def hasNumbers(inputString): def hasNumbers(inputString):
@ -127,6 +138,50 @@ def fileExists(file):
import os.path import os.path
return os.path.exists(file) return os.path.exists(file)
def listIndexExists(i, index=None):
    """Report whether a position in a sequence can be read.

    Two calling conventions are supported:

    * ``listIndexExists(value)`` -- legacy form. The argument has already
      been evaluated by the caller, so any ``IndexError`` would have been
      raised at the call site before this function runs. This form can
      therefore only ever return ``True``; it is kept so existing callers
      keep working.
    * ``listIndexExists(sequence, index)`` -- performs the actual bounds
      check by attempting ``sequence[index]``.

    Args:
        i: In the legacy form, any already-evaluated value. In the
            two-argument form, the sequence to probe.
        index: Position to test inside ``i``. ``None`` selects the legacy
            form.

    Returns:
        bool: ``False`` only when ``index`` is given and ``i[index]``
        raises ``IndexError``; ``True`` otherwise.
    """
    if index is None:
        # Nothing left to check: the caller's own indexing already succeeded.
        return True
    try:
        i[index]
    except IndexError:
        return False
    return True
def removeOutliers(i):
    """Split values into inliers and outliers using 1.5 * IQR fences.

    The quartiles are computed as the medians of the values strictly below
    and strictly above the overall median. A value is kept when it lies
    strictly between ``firstQm - iqr`` and ``thirdQm + iqr``.

    Args:
        i: Non-empty list of numbers.

    Returns:
        list: ``[goodList, badList, firstQm, m, thirdQm, iqr]`` where
        ``goodList`` holds the kept values, ``badList`` the rejected ones,
        ``firstQm``/``m``/``thirdQm`` are the lower-half median, overall
        median and upper-half median, and ``iqr`` is the fence distance
        (note: this is already multiplied by 1.5, not the raw IQR).

    Raises:
        statistics.StatisticsError: If ``i`` is empty.
    """
    import statistics

    m = statistics.median(i)
    firstQ = [x for x in i if x < m]
    thirdQ = [x for x in i if x > m]

    # A half is empty when no value lies strictly on that side of the median
    # (single value, many ties, ...). statistics.median() would raise on an
    # empty list, so fall back to the overall median for that quartile.
    firstQm = statistics.median(firstQ) if firstQ else m
    thirdQm = statistics.median(thirdQ) if thirdQ else m
    iqr = (thirdQm - firstQm) * 1.5

    if not firstQ and not thirdQ:
        # Every value equals the median: there is no spread, hence no
        # outliers. The strict comparisons below would reject everything.
        return [list(i), [], firstQm, m, thirdQm, iqr]

    lowerFence = firstQm - iqr
    upperFence = thirdQm + iqr
    goodList = []
    badList = []  # Kept so callers can inspect what was removed
    for x in i:
        if lowerFence < x < upperFence:
            goodList.append(x)
        else:
            badList.append(x)
    return [goodList, badList, firstQm, m, thirdQm, iqr]
def validateJson(text):
    """Return True when ``text`` parses as JSON, False otherwise.

    Args:
        text: Candidate JSON document (normally a ``str``).

    Returns:
        bool: ``True`` if ``json.loads(text)`` succeeds. ``False`` if the
        text is malformed (``ValueError``) or is not a type ``json.loads``
        accepts, such as ``None`` (``TypeError``).
    """
    import json

    try:
        json.loads(text)
    except (ValueError, TypeError):
        # ValueError covers json.JSONDecodeError; TypeError covers inputs
        # that are not str/bytes/bytearray.
        return False
    return True
def keyInDict(dict, key):
    """Return True when ``key`` is present in ``dict``, False otherwise.

    Args:
        dict: Mapping (or any container supporting ``in``) to search. The
            name shadows the builtin but is kept so keyword callers keep
            working.
        key: Key to look for.

    Returns:
        bool: Result of the membership test ``key in dict``.
    """
    return key in dict
def main(): def main():
exit() exit()

View File

@ -1,25 +1,45 @@
# Mutual Fund Indicators # fund-indicators
[![License](https://img.shields.io/github/license/andrewkdinh/fund-indicators.svg)](https://raw.githubusercontent.com/andrewkdinh/fund-indicators/master/LICENSE) [![License](https://img.shields.io/github/license/andrewkdinh/fund-indicators.svg)](https://raw.githubusercontent.com/andrewkdinh/fund-indicators/master/LICENSE)
![](https://img.shields.io/github/last-commit/andrewkdinh/fund-indicators.svg) [![](https://img.shields.io/github/last-commit/andrewkdinh/fund-indicators.svg)](https://github.com/andrewkdinh/fund-indicators/commits/master)
![](https://img.shields.io/github/languages/top/andrewkdinh/fund-indicators.svg) ![](https://img.shields.io/github/languages/top/andrewkdinh/fund-indicators.svg)
![](https://img.shields.io/github/languages/code-size/andrewkdinh/fund-indicators.svg) ![](https://img.shields.io/github/languages/code-size/andrewkdinh/fund-indicators.svg)
A project to determine indicators of overperforming mutual funds. A project to determine relationships between mutual funds and different factors.
Examine correlation between performance and market capitalization, persistence, turnover, and expense ratios. Calculates relationships between: Previous performance, Alpha, Sharpe Ratio, Sortino Ratio
## Prerequisites and Expense ratios, Turnover, Market Capitalization (Asset Size), Persistence
`$ pip install -r requirements.txt` Give it a try at [repl.run](https://fund-indicators.andrewkdinh.repl.run) or [repl.it](https://repl.it/@andrewkdinh/fund-indicators)
## Key Features
- 100% automated
- Uses multiple APIs in case one fails
- Caches http requests for future runs
- Scrapes data from Yahoo Finance
- Color-coded for easy viewing
- Optional graphs to easily visualize linear regression results
- A new joke every time it runs
## Quickstart ## Quickstart
To begin, run ```shell
pip install -r requirements.txt
python main.py
```
`$ python main.py` Pre-chosen stocks listed in `stocks.txt`
Some ticker values to try: ## Credits
SPY, VFINX, VTHR, DJIA
This project uses a wide variety of open-source projects
- [NumPy](https://github.com/numpy/numpy), [Termcolor](https://github.com/hfeeki/termcolor), [Beautiful Soup](https://launchpad.net/beautifulsoup), [yahoofinancials](https://github.com/JECSand/yahoofinancials), [requests-cache](https://github.com/reclosedev/requests-cache), [halo](https://github.com/manrajgrover/halo)
And thank you to those that have helped me with the idea and product:
- Amber Bruce, [Alex Stoykov](http://stoykov.us/), Doug Achterman, [Stack Overflow](https://stackoverflow.com)
Created by Andrew Dinh from Dr. TJ Owens Gilroy Early College Academy Created by Andrew Dinh from Dr. TJ Owens Gilroy Early College Academy

63
config.example.json Normal file
View File

@ -0,0 +1,63 @@
{
"_comment": "Only use this file if you are sure every value in it is correct",
"Config": {
"Check Packages": true,
"Check Python Version": true,
"Check Internet Connection": false,
"Get Joke": true,
"Benchmark": "SPY",
"Method": "Kiplinger",
"Time Frame": 60,
"Indicator": "Expense Ratio",
"Remove Outliers": true,
"Sources": [
"Alpha Vantage",
"Yahoo",
"IEX",
"Tiingo"
]
},
"Possible Values": {
"Check Packages": [
true,
false
],
"Check Python Version": [
true,
false
],
"Check Internet Connection": [
true,
false
],
"Get Joke": [
true,
false
],
"Benchmark": [
"SPY",
"DJIA",
"VTHR",
"EFG"
],
"Method": [
"Read",
"Manual",
"U.S. News",
"Kiplinger",
"TheStreet"
],
"Time Frame": "Any integer",
"Indicator": [
"Expense Ratio",
"Market Capitalization",
"Turnover",
"Persistence"
],
"Remove Outliers": [
true,
false
],
"Sources": "Choose an order out of ['Alpha Vantage', 'Yahoo', 'IEX', 'Tiingo']"
}
}

495
main.py
View File

@ -3,25 +3,33 @@
# Andrew Dinh # Andrew Dinh
# Python 3.6.7 # Python 3.6.7
# Required # PYTHON FILES
from bs4 import BeautifulSoup
import requests
import json
import datetime
import Functions import Functions
import numpy as np from yahoofinancials import YahooFinancials
import re from termcolor import cprint
# REQUIRED
import requests_cache
import os.path import os.path
import re
import datetime
import json
import requests
from bs4 import BeautifulSoup
import numpy as np
# Required for linear regression # OPTIONAL
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import sys from halo import Halo
# Optional # FOR ASYNC
from concurrent.futures import ThreadPoolExecutor as PoolExecutor from concurrent.futures import ThreadPoolExecutor as PoolExecutor
import time import time
import random import random
import requests_cache
import sys
sys.path.insert(0, './modules')
requests_cache.install_cache( requests_cache.install_cache(
'cache', backend='sqlite', expire_after=43200) # 12 hours 'cache', backend='sqlite', expire_after=43200) # 12 hours
@ -59,7 +67,6 @@ API Keys:
No: Tiingo No: Tiingo
''' '''
class Stock: class Stock:
# GLOBAL VARIABLES # GLOBAL VARIABLES
@ -67,6 +74,11 @@ class Stock:
riskFreeRate = 0 riskFreeRate = 0
indicator = '' indicator = ''
# CONFIG
removeOutliers = True
sourceList = ['Alpha Vantage', 'Yahoo', 'IEX', 'Tiingo']
config = 'N/A'
# BENCHMARK VALUES # BENCHMARK VALUES
benchmarkDates = [] benchmarkDates = []
benchmarkCloseValues = [] benchmarkCloseValues = []
@ -100,6 +112,7 @@ class Stock:
self.downsideDeviation = 0 self.downsideDeviation = 0
self.kurtosis = 0 self.kurtosis = 0
self.skewness = 0 # Not sure if I need this self.skewness = 0 # Not sure if I need this
self.correlation = 0
self.linearRegression = [] # for y=mx+b, this list has [m,b] self.linearRegression = [] # for y=mx+b, this list has [m,b]
self.indicatorValue = '' self.indicatorValue = ''
@ -117,17 +130,17 @@ class Stock:
return self.allCloseValues return self.allCloseValues
def IEX(self): def IEX(self):
print('IEX')
url = ''.join( url = ''.join(
('https://api.iextrading.com/1.0/stock/', self.name, '/chart/5y')) ('https://api.iextrading.com/1.0/stock/', self.name, '/chart/5y'))
# link = "https://api.iextrading.com/1.0/stock/spy/chart/5y" # link = "https://api.iextrading.com/1.0/stock/spy/chart/5y"
print("\nSending request to:", url) cprint("Get: " + url, 'white', attrs=['dark'])
with Halo(spinner='dots'):
f = requests.get(url) f = requests.get(url)
Functions.fromCache(f) Functions.fromCache(f)
json_data = f.text json_data = f.text
if json_data == 'Unknown symbol' or f.status_code != 200: if json_data == 'Unknown symbol' or f.status_code != 200:
print("IEX not available") print("IEX not available")
return 'Not available' return 'N/A'
loaded_json = json.loads(json_data) loaded_json = json.loads(json_data)
listIEX = [] listIEX = []
@ -141,7 +154,7 @@ class Stock:
listIEX.append(allDates) listIEX.append(allDates)
print(len(listIEX[0]), "dates") print(len(listIEX[0]), "dates")
print("\nFinding close values for each date") # print("\nFinding close values for each date")
values = [] values = []
for i in range(0, len(loaded_json), 1): # If you want to do oldest first for i in range(0, len(loaded_json), 1): # If you want to do oldest first
# for i in range(len(loaded_json)-1, -1, -1): # for i in range(len(loaded_json)-1, -1, -1):
@ -149,18 +162,18 @@ class Stock:
value = line['close'] value = line['close']
values.append(value) values.append(value)
listIEX.append(values) listIEX.append(values)
print(len(listIEX[1]), "close values")
print(len(listIEX[0]), 'dates and', len(listIEX[1]), "close values")
return listIEX return listIEX
def AV(self): def AV(self):
print('Alpha Vantage')
listAV = [] listAV = []
url = ''.join(('https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=', url = ''.join(('https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=',
self.name, '&outputsize=full&apikey=', apiAV)) self.name, '&outputsize=full&apikey=', apiAV))
# https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=MSFT&outputsize=full&apikey=demo # https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=MSFT&outputsize=full&apikey=demo
print("\nSending request to:", url) cprint("Get: " + url, 'white', attrs=['dark'])
with Halo(spinner='dots'):
f = requests.get(url) f = requests.get(url)
Functions.fromCache(f) Functions.fromCache(f)
json_data = f.text json_data = f.text
@ -168,14 +181,14 @@ class Stock:
if len(loaded_json) == 1 or f.status_code != 200 or len(loaded_json) == 0: if len(loaded_json) == 1 or f.status_code != 200 or len(loaded_json) == 0:
print("Alpha Vantage not available") print("Alpha Vantage not available")
return 'Not available' return 'N/A'
dailyTimeSeries = loaded_json['Time Series (Daily)'] dailyTimeSeries = loaded_json['Time Series (Daily)']
listOfDates = list(dailyTimeSeries) listOfDates = list(dailyTimeSeries)
# listAV.append(listOfDates) # listAV.append(listOfDates)
listAV.append(list(reversed(listOfDates))) listAV.append(list(reversed(listOfDates)))
print("\nFinding close values for each date") # print("\nFinding close values for each date")
values = [] values = []
for i in range(0, len(listOfDates), 1): for i in range(0, len(listOfDates), 1):
temp = listOfDates[i] temp = listOfDates[i]
@ -185,25 +198,25 @@ class Stock:
values.append(float(value)) values.append(float(value))
# listAV.append(values) # listAV.append(values)
listAV.append(list(reversed(values))) listAV.append(list(reversed(values)))
print(len(listAV[1]), "close values") print(len(listAV[0]), 'dates and', len(listAV[1]), "close values")
return listAV return listAV
def Tiingo(self): def Tiingo(self):
print('Tiingo')
token = ''.join(('Token ', apiTiingo)) token = ''.join(('Token ', apiTiingo))
headers = { headers = {
'Content-Type': 'application/json', 'Content-Type': 'application/json',
'Authorization': token 'Authorization': token
} }
url = ''.join(('https://api.tiingo.com/tiingo/daily/', self.name)) url = ''.join(('https://api.tiingo.com/tiingo/daily/', self.name))
print("\nSending request to:", url) cprint("Get: " + url, 'white', attrs=['dark'])
with Halo(spinner='dots'):
f = requests.get(url, headers=headers) f = requests.get(url, headers=headers)
Functions.fromCache(f) Functions.fromCache(f)
loaded_json = f.json() loaded_json = f.json()
if len(loaded_json) == 1 or f.status_code != 200 or loaded_json['startDate'] == None: if len(loaded_json) == 1 or f.status_code != 200 or loaded_json['startDate'] == None:
print("Tiingo not available") print("Tiingo not available")
return 'Not available' return 'N/A'
listTiingo = [] listTiingo = []
@ -218,7 +231,8 @@ class Stock:
url2 = ''.join((url, '/prices?startDate=', url2 = ''.join((url, '/prices?startDate=',
firstDate, '&endDate=', lastDate)) firstDate, '&endDate=', lastDate))
# https://api.tiingo.com/tiingo/daily/<ticker>/prices?startDate=2012-1-1&endDate=2016-1-1 # https://api.tiingo.com/tiingo/daily/<ticker>/prices?startDate=2012-1-1&endDate=2016-1-1
print("\nSending request to:", url2, '\n') cprint("\nGet: " + url2 + '\n', 'white', attrs=['dark'])
with Halo(spinner='dots'):
requestResponse2 = requests.get(url2, headers=headers) requestResponse2 = requests.get(url2, headers=headers)
Functions.fromCache(requestResponse2) Functions.fromCache(requestResponse2)
loaded_json2 = requestResponse2.json() loaded_json2 = requestResponse2.json()
@ -234,38 +248,86 @@ class Stock:
listTiingo.append(dates) listTiingo.append(dates)
print(len(listTiingo[0]), "dates") print(len(listTiingo[0]), "dates")
print("Finding close values for each date") # print("Finding close values for each date")
# Used loop from finding dates # Used loop from finding dates
listTiingo.append(values) listTiingo.append(values)
print(len(listTiingo[1]), "close values")
print(len(listTiingo[0]), 'dates and',
len(listTiingo[1]), "close values")
return listTiingo return listTiingo
def datesAndClose(self): def Yahoo(self):
print('\n', Stock.getName(self), sep='') url = ''.join(('https://finance.yahoo.com/quote/',
self.name, '?p=', self.name))
cprint('Get: ' + url, 'white', attrs=['dark'])
with Halo(spinner='dots'):
t = requests.get(url)
if t.history:
print('Yahoo Finance does not have data for', self.name)
print('Yahoo not available')
return 'N/A'
else:
print('Yahoo Finance has data for', self.name)
sourceList = ['AV', 'IEX', 'Tiingo'] ticker = self.name
# sourceList = ['IEX', 'Tiingo', 'AV'] firstDate = datetime.datetime.now().date(
) - datetime.timedelta(days=self.timeFrame*31) # 31 days as a buffer just in case
with Halo(spinner='dots'):
yahoo_financials = YahooFinancials(ticker)
r = yahoo_financials.get_historical_price_data(
str(firstDate), str(datetime.date.today()), 'daily')
s = r[self.name]['prices']
listOfDates = []
listOfCloseValues = []
for i in range(0, len(s), 1):
listOfDates.append(s[i]['formatted_date'])
listOfCloseValues.append(s[i]['close'])
listYahoo = [listOfDates, listOfCloseValues]
# Sometimes close value is a None value
i = 0
while i < len(listYahoo[1]):
if Functions.listIndexExists(listYahoo[1][i]) == True:
if listYahoo[1][i] == None:
del listYahoo[1][i]
del listYahoo[0][i]
i = i - 1
i = i + 1
else:
break
print(len(listYahoo[0]), 'dates and',
len(listYahoo[1]), "close values")
return listYahoo
def datesAndClose(self):
cprint('\n' + str(self.name), 'cyan')
sourceList = Stock.sourceList
# Use each source until you get a value # Use each source until you get a value
for j in range(0, len(sourceList), 1): for j in range(0, len(sourceList), 1):
source = sourceList[j] source = sourceList[j]
print('\nSource being used:', source) print('Source being used:', source)
if source == 'AV': if source == 'Alpha Vantage':
datesAndCloseList = Stock.AV(self) datesAndCloseList = Stock.AV(self)
elif source == 'Tiingo': elif source == 'Yahoo':
datesAndCloseList = Stock.Tiingo(self) datesAndCloseList = Stock.Yahoo(self)
elif source == 'IEX': elif source == 'IEX':
datesAndCloseList = Stock.IEX(self) datesAndCloseList = Stock.IEX(self)
elif source == 'Tiingo':
datesAndCloseList = Stock.Tiingo(self)
if datesAndCloseList != 'Not available': if datesAndCloseList != 'N/A':
break break
else: else:
if j == len(sourceList)-1: if j == len(sourceList)-1:
print('\nNo sources have data for', self.name) print('\nNo sources have data for', self.name)
print('Removing', self.name, print('Removing ' + self.name +
'from list of stocks to ensure compatibility later') ' from list of stocks to ensure compatibility later')
return 'Not available' return 'N/A'
print('')
# Convert dates to datetime # Convert dates to datetime
allDates = datesAndCloseList[0] allDates = datesAndCloseList[0]
@ -278,14 +340,14 @@ class Stock:
for i in datesAndCloseList[1]: for i in datesAndCloseList[1]:
if i == 0: if i == 0:
print('Found close value of 0. This is likely something like ticker RGN (Daily Time Series with Splits and Dividend Events)') print('Found close value of 0. This is likely something like ticker RGN (Daily Time Series with Splits and Dividend Events)')
print('Removing', self.name, print('Removing ' + self.name +
'from list of stocks to ensure compability later') 'from list of stocks to ensure compability later')
return 'Not available' return 'N/A'
return datesAndCloseList return datesAndCloseList
def datesAndCloseFitTimeFrame(self): def datesAndCloseFitTimeFrame(self):
print('Shortening list to fit time frame') print('\nShortening list to fit time frame')
# Have to do this because if I just make dates = self.allDates & closeValues = self.allCloseValues, then deleting from dates & closeValues also deletes it from self.allDates & self.allCloseValues (I'm not sure why) # Have to do this because if I just make dates = self.allDates & closeValues = self.allCloseValues, then deleting from dates & closeValues also deletes it from self.allDates & self.allCloseValues (I'm not sure why)
dates = [] dates = []
closeValues = [] closeValues = []
@ -295,7 +357,7 @@ class Stock:
firstDate = datetime.datetime.now().date() - datetime.timedelta( firstDate = datetime.datetime.now().date() - datetime.timedelta(
days=self.timeFrame*30) days=self.timeFrame*30)
print('\n', self.timeFrame, ' months ago: ', firstDate, sep='') print(self.timeFrame, ' months ago: ', firstDate, sep='')
closestDate = Functions.getNearest(dates, firstDate) closestDate = Functions.getNearest(dates, firstDate)
if closestDate != firstDate: if closestDate != firstDate:
print('Closest date available for', self.name, ':', closestDate) print('Closest date available for', self.name, ':', closestDate)
@ -315,9 +377,7 @@ class Stock:
datesAndCloseList2.append(dates) datesAndCloseList2.append(dates)
datesAndCloseList2.append(closeValues) datesAndCloseList2.append(closeValues)
print(len(dates), 'dates') print(len(dates), 'dates and', len(closeValues), 'close values')
print(len(closeValues), 'close values')
return datesAndCloseList2 return datesAndCloseList2
def calcAverageMonthlyReturn(self): # pylint: disable=E0202 def calcAverageMonthlyReturn(self): # pylint: disable=E0202
@ -345,7 +405,7 @@ class Stock:
if firstDate == secondDate: if firstDate == secondDate:
print('Closest date is', firstDate, print('Closest date is', firstDate,
'which is after the given time frame.') 'which is after the given time frame.')
return 'Not available' return 'N/A'
# Get corresponding close values and calculate monthly return # Get corresponding close values and calculate monthly return
for i in range(0, len(self.dates), 1): for i in range(0, len(self.dates), 1):
@ -499,34 +559,49 @@ class Stock:
def scrapeYahooFinance(self): def scrapeYahooFinance(self):
# Determine if ETF, Mutual fund, or stock # Determine if ETF, Mutual fund, or stock
print('Determining if Yahoo Finance has data for', self.name, end=": ")
url = ''.join(('https://finance.yahoo.com/quote/', url = ''.join(('https://finance.yahoo.com/quote/',
self.name, '?p=', self.name)) self.name, '?p=', self.name))
if requests.get(url).history: cprint('Get: ' + url, 'white', attrs=['dark'])
print('No') with Halo(spinner='dots'):
return 'Not available' t = requests.get(url)
Functions.fromCache(t)
if t.history:
print('Yahoo Finance does not have data for', self.name)
return 'N/A'
else: else:
print('Yes') print('Yahoo Finance has data for', self.name)
stockType = '' stockType = ''
url2 = ''.join(('https://finance.yahoo.com/lookup?s=', self.name)) url2 = ''.join(('https://finance.yahoo.com/lookup?s=', self.name))
print('Sending request to:', url2) cprint('Get: ' + url2, 'white', attrs=['dark'])
raw_html = requests.get(url2).text with Halo(spinner='dots'):
x = requests.get(url2)
raw_html = x.text
Functions.fromCache(x)
soup2 = BeautifulSoup(raw_html, 'html.parser') soup2 = BeautifulSoup(raw_html, 'html.parser')
# Type (Stock, ETF, Mutual Fund) # Type (Stock, ETF, Mutual Fund)
r = soup2.find_all( r = soup2.find_all(
'td', attrs={'class': 'data-col4 Ta(start) Pstart(20px) Miw(30px)'}) 'td', attrs={'class': 'data-col4 Ta(start) Pstart(20px) Miw(30px)'})
t = soup2.find_all('a', attrs={'class': 'Fw(b)'}) # Name and class u = soup2.find_all('a', attrs={'class': 'Fw(b)'}) # Name and class
z = soup2.find_all('td', attrs={ z = soup2.find_all('td', attrs={
'class': 'data-col1 Ta(start) Pstart(10px) Miw(80px)'}) # Name of stock 'class': 'data-col1 Ta(start) Pstart(10px) Miw(80px)'}) # Name of stock
listNames = [] listNames = []
for i in t: for i in u:
if i.text.strip() == i.text.strip().upper():
listNames.append(i.text.strip())
'''
if len(i.text.strip()) < 6: if len(i.text.strip()) < 6:
listNames.append(i.text.strip()) listNames.append(i.text.strip())
elif '.' in i.text.strip():
listNames.append(i.text.strip()) # Example: TSNAX (TSN.AX)
#! If having problems later, separate them by Industries (Mutual funds and ETF's are always N/A)
'''
for i in range(0, len(listNames), 1): for i in range(0, len(listNames), 1):
if listNames[i] == self.name: if listNames[i] == self.name:
break break
r = r[i].text.strip() r = r[i].text.strip()
z = z[i].text.strip() z = z[i].text.strip()
print('Name:', z) print('Name:', z)
@ -536,36 +611,32 @@ class Stock:
elif r == 'Stocks': elif r == 'Stocks':
stockType = 'Stock' stockType = 'Stock'
elif r == 'Mutual Fund': elif r == 'Mutual Fund':
stockType = 'Fund' stockType = 'Mutual Fund'
else: else:
print('Could not determine fund type') print('Could not determine fund type')
return 'Not available' return 'N/A'
print('Type:', stockType) print('Type:', stockType)
if Stock.indicator == 'Expense Ratio': if Stock.indicator == 'Expense Ratio':
if stockType == 'Stock': if stockType == 'Stock':
print( print(
self.name, 'is a stock, and therefore does not have an expense ratio') self.name, 'is a stock, and therefore does not have an expense ratio')
return 'Not available' return 'Stock'
url = ''.join(('https://finance.yahoo.com/quote/', raw_html = t.text
self.name, '?p=', self.name))
# https://finance.yahoo.com/quote/SPY?p=SPY
print('Sending request to:', url)
raw_html = requests.get(url).text
soup = BeautifulSoup(raw_html, 'html.parser') soup = BeautifulSoup(raw_html, 'html.parser')
r = soup.find_all('span', attrs={'class': 'Trsdu(0.3s)'}) r = soup.find_all('span', attrs={'class': 'Trsdu(0.3s)'})
if r == []: if r == []:
print('Something went wrong with scraping expense ratio') print('Something went wrong with scraping expense ratio')
return('Not available') return('N/A')
if stockType == 'ETF': if stockType == 'ETF':
for i in range(len(r)-1, 0, -1): for i in range(len(r)-1, 0, -1):
s = r[i].text.strip() s = r[i].text.strip()
if s[-1] == '%': if s[-1] == '%':
break break
elif stockType == 'Fund': elif stockType == 'Mutual Fund':
count = 0 # Second in set count = 0 # Second in set
for i in range(0, len(r)-1, 1): for i in range(0, len(r)-1, 1):
s = r[i].text.strip() s = r[i].text.strip()
@ -578,64 +649,78 @@ class Stock:
expenseRatio = float(s.replace('%', '')) expenseRatio = float(s.replace('%', ''))
else: else:
print('Something went wrong with scraping expense ratio') print('Something went wrong with scraping expense ratio')
return 'Not available' return 'N/A'
print(Stock.indicator + ': ', end='')
print(str(expenseRatio) + '%') print(str(expenseRatio) + '%')
return expenseRatio return expenseRatio
elif Stock.indicator == 'Market Capitalization': elif Stock.indicator == 'Market Capitalization':
url = ''.join(('https://finance.yahoo.com/quote/', somethingWrong = False
self.name, '?p=', self.name)) raw_html = t.text
# https://finance.yahoo.com/quote/GOOGL?p=GOOGL
raw_html = requests.get(url).text
soup = BeautifulSoup(raw_html, 'html.parser') soup = BeautifulSoup(raw_html, 'html.parser')
r = soup.find_all( r = soup.find_all(
'span', attrs={'class': 'Trsdu(0.3s)'}) 'span', attrs={'class': 'Trsdu(0.3s)'})
if r == []: if r == []:
print('Something went wrong with scraping market capitalization') somethingWrong = True
return 'Not available' else:
marketCap = 0 marketCap = 0
for t in r: for t in r:
s = t.text.strip() s = t.text.strip()
if s[-1] == 'B': if s[-1] == 'B':
print(Stock.indicator + ': ', end='')
print(s, end='') print(s, end='')
s = s.replace('B', '') s = s.replace('B', '')
marketCap = float(s) * 1000000000 # 1 billion marketCap = float(s) * 1000000000 # 1 billion
break break
elif s[-1] == 'M': elif s[-1] == 'M':
print(Stock.indicator + ': ', end='')
print(s, end='') print(s, end='')
s = s.replace('M', '') s = s.replace('M', '')
marketCap = float(s) * 1000000 # 1 million marketCap = float(s) * 1000000 # 1 million
break break
elif s[-1] == 'K': elif s[-1] == 'K':
print(Stock.indicator + ': ', end='')
print(s, end='') print(s, end='')
s = s.replace('K', '') s = s.replace('K', '')
marketCap = float(s) * 1000 # 1 thousand marketCap = float(s) * 1000 # 1 thousand
break break
if marketCap == 0: if marketCap == 0:
print('\nSomething went wrong with scraping market capitalization') somethingWrong = True
return 'Not available' if somethingWrong == True:
ticker = self.name
yahoo_financials = YahooFinancials(ticker)
marketCap = yahoo_financials.get_market_cap()
if marketCap != None:
print('(Taken from yahoofinancials)')
print(marketCap)
return int(marketCap)
else:
print(
'Was not able to scrape or get market capitalization from yahoo finance')
return 'N/A'
marketCap = int(marketCap) marketCap = int(marketCap)
return marketCap
print(' =', marketCap) print(' =', marketCap)
marketCap = marketCap / 1000000
print(
'Dividing marketCap by 1 million (to work with linear regression module):', marketCap)
return marketCap return marketCap
elif Stock.indicator == 'Turnover': elif Stock.indicator == 'Turnover':
if stockType == 'Stock': if stockType == 'Stock':
print(self.name, 'is a stock, and therefore does not have turnover') print(self.name, 'is a stock, and therefore does not have turnover')
return 'Not available' return 'Stock'
if stockType == 'Fund': if stockType == 'Mutual Fund':
url = ''.join(('https://finance.yahoo.com/quote/', raw_html = t.text
self.name, '?p=', self.name))
# https://finance.yahoo.com/quote/SPY?p=SPY
print('Sending request to', url)
raw_html = requests.get(url).text
soup = BeautifulSoup(raw_html, 'html.parser') soup = BeautifulSoup(raw_html, 'html.parser')
r = soup.find_all( r = soup.find_all(
'span', attrs={'class': 'Trsdu(0.3s)'}) 'span', attrs={'class': 'Trsdu(0.3s)'})
if r == []: if r == []:
print('Something went wrong without scraping turnover') print('Something went wrong without scraping turnover')
return 'Not available' return 'N/A'
turnover = 0 turnover = 0
for i in range(len(r)-1, 0, -1): for i in range(len(r)-1, 0, -1):
s = r[i].text.strip() s = r[i].text.strip()
@ -646,7 +731,8 @@ class Stock:
url = ''.join(('https://finance.yahoo.com/quote/', url = ''.join(('https://finance.yahoo.com/quote/',
self.name, '/profile?p=', self.name)) self.name, '/profile?p=', self.name))
# https://finance.yahoo.com/quote/SPY/profile?p=SPY # https://finance.yahoo.com/quote/SPY/profile?p=SPY
print('Sending request to', url) cprint('Get: ' + url, 'white', attrs=['dark'])
with Halo(spinner='dots'):
raw_html = requests.get(url).text raw_html = requests.get(url).text
soup = BeautifulSoup(raw_html, 'html.parser') soup = BeautifulSoup(raw_html, 'html.parser')
@ -654,17 +740,21 @@ class Stock:
'span', attrs={'class': 'W(20%) D(b) Fl(start) Ta(e)'}) 'span', attrs={'class': 'W(20%) D(b) Fl(start) Ta(e)'})
if r == []: if r == []:
print('Something went wrong without scraping turnover') print('Something went wrong without scraping turnover')
return 'Not available' return 'N/A'
turnover = 0 turnover = 0
for i in range(len(r)-1, 0, -1): for i in range(len(r)-1, 0, -1):
s = r[i].text.strip() s = r[i].text.strip()
if s[-1] == '%': if s[-1] == '%':
turnover = float(s.replace('%', '')) turnover = float(s.replace('%', ''))
break break
elif s == 'N/A':
print(self.name, 'has a value of N/A for turnover')
return 'N/A'
if turnover == 0: if turnover == 0:
print('Something went wrong with scraping turnover') print('Something went wrong with scraping turnover')
return 'Not available' return 'N/A'
print(Stock.indicator + ': ', end='')
print(str(turnover) + '%') print(str(turnover) + '%')
return turnover return turnover
@ -684,7 +774,9 @@ class Stock:
indicatorValue = str( indicatorValue = str(
input(Stock.indicator + ' of ' + self.name + ': ')) input(Stock.indicator + ' of ' + self.name + ': '))
else: else:
print('Something is wrong. Indicator was not found. Ending program.') # print('Something is wrong. Indicator was not found. Ending program.')
cprint(
'Something is wrong. Indicator was not found. Ending program.', 'white', 'on_red')
exit() exit()
if Functions.strintIsFloat(indicatorValue) == True: if Functions.strintIsFloat(indicatorValue) == True:
@ -698,7 +790,7 @@ class Stock:
0, Stock.persTimeFrame, 1))) / Stock.persTimeFrame 0, Stock.persTimeFrame, 1))) / Stock.persTimeFrame
persistenceSecond = self.averageMonthlyReturn persistenceSecond = self.averageMonthlyReturn
persistence = persistenceSecond-persistenceFirst persistence = persistenceSecond-persistenceFirst
print('Change in average monthly return:', persistence) print('Change (difference) in average monthly return:', persistence)
return persistence return persistence
@ -765,6 +857,15 @@ def stocksInit():
method = 0 method = 0
methods = ['Read from a file', 'Enter manually', methods = ['Read from a file', 'Enter manually',
'U.S. News popular funds (~35)', 'Kiplinger top-performing funds (50)', 'TheStreet top-rated mutual funds (20)'] 'U.S. News popular funds (~35)', 'Kiplinger top-performing funds (50)', 'TheStreet top-rated mutual funds (20)']
if Stock.config != 'N/A':
methodsConfig = ['Read', 'Manual',
'U.S. News', 'Kiplinger', 'TheStreet']
for i in range(0, len(methodsConfig), 1):
if Stock.config['Method'] == methodsConfig[i]:
method = i + 1
else:
for i in range(0, len(methods), 1): for i in range(0, len(methods), 1):
print(str(i+1) + '. ' + methods[i]) print(str(i+1) + '. ' + methods[i])
while method == 0 or method > len(methods): while method == 0 or method > len(methods):
@ -776,13 +877,13 @@ def stocksInit():
else: else:
method = 0 method = 0
print('Please choose a number') print('Please choose a number')
print('')
print('')
if method == 1: if method == 1:
defaultFiles = ['.gitignore', 'LICENSE', 'main.py', 'Functions.py', defaultFiles = ['.gitignore', 'LICENSE', 'main.py', 'Functions.py',
'README.md', 'requirements.txt', 'cache.sqlite', '_test_runner.py'] # Added by repl.it for whatever reason 'README.md', 'requirements.txt', 'cache.sqlite', 'yahoofinancials.py', 'termcolor.py', 'README.html', 'config.json', '_test_runner.py'] # Added by repl.it for whatever reason
stocksFound = False stocksFound = False
print('Files in current directory (not including default files): ') print('\nFiles in current directory (not including default files): ')
listOfFilesTemp = [f for f in os.listdir() if os.path.isfile(f)] listOfFilesTemp = [f for f in os.listdir() if os.path.isfile(f)]
listOfFiles = [] listOfFiles = []
for files in listOfFilesTemp: for files in listOfFilesTemp:
@ -851,7 +952,8 @@ def stocksInit():
url = 'https://money.usnews.com/funds/mutual-funds/most-popular' url = 'https://money.usnews.com/funds/mutual-funds/most-popular'
headers = { headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'} 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
print('Sending request to', url) cprint('Get: ' + url, 'white', attrs=['dark'])
with Halo(spinner='dots'):
f = requests.get(url, headers=headers) f = requests.get(url, headers=headers)
Functions.fromCache(f) Functions.fromCache(f)
raw_html = f.text raw_html = f.text
@ -878,7 +980,8 @@ def stocksInit():
url = 'https://www.kiplinger.com/tool/investing/T041-S001-top-performing-mutual-funds/index.php' url = 'https://www.kiplinger.com/tool/investing/T041-S001-top-performing-mutual-funds/index.php'
headers = { headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'} 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'}
print('Sending request to', url) cprint('Get: ' + url, 'white', attrs=['dark'])
with Halo(spinner='dots'):
f = requests.get(url, headers=headers) f = requests.get(url, headers=headers)
Functions.fromCache(f) Functions.fromCache(f)
raw_html = f.text raw_html = f.text
@ -904,7 +1007,8 @@ def stocksInit():
url = 'https://www.thestreet.com/topic/21421/top-rated-mutual-funds.html' url = 'https://www.thestreet.com/topic/21421/top-rated-mutual-funds.html'
headers = { headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'} 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'}
print('Sending request to', url) cprint('Get: ' + url, 'white', attrs=['dark'])
with Halo(spinner='dots'):
f = requests.get(url, headers=headers) f = requests.get(url, headers=headers)
Functions.fromCache(f) Functions.fromCache(f)
raw_html = f.text raw_html = f.text
@ -977,7 +1081,7 @@ def asyncData(benchmark, listOfStocks):
def sendAsync(url): def sendAsync(url):
time.sleep(random.randrange(0, 2)) time.sleep(random.randrange(0, 2))
print('Sending request to', url) cprint('Get: ' + url, 'white', attrs=['dark'])
requests.get(url) requests.get(url)
return return
@ -990,7 +1094,7 @@ def timeFrameInit():
temp = input(' ') temp = input(' ')
isInteger = Functions.stringIsInt(temp) isInteger = Functions.stringIsInt(temp)
if isInteger == True: if isInteger == True:
if int(temp) > 1: if int(temp) > 1 and int(temp) < 1000:
months = int(temp) months = int(temp)
else: else:
print('Please enter a number greater than 1') print('Please enter a number greater than 1')
@ -1003,15 +1107,15 @@ def timeFrameInit():
def dataMain(listOfStocks): def dataMain(listOfStocks):
print('\nGathering dates and close values')
i = 0 i = 0
while i < len(listOfStocks): while i < len(listOfStocks):
datesAndCloseList = Stock.datesAndClose(listOfStocks[i]) datesAndCloseList = Stock.datesAndClose(listOfStocks[i])
if datesAndCloseList == 'Not available': if datesAndCloseList == 'N/A':
del listOfStocks[i] del listOfStocks[i]
if len(listOfStocks) == 0: if len(listOfStocks) == 0:
print('No stocks to analyze. Ending program') # print('No stocks to analyze. Ending program')
cprint('No stocks to analyze. Ending program', 'white', 'on_red')
exit() exit()
else: else:
listOfStocks[i].allDates = datesAndCloseList[0] listOfStocks[i].allDates = datesAndCloseList[0]
@ -1032,7 +1136,8 @@ def riskFreeRate():
('https://www.quandl.com/api/v3/datasets/USTREASURY/LONGTERMRATES.json?api_key=', apiQuandl)) ('https://www.quandl.com/api/v3/datasets/USTREASURY/LONGTERMRATES.json?api_key=', apiQuandl))
# https://www.quandl.com/api/v3/datasets/USTREASURY/LONGTERMRATES.json?api_key=KUh3U3hxke9tCimjhWEF # https://www.quandl.com/api/v3/datasets/USTREASURY/LONGTERMRATES.json?api_key=KUh3U3hxke9tCimjhWEF
print("\nSending request to:", url) cprint('\nGet: ' + url, 'white', attrs=['dark'])
with Halo(spinner='dots'):
f = requests.get(url) f = requests.get(url)
Functions.fromCache(f) Functions.fromCache(f)
json_data = f.text json_data = f.text
@ -1043,7 +1148,7 @@ def riskFreeRate():
print('Risk-free rate:', riskFreeRate, end='\n\n') print('Risk-free rate:', riskFreeRate, end='\n\n')
if f.status_code != 200: if f.status_code != 200:
print("Quandl not available") print('Quandl not available')
print('Returning 2.50 as risk-free rate', end='\n\n') print('Returning 2.50 as risk-free rate', end='\n\n')
# return 0.0250 # return 0.0250
return 2.50 return 2.50
@ -1052,13 +1157,14 @@ def riskFreeRate():
def returnMain(benchmark, listOfStocks): def returnMain(benchmark, listOfStocks):
print('\nCalculating unadjusted return, Sharpe ratio, Sortino ratio, and Treynor ratio\n') cprint('\nCalculating return statistics\n', 'white', attrs=['underline'])
print('Getting risk-free rate from current 10-year treasury bill rates', end='\n\n') print('Getting risk-free rate from current 10-year treasury bill rates', end='\n\n')
Stock.riskFreeRate = riskFreeRate() Stock.riskFreeRate = riskFreeRate()
print(benchmark.name, end='\n\n') cprint(benchmark.name, 'cyan')
benchmark.monthlyReturn = Stock.calcMonthlyReturn(benchmark) benchmark.monthlyReturn = Stock.calcMonthlyReturn(benchmark)
if benchmark.monthlyReturn == 'Not available': if benchmark.monthlyReturn == 'N/A':
print('Please use a lower time frame\nEnding program') # print('Please use a lower time frame\nEnding program')
cprint('Please use a lower time frame. Ending program', 'white', 'on_red')
exit() exit()
benchmark.averageMonthlyReturn = Stock.calcAverageMonthlyReturn(benchmark) benchmark.averageMonthlyReturn = Stock.calcAverageMonthlyReturn(benchmark)
benchmark.standardDeviation = Stock.calcStandardDeviation(benchmark) benchmark.standardDeviation = Stock.calcStandardDeviation(benchmark)
@ -1071,7 +1177,7 @@ def returnMain(benchmark, listOfStocks):
i = 0 i = 0
while i < len(listOfStocks): while i < len(listOfStocks):
print('\n' + listOfStocks[i].name, end='\n\n') cprint('\n' + listOfStocks[i].name, 'cyan')
# Make sure each date has a value for both the benchmark and the stock # Make sure each date has a value for both the benchmark and the stock
list1 = [] list1 = []
@ -1088,11 +1194,13 @@ def returnMain(benchmark, listOfStocks):
# Calculate everything for each stock # Calculate everything for each stock
listOfStocks[i].monthlyReturn = Stock.calcMonthlyReturn( listOfStocks[i].monthlyReturn = Stock.calcMonthlyReturn(
listOfStocks[i]) listOfStocks[i])
if listOfStocks[i].monthlyReturn == 'Not available': if listOfStocks[i].monthlyReturn == 'N/A':
print('Removing', listOfStocks[i].name, 'from list of stocks') print('Removing ' + listOfStocks[i].name + ' from list of stocks')
del listOfStocks[i] del listOfStocks[i]
if len(listOfStocks) == 0: if len(listOfStocks) == 0:
print('No stocks fit time frame. Ending program') print('No stocks fit time frame. Ending program')
cprint('No stocks fit time frame. Ending program',
'white', 'on_red')
exit() exit()
else: else:
listOfStocks[i].averageMonthlyReturn = Stock.calcAverageMonthlyReturn( listOfStocks[i].averageMonthlyReturn = Stock.calcAverageMonthlyReturn(
@ -1117,13 +1225,36 @@ def returnMain(benchmark, listOfStocks):
i += 1 i += 1
print('\nNumber of stocks from original list that fit time frame:', cprint('\nNumber of stocks from original list that fit time frame: ' +
len(listOfStocks)) str(len(listOfStocks)), 'green')
if len(listOfStocks) < 2: if len(listOfStocks) < 2:
print('Cannot proceed to the next step. Exiting program.') #print('Cannot proceed to the next step. Exiting program.')
cprint('Cannot proceed to the next step. Exiting program.',
'white', 'on_red')
exit() exit()
def outlierChoice():
    """Ask the user whether indicator outliers should be removed.

    Keeps prompting until the answer is ``1``/``yes`` or ``2``/``no``
    (the words are matched case-insensitively).

    Returns:
        bool: True when outliers should be removed, False otherwise.
    """
    print('\nWould you like to remove indicator outliers?')
    print('1. Yes\n2. No')
    # The loop is only ever left through one of the return statements.
    while True:
        answer = str(input('Choice: '))
        if Functions.stringIsInt(answer):
            number = int(answer)
            if number == 1:
                return True
            if number == 2:
                return False
            print('Please enter 1 or 2')
        elif answer.lower() == 'yes':
            return True
        elif answer.lower() == 'no':
            return False
        else:
            print('Not valid. Please enter a number or yes or no.')
def indicatorInit(): def indicatorInit():
# Runs correlation or regression study # Runs correlation or regression study
indicatorFound = False indicatorFound = False
@ -1220,6 +1351,8 @@ def plot_regression_line(x, y, b, i):
plt.xlabel(Stock.indicator + ' (%)') plt.xlabel(Stock.indicator + ' (%)')
elif Stock.indicator == 'Persistence': elif Stock.indicator == 'Persistence':
plt.xlabel(Stock.indicator + ' (Difference in average monthly return)') plt.xlabel(Stock.indicator + ' (Difference in average monthly return)')
elif Stock.indicator == 'Market Capitalization':
plt.xlabel(Stock.indicator + ' (millions)')
else: else:
plt.xlabel(Stock.indicator) plt.xlabel(Stock.indicator)
@ -1266,25 +1399,61 @@ def persistenceTimeFrame():
def indicatorMain(listOfStocks): def indicatorMain(listOfStocks):
print('\n' + str(Stock.indicator) + '\n') cprint('\n' + str(Stock.indicator) + '\n', 'white', attrs=['underline'])
listOfStocksIndicatorValues = [] listOfStocksIndicatorValues = []
for i in range(0, len(listOfStocks), 1): for i in range(0, len(listOfStocks), 1):
print(listOfStocks[i].name) cprint(listOfStocks[i].name, 'cyan')
if Stock.indicator != 'Persistence': if Stock.indicator == 'Persistence':
listOfStocks[i].indicatorValue = Stock.scrapeYahooFinance( listOfStocks[i].indicatorValue = Stock.calcPersistence(
listOfStocks[i]) listOfStocks[i])
else: else:
listOfStocks[i].indicatorValue = Stock.calcPersistence( listOfStocks[i].indicatorValue = Stock.scrapeYahooFinance(
listOfStocks[i]) listOfStocks[i])
print('') print('')
if listOfStocks[i].indicatorValue == 'Not available': if listOfStocks[i].indicatorValue == 'N/A':
listOfStocks[i].indicatorValue = Stock.indicatorManual( listOfStocks[i].indicatorValue = Stock.indicatorManual(
listOfStocks[i]) listOfStocks[i])
elif listOfStocks[i].indicatorValue == 'Stock':
print('Removing ' + listOfStocks[i].name + ' from list of stocks')
del listOfStocks[i]
if len(listOfStocks) < 2:
# print('Not able to go to the next step. Ending program')
cprint('Not able to go to the next step. Ending program',
'white', 'on_red')
exit()
listOfStocksIndicatorValues.append(listOfStocks[i].indicatorValue) listOfStocksIndicatorValues.append(listOfStocks[i].indicatorValue)
# Remove outliers
if Stock.removeOutliers == True:
cprint('\nRemoving outliers\n', 'white', attrs=['underline'])
temp = Functions.removeOutliers(listOfStocksIndicatorValues)
if temp[0] == listOfStocksIndicatorValues:
print('No outliers\n')
else:
print('First quartile:', temp[2], ', Median:', temp[3],
', Third quartile:', temp[4], 'Interquartile range:', temp[5])
# print('Original list:', listOfStocksIndicatorValues)
listOfStocksIndicatorValues = temp[0]
i = 0
while i < len(listOfStocks)-1:
for j in temp[1]:
if listOfStocks[i].indicatorValue == j:
print('Removing', listOfStocks[i].name, 'because it has a',
Stock.indicator.lower(), 'value of', listOfStocks[i].indicatorValue)
del listOfStocks[i]
i = i - 1
break
i += 1
# print('New list:', listOfStocksIndicatorValues, '\n')
print('')
# Calculate data
cprint('Calculating correlation and linear regression\n',
'white', attrs=['underline'])
listOfReturns = [] # A list that matches the above list with return values [[averageMonthlyReturn1, aAR2, aAR3], [sharpe1, sharpe2, sharpe3], etc.] listOfReturns = [] # A list that matches the above list with return values [[averageMonthlyReturn1, aAR2, aAR3], [sharpe1, sharpe2, sharpe3], etc.]
tempListOfReturns = [] tempListOfReturns = []
for i in range(0, len(listOfStocks), 1): for i in range(0, len(listOfStocks), 1):
@ -1318,7 +1487,7 @@ def indicatorMain(listOfStocks):
listOfReturnStrings = ['Average Monthly Return', listOfReturnStrings = ['Average Monthly Return',
'Sharpe Ratio', 'Sortino Ratio', 'Treynor Ratio', 'Alpha'] 'Sharpe Ratio', 'Sortino Ratio', 'Treynor Ratio', 'Alpha']
for i in range(0, len(Stock.indicatorCorrelation), 1): for i in range(0, len(Stock.indicatorCorrelation), 1):
print('Correlation with ' + Stock.indicator.lower() + ' and ' + print('Correlation for ' + Stock.indicator.lower() + ' and ' +
listOfReturnStrings[i].lower() + ': ' + str(Stock.indicatorCorrelation[i])) listOfReturnStrings[i].lower() + ': ' + str(Stock.indicatorCorrelation[i]))
Stock.indicatorRegression = calcIndicatorRegression( Stock.indicatorRegression = calcIndicatorRegression(
@ -1331,10 +1500,29 @@ def indicatorMain(listOfStocks):
listOfReturnStrings[i].lower() + ': ' + formula) listOfReturnStrings[i].lower() + ': ' + formula)
def checkConfig(fileName):
    """Load the ``Config`` section of a JSON configuration file.

    Args:
        fileName: Path of the JSON config file to read.

    Returns:
        The value stored under the top-level ``'Config'`` key, or the
        string ``'N/A'`` when the file does not exist, is not valid JSON,
        or has no ``'Config'`` entry.
    """
    if Functions.fileExists(fileName) == False:
        return 'N/A'

    # Context manager guarantees the handle is released even if read() fails.
    with open(fileName, 'r') as configFile:
        contents = configFile.read()

    if Functions.validateJson(contents) == False:
        print('Config file is not valid')
        return 'N/A'

    parsed = json.loads(contents)
    try:
        return parsed['Config']
    except (KeyError, TypeError):
        # Valid JSON, but not an object carrying a 'Config' section.
        print('Config file is not valid')
        return 'N/A'
def main(): def main():
# Check config file for errors and if not, then use values
#! Only use this if you know it is exactly correct. I haven't spent much time debugging this
Stock.config = checkConfig('config.json')
# Check that all required packages are installed # Check that all required packages are installed
if Stock.config == 'N/A':
packagesInstalled = Functions.checkPackages( packagesInstalled = Functions.checkPackages(
['numpy', 'requests', 'bs4', 'requests_cache']) ['numpy', 'requests', 'bs4', 'requests_cache', 'halo'])
if not packagesInstalled: if not packagesInstalled:
exit() exit()
else: else:
@ -1343,18 +1531,15 @@ def main():
# Check python version is above 3.3 # Check python version is above 3.3
pythonVersionGood = Functions.checkPythonVersion() pythonVersionGood = Functions.checkPythonVersion()
if not pythonVersionGood: if not pythonVersionGood:
return exit()
# Test internet connection # Test internet connection
internetConnection = Functions.isConnected() internetConnection = Functions.isConnected()
if not internetConnection: if not internetConnection:
return exit()
else: else:
Functions.getJoke() Functions.getJoke()
# Functions.getJoke()
# Choose benchmark and makes it class Stock # Choose benchmark and makes it class Stock
benchmark = benchmarkInit() benchmark = benchmarkInit()
# Add it to a list to work with other functions # Add it to a list to work with other functions
@ -1373,10 +1558,67 @@ def main():
if Stock.indicator == 'Persistence': if Stock.indicator == 'Persistence':
Stock.persTimeFrame = persistenceTimeFrame() Stock.persTimeFrame = persistenceTimeFrame()
# Choose whether to remove outliers or not
Stock.removeOutliers = outlierChoice()
else:
if Stock.config['Check Packages'] != False:
packagesInstalled = Functions.checkPackages(
['numpy', 'requests', 'bs4', 'requests_cache', 'halo'])
if not packagesInstalled:
exit()
else:
print('All required packages are installed')
if Stock.config['Check Python Version'] != False:
pythonVersionGood = Functions.checkPythonVersion()
if not pythonVersionGood:
exit()
if Stock.config['Check Internet Connection'] != False:
internetConnection = Functions.isConnected()
if not internetConnection:
exit()
if Stock.config['Get Joke'] != False:
Functions.getJoke()
benchmarksTicker = ['SPY', 'DJIA', 'VTHR', 'EFT']
if Stock.config['Benchmark'] in benchmarksTicker:
benchmark = Stock()
benchmark.setName(str(Stock.config['Benchmark']))
benchmarkAsList = [benchmark]
else:
benchmark = benchmarkInit()
benchmarkAsList = [benchmark]
listOfStocks = stocksInit()
if int(Stock.config['Time Frame']) >= 2:
timeFrame = int(Stock.config['Time Frame'])
else:
timeFrame = timeFrameInit()
Stock.timeFrame = timeFrame # Needs to be a global variable for all stocks
indicators = ['Expense Ratio',
'Market Capitalization', 'Turnover', 'Persistence']
if Stock.config['Indicator'] in indicators:
Stock.indicator = Stock.config['Indicator']
else:
Stock.indicator = indicatorInit()
if Stock.indicator == 'Persistence':
Stock.persTimeFrame = persistenceTimeFrame()
# Choose whether to remove outliers or not
if Stock.config['Remove Outliers'] != False:
Stock.removeOutliers = True
else:
Stock.removeOutliers = outlierChoice()
# Send async request to AV for listOfStocks and benchmark # Send async request to AV for listOfStocks and benchmark
asyncData(benchmark, listOfStocks) # asyncData(benchmark, listOfStocks)
# Gather data for benchmark and stock(s) # Gather data for benchmark and stock(s)
cprint('\nGathering data', 'white', attrs=['underline'])
dataMain(benchmarkAsList) dataMain(benchmarkAsList)
dataMain(listOfStocks) dataMain(listOfStocks)
@ -1386,6 +1628,7 @@ def main():
# Choose indicator and calculate correlation with indicator # Choose indicator and calculate correlation with indicator
indicatorMain(listOfStocks) indicatorMain(listOfStocks)
print('')
exit() exit()

168
modules/termcolor.py Normal file
View File

@ -0,0 +1,168 @@
# coding: utf-8
# Copyright (c) 2008-2011 Volvox Development Team
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# Author: Konstantin Lepa <konstantin.lepa@gmail.com>
"""ANSI color formatting for output in terminal."""
from __future__ import print_function
import os
# NOTE(review): Python only honours the lower-case name ``__all__``; this
# upper-case variable has no effect on ``from termcolor import *``.
__ALL__ = [ 'colored', 'cprint' ]

VERSION = (1, 1, 0)

# Text attribute name -> SGR code (1..8). The empty names are placeholders
# for codes 3 and 6, which are skipped.
ATTRIBUTES = {
    name: code
    for code, name in enumerate(
        ['bold', 'dark', '', 'underline', 'blink', '', 'reverse', 'concealed'],
        1,
    )
    if name
}

# Background highlight name -> SGR code (40..47).
HIGHLIGHTS = {
    name: code
    for code, name in enumerate(
        ['on_grey', 'on_red', 'on_green', 'on_yellow',
         'on_blue', 'on_magenta', 'on_cyan', 'on_white'],
        40,
    )
}

# Foreground color name -> SGR code (30..37).
COLORS = {
    name: code
    for code, name in enumerate(
        ['grey', 'red', 'green', 'yellow',
         'blue', 'magenta', 'cyan', 'white'],
        30,
    )
}

# Escape sequence appended by colored() to reset all formatting.
RESET = '\033[0m'
def colored(text, color=None, on_color=None, attrs=None):
    """Colorize text.

    Wraps ``text`` in ANSI escape sequences for the requested color,
    highlight and attributes and appends the reset sequence. When the
    ``ANSI_COLORS_DISABLED`` environment variable is set, ``text`` is
    returned unchanged.

    Available text colors:
        grey, red, green, yellow, blue, magenta, cyan, white.

    Available text highlights:
        on_grey, on_red, on_green, on_yellow, on_blue, on_magenta,
        on_cyan, on_white.

    Available attributes:
        bold, dark, underline, blink, reverse, concealed.

    Raises:
        KeyError: if ``color``, ``on_color`` or an entry of ``attrs`` is
            not one of the names listed above.

    Example:
        colored('Hello, World!', 'red', 'on_grey', ['bold', 'blink'])
        colored('Hello, World!', 'green')
    """
    if os.getenv('ANSI_COLORS_DISABLED') is None:
        fmt_str = '\033[%dm%s'
        # Each requested style is prepended as its own escape sequence.
        if color is not None:
            text = fmt_str % (COLORS[color], text)

        if on_color is not None:
            text = fmt_str % (HIGHLIGHTS[on_color], text)

        if attrs is not None:
            for attr in attrs:
                text = fmt_str % (ATTRIBUTES[attr], text)

        # Reset is appended even when no style was requested.
        text += RESET
    return text
def cprint(text, color=None, on_color=None, attrs=None, **kwargs):
    """Print ``text`` after styling it with :func:`colored`.

    Any extra keyword arguments are forwarded to ``print`` unchanged.
    """
    rendered = colored(text, color, on_color, attrs)
    print(rendered, **kwargs)
if __name__ == '__main__':
    # Self-test: print one sample line per color, highlight and attribute.
    print('Current terminal type: %s' % os.getenv('TERM'))

    print('Test basic colors:')
    for demo_color in ('grey', 'red', 'green', 'yellow',
                       'blue', 'magenta', 'cyan', 'white'):
        cprint('%s color' % demo_color.capitalize(), demo_color)
    print('-' * 78)

    print('Test highlights:')
    for demo_color in ('grey', 'red', 'green', 'yellow',
                       'blue', 'magenta', 'cyan'):
        cprint('On %s color' % demo_color, on_color='on_' + demo_color)
    # White background needs a dark foreground to stay readable.
    cprint('On white color', color='grey', on_color='on_white')
    print('-' * 78)

    print('Test attributes:')
    for demo_label, demo_color, demo_attrs in (
            ('Bold grey color', 'grey', ['bold']),
            ('Dark red color', 'red', ['dark']),
            ('Underline green color', 'green', ['underline']),
            ('Blink yellow color', 'yellow', ['blink']),
            ('Reversed blue color', 'blue', ['reverse']),
            ('Concealed Magenta color', 'magenta', ['concealed']),
            ('Bold underline reverse cyan color', 'cyan',
             ['bold', 'underline', 'reverse']),
            ('Dark blink concealed white color', 'white',
             ['dark', 'blink', 'concealed']),
    ):
        cprint(demo_label, demo_color, attrs=demo_attrs)
    print('-' * 78)

    print('Test mixing:')
    cprint('Underline red on grey color', 'red', 'on_grey', ['underline'])
    cprint('Reversed green on red color', 'green', 'on_red', ['reverse'])

891
modules/yahoofinancials.py Normal file
View File

@ -0,0 +1,891 @@
"""
==============================
The Yahoo Financials Module
Version: 1.5
==============================
Author: Connor Sanders
Email: sandersconnor1@gmail.com
Version Released: 01/27/2019
Tested on Python 2.7, 3.3, 3.4, 3.5, 3.6, and 3.7
Copyright (c) 2019 Connor Sanders
MIT License
List of Included Functions:
1) get_financial_stmts(frequency, statement_type, reformat=True)
- frequency can be either 'annual' or 'quarterly'.
- statement_type can be 'income', 'balance', 'cash'.
- reformat optional value defaulted to true. Enter False for unprocessed raw data from Yahoo Finance.
2) get_stock_price_data(reformat=True)
- reformat optional value defaulted to true. Enter False for unprocessed raw data from Yahoo Finance.
3) get_stock_earnings_data(reformat=True)
- reformat optional value defaulted to true. Enter False for unprocessed raw data from Yahoo Finance.
4) get_summary_data(reformat=True)
- reformat optional value defaulted to true. Enter False for unprocessed raw data from Yahoo Finance.
5) get_stock_quote_type_data()
6) get_historical_price_data(start_date, end_date, time_interval)
- Gets historical price data for currencies, stocks, indexes, cryptocurrencies, and commodity futures.
- start_date should be entered in the 'YYYY-MM-DD' format. First day that financial data will be pulled.
- end_date should be entered in the 'YYYY-MM-DD' format. Last day that financial data will be pulled.
- time_interval can be either 'daily', 'weekly', or 'monthly'. Parameter determines the time period interval.
Usage Examples:
from yahoofinancials import YahooFinancials
#tickers = 'AAPL'
#or
tickers = ['AAPL', 'WFC', 'F', 'JPY=X', 'XRP-USD', 'GC=F']
yahoo_financials = YahooFinancials(tickers)
balance_sheet_data = yahoo_financials.get_financial_stmts('quarterly', 'balance')
earnings_data = yahoo_financials.get_stock_earnings_data()
historical_prices = yahoo_financials.get_historical_price_data('2015-01-15', '2017-10-15', 'weekly')
"""
import sys
import calendar
import re
from json import loads
import time
from bs4 import BeautifulSoup
import datetime
import pytz
import random
try:
from urllib import FancyURLopener
except:
from urllib.request import FancyURLopener
# Epoch-seconds timestamp of the most recent page fetch (0 until the first one).
# YahooFinanceETL._scrape_data reads and updates it to add a minimum delay
# between gets - be nice!
_lastget = 0
# Custom Exception class to handle custom error
class ManagedException(Exception):
    """Error raised by this module when a Yahoo Finance page cannot be retrieved."""
# Class used to open urls for financial data
class UrlOpener(FancyURLopener):
    """URL opener that identifies itself with a browser-like User-Agent string."""

    # Value of the ``version`` attribute inherited from FancyURLopener.
    version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11'
# Class containing Yahoo Finance ETL Functionality
class YahooFinanceETL(object):
def __init__(self, ticker):
    """Store the upper-cased ticker(s) and start with an empty page cache.

    ``ticker`` may be a single ``str`` or an iterable of strings; an
    iterable is stored as a list of upper-cased symbols.
    """
    if isinstance(ticker, str):
        normalized = ticker.upper()
    else:
        normalized = [symbol.upper() for symbol in ticker]
    self.ticker = normalized
    # Maps a scraped URL to the data parsed from that page.
    self._cache = {}
# Minimum number of seconds between Yahoo Finance page fetches. _scrape_data
# compares it with the time elapsed since the module-level _lastget timestamp,
# so the delay is shared by all instances rather than tracked per instance.
_MIN_INTERVAL = 7
# Meta-data dictionaries for the classes to use, keyed by statement type.
# NOTE(review): the first entry of each list looks like a Yahoo Finance page
# name and the remaining ones like keys of the scraped data - confirm against
# the methods that read this table.
YAHOO_FINANCIAL_TYPES = {
'income': ['financials', 'incomeStatementHistory', 'incomeStatementHistoryQuarterly'],
'balance': ['balance-sheet', 'balanceSheetHistory', 'balanceSheetHistoryQuarterly', 'balanceSheetStatements'],
'cash': ['cash-flow', 'cashflowStatementHistory', 'cashflowStatementHistoryQuarterly', 'cashflowStatements'],
'keystats': ['key-statistics'],
'history': ['history']
}
# Interval value translation dictionary: time interval name -> interval code.
# NOTE(review): presumably these codes are what ends up in the 'interval'
# entry used by the URL builders - confirm against the caller.
_INTERVAL_DICT = {
'daily': '1d',
'weekly': '1wk',
'monthly': '1mo'
}
# Base Yahoo Finance URL for the class to build on; _build_historical_url
# appends the encoded ticker and the query string to it.
_BASE_YAHOO_URL = 'https://finance.yahoo.com/quote/'
# Public static method to get the appropriate report type identifier
@staticmethod
def get_report_type(frequency):
    """Return the report type identifier: 1 for ``'annual'``, 2 for anything else."""
    return 1 if frequency == 'annual' else 2
# Public static method to format date serial string to readable format and vice versa
@staticmethod
def format_date(in_date):
    """Convert between a ``'YYYY-MM-DD'`` string and epoch seconds.

    A ``str`` is parsed as a UTC calendar date and returned as an ``int``
    number of seconds since 1970-01-01. Any other value is treated as a
    number of seconds since 1970-01-01 and returned as a ``'YYYY-MM-DD'``
    string.
    """
    if not isinstance(in_date, str):
        epoch = datetime.datetime(1970, 1, 1)
        return str((epoch + datetime.timedelta(seconds=in_date)).date())
    parsed = time.strptime(in_date, '%Y-%m-%d')
    return int(calendar.timegm(parsed))
# Private Static Method to Convert Eastern Time to UTC
@staticmethod
def _convert_to_utc(date, mask='%Y-%m-%d %H:%M:%S'):
    """Interpret ``date`` as US/Eastern wall-clock time and render it in UTC.

    ``date`` is parsed with ``mask`` after every ``" 0:"`` in it has been
    replaced by ``" 12:"``. The result is formatted as
    ``'%Y-%m-%d %H:%M:%S %Z%z'``.
    """
    eastern_zone = pytz.timezone('US/Eastern')
    naive = datetime.datetime.strptime(date.replace(" 0:", " 12:"), mask)
    # NOTE(review): with is_dst=None pytz is expected to raise on ambiguous
    # or non-existent local times instead of guessing - confirm.
    localized = eastern_zone.localize(naive, is_dst=None)
    return localized.astimezone(pytz.utc).strftime('%Y-%m-%d %H:%M:%S %Z%z')
# Private method to scrape data from yahoo finance
def _scrape_data(self, url, tech_type, statement_type):
    """Return the requested data store scraped from a Yahoo Finance page.

    The page behind ``url`` is fetched at most once per instance: the JSON
    assigned to ``root.App.main`` in the page's script is parsed and kept
    in ``self._cache``. Fetches are throttled so that at least
    ``_MIN_INTERVAL`` seconds pass since the previous one.

    Args:
        url: Page URL to scrape (also the cache key).
        tech_type: Key inside ``QuoteSummaryStore`` to return, or ``''``
            for the whole store. Ignored when ``statement_type`` is
            ``'history'``.
        statement_type: ``'history'`` selects ``HistoricalPriceStore``;
            any other value selects ``QuoteSummaryStore``.

    Raises:
        ManagedException: if the page could not be retrieved and parsed
            within 10 attempts.
    """
    global _lastget
    if not self._cache.get(url):
        now = int(time.time())
        # Wait out the remainder of the minimum interval (plus one second).
        if _lastget and now - _lastget < self._MIN_INTERVAL:
            time.sleep(self._MIN_INTERVAL - (now - _lastget) + 1)
            now = int(time.time())
        _lastget = now
        urlopener = UrlOpener()
        # Try to open the URL up to 10 times sleeping random time if something goes wrong
        max_retry = 10
        for i in range(0, max_retry):
            response = urlopener.open(url)
            if response.getcode() != 200:
                time.sleep(random.randrange(10, 20))
            else:
                response_content = response.read()
                soup = BeautifulSoup(response_content, "html.parser")
                re_script = soup.find("script", text=re.compile(r"root.App.main"))
                if re_script is not None:
                    script = re_script.text
                    # Raw string: "\s" and "\{" are invalid escapes in a plain literal.
                    self._cache[url] = loads(re.search(r"root.App.main\s+=\s+(\{.*\})", script).group(1))
                    response.close()
                    break
                else:
                    time.sleep(random.randrange(10, 20))
            if i == max_retry - 1:
                # Raise a custom exception if we can't get the web page within max_retry attempts
                raise ManagedException("Server replied with HTTP " + str(response.getcode()) +
                                       " code while opening the url: " + str(url))
    data = self._cache[url]
    stores = data["context"]["dispatcher"]["stores"]
    if statement_type == 'history':
        return stores["HistoricalPriceStore"]
    summary = stores["QuoteSummaryStore"]
    if tech_type == '':
        return summary
    return summary[tech_type]
# Private static method to determine if a numerical value is in the data object being cleaned
@staticmethod
def _determine_numeric_value(value_dict):
    """Return ``value_dict['raw']`` when that key exists, otherwise ``None``."""
    return value_dict['raw'] if 'raw' in value_dict.keys() else None
# Private method to format an epoch timestamp as a date-time string converted to UTC
def _format_time(self, in_time):
    """Format the epoch timestamp ``in_time`` and pass it through ``_convert_to_utc``.

    NOTE(review): ``fromtimestamp`` renders the machine's local time, which
    ``_convert_to_utc`` then interprets as US/Eastern - confirm this is
    intended on hosts in other time zones.
    """
    local_stamp = datetime.datetime.fromtimestamp(int(in_time))
    return self._convert_to_utc(local_stamp.strftime('%Y-%m-%d %H:%M:%S'))
# Private method to return a sub-dictionary entry for the earnings report cleaning
def _get_cleaned_sub_dict_ent(self, key, val_list):
    """Return ``{key: [...]}`` holding a cleaned copy of every record in ``val_list``.

    In each record the ``'date'`` entry is kept as is; every other value
    is replaced by the result of ``_determine_numeric_value``.
    """
    cleaned_records = []
    for record in val_list:
        cleaned_records.append({
            field: value if field == 'date' else self._determine_numeric_value(value)
            for field, value in record.items()
        })
    return {key: cleaned_records}
# Private method to process raw earnings data and clean
def _clean_earnings_data(self, raw_data):
    """Return a cleaned copy of the raw earnings data.

    ``'earningsChart'`` is stored under ``'earningsData'`` and
    ``'financialsChart'`` under ``'financialsData'`` with their values
    reduced to plain numbers; ``'maxAge'`` is dropped; every other entry
    is copied unchanged.
    """
    cleaned_data = {}
    for section, payload in raw_data.items():
        if section == 'maxAge':
            continue
        if section == 'earningsChart':
            earnings = {}
            for name, value in payload.items():
                if name == 'quarterly':
                    earnings.update(self._get_cleaned_sub_dict_ent(name, value))
                elif name == 'currentQuarterEstimate':
                    earnings[name] = self._determine_numeric_value(value)
                else:
                    earnings[name] = value
            cleaned_data['earningsData'] = earnings
        elif section == 'financialsChart':
            financials = {}
            for name, value in payload.items():
                financials.update(self._get_cleaned_sub_dict_ent(name, value))
            cleaned_data['financialsData'] = financials
        else:
            cleaned_data[section] = payload
    return cleaned_data
# Private method to clean summary and price reports
def _clean_reports(self, raw_data):
    """Return a cleaned copy of a summary/price report, or ``None`` for ``None``.

    Keys containing ``'Time'`` go through ``_format_time``; keys containing
    ``'Date'`` are replaced by their ``'fmt'`` entry (``'-'`` when there is
    none); ``None`` and plain str/int/float values are copied; anything
    else is reduced with ``_determine_numeric_value``.
    """
    if raw_data is None:
        return None
    cleaned_dict = {}
    for field, value in raw_data.items():
        if 'Time' in field:
            cleaned = self._format_time(value)
        elif 'Date' in field:
            try:
                cleaned = value['fmt']
            except (KeyError, TypeError):
                cleaned = '-'
        elif value is None or isinstance(value, (str, int, float)):
            cleaned = value
        # Python 2 and Unicode
        elif sys.version_info < (3, 0) and isinstance(value, unicode):
            cleaned = value
        else:
            cleaned = self._determine_numeric_value(value)
        cleaned_dict[field] = cleaned
    return cleaned_dict
# Private Static Method to ensure ticker is URL encoded
@staticmethod
def _encode_ticker(ticker_str):
    """Return ``ticker_str`` with every ``'='`` percent-encoded as ``'%3D'``."""
    return ticker_str.replace('=', '%3D')
# Private method to build the Yahoo Finance history page URL for a ticker
def _build_historical_url(self, ticker, hist_oj):
    """Return the history page URL for ``ticker``.

    ``hist_oj`` supplies ``'start'`` and ``'end'`` (converted with ``str``)
    and ``'interval'`` (used as is for both the ``interval`` and
    ``frequency`` query parameters).
    """
    interval = hist_oj['interval']
    return ''.join([
        self._BASE_YAHOO_URL, self._encode_ticker(ticker),
        '/history?period1=', str(hist_oj['start']),
        '&period2=', str(hist_oj['end']),
        '&interval=', interval,
        '&filter=history&frequency=', interval,
    ])
# Private Method to clean the dates of the newly returned historical stock data into readable format
def _clean_historical_data(self, hist_data, last_attempt=False):
    """Return ``hist_data`` with readable dates added next to the raw ones.

    * ``'eventsData'``: a list becomes ``{}``; otherwise every event is
      re-keyed by its formatted date and gains a ``'formatted_date'`` entry.
    * keys containing ``date``: become ``{'formatted_date': ..., 'date': ...}``.
      A ``None`` value makes the whole call return ``None`` unless
      ``last_attempt`` is set, in which case the formatted date is ``None``.
    * other lists: every item gains a ``'formatted_date'`` entry.
    * anything else is copied unchanged.

    The event and list item dictionaries are updated in place.
    """
    data = {}
    for key, value in hist_data.items():
        if key == 'eventsData':
            events = {}
            if not isinstance(value, list):
                for event_type, by_date in value.items():
                    formatted = {}
                    for raw_key, event in by_date.items():
                        new_key = self.format_date(int(raw_key))
                        event.update({'formatted_date': self.format_date(int(event['date']))})
                        formatted[new_key] = event
                    events[event_type] = formatted
            data[key] = events
        elif 'date' in key.lower():
            if value is None and last_attempt is False:
                return None
            data[key] = {
                'formatted_date': None if value is None else self.format_date(value),
                'date': value,
            }
        elif isinstance(value, list):
            for entry in value:
                entry['formatted_date'] = self.format_date(entry['date'])
            data[key] = list(value)
        else:
            data[key] = value
    return data
# Private Static Method to build API url for GET Request
@staticmethod
def _build_api_url(hist_obj, up_ticker):
    """Return the chart API URL for ``up_ticker``.

    ``hist_obj`` supplies ``'start'`` and ``'end'`` (converted with ``str``)
    and ``'interval'`` (used as is).
    """
    return ''.join([
        "https://query1.finance.yahoo.com/v8/finance/chart/", up_ticker,
        '?symbol=', up_ticker,
        '&period1=', str(hist_obj['start']),
        '&period2=', str(hist_obj['end']),
        '&interval=', hist_obj['interval'],
        '&events=div|split|earn&lang=en-US&region=US',
    ])
# Private Method to get financial data via API Call
def _get_api_data(self, api_url, tries=0):
    """Fetch ``api_url`` and return the decoded JSON body.

    A non-200 status triggers a retry after a random pause of 10-19 seconds.
    At most five retries are made after the first attempt.

    :param api_url: URL to request.
    :param tries: number of retries already spent (used by the recursion).
    :return: the parsed JSON document, or ``None`` once the retries run out.
    """
    urlopener = UrlOpener()
    response = urlopener.open(api_url)
    try:
        status_ok = response.getcode() == 200
        res_content = response.read() if status_ok else None
    finally:
        # Always release the connection: previously it leaked on the retry
        # path and whenever read() raised.
        response.close()
    if status_ok:
        if sys.version_info < (3, 0):
            return loads(res_content)
        return loads(res_content.decode('utf-8'))
    if tries < 5:
        time.sleep(random.randrange(10, 20))
        return self._get_api_data(api_url, tries + 1)
    return None
# Private Method to clean API data
def _clean_api_data(self, api_url):
    """Fetch chart data and flatten it into the module's history layout.

    :param api_url: chart API URL handed to ``self._get_api_data``.
    :return: ``{'eventsData': []}`` when no data or no result came back;
        otherwise a dict with ``eventsData``, ``firstTradeDate``, ``currency``,
        ``instrumentType``, ``timeZone`` and ``prices`` (one dict per timestamp).
        With several results, values from the last one win.
    """
    payload = self._get_api_data(api_url)
    cleaned = {'eventsData': []}
    if payload is None:
        return cleaned
    chart_results = payload['chart']['result']
    if chart_results is None:
        return cleaned
    for entry in chart_results:
        meta = entry['meta']
        cleaned['eventsData'] = entry.get('events', {})
        cleaned['firstTradeDate'] = meta.get('firstTradeDate', 'NA')
        cleaned['currency'] = meta.get('currency', 'NA')
        cleaned['instrumentType'] = meta.get('instrumentType', 'NA')
        cleaned['timeZone'] = {'gmtOffset': meta['gmtoffset']}
        timestamps = entry['timestamp']
        quote = entry['indicators']['quote'][0]
        highs = quote['high']
        lows = quote['low']
        opens = quote['open']
        closes = quote['close']
        volumes = quote['volume']
        adj_closes = entry['indicators']['adjclose'][0]['adjclose']
        # Index-based access on purpose: a short series raises IndexError
        # rather than being truncated silently.
        cleaned['prices'] = [
            {
                'date': stamp,
                'high': highs[idx],
                'low': lows[idx],
                'open': opens[idx],
                'close': closes[idx],
                'volume': volumes[idx],
                'adjclose': adj_closes[idx],
            }
            for idx, stamp in enumerate(timestamps)
        ]
    return cleaned
# Private Method to Handle Recursive API Request
def _recursive_api_request(self, hist_obj, up_ticker, i=0):
    """Request and clean historical data, retrying while the clean-up fails.

    The request is repeated until ``self._clean_historical_data`` returns a
    non-``None`` result or the attempt counter reaches 3. After that the last
    response is cleaned once more in last-attempt mode.

    :param hist_obj: mapping with the start/end/interval of the request.
    :param up_ticker: ticker symbol to query.
    :param i: attempts already made.
    :return: the cleaned historical-data dictionary.
    """
    attempt = i
    while True:
        api_url = self._build_api_url(hist_obj, up_ticker)
        re_data = self._clean_api_data(api_url)
        cleaned = self._clean_historical_data(re_data)
        if cleaned is not None:
            return cleaned
        if attempt >= 3:
            # Out of retries: accept the response even with missing dates.
            return self._clean_historical_data(re_data, True)
        attempt += 1
# Private Method to take scraped data and build a data dictionary entry keyed by ticker
def _create_dict_ent(self, up_ticker, statement_type, tech_type, report_name, hist_obj):
    """Fetch data for one ticker and wrap it in a ``{ticker: data}`` entry.

    * ``statement_type == 'history'``: uses the chart API and falls back to
      scraping the history page when the API path raises ``KeyError``.
    * ``tech_type == ''``: scrapes the statement page and picks ``report_name``;
      the entry also carries ``'dataType': report_name``.
    * otherwise: scrapes the page for ``tech_type``.

    A ``KeyError`` while scraping yields ``None`` as the ticker's data.

    :return: the dictionary entry for ``up_ticker``.
    """
    page = self.YAHOO_FINANCIAL_TYPES[statement_type][0]
    page_url = self._BASE_YAHOO_URL + up_ticker + '/' + page + '?p=' + up_ticker

    if statement_type == 'history':
        history_url = self._build_historical_url(up_ticker, hist_obj)
        try:
            cleaned = self._recursive_api_request(hist_obj, up_ticker)
        except KeyError:
            try:
                scraped = self._scrape_data(history_url, tech_type, statement_type)
                cleaned = self._clean_historical_data(scraped)
            except KeyError:
                cleaned = None
        return {up_ticker: cleaned}

    if tech_type == '':
        try:
            scraped = self._scrape_data(page_url, tech_type, statement_type)
            return {up_ticker: scraped[u'' + report_name], 'dataType': report_name}
        except KeyError:
            return {up_ticker: None, 'dataType': report_name}

    try:
        scraped = self._scrape_data(page_url, tech_type, statement_type)
    except KeyError:
        scraped = None
    return {up_ticker: scraped}
# Private method to return the stmt_id for the reformat_process
def _get_stmt_id(self, statement_type, raw_data):
    """Find the single key of ``raw_data`` that names a known statement.

    :param statement_type: statement kind; lower-cased before the lookup in
        ``self.YAHOO_FINANCIAL_TYPES``.
    :param raw_data: mapping whose keys are checked.
    :return: the matching key, or ``None`` unless exactly one key matches.
    """
    matches = [
        key for key in raw_data.keys()
        if key in self.YAHOO_FINANCIAL_TYPES[statement_type.lower()]
    ]
    if len(matches) != 1:
        return None
    return matches[0]
# Private Method for the Reformat Process
def _reformat_stmt_data_process(self, raw_data, statement_type):
    """Reshape raw statement data into a list of ``{date: {field: value}}``.

    :param raw_data: scraped statement mapping for one ticker, or ``None``.
    :param statement_type: statement kind, used to locate the statement key.
    :return: ``None`` when ``raw_data`` is ``None``; an empty list when no
        single statement key is found; otherwise one dict per statement item,
        keyed by the item's ``endDate['fmt']`` (``''`` when it has no endDate).
        ``maxAge`` is dropped and every other value goes through
        ``self._determine_numeric_value``.
    """
    if raw_data is None:
        return raw_data
    stmt_id = self._get_stmt_id(statement_type, raw_data)
    if stmt_id is None:
        return []
    reformatted = []
    for period in raw_data[stmt_id]:
        period_label = ''
        fields = {}
        for name, raw_value in period.items():
            if name == 'endDate':
                period_label = raw_value['fmt']
                continue
            if name == 'maxAge':
                continue
            fields[name] = self._determine_numeric_value(raw_value)
        reformatted.append({period_label: fields})
    return reformatted
# Private Method to return subdict entry for the statement reformat process
def _get_sub_dict_ent(self, ticker, raw_data, statement_type):
    """Return ``{ticker: reformatted statement list}`` for one ticker.

    :param ticker: key into ``raw_data``.
    :param raw_data: mapping from ticker to its raw statement data.
    :param statement_type: passed on to ``self._reformat_stmt_data_process``.
    """
    ticker_raw = raw_data[ticker]
    return {ticker: self._reformat_stmt_data_process(ticker_raw, statement_type)}
# Public method to get time interval code
def get_time_code(self, time_interval):
    """Translate an interval name into its code via ``self._INTERVAL_DICT``.

    :param time_interval: interval name; matched case-insensitively.
    :return: the code stored for the lower-cased name.
    :raises KeyError: when the name is not in ``self._INTERVAL_DICT``.
    """
    return self._INTERVAL_DICT[time_interval.lower()]
# Public Method to get stock data
def get_stock_data(self, statement_type='income', tech_type='', report_name='', hist_obj=None):
    """Collect the dictionary entries for every ticker in ``self.ticker``.

    :param statement_type: statement kind handed to ``self._create_dict_ent``.
    :param tech_type: technical data section, ``''`` for statement data.
    :param report_name: report to pick from the scraped statement data.
    :param hist_obj: start/end/interval mapping for historical requests;
        defaults to an empty dict.
    :return: the merged entries. For a list of tickers, a ticker whose entry
        raises ``ManagedException`` is reported on stdout and skipped.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if hist_obj is None:
        hist_obj = {}
    data = {}
    if isinstance(self.ticker, str):
        data.update(self._create_dict_ent(self.ticker, statement_type, tech_type, report_name, hist_obj))
        return data
    for tick in self.ticker:
        try:
            dict_ent = self._create_dict_ent(tick, statement_type, tech_type, report_name, hist_obj)
        except ManagedException as err:
            # Report the caught error itself; the class object carries no detail.
            print("Warning! Ticker: " + str(tick) + " error - " + str(err))
            print("The process is still running...")
            continue
        data.update(dict_ent)
    return data
# Public Method to get technical stock data
def get_stock_tech_data(self, tech_type):
    """Fetch one technical data section through ``self.get_stock_data``.

    :param tech_type: section name. ``'defaultKeyStatistics'`` is requested
        with ``statement_type='keystats'``; every other section uses the
        default statement type.
    :return: whatever ``self.get_stock_data`` returns.
    """
    request = {'tech_type': tech_type}
    if tech_type == 'defaultKeyStatistics':
        request['statement_type'] = 'keystats'
    return self.get_stock_data(**request)
# Public Method to get reformatted statement data
def get_reformatted_stmt_data(self, raw_data, statement_type):
    """Group reformatted statement data under the report's data type.

    :param raw_data: mapping with a ``'dataType'`` entry plus one entry per ticker.
    :param statement_type: passed on to ``self._get_sub_dict_ent``.
    :return: ``{raw_data['dataType']: {ticker: statement list}}``; an empty
        dict when ``self.ticker`` is an empty sequence.
    """
    data_type = raw_data['dataType']
    symbols = [self.ticker] if isinstance(self.ticker, str) else self.ticker
    per_ticker = {}
    grouped = {}
    for symbol in symbols:
        per_ticker.update(self._get_sub_dict_ent(symbol, raw_data, statement_type))
        grouped[data_type] = per_ticker
    return grouped
# Public method to get cleaned summary and price report data
def get_clean_data(self, raw_report_data, report_type):
    """Clean the raw report data of every ticker in ``self.ticker``.

    :param raw_report_data: mapping from ticker to its raw report data.
    :param report_type: ``'earnings'`` selects ``self._clean_earnings_data``;
        any other value selects ``self._clean_reports``.
    :return: ``{ticker: cleaned data}``. Cleaning is best-effort: a ticker
        whose data is missing or cannot be cleaned maps to ``None``.
    """
    if report_type == 'earnings':
        cleaner = self._clean_earnings_data
    else:
        cleaner = self._clean_reports
    tickers = [self.ticker] if isinstance(self.ticker, str) else self.ticker
    cleaned_data_dict = {}
    for tick in tickers:
        try:
            cleaned_data = cleaner(raw_report_data[tick])
        except Exception:
            # Exception, not a bare except: KeyboardInterrupt and SystemExit
            # must still be able to stop the process.
            cleaned_data = None
        cleaned_data_dict[tick] = cleaned_data
    return cleaned_data_dict
# Private method to handle dividend data requests
def _handle_api_dividend_request(self, cur_ticker, start, end, interval):
    """Request the dividend events of one ticker from the chart API.

    :param cur_ticker: ticker symbol, inserted verbatim into the URL.
    :param start: value for the ``period1`` query field.
    :param end: value for the ``period2`` query field.
    :param interval: interval code for the ``interval`` query field.
    :return: list of ``{'date', 'formatted_date', 'amount'}`` dicts sorted by
        ``'date'``; ``'amount'`` is ``None`` when the event has none.
    """
    request_url = ''.join([
        'https://query1.finance.yahoo.com/v8/finance/chart/', cur_ticker,
        '?period1=', str(start),
        '&period2=', str(end),
        '&interval=', interval,
        '&events=div',
    ])
    payload = self._get_api_data(request_url)
    raw_dividends = payload['chart']['result'][0]['events']['dividends']
    dividends = [
        {
            'date': entry['date'],
            'formatted_date': self.format_date(int(entry['date'])),
            'amount': entry.get('amount', None),
        }
        for entry in raw_dividends.values()
    ]
    dividends.sort(key=lambda item: item['date'])
    return dividends
# Public method to get daily dividend data
def get_stock_dividend_data(self, start, end, interval):
    """Fetch dividend data for every ticker in ``self.ticker``.

    :param start: start of the period, passed to the dividend request.
    :param end: end of the period, passed to the dividend request.
    :param interval: interval name, translated with ``self.get_time_code``.
    :return: ``{ticker: dividend list}``. The lookup is best-effort: a ticker
        whose request fails maps to ``None``.
    """
    interval_code = self.get_time_code(interval)
    tickers = [self.ticker] if isinstance(self.ticker, str) else self.ticker
    re_data = {}
    for tick in tickers:
        try:
            div_data = self._handle_api_dividend_request(tick, start, end, interval_code)
        except Exception:
            # Exception, not a bare except: KeyboardInterrupt and SystemExit
            # must still be able to stop the process.
            div_data = None
        re_data[tick] = div_data
    return re_data
# Class containing methods to create stock data extracts
class YahooFinancials(YahooFinanceETL):
    """User-facing accessors built on the ``YahooFinanceETL`` extraction methods.

    The single-field accessors (``get_current_price``, ``get_beta``, ...) return
    one value when ``self.ticker`` is a ``str`` and a ``{ticker: value}`` dict
    otherwise.
    """

    # Private method that handles financial statement extraction
    def _run_financial_stmt(self, statement_type, report_num, reformat):
        """Fetch one financial statement report, optionally reformatted.

        :param statement_type: key into ``self.YAHOO_FINANCIAL_TYPES``.
        :param report_num: index of the report name within that entry.
        :param reformat: when true the raw data is passed through
            ``self.get_reformatted_stmt_data``.
        """
        report_name = self.YAHOO_FINANCIAL_TYPES[statement_type][report_num]
        raw_data = self.get_stock_data(statement_type, report_name=report_name)
        if reformat:
            return self.get_reformatted_stmt_data(raw_data, statement_type)
        return raw_data

    # Public Method for the user to get financial statement data
    def get_financial_stmts(self, frequency, statement_type, reformat=True):
        """Return statement data for one statement type or several.

        :param frequency: translated to a report index by ``self.get_report_type``.
        :param statement_type: a single type (``str``) or an iterable of types;
            results for several types are merged into one dict.
        :param reformat: forwarded to ``self._run_financial_stmt``.
        """
        report_num = self.get_report_type(frequency)
        if isinstance(statement_type, str):
            return self._run_financial_stmt(statement_type, report_num, reformat)
        data = {}
        for stmt_type in statement_type:
            data.update(self._run_financial_stmt(stmt_type, report_num, reformat))
        return data

    # Public Method for the user to get stock price data
    def get_stock_price_data(self, reformat=True):
        """Return the ``'price'`` section, cleaned when ``reformat`` is true."""
        raw_data = self.get_stock_tech_data('price')
        if reformat:
            return self.get_clean_data(raw_data, 'price')
        return raw_data

    # Public Method for the user to return key-statistics data
    def get_key_statistics_data(self, reformat=True):
        """Return the ``'defaultKeyStatistics'`` section, cleaned when ``reformat`` is true."""
        raw_data = self.get_stock_tech_data('defaultKeyStatistics')
        if reformat:
            return self.get_clean_data(raw_data, 'defaultKeyStatistics')
        return raw_data

    # Public Method for the user to get stock earnings data
    def get_stock_earnings_data(self, reformat=True):
        """Return the ``'earnings'`` section, cleaned when ``reformat`` is true."""
        raw_data = self.get_stock_tech_data('earnings')
        if reformat:
            return self.get_clean_data(raw_data, 'earnings')
        return raw_data

    # Public Method for the user to get stock summary data
    def get_summary_data(self, reformat=True):
        """Return the ``'summaryDetail'`` section, cleaned when ``reformat`` is true."""
        raw_data = self.get_stock_tech_data('summaryDetail')
        if reformat:
            return self.get_clean_data(raw_data, 'summaryDetail')
        return raw_data

    # Public Method for the user to get the yahoo summary url
    def get_stock_summary_url(self):
        """Return the summary page URL, or ``{ticker: url}`` for several tickers."""
        if isinstance(self.ticker, str):
            return self._BASE_YAHOO_URL + self.ticker
        return {t: self._BASE_YAHOO_URL + t for t in self.ticker}

    # Public Method for the user to get stock quote data
    def get_stock_quote_type_data(self):
        """Return the raw ``'quoteType'`` section."""
        return self.get_stock_tech_data('quoteType')

    # Public Method for user to get historical price data
    def get_historical_price_data(self, start_date, end_date, time_interval):
        """Return historical price data between two dates.

        :param start_date: start of the range, converted with ``self.format_date``.
        :param end_date: end of the range, converted with ``self.format_date``.
        :param time_interval: interval name understood by ``self.get_time_code``.
        """
        hist_obj = {
            'start': self.format_date(start_date),
            'end': self.format_date(end_date),
            'interval': self.get_time_code(time_interval),
        }
        return self.get_stock_data('history', hist_obj=hist_obj)

    # Private Method for Functions needing stock_price_data
    def _stock_price_data(self, data_field):
        """Pick ``data_field`` out of the cleaned price data.

        :return: the field value (``None`` when the ticker has no data or lacks
            the field), or a ``{ticker: value}`` dict for several tickers.
        """
        # Fetch once: every get_stock_price_data() call re-runs the extraction.
        price_data = self.get_stock_price_data()
        if isinstance(self.ticker, str):
            ticker_data = price_data[self.ticker]
            if ticker_data is None:
                return None
            return ticker_data.get(data_field, None)
        ret_obj = {}
        for tick in self.ticker:
            ticker_data = price_data[tick]
            ret_obj[tick] = None if ticker_data is None else ticker_data.get(data_field, None)
        return ret_obj

    # Private Method for Functions needing stock_summary_data
    def _stock_summary_data(self, data_field):
        """Pick ``data_field`` out of the cleaned summary data.

        :return: the field value (``None`` when the ticker has no data or lacks
            the field), or a ``{ticker: value}`` dict for several tickers.
        """
        # Fetch once: every get_summary_data() call re-runs the extraction.
        summary_data = self.get_summary_data()
        if isinstance(self.ticker, str):
            ticker_data = summary_data[self.ticker]
            if ticker_data is None:
                return None
            return ticker_data.get(data_field, None)
        ret_obj = {}
        for tick in self.ticker:
            ticker_data = summary_data[tick]
            ret_obj[tick] = None if ticker_data is None else ticker_data.get(data_field, None)
        return ret_obj

    # Private Method for Functions needing financial statement data
    def _financial_statement_data(self, stmt_type, stmt_code, field_name, freq):
        """Return ``field_name`` from the first entry of a reformatted statement.

        :param stmt_type: statement type passed to ``self.get_financial_stmts``.
        :param stmt_code: key of the statement within the returned data.
        :param field_name: field to read from the first statement entry.
        :param freq: frequency passed to ``self.get_financial_stmts``.
        :return: the field value, or ``None`` when the ticker has no statement
            entries; a ``{ticker: value}`` dict for several tickers.
        :raises KeyError: when the first entry lacks ``field_name``.
        """
        re_data = self.get_financial_stmts(freq, stmt_type)[stmt_code]

        def first_entry_field(statements):
            # Missing or empty statement data yields None for both the single
            # and the multi ticker path.
            try:
                first_entry = statements[0]
                date_key = next(iter(first_entry))
            except (IndexError, KeyError, TypeError, StopIteration):
                return None
            return first_entry[date_key][field_name]

        if isinstance(self.ticker, str):
            return first_entry_field(re_data[self.ticker])
        return {tick: first_entry_field(re_data[tick]) for tick in self.ticker}

    # Public method to get daily dividend data
    def get_daily_dividend_data(self, start_date, end_date):
        """Return daily dividend data between two dates (converted with ``self.format_date``)."""
        start = self.format_date(start_date)
        end = self.format_date(end_date)
        return self.get_stock_dividend_data(start, end, 'daily')

    # Public Price Data Methods
    def get_current_price(self):
        """Return the ``regularMarketPrice`` price field."""
        return self._stock_price_data('regularMarketPrice')

    def get_current_change(self):
        """Return the ``regularMarketChange`` price field."""
        return self._stock_price_data('regularMarketChange')

    def get_current_percent_change(self):
        """Return the ``regularMarketChangePercent`` price field."""
        return self._stock_price_data('regularMarketChangePercent')

    def get_current_volume(self):
        """Return the ``regularMarketVolume`` price field."""
        return self._stock_price_data('regularMarketVolume')

    def get_prev_close_price(self):
        """Return the ``regularMarketPreviousClose`` price field."""
        return self._stock_price_data('regularMarketPreviousClose')

    def get_open_price(self):
        """Return the ``regularMarketOpen`` price field."""
        return self._stock_price_data('regularMarketOpen')

    def get_ten_day_avg_daily_volume(self):
        """Return the ``averageDailyVolume10Day`` price field."""
        return self._stock_price_data('averageDailyVolume10Day')

    def get_three_month_avg_daily_volume(self):
        """Return the ``averageDailyVolume3Month`` price field."""
        return self._stock_price_data('averageDailyVolume3Month')

    def get_stock_exchange(self):
        """Return the ``exchangeName`` price field."""
        return self._stock_price_data('exchangeName')

    def get_market_cap(self):
        """Return the ``marketCap`` price field."""
        return self._stock_price_data('marketCap')

    def get_daily_low(self):
        """Return the ``regularMarketDayLow`` price field."""
        return self._stock_price_data('regularMarketDayLow')

    def get_daily_high(self):
        """Return the ``regularMarketDayHigh`` price field."""
        return self._stock_price_data('regularMarketDayHigh')

    def get_currency(self):
        """Return the ``currency`` price field."""
        return self._stock_price_data('currency')

    # Public Summary Data Methods
    def get_yearly_high(self):
        """Return the ``fiftyTwoWeekHigh`` summary field."""
        return self._stock_summary_data('fiftyTwoWeekHigh')

    def get_yearly_low(self):
        """Return the ``fiftyTwoWeekLow`` summary field."""
        return self._stock_summary_data('fiftyTwoWeekLow')

    def get_dividend_yield(self):
        """Return the ``dividendYield`` summary field."""
        return self._stock_summary_data('dividendYield')

    def get_annual_avg_div_yield(self):
        """Return the ``trailingAnnualDividendYield`` summary field."""
        return self._stock_summary_data('trailingAnnualDividendYield')

    def get_five_yr_avg_div_yield(self):
        """Return the ``fiveYearAvgDividendYield`` summary field."""
        return self._stock_summary_data('fiveYearAvgDividendYield')

    def get_dividend_rate(self):
        """Return the ``dividendRate`` summary field."""
        return self._stock_summary_data('dividendRate')

    def get_annual_avg_div_rate(self):
        """Return the ``trailingAnnualDividendRate`` summary field."""
        return self._stock_summary_data('trailingAnnualDividendRate')

    def get_50day_moving_avg(self):
        """Return the ``fiftyDayAverage`` summary field."""
        return self._stock_summary_data('fiftyDayAverage')

    def get_200day_moving_avg(self):
        """Return the ``twoHundredDayAverage`` summary field."""
        return self._stock_summary_data('twoHundredDayAverage')

    def get_beta(self):
        """Return the ``beta`` summary field."""
        return self._stock_summary_data('beta')

    def get_payout_ratio(self):
        """Return the ``payoutRatio`` summary field."""
        return self._stock_summary_data('payoutRatio')

    def get_pe_ratio(self):
        """Return the ``trailingPE`` summary field."""
        return self._stock_summary_data('trailingPE')

    def get_price_to_sales(self):
        """Return the ``priceToSalesTrailing12Months`` summary field."""
        return self._stock_summary_data('priceToSalesTrailing12Months')

    def get_exdividend_date(self):
        """Return the ``exDividendDate`` summary field."""
        return self._stock_summary_data('exDividendDate')

    # Financial Statement Data Methods
    def get_book_value(self):
        """Return ``totalStockholderEquity`` from the quarterly balance sheet."""
        return self._financial_statement_data('balance', 'balanceSheetHistoryQuarterly',
                                              'totalStockholderEquity', 'quarterly')

    def get_ebit(self):
        """Return ``ebit`` from the annual income statement."""
        return self._financial_statement_data('income', 'incomeStatementHistory', 'ebit', 'annual')

    def get_net_income(self):
        """Return ``netIncome`` from the annual income statement."""
        return self._financial_statement_data('income', 'incomeStatementHistory', 'netIncome', 'annual')

    def get_interest_expense(self):
        """Return ``interestExpense`` from the annual income statement."""
        return self._financial_statement_data('income', 'incomeStatementHistory', 'interestExpense', 'annual')

    def get_operating_income(self):
        """Return ``operatingIncome`` from the annual income statement."""
        return self._financial_statement_data('income', 'incomeStatementHistory', 'operatingIncome', 'annual')

    def get_total_operating_expense(self):
        """Return ``totalOperatingExpenses`` from the annual income statement."""
        return self._financial_statement_data('income', 'incomeStatementHistory', 'totalOperatingExpenses', 'annual')

    def get_total_revenue(self):
        """Return ``totalRevenue`` from the annual income statement."""
        return self._financial_statement_data('income', 'incomeStatementHistory', 'totalRevenue', 'annual')

    def get_cost_of_revenue(self):
        """Return ``costOfRevenue`` from the annual income statement."""
        return self._financial_statement_data('income', 'incomeStatementHistory', 'costOfRevenue', 'annual')

    def get_income_before_tax(self):
        """Return ``incomeBeforeTax`` from the annual income statement."""
        return self._financial_statement_data('income', 'incomeStatementHistory', 'incomeBeforeTax', 'annual')

    def get_income_tax_expense(self):
        """Return ``incomeTaxExpense`` from the annual income statement."""
        return self._financial_statement_data('income', 'incomeStatementHistory', 'incomeTaxExpense', 'annual')

    def get_gross_profit(self):
        """Return ``grossProfit`` from the annual income statement."""
        return self._financial_statement_data('income', 'incomeStatementHistory', 'grossProfit', 'annual')

    def get_net_income_from_continuing_ops(self):
        """Return ``netIncomeFromContinuingOps`` from the annual income statement."""
        return self._financial_statement_data('income', 'incomeStatementHistory',
                                              'netIncomeFromContinuingOps', 'annual')

    def get_research_and_development(self):
        """Return ``researchDevelopment`` from the annual income statement."""
        return self._financial_statement_data('income', 'incomeStatementHistory', 'researchDevelopment', 'annual')

    # Calculated Financial Methods
    def get_earnings_per_share(self):
        """Return current price divided by the trailing P/E ratio.

        :return: the quotient, or ``None`` when either input is ``None``;
            a ``{ticker: value}`` dict for several tickers.
        """
        price_data = self.get_current_price()
        pe_ratio = self.get_pe_ratio()
        if isinstance(self.ticker, str):
            if price_data is None or pe_ratio is None:
                return None
            return price_data / pe_ratio
        ret_obj = {}
        for tick in self.ticker:
            if price_data[tick] is not None and pe_ratio[tick] is not None:
                ret_obj[tick] = price_data[tick] / pe_ratio[tick]
            else:
                ret_obj[tick] = None
        return ret_obj

    def get_num_shares_outstanding(self, price_type='current'):
        """Estimate shares outstanding as market cap divided by a price.

        :param price_type: ``'current'`` divides by the current price; any
            other value divides by the average of the day's high and low.
        :return: the quotient, or ``None`` when the market cap or the needed
            price is missing; a ``{ticker: value}`` dict for several tickers.
        """
        today_high = self._stock_summary_data('dayHigh')
        today_low = self._stock_summary_data('dayLow')
        cur_market_cap = self._stock_summary_data('marketCap')
        if isinstance(self.ticker, str):
            if cur_market_cap is None:
                return None
            if price_type == 'current':
                price = self.get_current_price()
            elif today_high is not None and today_low is not None:
                price = (today_high + today_low) / 2
            else:
                price = None
            if price is None:
                return None
            return cur_market_cap / price
        ret_obj = {}
        current = None  # current prices for all tickers, fetched at most once
        for tick in self.ticker:
            market_cap = cur_market_cap[tick]
            price = None
            if market_cap is not None:
                if price_type == 'current':
                    if current is None:
                        current = self.get_current_price()
                    price = current[tick]
                elif today_low[tick] is not None and today_high[tick] is not None:
                    price = (today_high[tick] + today_low[tick]) / 2
            ret_obj[tick] = None if price is None else market_cap / price
        return ret_obj

View File

@ -1,4 +1,6 @@
requests~=2.21.0 requests~=2.21.0
numpy~=1.15.4 numpy~=1.15.4
beautifulsoup4~=4.7.1 beautifulsoup4~=4.7.1
halo~=0.0.23
requests-cache~=0.4.13 # NOT REQUIRED requests-cache~=0.4.13 # NOT REQUIRED
yahoofinancials~=1.5 # NOT REQUIRED

10
stocks.txt Normal file
View File

@ -0,0 +1,10 @@
VFINX
SMARX
BRASX
USIBX
DSIAX
TIHYX
SGYAX
TPLGX
PREFX
FBGRX