From 5d1f96c403ecd3dabb5898bc30b3d04f12a2e3fd Mon Sep 17 00:00:00 2001 From: Andrew Dinh Date: Mon, 18 Mar 2019 10:26:07 -0700 Subject: [PATCH] General fixes Added color, config file, moved packages into ./modules --- .gitignore | 2 +- Functions.py | 61 ++- README.md | 40 +- config.example.json | 63 +++ main.py | 623 ++++++++++++++++++-------- modules/termcolor.py | 168 +++++++ modules/yahoofinancials.py | 891 +++++++++++++++++++++++++++++++++++++ requirements.txt | 4 +- stocks.txt | 10 + 9 files changed, 1657 insertions(+), 205 deletions(-) create mode 100644 config.example.json create mode 100644 modules/termcolor.py create mode 100644 modules/yahoofinancials.py create mode 100644 stocks.txt diff --git a/.gitignore b/.gitignore index 25929cd..c8109c5 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,4 @@ test/ .vscode/ *.sqlite README.html -*stocks.txt \ No newline at end of file +*-stocks.txt \ No newline at end of file diff --git a/Functions.py b/Functions.py index 4c311c1..b6bf33a 100644 --- a/Functions.py +++ b/Functions.py @@ -1,5 +1,8 @@ # Python file for general functions +import sys +sys.path.insert(0, './modules') + def getNearest(items, pivot): return min(items, key=lambda x: abs(x - pivot)) @@ -52,14 +55,18 @@ def strintIsFloat(s): def fromCache(r): import requests_cache + from termcolor import colored, cprint if r.from_cache == True: - print('(Response taken from cache)') + cprint('(Response taken from cache)', 'white', attrs=['dark']) return def getJoke(): import requests + import sys + from termcolor import colored, cprint import requests_cache + from halo import Halo with requests_cache.disabled(): ''' f = requests.get('https://official-joke-api.appspot.com/jokes/random').json() @@ -69,9 +76,13 @@ def getJoke(): ''' headers = {'Accept': 'application/json', 'User-Agent': 'fund-indicators (https://github.com/andrewkdinh/fund-indicators)'} - f = requests.get('https://icanhazdadjoke.com/', headers=headers).json() + url = 'https://icanhazdadjoke.com' 
+ + cprint('Get: ' + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + f = requests.get('https://icanhazdadjoke.com/', headers=headers).json() print('') - print(f['joke']) + print(colored(f['joke'], 'green')) def hasNumbers(inputString): @@ -127,6 +138,50 @@ def fileExists(file): import os.path return os.path.exists(file) +def listIndexExists(i): + try: + i + return True + except IndexError: + return False + +def removeOutliers(i): + import statistics + m = statistics.median(i) + firstQ = [] + thirdQ = [] + for x in i: + if x < m: + firstQ.append(x) + elif x > m: + thirdQ.append(x) + firstQm = statistics.median(firstQ) + thirdQm = statistics.median(thirdQ) + iqr = (thirdQm - firstQm) * 1.5 + + goodList = [] + badList = [] + for x in i: + if x < (thirdQm + iqr) and x > (firstQm - iqr): + goodList.append(x) + else: + badList.append(x) # In case I want to know. If not, then I just make it equal to returnlist[0] + returnList = [goodList, badList, firstQm, m, thirdQm, iqr] + return returnList + +def validateJson(text): + import json + try: + json.loads(text) + return True + except ValueError: + return False + +def keyInDict(dict, key): + if key in dict: + return True + else: + return False def main(): exit() diff --git a/README.md b/README.md index d33a263..9f01dad 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,45 @@ -# Mutual Fund Indicators +# fund-indicators [![License](https://img.shields.io/github/license/andrewkdinh/fund-indicators.svg)](https://raw.githubusercontent.com/andrewkdinh/fund-indicators/master/LICENSE) -![](https://img.shields.io/github/last-commit/andrewkdinh/fund-indicators.svg) +[![](https://img.shields.io/github/last-commit/andrewkdinh/fund-indicators.svg)](https://github.com/andrewkdinh/fund-indicators/commits/master) ![](https://img.shields.io/github/languages/top/andrewkdinh/fund-indicators.svg) ![](https://img.shields.io/github/languages/code-size/andrewkdinh/fund-indicators.svg) -A project to determine indicators of 
overperforming mutual funds. +A project to determine relationships between mutual funds and different factors. -Examine correlation between performance and market capitalization, persistence, turnover, and expense ratios. +Calculates relationships between: Previous performance, Alpha, Sharpe Ratio, Sortino Ratio -## Prerequisites +and Expense ratios, Turnover, Market Capitalization (Asset Size), Persistence -`$ pip install -r requirements.txt` +Give it a try at [repl.run](https://fund-indicators.andrewkdinh.repl.run) or [repl.it](https://repl.it/@andrewkdinh/fund-indicators) + +## Key Features + +- 100% automated +- Uses multiple API's in case another fails +- Caches http requests for future runs +- Scrapes data from Yahoo Finance +- Color-coded for easy viewing +- Optional graphs to easily visualize linear regression results +- A new joke every time it runs ## Quickstart -To begin, run +```shell +pip install -r requirements.txt +python main.py +``` -`$ python main.py` +Pre-chosen stocks listed in `stocks.txt` -Some ticker values to try: -SPY, VFINX, VTHR, DJIA +## Credits + +This project uses a wide variety of open-source projects + +- [NumPy](https://github.com/numpy/numpy), [Termcolor](https://github.com/hfeeki/termcolor), [Beautiful Soup](https://launchpad.net/beautifulsoup), [yahoofinancials](https://github.com/JECSand/yahoofinancials), [requests-cache](https://github.com/reclosedev/requests-cache), [halo](https://github.com/manrajgrover/halo) + +And thank you to those that have helped me with the idea and product: + +- Amber Bruce, [Alex Stoykov](http://stoykov.us/), Doug Achterman, [Stack Overflow](https://stackoverflow.com) Created by Andrew Dinh from Dr. 
TJ Owens Gilroy Early College Academy diff --git a/config.example.json b/config.example.json new file mode 100644 index 0000000..f90a8e5 --- /dev/null +++ b/config.example.json @@ -0,0 +1,63 @@ +{ + "_comment": "Only use this if everything you know is correct", + "Config": { + "Check Packages": true, + "Check Python Version": true, + "Check Internet Connection": false, + "Get Joke": true, + "Benchmark": "SPY", + "Method": "Kiplinger", + "Time Frame": 60, + "Indicator": "Expense Ratio", + "Remove Outliers": true, + "Sources": [ + "Alpha Vantage", + "Yahoo", + "IEX", + "Tiingo" + ] + }, + "Possible Values": { + "Check Packages": [ + true, + false + ], + "Check Python Version": [ + true, + false + ], + "Check Internet Connection": [ + true, + false + ], + "Get Joke": [ + true, + false + ], + "Benchmark": [ + "SPY", + "DJIA", + "VTHR", + "EFG" + ], + "Method": [ + "Read", + "Manual", + "U.S. News", + "Kiplinger", + "TheStreet" + ], + "Time Frame": "Any integer", + "Indicator": [ + "Expense Ratio", + "Market Capitalization", + "Turnover", + "Persistence" + ], + "Remove Outliers": [ + true, + false + ], + "Sources": "Choose an order out of ['Alpha Vantage', 'Yahoo', 'IEX', 'Tiingo']" + } +} diff --git a/main.py b/main.py index e1ddc2b..14957f7 100644 --- a/main.py +++ b/main.py @@ -3,25 +3,33 @@ # Andrew Dinh # Python 3.6.7 -# Required -from bs4 import BeautifulSoup -import requests -import json -import datetime +# PYTHON FILES import Functions -import numpy as np -import re +from yahoofinancials import YahooFinancials +from termcolor import cprint + +# REQUIRED +import requests_cache import os.path +import re +import datetime +import json +import requests +from bs4 import BeautifulSoup +import numpy as np -# Required for linear regression +# OPTIONAL import matplotlib.pyplot as plt -import sys +from halo import Halo -# Optional +# FOR ASYNC from concurrent.futures import ThreadPoolExecutor as PoolExecutor import time import random -import requests_cache + +import sys 
+sys.path.insert(0, './modules') + requests_cache.install_cache( 'cache', backend='sqlite', expire_after=43200) # 12 hours @@ -59,7 +67,6 @@ API Keys: No: Tiingo ''' - class Stock: # GLOBAL VARIABLES @@ -67,6 +74,11 @@ class Stock: riskFreeRate = 0 indicator = '' + # CONFIG + removeOutliers = True + sourceList = ['Alpha Vantage', 'Yahoo', 'IEX', 'Tiingo'] + config = 'N/A' + # BENCHMARK VALUES benchmarkDates = [] benchmarkCloseValues = [] @@ -100,6 +112,7 @@ class Stock: self.downsideDeviation = 0 self.kurtosis = 0 self.skewness = 0 # Not sure if I need this + self.correlation = 0 self.linearRegression = [] # for y=mx+b, this list has [m,b] self.indicatorValue = '' @@ -117,17 +130,17 @@ class Stock: return self.allCloseValues def IEX(self): - print('IEX') url = ''.join( ('https://api.iextrading.com/1.0/stock/', self.name, '/chart/5y')) # link = "https://api.iextrading.com/1.0/stock/spy/chart/5y" - print("\nSending request to:", url) - f = requests.get(url) + cprint("Get: " + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + f = requests.get(url) Functions.fromCache(f) json_data = f.text if json_data == 'Unknown symbol' or f.status_code != 200: print("IEX not available") - return 'Not available' + return 'N/A' loaded_json = json.loads(json_data) listIEX = [] @@ -141,7 +154,7 @@ class Stock: listIEX.append(allDates) print(len(listIEX[0]), "dates") - print("\nFinding close values for each date") + # print("\nFinding close values for each date") values = [] for i in range(0, len(loaded_json), 1): # If you want to do oldest first # for i in range(len(loaded_json)-1, -1, -1): @@ -149,33 +162,33 @@ class Stock: value = line['close'] values.append(value) listIEX.append(values) - print(len(listIEX[1]), "close values") + print(len(listIEX[0]), 'dates and', len(listIEX[1]), "close values") return listIEX def AV(self): - print('Alpha Vantage') listAV = [] url = ''.join(('https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=', self.name, 
'&outputsize=full&apikey=', apiAV)) # https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=MSFT&outputsize=full&apikey=demo - print("\nSending request to:", url) - f = requests.get(url) + cprint("Get: " + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + f = requests.get(url) Functions.fromCache(f) json_data = f.text loaded_json = json.loads(json_data) if len(loaded_json) == 1 or f.status_code != 200 or len(loaded_json) == 0: print("Alpha Vantage not available") - return 'Not available' + return 'N/A' dailyTimeSeries = loaded_json['Time Series (Daily)'] listOfDates = list(dailyTimeSeries) # listAV.append(listOfDates) listAV.append(list(reversed(listOfDates))) - print("\nFinding close values for each date") + # print("\nFinding close values for each date") values = [] for i in range(0, len(listOfDates), 1): temp = listOfDates[i] @@ -185,25 +198,25 @@ class Stock: values.append(float(value)) # listAV.append(values) listAV.append(list(reversed(values))) - print(len(listAV[1]), "close values") + print(len(listAV[0]), 'dates and', len(listAV[1]), "close values") return listAV def Tiingo(self): - print('Tiingo') token = ''.join(('Token ', apiTiingo)) headers = { 'Content-Type': 'application/json', 'Authorization': token } url = ''.join(('https://api.tiingo.com/tiingo/daily/', self.name)) - print("\nSending request to:", url) - f = requests.get(url, headers=headers) + cprint("Get: " + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + f = requests.get(url, headers=headers) Functions.fromCache(f) loaded_json = f.json() if len(loaded_json) == 1 or f.status_code != 200 or loaded_json['startDate'] == None: print("Tiingo not available") - return 'Not available' + return 'N/A' listTiingo = [] @@ -218,8 +231,9 @@ class Stock: url2 = ''.join((url, '/prices?startDate=', firstDate, '&endDate=', lastDate)) # https://api.tiingo.com/tiingo/daily//prices?startDate=2012-1-1&endDate=2016-1-1 - print("\nSending request to:", url2, '\n') - 
requestResponse2 = requests.get(url2, headers=headers) + cprint("\nGet: " + url2 + '\n', 'white', attrs=['dark']) + with Halo(spinner='dots'): + requestResponse2 = requests.get(url2, headers=headers) Functions.fromCache(requestResponse2) loaded_json2 = requestResponse2.json() for i in range(0, len(loaded_json2)-1, 1): @@ -234,38 +248,86 @@ class Stock: listTiingo.append(dates) print(len(listTiingo[0]), "dates") - print("Finding close values for each date") + # print("Finding close values for each date") # Used loop from finding dates listTiingo.append(values) - print(len(listTiingo[1]), "close values") + print(len(listTiingo[0]), 'dates and', + len(listTiingo[1]), "close values") return listTiingo - def datesAndClose(self): - print('\n', Stock.getName(self), sep='') + def Yahoo(self): + url = ''.join(('https://finance.yahoo.com/quote/', + self.name, '?p=', self.name)) + cprint('Get: ' + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + t = requests.get(url) + if t.history: + print('Yahoo Finance does not have data for', self.name) + print('Yahoo not available') + return 'N/A' + else: + print('Yahoo Finance has data for', self.name) - sourceList = ['AV', 'IEX', 'Tiingo'] - # sourceList = ['IEX', 'Tiingo', 'AV'] + ticker = self.name + firstDate = datetime.datetime.now().date( + ) - datetime.timedelta(days=self.timeFrame*31) # 31 days as a buffer just in case + with Halo(spinner='dots'): + yahoo_financials = YahooFinancials(ticker) + r = yahoo_financials.get_historical_price_data( + str(firstDate), str(datetime.date.today()), 'daily') + + s = r[self.name]['prices'] + listOfDates = [] + listOfCloseValues = [] + for i in range(0, len(s), 1): + listOfDates.append(s[i]['formatted_date']) + listOfCloseValues.append(s[i]['close']) + listYahoo = [listOfDates, listOfCloseValues] + + # Sometimes close value is a None value + i = 0 + while i < len(listYahoo[1]): + if Functions.listIndexExists(listYahoo[1][i]) == True: + if listYahoo[1][i] == None: + del 
listYahoo[1][i] + del listYahoo[0][i] + i = i - 1 + i = i + 1 + else: + break + + print(len(listYahoo[0]), 'dates and', + len(listYahoo[1]), "close values") + return listYahoo + + def datesAndClose(self): + cprint('\n' + str(self.name), 'cyan') + + sourceList = Stock.sourceList # Use each source until you get a value for j in range(0, len(sourceList), 1): source = sourceList[j] - print('\nSource being used:', source) + print('Source being used:', source) - if source == 'AV': + if source == 'Alpha Vantage': datesAndCloseList = Stock.AV(self) - elif source == 'Tiingo': - datesAndCloseList = Stock.Tiingo(self) + elif source == 'Yahoo': + datesAndCloseList = Stock.Yahoo(self) elif source == 'IEX': datesAndCloseList = Stock.IEX(self) + elif source == 'Tiingo': + datesAndCloseList = Stock.Tiingo(self) - if datesAndCloseList != 'Not available': + if datesAndCloseList != 'N/A': break else: if j == len(sourceList)-1: print('\nNo sources have data for', self.name) - print('Removing', self.name, - 'from list of stocks to ensure compatibility later') - return 'Not available' + print('Removing ' + self.name + + ' from list of stocks to ensure compatibility later') + return 'N/A' + print('') # Convert dates to datetime allDates = datesAndCloseList[0] @@ -278,14 +340,14 @@ class Stock: for i in datesAndCloseList[1]: if i == 0: print('Found close value of 0. 
This is likely something like ticker RGN (Daily Time Series with Splits and Dividend Events)') - print('Removing', self.name, + print('Removing ' + self.name + 'from list of stocks to ensure compability later') - return 'Not available' + return 'N/A' return datesAndCloseList def datesAndCloseFitTimeFrame(self): - print('Shortening list to fit time frame') + print('\nShortening list to fit time frame') # Have to do this because if I just make dates = self.allDates & closeValues = self.allCloseValues, then deleting from dates & closeValues also deletes it from self.allDates & self.allCloseValues (I'm not sure why) dates = [] closeValues = [] @@ -295,7 +357,7 @@ class Stock: firstDate = datetime.datetime.now().date() - datetime.timedelta( days=self.timeFrame*30) - print('\n', self.timeFrame, ' months ago: ', firstDate, sep='') + print(self.timeFrame, ' months ago: ', firstDate, sep='') closestDate = Functions.getNearest(dates, firstDate) if closestDate != firstDate: print('Closest date available for', self.name, ':', closestDate) @@ -315,9 +377,7 @@ class Stock: datesAndCloseList2.append(dates) datesAndCloseList2.append(closeValues) - print(len(dates), 'dates') - print(len(closeValues), 'close values') - + print(len(dates), 'dates and', len(closeValues), 'close values') return datesAndCloseList2 def calcAverageMonthlyReturn(self): # pylint: disable=E0202 @@ -345,7 +405,7 @@ class Stock: if firstDate == secondDate: print('Closest date is', firstDate, 'which is after the given time frame.') - return 'Not available' + return 'N/A' # Get corresponding close values and calculate monthly return for i in range(0, len(self.dates), 1): @@ -499,34 +559,49 @@ class Stock: def scrapeYahooFinance(self): # Determine if ETF, Mutual fund, or stock - print('Determining if Yahoo Finance has data for', self.name, end=": ") url = ''.join(('https://finance.yahoo.com/quote/', self.name, '?p=', self.name)) - if requests.get(url).history: - print('No') - return 'Not available' + cprint('Get: 
' + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + t = requests.get(url) + Functions.fromCache(t) + if t.history: + print('Yahoo Finance does not have data for', self.name) + return 'N/A' else: - print('Yes') + print('Yahoo Finance has data for', self.name) stockType = '' url2 = ''.join(('https://finance.yahoo.com/lookup?s=', self.name)) - print('Sending request to:', url2) - raw_html = requests.get(url2).text + cprint('Get: ' + url2, 'white', attrs=['dark']) + with Halo(spinner='dots'): + x = requests.get(url2) + raw_html = x.text + Functions.fromCache(x) soup2 = BeautifulSoup(raw_html, 'html.parser') # Type (Stock, ETF, Mutual Fund) r = soup2.find_all( 'td', attrs={'class': 'data-col4 Ta(start) Pstart(20px) Miw(30px)'}) - t = soup2.find_all('a', attrs={'class': 'Fw(b)'}) # Name and class + u = soup2.find_all('a', attrs={'class': 'Fw(b)'}) # Name and class z = soup2.find_all('td', attrs={ 'class': 'data-col1 Ta(start) Pstart(10px) Miw(80px)'}) # Name of stock listNames = [] - for i in t: + for i in u: + if i.text.strip() == i.text.strip().upper(): + listNames.append(i.text.strip()) + ''' if len(i.text.strip()) < 6: listNames.append(i.text.strip()) + elif '.' in i.text.strip(): + listNames.append(i.text.strip()) # Example: TSNAX (TSN.AX) + #! 
If having problems later, separate them by Industries (Mutual funds and ETF's are always N/A) + ''' + for i in range(0, len(listNames), 1): if listNames[i] == self.name: break + r = r[i].text.strip() z = z[i].text.strip() print('Name:', z) @@ -536,36 +611,32 @@ class Stock: elif r == 'Stocks': stockType = 'Stock' elif r == 'Mutual Fund': - stockType = 'Fund' + stockType = 'Mutual Fund' else: print('Could not determine fund type') - return 'Not available' + return 'N/A' print('Type:', stockType) if Stock.indicator == 'Expense Ratio': if stockType == 'Stock': print( self.name, 'is a stock, and therefore does not have an expense ratio') - return 'Not available' + return 'Stock' - url = ''.join(('https://finance.yahoo.com/quote/', - self.name, '?p=', self.name)) - # https://finance.yahoo.com/quote/SPY?p=SPY - print('Sending request to:', url) - raw_html = requests.get(url).text + raw_html = t.text soup = BeautifulSoup(raw_html, 'html.parser') r = soup.find_all('span', attrs={'class': 'Trsdu(0.3s)'}) if r == []: print('Something went wrong with scraping expense ratio') - return('Not available') + return('N/A') if stockType == 'ETF': for i in range(len(r)-1, 0, -1): s = r[i].text.strip() if s[-1] == '%': break - elif stockType == 'Fund': + elif stockType == 'Mutual Fund': count = 0 # Second in set for i in range(0, len(r)-1, 1): s = r[i].text.strip() @@ -578,64 +649,78 @@ class Stock: expenseRatio = float(s.replace('%', '')) else: print('Something went wrong with scraping expense ratio') - return 'Not available' + return 'N/A' + print(Stock.indicator + ': ', end='') print(str(expenseRatio) + '%') return expenseRatio elif Stock.indicator == 'Market Capitalization': - url = ''.join(('https://finance.yahoo.com/quote/', - self.name, '?p=', self.name)) - # https://finance.yahoo.com/quote/GOOGL?p=GOOGL - raw_html = requests.get(url).text + somethingWrong = False + raw_html = t.text soup = BeautifulSoup(raw_html, 'html.parser') r = soup.find_all( 'span', attrs={'class': 
'Trsdu(0.3s)'}) if r == []: - print('Something went wrong with scraping market capitalization') - return 'Not available' - marketCap = 0 - for t in r: - s = t.text.strip() - if s[-1] == 'B': - print(s, end='') - s = s.replace('B', '') - marketCap = float(s) * 1000000000 # 1 billion - break - elif s[-1] == 'M': - print(s, end='') - s = s.replace('M', '') - marketCap = float(s) * 1000000 # 1 million - break - elif s[-1] == 'K': - print(s, end='') - s = s.replace('K', '') - marketCap = float(s) * 1000 # 1 thousand - break - if marketCap == 0: - print('\nSomething went wrong with scraping market capitalization') - return 'Not available' - marketCap = int(marketCap) + somethingWrong = True + else: + marketCap = 0 + for t in r: + s = t.text.strip() + if s[-1] == 'B': + print(Stock.indicator + ': ', end='') + print(s, end='') + s = s.replace('B', '') + marketCap = float(s) * 1000000000 # 1 billion + break + elif s[-1] == 'M': + print(Stock.indicator + ': ', end='') + print(s, end='') + s = s.replace('M', '') + marketCap = float(s) * 1000000 # 1 million + break + elif s[-1] == 'K': + print(Stock.indicator + ': ', end='') + print(s, end='') + s = s.replace('K', '') + marketCap = float(s) * 1000 # 1 thousand + break + if marketCap == 0: + somethingWrong = True + if somethingWrong == True: + ticker = self.name + yahoo_financials = YahooFinancials(ticker) + marketCap = yahoo_financials.get_market_cap() + if marketCap != None: + print('(Taken from yahoofinancials)') + print(marketCap) + return int(marketCap) + else: + print( + 'Was not able to scrape or get market capitalization from yahoo finance') + return 'N/A' + marketCap = int(marketCap) + return marketCap + print(' =', marketCap) + marketCap = marketCap / 1000000 + print( + 'Dividing marketCap by 1 million (to work with linear regression module):', marketCap) return marketCap elif Stock.indicator == 'Turnover': if stockType == 'Stock': print(self.name, 'is a stock, and therefore does not have turnover') - return 'Not 
available' + return 'Stock' - if stockType == 'Fund': - url = ''.join(('https://finance.yahoo.com/quote/', - self.name, '?p=', self.name)) - # https://finance.yahoo.com/quote/SPY?p=SPY - print('Sending request to', url) - raw_html = requests.get(url).text + if stockType == 'Mutual Fund': + raw_html = t.text soup = BeautifulSoup(raw_html, 'html.parser') r = soup.find_all( 'span', attrs={'class': 'Trsdu(0.3s)'}) if r == []: print('Something went wrong without scraping turnover') - return 'Not available' + return 'N/A' turnover = 0 for i in range(len(r)-1, 0, -1): s = r[i].text.strip() @@ -646,25 +731,30 @@ class Stock: url = ''.join(('https://finance.yahoo.com/quote/', self.name, '/profile?p=', self.name)) # https://finance.yahoo.com/quote/SPY/profile?p=SPY - print('Sending request to', url) - raw_html = requests.get(url).text + cprint('Get: ' + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + raw_html = requests.get(url).text soup = BeautifulSoup(raw_html, 'html.parser') r = soup.find_all( 'span', attrs={'class': 'W(20%) D(b) Fl(start) Ta(e)'}) if r == []: print('Something went wrong without scraping turnover') - return 'Not available' + return 'N/A' turnover = 0 for i in range(len(r)-1, 0, -1): s = r[i].text.strip() if s[-1] == '%': turnover = float(s.replace('%', '')) break + elif s == 'N/A': + print(self.name, 'has a value of N/A for turnover') + return 'N/A' if turnover == 0: print('Something went wrong with scraping turnover') - return 'Not available' + return 'N/A' + print(Stock.indicator + ': ', end='') print(str(turnover) + '%') return turnover @@ -684,7 +774,9 @@ class Stock: indicatorValue = str( input(Stock.indicator + ' of ' + self.name + ': ')) else: - print('Something is wrong. Indicator was not found. Ending program.') + # print('Something is wrong. Indicator was not found. Ending program.') + cprint( + 'Something is wrong. Indicator was not found. 
Ending program.', 'white', 'on_red') exit() if Functions.strintIsFloat(indicatorValue) == True: @@ -698,7 +790,7 @@ class Stock: 0, Stock.persTimeFrame, 1))) / Stock.persTimeFrame persistenceSecond = self.averageMonthlyReturn persistence = persistenceSecond-persistenceFirst - print('Change in average monthly return:', persistence) + print('Change (difference) in average monthly return:', persistence) return persistence @@ -765,24 +857,33 @@ def stocksInit(): method = 0 methods = ['Read from a file', 'Enter manually', 'U.S. News popular funds (~35)', 'Kiplinger top-performing funds (50)', 'TheStreet top-rated mutual funds (20)'] - for i in range(0, len(methods), 1): - print(str(i+1) + '. ' + methods[i]) - while method == 0 or method > len(methods): - method = str(input('Which method? ')) - if Functions.stringIsInt(method) == True: - method = int(method) - if method == 0 or method > len(methods): - print('Please choose a valid method') - else: - method = 0 - print('Please choose a number') - print('') + if Stock.config != 'N/A': + methodsConfig = ['Read', 'Manual', + 'U.S. News', 'Kiplinger', 'TheStreet'] + for i in range(0, len(methodsConfig), 1): + if Stock.config['Method'] == methodsConfig[i]: + method = i + 1 + + else: + for i in range(0, len(methods), 1): + print(str(i+1) + '. ' + methods[i]) + while method == 0 or method > len(methods): + method = str(input('Which method? 
')) + if Functions.stringIsInt(method) == True: + method = int(method) + if method == 0 or method > len(methods): + print('Please choose a valid method') + else: + method = 0 + print('Please choose a number') + + print('') if method == 1: defaultFiles = ['.gitignore', 'LICENSE', 'main.py', 'Functions.py', - 'README.md', 'requirements.txt', 'cache.sqlite', '_test_runner.py'] # Added by repl.it for whatever reason + 'README.md', 'requirements.txt', 'cache.sqlite', 'yahoofinancials.py', 'termcolor.py', 'README.html', 'config.json', '_test_runner.py'] # Added by repl.it for whatever reason stocksFound = False - print('Files in current directory (not including default files): ') + print('\nFiles in current directory (not including default files): ') listOfFilesTemp = [f for f in os.listdir() if os.path.isfile(f)] listOfFiles = [] for files in listOfFilesTemp: @@ -851,8 +952,9 @@ def stocksInit(): url = 'https://money.usnews.com/funds/mutual-funds/most-popular' headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'} - print('Sending request to', url) - f = requests.get(url, headers=headers) + cprint('Get: ' + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + f = requests.get(url, headers=headers) Functions.fromCache(f) raw_html = f.text soup = BeautifulSoup(raw_html, 'html.parser') @@ -878,8 +980,9 @@ def stocksInit(): url = 'https://www.kiplinger.com/tool/investing/T041-S001-top-performing-mutual-funds/index.php' headers = { 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'} - print('Sending request to', url) - f = requests.get(url, headers=headers) + cprint('Get: ' + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + f = requests.get(url, headers=headers) Functions.fromCache(f) raw_html = f.text soup = BeautifulSoup(raw_html, 'html.parser') @@ -904,8 +1007,9 @@ def stocksInit(): url = 
'https://www.thestreet.com/topic/21421/top-rated-mutual-funds.html' headers = { 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'} - print('Sending request to', url) - f = requests.get(url, headers=headers) + cprint('Get: ' + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + f = requests.get(url, headers=headers) Functions.fromCache(f) raw_html = f.text soup = BeautifulSoup(raw_html, 'html.parser') @@ -977,7 +1081,7 @@ def asyncData(benchmark, listOfStocks): def sendAsync(url): time.sleep(random.randrange(0, 2)) - print('Sending request to', url) + cprint('Get: ' + url, 'white', attrs=['dark']) requests.get(url) return @@ -990,7 +1094,7 @@ def timeFrameInit(): temp = input(' ') isInteger = Functions.stringIsInt(temp) if isInteger == True: - if int(temp) > 1: + if int(temp) > 1 and int(temp) < 1000: months = int(temp) else: print('Please enter a number greater than 1') @@ -1003,15 +1107,15 @@ def timeFrameInit(): def dataMain(listOfStocks): - print('\nGathering dates and close values') i = 0 while i < len(listOfStocks): datesAndCloseList = Stock.datesAndClose(listOfStocks[i]) - if datesAndCloseList == 'Not available': + if datesAndCloseList == 'N/A': del listOfStocks[i] if len(listOfStocks) == 0: - print('No stocks to analyze. Ending program') + # print('No stocks to analyze. Ending program') + cprint('No stocks to analyze. 
Ending program', 'white', 'on_red') exit() else: listOfStocks[i].allDates = datesAndCloseList[0] @@ -1032,8 +1136,9 @@ def riskFreeRate(): ('https://www.quandl.com/api/v3/datasets/USTREASURY/LONGTERMRATES.json?api_key=', apiQuandl)) # https://www.quandl.com/api/v3/datasets/USTREASURY/LONGTERMRATES.json?api_key=KUh3U3hxke9tCimjhWEF - print("\nSending request to:", url) - f = requests.get(url) + cprint('\nGet: ' + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + f = requests.get(url) Functions.fromCache(f) json_data = f.text loaded_json = json.loads(json_data) @@ -1043,7 +1148,7 @@ def riskFreeRate(): print('Risk-free rate:', riskFreeRate, end='\n\n') if f.status_code != 200: - print("Quandl not available") + print('Quandl not available') print('Returning 2.50 as risk-free rate', end='\n\n') # return 0.0250 return 2.50 @@ -1052,13 +1157,14 @@ def riskFreeRate(): def returnMain(benchmark, listOfStocks): - print('\nCalculating unadjusted return, Sharpe ratio, Sortino ratio, and Treynor ratio\n') + cprint('\nCalculating return statistics\n', 'white', attrs=['underline']) print('Getting risk-free rate from current 10-year treasury bill rates', end='\n\n') Stock.riskFreeRate = riskFreeRate() - print(benchmark.name, end='\n\n') + cprint(benchmark.name, 'cyan') benchmark.monthlyReturn = Stock.calcMonthlyReturn(benchmark) - if benchmark.monthlyReturn == 'Not available': - print('Please use a lower time frame\nEnding program') + if benchmark.monthlyReturn == 'N/A': + # print('Please use a lower time frame\nEnding program') + cprint('Please use a lower time frame. 
Ending program', 'white', 'on_red') exit() benchmark.averageMonthlyReturn = Stock.calcAverageMonthlyReturn(benchmark) benchmark.standardDeviation = Stock.calcStandardDeviation(benchmark) @@ -1071,7 +1177,7 @@ def returnMain(benchmark, listOfStocks): i = 0 while i < len(listOfStocks): - print('\n' + listOfStocks[i].name, end='\n\n') + cprint('\n' + listOfStocks[i].name, 'cyan') # Make sure each date has a value for both the benchmark and the stock list1 = [] @@ -1088,11 +1194,13 @@ def returnMain(benchmark, listOfStocks): # Calculate everything for each stock listOfStocks[i].monthlyReturn = Stock.calcMonthlyReturn( listOfStocks[i]) - if listOfStocks[i].monthlyReturn == 'Not available': - print('Removing', listOfStocks[i].name, 'from list of stocks') + if listOfStocks[i].monthlyReturn == 'N/A': + print('Removing ' + listOfStocks[i].name + ' from list of stocks') del listOfStocks[i] if len(listOfStocks) == 0: print('No stocks fit time frame. Ending program') + cprint('No stocks fit time frame. Ending program', + 'white', 'on_red') exit() else: listOfStocks[i].averageMonthlyReturn = Stock.calcAverageMonthlyReturn( @@ -1117,13 +1225,36 @@ def returnMain(benchmark, listOfStocks): i += 1 - print('\nNumber of stocks from original list that fit time frame:', - len(listOfStocks)) + cprint('\nNumber of stocks from original list that fit time frame: ' + + str(len(listOfStocks)), 'green') if len(listOfStocks) < 2: - print('Cannot proceed to the next step. Exiting program.') + #print('Cannot proceed to the next step. Exiting program.') + cprint('Cannot proceed to the next step. Exiting program.', + 'white', 'on_red') exit() +def outlierChoice(): + print('\nWould you like to remove indicator outliers?') + print('1. Yes\n2. 
No') + found = False + while found == False: + outlierChoice = str(input('Choice: ')) + if Functions.stringIsInt(outlierChoice): + if int(outlierChoice) == 1: + return True + elif int(outlierChoice) == 2: + return False + else: + print('Please enter 1 or 2') + elif outlierChoice.lower() == 'yes': + return True + elif outlierChoice.lower() == 'no': + return False + else: + print('Not valid. Please enter a number or yes or no.') + + def indicatorInit(): # Runs correlation or regression study indicatorFound = False @@ -1220,6 +1351,8 @@ def plot_regression_line(x, y, b, i): plt.xlabel(Stock.indicator + ' (%)') elif Stock.indicator == 'Persistence': plt.xlabel(Stock.indicator + ' (Difference in average monthly return)') + elif Stock.indicator == 'Market Capitalization': + plt.xlabel(Stock.indicator + ' (millions)') else: plt.xlabel(Stock.indicator) @@ -1266,25 +1399,61 @@ def persistenceTimeFrame(): def indicatorMain(listOfStocks): - print('\n' + str(Stock.indicator) + '\n') + cprint('\n' + str(Stock.indicator) + '\n', 'white', attrs=['underline']) listOfStocksIndicatorValues = [] for i in range(0, len(listOfStocks), 1): - print(listOfStocks[i].name) - if Stock.indicator != 'Persistence': - listOfStocks[i].indicatorValue = Stock.scrapeYahooFinance( + cprint(listOfStocks[i].name, 'cyan') + if Stock.indicator == 'Persistence': + listOfStocks[i].indicatorValue = Stock.calcPersistence( listOfStocks[i]) else: - listOfStocks[i].indicatorValue = Stock.calcPersistence( + listOfStocks[i].indicatorValue = Stock.scrapeYahooFinance( listOfStocks[i]) print('') - if listOfStocks[i].indicatorValue == 'Not available': + if listOfStocks[i].indicatorValue == 'N/A': listOfStocks[i].indicatorValue = Stock.indicatorManual( listOfStocks[i]) + elif listOfStocks[i].indicatorValue == 'Stock': + print('Removing ' + listOfStocks[i].name + ' from list of stocks') + del listOfStocks[i] + if len(listOfStocks) < 2: + # print('Not able to go to the next step. 
Ending program') + cprint('Not able to go to the next step. Ending program', + 'white', 'on_red') + exit() listOfStocksIndicatorValues.append(listOfStocks[i].indicatorValue) + # Remove outliers + if Stock.removeOutliers == True: + cprint('\nRemoving outliers\n', 'white', attrs=['underline']) + temp = Functions.removeOutliers(listOfStocksIndicatorValues) + if temp[0] == listOfStocksIndicatorValues: + print('No outliers\n') + else: + print('First quartile:', temp[2], ', Median:', temp[3], + ', Third quartile:', temp[4], 'Interquartile range:', temp[5]) + # print('Original list:', listOfStocksIndicatorValues) + listOfStocksIndicatorValues = temp[0] + i = 0 + while i < len(listOfStocks)-1: + for j in temp[1]: + if listOfStocks[i].indicatorValue == j: + print('Removing', listOfStocks[i].name, 'because it has a', + Stock.indicator.lower(), 'value of', listOfStocks[i].indicatorValue) + del listOfStocks[i] + i = i - 1 + break + i += 1 + # print('New list:', listOfStocksIndicatorValues, '\n') + print('') + + # Calculate data + cprint('Calculating correlation and linear regression\n', + 'white', attrs=['underline']) + listOfReturns = [] # A list that matches the above list with return values [[averageMonthlyReturn1, aAR2, aAR3], [sharpe1, sharpe2, sharpe3], etc.] 
tempListOfReturns = [] for i in range(0, len(listOfStocks), 1): @@ -1318,7 +1487,7 @@ def indicatorMain(listOfStocks): listOfReturnStrings = ['Average Monthly Return', 'Sharpe Ratio', 'Sortino Ratio', 'Treynor Ratio', 'Alpha'] for i in range(0, len(Stock.indicatorCorrelation), 1): - print('Correlation with ' + Stock.indicator.lower() + ' and ' + + print('Correlation for ' + Stock.indicator.lower() + ' and ' + listOfReturnStrings[i].lower() + ': ' + str(Stock.indicatorCorrelation[i])) Stock.indicatorRegression = calcIndicatorRegression( @@ -1331,52 +1500,125 @@ def indicatorMain(listOfStocks): listOfReturnStrings[i].lower() + ': ' + formula) +def checkConfig(fileName): + if Functions.fileExists(fileName) == False: + return 'N/A' + file = open(fileName, 'r') + n = file.read() + file.close() + if Functions.validateJson(n) == False: + print('Config file is not valid') + return 'N/A' + t = json.loads(n) + r = t['Config'] + return r + + def main(): + # Check config file for errors and if not, then use values + #! Only use this if you know it is exactly correct. 
I haven't spent much time debugging this + Stock.config = checkConfig('config.json') + # Check that all required packages are installed - packagesInstalled = Functions.checkPackages( - ['numpy', 'requests', 'bs4', 'requests_cache']) - if not packagesInstalled: - exit() + if Stock.config == 'N/A': + packagesInstalled = Functions.checkPackages( + ['numpy', 'requests', 'bs4', 'requests_cache', 'halo']) + if not packagesInstalled: + exit() + else: + print('All required packages are installed') + + # Check python version is above 3.3 + pythonVersionGood = Functions.checkPythonVersion() + if not pythonVersionGood: + exit() + + # Test internet connection + internetConnection = Functions.isConnected() + if not internetConnection: + exit() + else: + Functions.getJoke() + + # Choose benchmark and makes it class Stock + benchmark = benchmarkInit() + # Add it to a list to work with other functions + benchmarkAsList = [benchmark] + + # Asks for stock(s) ticker and makes them class Stock + listOfStocks = stocksInit() + + # Determine time frame (Years) + timeFrame = timeFrameInit() + Stock.timeFrame = timeFrame # Needs to be a global variable for all stocks + + # Choose indicator + Stock.indicator = indicatorInit() + # Choose time frame for initial persistence + if Stock.indicator == 'Persistence': + Stock.persTimeFrame = persistenceTimeFrame() + + # Choose whether to remove outliers or not + Stock.removeOutliers = outlierChoice() else: - print('All required packages are installed') + if Stock.config['Check Packages'] != False: + packagesInstalled = Functions.checkPackages( + ['numpy', 'requests', 'bs4', 'requests_cache', 'halo']) + if not packagesInstalled: + exit() + else: + print('All required packages are installed') - # Check python version is above 3.3 - pythonVersionGood = Functions.checkPythonVersion() - if not pythonVersionGood: - return + if Stock.config['Check Python Version'] != False: + pythonVersionGood = Functions.checkPythonVersion() + if not pythonVersionGood: + 
exit() - # Test internet connection + if Stock.config['Check Internet Connection'] != False: + internetConnection = Functions.isConnected() + if not internetConnection: + exit() + if Stock.config['Get Joke'] != False: + Functions.getJoke() - internetConnection = Functions.isConnected() - if not internetConnection: - return - else: - Functions.getJoke() + benchmarksTicker = ['SPY', 'DJIA', 'VTHR', 'EFT'] + if Stock.config['Benchmark'] in benchmarksTicker: + benchmark = Stock() + benchmark.setName(str(Stock.config['Benchmark'])) + benchmarkAsList = [benchmark] + else: + benchmark = benchmarkInit() + benchmarkAsList = [benchmark] - # Functions.getJoke() + listOfStocks = stocksInit() - # Choose benchmark and makes it class Stock - benchmark = benchmarkInit() - # Add it to a list to work with other functions - benchmarkAsList = [benchmark] + if int(Stock.config['Time Frame']) >= 2: + timeFrame = int(Stock.config['Time Frame']) + else: + timeFrame = timeFrameInit() + Stock.timeFrame = timeFrame # Needs to be a global variable for all stocks - # Asks for stock(s) ticker and makes them class Stock - listOfStocks = stocksInit() + indicators = ['Expense Ratio', + 'Market Capitalization', 'Turnover', 'Persistence'] + if Stock.config['Indicator'] in indicators: + Stock.indicator = Stock.config['Indicator'] + else: + Stock.indicator = indicatorInit() - # Determine time frame (Years) - timeFrame = timeFrameInit() - Stock.timeFrame = timeFrame # Needs to be a global variable for all stocks + if Stock.indicator == 'Persistence': + Stock.persTimeFrame = persistenceTimeFrame() - # Choose indicator - Stock.indicator = indicatorInit() - # Choose time frame for initial persistence - if Stock.indicator == 'Persistence': - Stock.persTimeFrame = persistenceTimeFrame() + # Choose whether to remove outliers or not + if Stock.config['Remove Outliers'] != False: + Stock.removeOutliers = True + else: + Stock.removeOutliers = outlierChoice() # Send async request to AV for listOfStocks and 
benchmark - asyncData(benchmark, listOfStocks) + # asyncData(benchmark, listOfStocks) # Gather data for benchmark and stock(s) + cprint('\nGathering data', 'white', attrs=['underline']) dataMain(benchmarkAsList) dataMain(listOfStocks) @@ -1386,6 +1628,7 @@ def main(): # Choose indicator and calculate correlation with indicator indicatorMain(listOfStocks) + print('') exit() diff --git a/modules/termcolor.py b/modules/termcolor.py new file mode 100644 index 0000000..f11b824 --- /dev/null +++ b/modules/termcolor.py @@ -0,0 +1,168 @@ +# coding: utf-8 +# Copyright (c) 2008-2011 Volvox Development Team +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+# +# Author: Konstantin Lepa + +"""ANSII Color formatting for output in terminal.""" + +from __future__ import print_function +import os + + +__ALL__ = [ 'colored', 'cprint' ] + +VERSION = (1, 1, 0) + +ATTRIBUTES = dict( + list(zip([ + 'bold', + 'dark', + '', + 'underline', + 'blink', + '', + 'reverse', + 'concealed' + ], + list(range(1, 9)) + )) + ) +del ATTRIBUTES[''] + + +HIGHLIGHTS = dict( + list(zip([ + 'on_grey', + 'on_red', + 'on_green', + 'on_yellow', + 'on_blue', + 'on_magenta', + 'on_cyan', + 'on_white' + ], + list(range(40, 48)) + )) + ) + + +COLORS = dict( + list(zip([ + 'grey', + 'red', + 'green', + 'yellow', + 'blue', + 'magenta', + 'cyan', + 'white', + ], + list(range(30, 38)) + )) + ) + + +RESET = '\033[0m' + + +def colored(text, color=None, on_color=None, attrs=None): + """Colorize text. + + Available text colors: + red, green, yellow, blue, magenta, cyan, white. + + Available text highlights: + on_red, on_green, on_yellow, on_blue, on_magenta, on_cyan, on_white. + + Available attributes: + bold, dark, underline, blink, reverse, concealed. + + Example: + colored('Hello, World!', 'red', 'on_grey', ['blue', 'blink']) + colored('Hello, World!', 'green') + """ + if os.getenv('ANSI_COLORS_DISABLED') is None: + fmt_str = '\033[%dm%s' + if color is not None: + text = fmt_str % (COLORS[color], text) + + if on_color is not None: + text = fmt_str % (HIGHLIGHTS[on_color], text) + + if attrs is not None: + for attr in attrs: + text = fmt_str % (ATTRIBUTES[attr], text) + + text += RESET + return text + + +def cprint(text, color=None, on_color=None, attrs=None, **kwargs): + """Print colorize text. + + It accepts arguments of print function. 
+ """ + + print((colored(text, color, on_color, attrs)), **kwargs) + + +if __name__ == '__main__': + print('Current terminal type: %s' % os.getenv('TERM')) + print('Test basic colors:') + cprint('Grey color', 'grey') + cprint('Red color', 'red') + cprint('Green color', 'green') + cprint('Yellow color', 'yellow') + cprint('Blue color', 'blue') + cprint('Magenta color', 'magenta') + cprint('Cyan color', 'cyan') + cprint('White color', 'white') + print(('-' * 78)) + + print('Test highlights:') + cprint('On grey color', on_color='on_grey') + cprint('On red color', on_color='on_red') + cprint('On green color', on_color='on_green') + cprint('On yellow color', on_color='on_yellow') + cprint('On blue color', on_color='on_blue') + cprint('On magenta color', on_color='on_magenta') + cprint('On cyan color', on_color='on_cyan') + cprint('On white color', color='grey', on_color='on_white') + print('-' * 78) + + print('Test attributes:') + cprint('Bold grey color', 'grey', attrs=['bold']) + cprint('Dark red color', 'red', attrs=['dark']) + cprint('Underline green color', 'green', attrs=['underline']) + cprint('Blink yellow color', 'yellow', attrs=['blink']) + cprint('Reversed blue color', 'blue', attrs=['reverse']) + cprint('Concealed Magenta color', 'magenta', attrs=['concealed']) + cprint('Bold underline reverse cyan color', 'cyan', + attrs=['bold', 'underline', 'reverse']) + cprint('Dark blink concealed white color', 'white', + attrs=['dark', 'blink', 'concealed']) + print(('-' * 78)) + + print('Test mixing:') + cprint('Underline red on grey color', 'red', 'on_grey', + ['underline']) + cprint('Reversed green on red color', 'green', 'on_red', ['reverse']) + diff --git a/modules/yahoofinancials.py b/modules/yahoofinancials.py new file mode 100644 index 0000000..9f477f1 --- /dev/null +++ b/modules/yahoofinancials.py @@ -0,0 +1,891 @@ +""" +============================== +The Yahoo Financials Module +Version: 1.5 +============================== + +Author: Connor Sanders +Email: 
sandersconnor1@gmail.com +Version Released: 01/27/2019 +Tested on Python 2.7, 3.3, 3.4, 3.5, 3.6, and 3.7 + +Copyright (c) 2019 Connor Sanders +MIT License + +List of Included Functions: + +1) get_financial_stmts(frequency, statement_type, reformat=True) + - frequency can be either 'annual' or 'quarterly'. + - statement_type can be 'income', 'balance', 'cash'. + - reformat optional value defaulted to true. Enter False for unprocessed raw data from Yahoo Finance. +2) get_stock_price_data(reformat=True) + - reformat optional value defaulted to true. Enter False for unprocessed raw data from Yahoo Finance. +3) get_stock_earnings_data(reformat=True) + - reformat optional value defaulted to true. Enter False for unprocessed raw data from Yahoo Finance. +4) get_summary_data(reformat=True) + - reformat optional value defaulted to true. Enter False for unprocessed raw data from Yahoo Finance. +5) get_stock_quote_type_data() +6) get_historical_price_data(start_date, end_date, time_interval) + - Gets historical price data for currencies, stocks, indexes, cryptocurrencies, and commodity futures. + - start_date should be entered in the 'YYYY-MM-DD' format. First day that financial data will be pulled. + - end_date should be entered in the 'YYYY-MM-DD' format. Last day that financial data will be pulled. + - time_interval can be either 'daily', 'weekly', or 'monthly'. Parameter determines the time period interval. 
+ +Usage Examples: +from yahoofinancials import YahooFinancials +#tickers = 'AAPL' +#or +tickers = ['AAPL', 'WFC', 'F', 'JPY=X', 'XRP-USD', 'GC=F'] +yahoo_financials = YahooFinancials(tickers) +balance_sheet_data = yahoo_financials.get_financial_stmts('quarterly', 'balance') +earnings_data = yahoo_financials.get_stock_earnings_data() +historical_prices = yahoo_financials.get_historical_price_data('2015-01-15', '2017-10-15', 'weekly') +""" + +import sys +import calendar +import re +from json import loads +import time +from bs4 import BeautifulSoup +import datetime +import pytz +import random +try: + from urllib import FancyURLopener +except: + from urllib.request import FancyURLopener + + +# track the last get timestamp to add a minimum delay between gets - be nice! +_lastget = 0 + + +# Custom Exception class to handle custom error +class ManagedException(Exception): + pass + + +# Class used to open urls for financial data +class UrlOpener(FancyURLopener): + version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11' + + +# Class containing Yahoo Finance ETL Functionality +class YahooFinanceETL(object): + + def __init__(self, ticker): + self.ticker = ticker.upper() if isinstance(ticker, str) else [t.upper() for t in ticker] + self._cache = {} + + # Minimum interval between Yahoo Finance requests for this instance + _MIN_INTERVAL = 7 + + # Meta-data dictionaries for the classes to use + YAHOO_FINANCIAL_TYPES = { + 'income': ['financials', 'incomeStatementHistory', 'incomeStatementHistoryQuarterly'], + 'balance': ['balance-sheet', 'balanceSheetHistory', 'balanceSheetHistoryQuarterly', 'balanceSheetStatements'], + 'cash': ['cash-flow', 'cashflowStatementHistory', 'cashflowStatementHistoryQuarterly', 'cashflowStatements'], + 'keystats': ['key-statistics'], + 'history': ['history'] + } + + # Interval value translation dictionary + _INTERVAL_DICT = { + 'daily': '1d', + 'weekly': '1wk', + 'monthly': '1mo' + } + + # Base Yahoo 
Finance URL for the class to build on + _BASE_YAHOO_URL = 'https://finance.yahoo.com/quote/' + + # private static method to get the appropriate report type identifier + @staticmethod + def get_report_type(frequency): + if frequency == 'annual': + report_num = 1 + else: + report_num = 2 + return report_num + + # Public static method to format date serial string to readable format and vice versa + @staticmethod + def format_date(in_date): + if isinstance(in_date, str): + form_date = int(calendar.timegm(time.strptime(in_date, '%Y-%m-%d'))) + else: + form_date = str((datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=in_date)).date()) + return form_date + + # Private Static Method to Convert Eastern Time to UTC + @staticmethod + def _convert_to_utc(date, mask='%Y-%m-%d %H:%M:%S'): + utc = pytz.utc + eastern = pytz.timezone('US/Eastern') + date_ = datetime.datetime.strptime(date.replace(" 0:", " 12:"), mask) + date_eastern = eastern.localize(date_, is_dst=None) + date_utc = date_eastern.astimezone(utc) + return date_utc.strftime('%Y-%m-%d %H:%M:%S %Z%z') + + # Private method to scrape data from yahoo finance + def _scrape_data(self, url, tech_type, statement_type): + global _lastget + if not self._cache.get(url): + now = int(time.time()) + if _lastget and now - _lastget < self._MIN_INTERVAL: + time.sleep(self._MIN_INTERVAL - (now - _lastget) + 1) + now = int(time.time()) + _lastget = now + urlopener = UrlOpener() + # Try to open the URL up to 10 times sleeping random time if something goes wrong + max_retry = 10 + for i in range(0, max_retry): + response = urlopener.open(url) + if response.getcode() != 200: + time.sleep(random.randrange(10, 20)) + else: + response_content = response.read() + soup = BeautifulSoup(response_content, "html.parser") + re_script = soup.find("script", text=re.compile("root.App.main")) + if re_script is not None: + script = re_script.text + self._cache[url] = loads(re.search("root.App.main\s+=\s+(\{.*\})", script).group(1)) + 
response.close() + break + else: + time.sleep(random.randrange(10, 20)) + if i == max_retry - 1: + # Raise a custom exception if we can't get the web page within max_retry attempts + raise ManagedException("Server replied with HTTP " + str(response.getcode()) + + " code while opening the url: " + str(url)) + data = self._cache[url] + if tech_type == '' and statement_type != 'history': + stores = data["context"]["dispatcher"]["stores"]["QuoteSummaryStore"] + elif tech_type != '' and statement_type != 'history': + stores = data["context"]["dispatcher"]["stores"]["QuoteSummaryStore"][tech_type] + else: + stores = data["context"]["dispatcher"]["stores"]["HistoricalPriceStore"] + return stores + + # Private static method to determine if a numerical value is in the data object being cleaned + @staticmethod + def _determine_numeric_value(value_dict): + if 'raw' in value_dict.keys(): + numerical_val = value_dict['raw'] + else: + numerical_val = None + return numerical_val + + # Private method to format date serial string to readable format and vice versa + def _format_time(self, in_time): + form_date_time = datetime.datetime.fromtimestamp(int(in_time)).strftime('%Y-%m-%d %H:%M:%S') + utc_dt = self._convert_to_utc(form_date_time) + return utc_dt + + # Private method to return the a sub dictionary entry for the earning report cleaning + def _get_cleaned_sub_dict_ent(self, key, val_list): + sub_list = [] + for rec in val_list: + sub_sub_dict = {} + for k, v in rec.items(): + if k == 'date': + sub_sub_dict_ent = {k: v} + else: + numerical_val = self._determine_numeric_value(v) + sub_sub_dict_ent = {k: numerical_val} + sub_sub_dict.update(sub_sub_dict_ent) + sub_list.append(sub_sub_dict) + sub_ent = {key: sub_list} + return sub_ent + + # Private method to process raw earnings data and clean + def _clean_earnings_data(self, raw_data): + cleaned_data = {} + earnings_key = 'earningsData' + financials_key = 'financialsData' + for k, v in raw_data.items(): + if k == 'earningsChart': 
+ sub_dict = {} + for k2, v2 in v.items(): + if k2 == 'quarterly': + sub_ent = self._get_cleaned_sub_dict_ent(k2, v2) + elif k2 == 'currentQuarterEstimate': + numerical_val = self._determine_numeric_value(v2) + sub_ent = {k2: numerical_val} + else: + sub_ent = {k2: v2} + sub_dict.update(sub_ent) + dict_ent = {earnings_key: sub_dict} + cleaned_data.update(dict_ent) + elif k == 'financialsChart': + sub_dict = {} + for k2, v2, in v.items(): + sub_ent = self._get_cleaned_sub_dict_ent(k2, v2) + sub_dict.update(sub_ent) + dict_ent = {financials_key: sub_dict} + cleaned_data.update(dict_ent) + else: + if k != 'maxAge': + dict_ent = {k: v} + cleaned_data.update(dict_ent) + return cleaned_data + + # Private method to clean summary and price reports + def _clean_reports(self, raw_data): + cleaned_dict = {} + if raw_data is None: + return None + for k, v in raw_data.items(): + if 'Time' in k: + formatted_utc_time = self._format_time(v) + dict_ent = {k: formatted_utc_time} + elif 'Date' in k: + try: + formatted_date = v['fmt'] + except (KeyError, TypeError): + formatted_date = '-' + dict_ent = {k: formatted_date} + elif v is None or isinstance(v, str) or isinstance(v, int) or isinstance(v, float): + dict_ent = {k: v} + # Python 2 and Unicode + elif sys.version_info < (3, 0) and isinstance(v, unicode): + dict_ent = {k: v} + else: + numerical_val = self._determine_numeric_value(v) + dict_ent = {k: numerical_val} + cleaned_dict.update(dict_ent) + return cleaned_dict + + # Private Static Method to ensure ticker is URL encoded + @staticmethod + def _encode_ticker(ticker_str): + encoded_ticker = ticker_str.replace('=', '%3D') + return encoded_ticker + + # Private method to get time interval code + def _build_historical_url(self, ticker, hist_oj): + url = self._BASE_YAHOO_URL + self._encode_ticker(ticker) + '/history?period1=' + str(hist_oj['start']) + \ + '&period2=' + str(hist_oj['end']) + '&interval=' + hist_oj['interval'] + '&filter=history&frequency=' + \ + hist_oj['interval'] + 
return url + + # Private Method to clean the dates of the newly returns historical stock data into readable format + def _clean_historical_data(self, hist_data, last_attempt=False): + data = {} + for k, v in hist_data.items(): + if k == 'eventsData': + event_obj = {} + if isinstance(v, list): + dict_ent = {k: event_obj} + else: + for type_key, type_obj in v.items(): + formatted_type_obj = {} + for date_key, date_obj in type_obj.items(): + formatted_date_key = self.format_date(int(date_key)) + cleaned_date = self.format_date(int(date_obj['date'])) + date_obj.update({'formatted_date': cleaned_date}) + formatted_type_obj.update({formatted_date_key: date_obj}) + event_obj.update({type_key: formatted_type_obj}) + dict_ent = {k: event_obj} + elif 'date' in k.lower(): + if v is not None: + cleaned_date = self.format_date(v) + dict_ent = {k: {'formatted_date': cleaned_date, 'date': v}} + else: + if last_attempt is False: + return None + else: + dict_ent = {k: {'formatted_date': None, 'date': v}} + elif isinstance(v, list): + sub_dict_list = [] + for sub_dict in v: + sub_dict['formatted_date'] = self.format_date(sub_dict['date']) + sub_dict_list.append(sub_dict) + dict_ent = {k: sub_dict_list} + else: + dict_ent = {k: v} + data.update(dict_ent) + return data + + # Private Static Method to build API url for GET Request + @staticmethod + def _build_api_url(hist_obj, up_ticker): + base_url = "https://query1.finance.yahoo.com/v8/finance/chart/" + api_url = base_url + up_ticker + '?symbol=' + up_ticker + '&period1=' + str(hist_obj['start']) + '&period2=' + \ + str(hist_obj['end']) + '&interval=' + hist_obj['interval'] + api_url += '&events=div|split|earn&lang=en-US®ion=US' + return api_url + + # Private Method to get financial data via API Call + def _get_api_data(self, api_url, tries=0): + urlopener = UrlOpener() + response = urlopener.open(api_url) + if response.getcode() == 200: + res_content = response.read() + response.close() + if sys.version_info < (3, 0): + return 
loads(res_content) + return loads(res_content.decode('utf-8')) + else: + if tries < 5: + time.sleep(random.randrange(10, 20)) + tries += 1 + return self._get_api_data(api_url, tries) + else: + return None + + # Private Method to clean API data + def _clean_api_data(self, api_url): + raw_data = self._get_api_data(api_url) + ret_obj = {} + ret_obj.update({'eventsData': []}) + if raw_data is None: + return ret_obj + results = raw_data['chart']['result'] + if results is None: + return ret_obj + for result in results: + tz_sub_dict = {} + ret_obj.update({'eventsData': result.get('events', {})}) + ret_obj.update({'firstTradeDate': result['meta'].get('firstTradeDate', 'NA')}) + ret_obj.update({'currency': result['meta'].get('currency', 'NA')}) + ret_obj.update({'instrumentType': result['meta'].get('instrumentType', 'NA')}) + tz_sub_dict.update({'gmtOffset': result['meta']['gmtoffset']}) + ret_obj.update({'timeZone': tz_sub_dict}) + timestamp_list = result['timestamp'] + high_price_list = result['indicators']['quote'][0]['high'] + low_price_list = result['indicators']['quote'][0]['low'] + open_price_list = result['indicators']['quote'][0]['open'] + close_price_list = result['indicators']['quote'][0]['close'] + volume_list = result['indicators']['quote'][0]['volume'] + adj_close_list = result['indicators']['adjclose'][0]['adjclose'] + i = 0 + prices_list = [] + for timestamp in timestamp_list: + price_dict = {} + price_dict.update({'date': timestamp}) + price_dict.update({'high': high_price_list[i]}) + price_dict.update({'low': low_price_list[i]}) + price_dict.update({'open': open_price_list[i]}) + price_dict.update({'close': close_price_list[i]}) + price_dict.update({'volume': volume_list[i]}) + price_dict.update({'adjclose': adj_close_list[i]}) + prices_list.append(price_dict) + i += 1 + ret_obj.update({'prices': prices_list}) + return ret_obj + + # Private Method to Handle Recursive API Request + def _recursive_api_request(self, hist_obj, up_ticker, i=0): + api_url = 
self._build_api_url(hist_obj, up_ticker) + re_data = self._clean_api_data(api_url) + cleaned_re_data = self._clean_historical_data(re_data) + if cleaned_re_data is not None: + return cleaned_re_data + else: + if i < 3: + i += 1 + return self._recursive_api_request(hist_obj, up_ticker, i) + else: + return self._clean_historical_data(re_data, True) + + # Private Method to take scrapped data and build a data dictionary with + def _create_dict_ent(self, up_ticker, statement_type, tech_type, report_name, hist_obj): + YAHOO_URL = self._BASE_YAHOO_URL + up_ticker + '/' + self.YAHOO_FINANCIAL_TYPES[statement_type][0] + '?p=' +\ + up_ticker + if tech_type == '' and statement_type != 'history': + try: + re_data = self._scrape_data(YAHOO_URL, tech_type, statement_type) + dict_ent = {up_ticker: re_data[u'' + report_name], 'dataType': report_name} + except KeyError: + re_data = None + dict_ent = {up_ticker: re_data, 'dataType': report_name} + elif tech_type != '' and statement_type != 'history': + try: + re_data = self._scrape_data(YAHOO_URL, tech_type, statement_type) + except KeyError: + re_data = None + dict_ent = {up_ticker: re_data} + else: + YAHOO_URL = self._build_historical_url(up_ticker, hist_obj) + try: + cleaned_re_data = self._recursive_api_request(hist_obj, up_ticker) + except KeyError: + try: + re_data = self._scrape_data(YAHOO_URL, tech_type, statement_type) + cleaned_re_data = self._clean_historical_data(re_data) + except KeyError: + cleaned_re_data = None + dict_ent = {up_ticker: cleaned_re_data} + return dict_ent + + # Private method to return the stmt_id for the reformat_process + def _get_stmt_id(self, statement_type, raw_data): + stmt_id = '' + i = 0 + for key in raw_data.keys(): + if key in self.YAHOO_FINANCIAL_TYPES[statement_type.lower()]: + stmt_id = key + i += 1 + if i != 1: + return None + return stmt_id + + # Private Method for the Reformat Process + def _reformat_stmt_data_process(self, raw_data, statement_type): + final_data_list = [] + if raw_data 
is not None: + stmt_id = self._get_stmt_id(statement_type, raw_data) + if stmt_id is None: + return final_data_list + hashed_data_list = raw_data[stmt_id] + for data_item in hashed_data_list: + data_date = '' + sub_data_dict = {} + for k, v in data_item.items(): + if k == 'endDate': + data_date = v['fmt'] + elif k != 'maxAge': + numerical_val = self._determine_numeric_value(v) + sub_dict_item = {k: numerical_val} + sub_data_dict.update(sub_dict_item) + dict_item = {data_date: sub_data_dict} + final_data_list.append(dict_item) + return final_data_list + else: + return raw_data + + # Private Method to return subdict entry for the statement reformat process + def _get_sub_dict_ent(self, ticker, raw_data, statement_type): + form_data_list = self._reformat_stmt_data_process(raw_data[ticker], statement_type) + return {ticker: form_data_list} + + # Public method to get time interval code + def get_time_code(self, time_interval): + interval_code = self._INTERVAL_DICT[time_interval.lower()] + return interval_code + + # Public Method to get stock data + def get_stock_data(self, statement_type='income', tech_type='', report_name='', hist_obj={}): + data = {} + if isinstance(self.ticker, str): + dict_ent = self._create_dict_ent(self.ticker, statement_type, tech_type, report_name, hist_obj) + data.update(dict_ent) + else: + for tick in self.ticker: + try: + dict_ent = self._create_dict_ent(tick, statement_type, tech_type, report_name, hist_obj) + data.update(dict_ent) + except ManagedException: + print("Warning! 
Ticker: " + str(tick) + " error - " + str(ManagedException)) + print("The process is still running...") + continue + return data + + # Public Method to get technical stock data + + def get_stock_tech_data(self, tech_type): + if tech_type == 'defaultKeyStatistics': + return self.get_stock_data(statement_type='keystats', tech_type=tech_type) + else: + return self.get_stock_data(tech_type=tech_type) + + # Public Method to get reformatted statement data + def get_reformatted_stmt_data(self, raw_data, statement_type): + data_dict = {} + sub_dict = {} + data_type = raw_data['dataType'] + if isinstance(self.ticker, str): + sub_dict_ent = self._get_sub_dict_ent(self.ticker, raw_data, statement_type) + sub_dict.update(sub_dict_ent) + dict_ent = {data_type: sub_dict} + data_dict.update(dict_ent) + else: + for tick in self.ticker: + sub_dict_ent = self._get_sub_dict_ent(tick, raw_data, statement_type) + sub_dict.update(sub_dict_ent) + dict_ent = {data_type: sub_dict} + data_dict.update(dict_ent) + return data_dict + + # Public method to get cleaned summary and price report data + def get_clean_data(self, raw_report_data, report_type): + cleaned_data_dict = {} + if isinstance(self.ticker, str): + if report_type == 'earnings': + try: + cleaned_data = self._clean_earnings_data(raw_report_data[self.ticker]) + except: + cleaned_data = None + else: + try: + cleaned_data = self._clean_reports(raw_report_data[self.ticker]) + except: + cleaned_data = None + cleaned_data_dict.update({self.ticker: cleaned_data}) + else: + for tick in self.ticker: + if report_type == 'earnings': + try: + cleaned_data = self._clean_earnings_data(raw_report_data[tick]) + except: + cleaned_data = None + else: + try: + cleaned_data = self._clean_reports(raw_report_data[tick]) + except: + cleaned_data = None + cleaned_data_dict.update({tick: cleaned_data}) + return cleaned_data_dict + + # Private method to handle dividend data requests
+ + def _handle_api_dividend_request(self, cur_ticker, start, end, interval): + re_dividends = [] + test_url = 'https://query1.finance.yahoo.com/v8/finance/chart/' + cur_ticker + \ + '?period1=' + str(start) + '&period2=' + str(end) + '&interval=' + interval + '&events=div' + div_dict = self._get_api_data(test_url)['chart']['result'][0]['events']['dividends'] + for div_time_key, div_obj in div_dict.items(): + dividend_obj = { + 'date': div_obj['date'], + 'formatted_date': self.format_date(int(div_obj['date'])), + 'amount': div_obj.get('amount', None) + } + re_dividends.append(dividend_obj) + return sorted(re_dividends, key=lambda div: div['date']) + + # Public method to get dividend data for the given time interval + def get_stock_dividend_data(self, start, end, interval): + interval_code = self.get_time_code(interval) + if isinstance(self.ticker, str): + try: + return {self.ticker: self._handle_api_dividend_request(self.ticker, start, end, interval_code)} + except: + return {self.ticker: None} + else: + re_data = {} + for tick in self.ticker: + try: + div_data = self._handle_api_dividend_request(tick, start, end, interval_code) + re_data.update({tick: div_data}) + except: + re_data.update({tick: None}) + return re_data + + +# Class containing methods to create stock data extracts +class YahooFinancials(YahooFinanceETL): + + # Private method that handles financial statement extraction + def _run_financial_stmt(self, statement_type, report_num, reformat): + report_name = self.YAHOO_FINANCIAL_TYPES[statement_type][report_num] + if reformat: + raw_data = self.get_stock_data(statement_type, report_name=report_name) + data = self.get_reformatted_stmt_data(raw_data, statement_type) + else: + data = self.get_stock_data(statement_type, report_name=report_name) + return data + + # Public Method for the user to get financial statement data + def get_financial_stmts(self, frequency, statement_type, reformat=True): + report_num = self.get_report_type(frequency) + if
isinstance(statement_type, str): + data = self._run_financial_stmt(statement_type, report_num, reformat) + else: + data = {} + for stmt_type in statement_type: + re_data = self._run_financial_stmt(stmt_type, report_num, reformat) + data.update(re_data) + return data + + # Public Method for the user to get stock price data + def get_stock_price_data(self, reformat=True): + if reformat: + return self.get_clean_data(self.get_stock_tech_data('price'), 'price') + else: + return self.get_stock_tech_data('price') + + # Public Method for the user to return key-statistics data + def get_key_statistics_data(self, reformat=True): + if reformat: + return self.get_clean_data(self.get_stock_tech_data('defaultKeyStatistics'), 'defaultKeyStatistics') + else: + return self.get_stock_tech_data('defaultKeyStatistics') + + # Public Method for the user to get stock earnings data + def get_stock_earnings_data(self, reformat=True): + if reformat: + return self.get_clean_data(self.get_stock_tech_data('earnings'), 'earnings') + else: + return self.get_stock_tech_data('earnings') + + # Public Method for the user to get stock summary data + def get_summary_data(self, reformat=True): + if reformat: + return self.get_clean_data(self.get_stock_tech_data('summaryDetail'), 'summaryDetail') + else: + return self.get_stock_tech_data('summaryDetail') + + # Public Method for the user to get the yahoo summary url + def get_stock_summary_url(self): + if isinstance(self.ticker, str): + return self._BASE_YAHOO_URL + self.ticker + return {t: self._BASE_YAHOO_URL + t for t in self.ticker} + + # Public Method for the user to get stock quote data + def get_stock_quote_type_data(self): + return self.get_stock_tech_data('quoteType') + + # Public Method for user to get historical price data for a date range and time interval + def get_historical_price_data(self, start_date, end_date, time_interval): + interval_code = self.get_time_code(time_interval) + start = self.format_date(start_date) + end = self.format_date(end_date) + hist_obj =
{'start': start, 'end': end, 'interval': interval_code} + return self.get_stock_data('history', hist_obj=hist_obj) + + # Private Method for Functions needing stock_price_data + def _stock_price_data(self, data_field): + if isinstance(self.ticker, str): + if self.get_stock_price_data()[self.ticker] is None: + return None + return self.get_stock_price_data()[self.ticker].get(data_field, None) + else: + ret_obj = {} + for tick in self.ticker: + if self.get_stock_price_data()[tick] is None: + ret_obj.update({tick: None}) + else: + ret_obj.update({tick: self.get_stock_price_data()[tick].get(data_field, None)}) + return ret_obj + + # Private Method for Functions needing stock_summary_data + def _stock_summary_data(self, data_field): + if isinstance(self.ticker, str): + if self.get_summary_data()[self.ticker] is None: + return None + return self.get_summary_data()[self.ticker].get(data_field, None) + else: + ret_obj = {} + for tick in self.ticker: + if self.get_summary_data()[tick] is None: + ret_obj.update({tick: None}) + else: + ret_obj.update({tick: self.get_summary_data()[tick].get(data_field, None)}) + return ret_obj + + # Private Method for Functions needing financial statement data + def _financial_statement_data(self, stmt_type, stmt_code, field_name, freq): + re_data = self.get_financial_stmts(freq, stmt_type)[stmt_code] + if isinstance(self.ticker, str): + try: + date_key = re_data[self.ticker][0].keys()[0] + except (IndexError, AttributeError, TypeError): + date_key = list(re_data[self.ticker][0])[0] + data = re_data[self.ticker][0][date_key][field_name] + else: + data = {} + for tick in self.ticker: + try: + date_key = re_data[tick][0].keys()[0] + except: + try: + date_key = list(re_data[tick][0].keys())[0] + except: + date_key = None + if date_key is not None: + sub_data = re_data[tick][0][date_key][field_name] + data.update({tick: sub_data}) + else: + data.update({tick: None}) + return data + + # Public method to get daily dividend data + def
get_daily_dividend_data(self, start_date, end_date): + start = self.format_date(start_date) + end = self.format_date(end_date) + return self.get_stock_dividend_data(start, end, 'daily') + + # Public Price Data Methods + def get_current_price(self): + return self._stock_price_data('regularMarketPrice') + + def get_current_change(self): + return self._stock_price_data('regularMarketChange') + + def get_current_percent_change(self): + return self._stock_price_data('regularMarketChangePercent') + + def get_current_volume(self): + return self._stock_price_data('regularMarketVolume') + + def get_prev_close_price(self): + return self._stock_price_data('regularMarketPreviousClose') + + def get_open_price(self): + return self._stock_price_data('regularMarketOpen') + + def get_ten_day_avg_daily_volume(self): + return self._stock_price_data('averageDailyVolume10Day') + + def get_three_month_avg_daily_volume(self): + return self._stock_price_data('averageDailyVolume3Month') + + def get_stock_exchange(self): + return self._stock_price_data('exchangeName') + + def get_market_cap(self): + return self._stock_price_data('marketCap') + + def get_daily_low(self): + return self._stock_price_data('regularMarketDayLow') + + def get_daily_high(self): + return self._stock_price_data('regularMarketDayHigh') + + def get_currency(self): + return self._stock_price_data('currency') + + # Public Summary Data Methods + def get_yearly_high(self): + return self._stock_summary_data('fiftyTwoWeekHigh') + + def get_yearly_low(self): + return self._stock_summary_data('fiftyTwoWeekLow') + + def get_dividend_yield(self): + return self._stock_summary_data('dividendYield') + + def get_annual_avg_div_yield(self): + return self._stock_summary_data('trailingAnnualDividendYield') + + def get_five_yr_avg_div_yield(self): + return self._stock_summary_data('fiveYearAvgDividendYield') + + def get_dividend_rate(self): + return self._stock_summary_data('dividendRate') + + def get_annual_avg_div_rate(self): + return 
self._stock_summary_data('trailingAnnualDividendRate') + + def get_50day_moving_avg(self): + return self._stock_summary_data('fiftyDayAverage') + + def get_200day_moving_avg(self): + return self._stock_summary_data('twoHundredDayAverage') + + def get_beta(self): + return self._stock_summary_data('beta') + + def get_payout_ratio(self): + return self._stock_summary_data('payoutRatio') + + def get_pe_ratio(self): + return self._stock_summary_data('trailingPE') + + def get_price_to_sales(self): + return self._stock_summary_data('priceToSalesTrailing12Months') + + def get_exdividend_date(self): + return self._stock_summary_data('exDividendDate') + + # Financial Statement Data Methods + def get_book_value(self): + return self._financial_statement_data('balance', 'balanceSheetHistoryQuarterly', + 'totalStockholderEquity', 'quarterly') + + def get_ebit(self): + return self._financial_statement_data('income', 'incomeStatementHistory', 'ebit', 'annual') + + def get_net_income(self): + return self._financial_statement_data('income', 'incomeStatementHistory', 'netIncome', 'annual') + + def get_interest_expense(self): + return self._financial_statement_data('income', 'incomeStatementHistory', 'interestExpense', 'annual') + + def get_operating_income(self): + return self._financial_statement_data('income', 'incomeStatementHistory', 'operatingIncome', 'annual') + + def get_total_operating_expense(self): + return self._financial_statement_data('income', 'incomeStatementHistory', 'totalOperatingExpenses', 'annual') + + def get_total_revenue(self): + return self._financial_statement_data('income', 'incomeStatementHistory', 'totalRevenue', 'annual') + + def get_cost_of_revenue(self): + return self._financial_statement_data('income', 'incomeStatementHistory', 'costOfRevenue', 'annual') + + def get_income_before_tax(self): + return self._financial_statement_data('income', 'incomeStatementHistory', 'incomeBeforeTax', 'annual') + + def get_income_tax_expense(self): + return 
self._financial_statement_data('income', 'incomeStatementHistory', 'incomeTaxExpense', 'annual') + + def get_gross_profit(self): + return self._financial_statement_data('income', 'incomeStatementHistory', 'grossProfit', 'annual') + + def get_net_income_from_continuing_ops(self): + return self._financial_statement_data('income', 'incomeStatementHistory', + 'netIncomeFromContinuingOps', 'annual') + + def get_research_and_development(self): + return self._financial_statement_data('income', 'incomeStatementHistory', 'researchDevelopment', 'annual') + + # Calculated Financial Methods + def get_earnings_per_share(self): + price_data = self.get_current_price() + pe_ratio = self.get_pe_ratio() + if isinstance(self.ticker, str): + if price_data is not None and pe_ratio is not None: + return price_data / pe_ratio + else: + return None + else: + ret_obj = {} + for tick in self.ticker: + if price_data[tick] is not None and pe_ratio[tick] is not None: + ret_obj.update({tick: price_data[tick] / pe_ratio[tick]}) + else: + ret_obj.update({tick: None}) + return ret_obj + + def get_num_shares_outstanding(self, price_type='current'): + today_low = self._stock_summary_data('dayHigh') + today_high = self._stock_summary_data('dayLow') + cur_market_cap = self._stock_summary_data('marketCap') + if isinstance(self.ticker, str): + if cur_market_cap is not None: + if price_type == 'current': + current = self.get_current_price() + if current is not None: + today_average = current + else: + return None + else: + if today_high is not None and today_low is not None: + today_average = (today_high + today_low) / 2 + else: + return None + return cur_market_cap / today_average + else: + return None + else: + ret_obj = {} + for tick in self.ticker: + if cur_market_cap[tick] is not None: + if price_type == 'current': + current = self.get_current_price() + if current[tick] is not None: + ret_obj.update({tick: cur_market_cap[tick] / current[tick]}) + else: + ret_obj.update({tick: None}) + else: + if 
today_low[tick] is not None and today_high[tick] is not None: + today_average = (today_high[tick] + today_low[tick]) / 2 + ret_obj.update({tick: cur_market_cap[tick] / today_average}) + else: + ret_obj.update({tick: None}) + else: + ret_obj.update({tick: None}) + return ret_obj \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index d4dbb6f..d201613 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,6 @@ requests~=2.21.0 numpy~=1.15.4 beautifulsoup4~=4.7.1 -requests-cache~=0.4.13 # NOT REQUIRED \ No newline at end of file +halo~=0.0.23 +requests-cache~=0.4.13 # NOT REQUIRED +yahoofinancials~=1.5 # NOT REQUIRED \ No newline at end of file diff --git a/stocks.txt b/stocks.txt new file mode 100644 index 0000000..c8bb22d --- /dev/null +++ b/stocks.txt @@ -0,0 +1,10 @@ +VFINX +SMARX +BRASX +USIBX +DSIAX +TIHYX +SGYAX +TPLGX +PREFX +FBGRX