diff --git a/.gitignore b/.gitignore index 293b5a3..c8109c5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,6 @@ -__pycache__/StockData.cpython-37.pyc __pycache__/ -*.pyc -quickstart.py -creds.json test/ .vscode/ -listGoogle.py \ No newline at end of file +*.sqlite +README.html +*-stocks.txt \ No newline at end of file diff --git a/ExpenseRatio.py b/ExpenseRatio.py deleted file mode 100644 index f6d6df0..0000000 --- a/ExpenseRatio.py +++ /dev/null @@ -1,28 +0,0 @@ -# ExpenseRatio.py -# Andrew Dinh -# Python 3.6.1 -# Description: -''' -Asks user for expense ratio of stock (I don't think there's an API for expense ratios) -Runs corrrelation study (I'm not sure if I want another class for this or not) -''' - -import numpy -#import urllib2, re -from urllib.request import urlopen -import re - -class ExpenseRatio: - def __init__(self): - - -def main(): # For testing purposes - ''' - a = [1,2,3] - b = [2,4,6] - c = numpy.corrcoef(a, b)[0, 1] - print(c) - ''' - -if __name__ == "__main__": - main() diff --git a/Functions.py b/Functions.py index 1ec3db4..b6bf33a 100644 --- a/Functions.py +++ b/Functions.py @@ -1,24 +1,191 @@ # Python file for general functions -class Functions: - def getNearest(items, pivot): - return min(items, key=lambda x: abs(x - pivot)) - def stringToDate(date): - from datetime import datetime - #datetime_object = datetime.strptime('Jun 1 2005 1:33PM', '%b %d %Y %I:%M%p') - datetime_object = datetime.strptime(date, '%Y-%m-%d').date() - return(datetime_object) +import sys +sys.path.insert(0, './modules') + +def getNearest(items, pivot): + return min(items, key=lambda x: abs(x - pivot)) + + +def stringToDate(date): + from datetime import datetime + + #datetime_object = datetime.strptime('Jun 1 2005 1:33PM', '%b %d %Y %I:%M%p') + datetime_object = datetime.strptime(date, '%Y-%m-%d').date() + return(datetime_object) + + +def removeExtraDatesAndCloseValues(list1, list2): + # Returns the two lists but with the extra dates and corresponding close values 
removed + # list = [[dates], [close values]] + + newList1 = [[], []] + newList2 = [[], []] + + for i in range(0, len(list1[0]), 1): + for j in range(0, len(list2[0]), 1): + if list1[0][i] == list2[0][j]: + newList1[0].append(list1[0][i]) + newList2[0].append(list1[0][i]) + newList1[1].append(list1[1][i]) + newList2[1].append(list2[1][j]) + break + + returnList = [] + returnList.append(newList1) + returnList.append(newList2) + return returnList + + +def stringIsInt(s): + try: + int(s) + return True + except ValueError: + return False + + +def strintIsFloat(s): + try: + float(s) + return True + except ValueError: + return False + + +def fromCache(r): + import requests_cache + from termcolor import colored, cprint + if r.from_cache == True: + cprint('(Response taken from cache)', 'white', attrs=['dark']) + return + + +def getJoke(): + import requests + import sys + from termcolor import colored, cprint + import requests_cache + from halo import Halo + with requests_cache.disabled(): ''' - dateSplit = date.split('-') - year = int(dateSplit[0]) - month = int(dateSplit[1]) - day = int(dateSplit[2]) - datetime_object = datetime.date(year, month, day) + f = requests.get('https://official-joke-api.appspot.com/jokes/random').json() + print('') + print(f['setup']) + print(f['punchline'], end='\n\n') ''' - return datetime_object + headers = {'Accept': 'application/json', + 'User-Agent': 'fund-indicators (https://github.com/andrewkdinh/fund-indicators)'} + url = 'https://icanhazdadjoke.com' + + cprint('Get: ' + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + f = requests.get('https://icanhazdadjoke.com/', headers=headers).json() + print('') + print(colored(f['joke'], 'green')) + + +def hasNumbers(inputString): + return any(char.isdigit() for char in inputString) + + +def checkPackages(listOfPackages): + import importlib.util + import sys + + packagesInstalled = True + packages = listOfPackages + for i in range(0, len(packages), 1): + package_name = packages[i] + 
spec = importlib.util.find_spec(package_name) + if spec is None: + print( + package_name + + " is not installed\nPlease enter 'pip install -r requirements.txt' to install all required packages") + packagesInstalled = False + return packagesInstalled + + +def checkPythonVersion(): + import platform + #print('Checking Python version') + i = platform.python_version() + r = i.split('.') + k = float(''.join((r[0], '.', r[1]))) + if k < 3.3: + print('Your Python version is', i, + '\nIt needs to be greater than version 3.3') + return False + else: + print('Your Python version of', i, 'is good') + return True + + +def isConnected(): + import socket # To check internet connection + try: + # connect to the host -- tells us if the host is actually reachable + socket.create_connection(("www.andrewkdinh.com", 80)) + print('Internet connection is good') + return True + except OSError: + # pass + print("No internet connection!") + return False + + +def fileExists(file): + import os.path + return os.path.exists(file) + +def listIndexExists(i): + try: + i + return True + except IndexError: + return False + +def removeOutliers(i): + import statistics + m = statistics.median(i) + firstQ = [] + thirdQ = [] + for x in i: + if x < m: + firstQ.append(x) + elif x > m: + thirdQ.append(x) + firstQm = statistics.median(firstQ) + thirdQm = statistics.median(thirdQ) + iqr = (thirdQm - firstQm) * 1.5 + + goodList = [] + badList = [] + for x in i: + if x < (thirdQm + iqr) and x > (firstQm - iqr): + goodList.append(x) + else: + badList.append(x) # In case I want to know. 
If not, then I just make it equal to returnlist[0] + returnList = [goodList, badList, firstQm, m, thirdQm, iqr] + return returnList + +def validateJson(text): + import json + try: + json.loads(text) + return True + except ValueError: + return False + +def keyInDict(dict, key): + if key in dict: + return True + else: + return False def main(): exit() + if __name__ == "__main__": - main() + main() diff --git a/README.md b/README.md index d842a24..9f01dad 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,45 @@ -# Mutual Fund Indicators +# fund-indicators -A project to determine indicators of overperforming mutual funds. -This project is written in Python 3 and will examine market capitalization, persistence, turnover, and expense ratios. +[![License](https://img.shields.io/github/license/andrewkdinh/fund-indicators.svg)](https://raw.githubusercontent.com/andrewkdinh/fund-indicators/master/LICENSE) +[![](https://img.shields.io/github/last-commit/andrewkdinh/fund-indicators.svg)](https://github.com/andrewkdinh/fund-indicators/commits/master) +![](https://img.shields.io/github/languages/top/andrewkdinh/fund-indicators.svg) +![](https://img.shields.io/github/languages/code-size/andrewkdinh/fund-indicators.svg) -### Prerequisites +A project to determine relationships between mutual funds and different factors. 
-`$ pip install -r requirements.txt` +Calculates relationships between: Previous performance, Alpha, Sharpe Ratio, Sortino Ratio -### Quickstart +and Expense ratios, Turnover, Market Capitalization (Asset Size), Persistence -To begin, run +Give it a try at [repl.run](https://fund-indicators.andrewkdinh.repl.run) or [repl.it](https://repl.it/@andrewkdinh/fund-indicators) -`$ python main.py` +## Key Features -Some ticker values to try: -SPY, VFINX, AAPL, GOOGL +- 100% automated +- Uses multiple API's in case another fails +- Caches http requests for future runs +- Scrapes data from Yahoo Finance +- Color-coded for easy viewing +- Optional graphs to easily visualize linear regression results +- A new joke every time it runs -`$ pip install numpy` +## Quickstart + +```shell +pip install -r requirements.txt +python main.py +``` + +Pre-chosen stocks listed in `stocks.txt` + +## Credits + +This project uses a wide variety of open-source projects + +- [NumPy](https://github.com/numpy/numpy), [Termcolor](https://github.com/hfeeki/termcolor), [Beautiful Soup](https://launchpad.net/beautifulsoup), [yahoofinancials](https://github.com/JECSand/yahoofinancials), [requests-cache](https://github.com/reclosedev/requests-cache), [halo](https://github.com/manrajgrover/halo) + +And thank you to those that have helped me with the idea and product: + +- Amber Bruce, [Alex Stoykov](http://stoykov.us/), Doug Achterman, [Stack Overflow](https://stackoverflow.com) Created by Andrew Dinh from Dr. TJ Owens Gilroy Early College Academy diff --git a/StockData.py b/StockData.py deleted file mode 100644 index d515d3a..0000000 --- a/StockData.py +++ /dev/null @@ -1,561 +0,0 @@ -# StockData.py -# Andrew Dinh -# Python 3.6.1 -# Description: Returns all available dates and prices for each stock requested. - -''' -Available API's: Can it do mutual funds? 
-IEX: No -Alpha Vantage (AV): Yes -Tiingo: Yes -Barchart: No -''' - -# Alpha Vantage API Key: O42ICUV58EIZZQMU -# Barchart API Key: a17fab99a1c21cd6f847e2f82b592838 # Possible other one? f40b136c6dc4451f9136bb53b9e70ffa -# Tiingo API Key: 2e72b53f2ab4f5f4724c5c1e4d5d4ac0af3f7ca8 -# Tradier API Key: n26IFFpkOFRVsB5SNTVNXicE5MPD -# If you're going to take these API keys and abuse it, you should really reconsider your life priorities - -apiAV = 'O42ICUV58EIZZQMU' -#apiBarchart = 'a17fab99a1c21cd6f847e2f82b592838' # 150 getHistory queries per day -apiBarchart = 'f40b136c6dc4451f9136bb53b9e70ffa' -apiTiingo = '2e72b53f2ab4f5f4724c5c1e4d5d4ac0af3f7ca8' -apiTradier = 'n26IFFpkOFRVsB5SNTVNXicE5MPD' -''' -Monthly Bandwidth = 5 GB -Hourly Requests = 500 -Daily Requests = 20,000 -Symbol Requests = 500 -''' - -import requests, json -from datetime import datetime - -class StockData: - - def __init__(self, newName = '', newAbsFirstLastDates = [], newFinalDatesAndClose = [], newFinalDatesAndClose2 = [],newAllLists = []): - self.name = newName # Name of stock - self.absFirstLastDates = newAbsFirstLastDates # Absolute first and last dates from all sources - self.finalDatesAndClose = newFinalDatesAndClose # All available dates with corresponding close values - self.finalDatesAndClose2 = newFinalDatesAndClose2 # After some consideration, I decided to keep what I had already done here and make a new list that's the same except dates are in datetime format - self.allLists = newAllLists - ''' - Format: - # List from each source containing: [firstDate, lastDate, allDates, values, timeFrame] - # firstDate & lastDate = '2018-12-18' (year-month-date) - allDates = ['2018-12-17', '2018-12-14'] (year-month-date) - values (close) = ['164.6307', 164.6307] - timeFrame = [days, weeks, years] - ''' - - def set(self, newName, newFirstLastDates, newAbsFirstLastDates, newFinalDatesAndClose, newAllLists): - self.name = newName # Name of stock - self.firstLastDates = newFirstLastDates # Dates that at 
least 2 sources have (or should it be all?) - Maybe let user decide - self.absFirstLastDates = newAbsFirstLastDates # Absolute first and last dates from all sources - self.finalDatesAndClose = newFinalDatesAndClose - self.allLists = newAllLists - - def setName(self, newName): - self.name = newName - def returnName(self): - return self.name - def returnAllLists(self): - return self.allLists - def returnAbsFirstLastDates(self): - return self.absFirstLastDates - def returnAllLists(self): - return self.allLists - def returnFinalDatesAndClose(self): - return self.finalDatesAndClose - def returnFinalDatesAndClose2(self): - return self.finalDatesAndClose2 - - def getIEX(self): - url = ''.join(('https://api.iextrading.com/1.0/stock/', self.name, '/chart/5y')) - #link = "https://api.iextrading.com/1.0/stock/spy/chart/5y" - print("\nSending request to:", url) - f = requests.get(url) - json_data = f.text - #print(json_data) - if (json_data == 'Unknown symbol'): - print("IEX not available") - return 'Not available' - loaded_json = json.loads(json_data) - listIEX = [] - - print("\nFinding first and last date") - # Adding (firstDate, lastDate) to listIEX - # Find firstDate (comes first) - firstLine = loaded_json[0] - #print("firstLine:", firstLine) - firstDate = firstLine['date'] - #print("firstDate:",firstDate) - # Find lastDate (comes last) - lastLine = loaded_json[-1] # Returns last value of the list (Equivalent to len(loaded_json)-1) - #print("lastLine:", lastLine) - lastDate = lastLine['date'] - #print("last date:", lastDate) - listIEX.append(firstDate) - listIEX.append(lastDate) - print(listIEX[0], ',', listIEX[1]) - - print("\nFinding all dates given") - allDates = [] -# for i in range(0, len(loaded_json), 1): # If you want to do oldest first - for i in range(len(loaded_json)-1, -1, -1): - line = loaded_json[i] - date = line['date'] - allDates.append(date) - listIEX.append(allDates) - - #print(listIEX[2]) - print(len(listIEX[2]), "dates") - - print("\nFinding close values 
for each date") - values = [] -# for i in range(0, len(loaded_json), 1): # If you want to do oldest first - for i in range(len(loaded_json)-1, -1, -1): - line = loaded_json[i] - value = line['close'] - values.append(value) - listIEX.append(values) - #print(listIEX[3]) - print(len(listIEX[3]), "close values") - - print("\nFinding time frame given [days, weeks, years]") - timeFrame = [] - d1 = datetime.strptime(firstDate, "%Y-%m-%d") - d2 = datetime.strptime(lastDate, "%Y-%m-%d") - timeFrameDays = abs((d2 - d1).days) - #print(timeFrameDays) - timeFrameYears = float(timeFrameDays / 365) - timeFrameWeeks = float(timeFrameDays / 7) - timeFrame.append(timeFrameDays) - timeFrame.append(timeFrameWeeks) - timeFrame.append(timeFrameYears) - listIEX.append(timeFrame) - print(listIEX[4]) - - return listIEX - - def getAV(self): - listAV = [] - #url = ''.join(('https://www.alphavantage.co/query?function=TIME_SERIES_MONTHLY&symbol=', self.name, '&apikey=', apiAV)) - # https://www.alphavantage.co/query?function=TIME_SERIES_MONTHLY&symbol=MSFT&apikey=demo - - #url = ''.join(('https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=', self.name, '&outputsize=full&apikey=', apiAV)) - # https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=MSFT&outputsize=full&apikey=demo - - url = ''.join(('https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=', self.name, '&outputsize=full&apikey=', apiAV)) - # https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=MSFT&outputsize=full&apikey=demo - - print("\nSending request to:", url) - print("(This will take a while)") - f = requests.get(url) - json_data = f.text - loaded_json = json.loads(json_data) - #print(loaded_json) - - #print(type(loaded_json)) # Dictionary - #print(len(loaded_json)) - if len(loaded_json) == 1: - print("Alpha Vantage not available") - return 'Not available' - - #print(loaded_json['Monthly Time Series']) - dailyTimeSeries = loaded_json['Time Series 
(Daily)'] - #print(monthlyTimeSeries) - listOfDates = list(dailyTimeSeries) - #print(listOfDates) - - firstDate = listOfDates[-1] - lastDate = listOfDates[0] - #print("firstDate:", firstDate) - #print("lastDate:", lastDate) - listAV.append(firstDate) - listAV.append(lastDate) - listAV.append(listOfDates) - - print("\nFinding first and last date") - print(listAV[0], ',', listAV[1]) - print("\nFinding all dates given") - #print(listAV[2]) - print(len(listAV[2]), "dates") - - print("\nFinding close values for each date") - values = [] - for i in range(0, len(listOfDates), 1): - temp = listOfDates[i] - loaded_json2 = dailyTimeSeries[temp] - #value = loaded_json2['4. close'] - value = loaded_json2['5. adjusted close'] - values.append(value) - listAV.append(values) - #print(listOfDates[0]) - #i = listOfDates[0] - #print(monthlyTimeSeries[i]) - #print(listAV[3]) - print(len(listAV[3]), "close values") - - print("\nFinding time frame given [days, weeks, years]") - timeFrame = [] - d1 = datetime.strptime(firstDate, "%Y-%m-%d") - d2 = datetime.strptime(lastDate, "%Y-%m-%d") - timeFrameDays = abs((d2 - d1).days) - #print(timeFrameDays) - timeFrameYears = float(timeFrameDays / 365) - timeFrameWeeks = float(timeFrameDays / 7) - timeFrame.append(timeFrameDays) - timeFrame.append(timeFrameWeeks) - timeFrame.append(timeFrameYears) - listAV.append(timeFrame) - print(listAV[4]) - - return listAV - - def getTiingo(self): - ''' - #OR we can use the token directly in the url - headers = { - 'Content-Type': 'application/json' - } - requestResponse = requests.get("https://api.tiingo.com/api/test?token=", - headers=headers) - print(requestResponse.json()) - ''' - token = ''.join(('Token ', apiTiingo)) - headers = { - 'Content-Type': 'application/json', - 'Authorization' : token - } - url = ''.join(('https://api.tiingo.com/tiingo/daily/', self.name)) - print("\nSending request to:", url) - requestResponse = requests.get(url, headers=headers) - #print(requestResponse.json()) - loaded_json = 
requestResponse.json() - #print(len(loaded_json)) - if len(loaded_json) == 1: - print("Tiingo not available") - return 'Not available' - #print(loaded_json) - ''' - list1 = list(loaded_json) - for i in range (0, len(list1), 1): - if list1[i] == 'startDate': - startNum = i - elif list1[i] == 'endDate': - endNum = i - print(list1[startNum]) - print(list1[endNum]) - ''' - listTiingo = [] - - print("\nFinding first and last date") - firstDate = loaded_json['startDate'] - lastDate = loaded_json['endDate'] - #print(firstDate) - #print(lastDate) - listTiingo.append(firstDate) - listTiingo.append(lastDate) - print(listTiingo[0], ',', listTiingo[1]) - - print("\nFinding all dates given") - dates = [] - values = [] # Used loop for finding values - url2 = ''.join((url, '/prices?startDate=', firstDate, '&endDate=', lastDate)) - # https://api.tiingo.com/tiingo/daily//prices?startDate=2012-1-1&endDate=2016-1-1 - print("\nSending request to:", url2) - requestResponse2 = requests.get(url2, headers=headers) - loaded_json2 = requestResponse2.json() - #print(loaded_json2) - #print(len(loaded_json2)) - for i in range(len(loaded_json2)-1, -1, -1): - line = loaded_json2[i] - dateWithTime = line['date'] - temp = dateWithTime.split('T00:00:00.000Z') - date = temp[0] - dates.append(date) - - value = line['close'] - values.append(value) - listTiingo.append(dates) - #print(listTiingo[2]) - print(len(listTiingo[2]), "dates") - - print("Finding close values for each date") - # Used loop from finding dates - listTiingo.append(values) - #print(listTiingo[3]) - print(len(listTiingo[3]), "close values") - - print("Finding time frame given [days, weeks, years]") - timeFrame = [] - d1 = datetime.strptime(firstDate, "%Y-%m-%d") - d2 = datetime.strptime(lastDate, "%Y-%m-%d") - timeFrameDays = abs((d2 - d1).days) - #print(timeFrameDays) - timeFrameYears = float(timeFrameDays / 365) - timeFrameWeeks = float(timeFrameDays / 7) - timeFrame.append(timeFrameDays) - timeFrame.append(timeFrameWeeks) - 
timeFrame.append(timeFrameYears) - listTiingo.append(timeFrame) - print(listTiingo[4]) - - return listTiingo - - def getFirstLastDate(self, listOfFirstLastDates): - listOfFirstDates = [] - listOfLastDates = [] - #print(len(listOfFirstLastDates)) - for i in range (0, len(listOfFirstLastDates), 1): - firstLastDates = listOfFirstLastDates[i] - firstDate = firstLastDates[0] - lastDate = firstLastDates[1] - listOfFirstDates.append(firstDate) - listOfLastDates.append(lastDate) - #print(listOfFirstDates) - #print(listOfLastDates) - for i in range (0, len(listOfFirstDates), 1): - date = listOfFirstDates[i] - if i == 0: - firstDate = date - yearMonthDay = firstDate.split('-') - firstYear = yearMonthDay[0] - firstMonth = yearMonthDay[1] - firstDay = yearMonthDay[2] - else: - yearMonthDay = date.split('-') - year = yearMonthDay[0] - month = yearMonthDay[1] - day = yearMonthDay[2] - if year < firstYear or (year == firstYear and month < firstMonth) or (year == firstYear and month == firstMonth and day < firstDay): - firstDate = date - firstYear = year - firstMonth = month - firstDay = day - #print(firstDate) - if len(listOfFirstDates) > 1: - for i in range(0, len(listOfLastDates),1): - date = listOfLastDates[i] - if i == 0: - lastDate = date - yearMonthDay = lastDate.split('-') - lastYear = yearMonthDay[0] - lastMonth = yearMonthDay[1] - lastDay = yearMonthDay[2] - else: - yearMonthDay = date.split('-') - year = yearMonthDay[0] - month = yearMonthDay[1] - day = yearMonthDay[2] - if year > lastYear or (year == lastYear and month > lastMonth) or (year == lastYear and month == lastMonth and day > lastDay): - lastDate = date - lastYear = year - lastMonth = month - lastDay = day - #print(lastDate) - absFirstLastDates = [] - absFirstLastDates.append(firstDate) - absFirstLastDates.append(lastDate) - return absFirstLastDates - - def getFinalDatesAndClose(self): - # finalDates and finalClose will coincide (aka i = 1 will correspond to one another) - finalDatesAndClose = [] # Will 
combine finalDates then finalClose - finalDates = [] - finalClose = [] - #print(self.absFirstLastDates) - absFirstDate = self.absFirstLastDates[0] - absLastDate = self.absFirstLastDates[1] - date = absFirstDate - - allLists = self.allLists - while date != absLastDate: # DOESN'T DO LAST DATE - tempListOfClose = [] - found = False - for j in range(0, len(allLists), 1): # Look for date in all lists - list1 = allLists[j] - listOfDates = list1[2] - listOfClose = list1[3] - for k in range(0, len(listOfDates), 1): - if listOfDates[k] == date: - if found == False: - finalDates.append(date) - found = True - #print(listOfDates[k]) - #print(listOfClose[k]) - #print(listOfClose) - tempListOfClose.append(float(listOfClose[k])) - k = len(listOfDates) # Dates don't repeat - - if found == True: - sum = 0 - for r in range(0, len(tempListOfClose), 1): - sum = sum + tempListOfClose[r] - close = sum/len(tempListOfClose) - - finalClose.append(close) - #print(close) - - # Go to the next day - yearMonthDay = date.split('-') - year = int(yearMonthDay[0]) - month = int(yearMonthDay[1]) - day = int(yearMonthDay[2]) - - day = day + 1 - if day == 32 and month == 12: # Next year - day = 1 - month = 1 - year = year + 1 - elif day == 32: # Next month - month = month + 1 - day = 1 - if day < 10: - day = ''.join(('0', str(day))) - if month < 10: - month = ''.join(('0', str(month))) - date = ''.join((str(year), '-', str(month), '-', str(day))) - #print(date) - - # For last date - finalDates.append(date) - tempListOfClose = [] - for j in range(0, len(allLists), 1): # Look for date in all lists - list1 = allLists[j] - listOfDates = list1[2] - listOfClose = list1[3] - for k in range(0, len(listOfDates), 1): - if listOfDates[k] == date: - tempListOfClose.append(float(listOfClose[k])) - k = len(listOfDates) # Dates don't repeat - sum = 0 - for r in range(0, len(tempListOfClose), 1): - sum = sum + tempListOfClose[r] - close = sum/len(tempListOfClose) - finalClose.append(close) - #print(finalDates) - 
#print(finalClose) - - # Want lists from most recent to oldest, comment this out if you don't want that - finalDates = list(reversed(finalDates)) - finalClose = list(reversed(finalClose)) - - finalDatesAndClose.append(finalDates) - finalDatesAndClose.append(finalClose) - return finalDatesAndClose - - def datetimeDates(self): - finalDatesAndClose2 = [] - finalDatesAndClose = self.finalDatesAndClose - finalDatesStrings = finalDatesAndClose[0] - finalClose = finalDatesAndClose[1] - finalDates = [] - - from Functions import Functions - for i in range(0, len(finalDatesStrings), 1): - temp = Functions.stringToDate(finalDatesStrings[i]) - finalDates.append(temp) - #print(finalDates) - - finalDatesAndClose2.append(finalDates) - finalDatesAndClose2.append(finalClose) - return(finalDatesAndClose2) - - def is_connected(): - import socket # To check internet connection - try: - # connect to the host -- tells us if the host is actually - # reachable - socket.create_connection(("www.andrewkdinh.com", 80)) - return True - except OSError: - #pass - print("\nNo internet connection!") - return False - - def main(self): - print('Beginning StockData.py') - - import importlib.util, sys # To check whether a package is installed - - packages = ['requests'] - for i in range(0, len(packages), 1): - package_name = packages[i] - spec = importlib.util.find_spec(package_name) - if spec is None: - print(package_name +" is not installed\nPlease type in 'pip install -r requirements.txt' to install all required packages") - - # Test internet connection - internetConnection = StockData.is_connected() - if internetConnection == False: - return - - listOfFirstLastDates = [] - self.allLists = [] - - print('\nNOTE: Only IEX and Alpha Vantage support adjusted returns') - print('NOTE: Only Alpha Vantage and Tiingo support mutual fund data') - - # IEX - print("\nIEX") - listIEX = StockData.getIEX(self) - #print(listIEX) - if listIEX != 'Not available': - listOfFirstLastDates.append((listIEX[0], 
listIEX[1])) - self.allLists.append(listIEX) - - # Alpha Vantage - print("\nAlpha Vantage (AV)") - listAV = StockData.getAV(self) - #print(listAV) - if listAV != 'Not available': - listOfFirstLastDates.append((listAV[0], listAV[1])) - self.allLists.append(listAV) - - # COMMENTED OUT FOR NOW B/C LIMITED - ''' - print("\nTiingo") - print("NOTE: Tiingo does not return adjusted returns!!") - listTiingo = StockData.getTiingo(self) - #print(listTiingo) - if listTiingo != 'Not available': - listOfFirstLastDates.append((listTiingo[0], listTiingo[1])) - self.allLists.append(listTiingo) - ''' - - #print(self.allLists) - #print(listOfFirstLastDates) - if (len(self.allLists) > 0): - print("\n", end='') - print(len(self.allLists), "available source(s) for", self.name) - self.absFirstLastDates = StockData.getFirstLastDate(self, listOfFirstLastDates) - print("\nThe absolute first date with close values is:", self.absFirstLastDates[0]) - print("The absolute last date with close values is:", self.absFirstLastDates[1]) - - print("\nCombining dates and averaging close values") - self.finalDatesAndClose = StockData.getFinalDatesAndClose(self) # Returns [List of Dates, List of Corresponding Close Values] - #print("All dates available:", self.finalDatesAndClose[0]) - #print("All close values:\n", self.finalDatesAndClose[1]) - finalDates = self.finalDatesAndClose[0] - finalClose = self.finalDatesAndClose[1] - print(len(finalDates), "unique dates:", finalDates[len(finalDates)-1], "...", finalDates[0]) - print(len(finalClose), "close values:", finalClose[len(finalClose)-1], "...", finalClose[0]) - - print("\nConverting list of final dates to datetime\n") - self.finalDatesAndClose2 = StockData.datetimeDates(self) - #print(self.finalDatesAndClose2[0][0]) - - else: - print("No sources have data for", self.name) - -def main(): # For testing purposes - stockName = 'spy' - stock1 = StockData(stockName) - print("Finding available dates and close values for", stock1.name) - StockData.main(stock1) 
- -if __name__ == "__main__": - main() diff --git a/StockReturn.py b/StockReturn.py deleted file mode 100644 index 3911de5..0000000 --- a/StockReturn.py +++ /dev/null @@ -1,190 +0,0 @@ -# ExpenseRatio.py -# Andrew Dinh -# Python 3.6.7 -# Description: -''' -Calculates return for each stock from the lists from ExpenseRatio.py -listOfReturn = [Unadjusted Return, Sharpe Ratio, Sortino Ratio, Treynor Ratio, Jensen's Alpha] -''' - -from StockData import StockData -import datetime -from Functions import Functions - -class Return: - def __init__(self, newListOfReturn = [], newTimeFrame = [], newBeta = 0, newStandardDeviation = 0, newNegativeStandardDeviation = 0, newMarketReturn = 0, newSize = 0, newSizeOfNeg = 0, newFirstLastDates = [], newAllLists = [], newAbsFirstLastDates = ''): - self.listOfReturn = newListOfReturn - self.timeFrame = newTimeFrame # [years, months (30 days)] - self.beta = newBeta - self.standardDeviation = newStandardDeviation - self.negativeStandardDeviation = newNegativeStandardDeviation - self.marketReturn = newMarketReturn - self.size = newSize - self.sizeOfNeg = newSizeOfNeg - self.firstLastDates = newFirstLastDates - - def returnTimeFrame(self): - return self.timeFrame - - def setTimeFrame(self, newTimeFrame): - self.timeFrame = newTimeFrame - - def getFirstLastDates(self, stock): - firstLastDates = [] - timeFrame = self.timeFrame - firstDate = datetime.datetime.now() - datetime.timedelta(days=timeFrame[0]*365) - firstDate = firstDate - datetime.timedelta(days=timeFrame[1]*30) - firstDate = ''.join((str(firstDate.year),'-', str(firstDate.month), '-', str(firstDate.day))) - - lastDate = StockData.returnAbsFirstLastDates(stock)[1] - #print(lastDate) - firstLastDates.append(firstDate) - firstLastDates.append(lastDate) - return firstLastDates - - def getFirstLastDates2(self, stock): - finalDatesAndClose = StockData.returnFinalDatesAndClose(stock) - finalDatesAndClose2 = StockData.returnFinalDatesAndClose2(stock) - firstDate = self.firstLastDates[0] - 
lastDate = self.firstLastDates[1] - finalDates = finalDatesAndClose[0] - - firstDateExists = False - lastDateExists = False - for i in range(0, len(finalDates), 1): - if finalDates[i] == str(firstDate): - firstDateExists = True - elif finalDates[i] == lastDate: - lastDateExists = True - i = len(finalDates) - - if firstDateExists == False: - print("Could not find first date. Changing first date to closest date") - tempDate = Functions.stringToDate(firstDate) # Change to datetime - print('Original first date:', tempDate) - #tempDate = datetime.date(2014,1,17) - newFirstDate = Functions.getNearest(finalDatesAndClose2[0], tempDate) - print('New first date:', newFirstDate) - firstDate = str(newFirstDate) - - if lastDateExists == False: - print("Could not find final date. Changing final date to closest date") - tempDate2 = Functions.stringToDate(lastDate) # Change to datetime - print('Original final date:', tempDate2) - #tempDate2 = datetime.date(2014,1,17) - newLastDate = Functions.getNearest(finalDatesAndClose2[0], tempDate2) - print('New final date:', newLastDate) - lastDate = str(newLastDate) - - firstLastDates = [] - firstLastDates.append(firstDate) - firstLastDates.append(lastDate) - return firstLastDates - - def getUnadjustedReturn(self, stock): - finalDatesAndClose = StockData.returnFinalDatesAndClose(stock) - firstDate = self.firstLastDates[0] - lastDate = self.firstLastDates[1] - finalDates = finalDatesAndClose[0] - finalClose = finalDatesAndClose[1] - - for i in range(0, len(finalDates), 1): - if finalDates[i] == str(firstDate): - firstClose = finalClose[i] - elif finalDates[i] == lastDate: - lastClose = finalClose[i] - i = len(finalDates) - - print('Close values:', firstClose, '...', lastClose) - fullUnadjustedReturn = float(lastClose/firstClose) - unadjustedReturn = fullUnadjustedReturn**(1/(self.timeFrame[0]+(self.timeFrame[1])*.1)) - return unadjustedReturn - - def getBeta(self): - # Can be calculated with correlation - import numpy as np - - 
finalDatesAndClose = StockData.returnFinalDatesAndClose(stock) - firstDate = self.firstLastDates[0] - lastDate = self.firstLastDates[1] - finalDates = finalDatesAndClose[0] - finalClose = finalDatesAndClose[1] - - for i in range(0, len(finalDates), 1): - if finalDates[i] == str(firstDate): - firstClose = finalClose[i] -55ggbh - #list1 = - list2 = [1,2,4,1] - - print(numpy.corrcoef(list1, list2)[0, 1]) - -# def getStandardDeviation(self, timeFrame): - - def mainBenchmark(self, stock): - print('Beginning StockReturn.py') - - # Find date to start from and last date - self.timeFrame = [] - self.listOfReturn = [] - - print("\nPlease enter a time frame in years: ", end='') - #timeFrameYear = int(input()) - timeFrameYear = 5 - print(timeFrameYear) - self.timeFrame.append(timeFrameYear) - print("Please enter a time frame in months (30 days): ", end='') - #timeFrameMonth = int(input()) - timeFrameMonth = 0 - print(timeFrameMonth) - self.timeFrame.append(timeFrameMonth) - #print(self.timeFrame) - self.firstLastDates = Return.getFirstLastDates(self, stock) - print('Dates: ', self.firstLastDates) - - print('\nMaking sure dates are within list...') - self.firstLastDates = Return.getFirstLastDates2(self, stock) - print('New dates: ', self.firstLastDates) - - print('\nGetting unadjusted return') - unadjustedReturn = Return.getUnadjustedReturn(self, stock) - self.listOfReturn.append(unadjustedReturn) - print('Average annual return for the past', self.timeFrame[0], 'years and', self.timeFrame[1], 'months: ', end='') - print((self.listOfReturn[0]-1)*100, '%', sep='') - - - def main(self, stock): - print('Beginning StockReturn.py') - - # Find date to start from and last date - self.listOfReturn = [] - - self.firstLastDates = Return.getFirstLastDates(self, stock) - print('Dates: ', self.firstLastDates) - - print('\nMaking sure dates are within list...') - self.firstLastDates = Return.getFirstLastDates2(self, stock) - print('New dates: ', self.firstLastDates) - - print('\nGetting 
unadjusted return') - unadjustedReturn = Return.getUnadjustedReturn(self, stock) - self.listOfReturn.append(unadjustedReturn) - print('Average annual return for the past', self.timeFrame[0], 'years and', self.timeFrame[1], 'months: ', end='') - print((self.listOfReturn[0]-1)*100, '%', sep='') - - #print('\nGetting beta') - #beta = Return.getBeta(self, stock) - -def main(): - stockName = 'spy' - stock1 = StockData(stockName) - print("Finding available dates and close values for", stock1.name) - StockData.main(stock1) - - stock1Return = Return() - Return.setTimeFrame(stock1Return, [5, 0]) - - Return.main(stock1Return, stock1) - -if __name__ == "__main__": - main() diff --git a/config.example.json b/config.example.json new file mode 100644 index 0000000..f90a8e5 --- /dev/null +++ b/config.example.json @@ -0,0 +1,63 @@ +{ + "_comment": "Only use this if everything you know is correct", + "Config": { + "Check Packages": true, + "Check Python Version": true, + "Check Internet Connection": false, + "Get Joke": true, + "Benchmark": "SPY", + "Method": "Kiplinger", + "Time Frame": 60, + "Indicator": "Expense Ratio", + "Remove Outliers": true, + "Sources": [ + "Alpha Vantage", + "Yahoo", + "IEX", + "Tiingo" + ] + }, + "Possible Values": { + "Check Packages": [ + true, + false + ], + "Check Python Version": [ + true, + false + ], + "Check Internet Connection": [ + true, + false + ], + "Get Joke": [ + true, + false + ], + "Benchmark": [ + "SPY", + "DJIA", + "VTHR", + "EFG" + ], + "Method": [ + "Read", + "Manual", + "U.S. 
News", + "Kiplinger", + "TheStreet" + ], + "Time Frame": "Any integer", + "Indicator": [ + "Expense Ratio", + "Market Capitalization", + "Turnover", + "Persistence" + ], + "Remove Outliers": [ + true, + false + ], + "Sources": "Choose an order out of ['Alpha Vantage', 'Yahoo', 'IEX', 'Tiingo']" + } +} diff --git a/listGoogle.py b/listGoogle.py deleted file mode 100644 index f911dba..0000000 --- a/listGoogle.py +++ /dev/null @@ -1,54 +0,0 @@ -# https://support.google.com/docs/answer/3093281?hl=en -# Historical data cannot be downloaded or accessed via the Sheets API or Apps Script. If you attempt to do so, you will see a #N/A error in place of the values in the corresponding cells of your spreadsheet. - -import gspread, time, webbrowser, msvcrt -from oauth2client.service_account import ServiceAccountCredentials - -def main(): - scope = ['https://spreadsheets.google.com/feeds', - 'https://www.googleapis.com/auth/drive'] - - credentials = ServiceAccountCredentials.from_json_keyfile_name('creds.json', scope) - - gc = gspread.authorize(credentials) - ''' - # Just by ID: - #sheet = gc.open_by_key('1YS8qBQCXKNfSgQgXeUdSGOd6lM2wm-inV0_1YE36vQM') - sheet = gc.open_by_url('https://docs.google.com/spreadsheets/d/1YS8qBQCXKNfSgQgXeUdSGOd6lM2wm-inV0_1YE36vQM') - worksheet = sheet.get_worksheet(0) - worksheet.update_acell('B1', 'bingo!') - #worksheet.update_cell(1, 2, 'Bingo!') - val = worksheet.acell('B1').value - #val = worksheet.cell(1, 2).value - print(val) - ''' - url = 'https://docs.google.com/spreadsheets/d/1YS8qBQCXKNfSgQgXeUdSGOd6lM2wm-inV0_1YE36vQM' - surl = 'https://www.andrewkdinh.com/u/listGoogle' - print("Opening", url) - #webbrowser.open(surl) - sheet = gc.open_by_url(url) - worksheet = sheet.get_worksheet(0) - print('Writing Google Finance function to A1') - worksheet.update_cell(1, 1, '=GOOGLEFINANCE("GOOG", "price", DATE(2014,1,1), DATE(2014,12,31), "DAILY")') - print('\nOpening link to the Google Sheet. 
Please download the file as comma-separated values (.csv) and move it to the directory of this Python file', - '\nFile > Download as > Comma-separated values(.csv,currentsheet)') - print("If the link did not open, please go to", surl) - print("Press any key to continue") - #time.sleep(45) - ''' - for i in range(60, 0, -1): - print(i, end='\r') - time.sleep(1) - ''' - waiting = True - while waiting == True: - if msvcrt.kbhit(): - waiting = False - - print("e") - - #val = worksheet.acell('A1').value - #print(val) - -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/main.py b/main.py index 138b810..14957f7 100644 --- a/main.py +++ b/main.py @@ -1,119 +1,1636 @@ -# main.py +# https://github.com/andrewkdinh/fund-indicators +# Determine indicators of overperforming mutual funds # Andrew Dinh -# Python 3.6.1 -# Description: -''' -Asks users for mutual funds/stocks to compare -Asks to be compared (expense ratio, turnover, market capitalization, or persistence) -Asks for time period (Possibly: 1 year, 5 years, 10 years) -Makes the mutual funds as class Stock -Gets data from each API -Compare and contrast dates and end changeOverTime for set time period - NOTES: Later can worry about getting close values to make a graph or something -Gives correlation value using equation at the end (from 0 to 1) +# Python 3.6.7 -FIRST TESTING WITH EXPENSE RATIO +# PYTHON FILES +import Functions +from yahoofinancials import YahooFinancials +from termcolor import cprint + +# REQUIRED +import requests_cache +import os.path +import re +import datetime +import json +import requests +from bs4 import BeautifulSoup +import numpy as np + +# OPTIONAL +import matplotlib.pyplot as plt +from halo import Halo + +# FOR ASYNC +from concurrent.futures import ThreadPoolExecutor as PoolExecutor +import time +import random + +import sys +sys.path.insert(0, './modules') + +requests_cache.install_cache( + 'cache', backend='sqlite', expire_after=43200) # 12 hours + +# API Keys +apiAV = 
'O42ICUV58EIZZQMU' +# apiBarchart = 'a17fab99a1c21cd6f847e2f82b592838' +apiBarchart = 'f40b136c6dc4451f9136bb53b9e70ffa' +apiTiingo = '2e72b53f2ab4f5f4724c5c1e4d5d4ac0af3f7ca8' +apiTradier = 'n26IFFpkOFRVsB5SNTVNXicE5MPD' +apiQuandl = 'KUh3U3hxke9tCimjhWEF' +# apiIntrinio = 'OmNmN2E5YWI1YzYxN2Q4NzEzZDhhOTgwN2E2NWRhOWNl' +# If you're going to take these API keys and abuse it, you should really reconsider your life priorities + +''' +API Keys: + Alpha Vantage API Key: O42ICUV58EIZZQMU + Barchart API Key: a17fab99a1c21cd6f847e2f82b592838 + Possible other one? f40b136c6dc4451f9136bb53b9e70ffa + 150 getHistory queries per day + Tiingo API Key: 2e72b53f2ab4f5f4724c5c1e4d5d4ac0af3f7ca8 + Tradier API Key: n26IFFpkOFRVsB5SNTVNXicE5MPD + Monthly Bandwidth = 5 GB + Hourly Requests = 500 + Daily Requests = 20,000 + Symbol Requests = 500 + Quandl API Key: KUh3U3hxke9tCimjhWEF + Intrinio API Key: OmNmN2E5YWI1YzYxN2Q4NzEzZDhhOTgwN2E2NWRhOWNl + + Mutual funds? + Yes: Alpha Vantage, Tiingo + No: IEX, Barchart + + Adjusted? + Yes: Alpha Vantage, IEX + No: Tiingo ''' -from StockData import StockData -from StockReturn import Return +class Stock: -listOfStocksData = [] -listOfStocksReturn = [] -#numberOfStocks = int(input("How many stocks or mutual funds would you like to analyze? 
")) # CHANGE BACK LATER -numberOfStocks = 1 -for i in range(0, numberOfStocks, 1): - print("Stock", i+1, ": ", end='') - stockName = str(input()) - listOfStocksData.append(i) - listOfStocksData[i] = StockData() - listOfStocksData[i].setName(stockName) - # print(listOfStocksData[i].name) + # GLOBAL VARIABLES + timeFrame = 0 # Months + riskFreeRate = 0 + indicator = '' - #listOfStocksReturn.append(i) - #listOfStocksReturn[i] = StockReturn() + # CONFIG + removeOutliers = True + sourceList = ['Alpha Vantage', 'Yahoo', 'IEX', 'Tiingo'] + config = 'N/A' + + # BENCHMARK VALUES + benchmarkDates = [] + benchmarkCloseValues = [] + benchmarkAverageMonthlyReturn = 0 + benchmarkStandardDeviation = 0 + + # INDICATOR VALUES + indicatorCorrelation = [] + indicatorRegression = [] + persTimeFrame = 0 + + def __init__(self): + # BASIC DATA + self.name = '' # Ticker symbol + self.allDates = [] + self.allCloseValues = [] + self.dates = [] + self.closeValues = [] + self.datesMatchBenchmark = [] + self.closeValuesMatchBenchmark = [] + + # CALCULATED RETURN + self.averageMonthlyReturn = 0 + self.monthlyReturn = [] + self.sharpe = 0 + self.sortino = 0 + self.treynor = 0 + self.alpha = 0 + self.beta = 0 + self.standardDeviation = 0 + self.downsideDeviation = 0 + self.kurtosis = 0 + self.skewness = 0 # Not sure if I need this + self.correlation = 0 + self.linearRegression = [] # for y=mx+b, this list has [m,b] + + self.indicatorValue = '' + + def setName(self, newName): + self.name = newName + + def getName(self): + return self.name + + def getAllDates(self): + return self.allDates + + def getAllCloseValues(self): + return self.allCloseValues + + def IEX(self): + url = ''.join( + ('https://api.iextrading.com/1.0/stock/', self.name, '/chart/5y')) + # link = "https://api.iextrading.com/1.0/stock/spy/chart/5y" + cprint("Get: " + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + f = requests.get(url) + Functions.fromCache(f) + json_data = f.text + if json_data == 'Unknown symbol' or 
f.status_code != 200: + print("IEX not available") + return 'N/A' + loaded_json = json.loads(json_data) + listIEX = [] + + print("\nFinding all dates given") + allDates = [] + for i in range(0, len(loaded_json), 1): # If you want to do oldest first + # for i in range(len(loaded_json)-1, -1, -1): + line = loaded_json[i] + date = line['date'] + allDates.append(date) + listIEX.append(allDates) + print(len(listIEX[0]), "dates") + + # print("\nFinding close values for each date") + values = [] + for i in range(0, len(loaded_json), 1): # If you want to do oldest first + # for i in range(len(loaded_json)-1, -1, -1): + line = loaded_json[i] + value = line['close'] + values.append(value) + listIEX.append(values) + + print(len(listIEX[0]), 'dates and', len(listIEX[1]), "close values") + return listIEX + + def AV(self): + listAV = [] + url = ''.join(('https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=', + self.name, '&outputsize=full&apikey=', apiAV)) + # https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=MSFT&outputsize=full&apikey=demo + + cprint("Get: " + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + f = requests.get(url) + Functions.fromCache(f) + json_data = f.text + loaded_json = json.loads(json_data) + + if len(loaded_json) == 1 or f.status_code != 200 or len(loaded_json) == 0: + print("Alpha Vantage not available") + return 'N/A' + + dailyTimeSeries = loaded_json['Time Series (Daily)'] + listOfDates = list(dailyTimeSeries) + # listAV.append(listOfDates) + listAV.append(list(reversed(listOfDates))) + + # print("\nFinding close values for each date") + values = [] + for i in range(0, len(listOfDates), 1): + temp = listOfDates[i] + loaded_json2 = dailyTimeSeries[temp] + # value = loaded_json2['4. close'] + value = loaded_json2['5. 
adjusted close'] + values.append(float(value)) + # listAV.append(values) + listAV.append(list(reversed(values))) + print(len(listAV[0]), 'dates and', len(listAV[1]), "close values") + + return listAV + + def Tiingo(self): + token = ''.join(('Token ', apiTiingo)) + headers = { + 'Content-Type': 'application/json', + 'Authorization': token + } + url = ''.join(('https://api.tiingo.com/tiingo/daily/', self.name)) + cprint("Get: " + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + f = requests.get(url, headers=headers) + Functions.fromCache(f) + loaded_json = f.json() + if len(loaded_json) == 1 or f.status_code != 200 or loaded_json['startDate'] == None: + print("Tiingo not available") + return 'N/A' + + listTiingo = [] + + print("\nFinding first and last date") + firstDate = loaded_json['startDate'] + lastDate = loaded_json['endDate'] + print(firstDate, '...', lastDate) + + print("\nFinding all dates given", end='') + dates = [] + values = [] + url2 = ''.join((url, '/prices?startDate=', + firstDate, '&endDate=', lastDate)) + # https://api.tiingo.com/tiingo/daily//prices?startDate=2012-1-1&endDate=2016-1-1 + cprint("\nGet: " + url2 + '\n', 'white', attrs=['dark']) + with Halo(spinner='dots'): + requestResponse2 = requests.get(url2, headers=headers) + Functions.fromCache(requestResponse2) + loaded_json2 = requestResponse2.json() + for i in range(0, len(loaded_json2)-1, 1): + line = loaded_json2[i] + dateWithTime = line['date'] + temp = dateWithTime.split('T00:00:00.000Z') + date = temp[0] + dates.append(date) + + value = line['close'] + values.append(value) + listTiingo.append(dates) + print(len(listTiingo[0]), "dates") + + # print("Finding close values for each date") + # Used loop from finding dates + listTiingo.append(values) + + print(len(listTiingo[0]), 'dates and', + len(listTiingo[1]), "close values") + return listTiingo + + def Yahoo(self): + url = ''.join(('https://finance.yahoo.com/quote/', + self.name, '?p=', self.name)) + cprint('Get: ' + url, 
'white', attrs=['dark']) + with Halo(spinner='dots'): + t = requests.get(url) + if t.history: + print('Yahoo Finance does not have data for', self.name) + print('Yahoo not available') + return 'N/A' + else: + print('Yahoo Finance has data for', self.name) + + ticker = self.name + firstDate = datetime.datetime.now().date( + ) - datetime.timedelta(days=self.timeFrame*31) # 31 days as a buffer just in case + with Halo(spinner='dots'): + yahoo_financials = YahooFinancials(ticker) + r = yahoo_financials.get_historical_price_data( + str(firstDate), str(datetime.date.today()), 'daily') + + s = r[self.name]['prices'] + listOfDates = [] + listOfCloseValues = [] + for i in range(0, len(s), 1): + listOfDates.append(s[i]['formatted_date']) + listOfCloseValues.append(s[i]['close']) + listYahoo = [listOfDates, listOfCloseValues] + + # Sometimes close value is a None value + i = 0 + while i < len(listYahoo[1]): + if Functions.listIndexExists(listYahoo[1][i]) == True: + if listYahoo[1][i] == None: + del listYahoo[1][i] + del listYahoo[0][i] + i = i - 1 + i = i + 1 + else: + break + + print(len(listYahoo[0]), 'dates and', + len(listYahoo[1]), "close values") + return listYahoo + + def datesAndClose(self): + cprint('\n' + str(self.name), 'cyan') + + sourceList = Stock.sourceList + # Use each source until you get a value + for j in range(0, len(sourceList), 1): + source = sourceList[j] + print('Source being used:', source) + + if source == 'Alpha Vantage': + datesAndCloseList = Stock.AV(self) + elif source == 'Yahoo': + datesAndCloseList = Stock.Yahoo(self) + elif source == 'IEX': + datesAndCloseList = Stock.IEX(self) + elif source == 'Tiingo': + datesAndCloseList = Stock.Tiingo(self) + + if datesAndCloseList != 'N/A': + break + else: + if j == len(sourceList)-1: + print('\nNo sources have data for', self.name) + print('Removing ' + self.name + + ' from list of stocks to ensure compatibility later') + return 'N/A' + print('') + + # Convert dates to datetime + allDates = 
datesAndCloseList[0] + for j in range(0, len(allDates), 1): + allDates[j] = Functions.stringToDate(allDates[j]) + datesAndCloseList[0] = allDates + + # Determine if close value list has value of zero + # AKA https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=RGN&outputsize=full&apikey=O42ICUV58EIZZQMU + for i in datesAndCloseList[1]: + if i == 0: + print('Found close value of 0. This is likely something like ticker RGN (Daily Time Series with Splits and Dividend Events)') + print('Removing ' + self.name + + 'from list of stocks to ensure compability later') + return 'N/A' + + return datesAndCloseList + + def datesAndCloseFitTimeFrame(self): + print('\nShortening list to fit time frame') + # Have to do this because if I just make dates = self.allDates & closeValues = self.allCloseValues, then deleting from dates & closeValues also deletes it from self.allDates & self.allCloseValues (I'm not sure why) + dates = [] + closeValues = [] + for i in range(0, len(self.allDates), 1): + dates.append(self.allDates[i]) + closeValues.append(self.allCloseValues[i]) + + firstDate = datetime.datetime.now().date() - datetime.timedelta( + days=self.timeFrame*30) + print(self.timeFrame, ' months ago: ', firstDate, sep='') + closestDate = Functions.getNearest(dates, firstDate) + if closestDate != firstDate: + print('Closest date available for', self.name, ':', closestDate) + firstDate = closestDate + else: + print(self.name, 'has a close value for', firstDate) + + # Remove dates in list up to firstDate + while dates[0] != firstDate: + dates.remove(dates[0]) + + # Remove close values until list is same length as dates + while len(closeValues) != len(dates): + closeValues.remove(closeValues[0]) + + datesAndCloseList2 = [] + datesAndCloseList2.append(dates) + datesAndCloseList2.append(closeValues) + + print(len(dates), 'dates and', len(closeValues), 'close values') + return datesAndCloseList2 + + def calcAverageMonthlyReturn(self): # pylint: disable=E0202 + # 
averageMonthlyReturn = (float(self.closeValues[len(self.closeValues)-1]/self.closeValues[0])**(1/(self.timeFrame)))-1 + # averageMonthlyReturn = averageMonthlyReturn * 100 + averageMonthlyReturn = sum(self.monthlyReturn)/self.timeFrame + print('Average monthly return:', averageMonthlyReturn) + return averageMonthlyReturn + + def calcMonthlyReturn(self): + monthlyReturn = [] + + # Calculate monthly return in order from oldest to newest + monthlyReturn = [] + for i in range(0, self.timeFrame, 1): + firstDate = datetime.datetime.now().date() - datetime.timedelta( + days=(self.timeFrame-i)*30) + secondDate = datetime.datetime.now().date() - datetime.timedelta( + days=(self.timeFrame-i-1)*30) + + # Find closest dates to firstDate and lastDate + firstDate = Functions.getNearest(self.dates, firstDate) + secondDate = Functions.getNearest(self.dates, secondDate) + + if firstDate == secondDate: + print('Closest date is', firstDate, + 'which is after the given time frame.') + return 'N/A' + + # Get corresponding close values and calculate monthly return + for i in range(0, len(self.dates), 1): + if self.dates[i] == firstDate: + firstClose = self.closeValues[i] + elif self.dates[i] == secondDate: + secondClose = self.closeValues[i] + break + + monthlyReturnTemp = (secondClose/firstClose)-1 + monthlyReturnTemp = monthlyReturnTemp * 100 + monthlyReturn.append(monthlyReturnTemp) + + # print('Monthly return over the past', self.timeFrame, 'months:', monthlyReturn) + return monthlyReturn + + def calcCorrelation(self, closeList): + correlation = np.corrcoef( + self.closeValuesMatchBenchmark, closeList)[0, 1] + print('Correlation with benchmark:', correlation) + return correlation + + def calcStandardDeviation(self): + numberOfValues = self.timeFrame + mean = self.averageMonthlyReturn + standardDeviation = ( + (sum((self.monthlyReturn[x]-mean)**2 for x in range(0, numberOfValues, 1)))/(numberOfValues-1))**(1/2) + print('Standard Deviation:', standardDeviation) + return 
standardDeviation + + def calcDownsideDeviation(self): + numberOfValues = self.timeFrame + targetReturn = self.averageMonthlyReturn + downsideDeviation = ( + (sum(min(0, (self.monthlyReturn[x]-targetReturn))**2 for x in range(0, numberOfValues, 1)))/(numberOfValues-1))**(1/2) + print('Downside Deviation:', downsideDeviation) + return downsideDeviation + + def calcKurtosis(self): + numberOfValues = self.timeFrame + mean = self.averageMonthlyReturn + kurtosis = (sum((self.monthlyReturn[x]-mean)**4 for x in range( + 0, numberOfValues, 1)))/((numberOfValues-1)*(self.standardDeviation ** 4)) + print('Kurtosis:', kurtosis) + return kurtosis + + def calcSkewness(self): + numberOfValues = self.timeFrame + mean = self.averageMonthlyReturn + skewness = (sum((self.monthlyReturn[x]-mean)**3 for x in range( + 0, numberOfValues, 1)))/((numberOfValues-1)*(self.standardDeviation ** 3)) + print('Skewness:', skewness) + return skewness + + def calcBeta(self): + beta = self.correlation * \ + (self.standardDeviation/Stock.benchmarkStandardDeviation) + print('Beta:', beta) + return beta + + def calcAlpha(self): + alpha = self.averageMonthlyReturn - \ + (Stock.riskFreeRate+((Stock.benchmarkAverageMonthlyReturn - + Stock.riskFreeRate) * self.beta)) + print('Alpha:', alpha) + return alpha + + def calcSharpe(self): + sharpe = (self.averageMonthlyReturn - Stock.riskFreeRate) / \ + self.standardDeviation + print('Sharpe Ratio:', sharpe) + return sharpe + + def calcSortino(self): + sortino = (self.averageMonthlyReturn - self.riskFreeRate) / \ + self.downsideDeviation + print('Sortino Ratio:', sortino) + return sortino + + def calcTreynor(self): + treynor = (self.averageMonthlyReturn - Stock.riskFreeRate)/self.beta + print('Treynor Ratio:', treynor) + return treynor + + def calcLinearRegression(self): + dates = self.dates + y = self.closeValues + + # First change dates to integers (days from first date) + x = datesToDays(dates) + + x = np.array(x) + y = np.array(y) + + # Estimate coefficients 
+ # number of observations/points + n = np.size(x) + + # mean of x and y vector + m_x, m_y = np.mean(x), np.mean(y) + + # calculating cross-deviation and deviation about x + SS_xy = np.sum(y*x) - n*m_y*m_x + SS_xx = np.sum(x*x) - n*m_x*m_x + + # calculating regression coefficients + b_1 = SS_xy / SS_xx + b_0 = m_y - b_1*m_x + + b = [b_0, b_1] + + formula = ''.join( + ('y = ', str(round(float(b[0]), 2)), 'x + ', str(round(float(b[1]), 2)))) + print('Linear regression formula:', formula) + + # Stock.plot_regression_line(self, x, y, b) + + regression = [] + regression.append(b[0]) + regression.append(b[1]) + return regression + + def plot_regression_line(self, x, y, b): + # plotting the actual points as scatter plot + plt.scatter(self.dates, y, color="m", + marker="o", s=30) + + # predicted response vector + y_pred = b[0] + b[1]*x + + # plotting the regression line + plt.plot(self.dates, y_pred, color="g") + + # putting labels + plt.title(self.name) + plt.xlabel('Dates') + plt.ylabel('Close Values') + + # function to show plot + plt.show(block=False) + for i in range(3, 0, -1): + if i == 1: + sys.stdout.write('Keeping plot open for ' + + str(i) + ' second \r') + else: + sys.stdout.write('Keeping plot open for ' + + str(i) + ' seconds \r') + plt.pause(1) + sys.stdout.flush() + plt.close() + + def scrapeYahooFinance(self): + # Determine if ETF, Mutual fund, or stock + url = ''.join(('https://finance.yahoo.com/quote/', + self.name, '?p=', self.name)) + cprint('Get: ' + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + t = requests.get(url) + Functions.fromCache(t) + if t.history: + print('Yahoo Finance does not have data for', self.name) + return 'N/A' + else: + print('Yahoo Finance has data for', self.name) + + stockType = '' + url2 = ''.join(('https://finance.yahoo.com/lookup?s=', self.name)) + cprint('Get: ' + url2, 'white', attrs=['dark']) + with Halo(spinner='dots'): + x = requests.get(url2) + raw_html = x.text + Functions.fromCache(x) + + soup2 = 
BeautifulSoup(raw_html, 'html.parser') + # Type (Stock, ETF, Mutual Fund) + r = soup2.find_all( + 'td', attrs={'class': 'data-col4 Ta(start) Pstart(20px) Miw(30px)'}) + u = soup2.find_all('a', attrs={'class': 'Fw(b)'}) # Name and class + z = soup2.find_all('td', attrs={ + 'class': 'data-col1 Ta(start) Pstart(10px) Miw(80px)'}) # Name of stock + listNames = [] + for i in u: + if i.text.strip() == i.text.strip().upper(): + listNames.append(i.text.strip()) + ''' + if len(i.text.strip()) < 6: + listNames.append(i.text.strip()) + elif '.' in i.text.strip(): + listNames.append(i.text.strip()) # Example: TSNAX (TSN.AX) + #! If having problems later, separate them by Industries (Mutual funds and ETF's are always N/A) + ''' + + for i in range(0, len(listNames), 1): + if listNames[i] == self.name: + break + + r = r[i].text.strip() + z = z[i].text.strip() + print('Name:', z) + + if r == 'ETF': + stockType = 'ETF' + elif r == 'Stocks': + stockType = 'Stock' + elif r == 'Mutual Fund': + stockType = 'Mutual Fund' + else: + print('Could not determine fund type') + return 'N/A' + print('Type:', stockType) + + if Stock.indicator == 'Expense Ratio': + if stockType == 'Stock': + print( + self.name, 'is a stock, and therefore does not have an expense ratio') + return 'Stock' + + raw_html = t.text + soup = BeautifulSoup(raw_html, 'html.parser') + + r = soup.find_all('span', attrs={'class': 'Trsdu(0.3s)'}) + if r == []: + print('Something went wrong with scraping expense ratio') + return('N/A') + + if stockType == 'ETF': + for i in range(len(r)-1, 0, -1): + s = r[i].text.strip() + if s[-1] == '%': + break + elif stockType == 'Mutual Fund': + count = 0 # Second in set + for i in range(0, len(r)-1, 1): + s = r[i].text.strip() + if s[-1] == '%' and count == 0: + count += 1 + elif s[-1] == '%' and count == 1: + break + + if s[-1] == '%': + expenseRatio = float(s.replace('%', '')) + else: + print('Something went wrong with scraping expense ratio') + return 'N/A' + print(Stock.indicator + ': 
', end='') + print(str(expenseRatio) + '%') + return expenseRatio + + elif Stock.indicator == 'Market Capitalization': + somethingWrong = False + raw_html = t.text + soup = BeautifulSoup(raw_html, 'html.parser') + r = soup.find_all( + 'span', attrs={'class': 'Trsdu(0.3s)'}) + if r == []: + somethingWrong = True + else: + marketCap = 0 + for t in r: + s = t.text.strip() + if s[-1] == 'B': + print(Stock.indicator + ': ', end='') + print(s, end='') + s = s.replace('B', '') + marketCap = float(s) * 1000000000 # 1 billion + break + elif s[-1] == 'M': + print(Stock.indicator + ': ', end='') + print(s, end='') + s = s.replace('M', '') + marketCap = float(s) * 1000000 # 1 million + break + elif s[-1] == 'K': + print(Stock.indicator + ': ', end='') + print(s, end='') + s = s.replace('K', '') + marketCap = float(s) * 1000 # 1 thousand + break + if marketCap == 0: + somethingWrong = True + if somethingWrong == True: + ticker = self.name + yahoo_financials = YahooFinancials(ticker) + marketCap = yahoo_financials.get_market_cap() + if marketCap != None: + print('(Taken from yahoofinancials)') + print(marketCap) + return int(marketCap) + else: + print( + 'Was not able to scrape or get market capitalization from yahoo finance') + return 'N/A' + marketCap = int(marketCap) + return marketCap + + print(' =', marketCap) + marketCap = marketCap / 1000000 + print( + 'Dividing marketCap by 1 million (to work with linear regression module):', marketCap) + return marketCap + + elif Stock.indicator == 'Turnover': + if stockType == 'Stock': + print(self.name, 'is a stock, and therefore does not have turnover') + return 'Stock' + + if stockType == 'Mutual Fund': + raw_html = t.text + soup = BeautifulSoup(raw_html, 'html.parser') + + r = soup.find_all( + 'span', attrs={'class': 'Trsdu(0.3s)'}) + if r == []: + print('Something went wrong without scraping turnover') + return 'N/A' + turnover = 0 + for i in range(len(r)-1, 0, -1): + s = r[i].text.strip() + if s[-1] == '%': + turnover = 
float(s.replace('%', '')) + break + if stockType == 'ETF': + url = ''.join(('https://finance.yahoo.com/quote/', + self.name, '/profile?p=', self.name)) + # https://finance.yahoo.com/quote/SPY/profile?p=SPY + cprint('Get: ' + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + raw_html = requests.get(url).text + soup = BeautifulSoup(raw_html, 'html.parser') + + r = soup.find_all( + 'span', attrs={'class': 'W(20%) D(b) Fl(start) Ta(e)'}) + if r == []: + print('Something went wrong without scraping turnover') + return 'N/A' + turnover = 0 + for i in range(len(r)-1, 0, -1): + s = r[i].text.strip() + if s[-1] == '%': + turnover = float(s.replace('%', '')) + break + elif s == 'N/A': + print(self.name, 'has a value of N/A for turnover') + return 'N/A' + + if turnover == 0: + print('Something went wrong with scraping turnover') + return 'N/A' + print(Stock.indicator + ': ', end='') + print(str(turnover) + '%') + return turnover + + def indicatorManual(self): + indicatorValueFound = False + while indicatorValueFound == False: + if Stock.indicator == 'Expense Ratio': + indicatorValue = str( + input(Stock.indicator + ' for ' + self.name + ' (%): ')) + elif Stock.indicator == 'Persistence': + indicatorValue = str( + input(Stock.indicator + ' for ' + self.name + ' (years): ')) + elif Stock.indicator == 'Turnover': + indicatorValue = str(input( + Stock.indicator + ' for ' + self.name + ' in the last ' + str(Stock.timeFrame) + ' years: ')) + elif Stock.indicator == 'Market Capitalization': + indicatorValue = str( + input(Stock.indicator + ' of ' + self.name + ': ')) + else: + # print('Something is wrong. Indicator was not found. Ending program.') + cprint( + 'Something is wrong. Indicator was not found. 
Ending program.', 'white', 'on_red') + exit() + + if Functions.strintIsFloat(indicatorValue) == True: + indicatorValueFound = True + return float(indicatorValue) + else: + print('Please enter a number') + + def calcPersistence(self): + persistenceFirst = (sum(self.monthlyReturn[i] for i in range( + 0, Stock.persTimeFrame, 1))) / Stock.persTimeFrame + persistenceSecond = self.averageMonthlyReturn + persistence = persistenceSecond-persistenceFirst + print('Change (difference) in average monthly return:', persistence) + return persistence -# Decide on a benchmark -benchmarkTicker = '' -while benchmarkTicker == '': - listOfBenchmarks = ['S&P500', 'DJIA', 'Russell 3000', 'MSCI EAFE'] - listOfBenchmarksTicker = ['SPY', 'DJIA', 'VTHR', 'EFT'] - print('\nList of benchmarks:', listOfBenchmarks) - #benchmark = str(input('Benchmark to compare to: ')) - benchmark = 'S&P500' +def datesToDays(dates): + days = [] + firstDate = dates[0] + days.append(0) + for i in range(1, len(dates), 1): + # Calculate days from first date to current date + daysDiff = (dates[i]-firstDate).days + days.append(daysDiff) + return days - for i in range(0,len(listOfBenchmarks), 1): - if benchmark == listOfBenchmarks[i]: - benchmarkTicker = listOfBenchmarksTicker[i] - i = len(listOfBenchmarks) - if benchmarkTicker == '': - print('Benchmark not found. Please type in a benchmark from the list') +def benchmarkInit(): + # Treat benchmark like stock + benchmarkTicker = '' + benchmarks = ['S&P500', 'DJIA', 'Russell 3000', 'MSCI EAFE'] + benchmarksTicker = ['SPY', 'DJIA', 'VTHR', 'EFT'] + print('\nList of benchmarks:') + for i in range(0, len(benchmarks), 1): + print(str(i+1) + '. 
' + + benchmarks[i] + ' (' + benchmarksTicker[i] + ')') + while benchmarkTicker == '': -print('\n', benchmark, ' (', benchmarkTicker, ')', sep='') + benchmark = str(input('Please choose a benchmark from the list: ')) + # benchmark = 'SPY' # TESTING -benchmarkName = str(benchmark) -benchmark = StockData() -benchmark.setName(benchmarkName) -StockData.main(benchmark) + if Functions.stringIsInt(benchmark) == True: + if int(benchmark) <= len(benchmarks) and int(benchmark) > 0: + benchmarkInt = int(benchmark) + benchmark = benchmarks[benchmarkInt-1] + benchmarkTicker = benchmarksTicker[benchmarkInt-1] + else: + for i in range(0, len(benchmarks), 1): + if benchmark == benchmarks[i]: + benchmarkTicker = benchmarksTicker[i] + break + if benchmark == benchmarksTicker[i] or benchmark == benchmarksTicker[i].lower(): + benchmark = benchmarks[i] + benchmarkTicker = benchmarksTicker[i] + break -benchmarkReturn = Return() -Return.mainBenchmark(benchmarkReturn, benchmark) + if benchmarkTicker == '': + print('Benchmark not found. Please use a benchmark from the list') -timeFrame = Return.returnTimeFrame(benchmarkReturn) -print('Time Frame [years, months]:', timeFrame) + print(benchmark, ' (', benchmarkTicker, ')', sep='') -sumOfListLengths = 0 -for i in range(0, numberOfStocks, 1): - print('\n', listOfStocksData[i].name, sep='') - StockData.main(listOfStocksData[i]) - # Count how many stocks are available - sumOfListLengths = sumOfListLengths + len(StockData.returnAllLists(listOfStocksData[i])) + benchmark = Stock() + benchmark.setName(benchmarkTicker) -if sumOfListLengths == 0: - print("No sources have data for given stocks") + return benchmark + + +def stocksInit(): + listOfStocks = [] + + print('\nThis program can analyze stocks (GOOGL), mutual funds (VFINX), and ETFs (SPY)') + print('For simplicity, all of them will be referred to as "stock"') + + found = False + while found == False: + print('\nMethods:') + method = 0 + methods = ['Read from a file', 'Enter manually', + 'U.S. 
News popular funds (~35)', 'Kiplinger top-performing funds (50)', 'TheStreet top-rated mutual funds (20)'] + + if Stock.config != 'N/A': + methodsConfig = ['Read', 'Manual', + 'U.S. News', 'Kiplinger', 'TheStreet'] + for i in range(0, len(methodsConfig), 1): + if Stock.config['Method'] == methodsConfig[i]: + method = i + 1 + + else: + for i in range(0, len(methods), 1): + print(str(i+1) + '. ' + methods[i]) + while method == 0 or method > len(methods): + method = str(input('Which method? ')) + if Functions.stringIsInt(method) == True: + method = int(method) + if method == 0 or method > len(methods): + print('Please choose a valid method') + else: + method = 0 + print('Please choose a number') + + print('') + if method == 1: + defaultFiles = ['.gitignore', 'LICENSE', 'main.py', 'Functions.py', + 'README.md', 'requirements.txt', 'cache.sqlite', 'yahoofinancials.py', 'termcolor.py', 'README.html', 'config.json', '_test_runner.py'] # Added by repl.it for whatever reason + stocksFound = False + print('\nFiles in current directory (not including default files): ') + listOfFilesTemp = [f for f in os.listdir() if os.path.isfile(f)] + listOfFiles = [] + for files in listOfFilesTemp: + if files[0] != '.' and any(x in files for x in defaultFiles) != True: + listOfFiles.append(files) + for i in range(0, len(listOfFiles), 1): + if listOfFiles[i][0] != '.': + print(str(i+1) + '. ' + listOfFiles[i]) + while stocksFound == False: + fileName = str(input('What is the file number/name? 
')) + if Functions.stringIsInt(fileName) == True: + if int(fileName) < len(listOfFiles)+1 and int(fileName) > 0: + fileName = listOfFiles[int(fileName)-1] + print(fileName) + if Functions.fileExists(fileName) == True: + listOfStocks = [] + file = open(fileName, 'r') + n = file.read() + file.close() + s = re.findall(r'[^,;\s]+', n) + for i in s: + if str(i) != '' and Functions.hasNumbers(str(i)) == False: + listOfStocks.append(str(i).upper()) + stocksFound = True + else: + print('File not found') + for i in range(0, len(listOfStocks), 1): + stockName = listOfStocks[i].upper() + listOfStocks[i] = Stock() + listOfStocks[i].setName(stockName) + + for k in listOfStocks: + print(k.name, end=' ') + print('\n' + str(len(listOfStocks)) + ' stocks total') + + elif method == 2: + isInteger = False + while isInteger == False: + temp = input('\nNumber of stocks to analyze (2 minimum): ') + isInteger = Functions.stringIsInt(temp) + if isInteger == True: + if int(temp) >= 2: + numberOfStocks = int(temp) + else: + print('Please type a number greater than or equal to 2') + isInteger = False + else: + print('Please type an integer') + + i = 0 + while i < numberOfStocks: + print('Stock', i + 1, end=' ') + stockName = str(input('ticker: ')) + + if stockName != '' and Functions.hasNumbers(stockName) == False: + stockName = stockName.upper() + listOfStocks.append(stockName) + listOfStocks[i] = Stock() + listOfStocks[i].setName(stockName) + i += 1 + else: + print('Invalid ticker') + + elif method == 3: + listOfStocks = [] + url = 'https://money.usnews.com/funds/mutual-funds/most-popular' + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'} + cprint('Get: ' + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + f = requests.get(url, headers=headers) + Functions.fromCache(f) + raw_html = f.text + soup = BeautifulSoup(raw_html, 'html.parser') + + file = open('usnews-stocks.txt', 'w') + r = 
soup.find_all( + 'span', attrs={'class': 'text-smaller text-muted'}) + for k in r: + print(k.text.strip(), end=' ') + listOfStocks.append(k.text.strip()) + file.write(str(k.text.strip()) + '\n') + file.close() + + for i in range(0, len(listOfStocks), 1): + stockName = listOfStocks[i].upper() + listOfStocks[i] = Stock() + listOfStocks[i].setName(stockName) + + print('\n' + str(len(listOfStocks)) + ' mutual funds total') + + elif method == 4: + listOfStocks = [] + url = 'https://www.kiplinger.com/tool/investing/T041-S001-top-performing-mutual-funds/index.php' + headers = { + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'} + cprint('Get: ' + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + f = requests.get(url, headers=headers) + Functions.fromCache(f) + raw_html = f.text + soup = BeautifulSoup(raw_html, 'html.parser') + + file = open('kiplinger-stocks.txt', 'w') + r = soup.find_all('a', attrs={'style': 'font-weight:700;'}) + for k in r: + print(k.text.strip(), end=' ') + listOfStocks.append(k.text.strip()) + file.write(str(k.text.strip()) + '\n') + file.close() + + for i in range(0, len(listOfStocks), 1): + stockName = listOfStocks[i].upper() + listOfStocks[i] = Stock() + listOfStocks[i].setName(stockName) + + print('\n' + str(len(listOfStocks)) + ' mutual funds total') + + elif method == 5: + listOfStocks = [] + url = 'https://www.thestreet.com/topic/21421/top-rated-mutual-funds.html' + headers = { + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'} + cprint('Get: ' + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + f = requests.get(url, headers=headers) + Functions.fromCache(f) + raw_html = f.text + soup = BeautifulSoup(raw_html, 'html.parser') + + file = open('thestreet-stocks.txt', 'w') + r = soup.find_all('a') + for k in r: + if len(k.text.strip()) == 5: + n = re.findall(r'^/quote/.*\.html', 
def asyncData(benchmark, listOfStocks):
    """Request, ahead of time, every URL built for the benchmark and stocks.

    The responses are discarded; presumably the point is to let the
    project's request cache answer the later, sequential lookups
    (NOTE(review): confirm that requests_cache is installed globally).

    Args:
        benchmark: object with a ``name`` attribute (ticker symbol).
        listOfStocks: list of objects with a ``name`` attribute.
    """
    alphaVantageBase = ('https://www.alphavantage.co/query'
                        '?function=TIME_SERIES_DAILY_ADJUSTED&symbol=')
    stockNames = [stock.name for stock in listOfStocks]

    # Order is kept from the original: benchmark, stocks (Alpha Vantage),
    # risk-free rate (Quandl), then the two Yahoo Finance pages per stock.
    urlList = [''.join((alphaVantageBase, name, '&outputsize=full&apikey=', apiAV))
               for name in [benchmark.name] + stockNames]
    urlList.append(''.join(
        ('https://www.quandl.com/api/v3/datasets/USTREASURY/LONGTERMRATES.json?api_key=', apiQuandl)))
    urlList.extend(''.join(('https://finance.yahoo.com/quote/', name, '?p=', name))
                   for name in stockNames)
    urlList.extend(''.join(('https://finance.yahoo.com/lookup?s=', name))
                   for name in stockNames)

    print('\nSending async requests (Assuming Alpha Vantage is first choice)')
    with PoolExecutor(max_workers=3) as executor:
        # Draining the iterator waits for every request to finish.
        for _ in executor.map(sendAsync, urlList):
            pass


def sendAsync(url, timeout=30):
    """Fetch ``url`` once and throw the response away.

    A failed or timed-out request is reported and swallowed: the caller
    runs this through ``executor.map``, which would otherwise re-raise the
    first error and abandon all remaining prefetches.

    Args:
        url: address to request.
        timeout: seconds to wait before giving up on the request.
    """
    # Random 0-1 s stagger so the workers do not fire simultaneously.
    time.sleep(random.randrange(0, 2))
    cprint('Get: ' + url, 'white', attrs=['dark'])
    try:
        requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        cprint('Request failed: ' + url + ' (' + str(error) + ')',
               'white', attrs=['dark'])


def timeFrameInit():
    """Ask the user for the analysis time frame and return it in months.

    Keeps prompting until an integer from 2 to 999 is entered.

    Returns:
        int: number of months.
    """
    while True:
        print(
            '\nPlease enter the time frame in months (<60 months recommended):', end='')
        temp = input(' ')
        try:
            months = int(temp)
        except ValueError:
            print('Please type an integer')
            continue

        if 1 < months < 1000:
            return months
        # The accepted range has an upper bound too, so say so.
        print('Please enter a number between 2 and 999')
def dataMain(listOfStocks):
    """Load price history onto each stock, dropping stocks without data.

    ``listOfStocks`` is modified in place. For every stock that has data,
    ``allDates``/``allCloseValues`` receive the full series and
    ``dates``/``closeValues`` the series clipped by
    ``Stock.datesAndCloseFitTimeFrame``. The program exits when the list
    becomes empty.

    Args:
        listOfStocks: list of ``Stock`` objects.
    """
    i = 0
    while i < len(listOfStocks):
        stock = listOfStocks[i]
        datesAndCloseList = Stock.datesAndClose(stock)

        if datesAndCloseList == 'N/A':
            # Deleting shifts the next stock into slot i, so do not advance.
            del listOfStocks[i]
            if len(listOfStocks) == 0:
                cprint('No stocks to analyze. Ending program', 'white', 'on_red')
                exit()
            continue

        stock.allDates = datesAndCloseList[0]
        stock.allCloseValues = datesAndCloseList[1]

        # Clip list to fit time frame
        datesAndCloseList2 = Stock.datesAndCloseFitTimeFrame(stock)
        stock.dates = datesAndCloseList2[0]
        stock.closeValues = datesAndCloseList2[1]
        i += 1


def riskFreeRate():
    """Fetch the current risk-free rate from Quandl.

    Reads the first value of the latest row of the
    USTREASURY/LONGTERMRATES dataset and rounds it to two decimals.
    Returns 2.50 when Quandl does not answer with HTTP 200 or when the
    response body does not have the expected shape.

    Returns:
        float: the rate in the dataset's own units, rounded to 2 places.
    """
    fallbackRate = 2.50

    print('Quandl')
    url = ''.join(
        ('https://www.quandl.com/api/v3/datasets/USTREASURY/LONGTERMRATES.json?api_key=', apiQuandl))

    cprint('\nGet: ' + url, 'white', attrs=['dark'])
    with Halo(spinner='dots'):
        f = requests.get(url)
    Functions.fromCache(f)

    # Check the status before touching the body: an error response is not
    # guaranteed to contain the dataset structure parsed below.
    if f.status_code != 200:
        print('Quandl not available')
        print('Returning 2.50 as risk-free rate', end='\n\n')
        return fallbackRate

    try:
        rate = round(json.loads(f.text)['dataset']['data'][0][1], 2)
    except (ValueError, KeyError, IndexError, TypeError):
        print('Quandl response could not be read')
        print('Returning 2.50 as risk-free rate', end='\n\n')
        return fallbackRate

    print('Risk-free rate:', rate, end='\n\n')
    return rate
current 10-year treasury bill rates', end='\n\n') + Stock.riskFreeRate = riskFreeRate() + cprint(benchmark.name, 'cyan') + benchmark.monthlyReturn = Stock.calcMonthlyReturn(benchmark) + if benchmark.monthlyReturn == 'N/A': + # print('Please use a lower time frame\nEnding program') + cprint('Please use a lower time frame. Ending program', 'white', 'on_red') + exit() + benchmark.averageMonthlyReturn = Stock.calcAverageMonthlyReturn(benchmark) + benchmark.standardDeviation = Stock.calcStandardDeviation(benchmark) + + # Make benchmark data global + Stock.benchmarkDates = benchmark.dates + Stock.benchmarkCloseValues = benchmark.closeValues + Stock.benchmarkAverageMonthlyReturn = benchmark.averageMonthlyReturn + Stock.benchmarkStandardDeviation = benchmark.standardDeviation + + i = 0 + while i < len(listOfStocks): + cprint('\n' + listOfStocks[i].name, 'cyan') + + # Make sure each date has a value for both the benchmark and the stock + list1 = [] + list2 = [] + list1.append(listOfStocks[i].dates) + list1.append(listOfStocks[i].closeValues) + list2.append(Stock.benchmarkDates) + list2.append(Stock.benchmarkCloseValues) + temp = Functions.removeExtraDatesAndCloseValues(list1, list2) + listOfStocks[i].datesMatchBenchmark = temp[0][0] + listOfStocks[i].closeValuesMatchBenchmark = temp[0][1] + benchmarkMatchDatesAndCloseValues = temp[1] + + # Calculate everything for each stock + listOfStocks[i].monthlyReturn = Stock.calcMonthlyReturn( + listOfStocks[i]) + if listOfStocks[i].monthlyReturn == 'N/A': + print('Removing ' + listOfStocks[i].name + ' from list of stocks') + del listOfStocks[i] + if len(listOfStocks) == 0: + print('No stocks fit time frame. Ending program') + cprint('No stocks fit time frame. 
def outlierChoice():
    """Ask whether indicator outliers should be removed.

    Accepts ``1``/``yes`` for removal and ``2``/``no`` for keeping them
    (case-insensitive, surrounding whitespace ignored) and keeps prompting
    on anything else.

    Returns:
        bool: True to remove outliers, False to keep them.
    """
    print('\nWould you like to remove indicator outliers?')
    print('1. Yes\n2. No')

    while True:
        answer = str(input('Choice: ')).strip()

        try:
            number = int(answer)
        except ValueError:
            number = None

        if number == 1:
            return True
        if number == 2:
            return False
        if number is not None:
            print('Please enter 1 or 2')
            continue

        lowered = answer.lower()
        if lowered == 'yes':
            return True
        if lowered == 'no':
            return False
        print('Not valid. Please enter a number or yes or no.')
def indicatorInit():
    """Let the user pick the indicator to study.

    The indicator can be chosen by its 1-based number in the printed list
    or by name (case-insensitive). Prompts until a valid choice is made.

    Returns:
        str: the chosen indicator, spelled as in the list.
    """
    listOfIndicators = ['Expense Ratio',
                        'Market Capitalization', 'Turnover', 'Persistence']
    print('\n', end='')
    print('List of indicators:')
    for number, name in enumerate(listOfIndicators, start=1):
        print(str(number) + '. ' + name)

    namesByLowercase = {name.lower(): name for name in listOfIndicators}

    while True:
        indicator = str(input('Choose an indicator from the list: ')).strip()

        try:
            number = int(indicator)
        except ValueError:
            number = None

        if number is not None:
            # Bound comes from the list itself so adding an indicator
            # does not need a second edit here.
            if 0 < number <= len(listOfIndicators):
                return listOfIndicators[number - 1]
        elif indicator.lower() in namesByLowercase:
            return namesByLowercase[indicator.lower()]

        print('Please choose an indicator from the list\n')


def calcIndicatorCorrelation(listOfIndicatorValues, listOfReturns):
    """Correlate the indicator values with each series of return figures.

    Args:
        listOfIndicatorValues: one indicator value per stock.
        listOfReturns: list of series, each with one value per stock.

    Returns:
        list: the Pearson correlation coefficient (``np.corrcoef``) of the
        indicator values with each series, in the same order.
    """
    return [np.corrcoef(listOfIndicatorValues, returns)[0, 1]
            for returns in listOfReturns]


def calcIndicatorRegression(listOfIndicatorValues, listOfReturns):
    """Fit a least-squares line of each return series on the indicator.

    Args:
        listOfIndicatorValues: x values, one per stock.
        listOfReturns: list of y series, each with one value per stock.

    Returns:
        list: one ``[intercept, slope]`` pair per series. When all x
        values are equal the division below is 0/0 and numpy yields nan.
    """
    x = np.array(listOfIndicatorValues)

    # Everything that depends only on x is computed once, not per series.
    n = np.size(x)
    m_x = np.mean(x)
    SS_xx = np.sum(x*x) - n*m_x*m_x

    regressionList = []
    for returns in listOfReturns:
        y = np.array(returns)
        m_y = np.mean(y)

        # cross-deviation of x and y
        SS_xy = np.sum(y*x) - n*m_y*m_x

        b_1 = SS_xy / SS_xx      # slope
        b_0 = m_y - b_1*m_x      # intercept
        regressionList.append([b_0, b_1])

    return regressionList
def plot_regression_line(x, y, b, i):
    """Show a scatter plot with its regression line for about 3 seconds.

    Args:
        x: numpy array of indicator values.
        y: return values matching ``x``.
        b: ``[intercept, slope]`` of the fitted line.
        i: index into the list of return names, used for the labels.
    """
    # plotting the actual points as scatter plot
    plt.scatter(x, y, color="m",
                marker="o", s=30)

    # predicted response vector
    y_pred = b[0] + b[1]*x

    # plotting the regression line
    plt.plot(x, y_pred, color="g")

    # putting labels
    listOfReturnStrings = ['Average Monthly Return',
                           'Sharpe Ratio', 'Sortino Ratio', 'Treynor Ratio', 'Alpha']
    returnLabel = listOfReturnStrings[i]

    plt.title(Stock.indicator + ' and ' + returnLabel)
    if Stock.indicator == 'Expense Ratio' or Stock.indicator == 'Turnover':
        plt.xlabel(Stock.indicator + ' (%)')
    elif Stock.indicator == 'Persistence':
        plt.xlabel(Stock.indicator + ' (Difference in average monthly return)')
    elif Stock.indicator == 'Market Capitalization':
        plt.xlabel(Stock.indicator + ' (millions)')
    else:
        plt.xlabel(Stock.indicator)

    if i == 0:
        plt.ylabel(returnLabel + ' (%)')
    else:
        plt.ylabel(returnLabel)

    # function to show plot
    plt.show(block=False)

    # Countdown on a single console line. A separate loop variable keeps
    # the parameter ``i`` intact, and the text is flushed before the pause
    # so it is visible while the plot is open.
    width = 0
    for secondsLeft in range(3, 0, -1):
        unit = ' second' if secondsLeft == 1 else ' seconds'
        message = 'Keeping plot open for ' + str(secondsLeft) + unit
        width = max(width, len(message))
        sys.stdout.write(message.ljust(width) + '\r')
        sys.stdout.flush()
        plt.pause(1)

    # Blank out the countdown text.
    sys.stdout.write(' ' * width + '\r')
    sys.stdout.flush()
    plt.close()


def persistenceTimeFrame():
    """Ask how many months to use for the persistence measurement.

    A valid answer is an integer from 1 to ``Stock.timeFrame - 2``. When
    the chosen time frame leaves no valid answer, the program ends instead
    of prompting forever.

    Returns:
        int: number of months.
    """
    print('\nTime frame you chose was', Stock.timeFrame, 'months')

    largestAllowed = Stock.timeFrame - 2
    if largestAllowed < 1:
        cprint('Time frame is too short to measure persistence. Ending program',
               'white', 'on_red')
        exit()

    while True:
        answer = str(
            input('Please choose how many months to measure persistence: '))
        try:
            months = int(answer)
        except ValueError:
            print('Please choose an integer between 1 and',
                  largestAllowed, end='\n')
            continue

        if 0 < months <= largestAllowed:
            return months
        print('Please choose a number between 1 and',
              largestAllowed, end='\n')
def indicatorMain(listOfStocks):
    """Collect the indicator for each stock and relate it to the returns.

    Steps:
      1. Obtain ``indicatorValue`` for every stock (``Stock.calcPersistence``
         or ``Stock.scrapeYahooFinance``; ``Stock.indicatorManual`` when the
         result is 'N/A'). A result of 'Stock' removes the entry.
      2. Optionally drop stocks whose value ``Functions.removeOutliers``
         reports as an outlier.
      3. Store and print correlation and linear regression of the indicator
         against average monthly return, Sharpe, Sortino, Treynor and alpha.

    ``listOfStocks`` is modified in place. The program exits when fewer
    than two stocks remain.

    Args:
        listOfStocks: list of ``Stock`` objects with return statistics set.
    """
    cprint('\n' + str(Stock.indicator) + '\n', 'white', attrs=['underline'])

    listOfStocksIndicatorValues = []
    i = 0
    # A while loop is used because entries may be deleted on the way; after
    # a deletion the next stock sits at the same index.
    while i < len(listOfStocks):
        stock = listOfStocks[i]
        cprint(stock.name, 'cyan')
        if Stock.indicator == 'Persistence':
            stock.indicatorValue = Stock.calcPersistence(stock)
        else:
            stock.indicatorValue = Stock.scrapeYahooFinance(stock)
        print('')

        if stock.indicatorValue == 'Stock':
            print('Removing ' + stock.name + ' from list of stocks')
            del listOfStocks[i]
            if len(listOfStocks) < 2:
                cprint('Not able to go to the next step. Ending program',
                       'white', 'on_red')
                exit()
            continue

        if stock.indicatorValue == 'N/A':
            stock.indicatorValue = Stock.indicatorManual(stock)

        listOfStocksIndicatorValues.append(stock.indicatorValue)
        i += 1

    # Remove outliers
    if Stock.removeOutliers == True:
        cprint('\nRemoving outliers\n', 'white', attrs=['underline'])
        # As used below: temp[0] = values kept, temp[1] = outlier values,
        # temp[2..5] = first quartile, median, third quartile, IQR.
        temp = Functions.removeOutliers(listOfStocksIndicatorValues)
        if temp[0] == listOfStocksIndicatorValues:
            print('No outliers\n')
        else:
            print('First quartile:', temp[2], ', Median:', temp[3],
                  ', Third quartile:', temp[4], 'Interquartile range:', temp[5])
            outlierValues = temp[1]

            keptStocks = []
            for stock in listOfStocks:   # every stock, including the last
                if stock.indicatorValue in outlierValues:
                    print('Removing', stock.name, 'because it has a',
                          Stock.indicator.lower(), 'value of', stock.indicatorValue)
                else:
                    keptStocks.append(stock)
            listOfStocks[:] = keptStocks   # keep the caller's list object
            print('')

            if len(listOfStocks) < 2:
                cprint('Not able to go to the next step. Ending program',
                       'white', 'on_red')
                exit()

    # Calculate data
    cprint('Calculating correlation and linear regression\n',
           'white', attrs=['underline'])

    # One series per return measure, each holding one value per stock:
    # [[averageMonthlyReturn1, aMR2, ...], [sharpe1, sharpe2, ...], ...]
    returnAttributes = ['averageMonthlyReturn',
                        'sharpe', 'sortino', 'treynor', 'alpha']
    listOfReturns = [[getattr(stock, attribute) for stock in listOfStocks]
                     for attribute in returnAttributes]

    # Create list of each indicator (e.g. expense ratio)
    listOfIndicatorValues = [stock.indicatorValue for stock in listOfStocks]

    Stock.indicatorCorrelation = calcIndicatorCorrelation(
        listOfIndicatorValues, listOfReturns)

    listOfReturnStrings = ['Average Monthly Return',
                           'Sharpe Ratio', 'Sortino Ratio', 'Treynor Ratio', 'Alpha']
    for i in range(0, len(Stock.indicatorCorrelation), 1):
        print('Correlation for ' + Stock.indicator.lower() + ' and ' +
              listOfReturnStrings[i].lower() + ': ' + str(Stock.indicatorCorrelation[i]))

    Stock.indicatorRegression = calcIndicatorRegression(
        listOfIndicatorValues, listOfReturns)
    print('\n', end='')
    for i in range(0, len(Stock.indicatorCorrelation), 1):
        # calcIndicatorRegression returns [intercept, slope]; the slope is
        # the coefficient of x.
        intercept, slope = Stock.indicatorRegression[i]
        formula = ''.join(
            ('y = ', str(round(float(slope), 2)), 'x + ', str(round(float(intercept), 2))))
        print('Linear regression equation for ' + Stock.indicator.lower() + ' and ' +
              listOfReturnStrings[i].lower() + ': ' + formula)
def checkConfig(fileName):
    """Read the ``Config`` section of a JSON configuration file.

    Args:
        fileName: path of the configuration file.

    Returns:
        The value stored under the top-level ``"Config"`` key, or the
        string ``'N/A'`` when the file cannot be opened, is not valid JSON,
        or has no ``"Config"`` key.
    """
    try:
        with open(fileName, 'r', encoding='utf-8') as file:
            n = file.read()
    except OSError:
        # Missing or unreadable file: run without a config.
        return 'N/A'
    except UnicodeDecodeError:
        print('Config file is not valid')
        return 'N/A'

    try:
        t = json.loads(n)
    except ValueError:
        print('Config file is not valid')
        return 'N/A'

    # Valid JSON is not enough; callers index the result by setting name.
    if not isinstance(t, dict) or 'Config' not in t:
        print('Config file is not valid')
        return 'N/A'

    return t['Config']
I haven't spent much time debugging this + Stock.config = checkConfig('config.json') + + # Check that all required packages are installed + if Stock.config == 'N/A': + packagesInstalled = Functions.checkPackages( + ['numpy', 'requests', 'bs4', 'requests_cache', 'halo']) + if not packagesInstalled: + exit() + else: + print('All required packages are installed') + + # Check python version is above 3.3 + pythonVersionGood = Functions.checkPythonVersion() + if not pythonVersionGood: + exit() + + # Test internet connection + internetConnection = Functions.isConnected() + if not internetConnection: + exit() + else: + Functions.getJoke() + + # Choose benchmark and makes it class Stock + benchmark = benchmarkInit() + # Add it to a list to work with other functions + benchmarkAsList = [benchmark] + + # Asks for stock(s) ticker and makes them class Stock + listOfStocks = stocksInit() + + # Determine time frame (Years) + timeFrame = timeFrameInit() + Stock.timeFrame = timeFrame # Needs to be a global variable for all stocks + + # Choose indicator + Stock.indicator = indicatorInit() + # Choose time frame for initial persistence + if Stock.indicator == 'Persistence': + Stock.persTimeFrame = persistenceTimeFrame() + + # Choose whether to remove outliers or not + Stock.removeOutliers = outlierChoice() + else: + if Stock.config['Check Packages'] != False: + packagesInstalled = Functions.checkPackages( + ['numpy', 'requests', 'bs4', 'requests_cache', 'halo']) + if not packagesInstalled: + exit() + else: + print('All required packages are installed') + + if Stock.config['Check Python Version'] != False: + pythonVersionGood = Functions.checkPythonVersion() + if not pythonVersionGood: + exit() + + if Stock.config['Check Internet Connection'] != False: + internetConnection = Functions.isConnected() + if not internetConnection: + exit() + if Stock.config['Get Joke'] != False: + Functions.getJoke() + + benchmarksTicker = ['SPY', 'DJIA', 'VTHR', 'EFT'] + if Stock.config['Benchmark'] in 
benchmarksTicker: + benchmark = Stock() + benchmark.setName(str(Stock.config['Benchmark'])) + benchmarkAsList = [benchmark] + else: + benchmark = benchmarkInit() + benchmarkAsList = [benchmark] + + listOfStocks = stocksInit() + + if int(Stock.config['Time Frame']) >= 2: + timeFrame = int(Stock.config['Time Frame']) + else: + timeFrame = timeFrameInit() + Stock.timeFrame = timeFrame # Needs to be a global variable for all stocks + + indicators = ['Expense Ratio', + 'Market Capitalization', 'Turnover', 'Persistence'] + if Stock.config['Indicator'] in indicators: + Stock.indicator = Stock.config['Indicator'] + else: + Stock.indicator = indicatorInit() + + if Stock.indicator == 'Persistence': + Stock.persTimeFrame = persistenceTimeFrame() + + # Choose whether to remove outliers or not + if Stock.config['Remove Outliers'] != False: + Stock.removeOutliers = True + else: + Stock.removeOutliers = outlierChoice() + + # Send async request to AV for listOfStocks and benchmark + # asyncData(benchmark, listOfStocks) + + # Gather data for benchmark and stock(s) + cprint('\nGathering data', 'white', attrs=['underline']) + dataMain(benchmarkAsList) + dataMain(listOfStocks) + + # Calculate return for benchmark and stock(s) + returnMain(benchmark, listOfStocks) + + # Choose indicator and calculate correlation with indicator + indicatorMain(listOfStocks) + + print('') exit() -# Find return over time using either Jensen's Alpha, Sharpe Ratio, Sortino Ratio, or Treynor Ratio -for i in range(0, numberOfStocks, 1): - print('\n', listOfStocksData[i].name, sep='') - #StockReturn.main(listOfStocksReturn[i]) - -# Runs correlation or regression study -# print(listOfStocksData[0].name, listOfStocksData[0].absFirstLastDates, listOfStocksData[0].finalDatesAndClose) -indicatorFound = False -while indicatorFound == False: - print("1. Expense Ratio\n2. Asset Size\n3. Turnover\n4. Persistence\nWhich indicator would you like to look at? 
", end='') - - #indicator = str(input()) # CHANGE BACK TO THIS LATER - indicator = 'Expense Ratio' - print(indicator, end='') - - indicatorFound = True - print('\n', end='') - - if indicator == 'Expense Ratio' or indicator == '1' or indicator == 'expense ratio': - #from ExpenseRatio import ExpenseRatio - print('\nExpense Ratio') - - elif indicator == 'Asset Size' or indicator == '2' or indicator == 'asset size': - print('\nAsset Size') - - elif indicator == 'Turnover' or indicator == '3' or indicator == 'turnover': - print('\nTurnover') - - elif indicator == 'Persistence' or indicator == '4' or indicator == 'persistence': - print('\nPersistence') - - else: - indicatorFound = False - print('Invalid input, please enter indicator again') - -''' -stockName = 'IWV' -stock1 = Stock(stockName) -print("Finding available dates and close values for", stock1.name) -StockData.main(stock1) -''' +if __name__ == "__main__": + main() diff --git a/modules/termcolor.py b/modules/termcolor.py new file mode 100644 index 0000000..f11b824 --- /dev/null +++ b/modules/termcolor.py @@ -0,0 +1,168 @@ +# coding: utf-8 +# Copyright (c) 2008-2011 Volvox Development Team +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# Author: Konstantin Lepa + +"""ANSII Color formatting for output in terminal.""" + +from __future__ import print_function +import os + + +__ALL__ = [ 'colored', 'cprint' ] + +VERSION = (1, 1, 0) + +ATTRIBUTES = dict( + list(zip([ + 'bold', + 'dark', + '', + 'underline', + 'blink', + '', + 'reverse', + 'concealed' + ], + list(range(1, 9)) + )) + ) +del ATTRIBUTES[''] + + +HIGHLIGHTS = dict( + list(zip([ + 'on_grey', + 'on_red', + 'on_green', + 'on_yellow', + 'on_blue', + 'on_magenta', + 'on_cyan', + 'on_white' + ], + list(range(40, 48)) + )) + ) + + +COLORS = dict( + list(zip([ + 'grey', + 'red', + 'green', + 'yellow', + 'blue', + 'magenta', + 'cyan', + 'white', + ], + list(range(30, 38)) + )) + ) + + +RESET = '\033[0m' + + +def colored(text, color=None, on_color=None, attrs=None): + """Colorize text. + + Available text colors: + red, green, yellow, blue, magenta, cyan, white. + + Available text highlights: + on_red, on_green, on_yellow, on_blue, on_magenta, on_cyan, on_white. + + Available attributes: + bold, dark, underline, blink, reverse, concealed. + + Example: + colored('Hello, World!', 'red', 'on_grey', ['blue', 'blink']) + colored('Hello, World!', 'green') + """ + if os.getenv('ANSI_COLORS_DISABLED') is None: + fmt_str = '\033[%dm%s' + if color is not None: + text = fmt_str % (COLORS[color], text) + + if on_color is not None: + text = fmt_str % (HIGHLIGHTS[on_color], text) + + if attrs is not None: + for attr in attrs: + text = fmt_str % (ATTRIBUTES[attr], text) + + text += RESET + return text + + +def cprint(text, color=None, on_color=None, attrs=None, **kwargs): + """Print colorize text. + + It accepts arguments of print function. 
+ """ + + print((colored(text, color, on_color, attrs)), **kwargs) + + +if __name__ == '__main__': + print('Current terminal type: %s' % os.getenv('TERM')) + print('Test basic colors:') + cprint('Grey color', 'grey') + cprint('Red color', 'red') + cprint('Green color', 'green') + cprint('Yellow color', 'yellow') + cprint('Blue color', 'blue') + cprint('Magenta color', 'magenta') + cprint('Cyan color', 'cyan') + cprint('White color', 'white') + print(('-' * 78)) + + print('Test highlights:') + cprint('On grey color', on_color='on_grey') + cprint('On red color', on_color='on_red') + cprint('On green color', on_color='on_green') + cprint('On yellow color', on_color='on_yellow') + cprint('On blue color', on_color='on_blue') + cprint('On magenta color', on_color='on_magenta') + cprint('On cyan color', on_color='on_cyan') + cprint('On white color', color='grey', on_color='on_white') + print('-' * 78) + + print('Test attributes:') + cprint('Bold grey color', 'grey', attrs=['bold']) + cprint('Dark red color', 'red', attrs=['dark']) + cprint('Underline green color', 'green', attrs=['underline']) + cprint('Blink yellow color', 'yellow', attrs=['blink']) + cprint('Reversed blue color', 'blue', attrs=['reverse']) + cprint('Concealed Magenta color', 'magenta', attrs=['concealed']) + cprint('Bold underline reverse cyan color', 'cyan', + attrs=['bold', 'underline', 'reverse']) + cprint('Dark blink concealed white color', 'white', + attrs=['dark', 'blink', 'concealed']) + print(('-' * 78)) + + print('Test mixing:') + cprint('Underline red on grey color', 'red', 'on_grey', + ['underline']) + cprint('Reversed green on red color', 'green', 'on_red', ['reverse']) + diff --git a/modules/yahoofinancials.py b/modules/yahoofinancials.py new file mode 100644 index 0000000..9f477f1 --- /dev/null +++ b/modules/yahoofinancials.py @@ -0,0 +1,891 @@ +""" +============================== +The Yahoo Financials Module +Version: 1.5 +============================== + +Author: Connor Sanders +Email: 
sandersconnor1@gmail.com +Version Released: 01/27/2019 +Tested on Python 2.7, 3.3, 3.4, 3.5, 3.6, and 3.7 + +Copyright (c) 2019 Connor Sanders +MIT License + +List of Included Functions: + +1) get_financial_stmts(frequency, statement_type, reformat=True) + - frequency can be either 'annual' or 'quarterly'. + - statement_type can be 'income', 'balance', 'cash'. + - reformat optional value defaulted to true. Enter False for unprocessed raw data from Yahoo Finance. +2) get_stock_price_data(reformat=True) + - reformat optional value defaulted to true. Enter False for unprocessed raw data from Yahoo Finance. +3) get_stock_earnings_data(reformat=True) + - reformat optional value defaulted to true. Enter False for unprocessed raw data from Yahoo Finance. +4) get_summary_data(reformat=True) + - reformat optional value defaulted to true. Enter False for unprocessed raw data from Yahoo Finance. +5) get_stock_quote_type_data() +6) get_historical_price_data(start_date, end_date, time_interval) + - Gets historical price data for currencies, stocks, indexes, cryptocurrencies, and commodity futures. + - start_date should be entered in the 'YYYY-MM-DD' format. First day that financial data will be pulled. + - end_date should be entered in the 'YYYY-MM-DD' format. Last day that financial data will be pulled. + - time_interval can be either 'daily', 'weekly', or 'monthly'. Parameter determines the time period interval. 
+ +Usage Examples: +from yahoofinancials import YahooFinancials +#tickers = 'AAPL' +#or +tickers = ['AAPL', 'WFC', 'F', 'JPY=X', 'XRP-USD', 'GC=F'] +yahoo_financials = YahooFinancials(tickers) +balance_sheet_data = yahoo_financials.get_financial_stmts('quarterly', 'balance') +earnings_data = yahoo_financials.get_stock_earnings_data() +historical_prices = yahoo_financials.get_historical_price_data('2015-01-15', '2017-10-15', 'weekly') +""" + +import sys +import calendar +import re +from json import loads +import time +from bs4 import BeautifulSoup +import datetime +import pytz +import random +try: + from urllib import FancyURLopener +except: + from urllib.request import FancyURLopener + + +# track the last get timestamp to add a minimum delay between gets - be nice! +_lastget = 0 + + +# Custom Exception class to handle custom error +class ManagedException(Exception): + pass + + +# Class used to open urls for financial data +class UrlOpener(FancyURLopener): + version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11' + + +# Class containing Yahoo Finance ETL Functionality +class YahooFinanceETL(object): + + def __init__(self, ticker): + self.ticker = ticker.upper() if isinstance(ticker, str) else [t.upper() for t in ticker] + self._cache = {} + + # Minimum interval between Yahoo Finance requests for this instance + _MIN_INTERVAL = 7 + + # Meta-data dictionaries for the classes to use + YAHOO_FINANCIAL_TYPES = { + 'income': ['financials', 'incomeStatementHistory', 'incomeStatementHistoryQuarterly'], + 'balance': ['balance-sheet', 'balanceSheetHistory', 'balanceSheetHistoryQuarterly', 'balanceSheetStatements'], + 'cash': ['cash-flow', 'cashflowStatementHistory', 'cashflowStatementHistoryQuarterly', 'cashflowStatements'], + 'keystats': ['key-statistics'], + 'history': ['history'] + } + + # Interval value translation dictionary + _INTERVAL_DICT = { + 'daily': '1d', + 'weekly': '1wk', + 'monthly': '1mo' + } + + # Base Yahoo 
Finance URL for the class to build on + _BASE_YAHOO_URL = 'https://finance.yahoo.com/quote/' + + # private static method to get the appropriate report type identifier + @staticmethod + def get_report_type(frequency): + if frequency == 'annual': + report_num = 1 + else: + report_num = 2 + return report_num + + # Public static method to format date serial string to readable format and vice versa + @staticmethod + def format_date(in_date): + if isinstance(in_date, str): + form_date = int(calendar.timegm(time.strptime(in_date, '%Y-%m-%d'))) + else: + form_date = str((datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=in_date)).date()) + return form_date + + # Private Static Method to Convert Eastern Time to UTC + @staticmethod + def _convert_to_utc(date, mask='%Y-%m-%d %H:%M:%S'): + utc = pytz.utc + eastern = pytz.timezone('US/Eastern') + date_ = datetime.datetime.strptime(date.replace(" 0:", " 12:"), mask) + date_eastern = eastern.localize(date_, is_dst=None) + date_utc = date_eastern.astimezone(utc) + return date_utc.strftime('%Y-%m-%d %H:%M:%S %Z%z') + + # Private method to scrape data from yahoo finance + def _scrape_data(self, url, tech_type, statement_type): + global _lastget + if not self._cache.get(url): + now = int(time.time()) + if _lastget and now - _lastget < self._MIN_INTERVAL: + time.sleep(self._MIN_INTERVAL - (now - _lastget) + 1) + now = int(time.time()) + _lastget = now + urlopener = UrlOpener() + # Try to open the URL up to 10 times sleeping random time if something goes wrong + max_retry = 10 + for i in range(0, max_retry): + response = urlopener.open(url) + if response.getcode() != 200: + time.sleep(random.randrange(10, 20)) + else: + response_content = response.read() + soup = BeautifulSoup(response_content, "html.parser") + re_script = soup.find("script", text=re.compile("root.App.main")) + if re_script is not None: + script = re_script.text + self._cache[url] = loads(re.search("root.App.main\s+=\s+(\{.*\})", script).group(1)) + 
response.close() + break + else: + time.sleep(random.randrange(10, 20)) + if i == max_retry - 1: + # Raise a custom exception if we can't get the web page within max_retry attempts + raise ManagedException("Server replied with HTTP " + str(response.getcode()) + + " code while opening the url: " + str(url)) + data = self._cache[url] + if tech_type == '' and statement_type != 'history': + stores = data["context"]["dispatcher"]["stores"]["QuoteSummaryStore"] + elif tech_type != '' and statement_type != 'history': + stores = data["context"]["dispatcher"]["stores"]["QuoteSummaryStore"][tech_type] + else: + stores = data["context"]["dispatcher"]["stores"]["HistoricalPriceStore"] + return stores + + # Private static method to determine if a numerical value is in the data object being cleaned + @staticmethod + def _determine_numeric_value(value_dict): + if 'raw' in value_dict.keys(): + numerical_val = value_dict['raw'] + else: + numerical_val = None + return numerical_val + + # Private method to format date serial string to readable format and vice versa + def _format_time(self, in_time): + form_date_time = datetime.datetime.fromtimestamp(int(in_time)).strftime('%Y-%m-%d %H:%M:%S') + utc_dt = self._convert_to_utc(form_date_time) + return utc_dt + + # Private method to return the a sub dictionary entry for the earning report cleaning + def _get_cleaned_sub_dict_ent(self, key, val_list): + sub_list = [] + for rec in val_list: + sub_sub_dict = {} + for k, v in rec.items(): + if k == 'date': + sub_sub_dict_ent = {k: v} + else: + numerical_val = self._determine_numeric_value(v) + sub_sub_dict_ent = {k: numerical_val} + sub_sub_dict.update(sub_sub_dict_ent) + sub_list.append(sub_sub_dict) + sub_ent = {key: sub_list} + return sub_ent + + # Private method to process raw earnings data and clean + def _clean_earnings_data(self, raw_data): + cleaned_data = {} + earnings_key = 'earningsData' + financials_key = 'financialsData' + for k, v in raw_data.items(): + if k == 'earningsChart': 
+ sub_dict = {} + for k2, v2 in v.items(): + if k2 == 'quarterly': + sub_ent = self._get_cleaned_sub_dict_ent(k2, v2) + elif k2 == 'currentQuarterEstimate': + numerical_val = self._determine_numeric_value(v2) + sub_ent = {k2: numerical_val} + else: + sub_ent = {k2: v2} + sub_dict.update(sub_ent) + dict_ent = {earnings_key: sub_dict} + cleaned_data.update(dict_ent) + elif k == 'financialsChart': + sub_dict = {} + for k2, v2, in v.items(): + sub_ent = self._get_cleaned_sub_dict_ent(k2, v2) + sub_dict.update(sub_ent) + dict_ent = {financials_key: sub_dict} + cleaned_data.update(dict_ent) + else: + if k != 'maxAge': + dict_ent = {k: v} + cleaned_data.update(dict_ent) + return cleaned_data + + # Private method to clean summary and price reports + def _clean_reports(self, raw_data): + cleaned_dict = {} + if raw_data is None: + return None + for k, v in raw_data.items(): + if 'Time' in k: + formatted_utc_time = self._format_time(v) + dict_ent = {k: formatted_utc_time} + elif 'Date' in k: + try: + formatted_date = v['fmt'] + except (KeyError, TypeError): + formatted_date = '-' + dict_ent = {k: formatted_date} + elif v is None or isinstance(v, str) or isinstance(v, int) or isinstance(v, float): + dict_ent = {k: v} + # Python 2 and Unicode + elif sys.version_info < (3, 0) and isinstance(v, unicode): + dict_ent = {k: v} + else: + numerical_val = self._determine_numeric_value(v) + dict_ent = {k: numerical_val} + cleaned_dict.update(dict_ent) + return cleaned_dict + + # Private Static Method to ensure ticker is URL encoded + @staticmethod + def _encode_ticker(ticker_str): + encoded_ticker = ticker_str.replace('=', '%3D') + return encoded_ticker + + # Private method to get time interval code + def _build_historical_url(self, ticker, hist_oj): + url = self._BASE_YAHOO_URL + self._encode_ticker(ticker) + '/history?period1=' + str(hist_oj['start']) + \ + '&period2=' + str(hist_oj['end']) + '&interval=' + hist_oj['interval'] + '&filter=history&frequency=' + \ + hist_oj['interval'] + 
return url + + # Private Method to clean the dates of the newly returns historical stock data into readable format + def _clean_historical_data(self, hist_data, last_attempt=False): + data = {} + for k, v in hist_data.items(): + if k == 'eventsData': + event_obj = {} + if isinstance(v, list): + dict_ent = {k: event_obj} + else: + for type_key, type_obj in v.items(): + formatted_type_obj = {} + for date_key, date_obj in type_obj.items(): + formatted_date_key = self.format_date(int(date_key)) + cleaned_date = self.format_date(int(date_obj['date'])) + date_obj.update({'formatted_date': cleaned_date}) + formatted_type_obj.update({formatted_date_key: date_obj}) + event_obj.update({type_key: formatted_type_obj}) + dict_ent = {k: event_obj} + elif 'date' in k.lower(): + if v is not None: + cleaned_date = self.format_date(v) + dict_ent = {k: {'formatted_date': cleaned_date, 'date': v}} + else: + if last_attempt is False: + return None + else: + dict_ent = {k: {'formatted_date': None, 'date': v}} + elif isinstance(v, list): + sub_dict_list = [] + for sub_dict in v: + sub_dict['formatted_date'] = self.format_date(sub_dict['date']) + sub_dict_list.append(sub_dict) + dict_ent = {k: sub_dict_list} + else: + dict_ent = {k: v} + data.update(dict_ent) + return data + + # Private Static Method to build API url for GET Request + @staticmethod + def _build_api_url(hist_obj, up_ticker): + base_url = "https://query1.finance.yahoo.com/v8/finance/chart/" + api_url = base_url + up_ticker + '?symbol=' + up_ticker + '&period1=' + str(hist_obj['start']) + '&period2=' + \ + str(hist_obj['end']) + '&interval=' + hist_obj['interval'] + api_url += '&events=div|split|earn&lang=en-US®ion=US' + return api_url + + # Private Method to get financial data via API Call + def _get_api_data(self, api_url, tries=0): + urlopener = UrlOpener() + response = urlopener.open(api_url) + if response.getcode() == 200: + res_content = response.read() + response.close() + if sys.version_info < (3, 0): + return 
loads(res_content) + return loads(res_content.decode('utf-8')) + else: + if tries < 5: + time.sleep(random.randrange(10, 20)) + tries += 1 + return self._get_api_data(api_url, tries) + else: + return None + + # Private Method to clean API data + def _clean_api_data(self, api_url): + raw_data = self._get_api_data(api_url) + ret_obj = {} + ret_obj.update({'eventsData': []}) + if raw_data is None: + return ret_obj + results = raw_data['chart']['result'] + if results is None: + return ret_obj + for result in results: + tz_sub_dict = {} + ret_obj.update({'eventsData': result.get('events', {})}) + ret_obj.update({'firstTradeDate': result['meta'].get('firstTradeDate', 'NA')}) + ret_obj.update({'currency': result['meta'].get('currency', 'NA')}) + ret_obj.update({'instrumentType': result['meta'].get('instrumentType', 'NA')}) + tz_sub_dict.update({'gmtOffset': result['meta']['gmtoffset']}) + ret_obj.update({'timeZone': tz_sub_dict}) + timestamp_list = result['timestamp'] + high_price_list = result['indicators']['quote'][0]['high'] + low_price_list = result['indicators']['quote'][0]['low'] + open_price_list = result['indicators']['quote'][0]['open'] + close_price_list = result['indicators']['quote'][0]['close'] + volume_list = result['indicators']['quote'][0]['volume'] + adj_close_list = result['indicators']['adjclose'][0]['adjclose'] + i = 0 + prices_list = [] + for timestamp in timestamp_list: + price_dict = {} + price_dict.update({'date': timestamp}) + price_dict.update({'high': high_price_list[i]}) + price_dict.update({'low': low_price_list[i]}) + price_dict.update({'open': open_price_list[i]}) + price_dict.update({'close': close_price_list[i]}) + price_dict.update({'volume': volume_list[i]}) + price_dict.update({'adjclose': adj_close_list[i]}) + prices_list.append(price_dict) + i += 1 + ret_obj.update({'prices': prices_list}) + return ret_obj + + # Private Method to Handle Recursive API Request + def _recursive_api_request(self, hist_obj, up_ticker, i=0): + api_url = 
self._build_api_url(hist_obj, up_ticker) + re_data = self._clean_api_data(api_url) + cleaned_re_data = self._clean_historical_data(re_data) + if cleaned_re_data is not None: + return cleaned_re_data + else: + if i < 3: + i += 1 + return self._recursive_api_request(hist_obj, up_ticker, i) + else: + return self._clean_historical_data(re_data, True) + + # Private Method to take scrapped data and build a data dictionary with + def _create_dict_ent(self, up_ticker, statement_type, tech_type, report_name, hist_obj): + YAHOO_URL = self._BASE_YAHOO_URL + up_ticker + '/' + self.YAHOO_FINANCIAL_TYPES[statement_type][0] + '?p=' +\ + up_ticker + if tech_type == '' and statement_type != 'history': + try: + re_data = self._scrape_data(YAHOO_URL, tech_type, statement_type) + dict_ent = {up_ticker: re_data[u'' + report_name], 'dataType': report_name} + except KeyError: + re_data = None + dict_ent = {up_ticker: re_data, 'dataType': report_name} + elif tech_type != '' and statement_type != 'history': + try: + re_data = self._scrape_data(YAHOO_URL, tech_type, statement_type) + except KeyError: + re_data = None + dict_ent = {up_ticker: re_data} + else: + YAHOO_URL = self._build_historical_url(up_ticker, hist_obj) + try: + cleaned_re_data = self._recursive_api_request(hist_obj, up_ticker) + except KeyError: + try: + re_data = self._scrape_data(YAHOO_URL, tech_type, statement_type) + cleaned_re_data = self._clean_historical_data(re_data) + except KeyError: + cleaned_re_data = None + dict_ent = {up_ticker: cleaned_re_data} + return dict_ent + + # Private method to return the stmt_id for the reformat_process + def _get_stmt_id(self, statement_type, raw_data): + stmt_id = '' + i = 0 + for key in raw_data.keys(): + if key in self.YAHOO_FINANCIAL_TYPES[statement_type.lower()]: + stmt_id = key + i += 1 + if i != 1: + return None + return stmt_id + + # Private Method for the Reformat Process + def _reformat_stmt_data_process(self, raw_data, statement_type): + final_data_list = [] + if raw_data 
def get_stock_data(self, statement_type='income', tech_type='', report_name='', hist_obj=None):
    """Collect one data entry per ticker via ``self._create_dict_ent``.

    Changes from the previous version:
      * the warning printed for a failed ticker now shows the caught
        exception; it used to print ``str(ManagedException)``, i.e. the
        class itself, which hid the actual error text;
      * ``hist_obj`` no longer defaults to a shared mutable ``{}``; omitting
        it (or passing ``None``) still results in an empty dict being used.

    :param statement_type: forwarded to ``_create_dict_ent``.
    :param tech_type: forwarded to ``_create_dict_ent``.
    :param report_name: forwarded to ``_create_dict_ent``.
    :param hist_obj: forwarded to ``_create_dict_ent``; ``None`` means ``{}``.
    :return: dict merged from the entries returned for each ticker.
    :raises ManagedException: only when ``self.ticker`` is a single string;
        for a list of tickers the failing ticker is reported and skipped.
    """
    if hist_obj is None:
        hist_obj = {}
    data = {}
    if isinstance(self.ticker, str):
        dict_ent = self._create_dict_ent(self.ticker, statement_type, tech_type, report_name, hist_obj)
        data.update(dict_ent)
        return data
    for tick in self.ticker:
        try:
            dict_ent = self._create_dict_ent(tick, statement_type, tech_type, report_name, hist_obj)
        except ManagedException as err:
            # One bad ticker must not abort the whole batch.
            print("Warning! Ticker: " + str(tick) + " error - " + str(err))
            print("The process is still running...")
            continue
        data.update(dict_ent)
    return data
def get_stock_dividend_data(self, start, end, interval):
    """Return dividend data for every ticker held by this instance.

    The handlers used to be bare ``except:`` clauses, which also swallowed
    ``KeyboardInterrupt`` and ``SystemExit``. They now catch ``Exception``
    only, so ordinary failures still yield ``None`` for that ticker while an
    interrupt propagates. The single-ticker and multi-ticker paths share one
    loop; the returned shape is unchanged.

    :param start: start of the period, forwarded to the dividend request.
    :param end: end of the period, forwarded to the dividend request.
    :param interval: interval name translated by ``self.get_time_code``.
    :return: dict mapping each ticker to the value returned by
        ``self._handle_api_dividend_request``, or to ``None`` when that call
        raised.
    """
    interval_code = self.get_time_code(interval)
    tickers = [self.ticker] if isinstance(self.ticker, str) else self.ticker
    re_data = {}
    for tick in tickers:
        try:
            re_data[tick] = self._handle_api_dividend_request(tick, start, end, interval_code)
        except Exception:
            # Best effort: a ticker whose request fails is reported as None.
            re_data[tick] = None
    return re_data
def get_stock_summary_url(self):
    """Build the summary page URL for the ticker(s) of this instance.

    :return: ``self._BASE_YAHOO_URL`` followed by the ticker when
        ``self.ticker`` is a string; otherwise a dict mapping each ticker to
        its URL built the same way.
    """
    base = self._BASE_YAHOO_URL
    symbols = self.ticker
    if not isinstance(symbols, str):
        urls = {}
        for symbol in symbols:
            urls[symbol] = base + symbol
        return urls
    return base + symbols
{'start': start, 'end': end, 'interval': interval_code} + return self.get_stock_data('history', hist_obj=hist_obj) + + # Private Method for Functions needing stock_price_data + def _stock_price_data(self, data_field): + if isinstance(self.ticker, str): + if self.get_stock_price_data()[self.ticker] is None: + return None + return self.get_stock_price_data()[self.ticker].get(data_field, None) + else: + ret_obj = {} + for tick in self.ticker: + if self.get_stock_price_data()[tick] is None: + ret_obj.update({tick: None}) + else: + ret_obj.update({tick: self.get_stock_price_data()[tick].get(data_field, None)}) + return ret_obj + + # Private Method for Functions needing stock_price_data + def _stock_summary_data(self, data_field): + if isinstance(self.ticker, str): + if self.get_summary_data()[self.ticker] is None: + return None + return self.get_summary_data()[self.ticker].get(data_field, None) + else: + ret_obj = {} + for tick in self.ticker: + if self.get_summary_data()[tick] is None: + ret_obj.update({tick: None}) + else: + ret_obj.update({tick: self.get_summary_data()[tick].get(data_field, None)}) + return ret_obj + + # Private Method for Functions needing financial statement data + def _financial_statement_data(self, stmt_type, stmt_code, field_name, freq): + re_data = self.get_financial_stmts(freq, stmt_type)[stmt_code] + if isinstance(self.ticker, str): + try: + date_key = re_data[self.ticker][0].keys()[0] + except (IndexError, AttributeError, TypeError): + date_key = list(re_data[self.ticker][0])[0] + data = re_data[self.ticker][0][date_key][field_name] + else: + data = {} + for tick in self.ticker: + try: + date_key = re_data[tick][0].keys()[0] + except: + try: + date_key = list(re_data[tick][0].keys())[0] + except: + date_key = None + if date_key is not None: + sub_data = re_data[tick][0][date_key][field_name] + data.update({tick: sub_data}) + else: + data.update({tick: None}) + return data + + # Public method to get daily dividend data + def 
def get_current_price(self):
    """Return the 'regularMarketPrice' field obtained through ``self._stock_price_data``."""
    return self._stock_price_data('regularMarketPrice')
def get_book_value(self):
    """Return 'totalStockholderEquity' from the quarterly balance sheet.

    Delegates to ``self._financial_statement_data`` with the 'balance'
    statement type, the 'balanceSheetHistoryQuarterly' statement code and
    the 'quarterly' frequency.
    """
    # NOTE(review): presumably this is the most recent quarter - confirm how
    # _financial_statement_data orders the statement entries.
    return self._financial_statement_data('balance', 'balanceSheetHistoryQuarterly',
                                          'totalStockholderEquity', 'quarterly')
def get_num_shares_outstanding(self, price_type='current'):
    """Estimate shares outstanding as market cap divided by a share price.

    Changes from the previous version:
      * the 'dayHigh' / 'dayLow' values are bound to correctly named locals
        (they were swapped; the average was unaffected, the code misleading);
      * ``self.get_current_price()`` is called once instead of once per
        ticker inside the loop;
      * a price of zero yields ``None`` instead of raising
        ``ZeroDivisionError``.

    :param price_type: 'current' divides by the value from
        ``self.get_current_price()``; any other value divides by the mean of
        the 'dayHigh' and 'dayLow' summary fields.
    :return: the quotient for a single ticker, or a dict mapping each ticker
        to its quotient; ``None`` wherever the market cap or the price is
        unavailable.
    """
    day_high = self._stock_summary_data('dayHigh')
    day_low = self._stock_summary_data('dayLow')
    cur_market_cap = self._stock_summary_data('marketCap')
    use_current = price_type == 'current'
    # Fetched once up front; for a ticker list this is a dict keyed by ticker.
    current = self.get_current_price() if use_current else None

    def shares_for(market_cap, price_now, high, low):
        # Resolve the divisor for one ticker, then divide if possible.
        if market_cap is None:
            return None
        if use_current:
            price = price_now
        elif high is not None and low is not None:
            price = (high + low) / 2
        else:
            price = None
        if not price:
            # Covers both a missing price and a zero price.
            return None
        return market_cap / price

    if isinstance(self.ticker, str):
        return shares_for(cur_market_cap, current, day_high, day_low)
    ret_obj = {}
    for tick in self.ticker:
        price_now = current[tick] if use_current else None
        ret_obj.update({tick: shares_for(cur_market_cap[tick], price_now, day_high[tick], day_low[tick])})
    return ret_obj