From 0dcdd1049d2a32671e01adc6abb22df7229239b5 Mon Sep 17 00:00:00 2001 From: Andrew Dinh Date: Thu, 31 Jan 2019 13:22:02 -0800 Subject: [PATCH 1/6] Finished overhaul of version-1 --- .gitignore | 5 +- ExpenseRatio.py | 14 +- Functions.py | 57 ++- README.md | 2 - StockData.py | 915 ++++++++++++++++++++++++----------------------- StockReturn.py | 45 ++- listGoogle.py | 54 --- main.py | 569 ++++++++++++++++++++++++++--- requirements.txt | 4 +- 9 files changed, 1059 insertions(+), 606 deletions(-) delete mode 100644 listGoogle.py diff --git a/.gitignore b/.gitignore index 293b5a3..1acf435 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,5 @@ __pycache__/StockData.cpython-37.pyc __pycache__/ *.pyc -quickstart.py -creds.json test/ -.vscode/ -listGoogle.py \ No newline at end of file +.vscode/ \ No newline at end of file diff --git a/ExpenseRatio.py b/ExpenseRatio.py index f6d6df0..a64312e 100644 --- a/ExpenseRatio.py +++ b/ExpenseRatio.py @@ -12,17 +12,19 @@ import numpy from urllib.request import urlopen import re + class ExpenseRatio: def __init__(self): -def main(): # For testing purposes +def main(): # For testing purposes ''' - a = [1,2,3] - b = [2,4,6] - c = numpy.corrcoef(a, b)[0, 1] - print(c) + a = [1,2,3] + b = [2,4,6] + c = numpy.corrcoef(a, b)[0, 1] + print(c) ''' + if __name__ == "__main__": - main() + main() diff --git a/Functions.py b/Functions.py index 1ec3db4..ae24926 100644 --- a/Functions.py +++ b/Functions.py @@ -1,24 +1,47 @@ # Python file for general functions -class Functions: - def getNearest(items, pivot): - return min(items, key=lambda x: abs(x - pivot)) - def stringToDate(date): - from datetime import datetime +def getNearest(items, pivot): + return min(items, key=lambda x: abs(x - pivot)) + +def stringToDate(date): + from datetime import datetime + + #datetime_object = datetime.strptime('Jun 1 2005 1:33PM', '%b %d %Y %I:%M%p') + datetime_object = datetime.strptime(date, '%Y-%m-%d').date() + return(datetime_object) + ''' + dateSplit = 
date.split('-') + year = int(dateSplit[0]) + month = int(dateSplit[1]) + day = int(dateSplit[2]) + datetime_object = datetime.date(year, month, day) + ''' + return datetime_object + +def removeExtraDatesAndCloseValues(list1, list2): + # Returns the two lists but with the extra dates and corresponding close values removed + # list = [[dates], [close values]] + + newList1 = [[], []] + newList2 = [[], []] + + for i in range(0, len(list1[0]), 1): + for j in range(0, len(list2[0]), 1): + if list1[0][i] == list2[0][j]: + newList1[0].append(list1[0][i]) + newList2[0].append(list1[0][i]) + newList1[1].append(list1[1][i]) + newList2[1].append(list2[1][j]) + break + + returnList = [] + returnList.append(newList1) + returnList.append(newList2) + return returnList - #datetime_object = datetime.strptime('Jun 1 2005 1:33PM', '%b %d %Y %I:%M%p') - datetime_object = datetime.strptime(date, '%Y-%m-%d').date() - return(datetime_object) - ''' - dateSplit = date.split('-') - year = int(dateSplit[0]) - month = int(dateSplit[1]) - day = int(dateSplit[2]) - datetime_object = datetime.date(year, month, day) - ''' - return datetime_object def main(): exit() + if __name__ == "__main__": - main() + main() diff --git a/README.md b/README.md index d842a24..ebb6232 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,4 @@ To begin, run Some ticker values to try: SPY, VFINX, AAPL, GOOGL -`$ pip install numpy` - Created by Andrew Dinh from Dr. TJ Owens Gilroy Early College Academy diff --git a/StockData.py b/StockData.py index d515d3a..41f81f5 100644 --- a/StockData.py +++ b/StockData.py @@ -3,6 +3,9 @@ # Python 3.6.1 # Description: Returns all available dates and prices for each stock requested. +import json +import requests +from datetime import datetime ''' Available API's: Can it do mutual funds? 
IEX: No @@ -18,7 +21,7 @@ Barchart: No # If you're going to take these API keys and abuse it, you should really reconsider your life priorities apiAV = 'O42ICUV58EIZZQMU' -#apiBarchart = 'a17fab99a1c21cd6f847e2f82b592838' # 150 getHistory queries per day +# apiBarchart = 'a17fab99a1c21cd6f847e2f82b592838' # 150 getHistory queries per day apiBarchart = 'f40b136c6dc4451f9136bb53b9e70ffa' apiTiingo = '2e72b53f2ab4f5f4724c5c1e4d5d4ac0af3f7ca8' apiTradier = 'n26IFFpkOFRVsB5SNTVNXicE5MPD' @@ -29,18 +32,19 @@ Daily Requests = 20,000 Symbol Requests = 500 ''' -import requests, json -from datetime import datetime class StockData: - def __init__(self, newName = '', newAbsFirstLastDates = [], newFinalDatesAndClose = [], newFinalDatesAndClose2 = [],newAllLists = []): - self.name = newName # Name of stock - self.absFirstLastDates = newAbsFirstLastDates # Absolute first and last dates from all sources - self.finalDatesAndClose = newFinalDatesAndClose # All available dates with corresponding close values - self.finalDatesAndClose2 = newFinalDatesAndClose2 # After some consideration, I decided to keep what I had already done here and make a new list that's the same except dates are in datetime format - self.allLists = newAllLists - ''' + def __init__(self, newName='', newAbsFirstLastDates=[], newFinalDatesAndClose=[], newFinalDatesAndClose2=[], newAllLists=[]): + self.name = newName # Name of stock + # Absolute first and last dates from all sources + self.absFirstLastDates = newAbsFirstLastDates + # All available dates with corresponding close values + self.finalDatesAndClose = newFinalDatesAndClose + # After some consideration, I decided to keep what I had already done here and make a new list that's the same except dates are in datetime format + self.finalDatesAndClose2 = newFinalDatesAndClose2 + self.allLists = newAllLists + ''' Format: # List from each source containing: [firstDate, lastDate, allDates, values, timeFrame] # firstDate & lastDate = '2018-12-18' (year-month-date) 
@@ -49,197 +53,208 @@ class StockData: timeFrame = [days, weeks, years] ''' - def set(self, newName, newFirstLastDates, newAbsFirstLastDates, newFinalDatesAndClose, newAllLists): - self.name = newName # Name of stock - self.firstLastDates = newFirstLastDates # Dates that at least 2 sources have (or should it be all?) - Maybe let user decide - self.absFirstLastDates = newAbsFirstLastDates # Absolute first and last dates from all sources - self.finalDatesAndClose = newFinalDatesAndClose - self.allLists = newAllLists + def set(self, newName, newFirstLastDates, newAbsFirstLastDates, newFinalDatesAndClose, newAllLists): + self.name = newName # Name of stock + # Dates that at least 2 sources have (or should it be all?) - Maybe let user decide + self.firstLastDates = newFirstLastDates + # Absolute first and last dates from all sources + self.absFirstLastDates = newAbsFirstLastDates + self.finalDatesAndClose = newFinalDatesAndClose + self.allLists = newAllLists - def setName(self, newName): - self.name = newName - def returnName(self): - return self.name - def returnAllLists(self): - return self.allLists - def returnAbsFirstLastDates(self): - return self.absFirstLastDates - def returnAllLists(self): - return self.allLists - def returnFinalDatesAndClose(self): - return self.finalDatesAndClose - def returnFinalDatesAndClose2(self): - return self.finalDatesAndClose2 + def setName(self, newName): + self.name = newName - def getIEX(self): - url = ''.join(('https://api.iextrading.com/1.0/stock/', self.name, '/chart/5y')) - #link = "https://api.iextrading.com/1.0/stock/spy/chart/5y" - print("\nSending request to:", url) - f = requests.get(url) - json_data = f.text - #print(json_data) - if (json_data == 'Unknown symbol'): - print("IEX not available") - return 'Not available' - loaded_json = json.loads(json_data) - listIEX = [] + def returnName(self): + return self.name - print("\nFinding first and last date") - # Adding (firstDate, lastDate) to listIEX - # Find firstDate (comes 
first) - firstLine = loaded_json[0] - #print("firstLine:", firstLine) - firstDate = firstLine['date'] - #print("firstDate:",firstDate) - # Find lastDate (comes last) - lastLine = loaded_json[-1] # Returns last value of the list (Equivalent to len(loaded_json)-1) - #print("lastLine:", lastLine) - lastDate = lastLine['date'] - #print("last date:", lastDate) - listIEX.append(firstDate) - listIEX.append(lastDate) - print(listIEX[0], ',', listIEX[1]) + def returnAllLists(self): + return self.allLists - print("\nFinding all dates given") - allDates = [] + def returnAbsFirstLastDates(self): + return self.absFirstLastDates + + def returnAllLists(self): + return self.allLists + + def returnFinalDatesAndClose(self): + return self.finalDatesAndClose + + def returnFinalDatesAndClose2(self): + return self.finalDatesAndClose2 + + def getIEX(self): + url = ''.join( + ('https://api.iextrading.com/1.0/stock/', self.name, '/chart/5y')) + #link = "https://api.iextrading.com/1.0/stock/spy/chart/5y" + print("\nSending request to:", url) + f = requests.get(url) + json_data = f.text + # print(json_data) + if (json_data == 'Unknown symbol'): + print("IEX not available") + return 'Not available' + loaded_json = json.loads(json_data) + listIEX = [] + + print("\nFinding first and last date") + # Adding (firstDate, lastDate) to listIEX + # Find firstDate (comes first) + firstLine = loaded_json[0] + #print("firstLine:", firstLine) + firstDate = firstLine['date'] + # print("firstDate:",firstDate) + # Find lastDate (comes last) + # Returns last value of the list (Equivalent to len(loaded_json)-1) + lastLine = loaded_json[-1] + #print("lastLine:", lastLine) + lastDate = lastLine['date'] + #print("last date:", lastDate) + listIEX.append(firstDate) + listIEX.append(lastDate) + print(listIEX[0], ',', listIEX[1]) + + print("\nFinding all dates given") + allDates = [] # for i in range(0, len(loaded_json), 1): # If you want to do oldest first - for i in range(len(loaded_json)-1, -1, -1): - line = 
loaded_json[i] - date = line['date'] - allDates.append(date) - listIEX.append(allDates) + for i in range(len(loaded_json)-1, -1, -1): + line = loaded_json[i] + date = line['date'] + allDates.append(date) + listIEX.append(allDates) - #print(listIEX[2]) - print(len(listIEX[2]), "dates") + # print(listIEX[2]) + print(len(listIEX[2]), "dates") - print("\nFinding close values for each date") - values = [] + print("\nFinding close values for each date") + values = [] # for i in range(0, len(loaded_json), 1): # If you want to do oldest first - for i in range(len(loaded_json)-1, -1, -1): - line = loaded_json[i] - value = line['close'] - values.append(value) - listIEX.append(values) - #print(listIEX[3]) - print(len(listIEX[3]), "close values") + for i in range(len(loaded_json)-1, -1, -1): + line = loaded_json[i] + value = line['close'] + values.append(value) + listIEX.append(values) + # print(listIEX[3]) + print(len(listIEX[3]), "close values") - print("\nFinding time frame given [days, weeks, years]") - timeFrame = [] - d1 = datetime.strptime(firstDate, "%Y-%m-%d") - d2 = datetime.strptime(lastDate, "%Y-%m-%d") - timeFrameDays = abs((d2 - d1).days) - #print(timeFrameDays) - timeFrameYears = float(timeFrameDays / 365) - timeFrameWeeks = float(timeFrameDays / 7) - timeFrame.append(timeFrameDays) - timeFrame.append(timeFrameWeeks) - timeFrame.append(timeFrameYears) - listIEX.append(timeFrame) - print(listIEX[4]) + print("\nFinding time frame given [days, weeks, years]") + timeFrame = [] + d1 = datetime.strptime(firstDate, "%Y-%m-%d") + d2 = datetime.strptime(lastDate, "%Y-%m-%d") + timeFrameDays = abs((d2 - d1).days) + # print(timeFrameDays) + timeFrameYears = float(timeFrameDays / 365) + timeFrameWeeks = float(timeFrameDays / 7) + timeFrame.append(timeFrameDays) + timeFrame.append(timeFrameWeeks) + timeFrame.append(timeFrameYears) + listIEX.append(timeFrame) + print(listIEX[4]) - return listIEX + return listIEX - def getAV(self): - listAV = [] - #url = 
''.join(('https://www.alphavantage.co/query?function=TIME_SERIES_MONTHLY&symbol=', self.name, '&apikey=', apiAV)) - # https://www.alphavantage.co/query?function=TIME_SERIES_MONTHLY&symbol=MSFT&apikey=demo + def getAV(self): + listAV = [] + #url = ''.join(('https://www.alphavantage.co/query?function=TIME_SERIES_MONTHLY&symbol=', self.name, '&apikey=', apiAV)) + # https://www.alphavantage.co/query?function=TIME_SERIES_MONTHLY&symbol=MSFT&apikey=demo - #url = ''.join(('https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=', self.name, '&outputsize=full&apikey=', apiAV)) - # https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=MSFT&outputsize=full&apikey=demo + #url = ''.join(('https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=', self.name, '&outputsize=full&apikey=', apiAV)) + # https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=MSFT&outputsize=full&apikey=demo - url = ''.join(('https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=', self.name, '&outputsize=full&apikey=', apiAV)) - # https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=MSFT&outputsize=full&apikey=demo + url = ''.join(('https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=', + self.name, '&outputsize=full&apikey=', apiAV)) + # https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=MSFT&outputsize=full&apikey=demo - print("\nSending request to:", url) - print("(This will take a while)") - f = requests.get(url) - json_data = f.text - loaded_json = json.loads(json_data) - #print(loaded_json) + print("\nSending request to:", url) + print("(This will take a while)") + f = requests.get(url) + json_data = f.text + loaded_json = json.loads(json_data) + # print(loaded_json) - #print(type(loaded_json)) # Dictionary - #print(len(loaded_json)) - if len(loaded_json) == 1: - print("Alpha Vantage not available") - return 'Not available' + # 
print(type(loaded_json)) # Dictionary + # print(len(loaded_json)) + if len(loaded_json) == 1: + print("Alpha Vantage not available") + return 'Not available' - #print(loaded_json['Monthly Time Series']) - dailyTimeSeries = loaded_json['Time Series (Daily)'] - #print(monthlyTimeSeries) - listOfDates = list(dailyTimeSeries) - #print(listOfDates) + #print(loaded_json['Monthly Time Series']) + dailyTimeSeries = loaded_json['Time Series (Daily)'] + # print(monthlyTimeSeries) + listOfDates = list(dailyTimeSeries) + # print(listOfDates) - firstDate = listOfDates[-1] - lastDate = listOfDates[0] - #print("firstDate:", firstDate) - #print("lastDate:", lastDate) - listAV.append(firstDate) - listAV.append(lastDate) - listAV.append(listOfDates) + firstDate = listOfDates[-1] + lastDate = listOfDates[0] + #print("firstDate:", firstDate) + #print("lastDate:", lastDate) + listAV.append(firstDate) + listAV.append(lastDate) + listAV.append(listOfDates) - print("\nFinding first and last date") - print(listAV[0], ',', listAV[1]) - print("\nFinding all dates given") - #print(listAV[2]) - print(len(listAV[2]), "dates") + print("\nFinding first and last date") + print(listAV[0], ',', listAV[1]) + print("\nFinding all dates given") + # print(listAV[2]) + print(len(listAV[2]), "dates") - print("\nFinding close values for each date") - values = [] - for i in range(0, len(listOfDates), 1): - temp = listOfDates[i] - loaded_json2 = dailyTimeSeries[temp] - #value = loaded_json2['4. close'] - value = loaded_json2['5. adjusted close'] - values.append(value) - listAV.append(values) - #print(listOfDates[0]) - #i = listOfDates[0] - #print(monthlyTimeSeries[i]) - #print(listAV[3]) - print(len(listAV[3]), "close values") + print("\nFinding close values for each date") + values = [] + for i in range(0, len(listOfDates), 1): + temp = listOfDates[i] + loaded_json2 = dailyTimeSeries[temp] + #value = loaded_json2['4. close'] + value = loaded_json2['5. 
adjusted close'] + values.append(value) + listAV.append(values) + # print(listOfDates[0]) + #i = listOfDates[0] + # print(monthlyTimeSeries[i]) + # print(listAV[3]) + print(len(listAV[3]), "close values") - print("\nFinding time frame given [days, weeks, years]") - timeFrame = [] - d1 = datetime.strptime(firstDate, "%Y-%m-%d") - d2 = datetime.strptime(lastDate, "%Y-%m-%d") - timeFrameDays = abs((d2 - d1).days) - #print(timeFrameDays) - timeFrameYears = float(timeFrameDays / 365) - timeFrameWeeks = float(timeFrameDays / 7) - timeFrame.append(timeFrameDays) - timeFrame.append(timeFrameWeeks) - timeFrame.append(timeFrameYears) - listAV.append(timeFrame) - print(listAV[4]) + print("\nFinding time frame given [days, weeks, years]") + timeFrame = [] + d1 = datetime.strptime(firstDate, "%Y-%m-%d") + d2 = datetime.strptime(lastDate, "%Y-%m-%d") + timeFrameDays = abs((d2 - d1).days) + # print(timeFrameDays) + timeFrameYears = float(timeFrameDays / 365) + timeFrameWeeks = float(timeFrameDays / 7) + timeFrame.append(timeFrameDays) + timeFrame.append(timeFrameWeeks) + timeFrame.append(timeFrameYears) + listAV.append(timeFrame) + print(listAV[4]) - return listAV + return listAV - def getTiingo(self): - ''' - #OR we can use the token directly in the url - headers = { - 'Content-Type': 'application/json' - } - requestResponse = requests.get("https://api.tiingo.com/api/test?token=", - headers=headers) - print(requestResponse.json()) - ''' - token = ''.join(('Token ', apiTiingo)) - headers = { - 'Content-Type': 'application/json', - 'Authorization' : token - } - url = ''.join(('https://api.tiingo.com/tiingo/daily/', self.name)) - print("\nSending request to:", url) - requestResponse = requests.get(url, headers=headers) - #print(requestResponse.json()) - loaded_json = requestResponse.json() - #print(len(loaded_json)) - if len(loaded_json) == 1: - print("Tiingo not available") - return 'Not available' - #print(loaded_json) - ''' + def getTiingo(self): + ''' + #OR we can use the token 
directly in the url + headers = { + 'Content-Type': 'application/json' + } + requestResponse = requests.get("https://api.tiingo.com/api/test?token=", + headers=headers) + print(requestResponse.json()) + ''' + token = ''.join(('Token ', apiTiingo)) + headers = { + 'Content-Type': 'application/json', + 'Authorization': token + } + url = ''.join(('https://api.tiingo.com/tiingo/daily/', self.name)) + print("\nSending request to:", url) + requestResponse = requests.get(url, headers=headers) + # print(requestResponse.json()) + loaded_json = requestResponse.json() + # print(len(loaded_json)) + if len(loaded_json) == 1: + print("Tiingo not available") + return 'Not available' + # print(loaded_json) + ''' list1 = list(loaded_json) for i in range (0, len(list1), 1): if list1[i] == 'startDate': @@ -249,274 +264,276 @@ class StockData: print(list1[startNum]) print(list1[endNum]) ''' - listTiingo = [] + listTiingo = [] - print("\nFinding first and last date") - firstDate = loaded_json['startDate'] - lastDate = loaded_json['endDate'] - #print(firstDate) - #print(lastDate) - listTiingo.append(firstDate) - listTiingo.append(lastDate) - print(listTiingo[0], ',', listTiingo[1]) + print("\nFinding first and last date") + firstDate = loaded_json['startDate'] + lastDate = loaded_json['endDate'] + # print(firstDate) + # print(lastDate) + listTiingo.append(firstDate) + listTiingo.append(lastDate) + print(listTiingo[0], ',', listTiingo[1]) - print("\nFinding all dates given") - dates = [] - values = [] # Used loop for finding values - url2 = ''.join((url, '/prices?startDate=', firstDate, '&endDate=', lastDate)) - # https://api.tiingo.com/tiingo/daily//prices?startDate=2012-1-1&endDate=2016-1-1 - print("\nSending request to:", url2) - requestResponse2 = requests.get(url2, headers=headers) - loaded_json2 = requestResponse2.json() - #print(loaded_json2) - #print(len(loaded_json2)) - for i in range(len(loaded_json2)-1, -1, -1): - line = loaded_json2[i] - dateWithTime = line['date'] - temp = 
dateWithTime.split('T00:00:00.000Z') - date = temp[0] - dates.append(date) + print("\nFinding all dates given") + dates = [] + values = [] # Used loop for finding values + url2 = ''.join((url, '/prices?startDate=', + firstDate, '&endDate=', lastDate)) + # https://api.tiingo.com/tiingo/daily//prices?startDate=2012-1-1&endDate=2016-1-1 + print("\nSending request to:", url2) + requestResponse2 = requests.get(url2, headers=headers) + loaded_json2 = requestResponse2.json() + # print(loaded_json2) + # print(len(loaded_json2)) + for i in range(len(loaded_json2)-1, -1, -1): + line = loaded_json2[i] + dateWithTime = line['date'] + temp = dateWithTime.split('T00:00:00.000Z') + date = temp[0] + dates.append(date) - value = line['close'] - values.append(value) - listTiingo.append(dates) - #print(listTiingo[2]) - print(len(listTiingo[2]), "dates") + value = line['close'] + values.append(value) + listTiingo.append(dates) + # print(listTiingo[2]) + print(len(listTiingo[2]), "dates") - print("Finding close values for each date") - # Used loop from finding dates - listTiingo.append(values) - #print(listTiingo[3]) - print(len(listTiingo[3]), "close values") + print("Finding close values for each date") + # Used loop from finding dates + listTiingo.append(values) + # print(listTiingo[3]) + print(len(listTiingo[3]), "close values") - print("Finding time frame given [days, weeks, years]") - timeFrame = [] - d1 = datetime.strptime(firstDate, "%Y-%m-%d") - d2 = datetime.strptime(lastDate, "%Y-%m-%d") - timeFrameDays = abs((d2 - d1).days) - #print(timeFrameDays) - timeFrameYears = float(timeFrameDays / 365) - timeFrameWeeks = float(timeFrameDays / 7) - timeFrame.append(timeFrameDays) - timeFrame.append(timeFrameWeeks) - timeFrame.append(timeFrameYears) - listTiingo.append(timeFrame) - print(listTiingo[4]) + print("Finding time frame given [days, weeks, years]") + timeFrame = [] + d1 = datetime.strptime(firstDate, "%Y-%m-%d") + d2 = datetime.strptime(lastDate, "%Y-%m-%d") + timeFrameDays = 
abs((d2 - d1).days) + # print(timeFrameDays) + timeFrameYears = float(timeFrameDays / 365) + timeFrameWeeks = float(timeFrameDays / 7) + timeFrame.append(timeFrameDays) + timeFrame.append(timeFrameWeeks) + timeFrame.append(timeFrameYears) + listTiingo.append(timeFrame) + print(listTiingo[4]) - return listTiingo + return listTiingo - def getFirstLastDate(self, listOfFirstLastDates): - listOfFirstDates = [] - listOfLastDates = [] - #print(len(listOfFirstLastDates)) - for i in range (0, len(listOfFirstLastDates), 1): - firstLastDates = listOfFirstLastDates[i] - firstDate = firstLastDates[0] - lastDate = firstLastDates[1] - listOfFirstDates.append(firstDate) - listOfLastDates.append(lastDate) - #print(listOfFirstDates) - #print(listOfLastDates) - for i in range (0, len(listOfFirstDates), 1): - date = listOfFirstDates[i] - if i == 0: - firstDate = date - yearMonthDay = firstDate.split('-') - firstYear = yearMonthDay[0] - firstMonth = yearMonthDay[1] - firstDay = yearMonthDay[2] - else: - yearMonthDay = date.split('-') - year = yearMonthDay[0] - month = yearMonthDay[1] - day = yearMonthDay[2] - if year < firstYear or (year == firstYear and month < firstMonth) or (year == firstYear and month == firstMonth and day < firstDay): - firstDate = date - firstYear = year - firstMonth = month - firstDay = day - #print(firstDate) - if len(listOfFirstDates) > 1: - for i in range(0, len(listOfLastDates),1): - date = listOfLastDates[i] - if i == 0: - lastDate = date - yearMonthDay = lastDate.split('-') - lastYear = yearMonthDay[0] - lastMonth = yearMonthDay[1] - lastDay = yearMonthDay[2] - else: - yearMonthDay = date.split('-') - year = yearMonthDay[0] - month = yearMonthDay[1] - day = yearMonthDay[2] - if year > lastYear or (year == lastYear and month > lastMonth) or (year == lastYear and month == lastMonth and day > lastDay): - lastDate = date - lastYear = year - lastMonth = month - lastDay = day - #print(lastDate) - absFirstLastDates = [] - absFirstLastDates.append(firstDate) - 
absFirstLastDates.append(lastDate) - return absFirstLastDates + def getFirstLastDate(self, listOfFirstLastDates): + listOfFirstDates = [] + listOfLastDates = [] + # print(len(listOfFirstLastDates)) + for i in range(0, len(listOfFirstLastDates), 1): + firstLastDates = listOfFirstLastDates[i] + firstDate = firstLastDates[0] + lastDate = firstLastDates[1] + listOfFirstDates.append(firstDate) + listOfLastDates.append(lastDate) + # print(listOfFirstDates) + # print(listOfLastDates) + for i in range(0, len(listOfFirstDates), 1): + date = listOfFirstDates[i] + if i == 0: + firstDate = date + yearMonthDay = firstDate.split('-') + firstYear = yearMonthDay[0] + firstMonth = yearMonthDay[1] + firstDay = yearMonthDay[2] + else: + yearMonthDay = date.split('-') + year = yearMonthDay[0] + month = yearMonthDay[1] + day = yearMonthDay[2] + if year < firstYear or (year == firstYear and month < firstMonth) or (year == firstYear and month == firstMonth and day < firstDay): + firstDate = date + firstYear = year + firstMonth = month + firstDay = day + # print(firstDate) + if len(listOfFirstDates) > 1: + for i in range(0, len(listOfLastDates), 1): + date = listOfLastDates[i] + if i == 0: + lastDate = date + yearMonthDay = lastDate.split('-') + lastYear = yearMonthDay[0] + lastMonth = yearMonthDay[1] + lastDay = yearMonthDay[2] + else: + yearMonthDay = date.split('-') + year = yearMonthDay[0] + month = yearMonthDay[1] + day = yearMonthDay[2] + if year > lastYear or (year == lastYear and month > lastMonth) or (year == lastYear and month == lastMonth and day > lastDay): + lastDate = date + lastYear = year + lastMonth = month + lastDay = day + # print(lastDate) + absFirstLastDates = [] + absFirstLastDates.append(firstDate) + absFirstLastDates.append(lastDate) + return absFirstLastDates - def getFinalDatesAndClose(self): - # finalDates and finalClose will coincide (aka i = 1 will correspond to one another) - finalDatesAndClose = [] # Will combine finalDates then finalClose - finalDates = [] 
- finalClose = [] - #print(self.absFirstLastDates) - absFirstDate = self.absFirstLastDates[0] - absLastDate = self.absFirstLastDates[1] - date = absFirstDate + def getFinalDatesAndClose(self): + # finalDates and finalClose will coincide (aka i = 1 will correspond to one another) + finalDatesAndClose = [] # Will combine finalDates then finalClose + finalDates = [] + finalClose = [] + # print(self.absFirstLastDates) + absFirstDate = self.absFirstLastDates[0] + absLastDate = self.absFirstLastDates[1] + date = absFirstDate - allLists = self.allLists - while date != absLastDate: # DOESN'T DO LAST DATE - tempListOfClose = [] - found = False - for j in range(0, len(allLists), 1): # Look for date in all lists - list1 = allLists[j] - listOfDates = list1[2] - listOfClose = list1[3] - for k in range(0, len(listOfDates), 1): - if listOfDates[k] == date: - if found == False: - finalDates.append(date) - found = True - #print(listOfDates[k]) - #print(listOfClose[k]) - #print(listOfClose) - tempListOfClose.append(float(listOfClose[k])) - k = len(listOfDates) # Dates don't repeat + allLists = self.allLists + while date != absLastDate: # DOESN'T DO LAST DATE + tempListOfClose = [] + found = False + for j in range(0, len(allLists), 1): # Look for date in all lists + list1 = allLists[j] + listOfDates = list1[2] + listOfClose = list1[3] + for k in range(0, len(listOfDates), 1): + if listOfDates[k] == date: + if found == False: + finalDates.append(date) + found = True + # print(listOfDates[k]) + # print(listOfClose[k]) + # print(listOfClose) + tempListOfClose.append(float(listOfClose[k])) + k = len(listOfDates) # Dates don't repeat - if found == True: + if found == True: + sum = 0 + for r in range(0, len(tempListOfClose), 1): + sum = sum + tempListOfClose[r] + close = sum/len(tempListOfClose) + + finalClose.append(close) + # print(close) + + # Go to the next day + yearMonthDay = date.split('-') + year = int(yearMonthDay[0]) + month = int(yearMonthDay[1]) + day = int(yearMonthDay[2]) + + 
day = day + 1 + if day == 32 and month == 12: # Next year + day = 1 + month = 1 + year = year + 1 + elif day == 32: # Next month + month = month + 1 + day = 1 + if day < 10: + day = ''.join(('0', str(day))) + if month < 10: + month = ''.join(('0', str(month))) + date = ''.join((str(year), '-', str(month), '-', str(day))) + # print(date) + + # For last date + finalDates.append(date) + tempListOfClose = [] + for j in range(0, len(allLists), 1): # Look for date in all lists + list1 = allLists[j] + listOfDates = list1[2] + listOfClose = list1[3] + for k in range(0, len(listOfDates), 1): + if listOfDates[k] == date: + tempListOfClose.append(float(listOfClose[k])) + k = len(listOfDates) # Dates don't repeat sum = 0 for r in range(0, len(tempListOfClose), 1): - sum = sum + tempListOfClose[r] + sum = sum + tempListOfClose[r] close = sum/len(tempListOfClose) - finalClose.append(close) - #print(close) + # print(finalDates) + # print(finalClose) - # Go to the next day - yearMonthDay = date.split('-') - year = int(yearMonthDay[0]) - month = int(yearMonthDay[1]) - day = int(yearMonthDay[2]) + # Want lists from most recent to oldest, comment this out if you don't want that + finalDates = list(reversed(finalDates)) + finalClose = list(reversed(finalClose)) - day = day + 1 - if day == 32 and month == 12: # Next year - day = 1 - month = 1 - year = year + 1 - elif day == 32: # Next month - month = month + 1 - day = 1 - if day < 10: - day = ''.join(('0', str(day))) - if month < 10: - month = ''.join(('0', str(month))) - date = ''.join((str(year), '-', str(month), '-', str(day))) - #print(date) + finalDatesAndClose.append(finalDates) + finalDatesAndClose.append(finalClose) + return finalDatesAndClose - # For last date - finalDates.append(date) - tempListOfClose = [] - for j in range(0, len(allLists), 1): # Look for date in all lists - list1 = allLists[j] - listOfDates = list1[2] - listOfClose = list1[3] - for k in range(0, len(listOfDates), 1): - if listOfDates[k] == date: - 
tempListOfClose.append(float(listOfClose[k])) - k = len(listOfDates) # Dates don't repeat - sum = 0 - for r in range(0, len(tempListOfClose), 1): - sum = sum + tempListOfClose[r] - close = sum/len(tempListOfClose) - finalClose.append(close) - #print(finalDates) - #print(finalClose) + def datetimeDates(self): + finalDatesAndClose2 = [] + finalDatesAndClose = self.finalDatesAndClose + finalDatesStrings = finalDatesAndClose[0] + finalClose = finalDatesAndClose[1] + finalDates = [] - # Want lists from most recent to oldest, comment this out if you don't want that - finalDates = list(reversed(finalDates)) - finalClose = list(reversed(finalClose)) + from Functions import Functions + for i in range(0, len(finalDatesStrings), 1): + temp = Functions.stringToDate(finalDatesStrings[i]) + finalDates.append(temp) + # print(finalDates) - finalDatesAndClose.append(finalDates) - finalDatesAndClose.append(finalClose) - return finalDatesAndClose + finalDatesAndClose2.append(finalDates) + finalDatesAndClose2.append(finalClose) + return(finalDatesAndClose2) - def datetimeDates(self): - finalDatesAndClose2 = [] - finalDatesAndClose = self.finalDatesAndClose - finalDatesStrings = finalDatesAndClose[0] - finalClose = finalDatesAndClose[1] - finalDates = [] + def is_connected(): + import socket # To check internet connection + try: + # connect to the host -- tells us if the host is actually + # reachable + socket.create_connection(("www.andrewkdinh.com", 80)) + return True + except OSError: + # pass + print("\nNo internet connection!") + return False - from Functions import Functions - for i in range(0, len(finalDatesStrings), 1): - temp = Functions.stringToDate(finalDatesStrings[i]) - finalDates.append(temp) - #print(finalDates) + def main(self): + print('Beginning StockData.py') - finalDatesAndClose2.append(finalDates) - finalDatesAndClose2.append(finalClose) - return(finalDatesAndClose2) + import importlib.util + import sys # To check whether a package is installed - def 
is_connected(): - import socket # To check internet connection - try: - # connect to the host -- tells us if the host is actually - # reachable - socket.create_connection(("www.andrewkdinh.com", 80)) - return True - except OSError: - #pass - print("\nNo internet connection!") - return False + packages = ['requests'] + for i in range(0, len(packages), 1): + package_name = packages[i] + spec = importlib.util.find_spec(package_name) + if spec is None: + print(package_name + " is not installed\nPlease type in 'pip install -r requirements.txt' to install all required packages") - def main(self): - print('Beginning StockData.py') - - import importlib.util, sys # To check whether a package is installed + # Test internet connection + internetConnection = StockData.is_connected() + if internetConnection == False: + return - packages = ['requests'] - for i in range(0, len(packages), 1): - package_name = packages[i] - spec = importlib.util.find_spec(package_name) - if spec is None: - print(package_name +" is not installed\nPlease type in 'pip install -r requirements.txt' to install all required packages") + listOfFirstLastDates = [] + self.allLists = [] - # Test internet connection - internetConnection = StockData.is_connected() - if internetConnection == False: - return + print('\nNOTE: Only IEX and Alpha Vantage support adjusted returns') + print('NOTE: Only Alpha Vantage and Tiingo support mutual fund data') - listOfFirstLastDates = [] - self.allLists = [] + # IEX + print("\nIEX") + listIEX = StockData.getIEX(self) + # print(listIEX) + if listIEX != 'Not available': + listOfFirstLastDates.append((listIEX[0], listIEX[1])) + self.allLists.append(listIEX) - print('\nNOTE: Only IEX and Alpha Vantage support adjusted returns') - print('NOTE: Only Alpha Vantage and Tiingo support mutual fund data') + # Alpha Vantage + print("\nAlpha Vantage (AV)") + listAV = StockData.getAV(self) + # print(listAV) + if listAV != 'Not available': + listOfFirstLastDates.append((listAV[0], 
listAV[1])) + self.allLists.append(listAV) - # IEX - print("\nIEX") - listIEX = StockData.getIEX(self) - #print(listIEX) - if listIEX != 'Not available': - listOfFirstLastDates.append((listIEX[0], listIEX[1])) - self.allLists.append(listIEX) - - # Alpha Vantage - print("\nAlpha Vantage (AV)") - listAV = StockData.getAV(self) - #print(listAV) - if listAV != 'Not available': - listOfFirstLastDates.append((listAV[0], listAV[1])) - self.allLists.append(listAV) - - # COMMENTED OUT FOR NOW B/C LIMITED - ''' + # COMMENTED OUT FOR NOW B/C LIMITED + ''' print("\nTiingo") print("NOTE: Tiingo does not return adjusted returns!!") listTiingo = StockData.getTiingo(self) @@ -526,36 +543,44 @@ class StockData: self.allLists.append(listTiingo) ''' - #print(self.allLists) - #print(listOfFirstLastDates) - if (len(self.allLists) > 0): - print("\n", end='') - print(len(self.allLists), "available source(s) for", self.name) - self.absFirstLastDates = StockData.getFirstLastDate(self, listOfFirstLastDates) - print("\nThe absolute first date with close values is:", self.absFirstLastDates[0]) - print("The absolute last date with close values is:", self.absFirstLastDates[1]) + # print(self.allLists) + # print(listOfFirstLastDates) + if (len(self.allLists) > 0): + print("\n", end='') + print(len(self.allLists), "available source(s) for", self.name) + self.absFirstLastDates = StockData.getFirstLastDate( + self, listOfFirstLastDates) + print("\nThe absolute first date with close values is:", + self.absFirstLastDates[0]) + print("The absolute last date with close values is:", + self.absFirstLastDates[1]) - print("\nCombining dates and averaging close values") - self.finalDatesAndClose = StockData.getFinalDatesAndClose(self) # Returns [List of Dates, List of Corresponding Close Values] - #print("All dates available:", self.finalDatesAndClose[0]) - #print("All close values:\n", self.finalDatesAndClose[1]) - finalDates = self.finalDatesAndClose[0] - finalClose = self.finalDatesAndClose[1] - 
print(len(finalDates), "unique dates:", finalDates[len(finalDates)-1], "...", finalDates[0]) - print(len(finalClose), "close values:", finalClose[len(finalClose)-1], "...", finalClose[0]) + print("\nCombining dates and averaging close values") + # Returns [List of Dates, List of Corresponding Close Values] + self.finalDatesAndClose = StockData.getFinalDatesAndClose(self) + #print("All dates available:", self.finalDatesAndClose[0]) + #print("All close values:\n", self.finalDatesAndClose[1]) + finalDates = self.finalDatesAndClose[0] + finalClose = self.finalDatesAndClose[1] + print(len(finalDates), "unique dates:", + finalDates[len(finalDates)-1], "...", finalDates[0]) + print(len(finalClose), "close values:", + finalClose[len(finalClose)-1], "...", finalClose[0]) - print("\nConverting list of final dates to datetime\n") - self.finalDatesAndClose2 = StockData.datetimeDates(self) - #print(self.finalDatesAndClose2[0][0]) + print("\nConverting list of final dates to datetime\n") + self.finalDatesAndClose2 = StockData.datetimeDates(self) + # print(self.finalDatesAndClose2[0][0]) - else: - print("No sources have data for", self.name) + else: + print("No sources have data for", self.name) + + +def main(): # For testing purposes + stockName = 'spy' + stock1 = StockData(stockName) + print("Finding available dates and close values for", stock1.name) + StockData.main(stock1) -def main(): # For testing purposes - stockName = 'spy' - stock1 = StockData(stockName) - print("Finding available dates and close values for", stock1.name) - StockData.main(stock1) if __name__ == "__main__": - main() + main() diff --git a/StockReturn.py b/StockReturn.py index 3911de5..5e8f78a 100644 --- a/StockReturn.py +++ b/StockReturn.py @@ -11,10 +11,11 @@ from StockData import StockData import datetime from Functions import Functions + class Return: - def __init__(self, newListOfReturn = [], newTimeFrame = [], newBeta = 0, newStandardDeviation = 0, newNegativeStandardDeviation = 0, newMarketReturn = 
0, newSize = 0, newSizeOfNeg = 0, newFirstLastDates = [], newAllLists = [], newAbsFirstLastDates = ''): + def __init__(self, newListOfReturn=[], newTimeFrame=[], newBeta=0, newStandardDeviation=0, newNegativeStandardDeviation=0, newMarketReturn=0, newSize=0, newSizeOfNeg=0, newFirstLastDates=[], newAllLists=[], newAbsFirstLastDates=''): self.listOfReturn = newListOfReturn - self.timeFrame = newTimeFrame # [years, months (30 days)] + self.timeFrame = newTimeFrame # [years, months (30 days)] self.beta = newBeta self.standardDeviation = newStandardDeviation self.negativeStandardDeviation = newNegativeStandardDeviation @@ -32,12 +33,14 @@ class Return: def getFirstLastDates(self, stock): firstLastDates = [] timeFrame = self.timeFrame - firstDate = datetime.datetime.now() - datetime.timedelta(days=timeFrame[0]*365) + firstDate = datetime.datetime.now( + ) - datetime.timedelta(days=timeFrame[0]*365) firstDate = firstDate - datetime.timedelta(days=timeFrame[1]*30) - firstDate = ''.join((str(firstDate.year),'-', str(firstDate.month), '-', str(firstDate.day))) + firstDate = ''.join( + (str(firstDate.year), '-', str(firstDate.month), '-', str(firstDate.day))) lastDate = StockData.returnAbsFirstLastDates(stock)[1] - #print(lastDate) + # print(lastDate) firstLastDates.append(firstDate) firstLastDates.append(lastDate) return firstLastDates @@ -60,19 +63,21 @@ class Return: if firstDateExists == False: print("Could not find first date. Changing first date to closest date") - tempDate = Functions.stringToDate(firstDate) # Change to datetime + tempDate = Functions.stringToDate(firstDate) # Change to datetime print('Original first date:', tempDate) #tempDate = datetime.date(2014,1,17) - newFirstDate = Functions.getNearest(finalDatesAndClose2[0], tempDate) + newFirstDate = Functions.getNearest( + finalDatesAndClose2[0], tempDate) print('New first date:', newFirstDate) firstDate = str(newFirstDate) if lastDateExists == False: print("Could not find final date. 
Changing final date to closest date") - tempDate2 = Functions.stringToDate(lastDate) # Change to datetime + tempDate2 = Functions.stringToDate(lastDate) # Change to datetime print('Original final date:', tempDate2) #tempDate2 = datetime.date(2014,1,17) - newLastDate = Functions.getNearest(finalDatesAndClose2[0], tempDate2) + newLastDate = Functions.getNearest( + finalDatesAndClose2[0], tempDate2) print('New final date:', newLastDate) lastDate = str(newLastDate) @@ -97,7 +102,8 @@ class Return: print('Close values:', firstClose, '...', lastClose) fullUnadjustedReturn = float(lastClose/firstClose) - unadjustedReturn = fullUnadjustedReturn**(1/(self.timeFrame[0]+(self.timeFrame[1])*.1)) + unadjustedReturn = fullUnadjustedReturn**( + 1/(self.timeFrame[0]+(self.timeFrame[1])*.1)) return unadjustedReturn def getBeta(self): @@ -113,9 +119,9 @@ class Return: for i in range(0, len(finalDates), 1): if finalDates[i] == str(firstDate): firstClose = finalClose[i] -55ggbh - #list1 = - list2 = [1,2,4,1] + + # list1 = + list2 = [1, 2, 4, 1] print(numpy.corrcoef(list1, list2)[0, 1]) @@ -138,7 +144,7 @@ class Return: timeFrameMonth = 0 print(timeFrameMonth) self.timeFrame.append(timeFrameMonth) - #print(self.timeFrame) + # print(self.timeFrame) self.firstLastDates = Return.getFirstLastDates(self, stock) print('Dates: ', self.firstLastDates) @@ -149,10 +155,10 @@ class Return: print('\nGetting unadjusted return') unadjustedReturn = Return.getUnadjustedReturn(self, stock) self.listOfReturn.append(unadjustedReturn) - print('Average annual return for the past', self.timeFrame[0], 'years and', self.timeFrame[1], 'months: ', end='') + print('Average annual return for the past', + self.timeFrame[0], 'years and', self.timeFrame[1], 'months: ', end='') print((self.listOfReturn[0]-1)*100, '%', sep='') - def main(self, stock): print('Beginning StockReturn.py') @@ -169,12 +175,14 @@ class Return: print('\nGetting unadjusted return') unadjustedReturn = Return.getUnadjustedReturn(self, stock) 
self.listOfReturn.append(unadjustedReturn) - print('Average annual return for the past', self.timeFrame[0], 'years and', self.timeFrame[1], 'months: ', end='') + print('Average annual return for the past', + self.timeFrame[0], 'years and', self.timeFrame[1], 'months: ', end='') print((self.listOfReturn[0]-1)*100, '%', sep='') #print('\nGetting beta') #beta = Return.getBeta(self, stock) + def main(): stockName = 'spy' stock1 = StockData(stockName) @@ -186,5 +194,6 @@ def main(): Return.main(stock1Return, stock1) + if __name__ == "__main__": - main() + main() diff --git a/listGoogle.py b/listGoogle.py deleted file mode 100644 index f911dba..0000000 --- a/listGoogle.py +++ /dev/null @@ -1,54 +0,0 @@ -# https://support.google.com/docs/answer/3093281?hl=en -# Historical data cannot be downloaded or accessed via the Sheets API or Apps Script. If you attempt to do so, you will see a #N/A error in place of the values in the corresponding cells of your spreadsheet. - -import gspread, time, webbrowser, msvcrt -from oauth2client.service_account import ServiceAccountCredentials - -def main(): - scope = ['https://spreadsheets.google.com/feeds', - 'https://www.googleapis.com/auth/drive'] - - credentials = ServiceAccountCredentials.from_json_keyfile_name('creds.json', scope) - - gc = gspread.authorize(credentials) - ''' - # Just by ID: - #sheet = gc.open_by_key('1YS8qBQCXKNfSgQgXeUdSGOd6lM2wm-inV0_1YE36vQM') - sheet = gc.open_by_url('https://docs.google.com/spreadsheets/d/1YS8qBQCXKNfSgQgXeUdSGOd6lM2wm-inV0_1YE36vQM') - worksheet = sheet.get_worksheet(0) - worksheet.update_acell('B1', 'bingo!') - #worksheet.update_cell(1, 2, 'Bingo!') - val = worksheet.acell('B1').value - #val = worksheet.cell(1, 2).value - print(val) - ''' - url = 'https://docs.google.com/spreadsheets/d/1YS8qBQCXKNfSgQgXeUdSGOd6lM2wm-inV0_1YE36vQM' - surl = 'https://www.andrewkdinh.com/u/listGoogle' - print("Opening", url) - #webbrowser.open(surl) - sheet = gc.open_by_url(url) - worksheet = 
sheet.get_worksheet(0) - print('Writing Google Finance function to A1') - worksheet.update_cell(1, 1, '=GOOGLEFINANCE("GOOG", "price", DATE(2014,1,1), DATE(2014,12,31), "DAILY")') - print('\nOpening link to the Google Sheet. Please download the file as comma-separated values (.csv) and move it to the directory of this Python file', - '\nFile > Download as > Comma-separated values(.csv,currentsheet)') - print("If the link did not open, please go to", surl) - print("Press any key to continue") - #time.sleep(45) - ''' - for i in range(60, 0, -1): - print(i, end='\r') - time.sleep(1) - ''' - waiting = True - while waiting == True: - if msvcrt.kbhit(): - waiting = False - - print("e") - - #val = worksheet.acell('A1').value - #print(val) - -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/main.py b/main.py index 138b810..2805c5b 100644 --- a/main.py +++ b/main.py @@ -1,55 +1,509 @@ # main.py # Andrew Dinh -# Python 3.6.1 -# Description: -''' -Asks users for mutual funds/stocks to compare -Asks to be compared (expense ratio, turnover, market capitalization, or persistence) -Asks for time period (Possibly: 1 year, 5 years, 10 years) -Makes the mutual funds as class Stock -Gets data from each API -Compare and contrast dates and end changeOverTime for set time period - NOTES: Later can worry about getting close values to make a graph or something -Gives correlation value using equation at the end (from 0 to 1) +# Python 3.6.7 -FIRST TESTING WITH EXPENSE RATIO +import requests +import json +import datetime +import numpy +import Functions + +# API Keys +apiAV = 'O42ICUV58EIZZQMU' +# apiBarchart = 'a17fab99a1c21cd6f847e2f82b592838' +apiBarchart = 'f40b136c6dc4451f9136bb53b9e70ffa' +apiTiingo = '2e72b53f2ab4f5f4724c5c1e4d5d4ac0af3f7ca8' +apiTradier = 'n26IFFpkOFRVsB5SNTVNXicE5MPD' +# If you're going to take these API keys and abuse it, you should really reconsider your life priorities + +''' +API Keys: + Alpha Vantage API Key: O42ICUV58EIZZQMU + 
Barchart API Key: a17fab99a1c21cd6f847e2f82b592838 + Possible other one? f40b136c6dc4451f9136bb53b9e70ffa + 150 getHistory queries per day + Tiingo API Key: 2e72b53f2ab4f5f4724c5c1e4d5d4ac0af3f7ca8 + Tradier API Key: n26IFFpkOFRVsB5SNTVNXicE5MPD + Monthly Bandwidth = 5 GB + Hourly Requests = 500 + Daily Requests = 20,000 + Symbol Requests = 500 + + Mutual funds: + Yes: Alpha Vantage, Tiingo + No: IEX, Barchart ''' + +class Stock: + + # GLOBAL VARIABLES + timeFrame = [] + benchmarkDates = [] + benchmarkCloseValues = [] + benchmarkUnadjustedReturn = 0 + + def __init__(self): + # BASIC DATA + self.name = '' # Ticker symbol + self.allDates = [] + self.allCloseValues = [] + self.dates = [] + self.closeValues = [] + self.datesMatchBenchmark = [] + self.closeValuesMatchBenchmark = [] + + # CALCULATED RETURN + self.unadjustedReturn = 0 + self.sortino = 0 + self.sharpe = 0 + self.treynor = 0 + self.alpha = 0 + self.beta = 0 + self.standardDeviation = 0 + self.negStandardDeviation = 0 + + # INDICATOR VALUES + self.expenseRatio = 0 + self.assetSize = 0 + self.turnover = 0 + self.persistence = [] # [Years, Months] + + # CALCULATED VALUES FOR INDICATORS + self.correlation = 0 + self.regression = 0 + + def setName(self, newName): + self.name = newName + + def getName(self): + return self.name + + def getAllDates(self): + return self.allDates + + def getAllCloseValues(self): + return self.allCloseValues + + def IEX(self): + print('IEX') + url = ''.join( + ('https://api.iextrading.com/1.0/stock/', self.name, '/chart/5y')) + #link = "https://api.iextrading.com/1.0/stock/spy/chart/5y" + print("\nSending request to:", url) + f = requests.get(url) + json_data = f.text + if json_data == 'Unknown symbol' or f.status_code == 404: + print("IEX not available") + return 'Not available' + loaded_json = json.loads(json_data) + listIEX = [] + + print("\nFinding all dates given") + allDates = [] + for i in range(0, len(loaded_json), 1): # If you want to do oldest first + # for i in 
range(len(loaded_json)-1, -1, -1): + line = loaded_json[i] + date = line['date'] + allDates.append(date) + listIEX.append(allDates) + print(len(listIEX[0]), "dates") + + print("\nFinding close values for each date") + values = [] + for i in range(0, len(loaded_json), 1): # If you want to do oldest first + # for i in range(len(loaded_json)-1, -1, -1): + line = loaded_json[i] + value = line['close'] + values.append(value) + listIEX.append(values) + print(len(listIEX[1]), "close values") + + return listIEX + + def AV(self): + print('Alpha Vantage') + listAV = [] + url = ''.join(('https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=', + self.name, '&outputsize=full&apikey=', apiAV)) + # https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=MSFT&outputsize=full&apikey=demo + + print("\nSending request to:", url) + print("(This will take a while)") + f = requests.get(url) + json_data = f.text + loaded_json = json.loads(json_data) + + if len(loaded_json) == 1 or f.status_code == 404: + print("Alpha Vantage not available") + return 'Not available' + + dailyTimeSeries = loaded_json['Time Series (Daily)'] + listOfDates = list(dailyTimeSeries) + # listAV.append(listOfDates) + listAV.append(list(reversed(listOfDates))) + + print("\nFinding close values for each date") + values = [] + for i in range(0, len(listOfDates), 1): + temp = listOfDates[i] + loaded_json2 = dailyTimeSeries[temp] + #value = loaded_json2['4. close'] + value = loaded_json2['5. 
adjusted close'] + values.append(value) + # listAV.append(values) + listAV.append(list(reversed(values))) + print(len(listAV[1]), "close values") + + return listAV + + def Tiingo(self): + print('Tiingo') + token = ''.join(('Token ', apiTiingo)) + headers = { + 'Content-Type': 'application/json', + 'Authorization': token + } + url = ''.join(('https://api.tiingo.com/tiingo/daily/', self.name)) + print("\nSending request to:", url) + f = requests.get(url, headers=headers) + loaded_json = f.json() + if len(loaded_json) == 1 or f.status_code == 404: + print("Tiingo not available") + return 'Not available' + + listTiingo = [] + + print("\nFinding first and last date") + firstDate = loaded_json['startDate'] + lastDate = loaded_json['endDate'] + print(firstDate, '...', lastDate) + + print("\nFinding all dates given", end='') + dates = [] + values = [] + url2 = ''.join((url, '/prices?startDate=', + firstDate, '&endDate=', lastDate)) + # https://api.tiingo.com/tiingo/daily//prices?startDate=2012-1-1&endDate=2016-1-1 + print("\nSending request to:", url2, '\n') + requestResponse2 = requests.get(url2, headers=headers) + loaded_json2 = requestResponse2.json() + for i in range(0, len(loaded_json2)-1, 1): + line = loaded_json2[i] + dateWithTime = line['date'] + temp = dateWithTime.split('T00:00:00.000Z') + date = temp[0] + dates.append(date) + + value = line['close'] + values.append(value) + listTiingo.append(dates) + print(len(listTiingo[0]), "dates") + + print("Finding close values for each date") + # Used loop from finding dates + listTiingo.append(values) + print(len(listTiingo[1]), "close values") + + return listTiingo + + def datesAndClose(self): + print('\n', Stock.getName(self), sep='') + + # sourceList = ['AV', 'Tiingo', 'IEX'] # Change back to this later + sourceList = ['Tiingo', 'IEX', 'AV'] + # Use each source until you get a value + for j in range(0, len(sourceList), 1): + source = sourceList[j] + print('\nSource being used: ', source) + + if source == 'AV': + 
datesAndCloseList = Stock.AV(self) + elif source == 'Tiingo': + datesAndCloseList = Stock.Tiingo(self) + elif source == 'IEX': + datesAndCloseList = Stock.IEX(self) + + if datesAndCloseList != 'Not available': + break + else: + #print(sourceList[j], 'does not have data available') + if j == len(sourceList)-1: + print('\nNo sources have data for', self.name) + return + # FIGURE OUT WHAT TO DO HERE + + # Convert dates to datetime + allDates = datesAndCloseList[0] + for j in range(0, len(allDates), 1): + allDates[j] = Functions.stringToDate(allDates[j]) + datesAndCloseList[0] = allDates + + return datesAndCloseList + + def datesAndClose2(self): + print('Shortening list to fit time frame') + # Have to do this because if I just make dates = self.allDates & closeValues = self.allCloseValues, then deleting from dates & closeValues also deletes it from self.allDates & self.allCloseValues (I'm not sure why) + dates = [] + closeValues = [] + for i in range(0, len(self.allDates), 1): + dates.append(self.allDates[i]) + closeValues.append(self.allCloseValues[i]) + + firstDate = datetime.datetime.now().date() - datetime.timedelta( + days=self.timeFrame[0]*365) - datetime.timedelta(days=self.timeFrame[1]*30) + print('\n', self.timeFrame[0], ' years and ', + self.timeFrame[1], ' months ago: ', firstDate, sep='') + closestDate = Functions.getNearest(dates, firstDate) + if closestDate != firstDate: + print('Closest date available for', self.name, ':', closestDate) + firstDate = closestDate + else: + print(self.name, 'has a close value for', firstDate) + + # Remove dates in list up to firstDate + while dates[0] != firstDate: + dates.remove(dates[0]) + + # Remove close values until list is same length as dates + while len(closeValues) != len(dates): + closeValues.remove(closeValues[0]) + + datesAndCloseList2 = [] + datesAndCloseList2.append(dates) + datesAndCloseList2.append(closeValues) + + print(len(dates), 'dates') + print(len(closeValues), 'close values') + + return 
datesAndCloseList2 + + def unadjustedReturn(self): + unadjustedReturn = (float(self.closeValues[len( + self.closeValues)-1]/self.closeValues[0])**(1/(self.timeFrame[0]+(self.timeFrame[1])*.1)))-1 + print('Annual unadjusted return:', unadjustedReturn) + return unadjustedReturn + + def beta(self, benchmarkMatchDatesAndCloseValues): + beta = numpy.corrcoef(self.closeValuesMatchBenchmark, + benchmarkMatchDatesAndCloseValues[1])[0, 1] + print('Beta:', beta) + return beta + + +def isConnected(): + import socket # To check internet connection + try: + # connect to the host -- tells us if the host is actually reachable + socket.create_connection(("www.andrewkdinh.com", 80)) + print('Internet connection is good!') + return True + except OSError: + # pass + print("No internet connection!") + return False + + +def checkPackages(): + import importlib.util + import sys + + packagesInstalled = True + packages = ['requests', 'numpy'] + for i in range(0, len(packages), 1): + package_name = packages[i] + spec = importlib.util.find_spec(package_name) + if spec is None: + print( + package_name + + " is not installed\nPlease type in 'pip install -r requirements.txt' to install all required packages") + packagesInstalled = False + return packagesInstalled + + +def benchmarkInit(): + # Treat benchmark like stock + benchmarkTicker = '' + while benchmarkTicker == '': + benchmarks = ['S&P500', 'DJIA', 'Russell 3000', 'MSCI EAFE'] + benchmarksTicker = ['SPY', 'DJIA', 'VTHR', 'EFT'] + print('\nList of benchmarks:', benchmarks) + + # benchmark = str(input('Benchmark to compare to: ')) + benchmark = 'S&P500' + + for i in range(0, len(benchmarks), 1): + if benchmark == benchmarks[i]: + benchmarkTicker = benchmarksTicker[i] + + if benchmarkTicker == '': + print('Benchmark not found. 
Please type in a benchmark from the list') + + print(benchmark, ' (', benchmarkTicker, ')', sep='') + + benchmark = Stock() + benchmark.setName(benchmarkTicker) + + return benchmark + + +def stocksInit(): + listOfStocks = [] + + # numberOfStocks = int(input('\nHow many stocks/mutual funds/ETFs would you like to analyze? ')) + numberOfStocks = 1 + + print('\nHow many stocks/mutual funds/ETFs would you like to analyze? ', numberOfStocks) + + for i in range(0, numberOfStocks, 1): + print('Stock', i + 1, ': ', end='') + #stockName = str(input()) + + stockName = 'FBGRX' + print(stockName) + + listOfStocks.append(stockName) + listOfStocks[i] = Stock() + listOfStocks[i].setName(stockName) + + return listOfStocks + + +def timeFrameInit(): + print('\nPlease enter the time frame in years and months (30 days)') + print("Years: ", end='') + #years = int(input()) + years = 5 + print(years) + print("Months: ", end='') + #months = int(input()) + months = 0 + print(months) + + timeFrame = [] + timeFrame.append(years) + timeFrame.append(months) + return timeFrame + + +def dataMain(listOfStocks): + print('\nGathering dates and close values') + for i in range(0, len(listOfStocks), 1): + + datesAndCloseList = Stock.datesAndClose(listOfStocks[i]) + listOfStocks[i].allDates = datesAndCloseList[0] + listOfStocks[i].allCloseValues = datesAndCloseList[1] + + # Clip list to fit time frame + datesAndCloseList2 = Stock.datesAndClose2(listOfStocks[i]) + listOfStocks[i].dates = datesAndCloseList2[0] + listOfStocks[i].closeValues = datesAndCloseList2[1] + + +def returnMain(benchmark, listOfStocks): + print('\nCalculating unadjusted return, Sharpe ratio, Sortino ratio, and Treynor ratio\n') + print(benchmark.name) + benchmark.unadjustedReturn = Stock.unadjustedReturn(benchmark) + + # Make benchmark data global + # Maybe remove this later + Stock.benchmarkDates = benchmark.dates + Stock.benchmarkCloseValues = benchmark.closeValues + Stock.benchmarkUnadjustedReturn = benchmark.unadjustedReturn + + 
for i in range(0, len(listOfStocks), 1): + print(listOfStocks[i].name) + + # Make sure each date has a value for both the benchmark and the stock + list1 = [] + list2 = [] + list1.append(listOfStocks[i].dates) + list1.append(listOfStocks[i].closeValues) + list2.append(Stock.benchmarkDates) + list2.append(Stock.benchmarkCloseValues) + temp = Functions.removeExtraDatesAndCloseValues(list1, list2) + listOfStocks[i].datesMatchBenchmark = temp[0][0] + listOfStocks[i].closeValuesMatchBenchmark = temp[0][1] + benchmarkMatchDatesAndCloseValues = temp[1] + + listOfStocks[i].unadjustedReturn = Stock.unadjustedReturn( + listOfStocks[i]) + listOfStocks[i].beta = Stock.beta( + listOfStocks[i], benchmarkMatchDatesAndCloseValues) + + +def main(): + # Test internet connection + internetConnection = isConnected() + if not internetConnection: + return + + # Check that all required packages are installed + packagesInstalled = checkPackages() + if not packagesInstalled: + return + + # Choose benchmark and makes it class Stock + benchmark = benchmarkInit() + # Add it to a list to work with other functions + benchmarkAsList = [] + benchmarkAsList.append(benchmark) + + # Asks for stock(s) ticker and makes them class Stock + listOfStocks = stocksInit() + + # Determine time frame [Years, Months] + timeFrame = timeFrameInit() + Stock.timeFrame = timeFrame # Needs to be a global variable for all stocks + + # Gather data for benchmark and stock(s) + dataMain(benchmarkAsList) + dataMain(listOfStocks) + + # Calculate return for benchmark and stock(s) + returnMain(benchmark, listOfStocks) + + +if __name__ == "__main__": + main() + + +''' from StockData import StockData from StockReturn import Return listOfStocksData = [] listOfStocksReturn = [] -#numberOfStocks = int(input("How many stocks or mutual funds would you like to analyze? ")) # CHANGE BACK LATER +# numberOfStocks = int(input("How many stocks or mutual funds would you like to analyze? 
")) # CHANGE BACK LATER numberOfStocks = 1 for i in range(0, numberOfStocks, 1): - print("Stock", i+1, ": ", end='') - stockName = str(input()) - listOfStocksData.append(i) - listOfStocksData[i] = StockData() - listOfStocksData[i].setName(stockName) - # print(listOfStocksData[i].name) + print("Stock", i+1, ": ", end='') + stockName = str(input()) + listOfStocksData.append(i) + listOfStocksData[i] = StockData() + listOfStocksData[i].setName(stockName) + # print(listOfStocksData[i].name) - #listOfStocksReturn.append(i) - #listOfStocksReturn[i] = StockReturn() + # listOfStocksReturn.append(i) + # listOfStocksReturn[i] = StockReturn() # Decide on a benchmark benchmarkTicker = '' while benchmarkTicker == '': - listOfBenchmarks = ['S&P500', 'DJIA', 'Russell 3000', 'MSCI EAFE'] - listOfBenchmarksTicker = ['SPY', 'DJIA', 'VTHR', 'EFT'] - print('\nList of benchmarks:', listOfBenchmarks) - #benchmark = str(input('Benchmark to compare to: ')) - benchmark = 'S&P500' + listOfBenchmarks = ['S&P500', 'DJIA', 'Russell 3000', 'MSCI EAFE'] + listOfBenchmarksTicker = ['SPY', 'DJIA', 'VTHR', 'EFT'] + print('\nList of benchmarks:', listOfBenchmarks) + # benchmark = str(input('Benchmark to compare to: ')) + benchmark = 'S&P500' - for i in range(0,len(listOfBenchmarks), 1): - if benchmark == listOfBenchmarks[i]: - benchmarkTicker = listOfBenchmarksTicker[i] - i = len(listOfBenchmarks) + for i in range(0,len(listOfBenchmarks), 1): + if benchmark == listOfBenchmarks[i]: + benchmarkTicker = listOfBenchmarksTicker[i] + i = len(listOfBenchmarks) - if benchmarkTicker == '': - print('Benchmark not found. Please type in a benchmark from the list') + if benchmarkTicker == '': + print('Benchmark not found. 
Please type in a benchmark from the list') print('\n', benchmark, ' (', benchmarkTicker, ')', sep='') @@ -66,10 +520,10 @@ print('Time Frame [years, months]:', timeFrame) sumOfListLengths = 0 for i in range(0, numberOfStocks, 1): - print('\n', listOfStocksData[i].name, sep='') - StockData.main(listOfStocksData[i]) - # Count how many stocks are available - sumOfListLengths = sumOfListLengths + len(StockData.returnAllLists(listOfStocksData[i])) + print('\n', listOfStocksData[i].name, sep='') + StockData.main(listOfStocksData[i]) + # Count how many stocks are available + sumOfListLengths = sumOfListLengths + len(StockData.returnAllLists(listOfStocksData[i])) if sumOfListLengths == 0: print("No sources have data for given stocks") @@ -77,41 +531,40 @@ if sumOfListLengths == 0: # Find return over time using either Jensen's Alpha, Sharpe Ratio, Sortino Ratio, or Treynor Ratio for i in range(0, numberOfStocks, 1): - print('\n', listOfStocksData[i].name, sep='') - #StockReturn.main(listOfStocksReturn[i]) + print('\n', listOfStocksData[i].name, sep='') + # StockReturn.main(listOfStocksReturn[i]) # Runs correlation or regression study # print(listOfStocksData[0].name, listOfStocksData[0].absFirstLastDates, listOfStocksData[0].finalDatesAndClose) indicatorFound = False while indicatorFound == False: - print("1. Expense Ratio\n2. Asset Size\n3. Turnover\n4. Persistence\nWhich indicator would you like to look at? ", end='') - - #indicator = str(input()) # CHANGE BACK TO THIS LATER - indicator = 'Expense Ratio' - print(indicator, end='') + print("1. Expense Ratio\n2. Asset Size\n3. Turnover\n4. Persistence\nWhich indicator would you like to look at? 
", end='') - indicatorFound = True - print('\n', end='') + # indicator = str(input()) # CHANGE BACK TO THIS LATER + indicator = 'Expense Ratio' + print(indicator, end='') - if indicator == 'Expense Ratio' or indicator == '1' or indicator == 'expense ratio': - #from ExpenseRatio import ExpenseRatio - print('\nExpense Ratio') + indicatorFound = True + print('\n', end='') - elif indicator == 'Asset Size' or indicator == '2' or indicator == 'asset size': - print('\nAsset Size') + if indicator == 'Expense Ratio' or indicator == '1' or indicator == 'expense ratio': + # from ExpenseRatio import ExpenseRatio + print('\nExpense Ratio') - elif indicator == 'Turnover' or indicator == '3' or indicator == 'turnover': - print('\nTurnover') + elif indicator == 'Asset Size' or indicator == '2' or indicator == 'asset size': + print('\nAsset Size') - elif indicator == 'Persistence' or indicator == '4' or indicator == 'persistence': - print('\nPersistence') + elif indicator == 'Turnover' or indicator == '3' or indicator == 'turnover': + print('\nTurnover') - else: - indicatorFound = False - print('Invalid input, please enter indicator again') + elif indicator == 'Persistence' or indicator == '4' or indicator == 'persistence': + print('\nPersistence') + + else: + indicatorFound = False + print('Invalid input, please enter indicator again') -''' stockName = 'IWV' stock1 = Stock(stockName) print("Finding available dates and close values for", stock1.name) diff --git a/requirements.txt b/requirements.txt index 2bd7352..fe41b2e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -requests==2.21.0 -numpy==1.15.4 \ No newline at end of file +requests~=2.21.0 +numpy~=1.15.4 \ No newline at end of file From bec100cd449064fd5531d3125b86706e5550e653 Mon Sep 17 00:00:00 2001 From: Andrew Dinh Date: Thu, 31 Jan 2019 13:22:24 -0800 Subject: [PATCH 2/6] Remove unneeded files --- ExpenseRatio.py | 30 --- StockData.py | 586 ------------------------------------------------ 
StockReturn.py | 199 ---------------- 3 files changed, 815 deletions(-) delete mode 100644 ExpenseRatio.py delete mode 100644 StockData.py delete mode 100644 StockReturn.py diff --git a/ExpenseRatio.py b/ExpenseRatio.py deleted file mode 100644 index a64312e..0000000 --- a/ExpenseRatio.py +++ /dev/null @@ -1,30 +0,0 @@ -# ExpenseRatio.py -# Andrew Dinh -# Python 3.6.1 -# Description: -''' -Asks user for expense ratio of stock (I don't think there's an API for expense ratios) -Runs corrrelation study (I'm not sure if I want another class for this or not) -''' - -import numpy -#import urllib2, re -from urllib.request import urlopen -import re - - -class ExpenseRatio: - def __init__(self): - - -def main(): # For testing purposes - ''' - a = [1,2,3] - b = [2,4,6] - c = numpy.corrcoef(a, b)[0, 1] - print(c) - ''' - - -if __name__ == "__main__": - main() diff --git a/StockData.py b/StockData.py deleted file mode 100644 index 41f81f5..0000000 --- a/StockData.py +++ /dev/null @@ -1,586 +0,0 @@ -# StockData.py -# Andrew Dinh -# Python 3.6.1 -# Description: Returns all available dates and prices for each stock requested. - -import json -import requests -from datetime import datetime -''' -Available API's: Can it do mutual funds? -IEX: No -Alpha Vantage (AV): Yes -Tiingo: Yes -Barchart: No -''' - -# Alpha Vantage API Key: O42ICUV58EIZZQMU -# Barchart API Key: a17fab99a1c21cd6f847e2f82b592838 # Possible other one? 
f40b136c6dc4451f9136bb53b9e70ffa -# Tiingo API Key: 2e72b53f2ab4f5f4724c5c1e4d5d4ac0af3f7ca8 -# Tradier API Key: n26IFFpkOFRVsB5SNTVNXicE5MPD -# If you're going to take these API keys and abuse it, you should really reconsider your life priorities - -apiAV = 'O42ICUV58EIZZQMU' -# apiBarchart = 'a17fab99a1c21cd6f847e2f82b592838' # 150 getHistory queries per day -apiBarchart = 'f40b136c6dc4451f9136bb53b9e70ffa' -apiTiingo = '2e72b53f2ab4f5f4724c5c1e4d5d4ac0af3f7ca8' -apiTradier = 'n26IFFpkOFRVsB5SNTVNXicE5MPD' -''' -Monthly Bandwidth = 5 GB -Hourly Requests = 500 -Daily Requests = 20,000 -Symbol Requests = 500 -''' - - -class StockData: - - def __init__(self, newName='', newAbsFirstLastDates=[], newFinalDatesAndClose=[], newFinalDatesAndClose2=[], newAllLists=[]): - self.name = newName # Name of stock - # Absolute first and last dates from all sources - self.absFirstLastDates = newAbsFirstLastDates - # All available dates with corresponding close values - self.finalDatesAndClose = newFinalDatesAndClose - # After some consideration, I decided to keep what I had already done here and make a new list that's the same except dates are in datetime format - self.finalDatesAndClose2 = newFinalDatesAndClose2 - self.allLists = newAllLists - ''' - Format: - # List from each source containing: [firstDate, lastDate, allDates, values, timeFrame] - # firstDate & lastDate = '2018-12-18' (year-month-date) - allDates = ['2018-12-17', '2018-12-14'] (year-month-date) - values (close) = ['164.6307', 164.6307] - timeFrame = [days, weeks, years] - ''' - - def set(self, newName, newFirstLastDates, newAbsFirstLastDates, newFinalDatesAndClose, newAllLists): - self.name = newName # Name of stock - # Dates that at least 2 sources have (or should it be all?) 
- Maybe let user decide - self.firstLastDates = newFirstLastDates - # Absolute first and last dates from all sources - self.absFirstLastDates = newAbsFirstLastDates - self.finalDatesAndClose = newFinalDatesAndClose - self.allLists = newAllLists - - def setName(self, newName): - self.name = newName - - def returnName(self): - return self.name - - def returnAllLists(self): - return self.allLists - - def returnAbsFirstLastDates(self): - return self.absFirstLastDates - - def returnAllLists(self): - return self.allLists - - def returnFinalDatesAndClose(self): - return self.finalDatesAndClose - - def returnFinalDatesAndClose2(self): - return self.finalDatesAndClose2 - - def getIEX(self): - url = ''.join( - ('https://api.iextrading.com/1.0/stock/', self.name, '/chart/5y')) - #link = "https://api.iextrading.com/1.0/stock/spy/chart/5y" - print("\nSending request to:", url) - f = requests.get(url) - json_data = f.text - # print(json_data) - if (json_data == 'Unknown symbol'): - print("IEX not available") - return 'Not available' - loaded_json = json.loads(json_data) - listIEX = [] - - print("\nFinding first and last date") - # Adding (firstDate, lastDate) to listIEX - # Find firstDate (comes first) - firstLine = loaded_json[0] - #print("firstLine:", firstLine) - firstDate = firstLine['date'] - # print("firstDate:",firstDate) - # Find lastDate (comes last) - # Returns last value of the list (Equivalent to len(loaded_json)-1) - lastLine = loaded_json[-1] - #print("lastLine:", lastLine) - lastDate = lastLine['date'] - #print("last date:", lastDate) - listIEX.append(firstDate) - listIEX.append(lastDate) - print(listIEX[0], ',', listIEX[1]) - - print("\nFinding all dates given") - allDates = [] -# for i in range(0, len(loaded_json), 1): # If you want to do oldest first - for i in range(len(loaded_json)-1, -1, -1): - line = loaded_json[i] - date = line['date'] - allDates.append(date) - listIEX.append(allDates) - - # print(listIEX[2]) - print(len(listIEX[2]), "dates") - - 
print("\nFinding close values for each date") - values = [] -# for i in range(0, len(loaded_json), 1): # If you want to do oldest first - for i in range(len(loaded_json)-1, -1, -1): - line = loaded_json[i] - value = line['close'] - values.append(value) - listIEX.append(values) - # print(listIEX[3]) - print(len(listIEX[3]), "close values") - - print("\nFinding time frame given [days, weeks, years]") - timeFrame = [] - d1 = datetime.strptime(firstDate, "%Y-%m-%d") - d2 = datetime.strptime(lastDate, "%Y-%m-%d") - timeFrameDays = abs((d2 - d1).days) - # print(timeFrameDays) - timeFrameYears = float(timeFrameDays / 365) - timeFrameWeeks = float(timeFrameDays / 7) - timeFrame.append(timeFrameDays) - timeFrame.append(timeFrameWeeks) - timeFrame.append(timeFrameYears) - listIEX.append(timeFrame) - print(listIEX[4]) - - return listIEX - - def getAV(self): - listAV = [] - #url = ''.join(('https://www.alphavantage.co/query?function=TIME_SERIES_MONTHLY&symbol=', self.name, '&apikey=', apiAV)) - # https://www.alphavantage.co/query?function=TIME_SERIES_MONTHLY&symbol=MSFT&apikey=demo - - #url = ''.join(('https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=', self.name, '&outputsize=full&apikey=', apiAV)) - # https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=MSFT&outputsize=full&apikey=demo - - url = ''.join(('https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=', - self.name, '&outputsize=full&apikey=', apiAV)) - # https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=MSFT&outputsize=full&apikey=demo - - print("\nSending request to:", url) - print("(This will take a while)") - f = requests.get(url) - json_data = f.text - loaded_json = json.loads(json_data) - # print(loaded_json) - - # print(type(loaded_json)) # Dictionary - # print(len(loaded_json)) - if len(loaded_json) == 1: - print("Alpha Vantage not available") - return 'Not available' - - #print(loaded_json['Monthly Time Series']) - 
dailyTimeSeries = loaded_json['Time Series (Daily)'] - # print(monthlyTimeSeries) - listOfDates = list(dailyTimeSeries) - # print(listOfDates) - - firstDate = listOfDates[-1] - lastDate = listOfDates[0] - #print("firstDate:", firstDate) - #print("lastDate:", lastDate) - listAV.append(firstDate) - listAV.append(lastDate) - listAV.append(listOfDates) - - print("\nFinding first and last date") - print(listAV[0], ',', listAV[1]) - print("\nFinding all dates given") - # print(listAV[2]) - print(len(listAV[2]), "dates") - - print("\nFinding close values for each date") - values = [] - for i in range(0, len(listOfDates), 1): - temp = listOfDates[i] - loaded_json2 = dailyTimeSeries[temp] - #value = loaded_json2['4. close'] - value = loaded_json2['5. adjusted close'] - values.append(value) - listAV.append(values) - # print(listOfDates[0]) - #i = listOfDates[0] - # print(monthlyTimeSeries[i]) - # print(listAV[3]) - print(len(listAV[3]), "close values") - - print("\nFinding time frame given [days, weeks, years]") - timeFrame = [] - d1 = datetime.strptime(firstDate, "%Y-%m-%d") - d2 = datetime.strptime(lastDate, "%Y-%m-%d") - timeFrameDays = abs((d2 - d1).days) - # print(timeFrameDays) - timeFrameYears = float(timeFrameDays / 365) - timeFrameWeeks = float(timeFrameDays / 7) - timeFrame.append(timeFrameDays) - timeFrame.append(timeFrameWeeks) - timeFrame.append(timeFrameYears) - listAV.append(timeFrame) - print(listAV[4]) - - return listAV - - def getTiingo(self): - ''' - #OR we can use the token directly in the url - headers = { - 'Content-Type': 'application/json' - } - requestResponse = requests.get("https://api.tiingo.com/api/test?token=", - headers=headers) - print(requestResponse.json()) - ''' - token = ''.join(('Token ', apiTiingo)) - headers = { - 'Content-Type': 'application/json', - 'Authorization': token - } - url = ''.join(('https://api.tiingo.com/tiingo/daily/', self.name)) - print("\nSending request to:", url) - requestResponse = requests.get(url, headers=headers) 
- # print(requestResponse.json()) - loaded_json = requestResponse.json() - # print(len(loaded_json)) - if len(loaded_json) == 1: - print("Tiingo not available") - return 'Not available' - # print(loaded_json) - ''' - list1 = list(loaded_json) - for i in range (0, len(list1), 1): - if list1[i] == 'startDate': - startNum = i - elif list1[i] == 'endDate': - endNum = i - print(list1[startNum]) - print(list1[endNum]) - ''' - listTiingo = [] - - print("\nFinding first and last date") - firstDate = loaded_json['startDate'] - lastDate = loaded_json['endDate'] - # print(firstDate) - # print(lastDate) - listTiingo.append(firstDate) - listTiingo.append(lastDate) - print(listTiingo[0], ',', listTiingo[1]) - - print("\nFinding all dates given") - dates = [] - values = [] # Used loop for finding values - url2 = ''.join((url, '/prices?startDate=', - firstDate, '&endDate=', lastDate)) - # https://api.tiingo.com/tiingo/daily//prices?startDate=2012-1-1&endDate=2016-1-1 - print("\nSending request to:", url2) - requestResponse2 = requests.get(url2, headers=headers) - loaded_json2 = requestResponse2.json() - # print(loaded_json2) - # print(len(loaded_json2)) - for i in range(len(loaded_json2)-1, -1, -1): - line = loaded_json2[i] - dateWithTime = line['date'] - temp = dateWithTime.split('T00:00:00.000Z') - date = temp[0] - dates.append(date) - - value = line['close'] - values.append(value) - listTiingo.append(dates) - # print(listTiingo[2]) - print(len(listTiingo[2]), "dates") - - print("Finding close values for each date") - # Used loop from finding dates - listTiingo.append(values) - # print(listTiingo[3]) - print(len(listTiingo[3]), "close values") - - print("Finding time frame given [days, weeks, years]") - timeFrame = [] - d1 = datetime.strptime(firstDate, "%Y-%m-%d") - d2 = datetime.strptime(lastDate, "%Y-%m-%d") - timeFrameDays = abs((d2 - d1).days) - # print(timeFrameDays) - timeFrameYears = float(timeFrameDays / 365) - timeFrameWeeks = float(timeFrameDays / 7) - 
timeFrame.append(timeFrameDays) - timeFrame.append(timeFrameWeeks) - timeFrame.append(timeFrameYears) - listTiingo.append(timeFrame) - print(listTiingo[4]) - - return listTiingo - - def getFirstLastDate(self, listOfFirstLastDates): - listOfFirstDates = [] - listOfLastDates = [] - # print(len(listOfFirstLastDates)) - for i in range(0, len(listOfFirstLastDates), 1): - firstLastDates = listOfFirstLastDates[i] - firstDate = firstLastDates[0] - lastDate = firstLastDates[1] - listOfFirstDates.append(firstDate) - listOfLastDates.append(lastDate) - # print(listOfFirstDates) - # print(listOfLastDates) - for i in range(0, len(listOfFirstDates), 1): - date = listOfFirstDates[i] - if i == 0: - firstDate = date - yearMonthDay = firstDate.split('-') - firstYear = yearMonthDay[0] - firstMonth = yearMonthDay[1] - firstDay = yearMonthDay[2] - else: - yearMonthDay = date.split('-') - year = yearMonthDay[0] - month = yearMonthDay[1] - day = yearMonthDay[2] - if year < firstYear or (year == firstYear and month < firstMonth) or (year == firstYear and month == firstMonth and day < firstDay): - firstDate = date - firstYear = year - firstMonth = month - firstDay = day - # print(firstDate) - if len(listOfFirstDates) > 1: - for i in range(0, len(listOfLastDates), 1): - date = listOfLastDates[i] - if i == 0: - lastDate = date - yearMonthDay = lastDate.split('-') - lastYear = yearMonthDay[0] - lastMonth = yearMonthDay[1] - lastDay = yearMonthDay[2] - else: - yearMonthDay = date.split('-') - year = yearMonthDay[0] - month = yearMonthDay[1] - day = yearMonthDay[2] - if year > lastYear or (year == lastYear and month > lastMonth) or (year == lastYear and month == lastMonth and day > lastDay): - lastDate = date - lastYear = year - lastMonth = month - lastDay = day - # print(lastDate) - absFirstLastDates = [] - absFirstLastDates.append(firstDate) - absFirstLastDates.append(lastDate) - return absFirstLastDates - - def getFinalDatesAndClose(self): - # finalDates and finalClose will coincide (aka i = 
1 will correspond to one another) - finalDatesAndClose = [] # Will combine finalDates then finalClose - finalDates = [] - finalClose = [] - # print(self.absFirstLastDates) - absFirstDate = self.absFirstLastDates[0] - absLastDate = self.absFirstLastDates[1] - date = absFirstDate - - allLists = self.allLists - while date != absLastDate: # DOESN'T DO LAST DATE - tempListOfClose = [] - found = False - for j in range(0, len(allLists), 1): # Look for date in all lists - list1 = allLists[j] - listOfDates = list1[2] - listOfClose = list1[3] - for k in range(0, len(listOfDates), 1): - if listOfDates[k] == date: - if found == False: - finalDates.append(date) - found = True - # print(listOfDates[k]) - # print(listOfClose[k]) - # print(listOfClose) - tempListOfClose.append(float(listOfClose[k])) - k = len(listOfDates) # Dates don't repeat - - if found == True: - sum = 0 - for r in range(0, len(tempListOfClose), 1): - sum = sum + tempListOfClose[r] - close = sum/len(tempListOfClose) - - finalClose.append(close) - # print(close) - - # Go to the next day - yearMonthDay = date.split('-') - year = int(yearMonthDay[0]) - month = int(yearMonthDay[1]) - day = int(yearMonthDay[2]) - - day = day + 1 - if day == 32 and month == 12: # Next year - day = 1 - month = 1 - year = year + 1 - elif day == 32: # Next month - month = month + 1 - day = 1 - if day < 10: - day = ''.join(('0', str(day))) - if month < 10: - month = ''.join(('0', str(month))) - date = ''.join((str(year), '-', str(month), '-', str(day))) - # print(date) - - # For last date - finalDates.append(date) - tempListOfClose = [] - for j in range(0, len(allLists), 1): # Look for date in all lists - list1 = allLists[j] - listOfDates = list1[2] - listOfClose = list1[3] - for k in range(0, len(listOfDates), 1): - if listOfDates[k] == date: - tempListOfClose.append(float(listOfClose[k])) - k = len(listOfDates) # Dates don't repeat - sum = 0 - for r in range(0, len(tempListOfClose), 1): - sum = sum + tempListOfClose[r] - close = 
sum/len(tempListOfClose) - finalClose.append(close) - # print(finalDates) - # print(finalClose) - - # Want lists from most recent to oldest, comment this out if you don't want that - finalDates = list(reversed(finalDates)) - finalClose = list(reversed(finalClose)) - - finalDatesAndClose.append(finalDates) - finalDatesAndClose.append(finalClose) - return finalDatesAndClose - - def datetimeDates(self): - finalDatesAndClose2 = [] - finalDatesAndClose = self.finalDatesAndClose - finalDatesStrings = finalDatesAndClose[0] - finalClose = finalDatesAndClose[1] - finalDates = [] - - from Functions import Functions - for i in range(0, len(finalDatesStrings), 1): - temp = Functions.stringToDate(finalDatesStrings[i]) - finalDates.append(temp) - # print(finalDates) - - finalDatesAndClose2.append(finalDates) - finalDatesAndClose2.append(finalClose) - return(finalDatesAndClose2) - - def is_connected(): - import socket # To check internet connection - try: - # connect to the host -- tells us if the host is actually - # reachable - socket.create_connection(("www.andrewkdinh.com", 80)) - return True - except OSError: - # pass - print("\nNo internet connection!") - return False - - def main(self): - print('Beginning StockData.py') - - import importlib.util - import sys # To check whether a package is installed - - packages = ['requests'] - for i in range(0, len(packages), 1): - package_name = packages[i] - spec = importlib.util.find_spec(package_name) - if spec is None: - print(package_name + " is not installed\nPlease type in 'pip install -r requirements.txt' to install all required packages") - - # Test internet connection - internetConnection = StockData.is_connected() - if internetConnection == False: - return - - listOfFirstLastDates = [] - self.allLists = [] - - print('\nNOTE: Only IEX and Alpha Vantage support adjusted returns') - print('NOTE: Only Alpha Vantage and Tiingo support mutual fund data') - - # IEX - print("\nIEX") - listIEX = StockData.getIEX(self) - # 
print(listIEX) - if listIEX != 'Not available': - listOfFirstLastDates.append((listIEX[0], listIEX[1])) - self.allLists.append(listIEX) - - # Alpha Vantage - print("\nAlpha Vantage (AV)") - listAV = StockData.getAV(self) - # print(listAV) - if listAV != 'Not available': - listOfFirstLastDates.append((listAV[0], listAV[1])) - self.allLists.append(listAV) - - # COMMENTED OUT FOR NOW B/C LIMITED - ''' - print("\nTiingo") - print("NOTE: Tiingo does not return adjusted returns!!") - listTiingo = StockData.getTiingo(self) - #print(listTiingo) - if listTiingo != 'Not available': - listOfFirstLastDates.append((listTiingo[0], listTiingo[1])) - self.allLists.append(listTiingo) - ''' - - # print(self.allLists) - # print(listOfFirstLastDates) - if (len(self.allLists) > 0): - print("\n", end='') - print(len(self.allLists), "available source(s) for", self.name) - self.absFirstLastDates = StockData.getFirstLastDate( - self, listOfFirstLastDates) - print("\nThe absolute first date with close values is:", - self.absFirstLastDates[0]) - print("The absolute last date with close values is:", - self.absFirstLastDates[1]) - - print("\nCombining dates and averaging close values") - # Returns [List of Dates, List of Corresponding Close Values] - self.finalDatesAndClose = StockData.getFinalDatesAndClose(self) - #print("All dates available:", self.finalDatesAndClose[0]) - #print("All close values:\n", self.finalDatesAndClose[1]) - finalDates = self.finalDatesAndClose[0] - finalClose = self.finalDatesAndClose[1] - print(len(finalDates), "unique dates:", - finalDates[len(finalDates)-1], "...", finalDates[0]) - print(len(finalClose), "close values:", - finalClose[len(finalClose)-1], "...", finalClose[0]) - - print("\nConverting list of final dates to datetime\n") - self.finalDatesAndClose2 = StockData.datetimeDates(self) - # print(self.finalDatesAndClose2[0][0]) - - else: - print("No sources have data for", self.name) - - -def main(): # For testing purposes - stockName = 'spy' - stock1 = 
StockData(stockName) - print("Finding available dates and close values for", stock1.name) - StockData.main(stock1) - - -if __name__ == "__main__": - main() diff --git a/StockReturn.py b/StockReturn.py deleted file mode 100644 index 5e8f78a..0000000 --- a/StockReturn.py +++ /dev/null @@ -1,199 +0,0 @@ -# ExpenseRatio.py -# Andrew Dinh -# Python 3.6.7 -# Description: -''' -Calculates return for each stock from the lists from ExpenseRatio.py -listOfReturn = [Unadjusted Return, Sharpe Ratio, Sortino Ratio, Treynor Ratio, Jensen's Alpha] -''' - -from StockData import StockData -import datetime -from Functions import Functions - - -class Return: - def __init__(self, newListOfReturn=[], newTimeFrame=[], newBeta=0, newStandardDeviation=0, newNegativeStandardDeviation=0, newMarketReturn=0, newSize=0, newSizeOfNeg=0, newFirstLastDates=[], newAllLists=[], newAbsFirstLastDates=''): - self.listOfReturn = newListOfReturn - self.timeFrame = newTimeFrame # [years, months (30 days)] - self.beta = newBeta - self.standardDeviation = newStandardDeviation - self.negativeStandardDeviation = newNegativeStandardDeviation - self.marketReturn = newMarketReturn - self.size = newSize - self.sizeOfNeg = newSizeOfNeg - self.firstLastDates = newFirstLastDates - - def returnTimeFrame(self): - return self.timeFrame - - def setTimeFrame(self, newTimeFrame): - self.timeFrame = newTimeFrame - - def getFirstLastDates(self, stock): - firstLastDates = [] - timeFrame = self.timeFrame - firstDate = datetime.datetime.now( - ) - datetime.timedelta(days=timeFrame[0]*365) - firstDate = firstDate - datetime.timedelta(days=timeFrame[1]*30) - firstDate = ''.join( - (str(firstDate.year), '-', str(firstDate.month), '-', str(firstDate.day))) - - lastDate = StockData.returnAbsFirstLastDates(stock)[1] - # print(lastDate) - firstLastDates.append(firstDate) - firstLastDates.append(lastDate) - return firstLastDates - - def getFirstLastDates2(self, stock): - finalDatesAndClose = StockData.returnFinalDatesAndClose(stock) 
- finalDatesAndClose2 = StockData.returnFinalDatesAndClose2(stock) - firstDate = self.firstLastDates[0] - lastDate = self.firstLastDates[1] - finalDates = finalDatesAndClose[0] - - firstDateExists = False - lastDateExists = False - for i in range(0, len(finalDates), 1): - if finalDates[i] == str(firstDate): - firstDateExists = True - elif finalDates[i] == lastDate: - lastDateExists = True - i = len(finalDates) - - if firstDateExists == False: - print("Could not find first date. Changing first date to closest date") - tempDate = Functions.stringToDate(firstDate) # Change to datetime - print('Original first date:', tempDate) - #tempDate = datetime.date(2014,1,17) - newFirstDate = Functions.getNearest( - finalDatesAndClose2[0], tempDate) - print('New first date:', newFirstDate) - firstDate = str(newFirstDate) - - if lastDateExists == False: - print("Could not find final date. Changing final date to closest date") - tempDate2 = Functions.stringToDate(lastDate) # Change to datetime - print('Original final date:', tempDate2) - #tempDate2 = datetime.date(2014,1,17) - newLastDate = Functions.getNearest( - finalDatesAndClose2[0], tempDate2) - print('New final date:', newLastDate) - lastDate = str(newLastDate) - - firstLastDates = [] - firstLastDates.append(firstDate) - firstLastDates.append(lastDate) - return firstLastDates - - def getUnadjustedReturn(self, stock): - finalDatesAndClose = StockData.returnFinalDatesAndClose(stock) - firstDate = self.firstLastDates[0] - lastDate = self.firstLastDates[1] - finalDates = finalDatesAndClose[0] - finalClose = finalDatesAndClose[1] - - for i in range(0, len(finalDates), 1): - if finalDates[i] == str(firstDate): - firstClose = finalClose[i] - elif finalDates[i] == lastDate: - lastClose = finalClose[i] - i = len(finalDates) - - print('Close values:', firstClose, '...', lastClose) - fullUnadjustedReturn = float(lastClose/firstClose) - unadjustedReturn = fullUnadjustedReturn**( - 1/(self.timeFrame[0]+(self.timeFrame[1])*.1)) - return 
unadjustedReturn - - def getBeta(self): - # Can be calculated with correlation - import numpy as np - - finalDatesAndClose = StockData.returnFinalDatesAndClose(stock) - firstDate = self.firstLastDates[0] - lastDate = self.firstLastDates[1] - finalDates = finalDatesAndClose[0] - finalClose = finalDatesAndClose[1] - - for i in range(0, len(finalDates), 1): - if finalDates[i] == str(firstDate): - firstClose = finalClose[i] - - # list1 = - list2 = [1, 2, 4, 1] - - print(numpy.corrcoef(list1, list2)[0, 1]) - -# def getStandardDeviation(self, timeFrame): - - def mainBenchmark(self, stock): - print('Beginning StockReturn.py') - - # Find date to start from and last date - self.timeFrame = [] - self.listOfReturn = [] - - print("\nPlease enter a time frame in years: ", end='') - #timeFrameYear = int(input()) - timeFrameYear = 5 - print(timeFrameYear) - self.timeFrame.append(timeFrameYear) - print("Please enter a time frame in months (30 days): ", end='') - #timeFrameMonth = int(input()) - timeFrameMonth = 0 - print(timeFrameMonth) - self.timeFrame.append(timeFrameMonth) - # print(self.timeFrame) - self.firstLastDates = Return.getFirstLastDates(self, stock) - print('Dates: ', self.firstLastDates) - - print('\nMaking sure dates are within list...') - self.firstLastDates = Return.getFirstLastDates2(self, stock) - print('New dates: ', self.firstLastDates) - - print('\nGetting unadjusted return') - unadjustedReturn = Return.getUnadjustedReturn(self, stock) - self.listOfReturn.append(unadjustedReturn) - print('Average annual return for the past', - self.timeFrame[0], 'years and', self.timeFrame[1], 'months: ', end='') - print((self.listOfReturn[0]-1)*100, '%', sep='') - - def main(self, stock): - print('Beginning StockReturn.py') - - # Find date to start from and last date - self.listOfReturn = [] - - self.firstLastDates = Return.getFirstLastDates(self, stock) - print('Dates: ', self.firstLastDates) - - print('\nMaking sure dates are within list...') - self.firstLastDates = 
Return.getFirstLastDates2(self, stock) - print('New dates: ', self.firstLastDates) - - print('\nGetting unadjusted return') - unadjustedReturn = Return.getUnadjustedReturn(self, stock) - self.listOfReturn.append(unadjustedReturn) - print('Average annual return for the past', - self.timeFrame[0], 'years and', self.timeFrame[1], 'months: ', end='') - print((self.listOfReturn[0]-1)*100, '%', sep='') - - #print('\nGetting beta') - #beta = Return.getBeta(self, stock) - - -def main(): - stockName = 'spy' - stock1 = StockData(stockName) - print("Finding available dates and close values for", stock1.name) - StockData.main(stock1) - - stock1Return = Return() - Return.setTimeFrame(stock1Return, [5, 0]) - - Return.main(stock1Return, stock1) - - -if __name__ == "__main__": - main() From d398924c0d4afe3d62a389c387e4b6b0c2fd5650 Mon Sep 17 00:00:00 2001 From: Andrew Dinh Date: Thu, 14 Feb 2019 12:17:22 -0800 Subject: [PATCH 3/6] Finished basic functionalities --- .gitignore | 3 +- Functions.py | 31 +- main.py | 787 +++++++++++++++++++++++++++++++++++------------ output.bin | 51 +++ requirements.txt | 3 +- test.html | 0 6 files changed, 676 insertions(+), 199 deletions(-) create mode 100644 output.bin create mode 100644 test.html diff --git a/.gitignore b/.gitignore index 1acf435..a204a08 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ __pycache__/StockData.cpython-37.pyc __pycache__/ *.pyc test/ -.vscode/ \ No newline at end of file +.vscode/ +requests_cache.sqlite \ No newline at end of file diff --git a/Functions.py b/Functions.py index ae24926..32aeab4 100644 --- a/Functions.py +++ b/Functions.py @@ -1,21 +1,17 @@ # Python file for general functions + + def getNearest(items, pivot): return min(items, key=lambda x: abs(x - pivot)) + def stringToDate(date): from datetime import datetime #datetime_object = datetime.strptime('Jun 1 2005 1:33PM', '%b %d %Y %I:%M%p') datetime_object = datetime.strptime(date, '%Y-%m-%d').date() return(datetime_object) - ''' - dateSplit = 
date.split('-') - year = int(dateSplit[0]) - month = int(dateSplit[1]) - day = int(dateSplit[2]) - datetime_object = datetime.date(year, month, day) - ''' - return datetime_object + def removeExtraDatesAndCloseValues(list1, list2): # Returns the two lists but with the extra dates and corresponding close values removed @@ -39,6 +35,25 @@ def removeExtraDatesAndCloseValues(list1, list2): return returnList +def stringIsInt(s): + try: + int(s) + return True + except ValueError: + return False + +def strintIsFloat(s): + try: + float(s) + return True + except ValueError: + return False + +def fromCache(r): + import requests_cache + if r.from_cache == True: + print('(Response taken from cache)') + def main(): exit() diff --git a/main.py b/main.py index 2805c5b..c1a43a0 100644 --- a/main.py +++ b/main.py @@ -1,12 +1,24 @@ -# main.py +# https://github.com/andrewkdinh/fund-indicators +# Determine indicators of overperforming mutual funds # Andrew Dinh # Python 3.6.7 +# Required import requests import json import datetime -import numpy import Functions +import numpy as np + +# Required for linear regression +import matplotlib.pyplot as plt +import sys + +# Optional +import requests_cache +# https://requests-cache.readthedocs.io/en/lates/user_guide.html +requests_cache.install_cache( + 'requests_cache', backend='sqlite', expire_after=43200) # 12 hours # API Keys apiAV = 'O42ICUV58EIZZQMU' @@ -14,12 +26,14 @@ apiAV = 'O42ICUV58EIZZQMU' apiBarchart = 'f40b136c6dc4451f9136bb53b9e70ffa' apiTiingo = '2e72b53f2ab4f5f4724c5c1e4d5d4ac0af3f7ca8' apiTradier = 'n26IFFpkOFRVsB5SNTVNXicE5MPD' +apiQuandl = 'KUh3U3hxke9tCimjhWEF' +# apiIntrinio = 'OmNmN2E5YWI1YzYxN2Q4NzEzZDhhOTgwN2E2NWRhOWNl' # If you're going to take these API keys and abuse it, you should really reconsider your life priorities ''' API Keys: Alpha Vantage API Key: O42ICUV58EIZZQMU - Barchart API Key: a17fab99a1c21cd6f847e2f82b592838 + Barchart API Key: a17fab99a1c21cd6f847e2f82b592838 Possible other one? 
f40b136c6dc4451f9136bb53b9e70ffa 150 getHistory queries per day Tiingo API Key: 2e72b53f2ab4f5f4724c5c1e4d5d4ac0af3f7ca8 @@ -28,20 +42,35 @@ API Keys: Hourly Requests = 500 Daily Requests = 20,000 Symbol Requests = 500 + Quandl API Key: KUh3U3hxke9tCimjhWEF + Intrinio API Key: OmNmN2E5YWI1YzYxN2Q4NzEzZDhhOTgwN2E2NWRhOWNl - Mutual funds: + Mutual funds? Yes: Alpha Vantage, Tiingo No: IEX, Barchart + + Adjusted? + Yes: Alpha Vantage, IEX + No: Tiingo ''' class Stock: # GLOBAL VARIABLES - timeFrame = [] + timeFrame = 0 + riskFreeRate = 0 + indicator = '' + + # BENCHMARK VALUES benchmarkDates = [] benchmarkCloseValues = [] - benchmarkUnadjustedReturn = 0 + benchmarkAverageAnnualReturn = 0 + benchmarkStandardDeviation = 0 + + # INDICATOR VALUES + indicatorCorrelation = [] + indicatorRegression = [] def __init__(self): # BASIC DATA @@ -54,24 +83,20 @@ class Stock: self.closeValuesMatchBenchmark = [] # CALCULATED RETURN - self.unadjustedReturn = 0 - self.sortino = 0 + self.averageAnnualReturn = 0 + self.annualReturn = [] self.sharpe = 0 + self.sortino = 0 self.treynor = 0 self.alpha = 0 self.beta = 0 self.standardDeviation = 0 - self.negStandardDeviation = 0 + self.downsideDeviation = 0 + self.kurtosis = 0 + self.skewness = 0 # Not sure if I need this + self.linearRegression = [] # for y=mx+b, this list has [m,b] - # INDICATOR VALUES - self.expenseRatio = 0 - self.assetSize = 0 - self.turnover = 0 - self.persistence = [] # [Years, Months] - - # CALCULATED VALUES FOR INDICATORS - self.correlation = 0 - self.regression = 0 + self.indicatorValue = '' def setName(self, newName): self.name = newName @@ -89,9 +114,10 @@ class Stock: print('IEX') url = ''.join( ('https://api.iextrading.com/1.0/stock/', self.name, '/chart/5y')) - #link = "https://api.iextrading.com/1.0/stock/spy/chart/5y" + # link = "https://api.iextrading.com/1.0/stock/spy/chart/5y" print("\nSending request to:", url) f = requests.get(url) + Functions.fromCache(f) json_data = f.text if json_data == 'Unknown 
symbol' or f.status_code == 404: print("IEX not available") @@ -129,8 +155,8 @@ class Stock: # https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=MSFT&outputsize=full&apikey=demo print("\nSending request to:", url) - print("(This will take a while)") f = requests.get(url) + Functions.fromCache(f) json_data = f.text loaded_json = json.loads(json_data) @@ -148,9 +174,9 @@ class Stock: for i in range(0, len(listOfDates), 1): temp = listOfDates[i] loaded_json2 = dailyTimeSeries[temp] - #value = loaded_json2['4. close'] + # value = loaded_json2['4. close'] value = loaded_json2['5. adjusted close'] - values.append(value) + values.append(float(value)) # listAV.append(values) listAV.append(list(reversed(values))) print(len(listAV[1]), "close values") @@ -167,8 +193,9 @@ class Stock: url = ''.join(('https://api.tiingo.com/tiingo/daily/', self.name)) print("\nSending request to:", url) f = requests.get(url, headers=headers) + Functions.fromCache(f) loaded_json = f.json() - if len(loaded_json) == 1 or f.status_code == 404: + if len(loaded_json) == 1 or f.status_code == 404 or loaded_json['startDate'] == None: print("Tiingo not available") return 'Not available' @@ -187,6 +214,7 @@ class Stock: # https://api.tiingo.com/tiingo/daily//prices?startDate=2012-1-1&endDate=2016-1-1 print("\nSending request to:", url2, '\n') requestResponse2 = requests.get(url2, headers=headers) + Functions.fromCache(requestResponse2) loaded_json2 = requestResponse2.json() for i in range(0, len(loaded_json2)-1, 1): line = loaded_json2[i] @@ -210,12 +238,12 @@ class Stock: def datesAndClose(self): print('\n', Stock.getName(self), sep='') - # sourceList = ['AV', 'Tiingo', 'IEX'] # Change back to this later - sourceList = ['Tiingo', 'IEX', 'AV'] + sourceList = ['AV', 'IEX', 'Tiingo'] + # sourceList = ['IEX', 'Tiingo', 'AV'] # Use each source until you get a value for j in range(0, len(sourceList), 1): source = sourceList[j] - print('\nSource being used: ', source) + 
print('\nSource being used:', source) if source == 'AV': datesAndCloseList = Stock.AV(self) @@ -227,11 +255,11 @@ class Stock: if datesAndCloseList != 'Not available': break else: - #print(sourceList[j], 'does not have data available') if j == len(sourceList)-1: print('\nNo sources have data for', self.name) - return - # FIGURE OUT WHAT TO DO HERE + print('Removing', self.name, + 'from list of stocks to ensure compatibility later') + return 'Not available' # Convert dates to datetime allDates = datesAndCloseList[0] @@ -241,7 +269,7 @@ class Stock: return datesAndCloseList - def datesAndClose2(self): + def datesAndCloseFitTimeFrame(self): print('Shortening list to fit time frame') # Have to do this because if I just make dates = self.allDates & closeValues = self.allCloseValues, then deleting from dates & closeValues also deletes it from self.allDates & self.allCloseValues (I'm not sure why) dates = [] @@ -251,9 +279,8 @@ class Stock: closeValues.append(self.allCloseValues[i]) firstDate = datetime.datetime.now().date() - datetime.timedelta( - days=self.timeFrame[0]*365) - datetime.timedelta(days=self.timeFrame[1]*30) - print('\n', self.timeFrame[0], ' years and ', - self.timeFrame[1], ' months ago: ', firstDate, sep='') + days=self.timeFrame*365) + print('\n', self.timeFrame, ' years ago: ', firstDate, sep='') closestDate = Functions.getNearest(dates, firstDate) if closestDate != firstDate: print('Closest date available for', self.name, ':', closestDate) @@ -278,21 +305,199 @@ class Stock: return datesAndCloseList2 - def unadjustedReturn(self): - unadjustedReturn = (float(self.closeValues[len( - self.closeValues)-1]/self.closeValues[0])**(1/(self.timeFrame[0]+(self.timeFrame[1])*.1)))-1 - print('Annual unadjusted return:', unadjustedReturn) - return unadjustedReturn + def calcAverageAnnualReturn(self): # pylint: disable=E0202 + # averageAnnualReturn = (float(self.closeValues[len(self.closeValues)-1]/self.closeValues[0])**(1/(self.timeFrame)))-1 + # 
averageAnnualReturn = averageAnnualReturn * 100 + averageAnnualReturn = sum(self.annualReturn)/self.timeFrame + print('Average annual return:', averageAnnualReturn) + return averageAnnualReturn - def beta(self, benchmarkMatchDatesAndCloseValues): - beta = numpy.corrcoef(self.closeValuesMatchBenchmark, - benchmarkMatchDatesAndCloseValues[1])[0, 1] + def calcAnnualReturn(self): + annualReturn = [] + + # Calculate annual return in order from oldest to newest + annualReturn = [] + for i in range(0, self.timeFrame, 1): + firstDate = datetime.datetime.now().date() - datetime.timedelta( + days=(self.timeFrame-i)*365) + secondDate = datetime.datetime.now().date() - datetime.timedelta( + days=(self.timeFrame-i-1)*365) + + # Find closest dates to firstDate and lastDate + firstDate = Functions.getNearest(self.dates, firstDate) + secondDate = Functions.getNearest(self.dates, secondDate) + + if firstDate == secondDate: + print('Closest date is', firstDate, + 'which is after the given time frame.') + return 'Not available' + + # Get corresponding close values and calculate annual return + for i in range(0, len(self.dates), 1): + if self.dates[i] == firstDate: + firstClose = self.closeValues[i] + elif self.dates[i] == secondDate: + secondClose = self.closeValues[i] + break + + annualReturnTemp = (secondClose/firstClose)-1 + annualReturnTemp = annualReturnTemp * 100 + annualReturn.append(annualReturnTemp) + + print('Annual return over the past', + self.timeFrame, 'years:', annualReturn) + return annualReturn + + def calcCorrelation(self, closeList): + correlation = np.corrcoef( + self.closeValuesMatchBenchmark, closeList)[0, 1] + print('Correlation with benchmark:', correlation) + return correlation + + def calcStandardDeviation(self): + numberOfValues = self.timeFrame + mean = self.averageAnnualReturn + standardDeviation = ( + (sum((self.annualReturn[x]-mean)**2 for x in range(0, numberOfValues, 1)))/(numberOfValues-1))**(1/2) + print('Standard Deviation:', standardDeviation) + 
return standardDeviation + + def calcDownsideDeviation(self): + numberOfValues = self.timeFrame + targetReturn = self.averageAnnualReturn + downsideDeviation = ( + (sum(min(0, (self.annualReturn[x]-targetReturn))**2 for x in range(0, numberOfValues, 1)))/(numberOfValues-1))**(1/2) + print('Downside Deviation:', downsideDeviation) + return downsideDeviation + + def calcKurtosis(self): + numberOfValues = self.timeFrame + mean = self.averageAnnualReturn + kurtosis = (sum((self.annualReturn[x]-mean)**4 for x in range( + 0, numberOfValues, 1)))/((numberOfValues-1)*(self.standardDeviation ** 4)) + print('Kurtosis:', kurtosis) + return kurtosis + + def calcSkewness(self): + numberOfValues = self.timeFrame + mean = self.averageAnnualReturn + skewness = (sum((self.annualReturn[x]-mean)**3 for x in range( + 0, numberOfValues, 1)))/((numberOfValues-1)*(self.standardDeviation ** 3)) + print('Skewness:', skewness) + return skewness + + def calcBeta(self): + beta = self.correlation * \ + (self.standardDeviation/Stock.benchmarkStandardDeviation) print('Beta:', beta) return beta + def calcAlpha(self): + alpha = self.averageAnnualReturn - \ + (Stock.riskFreeRate+((Stock.benchmarkAverageAnnualReturn - + Stock.riskFreeRate) * self.beta)) + print('Alpha:', alpha) + return alpha + + def calcSharpe(self): + sharpe = (self.averageAnnualReturn - Stock.riskFreeRate) / \ + self.standardDeviation + print('Sharpe Ratio:', sharpe) + return sharpe + + def calcSortino(self): + sortino = (self.averageAnnualReturn - self.riskFreeRate) / \ + self.downsideDeviation + print('Sortino Ratio:', sortino) + return sortino + + def calcTreynor(self): + treynor = (self.averageAnnualReturn - Stock.riskFreeRate)/self.beta + print('Treynor Ratio:', treynor) + return treynor + + def calcLinearRegression(self): + dates = self.dates + y = self.closeValues + + # First change dates to integers (days from first date) + x = datesToDays(dates) + + x = np.array(x) + y = np.array(y) + + # Estimate coefficients + # number 
of observations/points + n = np.size(x) + + # mean of x and y vector + m_x, m_y = np.mean(x), np.mean(y) + + # calculating cross-deviation and deviation about x + SS_xy = np.sum(y*x) - n*m_y*m_x + SS_xx = np.sum(x*x) - n*m_x*m_x + + # calculating regression coefficients + b_1 = SS_xy / SS_xx + b_0 = m_y - b_1*m_x + + b = [b_0, b_1] + + formula = ''.join( + ('y = ', str(round(float(b[0]), 2)), 'x + ', str(round(float(b[1]), 2)))) + print('Linear regression formula:', formula) + + # Stock.plot_regression_line(self, x, y, b) + + regression = [] + regression.append(b[0]) + regression.append(b[1]) + return regression + + def plot_regression_line(self, x, y, b): + # plotting the actual points as scatter plot + plt.scatter(self.dates, y, color="m", + marker="o", s=30) + + # predicted response vector + y_pred = b[0] + b[1]*x + + # plotting the regression line + plt.plot(self.dates, y_pred, color="g") + + # putting labels + plt.title(self.name) + plt.xlabel('Dates') + plt.ylabel('Close Values') + + # function to show plot + plt.show(block=False) + for i in range(3, 0, -1): + if i == 1: + sys.stdout.write('Keeping plot open for ' + + str(i) + ' second \r') + else: + sys.stdout.write('Keeping plot open for ' + + str(i) + ' seconds \r') + plt.pause(1) + sys.stdout.flush() + plt.close() + + +def datesToDays(dates): + days = [] + firstDate = dates[0] + days.append(0) + for i in range(1, len(dates), 1): + # Calculate days from first date to current date + daysDiff = (dates[i]-firstDate).days + days.append(daysDiff) + return days + def isConnected(): import socket # To check internet connection + print('Checking internet connection') try: # connect to the host -- tells us if the host is actually reachable socket.create_connection(("www.andrewkdinh.com", 80)) @@ -327,17 +532,31 @@ def benchmarkInit(): while benchmarkTicker == '': benchmarks = ['S&P500', 'DJIA', 'Russell 3000', 'MSCI EAFE'] benchmarksTicker = ['SPY', 'DJIA', 'VTHR', 'EFT'] - print('\nList of benchmarks:', 
benchmarks) - - # benchmark = str(input('Benchmark to compare to: ')) - benchmark = 'S&P500' - + print('\nList of benchmarks:') for i in range(0, len(benchmarks), 1): - if benchmark == benchmarks[i]: - benchmarkTicker = benchmarksTicker[i] + print(str(i+1) + '. ' + + benchmarks[i] + ' (' + benchmarksTicker[i] + ')') + + benchmark = str(input('Please choose a benchmark from the list: ')) + # benchmark = 'SPY' # TESTING + + if Functions.stringIsInt(benchmark) == True: + if int(benchmark) <= len(benchmarks): + benchmarkInt = int(benchmark) + benchmark = benchmarks[benchmarkInt-1] + benchmarkTicker = benchmarksTicker[benchmarkInt-1] + else: + for i in range(0, len(benchmarks), 1): + if benchmark == benchmarks[i]: + benchmarkTicker = benchmarksTicker[i] + break + if benchmark == benchmarksTicker[i] or benchmark == benchmarksTicker[i].lower(): + benchmark = benchmarks[i] + benchmarkTicker = benchmarksTicker[i] + break if benchmarkTicker == '': - print('Benchmark not found. Please type in a benchmark from the list') + print('Benchmark not found. Please use a benchmark from the list') print(benchmark, ' (', benchmarkTicker, ')', sep='') @@ -350,18 +569,31 @@ def benchmarkInit(): def stocksInit(): listOfStocks = [] - # numberOfStocks = int(input('\nHow many stocks/mutual funds/ETFs would you like to analyze? ')) - numberOfStocks = 1 + isInteger = False + while isInteger == False: + temp = input('\nNumber of stocks to analyze (2 minimum): ') + isInteger = Functions.stringIsInt(temp) + if isInteger == True: + numberOfStocks = int(temp) + else: + print('Please type an integer') - print('\nHow many stocks/mutual funds/ETFs would you like to analyze? ', numberOfStocks) + # numberOfStocks = 5 # TESTING + # print('How many stocks would you like to analyze? 
', numberOfStocks) + + print('\nThis program can analyze stocks (GOOGL), mutual funds (VFINX), and ETFs (SPY)') + print('For simplicity, all of them will be referred to as "stock"\n') + + # listOfGenericStocks = ['googl', 'aapl', 'vfinx', 'tsla', 'vthr'] for i in range(0, numberOfStocks, 1): - print('Stock', i + 1, ': ', end='') - #stockName = str(input()) + print('Stock', i + 1, end=' ') + stockName = str(input('ticker: ')) - stockName = 'FBGRX' - print(stockName) + # stockName = listOfGenericStocks[i] + # print(':', stockName) + stockName = stockName.upper() listOfStocks.append(stockName) listOfStocks[i] = Stock() listOfStocks[i].setName(stockName) @@ -370,49 +602,94 @@ def stocksInit(): def timeFrameInit(): - print('\nPlease enter the time frame in years and months (30 days)') - print("Years: ", end='') - #years = int(input()) - years = 5 - print(years) - print("Months: ", end='') - #months = int(input()) - months = 0 - print(months) + isInteger = False + while isInteger == False: + print( + '\nPlease enter the time frame in years (10 years or less recommended):', end='') + temp = input(' ') + isInteger = Functions.stringIsInt(temp) + if isInteger == True: + years = int(temp) + else: + print('Please type an integer') - timeFrame = [] - timeFrame.append(years) - timeFrame.append(months) + # years = 5 # TESTING + # print('Years:', years) + + timeFrame = years return timeFrame def dataMain(listOfStocks): print('\nGathering dates and close values') - for i in range(0, len(listOfStocks), 1): + i = 0 + while i < len(listOfStocks): datesAndCloseList = Stock.datesAndClose(listOfStocks[i]) - listOfStocks[i].allDates = datesAndCloseList[0] - listOfStocks[i].allCloseValues = datesAndCloseList[1] + if datesAndCloseList == 'Not available': + del listOfStocks[i] + if len(listOfStocks) == 0: + print('No stocks to analyze. 
Ending program') + exit() + else: + listOfStocks[i].allDates = datesAndCloseList[0] + listOfStocks[i].allCloseValues = datesAndCloseList[1] - # Clip list to fit time frame - datesAndCloseList2 = Stock.datesAndClose2(listOfStocks[i]) - listOfStocks[i].dates = datesAndCloseList2[0] - listOfStocks[i].closeValues = datesAndCloseList2[1] + # Clip list to fit time frame + datesAndCloseList2 = Stock.datesAndCloseFitTimeFrame( + listOfStocks[i]) + listOfStocks[i].dates = datesAndCloseList2[0] + listOfStocks[i].closeValues = datesAndCloseList2[1] + + i += 1 + + +def riskFreeRate(): + print('Quandl') + url = ''.join( + ('https://www.quandl.com/api/v3/datasets/USTREASURY/LONGTERMRATES.json?api_key=', apiQuandl)) + # https://www.quandl.com/api/v3/datasets/USTREASURY/LONGTERMRATES.json?api_key=KUh3U3hxke9tCimjhWEF + + print("\nSending request to:", url) + f = requests.get(url) + Functions.fromCache(f) + json_data = f.text + loaded_json = json.loads(json_data) + riskFreeRate = (loaded_json['dataset']['data'][0][1])/100 + riskFreeRate = riskFreeRate * 100 + riskFreeRate = round(riskFreeRate, 2) + print('Risk-free rate:', riskFreeRate, end='\n\n') + + if f.status_code == 404: + print("Quandl not available") + print('Returning 2.50 as risk-free rate', end='\n\n') + # return 0.0250 + return 2.50 + + return riskFreeRate def returnMain(benchmark, listOfStocks): print('\nCalculating unadjusted return, Sharpe ratio, Sortino ratio, and Treynor ratio\n') - print(benchmark.name) - benchmark.unadjustedReturn = Stock.unadjustedReturn(benchmark) + print('Getting risk-free rate from current 10-year treasury bill rates', end='\n\n') + Stock.riskFreeRate = riskFreeRate() + print(benchmark.name, end='\n\n') + benchmark.annualReturn = Stock.calcAnnualReturn(benchmark) + if benchmark.annualReturn == 'Not available': + print('Please use a lower time frame\nEnding program') + exit() + benchmark.averageAnnualReturn = Stock.calcAverageAnnualReturn(benchmark) + benchmark.standardDeviation = 
Stock.calcStandardDeviation(benchmark) # Make benchmark data global - # Maybe remove this later Stock.benchmarkDates = benchmark.dates Stock.benchmarkCloseValues = benchmark.closeValues - Stock.benchmarkUnadjustedReturn = benchmark.unadjustedReturn + Stock.benchmarkAverageAnnualReturn = benchmark.averageAnnualReturn + Stock.benchmarkStandardDeviation = benchmark.standardDeviation - for i in range(0, len(listOfStocks), 1): - print(listOfStocks[i].name) + i = 0 + while i < len(listOfStocks): + print('\n' + listOfStocks[i].name, end='\n\n') # Make sure each date has a value for both the benchmark and the stock list1 = [] @@ -426,10 +703,241 @@ def returnMain(benchmark, listOfStocks): listOfStocks[i].closeValuesMatchBenchmark = temp[0][1] benchmarkMatchDatesAndCloseValues = temp[1] - listOfStocks[i].unadjustedReturn = Stock.unadjustedReturn( - listOfStocks[i]) - listOfStocks[i].beta = Stock.beta( - listOfStocks[i], benchmarkMatchDatesAndCloseValues) + # Calculate everything for each stock + listOfStocks[i].annualReturn = Stock.calcAnnualReturn(listOfStocks[i]) + if listOfStocks[i].annualReturn == 'Not available': + print('Removing', listOfStocks[i].name, 'from list of stocks') + del listOfStocks[i] + if len(listOfStocks) == 0: + print('No stocks to analyze. 
Ending program') + exit() + else: + listOfStocks[i].averageAnnualReturn = Stock.calcAverageAnnualReturn( + listOfStocks[i]) + listOfStocks[i].correlation = Stock.calcCorrelation( + listOfStocks[i], benchmarkMatchDatesAndCloseValues[1]) + listOfStocks[i].standardDeviation = Stock.calcStandardDeviation( + listOfStocks[i]) + listOfStocks[i].downsideDeviation = Stock.calcDownsideDeviation( + listOfStocks[i]) + listOfStocks[i].kurtosis = Stock.calcKurtosis( + listOfStocks[i]) + listOfStocks[i].skewness = Stock.calcSkewness( + listOfStocks[i]) + listOfStocks[i].beta = Stock.calcBeta(listOfStocks[i]) + listOfStocks[i].alpha = Stock.calcAlpha(listOfStocks[i]) + listOfStocks[i].sharpe = Stock.calcSharpe(listOfStocks[i]) + listOfStocks[i].sortino = Stock.calcSortino(listOfStocks[i]) + listOfStocks[i].treynor = Stock.calcTreynor(listOfStocks[i]) + listOfStocks[i].linearRegression = Stock.calcLinearRegression( + listOfStocks[i]) + + i += 1 + + print('\nNumber of stocks from original list that fit time frame:', + len(listOfStocks)) + + +def indicatorInit(): + # Runs correlation or regression study + indicatorFound = False + listOfIndicators = ['Expense Ratio', + 'Market Capitalization', 'Turnover', 'Persistence'] + print('\n', end='') + while indicatorFound == False: + print('List of indicators:') + for i in range(0, len(listOfIndicators), 1): + print(str(i + 1) + '. 
' + listOfIndicators[i]) + + indicator = str(input('Choose an indicator from the list: ')) + + # indicator = 'expense ratio' # TESTING + + if Functions.stringIsInt(indicator) == True: + if int(indicator) <= 4: + indicator = listOfIndicators[int(indicator)-1] + indicatorFound = True + else: + indicatorFormats = [ + indicator.upper(), indicator.lower(), indicator.title()] + for i in range(0, len(indicatorFormats), 1): + for j in range(0, len(listOfIndicators), 1): + if listOfIndicators[j] == indicatorFormats[i]: + indicator = listOfIndicators[j] + indicatorFound = True + break + + if indicatorFound == False: + print('Please choose an indicator from the list') + + return indicator + + +def calcIndicatorCorrelation(listOfIndicatorValues, listOfReturns): + correlationList = [] + for i in range(0, len(listOfReturns), 1): + correlation = np.corrcoef( + listOfIndicatorValues, listOfReturns[i])[0, 1] + correlationList.append(correlation) + return correlationList + + +def calcIndicatorRegression(listOfIndicatorValues, listOfReturns): + regressionList = [] + x = np.array(listOfIndicatorValues) + for i in range(0, len(listOfReturns), 1): + y = np.array(listOfReturns[i]) + + # Estimate coefficients + # number of observations/points + n = np.size(x) + + # mean of x and y vector + m_x, m_y = np.mean(x), np.mean(y) + + # calculating cross-deviation and deviation about x + SS_xy = np.sum(y*x) - n*m_y*m_x + SS_xx = np.sum(x*x) - n*m_x*m_x + + # calculating regression coefficients + b_1 = SS_xy / SS_xx + b_0 = m_y - b_1*m_x + + b = [b_0, b_1] + + regression = [] + regression.append(b[0]) + regression.append(b[1]) + regressionList.append(regression) + + # plot_regression_line(x, y, b, i) + + return regressionList + + +def plot_regression_line(x, y, b, i): + # plotting the actual points as scatter plot + plt.scatter(x, y, color="m", + marker="o", s=30) + + # predicted response vector + y_pred = b[0] + b[1]*x + + # plotting the regression line + plt.plot(x, y_pred, color="g") + + # 
putting labels + listOfReturnStrings = ['Average Annual Return', + 'Sharpe Ratio', 'Sortino Ratio', 'Treynor Ratio', 'Alpha'] + + plt.title(Stock.indicator + ' and ' + listOfReturnStrings[i]) + if Stock.indicator == 'Expense Ratio': + plt.xlabel(Stock.indicator + ' (%)') + else: + plt.xlabel(Stock.indicator) + + if i == 0: + plt.ylabel(listOfReturnStrings[i] + ' (%)') + else: + plt.ylabel(listOfReturnStrings[i]) + + # function to show plot + plt.show(block=False) + for i in range(2, 0, -1): + if i == 1: + sys.stdout.write('Keeping plot open for ' + + str(i) + ' second \r') + else: + sys.stdout.write('Keeping plot open for ' + + str(i) + ' seconds \r') + plt.pause(1) + sys.stdout.flush() + sys.stdout.write( + ' \r') + sys.stdout.flush() + plt.close() + + +def indicatorMain(listOfStocks): + Stock.indicator = indicatorInit() + print(Stock.indicator, end='\n\n') + + # indicatorValuesGenericExpenseRatio = [2.5, 4.3, 3.1, 2.6, 4.2] # TESTING + + listOfStocksIndicatorValues = [] + for i in range(0, len(listOfStocks), 1): + indicatorValueFound = False + while indicatorValueFound == False: + if Stock.indicator == 'Expense Ratio': + indicatorValue = str( + input(Stock.indicator + ' for ' + listOfStocks[i].name + ' (%): ')) + elif Stock.indicator == 'Persistence': + indicatorValue = str( + input(Stock.indicator + ' for ' + listOfStocks[i].name + ' (years): ')) + elif Stock.indicator == 'Turnover': + indicatorValue = str(input( + Stock.indicator + ' for ' + listOfStocks[i].name + ' in the last ' + str(Stock.timeFrame) + ' years: ')) + elif Stock.indicator == 'Market Capitalization': + indicatorValue = str( + input(Stock.indicator + ' of ' + listOfStocks[i].name + ': ')) + else: + print('Something is wrong. Indicator was not found. 
Ending program.') + exit() + + if Functions.strintIsFloat(indicatorValue) == True: + listOfStocks[i].indicatorValue = float(indicatorValue) + indicatorValueFound = True + else: + print('Please enter a number') + + # listOfStocks[i].indicatorValue = indicatorValuesGenericExpenseRatio[i] # TESTING + listOfStocksIndicatorValues.append(listOfStocks[i].indicatorValue) + + listOfReturns = [] # A list that matches the above list with return values [[averageAnnualReturn1, aAR2, aAR3], [sharpe1, sharpe2, sharpe3], etc.] + tempListOfReturns = [] + for i in range(0, len(listOfStocks), 1): + tempListOfReturns.append(listOfStocks[i].averageAnnualReturn) + listOfReturns.append(tempListOfReturns) + tempListOfReturns = [] + for i in range(0, len(listOfStocks), 1): + tempListOfReturns.append(listOfStocks[i].sharpe) + listOfReturns.append(tempListOfReturns) + tempListOfReturns = [] + for i in range(0, len(listOfStocks), 1): + tempListOfReturns.append(listOfStocks[i].sortino) + listOfReturns.append(tempListOfReturns) + tempListOfReturns = [] + for i in range(0, len(listOfStocks), 1): + tempListOfReturns.append(listOfStocks[i].treynor) + listOfReturns.append(tempListOfReturns) + tempListOfReturns = [] + for i in range(0, len(listOfStocks), 1): + tempListOfReturns.append(listOfStocks[i].alpha) + listOfReturns.append(tempListOfReturns) + + # Create list of each indicator (e.g. 
expense ratio) + listOfIndicatorValues = [] + for i in range(0, len(listOfStocks), 1): + listOfIndicatorValues.append(listOfStocks[i].indicatorValue) + + Stock.indicatorCorrelation = calcIndicatorCorrelation( + listOfIndicatorValues, listOfReturns) + + listOfReturnStrings = ['Average Annual Return', + 'Sharpe Ratio', 'Sortino Ratio', 'Treynor Ratio', 'Alpha'] + print('\n', end='') + for i in range(0, len(Stock.indicatorCorrelation), 1): + print('Correlation with ' + Stock.indicator.lower() + ' and ' + + listOfReturnStrings[i].lower() + ': ' + str(Stock.indicatorCorrelation[i])) + + Stock.indicatorRegression = calcIndicatorRegression( + listOfIndicatorValues, listOfReturns) + print('\n', end='') + for i in range(0, len(Stock.indicatorCorrelation), 1): + formula = ''.join( + ('y = ', str(round(float(Stock.indicatorRegression[i][0]), 2)), 'x + ', str(round(float(Stock.indicatorRegression[i][1]), 2)))) + print('Linear regression equation for ' + Stock.indicator.lower() + ' and ' + + listOfReturnStrings[i].lower() + ': ' + formula) def main(): @@ -463,110 +971,11 @@ def main(): # Calculate return for benchmark and stock(s) returnMain(benchmark, listOfStocks) + # Choose indicator and calculate correlation with indicator + indicatorMain(listOfStocks) + + exit() + if __name__ == "__main__": main() - - -''' -from StockData import StockData -from StockReturn import Return - -listOfStocksData = [] -listOfStocksReturn = [] -# numberOfStocks = int(input("How many stocks or mutual funds would you like to analyze? 
")) # CHANGE BACK LATER -numberOfStocks = 1 -for i in range(0, numberOfStocks, 1): - print("Stock", i+1, ": ", end='') - stockName = str(input()) - listOfStocksData.append(i) - listOfStocksData[i] = StockData() - listOfStocksData[i].setName(stockName) - # print(listOfStocksData[i].name) - - # listOfStocksReturn.append(i) - # listOfStocksReturn[i] = StockReturn() - - -# Decide on a benchmark -benchmarkTicker = '' -while benchmarkTicker == '': - listOfBenchmarks = ['S&P500', 'DJIA', 'Russell 3000', 'MSCI EAFE'] - listOfBenchmarksTicker = ['SPY', 'DJIA', 'VTHR', 'EFT'] - print('\nList of benchmarks:', listOfBenchmarks) - # benchmark = str(input('Benchmark to compare to: ')) - benchmark = 'S&P500' - - for i in range(0,len(listOfBenchmarks), 1): - if benchmark == listOfBenchmarks[i]: - benchmarkTicker = listOfBenchmarksTicker[i] - i = len(listOfBenchmarks) - - if benchmarkTicker == '': - print('Benchmark not found. Please type in a benchmark from the list') - -print('\n', benchmark, ' (', benchmarkTicker, ')', sep='') - -benchmarkName = str(benchmark) -benchmark = StockData() -benchmark.setName(benchmarkName) -StockData.main(benchmark) - -benchmarkReturn = Return() -Return.mainBenchmark(benchmarkReturn, benchmark) - -timeFrame = Return.returnTimeFrame(benchmarkReturn) -print('Time Frame [years, months]:', timeFrame) - -sumOfListLengths = 0 -for i in range(0, numberOfStocks, 1): - print('\n', listOfStocksData[i].name, sep='') - StockData.main(listOfStocksData[i]) - # Count how many stocks are available - sumOfListLengths = sumOfListLengths + len(StockData.returnAllLists(listOfStocksData[i])) - -if sumOfListLengths == 0: - print("No sources have data for given stocks") - exit() - -# Find return over time using either Jensen's Alpha, Sharpe Ratio, Sortino Ratio, or Treynor Ratio -for i in range(0, numberOfStocks, 1): - print('\n', listOfStocksData[i].name, sep='') - # StockReturn.main(listOfStocksReturn[i]) - - -# Runs correlation or regression study -# 
print(listOfStocksData[0].name, listOfStocksData[0].absFirstLastDates, listOfStocksData[0].finalDatesAndClose) -indicatorFound = False -while indicatorFound == False: - print("1. Expense Ratio\n2. Asset Size\n3. Turnover\n4. Persistence\nWhich indicator would you like to look at? ", end='') - - # indicator = str(input()) # CHANGE BACK TO THIS LATER - indicator = 'Expense Ratio' - print(indicator, end='') - - indicatorFound = True - print('\n', end='') - - if indicator == 'Expense Ratio' or indicator == '1' or indicator == 'expense ratio': - # from ExpenseRatio import ExpenseRatio - print('\nExpense Ratio') - - elif indicator == 'Asset Size' or indicator == '2' or indicator == 'asset size': - print('\nAsset Size') - - elif indicator == 'Turnover' or indicator == '3' or indicator == 'turnover': - print('\nTurnover') - - elif indicator == 'Persistence' or indicator == '4' or indicator == 'persistence': - print('\nPersistence') - - else: - indicatorFound = False - print('Invalid input, please enter indicator again') - -stockName = 'IWV' -stock1 = Stock(stockName) -print("Finding available dates and close values for", stock1.name) -StockData.main(stock1) -''' diff --git a/output.bin b/output.bin new file mode 100644 index 0000000..109a282 --- /dev/null +++ b/output.bin @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + + + + + + + + + + +Andrew Dinh + + + + + + + + + + + + + + + + + + + + +
+
+
+ + diff --git a/requirements.txt b/requirements.txt index fe41b2e..232d04a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ requests~=2.21.0 -numpy~=1.15.4 \ No newline at end of file +numpy~=1.15.4 +requests-cache~=0.4.13 # NOT REQUIRED \ No newline at end of file diff --git a/test.html b/test.html new file mode 100644 index 0000000..e69de29 From 531c41862a99c11f38de3fc7ee4ca37b6fb414fc Mon Sep 17 00:00:00 2001 From: Andrew Dinh Date: Thu, 21 Feb 2019 11:43:46 -0800 Subject: [PATCH 4/6] Add async function --- .gitignore | 5 ++-- Functions.py | 4 +++ README.md | 16 +++++++---- main.py | 80 +++++++++++++++++++++++++++++++++++++++++++++------- output.bin | 51 --------------------------------- test.html | 0 6 files changed, 87 insertions(+), 69 deletions(-) delete mode 100644 output.bin delete mode 100644 test.html diff --git a/.gitignore b/.gitignore index a204a08..238abb6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,5 @@ -__pycache__/StockData.cpython-37.pyc __pycache__/ -*.pyc test/ .vscode/ -requests_cache.sqlite \ No newline at end of file +requests_cache.sqlite +README.html \ No newline at end of file diff --git a/Functions.py b/Functions.py index 32aeab4..63ae2db 100644 --- a/Functions.py +++ b/Functions.py @@ -42,6 +42,7 @@ def stringIsInt(s): except ValueError: return False + def strintIsFloat(s): try: float(s) @@ -49,10 +50,13 @@ def strintIsFloat(s): except ValueError: return False + def fromCache(r): import requests_cache if r.from_cache == True: print('(Response taken from cache)') + return + def main(): exit() diff --git a/README.md b/README.md index ebb6232..d33a263 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,25 @@ # Mutual Fund Indicators -A project to determine indicators of overperforming mutual funds. -This project is written in Python 3 and will examine market capitalization, persistence, turnover, and expense ratios. 
+[![License](https://img.shields.io/github/license/andrewkdinh/fund-indicators.svg)](https://raw.githubusercontent.com/andrewkdinh/fund-indicators/master/LICENSE) +![](https://img.shields.io/github/last-commit/andrewkdinh/fund-indicators.svg) +![](https://img.shields.io/github/languages/top/andrewkdinh/fund-indicators.svg) +![](https://img.shields.io/github/languages/code-size/andrewkdinh/fund-indicators.svg) -### Prerequisites +A project to determine indicators of overperforming mutual funds. + +Examine correlation between performance and market capitalization, persistence, turnover, and expense ratios. + +## Prerequisites `$ pip install -r requirements.txt` -### Quickstart +## Quickstart To begin, run `$ python main.py` Some ticker values to try: -SPY, VFINX, AAPL, GOOGL +SPY, VFINX, VTHR, DJIA Created by Andrew Dinh from Dr. TJ Owens Gilroy Early College Academy diff --git a/main.py b/main.py index c1a43a0..3358188 100644 --- a/main.py +++ b/main.py @@ -4,6 +4,7 @@ # Python 3.6.7 # Required +from concurrent.futures import ThreadPoolExecutor as PoolExecutor import requests import json import datetime @@ -16,10 +17,10 @@ import sys # Optional import requests_cache -# https://requests-cache.readthedocs.io/en/lates/user_guide.html requests_cache.install_cache( 'requests_cache', backend='sqlite', expire_after=43200) # 12 hours + # API Keys apiAV = 'O42ICUV58EIZZQMU' # apiBarchart = 'a17fab99a1c21cd6f847e2f82b592838' @@ -119,7 +120,7 @@ class Stock: f = requests.get(url) Functions.fromCache(f) json_data = f.text - if json_data == 'Unknown symbol' or f.status_code == 404: + if json_data == 'Unknown symbol' or f.status_code != 200: print("IEX not available") return 'Not available' loaded_json = json.loads(json_data) @@ -160,7 +161,7 @@ class Stock: json_data = f.text loaded_json = json.loads(json_data) - if len(loaded_json) == 1 or f.status_code == 404: + if len(loaded_json) == 1 or f.status_code != 200: print("Alpha Vantage not available") return 'Not available' @@ 
-195,7 +196,7 @@ class Stock: f = requests.get(url, headers=headers) Functions.fromCache(f) loaded_json = f.json() - if len(loaded_json) == 1 or f.status_code == 404 or loaded_json['startDate'] == None: + if len(loaded_json) == 1 or f.status_code != 200 or loaded_json['startDate'] == None: print("Tiingo not available") return 'Not available' @@ -497,11 +498,11 @@ def datesToDays(dates): def isConnected(): import socket # To check internet connection - print('Checking internet connection') + #print('Checking internet connection') try: # connect to the host -- tells us if the host is actually reachable socket.create_connection(("www.andrewkdinh.com", 80)) - print('Internet connection is good!') + print('Internet connection is good') return True except OSError: # pass @@ -526,6 +527,22 @@ def checkPackages(): return packagesInstalled +def checkPythonVersion(): + import platform + #print('Checking Python version') + i = platform.python_version() + r = i.split('.') + k = ''.join((r[0], '.', r[1])) + k = float(k) + if k < 3.3: + print('Your Python version is', i, + '\nIt needs to be greater than version 3.3') + return False + else: + print('Your Python version of', i, 'is good') + return True + + def benchmarkInit(): # Treat benchmark like stock benchmarkTicker = '' @@ -601,11 +618,45 @@ def stocksInit(): return listOfStocks +def asyncData(benchmark, listOfStocks): + # Make list of urls to send requests to + urlList = [] + # Benchmark + url = ''.join(('https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=', + benchmark.name, '&outputsize=full&apikey=', apiAV)) + urlList.append(url) + + # Stocks + for i in range(0, len(listOfStocks), 1): + # Alpha Vantage + url = ''.join(('https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=', + listOfStocks[i].name, '&outputsize=full&apikey=', apiAV)) + urlList.append(url) + + # Risk-free rate + url = ''.join( + 
('https://www.quandl.com/api/v3/datasets/USTREASURY/LONGTERMRATES.json?api_key=', apiQuandl)) + urlList.append(url) + + # Send async requests + print('\nSending async requests (Assuming Alpha Vantage is first choice)') + with PoolExecutor(max_workers=3) as executor: + for _ in executor.map(sendAsync, urlList): + pass + + return + + +def sendAsync(url): + requests.get(url) + return + + def timeFrameInit(): isInteger = False while isInteger == False: print( - '\nPlease enter the time frame in years (10 years or less recommended):', end='') + '\nPlease enter the time frame in years (<10 years recommended):', end='') temp = input(' ') isInteger = Functions.stringIsInt(temp) if isInteger == True: @@ -660,7 +711,7 @@ def riskFreeRate(): riskFreeRate = round(riskFreeRate, 2) print('Risk-free rate:', riskFreeRate, end='\n\n') - if f.status_code == 404: + if f.status_code != 200: print("Quandl not available") print('Returning 2.50 as risk-free rate', end='\n\n') # return 0.0250 @@ -950,12 +1001,18 @@ def main(): packagesInstalled = checkPackages() if not packagesInstalled: return + else: + print('All required packages are installed') + + # Check python version is above 3.3 + pythonVersionGood = checkPythonVersion() + if not pythonVersionGood: + return # Choose benchmark and makes it class Stock benchmark = benchmarkInit() # Add it to a list to work with other functions - benchmarkAsList = [] - benchmarkAsList.append(benchmark) + benchmarkAsList = [benchmark] # Asks for stock(s) ticker and makes them class Stock listOfStocks = stocksInit() @@ -964,6 +1021,9 @@ def main(): timeFrame = timeFrameInit() Stock.timeFrame = timeFrame # Needs to be a global variable for all stocks + # Send async request to AV for listOfStocks and benchmark + asyncData(benchmark, listOfStocks) + # Gather data for benchmark and stock(s) dataMain(benchmarkAsList) dataMain(listOfStocks) diff --git a/output.bin b/output.bin deleted file mode 100644 index 109a282..0000000 --- a/output.bin +++ /dev/null @@ 
-1,51 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - -Andrew Dinh - - - - - - - - - - - - - - - - - - - - -
-
-
- - diff --git a/test.html b/test.html deleted file mode 100644 index e69de29..0000000 From 6366453f63d1898a8dac4c8ed6fdd6a78fef78fd Mon Sep 17 00:00:00 2001 From: Andrew Dinh Date: Fri, 1 Mar 2019 11:06:09 -0800 Subject: [PATCH 5/6] Added support for persistence indicator Changed from years to months Added function to scrape Yahoo Finance for indicator data Moved generic functions to Functions.py Added function to scrape websites for stocks Attempted to alleviate problem of async function --- .gitignore | 5 +- Functions.py | 72 ++++- main.py | 716 +++++++++++++++++++++++++++++++++++------------ requirements.txt | 1 + 4 files changed, 609 insertions(+), 185 deletions(-) diff --git a/.gitignore b/.gitignore index 238abb6..25929cd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ __pycache__/ test/ .vscode/ -requests_cache.sqlite -README.html \ No newline at end of file +*.sqlite +README.html +*stocks.txt \ No newline at end of file diff --git a/Functions.py b/Functions.py index 63ae2db..4c311c1 100644 --- a/Functions.py +++ b/Functions.py @@ -1,6 +1,5 @@ # Python file for general functions - def getNearest(items, pivot): return min(items, key=lambda x: abs(x - pivot)) @@ -58,6 +57,77 @@ def fromCache(r): return +def getJoke(): + import requests + import requests_cache + with requests_cache.disabled(): + ''' + f = requests.get('https://official-joke-api.appspot.com/jokes/random').json() + print('') + print(f['setup']) + print(f['punchline'], end='\n\n') + ''' + headers = {'Accept': 'application/json', + 'User-Agent': 'fund-indicators (https://github.com/andrewkdinh/fund-indicators)'} + f = requests.get('https://icanhazdadjoke.com/', headers=headers).json() + print('') + print(f['joke']) + + +def hasNumbers(inputString): + return any(char.isdigit() for char in inputString) + + +def checkPackages(listOfPackages): + import importlib.util + import sys + + packagesInstalled = True + packages = listOfPackages + for i in range(0, len(packages), 1): + package_name = 
packages[i] + spec = importlib.util.find_spec(package_name) + if spec is None: + print( + package_name + + " is not installed\nPlease enter 'pip install -r requirements.txt' to install all required packages") + packagesInstalled = False + return packagesInstalled + + +def checkPythonVersion(): + import platform + #print('Checking Python version') + i = platform.python_version() + r = i.split('.') + k = float(''.join((r[0], '.', r[1]))) + if k < 3.3: + print('Your Python version is', i, + '\nIt needs to be greater than version 3.3') + return False + else: + print('Your Python version of', i, 'is good') + return True + + +def isConnected(): + import socket # To check internet connection + try: + # connect to the host -- tells us if the host is actually reachable + socket.create_connection(("www.andrewkdinh.com", 80)) + print('Internet connection is good') + return True + except OSError: + # pass + print("No internet connection!") + return False + + +def fileExists(file): + import os.path + return os.path.exists(file) + + def main(): exit() diff --git a/main.py b/main.py index 3358188..e1ddc2b 100644 --- a/main.py +++ b/main.py @@ -4,22 +4,26 @@ # Python 3.6.7 # Required -from concurrent.futures import ThreadPoolExecutor as PoolExecutor +from bs4 import BeautifulSoup import requests import json import datetime import Functions import numpy as np +import re +import os.path # Required for linear regression import matplotlib.pyplot as plt import sys # Optional +from concurrent.futures import ThreadPoolExecutor as PoolExecutor +import time +import random import requests_cache requests_cache.install_cache( - 'requests_cache', backend='sqlite', expire_after=43200) # 12 hours - + 'cache', backend='sqlite', expire_after=43200) # 12 hours # API Keys apiAV = 'O42ICUV58EIZZQMU' @@ -59,19 +63,20 @@ API Keys: class Stock: # GLOBAL VARIABLES - timeFrame = 0 + timeFrame = 0 # Months riskFreeRate = 0 indicator = '' # BENCHMARK VALUES benchmarkDates = [] benchmarkCloseValues = [] - 
benchmarkAverageAnnualReturn = 0 + benchmarkAverageMonthlyReturn = 0 benchmarkStandardDeviation = 0 # INDICATOR VALUES indicatorCorrelation = [] indicatorRegression = [] + persTimeFrame = 0 def __init__(self): # BASIC DATA @@ -84,8 +89,8 @@ class Stock: self.closeValuesMatchBenchmark = [] # CALCULATED RETURN - self.averageAnnualReturn = 0 - self.annualReturn = [] + self.averageMonthlyReturn = 0 + self.monthlyReturn = [] self.sharpe = 0 self.sortino = 0 self.treynor = 0 @@ -161,7 +166,7 @@ class Stock: json_data = f.text loaded_json = json.loads(json_data) - if len(loaded_json) == 1 or f.status_code != 200: + if len(loaded_json) == 1 or f.status_code != 200 or len(loaded_json) == 0: print("Alpha Vantage not available") return 'Not available' @@ -268,6 +273,15 @@ class Stock: allDates[j] = Functions.stringToDate(allDates[j]) datesAndCloseList[0] = allDates + # Determine if close value list has value of zero + # AKA https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=RGN&outputsize=full&apikey=O42ICUV58EIZZQMU + for i in datesAndCloseList[1]: + if i == 0: + print('Found close value of 0. 
This is likely something like ticker RGN (Daily Time Series with Splits and Dividend Events)') + print('Removing', self.name, + 'from list of stocks to ensure compability later') + return 'Not available' + return datesAndCloseList def datesAndCloseFitTimeFrame(self): @@ -280,8 +294,8 @@ class Stock: closeValues.append(self.allCloseValues[i]) firstDate = datetime.datetime.now().date() - datetime.timedelta( - days=self.timeFrame*365) - print('\n', self.timeFrame, ' years ago: ', firstDate, sep='') + days=self.timeFrame*30) + print('\n', self.timeFrame, ' months ago: ', firstDate, sep='') closestDate = Functions.getNearest(dates, firstDate) if closestDate != firstDate: print('Closest date available for', self.name, ':', closestDate) @@ -306,23 +320,23 @@ class Stock: return datesAndCloseList2 - def calcAverageAnnualReturn(self): # pylint: disable=E0202 - # averageAnnualReturn = (float(self.closeValues[len(self.closeValues)-1]/self.closeValues[0])**(1/(self.timeFrame)))-1 - # averageAnnualReturn = averageAnnualReturn * 100 - averageAnnualReturn = sum(self.annualReturn)/self.timeFrame - print('Average annual return:', averageAnnualReturn) - return averageAnnualReturn + def calcAverageMonthlyReturn(self): # pylint: disable=E0202 + # averageMonthlyReturn = (float(self.closeValues[len(self.closeValues)-1]/self.closeValues[0])**(1/(self.timeFrame)))-1 + # averageMonthlyReturn = averageMonthlyReturn * 100 + averageMonthlyReturn = sum(self.monthlyReturn)/self.timeFrame + print('Average monthly return:', averageMonthlyReturn) + return averageMonthlyReturn - def calcAnnualReturn(self): - annualReturn = [] + def calcMonthlyReturn(self): + monthlyReturn = [] - # Calculate annual return in order from oldest to newest - annualReturn = [] + # Calculate monthly return in order from oldest to newest + monthlyReturn = [] for i in range(0, self.timeFrame, 1): firstDate = datetime.datetime.now().date() - datetime.timedelta( - days=(self.timeFrame-i)*365) + days=(self.timeFrame-i)*30) 
secondDate = datetime.datetime.now().date() - datetime.timedelta( - days=(self.timeFrame-i-1)*365) + days=(self.timeFrame-i-1)*30) # Find closest dates to firstDate and lastDate firstDate = Functions.getNearest(self.dates, firstDate) @@ -333,7 +347,7 @@ class Stock: 'which is after the given time frame.') return 'Not available' - # Get corresponding close values and calculate annual return + # Get corresponding close values and calculate monthly return for i in range(0, len(self.dates), 1): if self.dates[i] == firstDate: firstClose = self.closeValues[i] @@ -341,13 +355,12 @@ class Stock: secondClose = self.closeValues[i] break - annualReturnTemp = (secondClose/firstClose)-1 - annualReturnTemp = annualReturnTemp * 100 - annualReturn.append(annualReturnTemp) + monthlyReturnTemp = (secondClose/firstClose)-1 + monthlyReturnTemp = monthlyReturnTemp * 100 + monthlyReturn.append(monthlyReturnTemp) - print('Annual return over the past', - self.timeFrame, 'years:', annualReturn) - return annualReturn + # print('Monthly return over the past', self.timeFrame, 'months:', monthlyReturn) + return monthlyReturn def calcCorrelation(self, closeList): correlation = np.corrcoef( @@ -357,32 +370,32 @@ class Stock: def calcStandardDeviation(self): numberOfValues = self.timeFrame - mean = self.averageAnnualReturn + mean = self.averageMonthlyReturn standardDeviation = ( - (sum((self.annualReturn[x]-mean)**2 for x in range(0, numberOfValues, 1)))/(numberOfValues-1))**(1/2) + (sum((self.monthlyReturn[x]-mean)**2 for x in range(0, numberOfValues, 1)))/(numberOfValues-1))**(1/2) print('Standard Deviation:', standardDeviation) return standardDeviation def calcDownsideDeviation(self): numberOfValues = self.timeFrame - targetReturn = self.averageAnnualReturn + targetReturn = self.averageMonthlyReturn downsideDeviation = ( - (sum(min(0, (self.annualReturn[x]-targetReturn))**2 for x in range(0, numberOfValues, 1)))/(numberOfValues-1))**(1/2) + (sum(min(0, (self.monthlyReturn[x]-targetReturn))**2 
for x in range(0, numberOfValues, 1)))/(numberOfValues-1))**(1/2) print('Downside Deviation:', downsideDeviation) return downsideDeviation def calcKurtosis(self): numberOfValues = self.timeFrame - mean = self.averageAnnualReturn - kurtosis = (sum((self.annualReturn[x]-mean)**4 for x in range( + mean = self.averageMonthlyReturn + kurtosis = (sum((self.monthlyReturn[x]-mean)**4 for x in range( 0, numberOfValues, 1)))/((numberOfValues-1)*(self.standardDeviation ** 4)) print('Kurtosis:', kurtosis) return kurtosis def calcSkewness(self): numberOfValues = self.timeFrame - mean = self.averageAnnualReturn - skewness = (sum((self.annualReturn[x]-mean)**3 for x in range( + mean = self.averageMonthlyReturn + skewness = (sum((self.monthlyReturn[x]-mean)**3 for x in range( 0, numberOfValues, 1)))/((numberOfValues-1)*(self.standardDeviation ** 3)) print('Skewness:', skewness) return skewness @@ -394,26 +407,26 @@ class Stock: return beta def calcAlpha(self): - alpha = self.averageAnnualReturn - \ - (Stock.riskFreeRate+((Stock.benchmarkAverageAnnualReturn - + alpha = self.averageMonthlyReturn - \ + (Stock.riskFreeRate+((Stock.benchmarkAverageMonthlyReturn - Stock.riskFreeRate) * self.beta)) print('Alpha:', alpha) return alpha def calcSharpe(self): - sharpe = (self.averageAnnualReturn - Stock.riskFreeRate) / \ + sharpe = (self.averageMonthlyReturn - Stock.riskFreeRate) / \ self.standardDeviation print('Sharpe Ratio:', sharpe) return sharpe def calcSortino(self): - sortino = (self.averageAnnualReturn - self.riskFreeRate) / \ + sortino = (self.averageMonthlyReturn - self.riskFreeRate) / \ self.downsideDeviation print('Sortino Ratio:', sortino) return sortino def calcTreynor(self): - treynor = (self.averageAnnualReturn - Stock.riskFreeRate)/self.beta + treynor = (self.averageMonthlyReturn - Stock.riskFreeRate)/self.beta print('Treynor Ratio:', treynor) return treynor @@ -484,6 +497,210 @@ class Stock: sys.stdout.flush() plt.close() + def scrapeYahooFinance(self): + # Determine if 
ETF, Mutual fund, or stock + print('Determining if Yahoo Finance has data for', self.name, end=": ") + url = ''.join(('https://finance.yahoo.com/quote/', + self.name, '?p=', self.name)) + if requests.get(url).history: + print('No') + return 'Not available' + else: + print('Yes') + + stockType = '' + url2 = ''.join(('https://finance.yahoo.com/lookup?s=', self.name)) + print('Sending request to:', url2) + raw_html = requests.get(url2).text + + soup2 = BeautifulSoup(raw_html, 'html.parser') + # Type (Stock, ETF, Mutual Fund) + r = soup2.find_all( + 'td', attrs={'class': 'data-col4 Ta(start) Pstart(20px) Miw(30px)'}) + t = soup2.find_all('a', attrs={'class': 'Fw(b)'}) # Name and class + z = soup2.find_all('td', attrs={ + 'class': 'data-col1 Ta(start) Pstart(10px) Miw(80px)'}) # Name of stock + listNames = [] + for i in t: + if len(i.text.strip()) < 6: + listNames.append(i.text.strip()) + for i in range(0, len(listNames), 1): + if listNames[i] == self.name: + break + r = r[i].text.strip() + z = z[i].text.strip() + print('Name:', z) + + if r == 'ETF': + stockType = 'ETF' + elif r == 'Stocks': + stockType = 'Stock' + elif r == 'Mutual Fund': + stockType = 'Fund' + else: + print('Could not determine fund type') + return 'Not available' + print('Type:', stockType) + + if Stock.indicator == 'Expense Ratio': + if stockType == 'Stock': + print( + self.name, 'is a stock, and therefore does not have an expense ratio') + return 'Not available' + + url = ''.join(('https://finance.yahoo.com/quote/', + self.name, '?p=', self.name)) + # https://finance.yahoo.com/quote/SPY?p=SPY + print('Sending request to:', url) + raw_html = requests.get(url).text + soup = BeautifulSoup(raw_html, 'html.parser') + + r = soup.find_all('span', attrs={'class': 'Trsdu(0.3s)'}) + if r == []: + print('Something went wrong with scraping expense ratio') + return('Not available') + + if stockType == 'ETF': + for i in range(len(r)-1, 0, -1): + s = r[i].text.strip() + if s[-1] == '%': + break + elif stockType 
== 'Fund': + count = 0 # Second in set + for i in range(0, len(r)-1, 1): + s = r[i].text.strip() + if s[-1] == '%' and count == 0: + count += 1 + elif s[-1] == '%' and count == 1: + break + + if s[-1] == '%': + expenseRatio = float(s.replace('%', '')) + else: + print('Something went wrong with scraping expense ratio') + return 'Not available' + print(str(expenseRatio) + '%') + return expenseRatio + + elif Stock.indicator == 'Market Capitalization': + url = ''.join(('https://finance.yahoo.com/quote/', + self.name, '?p=', self.name)) + # https://finance.yahoo.com/quote/GOOGL?p=GOOGL + raw_html = requests.get(url).text + soup = BeautifulSoup(raw_html, 'html.parser') + r = soup.find_all( + 'span', attrs={'class': 'Trsdu(0.3s)'}) + if r == []: + print('Something went wrong with scraping market capitalization') + return 'Not available' + marketCap = 0 + for t in r: + s = t.text.strip() + if s[-1] == 'B': + print(s, end='') + s = s.replace('B', '') + marketCap = float(s) * 1000000000 # 1 billion + break + elif s[-1] == 'M': + print(s, end='') + s = s.replace('M', '') + marketCap = float(s) * 1000000 # 1 million + break + elif s[-1] == 'K': + print(s, end='') + s = s.replace('K', '') + marketCap = float(s) * 1000 # 1 thousand + break + if marketCap == 0: + print('\nSomething went wrong with scraping market capitalization') + return 'Not available' + marketCap = int(marketCap) + print(' =', marketCap) + return marketCap + + elif Stock.indicator == 'Turnover': + if stockType == 'Stock': + print(self.name, 'is a stock, and therefore does not have turnover') + return 'Not available' + + if stockType == 'Fund': + url = ''.join(('https://finance.yahoo.com/quote/', + self.name, '?p=', self.name)) + # https://finance.yahoo.com/quote/SPY?p=SPY + print('Sending request to', url) + raw_html = requests.get(url).text + soup = BeautifulSoup(raw_html, 'html.parser') + + r = soup.find_all( + 'span', attrs={'class': 'Trsdu(0.3s)'}) + if r == []: + print('Something went wrong without 
scraping turnover') + return 'Not available' + turnover = 0 + for i in range(len(r)-1, 0, -1): + s = r[i].text.strip() + if s[-1] == '%': + turnover = float(s.replace('%', '')) + break + if stockType == 'ETF': + url = ''.join(('https://finance.yahoo.com/quote/', + self.name, '/profile?p=', self.name)) + # https://finance.yahoo.com/quote/SPY/profile?p=SPY + print('Sending request to', url) + raw_html = requests.get(url).text + soup = BeautifulSoup(raw_html, 'html.parser') + + r = soup.find_all( + 'span', attrs={'class': 'W(20%) D(b) Fl(start) Ta(e)'}) + if r == []: + print('Something went wrong without scraping turnover') + return 'Not available' + turnover = 0 + for i in range(len(r)-1, 0, -1): + s = r[i].text.strip() + if s[-1] == '%': + turnover = float(s.replace('%', '')) + break + + if turnover == 0: + print('Something went wrong with scraping turnover') + return 'Not available' + print(str(turnover) + '%') + return turnover + + def indicatorManual(self): + indicatorValueFound = False + while indicatorValueFound == False: + if Stock.indicator == 'Expense Ratio': + indicatorValue = str( + input(Stock.indicator + ' for ' + self.name + ' (%): ')) + elif Stock.indicator == 'Persistence': + indicatorValue = str( + input(Stock.indicator + ' for ' + self.name + ' (years): ')) + elif Stock.indicator == 'Turnover': + indicatorValue = str(input( + Stock.indicator + ' for ' + self.name + ' in the last ' + str(Stock.timeFrame) + ' years: ')) + elif Stock.indicator == 'Market Capitalization': + indicatorValue = str( + input(Stock.indicator + ' of ' + self.name + ': ')) + else: + print('Something is wrong. Indicator was not found. 
Ending program.') + exit() + + if Functions.strintIsFloat(indicatorValue) == True: + indicatorValueFound = True + return float(indicatorValue) + else: + print('Please enter a number') + + def calcPersistence(self): + persistenceFirst = (sum(self.monthlyReturn[i] for i in range( + 0, Stock.persTimeFrame, 1))) / Stock.persTimeFrame + persistenceSecond = self.averageMonthlyReturn + persistence = persistenceSecond-persistenceFirst + print('Change in average monthly return:', persistence) + return persistence + def datesToDays(dates): days = [] @@ -496,69 +713,22 @@ def datesToDays(dates): return days -def isConnected(): - import socket # To check internet connection - #print('Checking internet connection') - try: - # connect to the host -- tells us if the host is actually reachable - socket.create_connection(("www.andrewkdinh.com", 80)) - print('Internet connection is good') - return True - except OSError: - # pass - print("No internet connection!") - return False - - -def checkPackages(): - import importlib.util - import sys - - packagesInstalled = True - packages = ['requests', 'numpy'] - for i in range(0, len(packages), 1): - package_name = packages[i] - spec = importlib.util.find_spec(package_name) - if spec is None: - print( - package_name + - " is not installed\nPlease type in 'pip install -r requirements.txt' to install all required packages") - packagesInstalled = False - return packagesInstalled - - -def checkPythonVersion(): - import platform - #print('Checking Python version') - i = platform.python_version() - r = i.split('.') - k = ''.join((r[0], '.', r[1])) - k = float(k) - if k < 3.3: - print('Your Python version is', i, - '\nIt needs to be greater than version 3.3') - return False - else: - print('Your Python version of', i, 'is good') - return True - - def benchmarkInit(): # Treat benchmark like stock benchmarkTicker = '' + benchmarks = ['S&P500', 'DJIA', 'Russell 3000', 'MSCI EAFE'] + benchmarksTicker = ['SPY', 'DJIA', 'VTHR', 'EFT'] + print('\nList of 
benchmarks:') + for i in range(0, len(benchmarks), 1): + print(str(i+1) + '. ' + + benchmarks[i] + ' (' + benchmarksTicker[i] + ')') while benchmarkTicker == '': - benchmarks = ['S&P500', 'DJIA', 'Russell 3000', 'MSCI EAFE'] - benchmarksTicker = ['SPY', 'DJIA', 'VTHR', 'EFT'] - print('\nList of benchmarks:') - for i in range(0, len(benchmarks), 1): - print(str(i+1) + '. ' + - benchmarks[i] + ' (' + benchmarksTicker[i] + ')') benchmark = str(input('Please choose a benchmark from the list: ')) # benchmark = 'SPY' # TESTING if Functions.stringIsInt(benchmark) == True: - if int(benchmark) <= len(benchmarks): + if int(benchmark) <= len(benchmarks) and int(benchmark) > 0: benchmarkInt = int(benchmark) benchmark = benchmarks[benchmarkInt-1] benchmarkTicker = benchmarksTicker[benchmarkInt-1] @@ -586,34 +756,182 @@ def benchmarkInit(): def stocksInit(): listOfStocks = [] - isInteger = False - while isInteger == False: - temp = input('\nNumber of stocks to analyze (2 minimum): ') - isInteger = Functions.stringIsInt(temp) - if isInteger == True: - numberOfStocks = int(temp) - else: - print('Please type an integer') - - # numberOfStocks = 5 # TESTING - # print('How many stocks would you like to analyze? ', numberOfStocks) - print('\nThis program can analyze stocks (GOOGL), mutual funds (VFINX), and ETFs (SPY)') - print('For simplicity, all of them will be referred to as "stock"\n') + print('For simplicity, all of them will be referred to as "stock"') - # listOfGenericStocks = ['googl', 'aapl', 'vfinx', 'tsla', 'vthr'] + found = False + while found == False: + print('\nMethods:') + method = 0 + methods = ['Read from a file', 'Enter manually', + 'U.S. News popular funds (~35)', 'Kiplinger top-performing funds (50)', 'TheStreet top-rated mutual funds (20)'] + for i in range(0, len(methods), 1): + print(str(i+1) + '. ' + methods[i]) + while method == 0 or method > len(methods): + method = str(input('Which method? 
')) + if Functions.stringIsInt(method) == True: + method = int(method) + if method == 0 or method > len(methods): + print('Please choose a valid method') + else: + method = 0 + print('Please choose a number') + print('') - for i in range(0, numberOfStocks, 1): - print('Stock', i + 1, end=' ') - stockName = str(input('ticker: ')) + if method == 1: + defaultFiles = ['.gitignore', 'LICENSE', 'main.py', 'Functions.py', + 'README.md', 'requirements.txt', 'cache.sqlite', '_test_runner.py'] # Added by repl.it for whatever reason + stocksFound = False + print('Files in current directory (not including default files): ') + listOfFilesTemp = [f for f in os.listdir() if os.path.isfile(f)] + listOfFiles = [] + for files in listOfFilesTemp: + if files[0] != '.' and any(x in files for x in defaultFiles) != True: + listOfFiles.append(files) + for i in range(0, len(listOfFiles), 1): + if listOfFiles[i][0] != '.': + print(str(i+1) + '. ' + listOfFiles[i]) + while stocksFound == False: + fileName = str(input('What is the file number/name? 
')) + if Functions.stringIsInt(fileName) == True: + if int(fileName) < len(listOfFiles)+1 and int(fileName) > 0: + fileName = listOfFiles[int(fileName)-1] + print(fileName) + if Functions.fileExists(fileName) == True: + listOfStocks = [] + file = open(fileName, 'r') + n = file.read() + file.close() + s = re.findall(r'[^,;\s]+', n) + for i in s: + if str(i) != '' and Functions.hasNumbers(str(i)) == False: + listOfStocks.append(str(i).upper()) + stocksFound = True + else: + print('File not found') + for i in range(0, len(listOfStocks), 1): + stockName = listOfStocks[i].upper() + listOfStocks[i] = Stock() + listOfStocks[i].setName(stockName) - # stockName = listOfGenericStocks[i] - # print(':', stockName) + for k in listOfStocks: + print(k.name, end=' ') + print('\n' + str(len(listOfStocks)) + ' stocks total') - stockName = stockName.upper() - listOfStocks.append(stockName) - listOfStocks[i] = Stock() - listOfStocks[i].setName(stockName) + elif method == 2: + isInteger = False + while isInteger == False: + temp = input('\nNumber of stocks to analyze (2 minimum): ') + isInteger = Functions.stringIsInt(temp) + if isInteger == True: + if int(temp) >= 2: + numberOfStocks = int(temp) + else: + print('Please type a number greater than or equal to 2') + isInteger = False + else: + print('Please type an integer') + + i = 0 + while i < numberOfStocks: + print('Stock', i + 1, end=' ') + stockName = str(input('ticker: ')) + + if stockName != '' and Functions.hasNumbers(stockName) == False: + stockName = stockName.upper() + listOfStocks.append(stockName) + listOfStocks[i] = Stock() + listOfStocks[i].setName(stockName) + i += 1 + else: + print('Invalid ticker') + + elif method == 3: + listOfStocks = [] + url = 'https://money.usnews.com/funds/mutual-funds/most-popular' + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'} + print('Sending request to', url) + f = requests.get(url, 
headers=headers) + Functions.fromCache(f) + raw_html = f.text + soup = BeautifulSoup(raw_html, 'html.parser') + + file = open('usnews-stocks.txt', 'w') + r = soup.find_all( + 'span', attrs={'class': 'text-smaller text-muted'}) + for k in r: + print(k.text.strip(), end=' ') + listOfStocks.append(k.text.strip()) + file.write(str(k.text.strip()) + '\n') + file.close() + + for i in range(0, len(listOfStocks), 1): + stockName = listOfStocks[i].upper() + listOfStocks[i] = Stock() + listOfStocks[i].setName(stockName) + + print('\n' + str(len(listOfStocks)) + ' mutual funds total') + + elif method == 4: + listOfStocks = [] + url = 'https://www.kiplinger.com/tool/investing/T041-S001-top-performing-mutual-funds/index.php' + headers = { + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'} + print('Sending request to', url) + f = requests.get(url, headers=headers) + Functions.fromCache(f) + raw_html = f.text + soup = BeautifulSoup(raw_html, 'html.parser') + + file = open('kiplinger-stocks.txt', 'w') + r = soup.find_all('a', attrs={'style': 'font-weight:700;'}) + for k in r: + print(k.text.strip(), end=' ') + listOfStocks.append(k.text.strip()) + file.write(str(k.text.strip()) + '\n') + file.close() + + for i in range(0, len(listOfStocks), 1): + stockName = listOfStocks[i].upper() + listOfStocks[i] = Stock() + listOfStocks[i].setName(stockName) + + print('\n' + str(len(listOfStocks)) + ' mutual funds total') + + elif method == 5: + listOfStocks = [] + url = 'https://www.thestreet.com/topic/21421/top-rated-mutual-funds.html' + headers = { + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'} + print('Sending request to', url) + f = requests.get(url, headers=headers) + Functions.fromCache(f) + raw_html = f.text + soup = BeautifulSoup(raw_html, 'html.parser') + + file = open('thestreet-stocks.txt', 'w') + r = soup.find_all('a') + for k in 
r: + if len(k.text.strip()) == 5: + n = re.findall(r'^/quote/.*\.html', k['href']) + if len(n) != 0: + print(k.text.strip(), end=' ') + listOfStocks.append(k.text.strip()) + file.write(str(k.text.strip()) + '\n') + file.close() + + for i in range(0, len(listOfStocks), 1): + stockName = listOfStocks[i].upper() + listOfStocks[i] = Stock() + listOfStocks[i].setName(stockName) + + print('\n' + str(len(listOfStocks)) + ' mutual funds total') + + if len(listOfStocks) < 2: + print('Please choose another method') + else: + found = True return listOfStocks @@ -638,6 +956,16 @@ def asyncData(benchmark, listOfStocks): ('https://www.quandl.com/api/v3/datasets/USTREASURY/LONGTERMRATES.json?api_key=', apiQuandl)) urlList.append(url) + # Yahoo Finance + for i in range(0, len(listOfStocks), 1): + url = ''.join(('https://finance.yahoo.com/quote/', + listOfStocks[i].name, '?p=', listOfStocks[i].name)) + urlList.append(url) + for i in range(0, len(listOfStocks), 1): + url = ''.join( + ('https://finance.yahoo.com/lookup?s=', listOfStocks[i].name)) + urlList.append(url) + # Send async requests print('\nSending async requests (Assuming Alpha Vantage is first choice)') with PoolExecutor(max_workers=3) as executor: @@ -648,6 +976,8 @@ def asyncData(benchmark, listOfStocks): def sendAsync(url): + time.sleep(random.randrange(0, 2)) + print('Sending request to', url) requests.get(url) return @@ -656,18 +986,19 @@ def timeFrameInit(): isInteger = False while isInteger == False: print( - '\nPlease enter the time frame in years (<10 years recommended):', end='') + '\nPlease enter the time frame in months (<60 months recommended):', end='') temp = input(' ') isInteger = Functions.stringIsInt(temp) if isInteger == True: - years = int(temp) + if int(temp) > 1: + months = int(temp) + else: + print('Please enter a number greater than 1') + isInteger = False else: print('Please type an integer') - # years = 5 # TESTING - # print('Years:', years) - - timeFrame = years + timeFrame = months return 
timeFrame @@ -725,17 +1056,17 @@ def returnMain(benchmark, listOfStocks): print('Getting risk-free rate from current 10-year treasury bill rates', end='\n\n') Stock.riskFreeRate = riskFreeRate() print(benchmark.name, end='\n\n') - benchmark.annualReturn = Stock.calcAnnualReturn(benchmark) - if benchmark.annualReturn == 'Not available': + benchmark.monthlyReturn = Stock.calcMonthlyReturn(benchmark) + if benchmark.monthlyReturn == 'Not available': print('Please use a lower time frame\nEnding program') exit() - benchmark.averageAnnualReturn = Stock.calcAverageAnnualReturn(benchmark) + benchmark.averageMonthlyReturn = Stock.calcAverageMonthlyReturn(benchmark) benchmark.standardDeviation = Stock.calcStandardDeviation(benchmark) # Make benchmark data global Stock.benchmarkDates = benchmark.dates Stock.benchmarkCloseValues = benchmark.closeValues - Stock.benchmarkAverageAnnualReturn = benchmark.averageAnnualReturn + Stock.benchmarkAverageMonthlyReturn = benchmark.averageMonthlyReturn Stock.benchmarkStandardDeviation = benchmark.standardDeviation i = 0 @@ -755,15 +1086,16 @@ def returnMain(benchmark, listOfStocks): benchmarkMatchDatesAndCloseValues = temp[1] # Calculate everything for each stock - listOfStocks[i].annualReturn = Stock.calcAnnualReturn(listOfStocks[i]) - if listOfStocks[i].annualReturn == 'Not available': + listOfStocks[i].monthlyReturn = Stock.calcMonthlyReturn( + listOfStocks[i]) + if listOfStocks[i].monthlyReturn == 'Not available': print('Removing', listOfStocks[i].name, 'from list of stocks') del listOfStocks[i] if len(listOfStocks) == 0: - print('No stocks to analyze. Ending program') + print('No stocks fit time frame. 
Ending program') exit() else: - listOfStocks[i].averageAnnualReturn = Stock.calcAverageAnnualReturn( + listOfStocks[i].averageMonthlyReturn = Stock.calcAverageMonthlyReturn( listOfStocks[i]) listOfStocks[i].correlation = Stock.calcCorrelation( listOfStocks[i], benchmarkMatchDatesAndCloseValues[1]) @@ -787,6 +1119,9 @@ def returnMain(benchmark, listOfStocks): print('\nNumber of stocks from original list that fit time frame:', len(listOfStocks)) + if len(listOfStocks) < 2: + print('Cannot proceed to the next step. Exiting program.') + exit() def indicatorInit(): @@ -795,17 +1130,16 @@ def indicatorInit(): listOfIndicators = ['Expense Ratio', 'Market Capitalization', 'Turnover', 'Persistence'] print('\n', end='') + print('List of indicators:') + for i in range(0, len(listOfIndicators), 1): + print(str(i + 1) + '. ' + listOfIndicators[i]) while indicatorFound == False: - print('List of indicators:') - for i in range(0, len(listOfIndicators), 1): - print(str(i + 1) + '. ' + listOfIndicators[i]) - indicator = str(input('Choose an indicator from the list: ')) # indicator = 'expense ratio' # TESTING if Functions.stringIsInt(indicator) == True: - if int(indicator) <= 4: + if int(indicator) <= 4 and int(indicator) > 0: indicator = listOfIndicators[int(indicator)-1] indicatorFound = True else: @@ -819,7 +1153,7 @@ def indicatorInit(): break if indicatorFound == False: - print('Please choose an indicator from the list') + print('Please choose an indicator from the list\n') return indicator @@ -878,12 +1212,14 @@ def plot_regression_line(x, y, b, i): plt.plot(x, y_pred, color="g") # putting labels - listOfReturnStrings = ['Average Annual Return', + listOfReturnStrings = ['Average Monthly Return', 'Sharpe Ratio', 'Sortino Ratio', 'Treynor Ratio', 'Alpha'] plt.title(Stock.indicator + ' and ' + listOfReturnStrings[i]) - if Stock.indicator == 'Expense Ratio': + if Stock.indicator == 'Expense Ratio' or Stock.indicator == 'Turnover': plt.xlabel(Stock.indicator + ' (%)') + elif 
Stock.indicator == 'Persistence': + plt.xlabel(Stock.indicator + ' (Difference in average monthly return)') else: plt.xlabel(Stock.indicator) @@ -894,7 +1230,7 @@ def plot_regression_line(x, y, b, i): # function to show plot plt.show(block=False) - for i in range(2, 0, -1): + for i in range(3, 0, -1): if i == 1: sys.stdout.write('Keeping plot open for ' + str(i) + ' second \r') @@ -909,45 +1245,50 @@ def plot_regression_line(x, y, b, i): plt.close() -def indicatorMain(listOfStocks): - Stock.indicator = indicatorInit() - print(Stock.indicator, end='\n\n') +def persistenceTimeFrame(): + print('\nTime frame you chose was', Stock.timeFrame, 'months') + persTimeFrameFound = False + while persTimeFrameFound == False: + persistenceTimeFrame = str( + input('Please choose how many months to measure persistence: ')) + if Functions.stringIsInt(persistenceTimeFrame) == True: + if int(persistenceTimeFrame) > 0 and int(persistenceTimeFrame) < Stock.timeFrame - 1: + persistenceTimeFrame = int(persistenceTimeFrame) + persTimeFrameFound = True + else: + print('Please choose a number between 0 and', + Stock.timeFrame, end='\n') + else: + print('Please choose an integer between 0 and', + Stock.timeFrame, end='\n') - # indicatorValuesGenericExpenseRatio = [2.5, 4.3, 3.1, 2.6, 4.2] # TESTING + return persistenceTimeFrame + + +def indicatorMain(listOfStocks): + print('\n' + str(Stock.indicator) + '\n') listOfStocksIndicatorValues = [] for i in range(0, len(listOfStocks), 1): - indicatorValueFound = False - while indicatorValueFound == False: - if Stock.indicator == 'Expense Ratio': - indicatorValue = str( - input(Stock.indicator + ' for ' + listOfStocks[i].name + ' (%): ')) - elif Stock.indicator == 'Persistence': - indicatorValue = str( - input(Stock.indicator + ' for ' + listOfStocks[i].name + ' (years): ')) - elif Stock.indicator == 'Turnover': - indicatorValue = str(input( - Stock.indicator + ' for ' + listOfStocks[i].name + ' in the last ' + str(Stock.timeFrame) + ' years: ')) - 
elif Stock.indicator == 'Market Capitalization': - indicatorValue = str( - input(Stock.indicator + ' of ' + listOfStocks[i].name + ': ')) - else: - print('Something is wrong. Indicator was not found. Ending program.') - exit() + print(listOfStocks[i].name) + if Stock.indicator != 'Persistence': + listOfStocks[i].indicatorValue = Stock.scrapeYahooFinance( + listOfStocks[i]) + else: + listOfStocks[i].indicatorValue = Stock.calcPersistence( + listOfStocks[i]) + print('') - if Functions.strintIsFloat(indicatorValue) == True: - listOfStocks[i].indicatorValue = float(indicatorValue) - indicatorValueFound = True - else: - print('Please enter a number') + if listOfStocks[i].indicatorValue == 'Not available': + listOfStocks[i].indicatorValue = Stock.indicatorManual( + listOfStocks[i]) - # listOfStocks[i].indicatorValue = indicatorValuesGenericExpenseRatio[i] # TESTING listOfStocksIndicatorValues.append(listOfStocks[i].indicatorValue) - listOfReturns = [] # A list that matches the above list with return values [[averageAnnualReturn1, aAR2, aAR3], [sharpe1, sharpe2, sharpe3], etc.] + listOfReturns = [] # A list that matches the above list with return values [[averageMonthlyReturn1, aAR2, aAR3], [sharpe1, sharpe2, sharpe3], etc.] 
tempListOfReturns = [] for i in range(0, len(listOfStocks), 1): - tempListOfReturns.append(listOfStocks[i].averageAnnualReturn) + tempListOfReturns.append(listOfStocks[i].averageMonthlyReturn) listOfReturns.append(tempListOfReturns) tempListOfReturns = [] for i in range(0, len(listOfStocks), 1): @@ -974,9 +1315,8 @@ def indicatorMain(listOfStocks): Stock.indicatorCorrelation = calcIndicatorCorrelation( listOfIndicatorValues, listOfReturns) - listOfReturnStrings = ['Average Annual Return', + listOfReturnStrings = ['Average Monthly Return', 'Sharpe Ratio', 'Sortino Ratio', 'Treynor Ratio', 'Alpha'] - print('\n', end='') for i in range(0, len(Stock.indicatorCorrelation), 1): print('Correlation with ' + Stock.indicator.lower() + ' and ' + listOfReturnStrings[i].lower() + ': ' + str(Stock.indicatorCorrelation[i])) @@ -992,23 +1332,29 @@ def indicatorMain(listOfStocks): def main(): - # Test internet connection - internetConnection = isConnected() - if not internetConnection: - return - # Check that all required packages are installed - packagesInstalled = checkPackages() + packagesInstalled = Functions.checkPackages( + ['numpy', 'requests', 'bs4', 'requests_cache']) if not packagesInstalled: - return + exit() else: print('All required packages are installed') # Check python version is above 3.3 - pythonVersionGood = checkPythonVersion() + pythonVersionGood = Functions.checkPythonVersion() if not pythonVersionGood: return + # Test internet connection + + internetConnection = Functions.isConnected() + if not internetConnection: + return + else: + Functions.getJoke() + + # Functions.getJoke() + # Choose benchmark and makes it class Stock benchmark = benchmarkInit() # Add it to a list to work with other functions @@ -1017,10 +1363,16 @@ def main(): # Asks for stock(s) ticker and makes them class Stock listOfStocks = stocksInit() - # Determine time frame [Years, Months] + # Determine time frame (Years) timeFrame = timeFrameInit() Stock.timeFrame = timeFrame # Needs to be a 
global variable for all stocks + # Choose indicator + Stock.indicator = indicatorInit() + # Choose time frame for initial persistence + if Stock.indicator == 'Persistence': + Stock.persTimeFrame = persistenceTimeFrame() + # Send async request to AV for listOfStocks and benchmark asyncData(benchmark, listOfStocks) diff --git a/requirements.txt b/requirements.txt index 232d04a..d4dbb6f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ requests~=2.21.0 numpy~=1.15.4 +beautifulsoup4~=4.7.1 requests-cache~=0.4.13 # NOT REQUIRED \ No newline at end of file From 5d1f96c403ecd3dabb5898bc30b3d04f12a2e3fd Mon Sep 17 00:00:00 2001 From: Andrew Dinh Date: Mon, 18 Mar 2019 10:26:07 -0700 Subject: [PATCH 6/6] General fixes Added color, config file, moved packages into ./modules --- .gitignore | 2 +- Functions.py | 61 ++- README.md | 40 +- config.example.json | 63 +++ main.py | 623 ++++++++++++++++++-------- modules/termcolor.py | 168 +++++++ modules/yahoofinancials.py | 891 +++++++++++++++++++++++++++++++++++++ requirements.txt | 4 +- stocks.txt | 10 + 9 files changed, 1657 insertions(+), 205 deletions(-) create mode 100644 config.example.json create mode 100644 modules/termcolor.py create mode 100644 modules/yahoofinancials.py create mode 100644 stocks.txt diff --git a/.gitignore b/.gitignore index 25929cd..c8109c5 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,4 @@ test/ .vscode/ *.sqlite README.html -*stocks.txt \ No newline at end of file +*-stocks.txt \ No newline at end of file diff --git a/Functions.py b/Functions.py index 4c311c1..b6bf33a 100644 --- a/Functions.py +++ b/Functions.py @@ -1,5 +1,8 @@ # Python file for general functions +import sys +sys.path.insert(0, './modules') + def getNearest(items, pivot): return min(items, key=lambda x: abs(x - pivot)) @@ -52,14 +55,18 @@ def strintIsFloat(s): def fromCache(r): import requests_cache + from termcolor import colored, cprint if r.from_cache == True: - print('(Response taken from cache)') + 
cprint('(Response taken from cache)', 'white', attrs=['dark']) return def getJoke(): import requests + import sys + from termcolor import colored, cprint import requests_cache + from halo import Halo with requests_cache.disabled(): ''' f = requests.get('https://official-joke-api.appspot.com/jokes/random').json() @@ -69,9 +76,13 @@ def getJoke(): ''' headers = {'Accept': 'application/json', 'User-Agent': 'fund-indicators (https://github.com/andrewkdinh/fund-indicators)'} - f = requests.get('https://icanhazdadjoke.com/', headers=headers).json() + url = 'https://icanhazdadjoke.com' + + cprint('Get: ' + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + f = requests.get('https://icanhazdadjoke.com/', headers=headers).json() print('') - print(f['joke']) + print(colored(f['joke'], 'green')) def hasNumbers(inputString): @@ -127,6 +138,50 @@ def fileExists(file): import os.path return os.path.exists(file) +def listIndexExists(i): + try: + i + return True + except IndexError: + return False + +def removeOutliers(i): + import statistics + m = statistics.median(i) + firstQ = [] + thirdQ = [] + for x in i: + if x < m: + firstQ.append(x) + elif x > m: + thirdQ.append(x) + firstQm = statistics.median(firstQ) + thirdQm = statistics.median(thirdQ) + iqr = (thirdQm - firstQm) * 1.5 + + goodList = [] + badList = [] + for x in i: + if x < (thirdQm + iqr) and x > (firstQm - iqr): + goodList.append(x) + else: + badList.append(x) # In case I want to know. 
If not, then I just make it equal to returnlist[0] + returnList = [goodList, badList, firstQm, m, thirdQm, iqr] + return returnList + +def validateJson(text): + import json + try: + json.loads(text) + return True + except ValueError: + return False + +def keyInDict(dict, key): + if key in dict: + return True + else: + return False def main(): exit() diff --git a/README.md b/README.md index d33a263..9f01dad 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,45 @@ -# Mutual Fund Indicators +# fund-indicators [![License](https://img.shields.io/github/license/andrewkdinh/fund-indicators.svg)](https://raw.githubusercontent.com/andrewkdinh/fund-indicators/master/LICENSE) -![](https://img.shields.io/github/last-commit/andrewkdinh/fund-indicators.svg) +[![](https://img.shields.io/github/last-commit/andrewkdinh/fund-indicators.svg)](https://github.com/andrewkdinh/fund-indicators/commits/master) ![](https://img.shields.io/github/languages/top/andrewkdinh/fund-indicators.svg) ![](https://img.shields.io/github/languages/code-size/andrewkdinh/fund-indicators.svg) -A project to determine indicators of overperforming mutual funds. +A project to determine relationships between mutual funds and different factors. -Examine correlation between performance and market capitalization, persistence, turnover, and expense ratios. 
+Calculates relationships between: Previous performance, Alpha, Sharpe Ratio, Sortino Ratio -## Prerequisites +and Expense ratios, Turnover, Market Capitalization (Asset Size), Persistence -`$ pip install -r requirements.txt` +Give it a try at [repl.run](https://fund-indicators.andrewkdinh.repl.run) or [repl.it](https://repl.it/@andrewkdinh/fund-indicators) + +## Key Features + +- 100% automated +- Uses multiple API's in case another fails +- Caches http requests for future runs +- Scrapes data from Yahoo Finance +- Color-coded for easy viewing +- Optional graphs to easily visualize linear regression results +- A new joke every time it runs ## Quickstart -To begin, run +```shell +pip install -r requirements.txt +python main.py +``` -`$ python main.py` +Pre-chosen stocks listed in `stocks.txt` -Some ticker values to try: -SPY, VFINX, VTHR, DJIA +## Credits + +This project uses a wide variety of open-source projects + +- [NumPy](https://github.com/numpy/numpy), [Termcolor](https://github.com/hfeeki/termcolor), [Beautiful Soup](https://launchpad.net/beautifulsoup), [yahoofinancials](https://github.com/JECSand/yahoofinancials), [requests-cache](https://github.com/reclosedev/requests-cache), [halo](https://github.com/manrajgrover/halo) + +And thank you to those that have helped me with the idea and product: + +- Amber Bruce, [Alex Stoykov](http://stoykov.us/), Doug Achterman, [Stack Overflow](https://stackoverflow.com) Created by Andrew Dinh from Dr. 
TJ Owens Gilroy Early College Academy diff --git a/config.example.json b/config.example.json new file mode 100644 index 0000000..f90a8e5 --- /dev/null +++ b/config.example.json @@ -0,0 +1,63 @@ +{ + "_comment": "Only use this if everything you know is correct", + "Config": { + "Check Packages": true, + "Check Python Version": true, + "Check Internet Connection": false, + "Get Joke": true, + "Benchmark": "SPY", + "Method": "Kiplinger", + "Time Frame": 60, + "Indicator": "Expense Ratio", + "Remove Outliers": true, + "Sources": [ + "Alpha Vantage", + "Yahoo", + "IEX", + "Tiingo" + ] + }, + "Possible Values": { + "Check Packages": [ + true, + false + ], + "Check Python Version": [ + true, + false + ], + "Check Internet Connection": [ + true, + false + ], + "Get Joke": [ + true, + false + ], + "Benchmark": [ + "SPY", + "DJIA", + "VTHR", + "EFG" + ], + "Method": [ + "Read", + "Manual", + "U.S. News", + "Kiplinger", + "TheStreet" + ], + "Time Frame": "Any integer", + "Indicator": [ + "Expense Ratio", + "Market Capitalization", + "Turnover", + "Persistence" + ], + "Remove Outliers": [ + true, + false + ], + "Sources": "Choose an order out of ['Alpha Vantage', 'Yahoo', 'IEX', 'Tiingo']" + } +} diff --git a/main.py b/main.py index e1ddc2b..14957f7 100644 --- a/main.py +++ b/main.py @@ -3,25 +3,33 @@ # Andrew Dinh # Python 3.6.7 -# Required -from bs4 import BeautifulSoup -import requests -import json -import datetime +# PYTHON FILES import Functions -import numpy as np -import re +from yahoofinancials import YahooFinancials +from termcolor import cprint + +# REQUIRED +import requests_cache import os.path +import re +import datetime +import json +import requests +from bs4 import BeautifulSoup +import numpy as np -# Required for linear regression +# OPTIONAL import matplotlib.pyplot as plt -import sys +from halo import Halo -# Optional +# FOR ASYNC from concurrent.futures import ThreadPoolExecutor as PoolExecutor import time import random -import requests_cache + +import sys 
+sys.path.insert(0, './modules') + requests_cache.install_cache( 'cache', backend='sqlite', expire_after=43200) # 12 hours @@ -59,7 +67,6 @@ API Keys: No: Tiingo ''' - class Stock: # GLOBAL VARIABLES @@ -67,6 +74,11 @@ class Stock: riskFreeRate = 0 indicator = '' + # CONFIG + removeOutliers = True + sourceList = ['Alpha Vantage', 'Yahoo', 'IEX', 'Tiingo'] + config = 'N/A' + # BENCHMARK VALUES benchmarkDates = [] benchmarkCloseValues = [] @@ -100,6 +112,7 @@ class Stock: self.downsideDeviation = 0 self.kurtosis = 0 self.skewness = 0 # Not sure if I need this + self.correlation = 0 self.linearRegression = [] # for y=mx+b, this list has [m,b] self.indicatorValue = '' @@ -117,17 +130,17 @@ class Stock: return self.allCloseValues def IEX(self): - print('IEX') url = ''.join( ('https://api.iextrading.com/1.0/stock/', self.name, '/chart/5y')) # link = "https://api.iextrading.com/1.0/stock/spy/chart/5y" - print("\nSending request to:", url) - f = requests.get(url) + cprint("Get: " + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + f = requests.get(url) Functions.fromCache(f) json_data = f.text if json_data == 'Unknown symbol' or f.status_code != 200: print("IEX not available") - return 'Not available' + return 'N/A' loaded_json = json.loads(json_data) listIEX = [] @@ -141,7 +154,7 @@ class Stock: listIEX.append(allDates) print(len(listIEX[0]), "dates") - print("\nFinding close values for each date") + # print("\nFinding close values for each date") values = [] for i in range(0, len(loaded_json), 1): # If you want to do oldest first # for i in range(len(loaded_json)-1, -1, -1): @@ -149,33 +162,33 @@ class Stock: value = line['close'] values.append(value) listIEX.append(values) - print(len(listIEX[1]), "close values") + print(len(listIEX[0]), 'dates and', len(listIEX[1]), "close values") return listIEX def AV(self): - print('Alpha Vantage') listAV = [] url = ''.join(('https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=', self.name, 
'&outputsize=full&apikey=', apiAV)) # https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=MSFT&outputsize=full&apikey=demo - print("\nSending request to:", url) - f = requests.get(url) + cprint("Get: " + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + f = requests.get(url) Functions.fromCache(f) json_data = f.text loaded_json = json.loads(json_data) if len(loaded_json) == 1 or f.status_code != 200 or len(loaded_json) == 0: print("Alpha Vantage not available") - return 'Not available' + return 'N/A' dailyTimeSeries = loaded_json['Time Series (Daily)'] listOfDates = list(dailyTimeSeries) # listAV.append(listOfDates) listAV.append(list(reversed(listOfDates))) - print("\nFinding close values for each date") + # print("\nFinding close values for each date") values = [] for i in range(0, len(listOfDates), 1): temp = listOfDates[i] @@ -185,25 +198,25 @@ class Stock: values.append(float(value)) # listAV.append(values) listAV.append(list(reversed(values))) - print(len(listAV[1]), "close values") + print(len(listAV[0]), 'dates and', len(listAV[1]), "close values") return listAV def Tiingo(self): - print('Tiingo') token = ''.join(('Token ', apiTiingo)) headers = { 'Content-Type': 'application/json', 'Authorization': token } url = ''.join(('https://api.tiingo.com/tiingo/daily/', self.name)) - print("\nSending request to:", url) - f = requests.get(url, headers=headers) + cprint("Get: " + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + f = requests.get(url, headers=headers) Functions.fromCache(f) loaded_json = f.json() if len(loaded_json) == 1 or f.status_code != 200 or loaded_json['startDate'] == None: print("Tiingo not available") - return 'Not available' + return 'N/A' listTiingo = [] @@ -218,8 +231,9 @@ class Stock: url2 = ''.join((url, '/prices?startDate=', firstDate, '&endDate=', lastDate)) # https://api.tiingo.com/tiingo/daily//prices?startDate=2012-1-1&endDate=2016-1-1 - print("\nSending request to:", url2, '\n') - 
requestResponse2 = requests.get(url2, headers=headers) + cprint("\nGet: " + url2 + '\n', 'white', attrs=['dark']) + with Halo(spinner='dots'): + requestResponse2 = requests.get(url2, headers=headers) Functions.fromCache(requestResponse2) loaded_json2 = requestResponse2.json() for i in range(0, len(loaded_json2)-1, 1): @@ -234,38 +248,86 @@ class Stock: listTiingo.append(dates) print(len(listTiingo[0]), "dates") - print("Finding close values for each date") + # print("Finding close values for each date") # Used loop from finding dates listTiingo.append(values) - print(len(listTiingo[1]), "close values") + print(len(listTiingo[0]), 'dates and', + len(listTiingo[1]), "close values") return listTiingo - def datesAndClose(self): - print('\n', Stock.getName(self), sep='') + def Yahoo(self): + url = ''.join(('https://finance.yahoo.com/quote/', + self.name, '?p=', self.name)) + cprint('Get: ' + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + t = requests.get(url) + if t.history: + print('Yahoo Finance does not have data for', self.name) + print('Yahoo not available') + return 'N/A' + else: + print('Yahoo Finance has data for', self.name) - sourceList = ['AV', 'IEX', 'Tiingo'] - # sourceList = ['IEX', 'Tiingo', 'AV'] + ticker = self.name + firstDate = datetime.datetime.now().date( + ) - datetime.timedelta(days=self.timeFrame*31) # 31 days as a buffer just in case + with Halo(spinner='dots'): + yahoo_financials = YahooFinancials(ticker) + r = yahoo_financials.get_historical_price_data( + str(firstDate), str(datetime.date.today()), 'daily') + + s = r[self.name]['prices'] + listOfDates = [] + listOfCloseValues = [] + for i in range(0, len(s), 1): + listOfDates.append(s[i]['formatted_date']) + listOfCloseValues.append(s[i]['close']) + listYahoo = [listOfDates, listOfCloseValues] + + # Sometimes close value is a None value + i = 0 + while i < len(listYahoo[1]): + if Functions.listIndexExists(listYahoo[1][i]) == True: + if listYahoo[1][i] == None: + del 
listYahoo[1][i] + del listYahoo[0][i] + i = i - 1 + i = i + 1 + else: + break + + print(len(listYahoo[0]), 'dates and', + len(listYahoo[1]), "close values") + return listYahoo + + def datesAndClose(self): + cprint('\n' + str(self.name), 'cyan') + + sourceList = Stock.sourceList # Use each source until you get a value for j in range(0, len(sourceList), 1): source = sourceList[j] - print('\nSource being used:', source) + print('Source being used:', source) - if source == 'AV': + if source == 'Alpha Vantage': datesAndCloseList = Stock.AV(self) - elif source == 'Tiingo': - datesAndCloseList = Stock.Tiingo(self) + elif source == 'Yahoo': + datesAndCloseList = Stock.Yahoo(self) elif source == 'IEX': datesAndCloseList = Stock.IEX(self) + elif source == 'Tiingo': + datesAndCloseList = Stock.Tiingo(self) - if datesAndCloseList != 'Not available': + if datesAndCloseList != 'N/A': break else: if j == len(sourceList)-1: print('\nNo sources have data for', self.name) - print('Removing', self.name, - 'from list of stocks to ensure compatibility later') - return 'Not available' + print('Removing ' + self.name + + ' from list of stocks to ensure compatibility later') + return 'N/A' + print('') # Convert dates to datetime allDates = datesAndCloseList[0] @@ -278,14 +340,14 @@ class Stock: for i in datesAndCloseList[1]: if i == 0: print('Found close value of 0. 
This is likely something like ticker RGN (Daily Time Series with Splits and Dividend Events)') - print('Removing', self.name, + print('Removing ' + self.name + 'from list of stocks to ensure compability later') - return 'Not available' + return 'N/A' return datesAndCloseList def datesAndCloseFitTimeFrame(self): - print('Shortening list to fit time frame') + print('\nShortening list to fit time frame') # Have to do this because if I just make dates = self.allDates & closeValues = self.allCloseValues, then deleting from dates & closeValues also deletes it from self.allDates & self.allCloseValues (I'm not sure why) dates = [] closeValues = [] @@ -295,7 +357,7 @@ class Stock: firstDate = datetime.datetime.now().date() - datetime.timedelta( days=self.timeFrame*30) - print('\n', self.timeFrame, ' months ago: ', firstDate, sep='') + print(self.timeFrame, ' months ago: ', firstDate, sep='') closestDate = Functions.getNearest(dates, firstDate) if closestDate != firstDate: print('Closest date available for', self.name, ':', closestDate) @@ -315,9 +377,7 @@ class Stock: datesAndCloseList2.append(dates) datesAndCloseList2.append(closeValues) - print(len(dates), 'dates') - print(len(closeValues), 'close values') - + print(len(dates), 'dates and', len(closeValues), 'close values') return datesAndCloseList2 def calcAverageMonthlyReturn(self): # pylint: disable=E0202 @@ -345,7 +405,7 @@ class Stock: if firstDate == secondDate: print('Closest date is', firstDate, 'which is after the given time frame.') - return 'Not available' + return 'N/A' # Get corresponding close values and calculate monthly return for i in range(0, len(self.dates), 1): @@ -499,34 +559,49 @@ class Stock: def scrapeYahooFinance(self): # Determine if ETF, Mutual fund, or stock - print('Determining if Yahoo Finance has data for', self.name, end=": ") url = ''.join(('https://finance.yahoo.com/quote/', self.name, '?p=', self.name)) - if requests.get(url).history: - print('No') - return 'Not available' + cprint('Get: 
' + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + t = requests.get(url) + Functions.fromCache(t) + if t.history: + print('Yahoo Finance does not have data for', self.name) + return 'N/A' else: - print('Yes') + print('Yahoo Finance has data for', self.name) stockType = '' url2 = ''.join(('https://finance.yahoo.com/lookup?s=', self.name)) - print('Sending request to:', url2) - raw_html = requests.get(url2).text + cprint('Get: ' + url2, 'white', attrs=['dark']) + with Halo(spinner='dots'): + x = requests.get(url2) + raw_html = x.text + Functions.fromCache(x) soup2 = BeautifulSoup(raw_html, 'html.parser') # Type (Stock, ETF, Mutual Fund) r = soup2.find_all( 'td', attrs={'class': 'data-col4 Ta(start) Pstart(20px) Miw(30px)'}) - t = soup2.find_all('a', attrs={'class': 'Fw(b)'}) # Name and class + u = soup2.find_all('a', attrs={'class': 'Fw(b)'}) # Name and class z = soup2.find_all('td', attrs={ 'class': 'data-col1 Ta(start) Pstart(10px) Miw(80px)'}) # Name of stock listNames = [] - for i in t: + for i in u: + if i.text.strip() == i.text.strip().upper(): + listNames.append(i.text.strip()) + ''' if len(i.text.strip()) < 6: listNames.append(i.text.strip()) + elif '.' in i.text.strip(): + listNames.append(i.text.strip()) # Example: TSNAX (TSN.AX) + #! 
If having problems later, separate them by Industries (Mutual funds and ETF's are always N/A) + ''' + for i in range(0, len(listNames), 1): if listNames[i] == self.name: break + r = r[i].text.strip() z = z[i].text.strip() print('Name:', z) @@ -536,36 +611,32 @@ class Stock: elif r == 'Stocks': stockType = 'Stock' elif r == 'Mutual Fund': - stockType = 'Fund' + stockType = 'Mutual Fund' else: print('Could not determine fund type') - return 'Not available' + return 'N/A' print('Type:', stockType) if Stock.indicator == 'Expense Ratio': if stockType == 'Stock': print( self.name, 'is a stock, and therefore does not have an expense ratio') - return 'Not available' + return 'Stock' - url = ''.join(('https://finance.yahoo.com/quote/', - self.name, '?p=', self.name)) - # https://finance.yahoo.com/quote/SPY?p=SPY - print('Sending request to:', url) - raw_html = requests.get(url).text + raw_html = t.text soup = BeautifulSoup(raw_html, 'html.parser') r = soup.find_all('span', attrs={'class': 'Trsdu(0.3s)'}) if r == []: print('Something went wrong with scraping expense ratio') - return('Not available') + return('N/A') if stockType == 'ETF': for i in range(len(r)-1, 0, -1): s = r[i].text.strip() if s[-1] == '%': break - elif stockType == 'Fund': + elif stockType == 'Mutual Fund': count = 0 # Second in set for i in range(0, len(r)-1, 1): s = r[i].text.strip() @@ -578,64 +649,78 @@ class Stock: expenseRatio = float(s.replace('%', '')) else: print('Something went wrong with scraping expense ratio') - return 'Not available' + return 'N/A' + print(Stock.indicator + ': ', end='') print(str(expenseRatio) + '%') return expenseRatio elif Stock.indicator == 'Market Capitalization': - url = ''.join(('https://finance.yahoo.com/quote/', - self.name, '?p=', self.name)) - # https://finance.yahoo.com/quote/GOOGL?p=GOOGL - raw_html = requests.get(url).text + somethingWrong = False + raw_html = t.text soup = BeautifulSoup(raw_html, 'html.parser') r = soup.find_all( 'span', attrs={'class': 
'Trsdu(0.3s)'}) if r == []: - print('Something went wrong with scraping market capitalization') - return 'Not available' - marketCap = 0 - for t in r: - s = t.text.strip() - if s[-1] == 'B': - print(s, end='') - s = s.replace('B', '') - marketCap = float(s) * 1000000000 # 1 billion - break - elif s[-1] == 'M': - print(s, end='') - s = s.replace('M', '') - marketCap = float(s) * 1000000 # 1 million - break - elif s[-1] == 'K': - print(s, end='') - s = s.replace('K', '') - marketCap = float(s) * 1000 # 1 thousand - break - if marketCap == 0: - print('\nSomething went wrong with scraping market capitalization') - return 'Not available' - marketCap = int(marketCap) + somethingWrong = True + else: + marketCap = 0 + for t in r: + s = t.text.strip() + if s[-1] == 'B': + print(Stock.indicator + ': ', end='') + print(s, end='') + s = s.replace('B', '') + marketCap = float(s) * 1000000000 # 1 billion + break + elif s[-1] == 'M': + print(Stock.indicator + ': ', end='') + print(s, end='') + s = s.replace('M', '') + marketCap = float(s) * 1000000 # 1 million + break + elif s[-1] == 'K': + print(Stock.indicator + ': ', end='') + print(s, end='') + s = s.replace('K', '') + marketCap = float(s) * 1000 # 1 thousand + break + if marketCap == 0: + somethingWrong = True + if somethingWrong == True: + ticker = self.name + yahoo_financials = YahooFinancials(ticker) + marketCap = yahoo_financials.get_market_cap() + if marketCap != None: + print('(Taken from yahoofinancials)') + print(marketCap) + return int(marketCap) + else: + print( + 'Was not able to scrape or get market capitalization from yahoo finance') + return 'N/A' + marketCap = int(marketCap) + return marketCap + print(' =', marketCap) + marketCap = marketCap / 1000000 + print( + 'Dividing marketCap by 1 million (to work with linear regression module):', marketCap) return marketCap elif Stock.indicator == 'Turnover': if stockType == 'Stock': print(self.name, 'is a stock, and therefore does not have turnover') - return 'Not 
available' + return 'Stock' - if stockType == 'Fund': - url = ''.join(('https://finance.yahoo.com/quote/', - self.name, '?p=', self.name)) - # https://finance.yahoo.com/quote/SPY?p=SPY - print('Sending request to', url) - raw_html = requests.get(url).text + if stockType == 'Mutual Fund': + raw_html = t.text soup = BeautifulSoup(raw_html, 'html.parser') r = soup.find_all( 'span', attrs={'class': 'Trsdu(0.3s)'}) if r == []: print('Something went wrong without scraping turnover') - return 'Not available' + return 'N/A' turnover = 0 for i in range(len(r)-1, 0, -1): s = r[i].text.strip() @@ -646,25 +731,30 @@ class Stock: url = ''.join(('https://finance.yahoo.com/quote/', self.name, '/profile?p=', self.name)) # https://finance.yahoo.com/quote/SPY/profile?p=SPY - print('Sending request to', url) - raw_html = requests.get(url).text + cprint('Get: ' + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + raw_html = requests.get(url).text soup = BeautifulSoup(raw_html, 'html.parser') r = soup.find_all( 'span', attrs={'class': 'W(20%) D(b) Fl(start) Ta(e)'}) if r == []: print('Something went wrong without scraping turnover') - return 'Not available' + return 'N/A' turnover = 0 for i in range(len(r)-1, 0, -1): s = r[i].text.strip() if s[-1] == '%': turnover = float(s.replace('%', '')) break + elif s == 'N/A': + print(self.name, 'has a value of N/A for turnover') + return 'N/A' if turnover == 0: print('Something went wrong with scraping turnover') - return 'Not available' + return 'N/A' + print(Stock.indicator + ': ', end='') print(str(turnover) + '%') return turnover @@ -684,7 +774,9 @@ class Stock: indicatorValue = str( input(Stock.indicator + ' of ' + self.name + ': ')) else: - print('Something is wrong. Indicator was not found. Ending program.') + # print('Something is wrong. Indicator was not found. Ending program.') + cprint( + 'Something is wrong. Indicator was not found. 
Ending program.', 'white', 'on_red') exit() if Functions.strintIsFloat(indicatorValue) == True: @@ -698,7 +790,7 @@ class Stock: 0, Stock.persTimeFrame, 1))) / Stock.persTimeFrame persistenceSecond = self.averageMonthlyReturn persistence = persistenceSecond-persistenceFirst - print('Change in average monthly return:', persistence) + print('Change (difference) in average monthly return:', persistence) return persistence @@ -765,24 +857,33 @@ def stocksInit(): method = 0 methods = ['Read from a file', 'Enter manually', 'U.S. News popular funds (~35)', 'Kiplinger top-performing funds (50)', 'TheStreet top-rated mutual funds (20)'] - for i in range(0, len(methods), 1): - print(str(i+1) + '. ' + methods[i]) - while method == 0 or method > len(methods): - method = str(input('Which method? ')) - if Functions.stringIsInt(method) == True: - method = int(method) - if method == 0 or method > len(methods): - print('Please choose a valid method') - else: - method = 0 - print('Please choose a number') - print('') + if Stock.config != 'N/A': + methodsConfig = ['Read', 'Manual', + 'U.S. News', 'Kiplinger', 'TheStreet'] + for i in range(0, len(methodsConfig), 1): + if Stock.config['Method'] == methodsConfig[i]: + method = i + 1 + + else: + for i in range(0, len(methods), 1): + print(str(i+1) + '. ' + methods[i]) + while method == 0 or method > len(methods): + method = str(input('Which method? 
')) + if Functions.stringIsInt(method) == True: + method = int(method) + if method == 0 or method > len(methods): + print('Please choose a valid method') + else: + method = 0 + print('Please choose a number') + + print('') if method == 1: defaultFiles = ['.gitignore', 'LICENSE', 'main.py', 'Functions.py', - 'README.md', 'requirements.txt', 'cache.sqlite', '_test_runner.py'] # Added by repl.it for whatever reason + 'README.md', 'requirements.txt', 'cache.sqlite', 'yahoofinancials.py', 'termcolor.py', 'README.html', 'config.json', '_test_runner.py'] # Added by repl.it for whatever reason stocksFound = False - print('Files in current directory (not including default files): ') + print('\nFiles in current directory (not including default files): ') listOfFilesTemp = [f for f in os.listdir() if os.path.isfile(f)] listOfFiles = [] for files in listOfFilesTemp: @@ -851,8 +952,9 @@ def stocksInit(): url = 'https://money.usnews.com/funds/mutual-funds/most-popular' headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'} - print('Sending request to', url) - f = requests.get(url, headers=headers) + cprint('Get: ' + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + f = requests.get(url, headers=headers) Functions.fromCache(f) raw_html = f.text soup = BeautifulSoup(raw_html, 'html.parser') @@ -878,8 +980,9 @@ def stocksInit(): url = 'https://www.kiplinger.com/tool/investing/T041-S001-top-performing-mutual-funds/index.php' headers = { 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'} - print('Sending request to', url) - f = requests.get(url, headers=headers) + cprint('Get: ' + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + f = requests.get(url, headers=headers) Functions.fromCache(f) raw_html = f.text soup = BeautifulSoup(raw_html, 'html.parser') @@ -904,8 +1007,9 @@ def stocksInit(): url = 
'https://www.thestreet.com/topic/21421/top-rated-mutual-funds.html' headers = { 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'} - print('Sending request to', url) - f = requests.get(url, headers=headers) + cprint('Get: ' + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + f = requests.get(url, headers=headers) Functions.fromCache(f) raw_html = f.text soup = BeautifulSoup(raw_html, 'html.parser') @@ -977,7 +1081,7 @@ def asyncData(benchmark, listOfStocks): def sendAsync(url): time.sleep(random.randrange(0, 2)) - print('Sending request to', url) + cprint('Get: ' + url, 'white', attrs=['dark']) requests.get(url) return @@ -990,7 +1094,7 @@ def timeFrameInit(): temp = input(' ') isInteger = Functions.stringIsInt(temp) if isInteger == True: - if int(temp) > 1: + if int(temp) > 1 and int(temp) < 1000: months = int(temp) else: print('Please enter a number greater than 1') @@ -1003,15 +1107,15 @@ def timeFrameInit(): def dataMain(listOfStocks): - print('\nGathering dates and close values') i = 0 while i < len(listOfStocks): datesAndCloseList = Stock.datesAndClose(listOfStocks[i]) - if datesAndCloseList == 'Not available': + if datesAndCloseList == 'N/A': del listOfStocks[i] if len(listOfStocks) == 0: - print('No stocks to analyze. Ending program') + # print('No stocks to analyze. Ending program') + cprint('No stocks to analyze. 
Ending program', 'white', 'on_red') exit() else: listOfStocks[i].allDates = datesAndCloseList[0] @@ -1032,8 +1136,9 @@ def riskFreeRate(): ('https://www.quandl.com/api/v3/datasets/USTREASURY/LONGTERMRATES.json?api_key=', apiQuandl)) # https://www.quandl.com/api/v3/datasets/USTREASURY/LONGTERMRATES.json?api_key=KUh3U3hxke9tCimjhWEF - print("\nSending request to:", url) - f = requests.get(url) + cprint('\nGet: ' + url, 'white', attrs=['dark']) + with Halo(spinner='dots'): + f = requests.get(url) Functions.fromCache(f) json_data = f.text loaded_json = json.loads(json_data) @@ -1043,7 +1148,7 @@ def riskFreeRate(): print('Risk-free rate:', riskFreeRate, end='\n\n') if f.status_code != 200: - print("Quandl not available") + print('Quandl not available') print('Returning 2.50 as risk-free rate', end='\n\n') # return 0.0250 return 2.50 @@ -1052,13 +1157,14 @@ def riskFreeRate(): def returnMain(benchmark, listOfStocks): - print('\nCalculating unadjusted return, Sharpe ratio, Sortino ratio, and Treynor ratio\n') + cprint('\nCalculating return statistics\n', 'white', attrs=['underline']) print('Getting risk-free rate from current 10-year treasury bill rates', end='\n\n') Stock.riskFreeRate = riskFreeRate() - print(benchmark.name, end='\n\n') + cprint(benchmark.name, 'cyan') benchmark.monthlyReturn = Stock.calcMonthlyReturn(benchmark) - if benchmark.monthlyReturn == 'Not available': - print('Please use a lower time frame\nEnding program') + if benchmark.monthlyReturn == 'N/A': + # print('Please use a lower time frame\nEnding program') + cprint('Please use a lower time frame. 
Ending program', 'white', 'on_red') exit() benchmark.averageMonthlyReturn = Stock.calcAverageMonthlyReturn(benchmark) benchmark.standardDeviation = Stock.calcStandardDeviation(benchmark) @@ -1071,7 +1177,7 @@ def returnMain(benchmark, listOfStocks): i = 0 while i < len(listOfStocks): - print('\n' + listOfStocks[i].name, end='\n\n') + cprint('\n' + listOfStocks[i].name, 'cyan') # Make sure each date has a value for both the benchmark and the stock list1 = [] @@ -1088,11 +1194,13 @@ def returnMain(benchmark, listOfStocks): # Calculate everything for each stock listOfStocks[i].monthlyReturn = Stock.calcMonthlyReturn( listOfStocks[i]) - if listOfStocks[i].monthlyReturn == 'Not available': - print('Removing', listOfStocks[i].name, 'from list of stocks') + if listOfStocks[i].monthlyReturn == 'N/A': + print('Removing ' + listOfStocks[i].name + ' from list of stocks') del listOfStocks[i] if len(listOfStocks) == 0: print('No stocks fit time frame. Ending program') + cprint('No stocks fit time frame. Ending program', + 'white', 'on_red') exit() else: listOfStocks[i].averageMonthlyReturn = Stock.calcAverageMonthlyReturn( @@ -1117,13 +1225,36 @@ def returnMain(benchmark, listOfStocks): i += 1 - print('\nNumber of stocks from original list that fit time frame:', - len(listOfStocks)) + cprint('\nNumber of stocks from original list that fit time frame: ' + + str(len(listOfStocks)), 'green') if len(listOfStocks) < 2: - print('Cannot proceed to the next step. Exiting program.') + #print('Cannot proceed to the next step. Exiting program.') + cprint('Cannot proceed to the next step. Exiting program.', + 'white', 'on_red') exit() +def outlierChoice(): + print('\nWould you like to remove indicator outliers?') + print('1. Yes\n2. 
No') + found = False + while found == False: + outlierChoice = str(input('Choice: ')) + if Functions.stringIsInt(outlierChoice): + if int(outlierChoice) == 1: + return True + elif int(outlierChoice) == 2: + return False + else: + print('Please enter 1 or 2') + elif outlierChoice.lower() == 'yes': + return True + elif outlierChoice.lower() == 'no': + return False + else: + print('Not valid. Please enter a number or yes or no.') + + def indicatorInit(): # Runs correlation or regression study indicatorFound = False @@ -1220,6 +1351,8 @@ def plot_regression_line(x, y, b, i): plt.xlabel(Stock.indicator + ' (%)') elif Stock.indicator == 'Persistence': plt.xlabel(Stock.indicator + ' (Difference in average monthly return)') + elif Stock.indicator == 'Market Capitalization': + plt.xlabel(Stock.indicator + ' (millions)') else: plt.xlabel(Stock.indicator) @@ -1266,25 +1399,61 @@ def persistenceTimeFrame(): def indicatorMain(listOfStocks): - print('\n' + str(Stock.indicator) + '\n') + cprint('\n' + str(Stock.indicator) + '\n', 'white', attrs=['underline']) listOfStocksIndicatorValues = [] for i in range(0, len(listOfStocks), 1): - print(listOfStocks[i].name) - if Stock.indicator != 'Persistence': - listOfStocks[i].indicatorValue = Stock.scrapeYahooFinance( + cprint(listOfStocks[i].name, 'cyan') + if Stock.indicator == 'Persistence': + listOfStocks[i].indicatorValue = Stock.calcPersistence( listOfStocks[i]) else: - listOfStocks[i].indicatorValue = Stock.calcPersistence( + listOfStocks[i].indicatorValue = Stock.scrapeYahooFinance( listOfStocks[i]) print('') - if listOfStocks[i].indicatorValue == 'Not available': + if listOfStocks[i].indicatorValue == 'N/A': listOfStocks[i].indicatorValue = Stock.indicatorManual( listOfStocks[i]) + elif listOfStocks[i].indicatorValue == 'Stock': + print('Removing ' + listOfStocks[i].name + ' from list of stocks') + del listOfStocks[i] + if len(listOfStocks) < 2: + # print('Not able to go to the next step. 
Ending program') + cprint('Not able to go to the next step. Ending program', + 'white', 'on_red') + exit() listOfStocksIndicatorValues.append(listOfStocks[i].indicatorValue) + # Remove outliers + if Stock.removeOutliers == True: + cprint('\nRemoving outliers\n', 'white', attrs=['underline']) + temp = Functions.removeOutliers(listOfStocksIndicatorValues) + if temp[0] == listOfStocksIndicatorValues: + print('No outliers\n') + else: + print('First quartile:', temp[2], ', Median:', temp[3], + ', Third quartile:', temp[4], 'Interquartile range:', temp[5]) + # print('Original list:', listOfStocksIndicatorValues) + listOfStocksIndicatorValues = temp[0] + i = 0 + while i < len(listOfStocks)-1: + for j in temp[1]: + if listOfStocks[i].indicatorValue == j: + print('Removing', listOfStocks[i].name, 'because it has a', + Stock.indicator.lower(), 'value of', listOfStocks[i].indicatorValue) + del listOfStocks[i] + i = i - 1 + break + i += 1 + # print('New list:', listOfStocksIndicatorValues, '\n') + print('') + + # Calculate data + cprint('Calculating correlation and linear regression\n', + 'white', attrs=['underline']) + listOfReturns = [] # A list that matches the above list with return values [[averageMonthlyReturn1, aAR2, aAR3], [sharpe1, sharpe2, sharpe3], etc.] 
tempListOfReturns = [] for i in range(0, len(listOfStocks), 1): @@ -1318,7 +1487,7 @@ def indicatorMain(listOfStocks): listOfReturnStrings = ['Average Monthly Return', 'Sharpe Ratio', 'Sortino Ratio', 'Treynor Ratio', 'Alpha'] for i in range(0, len(Stock.indicatorCorrelation), 1): - print('Correlation with ' + Stock.indicator.lower() + ' and ' + + print('Correlation for ' + Stock.indicator.lower() + ' and ' + listOfReturnStrings[i].lower() + ': ' + str(Stock.indicatorCorrelation[i])) Stock.indicatorRegression = calcIndicatorRegression( @@ -1331,52 +1500,125 @@ def indicatorMain(listOfStocks): listOfReturnStrings[i].lower() + ': ' + formula) +def checkConfig(fileName): + if Functions.fileExists(fileName) == False: + return 'N/A' + file = open(fileName, 'r') + n = file.read() + file.close() + if Functions.validateJson(n) == False: + print('Config file is not valid') + return 'N/A' + t = json.loads(n) + r = t['Config'] + return r + + def main(): + # Check config file for errors and if not, then use values + #! Only use this if you know it is exactly correct. 
I haven't spent much time debugging this + Stock.config = checkConfig('config.json') + # Check that all required packages are installed - packagesInstalled = Functions.checkPackages( - ['numpy', 'requests', 'bs4', 'requests_cache']) - if not packagesInstalled: - exit() + if Stock.config == 'N/A': + packagesInstalled = Functions.checkPackages( + ['numpy', 'requests', 'bs4', 'requests_cache', 'halo']) + if not packagesInstalled: + exit() + else: + print('All required packages are installed') + + # Check python version is above 3.3 + pythonVersionGood = Functions.checkPythonVersion() + if not pythonVersionGood: + exit() + + # Test internet connection + internetConnection = Functions.isConnected() + if not internetConnection: + exit() + else: + Functions.getJoke() + + # Choose benchmark and makes it class Stock + benchmark = benchmarkInit() + # Add it to a list to work with other functions + benchmarkAsList = [benchmark] + + # Asks for stock(s) ticker and makes them class Stock + listOfStocks = stocksInit() + + # Determine time frame (Years) + timeFrame = timeFrameInit() + Stock.timeFrame = timeFrame # Needs to be a global variable for all stocks + + # Choose indicator + Stock.indicator = indicatorInit() + # Choose time frame for initial persistence + if Stock.indicator == 'Persistence': + Stock.persTimeFrame = persistenceTimeFrame() + + # Choose whether to remove outliers or not + Stock.removeOutliers = outlierChoice() else: - print('All required packages are installed') + if Stock.config['Check Packages'] != False: + packagesInstalled = Functions.checkPackages( + ['numpy', 'requests', 'bs4', 'requests_cache', 'halo']) + if not packagesInstalled: + exit() + else: + print('All required packages are installed') - # Check python version is above 3.3 - pythonVersionGood = Functions.checkPythonVersion() - if not pythonVersionGood: - return + if Stock.config['Check Python Version'] != False: + pythonVersionGood = Functions.checkPythonVersion() + if not pythonVersionGood: + 
exit() - # Test internet connection + if Stock.config['Check Internet Connection'] != False: + internetConnection = Functions.isConnected() + if not internetConnection: + exit() + if Stock.config['Get Joke'] != False: + Functions.getJoke() - internetConnection = Functions.isConnected() - if not internetConnection: - return - else: - Functions.getJoke() + benchmarksTicker = ['SPY', 'DJIA', 'VTHR', 'EFT'] + if Stock.config['Benchmark'] in benchmarksTicker: + benchmark = Stock() + benchmark.setName(str(Stock.config['Benchmark'])) + benchmarkAsList = [benchmark] + else: + benchmark = benchmarkInit() + benchmarkAsList = [benchmark] - # Functions.getJoke() + listOfStocks = stocksInit() - # Choose benchmark and makes it class Stock - benchmark = benchmarkInit() - # Add it to a list to work with other functions - benchmarkAsList = [benchmark] + if int(Stock.config['Time Frame']) >= 2: + timeFrame = int(Stock.config['Time Frame']) + else: + timeFrame = timeFrameInit() + Stock.timeFrame = timeFrame # Needs to be a global variable for all stocks - # Asks for stock(s) ticker and makes them class Stock - listOfStocks = stocksInit() + indicators = ['Expense Ratio', + 'Market Capitalization', 'Turnover', 'Persistence'] + if Stock.config['Indicator'] in indicators: + Stock.indicator = Stock.config['Indicator'] + else: + Stock.indicator = indicatorInit() - # Determine time frame (Years) - timeFrame = timeFrameInit() - Stock.timeFrame = timeFrame # Needs to be a global variable for all stocks + if Stock.indicator == 'Persistence': + Stock.persTimeFrame = persistenceTimeFrame() - # Choose indicator - Stock.indicator = indicatorInit() - # Choose time frame for initial persistence - if Stock.indicator == 'Persistence': - Stock.persTimeFrame = persistenceTimeFrame() + # Choose whether to remove outliers or not + if Stock.config['Remove Outliers'] != False: + Stock.removeOutliers = True + else: + Stock.removeOutliers = outlierChoice() # Send async request to AV for listOfStocks and 
benchmark - asyncData(benchmark, listOfStocks) + # asyncData(benchmark, listOfStocks) # Gather data for benchmark and stock(s) + cprint('\nGathering data', 'white', attrs=['underline']) dataMain(benchmarkAsList) dataMain(listOfStocks) @@ -1386,6 +1628,7 @@ def main(): # Choose indicator and calculate correlation with indicator indicatorMain(listOfStocks) + print('') exit() diff --git a/modules/termcolor.py b/modules/termcolor.py new file mode 100644 index 0000000..f11b824 --- /dev/null +++ b/modules/termcolor.py @@ -0,0 +1,168 @@ +# coding: utf-8 +# Copyright (c) 2008-2011 Volvox Development Team +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+# +# Author: Konstantin Lepa + +"""ANSII Color formatting for output in terminal.""" + +from __future__ import print_function +import os + + +__ALL__ = [ 'colored', 'cprint' ] + +VERSION = (1, 1, 0) + +ATTRIBUTES = dict( + list(zip([ + 'bold', + 'dark', + '', + 'underline', + 'blink', + '', + 'reverse', + 'concealed' + ], + list(range(1, 9)) + )) + ) +del ATTRIBUTES[''] + + +HIGHLIGHTS = dict( + list(zip([ + 'on_grey', + 'on_red', + 'on_green', + 'on_yellow', + 'on_blue', + 'on_magenta', + 'on_cyan', + 'on_white' + ], + list(range(40, 48)) + )) + ) + + +COLORS = dict( + list(zip([ + 'grey', + 'red', + 'green', + 'yellow', + 'blue', + 'magenta', + 'cyan', + 'white', + ], + list(range(30, 38)) + )) + ) + + +RESET = '\033[0m' + + +def colored(text, color=None, on_color=None, attrs=None): + """Colorize text. + + Available text colors: + red, green, yellow, blue, magenta, cyan, white. + + Available text highlights: + on_red, on_green, on_yellow, on_blue, on_magenta, on_cyan, on_white. + + Available attributes: + bold, dark, underline, blink, reverse, concealed. + + Example: + colored('Hello, World!', 'red', 'on_grey', ['blue', 'blink']) + colored('Hello, World!', 'green') + """ + if os.getenv('ANSI_COLORS_DISABLED') is None: + fmt_str = '\033[%dm%s' + if color is not None: + text = fmt_str % (COLORS[color], text) + + if on_color is not None: + text = fmt_str % (HIGHLIGHTS[on_color], text) + + if attrs is not None: + for attr in attrs: + text = fmt_str % (ATTRIBUTES[attr], text) + + text += RESET + return text + + +def cprint(text, color=None, on_color=None, attrs=None, **kwargs): + """Print colorize text. + + It accepts arguments of print function. 
+ """ + + print((colored(text, color, on_color, attrs)), **kwargs) + + +if __name__ == '__main__': + print('Current terminal type: %s' % os.getenv('TERM')) + print('Test basic colors:') + cprint('Grey color', 'grey') + cprint('Red color', 'red') + cprint('Green color', 'green') + cprint('Yellow color', 'yellow') + cprint('Blue color', 'blue') + cprint('Magenta color', 'magenta') + cprint('Cyan color', 'cyan') + cprint('White color', 'white') + print(('-' * 78)) + + print('Test highlights:') + cprint('On grey color', on_color='on_grey') + cprint('On red color', on_color='on_red') + cprint('On green color', on_color='on_green') + cprint('On yellow color', on_color='on_yellow') + cprint('On blue color', on_color='on_blue') + cprint('On magenta color', on_color='on_magenta') + cprint('On cyan color', on_color='on_cyan') + cprint('On white color', color='grey', on_color='on_white') + print('-' * 78) + + print('Test attributes:') + cprint('Bold grey color', 'grey', attrs=['bold']) + cprint('Dark red color', 'red', attrs=['dark']) + cprint('Underline green color', 'green', attrs=['underline']) + cprint('Blink yellow color', 'yellow', attrs=['blink']) + cprint('Reversed blue color', 'blue', attrs=['reverse']) + cprint('Concealed Magenta color', 'magenta', attrs=['concealed']) + cprint('Bold underline reverse cyan color', 'cyan', + attrs=['bold', 'underline', 'reverse']) + cprint('Dark blink concealed white color', 'white', + attrs=['dark', 'blink', 'concealed']) + print(('-' * 78)) + + print('Test mixing:') + cprint('Underline red on grey color', 'red', 'on_grey', + ['underline']) + cprint('Reversed green on red color', 'green', 'on_red', ['reverse']) + diff --git a/modules/yahoofinancials.py b/modules/yahoofinancials.py new file mode 100644 index 0000000..9f477f1 --- /dev/null +++ b/modules/yahoofinancials.py @@ -0,0 +1,891 @@ +""" +============================== +The Yahoo Financials Module +Version: 1.5 +============================== + +Author: Connor Sanders +Email: 
sandersconnor1@gmail.com +Version Released: 01/27/2019 +Tested on Python 2.7, 3.3, 3.4, 3.5, 3.6, and 3.7 + +Copyright (c) 2019 Connor Sanders +MIT License + +List of Included Functions: + +1) get_financial_stmts(frequency, statement_type, reformat=True) + - frequency can be either 'annual' or 'quarterly'. + - statement_type can be 'income', 'balance', 'cash'. + - reformat optional value defaulted to true. Enter False for unprocessed raw data from Yahoo Finance. +2) get_stock_price_data(reformat=True) + - reformat optional value defaulted to true. Enter False for unprocessed raw data from Yahoo Finance. +3) get_stock_earnings_data(reformat=True) + - reformat optional value defaulted to true. Enter False for unprocessed raw data from Yahoo Finance. +4) get_summary_data(reformat=True) + - reformat optional value defaulted to true. Enter False for unprocessed raw data from Yahoo Finance. +5) get_stock_quote_type_data() +6) get_historical_price_data(start_date, end_date, time_interval) + - Gets historical price data for currencies, stocks, indexes, cryptocurrencies, and commodity futures. + - start_date should be entered in the 'YYYY-MM-DD' format. First day that financial data will be pulled. + - end_date should be entered in the 'YYYY-MM-DD' format. Last day that financial data will be pulled. + - time_interval can be either 'daily', 'weekly', or 'monthly'. Parameter determines the time period interval. 
+ +Usage Examples: +from yahoofinancials import YahooFinancials +#tickers = 'AAPL' +#or +tickers = ['AAPL', 'WFC', 'F', 'JPY=X', 'XRP-USD', 'GC=F'] +yahoo_financials = YahooFinancials(tickers) +balance_sheet_data = yahoo_financials.get_financial_stmts('quarterly', 'balance') +earnings_data = yahoo_financials.get_stock_earnings_data() +historical_prices = yahoo_financials.get_historical_price_data('2015-01-15', '2017-10-15', 'weekly') +""" + +import sys +import calendar +import re +from json import loads +import time +from bs4 import BeautifulSoup +import datetime +import pytz +import random +try: + from urllib import FancyURLopener +except: + from urllib.request import FancyURLopener + + +# track the last get timestamp to add a minimum delay between gets - be nice! +_lastget = 0 + + +# Custom Exception class to handle custom error +class ManagedException(Exception): + pass + + +# Class used to open urls for financial data +class UrlOpener(FancyURLopener): + version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11' + + +# Class containing Yahoo Finance ETL Functionality +class YahooFinanceETL(object): + + def __init__(self, ticker): + self.ticker = ticker.upper() if isinstance(ticker, str) else [t.upper() for t in ticker] + self._cache = {} + + # Minimum interval between Yahoo Finance requests for this instance + _MIN_INTERVAL = 7 + + # Meta-data dictionaries for the classes to use + YAHOO_FINANCIAL_TYPES = { + 'income': ['financials', 'incomeStatementHistory', 'incomeStatementHistoryQuarterly'], + 'balance': ['balance-sheet', 'balanceSheetHistory', 'balanceSheetHistoryQuarterly', 'balanceSheetStatements'], + 'cash': ['cash-flow', 'cashflowStatementHistory', 'cashflowStatementHistoryQuarterly', 'cashflowStatements'], + 'keystats': ['key-statistics'], + 'history': ['history'] + } + + # Interval value translation dictionary + _INTERVAL_DICT = { + 'daily': '1d', + 'weekly': '1wk', + 'monthly': '1mo' + } + + # Base Yahoo 
Finance URL for the class to build on + _BASE_YAHOO_URL = 'https://finance.yahoo.com/quote/' + + # private static method to get the appropriate report type identifier + @staticmethod + def get_report_type(frequency): + if frequency == 'annual': + report_num = 1 + else: + report_num = 2 + return report_num + + # Public static method to format date serial string to readable format and vice versa + @staticmethod + def format_date(in_date): + if isinstance(in_date, str): + form_date = int(calendar.timegm(time.strptime(in_date, '%Y-%m-%d'))) + else: + form_date = str((datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=in_date)).date()) + return form_date + + # Private Static Method to Convert Eastern Time to UTC + @staticmethod + def _convert_to_utc(date, mask='%Y-%m-%d %H:%M:%S'): + utc = pytz.utc + eastern = pytz.timezone('US/Eastern') + date_ = datetime.datetime.strptime(date.replace(" 0:", " 12:"), mask) + date_eastern = eastern.localize(date_, is_dst=None) + date_utc = date_eastern.astimezone(utc) + return date_utc.strftime('%Y-%m-%d %H:%M:%S %Z%z') + + # Private method to scrape data from yahoo finance + def _scrape_data(self, url, tech_type, statement_type): + global _lastget + if not self._cache.get(url): + now = int(time.time()) + if _lastget and now - _lastget < self._MIN_INTERVAL: + time.sleep(self._MIN_INTERVAL - (now - _lastget) + 1) + now = int(time.time()) + _lastget = now + urlopener = UrlOpener() + # Try to open the URL up to 10 times sleeping random time if something goes wrong + max_retry = 10 + for i in range(0, max_retry): + response = urlopener.open(url) + if response.getcode() != 200: + time.sleep(random.randrange(10, 20)) + else: + response_content = response.read() + soup = BeautifulSoup(response_content, "html.parser") + re_script = soup.find("script", text=re.compile("root.App.main")) + if re_script is not None: + script = re_script.text + self._cache[url] = loads(re.search("root.App.main\s+=\s+(\{.*\})", script).group(1)) + 
response.close() + break + else: + time.sleep(random.randrange(10, 20)) + if i == max_retry - 1: + # Raise a custom exception if we can't get the web page within max_retry attempts + raise ManagedException("Server replied with HTTP " + str(response.getcode()) + + " code while opening the url: " + str(url)) + data = self._cache[url] + if tech_type == '' and statement_type != 'history': + stores = data["context"]["dispatcher"]["stores"]["QuoteSummaryStore"] + elif tech_type != '' and statement_type != 'history': + stores = data["context"]["dispatcher"]["stores"]["QuoteSummaryStore"][tech_type] + else: + stores = data["context"]["dispatcher"]["stores"]["HistoricalPriceStore"] + return stores + + # Private static method to determine if a numerical value is in the data object being cleaned + @staticmethod + def _determine_numeric_value(value_dict): + if 'raw' in value_dict.keys(): + numerical_val = value_dict['raw'] + else: + numerical_val = None + return numerical_val + + # Private method to format date serial string to readable format and vice versa + def _format_time(self, in_time): + form_date_time = datetime.datetime.fromtimestamp(int(in_time)).strftime('%Y-%m-%d %H:%M:%S') + utc_dt = self._convert_to_utc(form_date_time) + return utc_dt + + # Private method to return the a sub dictionary entry for the earning report cleaning + def _get_cleaned_sub_dict_ent(self, key, val_list): + sub_list = [] + for rec in val_list: + sub_sub_dict = {} + for k, v in rec.items(): + if k == 'date': + sub_sub_dict_ent = {k: v} + else: + numerical_val = self._determine_numeric_value(v) + sub_sub_dict_ent = {k: numerical_val} + sub_sub_dict.update(sub_sub_dict_ent) + sub_list.append(sub_sub_dict) + sub_ent = {key: sub_list} + return sub_ent + + # Private method to process raw earnings data and clean + def _clean_earnings_data(self, raw_data): + cleaned_data = {} + earnings_key = 'earningsData' + financials_key = 'financialsData' + for k, v in raw_data.items(): + if k == 'earningsChart': 
+ sub_dict = {} + for k2, v2 in v.items(): + if k2 == 'quarterly': + sub_ent = self._get_cleaned_sub_dict_ent(k2, v2) + elif k2 == 'currentQuarterEstimate': + numerical_val = self._determine_numeric_value(v2) + sub_ent = {k2: numerical_val} + else: + sub_ent = {k2: v2} + sub_dict.update(sub_ent) + dict_ent = {earnings_key: sub_dict} + cleaned_data.update(dict_ent) + elif k == 'financialsChart': + sub_dict = {} + for k2, v2, in v.items(): + sub_ent = self._get_cleaned_sub_dict_ent(k2, v2) + sub_dict.update(sub_ent) + dict_ent = {financials_key: sub_dict} + cleaned_data.update(dict_ent) + else: + if k != 'maxAge': + dict_ent = {k: v} + cleaned_data.update(dict_ent) + return cleaned_data + + # Private method to clean summary and price reports + def _clean_reports(self, raw_data): + cleaned_dict = {} + if raw_data is None: + return None + for k, v in raw_data.items(): + if 'Time' in k: + formatted_utc_time = self._format_time(v) + dict_ent = {k: formatted_utc_time} + elif 'Date' in k: + try: + formatted_date = v['fmt'] + except (KeyError, TypeError): + formatted_date = '-' + dict_ent = {k: formatted_date} + elif v is None or isinstance(v, str) or isinstance(v, int) or isinstance(v, float): + dict_ent = {k: v} + # Python 2 and Unicode + elif sys.version_info < (3, 0) and isinstance(v, unicode): + dict_ent = {k: v} + else: + numerical_val = self._determine_numeric_value(v) + dict_ent = {k: numerical_val} + cleaned_dict.update(dict_ent) + return cleaned_dict + + # Private Static Method to ensure ticker is URL encoded + @staticmethod + def _encode_ticker(ticker_str): + encoded_ticker = ticker_str.replace('=', '%3D') + return encoded_ticker + + # Private method to get time interval code + def _build_historical_url(self, ticker, hist_oj): + url = self._BASE_YAHOO_URL + self._encode_ticker(ticker) + '/history?period1=' + str(hist_oj['start']) + \ + '&period2=' + str(hist_oj['end']) + '&interval=' + hist_oj['interval'] + '&filter=history&frequency=' + \ + hist_oj['interval'] + 
return url + + # Private Method to clean the dates of the newly returns historical stock data into readable format + def _clean_historical_data(self, hist_data, last_attempt=False): + data = {} + for k, v in hist_data.items(): + if k == 'eventsData': + event_obj = {} + if isinstance(v, list): + dict_ent = {k: event_obj} + else: + for type_key, type_obj in v.items(): + formatted_type_obj = {} + for date_key, date_obj in type_obj.items(): + formatted_date_key = self.format_date(int(date_key)) + cleaned_date = self.format_date(int(date_obj['date'])) + date_obj.update({'formatted_date': cleaned_date}) + formatted_type_obj.update({formatted_date_key: date_obj}) + event_obj.update({type_key: formatted_type_obj}) + dict_ent = {k: event_obj} + elif 'date' in k.lower(): + if v is not None: + cleaned_date = self.format_date(v) + dict_ent = {k: {'formatted_date': cleaned_date, 'date': v}} + else: + if last_attempt is False: + return None + else: + dict_ent = {k: {'formatted_date': None, 'date': v}} + elif isinstance(v, list): + sub_dict_list = [] + for sub_dict in v: + sub_dict['formatted_date'] = self.format_date(sub_dict['date']) + sub_dict_list.append(sub_dict) + dict_ent = {k: sub_dict_list} + else: + dict_ent = {k: v} + data.update(dict_ent) + return data + + # Private Static Method to build API url for GET Request + @staticmethod + def _build_api_url(hist_obj, up_ticker): + base_url = "https://query1.finance.yahoo.com/v8/finance/chart/" + api_url = base_url + up_ticker + '?symbol=' + up_ticker + '&period1=' + str(hist_obj['start']) + '&period2=' + \ + str(hist_obj['end']) + '&interval=' + hist_obj['interval'] + api_url += '&events=div|split|earn&lang=en-US&region=US' + return api_url + + # Private Method to get financial data via API Call + def _get_api_data(self, api_url, tries=0): + urlopener = UrlOpener() + response = urlopener.open(api_url) + if response.getcode() == 200: + res_content = response.read() + response.close() + if sys.version_info < (3, 0): + return 
loads(res_content) + return loads(res_content.decode('utf-8')) + else: + if tries < 5: + time.sleep(random.randrange(10, 20)) + tries += 1 + return self._get_api_data(api_url, tries) + else: + return None + + # Private Method to clean API data + def _clean_api_data(self, api_url): + raw_data = self._get_api_data(api_url) + ret_obj = {} + ret_obj.update({'eventsData': []}) + if raw_data is None: + return ret_obj + results = raw_data['chart']['result'] + if results is None: + return ret_obj + for result in results: + tz_sub_dict = {} + ret_obj.update({'eventsData': result.get('events', {})}) + ret_obj.update({'firstTradeDate': result['meta'].get('firstTradeDate', 'NA')}) + ret_obj.update({'currency': result['meta'].get('currency', 'NA')}) + ret_obj.update({'instrumentType': result['meta'].get('instrumentType', 'NA')}) + tz_sub_dict.update({'gmtOffset': result['meta']['gmtoffset']}) + ret_obj.update({'timeZone': tz_sub_dict}) + timestamp_list = result['timestamp'] + high_price_list = result['indicators']['quote'][0]['high'] + low_price_list = result['indicators']['quote'][0]['low'] + open_price_list = result['indicators']['quote'][0]['open'] + close_price_list = result['indicators']['quote'][0]['close'] + volume_list = result['indicators']['quote'][0]['volume'] + adj_close_list = result['indicators']['adjclose'][0]['adjclose'] + i = 0 + prices_list = [] + for timestamp in timestamp_list: + price_dict = {} + price_dict.update({'date': timestamp}) + price_dict.update({'high': high_price_list[i]}) + price_dict.update({'low': low_price_list[i]}) + price_dict.update({'open': open_price_list[i]}) + price_dict.update({'close': close_price_list[i]}) + price_dict.update({'volume': volume_list[i]}) + price_dict.update({'adjclose': adj_close_list[i]}) + prices_list.append(price_dict) + i += 1 + ret_obj.update({'prices': prices_list}) + return ret_obj + + # Private Method to Handle Recursive API Request + def _recursive_api_request(self, hist_obj, up_ticker, i=0): + api_url = 
self._build_api_url(hist_obj, up_ticker) + re_data = self._clean_api_data(api_url) + cleaned_re_data = self._clean_historical_data(re_data) + if cleaned_re_data is not None: + return cleaned_re_data + else: + if i < 3: + i += 1 + return self._recursive_api_request(hist_obj, up_ticker, i) + else: + return self._clean_historical_data(re_data, True) + + # Private Method to take scrapped data and build a data dictionary with + def _create_dict_ent(self, up_ticker, statement_type, tech_type, report_name, hist_obj): + YAHOO_URL = self._BASE_YAHOO_URL + up_ticker + '/' + self.YAHOO_FINANCIAL_TYPES[statement_type][0] + '?p=' +\ + up_ticker + if tech_type == '' and statement_type != 'history': + try: + re_data = self._scrape_data(YAHOO_URL, tech_type, statement_type) + dict_ent = {up_ticker: re_data[u'' + report_name], 'dataType': report_name} + except KeyError: + re_data = None + dict_ent = {up_ticker: re_data, 'dataType': report_name} + elif tech_type != '' and statement_type != 'history': + try: + re_data = self._scrape_data(YAHOO_URL, tech_type, statement_type) + except KeyError: + re_data = None + dict_ent = {up_ticker: re_data} + else: + YAHOO_URL = self._build_historical_url(up_ticker, hist_obj) + try: + cleaned_re_data = self._recursive_api_request(hist_obj, up_ticker) + except KeyError: + try: + re_data = self._scrape_data(YAHOO_URL, tech_type, statement_type) + cleaned_re_data = self._clean_historical_data(re_data) + except KeyError: + cleaned_re_data = None + dict_ent = {up_ticker: cleaned_re_data} + return dict_ent + + # Private method to return the stmt_id for the reformat_process + def _get_stmt_id(self, statement_type, raw_data): + stmt_id = '' + i = 0 + for key in raw_data.keys(): + if key in self.YAHOO_FINANCIAL_TYPES[statement_type.lower()]: + stmt_id = key + i += 1 + if i != 1: + return None + return stmt_id + + # Private Method for the Reformat Process + def _reformat_stmt_data_process(self, raw_data, statement_type): + final_data_list = [] + if raw_data 
is not None: + stmt_id = self._get_stmt_id(statement_type, raw_data) + if stmt_id is None: + return final_data_list + hashed_data_list = raw_data[stmt_id] + for data_item in hashed_data_list: + data_date = '' + sub_data_dict = {} + for k, v in data_item.items(): + if k == 'endDate': + data_date = v['fmt'] + elif k != 'maxAge': + numerical_val = self._determine_numeric_value(v) + sub_dict_item = {k: numerical_val} + sub_data_dict.update(sub_dict_item) + dict_item = {data_date: sub_data_dict} + final_data_list.append(dict_item) + return final_data_list + else: + return raw_data + + # Private Method to return subdict entry for the statement reformat process + def _get_sub_dict_ent(self, ticker, raw_data, statement_type): + form_data_list = self._reformat_stmt_data_process(raw_data[ticker], statement_type) + return {ticker: form_data_list} + + # Public method to get time interval code + def get_time_code(self, time_interval): + interval_code = self._INTERVAL_DICT[time_interval.lower()] + return interval_code + + # Public Method to get stock data + def get_stock_data(self, statement_type='income', tech_type='', report_name='', hist_obj={}): + data = {} + if isinstance(self.ticker, str): + dict_ent = self._create_dict_ent(self.ticker, statement_type, tech_type, report_name, hist_obj) + data.update(dict_ent) + else: + for tick in self.ticker: + try: + dict_ent = self._create_dict_ent(tick, statement_type, tech_type, report_name, hist_obj) + data.update(dict_ent) + except ManagedException: + print("Warning! 
Ticker: " + str(tick) + " error - " + str(ManagedException)) + print("The process is still running...") + continue + return data + + # Public Method to get technical stock datafrom yahoofinancials import YahooFinancials + + def get_stock_tech_data(self, tech_type): + if tech_type == 'defaultKeyStatistics': + return self.get_stock_data(statement_type='keystats', tech_type=tech_type) + else: + return self.get_stock_data(tech_type=tech_type) + + # Public Method to get reformatted statement data + def get_reformatted_stmt_data(self, raw_data, statement_type): + data_dict = {} + sub_dict = {} + data_type = raw_data['dataType'] + if isinstance(self.ticker, str): + sub_dict_ent = self._get_sub_dict_ent(self.ticker, raw_data, statement_type) + sub_dict.update(sub_dict_ent) + dict_ent = {data_type: sub_dict} + data_dict.update(dict_ent) + else: + for tick in self.ticker: + sub_dict_ent = self._get_sub_dict_ent(tick, raw_data, statement_type) + sub_dict.update(sub_dict_ent) + dict_ent = {data_type: sub_dict} + data_dict.update(dict_ent) + return data_dict + + # Public method to get cleaned summary and price report data + def get_clean_data(self, raw_report_data, report_type): + cleaned_data_dict = {} + if isinstance(self.ticker, str): + if report_type == 'earnings': + try: + cleaned_data = self._clean_earnings_data(raw_report_data[self.ticker]) + except: + cleaned_data = None + else: + try: + cleaned_data = self._clean_reports(raw_report_data[self.ticker]) + except: + cleaned_data = None + cleaned_data_dict.update({self.ticker: cleaned_data}) + else: + for tick in self.ticker: + if report_type == 'earnings': + try: + cleaned_data = self._clean_earnings_data(raw_report_data[tick]) + except: + cleaned_data = None + else: + try: + cleaned_data = self._clean_reports(raw_report_data[tick]) + except: + cleaned_data = None + cleaned_data_dict.update({tick: cleaned_data}) + return cleaned_data_dict + + # Private method to handle dividend data requestsfrom yahoofinancials import 
YahooFinancials + + def _handle_api_dividend_request(self, cur_ticker, start, end, interval): + re_dividends = [] + test_url = 'https://query1.finance.yahoo.com/v8/finance/chart/' + cur_ticker + \ + '?period1=' + str(start) + '&period2=' + str(end) + '&interval=' + interval + '&events=div' + div_dict = self._get_api_data(test_url)['chart']['result'][0]['events']['dividends'] + for div_time_key, div_obj in div_dict.items(): + dividend_obj = { + 'date': div_obj['date'], + 'formatted_date': self.format_date(int(div_obj['date'])), + 'amount': div_obj.get('amount', None) + } + re_dividends.append(dividend_obj) + return sorted(re_dividends, key=lambda div: div['date']) + + # Public method to get daily dividend data + def get_stock_dividend_data(self, start, end, interval): + interval_code = self.get_time_code(interval) + if isinstance(self.ticker, str): + try: + return {self.ticker: self._handle_api_dividend_request(self.ticker, start, end, interval_code)} + except: + return {self.ticker: None} + else: + re_data = {} + for tick in self.ticker: + try: + div_data = self._handle_api_dividend_request(tick, start, end, interval_code) + re_data.update({tick: div_data}) + except: + re_data.update({tick: None}) + return re_data + + +# Class containing methods to create stock data extracts +class YahooFinancials(YahooFinanceETL): + + # Private method that handles financial statement extraction + def _run_financial_stmt(self, statement_type, report_num, reformat): + report_name = self.YAHOO_FINANCIAL_TYPES[statement_type][report_num] + if reformat: + raw_data = self.get_stock_data(statement_type, report_name=report_name) + data = self.get_reformatted_stmt_data(raw_data, statement_type) + else: + data = self.get_stock_data(statement_type, report_name=report_name) + return data + + # Public Method for the user to get financial statement data + def get_financial_stmts(self, frequency, statement_type, reformat=True): + report_num = self.get_report_type(frequency) + if 
isinstance(statement_type, str): + data = self._run_financial_stmt(statement_type, report_num, reformat) + else: + data = {} + for stmt_type in statement_type: + re_data = self._run_financial_stmt(stmt_type, report_num, reformat) + data.update(re_data) + return data + + # Public Method for the user to get stock price data + def get_stock_price_data(self, reformat=True): + if reformat: + return self.get_clean_data(self.get_stock_tech_data('price'), 'price') + else: + return self.get_stock_tech_data('price') + + # Public Method for the user to return key-statistics data + def get_key_statistics_data(self, reformat=True): + if reformat: + return self.get_clean_data(self.get_stock_tech_data('defaultKeyStatistics'), 'defaultKeyStatistics') + else: + return self.get_stock_tech_data('defaultKeyStatistics') + + # Public Method for the user to get stock earnings data + def get_stock_earnings_data(self, reformat=True): + if reformat: + return self.get_clean_data(self.get_stock_tech_data('earnings'), 'earnings') + else: + return self.get_stock_tech_data('earnings') + + # Public Method for the user to get stock summary data + def get_summary_data(self, reformat=True): + if reformat: + return self.get_clean_data(self.get_stock_tech_data('summaryDetail'), 'summaryDetail') + else: + return self.get_stock_tech_data('summaryDetail') + + # Public Method for the user to get the yahoo summary url + def get_stock_summary_url(self): + if isinstance(self.ticker, str): + return self._BASE_YAHOO_URL + self.ticker + return {t: self._BASE_YAHOO_URL + t for t in self.ticker} + + # Public Method for the user to get stock quote data + def get_stock_quote_type_data(self): + return self.get_stock_tech_data('quoteType') + + # Public Method for user to get historical price data with + def get_historical_price_data(self, start_date, end_date, time_interval): + interval_code = self.get_time_code(time_interval) + start = self.format_date(start_date) + end = self.format_date(end_date) + hist_obj = 
{'start': start, 'end': end, 'interval': interval_code} + return self.get_stock_data('history', hist_obj=hist_obj) + + # Private Method for Functions needing stock_price_data + def _stock_price_data(self, data_field): + if isinstance(self.ticker, str): + if self.get_stock_price_data()[self.ticker] is None: + return None + return self.get_stock_price_data()[self.ticker].get(data_field, None) + else: + ret_obj = {} + for tick in self.ticker: + if self.get_stock_price_data()[tick] is None: + ret_obj.update({tick: None}) + else: + ret_obj.update({tick: self.get_stock_price_data()[tick].get(data_field, None)}) + return ret_obj + + # Private Method for Functions needing stock_price_data + def _stock_summary_data(self, data_field): + if isinstance(self.ticker, str): + if self.get_summary_data()[self.ticker] is None: + return None + return self.get_summary_data()[self.ticker].get(data_field, None) + else: + ret_obj = {} + for tick in self.ticker: + if self.get_summary_data()[tick] is None: + ret_obj.update({tick: None}) + else: + ret_obj.update({tick: self.get_summary_data()[tick].get(data_field, None)}) + return ret_obj + + # Private Method for Functions needing financial statement data + def _financial_statement_data(self, stmt_type, stmt_code, field_name, freq): + re_data = self.get_financial_stmts(freq, stmt_type)[stmt_code] + if isinstance(self.ticker, str): + try: + date_key = re_data[self.ticker][0].keys()[0] + except (IndexError, AttributeError, TypeError): + date_key = list(re_data[self.ticker][0])[0] + data = re_data[self.ticker][0][date_key][field_name] + else: + data = {} + for tick in self.ticker: + try: + date_key = re_data[tick][0].keys()[0] + except: + try: + date_key = list(re_data[tick][0].keys())[0] + except: + date_key = None + if date_key is not None: + sub_data = re_data[tick][0][date_key][field_name] + data.update({tick: sub_data}) + else: + data.update({tick: None}) + return data + + # Public method to get daily dividend data + def 
get_daily_dividend_data(self, start_date, end_date): + start = self.format_date(start_date) + end = self.format_date(end_date) + return self.get_stock_dividend_data(start, end, 'daily') + + # Public Price Data Methods + def get_current_price(self): + return self._stock_price_data('regularMarketPrice') + + def get_current_change(self): + return self._stock_price_data('regularMarketChange') + + def get_current_percent_change(self): + return self._stock_price_data('regularMarketChangePercent') + + def get_current_volume(self): + return self._stock_price_data('regularMarketVolume') + + def get_prev_close_price(self): + return self._stock_price_data('regularMarketPreviousClose') + + def get_open_price(self): + return self._stock_price_data('regularMarketOpen') + + def get_ten_day_avg_daily_volume(self): + return self._stock_price_data('averageDailyVolume10Day') + + def get_three_month_avg_daily_volume(self): + return self._stock_price_data('averageDailyVolume3Month') + + def get_stock_exchange(self): + return self._stock_price_data('exchangeName') + + def get_market_cap(self): + return self._stock_price_data('marketCap') + + def get_daily_low(self): + return self._stock_price_data('regularMarketDayLow') + + def get_daily_high(self): + return self._stock_price_data('regularMarketDayHigh') + + def get_currency(self): + return self._stock_price_data('currency') + + # Public Summary Data Methods + def get_yearly_high(self): + return self._stock_summary_data('fiftyTwoWeekHigh') + + def get_yearly_low(self): + return self._stock_summary_data('fiftyTwoWeekLow') + + def get_dividend_yield(self): + return self._stock_summary_data('dividendYield') + + def get_annual_avg_div_yield(self): + return self._stock_summary_data('trailingAnnualDividendYield') + + def get_five_yr_avg_div_yield(self): + return self._stock_summary_data('fiveYearAvgDividendYield') + + def get_dividend_rate(self): + return self._stock_summary_data('dividendRate') + + def get_annual_avg_div_rate(self): + return 
self._stock_summary_data('trailingAnnualDividendRate') + + def get_50day_moving_avg(self): + return self._stock_summary_data('fiftyDayAverage') + + def get_200day_moving_avg(self): + return self._stock_summary_data('twoHundredDayAverage') + + def get_beta(self): + return self._stock_summary_data('beta') + + def get_payout_ratio(self): + return self._stock_summary_data('payoutRatio') + + def get_pe_ratio(self): + return self._stock_summary_data('trailingPE') + + def get_price_to_sales(self): + return self._stock_summary_data('priceToSalesTrailing12Months') + + def get_exdividend_date(self): + return self._stock_summary_data('exDividendDate') + + # Financial Statement Data Methods + def get_book_value(self): + return self._financial_statement_data('balance', 'balanceSheetHistoryQuarterly', + 'totalStockholderEquity', 'quarterly') + + def get_ebit(self): + return self._financial_statement_data('income', 'incomeStatementHistory', 'ebit', 'annual') + + def get_net_income(self): + return self._financial_statement_data('income', 'incomeStatementHistory', 'netIncome', 'annual') + + def get_interest_expense(self): + return self._financial_statement_data('income', 'incomeStatementHistory', 'interestExpense', 'annual') + + def get_operating_income(self): + return self._financial_statement_data('income', 'incomeStatementHistory', 'operatingIncome', 'annual') + + def get_total_operating_expense(self): + return self._financial_statement_data('income', 'incomeStatementHistory', 'totalOperatingExpenses', 'annual') + + def get_total_revenue(self): + return self._financial_statement_data('income', 'incomeStatementHistory', 'totalRevenue', 'annual') + + def get_cost_of_revenue(self): + return self._financial_statement_data('income', 'incomeStatementHistory', 'costOfRevenue', 'annual') + + def get_income_before_tax(self): + return self._financial_statement_data('income', 'incomeStatementHistory', 'incomeBeforeTax', 'annual') + + def get_income_tax_expense(self): + return 
self._financial_statement_data('income', 'incomeStatementHistory', 'incomeTaxExpense', 'annual') + + def get_gross_profit(self): + return self._financial_statement_data('income', 'incomeStatementHistory', 'grossProfit', 'annual') + + def get_net_income_from_continuing_ops(self): + return self._financial_statement_data('income', 'incomeStatementHistory', + 'netIncomeFromContinuingOps', 'annual') + + def get_research_and_development(self): + return self._financial_statement_data('income', 'incomeStatementHistory', 'researchDevelopment', 'annual') + + # Calculated Financial Methods + def get_earnings_per_share(self): + price_data = self.get_current_price() + pe_ratio = self.get_pe_ratio() + if isinstance(self.ticker, str): + if price_data is not None and pe_ratio is not None: + return price_data / pe_ratio + else: + return None + else: + ret_obj = {} + for tick in self.ticker: + if price_data[tick] is not None and pe_ratio[tick] is not None: + ret_obj.update({tick: price_data[tick] / pe_ratio[tick]}) + else: + ret_obj.update({tick: None}) + return ret_obj + + def get_num_shares_outstanding(self, price_type='current'): + today_low = self._stock_summary_data('dayHigh') + today_high = self._stock_summary_data('dayLow') + cur_market_cap = self._stock_summary_data('marketCap') + if isinstance(self.ticker, str): + if cur_market_cap is not None: + if price_type == 'current': + current = self.get_current_price() + if current is not None: + today_average = current + else: + return None + else: + if today_high is not None and today_low is not None: + today_average = (today_high + today_low) / 2 + else: + return None + return cur_market_cap / today_average + else: + return None + else: + ret_obj = {} + for tick in self.ticker: + if cur_market_cap[tick] is not None: + if price_type == 'current': + current = self.get_current_price() + if current[tick] is not None: + ret_obj.update({tick: cur_market_cap[tick] / current[tick]}) + else: + ret_obj.update({tick: None}) + else: + if 
today_low[tick] is not None and today_high[tick] is not None: + today_average = (today_high[tick] + today_low[tick]) / 2 + ret_obj.update({tick: cur_market_cap[tick] / today_average}) + else: + ret_obj.update({tick: None}) + else: + ret_obj.update({tick: None}) + return ret_obj \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index d4dbb6f..d201613 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,6 @@ requests~=2.21.0 numpy~=1.15.4 beautifulsoup4~=4.7.1 -requests-cache~=0.4.13 # NOT REQUIRED \ No newline at end of file +halo~=0.0.23 +requests-cache~=0.4.13 # NOT REQUIRED +yahoofinancials~=1.5 # NOT REQUIRED \ No newline at end of file diff --git a/stocks.txt b/stocks.txt new file mode 100644 index 0000000..c8bb22d --- /dev/null +++ b/stocks.txt @@ -0,0 +1,10 @@ +VFINX +SMARX +BRASX +USIBX +DSIAX +TIHYX +SGYAX +TPLGX +PREFX +FBGRX