import numpy as np import tables as pt import urllib2 import re import string from datetime import datetime urlT = string.Template('http://www.google.ca/finance/historical'+ '?cid=$cid&startdate=$start&enddate=$end&output=csv') ##----------User specified----------## startdate = datetime(2010,1,1) # enddate = datetime.today() # equity_list = [('NYSE' ,'CMI'), # ('NYSE' ,'FCX'), # ('NYSE' ,'MCD'), # ('NYSE' ,'TWI'), # ('NYSE' ,'SLB'), # ('NASDAQ','AAPL'), # ('NASDAQ','MIDD'), # ('NYSE' ,'GG' ), # ('NYSE' ,'AUY'), # ('PINK' ,'LYSDY')] # ##----------User specified----------## def getGoogleID(equity): url = 'http://www.google.ca/finance?q={0[0]}%3A{0[1]}'.format(equity) response = urllib2.urlopen(url, 'r') for line in response: match = re.match('setCompanyId\([0-9]+\);\\n', line) if (match != None): gid = line[13:-3] return gid else: raise Exception('Google ID not found for \'%s\''%equity[1]) def d2strfmt( date ): return date.strftime('%b')+'+'+date.strftime('%d').lstrip('0')+'%2C+'+date.strftime('%Y') date_t = np.dtype([('year', '<i4'),('month', '<i4'),('day', '<i4')]) str_t = np.dtype((np.str_, 8)) equity_format = np.dtype([ ('date' ,date_t), ('open' ,np.float64), ('high' ,np.float64), ('low' ,np.float64), ('close' ,np.float64), ('volume' ,np.int64) ], align=True) filters = pt.Filters(complevel=6, complib='bzip2', shuffle=True) h5file = pt.openFile('market_data.h5', mode='w') group = h5file.createGroup('/', 'equity', filters=None) for exch, tckr in equity_list: gid = getGoogleID((exch,tckr)) table = h5file.createTable(group, tckr, equity_format, title=str(gid), filters=filters, byteorder='little') equityinfo = table.row url = urlT.substitute(cid=gid,start=d2strfmt(startdate),end=d2strfmt(enddate)) u = urllib2.urlopen(url, 'r') u.readline() # Skip header for line in u: parsedline = line.rstrip().split(',') for j in xrange(len(table.colnames)): field = table.colnames[j] if (field=='date'): dt = datetime.strptime(parsedline[j],'%d-%b-%y') equityinfo[field] = (dt.year, dt.month, dt.day) else: if (parsedline[j] == '-'): # Holiday: no information equityinfo[field] = np.nan else: equityinfo[field] = parsedline[j] equityinfo.append() table.flush() h5file.close()This creates a file called 'market_data.h5' which you can view with the ViTables program. The file contains an 'equity' group with one table for each stock. I'll create another post showing how we can use this data. In the meantime, I just wanted to show how we could download and store the data. My next steps are to include FX rates and interest rates.
Monday, June 10, 2013
Downloading Historical Data from Google Finance
My background is in quantitative finance and it is nice to be able to apply what I have learned in school to my own (albeit small) portfolio. This requires some historical market data. My first attempt at this was to visit the Google Finance website and manually download the information into a CSV file. This was rather tedius. I came across Python's urllib2 module which has given me a means to automate this process. I also employ the use of the PyTables module to store my data in the HDF5 format. I am working in a Windows environment on the pythonxy IDE with PyTables version '2.4.0'. The code is as follows:
Labels:
Google Finance,
PyTables,
Python,
urllib2
Location:
Toronto, ON, Canada
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment