import numpy as np import tables as pt import urllib2 import re import string from datetime import datetime urlT = string.Template('http://www.google.ca/finance/historical'+ '?cid=$cid&startdate=$start&enddate=$end&output=csv') ##----------User specified----------## startdate = datetime(2010,1,1) # enddate = datetime.today() # equity_list = [('NYSE' ,'CMI'), # ('NYSE' ,'FCX'), # ('NYSE' ,'MCD'), # ('NYSE' ,'TWI'), # ('NYSE' ,'SLB'), # ('NASDAQ','AAPL'), # ('NASDAQ','MIDD'), # ('NYSE' ,'GG' ), # ('NYSE' ,'AUY'), # ('PINK' ,'LYSDY')] # ##----------User specified----------## def getGoogleID(equity): url = 'http://www.google.ca/finance?q={0[0]}%3A{0[1]}'.format(equity) response = urllib2.urlopen(url, 'r') for line in response: match = re.match('setCompanyId\([0-9]+\);\\n', line) if (match != None): gid = line[13:-3] return gid else: raise Exception('Google ID not found for \'%s\''%equity[1]) def d2strfmt( date ): return date.strftime('%b')+'+'+date.strftime('%d').lstrip('0')+'%2C+'+date.strftime('%Y') date_t = np.dtype([('year', '<i4'),('month', '<i4'),('day', '<i4')]) str_t = np.dtype((np.str_, 8)) equity_format = np.dtype([ ('date' ,date_t), ('open' ,np.float64), ('high' ,np.float64), ('low' ,np.float64), ('close' ,np.float64), ('volume' ,np.int64) ], align=True) filters = pt.Filters(complevel=6, complib='bzip2', shuffle=True) h5file = pt.openFile('market_data.h5', mode='w') group = h5file.createGroup('/', 'equity', filters=None) for exch, tckr in equity_list: gid = getGoogleID((exch,tckr)) table = h5file.createTable(group, tckr, equity_format, title=str(gid), filters=filters, byteorder='little') equityinfo = table.row url = urlT.substitute(cid=gid,start=d2strfmt(startdate),end=d2strfmt(enddate)) u = urllib2.urlopen(url, 'r') u.readline() # Skip header for line in u: parsedline = line.rstrip().split(',') for j in xrange(len(table.colnames)): field = table.colnames[j] if (field=='date'): dt = datetime.strptime(parsedline[j],'%d-%b-%y') equityinfo[field] = (dt.year, dt.month, dt.day) else: if (parsedline[j] == '-'): # Holiday: no information equityinfo[field] = np.nan else: equityinfo[field] = parsedline[j] equityinfo.append() table.flush() h5file.close()This creates a file called 'market_data.h5' which you can view with the ViTables program. The file contains an 'equity' group with one table for each stock. I'll create another post showing how we can use this data. In the meantime, I just wanted to show how we could download and store the data. My next steps are to include FX rates and interest rates.
Monday, June 10, 2013
Downloading Historical Data from Google Finance
My background is in quantitative finance and it is nice to be able to apply what I have learned in school to my own (albeit small) portfolio. This requires some historical market data. My first attempt at this was to visit the Google Finance website and manually download the information into a CSV file. This was rather tedius. I came across Python's urllib2 module which has given me a means to automate this process. I also employ the use of the PyTables module to store my data in the HDF5 format. I am working in a Windows environment on the pythonxy IDE with PyTables version '2.4.0'. The code is as follows:
Labels:
Google Finance,
PyTables,
Python,
urllib2
Location:
Toronto, ON, Canada
Friday, June 7, 2013
Adding fonts to Python's ReportLab Module
I have been experimenting with the ReportLab module for Python and have been suitably impressed thus far. One thing that I found restrictive however was the default number of fonts available. To that end I worked on trying to figure out how to add more font options. I should qualify at the outset that I am working in a Windows environment with the pythonxy IDE using ReportLab version 2.6. You can find instructions for how to do this in both the user guide (download here) and in their FAQ. I really like the "Computer Modern" typeface found in TeX so this example tries to register these fonts to ReportLab.
The first thing I needed to do was to get my hands on the font files. You can find lots of open source fonts here: http://mirror.csclub.uwaterloo.ca/CTAN/fonts/. I got what I believe to be the Computer Modern fonts from here: http://mirror.csclub.uwaterloo.ca/CTAN/fonts/amsfonts.zip. This zip file contains two folders of interest:
The first thing I needed to do was to get my hands on the font files. You can find lots of open source fonts here: http://mirror.csclub.uwaterloo.ca/CTAN/fonts/. I got what I believe to be the Computer Modern fonts from here: http://mirror.csclub.uwaterloo.ca/CTAN/fonts/amsfonts.zip. This zip file contains two folders of interest:
- afm (Adobe Font Metrics)
- pfb (Printer Font Binary)
import os from reportlab.pdfbase import pdfmetrics afmdir = 'path to afm files' pfbdir = 'path to pfb files' afmfiles = os.listdir(afmdir) pfbfiles = os.listdir(pfbdir) for j in xrange(len(afmfiles)): afmfiles[j] = os.path.splitext(afmfiles[j])[0] for j in xrange(len(pfbfiles)): pfbfiles[j] = os.path.splitext(pfbfiles[j])[0] afmfiles = set(afmfiles) pfbfiles = set(pfbfiles) commonfiles = afmfiles.intersection(pfbfiles) fontnames = [] for afmfile in commonfiles: filename = afmdir + afmfile + '.afm' f = open(filename, 'r') try: f.readline() f.readline() line = f.readline().rstrip() fontnames.append(line[9:]) finally: f.close() for j, ffile in enumerate(commonfiles): afmfile = afmdir+ffile+'.afm' pfbfile = pfbdir+ffile+'.pfb' pdfmetrics.registerTypeFace(pdfmetrics.EmbeddedType1Face(afmfile, pfbfile)) pdfmetrics.registerFont(pdfmetrics.Font(fontnames[j], fontnames[j], 'WinAnsiEncoding')) print pdfmetrics.getRegisteredFontNames()A couple of points to note:
- To register the font you need to give it the fontname. This is found directly in the afm file (3rd line).
- I am using the 'WinAnsiEncoding' which means not all the fonts loaded will be usable.
from reportlab.lib.pagesizes import letter from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet from reportlab.platypus import BaseDocTemplate, Frame, PageTemplate, Image, Table, TableStyle, Spacer, PageBreak from reportlab.platypus.doctemplate import _doNothing from reportlab.lib.units import inch import reportlab.lib.colors as colors import defs import locale locale.setlocale(locale.LC_ALL, 'English_Canada.1252') registeredFonts = defs.registerAMSfonts() PDF_VIEWER = 'C:\\Program Files (x86)\\Adobe\\Reader 10.0\\Reader\\AcroRd32.exe' PDF_FILENAME = 'C:\\Users\\Joel\\Documents\\Python\\ReportLab\\rev3\\TestFonts.pdf' elements = [] doc = BaseDocTemplate('TestFonts.pdf', pagesize=letter) f0 = Frame(doc.leftMargin, doc.bottomMargin, doc.width, doc.height, id=None, showBoundary=0) template = PageTemplate(id='test', frames=[f0], onPageEnd=_doNothing) doc.addPageTemplates([template]) val1 = locale.format('%9.2f', -15197114.48, grouping=True) val2 = locale.format('%9.2f', -16790937.16, grouping=True) data = [['Index', 'Value 1', 'Value 2', 'FontName']] for j in range(len(registeredFonts)): data.append(['%d'%j, val1, val2, registeredFonts[j]]) tablestyle = TableStyle([ ('ALIGNMENT',(0,0),(-1,-1), 'RIGHT'), ('FONTSIZE', (0,0),(-1,-1), 10), ('VALIGN', (0,0),(-1,-1), 'BOTTOM'), ('INNERGRID',(0,0),(-1,-1), 0.25, colors.black), ('BOX', (0,0),(-1,-1), 0.5, colors.black)]) for j in xrange(len(registeredFonts)): tablestyle.add('FONTNAME',(0,j),(3,j),registeredFonts[j]) table1 = Table(data, colWidths=doc.width/4.0, rowHeights=18, style=tablestyle) elements.append(table1) elements.append(PageBreak()) doc.build(elements) import subprocess process = subprocess.Popen([PDF_VIEWER, '/A', 'view=FitH', PDF_FILENAME], shell=False, stdout=subprocess.PIPE) process.wait()
I am using the "subprocess" command to have Python automatically launch Adobe's Acrobat Reader to view my newly created document. The results look something like this:
Location:
Toronto, ON, Canada
Subscribe to:
Posts (Atom)