The documentation I read suggested that urlopen can be slow, but I doubt rewriting this program in a threaded fashion would help.
Also, if you see anything that seems off in the code, let me know; I'm self-taught, and my work doesn't get critiqued very often.
# Functions for scraping the jobs section of the Moffitt website.
#
# The code is written to run unchanged on both Python 2 and Python 3.
import re
from contextlib import closing

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

# Search-results page that lists every open requisition.
_SEARCH_URL = 'http://tbe.taleo.net/NA4/ats/careers/searchResults.jsp?org=MOFFITT&cws=1'

# A job link is a single-quoted URL.  The character class stops at the first
# closing quote (and never crosses a line), so two links that share a line are
# returned separately instead of being fused by a greedy ".*".
_JOB_URL_RE = re.compile(
    r"(http://tbe\.taleo\.net/NA4/ats/careers/requisition[^'\n]*)'")

# Anything between "<" and the next ">" on the same line counts as a tag.
_TAG_RE = re.compile(r'<.*?>')

# The table cell whose bold text is used as the job title.
_TITLE_RE = re.compile(r'<td colspan=2><b>.*</b>')

# Lower-case substrings that mark a posting as out of bounds.
_DISQUALIFYING_TERMS = ("m.d.", "nurse", "phd")


def getHTML(url):
    """Fetch *url* and return the page source as text.

    The connection is closed even when reading fails.  On Python 3 the raw
    bytes are decoded (as UTF-8, replacing undecodable bytes) so the regex
    helpers in this module can work on the result; on Python 2 the byte
    string is returned untouched, exactly as before.
    """
    with closing(urlopen(url)) as response:
        html = response.read()
    if not isinstance(html, str):
        html = html.decode("utf-8", "replace")
    return html


def getMoffittURLs(self):
    """Return the individual job URLs found in the search-results HTML.

    ``self`` is the HTML source of the search page (the parameter name is
    kept for backward compatibility).  The trailing single quote that
    terminates each link in the markup is not part of the returned URL.
    """
    return _JOB_URL_RE.findall(self)


def removeTags(feed):
    """Return *feed* with every ``<...>`` tag removed."""
    return _TAG_RE.sub('', feed)


def isAble(cleanSourceCode):
    """Return False when the posting mentions a disqualifying requirement.

    Tags are stripped and the text lower-cased once; the posting is rejected
    if any of "m.d.", "nurse" or "phd" occurs anywhere in it as a substring.
    """
    text = removeTags(cleanSourceCode).lower()
    return not any(term in text for term in _DISQUALIFYING_TERMS)


def getTitle(feed):
    """Return the job title found in the posting HTML *feed*.

    Each title cell is stripped of tags individually; if the page has more
    than one such cell the texts are joined with ", ".  An empty string is
    returned when no title cell is present.  (Previously the *list* of
    matches was passed through ``str()``, so callers received text such as
    ``['Title']`` or ``[]``.)
    """
    return ", ".join(removeTags(match) for match in _TITLE_RE.findall(feed))


def main():
    """Print the title of every listed job that passes ``isAble``."""
    listing = getHTML(_SEARCH_URL)
    # NOTE: one blocking request is made per job link, one after another.
    for link in getMoffittURLs(listing):
        job = getHTML(link)
        if isAble(job):
            # Fall back to the URL so a page without a title cell is still
            # identifiable in the output.
            label = getTitle(job) or link
            print(label + " is open")