from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import NavigableString, Tag


class DesignPattern(BasicNewsRecipe):
    """Calibre recipe that builds an e-book from the table of contents at ``url``.

    The index page is fetched, every link inside its ``<ul class="summary">``
    element becomes one article, and all articles are placed in a single
    section.
    """

    title = 'Android官方培训课程'
    description = 'Android官方培训课程'
    cover_url = 'http://hukai.me/android-training-course-in-chinese/android_training.jpg'
    # Base that the relative hrefs found on the index page are appended to.
    url_prefix = 'http://hukai.me/android-training-course-in-chinese/'
    # Page whose summary list is used as the table of contents.
    url = 'http://hukai.me/android-training-course-in-chinese/'
    no_stylesheets = True
    keep_only_tags = [dict(attrs={'class': ['page-inner']})]

    def get_text(self, tag):
        """Return the concatenated text found inside ``tag``.

        String children are taken as they are; element children are processed
        recursively. Because each recursive result is itself stripped,
        whitespace at the edges of nested elements is dropped.

        :param tag: a parsed element exposing a ``contents`` list.
        :return: the collected text with leading/trailing whitespace removed.
        """
        pieces = []
        for child in tag.contents:
            if isinstance(child, NavigableString):
                pieces.append(str(child))
            else:
                pieces.append(self.get_text(child))
        return ''.join(pieces).strip()

    def parse_index(self):
        """Build the article list from the summary list on the index page.

        :return: a one-element list ``[(section_title, articles)]`` where
            ``articles`` is a list of ``{'title': ..., 'url': ...}`` dicts in
            the order the links appear on the page.
        :raises ValueError: if the index page has no ``<ul class="summary">``.
        """
        soup = self.index_to_soup(self.url)
        summary = soup.find('ul', {'class': 'summary'})
        if summary is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(
                'No <ul class="summary"> element found at %s' % self.url)

        # url_prefix already ends with '/'; strip it so the joined URL does
        # not contain a double slash.
        base = self.url_prefix.rstrip('/')

        articles = []
        for link in summary.findAll('a'):
            href = link.get('href')
            if href is None:
                # An anchor without a target cannot be downloaded; skip it.
                continue
            articles.append({
                'title': self.get_text(link),
                'url': base + '/' + href,
            })

        return [('Android官方培训课程', articles)]