Skip to content

Beautiful Soup

Usage

import re

from bs4 import BeautifulSoup

# Parse the markup with Python's built-in HTML parser.
# NOTE: html_doc is a placeholder for the HTML to parse; define it before running this snippet.
soup = BeautifulSoup(html_doc, 'html.parser')

# Returns a str: an indented, pretty-printed rendering of the parsed document.
soup.prettify()

# Returns a list-like ResultSet of Tag objects, one for every <a> tag.
soup.find_all('a')

# Returns a ResultSet of every <a> tag whose href attribute is exactly '/link'.
soup.find_all('a', href='/link')

# Returns a ResultSet of every <a> tag whose href attribute matches the regular
# expression '^/link' (the pattern is applied with search(), so '^' anchors the start).
soup.find_all('a', href=re.compile(r'^/link'))

# Returns a ResultSet of every <a> tag that has the CSS class 'btn'
# (a tag with several classes matches if any one of them is 'btn').
soup.find_all('a', class_='btn')

# Returns a ResultSet of every <a> tag whose .string matches the regular expression '^Link'.
soup.find_all('a', string=re.compile(r'^Link'))

# Returns a list-like result of every element matching the CSS selector 'a.btn'.
soup.select('a.btn')

# Returns the first <a> tag as a Tag object, or None if the document has no <a> tag.
soup.find('a')

# Returns the first element whose id attribute is 'form-btn', or None if there is none.
soup.find(id='form-btn')

# Returns a str: the full markup of the first <a> tag, including the tag itself
# (or the str 'None' if the document has no <a> tag).
str(soup.find('a'))

# Returns a str: the text inside the first <a> tag.
# Caveats: raises AttributeError if there is no <a> tag, and .string is None when the
# tag has more than one child, in which case the result is the str 'None'.
str(soup.find('a').string)