Parse HTML - Machine Learning

Parse HTML

Preliminaries

# Load library
from bs4 import BeautifulSoup

Create HTML

# Build a small HTML snippet to parse: a div of class "full_name"
# holding a bold first name followed by a plain last name
html = (
    "<div class='full_name'>"
    "<span style='font-weight:bold'>Masego</span> Azra"
    "</div>"
)

Parse HTML

# Turn the raw HTML string into a navigable tree, using the lxml parser
soup = BeautifulSoup(markup=html, features="lxml")

# Look up the first <div> whose class is "full_name" and display its text content
soup.find("div", class_="full_name").get_text()
'Masego Azra'