Match URLs

StackOverflow

Preliminaries

# Load regex package
import re

Create some text

# Sample sentence holding two URLs: one with a "www" subdomain and one without
text = (
    'My blog is http://www.chrisalbon.com '
    'and not http://chrisalbon.com'
)

Apply regex

# Find every http/ftp/https URL in the text. The pattern has three capture
# groups (scheme, host, and the optional remainder of the URL), so each match
# comes back as a 3-tuple, with '' where the optional remainder is absent.
re.compile(r'(http|ftp|https):\/\/([\w\-_]+(?:(?:\.[\w\-_]+)+))([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?').findall(text)
[('http', 'www.chrisalbon.com', ''), ('http', 'chrisalbon.com', '')]