Regular Expression By Example - Python

Regular Expression By Example

This tutorial is based on: http://www.tutorialspoint.com/python/python_reg_expressions.htm

# Import regex
import re
# Sample sentence shared by the examples below; it mixes words, numbers and punctuation.
text = 'A flock of 120 quick brown foxes jumped over 30 lazy brown, bears.'

^ Matches beginning of line.

re.findall('^A', text)
['A']

$ Matches end of line.

re.findall('bears.$', text)
['bears.']

. Matches any single character except newline.

re.findall('f..es', text)
['foxes']

[...] Matches any single character in brackets.

# Find all vowels
re.findall('[aeiou]', text)
['o', 'o', 'u', 'i', 'o', 'o', 'e', 'u', 'e', 'o', 'e', 'a', 'o', 'e', 'a']

[^...] Matches any single character not in brackets.

# Find all characters that are not lower-case vowels
re.findall('[^aeiou]', text)
['A',
 ' ',
 'f',
 'l',
 'c',
 'k',
 ' ',
 'f',
 ' ',
 '1',
 '2',
 '0',
 ' ',
 'q',
 'c',
 'k',
 ' ',
 'b',
 'r',
 'w',
 'n',
 ' ',
 'f',
 'x',
 's',
 ' ',
 'j',
 'm',
 'p',
 'd',
 ' ',
 'v',
 'r',
 ' ',
 '3',
 '0',
 ' ',
 'l',
 'z',
 'y',
 ' ',
 'b',
 'r',
 'w',
 'n',
 ',',
 ' ',
 'b',
 'r',
 's',
 '.']

a | b Matches either a or b.

re.findall('a|A', text)
['A', 'a', 'a']

(re) Groups regular expressions and remembers matched text.

# Find any instance of 'foxes' (the parentheses capture the matched text)
re.findall('(foxes)', text)
['foxes']

\w Matches word characters.

# Find every non-overlapping run of five consecutive word characters
# Raw string so the \w escapes reach the regex engine untouched
re.findall(r'\w\w\w\w\w', text)
['flock', 'quick', 'brown', 'foxes', 'jumpe', 'brown', 'bears']

\W Matches nonword characters.

# Raw string: \W is a regex escape, not a valid Python string escape
re.findall(r'\W\W', text)
[', ']

\s Matches whitespace. For ASCII text, equivalent to [ \t\n\r\f\v] (note that the space character is included).

# Raw string: \s is a regex escape, not a valid Python string escape
re.findall(r'\s', text)
[' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']

\S Matches nonwhitespace.

# Raw string: \S is a regex escape, not a valid Python string escape
re.findall(r'\S\S', text)
['fl',
 'oc',
 'of',
 '12',
 'qu',
 'ic',
 'br',
 'ow',
 'fo',
 'xe',
 'ju',
 'mp',
 'ed',
 'ov',
 'er',
 '30',
 'la',
 'zy',
 'br',
 'ow',
 'n,',
 'be',
 'ar',
 's.']

\d Matches digits. Equivalent to [0-9] for ASCII text (with str patterns it also matches other Unicode decimal digits).

# Raw string: \d is a regex escape, not a valid Python string escape
re.findall(r'\d\d\d', text)
['120']

\D Matches nondigits.

# Raw string: \D is a regex escape, not a valid Python string escape
re.findall(r'\D\D\D\D\D', text)
['A flo',
 'ck of',
 ' quic',
 'k bro',
 'wn fo',
 'xes j',
 'umped',
 ' over',
 ' lazy',
 ' brow',
 'n, be']

\A Matches beginning of string.

# Raw string: \A (start-of-string anchor) is a regex escape, not a Python one
re.findall(r'\AA', text)
['A']

\Z Matches only at the end of the string. Unlike $, it does not match just before a trailing newline.

# Raw string: \Z (end-of-string anchor) is a regex escape, not a Python one
re.findall(r'bears.\Z', text)
['bears.']

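\b Matches a word boundary: the empty string at the beginning or end of a word. Write it in a raw string (r'\b'); in a normal string literal '\b' is a backspace character, so the pattern silently matches nothing useful.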

# The raw string is essential: in a normal literal '\b' is a backspace character,
# which is why the non-raw form of this pattern finds nothing.
# Matches f, o, x, e or s only where it is the first character of a word.
re.findall(r'\b[foxes]', text)
['f', 'o', 'f', 'o']

\n, \t, etc. Matches newlines, carriage returns, tabs, etc.

re.findall('\n', text)
[]

[Pp]ython Match "Python" or "python"

re.findall('[Ff]oxes', 'foxes Foxes Doxes')
['foxes', 'Foxes']

[0-9] Match any digit; same as [0123456789]

# Pull out every digit character, one match per digit
re.findall('[0-9]', '120 foxes, 30 Foxes')
['1', '2', '0', '3', '0']

[a-z] Match any lowercase ASCII letter

re.findall('[a-z]', 'foxes Foxes')
['f', 'o', 'x', 'e', 's', 'o', 'x', 'e', 's']

[A-Z] Match any uppercase ASCII letter

re.findall('[A-Z]', 'foxes Foxes')
['F']

[a-zA-Z0-9] Match any of the above

re.findall('[a-zA-Z0-9]', 'foxes Foxes')
['f', 'o', 'x', 'e', 's', 'F', 'o', 'x', 'e', 's']

[^aeiou] Match anything other than a lowercase vowel

re.findall('[^aeiou]', 'foxes Foxes')
['f', 'x', 's', ' ', 'F', 'x', 's']

[^0-9] Match anything other than a digit

re.findall('[^0-9]', 'foxes Foxes')
['f', 'o', 'x', 'e', 's', ' ', 'F', 'o', 'x', 'e', 's']

ruby? Match "rub" or "ruby": the y is optional

re.findall('foxes?', 'foxes Foxes')
['foxes']

ruby* Match "rub" plus 0 or more ys

re.findall('ox*', 'foxes Foxes')
['ox', 'ox']

ruby+ Match "rub" plus 1 or more ys

re.findall('ox+', 'foxes Foxes')
['ox', 'ox']

\d{3} Match exactly 3 digits

# Raw string: \d is a regex escape, not a valid Python string escape
re.findall(r'\d{3}', text)
['120']

\d{3,} Match 3 or more digits

# Raw string: \d is a regex escape, not a valid Python string escape
re.findall(r'\d{2,}', text)
['120', '30']

\d{3,5} Match 3, 4, or 5 digits

# Raw string: \d is a regex escape, not a valid Python string escape
re.findall(r'\d{2,3}', text)
['120', '30']

^Python Match "Python" at the start of a string (or at the start of each internal line when re.MULTILINE is set)

re.findall('^A', text)
['A']

Python$ Match "Python" at the end of a string (or at the end of each line when re.MULTILINE is set)

re.findall('bears.$', text)
['bears.']

\APython Match "Python" at the start of a string

# Raw string: \A (start-of-string anchor) is a regex escape, not a Python one
re.findall(r'\AA', text)
['A']

Python\Z Match "Python" at the end of a string

# Raw string: \Z (end-of-string anchor) is a regex escape, not a Python one
re.findall(r'bears.\Z', text)
['bears.']

Python(?=!) Match "Python", if followed by an exclamation point

re.findall('bears(?=.)', text)
['bears']

Python(?!!) Match "Python", if not followed by an exclamation point

re.findall('foxes(?!!)', 'foxes foxes!')
['foxes']

python|perl Match "python" or "perl"

re.findall('foxes|foxes!', 'foxes foxes!')
['foxes', 'foxes']

rub(y|le) Match "ruby" or "ruble"

re.findall('fox(es!)', 'foxes foxes!')
['es!']

Python(!+|\?) "Python" followed by one or more ! or one ?

re.findall('foxes(!)', 'foxes foxes!')
['!']