How to Use Regular Expressions in Python | The School of Code

Settings

Appearance

Choose a typography theme that suits your style

Back to How-to Guides
Python

How to Use Regular Expressions in Python

Learn how to use the re module in Python to search, match, and manipulate text with regular expressions.

Tags: Python, Regex, Regular Expressions, Text Processing

Regular expressions (regex) are powerful patterns for matching and manipulating text. Python’s re module provides comprehensive regex support.

Importing re

import re

Basic Functions

re.search() - Find First Match

import re

text = "The rain in Spain falls mainly in the plain"

# Search for pattern
match = re.search(r"Spain", text)
if match:
    print(f"Found: {match.group()}")  # Found: Spain
    print(f"Position: {match.start()}-{match.end()}")  # Position: 12-17

# No match returns None
result = re.search(r"France", text)
print(result)  # None

re.match() - Match at Beginning

import re

text = "Hello World"

# Matches at start
match = re.match(r"Hello", text)
print(match.group())  # Hello

# Won't match (not at start)
match = re.match(r"World", text)
print(match)  # None

re.findall() - Find All Matches

import re

text = "cat bat rat cat hat"

# Find all occurrences
matches = re.findall(r"cat", text)
print(matches)  # ['cat', 'cat']

# Find all words ending in 'at'
matches = re.findall(r"\b\w+at\b", text)
print(matches)  # ['cat', 'bat', 'rat', 'cat', 'hat']

re.sub() - Replace Matches

import re

text = "Hello World"

# Replace pattern
new_text = re.sub(r"World", "Python", text)
print(new_text)  # Hello Python

# Replace with function
def uppercase(match):
    """Return the text matched by *match*, converted to upper case.

    Suitable as the replacement callable for ``re.sub()``, which calls it
    with the match object of each occurrence.
    """
    matched_text = match.group(0)
    return matched_text.upper()

text = "hello world"
new_text = re.sub(r"\b\w+\b", uppercase, text)
print(new_text)  # HELLO WORLD

Common Patterns

Character Classes

import re

text = "abc123XYZ"

# \d - digits (for str patterns any Unicode decimal digit; only 0-9 with re.ASCII)
print(re.findall(r"\d", text))     # ['1', '2', '3']
print(re.findall(r"\d+", text))    # ['123']

# \w - word characters: letters, digits and _
# (Unicode-aware for str patterns; only a-z, A-Z, 0-9, _ with re.ASCII)
print(re.findall(r"\w+", text))    # ['abc123XYZ']

# \s - whitespace
text = "Hello World"
print(re.findall(r"\s", text))     # [' ']

# Custom character class
print(re.findall(r"[aeiou]", "hello world"))  # ['e', 'o', 'o']

Quantifiers

import re

# * - zero or more
# + - one or more
# ? - zero or one
# {n} - exactly n
# {n,m} - n to m times

text = "aaa ab a"
print(re.findall(r"a+", text))     # ['aaa', 'a', 'a']
# a* also matches the empty string at every position where no 'a' starts
# (before the first ' ', before 'b', before the second ' ', and at the end)
print(re.findall(r"a*", text))     # ['aaa', '', 'a', '', '', 'a', '']
print(re.findall(r"a{2}", text))   # ['aa']
print(re.findall(r"a{1,3}", text)) # ['aaa', 'a', 'a']

Anchors

import re

text = "hello world"

# ^ - start of string
print(re.search(r"^hello", text))  # <re.Match object; span=(0, 5), match='hello'>
print(re.search(r"^world", text))  # None

# $ - end of string (also matches just before a single trailing newline)
print(re.search(r"world$", text))  # <re.Match object; span=(6, 11), match='world'>
print(re.search(r"hello$", text))  # None

# \b - word boundary
text = "cat category"
print(re.findall(r"\bcat\b", text))  # ['cat'] (not 'category')

Groups

Basic Groups

import re

text = "John Smith, Jane Doe"

# Capture groups with ()
pattern = r"(\w+) (\w+)"
matches = re.findall(pattern, text)
print(matches)  # [('John', 'Smith'), ('Jane', 'Doe')]

# Access groups from match object
match = re.search(r"(\w+) (\w+)", text)
print(match.group(0))  # John Smith (entire match)
print(match.group(1))  # John (first group)
print(match.group(2))  # Smith (second group)

Named Groups

import re

text = "john@example.com"

pattern = r"(?P<username>\w+)@(?P<domain>\w+\.\w+)"
match = re.search(pattern, text)

print(match.group("username"))  # john
print(match.group("domain"))    # example.com
print(match.groupdict())        # {'username': 'john', 'domain': 'example.com'}

Practical Examples

Email Validation

import re

def is_valid_email(email):
    """Return True if *email* has the basic shape ``local@domain.tld``.

    This is a loose shape check, not full RFC 5322 validation: one or more
    word characters, dots or hyphens, an ``@``, a domain made of the same
    characters, a literal dot, and a final run of word characters.

    Args:
        email: The string to check.

    Returns:
        bool: True when the entire string matches the pattern, else False.
    """
    pattern = r"[\w.-]+@[\w.-]+\.\w+"
    # fullmatch() instead of match() with ^...$: "$" also matches just before
    # a trailing newline, so "user@example.com\n" used to be accepted.
    return re.fullmatch(pattern, email) is not None

print(is_valid_email("user@example.com"))  # True
print(is_valid_email("invalid-email"))      # False

Phone Number Extraction

import re

text = "Call me at 123-456-7890 or (555) 123-4567"

# Various phone formats
pattern = r"\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}"
phones = re.findall(pattern, text)
print(phones)  # ['123-456-7890', '(555) 123-4567']

URL Parsing

import re

url = "https://www.example.com/path/page?query=value"

# Named groups split the URL into its parts:
#   protocol - "http" or "https"
#   domain   - host name; hyphens are allowed (e.g. "my-site.example.com")
#   path     - optional; everything from the first "/" up to "?" or "#",
#              so dots and hyphens ("/docs/index.html") are kept
#   query    - optional; "?" plus everything up to a "#" fragment
# For real-world URLs prefer urllib.parse.urlsplit() from the standard library.
pattern = (
    r"(?P<protocol>https?)://"
    r"(?P<domain>[\w.-]+)"
    r"(?P<path>/[^?#\s]*)?"
    r"(?P<query>\?[^#\s]*)?"
)
match = re.search(pattern, url)

print(match.group("protocol"))  # https
print(match.group("domain"))    # www.example.com
print(match.group("path"))      # /path/page

Text Cleaning

import re

text = "Hello,   World!   How   are   you?"

# Remove extra whitespace
clean = re.sub(r"\s+", " ", text)
print(clean)  # Hello, World! How are you?

# Remove punctuation (everything except word characters and whitespace);
# this runs on the original text, so the repeated spaces are still there
clean = re.sub(r"[^\w\s]", "", text)
print(clean)  # Hello   World   How   are   you

Flags

import re

text = "Hello WORLD"

# Case insensitive
match = re.search(r"world", text, re.IGNORECASE)
print(match.group())  # WORLD

# Multiline (^ and $ match line boundaries)
text = "line1\nline2"
matches = re.findall(r"^line\d", text, re.MULTILINE)
print(matches)  # ['line1', 'line2']

# Verbose (allows comments and whitespace)
pattern = re.compile(r"""
    \d{3}     # Area code
    [-.\s]?   # Separator
    \d{3}     # First 3 digits
    [-.\s]?   # Separator
    \d{4}     # Last 4 digits
""", re.VERBOSE)

Summary

  • Use re.search() to find first match
  • Use re.findall() to find all matches
  • Use re.sub() to replace matches
  • Use () for groups, (?P<name>) for named groups
  • Common patterns: \d (digit), \w (word), \s (space)
  • Quantifiers: *, +, ?, {n}, {n,m}
  • Anchors: ^ (start), $ (end), \b (word boundary)