G
GuideDevOps
Lesson 13 of 15

Regular Expressions

Part of the Shell Scripting (Bash) tutorial series.

Basic Pattern Matching

str="test123"

# Basic match: ^ anchors the pattern to the start of the string
if [[ "$str" =~ ^test ]]; then
    echo "Starts with 'test'"
fi

# Case insensitive: nocasematch makes =~ ignore letter case. It must be
# enabled BEFORE the test runs (nocaseglob only affects filename expansion).
shopt -s nocasematch
if [[ "$str" =~ ^TEST ]]; then
    echo "Starts with 'test' (ignoring case)"
fi
shopt -u nocasematch    # turn it back off so later matches stay case-sensitive

Character Classes

str="abc123XYZ"

# Digit: bracket expression covering the range 0-9
digit_re='[0-9]'
if [[ "$str" =~ $digit_re ]]; then
    echo "Contains digit"
fi

# Lowercase: bracket expression covering the range a-z
lower_re='[a-z]'
if [[ "$str" =~ $lower_re ]]; then
    echo "Contains lowercase"
fi

# Uppercase: bracket expression covering the range A-Z
upper_re='[A-Z]'
if [[ "$str" =~ $upper_re ]]; then
    echo "Contains uppercase"
fi

# Whitespace: POSIX character class, written inside a bracket expression
space_re='[[:space:]]'
if [[ "$str" =~ $space_re ]]; then
    echo "Contains space"
fi

Quantifiers

# * - Zero or more of the preceding item
if [[ "aaa" =~ a* ]]; then
    echo "Match"
fi

# + - One or more of the preceding item
if [[ "aaa" =~ a+ ]]; then
    echo "Match"
fi

# ? - Zero or one: the "u" is optional, so "color" matches
if [[ "color" =~ colou?r ]]; then
    echo "Match"
fi

# {n} - Exactly n repetitions
if [[ "aaa" =~ a{3} ]]; then
    echo "Match"
fi

# {n,} - n or more repetitions
if [[ "aaaa" =~ a{3,} ]]; then
    echo "Match"
fi

# {n,m} - Between n and m repetitions
if [[ "aaa" =~ a{2,4} ]]; then
    echo "Match"
fi

Anchors

# Sample input so the examples below are self-contained
str="hello world"

# ^ Start of string
if [[ "$str" =~ ^hello ]]; then
    echo "Starts with 'hello'"
fi

# $ End of string
if [[ "$str" =~ world$ ]]; then
    echo "Ends with 'world'"
fi

# ^$ Exact match: both anchors together mean the whole string must be "hello",
# so this one does not fire for "hello world"
if [[ "$str" =~ ^hello$ ]]; then
    echo "Exactly 'hello'"
fi

Capturing Groups

str="John Smith john@example.com"

# Capture name and email. Each (...) group is stored in BASH_REMATCH[n];
# index 0 holds the whole match.
# [[:space:]] is the POSIX whitespace class; \s is a GNU extension that is
# not part of POSIX extended regular expressions.
if [[ "$str" =~ ([A-Za-z]+)[[:space:]]+([A-Za-z]+)[[:space:]]+(.+@.+) ]]; then
    echo "First: ${BASH_REMATCH[1]}"      # John
    echo "Last: ${BASH_REMATCH[2]}"       # Smith
    echo "Email: ${BASH_REMATCH[3]}"      # john@example.com
fi

Escaping Special Characters

# Escape regex special chars: an unescaped . matches any character,
# \. matches only a literal dot
if [[ "file.txt" =~ \.txt$ ]]; then
    echo "Has .txt extension"
fi

# Literal dollar sign: single-quote the string so the shell does not expand
# $5 as a positional parameter, and write \$ in the pattern so the dollar is
# not treated as the end-of-string anchor
if [[ 'price=$5' =~ \$[0-9]+ ]]; then
    echo "Contains price"
fi

Common Patterns

Email Validation

email="user@example.com"

# local part, "@", domain, then a dot and a TLD of at least two letters
email_re='^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
if [[ "$email" =~ $email_re ]]; then
    echo "Valid email"
fi

IP Address

ip="192.168.1.1"

# Shape check only: three "1-3 digits plus dot" groups, then 1-3 digits.
# Octet values are not range-checked by this pattern.
ip_re='^([0-9]{1,3}\.){3}[0-9]{1,3}$'
if [[ "$ip" =~ $ip_re ]]; then
    echo "Valid IP format"
fi

URL Parsing

url="https://user:pass@example.com:8080/path?query=value"

# Capture groups:
#   1 = scheme
#   2 = whole "user[:pass]@" prefix (optional as a unit)
#   3 = user, 5 = password
#   6 = host
#   8 = port
#   10 = everything after the first "/" (path plus query string)
# The credentials are wrapped in a single optional group that ends in "@",
# so a URL without credentials keeps its full host name in group 6 instead
# of having it split between the user and host groups.
url_re='^(https?)://(([^:/@]+)(:([^/@]*))?@)?([^/:@]+)(:([0-9]+))?(/(.*))?$'
if [[ "$url" =~ $url_re ]]; then
    echo "Scheme: ${BASH_REMATCH[1]}"
    echo "Host: ${BASH_REMATCH[6]}"
    echo "Port: ${BASH_REMATCH[8]}"
    echo "Path: ${BASH_REMATCH[10]}"
fi

Date Format

date_str="2024-04-09"

# Shape check only: 4 digits, dash, 2 digits, dash, 2 digits
date_re='^[0-9]{4}-[0-9]{2}-[0-9]{2}$'
if [[ "$date_str" =~ $date_re ]]; then
    echo "Valid date format"
fi

Using grep with Regex

# Basic grep: with no -E or -P flag the pattern is a basic regular expression (BRE)
grep "^error" /var/log/syslog
 
# Extended regex (ERE): -E lets ( ) and | act as grouping and alternation
grep -E "(error|warning|critical)" logfile.txt
 
# Perl regex: (?<=...) is a lookbehind, so this selects lines that contain
# a position preceded by three digits
# NOTE(review): -P is a GNU grep option; confirm it is available on the target system
grep -P "(?<=[0-9]{3})" phonenumber.txt
 
# Case insensitive: -i ignores letter case in both pattern and input
grep -i "ERROR" logfile.txt
 
# Invert match: -v prints only the lines that do NOT match
grep -v "debug" logfile.txt

Using sed for Pattern Replacement

# Replace pattern
sed 's/old/new/' file.txt
 
# Replace all occurrences
sed 's/old/new/g' file.txt
 
# Using regex
sed 's/[0-9]\+/NUMBER/g' file.txt
 
# Delete matching lines
sed '/pattern/d' file.txt