Index

Table of contents

perl regex

using a regex

matching

finding a regex match anywhere in input string
$_ = 'input';
if (/regex/) {
	say "matches $&";
	say "capture group 1: $1";
	say "capture group 2: $2";
	say "skipped input  $`  ";
	say "trailing input $'  ";
} else {
	say "no match";
}
alternative syntax for matching against a regex
if (m<regex>) {
if (m[regex]) {
if (m(regex)) {
if (m{regex}) {
if (m^regex^) {
if (m/regex/) {
if (m|regex|) {
if (m!regex!) {
if (m:regex:) {
if (m,regex,) {
if (m.regex.) {
using other input strings than the default $_
if ($string =~ /regex/) {
capturing groups into scalars
my ($group1, $group2) = $input =~ /$regex/;
extract multiple matches from a line into an array
my @matches = $input =~ /$regex/g;
convert matches into a hash
my %map = $input =~ /$regex/g;
iterating multiple matches
while($_ =~ /$regex/g ) { ... }
simple regex tester
#!/bin/perl
use 5.26.1;
my $regex = $ARGV[0];
$_ = $ARGV[1];
/$regex/ ? say "$regex matches $_" : say "no match for $regex on $_";

substitution

regex substitution on $_
my $count = s/./replacement/g;
if ($count) {
        say "substitution count $count";
		say "new value: $_";
} else {
        say "no matches";
}
substituting on variable other than $_
$input =~ s/regex/replacement/
create new string instead of modifying original scalar (r flag)
$copy = $input =~ s/regex/replacement/r
alternative syntax for substituting with a regex
if (s<regex><replacement>) {
if (s[regex][replacement]) {
if (s(regex)(replacement)) {
if (s{regex}{replacement}) {
if (s^regex^replacement^) {
if (s/regex/replacement/) {
if (s|regex|replacement|) {
if (s!regex!replacement!) {
if (s:regex:replacement:) {
if (s,regex,replacement,) {
if (s.regex.replacement.) {
evaluate replacement part as code
my $var = "system path: &PATH&";
$var =~ s/&([^&]+)&/$ENV{$1}/e;

regex split

splitting a line
@parts = split /regex/, $input;

flags

a    ascii regex
i    case insensitive matching
g    global matches (multiple matches per line)
m    multiline mode
n    non-capturing mode
r    return new scalar, rather than modifying original scalar (substitution)
s    '.' matches newline characters as well
u    unicode regex
x    ignore whitespace in regex (allow regex formatting)

syntax

a         # match the letter a
\.        # match a literal dot '.'
\\        # match backslash
\t        # match tab
\n        # match newline
\r        # match carriage return
\A        # match start of string
\z        # match end of string, no trailing newline
\Z        # match end of string, optional trailing newline
^         # match start of line
$         # match end of line

.         # match any character other than newline ('s' flag set: also match newline)
\N        # match any character other than newline (ignores 's' flag)
\d        # match digit
\D        # match non-digit
\s	  # match whitespace
\w        # match [a-zA-Z0-9_] (word character)
\W        # match [^a-zA-Z0-9_] (non-word character)
\b        # match word boundary
\B        # match non-word boundary
\b{lb}    # match line boundary, unicode heuristic for finding convenient locations to break lines
\b{sb}    # match sentence boundary, unicode heuristic
\b{wb}    # match word boundary, unicode heuristic
\p{Space} # match unicode characters using unicode property
\P{Space} # match unicode characters that don't have unicode property
|         # alternative pattern (logical or)
case shifting in replacement
\l        # next character to lower case
\L        # all following characters to lower case
\E        # all following characters to original case (disable case shifting)
\Q        # auto escape until \E
\u        # next character to upper case
\U        # all following characters to upper case
character class
[0-9]	  # match digit
[^0-9]    # match non-digit
[0-9-]	  # match digit or hyphen '-'
[-0-9]    # match digit or hyphen '-'
[0-9.]    # match digit or dot
[aA]      # match 'a' or 'A'
multiplicity
?         # zero or one, greedy
??        # zero or one, lazy
*         # zero or more, greedy
*?        # zero or more, lazy
*+        # zero or more, possessive
+         # one or more, greedy
+?        # one or more, lazy
++        # one or more, possessive
{n}       # exactly n matches
{n,}      # n or more matches, greedy
{n,}?     # n or more matches, lazy
{n,}+     # n or more matches, possessive
{n,m}     # n to m matches, greedy
{n,m}?    # n to m matches, lazy
{n,m}+    # n to m matches, possessive
capturing
(         # open capturing group
)         # close capturing group
\n        # refer to captured group [n]
\g{n}     # refer to captured group [n]
(?:       # non-capturing group
(?<name>  # named capturing group
(?P<name> # named capturing group
\g{name}  # refer to named capturing group
\k<name>  # refer to named capturing group
(?P=name) # refer to named capturing group
look around
(?=	  # Lookahead
(?<=      # Lookbehind
(?!	  # Negative Lookahead (must not be followed by)
(?<!      # Negative Lookbehind (must not be preceded by)

examples

match if a character occurs twice in a row
(.)\1
snake case to camel case
s:([a-z]+)_?:\u\L$1:g
dromedary case to snake case
s:(.)([A-Z]):\L$1_$2:gr

documentation

https://perldoc.perl.org/perlre.html