Table of contents
perl regex
using a regex
matching
finding a regex match anywhere in input string
$_ = 'input';
if (/regex/) {
say "matches $&";
say "capture group 1: $1";
say "capture group 2: $2";
say "skipped input $` ";
say "trailing input $' ";
} else {
say "no match";
}
alternative syntax for matching against a regex
if (m<regex>) {
if (m[regex]) {
if (m(regex)) {
if (m{regex}) {
if (m^regex^) {
if (m/regex/) {
if (m|regex|) {
if (m!regex!) {
if (m:regex:) {
if (m,regex,) {
if (m.regex.) {
using input strings other than the default $_
if ($string =~ /regex/) {
capturing groups into scalars
my ($group1, $group2) = $input =~ /$regex/;
extract multiple matches from a line into an array
my @matches = $input =~ /$regex/g;
convert matches into a hash
my %map = $input =~ /$regex/g;
iterating multiple matches
while($_ =~ /$regex/g ) { ... }
simple regex tester
#!/bin/perl
use 5.26.1;
my $regex = $ARGV[0];
$_ = $ARGV[1];
/$regex/ ? say "$regex matches $_" : say "no match for $regex on $_";
substitution
regex substitution on $_
my $count = s/./replacement/g;
if ($count) {
say "substitution count $count";
say "new value: $_";
} else {
say "no matches";
}
substituting on variable other than $_
$input =~ s/regex/replacement/
create new string instead of modifying original scalar (r flag)
$copy = $input =~ s/regex/replacement/r
alternative syntax for substituting with a regex
if (s<regex><replacement>) {
if (s[regex][replacement]) {
if (s(regex)(replacement)) {
if (s{regex}{replacement}) {
if (s^regex^replacement^) {
if (s/regex/replacement/) {
if (s|regex|replacement|) {
if (s!regex!replacement!) {
if (s:regex:replacement:) {
if (s,regex,replacement,) {
if (s.regex.replacement.) {
evaluate replacement part as code
my $var = "system path: &PATH&";
$var =~ s/&([^&]+)&/$ENV{$1}/e;
regex split
splitting a line
@parts = split /regex/, $input;
flags
a ascii regex
i case insensitive matching
g global matches (multiple matches per line)
m multiline mode
n non-capturing mode
r return new scalar, rather than modifying original scalar (substitution)
s '.' matches newline characters as well
u unicode regex
x ignore whitespace in regex (allow regex formatting)
syntax
a # match the letter a
\. # match a literal dot '.'
\\ # match backslash
\t # match tab
\n # match newline
\r # match carriage return
\A # match start of string
\z # match end of string, no trailing newline
\Z # match end of string, optional trailing newline
^ # match start of line
$ # match end of line
. # match any character other than newline ('s' flag set: also match newline)
\N # match any character other than newline (ignores 's' flag)
\d # match digit
\D # match non-digit
\s # match whitespace
\w # match [a-zA-Z0-9_] (word character)
\W # match [^a-zA-Z0-9_] (non-word character)
\b # match word boundary
\B # match non-word boundary
\b{lb} # match line boundary, unicode heuristic for finding convenient locations to break lines
\b{sb} # match sentence boundary, unicode heuristic
\b{wb} # match word boundary, unicode heuristic
\p{Space} # match unicode characters using unicode property
\P{Space} # match unicode characters that don't have unicode property
| # alternative pattern (logical or)
case shifting in replacement
\l # next character to lower case
\L # all following characters to lower case
\E # all following characters to original case (disable case shifting)
\Q # auto escape until \E
\u # next character to upper case
\U # all following characters to upper case
character class
[0-9] # match digit
[^0-9] # match non-digit
[0-9-] # match digit or hyphen '-'
[-0-9] # match digit or hyphen '-'
[0-9.] # match digit or dot
[aA] # match 'a' or 'A'
multiplicity
? # zero or one, greedy
?? # zero or one, lazy
* # zero or more, greedy
*? # zero or more, lazy
*+ # zero or more, possessive
+ # one or more, greedy
+? # one or more, lazy
++ # one or more, possessive
{n} # exactly n matches
{n,} # n or more matches, greedy
{n,}? # n or more matches, lazy
{n,}+ # n or more matches, possessive
{n,m} # n to m matches, greedy
{n,m}? # n to m matches, lazy
{n,m}+ # n to m matches, possessive
capturing
( # open capturing group
) # close capturing group
\n # refer to captured group [n], where n is a number (e.g. \1)
\g{n} # refer to captured group [n]
(?: # non-capturing group
(?<name> # named capturing group
(?P<name> # named capturing group
\g{name} # refer to named capturing group
\k<name> # refer to named capturing group
(?P=name) # refer to named capturing group
look around
(?= # Lookahead
(?<= # Lookbehind
(?! # Negative Lookahead (must not be followed by)
(?<! # Negative Lookbehind (must not be preceded by)
examples
match if a character occurs twice in a row
(.)\1
snake case to camel case
s:([a-z]+)_?:\u\L$1:g
dromedary case to snake case
s:(.)([A-Z]):\L$1_$2:gr
documentation
https://perldoc.perl.org/perlre.html