Index

api
- bitbucket
- github
applications
- apache
  - apache
- databases
  - postgres
  - cassandra
  - mssql
  - mysql
- docker
- jenkins
- kubernetes
- latex
  - latex
  - snuggletex
- openstack
  - openstack
- versioncontrol
  - git
  - mercurial
- vim
- chrome
- firefox
- vlc
- xterm
erlang
html
- canvas
- css
- html
- svg
java
- games
  - javagames
- hobby
  - jl
- j2ee
- lang
- osgi
  - felix
  - osgi
- other
- scripting
- servers
- spring
javascript
- jquery
- javascript
- jsregex
- websockets
linux
- administration
- applications
- core
  - alpine
  - alsa
  - arch
  - compiz
  - debian
  - flatpak
  - i3
  - icewm
  - lightdm
  - pihole
  - snap
  - suse
  - systemd
  - xfce
  - xkb
  - xserver
- io
- shell
  - bash
  - dash
    - dash
    - script necessities
  - awk
  - bats
  - color
  - expr
  - grep
  - ls
  - navigation
  - rofi
  - screen
  - sed
  - text
  - tmux
  - trap
  - xargs
microsoft
- azure
  - javasdk
- dotnet
- mono
  - mono
- windows
perl
python
rust
- rust cli
- rust

python regex

python regex

search

basic usage

import re
regex = '[0-9]+'
string = 'abcd 12345 efgh 67890'
match = re.search(regex, string)
if match:
	print(match.group())    # prints 12345

match object fields

match = re.search(regex, string)
if match:
	print(match.group())   # prints matching part of input
	print(match.span())    # prints tuple: (start index, end index) of match; end index is exclusive
	print('input string:' + match.string)

using flags

match = re.search(regex, line, re.IGNORECASE)
match = re.search(regex, line, re.DOTALL | re.MULTILINE)

extracting groups

string = 'someone@example.com'
regex = r'(?:\w+)@(.+)'
result = re.search(regex, string)
print(result.group(0));    # prints: someone@example.com
print(result.group(1));    # prints: example.com
print(result.group(2));    # error: there is no group 2

findall

printing all matches of regex in string

regex = '[0-9]+'
string = 'abcd 12345 efgh 67890'
result = re.findall(regex, string)
print(result);    # prints ['12345', '67890']

split

splitting a line on a regex

regex = '-'
string = '01-01-1970'
result = re.split(regex, string)
print(result);    # prints ['01', '01', '1970']

split a limited number of times

regex = '-'
string = '01-01-1970'
result = re.split(regex, string, maxsplit=1)
print(result);    # prints ['01', '01-1970']

substitution

substitute 'x' for numbers

string = 'ccv 123'
regex = '[0-9]'
replace = 'x'
result = re.sub(regex, replace, string)
print(result);    # prints ccv xxx

only replace first 2

result = re.sub(regex, replace, string, count=2)
print(result);    # prints ccv xx3

substitute with multiline flag

return re.sub(r'\n\s*\n', '\n', lines, flags=re.MULTILINE)

flags

A | ASCII      # ascii only matching (instead of unicode)
S | DOTALL     # dot '.' matches newline
I | IGNORECASE # case insensitive matching
M | MULTILINE  # multiline matching

syntax

a         # match the letter a
\.        # match a literal dot '.'
\\        # match backslash
^         # match start of line
$         # match end of line

.         # match any character other than newline (DOTALL flag set: also match newline)
\d        # match digit
\D        # match non-digit
\s	  # match whitespace
\w        # match [a-zA-Z0-9_] (word character)
\W        # match [^a-zA-Z0-9_] (non-word character)
|         # alternative pattern (logical or)

character class

[0-9]	  # match digit
[^0-9]    # match non-digit
[0-9-]	  # match digit or hyphen '-'
[-0-9]    # match digit or hyphen '-'
[0-9.]    # match digit or dot
[aA]      # match 'a' or 'A'

multiplicity

?         # zero or one, greedy
??        # zero or one, lazy
*         # zero or more, greedy
*?        # zero or more, lazy
+         # one or more, greedy
+?        # one or more, lazy
{n}       # exactly n matches
{n,}      # n or more matches, greedy
{n,}?     # n or more matches, lazy
{n,m}     # n to m matches, greedy
{n,m}?    # n to m matches, lazy

capturing

(         # open capturing group
)         # close capturing group
(?:       # non-capturing group
\n        # refer to captured group [n]

look around

(?=	  # Lookahead
(?<=      # Lookbehind
(?!	  # Negative Lookahead (must not be followed by)
(?<!      # Negative Lookbehind (must not be preceded by)

documentation

https://docs.python.org/3/howto/regex.html
https://docs.python.org/3/library/re.html#module-re

Erik's Cheat Sheet