Table of contents
AWK
cli
run awk without any kind of input
awk [awk-cmd]
process lines from stdin
[cmd] | awk [awk-cmd]
process lines from files
awk [awk-cmd] [file]...
run awk commands from script
awk -f [awk-script-file] [file]...
change field separator
awk -F [separator] '{print $[n]}'
assign variable
awk -v myvar=foo 'BEGIN{print myvar}'
dump final values of variables to ./awkvars.out
awk --dump-variables ...
show warnings
awk --lint ...
fail on warnings
awk --lint=fatal ...
shebang header
#!/usr/bin/awk -f
syntax
structure
execute at start of program
BEGIN {...}
execute on every line
{...}
execute at end of document
END {...}
execute on lines matching regex
/[regex]/{...}
comment
# [comment]
operators
assignment operator
{ myvar=1; print myvar }
{ myvar="foo"; print myvar }
arithmetic operators
{ print 1+1 }
{ print 1-1 }
{ print 1*1 }
{ print 1/1 }
{ print 1%1 }
{ print 1^1 }
{ print 1**1 } # alternative syntax for exponential
{ i=1; print ++i }
{ i=1; print --i }
{ i=1; i++; print i }
{ i=1; i--; print i }
unary minus (or negate)
{ i=-1; print -i }
unary plus (or convert to number)
{ print +"1-returns-one" }
assignment shorthands
{ i=1; i+=1; print i }
{ i=1; i-=1; print i }
{ i=1; i*=1; print i }
{ i=1; i/=1; print i }
{ i=1; i%=1; print i }
{ i=2; i^=2; print i }
{ i=2; i**=2; print i } # alternative syntax for exponential
logical operators (returns 0=false or 1=true)
{ print 1==2 }
{ print 1!=2 }
{ print 1<2 }
{ print 1<=2 }
{ print 1>2 }
{ print 1>=2 }
{ b=1; print !b }
{ print 1<2 && 2<3 }
{ print 1<2 || 2<3 }
ternary operator
{ print NR%2 == 0 ? "even" : "odd" }
regex operators
{ for(i=1;i<100;i++) if (i ~ "0$") print i }
{ for(i=1;i<100;i++) if (i !~ "[1-9]$") print i }
types
strings
space = string concatenation
{ a="a"; b="b"; c=a b; print c }
arrays
defining an array
{ q[0]="a"; q[1]="b"; print q[1] }
{ q["a"]=0; q["b"]=1; print q["b"] }
iterating an array
{ q[0]="a"; q[1]="b"; for (v in q) print q[v] }
deleting an array element by index
{ q[0]="a"; q[1]="b"; delete q[0]; print q[0] }
basic usage
print column number n of console output
awk '{print $[col]}'
print last column
awk '{print $NF}'
print multiple columns
awk '{print $1,$2}'
replace separators
awk 'BEGIN{FS=":"; OFS="|"} {print $1,$2}'
strings
awk '{ print "col:" $[col] "." }'
math operations on columns
awk '{print $[n] + $[m]}'
awk '{print $[n] - $[m]}'
awk '{print $[n] * $[m]}'
awk '{print $[n] / $[m]}'
substring of column
awk '{print substr($[col], [start-index])}'
print line numbers
awk '{print NR, $0}'
print a line by index
awk 'NR==[n]'
print from line n to line m
awk 'NR==[n],NR==[m]'
print last line (tail)
awk 'END {print $0}'
find lines matching a pattern
awk '/[regex]/'
awk '/[regex]/ {print $0}'
find lines with specific field matching a pattern
awk '$[col] ~ /[regex]/'
case insensitive matching
awk 'BEGIN { IGNORECASE = 1 } /[regex]/'
control flow
if statement
if (condition) {
[expr1]
[expr2]
...
} else if (condition) {
[expr1]
[expr2]
...
} else {
[expr1]
[expr2]
...
}
while loop
while (condition) {
[expr]...
}
do {
[expr]...
} while (condition)
for each loop
for(i in [array]) {
[expr]...
}
for loop
for (init?; condition; action?) {
[expr]...
}
break / continue / exit
{
print "line:" NR
for(i=1;i<10;i++) {
if(NR>10) exit 0
if(i>NR) break
if(i%2 ==0) continue
print i
}
}
functions
user defined functions
function [name](...) {
[expr]...
}
input / output
pipe output to another shell command
{ print "abc" | "sed s/a/b/" }
write output to file
{ print "foo" > "[path]" }
append output to file
{ print "foo" >> "[path]" }
examples
count lines
awk 'END {print NR}'
count matching lines
awk '/[regex]/{ sum++ } END { print sum }'
sum a bunch of numbers
seq 10 | awk '{ sum += $1 } END { print sum }'
whitespace pad
echo foo | awk '{ printf("%5s\n", $0) }'
zero pad numbers
seq 20 | awk '{ printf("%02d\n", $0) }'
print number with thousands separators
awk "BEGIN { printf(\"%'d\n\", 1234567) }"
specify precision for floating point
awk 'BEGIN { printf("%05.2f \n", 3.1415926) }'
print all ascii characters
awk 'BEGIN { for(i=0;i<256;i++) printf("%d: %c \n", i, i) }'
printf arguments out of order
awk 'BEGIN { printf("%3$s %1$s \n", "last", "ignored", "first") }'
date-time format NL
awk 'BEGIN { print strftime("%d-%m-%Y %H:%M:%S", systime()) }'
print command line arguments
awk -v i=1 'BEGIN{while (i<ARGC) print ARGV[i++]}' [arg]...
print an environment variable
awk 'BEGIN { print ENVIRON["PATH"] }'
get pid
awk 'BEGIN { print PROCINFO["pid"] }'
replace field separator
awk 'BEGIN{FS=":"; OFS="|"} {$1=$1; print}'
print all elements in an array
function arp(array) {
for(i in array) {
print i ": " array[i]
}
}
BEGIN {
m["b"] = 2
m["c"] = 3
m["a"] = 1
arp(m)
}
API
built in variables
ARGC = cli argument count
ARGIND = index of current file in argv
ARGV = cli argument array
BINMODE = binary mode
CONVFMT = number conversion format
ENVIRON = environment variables (associative array)
ERRNO = IO errors
FIELDWIDTHS = use fixed field widths instead of field separators
FILENAME = name of current file or - for stdin
FS = input field separator
NF = number of fields in current record (line)
NR = record (line) number
FNR = record (line) number in current file
IGNORECASE = makes GAWK case insensitive
LINT = programmatic control over --lint option
OFMT = output format for numbers
OFS = output field separator
ORS = output record separator
PROCINFO = associative array with process info
RLENGTH = length of last string matched by match() function
RS = input record separator
RSTART = index of string matched by match() function
SUBSEP = separator character for array subscripts
TEXTDOMAIN = used for text translations
built in functions
control flow
next stop processing record and jump to the next one, like a continue statement
nextfile stop processing file and jump to the next one
return return a value from a function
arithmetic functions
atan2(y,x) arctangent of y/x in radians (trigonometry)
cos(r) cosine of r, with r in radians (trigonometry)
exp(n) exponent
int(v) truncate number toward zero (drop the fractional part)
log(v) logarithm
rand() random number 0 <= N <1
sin(r) sine of r, with r in radians (trigonometry)
sqrt(n) square root
srand(seed?) seed the random number generator (time of day if absent), returns the previous seed
string functions
gsub(regex, sub, str) global substitution: replace all regex matches in str with sub
index(str, sub) return first index (starting at 1) of sub in str or 0
length(str) get length of string
match(str, regex) return first index of regex match
printf(format, args) print formatted
split(str, arr, regex) split str on every occurrence of regex and overwrite the array
strtonum(str) convert decimal [0-9]+, octal 0[0-7]+ or hex 0x[0-9a-fA-F]+ number
sub(regex, sub, str) replace a single occurrence of regex with sub
substr(str, index, len) extract a substring of length len starting at index
tolower(str) convert to lower case
toupper(str) convert to upper case
array functions
asort([array]) replace keys with indexes, sort values
asorti([array]) replace keys with indexes, values contain keys sorted
delete array[index] delete element in array - no parentheses
date time functions
systime() returns unix timestamp
mktime(datestr) converts string to timestamp
strftime(format, stamp) convert timestamp using date specification
bit manipulation
and(n,m) bitwise AND
compl(n) bitwise complement
lshift(number,shifts) left shift
rshift(number,shifts) right shift
or(n,m) bitwise OR
xor(n,m) bitwise XOR
input output
close(pipe) closes file or pipe
exit [code] quit awk and return [code]
fflush(pipe) flush buffers for pipe, or all pipes if argument is ""
getline read a line from a file
system(cmd) execute a command on the cli and return the return value
printf format
escape sequences
\b backspace (deletes character)
\f form feed
\n newline
\r carriage return
\t tab
\v vertical tab
control letters
%c = single character by ascii index
%d = integer
%i = integer
%e = floating point number scientific notation lowercase e
%E = floating point number scientific notation uppercase E
%f = floating point number in floating point notation, special values lowercase
%F = floating point number in floating point notation, special values uppercase
%g = floating point number, use the shortest suitable notation of %e or %f
%G = floating point number, use the shortest suitable notation of %E or %F
%o = octal number
%s = string
%x = hexadecimal lowercase
%X = hexadecimal uppercase
%% = single percent character '%'
format modifiers
%[n]$[c] = positional specifier; refer to arguments by index
%-[w][c] = align left; left justify argument within specified width
%+[w]?[c] = prefix '+' for positive numbers
%0[w][c] = zero pad numbers
%'[w][c] = separate every 3 digits in number with locale specific separator
%#[w]?[c] = prefix octal numbers with "0", hex with "0x"
%[w]?.[p][c] = specify decimal precision for numbers / max length for strings
strftime date format
%a = weekday short
%A = weekday long
%b = month short
%B = month long
%c = default format for date-time
%d = day 01-31 (zero padded)
%e = day 1-31 (space padded)
%G = ISO 8601 week-based year (pairs with %V)
%H = hour 00-23
%I = hour 01-12
%j = day in year 001-366
%m = month 01-12
%M = minute 00-59
%n = newline \n
%p = AM|PM
%r = time format: hh:mm:ss AM|PM
%R = time format: hh:mm
%S = seconds 00-59
%t = tab \t
%T = time format: hh:mm:ss
%u = day of week 1-7 starting at Monday
%U = weeknumber 00-53 starting at Sunday
%V = weeknumber 01-53 starting at Monday
%w = day of week 0-6 starting at Sunday
%W = weeknumber 00-53 starting at Monday
%Y = year
%z = time zone offset
%Z = time zone name
references
https://www.gnu.org/software/gawk/manual/html_node/index.html#SEC_Contents