Index

Table of contents

AWK

cli

run awk without any kind of input
awk [awk-cmd]
process lines from stdin
[cmd] | awk [awk-cmd]
process lines from files
awk [awk-cmd] [file]...
run awk commands from script
awk -f [awk-script-file] [file]...
change field separator
awk -F [separator] '{print $[n]}'
assign variable
awk -v myvar=foo 'BEGIN{print myvar}'
dump final values of variables to ./awkvars.out
awk --dump-variables ...
show warnings
awk --lint ...
fail on warnings
awk --lint=fatal ...
shebang header
#!/usr/bin/awk -f

syntax

structure

execute at start of program
BEGIN {...}
execute on every line
{...}
execute at end of document
END {...}
execute on lines matching regex
/[regex]/{...}
comment
# [comment]

operators

assignment operator
{ myvar=1; print myvar }
{ myvar="foo"; print myvar }
arithmetic operators
{ print 1+1 }
{ print 1-1 }
{ print 1*1 }
{ print 1/1 }
{ print 1%1 }
{ print 1^1 }
{ print 1**1 } # alternative syntax for exponential
{ i=1; print ++i }
{ i=1; print --i }
{ i=1; i++; print i }
{ i=1; i--; print i }
unary minus (or negate)
{ i=-1; print -i }
unary plus (or convert to number)
{ print +"1-returns-one" }
assignment shorthands
{ i=1; i+=1; print i }
{ i=1; i-=1; print i }
{ i=1; i*=1; print i }
{ i=1; i/=1; print i }
{ i=1; i%=1; print i }
{ i=2; i^=2; print i }
{ i=2; i**=2; print i } # alternative syntax for exponential
logical operators (returns 0=false or 1=true)
{ print 1==2 }
{ print 1!=2 }
{ print 1<2 }
{ print 1<=2 }
{ print 1>2 }
{ print 1>=2 }
{ b=1; print !b }
{ print 1<2 && 2<3 }
{ print 1<2 || 2<3 }
ternary operator
{ print NR%2 == 0 ? "even" : "odd" }
regex operators
{ for(i=1;i<100;i++) if (i ~ "0$") print i }
{ for(i=1;i<100;i++) if (i !~ "[1-9]$") print i }

types

strings

space = string concatenation
{ a="a"; b="b"; c=a b; print c }

arrays

defining an array
{ q[0]="a"; q[1]="b"; print q[1] }
{ q["a"]=0; q["b"]=1; print q["b"] }
iterating an array
{ q[0]="a"; q[1]="b"; for (v in q) print q[v] }
deleting an array element by index
{ q[0]="a"; q[1]="b"; delete q[0]; print q[0] }

basic usage

print column number n of console output
awk '{print $[col]}'
print last column
awk '{print $NF}'
print multiple columns
awk '{print $1,$2}'
replace separators
awk 'BEGIN{FS=":"; OFS="|"} {print $1,$2}'
strings
awk '{ print "col:" $[col] "." }'
math operations on columns
awk '{print $[n] + $[m]}'
awk '{print $[n] - $[m]}'
awk '{print $[n] * $[m]}'
awk '{print $[n] / $[m]}'
substring of column
awk '{print substr($[col], [start-index])}'
print line numbers
awk '{print NR, $0}'
print a line by index
awk 'NR==[n]'
print from line n to line m
awk 'NR==[n],NR==[m]'
print last line (tail)
awk 'END {print $0}'
find lines matching a pattern
awk '/[regex]/'
awk '/[regex]/ {print $0}'
find lines with specific field matching a pattern
awk '$[col] ~ /[regex]/'
case insensitive matching
awk 'BEGIN { IGNORECASE = 1 } /[regex]/'

control flow

if statement
if (condition) {
	[expr1]
	[expr2]
	...
} else if (condition) {
	[expr1]
	[expr2]
	...
} else {
	[expr1]
	[expr2]
	...
}
while loop
while (condition) {
	[expr]...
}

do {
	[expr]...
} while (condition)
for each loop
for(i in [array]) {
	[expr]...
}
for loop
for (init?; condition; action?) {
	[expr]...
}
break / continue / exit
{
    print "line:" NR
    for(i=1;i<10;i++) {
        if(NR>10) exit 0
        if(i>NR) break
        if(i%2 ==0) continue
        print i
    }
}

functions

user defined functions
function [name](...) {
	[expr]...
}

input / output

pipe output to another shell command
{ print "abc" | "sed s/a/b/" }
write output to file
{ print "foo" > "[path]" }
append output to file
{ print "foo" >> "[path]" }

examples

count lines
awk 'END {print NR}'
count matching lines
awk '/[regex]/{ sum++ } END { print sum }'
sum a bunch of numbers
seq 10 | awk '{ sum += $1 } END { print sum }'
whitespace pad
echo foo | awk '{ printf("%5s\n", $0) }'
zero pad numbers
seq 20 | awk '{ printf("%02d\n", $0) }'
print number with thousands separators
awk "BEGIN { printf(\"%'d\n\", 1234567) }"
specify precision for floating point
awk 'BEGIN { printf("%05.2f \n", 3.1415926) }'
print all ascii characters
awk 'BEGIN { for(i=0;i<256;i++) printf("%d: %c \n", i, i) }'
printf arguments out of order
awk 'BEGIN { printf("%3$s %1$s \n", "last", "ignored", "first") }'
date-time format NL
awk 'BEGIN { print strftime("%d-%m-%Y %H:%M:%S", systime()) }'
print command line arguments
awk -v i=1 'BEGIN{while (i<ARGC) print ARGV[i++]}' [arg]...
print an environment variable
awk 'BEGIN { print ENVIRON["PATH"] }'
get pid
awk 'BEGIN { print PROCINFO["pid"] }'
replace field separator
awk 'BEGIN{FS=":"; OFS="|"}'
print all elements in an array
function arp(array) {
    for(i in array) {
        print i ": " array[i]
    }
}
BEGIN {
    m["b"] = 2
    m["c"] = 3
    m["a"] = 1
    arp(m)
}

API

built in variables

ARGC        = cli argument count
ARGIND      = index of current file in ARGV
ARGV        = cli argument array
BINMODE     = binary mode
CONVFMT     = number conversion format
ENVIRON     = environment variables (associative array)
ERRNO       = IO errors
FIELDWIDTHS = use fixed field widths instead of field separators
FILENAME    = name of current file or - for stdin
FS          = input field separator
NF          = number of fields in current record (line)
NR          = record (line) number
FNR         = record (line) number in current file
IGNORECASE  = makes GAWK case insensitive
LINT        = programmatic control over --lint option
OFMT        = output format for numbers
OFS         = output field separator
ORS         = output record separator
PROCINFO    = associative array with process info
RLENGTH     = length of last string matched by match() function
RS          = input record separator
RSTART      = index of string matched by match() function
SUBSEP      = separator character for array subscripts
TEXTDOMAIN  = used for text translations

built in functions

control flow
next                    stop processing record and jump to the next one, like a continue statement
nextfile                stop processing file and jump to the next one
return                  return a value from a function
arithmetic functions
atan2(y,x)              arctangent of y/x in radians (trigonometry)
cos(r)                  cosine of r, with r in radians (trigonometry)
exp(n)                  exponential function, e raised to the power n
int(v)                  truncate toward zero (drop the fractional part)
log(v)                  natural logarithm
rand()                  random number 0 <= N < 1
sin(r)                  sine of r, with r in radians (trigonometry)
sqrt(n)                 square root
srand(seed?)            seed rand() with seed, or time of day if absent; returns the previous seed
string functions
gsub(regex, sub, str)   global substitution: replace all regex matches in str with sub
index(str, sub)         return first index (starting at 1) of sub in str or 0
length(str)             get length of string
match(str, regex)       return first index of regex match
printf(format, args)    print formatted
split(str, arr, regex)  split str on every occurrence of regex and overwrite the array
strtonum(str)           convert decimal [0-9]+, octal 0[0-7]+ or hex 0x[0-9a-f]+ number
sub(regex, sub, str)    replace the first occurrence of regex in str with sub
substr(str, index, len) extract a substring of length len starting at index
tolower(str)            convert to lower case
toupper(str)            convert to upper case
array functions
asort([array])          replace keys with indexes, sort values
asorti([array])         replace keys with indexes, values contain keys sorted
delete array[index]     delete element in array - no parentheses
date time functions
systime()               returns unix timestamp
mktime(datestr)         converts string to timestamp
strftime(format, stamp) convert timestamp using date specification
bit manipulation
and(n,m)                bitwise AND
compl(n)                bitwise complement
lshift(number,shifts)   left shift
rshift(number,shifts)   right shift
or(n,m)                 bitwise OR
xor(n,m)                bitwise XOR
input output
close(pipe)             closes file or pipe
exit [code]             quit awk and return [code]
fflush(pipe)            flush buffers for pipe, or all pipes if argument is ""
getline                 read a line from a file
system(cmd)             execute a command on the cli and return the return value

printf format

escape sequences
\b backspace (deletes character)
\f form feed
\n newline
\r carriage return
\t tab
\v vertical tab
control letters
%c = single character by ascii index
%d = integer
%i = integer
%e = floating point number scientific notation lowercase e
%E = floating point number scientific notation uppercase E
%f = floating point number in floating point notation, special values lowercase
%F = floating point number in floating point notation, special values uppercase
%g = floating point number, use the shortest suitable notation of %e or %f
%G = floating point number, use the shortest suitable notation of %E or %F
%o = octal number
%s = string
%x = hexadecimal lowercase
%X = hexadecimal uppercase
%% = single percent character '%'
format modifiers
%[n]$[c]     = positional specifier; refer to arguments by index
%-[w][c]     = align left; left justify argument within specified width
%+[w]?[c]    = prefix '+' for positive numbers
%0[w][c]     = zero pad numbers
%'[w][c]     = separate every 3 digits in number with locale specific separator
%#[w]?[c]    = prefix octal numbers with "0", hex with "0x"
%[w]?.[p][c] = specify decimal precision for numbers / max length for strings

strftime date format

%a = weekday short
%A = weekday long
%b = month short
%B = month long
%c = default format for date-time
%d = day 01-31 (zero padded)
%e = day 1-31 (space padded)
%G = ISO 8601 week-based year (the year that week %V belongs to)
%H = hour 00-23
%I = hour 01-12
%j = day in year 001-366
%m = month 01-12
%M = minute 00-59
%n = newline \n
%p = AM|PM
%r = time format: hh:mm:ss AM|PM
%R = time format: hh:mm
%S = seconds 00-59
%t = tab \t
%T = time format: hh:mm:ss
%u = day of week 1-7 starting at Monday
%U = weeknumber 00-53 starting at Sunday
%V = weeknumber 01-53 starting at Monday
%w = day of week 0-6 starting at Sunday
%W = weeknumber 00-53 starting at Monday
%Y = year
%z = time zone offset
%Z = time zone name

references

https://www.gnu.org/software/gawk/manual/html_node/index.html#SEC_Contents