#!/bin/bash
#
# Measure the time it takes to build a binary with different compilers on
# different machines, and measure the binary size.
#
# Usage:
# ./ovm-build.sh
# Directories used:
#
# oilshell.org/blob/
# ovm-build/
#
# ~/git/oilshell/
# oil/
# _deps/
# ovm-build # tarballs and extracted source
# _tmp/
# ovm-build/
# raw/ # output CSV
# stage1
# benchmark-data/
# ovm-build/
# raw/
# compiler-id/
# host-id/
set -o nounset
set -o pipefail
set -o errexit
source benchmarks/common.sh # for log, etc.
source build/common.sh # for $CLANG
readonly BASE_DIR=_tmp/ovm-build
readonly TAR_DIR=$PWD/_deps/ovm-build # Make it absolute
#
# Dependencies
#
readonly OIL_VERSION=$(head -n 1 oil-version.txt)
# Leave out mksh for now, because it doesn't follow ./configure make. It just
# has Build.sh.
readonly -a TAR_SUBDIRS=( bash-4.4 dash-0.5.9.1 ) # mksh )
# NOTE: Same list in oilshell.org/blob/run.sh.
tarballs() {
cat < ${prefix}.bytecode-size.tsv
sizes-tsv $BASE_DIR/bin/*/oil.* \
> ${prefix}.bin-sizes.tsv
sizes-tsv $BASE_DIR/bin/*/*sh \
> ${prefix}.other-shell-sizes.tsv
log "Wrote ${prefix}.*.tsv"
# Native portion, but it's not separated out by compiler. We can just
# subtract.
#$TAR_DIR/oil-$OIL_VERSION/_build/oil/ovm* \
}
#
# Unused Demos
#
bytecode-size() {
local zip=_build/oil/bytecode.zip
# 242 files, 1.85 MB
unzip -l $zip | tail -n 1
# 1.88 MB, so there's 30K of header overhead.
ls -l $zip
}
# 6.8 seconds for debug build, instead of 8 seconds.
clang-oil-dbg() {
make clean
CC=$CLANG make _build/oil/ovm-dbg
}
#
# Measure Elapsed Time
#
# Add --target-size? Add that functionality to benchmarks/time.py?
#
# Should we add explicit targets?
# - ovm-clang, ovm-clang-dbg
# - ovm-gcc, ovm-gcc-dbg
#
# It would be possible, but it complicates the makefile.
build-task() {
local raw_dir=$1 # output
local job_id=$2
local host=$3
local host_hash=$4
local compiler_path=$5
local compiler_hash=$6
local src_dir=$7
local action=$8
local times_out="$PWD/$raw_dir/$host.$job_id.times.tsv"
# Definitions that depends on $PWD.
local -a TIME_PREFIX=(
time-tsv \
--output $times_out \
--field "$host" --field "$host_hash" \
--field "$compiler_path" --field "$compiler_hash" \
--field "$src_dir" --field "$action"
)
local bin_base_dir=$PWD/$BASE_DIR/bin
local bin_dir="$bin_base_dir/$(basename $compiler_path)"
mkdir -p $bin_dir
pushd $src_dir >/dev/null
# NOTE: We're not saving the output anywhere. We save the status, which
# protects against basic errors.
case $action in
configure)
"${TIME_PREFIX[@]}" -- ./configure
# Cleaning here relies on the ORDER of tasks.txt. configure happens
# before build. The Clang build shouldn't reuse GCC objects!
# It has to be done after configure, because the Makefile must exist!
make clean
;;
make)
"${TIME_PREFIX[@]}" -- make CC=$compiler_path
local target
case $src_dir in
*/bash*)
target=bash
;;
*/dash*)
target=src/dash
;;
esac
strip $target
cp -v $target $bin_dir
;;
*)
local target=$action # Assume it's a target like _bin/oil.ovm
"${TIME_PREFIX[@]}" -- make CC=$compiler_path $target
cp -v $target $bin_dir
;;
esac
popd >/dev/null
}
oil-tasks() {
local provenance=$1
# NOTE: it MUST be a tarball and not the git repo, because we don't build
# bytecode-*.zip! We care about the "packager's experience".
local dir="$TAR_DIR/oil-$OIL_VERSION"
# Add 1 field for each of 5 fields.
cat $provenance | while read line; do
# NOTE: configure is independent of compiler.
echo "$line" $dir configure
echo "$line" $dir _bin/oil.ovm
echo "$line" $dir _bin/oil.ovm-dbg
done
}
other-shell-tasks() {
local provenance=$1
# NOTE: it MUST be a tarball and not the git repo, because we do the build
# of bytecode.zip! We care about the "package experience".
local tarball='_release/oil.0.5.alpha1.gz'
# Add 1 field for each of 5 fields.
cat $provenance | while read line; do
case $line in
# Skip clang for now.
*clang*)
continue
;;
esac
for dir in "${TAR_SUBDIRS[@]}"; do
echo "$line" $TAR_DIR/$dir configure
echo "$line" $TAR_DIR/$dir make
done
done
}
# 5 releases: 0.0.0 to 0.4.0. For now, just do the 0.5.alpha1 release, and
# show the drop.
oil-historical-tasks() {
echo
}
# action is 'configure', a target name, etc.
readonly HEADER=$'status\telapsed_secs\thost_name\thost_hash\tcompiler_path\tcompiler_hash\tsrc_dir\taction'
readonly NUM_COLUMNS=7 # 5 from provenence, then tarball/target
measure() {
local provenance=$1 # from benchmarks/id.sh compiler-provenance
local raw_dir=${2:-$BASE_DIR/raw}
extract-oil
# Job ID is everything up to the first dot in the filename.
local name=$(basename $provenance)
local prefix=${name%.compiler-provenance.txt} # strip suffix
local times_out="$raw_dir/$prefix.times.tsv"
# NOTE: Do we need two raw dirs?
mkdir -p $BASE_DIR/{raw,stage1,bin} $raw_dir
# TODO: the $times_out calculation is duplicated in build-task()0
# Write Header of the CSV file that is appended to.
echo "$HEADER" > $times_out
local t1=$BASE_DIR/oil-tasks.txt
local t2=$BASE_DIR/other-shell-tasks.txt
oil-tasks $provenance > $t1
other-shell-tasks $provenance > $t2
#grep dash $t2 |
time cat $t1 $t2 |
xargs -n $NUM_COLUMNS -- $0 build-task $raw_dir ||
die "*** Some tasks failed. ***"
measure-sizes $raw_dir/$prefix
cp -v $provenance $raw_dir
}
#
# Data Preparation and Analysis
#
stage1() {
local raw_dir=${1:-$BASE_DIR/raw}
local out=$BASE_DIR/stage1
mkdir -p $out
local x
local -a a b
# Globs are in lexicographical order, which works for our dates.
x=$out/times.tsv
a=($raw_dir/flanders.*.times.tsv)
b=($raw_dir/lisa.*.times.tsv)
tsv-concat ${a[-1]} ${b[-1]} > $x
x=$out/bytecode-size.tsv
a=($raw_dir/flanders.*.bytecode-size.tsv)
b=($raw_dir/lisa.*.bytecode-size.tsv)
tsv-concat ${a[-1]} ${b[-1]} > $x
x=$out/bin-sizes.tsv
a=($raw_dir/flanders.*.bin-sizes.tsv)
b=($raw_dir/lisa.*.bin-sizes.tsv)
tsv-concat ${a[-1]} ${b[-1]} > $x
# Construct a one-column TSV file
local raw_data_tsv=$out/raw-data.tsv
{ echo 'path'
echo ${a[-1]}
echo ${b[-1]}
} > $raw_data_tsv
head $out/*
wc -l $out/*
}
print-report() {
local in_dir=$1
local base_url='../../web'
cat <OVM Build Performance