#!/bin/bash
#
# Measure the time it takes to build a binary with different compilers on
# different machines, and measure the binary size.
#
# Usage:
# ./ovm-build.sh
# Directories used:
#
# oilshell.org/blob/
# ovm-build/
#
# ~/git/oilshell/
# oil/
# _deps/
# ovm-build # tarballs and extracted source
# _tmp/
# ovm-build/
# raw/ # output CSV
# stage1
# benchmark-data/
# ovm-build/
# raw/
# compiler-id/
# host-id/
set -o nounset
set -o pipefail
set -o errexit
source benchmarks/common.sh # for log, etc.
source build/common.sh # for $CLANG
readonly BASE_DIR=_tmp/ovm-build
readonly TAR_DIR=$PWD/_deps/ovm-build # Make it absolute
#
# Dependencies
#
readonly OIL_VERSION=$(head -n 1 oil-version.txt)
# Leave out mksh for now, because it doesn't follow ./configure make. It just
# has Build.sh.
readonly -a TAR_SUBDIRS=( bash-4.4 dash-0.5.9.1 ) # mksh )
# NOTE: Same list in oilshell.org/blob/run.sh.
proc tarballs {
cat <<< """
bash-4.4.tar.gz
dash-0.5.9.1.tar.gz
mksh-R56c.tgz
"""
}
proc download {
mkdir -p $TAR_DIR
tarballs | xargs -n 1 -I {} --verbose -- \
wget --directory $TAR_DIR 'https://www.oilshell.org/blob/ovm-build/{}'
}
# Done MANUALLY.
proc extract-other {
time for f in $TAR_DIR/*gz {
tar -x --directory $TAR_DIR --file $f
}
}
# Done automatically by 'measure' function.
#
# NOTE: We assume that _release/oil.tar exists. It should be made by
# scripts/release.sh build-and-test or benchmark-build.
proc extract-oil {
# This is different than the others tarballs.
rm -r -f -v $TAR_DIR/oil-*
tar -x --directory $TAR_DIR --file _release/oil.tar
}
#
# Measure Size of Binaries.
#
# Other tools:
# - bloaty to look inside elf file
# - nm? Just a flat list of symbols? Counting them would be nice.
# - zipfile.py to look inside bytecode.zip
proc sizes-tsv {
# host_label matches the times.tsv file output by report.R
echo $'host_label\tnum_bytes\tpath'
local host=$(hostname)
find @ARGV -maxdepth 0 -printf "$host\t%s\t%p\n"
}
# NOTE: This should be the same on all x64 machines. But I want to run it on
# x64 machines.
proc measure-sizes {
local prefix=${1:-$BASE_DIR/raw/demo}
# PROBLEM: Do I need provenance for gcc/clang here? I can just join it later
# in R.
sizes-tsv $TAR_DIR/oil-$OIL_VERSION/_build/oil/bytecode-opy.zip \
> ${prefix}.bytecode-size.tsv
sizes-tsv $BASE_DIR/bin/*/oil.* \
> ${prefix}.bin-sizes.tsv
sizes-tsv $BASE_DIR/bin/*/*sh \
> ${prefix}.other-shell-sizes.tsv
log "Wrote ${prefix}.*.tsv"
# Native portion, but it's not separated out by compiler. We can just
# subtract.
#$TAR_DIR/oil-$OIL_VERSION/_build/oil/ovm* \
}
#
# Unused Demos
#
proc bytecode-size {
local zip=_build/oil/bytecode.zip
# 242 files, 1.85 MB
unzip -l $zip | tail -n 1
# 1.88 MB, so there's 30K of header overhead.
ls -l $zip
}
# 6.8 seconds for debug build, instead of 8 seconds.
proc clang-oil-dbg {
make clean"
CC=$CLANG" make _build/oil/ovm-dbg
}
#
# Measure Elapsed Time
#
# Add --target-size? Add that functionality to benchmarks/time.py?
#
# Should we add explicit targets?
# - ovm-clang, ovm-clang-dbg
# - ovm-gcc, ovm-gcc-dbg
#
# It would be possible, but it complicates the makefile.
proc build-task {
local raw_dir=$1 # output
local job_id=$2
local host=$3
local host_hash=$4
local compiler_path=$5
local compiler_hash=$6
local src_dir=$7
local action=$8
local times_out="$PWD/$raw_dir/$host.$job_id.times.tsv"
# Definitions that depends on $PWD.
local -a TIME_PREFIX=(
time-tsv \
--output $times_out \
--field "$host" --field "$host_hash" \
--field "$compiler_path" --field "$compiler_hash" \
--field "$src_dir" --field "$action"
)
local bin_base_dir=$PWD/$BASE_DIR/bin
local bin_dir="$bin_base_dir/$(basename $compiler_path)"
mkdir -p $bin_dir
pushd $src_dir >/dev/null
# NOTE: We're not saving the output anywhere. We save the status, which
# protects against basic errors.
case (action) {
configure {
${TIME_PREFIX[@]} -- ./configure
# Cleaning here relies on the ORDER of tasks.txt. configure happens
# before build. The Clang build shouldn't reuse GCC objects!
# It has to be done after configure, because the Makefile must exist!
make clean
}
make {
${TIME_PREFIX[@]} -- make CC=$compiler_path
local target
case (src_dir) {
*/bash* {
setvar target = 'bash'
}
*/dash* {
setvar target = 'src/dash'
}
}
strip $target
cp -v $target $bin_dir
}
* {
local target=$action # Assume it's a target like _bin/oil.ovm
${TIME_PREFIX[@]} -- make CC=$compiler_path $target
cp -v $target $bin_dir
}
}
popd >/dev/null
}
proc oil-tasks {
local provenance=$1
# NOTE: it MUST be a tarball and not the git repo, because we don't build
# bytecode-*.zip! We care about the "packager's experience".
local dir="$TAR_DIR/oil-$OIL_VERSION"
# Add 1 field for each of 5 fields.
cat $provenance | while read line {
# NOTE: configure is independent of compiler.
echo $line $dir configure
echo $line $dir _bin/oil.ovm
echo $line $dir _bin/oil.ovm-dbg
}
}
proc other-shell-tasks {
local provenance=$1
# NOTE: it MUST be a tarball and not the git repo, because we do the build
# of bytecode.zip! We care about the "package experience".
local tarball='_release/oil.0.5.alpha1.gz'
# Add 1 field for each of 5 fields.
cat $provenance | while read line {
case (line) {
# Skip clang for now.
*clang* {
continue
}
}
for dir in "${TAR_SUBDIRS[@]}" {
echo $line $TAR_DIR/$dir configure
echo $line $TAR_DIR/$dir make
}
}
}
# 5 releases: 0.0.0 to 0.4.0. For now, just do the 0.5.alpha1 release, and
# show the drop.
proc oil-historical-tasks {
echo
}
# action is 'configure', a target name, etc.
readonly HEADER=$'status\telapsed_secs\thost_name\thost_hash\tcompiler_path\tcompiler_hash\tsrc_dir\taction'
readonly NUM_COLUMNS=7 # 5 from provenence, then tarball/target
proc measure {
local provenance=$1 # from benchmarks/id.sh compiler-provenance
local raw_dir=${2:-$BASE_DIR/raw}
extract-oil
# Job ID is everything up to the first dot in the filename.
local name=$(basename $provenance)
local prefix=${name%.compiler-provenance.txt} # strip suffix
local times_out="$raw_dir/$prefix.times.tsv"
# NOTE: Do we need two raw dirs?
mkdir -p $BASE_DIR/{raw,stage1,bin} $raw_dir
# TODO: the $times_out calculation is duplicated in build-task()0
# Write Header of the CSV file that is appended to.
echo $HEADER > $times_out
local t1=$BASE_DIR/oil-tasks.txt
local t2=$BASE_DIR/other-shell-tasks.txt
oil-tasks $provenance > $t1
other-shell-tasks $provenance > $t2
#grep dash $t2 |
time cat $t1 $t2 |
xargs -n $NUM_COLUMNS -- $0 build-task $raw_dir ||
die "*** Some tasks failed. ***"
measure-sizes $raw_dir/$prefix
cp -v $provenance $raw_dir
}
#
# Data Preparation and Analysis
#
proc stage1 {
local raw_dir=${1:-$BASE_DIR/raw}
local out=$BASE_DIR/stage1
mkdir -p $out
local x
local -a a b
# Globs are in lexicographical order, which works for our dates.
setvar x = "$out/times.tsv"
setvar a = ''($raw_dir/flanders.*.times.tsv)
setvar b = ''($raw_dir/lisa.*.times.tsv)
tsv-concat ${a[-1]} ${b[-1]} > $x
setvar x = "$out/bytecode-size.tsv"
setvar a = ''($raw_dir/flanders.*.bytecode-size.tsv)
setvar b = ''($raw_dir/lisa.*.bytecode-size.tsv)
tsv-concat ${a[-1]} ${b[-1]} > $x
setvar x = "$out/bin-sizes.tsv"
setvar a = ''($raw_dir/flanders.*.bin-sizes.tsv)
setvar b = ''($raw_dir/lisa.*.bin-sizes.tsv)
tsv-concat ${a[-1]} ${b[-1]} > $x
# Construct a one-column TSV file
local raw_data_tsv=$out/raw-data.tsv
do { echo 'path'
echo ${a[-1]}
echo ${b[-1]}
} > $raw_data_tsv
head $out/*
wc -l $out/*
}
proc print-report {
local in_dir=$1
local base_url='../../web'
cat <<< """
OVM Build Performance