diff --git a/bash/dedupe.sh b/bash/dedupe.sh
new file mode 100755
index 0000000..49a420e
--- /dev/null
+++ b/bash/dedupe.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+#
+# dedupe.sh - replace duplicate files with relative symlinks.
+#
+# Usage: dedupe.sh [DIR]
+#
+# Changes into DIR when one is given, then walks the tree below the
+# working directory with a globstar pattern. Every regular file that is
+# not itself a symlink is hashed with xxh128sum. The first file seen for
+# a hash is kept, and each later file with the same hash is replaced by
+# a relative symlink to it. Dotfiles are not visited because dotglob is
+# left off. All progress messages go to stderr.
+#
+# Needs bash 4.4 or newer, GNU realpath and ln, cmp, and xxh128sum.
+
+set -euo pipefail
+shopt -s globstar
+
+if [[ -n "${1:-}" ]]; then cd "$1"; fi
+
+# Content hash -> path of the first file seen with that hash.
+declare -A first_seen=()
+
+for file in **/*; do
+  if [[ ! -f "$file" || -L "$file" ]]; then continue; fi
+  # Feed the file on stdin so its name never reaches xxh128sum, neither
+  # as a possible option nor as part of the output line that cut parses.
+  hash=$(xxh128sum <"$file" | cut -f1 -d' ')
+  original=${first_seen[$hash]:-}
+  if [[ -n "$original" ]]; then
+    # Paths are anchored with ./ below so that a name such as -f or - is
+    # treated as a file, not as an option or as stdin.
+    # xxh128 is not a cryptographic hash, so compare the bytes before the
+    # destructive replacement.
+    if ! cmp -s "./$original" "./$file"; then
+      echo "skipped ${file@Q}: content not verified identical to ${original@Q}" >&2
+      continue
+    fi
+    relpath=$(realpath -s --relative-to "$(dirname "./$file")" "./$original")
+    # The relative target may itself start with a dash, hence the --.
+    ln -sf -- "$relpath" "./$file"
+    echo "symlinked ${file@Q} to ${relpath@Q}" >&2
+  else
+    echo "first occurence of $hash: ${file@Q}" >&2
+    first_seen[$hash]=$file
+  fi
+done