Create incremental backups in a git-annex repository
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

git-annex-backup.sh 5.2KB


  1. #!/usr/bin/env bash
  2. #
  3. # git-annex-backup - Create incremental backups in a git-annex repository.
  4. # Copyright © 2016 Peter Colberg.
  5. # Distributed under the MIT license. (See accompanying file LICENSE.)
  6. #
  # Abort on the first failing command, including a failure in any stage of a
  # pipeline.
  set -o errexit -o pipefail
  # Reference time in seconds since the epoch. When GIT_ANNEX_BACKUP_DATE is set
  # and non-empty it is handed to date(1) via --date instead of using the
  # current time.
  NOW=$(date ${GIT_ANNEX_BACKUP_DATE:+"--date=${GIT_ANNEX_BACKUP_DATE}"} '+%s')
  # Prints an error message to standard error and terminates the script.
  # Arguments: $1 - message to print
  # Exits with status 1; never returns to the caller.
  die() {
    # printf rather than echo: a message such as "-n" or "-e" is printed
    # verbatim instead of being consumed as an echo option.
    printf '%s\n' "$1" >&2
    exit 1
  }
  # Converts a time duration to a number of seconds.
  #
  # The duration is a concatenation of optional "<digits><unit>" parts that must
  # appear in the order w (weeks), d (days), h (hours), m (minutes), s (seconds),
  # for example "1w2d" or "90m".
  #
  # Arguments: $1 - duration string
  # Outputs:   the total number of seconds on stdout
  # Returns:   1 (without output) if the string is malformed or the total is 0
  parse_duration() {
    local total=0 i
    # Seconds per unit, indexed by the regex capture group number below.
    local -a factor=(0 604800 86400 3600 60 1)
    [[ "$1" =~ ^([0-9]+w)?([0-9]+d)?([0-9]+h)?([0-9]+m)?([0-9]+s)?$ ]] || return 1
    for i in 1 2 3 4 5; do
      if [[ -n "${BASH_REMATCH[i]}" ]]; then
        # ${...%?} drops the unit letter. The 10# prefix forces base 10 so that
        # digits with a leading zero ("08h", "010s") are not read as octal.
        total=$(( total + 10#${BASH_REMATCH[i]%?} * factor[i] ))
      fi
    done
    [[ "$total" -gt 0 ]] || return 1
    printf '%s\n' "$total"
  }
  # Formats seconds since the epoch as a UTC timestamp in RFC-3339 form
  # (YYYY-MM-DDThh:mm:ssZ).
  # Arguments: $1 - seconds since the epoch
  # Outputs:   the formatted timestamp on stdout
  format_timestamp() {
    local epoch=$1
    date -u -d "@${epoch}" '+%FT%TZ'
  }
  # Converts a UTC timestamp of the form YYYY-MM-DDThh:mm:ssZ to seconds since
  # the epoch.
  # Arguments: $1 - timestamp string
  # Outputs:   seconds since the epoch on stdout
  # Returns:   1 if $1 does not have the expected shape; otherwise the exit
  #            status of date(1)
  parse_timestamp() {
    local pattern='^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$'
    if [[ ! "$1" =~ $pattern ]]; then
      return 1
    fi
    # The pattern is anchored at both ends, so the match is the whole argument.
    date -d "$1" '+%s'
  }
  # Extracts the first timestamp embedded in a dot-separated file name.
  #
  # The name is split on "." and each component is offered to parse_timestamp;
  # the output of the first component it accepts is the result.
  #
  # Arguments: $1 - file name
  # Outputs:   whatever parse_timestamp prints for the accepted component
  # Returns:   0 if a component was accepted, 1 otherwise
  parse_filename() {
    local -a parts
    local i
    IFS=. read -r -a parts <<<"$1"
    for (( i = 0; i < ${#parts[@]}; i++ )); do
      parse_timestamp "${parts[i]}" && return 0
    done
    return 1
  }
  # Reads file names from stdin, one per line, and runs parse_filename on each.
  # Globals:   PROGRAM, COMMAND (read, for the error message)
  # Outputs:   the output of parse_filename for every line
  # Aborts via die on the first name that parse_filename rejects.
  parse_filenames() {
    local name
    while IFS= read -r name; do
      if ! parse_filename "$name"; then
        die "$PROGRAM $COMMAND: invalid timestamp -- $name"
      fi
    done
  }
  # Selects times from stdin against a list of intervals.
  #
  # For every time read, the remaining interval arguments are walked in order.
  # The walk stops at the first interval for which either
  #   - time + interval is not later than NOW, or
  #   - a previous line exists and time is earlier than previous + interval.
  # Every interval passed before the walk stops prints the time once and is
  # then dropped from the list. If a line consumes no interval at all, every
  # remaining interval is dropped, so no later line prints anything.
  #
  # Globals:   NOW (read)
  # Arguments: interval lengths in seconds (cmd_backup passes them sorted
  #            in descending order)
  # Input:     times in seconds since the epoch, one per line
  # Outputs:   the selected times, one per line
  filter_times() {
    local stamp prev=0 before
    while IFS= read -r stamp; do
      before=$#
      while (( $# > 0 )); do
        (( stamp + $1 > NOW )) || break
        if (( prev > 0 && stamp < prev + $1 )); then
          break
        fi
        printf '%s\n' "$stamp"
        shift
      done
      # Nothing was consumed for this line: discard all remaining intervals.
      if (( $# == before )); then
        shift "$#"
      fi
      prev=$stamp
    done
  }
  # Filters tree listing lines by the timestamp embedded in the file name.
  #
  # Each input line is split on whitespace into three leading fields and a
  # file name (the layout cmd_backup feeds in from "git ls-tree"). The line is
  # passed through unchanged when the time parse_filename reports for the file
  # name is not later than $1.
  #
  # Globals:   PROGRAM, COMMAND (read, for the error message)
  # Arguments: $1 - latest time to keep, in seconds since the epoch
  # Aborts via die when parse_filename rejects a file name.
  filter_tree() {
    local entry perm kind sha path stamp
    while IFS= read -r entry; do
      read -r perm kind sha path <<<"$entry"
      if ! stamp=$(parse_filename "$path"); then
        die "$PROGRAM $COMMAND: invalid timestamp -- $path"
      fi
      [[ "$stamp" -gt "$1" ]] || printf '%s\n' "$entry"
    done
  }
  # Prints the directory names whose timestamp is older than an expiry age.
  #
  # Each name read from stdin is converted with parse_timestamp and printed
  # when its time plus $1 lies strictly before NOW.
  #
  # Globals:   NOW, PROGRAM, COMMAND (read)
  # Arguments: $1 - expiry age in seconds
  # Aborts via die when parse_timestamp rejects a name.
  expire_dirnames() {
    local name created
    while IFS= read -r name; do
      if ! created=$(parse_timestamp "$name"); then
        die "$PROGRAM $COMMAND: invalid timestamp -- $name"
      fi
      if (( created + $1 < NOW )); then
        printf '%s\n' "$name"
      fi
    done
  }
  # Implements the "backup" subcommand.
  #
  # Options:   -h, --help               print usage and exit 0
  #            -i, --interval=DURATION  may be repeated; parsed by parse_duration
  # Arguments: the backup command to run (only the first word, $1, is executed)
  # Globals:   PROGRAM, COMMAND, NOW (read)
  #
  # Steps:
  #   1. Take the last directory listed in the HEAD tree as the previous backup.
  #   2. If there is one and intervals were given, select reference times from
  #      its file names via filter_times. Exit 0 without a new backup when the
  #      number of selected times is not smaller than the number of intervals.
  #   3. Create a temporary directory, reset the index and, if reference times
  #      exist, check out the previous backup's entries that are not newer than
  #      the most recent reference time into it.
  #   4. Run the backup command inside the directory with the new timestamp
  #      followed by the reference timestamps (newest first) as arguments.
  #   5. git-annex add the files matching *.<new timestamp>.*, rename the
  #      directory to the new timestamp and commit.
  cmd_backup() {
    local head latest parsed workdir stamp
    local -a intervals times timestamps
    parsed=$(getopt -o hi: -l help,interval: -n "$PROGRAM $COMMAND" -- "$@")
    eval set -- "$parsed"
    while :; do
      case "$1" in
        -h | --help)
          printf '%s\n' "usage: $PROGRAM $COMMAND [-i|--interval=DURATION]... COMMAND"
          exit 0
          ;;
        -i | --interval)
          intervals+=("$(parse_duration "$2")") || die "$PROGRAM $COMMAND: invalid duration -- $2"
          shift 2
          ;;
        --)
          shift
          break
          ;;
      esac
    done
    # Longest interval first; this is the order filter_times is called with.
    intervals=( $(printf '%s\n' "${intervals[@]}" | sort -nr) )
    if (( $# == 0 )); then
      die "$PROGRAM $COMMAND: missing backup command"
    fi
    # A repository without any commit yields an empty $head; that is not an error.
    head=$(git rev-parse -q --verify HEAD || true)
    if [[ -n "$head" ]]; then
      latest=$(git ls-tree -d --name-only "$head" | tail -n 1)
    fi
    if [[ -n "$latest" && "${#intervals[@]}" -gt 0 ]]; then
      times=( $(git ls-tree --name-only "$head:$latest" \
        | parse_filenames \
        | sort -n -u \
        | filter_times "${intervals[@]}") )
      if (( ${#times[@]} >= ${#intervals[@]} )); then
        exit 0
      fi
      # Deduplicate and put the most recent reference time first.
      times=( $(printf '%s\n' "${times[@]}" | sort -n -u -r) )
    fi
    timestamps=("$(format_timestamp "$NOW")")
    for stamp in "${times[@]}"; do
      timestamps+=("$(format_timestamp "$stamp")")
    done
    workdir=$(mktemp -d ".${timestamps[0]}.XXXXXXXXXX")
    git reset
    if (( ${#times[@]} > 0 )); then
      git ls-tree "$head:$latest" \
        | filter_tree "${times[0]}" \
        | git mktree \
        | xargs git read-tree -u --prefix="$workdir/"
    fi
    (cd "$workdir" && "$1" "${timestamps[@]}")
    git-annex add -q "$workdir"/*."${timestamps[0]}".*
    git mv "$workdir" "${timestamps[0]}"
    git commit -q -m "$COMMAND"
  }
  # Implements the "prune" subcommand.
  #
  # Options:   -h, --help          print usage and exit 0
  #            --expire=DURATION   required; parsed by parse_duration
  # Globals:   PROGRAM, COMMAND (read)
  #
  # Exits 0 without doing anything when HEAD cannot be resolved. Otherwise it
  # resets the index, removes every directory of the HEAD tree that
  # expire_dirnames reports for the given age, and commits if the index then
  # differs from HEAD.
  cmd_prune() {
    local head parsed max_age=0
    parsed=$(getopt -o h -l help,expire: -n "$PROGRAM $COMMAND" -- "$@")
    eval set -- "$parsed"
    while :; do
      case "$1" in
        -h | --help)
          printf '%s\n' "usage: $PROGRAM $COMMAND [--expire=DURATION]"
          exit 0
          ;;
        --expire)
          max_age=$(parse_duration "$2") || die "$PROGRAM $COMMAND: invalid duration -- $2"
          shift 2
          ;;
        --)
          shift
          break
          ;;
      esac
    done
    if (( max_age <= 0 )); then
      die "$PROGRAM $COMMAND: missing duration"
    fi
    head=$(git rev-parse -q --verify HEAD) || exit 0
    git reset
    # xargs -r: do not run "git rm" at all when no directory has expired.
    git ls-tree -d --name-only "$head" \
      | expire_dirnames "$max_age" \
      | xargs -r git rm -q -r -f --
    if ! git diff --cached --quiet; then
      git commit -q -m "$COMMAND"
    fi
  }
  # Prints the top-level usage summary to stdout and exits with status 0.
  # Globals: PROGRAM (read)
  usage() {
    printf '%s\n' \
      "usage: $PROGRAM COMMAND" \
      "available commands:" \
      "backup create a new backup" \
      "prune remove backups older than expiry time"
    exit 0
  }
  # Entry point: parse the global options, then dispatch to the subcommand.
  # PROGRAM is the script name without its directory part; PROGRAM and COMMAND
  # are read by the functions above when they build usage and error messages.
  PROGRAM=${0##*/}
  # The leading "+" in the short option string makes getopt stop at the first
  # non-option argument, so options after the subcommand name are left for the
  # subcommand's own getopt call.
  OPTS=$(getopt -o +h -l help -n "$PROGRAM" -- "$@")
  eval set -- "$OPTS"
  while :; do
    case "$1" in
      -h | --help)
        usage
        ;;
      --)
        shift
        break
        ;;
    esac
  done
  # Without a subcommand, print the usage text (usage exits the script).
  (( $# >= 1 )) || usage
  COMMAND=$1
  shift
  if [[ "$COMMAND" == backup ]]; then
    cmd_backup "$@"
  elif [[ "$COMMAND" == prune ]]; then
    cmd_prune "$@"
  else
    die "$PROGRAM: invalid command -- $COMMAND"
  fi
  exit 0