summaryrefslogtreecommitdiffstats
path: root/files/reprepro/snapshots/time_based/tails-delete-expired-apt-snapshots
blob: 009228b5a3d881705278ad913368a27ac0fa3def (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#!/usr/bin/perl

# Dependencies:
#
#    libdatetime-perl
#    libdatetime-format-mail-perl
#    libfile-find-rule-perl
#    libfile-slurp-perl
#    libipc-system-simple-perl
#    liblist-moreutils-perl

use strict;
use warnings;

use DateTime;
use DateTime::Format::Mail;
use English qw{-no_match_vars};
use File::Basename;
use File::Find::Rule;
use File::Path qw(remove_tree);
use File::Slurp;
use Getopt::Long;
use IPC::System::Simple qw(runx);
use List::MoreUtils qw(uniq);


sub usage {
    die "Usage: $0 [--dry-run] archive-directory";
}

sub verbose_print {
    print @_
      if $ENV{VERBOSE};
}

my $dryrun = 0;
GetOptions ("dry-run" => \$dryrun)
  or usage();

print STDERR "\n*** DRY RUN *** running in simulation mode\n\n"
  if $dryrun && !$ENV{SILENT};

my $archive_dir = shift @ARGV;
if (! defined $archive_dir) {
    print "E: missing parameter: archive directory\n";
    usage();
}

if (! -d $archive_dir) {
    print "E: specified archive directory ($archive_dir) isn't a directory\n";
    usage();
}

if ($archive_dir !~ m{\A [/]}xms) {
    print "E: specified archive directory ($archive_dir) isn't an absolute path\n";
    usage();
}


if (! -d "$archive_dir/dists" ) {
    print "E: specified archive directory has no dists/ subdirectory\n";
    usage();
}

my @snapshots_dirs = File::Find::Rule->directory()
  ->name( 'snapshots' )
  ->in( "$archive_dir/dists" );

verbose_print("found snapshots directories: @snapshots_dirs\n");

# Let's load once and for all references known by reprepro to see
# which suites/snapshots are present there to see what needs removing
# (use a shell pipeline to avoid loading the entire output of
# dumpreferences into memory; we've seen it take no less than 7.5 GiB):
my @reprepro_references =
  `reprepro -b '$archive_dir' dumpreferences \\
      | awk '/^s=/ {print \$1}' \\
      | sort -u`;
${^CHILD_ERROR_NATIVE} == 0
    or die "E: Failed to load references: ${^CHILD_ERROR_NATIVE}";
chomp @reprepro_references;
verbose_print("reprepro-known snapshot references:", (join "\n - ", '', @reprepro_references), "\n");

my $now = DateTime->now();
my $removed = 0;

my @reprepro_options = ('-b', $archive_dir);
push @reprepro_options, '--verbose' if $ENV{VERBOSE};
push @reprepro_options, '--silent'  if $ENV{SILENT};

foreach my $dir (@snapshots_dirs) {
    # Let's get the suite name based on the directory name; the
    # contents of the Release files might be slightly off, depending
    # on the current reprepro configuration (Jessie/updates is a bit
    # special, and might not be handled perfectly):
    my $suite = $dir;
    # XXX: anchor these two regexps?
    $suite =~ s{ $archive_dir/dists/ }{}x;
    $suite =~ s{/snapshots}{};

    verbose_print("Looking at snapshots for $suite under $dir\n");
    foreach my $snapshot (glob("$dir/*")) {
        my $timestamp = basename $snapshot;
        # Get Valid-Until:
        my $valid_until;
        foreach my $line (read_file("$snapshot/Release")) {
            if ($line =~ /^Valid-Until: (.+)$/) {
                $valid_until = $1;
            }
        }
        # Stop here if needed:
        if (not defined $valid_until) {
            print "W: no Valid-Until field found in $snapshot/Release, skipping\n";
            next;
        }

        # Work around UTC suffix, which isn't parsed by DateTime::Format::Mail:
        $valid_until =~ s/UTC$/+0000/;
        verbose_print(" - $snapshot is valid until $valid_until\n");
        my $date = DateTime::Format::Mail->parse_datetime($valid_until);
        if (DateTime->compare($now, $date) > 0) {
            verbose_print("    expiration time reached\n");

            # Let's look at the references in reprepro to see if there's
            # something to delete:
            my $db_id = "s=$suite=$timestamp";
            if (grep { $db_id eq $_ } @reprepro_references) {
                if ($dryrun) {
                    verbose_print("    should be forgetting: $db_id\n");
                }
                else {
                    verbose_print("    forgetting: $db_id\n");
                    runx('reprepro', @reprepro_options, '_removereferences', $db_id);
                    verbose_print("    forgotten: $db_id\n");
                }
                $removed++;
            }

            # Remove actual files:
            if ($dryrun) {
                verbose_print("    should be removing: $snapshot\n");
            }
            else {
                verbose_print("    removing: $snapshot\n");
                my $rm_files = remove_tree($snapshot);
                verbose_print("    removed: $rm_files files\n");
            }
        }
        else {
            verbose_print("    nothing to do\n");
        }
    }
}

if ($removed > 0) {
    verbose_print("at least one snapshot was removed (total: $removed)\n");
    if ($dryrun) {
        verbose_print("should be running: reprepro deleteunreferenced\n");
    }
    else {
        verbose_print("running: reprepro deleteunreferenced\n");
        runx('reprepro', @reprepro_options, 'deleteunreferenced');
        verbose_print("all done\n");
    }
}