summaryrefslogtreecommitdiffstats
path: root/files/reprepro/snapshots/tagged/tails-prepare-tagged-apt-snapshot-import
blob: d0b69f808ca70780c6fd1b84fe2d73f635097d09 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
#!/usr/bin/perl
# © 2015 Cyril Brulebois <cyril@debamax.com>, for the Tails project.

use strict;
use warnings;
use File::Slurp;
use YAML::XS;
use List::Compare;
use List::MoreUtils qw(any uniq);
use File::Basename;
use File::Path;
use Cwd;
use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
use Dpkg::Control;

# Let's hardcode some components, and the architecture along:
my @archs = qw(amd64);
# deb.tails.boum.org archive signing key:
my $debian_archive_key = "C7988EA7A358D82E";

### Usability:
sub usage {
    die "Usage: $0 manifest-file target-directory";
}

my $manifest = shift @ARGV
    or usage;

if (! -f $manifest) {
    print "E: specified manifest file ($manifest) doesn't exist\n";
    usage;
}

my $target_dir = shift @ARGV
    or usage;

if (! -d $target_dir) {
    mkdir $target_dir
        or die "E: failed to mkdir target directory ($target_dir)";
}

my $yaml = read_file($manifest);
my $data = Load $yaml
    or die "E: failed to load manifest from $manifest: $!";

# TorProject only publishes main:
sub get_components {
    my $origin = shift;
    if ($origin eq 'torproject') {
        return qw(main);
    }
    else {
        return qw(main contrib non-free);
    }
}

# TorProject does not publish deb-src:
sub origin_has_source {
    my $origin = shift;

    return $origin ne 'torproject';
}

# Load all packages from Packages.gz, build a list of all
# package/version found there, but also a mapping from those to their
# respective source package, including the source version (which might
# be different).
sub get_binaries_and_source_mapping {
    my $path = shift;
    my $archs_ref = shift;
    my $components_ref = shift;
    my @results;
    my %sources;

    foreach my $arch (@{$archs_ref}) {
    foreach my $component (@{$components_ref}) {
        my ($p, $v, $s);
        my $packages_file = "$path/$component/binary-$arch/Packages.gz";
        -e $packages_file || next;
        my $buf_out = '';
        open my $buf_fh, '+<', \$buf_out or die $!;
        my $status = gunzip $packages_file => \$buf_out
            or die "gunzip failed: $GunzipError";
        while (not $buf_fh->eof()) {
            my $c = Dpkg::Control->new( type => CTRL_INDEX_PKG );
            my $item = $c->parse($buf_fh, $packages_file);
            my ($p, $v, $s, $a) = ($c->{Package}, $c->{Version}, $c->{Source}, $c->{Architecture});
            push @results, "$p/$v/$a";
            ### Build "source = version" mapping
            #
            # Basically we have 3 cases to handle:
            #
            #  - Easy: We have Package, Version, and no Source mentioned. This means that
            #    Package and Source names are the same, and that the version number is
            #    shared as well. Defaulting to "p = v" works.
            #
            #  - Medium: We have Package, Version, and Source, with source being a single
            #    word. This means a binary package built from a source package which has a
            #    different name; but both have the same version, so "s = v" works.
            #
            #  - Hard: Same case as above, except we have a version specified in the
            #    Source field. Very usual case: binNMUs (binary has a +bN suffix which the
            #    source package doesn't have). Less usual: source building binaries with
            #    very different versions. Examples:
            #     - libdevmapper* from src:lvm2
            #     - linux-image-* from src:linux-latest
            #
            #   In this case we have to extract the source version which is stored between
            #   parentheses in the Source field.

            # Easy as the default:
            my $s_v = "$p = $v";
            # Let's now look whether we are in the Hard or Medium cases:
            if ($s and $s =~ /^(.*) [(](.*)[)]$/) {
                my ($source_name, $source_version) = ($1, $2);
                $s_v = "$source_name = $source_version";
            }
            elsif ($s) {
                $s_v = "$s = $v";
            }
            $sources{"$p/$v/$a"} = $s_v;
        }
    }
    }
    my $n = scalar @results;
    print "D: found $n binaries under $path\n";
    return (\@results, \%sources);
}

# Load all packages from Sources.gz, build a list of all
# source/version found there. The logic seems sufficient different
# from the sub above to justify having a separate sub for this.
sub get_sources {
    my $path = shift;
    my $archs_ref = shift;
    my $components_ref = shift;
    my @results;

    foreach my $arch (@{$archs_ref}) {
    foreach my $component (@{$components_ref}) {
        my ($p, $v, $s);
        my $sources_file = "$path/$component/source/Sources.gz";
        -e $sources_file || next;
        my $buf_out = '';
        open my $buf_fh, '+<', \$buf_out or die $!;
        my $status = gunzip $sources_file => \$buf_out
            or die "gunzip failed: $GunzipError";
        while (not $buf_fh->eof()) {
            my $c = Dpkg::Control->new( type => CTRL_INDEX_SRC );
            my $item = $c->parse($buf_fh, $sources_file);
            my ($p, $v) = ($c->{Package}, $c->{Version});
            push @results, "$p/$v";
        }
    }
    }
    my $n = scalar uniq @results;
    print "D: found $n sources under $path\n";
    return @results;
}

# Load all packages only once:
my @missings;
my @founds;
my %seen;
foreach my $type (qw(binary source)) {
    if ($data->{packages}->{$type}) {
        $seen{$type} = 1;
        foreach my $pkgver (@{ $data->{packages}->{$type} }) {
            my $package = $pkgver->{package};
            my $version = $pkgver->{version};
            if ($type eq 'binary') {
                my $arch = $pkgver->{arch};
                push @missings, "$package/$version/$arch/$type";
            }
            else {
                push @missings, "$package/$version/$type";
            }
        }
    }
}

foreach my $origin (sort keys %{ $data->{origin_references} }) {
    my $reference = $data->{origin_references}->{$origin}->{reference};
    print "I: detected origin: $origin with reference: $reference\n";

    # Prepare for per-origin reprepro configuration, which is needed
    # since some suites are available from multiple origins:
    my $target_conf = "$target_dir/$origin/conf";
    my $distributions_file = "$target_conf/distributions";
    my $updates_file = "$target_conf/updates";

    # Start with empty files:
    File::Path::make_path($target_conf);
    write_file($distributions_file, "")
        or die "unable to write to distributions file ($distributions_file)";
    write_file($updates_file, "")
        or die "unable to write updates file ($updates_file)";

    # The user will likely have everything set up in $HOME already:
    my $reprepro = "repositories/$origin";
    if (! -d "$reprepro/") {
        print "W: no reprepro repository found (expected: $reprepro)\n";
        if ($reference eq 'unknown') {
            print "W: skipping for now, since reference is 'unknown'\n";
        }
        else {
            die "E: not skipping for now, since reference is not 'unknown'\n";
        }
    }
    else {
        # Save for later use in Method fields (conf/updates):
        my $reprepro_path = Cwd::realpath($reprepro);
        # Load some info about the suites:
        # NOTE: Look at Release files on two levels, because of
        # e.g. jessie/updates (security.debian.org)
        my @release_files = (<$reprepro/dists/*/Release>, <$reprepro/dists/*/*/Release>);
        # Don't consider symlinks, reprepro will generate them for us:
        my @suite_dirs = grep { ! -l $_ } uniq sort map { dirname $_ } @release_files;
        # NOTE: Using basename wouldn't work, because of security.d.o:
        my @suites = map { (my $dir = $_) =~ s{$reprepro/dists/}{}; $dir } @suite_dirs;
        print "I: detected suites: @suites\n";

        my @components = get_components($origin);
        my %suite_binaries;
        my %suite_sources;
        my %suite_formulas;
        foreach my $suite_dir (@suite_dirs) {
            (my $suite_name = $suite_dir) =~ s{$reprepro/dists/}{};
            my $suite_path = "$suite_dir/snapshots/$reference";
            if (! -d $suite_path ) {
                print "W: missing snapshot detected ($suite_path)\n";
            }
            my ($binaries_ref, $sources_ref) = get_binaries_and_source_mapping($suite_path, [@archs], [@components]);
            my @all_sources = ($seen{source} && origin_has_source($origin)) ? get_sources($suite_path, [@archs], [@components]) : ();
            my @all_binaries = @{$binaries_ref};
            foreach my $missing (@missings) {
                # Decide early whether that's a binary or source package search:
                $missing =~ m{^(.*)/(.*?)$};
                my ($search, $type) = ($1, $2);
                if ($type eq 'binary') {
                    if (any { $_ eq $search } @all_binaries) {
                        # reprepro will need the binary package name but also
                        # the architecture:
                        $search =~ m{^(.+)/(.+)/(.+)$};
                        my ($binary, $version, $arch) = ($1, $2, $3);
                        push @{ $suite_binaries{$suite_name} }, $binary;
                        push @{ $suite_sources{$suite_name} }, $sources_ref->{$search};
                        push @founds, $missing;
                        push @{ $suite_formulas{$suite_name}{$arch} }, $binary;
                    }
                }
                elsif ($type eq 'source') {
                    if (any { "$_/source" eq $missing } @all_sources) {
                        # build source = version for reprepro:
                        (my $source = $search) =~ s{(.*)/(.*)}{$1 = $2};
                        push @{ $suite_sources{$suite_name} }, $source;
                        push @founds, $missing;
                    }
                }
                else {
                    die "type is neither 'binary' or 'source' (got: $type)";
                }
            }
        }

        # Iterate on all suites, even if empty:
        foreach my $suite (@suites) {
            -d "$reprepro_path/dists/$suite/snapshots/$reference" || next;

            # Mangle 'wheezy/updates' and friends from security.d.o:
            (my $mangled_name = "$origin-$suite") =~ s{/}{_};
            my $filterlist_file = "$target_conf/$mangled_name.pkg";
            my $filtersrclist_file = "$target_conf/$mangled_name.src";
            my @filterlist = uniq sort @{ $suite_binaries{ $suite } || [] };
            my @filtersrclist = uniq sort @{ $suite_sources{ $suite } || [] };
            print "I: saving FilterList    $filterlist_file with ", (scalar @filterlist), " elements\n";
            write_file($filterlist_file,    map { "$_ install\n" } @filterlist);
            print "I: saving FilterSrcList $filtersrclist_file with ", (scalar @filtersrclist), " elements\n";
            write_file($filtersrclist_file, map { "$_\n"         } @filtersrclist);
            # Only include source as an architecture if needed ($seen{source})
            # and possible (origin_has_source):
            my $source = ($seen{source} && origin_has_source($origin)) ? 'source' : '';
            my @formula_bits;
            foreach my $arch (keys %{ $suite_formulas{$suite} }) {
                my $bit = join '|', (map { "Package (== $_)" } @{ $suite_formulas{$suite}{$arch} });
                $bit = "( $bit, Architecture (== $arch) )";
                push @formula_bits, $bit;
            }
            my $filter_formula = join ' | ', @formula_bits;

            # both codename and suite are needed by reprepro:
            write_file($distributions_file, { append => 1 },
                       "Origin: $origin\n",
                       "Label: $origin\n",
                       "Codename: $suite\n",
                       "Suite: $suite\n",
                       "Architectures: ", (join ' ', @archs), " $source\n",
                       "Components: ", (join ' ', @components), "\n",
                       "SignWith: yes\n",
                       "Update: $origin-$suite\n",
                       "\n",
                      );
            write_file($updates_file, { append => 1 },
                       "Name: $origin-$suite\n",
                       "Method: file://$reprepro_path\n",
                       "Suite: $suite/snapshots/$reference\n",
                       "Architectures: ", (join ' ', @archs), " $source\n",
                       "Components: ", (join ' ', @components), "\n",
                       "VerifyRelease: $debian_archive_key\n",
                       "GetInRelease: no\n",
                       "FilterList: purge $mangled_name.pkg\n",
                       "FilterSrcList: purge $mangled_name.src\n",
                       # Buggy: the resulting FilterFormula pulls source
                       # packages only when their name matches the name of a
                       # binary package we also pull. To fix that, we would
                       # need e.g. to add a set of formula bits like
                       # '| $Source (== gnome-keyring)'. By commenting out this
                       # optimization, we waste about 11% of additional disk space,
                       # because we pull a bunch of unneeded amd64 binary packages.
                       # "FilterFormula: $filter_formula\n",
                       "\n",
                      );

        }
    }
    print "\n";
}


my $lc = List::Compare->new(\@missings, \@founds);
my @still_missings = $lc->get_unique;
if (@still_missings) {
    # The tails repository is handled in a specific way when a freeze is
    # in progress, so assume missing packages are coming from there, and
    # only warn:
    print "W: some packages were not found anywhere:", (join "\n - ", '', @still_missings), "\n";
}