#!/usr/bin/python
# Arepo is the Automatic Repackager, working in the fields of Sisyphus.
#
# Arepo operates on two repositories ('origin' and 'shadow')
# and two architectures ('source' and 'target').
# Packages from the 'origin' repo with architecture 'source'
# are compared to packages with architecture 'target',
# repackaged semi-automatically and placed in the 'shadow' repo.
#
# For example, with the following configuration:
# * origin = /Sisyphus
# * flavor = classic
# * source = i586
# * target = x86_64 and
# * shadow = ~/i586-biarch-repo
# and told to repackage glibc, Arepo will compare two packages
# glibc-*.i586.rpm and glibc-*.x86_64.rpm, extract the source-specific
# files from the former, and create the i586-glibc-*.i586.rpm
# package in the biarch repository.

import os, sys, stat, time
import rpm
import yaml
from itertools import *

# Verbosity threshold: a message is printed when its level is <= loglevel.
# main() raises it by one for every -v/--verbose option.
loglevel = 1

def Log(level, format, *args):
    """Print a message unless `level` exceeds the global `loglevel`.

    level  -- verbosity of this message; lower numbers are shown first.
    format -- %-style format string, or the literal text when no `args`
              are given.
    args   -- values substituted into `format`.

    When called without `args` the text is printed verbatim instead of
    being run through the % operator, so callers may pass pre-built text
    (program output, file contents) that happens to contain '%'.
    """
    if level > loglevel:
        return
    if args:
        message = format % args
    else:
        message = format
    print(message)


def usage():
    """Print the command-line synopsis to stderr and exit with status 1.

    The synopsis lists the -v/--verbose and -f/--force options that
    main() accepts in addition to the configuration file.
    """
    sys.stderr.write(
            "Usage: arepo [-v|--verbose] [-f|--force] <arepo.conf>\n")
    sys.exit(1)

# Configuration state filled in by read_config():
#   vars  -- every top-level config key other than 'packages' and 'hacks'
#   pkgs  -- names of the packages to repackage
#   hacks -- per-package hack dictionaries, keyed by package name
vars = {}
pkgs = []
hacks = {}

def read_config(name):
    """Load the YAML configuration file `name` into vars, pkgs and hacks.

    'packages' is a whitespace-separated string of package names that is
    appended to `pkgs`; 'hacks' is a mapping merged into `hacks`; every
    other key is stored in `vars`.  A warning is logged for hacks that
    refer to packages not listed in `pkgs`.
    """
    global vars, pkgs, hacks
    stream = open(name)
    try:
        # NOTE(review): yaml.load() can construct arbitrary Python objects.
        # The config is presumably trusted; switch to yaml.safe_load() if
        # it can ever come from an untrusted source.
        config = yaml.load(stream)
    finally:
        stream.close()
    # An empty document loads as None; treat it as an empty configuration.
    for key in config or {}:
        if key == "packages":
            # A key with no value loads as None as well.
            pkgs.extend((config[key] or "").split())
        elif key == "hacks":
            hacks.update(config[key] or {})
        else:
            vars[key] = config[key]
    # Warn about mismatches in pkgs and hacks.keys
    orphan_hacks = set(hacks.keys()) - set(pkgs)
    if orphan_hacks:
        # sorted() keeps the warning stable from run to run
        Log(1, "! orphaned hacks exist for: %s", ' '.join(sorted(orphan_hacks)))


class Repo:
    """An RPM repository: a pair of topdir and flavor.

    name   -- label returned by str(repo)
    root   -- top directory, with a leading '~' expanded
    flavor -- flavor (component) name
    parent -- optional owner object, None by default
    """

    def __init__(self, name, root_var, flavor_var, parent=None):
        from os.path import expanduser
        self.parent = parent
        self.flavor = flavor_var
        # root_var may start with '~'; store the expanded path
        self.root = expanduser(root_var)
        self.name = name

    def __str__(self):
        # A repository is displayed by its label only.
        return self.name

def cached(fun):
    """decorator for obj.get_foo() methods; stores
    the first returned value as obj.cached_foo and returns it
    ever after.

    The decorated function's name must start with 'get_'.  Arguments of
    later calls are ignored: whatever the first call returned is handed
    back.  The wrapper keeps the name and docstring of the wrapped
    function so it stays recognisable in tracebacks and help().
    """
    name = fun.__name__
    assert name.startswith('get_')
    attr = 'cached_' + name[4:]
    def method(self, *args, **kw):
        if not hasattr(self, attr):
            setattr(self, attr, fun(self, *args, **kw))
        return getattr(self, attr)
    # Hand-rolled equivalent of functools.wraps for the two fields we need.
    method.__name__ = name
    method.__doc__ = fun.__doc__
    return method

def flush_caches(obj):
    """Delete every instance attribute of `obj` named 'cached_*'.

    This drops the values stored by the @cached decorator, so the next
    get_foo() call computes its result afresh.
    """
    # Iterate over a snapshot: attributes are deleted during the loop and
    # a dictionary must not change size while it is being iterated.
    for attr in list(obj.__dict__):
        if attr.startswith('cached_'):
            delattr(obj, attr)


class ExternalProgramError(RuntimeError):
    """An external program failed.

    stderr -- text describing the failure (usually the program's stderr
              output), or None when nothing was captured.
    """
    def __init__(self, stderr=None):
        # Initialise the base class too, so that .args and repr() are useful.
        if stderr is None:
            RuntimeError.__init__(self)
        else:
            RuntimeError.__init__(self, stderr)
        self.stderr = stderr
    def __str__(self):
        # __str__ must return a string even when no output was captured.
        if self.stderr is None:
            return 'external program failed'
        return self.stderr
class PackageNotInstallable(ExternalProgramError):
    """apt could not install the requested package."""
class PackageNotFound(PackageNotInstallable):
    """apt does not know the requested package at all."""

class Arch:
    """Subset of a Repo. Every line in a sources.list file is an Arch.

    repo -- the Repo this architecture belongs to
    name -- architecture name; also the prefix used by mangle()
    deps -- other Arch objects listed after this one in sources.list
    """

    def __init__(self, repo, arch_name, deps=None):
        self.repo = repo
        self.name = arch_name
        # `deps` is optional; a fresh list avoids a shared mutable default.
        if deps is None:
            deps = []
        self.deps = deps

    def evr_of(self, pkg_name):
        "Return the [epoch, version, release] of `pkg_name` as seen by apt."
        return self.get_aptbox().evr_of(pkg_name)

    def evr_or_none(self, pkg):
        "Like evr_of(), but return None when the package is not installable."
        try:
            return self.evr_of(pkg)
        except PackageNotInstallable:
            return None

    def header_for(self, pkg):
        "Return the Header of `pkg` as resolved by this arch's aptbox."
        return self.get_aptbox().header_for(pkg)

    @cached
    def get_aptbox(self):
        "Return the Aptbox for this arch (created once, then cached)."
        return Aptbox(self)

    @cached
    def get_hasher(self):
        "Return the Hasher for this arch (created once, then cached)."
        return Hasher(self)

    def mangle(self, pkgname):
        "Return the repackaged name of `pkgname`: '<arch>-<pkgname>'."
        return self.name + '-' + pkgname

    def __str__(self):
        return "%s.%s" % (self.repo, self.name)

    def get_pkg_dir(self):
        "Return <repo root>/<arch>/RPMS.<flavor>, the package directory."
        return os.path.join(self.repo.root, self.name,
                'RPMS.' + self.repo.flavor)

    def regenbasedir(self):
        "Run genbasedir for this arch, then drop the cached aptbox/hasher."
        run('genbasedir', '--topdir', self.repo.root,
                self.name, self.repo.flavor)
        flush_caches(self)

    def rdups(self):
        """Run rpmrdups over the package directory and delete what it lists.

        On every output line the first word is skipped and each remaining
        word is unlinked as a path.  Presumably rpmrdups prints the package
        to keep first, followed by its outdated duplicates -- verify
        against the rpmrdups documentation.
        """
        pkg_dir = self.get_pkg_dir()
        paths = [os.path.join(pkg_dir, p) for p in os.listdir(pkg_dir)]
        from subprocess import Popen, PIPE
        process = Popen(['rpmrdups'] + paths, env={}, stdout=PIPE, stderr=PIPE)
        out, err = process.communicate()
        if err:
            # Pass stderr as an argument: it may contain '%' characters
            # and must not be interpreted as a format string.
            Log(1, '! rpmrdups says:\n%s', err)
        for line in out.split('\n'):
            for pkg in line.split()[1:]:
                Log(2, "+ removing old package '%s'", pkg)
                os.unlink(pkg)

    def try_install(self, pkg):
        "Ask this arch's aptbox whether `pkg` can be installed."
        return self.get_aptbox().try_install(pkg)

def to_file(*args):
    """Join `args` into one path and return that file opened for writing.

    An existing file is truncated.  The caller owns the returned file
    object and is responsible for closing it.
    """
    fname = os.path.join(*args)
    # open() is the documented way to create file objects; the file()
    # constructor is deprecated.
    return open(fname, 'w')

def run(*args, **kwargs):
    """Run an external command and raise if it does not exit with 0.

    args   -- the command and its arguments (strings)
    kwargs -- passed straight through to subprocess.call()

    Raises ExternalProgramError naming the command and its exit status.
    """
    command = ' '.join(args)
    Log(3, '+ %s', command)
    import subprocess
    status = subprocess.call(args, **kwargs)
    if status != 0:
        # Give the error a message: the output was not captured, so the
        # command line and status are all the diagnostics available.
        raise ExternalProgramError(
                "'%s' exited with status %d" % (command, status))

def grep(regex, strings):
    """Return the list of items of `strings` in which `regex` is found.

    regex   -- regular expression, applied with search()
    strings -- an iterable of strings, or one string that is split into
               lines first
    """
    import re
    if isinstance(strings, str):
        strings = strings.split('\n')
    # A separate name keeps the `re` module usable inside the function.
    pattern = re.compile(regex)
    return [s for s in strings if pattern.search(s)]

class Dependency:
    """One dependency of a package, printable as a spec-file line.

    name    -- name of the required package or capability
    flags   -- integer bitmask tested against the rpm.RPMSENSE_* constants
    version -- version string the requirement is compared with
    """

    def __init__(self, name, flags, version):
        self.name, self.flags, self.version = name, flags, version

    def flag(self, name):
        "Return the bits of self.flags selected by rpm.RPMSENSE_<name>."
        sense_bit = getattr(rpm, 'RPMSENSE_' + name)
        return self.flags & sense_bit

    def type(self):
        "Return the spec tag to use: 'PreReq' or 'Requires'."
        if not self.flag('PREREQ'):
            return 'Requires'
        return 'PreReq'

    def operation(self):
        "Return the comparison operator: some combination of '<', '>', '='."
        symbols = (('LESS', '<'), ('GREATER', '>'), ('EQUAL', '='))
        return ''.join([char for (sense, char) in symbols
                        if self.flag(sense)])

    def limitations(self):
        "Return '(post,preun,...)' for the SCRIPT_* flags set, or ''."
        stages = ('BUILD', 'CLEAN', 'INSTALL', 'POST',
                  'POSTUN', 'PRE', 'PREUN', 'PREP', 'VERIFY')
        active = [stage for stage in stages
                  if self.flag('SCRIPT_' + stage)]
        if not active:
            return ''
        return '(%s)' % ','.join([stage.lower() for stage in active])

    def __str__(self):
        fields = (self.type(), self.limitations(), self.name,
                  self.operation(), self.version)
        return '%s%s: %s %s %s' % fields


class Header:
    """Thin wrapper around an RPM package header.

    The wrapped object only needs to support header[tag] lookups.
    """
    def __init__(self, rpm_header):
        self.h = rpm_header

    @staticmethod
    def from_path(path):
        "Read the header of the package file at `path`; return a Header."
        f = open(path)
        try:
            Log(3, '= reading package %s', os.path.basename(path))
            header = rpm.headerFromPackage(f.fileno())[0]
        finally:
            # Only the descriptor is needed while the header is read;
            # close it explicitly instead of leaking it until collection.
            f.close()
        return Header(header)

    def get_evr(self):
        "Return [epoch, version, release] of the package."
        return [self.h[tag] for tag in ('epoch', 'version', 'release')]

    def __getitem__(self, key):
        return self.h[key]

    def get_file_list(self):
        "Return the set of (filename, mode) pairs of the package's files."
        return set(zip(self['filenames'], self['filemodes']))

    def deps_in_for(self, list, arch):
        """Return Dependency objects for the requirements named in `list`.

        list -- package names of interest; other requirements are skipped
        arch -- Arch whose mangle() renames each kept requirement
        """
        wanted = set(list)
        requirements = zip(self['requirename'],
                           self['requireflags'],
                           self['requireversion'])
        return [Dependency(arch.mangle(name), flags, ver)
                for (name, flags, ver) in requirements
                if name in wanted]

class BasicAptbox:
    """A temporary directory holding an apt.conf and a sources.list.

    The directory is created in __init__ and removed again in __del__.
    Subclasses set `goal`, which becomes part of the directory name.
    """
    goal = 'unknown'
    def __init__(self, arch):
        from tempfile import mkdtemp
        self.dir = mkdtemp(prefix='arepo-%s-%s.' % (self.goal, arch.name))
        self._write_apt_conf()
        self._write_sources_list(arch)

    def apt_conf_fname(self):
        "Return the path of apt.conf inside the temporary directory."
        return os.path.join(self.dir, 'apt.conf')


    def _write_apt_conf(self):
        "Write an apt.conf that points apt at our own sources.list."
        text = """
        Dir::Etc::SourceList "%(basedir)s/sources.list";
        Dir::Etc::SourceParts "/var/empty";
        """ % { 'basedir': self.dir }
        conf = to_file(self.apt_conf_fname())
        try:
            conf.write(text + '\n')
        finally:
            # Close explicitly so the data is on disk before apt reads it,
            # instead of relying on the file object being finalised.
            conf.close()

    def _write_sources_list(self, arch):
        "Write sources.list: one 'rpm file:' line for arch and each dep."
        lines = "\n".join(
                ["rpm file:%s %s %s" % (a.repo.root, a.name, a.repo.flavor)
                 for a in [arch] + arch.deps])
        Log(1, '- writing sources.list for %s', arch)
        # The list is data, not a format string: paths may contain '%'.
        Log(2, '%s', lines)
        sources = to_file(self.dir, 'sources.list')
        try:
            sources.write(lines + '\n')
        finally:
            sources.close()

    def __del__(self):
        # self.dir is missing when mkdtemp() itself failed in __init__.
        if hasattr(self, 'dir'):
            run('rm', '-rf', self.dir)


class Aptbox(BasicAptbox):
    """An aptbox used to ask apt which package files an install needs.

    known_uris     -- maps every inspected URI to its package name
    known_packages -- maps package names to their Header
    """
    goal = 'inspector'
    def __init__(self, arch):
        BasicAptbox.__init__(self, arch)
        self.name = str(arch)
        Log(2, '* %s: mkaptbox', self.name)
        run('mkaptbox', self.dir, '--apt-conf', self.apt_conf_fname())
        self.known_uris = {}
        self.known_packages = {}

    def header_for(self, pkg):
        """Return the Header of `pkg`, resolving it through apt if needed.

        Raises PackageNotFound when apt succeeds but lists no package
        file under that exact name, in addition to whatever try_install()
        raises.
        """
        if pkg not in self.known_packages:
            self.try_install(pkg)
        try:
            return self.known_packages[pkg]
        except KeyError:
            raise PackageNotFound(
                    "apt-get listed no package file named '%s' in %s"
                    % (pkg, self.name))

    def evr_of(self, pkg):
        "Return [epoch, version, release] of `pkg`."
        return self.header_for(pkg).get_evr()

    def try_install(self, pkg):
        """
        Run "apt-get install --print-uris pkg", capture output
        and read headers from the given files. Updates known_packages.

        Raises PackageNotFound or PackageNotInstallable when apt-get
        reports so on stderr, and ExternalProgramError for any other
        non-zero exit. """

        from os.path import join
        apt_get = join(self.dir, 'aptbox', 'apt-get')
        print_uris = [apt_get, 'install', '--yes', '--print-uris', pkg]

        Log(2, '* %s: print-uris for %s', self.name, pkg)
        from subprocess import Popen, PIPE
        process = Popen(print_uris, env={}, stdout=PIPE, stderr=PIPE)
        out, err = process.communicate()
        if process.returncode != 0:
            if "Couldn't find package" in err:
                Log(2, '%% %s not found in %s', pkg, self.name)
                raise PackageNotFound(err)
            if "unmet dependencies" in err:
                Log(2, '%% %s not installable', pkg)
                raise PackageNotInstallable(err)
            raise ExternalProgramError(err)

        # URI lines start with a quote; the URI is the first quoted field.
        uris = [line.split("'")[1] for line in grep("^'", out)]

        # A plain loop: the calls are made for their side effect only.
        for uri in uris:
            self.inspect_uri(uri)

    def inspect_uri(self, uri):
        "Read the package behind a 'file:' URI and remember its header."
        if uri in self.known_uris:
            Log(3, ": %s seen before", uri)
            return

        assert uri.startswith('file:')
        path = uri[5:]

        header = Header.from_path(path)
        name = header['name']

        self.known_uris[uri] = name
        # from_path() already returns a Header; store it without wrapping
        # it in a second one.
        self.known_packages[name] = header

class Hasher(BasicAptbox):
    """A hasher ('hsh') build environment for one architecture.

    target -- architecture name passed to hsh as --target
    """
    goal = 'builder'
    def __init__(self, arch):
        BasicAptbox.__init__(self, arch)
        Log(2, '* creating a hasher environment')
        self.target = arch.name
        run('hsh', '--init', self.dir, '--apt-conf', self.apt_conf_fname(),
                '--target', self.target)

    def __del__(self):
        # Nothing to clean up when the temporary directory never existed.
        if not hasattr(self, 'dir'):
            return
        try:
            run('hsh', '--cleanup-only', self.dir)
        finally:
            BasicAptbox.__del__(self)

    def build(self, pkg, spec_string):
        """Build a package from `spec_string` inside the hasher.

        The spec is written to a temporary file, packed into a temporary
        tar archive labelled with the spec's file name, and the archive
        is handed to hsh.  Raises ExternalProgramError if tar or hsh fail.
        """
        from tempfile import NamedTemporaryFile as TmpFile
        spec = TmpFile(prefix=pkg, suffix='.spec')
        try:
            spec.write(spec_string + '\n')
            spec.flush()
            Log(4, 'SPEC:\n%s\nEND OF SPEC', spec_string)

            tarfile = TmpFile(prefix=pkg, suffix='.tar')
            try:
                basename = os.path.basename(spec.name)
                dirname = os.path.dirname(spec.name)
                run('tar', '-cf', '-', '--label', basename, '-C', dirname,
                        basename, stdout=tarfile)

                run('setarch', self.target, 'hsh', self.dir,
                        '--without-stuff',
                        '--apt-conf', self.apt_conf_fname(),
                        '--target', self.target, tarfile.name)
            finally:
                # Closing a NamedTemporaryFile also removes it.
                tarfile.close()
        finally:
            spec.close()

    def repo_dir(self):
        "Return the 'repo' directory inside the hasher directory."
        return os.path.join(self.dir, 'repo')

    @cached
    def get_repo(self):
        "Return a Repo describing the hasher's own output repository."
        return Repo('hasher', self.repo_dir(), 'hasher', parent=self)

    @cached
    def get_arch(self):
        "Return the target Arch of the hasher's output repository."
        # Arch requires a dependency list; this one has no dependencies.
        return Arch(self.get_repo(), self.target, [])

    def move_packages_to(self, arch):
        "Move every file from the hasher's RPMS.hasher into arch's pkg dir."
        Log(2, '# moving newly built packages to %s', arch)
        from os.path import join
        pkgdir = join(self.repo_dir(), self.target, 'RPMS.hasher')
        for p in os.listdir(pkgdir):
            run('mv', join(pkgdir, p), arch.get_pkg_dir())

def files_entry(entry):
    """Return the %files line for one (filename, mode) pair.

    Directories are prefixed with '%dir ', everything else is listed by
    its bare file name.
    """
    # Unpack in the body: tuple parameters in the signature are a
    # removed language feature (PEP 3113).
    filename, mode = entry
    if stat.S_ISDIR(mode):
        return '%dir ' + filename
    return filename

def auto_file_list(pkg, source, target):
    """Return the %files entries that only the source-arch `pkg` has.

    A (filename, mode) pair of the source package is kept unless the
    target package contains exactly the same pair.  The result is sorted
    so that the generated spec is identical from run to run; iterating
    the set difference directly would give an arbitrary order.
    """
    src = source.header_for(pkg)
    dst = target.header_for(pkg)
    source_only = src.get_file_list() - dst.get_file_list()
    return [files_entry(entry) for entry in sorted(source_only)]

def auto_dep_list(header, source):
    """Return the spec dependency lines for `header`, one per line.

    Only requirements on packages from the global `pkgs` list are kept;
    header.deps_in_for() renames them for the `source` architecture.
    """
    lines = [str(dep) for dep in header.deps_in_for(pkgs, source)]
    return '\n'.join(lines)

class DictSearch:
    """Read-only lookup across several mappings, tried in the given order.

    Each mapping only has to support mapping[key] and raise KeyError for
    a missing key.
    """
    def __init__(self, *refs):
        self.refs = refs
    def __getitem__(self, key):
        for mapping in self.refs:
            try:
                value = mapping[key]
            except KeyError:
                # not in this one, ask the next
                continue
            return value
        # none of the mappings knows the key
        raise KeyError(key)

class HacksDict:
    """Mapping of one package's hacks in which known hack names default to "".

    A key of the form '<before|after|replace>-<install|files>' that is
    absent from the backend yields the empty string; any other missing
    key raises KeyError.
    """
    def __init__(self, d):
        self.backend = d
    def __getitem__(self, key):
        try:
            return self.backend[key]
        except KeyError:
            parts = key.split('-')
            if len(parts) == 2 and \
                    parts[0] in ('before', 'after', 'replace') and \
                    parts[1] in ('install', 'files'):
                # Okay, this hack is just empty here
                return ""
            # Not a hack name: re-raise the original KeyError as it is,
            # keeping its traceback.
            raise

def gen_spec(pkg, source, fl):
    """Return the text of the spec file used to repackage `pkg`.

    pkg    -- name of the original package
    source -- Arch the package is taken from
    fl     -- %files entries, as produced by auto_file_list()

    In the template below '%' is literal RPM macro syntax and
    '~(key)s' marks a substitution point.  Keys are looked up first in
    the values computed here, then in the package header, then in the
    hacks configured for `pkg`.
    """
    header = source.header_for(pkg)

    template = """# Autogenerated. You should not have seen it at all.
Name: ~(mangled-name)s

~(define-ext_epoch)s
%ifdef ext_epoch
Epoch: %ext_epoch
%define chg_epoch %ext_epoch:
%else
%define chg_epoch %nil
%endif

Version: ~(version)s
Release: ~(release)s

Summary: ~(summary)s
License: ~(license)s
Group: ~(group)s
Packager: Sator Arepo <nobody@altlinux.ru>
BuildArch: ~(arch)s

BuildRequires:  ~(name)s = %chg_epoch%version-%release
Requires:       ~(name)s = %chg_epoch%version-%release

~(dep-list)s

AutoReq: no, lib, pkgconfig

%set_strip_method none
%set_verify_elf_method relaxed

%description
[This package was automatically repackaged from ~(arch)s.]

~(description)s

%build
cat >install.sh <<'EOF'
#!/bin/sh -e
for file in "$@"; do
        if [ -d "$file" ]; then
            install -d -- "%buildroot$file"
        else
            mkdir -p -- "`dirname %buildroot$file`"
            cp -a -- "$file" "%buildroot$file"
        fi
done
EOF
chmod +x install.sh

cat >find-deps.sh <<'EOF'
#!/bin/sh -e
file=
handler()
{
        local rc=$?
        trap - EXIT
        [ -z "$file" ] || rm -f -- "$file"
        exit $rc
}
trap handler EXIT HUP INT QUIT PIPE TERM
file=`mktemp -t ${0##*/}.XXXXXXXXXX`

echo 1 >"$file"
(/usr/lib/rpm/${0##*/}; echo $? >"$file") |fgrep -v GLIBC_PRIVATE ||:
exit "$(cat "$file")"

EOF
chmod +x find-deps.sh

ln -s find-deps.sh find-requires
%define __find_requires %_builddir/find-requires
ln -s find-deps.sh find-provides
%define __find_provides %_builddir/find-provides

%install
xargs ./install.sh <<EOF
~(instlist)s
EOF
~(after-install)s

%files
~(filelist)s
~(after-files)s

%changelog
* ~(date)s Sator Arepo <nobody@altlinux.ru> %chg_epoch%version-%release
- Autorepackaged the version from Sisyphus.
    """
    # Escape the RPM '%' first, then turn our '~' markers into real
    # %-format directives.
    spec_format = template.replace('%', '%%').replace('~', '%')

    def install_path(entry):
        # The install list wants bare paths: drop a leading '%dir '.
        if entry.startswith('%dir '):
            return entry[5:] or entry
        return entry

    # Define ext_epoch only for packages that carry an epoch.
    epoch = header['epoch']
    if epoch:
        define_epoch = '%%define ext_epoch %s' % epoch
    else:
        define_epoch = ''

    computed = {
        'mangled-name': source.mangle(pkg),
        'filelist': '\n'.join(fl),
        'instlist': '\n'.join([install_path(x) for x in fl]),
        'date': time.strftime('%a %b %d %Y'),
        'buildarch': source.name,
        'define-ext_epoch': define_epoch,
        'dep-list': auto_dep_list(header, source),
    }
    package_hacks = HacksDict(hacks.get(pkg, {}))
    return spec_format % DictSearch(computed, header, package_hacks)


def repackage(pkg, source, target, shadow):
    """Build the repackaged version of `pkg` in source's hasher.

    pkg    -- name of the original package
    source -- Arch the files are taken from
    target -- Arch the file list is compared against
    shadow -- destination Arch; not used here, the caller moves the
              built packages there afterwards

    Returns True when the build succeeded and False when it failed.
    Errors raised while computing the file list or the spec propagate.
    """
    Log(1, '! repackaging %s', pkg)

    fl = auto_file_list(pkg, source, target)
    Log(3, 'file list: %s', ' '.join(fl))

    spec_string = gen_spec(pkg, source, fl)

    h = source.get_hasher()
    try:
        h.build(pkg, spec_string)
    except Exception:
        # Not a bare except: on Python 2.5 and later KeyboardInterrupt
        # and SystemExit are not Exceptions and so abort the whole run.
        Log(1, '! repackaging %s failed', pkg)
        # %r rather than %s: str() of an exception created without a
        # message is not guaranteed to work.
        Log(2, '%% cause: %r', sys.exc_info()[1])
        return False
    Log(1, '! repackaged %s', pkg)
    return True


def main():
    """Command-line entry point: arepo [-v] [-f] <arepo.conf>.

    Repackages every configured package whose version in the shadow
    repository differs from the origin (or all of them with --force),
    moves the results into the shadow repository and finally checks
    that each repackaged package is installable next to the target
    architecture.
    """
    import getopt
    try:
        opts, args = getopt.gnu_getopt(sys.argv[1:], 'vf',
                                       ['verbose', 'force'])
    except getopt.GetoptError:
        # An unknown option is a usage error, not a reason for a traceback.
        sys.stderr.write("arepo: %s\n" % sys.exc_info()[1])
        usage()

    global loglevel
    force_rebuild = False

    for opt, val in opts:
        if   opt in ('-v', '--verbose'):
            loglevel += 1
        elif opt in ('-f', '--force'):
            force_rebuild = True

    if len(args) != 1:
        usage()
    read_config(args[0])

    # Fail early and readably when the configuration is incomplete.
    required = ('origin', 'flavor', 'source', 'target', 'shadow')
    missing = [key for key in required if key not in vars]
    if missing:
        sys.stderr.write("arepo: missing configuration keys: %s\n"
                         % ', '.join(missing))
        sys.exit(1)

    repo = Repo('Sisyphus', vars['origin'], vars['flavor'])
    noarch = Arch(repo, 'noarch', [])
    source = Arch(repo, vars['source'], [noarch])
    target = Arch(repo, vars['target'], [noarch])
    shadow_repo = Repo('biarch', vars['shadow'], vars['flavor'])
    shadow = Arch(shadow_repo, vars['source'], [source, noarch])

    new_packages = False

    for pkg in pkgs:
        mangled = shadow.mangle(pkg)
        evr_s = source.evr_of(pkg)
        evr_d = shadow.evr_or_none(mangled)
        is_obsolete = (evr_s != evr_d)
        if is_obsolete:
            Log(3, '* %s %s -> %s', pkg, evr_d, evr_s)
        elif force_rebuild:
            Log(2, '* forced rebuild of %s', mangled)
        if is_obsolete or force_rebuild:
            if repackage(pkg, source, target, shadow):
                new_packages = True

    if new_packages:
        source.get_hasher().move_packages_to(shadow)
        shadow.rdups()
        shadow.regenbasedir()

    # Check that packages can be installed on a target machine.
    Log(1, "! checking that re-packages can be installed")
    model = Arch(shadow_repo, vars['source'], [target, noarch])

    for pkg in [shadow.mangle(name) for name in pkgs]:
        try:
            model.try_install(pkg)
        except PackageNotInstallable:
            Log(1, "\n* %s is not installable:\n%s\n",
                pkg, sys.exc_info()[1])

if __name__ == '__main__':
    main()

# vim:set et sw=4:
