#!/usr/bin/python3
#
# An OBS Source Service to retrieve and verify Go module sources
# as specified in go.mod and go.sum.
#
# (C) 2019 SUSE LLC
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# See http://www.gnu.org/licenses/gpl-2.0.html for full license text.
#
"""\
OBS Source Service to download, verify and vendor Go module
dependency sources. Using go.mod and go.sum present in a Go
application, call go tools in sequence:

go mod download
go mod verify
go mod vendor

obs-service-go_modules will create a vendor tarball, compressed with
the specified method (default to "gz"), containing the
vendor/ directory populated by go mod vendor.

See README.md for additional documentation.
"""

import logging
import argparse
import re
import libarchive
import os
import shutil
import sys
import tempfile

from pathlib import Path
from subprocess import run

# Name of this source service; also used as the logger name.
app_name = "obs-service-go_modules"

# The module docstring doubles as the argparse description shown by --help.
description = __doc__

# Defaults for the --compression and --vendorname command line options.
DEFAULT_COMPRESSION = "gz"
DEFAULT_VENDOR_STEM = "vendor"

# Bind the logger at import time: every function in this file refers to the
# global name `log`, so it must exist even when the file is imported rather
# than executed as a script.
log = logging.getLogger(app_name)


def get_archive_parameters(args):
    """Translate the command line options into libarchive writer settings.

    Parameter args is the argparse namespace; only args.vendorname and
    args.compression are read.

    Returns a dict with the keys:
    - vendorname:  stem of the vendor archive file name
    - format:      libarchive format name ("cpio_newc" or "gnutar")
    - compression: libarchive filter name, or None for obscpio / plain tar
    - level:       compression level, or None when uncompressed
    - ext:         file name extension of the vendor archive

    Logs an error and exits with status 1 when the requested compression
    is not known to the installed libarchive.
    """
    archive = {
        "vendorname": args.vendorname,
        "compression": None,
        "level": None,
    }
    if args.compression == "obscpio" and "cpio" in libarchive.ffi.READ_FORMATS:
        archive["format"] = "cpio_newc"
        archive["ext"] = "obscpio"
        return archive

    archive["format"] = "gnutar"
    if args.compression == "tar" and "tar" in libarchive.ffi.READ_FORMATS:
        archive["ext"] = "tar"
        return archive

    # Common file name suffixes differ from the libarchive filter names.
    # Anything not listed here is passed through unchanged, so a filter
    # name such as "xz" or "bzip2" can still be given directly.
    filter_names = {
        "gz": "gzip",
        "zst": "zstd",
        "bz2": "bzip2",
        "lz": "lzip",
    }
    archive["compression"] = filter_names.get(args.compression, args.compression)
    # Level 19 is applied only for the "zst" spelling; everything else uses 9.
    archive["level"] = 19 if args.compression == "zst" else 9

    if archive["compression"] not in libarchive.ffi.READ_FILTERS:
        log.error(
            f"The specified compression mode is not supported: {args.compression}"
        )
        sys.exit(1)

    # The extension keeps the spelling the user asked for, e.g. "tar.bz2".
    archive["ext"] = "tar." + (args.compression)
    return archive


def basename_from_archive_name(archive_name):
    """Derive the top-level directory name from the archive file name.

    A directory is returned as is. Otherwise an optional "_service:<name>:"
    prefix and a trailing ".obscpio" or ".tar.<ext>" suffix are removed.
    Names that do not match that shape are returned unchanged.

    Returns the detected basename as str; a non-empty result is logged.
    """
    if os.path.isdir(archive_name):
        detected = archive_name
    else:
        name_pattern = re.compile(
            r"^(?P<service_prefix>_service:[^:]+:)?(?P<basename>.*)\.(?P<extension>obscpio|tar\.[^\.]+)$"
        )
        detected = name_pattern.sub(r"\g<basename>", archive_name)

    if not detected:
        return detected

    log.info(f"Detected basename {detected} from archive name")
    return detected


def basename_from_archive(archive_name):
    """Derive the top-level directory name from the archive contents.

    Collects every directory entry whose path does not contain ".git"
    and takes the common path of those entries.

    Returns the common path as str, or None when the archive cannot be
    read by libarchive or no common path can be computed.
    """
    try:
        with libarchive.file_reader(archive_name) as archive:
            directories = [
                entry.name
                for entry in archive
                if entry.isdir and ".git" not in entry.pathname
            ]
            try:
                basename = os.path.commonpath(directories)
            except ValueError:
                # Raised for an empty list or for paths that cannot be compared
                return None
    except libarchive.exception.ArchiveError:
        return None

    log.info(f"Detected basename {basename} from archive")
    return basename


def archive_autodetect():
    """Find the most likely candidate file that contains go.mod and go.sum.
    For most Go applications this will be app-x.y.z.tar.gz or other supported compression.
    Use the name of the .spec file as the stem for the archive to detect.
    Archive formats supported:
    - .tar.bz2
    - .tar.gz
    - .tar.lz
    - .tar.xz
    - .tar.zst
    - .obscpio
    A directory named exactly like the spec file stem is the last resort.
    Logs an error and exits with status 1 if no spec file or no archive is found.
    Returns str with filename of the archive or subdirectory
    """
    log.info("Autodetecting archive since no archive param provided in _service")
    specs = sorted(Path.cwd().glob("*.spec"), reverse=True)
    if not specs:
        log.error(f"No spec file found in {Path.cwd()}")
        sys.exit(1)

    archive = None
    spec = specs[0]
    c_exts = ("gz", "xz", "zst", "lz", "bz2")
    # Patterns are tried in order of preference; the first one with a match wins
    for pattern in (
        [f"{spec.stem}*.tar.{c_ext}" for c_ext in c_exts]
        + [f"{spec.stem}*.obscpio"]
        + [f"_service:*:{spec.stem}*tar.{c_ext}" for c_ext in c_exts]
        + [f"_service:*:{spec.stem}*obscpio"]
        + [spec.stem]
    ):
        log.debug(f"Trying to find archive name with pattern {pattern}")
        # Reverse sort so that the lexically greatest name is picked
        matches = sorted(spec.parent.glob(pattern), reverse=True)

        if matches:
            archive = matches[0]
            break

    if not archive:
        log.error("Archive autodetection found no matching archive")
        sys.exit(1)

    log.info(f"Archive autodetected at {archive} ({type(archive)})")

    if archive.name != spec.stem:
        # Check that app.spec Version: directive value
        # is a substring of detected archive filename
        # Warn if there is disagreement between the versions.
        version_pattern = re.compile(r"^Version:\s+([\S]+)$", re.IGNORECASE)
        # Must be initialised: a spec without any Version: line would
        # otherwise leave the name unbound and crash the check below.
        version = None
        with spec.open(encoding="utf-8") as f:
            for line in f:
                versionmatch = version_pattern.match(line)
                if versionmatch:
                    # The last Version: line in the spec wins
                    version = versionmatch.group(1)
        if not version:
            log.warning(f"Version not found in {spec.name}")
        elif version not in archive.name:
            log.warning(
                f"Version {version} in {spec.name} does not match {archive.name}"
            )
    return str(archive.name)  # return string not PosixPath


def extract(filename, outdir):
    """Extract archive filename into the directory outdir.

    libarchive extracts relative to the current working directory, so
    this function changes into outdir for the duration of the extraction
    and always changes back afterwards.

    Logs the error and exits with status 1 if libarchive fails.
    """
    log.info(f"Extracting {filename} to {outdir}")

    cwd = os.getcwd()

    # make path absolute so we can switch away from the current working directory
    filename = os.path.join(cwd, filename)

    log.info(f"Switching to {outdir}")
    os.chdir(outdir)

    try:
        # NOTE(review): only EXTRACT_TIME is passed, none of the
        # EXTRACT_SECURE_* flags. Confirm the archives handled here are
        # trusted, or consider adding those flags.
        libarchive.extract_file(filename, libarchive.extract.EXTRACT_TIME)
    except libarchive.exception.ArchiveError as archive_error:
        log.error(archive_error)
        sys.exit(1)
    finally:
        # Restore the working directory on every exit path
        os.chdir(cwd)


def cmd_go_mod(subcmd, moddir):
    """Execute go mod subcommand using subprocess.run().
    Parameter subcmd is a list of strings appended to 'go mod'.
    Parameter moddir is the directory the command is run in.
    Capture both stderr and stdout as text.
    Log the command line as info and, on failure, stderr as error.
    Return CompletedProcess object to caller for control flow.
    """
    from subprocess import PIPE

    cmd = ["go", "mod", *subcmd]
    log.info(" ".join(cmd))
    # stdout/stderr=PIPE with universal_newlines=True is the spelling of
    # capture_output=True, text=True that also works before Python 3.7.
    # Without capturing, cp.stderr is None and the error handling below
    # (and in callers inspecting cp.stderr) would crash.
    cp = run(cmd, cwd=moddir, stdout=PIPE, stderr=PIPE, universal_newlines=True)
    if cp.returncode:
        log.error(cp.stderr.strip())
    return cp


def replace_modules(replace, go_mod_dir):
    """Apply replace directives to go.mod, then tidy.
    Parameter replace is a list of strings: 'module=replacement'
    Parameter go_mod_dir is the directory containing go.mod
    Exits with status 1 as soon as any go mod command fails.
    Returns True, indicating go.mod and go.sum are modified
    """
    log.info(f"Replacing {len(replace)} modules")
    for directive in replace:
        edit_result = cmd_go_mod(["edit", "-replace", directive], go_mod_dir)
        if edit_result.returncode != 0:
            log.error(f"go mod edit -replace={directive} failed")
            exit(1)

    # go mod tidy brings go.mod and go.sum in line with the edits above
    tidy_result = cmd_go_mod(["tidy"], go_mod_dir)
    if tidy_result.returncode != 0:
        log.error("go mod tidy failed")
        exit(1)

    return True


def require_modules(require, go_mod_dir):
    """Apply require directives to go.mod, then tidy.
    Parameter require is a list of strings: 'module@version'
    Parameter go_mod_dir is the directory containing go.mod
    Exits with status 1 as soon as any go mod command fails.
    Returns True, indicating go.mod and go.sum are modified
    """
    log.info(f"Setting required versions for {len(require)} modules")
    for directive in require:
        edit_result = cmd_go_mod(["edit", "-require", directive], go_mod_dir)
        if edit_result.returncode != 0:
            log.error(f"go mod edit -require={directive} failed")
            exit(1)

    # go mod tidy brings go.mod and go.sum in line with the edits above
    tidy_result = cmd_go_mod(["tidy"], go_mod_dir)
    if tidy_result.returncode != 0:
        log.error("go mod tidy failed")
        exit(1)

    return True


def sanitize_subdir(basedir, subdir):
    """Normalize subdir and make sure it stays inside basedir.

    Returns the normalized path when basedir is the common path of
    basedir and the normalized path. Otherwise logs an error and exits
    with status 1.
    """
    normalized = os.path.normpath(subdir)
    shared_root = os.path.commonpath([basedir, normalized])
    if shared_root != basedir:
        log.error(f"Invalid path: {normalized} not subdir of {basedir}")
        exit(1)
    return normalized


def _write_vendor_archive(
    vendor_tarfile, archive_args, go_mod_dir, go_mod_path, modified
):
    """Pack vendor/ (plus go.mod and go.sum when modified) into vendor_tarfile.

    Members are added by relative name from within go_mod_dir. All file
    times are set to the mtime of go_mod_path when the installed python
    libarchive supports it.
    """
    members = ["vendor"]
    if modified:
        members.extend(["go.mod", "go.sum"])

    mtime = os.path.getmtime(go_mod_path)
    log.debug(f"Set archive files times to {mtime}")

    options = []
    if archive_args["compression"] == "gzip":
        # presumably keeps the gzip filter from recording a timestamp
        options.append("!timestamp")
    if archive_args["level"]:
        options.append(f"compression-level={archive_args['level']}")

    # Resolve before chdir, otherwise a relative output path would end up
    # inside go_mod_dir.
    vendor_tarfile = os.path.abspath(vendor_tarfile)
    cwd = os.getcwd()
    os.chdir(go_mod_dir)
    try:
        with libarchive.file_writer(
            vendor_tarfile,
            archive_args["format"],
            archive_args["compression"],
            options=",".join(options),
        ) as new_archive:
            try:
                for member in members:
                    new_archive.add_files(
                        member, mtime=mtime, ctime=mtime, atime=mtime
                    )
            except (
                TypeError
            ):  # If using old libarchive fallback to old non reproducible behavior
                log.warning(
                    "python libarchive is too old, unable to produce reproducible output"
                )
                # The fallback must still ship the modified go.mod and go.sum
                for member in members:
                    new_archive.add_files(member)
    finally:
        os.chdir(cwd)


def main():
    """Entry point: parse options, unpack the sources and vendor Go modules.

    Steps:
    - determine the source archive (--archive glob or autodetection)
    - extract or copy it into a temporary directory and locate go.mod
    - optionally apply --replace and --require edits to go.mod
    - for the "vendor" strategy run go mod download, vendor and verify,
      then write the vendor archive into --outdir

    Exits with a non-zero status on any failure.
    """
    log.info(f"Running OBS Source Service: {app_name}")

    parser = argparse.ArgumentParser(
        description=description, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("--strategy", default="vendor")
    parser.add_argument("--archive")
    parser.add_argument("--outdir")
    parser.add_argument("--compression", default=DEFAULT_COMPRESSION)
    parser.add_argument("--basename")
    parser.add_argument("--vendorname", default=DEFAULT_VENDOR_STEM)
    parser.add_argument("--subdir")
    parser.add_argument(
        "--replace",
        action="append",
        help="go mod edit replace argument: 'module=replacement'. Can be used multiple times.",
    )
    parser.add_argument(
        "--require",
        action="append",
        help="go mod edit require argument: 'module@version'. Can be used multiple times.",
    )
    args = parser.parse_args()

    # Fail before doing any work instead of crashing on a None path at the end
    if args.strategy == "vendor" and not args.outdir:
        parser.error("--outdir is required for the vendor strategy")

    archive_args = get_archive_parameters(args)
    vendor_tarname = f"{archive_args['vendorname']}.{archive_args['ext']}"
    if args.archive:
        archive_matches = sorted(Path.cwd().glob(args.archive), reverse=True)
        if not archive_matches:
            log.error(f"No archive file matches {Path.cwd()}/{args.archive}")
            sys.exit(1)
        archive = str(archive_matches[0])  # use string, not PosixPath
    else:
        archive = archive_autodetect()
    log.info(f"Using archive {archive}")

    with tempfile.TemporaryDirectory() as tempdir:
        if os.path.isdir(archive):
            # archive may be an absolute path (glob results are); joining
            # that onto tempdir would yield the source directory itself,
            # so only the final path component is used for the copy.
            source_dirname = os.path.basename(os.path.normpath(archive))
            # Preserve symlinks during copy, some Go tests test symlink loops
            shutil.copytree(
                src=archive, dst=os.path.join(tempdir, source_dirname), symlinks=True
            )
        else:
            extract(archive, tempdir)

        basename = (
            args.basename
            or basename_from_archive(archive)
            or basename_from_archive_name(archive)
        )
        basename = basename.split("/")[-1]

        go_mod_parts = [tempdir, basename]
        if args.subdir:
            go_mod_parts.append(args.subdir)
        # sanitize_subdir exits if the resulting path escapes tempdir
        go_mod_path = sanitize_subdir(
            tempdir, os.path.join(*go_mod_parts, "go.mod")
        )
        if go_mod_path and os.path.exists(go_mod_path):
            go_mod_dir = os.path.dirname(go_mod_path)
            log.info(f"Using go.mod found at {go_mod_path}")
        else:
            log.error(f"File go.mod not found under {os.path.join(tempdir, basename)}")
            sys.exit(1)

        modified = False  # sentinel value indicating go.mod and go.sum modification
        if args.replace:
            # replace one or more modules
            # go.mod and go.sum will be modified and should be included in vendor archive
            modified = replace_modules(args.replace, go_mod_dir)

        if args.require:
            # add require directives to go.mod for one or more modules
            # go.mod and go.sum will be modified and should be included in vendor archive
            modified = require_modules(args.require, go_mod_dir)

        if args.strategy == "vendor":
            # go subcommand sequence:
            # - go mod download
            #   (is sensitive to invalid module versions, try and log warn if fails)
            # - go mod vendor
            #   (also downloads but use separate steps for visibility in OBS environment)
            # - go mod verify
            #   (validates checksums)

            # return value cp is type subprocess.CompletedProcess
            cp = cmd_go_mod(["download"], go_mod_dir)
            if cp.returncode:
                if "invalid version" in cp.stderr:
                    log.warning(
                        "go mod download is more sensitive to invalid module versions than go mod vendor"
                    )
                    log.warning(
                        "if go mod vendor and go mod verify complete, vendoring is successful"
                    )
                else:
                    log.error("go mod download failed")
                    sys.exit(1)

            cp = cmd_go_mod(["vendor"], go_mod_dir)
            if cp.returncode:
                log.error("go mod vendor failed")
                sys.exit(1)

            cp = cmd_go_mod(["verify"], go_mod_dir)
            if cp.returncode:
                log.error("go mod verify failed")
                sys.exit(1)

            log.info(f"Vendor go.mod dependencies to {vendor_tarname}")
            vendor_tarfile = os.path.join(args.outdir, vendor_tarname)
            _write_vendor_archive(
                vendor_tarfile, archive_args, go_mod_dir, go_mod_path, modified
            )


if __name__ == "__main__":
    # Script entry point: set up logging at DEBUG level, then run the service.
    logging.basicConfig(level=logging.DEBUG)
    # `log` becomes a module global here; the functions in this file refer
    # to it by that name.
    log = logging.getLogger(app_name)
    main()
