__init__.py 15 KB
Newer Older
1 2
"""data_repo_renderer."""

Willi Rath's avatar
Willi Rath committed
3 4
import argparse
from pathlib import Path
Willi Rath's avatar
Willi Rath committed
5
from pkg_resources import get_distribution, DistributionNotFound
Willi Rath's avatar
Willi Rath committed
6
import shutil
7
import stat
8
import textwrap
9 10
import yaml

Willi Rath's avatar
Willi Rath committed
11 12 13 14 15 16 17
# Set the version string from the metadata of the installed distribution that
# carries this module's name.
try:
    __version__ = get_distribution(__name__).version
except DistributionNotFound:
    # The package is not installed, so there is no metadata to read: fall back
    # to a placeholder version.
    __version__ = "vX.X.X"

18

Willi Rath's avatar
Willi Rath committed
19
class Renderer(object):
    """The base class for all renderers.

    Instantiating a renderer performs the rendering: the class variable
    `template` is formatted against the input dictionary and the result is
    appended to the output file (if one was given).  Rendering is best-effort:
    errors are swallowed by `__init__()`.

    Attributes
    ----------
    template : str
        Template string that will be parsed using `str.format()` against an
        input dictionary.
    yaml_dict : dict or None
        The input dictionary passed to the constructor.
    output_file : Path or None
        The file the rendered string is appended to, or `None`.
    string : str or None
        The rendered string, or `None` if it could not be created.

    """

    template = ""

    def __init__(self, yaml_dict=None, output_file=None):
        """Initialize the Renderer.

        This will parse the class var `template` against `yaml_dict`, create
        the parents (if any) of `output_file`, and append the parsed `template`
        to `output_file`.

        Any exception raised along the way is swallowed, so a renderer whose
        input is missing or malformed never aborts the caller.

        Parameters
        ----------
        yaml_dict : dict, optional
            This will usually result from a `yaml.load()`.
        output_file : Path | str, optional
            The full path to the output file.

        """
        # Define every attribute before anything can fail, so that they can
        # still be read after an unsuccessful rendering.
        self.yaml_dict = yaml_dict
        self.output_file = None
        self.string = None
        try:
            self.output_file = (Path(output_file) if output_file else None)
            self._create_string()
            self._maybe_init_output_dir()
            self._append_to_output_file()
        except Exception:
            # Deliberately best-effort, see the docstring.
            pass

    def _create_string(self):
        """Format `template` with `yaml_dict` and store it in `string`.

        `string` is set to `None` if formatting fails (for example because a
        key used by the template is missing or `yaml_dict` is `None`).

        """
        try:
            self.string = self.template.format(**self.yaml_dict)
        except Exception:
            self.string = None

    def _append_to_output_file(self):
        """Append `string` to `output_file` (no-op without an output file)."""
        if self.output_file is not None:
            # The file is opened (and thereby created) before the write is
            # attempted.  If `string` is `None`, the write raises a TypeError
            # which `__init__()` swallows, leaving the file in place.
            with self.output_file.open(mode="a") as f:
                f.write(self.string)

    def _maybe_init_output_dir(self):
        """Create the parent directories of a not yet existing output file."""
        if self.output_file and not self.output_file.exists():
            self.output_file.parent.mkdir(parents=True, exist_ok=True)

    # NOTE(review): because this is a property, `instance.__str__` evaluates
    # to the rendered string itself instead of a bound method, so
    # `str(instance)` raises a TypeError.  Left unchanged because existing
    # callers may read the attribute directly -- confirm before turning it into
    # a plain method.
    @property
    def __str__(self):
        """Return the string created by `_create_string()`.

        This will be equal to the full string written to the `output_file`.

        """
        return self.string
Willi Rath's avatar
Willi Rath committed
79

Willi Rath's avatar
Willi Rath committed
80

Willi Rath's avatar
Willi Rath committed
81
class InitScript(Renderer):
    """Render a full init script for the rendered repo.

    The script runs `git init` and adds `git_path_remote` as the `origin`
    remote, exiting with status 1 if either step fails.  It then adds and
    commits everything and pushes to `master`.  The init script is meant to be
    run once after the rendering.

    Note that the script adds `lfs.activitytimeout 30` to the git config.  For
    a very large number of files and a bad network uplink, this may result in
    very long runtimes before Git LFS finally aborts unsuccessfully.

    Parameters
    ----------
    git_path_remote : str
        Full path or URL to the remote repo to be used for Git fetch and push.

    """

    # One entry per line of the script.  The two trailing empty entries
    # produce the blank line that ends the script.
    template = "\n".join((
        "#!/bin/bash",
        "",
        "# Rendered with data_repo_renderer {version}".format(
            version=__version__),
        "",
        "git init || exit 1",
        "git remote add origin {git_path_remote} || exit 1",
        "git config --add lfs.activitytimeout 30",
        "",
        "git add .",
        "git commit -m \"Initial commit\"",
        "git push -u origin master",
        "",
        "",
    ))


Willi Rath's avatar
Willi Rath committed
117
class UpdateScriptHeader(Renderer):
    """Create a header for the update script.

    This header will indicate the version of the renderer used to create the
    repo, set up central logging to `log/update.log` within the rendered repo,
    and make sure the output directories of the update script exist.

    Parameters
    ----------
    prefixes : str
        A _string_ containing a list of paths to be created.  (Example:  To
        make sure the output directories `doc` and `data` exist, `prefixes`
        should be `"doc data"`.)

    """

    # Only `{version}` is filled in here.  The doubled braces keep
    # `{prefixes}` as a placeholder for the later rendering.
    template = textwrap.dedent(
        """\
        #!/bin/bash

        # Rendered with data_repo_renderer {version}

        mkdir -p log
        exec &> >(tee -a "log/update.log")
        date -I'ns'

        mkdir -p {{prefixes}}
        """).format(version=__version__)
Willi Rath's avatar
Willi Rath committed
140 141


142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
class LoadCredentials(Renderer):
    """Source a credential file.

    Credential files should contain variables to be used by
    `WgetRecursiveCred`'s `username_var` and `password_var`.  Example:

    ```bash
    #!/bin/bash
    export AVISO_USERNAME="asdf"
    export AVISO_PASSWORD="asdf"
    ```

    Parameters
    ----------
    credential_files : list
        List of strings containing the full paths to credential files.
        It's recommended to have them in: `~/.data_repo_creds/<repo_name>.cred`

    """

    # Not used by `_create_string()` below, which formats each line itself.
    template = 'source "{cred_file}"'

    def _create_string(self):
        """Build a leading newline plus one `source` line per file.

        `string` is set to `None` if the lines cannot be built (for example
        because `credential_files` is missing from `yaml_dict`).

        """
        try:
            source_lines = [
                "source \"{}\"\n".format(path)
                for path in self.yaml_dict["credential_files"]
            ]
            self.string = "\n" + "".join(source_lines)
        except Exception:
            self.string = None


Willi Rath's avatar
Willi Rath committed
173
class CurlSingleFile(Renderer):
    """Download one file with `curl`.

    Renders a single `curl -o` line that stores `url` as
    `prefix`/`file_name`.

    Parameters
    ----------
    file_name : str
        Name of the output file.  (Without the path.)
    prefix : str
        Full path to the directory that should contain `file_name` in the end.
    url : str
        Full URL of the file.

    """

    template = 'curl -o "{prefix}/{file_name}" "{url}"\n'
Willi Rath's avatar
Willi Rath committed
188 189 190


class WgetRecursive(Renderer):
    """Recursively retrieve data with `wget`.

    This is meant to crawl web-sites, FTP directories or file listings for data
    files.

    Parameters
    ----------
    prefix : str
        Full path to the directory that should contain the downloaded data in
        the end.
    url : str
        Full URL from which (and below which) the crawling will happen.
    cut_dirs : int | str
        Number of levels which `wget` should ignore from the URL it downloads.
        (Example: Set to `2` to make sure
        `http://www.example.com/asdf/data_file.nc` will go directly to
        `prefix`/`data_file.nc`.)
    accept_files : str (optional)
        String containing a comma-separated list of patterns to be accepted.
        (Example:  Set to `"*.nc,*.nc.gz"` to only download `.nc` and `.nc.gz`
        files.)  If missing, no `--accept` option is rendered.
    exclude_directories : str (optional)
        String containing a comma-separated list of directories to be excluded.
        (Example:  Set to `"/some-path,/some-other-path"` to not download
        anything from `<fqdn>/some-path/` and `<fqdn>/some-other-path/`.)
        If missing, no `-X` option is rendered.

    """

    template = ("wget -nv -r -c -np -nH --cut-dirs={cut_dirs}"
                "{accept_files_string}{exclude_directories_string} "
                "-P \"{prefix}\" \"{url}\"\n")

    def __init__(self, yaml_dict=None, *args, **kwargs):
        """Construct `accept_files_string` and `exclude_directories_string`.

        The two helper entries are added to a shallow copy of `yaml_dict`, so
        the dictionary of the caller is left untouched.  All other arguments
        are passed on to `Renderer.__init__()`.

        """
        # `None` is the declared default, treat it like an empty dictionary.
        options = dict(yaml_dict) if yaml_dict is not None else {}

        if "accept_files" in options:
            options["accept_files_string"] = (
                " --accept \"{}\"".format(options["accept_files"])
            )
        else:
            options["accept_files_string"] = ""

        if "exclude_directories" in options:
            options["exclude_directories_string"] = (
                " -X \"{}\"".format(options["exclude_directories"])
            )
        else:
            options["exclude_directories_string"] = ""

        # Pass the dictionary positionally: as a keyword it would collide with
        # the first element of `args` (e.g. a positional `output_file`).
        super(WgetRecursive, self).__init__(options, *args, **kwargs)
Willi Rath's avatar
Willi Rath committed
240 241


242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
class WgetRecursiveCred(Renderer):
    """Recursively retrieve data with `wget` and credentials.

    This is meant to crawl web-sites, FTP directories or file listings for data
    files.  User name and password are not rendered into the script: the
    script only references the shell variables named by `username_var` and
    `password_var`.

    Parameters
    ----------
    prefix : str
        Full path to the directory that should contain the downloaded data in
        the end.
    url : str
        Full URL from which (and below which) the crawling will happen.
    cut_dirs : int | str
        Number of levels which `wget` should ignore from the URL it downloads.
        (Example: Set to `2` to make sure
        `http://www.example.com/asdf/data_file.nc` will go directly to
        `prefix`/`data_file.nc`.)
    accept_files : str
        String containing a comma-separated list of patterns to be accepted.
        (Example:  Set to `"*.nc,*.nc.gz"` to only download `.nc` and `.nc.gz`
        files.)
    username_var : str
        Variable name in the credential file to hold the user name.
    password_var : str
        Variable name in the credential file to hold the password.

    """

    # One entry per command line argument.  `${username_var}` renders to a
    # `$` followed by the variable name.
    template = " ".join((
        "wget -nv -r -c -np -nH",
        "--cut-dirs={cut_dirs}",
        '--user="${username_var}"',
        '--password="${password_var}"',
        '--accept "{accept_files}"',
        '-P "{prefix}"',
        '"{url}"',
    )) + "\n"


276 277 278 279 280 281 282 283 284 285 286 287
class PreProcessing(Renderer):
    """Add all lines from the `pre_processing` field."""

    template = "\n{pp_lines}"

    def _create_string(self):
        """Terminate each `pre_processing` entry with a newline and render."""
        joined = "".join(
            "{}\n".format(entry) for entry in self.yaml_dict["pre_processing"])
        self.string = self.template.format(pp_lines=joined)


288
class PostProcessing(Renderer):
    """Add all lines from the `post_processing` field."""

    template = "\n{pp_lines}"

    def _create_string(self):
        """Terminate each `post_processing` entry with a newline and render."""
        joined = "".join(
            "{}\n".format(entry)
            for entry in self.yaml_dict["post_processing"])
        self.string = self.template.format(pp_lines=joined)


300 301
class GitUpdate(Renderer):
    """Add block to essentially `git pull` before downloading.

    The block checks out the default branch of `origin`, pulls (including
    Git LFS objects), and tracks `data/**` with Git LFS.

    """

    # One entry per line of the block.  The doubled braces render to a
    # literal `${default_branch}` in the shell script.
    template = "\n".join((
        "",
        "git remote set-head origin -a",
        "default_branch=`git symbolic-ref \\",
        "    --short refs/remotes/origin/HEAD | cut -d/ -f2-`",
        "git checkout ${{default_branch}}",
        "git pull",
        "git lfs pull",
        "git lfs track \"data/**\"",
        "",
        "",
    ))


class GitFinalize(Renderer):
    """Add a block to `add`, `commit`, `push`.

    The `target_branch` will be derived from the output of `git describe` and
    contain a Unix timestamp at nanosecond precision.

    Testing / merging / tagging of the target branch will be left to a human.

    """

    # One entry per line of the block.  The doubled braces render to a
    # literal `${target_branch}` in the shell script.
    template = "\n".join((
        "",
        "target_branch=`git describe 2> /dev/null`_update_`date +%s%N`",
        "git checkout -b ${{target_branch}}",
        "git add .",
        "git commit -m \"Auto-update data\"",
        "git push -u origin ${{target_branch}}",
        "",
    ))


336
class ReadmeRenderer(Renderer):
    """Render a full README for the repo.

    Parameters
    ----------
    repo_name : str
        Name of the repo.  Will also be the title of the README.
    people : str
        Inserted verbatim after `People: `.
    repo_description : str
        (Multiline) string with the repo description.
    http_path_remote : str
        Full URL of the remote.  (To automatically link the issue tracker etc.)
    credential_files : list (optional)
        Paths of credential files.  If present, the README lists them as the
        places where `update.sh` will look for credentials.

    """

    # Only `{version}` is filled in here.  The doubled braces survive as
    # placeholders for the later rendering.
    template = textwrap.dedent(
        """\
        # {{repo_name}}

        People: {{people}}

        {{repo_description}}

        ## Known problems

        - Open and closed issues are here:
          <{{http_path_remote}}/issues?scope=all&state=all>

        - Found a problem?  Report it here:
          <{{http_path_remote}}/issues/new>


        ## History

        - Download logs are in [log/update.log](log/update.log).

        - Also have a look at the
          [activity log]({{http_path_remote}}/activity).


        ## Original Documentation

        See [doc/](doc/) for any of the original documentation.


        ## Maintenance

        Update with
        ```bash
        update.sh
        ```

        {{maybe_credentials}}
        For details on the configuration, look at [update.sh](update.sh) and
        [meta.yaml](meta.yaml).

        *Rendered with
        [data_repo_renderer](https://git.geomar.de/data/data_repo_renderer/)
        {version}*
        """.format(version=__version__))

    def __init__(self, yaml_dict=None, *args, **kwargs):
        """Add credentials file info but use super's init otherwise.

        The helper entry `maybe_credentials` is added to a shallow copy of
        `yaml_dict`, so the dictionary of the caller is left untouched.

        """
        # `None` is the declared default, treat it like an empty dictionary.
        readme_dict = dict(yaml_dict) if yaml_dict is not None else {}

        if "credential_files" not in readme_dict:
            readme_dict["maybe_credentials"] = ""
        else:
            cred_list = "\n".join(
                "- \"{}\"".format(el)
                for el in readme_dict["credential_files"])
            cred_string = ("`update.sh` will look for credentials "
                           "in:\n{}\n").format(cred_list)
            readme_dict["maybe_credentials"] = textwrap.dedent(cred_string)

        # Pass the dictionary positionally: as a keyword it would collide with
        # the first element of `args` (e.g. a positional `output_file`).
        super(ReadmeRenderer, self).__init__(readme_dict, *args, **kwargs)

411

412
def _make_file_executable(file_name):
    """Add the owner's execute permission to `file_name`.

    All other permission bits of the file are kept.

    Parameters
    ----------
    file_name : Path | str
        Path of an existing file.

    """
    target = Path(file_name)
    target.chmod(target.stat().st_mode | stat.S_IXUSR)


Willi Rath's avatar
Willi Rath committed
419
def cli_run_renderer(argvec=None):
    """Run renderer.

    Parses the command line, loads the YAML file and renders `init.sh`,
    `update.sh` and `README.md` into the prefix directory.  Finally, the YAML
    file is copied to `meta.yaml`, the util scripts are copied to `util/`, and
    all scripts are made executable.

    Parameters
    ----------
    argvec : list, optional
        Command line arguments.  Passed on to `ArgumentParser.parse_args()`,
        which falls back to `sys.argv` for `None`.

    """
    # parse command line args
    parser = argparse.ArgumentParser(description="Data repo renderer")
    parser.add_argument("--prefix", help=("Custom path to a directory that "
                                          "will contain the rendered repo. "
                                          "Defaults to: './rendered/'"))
    parser.add_argument("--util", help=("Custom path to a directory with "
                                        "additional scripts. Defaults to: "
                                        "'./util/'"))
    parser.add_argument("yaml_file", type=str,
                        help="Use this YAML file to render repo.")
    args = parser.parse_args(argvec)

    # extract arguments
    util_src = Path(args.util if args.util is not None else "./util/")
    prefix = Path(args.prefix if args.prefix is not None else "./rendered/")
    yaml_file = Path(args.yaml_file)

    # load YAML file
    #
    # Newer PyYAML versions require an explicit loader.  `FullLoader` is what
    # PyYAML 5.x picks if none is given; older versions without it default to
    # `Loader`.
    # NOTE(review): the `method` entries are called further down, so the YAML
    # file presumably uses Python-specific tags -- confirm that `FullLoader`
    # resolves them.  Such a file can reference Python objects, so only render
    # YAML files from trusted sources.
    loader = getattr(yaml, "FullLoader", yaml.Loader)
    with yaml_file.open() as stream:
        yaml_dict = yaml.load(stream, Loader=loader)

    # Set default paths
    update_script = prefix / "update.sh"
    init_script = prefix / "init.sh"
    util_dst = prefix / "util"
    readme_file = prefix / "README.md"

    # clear files that will be rendered (the renderers only ever append)
    for rendered_file in (update_script, init_script, readme_file):
        try:
            rendered_file.unlink()
        except OSError:
            # Best-effort: usually the file just does not exist yet.
            pass

    InitScript(yaml_dict, output_file=init_script)

    UpdateScriptHeader(yaml_dict, output_file=update_script)
    LoadCredentials(yaml_dict, output_file=update_script)
    PreProcessing(yaml_dict, output_file=update_script)
    GitUpdate(yaml_dict, output_file=update_script)

    # append all data download lines, then all doc download lines (if any)
    for section in ("data", "doc"):
        try:
            for entry in yaml_dict[section]:
                entry["method"](entry, output_file=update_script)
        except KeyError:
            # No such section.  Note that an entry without `method` also ends
            # its section silently.
            pass

    PostProcessing(yaml_dict, output_file=update_script)
    GitFinalize(yaml_dict, output_file=update_script)

    ReadmeRenderer(yaml_dict, output_file=readme_file)

    # copy yaml file
    shutil.copy(str(yaml_file), str(prefix / "meta.yaml"))

    # copy util scripts (if any)
    try:
        shutil.copytree(str(util_src), str(util_dst))
    except (OSError, shutil.Error):
        # Best-effort: there may be no util scripts, or the target directory
        # may already exist.
        pass

    # make scripts executable
    _make_file_executable(update_script)
    _make_file_executable(init_script)
    for util_script in util_dst.glob("*.*"):
        _make_file_executable(util_script)