Commit a1b2c65c authored by Willi Rath

Merge branch 'develop' into 'master'

Develop

See merge request !80
parents a1baf9e3 6e730d06
......@@ -184,7 +184,7 @@ class CurlSingleFile(Renderer):
"""
template = "curl -o \"{prefix}/{file_name}\" \"{url}\"\n"
template = "curl -L -o \"{prefix}/{file_name}\" \"{url}\"\n"
class WgetRecursive(Renderer):
......@@ -216,7 +216,7 @@ class WgetRecursive(Renderer):
"""
template = ("wget -nv -r -c -np -nH --cut-dirs={cut_dirs}"
template = ("wget -e robots=off -nv -r -c -np -nH --cut-dirs={cut_dirs}"
"{accept_files_string}{exclude_directories_string} "
"-P \"{prefix}\" \"{url}\"\n")
......@@ -268,11 +268,66 @@ class WgetRecursiveCred(Renderer):
"""
template = ("wget -nv -r -c -np -nH --cut-dirs={cut_dirs} "
template = ("wget -e robots=off -nv -r -c -np -nH --cut-dirs={cut_dirs} "
"--user=\"${username_var}\" --password=\"${password_var}\" "
"--accept \"{accept_files}\" -P \"{prefix}\" \"{url}\"\n")
class JblobCred(Renderer):
    """Download and install Jblob and download data sets from CERA.

    Parameters
    ----------
    prefix : str
        Full path to the directory that should contain the downloaded data in
        the end.
    cera_data_sets : list
        List of cera-dataset acronyms.  Example: ['ERA40_SFC06_MM_EWSS',
        'ERA40_SFC06_MM_NSSS']
    username_var : str
        Variable name in the credential file to hold the user name.
    password_var : str
        Variable name in the credential file to hold the password.

    """

    # Doubled braces (`${{...}}`) are `str.format` escapes that produce a
    # literal shell `${...}`; `${{{username_var}}}` therefore becomes
    # `${<value of username_var>}`.  `\\` is a single backslash in the
    # resulting script, i.e. a shell line continuation.
    template = textwrap.dedent("""\
        # == Get data from CERA ==
        # Download Jblob to a tmp directory and use it to download a list of
        # given data sets from CERA:
        jblob_tmp_dir=$(mktemp -d)
        pushd "${{jblob_tmp_dir}}"
        curl \\
        "http://cera-www.dkrz.de/jblob/Jblob-2.0.10.zip" \\
        -o JBlob-2.0.10.zip
        unzip JBlob-2.0.10.zip
        popd
        jblob_jar="${{jblob_tmp_dir}}"/Jblob-2.0.10/Jblob.jar
        commons_jar="${{jblob_tmp_dir}}"/Jblob-2.0.10/commons-codec-1.4.jar
        java -cp "${{jblob_jar}}":"${{commons_jar}}" \\
        de.dkrz.cera.applications.JblobClient \\
        --username "${{{username_var}}}" \\
        --password "${{{password_var}}}" \\
        --dataset {cera_data_sets_string} \\
        --dir {prefix}
        rm -rf "${{jblob_tmp_dir}}"
        # == Get data from CERA ==
        """)

    def __init__(self, yaml_dict=None, *args, **kwargs):
        """Construct `cera_data_sets_string` and initialise the renderer.

        Parameters
        ----------
        yaml_dict : dict, optional
            Settings for this renderer.  The entries of `cera_data_sets` are
            joined with single spaces into `cera_data_sets_string`; a missing
            or empty `cera_data_sets` yields an empty string.  The passed
            dict is copied, so the caller's object is left unmodified.
        *args, **kwargs
            Passed on unchanged to the parent initialiser.

        """
        # The declared default is None, which must not crash the membership
        # test / join below.  Work on a copy so the caller's dict is not
        # silently extended with the derived key.
        yaml_dict = {} if yaml_dict is None else dict(yaml_dict)

        # `or []` also covers a YAML key that is present but has no value.
        data_sets = yaml_dict.get("cera_data_sets") or []
        yaml_dict["cera_data_sets_string"] = " ".join(data_sets)

        super(JblobCred, self).__init__(yaml_dict=yaml_dict, *args,
                                        **kwargs)
class PreProcessing(Renderer):
"""Add all lines from the `pre_processing` field."""
......@@ -366,22 +421,29 @@ class ReadmeRenderer(Renderer):
"""\
# {{repo_name}}
{{repo_description}}
**Note:** *This dataset is just a mirror of an external source.
Please make sure to properly credit the original creators of the data
set just as you would do if you'd obtained the data directly from
them.*
To learn more about how to use and access the data, check the central
documentation: <https://git.geomar.de/data/docs/>
-----
{{repo_description}}
{{maybe_acknowledgements}}
{{maybe_citations}}
## People who might now
## People who might know
To learn more, you can ask the following people:
{{people}}
## Known problems
## Known issues
- Open and closed issues are here:
<{{http_path_remote}}/issues?scope=all&state=all>
......@@ -389,6 +451,7 @@ class ReadmeRenderer(Renderer):
- Found a problem? Report it here:
<{{http_path_remote}}/issues/new>
{{known_issues}}
## History
......@@ -415,7 +478,8 @@ class ReadmeRenderer(Renderer):
[meta.yaml](meta.yaml).
*Rendered with
[data_repo_renderer](https://git.geomar.de/data/data_repo_renderer/)
[data_repo_renderer](
https://git.geomar.de/data/tools/data_repo_renderer/)
{version}*
""".format(version=__version__))
......
......@@ -25,6 +25,18 @@ repo_description: |
asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
asdf asdf asdf asdf asdf asdf
known_issues: |
A sample breakdown of known problems and issues.
## May contain full Markdown
- like
- lists
```
and code
```
acknowledgements: |
The TEST_REPO data was provided by EXAMPLE.COM.
......@@ -83,6 +95,15 @@ data:
password_var: "STH_PWD"
method: !!python/name:data_repo_renderer.WgetRecursiveCred
- cera_data_sets:
- ERAIN_SFC12_MM_EWSS_180
- ERAIN_SFC12_MM_NSSS_181
prefix: data
username_var: CERA_USER
password_var: CERA_PWD
method: !!python/name:data_repo_renderer.JblobCred
doc:
- url: https://www.example.com/doc_01.html
......@@ -126,19 +147,42 @@ git pull
git lfs pull
git lfs track "data/**" "suppl_data/**" "extra_data/**"
curl -o "data/nao_station_monthly.txt" \
curl -L -o "data/nao_station_monthly.txt" \
"https://www.example.com/files/nao_station_monthly.txt"
curl -o "data/nao_station_djfm.txt" \
curl -L -o "data/nao_station_djfm.txt" \
"https://www.example.com/files/nao_station_djfm.txt"
curl -o "data/nao_station_annual.txt" \
curl -L -o "data/nao_station_annual.txt" \
"https://www.example.com/files/nao_station_annual.txt"
wget -nv -r -c -np -nH --cut-dirs=2 --accept "*.nc,*.cdf,*.nc.gz" -P "data" \
"https://www.example.com/"
wget -nv -r -c -np -nH --cut-dirs=2 --user="$STH_USER" --password="$STH_PWD" \
--accept "*.nc" -P "data" "https://www.example.com/restricted/"
curl -o "doc/doc_01.html" \
wget -e robots=off -nv -r -c -np -nH --cut-dirs=2 \
--accept "*.nc,*.cdf,*.nc.gz" -P "data" "https://www.example.com/"
wget -e robots=off -nv -r -c -np -nH --cut-dirs=2 \
--user="$STH_USER" --password="$STH_PWD" --accept "*.nc" -P "data" \
"https://www.example.com/restricted/"
# == Get data from CERA ==
# Download Jblob to a tmp directory and use it to download a list of
# given data sets from CERA:
jblob_tmp_dir=$(mktemp -d)
pushd "${jblob_tmp_dir}"
curl \
"http://cera-www.dkrz.de/jblob/Jblob-2.0.10.zip" \
-o JBlob-2.0.10.zip
unzip JBlob-2.0.10.zip
popd
jblob_jar="${jblob_tmp_dir}"/Jblob-2.0.10/Jblob.jar
commons_jar="${jblob_tmp_dir}"/Jblob-2.0.10/commons-codec-1.4.jar
java -cp "${jblob_jar}":"${commons_jar}" \
de.dkrz.cera.applications.JblobClient \
--username "${CERA_USER}" \
--password "${CERA_PWD}" \
--dataset ERAIN_SFC12_MM_EWSS_180 ERAIN_SFC12_MM_NSSS_181 \
--dir data
rm -rf "${jblob_tmp_dir}"
# == Get data from CERA ==
curl -L -o "doc/doc_01.html" \
"https://www.example.com/doc_01.html"
curl -o "doc/doc_02.html" \
curl -L -o "doc/doc_02.html" \
"https://www.example.com/doc_02.html"
gunzip data/*.gz
......
......@@ -11,7 +11,7 @@ def test_base_class_generates_empty_string():
def test_curl_single_file_rendering():
yaml_dict = {"prefix": "pref", "file_name": "fn", "url": "http://url"}
target_string = "curl -o \"pref/fn\" \"http://url\"\n"
target_string = "curl -L -o \"pref/fn\" \"http://url\"\n"
renderer = data_repo_renderer.CurlSingleFile(yaml_dict=yaml_dict)
assert renderer.__str__ == target_string
......@@ -19,7 +19,7 @@ def test_curl_single_file_rendering():
def test_wget_recursive_rendering_with_excluded_dirs_and_accept_files():
yaml_dict = {"prefix": "pref", "cut_dirs": 77, "url": "http://url",
"accept_files": "*.*", "exclude_directories": "/asdf,/zxcv/a"}
target_string = ("wget -nv -r -c -np -nH --cut-dirs=77 "
target_string = ("wget -e robots=off -nv -r -c -np -nH --cut-dirs=77 "
"--accept \"*.*\" -X \"/asdf,/zxcv/a\" -P \"pref\" "
"\"http://url\"\n")
renderer = data_repo_renderer.WgetRecursive(yaml_dict=yaml_dict)
......@@ -28,7 +28,7 @@ def test_wget_recursive_rendering_with_excluded_dirs_and_accept_files():
def test_wget_recursive_rendering_without_excluded_dirs_and_accept_files():
yaml_dict = {"prefix": "pref", "cut_dirs": 77, "url": "http://url"}
target_string = ("wget -nv -r -c -np -nH --cut-dirs=77 "
target_string = ("wget -e robots=off -nv -r -c -np -nH --cut-dirs=77 "
"-P \"pref\" \"http://url\"\n")
renderer = data_repo_renderer.WgetRecursive(yaml_dict=yaml_dict)
assert renderer.__str__ == target_string
......@@ -49,9 +49,41 @@ def test_wget_recursive_cred_rendering():
yaml_dict = {"prefix": "pref", "cut_dirs": 77, "url": "http://url",
"accept_files": "*.*", "username_var": "ASDF_USER",
"password_var": "ASDF_PWD"}
target_string = ("wget -nv -r -c -np -nH --cut-dirs=77 "
target_string = ("wget -e robots=off -nv -r -c -np -nH --cut-dirs=77 "
"--user=\"$ASDF_USER\" --password=\"$ASDF_PWD\" "
"--accept \"*.*\" -P \"pref\" \"http://url\"\n")
renderer = data_repo_renderer.WgetRecursiveCred(yaml_dict=yaml_dict)
print(renderer.yaml_dict, renderer.template)
assert renderer.__str__ == target_string
def test_jblob_cred_rendering():
    """Check that `JblobCred` renders the expected CERA download snippet."""
    config = {
        "prefix": "pref",
        "cera_data_sets": ["example_ds_01", "example_ds_02"],
        "username_var": "ASDF_USER",
        "password_var": "ASDF_PWD",
    }

    # Expected shell snippet: the credential variable names and the
    # space-joined data set list are substituted into the template.
    expected = textwrap.dedent("""\
        # == Get data from CERA ==
        # Download Jblob to a tmp directory and use it to download a list of
        # given data sets from CERA:
        jblob_tmp_dir=$(mktemp -d)
        pushd "${jblob_tmp_dir}"
        curl \\
        "http://cera-www.dkrz.de/jblob/Jblob-2.0.10.zip" \\
        -o JBlob-2.0.10.zip
        unzip JBlob-2.0.10.zip
        popd
        jblob_jar="${jblob_tmp_dir}"/Jblob-2.0.10/Jblob.jar
        commons_jar="${jblob_tmp_dir}"/Jblob-2.0.10/commons-codec-1.4.jar
        java -cp "${jblob_jar}":"${commons_jar}" \\
        de.dkrz.cera.applications.JblobClient \\
        --username "${ASDF_USER}" \\
        --password "${ASDF_PWD}" \\
        --dataset example_ds_01 example_ds_02 \\
        --dir pref
        rm -rf "${jblob_tmp_dir}"
        # == Get data from CERA ==
        """)

    jblob = data_repo_renderer.JblobCred(yaml_dict=config)
    assert jblob.__str__ == expected
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment