Commit 54ee7f26 authored by Willi Rath

Let wget ignore robots.txt

parent 123c506c
......@@ -216,7 +216,7 @@ class WgetRecursive(Renderer):
"""
template = ("wget -nv -r -c -np -nH --cut-dirs={cut_dirs}"
template = ("wget -e robots=off -nv -r -c -np -nH --cut-dirs={cut_dirs}"
"{accept_files_string}{exclude_directories_string} "
"-P \"{prefix}\" \"{url}\"\n")
......@@ -268,7 +268,7 @@ class WgetRecursiveCred(Renderer):
"""
template = ("wget -nv -r -c -np -nH --cut-dirs={cut_dirs} "
template = ("wget -e robots=off -nv -r -c -np -nH --cut-dirs={cut_dirs} "
"--user=\"${username_var}\" --password=\"${password_var}\" "
"--accept \"{accept_files}\" -P \"{prefix}\" \"{url}\"\n")
......
......@@ -153,10 +153,11 @@ curl -L -o "data/nao_station_djfm.txt" \
"https://www.example.com/files/nao_station_djfm.txt"
curl -L -o "data/nao_station_annual.txt" \
"https://www.example.com/files/nao_station_annual.txt"
wget -nv -r -c -np -nH --cut-dirs=2 --accept "*.nc,*.cdf,*.nc.gz" -P "data" \
"https://www.example.com/"
wget -nv -r -c -np -nH --cut-dirs=2 --user="$STH_USER" --password="$STH_PWD" \
--accept "*.nc" -P "data" "https://www.example.com/restricted/"
wget -e robots=off -nv -r -c -np -nH --cut-dirs=2 \
--accept "*.nc,*.cdf,*.nc.gz" -P "data" "https://www.example.com/"
wget -e robots=off -nv -r -c -np -nH --cut-dirs=2 \
--user="$STH_USER" --password="$STH_PWD" --accept "*.nc" -P "data" \
"https://www.example.com/restricted/"
# == Get data from CERA ==
# Download Jblob to a tmp directory and use it to download a list of
......
......@@ -19,7 +19,7 @@ def test_curl_single_file_rendering():
def test_wget_recursive_rendering_with_excluded_dirs_and_accept_files():
yaml_dict = {"prefix": "pref", "cut_dirs": 77, "url": "http://url",
"accept_files": "*.*", "exclude_directories": "/asdf,/zxcv/a"}
target_string = ("wget -nv -r -c -np -nH --cut-dirs=77 "
target_string = ("wget -e robots=off -nv -r -c -np -nH --cut-dirs=77 "
"--accept \"*.*\" -X \"/asdf,/zxcv/a\" -P \"pref\" "
"\"http://url\"\n")
renderer = data_repo_renderer.WgetRecursive(yaml_dict=yaml_dict)
......@@ -28,7 +28,7 @@ def test_wget_recursive_rendering_with_excluded_dirs_and_accept_files():
def test_wget_recursive_rendering_without_excluded_dirs_and_accept_files():
yaml_dict = {"prefix": "pref", "cut_dirs": 77, "url": "http://url"}
target_string = ("wget -nv -r -c -np -nH --cut-dirs=77 "
target_string = ("wget -e robots=off -nv -r -c -np -nH --cut-dirs=77 "
"-P \"pref\" \"http://url\"\n")
renderer = data_repo_renderer.WgetRecursive(yaml_dict=yaml_dict)
assert renderer.__str__ == target_string
......@@ -49,7 +49,7 @@ def test_wget_recursive_cred_rendering():
yaml_dict = {"prefix": "pref", "cut_dirs": 77, "url": "http://url",
"accept_files": "*.*", "username_var": "ASDF_USER",
"password_var": "ASDF_PWD"}
target_string = ("wget -nv -r -c -np -nH --cut-dirs=77 "
target_string = ("wget -e robots=off -nv -r -c -np -nH --cut-dirs=77 "
"--user=\"$ASDF_USER\" --password=\"$ASDF_PWD\" "
"--accept \"*.*\" -P \"pref\" \"http://url\"\n")
renderer = data_repo_renderer.WgetRecursiveCred(yaml_dict=yaml_dict)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment