Commit 6e730d06 authored by Willi Rath
Browse files

Merge branch '65-let-wget-ignore-robots' into 'develop'

Let wget ignore robots.txt

See merge request !79
parents 123c506c 54ee7f26
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -216,7 +216,7 @@ class WgetRecursive(Renderer):

    """

    template = ("wget -nv -r -c -np -nH --cut-dirs={cut_dirs}"
    template = ("wget -e robots=off -nv -r -c -np -nH --cut-dirs={cut_dirs}"
                "{accept_files_string}{exclude_directories_string} "
                "-P \"{prefix}\" \"{url}\"\n")

@@ -268,7 +268,7 @@ class WgetRecursiveCred(Renderer):

    """

    template = ("wget -nv -r -c -np -nH --cut-dirs={cut_dirs} "
    template = ("wget -e robots=off -nv -r -c -np -nH --cut-dirs={cut_dirs} "
                "--user=\"${username_var}\" --password=\"${password_var}\" "
                "--accept \"{accept_files}\" -P \"{prefix}\" \"{url}\"\n")

+5 −4
Original line number Diff line number Diff line
@@ -153,10 +153,11 @@ curl -L -o "data/nao_station_djfm.txt" \
"https://www.example.com/files/nao_station_djfm.txt"
curl -L -o "data/nao_station_annual.txt" \
"https://www.example.com/files/nao_station_annual.txt"
wget -nv -r -c -np -nH --cut-dirs=2 --accept "*.nc,*.cdf,*.nc.gz" -P "data" \
"https://www.example.com/"
wget -nv -r -c -np -nH --cut-dirs=2 --user="$STH_USER" --password="$STH_PWD" \
--accept "*.nc" -P "data" "https://www.example.com/restricted/"
wget -e robots=off -nv -r -c -np -nH --cut-dirs=2 \
--accept "*.nc,*.cdf,*.nc.gz" -P "data" "https://www.example.com/"
wget -e robots=off -nv -r -c -np -nH --cut-dirs=2 \
--user="$STH_USER" --password="$STH_PWD" --accept "*.nc" -P "data" \
"https://www.example.com/restricted/"

# == Get data from CERA ==
# Download Jblob to a tmp directory and use it to download a list of
+3 −3
Original line number Diff line number Diff line
@@ -19,7 +19,7 @@ def test_curl_single_file_rendering():
def test_wget_recursive_rendering_with_excluded_dirs_and_accept_files():
    yaml_dict = {"prefix": "pref", "cut_dirs": 77, "url": "http://url",
                 "accept_files": "*.*", "exclude_directories": "/asdf,/zxcv/a"}
    target_string = ("wget -nv -r -c -np -nH --cut-dirs=77 "
    target_string = ("wget -e robots=off -nv -r -c -np -nH --cut-dirs=77 "
                     "--accept \"*.*\" -X \"/asdf,/zxcv/a\" -P \"pref\" "
                     "\"http://url\"\n")
    renderer = data_repo_renderer.WgetRecursive(yaml_dict=yaml_dict)
@@ -28,7 +28,7 @@ def test_wget_recursive_rendering_with_excluded_dirs_and_accept_files():

def test_wget_recursive_rendering_without_excluded_dirs_and_accept_files():
    yaml_dict = {"prefix": "pref", "cut_dirs": 77, "url": "http://url"}
    target_string = ("wget -nv -r -c -np -nH --cut-dirs=77 "
    target_string = ("wget -e robots=off -nv -r -c -np -nH --cut-dirs=77 "
                     "-P \"pref\" \"http://url\"\n")
    renderer = data_repo_renderer.WgetRecursive(yaml_dict=yaml_dict)
    assert renderer.__str__ == target_string
@@ -49,7 +49,7 @@ def test_wget_recursive_cred_rendering():
    yaml_dict = {"prefix": "pref", "cut_dirs": 77, "url": "http://url",
                 "accept_files": "*.*", "username_var": "ASDF_USER",
                 "password_var": "ASDF_PWD"}
    target_string = ("wget -nv -r -c -np -nH --cut-dirs=77 "
    target_string = ("wget -e robots=off -nv -r -c -np -nH --cut-dirs=77 "
                     "--user=\"$ASDF_USER\" --password=\"$ASDF_PWD\" "
                     "--accept \"*.*\" -P \"pref\" \"http://url\"\n")
    renderer = data_repo_renderer.WgetRecursiveCred(yaml_dict=yaml_dict)