Commit 4e773881 authored by Everardo Gonzalez

added feature file names

parent dabf7132
......@@ -3,14 +3,13 @@
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"from urllib.request import urlopen, urlretrieve, quote\n",
"from urllib.parse import urljoin\n",
"from bs4 import BeautifulSoup"
"from bs4 import BeautifulSoup\n",
"from os import listdir"
]
},
{
......@@ -19,6 +18,9 @@
"metadata": {},
"outputs": [],
"source": [
"# files already in directory\n",
"dir_files = listdir(\".\")\n",
"\n",
"# open and read URL\n",
"url = 'https://zenodo.org/record/1471639#.XO5GriaxUUF'\n",
"u = urlopen(url)\n",
......@@ -30,49 +32,87 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"failed to download file CM_MANTLE_VP_MS_CRUST1.r50km.men.5m.nc\n",
"Downloading file 50\n",
"Downloading file 100\n",
"Downloading file 150\n",
"Downloading file 200\n",
"Downloading file 250\n",
"Downloading file 300\n",
"Downloading file 350\n",
"Downloading file 400\n",
"Downloading file 450\n",
"Downloading file 500\n",
"Downloading file 550\n",
"Downloading file 600\n",
"Downloading file 650\n"
]
}
],
"source": [
"# look for html class \"filename\" in URL and try download\n",
"\n",
"# soup = BeautifulSoup(html, \"lxml\")\n",
"\n",
"soup = BeautifulSoup(html, \"html.parser\") # works better \n",
"\n",
"soup = BeautifulSoup(html, \"html.parser\") \n",
"ii = 0\n",
"for link in soup.select('a.filename'):\n",
" ii+=1\n",
" \n",
" if ii%50 == 0:\n",
" print('Downloading file ' + str(ii))\n",
"\n",
" href = link.get('href')\n",
" \n",
" filename = href.split('/', 1)[-1][0:-11]\n",
" filename = filename.split('/')[-1]\n",
" \n",
" if filename in dir_files:\n",
" continue\n",
"\n",
" href = urljoin(url, quote(href))[0:-15]\n",
" \n",
" try:\n",
" urlretrieve(href, filename)\n",
" except:\n",
" print('failed to download file ' + filename)\n"
" print('failed to download file ' + filename)\n",
" continue\n",
" \n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"display_name": "Python 3",
"language": "python",
"name": "python2"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
"pygments_lexer": "ipython3",
"version": "3.7.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
"nbformat_minor": 1
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment