Commit c534c585 authored by Everardo Gonzalez

added feature file names

parent 913f8708
File deleted
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from urllib.request import urlopen, urlretrieve, quote\n",
"from urllib.parse import urljoin\n",
"from bs4 import BeautifulSoup"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Fetch the Zenodo record page and decode the response body as UTF-8 text.\n",
"# The context manager closes the HTTP response even if read()/decode() raises.\n",
"url = 'https://zenodo.org/record/1471639#.XO5GriaxUUF'\n",
"with urlopen(url) as response:\n",
"    html = response.read().decode('utf-8')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"failed to download file GL_DIST_TO_RIDGE_KM_PLATES.r250km.mlg.5m.nc\n"
]
}
],
"source": [
"# Find every <a class=\"filename\"> link on the page and try to download it\n",
"# into the current working directory.\n",
"\n",
"# The built-in \"html.parser\" backend is used; the original author noted it\n",
"# worked better than \"lxml\" for this page.\n",
"soup = BeautifulSoup(html, \"html.parser\")\n",
"\n",
"ii = 0  # number of files downloaded successfully so far\n",
"for link in soup.select('a.filename'):\n",
"    href = link.get('href')\n",
"    if not href:\n",
"        # anchor without a target: nothing to download\n",
"        continue\n",
"\n",
"    # Drop the last 11 characters of the link (presumably the '?download=1'\n",
"    # query suffix -- TODO confirm) and keep only the final path component.\n",
"    filename = href.split('/', 1)[-1][0:-11]\n",
"    filename = filename.split('/')[-1]\n",
"\n",
"    # quote() percent-encodes the href, so the same suffix is presumably 15\n",
"    # characters long here ('%3Fdownload%3D1') -- TODO confirm.\n",
"    href = urljoin(url, quote(href))[0:-15]\n",
"    try:\n",
"        urlretrieve(href, filename)\n",
"    except Exception:\n",
"        # Best effort: report the failure and move on to the next file.\n",
"        # Catching Exception rather than using a bare except keeps a kernel\n",
"        # interrupt (KeyboardInterrupt) able to stop the loop.\n",
"        print('failed to download file ' + filename)\n",
"        continue\n",
"\n",
"    # progress message every 50 successful downloads\n",
"    if ii % 50 == 0:\n",
"        print('Downloading file ' + str(ii))\n",
"    ii += 1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment