Commit 53958a41 authored by Richard Olav Rud's avatar Richard Olav Rud
Browse files

Update thredds harvester

parent 8652849c
# ebas-thredds-harvest
Command line program for retrieving EBAS data from THREDDS
\ No newline at end of file
Command line program for retrieving EBAS data from THREDDS
## Usage
> usage: SCRIPT.py [-h] --sample_type SAMPLE_TYPE --station_code STATION_CODE [--protocol PROTOCOL]
> optional arguments:
> -h, --help show this help message and exit
> --sample_type SAMPLE_TYPE, -s SAMPLE_TYPE
> --station_code STATION_CODE, -c STATION_CODE
> --protocol PROTOCOL, -p PROTOCOL
## Examples
> Replace `SCRIPT.py` with the file name of the harvester script.
> python SCRIPT.py -s "ethane" -c "NO0042G" -p "http"
> python SCRIPT.py -s "particle_number_size_distribution" -c "NO0042G" -p "http"
> python SCRIPT.py -s "ethane" -c "NO0042G"
import datetime,re
import threddsclient
import argparse
from utilities import get_opendap_urls_by_parameter
# Usage: python3 SCRIPT.py -s ethane -c NO0042G [-p http]
# (SCRIPT.py stands for this file's name.)
# TODO: consider adding the THREDDS catalog URL as a CLI argument
parser = argparse.ArgumentParser()
parser.add_argument('--sample_type', '-s', required=True)
parser.add_argument('--station_code', '-c', required=True)
# Default to 'opendap' so an omitted flag does not pass None downstream.
parser.add_argument('--protocol', '-p', required=False, default='opendap')
args = parser.parse_args()

# NOTE(review): the catalog URL passed here is empty -- confirm which THREDDS
# catalog should be harvested.
all_opendap_urls = threddsclient.opendap_urls('')

# Keep only the datasets whose URL contains the requested station code.
station_opendap_urls = [
    url for url in all_opendap_urls if args.station_code in url
]

opendap_urls = get_opendap_urls_by_parameter(
    station_opendap_urls, args.sample_type, args.protocol)

# One URL per line on stdout.
for url in opendap_urls:
    print(url)
\ No newline at end of file
# -*- coding: utf-8 -*-
import netCDF4
import threddsclient
"""## Function for getting EBAS data as opendap or direct download for a given parameter
The parameter is an EBAS component name.
Specify the component name and the protocol.
The default protocol is "opendap", which returns OPeNDAP URLs that give remote access to the data without having to download the physical file.
Alternatively, "http" can be specified to get links for direct download of the netCDF files.
"""
def get_opendap_urls_by_parameter(
        station_opendap_urls, parameter, protocol='opendap'):
    """Return the dataset URLs that contain a variable matching ``parameter``.

    Each URL in ``station_opendap_urls`` is opened with ``netCDF4.Dataset``
    and kept when at least one of its variable names starts with
    ``parameter`` (prefix match against the variable list).

    Args:
        station_opendap_urls: Iterable of dataset URLs to inspect.
        parameter: Prefix looked for among each dataset's variable names.
        protocol: ``'http'`` returns ``baseurl`` joined with the last path
            segment of every matching URL; any other value (default
            ``'opendap'``) returns the matching URLs unchanged.

    Returns:
        list: Matching URLs in input order, without duplicates.
    """
    datasets = []
    for url in station_opendap_urls:
        if url in datasets:
            # Already matched; no need to open the same dataset again.
            continue
        # Close the handle as soon as the variable names have been checked.
        with netCDF4.Dataset(url) as dataset:
            matched = any(
                var.startswith(parameter) for var in dataset.variables)
        if matched:
            datasets.append(url)

    if protocol != 'http':
        return datasets

    # NOTE(review): baseurl is empty, so the 'http' result is only the file
    # names -- confirm the intended file-server prefix.
    baseurl = ''
    # [-1] instead of [1] so a URL without any '/' does not raise IndexError.
    return [
        "{0}{1}".format(baseurl, ds.rsplit('/', 1)[-1]) for ds in datasets
    ]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment