Skip to content
Snippets Groups Projects

get_dataset_urls.py

  • Clone with SSH
  • Clone with HTTPS
  • Embed
  • Share
    The snippet can be accessed without any authentication.
    Authored by Richard Olav Rud

    Get dataset URLs (OPeNDAP and direct download) from metadata records served over OAI-PMH.

    get_dataset_urls.py 1.14 KiB
    try:
        from xml.etree import cElementTree as ET
    except ImportError:
        # cElementTree was deprecated in Python 3.3 and removed in 3.9;
        # ElementTree exposes the same API.
        from xml.etree import ElementTree as ET

    import requests
    from gis_metadata.iso_metadata_parser import IsoParser
    from gis_metadata.utils import COMPLEX_DEFINITIONS, CONTACTS, format_xpaths, ParserProperty
    
    # OAI-PMH GetRecord request for a single record in the iso19115 format.
    url = 'https://ebas-oai-pmh.nilu.no/oai/provider?verb=GetRecord&metadataPrefix=iso19115&identifier=oai:ebas-oai-pmh.nilu.no:SK0007R.19991231070000.20181210133000.filter_1pack..aerosol.52w.1w.SK01L_f1p_hm_07.SK01L_GF_AAS.lev2.nc'

    # Without a timeout requests waits indefinitely on an unresponsive server;
    # raise_for_status() turns HTTP error codes into an exception instead of
    # letting an error page reach the XML parser.
    response = requests.get(url, timeout=60)
    response.raise_for_status()

    #OAI-PMH/metadata
    xml = response.content.decode('utf-8')
    root = ET.fromstring(xml)

    # Get content that is defined as the iso metadata: the first child of
    # OAI-PMH/GetRecord/record/metadata. The element is looked up by its
    # qualified name instead of by child position, so an OAI-PMH <error>
    # response or a record without metadata produces a descriptive error
    # rather than an IndexError.
    oai_ns = '{http://www.openarchives.org/OAI/2.0/}'
    metadata_container = root.find(
        '{0}GetRecord/{0}record/{0}metadata'.format(oai_ns))
    if metadata_container is None or len(metadata_container) == 0:
        oai_error = root.find('{0}error'.format(oai_ns))
        reason = 'no metadata element in response'
        if oai_error is not None:
            reason = '{0}: {1}'.format(
                oai_error.get('code'), (oai_error.text or '').strip())
        raise ValueError(
            'OAI-PMH GetRecord returned no ISO metadata ({0})'.format(reason))
    metadata = ET.tostring(metadata_container[0])
    
    class CustomIsoParser(IsoParser):
        """IsoParser subclass that adds a ``metadata_dataset_url`` property.

        The property is mapped to the ``linkage/URL`` of the connect point
        found under ``SV_ServiceIdentification/containsOperations``.
        """

        def _init_data_map(self):
            """Run the parent's data-map setup, then register the extra property."""
            super(CustomIsoParser, self)._init_data_map()

            prop_name = 'metadata_dataset_url'
            url_xpath = 'identificationInfo/SV_ServiceIdentification/containsOperations/SV_OperationMetadata/connectPoint/CI_OnlineResource/linkage/URL'

            # Map the property name to its XPath and add the name to the
            # parser's collection of metadata properties.
            self._data_map[prop_name] = url_xpath
            self._metadata_props.add(prop_name)
    
    # Parse the ISO record extracted from the OAI-PMH response with the
    # extended parser.
    custom_iso_from_file = CustomIsoParser(metadata)

    # Read the custom property registered by CustomIsoParser and print it.
    dataset_urls = custom_iso_from_file.metadata_dataset_url
    print(dataset_urls)
    0% Loading or .
    You are about to add 0 people to the discussion. Proceed with caution.
    Finish editing this message first!
    Please register or sign in to comment