In [None]:
'''
   Copyright 2023 Spacebel s.a.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
'''

# OpenSearch with Atom

This notebook explains the use of the OpenSearch interface with Atom response format to access collection and granule metadata according to the two-step mechanism recommended by CEOS Best Practices [[RD1]](#RD1).

## Overview  
 The subjects covered in this notebook are:    
 * [Collection Search](#collection-search) 
   * [Access API Description](#access-api-description)
   * [Search by free text](#search-by-free-text)
   * [Search by title](#search-by-title)
   * [Search by platform](#search-by-platform)
   * [Search by instrument](#search-by-instrument)
   * [Search by organisation](#search-by-organisation)
   * [Search by identifier](#search-by-identifier)
   * [Search by concept](#search-by-concept)
 * [Collection Properties](#collection-properties) 
   * [Collection geometry](#collection-geometry) 
   * [Collection temporal extent](#collection-temporal-extent) 
   * [Collection identifier](#collection-identifier) 
   * [Collection keywords](#collection-keywords) 
   * [Collection other representations](#collection-other-representations) 
   * [Collection embedding other formats](#collection-embedding-other-formats) 
   * [Collection related documentation](#collection-related-documentation) 
 * [Granule Search](#granule-search) 
   * [Access API Description](#granule-access-api-description)
   * [Search by bounding box](#granule-search-by-bounding-box)
   * [Search by geometry](#granule-search-by-geometry)
   * [Search by temporal extent](#granule-search-by-temporal-extent)
   * [Search by identifier](#granule-search-by-identifier)
 * [Granule Properties](#granule-properties) 
   * [Geometry](#geometry) 
   * [Temporal extent](#temporal-extent) 
   * [Granule identifier](#granule-identifier) 
   * [Quicklook](#quicklook)
   * [Granule download](#granule-download)
   * [Other representations](#other-representations) 
   * [Embedding other formats](#embedding-other-formats) 
 * [Advanced Topics](#advanced-topics)
   * [Result paging](#result-paging)
   * [Sorting results](#sorting-results)
   * [Faceted search](#faceted-search)
   * [Content negotiation](#content-negotiation)
 * [Further Reading](#further-reading)

In [None]:
import re
import folium
import json, requests, xml
import pandas as pd
import ipywidgets as widgets

from xml.dom import minidom
from IPython.display import Image
from xml.etree import ElementTree
from IPython.display import HTML
from IPython.display import Markdown as md

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Catalogue endpoints offered to the user.  Each row gives the host, the path
# of its top-level OpenSearch Description Document (OSDD) and the path of the
# matching Explain document.  Both lists below are derived from this table, so
# a position in one list selects the corresponding entry in the other.
_OSDD_VIA_API = '/api?httpAccept=application%2Fopensearchdescription%2Bxml'
_EXPLAIN_VIA_API = '/api?httpAccept=application/sru%2Bxml'
_OSDD_STATIC = '/opensearch/description.xml'
_EXPLAIN_STATIC = '/opensearch/request'

_ENDPOINTS = (
    ('https://geo.spacebel.be', _OSDD_VIA_API, _EXPLAIN_VIA_API),
    ('https://emc.spacebel.be', _OSDD_VIA_API, _EXPLAIN_VIA_API),
    ('https://eocat.spacebel.be', _OSDD_VIA_API, _EXPLAIN_VIA_API),
    ('https://eocat.esa.int', _OSDD_STATIC, _EXPLAIN_STATIC),
    ('https://fedeo.ceos.org', _OSDD_STATIC, _EXPLAIN_STATIC),
    ('https://geo.spacebel.be', _OSDD_STATIC, _EXPLAIN_STATIC),
)

# Top-level OSDD URLs from which the user will be able to choose.
url_osdd_choices = [host + osdd_path for host, osdd_path, _ in _ENDPOINTS]

# Explain document URLs, in the same order as url_osdd_choices.
url_explain_choices = [host + explain_path for host, _, explain_path in _ENDPOINTS]

# Verification of SSL certificate is to be set to False for the eocat endpoint to work.
verify_ssl = True

In [None]:
#:tags: [remove-cell]
def load_dataframe( resp ):
  """Summarise the Atom entries of an OpenSearch response as a DataFrame.

  Parameters:
    resp: object with a ``text`` attribute holding the Atom feed as XML
          (for example the result of ``requests.get``).

  Returns:
    pandas.DataFrame with one row per ``<atom:entry>`` and the columns
    'dc:identifier', 'atom:title', 'atom:updated', 'atom:link[rel="search"]',
    'atom:link[rel="enclosure"]', 'atom:link[rel="icon"]' and 'dc:date', in
    that order.  The order is kept stable because the notebook also addresses
    columns by position.  Elements or links missing from an entry are
    reported as an empty string.
  """
  atom = '{http://www.w3.org/2005/Atom}'
  dc = '{http://purl.org/dc/elements/1.1/}'
  columns = ['dc:identifier', 'atom:title', 'atom:updated', 'atom:link[rel="search"]',
             'atom:link[rel="enclosure"]', 'atom:link[rel="icon"]', 'dc:date']

  def text_of(entry, tag):
    # Text of the first child with this tag, '' when the child is absent.
    el = entry.find(tag)
    return el.text if el is not None else ''

  def href_of(entry, predicate):
    # href of the first atom:link matching the predicate, '' when there is none.
    link = entry.find(atom + 'link' + predicate)
    return link.attrib.get('href', '') if link is not None else ''

  # Parse the response that was passed in, not a notebook-level variable.
  feed = ElementTree.fromstring(resp.text)

  # Collect all rows first and build the frame once, instead of concatenating
  # a one-row frame per entry.
  records = [
    {
      'dc:identifier': text_of(entry, dc + 'identifier'),
      'atom:title': text_of(entry, atom + 'title'),
      'atom:updated': text_of(entry, atom + 'updated'),
      'atom:link[rel="search"]': href_of(entry, '[@rel="search"][@type="application/opensearchdescription+xml"]'),
      'atom:link[rel="enclosure"]': href_of(entry, '[@rel="enclosure"]'),
      'atom:link[rel="icon"]': href_of(entry, '[@rel="icon"]'),
      'dc:date': text_of(entry, dc + 'date'),
    }
    for entry in feed.findall(atom + 'entry')
  ]

  return pd.DataFrame(records, columns=columns)

def load_facet( root, facet ):
  """Tabulate the terms of one facet found in a faceted search response.

  Parameters:
    root:  ElementTree element under which the ``<sru:facet>`` elements are
           searched.
    facet: value of the ``<sru:index>`` child identifying the facet.

  Returns:
    pandas.DataFrame indexed by 'name' (the ``<sru:actualTerm>`` text) with a
    single 'count' column holding the ``<sru:count>`` value as an integer.
    The frame is empty when the facet is not present under ``root``.
  """
  ns = {'sru': 'http://a9.com/-/opensearch/extensions/sru/2.0/'}
  el = root.find('.//sru:facet[sru:index="' + facet + '"]', ns)

  records = []
  # A facet that the server did not return yields an empty table rather than
  # an AttributeError on None.
  if el is not None:
    for term in el.findall('.//sru:term', ns):
      records.append({
        'name': term.find('sru:actualTerm', ns).text,
        'count': int(term.find('sru:count', ns).text),
      })

  # Build the frame once from all rows and return a new, indexed frame.
  return pd.DataFrame(records, columns=['name', 'count']).set_index('name')


def show_features_on_map( georss_box, georss_polygon ):
  """Return a folium map showing a GeoRSS polygon.

  Parameters:
    georss_box:     whitespace-separated bounding box coordinates; the mean of
                    values 0 and 2 and the mean of values 1 and 3 are used as
                    the map center.
    georss_polygon: whitespace-separated coordinate list, consumed in pairs to
                    form the polygon vertices.

  Returns:
    folium.Map.  When either argument is empty (or None), or the polygon has
    no coordinate pair, an empty map at the default center is returned.
  """
  # Default center, used when no usable bounding box is available.
  center = [50.85, 4.3488]

  coords = georss_polygon.split() if georss_polygon else []
  # Pair consecutive values; zip ignores a dangling last value instead of
  # raising IndexError.
  points = [ (float(a), float(b)) for a, b in zip(coords[0::2], coords[1::2]) ]

  if not georss_box or not points:
    m = folium.Map(location=center,zoom_start=5)
    folium.LayerControl().add_to(m)
    return m

  # Use the center of the bounding box; keep the default center when the box
  # does not hold four numeric values.
  corners = georss_box.split()
  try:
    center = [ (float(corners[0])+float(corners[2]))/2.0 , (float(corners[1])+float(corners[3]))/2.0 ]
  except (IndexError, ValueError):
    pass

  m = folium.Map(location=center,zoom_start=2)
  folium.Polygon(points).add_to(m)
  # Zoom the view so that the whole polygon is visible.
  folium.FitBounds(points, padding=(100,100)).add_to(m)
  folium.LayerControl().add_to(m)

  return m

In [None]:
def get_api_request(template, os_querystring):
  """Fill an OpenSearch URL template and drop the parameters left empty.

  Parameters:
    template:       URL template containing ``{name}`` / ``{name?}``
                    placeholders.
    os_querystring: dict mapping placeholder names (e.g. 'geo:uid', 'count')
                    to the string value to insert.

  Returns:
    The request URL as a string, with every supplied value inserted and all
    remaining placeholders removed together with their HTTP query parameter.

  The template and the resulting request are printed, as is an error line for
  each supplied parameter that does not occur in the template.
  """
  print("URL template: " + template)

  # Limitation: the OSDD may use a default namespace for OpenSearch instead of using "os".
  # We make a simple correction here allowing to use OpenSearch queryables without namespace in requests.
  # A more generic solution to obtain namespaces from the OSDD and compare them with user supplied namespaces is future work.
  OS_NAMESPACE = 'os:'

  def substitute(name, value, text):
    # Match exactly {name} or {name?}.  A prefix match would also fill
    # unrelated placeholders such as {eo:platformSerialIdentifier?} when
    # name is 'eo:platform'.  The name is escaped and the value is inserted
    # through a function so that neither is interpreted as regex syntax.
    return re.subn(r'\{' + re.escape(name) + r'\??\}', lambda _match: value, text)

  # perform substitutions in template
  for p in os_querystring:
      value = os_querystring[p]
      print("  .. replacing:", p, "by", value)
      template, n = substitute(p, value, template)
      if n < 1:
          if ':' in p:
              print("ERROR: parameter " + p + " not found in template.")
          else:
              # try with explicit namespace
              template, n = substitute(OS_NAMESPACE + p, value, template)
              if n < 1:
                  print("ERROR: parameter " + OS_NAMESPACE+p + " not found in template.")

  # remove empty search parameters
  template = re.sub(r'&?[a-zA-Z]*=\{.*?\}', '', template)

  # remove remaining empty search parameters which did not have an HTTP query parameter attached (e.g. /{time:end}).
  template = re.sub(r'.?\{.*?\}', '', template)

  print("API request: " + template)

  return (template)

The Notebook can be used with a number of different endpoints.  Change the OSDD to be used for collection search by executing the notebook.

In [None]:
# Drop-down offering the available top-level OSDD URLs; the first one is
# preselected.  The widget is the last expression so that it is rendered.
# NOTE(review): the name `list` shadows the builtin; it is kept because the
# next cell reads `list.value` and `list.index`.
list = widgets.Dropdown(
    options=url_osdd_choices,
    description="Select OSDD",
    index=0,
)
list

In [None]:
# Resolve the drop-down selection: the chosen OSDD URL and, through the
# position shared by both lists, the corresponding Explain document URL.
url_osdd, url_explain = list.value, url_explain_choices[list.index]

url_osdd

##  Collection Search

### Access API Description

In [None]:
# Render the sentence as Markdown with the selected OSDD URL as a link.
md(f"The OpenSearch Description Document is accessible at the fixed location [{url_osdd}]({url_osdd}) and contains the URL template to be used for collection search.")

```{index} single: collection search ; OSDD
```

**Example: 2.1**  
>  Access the API Description in OpenSearch Description Document (OSDD) format.

In [None]:
# Download the OSDD and show it as an indented XML listing.
response = requests.get(url_osdd, verify=bool(verify_ssl))

osdd_dom = minidom.parseString(response.text)
xmlstr = osdd_dom.toprettyxml(indent='  ', newl='')
md(f"```xml\n{xmlstr}\n```\n")

In [None]:
#:tags: [remove-input]
# Render the sentence as Markdown with the Explain document URL as a link.
md(f"The Explain Document is accessible at the location [{url_explain}]({url_explain}) and contains additional information about the API such as default values, definitions of available record schemas at `/explain/responseFormats` etc.")

```{index} single: OSDD ; relations
```
```{index} single: OSDD ; service (rel)
```

**Example: 2.2**  
>  Extract all URL templates from the OSDD. 

The OSDD may contain URL templates for multiple values for `type` (media types) and multiple values for `rel` (relations).  The `rel` values have the following meaning (if present) as defined in [[RD1]](#RD1). :

| **rel** |  **description**  | 
| ----- | ----- |
| collection |  URL template to be used for collection search.  |
| results |  URL template to be used for granule search (default).  |
| service |  URL template to be used for service or application search.  |

In [None]:
# Parse the OSDD fetched above (ElementTree is imported in the notebook's import cell).
root = ElementTree.fromstring(response.text)
ns = {'os': 'http://a9.com/-/spec/opensearch/1.1/'}

# One row per <os:Url> element.  Attributes are read independently so that a
# missing `rel` no longer blanks the `type` of the same element, and the table
# gets its own name instead of rebinding `list` (the drop-down widget).
url_templates = pd.DataFrame(
    [ { 'rel': url.attrib.get('rel', ''), 'type': url.attrib.get('type', '') }
      for url in root.findall('os:Url', ns) ],
    columns=['rel', 'type'])

url_templates


Examples of responses for all media types listed above can be found in the [OGC API Features response formats](all-response-formats) section of the documentation.  When selecting the OSDD URL template for a corresponding media type, its value will be prefilled in the template to provide the correct OGC API Features request where only OpenSearch parameters have to be inserted still.  The remainder of the current document focusses on the Atom response format.

```{index} single: response format ; Atom (collection)
```
```{index} single: collection search ; Atom (response format)
```
```{index} single: OSDD ; collection (rel)
```

<a name='selecting-mediatype-in-osdd'></a>   
**Example: 2.3**  
>  Extract collection search URL template for Atom  

Extract the URL template for collection search `rel="collection"` corresponding to the media type `type="application/atom+xml"` of the search result.

In [None]:
# Parse the OSDD (ElementTree is imported in the notebook's import cell).
root = ElementTree.fromstring(response.text)

ns = {'os': 'http://a9.com/-/spec/opensearch/1.1/'}
collection_url_atom = root.find('os:Url[@rel="collection"][@type="application/atom+xml"]', ns)

# Fail with an explicit message instead of an AttributeError on None when the
# selected endpoint does not advertise an Atom collection search template.
if collection_url_atom is None:
    raise ValueError('The OSDD contains no Url with rel="collection" and type="application/atom+xml".')

collection_template = collection_url_atom.attrib['template']
collection_template

### Search by free text

```{index} single: collection search ; searchTerms
```

**Example: 2.4**  
>  Search collections by free text {searchTerms}

In [None]:
# Free text search, limited to a single result.
osquerystring = {'count': '1', 'searchTerms': 'forestry'}

request_url = get_api_request(collection_template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

# Show the Atom response as an indented XML listing.
response_dom = minidom.parseString(response.text)
xmlstr = response_dom.toprettyxml(indent='   ', newl='')
md(f"```xml\n{xmlstr}\n```\n")

<a name='Collection-Search-by-Title'></a>     
### Search by title

```{index} single: collection search ; dc:title
```

**Example: 2.5**  
>  Search collections by title {dc:title}

In [None]:
# Search on the collection title.
osquerystring = {'dc:title': 'Column'}

request_url = get_api_request(collection_template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

# Print the title of every entry in the result page.
atom = '{http://www.w3.org/2005/Atom}'
rt = ElementTree.fromstring(response.text)
for r in rt.findall(atom + 'entry'):
    title = r.find(atom + 'title').text
    print("title: ", title)

<a name='Collection-Search-by-Platform'></a>     
### Search by platform

The `<Url>` element contains additional information about the parameters available in the template using the OpenSearch Parameter extension syntax.  For example, the `eo:platform` parameter provides the following additional information, including the list of possible values for which the server can provide results.

**Example: 2.7**  
>  Extract available values for the `eo:platform` parameter from the OSDD.

In [None]:
# Look up the <Parameter> element describing eo:platform inside the Atom
# collection <Url> element and show it as XML.
param_ns = '{http://a9.com/-/spec/opensearch/extensions/parameters/1.0/}'
el = collection_url_atom.find(param_ns + 'Parameter[@value="{eo:platform}"]')

xmltxt = ElementTree.tostring(el, encoding='unicode', method='xml')
md(f"```xml\n{xmltxt}\n```\n")

# todo: add output scrolling tag to the cell output tag: "output_scroll"
# insert Markdown string in an HTML frame with scrollbar ?

```{index} single: collection search ; eo:platform
```

**Example: 2.8**  
>  Search collections by platform {eo:platform} [[RD3]](#RD3). 

Search parameters which are optional can be skipped in the search template or their value can be left empty.
Prepare a search request by replacing all mandatory search parameters with a value.  

By default, each `<atom:entry>` represents one search result (an EO collection) and the search response contains faceted search results under the element `<sru:facetedResults>`.  The faceted search information groups the number of results by `platform`, by `instrument`, by `organisation` etc.  The original metadata for the collection is not embedded in the response but available as an `atom:link`.

In [None]:
# Search on platform, limited to two results.
osquerystring = {'eo:platform': 'proba-1', 'count': '2'}

request_url = get_api_request(collection_template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

# Show the Atom response as an indented XML listing.
response_dom = minidom.parseString(response.text)
xmlstr = response_dom.toprettyxml(indent='   ', newl='')
md(f"```xml\n{xmlstr}\n```\n")

The above response indicates the number of results (`totalResults`) and contains information about a number of collections including a link to the OSDD document to use for granule search.  If more than TBD records are found, then the results are returned in pages and paging links are included to navigate to the next results (`rel=next`).

In [None]:
root = ElementTree.fromstring(response.text)

# extract total results
el = root.find('{http://a9.com/-/spec/opensearch/1.1/}totalResults')
print('totalResults: ', el.text)

dataframe = load_dataframe(response)

# The granule search cells need the OSDD link of the first collection; stop
# with an explicit message when the search returned no entry.
if dataframe.empty:
    raise ValueError('The collection search returned no entry, so no granule OSDD link is available.')

# Address the link by column label rather than by position.
url_osdd_granules = dataframe.at[0, 'atom:link[rel="search"]']
print(url_osdd_granules)

# Last expression of the cell so that the table is actually displayed.
dataframe.head(20)

```{index} single: collection search ; sru:recordSchema
```
```{index} single: collection search ; server-choice
```

**Example: 2.9**  
>  Obtain allowed values for {sru:recordSchema} from the OSDD.   

 
The OSDD template lists the `sru:recordSchema` values that can be used in a collection search request.  They correspond to metadata formats that can be directly embedded in the Search response.  The value `server-choice` can be used to allow the server to propose an appropriate metadata encoding.

The shortnames used for some of the recordSchemas are for backward compatibility and they are explained in the Explain document.

In [None]:
# Look up the <Parameter> element describing sru:recordSchema inside the Atom
# collection <Url> element and show it as XML.
param_ns = '{http://a9.com/-/spec/opensearch/extensions/parameters/1.0/}'
el = collection_url_atom.find(param_ns + 'Parameter[@value="{sru:recordSchema}"]')

xmltxt = ElementTree.tostring(el, encoding='unicode', method='xml')
md(f"```xml\n{xmltxt}\n```\n")

```{index} single: collection search ; sru:facetLimit
```

**Example: 2.10**  
>  Use of parameters {sru:recordSchema} and {sru:facetLimit}.   


The following request is similar to the one above, but requests to embed the complete collection metadata inside the search response (`recordSchema=server-choice`). The request disables the faceted search information (`facetLimit=0`).

In [None]:
# Search on platform, embedding the metadata chosen by the server and
# disabling the faceted search information.
osquerystring = {
    'eo:platform': 'proba-1',
    'sru:recordSchema': 'server-choice',
    'sru:facetLimit': '0',
}

request_url = get_api_request(collection_template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

# Show the Atom response as an indented XML listing.
response_dom = minidom.parseString(response.text)
xmlstr = response_dom.toprettyxml(indent='   ', newl='')
md(f"```xml\n{xmlstr}\n```\n")

<a name='Collection-Search-by-Instrument'></a>     
### Search by instrument

```{index} single: collection search ; eo:instrument
```

**Example: 2.11**  
>  Search collections by instrument {eo:instrument} [[RD3]](#RD3).

In [None]:
# Search on instrument.
osquerystring = {'eo:instrument': 'SAR'}

request_url = get_api_request(collection_template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

# Print the title of every entry in the result page.
atom = '{http://www.w3.org/2005/Atom}'
rt = ElementTree.fromstring(response.text)
for r in rt.findall(atom + 'entry'):
    title = r.find(atom + 'title').text
    print("title: ", title)

<a name='Collection-Search-by-Organisation'></a>     
### Search by organisation

```{index} single: collection search ; eo:organisationName
```

**Example: 2.12**  
>  Search collections by organisation {eo:organisationName} [[RD3]](#RD3).

In [None]:
# Search on organisation name.
osquerystring = {'eo:organisationName': 'ESA/ESRIN'}

request_url = get_api_request(collection_template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

# Print the title of every entry in the result page.
atom = '{http://www.w3.org/2005/Atom}'
rt = ElementTree.fromstring(response.text)
for r in rt.findall(atom + 'entry'):
    title = r.find(atom + 'title').text
    print("title: ", title)

<a name='Collection-Search-by-Identifier'></a>     
### Search by identifier

```{index} single: collection search ; geo:uid
```

**Example: 2.13**  
>  Search collections by identifier {geo:uid} [[RD2]](#RD2). 

The `geo:uid` optionally combined with a subcatalogue identifier `eo:parentIdentifier` allows retrieving collection metadata for a specific collection.

In [None]:
# Search on collection identifier.
osquerystring = {'geo:uid': 'PROBA.HRC.1A'}

request_url = get_api_request(collection_template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

# Show the Atom response as an indented XML listing.
response_dom = minidom.parseString(response.text)
xmlstr = response_dom.toprettyxml(indent='   ', newl='')
md(f"```xml\n{xmlstr}\n```\n")

### Search by concept

```{index} single: collection search ; semantic:classifiedAs
```

**Example: 2.14**  
>  Search collections by concept URI {semantic:classifiedAs}  

Collection metadata includes platform, instrument and science keywords, including the URI of these concepts expressed in the ESA Thesauri (https://thesauri.spacebel.be/) and NASA GCMD thesauri.  The URI of these concepts can be used as search parameter.  

In the current version of the software, the following concept URIs are supported:

* GCMD thesaurus science keyword URI
* ESA thesaurus platform URI
* ESA thesaurus instrument URI

Future versions of the software derived from the EOVOC developments may support both GCMD and ESA URIs for all three categories in addition to GEMET, INSPIRE Themes, Dbpedia, Wikidata and other URI.

In [None]:
# Search on a concept URI: the Proba-1 concept in the ESA thesaurus.
osquerystring = {
    'semantic:classifiedAs': 'https://earth.esa.int/concept/b3979ff2-d27d-5f22-9e06-a18c5759d9a5',
}

request_url = get_api_request(collection_template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

# Tabulate the returned entries and show at most the first twenty.
dataframe = load_dataframe(response)
dataframe.head(20)

## Collection properties

In [None]:
# Parse the latest search response and keep its first Atom entry in `r`;
# the cells below read the collection properties from it.
atom = '{http://www.w3.org/2005/Atom}'
rt = ElementTree.fromstring(response.text)
r = rt.find(atom + 'entry')

### Collection geometry

```{index} single: collection properties ; geometry
```
```{index} single: GeoRSS ; georss:box
```
```{index} single: GeoRSS ; georss:polygon
```
Geometry information for each collection is included in the Atom entry using GeoRSS response elements.

In [None]:
# GeoRSS geometry of the entry held in `r`.  Each value stays '' when the
# lookup raises AttributeError, i.e. when the element (or the entry) is absent.
georss = '{http://www.georss.org/georss}'
box = polygon = ''

try:
    box = r.find(georss + 'box').text
except AttributeError:
    pass

try:
    polygon = r.find(georss + 'polygon').text
except AttributeError:
    pass

print("georss:box:", box )
print("georss:polygon:", polygon )

In [None]:
#:tags: [remove-input]
# Draw the collection polygon on a map; the bounding box supplies the map center.
show_features_on_map(box, polygon)

```{index} single: collection properties ; temporal extent
```
### Collection temporal extent

The `<dc:date>` response element provides temporal information for a collection, i.e. the start time and end time separated by a `/`, encoded as per [RFC-3339](https://www.rfc-editor.org/rfc/rfc3339.txt).  The end time may be absent indicating that the collection is not completed.

In [None]:
# Temporal extent (<dc:date>) of the entry held in `r`; '' when the lookup
# raises AttributeError, i.e. when the element (or the entry) is absent.
date = ''
try:
    date = r.find('{http://purl.org/dc/elements/1.1/}' + 'date').text
except AttributeError:
    pass

date

```{index} single: collection properties ; identifier
```
### Collection identifier

The `<dc:identifier>` response element includes the identifier of the collection that can be used as value for the `geo:uid` search parameter.

In [None]:
# Identifier (<dc:identifier>) of the entry held in `r`; '' when the lookup
# raises AttributeError, i.e. when the element (or the entry) is absent.
# NOTE(review): the name `id` shadows the builtin; kept in case later cells read it.
id = ''
try:
    id = r.find('{http://purl.org/dc/elements/1.1/}' + 'identifier').text
except AttributeError:
    pass

id

```{index} single: collection properties ; keywords
```
### Collection keywords

The optional `<atom:category>` response elements provide keywords related to the collection.  Keywords can be free text keywords or originate from a controlled thesaurus.  The `term` attribute is used to hold the full concept URI (if available) as per [[RD10]](#RD10).  
When keywords provide a concept URI, then this URI can be used to search for collections by concept with the `semantic:classifiedAs` search parameter.

In [None]:
# Build a table with the keywords (<atom:category>) of the entry held in `r`.
# `label` and `term` are read independently so that a category without label
# still shows its term, and the table gets its own name instead of rebinding
# `list` (the drop-down widget).
keywords = pd.DataFrame(
    [ { 'label': category.attrib.get('label', ''), 'term': category.attrib.get('term', '') }
      for category in r.findall('{http://www.w3.org/2005/Atom}category') ],
    columns=['label', 'term'])

keywords

```{index} single: collection properties ; alternate (atom:link)
```
### Collection other representations

Alternative metadata formats for the collection represented by the Atom entry are available as `<atom:link>` with `rel="alternate"`.  Different servers may advertise different metadata formats.

In [None]:
# Build a table with the rel=alternate links of the entry held in `r`.
# The rows are collected first and the frame is built once; an attribute that
# a server omits (e.g. `title`) is shown as '' instead of raising KeyError.
altList = pd.DataFrame(
    [ { 'title': lnk.attrib.get('title', ''), 'type': lnk.attrib.get('type', ''), 'href': lnk.attrib.get('href', '') }
      for lnk in r.findall('{http://www.w3.org/2005/Atom}link[@rel="alternate"]') ],
    columns=['title', 'type', 'href'])

altList

### Collection embedding other formats

Alternative metadata formats for the collection provide additional metadata properties and can be directly embedded in the Atom entry using the `sru:recordSchema` parameter.  The ISO19139 and ISO19139-2 formats provide the most detailed representations.

**Example: 3.1**  
>  Get list of supported record schemas {sru:recordSchema} for collections from the OSDD.   

 
The OSDD template lists the `sru:recordSchema` values that can be used in a collection search request.  They correspond to metadata formats that can be directly embedded in the Search response.  The value `server-choice` can be used to allow the server to propose an appropriate metadata encoding.  Typically, a short name and a URI can be used for each of the formats.

In [None]:
# Look up the <Parameter> element describing sru:recordSchema inside the Atom
# collection <Url> element and show it as XML.
param_ns = '{http://a9.com/-/spec/opensearch/extensions/parameters/1.0/}'
el = collection_url_atom.find(param_ns + 'Parameter[@value="{sru:recordSchema}"]')

xmltxt = ElementTree.tostring(el, encoding='unicode', method='xml')
md(f"```xml\n{xmltxt}\n```\n")

```{index} single: collection metadata ; ISO19139-2
```

**Example: 3.2**  
>  Embed ISO19139-2 metadata `iso19139-2` in collection search response {sru:recordSchema} [[RD8]](#RD8).   

The additional properties are included in an `<gmi:MI_Metadata>` element inside the `<atom:entry>`.

In [None]:
# Request one collection with its ISO19139-2 metadata embedded in the entry.
osquerystring = {'geo:uid': 'PROBA.HRC.1A', 'sru:recordSchema': 'iso19139-2'}

request_url = get_api_request(collection_template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

rt = ElementTree.fromstring(response.text)
r = rt.find('{http://www.w3.org/2005/Atom}entry')  # first entry of the response

# Serialise the embedded MI_Metadata element (any namespace); the placeholder
# text is kept when the lookup or the serialisation raises AttributeError.
xmltxt = 'Not found.'
try:
    el = r.find('{*}MI_Metadata')
    xmltxt = ElementTree.tostring(el, encoding='unicode', method='xml')
except AttributeError:
    pass

md(f"```xml\n{xmltxt}\n```\n")

```{index} single: collection metadata ; ISO19139
```

**Example: 3.3**  
>  Embed ISO19139 metadata `iso19139` in collection search response {sru:recordSchema} [[RD8]](#RD8).   

The additional properties are included in an `<gmd:MD_Metadata>` element inside the `<atom:entry>`.

In [None]:
# Request one collection with its ISO19139 metadata embedded in the entry.
osquerystring = {'geo:uid': 'PROBA.HRC.1A', 'sru:recordSchema': 'iso19139'}

request_url = get_api_request(collection_template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

rt = ElementTree.fromstring(response.text)
r = rt.find('{http://www.w3.org/2005/Atom}entry')  # first entry of the response

# Serialise the embedded MD_Metadata element (any namespace); the placeholder
# text is kept when the lookup or the serialisation raises AttributeError.
xmltxt = 'Not found.'
try:
    el = r.find('{*}MD_Metadata')
    xmltxt = ElementTree.tostring(el, encoding='unicode', method='xml')
except AttributeError:
    pass

md(f"```xml\n{xmltxt}\n```\n")

```{index} single: collection metadata ; DIF-10
```

**Example: 3.4**  
>  Embed DIF10 metadata `dif10` in collection search response {sru:recordSchema} [[RD8]](#RD8).   

The additional properties are included in an `<DIF>` element inside the `<atom:entry>`.

In [None]:
# Request one collection with its DIF-10 metadata embedded in the entry.
osquerystring = {'geo:uid': 'PROBA.HRC.1A', 'sru:recordSchema': 'dif10'}

request_url = get_api_request(collection_template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

rt = ElementTree.fromstring(response.text)
r = rt.find('{http://www.w3.org/2005/Atom}entry')  # first entry of the response

# Serialise the embedded DIF element (any namespace); the placeholder text is
# kept when the lookup or the serialisation raises AttributeError.
xmltxt = 'Not found.'
try:
    el = r.find('{*}DIF')
    xmltxt = ElementTree.tostring(el, encoding='unicode', method='xml')
except AttributeError:
    pass

md(f"```xml\n{xmltxt}\n```\n")

```{index} single: collection metadata ; Dublin Core
```

**Example: 3.5**  
>  Embed Dublin core metadata `dc` in collection search response {sru:recordSchema} [[RD8]](#RD8).   

The additional properties are included in an `<srw_dc:dc>` element inside the `<atom:entry>`.

In [None]:
# Request one collection with its Dublin Core metadata embedded in the entry.
osquerystring = {'geo:uid': 'PROBA.HRC.1A', 'sru:recordSchema': 'dc'}

request_url = get_api_request(collection_template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

rt = ElementTree.fromstring(response.text)
r = rt.find('{http://www.w3.org/2005/Atom}entry')  # first entry of the response

# Serialise the embedded dc element (any namespace); the placeholder text is
# kept when the lookup or the serialisation raises AttributeError.
xmltxt = 'Not found.'
try:
    el = r.find('{*}dc')
    xmltxt = ElementTree.tostring(el, encoding='unicode', method='xml')
except AttributeError:
    pass

md(f"```xml\n{xmltxt}\n```\n")

```{index} single: collection properties ; describedby (atom:link)
```
### Collection related documentation

Collections can optionally provide access to related documentation via an `<atom:link>` with `rel="describedby"`.

In [None]:
# Build a table with the rel=describedby links of the entry held in `r`.
# The rows are collected first and the frame is built once; an attribute that
# a server omits (e.g. `title`) is shown as '' instead of raising KeyError.
relList = pd.DataFrame(
    [ { 'title': lnk.attrib.get('title', ''), 'type': lnk.attrib.get('type', ''), 'href': lnk.attrib.get('href', '') }
      for lnk in r.findall('{http://www.w3.org/2005/Atom}link[@rel="describedby"]') ],
    columns=['title', 'type', 'href'])

relList

## Granule Search

<a name='granule-access-api-description'></a>    
### Access API Description

In [None]:
#:tags: [remove-input]
# Render the sentence as Markdown; the OSDD URL is written as an explicit
# link, like the OSDD and Explain document sentences earlier in the notebook.
md("The OpenSearch Description Document is accessible at the location [{url}]({url}) which is extracted from the collection search response and contains the URL template to be used for granule search.".format(url=url_osdd_granules))

```{index} single: response format ; Atom (granule)
```
```{index} single: granule search ; OSDD
```
```{index} single: granule search ; Atom (response format)
```
```{index} single: OSDD ; results (rel)
```

**Example: 4.1**  
>  Obtain API Description (OSDD) for the collection via the URL found in collection search response.

In [None]:
# Download the collection-specific OSDD, asking explicitly for the OSDD media
# type, and show it as an indented XML listing.
osdd_headers = {'Accept': 'application/opensearchdescription+xml'}
response = requests.get(url_osdd_granules, verify=bool(verify_ssl), headers=osdd_headers)

osdd_dom = minidom.parseString(response.text)
xmlstr = osdd_dom.toprettyxml(indent='  ', newl='')
md(f"```xml\n{xmlstr}\n```\n")

Extract the URL template for granule search `rel="results"` corresponding to the media type `type="application/atom+xml"` of the search result.

In [None]:
# Pick the <Url> element for Atom results from the collection OSDD and keep
# its URL template for the granule searches below.
root = ElementTree.fromstring(response.text)

os_ns = '{http://a9.com/-/spec/opensearch/1.1/}'
granules_url_atom = root.find(os_ns + 'Url[@rel="results"][@type="application/atom+xml"]')

template = granules_url_atom.attrib['template']
template

<a name='granule-find-available-record-schemas'></a>    
### Find available record schemas

```{index} single: granule search ; sru:recordSchema
```
```{index} single: granule search ; server-choice
```

**Example: 4.2**  
>  Obtain allowed values for {sru:recordSchema} from the OSDD.   

 
The OSDD template lists the `sru:recordSchema` values that can be used in a granule search request.  They correspond to metadata formats that can be directly embedded in the Search response.  The value `server-choice` can be used to allow the server to propose an appropriate metadata encoding.

In [None]:
# Locate the <Parameter> child of the Atom <Url> element that describes {sru:recordSchema}.
el = granules_url_atom.find(
    '{http://a9.com/-/spec/opensearch/extensions/parameters/1.0/}Parameter'
    '[@value="{sru:recordSchema}"]'
)

# Serialise the element and display it as a fenced XML block.
xmltxt = ElementTree.tostring(el, method='xml', encoding='unicode')
md(f"```xml\n{xmltxt}\n```\n")

<a name='granule-search-by-bounding-box'></a>    
### Search by bounding box

```{index} single: granule search ; geo:box
```

**Example: 4.3**  
>  Search granules by bounding box {geo:box} [[RD2]](#RD2).

Search parameters which are optional in the search template (end with a "?") can be omitted or their value can be left empty.
Prepare a search request by replacing all mandatory search parameters with a value.  

By default, each `<atom:entry>` represents one search result (an EO granule).  The original metadata for the granule is not embedded in the response but available as an `atom:link`.  It can be embedded by using the `sru:recordSchema` search parameter.

In [None]:
# Search parameters: a result count of 3 and a bounding box.
osquerystring = {
    'count': '3',
    'geo:box': '14.90,37.700,14.99,37.780',
}

# Fill in the URL template and run the search.
request_url = get_api_request(template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))
root = ElementTree.fromstring(response.text)

# Report the total number of results announced by the feed.
el = root.find('{http://a9.com/-/spec/opensearch/1.1/}totalResults')
print('totalResults: ', el.text)

# Tabulate the response and show up to 20 rows.
dataframe = load_dataframe(response)
dataframe.head(20)

In [None]:
# Table with clickable download and quicklook hyperlinks.
# The frame is displayed transposed.  The former `copy=True` argument was
# dropped: to_html() only reads the frame, so no defensive copy is needed, and
# the keyword is deprecated in recent pandas releases.
# escape=False leaves the markup inside the cells untouched so that links render.
HTML(dataframe.transpose().to_html(render_links=True, escape=False))

<a name='granule-search-by-geometry'></a>    
### Search by geometry

Collections may advertise the availability of the optional `geo:geometry` [[RD2]](#RD2) search parameter in the collection OSDD.

```{index} single: granule search ; geo:geometry
```
```{index} single: geo:geometry ; WKT
```

**Example: 4.4**  
>  Obtain profiles for {geo:geometry} from the OSDD.   

 
If the parameter is supported for the collection, then the OSDD template identifies one or more profiles of the `geo:geometry` values that can be used in a granule search request.  Possible profiles include searches by `point`, `linestring`, `multipoint`, `multilinestring` or `polygon`.  In all cases, the geometry value is to be provided in Well-Known Text [(WKT)](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry) format.

In [None]:
# Extract <Parameter> element for geo:geometry
el = granules_url_atom.find('{http://a9.com/-/spec/opensearch/extensions/parameters/1.0/}Parameter[@value="{geo:geometry}"]')

# geo:geometry is an optional search parameter: find() returns None when the
# collection's OSDD does not advertise it.  Report that explicitly instead of
# handing None to ElementTree.tostring().
if el is None:
    xmltxt = 'Not found.'
else:
    xmltxt = ElementTree.tostring(el, encoding='unicode', method='xml')

md("```xml\n" + xmltxt + "\n```\n")

```{index} single: geo:geometry ; POLYGON
```

**Example: 4.5**  
>  Search granules by polygon geometry {geo:geometry} [[RD2]](#RD2).

In [None]:
# Granule search restricted by a WKT polygon passed as geo:geometry.
osquerystring = {}
osquerystring['count'] = '3'
osquerystring['geo:geometry'] = 'POLYGON((14.90 37.700, 14.90 37.780, 14.99 37.780, 14.99 37.700, 14.90 37.700))'

request_url = get_api_request(template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

root = ElementTree.fromstring(response.text)

# extract total results
el = root.find('{http://a9.com/-/spec/opensearch/1.1/}totalResults')
print('totalResults: ', el.text)

dataframe = load_dataframe(response)
# Show the table transposed.  `copy=True` was dropped from transpose(): the
# frame is only read by to_html() and the keyword is deprecated in recent pandas.
HTML(dataframe.transpose().to_html(render_links=True, escape=False))

```{index} single: geo:geometry ; POINT
```

**Example: 4.6**  
>  Search granules by point geometry {geo:geometry} [[RD2]](#RD2).

In [None]:
# Granule search restricted by a WKT point passed as geo:geometry.
osquerystring = {}
osquerystring['count'] = '3'
# Port of Antwerp
osquerystring['geo:geometry'] = 'POINT(4.38 51.25)'

request_url = get_api_request(template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

root = ElementTree.fromstring(response.text)

# extract total results
el = root.find('{http://a9.com/-/spec/opensearch/1.1/}totalResults')
print('totalResults: ', el.text)

dataframe = load_dataframe(response)
# Show the table transposed.  `copy=True` was dropped from transpose(): the
# frame is only read by to_html() and the keyword is deprecated in recent pandas.
HTML(dataframe.transpose().to_html(render_links=True, escape=False))

<a name='granule-search-by-temporal-extent'></a>    
### Search by temporal extent

```{index} single: granule search ; time:start
```
```{index} single: granule search ; time:end
```

**Example: 4.6**  
>  Search granules by temporal extent {time:start} and {time:end} [[RD2]](#RD2).  Each granule has an acquisition start time and end time.  A granule is returned if the intersection of the temporal search interval with the acquisition start/end interval is not empty.

In [None]:
# Granule search combining a bounding box with a time:start / time:end interval.
osquerystring = {}
osquerystring['count'] = '3'
osquerystring['geo:box'] = '14.90,37.700,14.99,37.780'
osquerystring['time:start'] = '2018-01-01T00:00:00Z'
osquerystring['time:end'] = '2021-03-31T23:59:59Z'

request_url = get_api_request(template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

root = ElementTree.fromstring(response.text)
# extract total results
el = root.find('{http://a9.com/-/spec/opensearch/1.1/}totalResults')
print('totalResults: ', el.text)

dataframe = load_dataframe(response)
# Show the table transposed.  `copy=True` was dropped from transpose(): the
# frame is only read by to_html() and the keyword is deprecated in recent pandas.
HTML(dataframe.transpose().to_html(render_links=True, escape=False))

<a name='granule-search-by-identifier'></a>     
### Search by identifier

```{index} single: granule search ; geo:uid
```

**Example: 4.7**  
>  Search granules by identifier {geo:uid} [[RD2]](#RD2). 

The `geo:uid` combined with the collection identifier `eo:parentIdentifier` (already prefilled in the OSDD template extracted from the collection search response) allows retrieving granule metadata for a specific granule.  Use an identifier extracted from the previous search response.

In [None]:
# Take the dc:identifier from the row labelled 0 of the previous search result table.
identifier = dataframe.loc[0]['dc:identifier']

# Example value: 'PR1_OPER_HRC_HRC_1P_20210401T131711_N37-069_E014-099_0001'
osquerystring = {'geo:uid': identifier}

request_url = get_api_request(template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

# Show the complete response as a fenced XML block.
response_dom = minidom.parseString(response.text)
xmlstr = response_dom.toprettyxml(indent='   ', newl='')
md(f"```xml\n{xmlstr}\n```\n")

 
### Search by acquisition parameters

```{index} single: granule search ; acquisition parameters
```
```{index} single: granule search ; eo:illuminationElevationAngle
```

**Example: 4.8**  
>  Search granules by illumination angles {eo:illuminationElevationAngle}, {eo:illuminationAzimuthAngle} [[RD3]](#RD3). 

The `eo:illuminationElevationAngle` and `eo:illuminationAzimuthAngle` search parameters allow filtering results by illumination angles.  An interval specifying minimum and maximum allowed values is to be provided, e.g. `[48,50]`.  Only providing the minimum or maximum value can be done by using an open interval, e.g. `[48` or `50]`.

Other acquisition parameters can be advertised as searchable in the collection OSDD, depending on the sensor type e.g.:

- eo:orbitNumber
- eo:orbitDirection
- eo:frame
- eo:track
- etc.


In [None]:
# Granule search filtered by illumination elevation angle, with the metadata
# format chosen by the server embedded in each entry (server-choice).
osquerystring = {}
osquerystring['count'] = '3'
osquerystring['geo:box'] = '14.90,37.700,14.99,37.780'
osquerystring['eo:illuminationElevationAngle'] = '[10,55]'
osquerystring['sru:recordSchema'] = 'server-choice'

request_url = get_api_request(template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

rt = ElementTree.fromstring(response.text)
el = rt.find('{http://a9.com/-/spec/opensearch/1.1/}totalResults')
print('totalResults: ', el.text)

# Print the illumination elevation angle found in the embedded metadata of each entry.
for r in rt.findall('{http://www.w3.org/2005/Atom}entry'):
    name = r.find('{http://purl.org/dc/elements/1.1/}identifier').text
    # {*} matches any namespace, so the path works whichever namespace the
    # embedded EarthObservation element uses.
    el = r.find('{*}EarthObservation/{*}procedure/{*}EarthObservationEquipment/{*}acquisitionParameters/{*}Acquisition/{*}illuminationElevationAngle')
    # Entries without the element are skipped on purpose.  The explicit None
    # check replaces a bare `except: pass`, which also hid unrelated errors.
    if el is None:
        continue
    value = el.text
    print(name, ':\teop:illuminationElevationAngle =', value)


## Granule properties

In [None]:
# Re-parse the latest search response and keep its first <atom:entry> in `r`
# (find() returns the first matching child, or None when there is none).
atom_entry_tag = '{http://www.w3.org/2005/Atom}entry'
rt = ElementTree.fromstring(response.text)
r = rt.find(atom_entry_tag)

```{index} single: granule properties ; geometry
```
### Geometry

Geometry information for each granule is included in the Atom entry using GeoRSS encoding.

In [None]:
# Read the GeoRSS box and polygon of the entry; a missing element (or a missing
# entry) raises AttributeError and is reported as an empty string.
georss_text = {}
for georss_tag in ('box', 'polygon'):
    try:
        georss_text[georss_tag] = r.find('{http://www.georss.org/georss}' + georss_tag).text
    except AttributeError:
        georss_text[georss_tag] = ''

box = georss_text['box']
polygon = georss_text['polygon']

print("georss:box:", box )
print("georss:polygon:", polygon )

In [None]:
#:tags: [remove-input]
# Pass the georss:box and georss:polygon strings read in the previous cell to
# the map helper.
# NOTE(review): show_features_on_map is defined outside this section; presumably
# it draws both footprints on a map and copes with empty strings - confirm.
show_features_on_map(box, polygon)

```{index} single: granule properties ; temporal extent
```
### Temporal extent

The `<dc:date>` response element provides temporal information for each granule, i.e. the acquisition start time and end time, encoded as per [RFC-3339](https://www.rfc-editor.org/rfc/rfc3339.txt).

In [None]:
# Read <dc:date> from the entry; fall back to an empty string when the element
# (or the entry itself) is missing.
dc_date_tag = '{http://purl.org/dc/elements/1.1/}date'
try:
    date_element = r.find(dc_date_tag)
    date = date_element.text
except AttributeError:
    date = ''

date

```{index} single: granule properties ; identifier
```
### Granule identifier

The `<dc:identifier>` response element includes the identifier of the granule that can be used as value for the `geo:uid` search parameter.

In [None]:
# Read <dc:identifier> from the entry; fall back to an empty string when the
# element (or the entry itself) is missing.
dc_identifier_tag = '{http://purl.org/dc/elements/1.1/}identifier'
try:
    identifier_element = r.find(dc_identifier_tag)
    id = identifier_element.text
except AttributeError:
    id = ''

id

```{index} single: granule properties ; quicklook
```
```{index} single: granule properties ; icon (atom:link)
```
### Quicklook

The `atom:link` with rel="icon" can optionally provide access to a quicklook or browse image.

In [None]:
# The quicklook link is optional.  Without a rel="icon" link find() returns
# None (AttributeError on .attrib); a link lacking `href` raises KeyError.
# Both cases fall back to an empty URL.
try:
    href = r.find('{http://www.w3.org/2005/Atom}link[@rel="icon"]').attrib['href']
except (AttributeError, KeyError):
    href = ''

print("Quicklook URL:", href )
# Render the image only when a quicklook URL exists; evaluating to None shows
# nothing, rather than a broken image built from an empty URL.
Image(url=href, width=200, height=200) if href else None

```{index} single: granule properties ; download URL
```
```{index} single: granule properties ; enclosure (atom:link)
```
### Granule download

The `atom:link` with rel="enclosure" provides access to the granule as a file download URL (if available).

In [None]:
# Look up the rel="enclosure" link of the entry; when the link (or the entry)
# is missing the AttributeError is turned into an empty URL.
enclosure_xpath = '{http://www.w3.org/2005/Atom}link[@rel="enclosure"]'
try:
    enclosure_link = r.find(enclosure_xpath)
    href = enclosure_link.attrib['href']
except AttributeError:
    href = ''

print("Granule download URL:", href )

### Other representations

Alternative metadata formats for the granule represented by the Atom entry are available as Atom links with rel="alternate".  Different servers may advertise different metadata formats.

In [None]:
# Present list of alternate links in table.
# All rel="alternate" links are collected first and the frame is created in one
# step, instead of growing it with pd.concat() inside the loop (concatenating
# onto an empty frame is deprecated in recent pandas).  `title` and `type` are
# optional attributes of <atom:link>; .get() leaves the cell empty when they
# are absent instead of raising KeyError.
alternate_links = [
    {'title': lnk.get('title'), 'type': lnk.get('type'), 'href': lnk.get('href')}
    for lnk in r.findall('{http://www.w3.org/2005/Atom}link[@rel="alternate"]')
]
altList = pd.DataFrame(alternate_links, columns=['title', 'type', 'href'])

HTML(altList.to_html(render_links=True, escape=False))

### Embedding other formats

Alternative metadata formats for the granule provide additional metadata properties and can be directly embedded in the Atom entry using the `sru:recordSchema` parameter.  The O&M format (OGC 10-157r4) provides the most detailed representation.

**Example: 5.1**  
>  Get list of supported record schemas {sru:recordSchema} from the OSDD.   

 
The OSDD template for the collection lists the `sru:recordSchema` values that can be used in a granule search request.  They correspond to metadata formats that can be directly embedded in the Search response.  The value `server-choice` can be used to allow the server to propose an appropriate metadata encoding.

In [None]:
# Look up the <Parameter> element for {sru:recordSchema} under the Atom <Url> element.
el = granules_url_atom.find(
    '{http://a9.com/-/spec/opensearch/extensions/parameters/1.0/}Parameter'
    '[@value="{sru:recordSchema}"]'
)

# Serialise it and display it as a fenced XML block.
xmltxt = ElementTree.tostring(el, method='xml', encoding='unicode')
md(f"```xml\n{xmltxt}\n```\n")

```{index} single: granule metadata ; OGC 10-157r4
```

**Example: 5.2**  
>  Embed O&M metadata in search response {sru:recordSchema} [[RD8]](#RD8).   

The additional properties are included in an `<eop:EarthObservation>` element inside the `<atom:entry>`.  Depending on the type of observation, the `EarthObservation` element may be in the `eop` (General), `opt` (Optical), `sar` (Radar), `atm` (Atmospheric) or other namespace defined by OGC 10-157r4.

In [None]:
# Request one granule with sru:recordSchema set to 'om'.
osquerystring = {
    'count': '1',
    'geo:box': '14.90,37.700,14.99,37.780',
    'sru:recordSchema': 'om',
}

request_url = get_api_request(template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

rt = ElementTree.fromstring(response.text)
r = rt.find('{http://www.w3.org/2005/Atom}entry')  # first entry only

# {*} matches the EarthObservation element in any namespace.  An AttributeError
# (e.g. no entry in the response) is shown as a placeholder text.
try:
    el = r.find('{*}EarthObservation')
    xmltxt = ElementTree.tostring(el, method='xml', encoding='unicode')
except AttributeError:
    xmltxt = 'Not found.'

md(f"```xml\n{xmltxt}\n```\n")

<a name='Advanced-Topics'></a>  
## Advanced topics

```{index} single: paging
```
### Result paging

Collection and granule search results are provided in pages.  Search responses contain `atom:link` navigation links providing access to the `first`, `last`, `previous` or `next` result pages.

```{index} single: search ; count
```
```{index} single: response element ; first (atom:link)
```
```{index} single: response element ; last (atom:link)
```
```{index} single: response element ; previous (atom:link)
```
```{index} single: response element ; next (atom:link)
```

**Example: 6.1**  
>  Obtain navigation links from search response.

In [None]:
# Run a granule search and list the paging links of the response feed.
osquerystring = {}
osquerystring['count'] = '10'

request_url = get_api_request(template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

root = ElementTree.fromstring(response.text)
# extract total results
el = root.find('{http://a9.com/-/spec/opensearch/1.1/}totalResults')
print('totalResults: ', el.text)

# Present list of navigation links in table.
# 'previous' is accepted next to 'prev', like the other paging examples in this
# notebook; it was missing here, so a server using rel="previous" lost that row.
# The rows are collected first and the frame is built once, instead of calling
# pd.concat() per link; .get() avoids a KeyError on links without `rel`.
navigation_rels = ('first', 'last', 'previous', 'prev', 'next')
navigation_links = [
    {'rel': lnk.get('rel'), 'href': lnk.get('href')}
    for lnk in root.findall('{http://www.w3.org/2005/Atom}link[@type="application/atom+xml"]')
    if lnk.get('rel') in navigation_rels
]
relList = pd.DataFrame(navigation_links, columns=['rel', 'href'])

HTML(relList.to_html(render_links=True, escape=False))

```{index} single: search ; startIndex
```
```{index} single: response element ; totalResults (Atom)
```
```{index} single: response element ; itemsPerPage (Atom)
```
```{index} single: response element ; startIndex (Atom)
```

**Example: 6.2**  
>  Traverse search results using `{startIndex}`.

The search response includes information about the `totalResults`, `itemsPerPage` and `startIndex`.

```{warning}
The use of large startIndex values is discouraged as the backend implementation is not optimized for larger values.  The implementation may return an HTTP error code when the value exceeds the maximum value allowed.  As a work around, you can narrow down your search by applying (additional) temporal or geographical search parameters thereby avoiding having to navigate very large result sets.
```

In [None]:
# Granule search starting at result 11, followed by the paging information of the response.
osquerystring = {}
osquerystring['count'] = '10'
osquerystring['startIndex'] = '11'

request_url = get_api_request(template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

root = ElementTree.fromstring(response.text)

# Print the three OpenSearch paging counters of the feed.
for counter in ('totalResults', 'itemsPerPage', 'startIndex'):
    el = root.find('{http://a9.com/-/spec/opensearch/1.1/}' + counter)
    print(counter + ': ', el.text)

# Present list of navigation links in table.
# The rows are collected first and the frame is built once: growing the frame
# with pd.concat() per link is deprecated for empty frames in recent pandas and
# copies the table on every pass.  .get() avoids a KeyError on links without `rel`.
navigation_rels = ('first', 'last', 'previous', 'next', 'prev')
navigation_links = [
    {'rel': lnk.get('rel'), 'href': lnk.get('href')}
    for lnk in root.findall('{http://www.w3.org/2005/Atom}link[@type="application/atom+xml"]')
    if lnk.get('rel') in navigation_rels
]
relList = pd.DataFrame(navigation_links, columns=['rel', 'href'])

HTML(relList.to_html(render_links=True, escape=False))

```{index} single: collection search ; startPage
```

**Example: 6.3**  
>  Traverse collection search results using `{startPage}`.

`{startPage}` can be used as an alternative for `{startIndex}`.

In [None]:
# Collection search for the second result page, followed by the paging information of the response.
osquerystring = {}
osquerystring['count'] = '10'
osquerystring['startPage'] = '2'

request_url = get_api_request(collection_template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

root = ElementTree.fromstring(response.text)

# Print the three OpenSearch paging counters of the feed.
for counter in ('totalResults', 'itemsPerPage', 'startPage'):
    el = root.find('{http://a9.com/-/spec/opensearch/1.1/}' + counter)
    print(counter + ': ', el.text)

# Present list of navigation links in table.
# The rows are collected first and the frame is built once: growing the frame
# with pd.concat() per link is deprecated for empty frames in recent pandas and
# copies the table on every pass.  .get() avoids a KeyError on links without `rel`.
navigation_rels = ('first', 'last', 'previous', 'next', 'prev')
navigation_links = [
    {'rel': lnk.get('rel'), 'type': lnk.get('type'), 'href': lnk.get('href')}
    for lnk in root.findall('{http://www.w3.org/2005/Atom}link[@type="application/atom+xml"]')
    if lnk.get('rel') in navigation_rels
]
relList = pd.DataFrame(navigation_links, columns=['rel', 'type', 'href'])

HTML(relList.to_html(render_links=True, escape=False))

```{index} single: granule search ; startPage
```

**Example: 6.4**  
>  Traverse granule search results using `{startPage}`.

`{startPage}` can be used as an alternative for `{startIndex}`.

```{note}
As for all search parameters, `{startPage}` is only available when it is advertised in the corresponding OSDD document for the collection.
```

In [None]:
# Granule search asking for the second result page.
osquerystring = {
    'count': '10',
    'startPage': '2',
}

request_url = get_api_request(template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))
root = ElementTree.fromstring(response.text)

# Show the whole response as a fenced XML block.
xmltxt = ElementTree.tostring(root, method='xml', encoding='unicode')
md(f"```xml\n{xmltxt}\n```\n")

In [None]:
# Print the paging counters of the response.  When one of the elements is
# missing, find() returns None and reading .text raises AttributeError; only
# that case is reported as an invalid response.  The former bare `except:`
# also swallowed unrelated errors such as KeyboardInterrupt.
try:
    for counter in ('totalResults', 'itemsPerPage', 'startPage'):
        el = root.find('{http://a9.com/-/spec/opensearch/1.1/}' + counter)
        print(counter + ': ', el.text)
except AttributeError:
    print('invalid response')

# Present list of navigation links in table.
# The rows are collected first and the frame is built once, instead of growing
# it with pd.concat() per link (deprecated for empty frames in recent pandas).
# .get() avoids a KeyError on links without `rel`.
navigation_rels = ('first', 'last', 'previous', 'next', 'prev')
navigation_links = [
    {'rel': lnk.get('rel'), 'type': lnk.get('type'), 'href': lnk.get('href')}
    for lnk in root.findall('{http://www.w3.org/2005/Atom}link[@type="application/atom+xml"]')
    if lnk.get('rel') in navigation_rels
]
relList = pd.DataFrame(navigation_links, columns=['rel', 'type', 'href'])

HTML(relList.to_html(render_links=True, escape=False))

```{index} single: sorting
```
### Sorting results

Sorting of search results is available for collection, service and granule searches, but the available criteria differ per resource type.  

```{index} single: granule search ; sru:sortKeys
```

**Example: 6.5**  
>  Obtain supported (granule) sorting criteria {sru:sortKeys} [[RD8]](#RD8) from the OSDD.   

The OSDD template lists the `sru:sortKeys` values that can be used in a granule search request.

In [None]:
# Look up the <Parameter> element for {sru:sortKeys} under the Atom <Url> element.
el = granules_url_atom.find(
    '{http://a9.com/-/spec/opensearch/extensions/parameters/1.0/}Parameter'
    '[@value="{sru:sortKeys}"]'
)

# Serialise it and display it as a fenced XML block.
xmltxt = ElementTree.tostring(el, method='xml', encoding='unicode')
md(f"```xml\n{xmltxt}\n```\n")

**Example: 6.6**  
>  Results can be sorted according to various criteria with {sru:sortKeys} [[RD8]](#RD8), in descending or ascending order which can be discovered in the OSDD. The example sorts in descending chronological order according to the {time:start} value.

In [None]:
# Granule search over a bounding box with sru:sortKeys set to 'start,time,0'.
osquerystring = {}
osquerystring['count'] = '4'
osquerystring['geo:box'] = '14.90,37.700,14.99,37.780'
osquerystring['sru:sortKeys'] = 'start,time,0'

request_url = get_api_request(template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

root = ElementTree.fromstring(response.text)
# extract total results
el = root.find('{http://a9.com/-/spec/opensearch/1.1/}totalResults')
print('totalResults: ', el.text)

dataframe = load_dataframe(response)
# Show the table transposed.  `copy=True` was dropped from transpose(): the
# frame is only read by to_html() and the keyword is deprecated in recent pandas.
HTML(dataframe.transpose().to_html(render_links=True, escape=False))

**Example: 6.7**  
>  Results can be sorted according to various criteria with {sru:sortKeys}, in descending or ascending order which can be discovered in the OSDD. The example sorts in ascending chronological order according to the {time:start} value.

In [None]:
# Granule search over a bounding box with sru:sortKeys set to 'start,time,1'.
osquerystring = {}
osquerystring['count'] = '4'
osquerystring['geo:box'] = '14.90,37.700,14.99,37.780'
osquerystring['sru:sortKeys'] = 'start,time,1'

request_url = get_api_request(template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

root = ElementTree.fromstring(response.text)
# extract total results
el = root.find('{http://a9.com/-/spec/opensearch/1.1/}totalResults')
print('totalResults: ', el.text)

dataframe = load_dataframe(response)
# Show the table transposed.  `copy=True` was dropped from transpose(): the
# frame is only read by to_html() and the keyword is deprecated in recent pandas.
HTML(dataframe.transpose().to_html(render_links=True, escape=False))

```{index} single: faceted search
```

### Faceted search

Faceted search results are available for collection, service and granule searches.  

The server can supply faceted results for a query: i.e. an analysis of how the search results are distributed over various categories (or "facets"). For example, the analysis may reveal how the results are distributed by organization. The client might then refine the query to one particular organization among those listed.

By default, the search response `<atom:feed>` contains faceted search results under the element `<sru:facetedResults>`.  In the current implementation, the faceted search information groups the results by a number of predefined facets which may include `platform`, `instrument` and `organisation` etc.  The client can specify which facets it would like to receive as part of the request with the `sru:facetLimit` search parameter.

The search response does not include the list of all values for the facet.  The list is truncated and provides the values with the largest count.  Future versions of the software may allow paging through all values, which would require support for the `sru:facetStart` search parameter.

The faceted Results are consistent with the OASIS searchRetrieve facetedResults XML schema available at http://docs.oasis-open.org/search-ws/searchRetrieve/v1.0/os/schemas/facetedResults.xsd defined in [[RD7]](#RD7) which may be combined with OpenSearch as in [[RD8]](#RD8).

```{index} single: sru:facetedResults
```

**Example: 6.8**  
>  Show the current content of `<sru:facetedResults>` in a collection search response and extract the facet information for 'platform'.

In [None]:
# Collection search without any filter other than the result count.
# (An 'eo:platform' entry, e.g. 'Cryosat-2' or 'proba-1', could be added here.)
osquerystring = {'count': '10'}

request_url = get_api_request(collection_template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))
root = ElementTree.fromstring(response.text)

# Extract faceted results for platform: the <facet> element, anywhere in the
# feed, whose <index> child has the text "eo:platform".
sru_ns = '{http://a9.com/-/opensearch/extensions/sru/2.0/}'
el = root.find('.//' + sru_ns + 'facet[' + sru_ns + 'index="eo:platform"]')

# Show eo:platform facet information as XML
xmltxt = ElementTree.tostring(el, method='xml', encoding='unicode')
md(f"```xml\n{xmltxt}\n```\n")

**Example: 6.9**  
>  Convert `<sru:facetedResults>` facet information for 'eo:platform' to a dataframe and display as a bar chart.

In [None]:
# Load the eo:platform facet from the last collection search response and draw
# it as horizontal bars, labelling the bars of the first bar container.
df = load_facet(root, "eo:platform")
ax = df.plot.barh(figsize=(9, 7))
first_bar_container = ax.containers[0]
ax.bar_label(first_bar_container)
ax

**Example: 6.10**  
>  Convert `<sru:facetedResults>` facet information for 'eo:instrument' to a dataframe and display as a bar chart.

In [None]:
# Load the eo:instrument facet from the last collection search response and draw
# it as horizontal bars, labelling the bars of the first bar container.
df = load_facet(root, "eo:instrument")
ax = df.plot.barh(figsize=(9, 7))
first_bar_container = ax.containers[0]
ax.bar_label(first_bar_container)
ax

**Example: 6.11**  
>  Convert `<sru:facetedResults>` facet information for 'eo:organisationName' to a dataframe and display as a bar chart.

In [None]:
# Load the eo:organisationName facet from the last collection search response and
# draw it as horizontal bars, labelling the bars of the first bar container.
df = load_facet(root, "eo:organisationName")
ax = df.plot.barh(figsize=(9,7))
first_bar_container = ax.containers[0]
ax.bar_label(first_bar_container)
ax

**Example: 6.12**  
>  Convert `<sru:facetedResults>` facet information for 'eo:processingLevel' to a dataframe and display as a bar chart.

In [None]:
# Load the eo:processingLevel facet from the last collection search response and
# draw it as horizontal bars, labelling the bars of the first bar container.
df = load_facet(root, "eo:processingLevel")
ax = df.plot.barh(figsize=(9,7))
first_bar_container = ax.containers[0]
ax.bar_label(first_bar_container)
ax

```{index} single: collection search ; sru:facetLimit
```

```{index} single: granule search ; sru:facetLimit
```

```{index} single: service search ; sru:facetLimit
```

`{sru:facetLimit}` defines the number of counts to be reported per facet field.  It can be used in the following ways:

- facetLimit=0 (Do not return facets)
- facetLimit=10 (Return maximum 10 counts per facet)
- facetLimit=eo:organisationName (Return default number of counts for eo:organisationName facet)
- facetLimit=eo:organisationName:20 (Return maximum 20 counts for eo:organisationName facet)

**Example: 6.13**  
>  Search defining which facet information is to be returned {sru:facetLimit}.

In [None]:
# Collection search with sru:facetLimit set to 'eo:instrument:5'.
osquerystring = {'sru:facetLimit': 'eo:instrument:5'}

request_url = get_api_request(collection_template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))
root = ElementTree.fromstring(response.text)

# Extract faceted results: the first <facetedResults> element anywhere in the feed.
sru_ns = '{http://a9.com/-/opensearch/extensions/sru/2.0/}'
el = root.find('.//' + sru_ns + 'facetedResults')

# Show facet information as XML
xmltxt = ElementTree.tostring(el, method='xml', encoding='unicode')
md(f"```xml\n{xmltxt}\n```\n")

```{index} single: text search
```
```{index} single: multi-word search
```
```{index} single: collection search ; text search
```

### Text search

For search parameters performing textual search, e.g. `searchTerms`, `dc:title`, `eo:organisationName` the CEOS Best Practice `BP-006` [[RD1]](#RD1) applies for multi-word searches.

**Example: 6.14**  
>  Search for multiple words.  The example searches for collections with a title containing the words `temperature`, `surface` and `ERS` (in any order).

In [None]:
# Collection search on dc:title with three separate words.
osquerystring = {'dc:title': 'temperature surface ERS'}

request_url = get_api_request(collection_template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

# Print the <atom:title> of every entry in the response.
atom_ns = '{http://www.w3.org/2005/Atom}'
rt = ElementTree.fromstring(response.text)
for r in rt.findall(atom_ns + 'entry'):
    title = r.find(atom_ns + 'title').text
    print("title: ", title)

**Example: 6.15**  
>  Surround with double quotes to search for the occurrence of an exact string.  The example searches for collections with a title containing the exact string `Surface Temperature` (case-insensitive).

In [None]:
# Collection search on dc:title with a phrase wrapped in double quotes.
osquerystring = {'dc:title': '"Surface Temperature"'}

request_url = get_api_request(collection_template, osquerystring)
response = requests.get(request_url, verify=bool(verify_ssl))

# Print the <atom:title> of every entry in the response.
atom_ns = '{http://www.w3.org/2005/Atom}'
rt = ElementTree.fromstring(response.text)
for r in rt.findall(atom_ns + 'entry'):
    title = r.find(atom_ns + 'title').text
    print("title: ", title)

```{index} double: content negotiation ; Accept
```
```{index} double: content negotiation ; httpAccept
```
### Content negotiation

The API provides the `httpAccept` HTTP query parameter to request different media types; it behaves like the [searchRetrieve httpAccept parameter](http://docs.oasis-open.org/search-ws/searchRetrieve/v1.0/os/part3-sru2.0/searchRetrieve-v1.0-os-part3-sru2.0.html#_Toc324162475) in [[RD7]](#RD7).  When using the OpenSearch interface, this parameter is prefilled with the media type selected when
the URL template was [extracted from the OSDD](#selecting-mediatype-in-osdd).

The underlying OGC API Features interface supports the use of the HTTP header parameter `Accept` to provide the media type when the API is accessed directly without passing via the OSDD.  In case both parameters are provided, the `httpAccept` parameter has precedence.

See also the [OGC API Features "Response formats"](all-response-formats) section of the documentation for an example of all available formats.

<a name='Further-Reading'></a>  
## Further Reading

| **ID**  | **Title** | 
| -------- | --------- | 
| `RD1` <a name="RD1"></a> | [CEOS OpenSearch Best Practice Document, Version 1.3](https://github.com/radiantearth/stac-spec/blob/master/catalog-spec/catalog-spec.md) | 
| `RD2` <a name="RD2"></a> | [OGC 10-032r8 - OpenSearch Geo and Time Extensions ](https://portal.ogc.org/files/?artifact_id=56866) | 
| `RD3` <a name="RD3"></a> | [OGC 13-026r9 - OpenSearch Extension for Earth Observation](http://docs.opengeospatial.org/is/13-026r9/13-026r9.html) | 
| `RD4` <a name="RD4"></a> | [RFC 4287 - The Atom Syndication Format](https://datatracker.ietf.org/doc/html/rfc4287) | 
| `RD5` <a name="RD5"></a> | [WGISS CDA OpenSearch Client Guide](https://ceos.org/document_management/Working_Groups/WGISS/Documents/Discovery-Access/WGISS%20CDA%20OpenSearch%20Client%20Guide-v1.2.pdf) | 
| `RD6` <a name="RD6"></a>| [OASIS searchRetrieve: Part 7. Explain Version 1.0](http://docs.oasis-open.org/search-ws/searchRetrieve/v1.0/os/part7-explain/searchRetrieve-v1.0-os-part7-explain.html) |
| `RD7` <a name="RD7"></a>| [OASIS searchRetrieve: Part 3. APD Binding for SRU 2.0 Version 1.0](http://docs.oasis-open.org/search-ws/searchRetrieve/v1.0/os/part3-sru2.0/searchRetrieve-v1.0-os-part3-sru2.0.html) |
| `RD8` <a name="RD8"></a>| [OpenSearch SRU Extension](https://github.com/dewitt/opensearch/tree/master/mediawiki/Community/Proposal/Specifications/OpenSearch/Extensions/SRU/1.0) |
| `RD9` <a name="RD9"></a>| [OpenSearch Semantic Extension](https://github.com/dewitt/opensearch/tree/master/mediawiki/Community/Proposal/Specifications/OpenSearch/Extensions/Semantic/1.0) |
| `RD10` <a name="RD10"></a>| [OGC 08-167r2 - Semantic annotations in OGC standards](https://portal.ogc.org/files/?artifact_id=47857) |
| `RD11` <a name="RD11"></a>| [OpenSearch 1.1 Draft 6](https://github.com/dewitt/opensearch/blob/master/opensearch-1-1-draft-6.md) |