Top

nexuscli.nexuscli module

This module provides a native python client interface to the NEXUS (https://github.com/apache/incubator-sdap-nexus) webservice API.

Usage:

import nexuscli

nexuscli.set_target("http://nexus-webapp:8083")
nexuscli.dataset_list()
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module provides a native python client interface to the NEXUS (https://github.com/apache/incubator-sdap-nexus)
webservice API.

Usage:

    import nexuscli
    
    nexuscli.set_target("http://nexus-webapp:8083")
    nexuscli.dataset_list()
    
"""
import requests
import numpy as np
from datetime import datetime
from collections import namedtuple, OrderedDict
from pytz import UTC

# Documentation overrides keyed by dotted name (the ``__pdoc__`` convention);
# used here to describe namedtuple fields, which cannot carry docstrings.
__pdoc__ = {}

# Container returned by the time series style requests: one instance per dataset.
TimeSeries = namedtuple(
    'TimeSeries',
    ('dataset', 'time', 'mean', 'standard_deviation', 'count', 'minimum', 'maximum'),
)
TimeSeries.__doc__ = "An object containing Time Series arrays.\n"
__pdoc__.update((
    ('TimeSeries.dataset', "Name of the Dataset"),
    ('TimeSeries.time', "`numpy` array containing times as `datetime` objects"),
    ('TimeSeries.mean', "`numpy` array containing means"),
    ('TimeSeries.standard_deviation', "`numpy` array containing standard deviations"),
    ('TimeSeries.count', "`numpy` array containing counts"),
    ('TimeSeries.minimum', "`numpy` array containing minimums"),
    ('TimeSeries.maximum', "`numpy` array containing maximums"),
))

# Container for a single observation returned by a subset request.
Point = namedtuple(
    'Point',
    ('time', 'latitude', 'longitude', 'variable'),
)
Point.__doc__ = "An object containing Point attributes.\n"
__pdoc__.update((
    ('Point.time', "time value as `datetime` object"),
    ('Point.latitude', "latitude value"),
    ('Point.longitude', "longitude value"),
    ('Point.variable', "dictionary of variable values"),
))

# strftime pattern used for every timestamp sent to (or formatted for) callers.
ISO_FORMAT = "%Y-%m-%dT%H:%M:%SZ"

# Base URL of the NEXUS webapp; every request URL in this module is built from
# it. Change it with set_target().
target = 'http://localhost:8083'

# Object whose get() is used for all HTTP requests. Starts out as a requests
# session; set_target(..., use_session=False) swaps in the bare requests module.
session = requests.session()


def set_target(url, use_session=True):
    """
    Set the URL for the NEXUS webapp endpoint.  
    
    __url__ URL for NEXUS webapp endpoint  
    __use_session__ When `True` (the default) requests go through a shared `requests` session. When `False`
    requests are sent through the plain `requests` module instead of a session.  
    __return__ None
    """
    global target, session
    target = url

    if not use_session:
        # The requests module offers the same get() call as a session object,
        # so it can stand in for one when sessions are not wanted.
        session = requests
    elif session is requests:
        # Sessions were switched off by an earlier call; switch them back on.
        # Any other session object already in place is left untouched.
        session = requests.session()


def dataset_list():
    """
    Get a list of datasets and the start and end time for each.
    
    __return__ list of datasets. Each entry in the list is an `OrderedDict` containing `shortName`, `start`,
    and `end`; `start` and `end` are UTC times formatted with `ISO_FORMAT`  
    __raises__ `requests.exceptions.HTTPError` if the webapp answers with an error status
    """
    response = session.get("{}/list".format(target))
    # Surface HTTP errors instead of trying to parse an error page, matching
    # the other request functions in this module.
    response.raise_for_status()

    list_response = []
    for dataset in response.json():
        entry = OrderedDict()
        entry['shortName'] = dataset['shortName']
        # start/end are divided by 1000 because the timestamp conversion
        # expects seconds.
        entry['start'] = datetime.fromtimestamp(dataset['start'] / 1000, UTC).strftime(ISO_FORMAT)
        entry['end'] = datetime.fromtimestamp(dataset['end'] / 1000, UTC).strftime(ISO_FORMAT)
        list_response.append(entry)

    return list_response


def daily_difference_average(dataset, bounding_box, start_datetime, end_datetime):
    """
    Generate an anomaly Time series for a given dataset, bounding box, and timeframe.
    
    The climatology dataset is assumed to be named `<dataset>_CLIM`.
    
    __dataset__ Name of the dataset as a String  
    __bounding_box__ Bounding box for area of interest as a `shapely.geometry.polygon.Polygon`  
    __start_datetime__ Start time as a `datetime.datetime`  
    __end_datetime__ End time as a `datetime.datetime`  
    
    __return__ List of `nexuscli.nexuscli.TimeSeries` namedtuples. The list holds a single entry whose
    `count`, `minimum` and `maximum` are `None`  
    __raises__ `AssertionError` if the response contains no data;
    `requests.exceptions.HTTPError` if the webapp answers with an error status
    """
    url = "{}/dailydifferenceaverage_spark?".format(target)

    params = {
        'dataset': dataset,
        'climatology': "{}_CLIM".format(dataset),
        'b': ','.join(str(b) for b in bounding_box.bounds),
        'startTime': start_datetime.strftime(ISO_FORMAT),
        'endTime': end_datetime.strftime(ISO_FORMAT),
    }

    response = session.get(url, params=params)
    response.raise_for_status()
    response = response.json()

    # The payload may be nested; flatten it to a 1-D sequence of entries.
    data = np.array(response['data']).flatten()

    # Raised explicitly (rather than via ``assert``) so the check still runs
    # under ``python -O``; the exception type is kept for existing callers.
    if len(data) == 0:
        raise AssertionError(
            "No data found in {} between {} and {} for Datasets {}.".format(
                bounding_box.wkt,
                start_datetime.strftime(ISO_FORMAT),
                end_datetime.strftime(ISO_FORMAT),
                dataset))

    # Each column is read by key, so the result does not depend on the key
    # order of individual entries and every array keeps the dtype of its own
    # values.
    return [
        TimeSeries(
            dataset=dataset,
            time=np.array([datetime.fromtimestamp(entry['time'], UTC) for entry in data]),
            mean=np.array([entry['mean'] for entry in data]),
            standard_deviation=np.array([entry['std'] for entry in data]),
            count=None,
            minimum=None,
            maximum=None,
        )
    ]


def time_series(datasets, bounding_box, start_datetime, end_datetime, spark=False):
    """
    Send a request to NEXUS to calculate a time series.
    
    __datasets__ Sequence (max length 2) of the name of the dataset(s); a single name may be given as a String  
    __bounding_box__ Bounding box for area of interest as a `shapely.geometry.polygon.Polygon`  
    __start_datetime__ Start time as a `datetime.datetime`  
    __end_datetime__ End time as a `datetime.datetime`  
    __spark__ Optionally use spark. Default: `False`
    
    __return__ List of `nexuscli.nexuscli.TimeSeries` namedtuples, one per dataset that has data  
    __raises__ `AssertionError` if `datasets` does not hold 1 or 2 names or if the response contains no data;
    `requests.exceptions.HTTPError` if the webapp answers with an error status
    """

    if isinstance(datasets, str):
        datasets = [datasets]

    # Explicit raises (rather than ``assert``) keep these checks alive under
    # ``python -O`` while preserving the exception type for existing callers.
    if not 0 < len(datasets) <= 2:
        raise AssertionError("datasets must be a sequence of 1 or 2 items")

    params = {
        'ds': ','.join(datasets),
        'b': ','.join(str(b) for b in bounding_box.bounds),
        'startTime': start_datetime.strftime(ISO_FORMAT),
        'endTime': end_datetime.strftime(ISO_FORMAT),
    }

    if spark:
        url = "{}/timeSeriesSpark?".format(target)
        params['spark'] = "mesos,16,32"
    else:
        url = "{}/stats?".format(target)

    response = session.get(url, params=params)
    response.raise_for_status()
    response = response.json()

    # The payload may be nested; flatten it to a 1-D sequence of entries.
    data = np.array(response['data']).flatten()

    if len(data) == 0:
        raise AssertionError(
            "No data found in {} between {} and {} for Datasets {}.".format(
                bounding_box.wkt,
                start_datetime.strftime(ISO_FORMAT),
                end_datetime.strftime(ISO_FORMAT),
                datasets))

    time_series_result = []

    # An entry's 'ds' value is the index of its dataset within response['meta'].
    for i, meta in enumerate(response['meta']):
        entries = [entry for entry in data if entry['ds'] == i]
        if not entries:
            continue

        # Each column is read by key, so the result does not depend on the
        # key order of individual entries and every array keeps the dtype of
        # its own values.
        time_series_result.append(
            TimeSeries(
                dataset=meta['shortName'],
                time=np.array([datetime.fromtimestamp(entry['time'], UTC) for entry in entries]),
                mean=np.array([entry['mean'] for entry in entries]),
                standard_deviation=np.array([entry['std'] for entry in entries]),
                count=np.array([entry['cnt'] for entry in entries]),
                minimum=np.array([entry['min'] for entry in entries]),
                maximum=np.array([entry['max'] for entry in entries]),
            )
        )

    return time_series_result


def subset(dataset, bounding_box, start_datetime, end_datetime, parameter, metadata_filter):
    """
    Fetches point values for a given dataset and geographical area or metadata criteria and time range.

    __dataset__ Name of the dataset as a String  
    __bounding_box__ Bounding box for area of interest as a `shapely.geometry.polygon.Polygon`. When given,
    `metadata_filter` is not sent  
    __start_datetime__ Start time as a `datetime.datetime`  
    __end_datetime__ End time as a `datetime.datetime`  
    __parameter__ The parameter of interest. One of 'sst', 'sss', 'wind' or None  
    __metadata_filter__ List of key:value String metadata criteria. Only used when no bounding box is given  

    __return__ List of `nexuscli.nexuscli.Point` namedtuples  
    __raises__ `AssertionError` if the response contains no data;
    `requests.exceptions.HTTPError` if the webapp answers with an error status
    """
    url = "{}/datainbounds?".format(target)

    params = {
        'ds': dataset,
        'startTime': start_datetime.strftime(ISO_FORMAT),
        'endTime': end_datetime.strftime(ISO_FORMAT),
        'parameter': parameter,
    }
    # The bounding box takes precedence; the metadata filter is the fallback.
    if bounding_box:
        params['b'] = ','.join(str(b) for b in bounding_box.bounds)
    elif metadata_filter:
        params['metadataFilter'] = metadata_filter

    response = session.get(url, params=params)
    response.raise_for_status()
    response = response.json()

    # The payload may be nested; flatten it to a 1-D sequence of entries.
    data = np.array(response['data']).flatten()

    # Raised explicitly (rather than via ``assert``) so the check still runs
    # under ``python -O``; the exception type is kept for existing callers.
    if len(data) == 0:
        raise AssertionError(
            "No data found in {} between {} and {} for Datasets {}.".format(
                bounding_box.wkt if bounding_box is not None else metadata_filter,
                start_datetime.strftime(ISO_FORMAT),
                end_datetime.strftime(ISO_FORMAT),
                dataset))

    return [
        Point(
            time=datetime.fromtimestamp(d['time'], UTC),
            longitude=d['longitude'],
            latitude=d['latitude'],
            # Only the first element of the entry's 'data' list is exposed.
            variable=d['data'][0]
        )
        for d in data
    ]

Module variables

var ISO_FORMAT

var session

var target

Functions

def daily_difference_average(

dataset, bounding_box, start_datetime, end_datetime)

Generate an anomaly Time series for a given dataset, bounding box, and timeframe.

dataset Name of the dataset as a String
bounding_box Bounding box for area of interest as a shapely.geometry.polygon.Polygon
start_datetime Start time as a datetime.datetime
end_datetime End time as a datetime.datetime

return List of TimeSeries namedtuples

def daily_difference_average(dataset, bounding_box, start_datetime, end_datetime):
    """
    Generate an anomaly Time series for a given dataset, bounding box, and timeframe.
    
    The climatology dataset is assumed to be named `<dataset>_CLIM`.
    
    __dataset__ Name of the dataset as a String  
    __bounding_box__ Bounding box for area of interest as a `shapely.geometry.polygon.Polygon`  
    __start_datetime__ Start time as a `datetime.datetime`  
    __end_datetime__ End time as a `datetime.datetime`  
    
    __return__ List of `nexuscli.nexuscli.TimeSeries` namedtuples. The list holds a single entry whose
    `count`, `minimum` and `maximum` are `None`  
    __raises__ `AssertionError` if the response contains no data;
    `requests.exceptions.HTTPError` if the webapp answers with an error status
    """
    url = "{}/dailydifferenceaverage_spark?".format(target)

    params = {
        'dataset': dataset,
        'climatology': "{}_CLIM".format(dataset),
        'b': ','.join(str(b) for b in bounding_box.bounds),
        'startTime': start_datetime.strftime(ISO_FORMAT),
        'endTime': end_datetime.strftime(ISO_FORMAT),
    }

    response = session.get(url, params=params)
    response.raise_for_status()
    response = response.json()

    # The payload may be nested; flatten it to a 1-D sequence of entries.
    data = np.array(response['data']).flatten()

    # Raised explicitly (rather than via ``assert``) so the check still runs
    # under ``python -O``; the exception type is kept for existing callers.
    if len(data) == 0:
        raise AssertionError(
            "No data found in {} between {} and {} for Datasets {}.".format(
                bounding_box.wkt,
                start_datetime.strftime(ISO_FORMAT),
                end_datetime.strftime(ISO_FORMAT),
                dataset))

    # Each column is read by key, so the result does not depend on the key
    # order of individual entries and every array keeps the dtype of its own
    # values.
    return [
        TimeSeries(
            dataset=dataset,
            time=np.array([datetime.fromtimestamp(entry['time'], UTC) for entry in data]),
            mean=np.array([entry['mean'] for entry in data]),
            standard_deviation=np.array([entry['std'] for entry in data]),
            count=None,
            minimum=None,
            maximum=None,
        )
    ]

def dataset_list(

)

Get a list of datasets and the start and end time for each.

return list of datasets. Each entry in the list contains shortName, start, and end

def dataset_list():
    """
    Get a list of datasets and the start and end time for each.
    
    __return__ list of datasets. Each entry in the list is an `OrderedDict` containing `shortName`, `start`,
    and `end`; `start` and `end` are UTC times formatted with `ISO_FORMAT`  
    __raises__ `requests.exceptions.HTTPError` if the webapp answers with an error status
    """
    response = session.get("{}/list".format(target))
    # Surface HTTP errors instead of trying to parse an error page, matching
    # the other request functions in this module.
    response.raise_for_status()

    list_response = []
    for dataset in response.json():
        entry = OrderedDict()
        entry['shortName'] = dataset['shortName']
        # start/end are divided by 1000 because the timestamp conversion
        # expects seconds.
        entry['start'] = datetime.fromtimestamp(dataset['start'] / 1000, UTC).strftime(ISO_FORMAT)
        entry['end'] = datetime.fromtimestamp(dataset['end'] / 1000, UTC).strftime(ISO_FORMAT)
        list_response.append(entry)

    return list_response

def set_target(

url, use_session=True)

Set the URL for the NEXUS webapp endpoint.

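url URL for NEXUS webapp endpoint
use_session If False, requests are sent through the plain requests module instead of a shared session. Default: True
return None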

def set_target(url, use_session=True):
    """
    Set the URL for the NEXUS webapp endpoint.  
    
    __url__ URL for NEXUS webapp endpoint  
    __use_session__ When `True` (the default) requests go through a shared `requests` session. When `False`
    requests are sent through the plain `requests` module instead of a session.  
    __return__ None
    """
    global target, session
    target = url

    if not use_session:
        # The requests module offers the same get() call as a session object,
        # so it can stand in for one when sessions are not wanted.
        session = requests
    elif session is requests:
        # Sessions were switched off by an earlier call; switch them back on.
        # Any other session object already in place is left untouched.
        session = requests.session()

def subset(

dataset, bounding_box, start_datetime, end_datetime, parameter, metadata_filter)

Fetches point values for a given dataset and geographical area or metadata criteria and time range.

dataset Name of the dataset as a String
bounding_box Bounding box for area of interest as a shapely.geometry.polygon.Polygon
start_datetime Start time as a datetime.datetime
end_datetime End time as a datetime.datetime
parameter The parameter of interest. One of 'sst', 'sss', 'wind' or None
metadata_filter List of key:value String metadata criteria

return List of Point namedtuples

def subset(dataset, bounding_box, start_datetime, end_datetime, parameter, metadata_filter):
    """
    Fetches point values for a given dataset and geographical area or metadata criteria and time range.

    __dataset__ Name of the dataset as a String  
    __bounding_box__ Bounding box for area of interest as a `shapely.geometry.polygon.Polygon`. When given,
    `metadata_filter` is not sent  
    __start_datetime__ Start time as a `datetime.datetime`  
    __end_datetime__ End time as a `datetime.datetime`  
    __parameter__ The parameter of interest. One of 'sst', 'sss', 'wind' or None  
    __metadata_filter__ List of key:value String metadata criteria. Only used when no bounding box is given  

    __return__ List of `nexuscli.nexuscli.Point` namedtuples  
    __raises__ `AssertionError` if the response contains no data;
    `requests.exceptions.HTTPError` if the webapp answers with an error status
    """
    url = "{}/datainbounds?".format(target)

    params = {
        'ds': dataset,
        'startTime': start_datetime.strftime(ISO_FORMAT),
        'endTime': end_datetime.strftime(ISO_FORMAT),
        'parameter': parameter,
    }
    # The bounding box takes precedence; the metadata filter is the fallback.
    if bounding_box:
        params['b'] = ','.join(str(b) for b in bounding_box.bounds)
    elif metadata_filter:
        params['metadataFilter'] = metadata_filter

    response = session.get(url, params=params)
    response.raise_for_status()
    response = response.json()

    # The payload may be nested; flatten it to a 1-D sequence of entries.
    data = np.array(response['data']).flatten()

    # Raised explicitly (rather than via ``assert``) so the check still runs
    # under ``python -O``; the exception type is kept for existing callers.
    if len(data) == 0:
        raise AssertionError(
            "No data found in {} between {} and {} for Datasets {}.".format(
                bounding_box.wkt if bounding_box is not None else metadata_filter,
                start_datetime.strftime(ISO_FORMAT),
                end_datetime.strftime(ISO_FORMAT),
                dataset))

    return [
        Point(
            time=datetime.fromtimestamp(d['time'], UTC),
            longitude=d['longitude'],
            latitude=d['latitude'],
            # Only the first element of the entry's 'data' list is exposed.
            variable=d['data'][0]
        )
        for d in data
    ]

def time_series(

datasets, bounding_box, start_datetime, end_datetime, spark=False)

Send a request to NEXUS to calculate a time series.

datasets Sequence (max length 2) of the name of the dataset(s)
bounding_box Bounding box for area of interest as a shapely.geometry.polygon.Polygon
start_datetime Start time as a datetime.datetime
end_datetime End time as a datetime.datetime
spark Optionally use spark. Default: False

return List of TimeSeries namedtuples

def time_series(datasets, bounding_box, start_datetime, end_datetime, spark=False):
    """
    Send a request to NEXUS to calculate a time series.
    
    __datasets__ Sequence (max length 2) of the name of the dataset(s); a single name may be given as a String  
    __bounding_box__ Bounding box for area of interest as a `shapely.geometry.polygon.Polygon`  
    __start_datetime__ Start time as a `datetime.datetime`  
    __end_datetime__ End time as a `datetime.datetime`  
    __spark__ Optionally use spark. Default: `False`
    
    __return__ List of `nexuscli.nexuscli.TimeSeries` namedtuples, one per dataset that has data  
    __raises__ `AssertionError` if `datasets` does not hold 1 or 2 names or if the response contains no data;
    `requests.exceptions.HTTPError` if the webapp answers with an error status
    """

    if isinstance(datasets, str):
        datasets = [datasets]

    # Explicit raises (rather than ``assert``) keep these checks alive under
    # ``python -O`` while preserving the exception type for existing callers.
    if not 0 < len(datasets) <= 2:
        raise AssertionError("datasets must be a sequence of 1 or 2 items")

    params = {
        'ds': ','.join(datasets),
        'b': ','.join(str(b) for b in bounding_box.bounds),
        'startTime': start_datetime.strftime(ISO_FORMAT),
        'endTime': end_datetime.strftime(ISO_FORMAT),
    }

    if spark:
        url = "{}/timeSeriesSpark?".format(target)
        params['spark'] = "mesos,16,32"
    else:
        url = "{}/stats?".format(target)

    response = session.get(url, params=params)
    response.raise_for_status()
    response = response.json()

    # The payload may be nested; flatten it to a 1-D sequence of entries.
    data = np.array(response['data']).flatten()

    if len(data) == 0:
        raise AssertionError(
            "No data found in {} between {} and {} for Datasets {}.".format(
                bounding_box.wkt,
                start_datetime.strftime(ISO_FORMAT),
                end_datetime.strftime(ISO_FORMAT),
                datasets))

    time_series_result = []

    # An entry's 'ds' value is the index of its dataset within response['meta'].
    for i, meta in enumerate(response['meta']):
        entries = [entry for entry in data if entry['ds'] == i]
        if not entries:
            continue

        # Each column is read by key, so the result does not depend on the
        # key order of individual entries and every array keeps the dtype of
        # its own values.
        time_series_result.append(
            TimeSeries(
                dataset=meta['shortName'],
                time=np.array([datetime.fromtimestamp(entry['time'], UTC) for entry in entries]),
                mean=np.array([entry['mean'] for entry in entries]),
                standard_deviation=np.array([entry['std'] for entry in entries]),
                count=np.array([entry['cnt'] for entry in entries]),
                minimum=np.array([entry['min'] for entry in entries]),
                maximum=np.array([entry['max'] for entry in entries]),
            )
        )

    return time_series_result

Classes

class Point

An object containing Point attributes.

Ancestors (in MRO)

  • Point
  • builtins.tuple
  • builtins.object

Instance variables

var latitude

latitude value

var longitude

longitude value

var time

time value as datetime object

var variable

dictionary of variable values

class TimeSeries

An object containing Time Series arrays.

Ancestors (in MRO)

  • TimeSeries
  • builtins.tuple
  • builtins.object

Instance variables

var count

numpy array containing counts

var dataset

Name of the Dataset

var maximum

numpy array containing maximums

var mean

numpy array containing means

var minimum

numpy array containing minimums

var standard_deviation

numpy array containing standard deviations

var time

numpy array containing times as datetime objects