Example: Using a paged query to retrieve a large number of records

Content Platform Tenant Management Help

Version
9.7.x
File Size
4269 KB
Audience
anonymous
Part Number
MK-95HCPH002-19

The Python example below implements a paged query that uses multiple requests to retrieve a large number of operation records in batches of 50 per request. This query retrieves records for all create operations on objects in the /customers/widgetco/orders directory in the default namespace and returns basic information for each record.

The query uses a JSON request body and requests results in JSON format.

#!/usr/bin/env python
# encoding: utf-8

import copy
import io
import json
import StringIO
import time

import pycurl

class OperationBasedQueryTool(object):
    """ Helper that issues paged, operation-based query requests with
    pycurl and tracks whether the server has reported the result set as
    complete.

    Typical use: setConnectionInfo(), setQueryParameters(), then call
    issueQuery() followed by setLastResult() until self.complete is True,
    and finally closeConnection(). """

    # Template for the JSON request body. Each instance works on its own
    # deep copy (see __init__) so that one tool never changes the
    # parameters of another.
    queryArguments = {'operation': {'count': 1, 'verbose': 'false',
        'objectProperties': 'utf8Name, type, size',
        'systemMetadata': {'changeTime': {},
            'directories': {'directory': []},
            'namespaces': {'namespace': []},
            'transactions': {'transaction': []}}}}

    def __init__(self):
        # True once a response carries the status code "COMPLETE".
        self.complete = False
        # Created by setConnectionInfo(); None until then.
        self.curl = None
        # Per-instance copy of the request template.
        self.queryArguments = copy.deepcopy(type(self).queryArguments)

    def setConnectionInfo(self, authToken, hostName, urlName):
        """ Set all connection info for subsequent query requests.
        @param authToken: authorization token
        @param hostName: Hostname of the target cluster
        @param urlName: Full URL for the query interface """
        requestHeaders = [
            "Authorization: HCP %s" % authToken,
            "Accept:application/json",
            "Content-Type: application/json",
            "Host: admin.%s" % hostName,
        ]
        self.curl = pycurl.Curl()
        # Make perform() raise on HTTP status codes >= 400.
        self.curl.setopt(pycurl.FAILONERROR, 1)
        self.curl.setopt(pycurl.HTTPHEADER, requestHeaders)
        self.curl.setopt(pycurl.URL, urlName)
        self.curl.setopt(pycurl.CUSTOMREQUEST, 'POST')
        # NOTE(review): certificate and host name verification are turned
        # off here, exactly as in the original example. That is only
        # acceptable against a trusted test system; enable both checks
        # for production use.
        self.curl.setopt(pycurl.SSL_VERIFYPEER, 0)
        self.curl.setopt(pycurl.SSL_VERIFYHOST, 0)
        self.curl.setopt(pycurl.VERBOSE, 0)

    def setQueryParameters(self, count, verbose, directories, namespaces,
                           transactions, objectProperties=None,
                           startTimeMillis=0, endTimeMillis=None):
        """ Set all parameters related to the query.
        @param count: The number of results to return for each query.
        @param verbose: Indication to return all object property values.
          Value is either true or false.
        @param directories: Dictionary containing list of directory paths.
        @param namespaces: Dictionary containing list of namespaces.
        @param transactions: Dictionary containing list of transaction
          types.
        @param objectProperties: String containing comma-separated list of
          object properties to return for each operation record. When
          omitted (None), the property list already stored in
          queryArguments is left unchanged.
        @param startTimeMillis: The starting timestamp in milliseconds of
          the query window. Default is 0 (zero).
        @param endTimeMillis: The ending timestamp in milliseconds of the
          query window. When omitted (None), the current time at the
          moment of this call is used. """
        if endTimeMillis is None:
            # Evaluate "now" per call; a default written in the signature
            # would be frozen at the time the class was defined.
            endTimeMillis = int(round(time.time() * 1000))

        operation = self.queryArguments['operation']
        operation['count'] = count
        operation['verbose'] = verbose
        if objectProperties is not None:
            operation['objectProperties'] = objectProperties

        systemMetadata = operation['systemMetadata']
        systemMetadata['directories'] = directories
        systemMetadata['namespaces'] = namespaces
        systemMetadata['transactions'] = transactions
        systemMetadata['changeTime']['start'] = startTimeMillis
        systemMetadata['changeTime']['end'] = endTimeMillis

    def issueQuery(self):
        """ Issue an operation-based query request.
        @return: The decoded JSON response as a dictionary.
        @raise Exception: If the HTTP response code is not 200. """
        requestBody = json.dumps(self.queryArguments)
        self.curl.setopt(pycurl.POSTFIELDS, requestBody)
        # pycurl hands the response body over as bytes, so collect it in a
        # byte buffer and decode once at the end.
        responseBuffer = io.BytesIO()
        self.curl.setopt(pycurl.WRITEFUNCTION, responseBuffer.write)
        print("Performing query with the following arguments: %s"
              % requestBody)
        try:
            self.curl.perform()
            responseCode = self.curl.getinfo(pycurl.RESPONSE_CODE)
            if responseCode != 200:
                raise Exception(
                    "Error: Expected result code 200, but received %s"
                    % responseCode)
            # Parse as JSON; never eval() data received from the network.
            queryResult = json.loads(
                responseBuffer.getvalue().decode('utf-8'))
        finally:
            responseBuffer.close()
        if queryResult['queryResult']['status']['code'] == "COMPLETE":
            self.complete = True
        return queryResult

    def setLastResult(self, lastResult):
        """ Sets the last result we received as the starting point for the
        next query we issue.
        @param lastResult: The dictionary containing the last result
          returned by the previous query. """
        self.queryArguments['operation']['lastResult'] = {
            'urlName': lastResult['urlName'],
            'changeTimeMilliseconds': lastResult['changeTimeMilliseconds'],
            'version': str(lastResult['version']),
        }

    def closeConnection(self):
        """ Cleanup the curl connection after we are finished with it.
        Does nothing when no connection was ever opened. """
        if self.curl is not None:
            self.curl.close()
            self.curl = None

  if __name__ == '__main__':
      # Example driver: page through all "create" operation records for
      # one directory in the default namespace, 50 records per request,
      # and collect the URL of every object found.
      authToken = "bXl1c2Vy:3f3c6784e97531774380db177774ac8d"
      hostName = "clusterName.com"
      urlName = "https://admin.%s/query" % hostName
      resultsPerQuery = 50
      objectUrls = []
      queryTool = OperationBasedQueryTool()
      queryTool.setConnectionInfo(authToken, hostName, urlName)
      # objectProperties is passed explicitly; setQueryParameters declares
      # it without a default, so leaving it out raises a TypeError.
      queryTool.setQueryParameters(resultsPerQuery, "false",
          {'directory': ['/customers/widgetco/orders']},
          {'namespace': ['Default.Default']},
          {'transaction': ['create']},
          'utf8Name, type, size')
      try:
          while not queryTool.complete:
              queryResults = queryTool.issueQuery()
              resultSet = queryResults['queryResult'].get('resultSet') or []
              if not resultSet:
                  # An empty page gives no record to continue from. Stop
                  # here instead of indexing an empty list or re-sending
                  # the same request forever.
                  break
              objectUrls.extend(result['urlName'] for result in resultSet)
              # The last record of this page is the starting point of the
              # next request.
              queryTool.setLastResult(resultSet[-1])
          print("Query completed. Total objects found: %d" % len(objectUrls))
      finally:
          queryTool.closeConnection()