DAS Command Line Interface (CLI) tool
The DAS Command Line Interface (CLI) tool can be downloaded directly from the DAS server. It is a Python-based tool and does not require any additional dependencies, although Python version 2.6 or above is required. Its usage is very simple:
Usage: das_client.py [options]
For more help please visit https://cmsweb.cern.ch/das/faq
Options:
-h, --help show this help message and exit
-v VERBOSE, --verbose=VERBOSE
verbose output
--query=QUERY specify query for your request
--host=HOST host name of DAS cache server, default is
https://cmsweb.cern.ch
--idx=IDX start index for returned result set, aka pagination,
use w/ limit (default is 0)
--limit=LIMIT number of returned results (default is 10), use
--limit=0 to show all results
--format=FORMAT specify return data format (json or plain), default
plain.
--threshold=THRESHOLD
query waiting threshold in sec, default is 5 minutes
--key=CKEY specify private key file name
--cert=CERT specify private certificate file name
--retry=RETRY specify number of retries upon busy DAS server message
--das-headers show DAS headers in JSON format
--base=BASE specify power base for size_format, default is 10 (can
be 2)
The query parameter specifies an input DAS query <das_queries>, while the format parameter can be used to get results in either JSON or plain (suitable for cut and paste) data format. Here is an example of using the das_client tool to retrieve information about a dataset pattern:
python das_client.py --query="dataset=/ZMM*/*/*"
Showing 1-10 out of 2 results, for more results use --idx/--limit options
/ZMM_14TeV/Summer12-DESIGN42_V17_SLHCTk-v1/GEN-SIM
/ZMM/Summer11-DESIGN42_V11_428_SLHC1-v1/GEN-SIM
And here is the same output using the JSON data format; the auxiliary DAS headers are also requested:
python das_client.py --query="dataset=/ZMM*/*/*" --format=JSON --das-headers
{'apilist': ['das_core', 'fakeDatasetPattern'],
'ctime': 0.0015709400177,
'data': [{'_id': '523dcd7f0ec3dc12198a44c5',
'cache_id': ['523dcd7f0ec3dc12198a44c3'],
'das': {'api': ['fakeDatasetPattern'],
'condition_keys': ['dataset.name'],
'expire': 1379782315.848377,
'instance': 'cms_dbs_prod_global',
'primary_key': 'dataset.name',
'record': 1,
'services': [{'dbs': ['dbs']}],
'system': ['dbs'],
'ts': 1379782015.863179},
'das_id': ['523dcd7d0ec3dc12198a4498'],
'dataset': [{'created_by': '/DC=ch/DC=cern/OU=computers/CN=wmagent/vocms216.cern.ch',
'creation_time': '2012-02-24 01:40:40',
'datatype': 'mc',
'modification_time': '2012-02-29 21:25:52',
'modified_by': '/DC=org/DC=doegrids/OU=People/CN=Alan Malta Rodrigues 4861',
'name': '/ZMM_14TeV/Summer12-DESIGN42_V17_SLHCTk-v1/GEN-SIM',
'status': 'VALID',
'tag': 'DESIGN42_V17::All'}],
'qhash': 'e5ced95dd57a5cfe1a3126a22a85a301'},
{'_id': '523dcd7f0ec3dc12198a44c6',
'cache_id': ['523dcd7f0ec3dc12198a44c4'],
'das': {'api': ['fakeDatasetPattern'],
'condition_keys': ['dataset.name'],
'expire': 1379782315.848377,
'instance': 'cms_dbs_prod_global',
'primary_key': 'dataset.name',
'record': 1,
'services': [{'dbs': ['dbs']}],
'system': ['dbs'],
'ts': 1379782015.863179},
'das_id': ['523dcd7d0ec3dc12198a4498'],
'dataset': [{'created_by': 'cmsprod@cmsprod01.hep.wisc.edu',
'creation_time': '2011-12-29 17:47:25',
'datatype': 'mc',
'modification_time': '2012-01-05 17:40:17',
'modified_by': '/DC=org/DC=doegrids/OU=People/CN=Ajit Kumar Mohapatra 867118',
'name': '/ZMM/Summer11-DESIGN42_V11_428_SLHC1-v1/GEN-SIM',
'status': 'VALID',
'tag': 'DESIGN42_V11::All'}],
'qhash': 'e5ced95dd57a5cfe1a3126a22a85a301'}],
'incache': True,
'mongo_query': {'fields': ['dataset'],
'instance': 'cms_dbs_prod_global',
'spec': {'dataset.name': '/ZMM*/*/*'}},
'nresults': 2,
'status': 'ok',
'timestamp': 1379782017.68}
Using DAS CLI tool from other applications
It is possible to plug the DAS CLI tool into other Python applications. This can be done as follows:
from das_client import get_data
# invoke DAS CLI call for given host/query
# host: hostname of DAS server, e.g. https://cmsweb.cern.ch
# query: DAS query, e.g. dataset=/ZMM*/*/*
# idx: start index for pagination, e.g. 0
# limit: number of results to return, e.g. 10, put 0 to get all results
# debug: True/False flag to get more debugging information
# threshold: time in seconds to wait for a DAS response, default is 300 sec
# ckey=None, cert=None are parameters which you can use to pass around
# your GRID credentials (private key and certificate file names)
# das_headers: True/False flag to get DAS headers, default is True
# please note that prior to the 1.9.X release the return type is str,
# while from 1.9.X on the return type is JSON
data = get_data(host, query, idx, limit, debug, threshold=300, ckey=None,
cert=None, das_headers=True)
Please note that the aforementioned code snippet requires loading das_client.py, which is distributed within CMSSW. Due to CMSSW install policies the version of das_client.py may be quite old. If you need up-to-date das_client.py functionality you can follow this recipe. The code below downloads das_client.py directly from the cmsweb site, compiles it and uses it in your application:
import os
import json
import urllib2
import httplib
import tempfile
class HTTPSClientHdlr(urllib2.HTTPSHandler):
    """
    Simple HTTPS client authentication class based on provided
    key/cert information.

    key and cert are the file names of the private key and of the
    certificate; they are handed to httplib.HTTPSConnection as
    key_file/cert_file. Any non-zero level switches the underlying
    urllib2 handler to debuglevel=1.
    """
    def __init__(self, key=None, cert=None, level=0):
        if level:
            urllib2.HTTPSHandler.__init__(self, debuglevel=1)
        else:
            urllib2.HTTPSHandler.__init__(self)
        self.key = key
        self.cert = cert

    def https_open(self, req):
        """Open request method"""
        # Rather than pass in a reference to a connection class, we pass in
        # a reference to a function which, for all intents and purposes,
        # will behave as a constructor
        return self.do_open(self.get_connection, req)

    def get_connection(self, host, timeout=300):
        """
        Connection method: build the HTTPS connection for the given host.

        The timeout is forwarded to the connection so that the value
        supplied by the caller is honoured instead of being dropped.
        The client key/certificate are attached only when a key was given.
        """
        if self.key:
            return httplib.HTTPSConnection(host, key_file=self.key,
                                           cert_file=self.cert,
                                           timeout=timeout)
        return httplib.HTTPSConnection(host, timeout=timeout)
class DASClient(object):
    """
    DASClient object.

    On construction it downloads the das_client code from cmsweb,
    executes it in a private namespace and keeps the get_data function
    found there as self.get_data.
    """
    def __init__(self, debug=0):
        super(DASClient, self).__init__()
        self.debug = debug
        self.get_data = self.load_das_client()

    def get_das_client(self, debug=0):
        """
        Download das_client code from cmsweb and return its source text.

        The GRID key/certificate from ~/.globus are used for client
        authentication only when both files actually exist; otherwise a
        plain handler is installed and the default HTTPS handling of
        urllib2 applies.
        """
        url = 'https://cmsweb.cern.ch/das/cli'
        # expanduser honours HOME when it is set and does not raise
        # KeyError when it is not
        globus = os.path.join(os.path.expanduser('~'), '.globus')
        ckey = os.path.join(globus, 'userkey.pem')
        cert = os.path.join(globus, 'usercert.pem')
        req = urllib2.Request(url=url, headers={})
        # the joined paths are always non-empty strings, so test for the
        # files themselves rather than for the truthiness of the names
        if os.path.isfile(ckey) and os.path.isfile(cert):
            hdlr = HTTPSClientHdlr(ckey, cert, debug)
        else:
            hdlr = urllib2.HTTPHandler(debuglevel=debug)
        opener = urllib2.build_opener(hdlr)
        fdesc = opener.open(req)
        try:
            cli = fdesc.read()
        finally:
            # release the connection even if reading fails
            fdesc.close()
        return cli

    def load_das_client(self):
        """
        Load DAS client module and return its get_data function.

        Raises KeyError if the downloaded code does not define get_data.
        """
        cli = self.get_das_client(self.debug)
        # NOTE(review): the code below executes source fetched over the
        # network with the privileges of the caller; use it only against
        # a server you trust.
        # compile python code as exec statement
        obj = compile(cli, '<string>', 'exec')
        # define execution namespace
        namespace = {}
        # execute compiled python code in given namespace
        exec(obj, namespace)
        # return get_data object from namespace
        return namespace['get_data']

    def call(self, query, idx=0, limit=0, debug=0):
        """
        Query DAS data-service on https://cmsweb.cern.ch.

        A string reply from get_data is decoded with json.loads, any
        other reply is returned unchanged.
        """
        host = 'https://cmsweb.cern.ch'
        data = self.get_data(host, query, idx, limit, debug)
        if isinstance(data, basestring):
            return json.loads(data)
        return data
if __name__ == '__main__':
    # Demo: fetch the client, run one dataset pattern query and dump
    # the result count and records when DAS reports success.
    das = DASClient()
    query = "/ZMM*/*/*"
    result = das.call(query)
    if result['status'] == 'ok':
        nres = result['nresults']
        data = result['data']
        print("Query=%s, #results=%s" % (query, nres))
        print(data)
Here we provide a simple example of how to use das_client to find dataset summary information.
# PLEASE NOTE: to use this example download das_client.py from
# cmsweb.cern.ch/das/cli
# system modules
import os
import sys
import json
from das_client import get_data
def drop_das_fields(row):
    """
    Drop DAS specific headers in given row.

    Removes the 'das', 'das_id', 'cache_id' and 'qhash' keys from row
    in place; keys which are not present are ignored. Returns None.
    """
    for key in ['das', 'das_id', 'cache_id', 'qhash']:
        # dict.has_key is deprecated (and gone in Python 3); pop with a
        # default removes the key if present and is a no-op otherwise
        row.pop(key, None)
def get_info(query):
    """
    Helper function to get information for given query.

    Calls get_data against https://cmsweb.cern.ch with idx=0, limit=0
    and debugging switched off. A string reply is decoded with
    json.loads first. When the decoded reply has status 'ok' its 'data'
    entry is returned; otherwise the reply is returned exactly as it
    came back from get_data.
    """
    reply = get_data('https://cmsweb.cern.ch', query, 0, 0, False)
    if isinstance(reply, basestring):
        payload = json.loads(reply)
    else:
        payload = reply
    if payload.get('status') != 'ok':
        return reply
    return payload.get('data')
def get_datasets(query):
    """
    Helper function to get list of datasets for given query pattern.

    Generator: yields the 'name' of every entry found under the
    'dataset' key of each record returned by get_info(query).
    """
    names = (entry['name']
             for record in get_info(query)
             for entry in record['dataset'])
    for name in names:
        yield name
def get_summary(query):
    """
    Helper function to print dataset summary information either for a
    single dataset or for a dataset pattern.

    A query containing '*' is treated as a pattern: every matching
    dataset name is looked up with its own "dataset=<name>" query.
    Each printed row has the DAS specific headers removed first.
    """
    if '*' in query:
        for name in get_datasets(query):
            rows = get_info("dataset=%s" % name)
            print("\n### dataset %s" % name)
            for row in rows:
                drop_das_fields(row)
                print(row)
        return
    print("\n### query %s" % query)
    for row in get_info(query):
        drop_das_fields(row)
        print(row)
if __name__ == '__main__':
    # Sample 1: a dataset pattern query
    query = "dataset=/ZMM*/*/*"
    # Sample 2: a specific dataset in a certain DBS instance; this
    # assignment replaces sample 1, so it is the query which is run
    query = ("dataset=/8TeV_T2tt_2j_semilepts_200_75_FSim526_Summer12_minus_v2"
             "/alkaloge-MG154_START52_V9_v2/USER"
             " instance=cms_dbs_ph_analysis_02")
    get_summary(query)