Classes | Functions | Variables
cache_state Namespace Reference

Classes

class  ProgressBar
 

Functions

def make_curl ()
 
def filename_to_namespace (filename)
 
def get_file_qos (c, filename)
 
def is_file_online (c, filename)
 
def request_prestage (c, filename)
 
def is_file_online_pnfs (f)
 
def FilelistCacheCount (files, verbose_flag, METHOD="rest")
 
def FilelistPrestageRequest (files, verbose_flag)
 
def enstore_locations_to_paths (samlist, sparsification=1)
 

Variables

 X509_USER_PROXY = os.getenv("X509_USER_PROXY", "/tmp/x509up_u%d" % os.getuid())
 
 PNFS_DIR_PATTERN = re.compile(r"/pnfs/(?P<area>[^/]+)")
 
 ENSTORE_PATTERN = re.compile(r"^enstore:([^(]+)(\([^)]+\))?")
 
string DCACHE_REST_BASE_URL = "https://fndca.fnal.gov:3880/api/v1/namespace"
 
 parser = argparse.ArgumentParser(epilog=examples, formatter_class=argparse.RawDescriptionHelpFormatter)
 
 gp = parser.add_mutually_exclusive_group()
 
 nargs
 
 default
 
 metavar
 
 help
 
 dest
 
 type
 
 int
 
 action
 
 False
 
 choices
 
 args = parser.parse_args()
 
 stdout
 
 stderr
 
 filelist = None if args.dataset_name else args.files
 
 sam = swc.SAMWebClient("dune")
 
int cache_count = 0
 
 dataset_name
 
 end
 
 dimensions = None
 
 samlist = sam.listFilesAndLocations(dimensions=dimensions, filter_path="enstore")
 
 thislist = sam.listFiles(defname=args.dataset_name)
 
int a = 0
 
 locs = sam.locateFile(f)
 
 l = loc["location"]
 
 m = ENSTORE_PATTERN.match(l)
 
 directory = m.group(1)
 
 fullpath = os.path.join(directory, f)
 
 f
 
 file
 
int miss_count = 0
 
 n_files = len(filelist)
 
int announce = n_files>1
 
 ngood
 
 n
 
 pending_count
 
 total = float(cache_count + miss_count)
 
int cache_frac_str = (" (%d%%)" % round(cache_count/total*100)) if total>0
 
int miss_frac_str = (" (%d%%)" % round(miss_count/total*100)) if total>0
 
string pending_string = ""
 

Function Documentation

def cache_state.enstore_locations_to_paths (   samlist,
  sparsification = 1 
)
Convert a list of enstore locations as returned by
   samweb.listFilesAndLocations() into plain pnfs paths. Sparsify by
   `sparsification`

Definition at line 245 of file cache_state.py.

def enstore_locations_to_paths(samlist, sparsification=1):
    """Turn enstore locations into plain pnfs paths.

    `samlist` is a sequence of entries as returned by
    samweb.listFilesAndLocations(); element 0 of each entry is the
    location string and element 1 is the file name. Only every
    `sparsification`-th entry is considered.

    Entries whose location does not match ENSTORE_PATTERN are reported
    on standard output and left out of the result.

    Returns: list of paths, each the directory captured from the
    location joined with the file name.
    """
    paths = []
    for entry in samlist[::sparsification]:
        location = entry[0]
        match = ENSTORE_PATTERN.match(location)
        if not match:
            print( "enstore_locations_to_paths got a non-enstore location", location )
            continue
        paths.append(os.path.join(match.group(1), entry[1]))
    return paths
259 
260 examples="""
261 Examples:
262 
263  Find the cache state of one file:
264 
265  %(prog)s np04_raw_run004513_0008_dl5.root
266 
267  Find the cache state of multiple files. With -v, each file's status
268  is shown; otherwise just a count is shown. Can mix-and-match full
269  paths and SAM filenames:
270 
271  %(prog)s -v /pnfs/dune/tape_backed/myfile.root np04_raw_run004513_0008_dl5.root
272 
273  Summarize the cache state of a SAM dataset:
274 
275  %(prog)s -d protodune-sp_runset_4513_raw_v0
276 
277  Show the cache state of each file matching a SAM query:
278 
279  %(prog)s -v --dim 'run_type protodune-sp and run_number 4513 and data_tier raw'
280 
281  Prestage an individual file, by its SAM filename:
282 
283  %(prog)s -p np04_raw_run004513_0008_dl5.root
284 
285  (In subsequent queries, the file will show up as "pending" until it
286  arrives on disk)
287 
288  Prestage an entire dataset (like samweb prestage-dataset):
289 
290  %(prog)s -p -d protodune-sp_runset_4513_raw_v0
291 """
292 
def enstore_locations_to_paths(samlist, sparsification=1)
Definition: cache_state.py:245
def cache_state.FilelistCacheCount (   files,
  verbose_flag,
  METHOD = "rest" 
)

Definition at line 186 of file cache_state.py.

def FilelistCacheCount(files, verbose_flag, METHOD="rest"):
    """Count how many of `files` are online, and how many are pending.

    files:        sequence of file names/paths to check
    verbose_flag: if true, print the state of each file as it is
                  checked (and do not show a progress bar)
    METHOD:       "rest" to query each file with get_file_qos(), or
                  "pnfs" to query it with is_file_online_pnfs()

    Returns: (cached, pending, n) where `cached` is the number of files
             found online, `pending` is the number of files whose
             target QoS contains "disk" (always -1 for the "pnfs"
             method, which cannot count them) and `n` is the number of
             files checked.

    Raises: ValueError if METHOD is not "rest" or "pnfs".
    """
    # Explicit check rather than `assert`, which is stripped under -O
    if METHOD not in ("rest", "pnfs"):
        raise ValueError('METHOD must be "rest" or "pnfs", got %r' % (METHOD,))

    if len(files) > 1:
        print( "Checking %d files:" % len(files) )
    cached = 0
    pending = 0
    n = 0

    # If we're in verbose mode, the per-file output fights with
    # the progress bar, so disable the progress bar
    progbar = None if verbose_flag else ProgressBar(len(files))

    c = make_curl() if METHOD == "rest" else None

    try:
        for f in files:
            if METHOD == "rest":
                qos, targetQos = get_file_qos(c, f)
                if "ONLINE" in qos: cached += 1
                if "disk" in targetQos: pending += 1
                if verbose_flag:
                    print( f, qos, "pending" if targetQos else "")
            else:
                this_cached = is_file_online_pnfs(f)
                if this_cached: cached += 1
                if verbose_flag:
                    print( f, "ONLINE" if this_cached else "NEARLINE")

            n += 1
            if progbar is not None: progbar.Update(n)
    finally:
        # Release the curl handle (and its connection) even if a query
        # raised part-way through the list
        if c is not None:
            c.close()

    if progbar is not None: progbar.Update(progbar.total)

    # We don't count pending files with the pnfs method, so set it to
    # something meaningless
    if METHOD == "pnfs":
        pending = -1
    return (cached, pending, n)
226 
def make_curl()
Definition: cache_state.py:61
def is_file_online_pnfs(f)
Definition: cache_state.py:177
def get_file_qos(c, filename)
Definition: cache_state.py:92
def FilelistCacheCount(files, verbose_flag, METHOD="rest")
Definition: cache_state.py:186
def cache_state.FilelistPrestageRequest (   files,
  verbose_flag 
)

Definition at line 228 of file cache_state.py.

def FilelistPrestageRequest(files, verbose_flag):
    """Request a prestage of every file in `files`.

    files:        sequence of file names/paths to prestage
    verbose_flag: if true, print whether each individual request
                  succeeded or failed

    Returns: (n_request_succeeded, n) where `n` is the number of files
             and `n_request_succeeded` is how many of the requests
             request_prestage() reported as successful.
    """
    n = len(files)
    if n > 1:
        print( "Prestaging %d files:" % n )
    if n == 0:
        # Nothing to request, so don't bother creating a curl handle
        return (0, 0)

    n_request_succeeded = 0
    c = make_curl()
    try:
        for f in files:
            success = request_prestage(c, f)
            if success: n_request_succeeded += 1
            if verbose_flag:
                print( f, "request succeeded" if success else "request failed" )
    finally:
        # Release the curl handle (and its connection) even if a
        # request raised part-way through the list
        c.close()

    return (n_request_succeeded, n)
243 
def make_curl()
Definition: cache_state.py:61
def request_prestage(c, filename)
Definition: cache_state.py:148
def FilelistPrestageRequest(files, verbose_flag)
Definition: cache_state.py:228
def cache_state.filename_to_namespace (   filename)

Definition at line 80 of file cache_state.py.

def filename_to_namespace(filename):
    """Convert `filename` to the path form used in dcache REST URLs.

    Three leading forms are rewritten; anything else is returned
    unchanged:

      root://fndca1.fnal.gov:1094<path>  ->  <path>
      /pnfs/dune<rest>                   ->  /pnfs/fnal.gov/usr/dune<rest>
      enstore:/pnfs/dune<rest>           ->  /pnfs/fnal.gov/usr/dune<rest>

    Only the leading prefix is rewritten: a later occurrence of the same
    text inside the path is left alone.
    """
    xrootd_prefix = "root://fndca1.fnal.gov:1094"
    if filename.startswith(xrootd_prefix):
        return filename[len(xrootd_prefix):]

    for prefix in ("/pnfs/dune", "enstore:/pnfs/dune"):
        if filename.startswith(prefix):
            # Slice rather than str.replace(), which would also rewrite
            # any repeat of the prefix further along the path
            return "/pnfs/fnal.gov/usr/dune" + filename[len(prefix):]

    return filename
90 
def filename_to_namespace(filename)
Definition: cache_state.py:80
def cache_state.get_file_qos (   c,
  filename 
)
Using curl object `c`, find the "QoS" of `filename`.

QoS is "disk", "tape" or "disk+tape", with the obvious meanings

Returns: (locality, targetQos) where locality is the file's "fileLocality"
         (eg, ONLINE_AND_NEARLINE) and targetQos is non-empty if there is
         an outstanding prestage request. locality will be empty if there
         is an error (eg, file does not exist)

Uses the dcache REST API frontend, documented in the dcache User Guide, eg:

https://www.dcache.org/manuals/UserGuide-6.0/frontend.shtml

Definition at line 92 of file cache_state.py.

def get_file_qos(c, filename):
    """Using curl object `c`, find the locality and target QoS of `filename`.

    Returns: (locality, targetQos) where locality is the "fileLocality"
             reported by dcache (eg, ONLINE_AND_NEARLINE) and targetQos
             is non-empty if there is an outstanding prestage
             request. Both are empty if the response carries neither
             field or is not a JSON object (eg, file does not exist)

    Uses the dcache REST API frontend, documented in the dcache User Guide, eg:

    https://www.dcache.org/manuals/UserGuide-6.0/frontend.shtml

    """

    # qos=true in the URL causes dcache to tell us whether the file's
    # on disk or tape, and also the "targetQos", which exists if
    # there's an outstanding prestage request.
    #
    # Update 2020-10-02: it looks like qos is sometimes incorrect, or
    # at least, not what I thought it was, since online files can have
    # fileLocality=ONLINE_AND_NEARLINE but qos=tape. So we use
    # fileLocality for the online-ness of the file, but still request
    # qos because it gives us the target qos if there's an outstanding
    # prestage request
    url="{host}/{path}?qos=true&locality=true".format(host=DCACHE_REST_BASE_URL, path=filename_to_namespace(filename))

    # The handle may have been used by request_prestage(), which leaves
    # it in POST mode; make sure this query goes out as a GET
    c.setopt(c.HTTPGET, 1)
    c.setopt(c.URL, url)
    mybuffer = BytesIO()
    c.setopt(c.WRITEFUNCTION, mybuffer.write)
    c.perform()

    # Body is a byte string.
    # We have to know the encoding in order to print it to a text file
    # such as standard output.
    body = mybuffer.getvalue().decode('iso-8859-1')

    try:
        j = json.loads(body)
    except ValueError:
        # Not JSON (eg, an empty body or an error page): report
        # "unknown" rather than raising
        return ("", "")
    if not isinstance(j, dict):
        return ("", "")

    # "currentQos" turns out to not quite be right - see comment above -
    # so fileLocality is what we report as the file's state
    return (j.get("fileLocality", ""), j.get("targetQos", ""))
141 
static bool format(QChar::Decomposition tag, QString &str, int index, int len)
Definition: qstring.cpp:11496
def filename_to_namespace(filename)
Definition: cache_state.py:80
void decode(std::any const &a, Hep2Vector &result)
Definition: CLHEP_ps.h:12
def get_file_qos(c, filename)
Definition: cache_state.py:92
def cache_state.is_file_online (   c,
  filename 
)
Using curl object `c`, returns whether `filename` is online

Definition at line 143 of file cache_state.py.

def is_file_online(c, filename):
    """Report whether `filename` is online, querying through curl object `c`.

    The file counts as online when the first element returned by
    get_file_qos() contains "ONLINE".
    """
    locality = get_file_qos(c, filename)[0]
    return "ONLINE" in locality
146 
def is_file_online(c, filename)
Definition: cache_state.py:143
def get_file_qos(c, filename)
Definition: cache_state.py:92
def cache_state.is_file_online_pnfs (   f)

Definition at line 177 of file cache_state.py.

def is_file_online_pnfs(f):
    """Return whether file `f` is online, according to the pnfs filesystem.

    Reads the first line of the special file
    "<directory>/.(get)(<filename>)(locality)" that sits alongside `f`
    and reports whether it contains 'ONLINE'.

    Raises whatever open() raises if that special file cannot be read.
    """
    path, filename = os.path.split(f)
    stat_file = "%s/.(get)(%s)(locality)" % (path, filename)
    # `with` guarantees the file is closed even if the read fails
    with open(stat_file) as theStatFile:
        state = theStatFile.readline()
    return 'ONLINE' in state
184 
int open(const char *, int)
Opens a file descriptor.
def is_file_online_pnfs(f)
Definition: cache_state.py:177
def cache_state.make_curl ( )
Returns a pycurl object with the necessary fields set for Fermilab
authentication.

The object can be reused for multiple requests to the
dcache REST API and curl will reuse the connection, which should speed
things up

Definition at line 61 of file cache_state.py.

def make_curl():
    """Build a pycurl object configured for Fermilab authentication.

    The returned object can be reused for multiple requests to the
    dcache REST API; curl will then reuse the connection, which should
    speed things up.
    """
    handle = pycurl.Curl()

    # Every certificate/key option is pointed at the same file,
    # X509_USER_PROXY
    proxy_options = (
        handle.CAINFO,
        handle.SSLCERT,
        handle.SSLKEY,
        handle.SSH_PRIVATE_KEYFILE,
    )
    for option in proxy_options:
        handle.setopt(option, X509_USER_PROXY)

    handle.setopt(handle.FOLLOWLOCATION, True)
    handle.setopt(handle.CAPATH, "/etc/grid-security/certificates")

    return handle
78 
def make_curl()
Definition: cache_state.py:61
def cache_state.request_prestage (   c,
  filename 
)
Using curl object `c`, request a prestage for `filename`

Returns whether the request succeeded (according to dcache)

Uses a HTTP post request in a very specific format to request a prestage of a file. Adapted from:

https://github.com/DmitryLitvintsev/scripts/blob/master/bash/bring-online.sh

Uses the dcache REST API frontend, documented in the dcache User Guide, eg:

https://www.dcache.org/manuals/UserGuide-6.0/frontend.shtml

Definition at line 148 of file cache_state.py.

def request_prestage(c, filename):
    """Using curl object `c`, request a prestage for `filename`

    Returns whether the request succeeded (according to dcache). A
    response that is not a JSON object with "status" equal to
    "success" - including an empty or non-JSON body - counts as a
    failure.

    Uses a HTTP post request in a very specific format to request a prestage of a file. Adapted from:

    https://github.com/DmitryLitvintsev/scripts/blob/master/bash/bring-online.sh

    Uses the dcache REST API frontend, documented in the dcache User Guide, eg:

    https://www.dcache.org/manuals/UserGuide-6.0/frontend.shtml
    """
    c.setopt(c.POSTFIELDS, """{"action" : "qos", "target" : "disk+tape"}""")
    c.setopt(c.HTTPHEADER, ["Accept: application/json", "Content-Type: application/json"])
    c.setopt(c.POST, 1)
    c.setopt(c.URL, "{host}/{path}".format(host=DCACHE_REST_BASE_URL, path=filename_to_namespace(filename)))
    mybuffer = BytesIO()
    c.setopt(c.WRITEFUNCTION, mybuffer.write)
    c.perform()

    # Body is a byte string.
    # We have to know the encoding in order to print it to a text file
    # such as standard output.
    body = mybuffer.getvalue().decode('iso-8859-1')
    try:
        j = json.loads(body)
    except ValueError:
        # An empty or non-JSON reply means dcache did not confirm the
        # request, so report it as failed instead of raising
        return False
    return isinstance(j, dict) and j.get("status") == "success"
175 
def request_prestage(c, filename)
Definition: cache_state.py:148
static bool format(QChar::Decomposition tag, QString &str, int index, int len)
Definition: qstring.cpp:11496
def filename_to_namespace(filename)
Definition: cache_state.py:80
void decode(std::any const &a, Hep2Vector &result)
Definition: CLHEP_ps.h:12

Variable Documentation

int cache_state.a = 0

Definition at line 357 of file cache_state.py.

cache_state.action

Definition at line 317 of file cache_state.py.

int cache_state.announce = n_files>1

Definition at line 427 of file cache_state.py.

cache_state.args = parser.parse_args()

Definition at line 321 of file cache_state.py.

cache_state.cache_count = 0

Definition at line 336 of file cache_state.py.

int cache_state.cache_frac_str = (" (%d%%)" % round(cache_count/total*100)) if total>0

Definition at line 437 of file cache_state.py.

cache_state.choices

Definition at line 319 of file cache_state.py.

cache_state.dataset_name

Definition at line 342 of file cache_state.py.

string cache_state.DCACHE_REST_BASE_URL = "https://fndca.fnal.gov:3880/api/v1/namespace"

Definition at line 32 of file cache_state.py.

cache_state.default

Definition at line 300 of file cache_state.py.

cache_state.dest

Definition at line 306 of file cache_state.py.

string cache_state.dimensions = None

Definition at line 345 of file cache_state.py.

cache_state.directory = m.group(1)

Definition at line 417 of file cache_state.py.

cache_state.end

Definition at line 342 of file cache_state.py.

cache_state.ENSTORE_PATTERN = re.compile(r"^enstore:([^(]+)(\([^)]+\))?")

Definition at line 26 of file cache_state.py.

cache_state.f

Definition at line 421 of file cache_state.py.

cache_state.False

Definition at line 317 of file cache_state.py.

cache_state.file

Definition at line 421 of file cache_state.py.

list cache_state.filelist = None if args.dataset_name else args.files

Definition at line 332 of file cache_state.py.

cache_state.fullpath = os.path.join(directory, f)

Definition at line 418 of file cache_state.py.

cache_state.gp = parser.add_mutually_exclusive_group()

Definition at line 297 of file cache_state.py.

cache_state.help

Definition at line 302 of file cache_state.py.

cache_state.int

Definition at line 315 of file cache_state.py.

cache_state.l = loc["location"]

Definition at line 414 of file cache_state.py.

cache_state.locs = sam.locateFile(f)

Definition at line 360 of file cache_state.py.

cache_state.m = ENSTORE_PATTERN.match(l)

Definition at line 415 of file cache_state.py.

cache_state.metavar

Definition at line 301 of file cache_state.py.

cache_state.miss_count = 0

Definition at line 424 of file cache_state.py.

int cache_state.miss_frac_str = (" (%d%%)" % round(miss_count/total*100)) if total>0

Definition at line 438 of file cache_state.py.

cache_state.n

Definition at line 430 of file cache_state.py.

cache_state.n_files = len(filelist)

Definition at line 426 of file cache_state.py.

cache_state.nargs

Definition at line 299 of file cache_state.py.

cache_state.ngood

Definition at line 430 of file cache_state.py.

cache_state.parser = argparse.ArgumentParser(epilog=examples, formatter_class=argparse.RawDescriptionHelpFormatter)

Definition at line 295 of file cache_state.py.

cache_state.pending_count

Definition at line 433 of file cache_state.py.

string cache_state.pending_string = ""

Definition at line 442 of file cache_state.py.

cache_state.PNFS_DIR_PATTERN = re.compile(r"/pnfs/(?P<area>[^/]+)")

Definition at line 20 of file cache_state.py.

cache_state.sam = swc.SAMWebClient("dune")

Definition at line 334 of file cache_state.py.

list cache_state.samlist = sam.listFilesAndLocations(dimensions=dimensions, filter_path="enstore")

Definition at line 351 of file cache_state.py.

cache_state.stderr

Definition at line 327 of file cache_state.py.

cache_state.stdout

Definition at line 327 of file cache_state.py.

cache_state.thislist = sam.listFiles(defname=args.dataset_name)

Definition at line 354 of file cache_state.py.

cache_state.total = float(cache_count + miss_count)

Definition at line 433 of file cache_state.py.

cache_state.type

Definition at line 315 of file cache_state.py.

cache_state.X509_USER_PROXY = os.getenv("X509_USER_PROXY", "/tmp/x509up_u%d" % os.getuid())

Definition at line 19 of file cache_state.py.