"""Tools for handling logged network data.
The :mod:`~.logging.tools` module provides methods and objects designed to
simplify loading and handling logged network data. The following methods are
available:
- :func:`.dump_to_list` for loading log file data into a list
- :func:`.dump_to_array` for loading log file data into a numpy array
- :func:`.dump_to_csv` for writing log file data to a CSV file
.. sectionauthor:: Asher Bender <a.bender@acfr.usyd.edu.au>
.. codeauthor:: Asher Bender <a.bender@acfr.usyd.edu.au>
"""
import csv
import os
import sys

import numpy as np

import mcl.logging.file
def _is_string_list(arg):
"""Return True if input is a list of strings."""
try:
if all([isinstance(itm, basestring) for itm in arg]):
return True
except:
pass
return False
def dump_to_list(source, min_time=None, max_time=None, message=False,
                 metadata=True):
    """Load log file data into a list.

    The :func:`.dump_to_list` function parses a log file or directory of log
    files into a list. When `metadata` is :data:`True`, each element in the
    list is stored exactly as it is returned by the log reader. The reader's
    record layout is defined outside this module; it is expected to look
    like::

        {'elapsed_time': float(),
         'topic': str(),
         'payload': dict or Message()}

    Only the ``'payload'`` key is accessed by this function (when `metadata`
    is :data:`False`).

    Args:
        source (str): Path to network data log(s) to convert into a
            list. `source` can point to a single file or a directory containing
            multiple log files. If the log files are split, provide the prefix
            to the log files.
        min_time (float): Minimum time to extract from dataset. Passed
            unchanged to the log reader.
        max_time (float): Maximum time to extract from dataset. Passed
            unchanged to the log reader.
        message (bool): If set to :data:`True` messages will automatically be
            decoded into the MCL :class:`.Message` type stored in the log
            file(s). If set to :data:`False` (default), message data is
            returned as a dictionary. Note: to read data as MCL messages, the
            messages must be loaded into the namespace and recorded in the log
            file header.
        metadata (bool): If set to :data:`True` (default), each element in the
            list will store a dictionary containing the elapsed time, topic and
            payload. If set to :data:`False` only the payload will be stored in
            each element of the list.

    Returns:
        list: A list of network messages, in the order they are returned by
        the log reader (expected to be chronological).

    Raises:
        TypeError: If `metadata` is not a boolean.

    Any exception raised by the log reader propagates to the caller unchanged.

    """

    # Force metadata flag type.
    if not isinstance(metadata, bool):
        msg = "'metadata' must be a boolean."
        raise TypeError(msg)

    # Create object for reading a directory of network logs (or a single,
    # possibly split, log file) in time order.
    if os.path.isdir(source):
        reader_type = mcl.logging.file.ReadDirectory
    else:
        reader_type = mcl.logging.file.ReadFile
    dumps = reader_type(source,
                        min_time=min_time,
                        max_time=max_time,
                        message=message)

    # Read records until the reader returns a falsy value (no more data).
    messages = list()
    while True:
        record = dumps.read()
        if not record:
            break

        # Keep the whole record or only its payload.
        if metadata:
            messages.append(record)
        else:
            messages.append(record['payload'])

    return messages
def dump_to_array(source, keys, min_time=None, max_time=None):
    """Load log file data into a numpy array.

    The :func:`.dump_to_array` function parses network data logs into a
    :obj:`numpy:numpy.array`. To parse data into a :obj:`numpy:numpy.array`,
    the following conditions must be met:

        - All messages loaded must be the same MCL :class:`.Message` type.
        - All logged messages must contain the specified keys.
        - The contents of the message keys must be convertible to a float.

    Args:
        source (str): Path to network data log(s) to convert into a
            list. `source` can point to a single file or a directory containing
            multiple log files. If the log files are split, provide the prefix
            to the log files.
        keys (list): List of message attributes to load into numpy array. The
            items in this list specify what is copied into the numpy columns.
        min_time (float): Minimum time to extract from dataset.
        max_time (float): Maximum time to extract from dataset.

    Returns:
        numpy.array: A :obj:`numpy:numpy.array` containing the requested keys
        (columns) from each message (rows) in the network log. :data:`None`
        is returned if no messages were loaded.

    Raises:
        IOError: If the input `source` does not exist (presumably raised by
            the log readers; it is not raised directly by this function).
        TypeError: If the input `keys` is not a list of strings. A TypeError
            will also be raised if the loaded message packets are not all of
            the same type (including packets without a ``'name'`` entry), or
            if a requested value cannot be converted to a float.
        KeyError: If the input keys do not exist in the first loaded message.

    """

    # Ensure input keys is a list of strings.
    if not _is_string_list(keys):
        raise TypeError("'keys' must be a list of strings.")

    # Load network logs into a list of payloads (metadata stripped).
    message_list = dump_to_list(source,
                                min_time=min_time,
                                max_time=max_time,
                                metadata=False)

    # Return nothing if there is no data in range.
    if not message_list:
        return None

    # Ensure all keys exist in the first message before proceeding.
    first = message_list[0]
    for key in keys:
        if key not in first:
            msg = "The key '%s' does not exist in the message objects "
            msg += "stored in '%s'."
            raise KeyError(msg % (key, source))

    # Ensure all messages are the same object. Messages are compared through
    # their 'name' entry. A missing entry is reported as a type mismatch
    # rather than leaking a KeyError while the error text is being built.
    expected_name = first['name'] if 'name' in first else None
    for message in message_list:
        has_name = 'name' in message
        if not has_name or message['name'] != expected_name:
            found_name = message['name'] if has_name else None
            msg = "Found a '%s' message object. "
            msg += "Expected all message objects to be '%s' messages."
            raise TypeError(msg % (found_name, expected_name))

    # Allocate memory for array: one row per message, one column per key.
    array = np.zeros((len(message_list), len(keys)))

    # Copy message fields into array. Values are converted one at a time so
    # the error message names the key that actually failed.
    for (i, message) in enumerate(message_list):
        for (j, key) in enumerate(keys):
            try:
                array[i, j] = float(message[key])
            except (KeyError, TypeError, ValueError):
                # KeyError (key absent from a later message) is included so
                # this path keeps raising TypeError, as it always has.
                msg = "Could not convert the key '%s' to a float. "
                raise TypeError(msg % key)

    return array
def dump_to_csv(source, csv_file, keys, min_time=None, max_time=None):
    """Write log file data to a CSV file.

    One row is written per loaded message, with one column per entry in
    `keys` (in the order given). No header row is written. If no messages are
    loaded, the function returns without creating the CSV file.

    Args:
        source (str): Path to network data log(s) to convert into a
            list. `source` can point to a single file or a directory containing
            multiple log files. If the log files are split, provide the prefix
            to the log files.
        csv_file (str): Path to write CSV file.
        keys (list): List of message attributes to load into columns of the CSV
            file.
        min_time (float): Minimum time to extract from dataset.
        max_time (float): Maximum time to extract from dataset.

    Raises:
        IOError: If the input `source` does not exist (presumably raised by
            the log readers; it is not raised directly by this function).
        TypeError: If the input `keys` is not a list of strings. A TypeError
            will also be raised if the loaded message packets are not all of
            the same type (including packets without a ``'name'`` entry).
        KeyError: If the input keys do not exist in the first loaded message.
        Exception: If a row could not be written to the CSV file.

    """

    # Ensure input keys is a list of strings.
    if not _is_string_list(keys):
        raise TypeError("'keys' must be a list of strings.")

    # Load message dumps into a list of payloads (metadata stripped).
    message_list = dump_to_list(source,
                                min_time=min_time,
                                max_time=max_time,
                                metadata=False)

    # Return nothing if there is no data in range.
    if not message_list:
        return None

    # Ensure all keys exist in the first message before proceeding.
    first = message_list[0]
    for key in keys:
        if key not in first:
            msg = "The key '%s' does not exist in the message objects "
            msg += "stored in '%s'."
            raise KeyError(msg % (key, source))

    # Ensure all messages are the same object. Messages are compared through
    # their 'name' entry. A missing entry is reported as a type mismatch
    # rather than leaking a KeyError while the error text is being built.
    expected_name = first['name'] if 'name' in first else None
    for message in message_list:
        has_name = 'name' in message
        if not has_name or message['name'] != expected_name:
            found_name = message['name'] if has_name else None
            msg = "Found a '%s' message object. "
            msg += "Expected all message objects to be '%s' messages."
            raise TypeError(msg % (found_name, expected_name))

    # The csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        csv_handle = open(csv_file, 'w', newline='')
    else:
        csv_handle = open(csv_file, 'wb')

    # Copy message fields into the CSV file, one row per message.
    with csv_handle as f:
        csv_writer = csv.writer(f)
        for message in message_list:
            try:
                csv_writer.writerow([message[key] for key in keys])
            except Exception:  # pragma: no cover
                msg = 'Could not convert keys in the message:'
                msg += '\n\n%s\n\n'
                msg += 'into an array.'
                raise Exception(msg % str(message))