import random
from numpy import random as np_random
from typing import Generator
from .utils import gen_fake_cpu_work, gen_random_files
class GenFunction(object):
    """Base class for synthetic request generator functions.

    Subclasses implement :meth:`gen_day_elements` to produce all the
    request entries of a single simulated day.
    """

    def __init__(self):
        # Index of the current simulated day; -1 means "not set yet".
        self._day_idx = -1
        # Number of requests to generate per day; -1 means "not set yet".
        self._num_req_x_day = -1

    @property
    def day_idx(self):
        """int: index of the current day (set by the Generator object)."""
        # BUG FIX: the original returned ``self.day_idx`` (the property
        # itself), causing infinite recursion on any read.
        return self._day_idx

    @day_idx.setter
    def day_idx(self, value: int):
        """Set the index of the current day.

        Usually set by the Generator object to indicate
        the current day sequence.

        :param value: current day index
        :type value: int
        :return: self
        :rtype: GenFunction
        """
        self._day_idx = value
        return self

    @property
    def num_req_x_day(self):
        """int: number of requests per day."""
        # BUG FIX: the original returned ``self.num_req_x_day`` (the
        # property itself), causing infinite recursion on any read.
        return self._num_req_x_day

    @num_req_x_day.setter
    def num_req_x_day(self, value: int):
        """Set the number of requests per day.

        :param value: number of requests
        :type value: int
        :return: self
        :rtype: GenFunction
        """
        self._num_req_x_day = value
        return self

    def gen_day_elements(self, max_num: int = -1) -> Generator[int, None, None]:
        """Generates all the day's entries.

        Must be overridden by subclasses.

        :param max_num: maximum number of requests, defaults to -1
        :type max_num: int, optional
        :yield: the percentage of work done
        :rtype: Generator[int, None, None]
        """
        raise NotImplementedError

    @property
    def name(self):
        """str: human readable name of the generator (its repr)."""
        return repr(self)
class RandomGenerator(GenFunction):
    """Generator that draws requests uniformly at random from a fixed
    population of files."""

    def __init__(self, num_files: int, min_file_size: int, max_file_size: int,
                 size_generator_function: str):
        """Initialize the random function parameters.

        :param num_files: total number of files
        :type num_files: int
        :param min_file_size: minimum size of the files
        :type min_file_size: int
        :param max_file_size: maximum size of the files
        :type max_file_size: int
        :param size_generator_function: name of the size generator function
        :type size_generator_function: str
        """
        super().__init__()
        self._num_files: int = num_files
        self._min_file_size: int = min_file_size
        self._max_file_size: int = max_file_size
        self._size_generator_function: str = size_generator_function
        # Mapping filename -> file info dict (at least 'Size' and
        # 'Protocol' keys, as read in gen_day_elements below).
        self._files = gen_random_files(
            num_files, min_file_size, max_file_size, size_generator_function
        )

    def __repr__(self):
        return "Random Generator"

    def gen_day_elements(self, max_num: int = -1) -> Generator[tuple, None, None]:
        """Generates all the day's entries.

        Each entry is a ``(request, percentage)`` tuple; this generator
        does not track progress, so the percentage slot is ``None``.

        NOTE: with the default ``max_num=-1`` nothing is yielded, since
        ``range(-1)`` is empty — callers must pass a positive count.

        :param max_num: maximum number of requests, defaults to -1
        :type max_num: int, optional
        :yield: a (request dict, None) tuple per generated request
        :rtype: Generator[tuple, None, None]
        """
        # FIX: the original annotation/docstring claimed
        # Generator[int, None, None] / "percentage of work done", but the
        # body yields (dict, None) tuples.
        filenames: list = list(self._files.keys())
        for _ in range(max_num):
            cur_file = random.choice(filenames)
            yield {
                'Filename': cur_file,
                'Size': self._files[cur_file]['Size'],
                'Protocol': self._files[cur_file]['Protocol'],
            }, None
class HighFrequencyDataset(GenFunction):
    """Dataset to test the frequency aspect.

    Files are split into a "more requested" and a "less requested"
    group; the number of requests of each file is drawn from a Poisson
    distribution whose lambda depends on the group.
    """

    def __init__(self, num_files: int, min_file_size: int, max_file_size: int,
                 lambda_less_req_files: float, lambda_more_req_files: float,
                 perc_more_req_files: float, perc_files_x_day: float,
                 size_generator_function: str):
        """Initialize the frequency function parameters.

        :param num_files: total number of files
        :type num_files: int
        :param min_file_size: minimum size of the files
        :type min_file_size: int
        :param max_file_size: maximum size of the files
        :type max_file_size: int
        :param lambda_less_req_files: Poisson distribution lambda for less requested files
        :type lambda_less_req_files: float
        :param lambda_more_req_files: Poisson distribution lambda for more requested files
        :type lambda_more_req_files: float
        :param perc_more_req_files: percentage of more requested files
        :type perc_more_req_files: float
        :param perc_files_x_day: percentage of files per day (selected files)
        :type perc_files_x_day: float
        :param size_generator_function: name of the size generator function
        :type size_generator_function: str
        """
        super().__init__()
        self._num_files: int = num_files
        self._min_file_size: int = min_file_size
        self._max_file_size: int = max_file_size
        self._lambda_less_req_files: float = lambda_less_req_files
        self._lambda_more_req_files: float = lambda_more_req_files
        self._perc_more_req_files: float = perc_more_req_files
        self._perc_files_x_day: float = perc_files_x_day
        self._size_generator_function: str = size_generator_function
        # Split the file population between the two groups.
        self._num_more_req_files = int(
            (num_files / 100.) * perc_more_req_files)
        self._num_less_req_files = num_files - self._num_more_req_files
        self._more_req_files = gen_random_files(
            self._num_more_req_files, min_file_size, max_file_size,
            size_generator_function
        )
        # start_from offsets the generated names so the two groups are
        # disjoint (checked by the assert below).
        self._less_req_files = gen_random_files(
            self._num_less_req_files, min_file_size, max_file_size,
            size_generator_function,
            start_from=self._num_more_req_files,
        )
        assert len(set(self._more_req_files.keys()) &
                   set(self._less_req_files.keys())) == 0
        # Per-file max request count, Poisson-distributed.
        # NOTE(review): keys are sequential indices produced by enumerate;
        # this assumes gen_random_files names files 0..n-1 (and
        # start_from..start_from+n-1) — confirm against utils.
        self._more_req_files_freq = {
            filename: freq
            for filename, freq in enumerate(
                np_random.poisson(
                    lam=self._lambda_more_req_files,
                    size=self._num_more_req_files,
                )
            )
        }
        self._less_req_files_freq = {
            filename: freq
            for filename, freq in enumerate(
                np_random.poisson(
                    lam=self._lambda_less_req_files,
                    size=self._num_less_req_files,
                ),
                self._num_more_req_files
            )
        }

    def __repr__(self):
        return "High Frequency Dataset"

    def gen_day_elements(self, max_num: int = -1) -> Generator[tuple, None, None]:
        """Generates all the day's entries.

        A random subset of ``perc_files_x_day`` percent of the files is
        visible each day; each visible file is requested up to its
        Poisson-drawn frequency.

        NOTE: ``max_num`` is accepted for interface compatibility but is
        ignored — the number of requests is driven by the per-file
        frequencies.

        :param max_num: maximum number of requests, defaults to -1
        :type max_num: int, optional
        :yield: a (request dict, percentage of work done) tuple
        :rtype: Generator[tuple, None, None]
        """
        file_perc_x_day = self._perc_files_x_day / 100.
        filenames = list(self._more_req_files.keys()) + \
            list(self._less_req_files.keys())
        num_visible_files = int(len(filenames) * file_perc_x_day)
        random.shuffle(filenames)
        filenames = filenames[:num_visible_files]
        all_requests = []
        for cur_file in filenames:
            if cur_file in self._more_req_files_freq:
                max_num_req = self._more_req_files_freq[cur_file]
                file_info = self._more_req_files[cur_file]
            elif cur_file in self._less_req_files_freq:
                max_num_req = self._less_req_files_freq[cur_file]
                file_info = self._less_req_files[cur_file]
            # 0..max_num_req requests for this file.
            for _ in range(random.randint(0, max_num_req)):
                all_requests.append({
                    'Filename': cur_file,
                    **file_info.copy(),
                })
        # Interleave requests of different files.
        random.shuffle(all_requests)
        for num, elm in enumerate(all_requests):
            yield elm, float(num / len(all_requests)) * 100.
class RecencyFocusedDataset(GenFunction):
    """Dataset to test the recency aspect."""

    def __init__(self, num_files: int, min_file_size: int, max_file_size: int,
                 perc_files_x_day: float, size_generator_function: str):
        """Initialize the recency function parameters.

        :param num_files: total number of files
        :type num_files: int
        :param min_file_size: minimum size of the files
        :type min_file_size: int
        :param max_file_size: maximum size of the files
        :type max_file_size: int
        :param perc_files_x_day: percentage of files per day (selected files)
        :type perc_files_x_day: float
        :param size_generator_function: name of the size generator function
        :type size_generator_function: str
        """
        super().__init__()
        self._num_files: int = num_files
        self._min_file_size: int = min_file_size
        self._max_file_size: int = max_file_size
        self._perc_files_x_day: float = perc_files_x_day
        self._size_generator_function: str = size_generator_function
        self._files = gen_random_files(
            num_files, min_file_size, max_file_size,
            size_generator_function
        )

    def __repr__(self):
        return "Recency Focused Dataset"

    def gen_day_elements(self, max_num: int = -1) -> Generator[tuple, None, None]:
        """Generates all the day's entries.

        Cycles over a daily random subset of the files (occasionally
        reversing the visit order) until ``max_num`` requests have been
        collected, then yields them with a progress percentage.

        :param max_num: maximum number of requests, defaults to -1
        :type max_num: int, optional
        :yield: a (request dict, percentage of work done) tuple
        :rtype: Generator[tuple, None, None]
        """
        all_requests = []
        file_perc_x_day = self._perc_files_x_day / 100.
        filenames = list(self._files.keys())
        num_visible_files = int(len(self._files) * file_perc_x_day)
        random.shuffle(filenames)
        filenames = filenames[:num_visible_files]
        # BUG FIX: with no visible files (e.g. a small population and a
        # low percentage truncating to 0) the original while loop never
        # added requests and spun forever.
        if not filenames:
            return
        while len(all_requests) < max_num:
            for cur_file in filenames:
                file_info = self._files[cur_file]
                all_requests.append({
                    'Filename': cur_file,
                    **file_info.copy(),
                })
                if len(all_requests) == max_num:
                    break
            # Randomly flip the visit order to vary recency patterns.
            if random.random() > 0.5:
                filenames = list(reversed(filenames))
        for num, elm in enumerate(all_requests):
            yield elm, float(num / len(all_requests)) * 100.
class SizeFocusedDataset(GenFunction):
    """Dataset to test the different distribution of file sizes."""

    def __init__(self, num_files: int, min_file_size: int, max_file_size: int,
                 noise_min_file_size: int, noise_max_file_size: int,
                 perc_noise: float, perc_files_x_day: float,
                 size_generator_function: str):
        """Initialize the size function parameters.

        :param num_files: total number of files
        :type num_files: int
        :param min_file_size: minimum size of the files
        :type min_file_size: int
        :param max_file_size: maximum size of the files
        :type max_file_size: int
        :param noise_min_file_size: minimum size of the noise files
        :type noise_min_file_size: int
        :param noise_max_file_size: maximum size of the noise files
        :type noise_max_file_size: int
        :param perc_noise: percentage of noise files
        :type perc_noise: float
        :param perc_files_x_day: percentage of files per day (selected files)
        :type perc_files_x_day: float
        :param size_generator_function: name of the size generator function
        :type size_generator_function: str
        """
        super().__init__()
        self._num_files: int = num_files
        self._min_file_size: int = min_file_size
        self._max_file_size: int = max_file_size
        self._noise_min_file_size: int = noise_min_file_size
        self._noise_max_file_size: int = noise_max_file_size
        self._perc_noise: float = perc_noise
        self._perc_files_x_day: float = perc_files_x_day
        self._size_generator_function: str = size_generator_function
        num_noise_files = int((num_files / 100.) * self._perc_noise)
        num_normal_files = num_files - num_noise_files
        # Population = normal files plus "noise" files drawn from a
        # different size range; start_from keeps the names disjoint.
        self._files = {
            **gen_random_files(
                num_normal_files, min_file_size, max_file_size,
                size_generator_function,
            ),
            **gen_random_files(
                num_noise_files, noise_min_file_size, noise_max_file_size,
                size_generator_function,
                start_from=num_normal_files
            )
        }

    def __repr__(self):
        return "Size Focused Dataset"

    def gen_day_elements(self, max_num: int = -1) -> Generator[tuple, None, None]:
        """Generates all the day's entries.

        Repeatedly shuffles a daily random subset of the files and
        appends requests until ``max_num`` requests have been collected,
        then yields them with a progress percentage.

        :param max_num: maximum number of requests, defaults to -1
        :type max_num: int, optional
        :yield: a (request dict, percentage of work done) tuple
        :rtype: Generator[tuple, None, None]
        """
        all_requests = []
        file_perc_x_day = self._perc_files_x_day / 100.
        filenames = list(self._files.keys())
        num_visible_files = int(len(self._files) * file_perc_x_day)
        random.shuffle(filenames)
        filenames = filenames[:num_visible_files]
        # BUG FIX: with no visible files (percentage truncating to 0)
        # the original while loop never added requests and spun forever.
        if not filenames:
            return
        while len(all_requests) < max_num:
            random.shuffle(filenames)
            for cur_file in filenames:
                if len(all_requests) == max_num:
                    break
                all_requests.append({
                    'Filename': cur_file,
                    **self._files[cur_file].copy(),
                })
        for num, elm in enumerate(all_requests):
            yield elm, float(num / len(all_requests)) * 100.